From 751d5a49b85dd9bfcf3fdaa99297c348b8ae80bc Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 27 Mar 2026 12:17:12 -0400 Subject: [PATCH] Cleaner vocal synth: less static, click-free note transitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Jitter reduced (0.3% → 0.1%), shimmer reduced (2% → 0.8%) - Breath noise halved (0.08 → 0.04), mix 85/15 → 92/8 - 10ms fade in/out on every vocal note prevents clicks - Smoother syllable transitions Co-Authored-By: Claude Opus 4.6 (1M context) --- pytheory/play.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pytheory/play.py b/pytheory/play.py index c2c1a57..4e90b4c 100644 --- a/pytheory/play.py +++ b/pytheory/play.py @@ -945,8 +945,8 @@ def vocal_wave(hz, peak=SAMPLE_PEAK, n_samples=SAMPLE_RATE, lyric="ah"): # Asymmetric pulse: slow open phase, sharp closure, then closed phase. # Much more "voice-like" than a sine or sawtooth. # Jitter (pitch irregularity) + shimmer (amplitude irregularity) - jitter = rng.normal(0, hz * 0.003, n_samples) # ~0.3% pitch jitter - shimmer = 1.0 + rng.normal(0, 0.02, n_samples) # ~2% amp shimmer + jitter = rng.normal(0, hz * 0.001, n_samples) # ~0.1% pitch jitter + shimmer = 1.0 + rng.normal(0, 0.008, n_samples) # ~0.8% amp shimmer # Vibrato vib = hz * 0.001 * numpy.sin(2 * numpy.pi * 5.5 * t) inst_freq = hz + vib + jitter @@ -959,9 +959,9 @@ def vocal_wave(hz, peak=SAMPLE_PEAK, n_samples=SAMPLE_RATE, lyric="ah"): -numpy.sin(numpy.pi * (saw - 0.6) / 0.4) * 0.8) # sharp fall glottal *= shimmer - # Aspiration noise (breathiness) - breath = rng.normal(0, 0.08, n_samples) - source = glottal * 0.85 + breath * 0.15 + # Aspiration noise (breathiness) — subtle + breath = rng.normal(0, 0.04, n_samples) + source = glottal * 0.92 + breath * 0.08 # ── Formant filtering ── n_vowels = len(vowels_in_lyric) @@ -1039,6 +1039,12 @@ def vocal_wave(hz, peak=SAMPLE_PEAK, n_samples=SAMPLE_RATE, lyric="ah"): ws *= numpy.linspace(0.5, 0, wl) out[:wl] = ws * 0.4 + out[:wl] * 0.6 + # Soft edges — prevent clicks at note boundaries + fade_samples = min(int(SAMPLE_RATE * 0.01), n_samples // 4) + if fade_samples > 0: + out[:fade_samples] *= numpy.linspace(0, 1, fade_samples) + out[-fade_samples:] *= numpy.linspace(1, 0, fade_samples) + mx = numpy.abs(out).max() if mx > 0: out /= mx