From da3e3433d009c07e393df33ccc71c34b425cfd38 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz
Date: Sun, 7 Jun 2026 03:37:51 -0400
Subject: [PATCH] Pronunciation overrides for CMU howlers

stasis (STAH-seez?!) and kinda (KIH-nda) get corrected primaries;
the dict entry stays as a secondary candidate. Oasis of stasis now
glows as the perfect rhyme it is.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 app.py               | 10 ++++++++++
 tests/test_rhymes.py |  5 +++++
 2 files changed, 15 insertions(+)

diff --git a/app.py b/app.py
index b2318a5..0575ccb 100644
--- a/app.py
+++ b/app.py
@@ -85,6 +85,14 @@ def _norm_r(phones: str) -> str:
     return " ".join(pl)
 
 
+#: corrections for the CMU dict's occasional howlers — the dict entry is
+#: kept as a secondary candidate, the fix leads
+OVERRIDES = {
+    "stasis": "S T EY1 S IH0 S",  # CMU: "STAH-seez"
+    "kinda": "K AY1 N D AH0",  # CMU: "KIH-nda"
+}
+
+
 @lru_cache(maxsize=None)
 def phones_candidates(word: str) -> tuple[str, ...]:
     """Plausible pronunciations for a word: CMU dict variants, a few
@@ -94,6 +102,8 @@ def phones_candidates(word: str) -> tuple[str, ...]:
     so the rhyme passes get to match on any of them."""
     w = word.lower().strip("'")
     cands = list(pronouncing.phones_for_word(w)[:3])
+    if w in OVERRIDES:
+        cands.insert(0, OVERRIDES[w])
     if not cands and w.endswith("in"):  # runnin' -> running
         cands = pronouncing.phones_for_word(w + "g")[:1]
     if not cands and w.endswith("'s"):
diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py
index cd5285b..c9b0a6d 100644
--- a/tests/test_rhymes.py
+++ b/tests/test_rhymes.py
@@ -430,3 +430,8 @@ def test_weak_ending_rhymes_at_line_end():
         "Red and yellow then came to be\n"
         "Reaching out to me")
     group_with(text, "see", "infancy", "be", "me")
+
+
+def test_cmu_override_stasis():
+    # CMU transcribes stasis as "STAH-seez"; everyone says STAY-sis
+    group_with("Oasis of stasis", "oasis", "stasis")