Pronunciation overrides for CMU howlers

stasis (CMU says STAH-seez?!) and kinda (CMU says KIH-nda) get corrected
primary pronunciations; the dict entry stays as a secondary candidate.
"Oasis of stasis" now glows as the perfect rhyme it is.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 03:37:51 -04:00
parent e6a721264b
commit da3e3433d0
2 changed files with 15 additions and 0 deletions
+10
View File
@@ -85,6 +85,14 @@ def _norm_r(phones: str) -> str:
return " ".join(pl)
#: Hand-written pronunciations for the handful of words the CMU dict
#: gets badly wrong.  The override is offered first; the dict's own
#: entry is still kept as a secondary candidate.
OVERRIDES: dict[str, str] = {
    # CMU: "STAH-seez"
    "stasis": "S T EY1 S IH0 S",
    # CMU: "KIH-nda"
    "kinda": "K AY1 N D AH0",
}
@lru_cache(maxsize=None)
def phones_candidates(word: str) -> tuple[str, ...]:
"""Plausible pronunciations for a word: CMU dict variants, a few
@@ -94,6 +102,8 @@ def phones_candidates(word: str) -> tuple[str, ...]:
so the rhyme passes get to match on any of them."""
w = word.lower().strip("'")
cands = list(pronouncing.phones_for_word(w)[:3])
if w in OVERRIDES:
cands.insert(0, OVERRIDES[w])
if not cands and w.endswith("in"): # runnin' -> running
cands = pronouncing.phones_for_word(w + "g")[:1]
if not cands and w.endswith("'s"):
+5
View File
@@ -430,3 +430,8 @@ def test_weak_ending_rhymes_at_line_end():
"Red and yellow then came to be\n"
"Reaching out to me")
group_with(text, "see", "infancy", "be", "me")
def test_cmu_override_stasis():
    """Check "oasis" / "stasis" via group_with on a one-line lyric.

    NOTE(review): group_with is defined elsewhere in this file; presumably
    it asserts that the listed words land in the same rhyme group -- confirm.
    """
    # CMU transcribes stasis as "STAH-seez", but the spoken form is STAY-sis.
    lyric = "Oasis of stasis"
    group_with(lyric, "oasis", "stasis")