Cot-caught merger + end-position assonance for phrases

AO merges into AA except before R (thought/lot, off/forgotten; car/
core stay apart). Line-ending phrases may now rhyme on their pure vowel
run, just as line-ending words already could — "forgotten" / "off of" lands.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 03:51:19 -04:00
parent e49e5f2844
commit 35b68cf7b8
2 changed files with 47 additions and 7 deletions
+33 -7
View File
@@ -71,17 +71,22 @@ STOPWORDS = frozenset(
# --------------------------------------------------------------------------
def _norm_r(phones: str) -> str:
"""Neutralize vowel contrasts that English loses before R: CMU has
fear = F IH1 R but hear = HH IY1 R, yet they rhyme in every dialect
(the NEAR vowel). Same for UH/UW (cure, tour)."""
"""Neutralize contrasts most American English doesn't keep: IH/IY and
UH/UW merge before R (fear/hear, cure/tour — the NEAR vowel), and
AO merges into AA everywhere else (the cot-caught merger: thought/
lot, off/forgotten). Before R the AA/AO split survives (car/core)."""
pl = phones.split()
for i in range(len(pl) - 1):
if pl[i + 1][0] == "R" and pl[i][-1].isdigit():
base, stress = pl[i][:-1], pl[i][-1]
for i in range(len(pl)):
if not pl[i][-1].isdigit():
continue
base, stress = pl[i][:-1], pl[i][-1]
if i + 1 < len(pl) and pl[i + 1][0] == "R":
if base == "IH":
pl[i] = "IY" + stress
elif base == "UH":
pl[i] = "UW" + stress
elif base == "AO":
pl[i] = "AA" + stress
return " ".join(pl)
@@ -656,10 +661,31 @@ def analyze(draft: Draft):
else:
for key in filter(None, keys):
by_slant[(t["sid"], key)].append(t)
# line-ending PHRASES get the same end-position privilege as words:
# pure vowel-run matching ("forgotten" / "off of" — AA-schwa)
end_spans = defaultdict(list)
for g in raw_groups:
for t in g["toks"]:
if " " not in t["word"]:
end_spans[t["line"]].append((t["start"], t["end"]))
for p in phrases:
if not p["is_end"] or id(p) in grouped or len(p["vowels"]) < 2:
continue
if any(s < p["end"] <= e for s, e in end_spans[p["line"]]):
continue # the tail word already claimed this line's ending
key = "v:" + " ".join(p["vowels"])
gi = group_by_slant.get((p["sid"], key))
if gi is not None:
raw_groups[gi]["toks"].append(p)
p["slant"] = True
grouped.add(id(p))
else:
by_slant[(p["sid"], key)].append(p)
for (sid, key), toks in sorted(by_slant.items(),
key=lambda kv: (-len(kv[1]), kv[0][1])):
toks = [t for t in toks if id(t) not in grouped]
if len(toks) >= 2 and len({t["word"].lower() for t in toks}) >= 2:
if len(toks) >= 2 and len({t["word"].split()[0] for t in toks}) >= 2:
raw_groups.append({"toks": toks, "slant": True, "key": key})
grouped.update(id(t) for t in toks)
+14
View File
@@ -445,3 +445,17 @@ def test_weak_ending_requires_matching_coda():
"Weaving heaven among hellish screams")
fam = group_with(text, "entirety", "divinity")
assert "screams" not in fam
def test_cot_caught_merger():
    """thought (AO T) and lot/got (AA T) should land in one rhyme group.

    In cot-caught-merged dialects AO and AA are the same vowel, so all
    three words are passed to ``group_with`` together.
    NOTE(review): ``group_with`` is defined elsewhere in the test module;
    presumably it fails the test when the words are not grouped -- confirm.
    """
    draft = "I thought about it a lot\nI gave it everything I got"
    group_with(draft, "thought", "lot", "got")
def test_end_phrase_pure_vowel_rhyme():
    """A line-ending phrase may rhyme with a line-ending word on vowels alone.

    "forgotten" and "off of" both end their lines with an AA-schwa vowel
    run; assonance is allowed at the line boundary for phrases just as it
    is for single words.
    NOTE(review): ``group_with`` is defined elsewhere in the test module;
    presumably it fails the test when the items are not grouped -- confirm.
    """
    draft = ("I realize shit and write it down to be forgotten\n"
             "Building a database to reference my life off of")
    group_with(draft, "forgotten", "off of")