Scope rhyme matching to within a stanza

Each blank-line-separated stanza is its own rhyme scheme. The perfect-
rhyme pass is now keyed per stanza, and the multi/coda fusion steps
won't union families across stanza boundaries (slant/multi/consonance
buckets were already stanza-keyed). For example, light/night in stanza 1
no longer links to bright/sight in stanza 2.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 03:00:53 -04:00
parent 676a596a66
commit 0bd3dab542
2 changed files with 25 additions and 3 deletions
+8 -3
View File
@@ -628,19 +628,20 @@ def analyze(draft: Draft):
if not t["is_end"] and (w in STOPWORDS or w in refrain or len(w) < 2):
continue
for key in rime_keys(t["word"]):
by_rime[key].append(t)
by_rime[(t["sid"], key)].append(t)
for p in phrases:
# all phrases compete on exact rime — "is it" matches "visit"
# phone-for-phone; the qualification below stops stopword-anchored
# phrases from pairing with nothing but each other
by_rime["p:" + p["rime"]].append(p)
by_rime[(p["sid"], "p:" + p["rime"])].append(p)
# biggest buckets claim their tokens first, so a word with several
# candidate pronunciations joins its best-supported rhyme group.
# Each group remembers its founding key — the sound it's about.
raw_groups: list[dict] = []
claimed: set[int] = set()
for key, toks in sorted(by_rime.items(), key=lambda kv: (-len(kv[1]), kv[0])):
for (sid, key), toks in sorted(by_rime.items(),
key=lambda kv: (-len(kv[1]), kv[0][1])):
toks = [t for t in toks if id(t) not in claimed]
# a SECONDARY pronunciation only reaches nearby partners (the
# verb pred-i-KATE shouldn't hand "predicate" to a hook's "eight"
@@ -1080,6 +1081,8 @@ def analyze(draft: Draft):
# single vowel, the final codas must nest too — forever (.)
# and sequential (L) share EH-x and still aren't one family
if va == vb or _tail_of(va, vb) or _tail_of(vb, va):
if raw_groups[ai]["toks"][0]["sid"] != raw_groups[bi]["toks"][0]["sid"]:
continue # families don't fuse across stanzas
if (len(va) == 1 and len(vb) == 1
and not _sets_nest(_gtags(ai), _gtags(bi))):
continue
@@ -1152,6 +1155,8 @@ def analyze(draft: Draft):
continue
s, l = (ca, cb) if len(ca) < len(cb) else (cb, ca)
nested = l[:len(s)] == s or l[len(l) - len(s):] == s
if raw_groups[gi]["toks"][0]["sid"] != raw_groups[gj]["toks"][0]["sid"]:
continue # no coda fusion across stanzas
# end-dominated families rhyme on their bare vowel, the way
# lone endings always could (blood/mud + thugs/drugs — the
# Kanye chorus chain); mid-line families keep their codas
+17
View File
@@ -723,3 +723,20 @@ def test_weak_ending_needs_a_full_vowel():
assert "middle" not in highlighted(text)
# but a full-vowel feminine ending still pairs (infancy/see on IY)
group_with("all I can see\nin my infancy", "see", "infancy")
def test_no_matching_across_stanzas():
    """Rhyme families must not span a blank-line stanza break.

    Stanza 1 ends its lines on light/night and stanza 2 on bright/sight.
    The test buckets every returned token's surface text by the token's
    "g" value and checks that the bucket holding "light" also holds
    "night" but neither word from stanza 2, while some bucket still
    holds both "bright" and "sight".
    """
    from collections import defaultdict

    text = ("the city light\nshines every night\n\n"
            "the future's bright\nit's in my sight")
    result = analyze(Draft(text=text))

    # bucket key is t["g"]; the word is sliced out of its source line
    # (line index "l", character span "s":"e") and lowercased
    words_by_group = defaultdict(set)
    for tok in result["tokens"]:
        source_line = result["lines"][tok["l"]]
        surface = source_line[tok["s"]:tok["e"]]
        words_by_group[tok["g"]].add(surface.lower())

    # first bucket (in insertion order) that contains "light"
    light_family = next(
        family for family in words_by_group.values() if "light" in family
    )

    # nothing from stanza 2 may leak into stanza 1's family
    assert light_family.isdisjoint({"bright", "sight"})
    # but within each stanza they still rhyme
    assert "night" in light_family
    assert any(
        {"bright", "sight"} <= family for family in words_by_group.values()
    )