Scope rhyme matching to within a stanza

Each blank-line-separated stanza is its own rhyme scheme. The perfect-rhyme pass is now keyed per stanza, and the multi/coda fusion steps won't union families across stanza boundaries (slant/multi/consonance buckets were already stanza-keyed). light/night in verse 1 no longer links to bright/sight in verse 2. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-11 17:08:33 +00:00 · 2026-06-08 03:00:53 -04:00
parent 676a596a66
commit 0bd3dab542
2 changed files with 25 additions and 3 deletions
@@ -628,19 +628,20 @@ def analyze(draft: Draft):
        if not t["is_end"] and (w in STOPWORDS or w in refrain or len(w) < 2):
            continue
        for key in rime_keys(t["word"]):
-            by_rime[key].append(t)
+            by_rime[(t["sid"], key)].append(t)
    for p in phrases:
        # all phrases compete on exact rime — "is it" matches "visit"
        # phone-for-phone; the qualification below stops stopword-anchored
        # phrases from pairing with nothing but each other
-        by_rime["p:" + p["rime"]].append(p)
+        by_rime[(p["sid"], "p:" + p["rime"])].append(p)

    # biggest buckets claim their tokens first, so a word with several
    # candidate pronunciations joins its best-supported rhyme group.
    # Each group remembers its founding key — the sound it's about.
    raw_groups: list[dict] = []
    claimed: set[int] = set()
-    for key, toks in sorted(by_rime.items(), key=lambda kv: (-len(kv[1]), kv[0])):
+    for (sid, key), toks in sorted(by_rime.items(),
+                                   key=lambda kv: (-len(kv[1]), kv[0][1])):
        toks = [t for t in toks if id(t) not in claimed]
        # a SECONDARY pronunciation only reaches nearby partners (the
        # verb pred-i-KATE shouldn't hand "predicate" to a hook's "eight"
@@ -1080,6 +1081,8 @@ def analyze(draft: Draft):
            # single vowel, the final codas must nest too — forever (.)
            # and sequential (L) share EH-x and still aren't one family
            if va == vb or _tail_of(va, vb) or _tail_of(vb, va):
+                if raw_groups[ai]["toks"][0]["sid"] != raw_groups[bi]["toks"][0]["sid"]:
+                    continue  # families don't fuse across stanzas
                if (len(va) == 1 and len(vb) == 1
                        and not _sets_nest(_gtags(ai), _gtags(bi))):
                    continue
@@ -1152,6 +1155,8 @@ def analyze(draft: Draft):
                continue
            s, l = (ca, cb) if len(ca) < len(cb) else (cb, ca)
            nested = l[:len(s)] == s or l[len(l) - len(s):] == s
+            if raw_groups[gi]["toks"][0]["sid"] != raw_groups[gj]["toks"][0]["sid"]:
+                continue  # no coda fusion across stanzas
            # end-dominated families rhyme on their bare vowel, the way
            # lone endings always could (blood/mud + thugs/drugs — the
            # Kanye chorus chain); mid-line families keep their codas
@@ -723,3 +723,20 @@ def test_weak_ending_needs_a_full_vowel():
    assert "middle" not in highlighted(text)
    # but a full-vowel feminine ending still pairs (infancy/see on IY)
    group_with("all I can see\nin my infancy", "see", "infancy")
+
+
+def test_no_matching_across_stanzas():
+    # each stanza is its own rhyme world: light/night in stanza 1 must
+    # not group with bright/sight in stanza 2
+    text = ("the city light\nshines every night\n\n"
+            "the future's bright\nit's in my sight")
+    res = analyze(Draft(text=text))
+    from collections import defaultdict
+    bg = defaultdict(set)
+    for t in res["tokens"]:
+        bg[t["g"]].add(res["lines"][t["l"]][t["s"]:t["e"]].lower())
+    s1 = next(s for s in bg.values() if "light" in s)
+    assert "bright" not in s1 and "sight" not in s1
+    # but within each stanza they still rhyme
+    assert "night" in s1
+    assert any({"bright", "sight"} <= s for s in bg.values())