mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
Scope rhyme matching to within a stanza
Each blank-line-separated stanza is its own rhyme scheme. The perfect-rhyme pass is now keyed per stanza, and the multi/coda fusion steps won't union families across stanza boundaries (slant/multi/consonance buckets were already stanza-keyed). light/night in verse 1 no longer links to bright/sight in verse 2.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -628,19 +628,20 @@ def analyze(draft: Draft):
|
||||
if not t["is_end"] and (w in STOPWORDS or w in refrain or len(w) < 2):
|
||||
continue
|
||||
for key in rime_keys(t["word"]):
|
||||
by_rime[key].append(t)
|
||||
by_rime[(t["sid"], key)].append(t)
|
||||
for p in phrases:
|
||||
# all phrases compete on exact rime — "is it" matches "visit"
|
||||
# phone-for-phone; the qualification below stops stopword-anchored
|
||||
# phrases from pairing with nothing but each other
|
||||
by_rime["p:" + p["rime"]].append(p)
|
||||
by_rime[(p["sid"], "p:" + p["rime"])].append(p)
|
||||
|
||||
# biggest buckets claim their tokens first, so a word with several
|
||||
# candidate pronunciations joins its best-supported rhyme group.
|
||||
# Each group remembers its founding key — the sound it's about.
|
||||
raw_groups: list[dict] = []
|
||||
claimed: set[int] = set()
|
||||
for key, toks in sorted(by_rime.items(), key=lambda kv: (-len(kv[1]), kv[0])):
|
||||
for (sid, key), toks in sorted(by_rime.items(),
|
||||
key=lambda kv: (-len(kv[1]), kv[0][1])):
|
||||
toks = [t for t in toks if id(t) not in claimed]
|
||||
# a SECONDARY pronunciation only reaches nearby partners (the
|
||||
# verb pred-i-KATE shouldn't hand "predicate" to a hook's "eight"
|
||||
@@ -1080,6 +1081,8 @@ def analyze(draft: Draft):
|
||||
# single vowel, the final codas must nest too — forever (.)
|
||||
# and sequential (L) share EH-x and still aren't one family
|
||||
if va == vb or _tail_of(va, vb) or _tail_of(vb, va):
|
||||
if raw_groups[ai]["toks"][0]["sid"] != raw_groups[bi]["toks"][0]["sid"]:
|
||||
continue # families don't fuse across stanzas
|
||||
if (len(va) == 1 and len(vb) == 1
|
||||
and not _sets_nest(_gtags(ai), _gtags(bi))):
|
||||
continue
|
||||
@@ -1152,6 +1155,8 @@ def analyze(draft: Draft):
|
||||
continue
|
||||
s, l = (ca, cb) if len(ca) < len(cb) else (cb, ca)
|
||||
nested = l[:len(s)] == s or l[len(l) - len(s):] == s
|
||||
if raw_groups[gi]["toks"][0]["sid"] != raw_groups[gj]["toks"][0]["sid"]:
|
||||
continue # no coda fusion across stanzas
|
||||
# end-dominated families rhyme on their bare vowel, the way
|
||||
# lone endings always could (blood/mud + thugs/drugs — the
|
||||
# Kanye chorus chain); mid-line families keep their codas
|
||||
|
||||
@@ -723,3 +723,20 @@ def test_weak_ending_needs_a_full_vowel():
|
||||
assert "middle" not in highlighted(text)
|
||||
# but a full-vowel feminine ending still pairs (infancy/see on IY)
|
||||
group_with("all I can see\nin my infancy", "see", "infancy")
|
||||
|
||||
|
||||
def test_no_matching_across_stanzas():
|
||||
# each stanza is its own rhyme world: light/night in stanza 1 must
|
||||
# not group with bright/sight in stanza 2
|
||||
text = ("the city light\nshines every night\n\n"
|
||||
"the future's bright\nit's in my sight")
|
||||
res = analyze(Draft(text=text))
|
||||
from collections import defaultdict
|
||||
bg = defaultdict(set)
|
||||
for t in res["tokens"]:
|
||||
bg[t["g"]].add(res["lines"][t["l"]][t["s"]:t["e"]].lower())
|
||||
s1 = next(s for s in bg.values() if "light" in s)
|
||||
assert "bright" not in s1 and "sight" not in s1
|
||||
# but within each stanza they still rhyme
|
||||
assert "night" in s1
|
||||
assert any({"bright", "sight"} <= s for s in bg.values())
|
||||
|
||||
Reference in New Issue
Block a user