From d2d38d0d5a84f008f9f053edfe0455784a05b2be Mon Sep 17 00:00:00 2001
From: Kenneth Reitz
Date: Sun, 7 Jun 2026 02:45:39 -0400
Subject: [PATCH] Both-halves phrases may still tie into a third family
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

four-inch and door hinge join the orange/storage/porridge clan even
though four/door and inch/hinge each rhyme on their own — a phrase
whose target group differs from both halves' groups carries new
information. The full orange verse is now a test.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 app.py               | 35 +++++++++++++++++++++--------------
 tests/test_rhymes.py | 10 ++++++++++
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/app.py b/app.py
index f99ac2c..5484566 100644
--- a/app.py
+++ b/app.py
@@ -556,30 +556,37 @@ def analyze(draft: Draft):
         for key in keys:
             by_multi[(t["sid"], key)].append(t)
 
-    # a phrase only competes when no word inside it already rhymes
+    # which group holds each already-rhyming word, per line
     grouped_spans = defaultdict(list)
-    for t in tokens:
-        if id(t) in grouped:
-            grouped_spans[t["line"]].append((t["start"], t["end"]))
+    for gi, g in enumerate(raw_groups):
+        for t in g["toks"]:
+            if " " not in t["word"]:
+                grouped_spans[t["line"]].append((t["start"], t["end"], gi))
     for p in phrases:
         if p["weak"]:
             continue
-        # a phrase yields only when BOTH its halves already rhyme (pure
-        # redundancy); if one half is new, the phrase carries information
-        # (beast mode / sleep though: though already rhymes, beast doesn't)
-        spans = grouped_spans[p["line"]]
-        a_taken = any(s <= p["start"] < e for s, e in spans)
-        b_taken = any(s < p["end"] <= e for s, e in spans)
-        if a_taken and b_taken:
-            continue
         vs = p["vowels"]
         if len(vs) >= 3 or (len(vs) == 2 and vs[1] not in REDUCED):
             key = _multi_key(vs)
         else:
             # V+schwa phrases must bring consonant support
             key = _m2_key(p["rime"].split())
-        if key:
-            attach_or_collect(p, key, by_multi, group_by_multi)
+        if not key:
+            continue
+        spans = grouped_spans[p["line"]]
+        a_gi = next((gi for s, e, gi in spans if s <= p["start"] < e), None)
+        b_gi = next((gi for s, e, gi in spans if s < p["end"] <= e), None)
+        if a_gi is not None and b_gi is not None:
+            # both halves already rhyme — the phrase only matters if it
+            # ties into a THIRD family (four-inch joining the orange/
+            # storage clan while four sits with door and inch with hinge)
+            gi = group_by_multi.get((p["sid"], key))
+            if gi is not None and gi != a_gi and gi != b_gi:
+                raw_groups[gi]["toks"].append(p)
+                p["slant"] = True
+                grouped.add(id(p))
+            continue
+        attach_or_collect(p, key, by_multi, group_by_multi)
 
     # biggest buckets claim first (a token may sit in several via its
     # anchors); distinctness by anchor word, so the phrase "fire burns"
diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py
index ac73e30..f0753de 100644
--- a/tests/test_rhymes.py
+++ b/tests/test_rhymes.py
@@ -356,3 +356,13 @@ def test_phrase_joins_slant_group_via_coda_consensus():
             "It's by the door hinge\n"
             "eating pourage")
     group_with(text, "orange", "door hinge", "pourage")
+
+
+def test_the_full_orange_verse():
+    # the Eminem demonstration: every line ties back to orange
+    text = ("I put my orange\nfour-inch\ndoor hinge\nin storage,\n"
+            "and ate porridge with George")
+    group_with(text, "orange", "storage", "porridge",
+               "four-inch", "door hinge")
+    group_with(text, "four", "door", "george")
+    group_with(text, "inch", "hinge")