From c60b9d6d43a347f9056cd412fb2e1e044db279ba Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Jun 2026 01:41:49 -0400 Subject: [PATCH] Overlapping rhyme layers: phrases as a second visual channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phrases now compete in the perfect-rhyme pass anchored at their first word's stressed vowel ("stir up" = ER AH P = "syrup"), render as underlines while words render as fills, and can overlap in different colors — the syrup/cup knot finally displays both schemes. Stopword-anchored phrases attach only on exact rime identity. Segment-based highlight renderer replaces the span-per-token one. Co-Authored-By: Claude Opus 4.8 (1M context) --- app.py | 50 ++++++++++++++++++++++++++++++++---- static/index.html | 60 ++++++++++++++++++++++++++------------------ tests/test_rhymes.py | 25 ++++++++++++++++++ 3 files changed, 106 insertions(+), 29 deletions(-) diff --git a/app.py b/app.py index b68b9e9..318c9db 100644 --- a/app.py +++ b/app.py @@ -348,20 +348,33 @@ def analyze(draft: Draft): line_toks[t["line"]].append(t) for toks in line_toks.values(): for a, b in zip(toks, toks[1:]): - if a["word"].lower() in STOPWORDS or b["word"].lower() in STOPWORDS: - continue pa, pb = phones_for(a["word"]), phones_for(b["word"]) if not (pa and pb): continue + # the phrase's rime runs from the anchor's stressed vowel + # through the end of the tail: "stir up" = ER AH P, which is + # a PERFECT rhyme with syrup + pl = pa.split() + start = 0 + for i in range(len(pl) - 1, -1, -1): + if pl[i][-1] in "12": + start = i + break phrases.append({ "line": a["line"], "start": a["start"], "end": b["end"], "word": a["word"].lower() + " " + b["word"].lower(), "is_end": b["is_end"], "sid": a["sid"], "gid": None, "slant": False, "vowels": _tail_vowels(pa) + _all_vowels(pb), + "rime": DIGITS.sub("", " ".join(pl[start:] + pb.split())), + # a phrase touching a stopword ("were up") may still match + # perfectly, but 
never competes in the vowel-only passes + "weak": (a["word"].lower() in STOPWORDS + or b["word"].lower() in STOPWORDS), }) # pass 1: perfect rhymes (shared rime), anywhere in a line — this is - # what catches internal rhymes + # what catches internal rhymes. Phrases compete too, so "stir up" + # perfect-rhymes "syrup" even while its "up" rhymes with "cup". by_rime = defaultdict(list) for t in tokens: w = t["word"].lower() @@ -369,6 +382,10 @@ def analyze(draft: Draft): continue for key in rime_keys(t["word"]): by_rime[key].append(t) + for p in phrases: + # "bought it" competes; "to me" / "but I" never found anything + if p["word"].split()[0] not in STOPWORDS: + by_rime["p:" + p["rime"]].append(p) # biggest buckets claim their tokens first, so a word with several # candidate pronunciations joins its best-supported rhyme group. @@ -379,7 +396,9 @@ def analyze(draft: Draft): toks = [t for t in toks if id(t) not in claimed] if len(toks) < 2: continue - distinct = {t["word"].lower() for t in toks} + # distinctness by anchor word, so "fire burns" can't pose as a + # rhyme partner for the "fire" it starts with + distinct = {t["word"].split()[0].lower() for t in toks} end_count = sum(t["is_end"] for t in toks) if len(distinct) < 2 and end_count < 2: continue # the same word repeated mid-line isn't a rhyme @@ -447,6 +466,8 @@ def analyze(draft: Draft): if id(t) in grouped: grouped_spans[t["line"]].append((t["start"], t["end"])) for p in phrases: + if p["weak"]: + continue if any(s < p["end"] and p["start"] < e for s, e in grouped_spans[p["line"]]): continue @@ -487,6 +508,25 @@ def analyze(draft: Draft): for (sid, key), toks in by_vc.items(): if len(toks) >= 2 and len({t["word"].lower() for t in toks}) >= 2: raw_groups.append({"toks": toks, "slant": True, "key": key}) + grouped.update(id(t) for t in toks) + + # stopword-anchored phrases ("were up") never compete on their own, + # but an exact rime match against any grouped token lets them ride + # along — perfect phone 
identity carries no transitive-key risk + rime_map: dict[tuple, int] = {} + for gi, g in enumerate(raw_groups): + for t in g["toks"]: + keys = ("p:" + t["rime"],) if "rime" in t else rime_keys(t["word"]) + for k in keys: + if k.startswith("p:"): + rime_map.setdefault((t["sid"], k), gi) + for p in phrases: + if id(p) in grouped or p["word"].split()[0] not in STOPWORDS: + continue + gi = rime_map.get((p["sid"], "p:" + p["rime"])) + if gi is not None: + raw_groups[gi]["toks"].append(p) + grouped.add(id(p)) # stable colors: order groups by first appearance raw_groups.sort(key=lambda g: min((t["line"], t["start"]) for t in g["toks"])) @@ -536,7 +576,7 @@ def analyze(draft: Draft): toks_out = [ {"l": t["line"], "s": t["start"], "e": t["end"], "g": t["gid"], - "end": t["is_end"], + "end": t["is_end"], "ph": "vowels" in t, "slant": t["slant"] or groups_out[t["gid"]]["slant"]} for t in [*tokens, *phrases] if t["gid"] is not None ] diff --git a/static/index.html b/static/index.html index 92788d8..1c6a592 100644 --- a/static/index.html +++ b/static/index.html @@ -111,19 +111,11 @@ outline: none; } #editor::placeholder { color: #5a5249; } - /* colored rhyme tokens — background tint only, so the textarea text on - top stays crisp and box metrics stay identical (no horizontal padding!) */ - .rh { - border-radius: 4px; - background: color-mix(in srgb, var(--c) 15%, transparent); - } - .rh.end { - background: color-mix(in srgb, var(--c) 24%, transparent); - box-shadow: inset 0 -2px 0 0 var(--c); - } - .rh.slant.end { - box-shadow: inset 0 -2px 0 0 color-mix(in srgb, var(--c) 45%, transparent); - } + /* colored rhyme segments — words tint the background, phrases draw the + underline; both can overlap in different colors. Background/shadow + only, so the textarea text on top stays crisp and box metrics stay + identical (no horizontal padding!) 
*/ + .seg { border-radius: 4px; } .offbeat { text-decoration: underline wavy color-mix(in srgb, var(--r6) 45%, transparent); text-decoration-skip-ink: none; @@ -484,21 +476,41 @@ function render(){ analysis.groups.forEach(g=>{ groupInfo[g.id] = g; }); analysis.tokens.forEach(t=>{ (tokByLine[t.l] ||= []).push(t); }); } + const colorOf = t => `var(--r${groupInfo[t.g].color % COLORS})`; let html = ''; lines.forEach((line, i)=>{ // only apply spans if this line still matches what the server analyzed const fresh = analysis && analysis.lines[i] === line; - const toks = fresh ? (tokByLine[i] || []) : []; - toks.sort((a,b)=>a.s-b.s); - let pos = 0, h = ''; - toks.forEach(t=>{ - const g = groupInfo[t.g]; if(!g || t.s < pos) return; - h += esc(line.slice(pos, t.s)); - const cls = 'rh' + (t.end ? ' end' : '') + (t.slant ? ' slant' : ''); - h += `` + esc(line.slice(t.s, t.e)) + ''; - pos = t.e; - }); - h += esc(line.slice(pos)); + const toks = (fresh ? (tokByLine[i] || []) : []).filter(t=>groupInfo[t.g]); + const words = toks.filter(t=>!t.ph); + const phrases = toks.filter(t=>t.ph); + let h = ''; + if(!toks.length){ + h = esc(line); + }else{ + const cuts = new Set([0, line.length]); + toks.forEach(t=>{ cuts.add(t.s); cuts.add(t.e); }); + const pts = [...cuts].sort((a,b)=>a-b); + for(let k = 0; k < pts.length - 1; k++){ + const a = pts[k], b = pts[k+1]; + const text = esc(line.slice(a, b)); + if(!text) continue; + const w = words.find(t=>t.s <= a && b <= t.e); + const p = phrases.find(t=>t.s <= a && b <= t.e); + if(!w && !p){ h += text; continue; } + let style = ''; + if(w) style += `background:color-mix(in srgb, ${colorOf(w)} ${w.end ? 24 : 15}%, transparent);`; + let ul = null; + if(w && w.end){ + ul = w.slant ? `color-mix(in srgb, ${colorOf(w)} 45%, transparent)` : colorOf(w); + }else if(p){ + ul = p.end && !p.slant ? 
colorOf(p) + : `color-mix(in srgb, ${colorOf(p)} 55%, transparent)`; + } + if(ul) style += `box-shadow:inset 0 -2px 0 0 ${ul};`; + h += `${text}`; + } + } const m = fresh && analysis.meter ? analysis.meter.find(x=>x.l===i) : null; html += (m && m.off ? `${h}` : h) + '\n'; }); diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py index b77171d..71fa4be 100644 --- a/tests/test_rhymes.py +++ b/tests/test_rhymes.py @@ -239,3 +239,28 @@ def test_meter_coaching_needs_a_pattern_to_break(): # two lines of wildly different lengths: no dominant pattern, no flags res = analyze(Draft(text="short line here\na very much longer line that runs on and on")) assert not any(m["off"] for m in res["meter"]) + + +# ------------------------------------------------------------------ layers + +def test_layered_phrase_rides_second_group(): + # Em's syrup knot: "up" fills with the cup chain, while "stir up" + # rides the syrup/burden group as a second layer + text = ("Maybe your cup is full of syrup and lean\n" + "fill the cup up to the brim\n" + "Maybe I need to stir up shit\n" + "the burden is mine") + syrup = group_with(text, "syrup", "burden", "stir up") + cups = group_with(text, "cup", "up") + assert "stir up" not in cups and "syrup" not in cups + + +def test_weak_phrase_attaches_but_never_founds(): + # "were up" (stopword tail) joins an existing ER group... + text = ("Maybe your cup is full of syrup and lean\n" + "fill the cup up to the brim\n" + "shake the world up if it were up to me\n" + "the burden is mine") + group_with(text, "syrup", "burden", "were up") + # ...but two weak phrases alone can't create a group + assert "were up" not in highlighted("it were up to him\nit were up to her")