From c60b9d6d43a347f9056cd412fb2e1e044db279ba Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Sun, 7 Jun 2026 01:41:49 -0400
Subject: [PATCH] Overlapping rhyme layers: phrases as a second visual channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phrases now compete in the perfect-rhyme pass, anchored at the last
stressed vowel of their first word ("stir up" = ER AH P = "syrup").
They render as underlines while words render as fills, so the two can
overlap in different colors — the syrup/cup knot finally displays both
schemes. Stopword-anchored phrases attach only on exact rime identity.
A segment-based highlight renderer replaces the span-per-token one.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 app.py               | 50 ++++++++++++++++++++++++++++++++----
 static/index.html    | 60 ++++++++++++++++++++++++++------------------
 tests/test_rhymes.py | 25 ++++++++++++++++++
 3 files changed, 106 insertions(+), 29 deletions(-)

diff --git a/app.py b/app.py
index b68b9e9..318c9db 100644
--- a/app.py
+++ b/app.py
@@ -348,20 +348,33 @@ def analyze(draft: Draft):
         line_toks[t["line"]].append(t)
     for toks in line_toks.values():
         for a, b in zip(toks, toks[1:]):
-            if a["word"].lower() in STOPWORDS or b["word"].lower() in STOPWORDS:
-                continue
             pa, pb = phones_for(a["word"]), phones_for(b["word"])
             if not (pa and pb):
                 continue
+            # the phrase's rime runs from the anchor's last stressed
+            # vowel through the end of the tail: "stir up" = ER AH P,
+            # which is a PERFECT rhyme with syrup
+            pl = pa.split()
+            start = 0
+            for i in range(len(pl) - 1, -1, -1):
+                if pl[i][-1] in "12":
+                    start = i
+                    break
             phrases.append({
                 "line": a["line"], "start": a["start"], "end": b["end"],
                 "word": a["word"].lower() + " " + b["word"].lower(),
                 "is_end": b["is_end"], "sid": a["sid"], "gid": None,
                 "slant": False, "vowels": _tail_vowels(pa) + _all_vowels(pb),
+                "rime": DIGITS.sub("", " ".join(pl[start:] + pb.split())),
+                # a phrase touching a stopword ("were up") may still match
+                # perfectly, but never competes in the vowel-only passes
+                "weak": (a["word"].lower() in STOPWORDS
+                         or b["word"].lower() in STOPWORDS),
             })
 
     # pass 1: perfect rhymes (shared rime), anywhere in a line — this is
-    # what catches internal rhymes
+    # what catches internal rhymes. Phrases compete too, so "stir up"
+    # perfect-rhymes "syrup" even while its "up" rhymes with "cup".
     by_rime = defaultdict(list)
     for t in tokens:
         w = t["word"].lower()
@@ -369,6 +382,10 @@ def analyze(draft: Draft):
             continue
         for key in rime_keys(t["word"]):
             by_rime[key].append(t)
+    for p in phrases:
+        # "bought it" competes; "to me" / "but I" never found anything
+        if p["word"].split()[0] not in STOPWORDS:
+            by_rime["p:" + p["rime"]].append(p)
 
     # biggest buckets claim their tokens first, so a word with several
     # candidate pronunciations joins its best-supported rhyme group.
@@ -379,7 +396,9 @@ def analyze(draft: Draft):
         toks = [t for t in toks if id(t) not in claimed]
         if len(toks) < 2:
             continue
-        distinct = {t["word"].lower() for t in toks}
+        # distinctness by anchor word, so "fire burns" can't pose as a
+        # rhyme partner for the "fire" it starts with
+        distinct = {t["word"].split()[0].lower() for t in toks}
         end_count = sum(t["is_end"] for t in toks)
         if len(distinct) < 2 and end_count < 2:
             continue  # the same word repeated mid-line isn't a rhyme
@@ -447,6 +466,8 @@ def analyze(draft: Draft):
         if id(t) in grouped:
             grouped_spans[t["line"]].append((t["start"], t["end"]))
     for p in phrases:
+        if p["weak"]:
+            continue
         if any(s < p["end"] and p["start"] < e
                for s, e in grouped_spans[p["line"]]):
             continue
@@ -487,6 +508,25 @@ def analyze(draft: Draft):
     for (sid, key), toks in by_vc.items():
         if len(toks) >= 2 and len({t["word"].lower() for t in toks}) >= 2:
             raw_groups.append({"toks": toks, "slant": True, "key": key})
+            grouped.update(id(t) for t in toks)
+
+    # stopword-anchored phrases ("were up") never compete on their own,
+    # but an exact rime match against any grouped token lets them ride
+    # along — perfect phone identity carries no transitive-key risk
+    rime_map: dict[tuple, int] = {}
+    for gi, g in enumerate(raw_groups):
+        for t in g["toks"]:
+            keys = ("p:" + t["rime"],) if "rime" in t else rime_keys(t["word"])
+            for k in keys:
+                if k.startswith("p:"):
+                    rime_map.setdefault((t["sid"], k), gi)
+    for p in phrases:
+        if id(p) in grouped or p["word"].split()[0] not in STOPWORDS:
+            continue
+        gi = rime_map.get((p["sid"], "p:" + p["rime"]))
+        if gi is not None:
+            raw_groups[gi]["toks"].append(p)
+            grouped.add(id(p))
 
     # stable colors: order groups by first appearance
     raw_groups.sort(key=lambda g: min((t["line"], t["start"]) for t in g["toks"]))
@@ -536,7 +576,7 @@ def analyze(draft: Draft):
 
     toks_out = [
         {"l": t["line"], "s": t["start"], "e": t["end"], "g": t["gid"],
-         "end": t["is_end"],
+         "end": t["is_end"], "ph": "vowels" in t,
          "slant": t["slant"] or groups_out[t["gid"]]["slant"]}
         for t in [*tokens, *phrases] if t["gid"] is not None
     ]
diff --git a/static/index.html b/static/index.html
index 92788d8..1c6a592 100644
--- a/static/index.html
+++ b/static/index.html
@@ -111,19 +111,11 @@
     outline: none;
   }
   #editor::placeholder { color: #5a5249; }
-  /* colored rhyme tokens — background tint only, so the textarea text on
-     top stays crisp and box metrics stay identical (no horizontal padding!) */
-  .rh {
-    border-radius: 4px;
-    background: color-mix(in srgb, var(--c) 15%, transparent);
-  }
-  .rh.end {
-    background: color-mix(in srgb, var(--c) 24%, transparent);
-    box-shadow: inset 0 -2px 0 0 var(--c);
-  }
-  .rh.slant.end {
-    box-shadow: inset 0 -2px 0 0 color-mix(in srgb, var(--c) 45%, transparent);
-  }
+  /* colored rhyme segments — words tint the background, phrases draw the
+     underline; both can overlap in different colors. Background/shadow
+     only, so the textarea text on top stays crisp and box metrics stay
+     identical (no horizontal padding!) */
+  .seg { border-radius: 4px; }
   .offbeat {
     text-decoration: underline wavy color-mix(in srgb, var(--r6) 45%, transparent);
     text-decoration-skip-ink: none;
@@ -484,21 +476,41 @@ function render(){
     analysis.groups.forEach(g=>{ groupInfo[g.id] = g; });
     analysis.tokens.forEach(t=>{ (tokByLine[t.l] ||= []).push(t); });
   }
+  const colorOf = t => `var(--r${groupInfo[t.g].color % COLORS})`;
   let html = '';
   lines.forEach((line, i)=>{
     // only apply spans if this line still matches what the server analyzed
     const fresh = analysis && analysis.lines[i] === line;
-    const toks = fresh ? (tokByLine[i] || []) : [];
-    toks.sort((a,b)=>a.s-b.s);
-    let pos = 0, h = '';
-    toks.forEach(t=>{
-      const g = groupInfo[t.g]; if(!g || t.s < pos) return;
-      h += esc(line.slice(pos, t.s));
-      const cls = 'rh' + (t.end ? ' end' : '') + (t.slant ? ' slant' : '');
-      h += `<span class="${cls}" style="--c:var(--r${g.color % COLORS})">` + esc(line.slice(t.s, t.e)) + '</span>';
-      pos = t.e;
-    });
-    h += esc(line.slice(pos));
+    const toks = (fresh ? (tokByLine[i] || []) : []).filter(t=>groupInfo[t.g]);
+    const words = toks.filter(t=>!t.ph);
+    const phrases = toks.filter(t=>t.ph);
+    let h = '';
+    if(!toks.length){
+      h = esc(line);
+    }else{
+      const cuts = new Set([0, line.length]);
+      toks.forEach(t=>{ cuts.add(t.s); cuts.add(t.e); });
+      const pts = [...cuts].sort((a,b)=>a-b);
+      for(let k = 0; k < pts.length - 1; k++){
+        const a = pts[k], b = pts[k+1];
+        const text = esc(line.slice(a, b));
+        if(!text) continue;
+        const w = words.find(t=>t.s <= a && b <= t.e);
+        const p = phrases.find(t=>t.s <= a && b <= t.e);
+        if(!w && !p){ h += text; continue; }
+        let style = '';
+        if(w) style += `background:color-mix(in srgb, ${colorOf(w)} ${w.end ? 24 : 15}%, transparent);`;
+        let ul = null;
+        if(w && w.end){
+          ul = w.slant ? `color-mix(in srgb, ${colorOf(w)} 45%, transparent)` : colorOf(w);
+        }else if(p){
+          ul = p.end && !p.slant ? colorOf(p)
+             : `color-mix(in srgb, ${colorOf(p)} 55%, transparent)`;
+        }
+        if(ul) style += `box-shadow:inset 0 -2px 0 0 ${ul};`;
+        h += `<span class="seg" style="${style}">${text}</span>`;
+      }
+    }
     const m = fresh && analysis.meter ? analysis.meter.find(x=>x.l===i) : null;
     html += (m && m.off ? `<span class="offbeat">${h}</span>` : h) + '\n';
   });
diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py
index b77171d..71fa4be 100644
--- a/tests/test_rhymes.py
+++ b/tests/test_rhymes.py
@@ -239,3 +239,28 @@ def test_meter_coaching_needs_a_pattern_to_break():
     # two lines of wildly different lengths: no dominant pattern, no flags
     res = analyze(Draft(text="short line here\na very much longer line that runs on and on"))
     assert not any(m["off"] for m in res["meter"])
+
+
+# ------------------------------------------------------------------ layers
+
+def test_layered_phrase_rides_second_group():
+    # Em's syrup knot: "up" fills with the cup chain, while "stir up"
+    # rides the syrup/burden group as a second layer
+    text = ("Maybe your cup is full of syrup and lean\n"
+            "fill the cup up to the brim\n"
+            "Maybe I need to stir up shit\n"
+            "the burden is mine")
+    syrup = group_with(text, "syrup", "burden", "stir up")
+    cups = group_with(text, "cup", "up")
+    assert "stir up" not in cups and "syrup" not in cups
+
+
+def test_weak_phrase_attaches_but_never_founds():
+    # "were up" (stopword anchor) joins an existing ER group...
+    text = ("Maybe your cup is full of syrup and lean\n"
+            "fill the cup up to the brim\n"
+            "shake the world up if it were up to me\n"
+            "the burden is mine")
+    group_with(text, "syrup", "burden", "were up")
+    # ...but two weak phrases alone can't create a group
+    assert "were up" not in highlighted("it were up to him\nit were up to her")