Multi-anchor rhyme keys + Dictionary mode in the sidebar

KET-a-mine can now rhyme from its first primary stress, not just its dictionary-final one, so it joins meth-am-PHET-a-mine's family. Because a token now carries several anchor keys and can therefore sit in several multi buckets, the biggest buckets claim their tokens first. The lookup panel grew a fourth mode: Dictionary.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-11 17:08:33 +00:00 · 2026-06-07 02:14:24 -04:00
parent 03fc5d16d0
commit da82cd6ee2
3 changed files with 63 additions and 8 deletions
@@ -105,6 +105,12 @@ def phones_candidates(word: str) -> tuple[str, ...]:
                g = g2p_phones(w)
                if g:
                    cands.append(g)
+                if w.endswith("ine"):
+                    # -ine is ambiguous (valentine vs ketamine); keep the
+                    # "-een" reading as a candidate too
+                    g = g2p_phones(w[:-3] + "een")
+                    if g:
+                        cands.append(g)
            except Exception:
                pass  # g2p model unavailable
        if len(w) >= 6:
@@ -257,6 +263,31 @@ def vc_key(word: str) -> str | None:
    return "c:" + key


+def multi_keys(word: str) -> tuple[str, ...]:
+    """Multisyllabic keys across all candidate pronunciations, anchored at
+    the last stressed vowel AND the first primary stress — KET-a-mine can
+    rhyme from its first syllable (meth-am-PHET-a-mine) even though its
+    dictionary stress sits at the end."""
+    out = []  # keys gathered over every candidate pronunciation, in discovery order
+    for ph in phones_candidates(word):
+        pl = ph.split()  # one entry per whitespace-separated phone
+        anchors = set()  # start indices to key from; a set, so both scans may agree on one index
+        for i in range(len(pl) - 1, -1, -1):  # backwards: last phone ending in "1" or "2"
+            if pl[i][-1] in "12":
+                anchors.add(i)
+                break
+        for i, p in enumerate(pl):  # forwards: first phone ending in "1"
+            if p[-1] == "1":
+                anchors.add(i)
+                break
+        for a in anchors or {0}:  # neither scan matched: fall back to index 0
+            vs = [DIGITS.sub("", p) for p in pl[a:] if p[-1].isdigit()]  # digit-final phones from the anchor on, DIGITS matches removed
+            k = _multi_key(vs)
+            if k:  # skip pronunciations for which _multi_key yields nothing
+                out.append(k)
+    return tuple(dict.fromkeys(out))  # drop duplicate keys, keep first-seen order
+
+
 def slant_key(word: str) -> str | None:
    phones = phones_for(word)
    if phones:
@@ -478,10 +509,17 @@ def analyze(draft: Draft):
        w = t["word"].lower()
        if not t["is_end"] and (w in STOPWORDS or len(w) < 2):
            continue
-        ph = phones_for(t["word"])
-        key = _multi_key(_tail_vowels(ph)) if ph else None
-        if key:
-            attach_or_collect(t, key, by_multi, group_by_multi)
+        keys = multi_keys(t["word"])
+        for key in keys:  # join an existing family if any anchor fits
+            gi = group_by_multi.get((t["sid"], key))
+            if gi is not None:
+                raw_groups[gi]["toks"].append(t)
+                t["slant"] = True
+                grouped.add(id(t))
+                break
+        else:
+            for key in keys:
+                by_multi[(t["sid"], key)].append(t)

    # a phrase only competes when no word inside it already rhymes
    grouped_spans = defaultdict(list)
@@ -498,9 +536,12 @@ def analyze(draft: Draft):
        if key:
            attach_or_collect(p, key, by_multi, group_by_multi)

-    # distinctness by anchor word, so the phrase "fire burns" can't pose
-    # as a different word than the "fire" it starts with
-    for (sid, key), toks in by_multi.items():
+    # biggest buckets claim first (a token may sit in several via its
+    # anchors); distinctness by anchor word, so the phrase "fire burns"
+    # can't pose as a different word than the "fire" it starts with
+    for (sid, key), toks in sorted(by_multi.items(),
+                                   key=lambda kv: (-len(kv[1]), kv[0][1])):
+        toks = [t for t in toks if id(t) not in grouped]
        if len(toks) >= 2 and len({t["word"].split()[0] for t in toks}) >= 2:
            raw_groups.append({"toks": toks, "slant": True, "key": key})
            grouped.update(id(t) for t in toks)
@@ -270,8 +270,9 @@ Double-click any word to look it up on the right."></textarea>
      </div>
      <div class="seg" id="modeSeg">
        <button class="active" data-mode="rhyme">Rhymes</button>
-        <button data-mode="near">Near rhymes</button>
+        <button data-mode="near">Near</button>
        <button data-mode="syn">Synonyms</button>
+        <button data-mode="dict">Dictionary</button>
      </div>
      <div id="defBox"></div>
      <div id="lookupResults">
@@ -564,6 +565,11 @@ async function doLookup(){
  const word = document.getElementById('lookupInput').value.trim();
  if(!word) return;
  defBox.innerHTML = '';
+  if(mode === 'dict'){
+    resultsBox.innerHTML = '';
+    showDefinition(word);
+    return;
+  }
  resultsBox.innerHTML = '<p class="muted">Searching…</p>';
  try{
    const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
@@ -308,3 +308,11 @@ def test_lookup_synonyms_wordnet():

 def test_lookup_synonyms_unknown_word():
    assert lookup("xqzzqx", mode="syn")["known"] is False
+
+
+def test_ine_ending_gets_een_candidate():
+    # g2p reads OOV "-ine" like valentine (AY N); Em says ketamine with
+    # an -een. Both candidates compete; the perfect pass picks the match.
+    text = ("Even ketamine or methamphetamine with the minithin\n"
+            "It better be at least seventy or three-hundred milligram")
+    group_with(text, "ketamine", "methamphetamine")  # NOTE(review): helper not shown here; presumably asserts both words share a rhyme group — confirm