Multi-anchor rhyme keys + Dictionary mode in the sidebar

KET-a-mine can now rhyme from its first primary stress, not just its
dictionary-final one, so it joins meth-am-PHET-a-mine's family. Because
a token can now carry several anchor keys, the biggest multi-key
buckets claim their tokens first.
The lookup panel grew a fourth mode: Dictionary.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 02:14:24 -04:00
parent 03fc5d16d0
commit da82cd6ee2
3 changed files with 63 additions and 8 deletions
+48 -7
View File
@@ -105,6 +105,12 @@ def phones_candidates(word: str) -> tuple[str, ...]:
g = g2p_phones(w)
if g:
cands.append(g)
if w.endswith("ine"):
# -ine is ambiguous (valentine vs ketamine); keep the
# "-een" reading as a candidate too
g = g2p_phones(w[:-3] + "een")
if g:
cands.append(g)
except Exception:
pass # g2p model unavailable
if len(w) >= 6:
@@ -257,6 +263,31 @@ def vc_key(word: str) -> str | None:
return "c:" + key
def multi_keys(word: str) -> tuple[str, ...]:
    """Return the de-duplicated multisyllabic rhyme keys for *word*.

    Every candidate pronunciation from ``phones_candidates`` is anchored at
    up to two positions: its last stressed vowel (stress digit 1 or 2) and
    its first primary stress (stress digit 1).  KET-a-mine can therefore
    rhyme from its first syllable (meth-am-PHET-a-mine) even though its
    dictionary stress sits at the end.  A pronunciation without any stressed
    phone is anchored at its start.

    Keys are returned in first-seen order; within one pronunciation the
    earlier anchor is keyed first.
    """
    out: list[str] = []
    for ph in phones_candidates(word):
        pl = ph.split()
        # last phone carrying primary or secondary stress, scanning backwards
        last_stressed = next(
            (i for i in range(len(pl) - 1, -1, -1) if pl[i][-1] in "12"),
            None,
        )
        # first phone carrying primary stress, scanning forwards
        first_primary = next(
            (i for i, p in enumerate(pl) if p[-1] == "1"),
            None,
        )
        anchors = {a for a in (last_stressed, first_primary) if a is not None}
        # sorted(): iteration order of a set is unspecified, so sort to keep
        # the order of the emitted keys well defined
        for a in sorted(anchors or {0}):
            # only vowels carry a trailing stress digit; strip it for the key
            vs = [DIGITS.sub("", p) for p in pl[a:] if p[-1].isdigit()]
            k = _multi_key(vs)
            if k:
                out.append(k)
    # dict.fromkeys drops duplicates while preserving first-seen order
    return tuple(dict.fromkeys(out))
def slant_key(word: str) -> str | None:
phones = phones_for(word)
if phones:
@@ -478,10 +509,17 @@ def analyze(draft: Draft):
w = t["word"].lower()
if not t["is_end"] and (w in STOPWORDS or len(w) < 2):
continue
ph = phones_for(t["word"])
key = _multi_key(_tail_vowels(ph)) if ph else None
if key:
attach_or_collect(t, key, by_multi, group_by_multi)
keys = multi_keys(t["word"])
for key in keys: # join an existing family if any anchor fits
gi = group_by_multi.get((t["sid"], key))
if gi is not None:
raw_groups[gi]["toks"].append(t)
t["slant"] = True
grouped.add(id(t))
break
else:
for key in keys:
by_multi[(t["sid"], key)].append(t)
# a phrase only competes when no word inside it already rhymes
grouped_spans = defaultdict(list)
@@ -498,9 +536,12 @@ def analyze(draft: Draft):
if key:
attach_or_collect(p, key, by_multi, group_by_multi)
# distinctness by anchor word, so the phrase "fire burns" can't pose
# as a different word than the "fire" it starts with
for (sid, key), toks in by_multi.items():
# biggest buckets claim first (a token may sit in several via its
# anchors); distinctness by anchor word, so the phrase "fire burns"
# can't pose as a different word than the "fire" it starts with
for (sid, key), toks in sorted(by_multi.items(),
key=lambda kv: (-len(kv[1]), kv[0][1])):
toks = [t for t in toks if id(t) not in grouped]
if len(toks) >= 2 and len({t["word"].split()[0] for t in toks}) >= 2:
raw_groups.append({"toks": toks, "slant": True, "key": key})
grouped.update(id(t) for t in toks)
+7 -1
View File
@@ -270,8 +270,9 @@ Double-click any word to look it up on the right."></textarea>
</div>
<div class="seg" id="modeSeg">
<button class="active" data-mode="rhyme">Rhymes</button>
<button data-mode="near">Near rhymes</button>
<button data-mode="near">Near</button>
<button data-mode="syn">Synonyms</button>
<button data-mode="dict">Dictionary</button>
</div>
<div id="defBox"></div>
<div id="lookupResults">
@@ -564,6 +565,11 @@ async function doLookup(){
const word = document.getElementById('lookupInput').value.trim();
if(!word) return;
defBox.innerHTML = '';
if(mode === 'dict'){
resultsBox.innerHTML = '';
showDefinition(word);
return;
}
resultsBox.innerHTML = '<p class="muted">Searching…</p>';
try{
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
+8
View File
@@ -308,3 +308,11 @@ def test_lookup_synonyms_wordnet():
def test_lookup_synonyms_unknown_word():
assert lookup("xqzzqx", mode="syn")["known"] is False
def test_ine_ending_gets_een_candidate():
    """'ketamine' and 'methamphetamine' must land in one rhyme group."""
    # g2p reads OOV "-ine" like valentine (AY N); Em says ketamine with
    # an -een. Both candidates compete; the perfect pass picks the match.
    lyric_lines = (
        "Even ketamine or methamphetamine with the minithin",
        "It better be at least seventy or three-hundred milligram",
    )
    group_with("\n".join(lyric_lines), "ketamine", "methamphetamine")