mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
Multi-anchor rhyme keys + Dictionary mode in the sidebar
KET-a-mine can now rhyme from its first primary stress, not just its dictionary-final one, so it joins meth-am-PHET-a-mine's family; multi buckets claim biggest-first since tokens carry several anchor keys. The lookup panel grew a fourth mode: Dictionary. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -105,6 +105,12 @@ def phones_candidates(word: str) -> tuple[str, ...]:
|
||||
g = g2p_phones(w)
|
||||
if g:
|
||||
cands.append(g)
|
||||
if w.endswith("ine"):
|
||||
# -ine is ambiguous (valentine vs ketamine); keep the
|
||||
# "-een" reading as a candidate too
|
||||
g = g2p_phones(w[:-3] + "een")
|
||||
if g:
|
||||
cands.append(g)
|
||||
except Exception:
|
||||
pass # g2p model unavailable
|
||||
if len(w) >= 6:
|
||||
@@ -257,6 +263,31 @@ def vc_key(word: str) -> str | None:
|
||||
return "c:" + key
|
||||
|
||||
|
||||
def multi_keys(word: str) -> tuple[str, ...]:
    """Multisyllabic keys across all candidate pronunciations, anchored at
    the last stressed vowel AND the first primary stress — KET-a-mine can
    rhyme from its first syllable (meth-am-PHET-a-mine) even though its
    dictionary stress sits at the end.

    For every pronunciation from ``phones_candidates(word)`` up to two
    anchor positions are picked: the last phone ending in "1" or "2" and
    the first phone ending in "1". If neither exists the whole
    pronunciation (index 0) is used. From each anchor onward, the phones
    ending in a digit have their digits stripped via ``DIGITS`` and are
    passed to ``_multi_key``; falsy results are dropped.

    Returns the keys de-duplicated in first-seen order: candidates in the
    order ``phones_candidates`` yields them and, within one candidate,
    anchors from the earliest phone index to the latest.
    """
    keys = []
    for phones in phones_candidates(word):
        pl = phones.split()
        marks = [p[-1] for p in pl]  # trailing char of each phone

        # Scan from the end for the last phone marked "1" or "2".
        last_stressed = next(
            (i for i in range(len(pl) - 1, -1, -1) if marks[i] in ("1", "2")),
            None,
        )
        # Scan from the start for the first phone marked "1".
        first_primary = next(
            (i for i, m in enumerate(marks) if m == "1"),
            None,
        )
        anchors = {i for i in (last_stressed, first_primary) if i is not None}

        # Iterate in sorted order: callers try the keys in sequence, so the
        # order must not depend on how the set happens to lay out its
        # elements internally.
        for a in sorted(anchors) or [0]:
            vowels = [DIGITS.sub("", p) for p in pl[a:] if p[-1].isdigit()]
            key = _multi_key(vowels)
            if key:
                keys.append(key)
    return tuple(dict.fromkeys(keys))
|
||||
|
||||
|
||||
def slant_key(word: str) -> str | None:
|
||||
phones = phones_for(word)
|
||||
if phones:
|
||||
@@ -478,10 +509,17 @@ def analyze(draft: Draft):
|
||||
w = t["word"].lower()
|
||||
if not t["is_end"] and (w in STOPWORDS or len(w) < 2):
|
||||
continue
|
||||
ph = phones_for(t["word"])
|
||||
key = _multi_key(_tail_vowels(ph)) if ph else None
|
||||
if key:
|
||||
attach_or_collect(t, key, by_multi, group_by_multi)
|
||||
keys = multi_keys(t["word"])
|
||||
for key in keys: # join an existing family if any anchor fits
|
||||
gi = group_by_multi.get((t["sid"], key))
|
||||
if gi is not None:
|
||||
raw_groups[gi]["toks"].append(t)
|
||||
t["slant"] = True
|
||||
grouped.add(id(t))
|
||||
break
|
||||
else:
|
||||
for key in keys:
|
||||
by_multi[(t["sid"], key)].append(t)
|
||||
|
||||
# a phrase only competes when no word inside it already rhymes
|
||||
grouped_spans = defaultdict(list)
|
||||
@@ -498,9 +536,12 @@ def analyze(draft: Draft):
|
||||
if key:
|
||||
attach_or_collect(p, key, by_multi, group_by_multi)
|
||||
|
||||
# distinctness by anchor word, so the phrase "fire burns" can't pose
|
||||
# as a different word than the "fire" it starts with
|
||||
for (sid, key), toks in by_multi.items():
|
||||
# biggest buckets claim first (a token may sit in several via its
|
||||
# anchors); distinctness by anchor word, so the phrase "fire burns"
|
||||
# can't pose as a different word than the "fire" it starts with
|
||||
for (sid, key), toks in sorted(by_multi.items(),
|
||||
key=lambda kv: (-len(kv[1]), kv[0][1])):
|
||||
toks = [t for t in toks if id(t) not in grouped]
|
||||
if len(toks) >= 2 and len({t["word"].split()[0] for t in toks}) >= 2:
|
||||
raw_groups.append({"toks": toks, "slant": True, "key": key})
|
||||
grouped.update(id(t) for t in toks)
|
||||
|
||||
+7
-1
@@ -270,8 +270,9 @@ Double-click any word to look it up on the right."></textarea>
|
||||
</div>
|
||||
<div class="seg" id="modeSeg">
|
||||
<button class="active" data-mode="rhyme">Rhymes</button>
|
||||
<button data-mode="near">Near rhymes</button>
|
||||
<button data-mode="near">Near</button>
|
||||
<button data-mode="syn">Synonyms</button>
|
||||
<button data-mode="dict">Dictionary</button>
|
||||
</div>
|
||||
<div id="defBox"></div>
|
||||
<div id="lookupResults">
|
||||
@@ -564,6 +565,11 @@ async function doLookup(){
|
||||
const word = document.getElementById('lookupInput').value.trim();
|
||||
if(!word) return;
|
||||
defBox.innerHTML = '';
|
||||
if(mode === 'dict'){
|
||||
resultsBox.innerHTML = '';
|
||||
showDefinition(word);
|
||||
return;
|
||||
}
|
||||
resultsBox.innerHTML = '<p class="muted">Searching…</p>';
|
||||
try{
|
||||
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
|
||||
|
||||
@@ -308,3 +308,11 @@ def test_lookup_synonyms_wordnet():
|
||||
|
||||
def test_lookup_synonyms_unknown_word():
    """A nonsense word looked up in synonym mode is reported as unknown."""
    result = lookup("xqzzqx", mode="syn")
    assert result["known"] is False
|
||||
|
||||
|
||||
def test_ine_ending_gets_een_candidate():
    """ketamine and methamphetamine must land in the same rhyme group."""
    # g2p reads an out-of-vocabulary "-ine" like valentine (AY N), while the
    # lyric pronounces ketamine with an "-een". Both readings are kept as
    # candidates and the perfect pass picks whichever one matches.
    lyric_lines = [
        "Even ketamine or methamphetamine with the minithin",
        "It better be at least seventy or three-hundred milligram",
    ]
    text = "\n".join(lyric_lines)
    group_with(text, "ketamine", "methamphetamine")
|
||||
|
||||
Reference in New Issue
Block a user