mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
Consonant-supported phrase matching + richer synonyms
- V+schwa phrases now need their coda consonant to agree with the family they join: door hinge keeps orange's R, sloth hugs no longer rides over/shoulder. 3-vowel and full-vowel phrase runs are unchanged.
- Synonyms lemmatize the query (keys -> key) and return sections: synonyms, opposites, broader, related (antonyms, hypernyms, satellites, derivational forms) — frequency-ranked, deduped.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -183,6 +183,9 @@ def founding_projections(key: str) -> dict[str, str]:
|
||||
mk = _multi_key(vowels)
|
||||
if mk:
|
||||
out["multi"] = mk
|
||||
mk2 = _m2_key(ph)
|
||||
if mk2:
|
||||
out["multi2"] = mk2
|
||||
elif key.startswith("v:"): # vowel tail
|
||||
out["slant"] = key
|
||||
mk = _multi_key(key[2:].split())
|
||||
@@ -190,6 +193,8 @@ def founding_projections(key: str) -> dict[str, str]:
|
||||
out["multi"] = mk
|
||||
elif key.startswith("m:"):
|
||||
out["multi"] = key
|
||||
elif key.startswith("m2:"):
|
||||
out["multi2"] = key
|
||||
elif key.startswith("c:"):
|
||||
out["vc"] = key
|
||||
return out
|
||||
@@ -263,6 +268,20 @@ def vc_key(word: str) -> str | None:
|
||||
return "c:" + key
|
||||
|
||||
|
||||
def _m2_key(seq: list[str]) -> str | None:
    """Build the consonant-supported variant of a 2-vowel key.

    Every phone in *seq* first has its ``DIGITS`` matches removed. A key of
    the form ``m2:<first vowel> <following phone> x`` is returned only when
    the sequence holds exactly two vowels, the second vowel is in
    ``REDUCED``, and the phone right after the first vowel is not itself a
    vowel; otherwise the result is ``None``.

    V+schwa alone is too weak for phrases: door hinge shares orange's R,
    but sloth hugs has nothing of shoulder.
    """
    phones = [DIGITS.sub("", phone) for phone in seq]
    nuclei = [phone for phone in phones if phone in ARPA_VOWELS]
    if len(nuclei) != 2:
        return None
    head, tail = nuclei
    if tail not in REDUCED:
        return None
    after = phones.index(head) + 1
    # open syllable — no coda to lean on
    if after >= len(phones) or phones[after] in ARPA_VOWELS:
        return None
    return f"m2:{head} {phones[after]} x"
|
||||
|
||||
|
||||
def multi_keys(word: str) -> tuple[str, ...]:
|
||||
"""Multisyllabic keys across all candidate pronunciations, anchored at
|
||||
the last stressed vowel AND the first primary stress — KET-a-mine can
|
||||
@@ -285,6 +304,9 @@ def multi_keys(word: str) -> tuple[str, ...]:
|
||||
k = _multi_key(vs)
|
||||
if k:
|
||||
out.append(k)
|
||||
k2 = _m2_key(pl[a:]) # joinable by consonant-supported phrases
|
||||
if k2:
|
||||
out.append(k2)
|
||||
return tuple(dict.fromkeys(out))
|
||||
|
||||
|
||||
@@ -501,7 +523,7 @@ def analyze(draft: Draft):
|
||||
# (placement / creation both carry EY AH; orange / door hinge both
|
||||
# carry AO + schwa). Single-vowel assonance is too noisy to flag
|
||||
# mid-line, so it stays end-of-line only (pass 2).
|
||||
group_by_multi = gmap_for("multi")
|
||||
group_by_multi = {**gmap_for("multi"), **gmap_for("multi2")}
|
||||
by_multi = defaultdict(list)
|
||||
for t in tokens:
|
||||
if id(t) in grouped:
|
||||
@@ -532,7 +554,12 @@ def analyze(draft: Draft):
|
||||
if any(s < p["end"] and p["start"] < e
|
||||
for s, e in grouped_spans[p["line"]]):
|
||||
continue
|
||||
key = _multi_key(p["vowels"])
|
||||
vs = p["vowels"]
|
||||
if len(vs) >= 3 or (len(vs) == 2 and vs[1] not in REDUCED):
|
||||
key = _multi_key(vs)
|
||||
else:
|
||||
# V+schwa phrases must bring consonant support
|
||||
key = _m2_key(p["rime"].split())
|
||||
if key:
|
||||
attach_or_collect(p, key, by_multi, group_by_multi)
|
||||
|
||||
@@ -743,21 +770,61 @@ POS_NAMES = {"n": "noun", "v": "verb", "a": "adjective",
|
||||
|
||||
|
||||
def synonyms_for(w: str, limit: int) -> list[dict]:
|
||||
"""Sense-grouped synonyms from WordNet, frequency-ranked."""
|
||||
"""Word associations from WordNet, in sections: synonyms, opposites,
|
||||
broader terms, and related words. Input is lemmatized first so
|
||||
'keys' and 'feeling' resolve to 'key' and 'feel'."""
|
||||
wn = get_wordnet()
|
||||
seen: dict[str, str] = {}
|
||||
for ss in wn.synsets(w):
|
||||
lemmas = list(ss.lemmas())
|
||||
if ss.pos() in ("a", "s"): # adjectives: pull in the satellites
|
||||
base = w
|
||||
for pos in ("n", "v", "a", "r"):
|
||||
m = wn.morphy(w, pos)
|
||||
if m:
|
||||
base = m
|
||||
break
|
||||
|
||||
sections: dict[str, dict[str, str]] = {
|
||||
"synonyms": {}, "opposites": {}, "broader": {}, "related": {}}
|
||||
|
||||
def add(bucket, lemma, pos):
|
||||
name = lemma.name().replace("_", " ").lower()
|
||||
if name not in (w, base) and re.fullmatch(r"[a-z' -]+", name):
|
||||
sections[bucket].setdefault(name, POS_NAMES.get(pos, pos))
|
||||
|
||||
for ss in wn.synsets(base):
|
||||
pos = ss.pos()
|
||||
for lemma in ss.lemmas():
|
||||
add("synonyms", lemma, pos)
|
||||
for ant in lemma.antonyms():
|
||||
add("opposites", ant, pos)
|
||||
for dr in lemma.derivationally_related_forms():
|
||||
add("related", dr, dr.synset().pos())
|
||||
if pos in ("a", "s"): # adjectives: the satellite clusters
|
||||
for sim in ss.similar_tos():
|
||||
lemmas.extend(sim.lemmas())
|
||||
for lemma in lemmas:
|
||||
name = lemma.name().replace("_", " ").lower()
|
||||
if name != w and re.fullmatch(r"[a-z' -]+", name):
|
||||
seen.setdefault(name, POS_NAMES.get(ss.pos(), ss.pos()))
|
||||
ranked = sorted(seen.items(),
|
||||
key=lambda kv: (-zipf_frequency(kv[0], "en"), kv[0]))
|
||||
return [{"word": n, "pos": p} for n, p in ranked[:limit]]
|
||||
for lemma in sim.lemmas():
|
||||
add("related", lemma, pos)
|
||||
for hyper in ss.hypernyms():
|
||||
for lemma in hyper.lemmas():
|
||||
add("broader", lemma, pos)
|
||||
for hypo in ss.hyponyms()[:8]:
|
||||
for lemma in hypo.lemmas():
|
||||
add("related", lemma, pos)
|
||||
for other in ss.also_sees() + ss.attributes():
|
||||
for lemma in other.lemmas():
|
||||
add("related", lemma, other.pos())
|
||||
|
||||
# a word belongs to its strongest section only
|
||||
caps = {"synonyms": 30, "opposites": 10, "broader": 12, "related": 20}
|
||||
taken: set[str] = set()
|
||||
out = []
|
||||
for label in ("synonyms", "opposites", "broader", "related"):
|
||||
items = {n: p for n, p in sections[label].items() if n not in taken}
|
||||
ranked = sorted(items.items(),
|
||||
key=lambda kv: (-zipf_frequency(kv[0], "en"), kv[0]))
|
||||
ranked = ranked[:caps[label]]
|
||||
taken.update(n for n, _ in ranked)
|
||||
if ranked:
|
||||
out.append({"label": label,
|
||||
"words": [{"word": n, "pos": p} for n, p in ranked]})
|
||||
return out
|
||||
|
||||
|
||||
def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
|
||||
@@ -781,8 +848,9 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
|
||||
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
|
||||
w = word.strip().lower()
|
||||
if mode == "syn":
|
||||
words = synonyms_for(w, limit)
|
||||
return {"word": w, "mode": mode, "known": bool(words), "words": words}
|
||||
sections = synonyms_for(w, limit)
|
||||
return {"word": w, "mode": mode, "known": bool(sections),
|
||||
"sections": sections}
|
||||
phones = phones_for(w)
|
||||
if not phones:
|
||||
return {"word": w, "mode": mode, "known": False, "words": []}
|
||||
|
||||
+7
-11
@@ -578,7 +578,7 @@ async function doLookup(){
|
||||
resultsBox.innerHTML = `<p class="muted">No synonyms found for “${esc(word)}”.</p>`;
|
||||
return;
|
||||
}
|
||||
renderByPos(word, data.words);
|
||||
renderSections(word, data.sections);
|
||||
}else{
|
||||
if(!data.known){
|
||||
resultsBox.innerHTML = `<p class="muted">“${esc(word)}” isn't in the pronunciation dictionary.</p>`;
|
||||
@@ -644,17 +644,13 @@ function renderChips(label, words){
|
||||
resultsBox.innerHTML = `<div class="res-label">${label}</div>` + chipHtml(words.slice(0,50));
|
||||
wireChips();
|
||||
}
|
||||
function renderByPos(word, items){
|
||||
if(!items.length){ resultsBox.innerHTML = `<p class="muted">No synonyms for “${esc(word)}”.</p>`; return; }
|
||||
const byPos = {};
|
||||
items.forEach(d=>{ (byPos[d.pos || 'related'] ||= []).push(d.word); });
|
||||
let h = `<div class="res-label">Synonyms for “${esc(word)}”</div>`;
|
||||
['noun','verb','adjective','adverb','related'].forEach(pos=>{
|
||||
if(!byPos[pos]) return;
|
||||
h += `<div class="res-label">${pos}s</div>` + chipHtml(byPos[pos]);
|
||||
delete byPos[pos];
|
||||
function renderSections(word, sections){
|
||||
if(!sections.length){ resultsBox.innerHTML = `<p class="muted">No synonyms for “${esc(word)}”.</p>`; return; }
|
||||
let h = '';
|
||||
sections.forEach(s=>{
|
||||
h += `<div class="res-label">${esc(s.label)}</div>` +
|
||||
chipHtml(s.words.map(d=>d.word), s.label === 'synonyms' ? '' : 'near');
|
||||
});
|
||||
Object.keys(byPos).forEach(pos=>{ h += `<div class="res-label">${esc(pos)}</div>` + chipHtml(byPos[pos]); });
|
||||
resultsBox.innerHTML = h;
|
||||
wireChips();
|
||||
}
|
||||
|
||||
+18
-4
@@ -300,10 +300,16 @@ def test_perfect_subgroup_fuses_with_slant_family():
|
||||
|
||||
|
||||
def test_lookup_synonyms_wordnet():
|
||||
words = {w["word"] for w in lookup("happy", mode="syn")["words"]}
|
||||
assert "glad" in words
|
||||
big = {w["word"]: w["pos"] for w in lookup("big", mode="syn")["words"]}
|
||||
assert "large" in big and big["large"] == "adjective"
|
||||
data = lookup("happy", mode="syn")
|
||||
flat = {w["word"] for s in data["sections"] for w in s["words"]}
|
||||
assert "glad" in flat
|
||||
labels = [s["label"] for s in data["sections"]]
|
||||
assert "synonyms" in labels and "opposites" in labels
|
||||
|
||||
|
||||
def test_lookup_synonyms_lemmatized():
|
||||
data = lookup("keys", mode="syn") # morphy: keys -> key
|
||||
assert data["known"] is True
|
||||
|
||||
|
||||
def test_lookup_synonyms_unknown_word():
|
||||
@@ -322,3 +328,11 @@ def test_rhyme_mode_includes_near():
|
||||
data = lookup("hold", mode="rhyme")
|
||||
assert "gold" in {w["word"] for w in data["words"]}
|
||||
assert "home" in {w["word"] for w in data["near"]}
|
||||
|
||||
|
||||
def test_schwa_phrase_needs_consonant_support():
|
||||
# «sloth hugs» (OW-TH + schwa) must not ride the over/shoulder family
|
||||
# just because the vowels rhyme — door hinge gets in on orange's R
|
||||
text = ("Looming over your shoulder, like a sloth hugs a tree,\n"
|
||||
"Thinking it won't fall, yet there it goes. Damn, it's free.")
|
||||
assert "sloth hugs" not in highlighted(text)
|
||||
|
||||
Reference in New Issue
Block a user