Consonant-supported phrase matching + richer synonyms

- V+schwa phrases now need their coda consonant to agree with the
  family they join: "door hinge" keeps orange's R, while "sloth hugs"
  no longer rides along with over/shoulder. 3-vowel and full-vowel
  phrase runs are unchanged.
- Synonyms lemmatize the query (keys -> key) and return sections:
  synonyms, opposites, broader, related (antonyms, hypernyms,
  satellites, derivational forms) — frequency-ranked, deduped.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 02:27:28 -04:00
parent 74983245ad
commit 924cc54fd5
3 changed files with 110 additions and 32 deletions
+85 -17
View File
@@ -183,6 +183,9 @@ def founding_projections(key: str) -> dict[str, str]:
mk = _multi_key(vowels)
if mk:
out["multi"] = mk
mk2 = _m2_key(ph)
if mk2:
out["multi2"] = mk2
elif key.startswith("v:"): # vowel tail
out["slant"] = key
mk = _multi_key(key[2:].split())
@@ -190,6 +193,8 @@ def founding_projections(key: str) -> dict[str, str]:
out["multi"] = mk
elif key.startswith("m:"):
out["multi"] = key
elif key.startswith("m2:"):
out["multi2"] = key
elif key.startswith("c:"):
out["vc"] = key
return out
@@ -263,6 +268,20 @@ def vc_key(word: str) -> str | None:
return "c:" + key
def _m2_key(seq: list[str]) -> str | None:
    """Build the consonant-supported key for a vowel + reduced-vowel rime.

    Every phone is first passed through ``DIGITS.sub("", ...)``.  A key is
    produced only when the sequence then holds exactly two vowels, the
    second of them is in ``REDUCED``, and the phone directly after the
    first vowel is not itself a vowel.  The result has the shape
    ``"m2:<vowel> <following phone> x"``; any other input gives ``None``.

    Why it exists: V+schwa alone is too weak a match for phrases -- "door
    hinge" shares orange's R, but "sloth hugs" has nothing of "shoulder".
    """
    ph = []
    for phone in seq:
        ph.append(DIGITS.sub("", phone))
    vowels = [p for p in ph if p in ARPA_VOWELS]

    if len(vowels) != 2:
        return None
    if vowels[1] not in REDUCED:
        return None

    vi = ph.index(vowels[0])
    following = ph[vi + 1:vi + 2]
    if not following or following[0] in ARPA_VOWELS:
        # open syllable: there is no coda consonant to lean on
        return None
    return f"m2:{vowels[0]} {ph[vi + 1]} x"
def multi_keys(word: str) -> tuple[str, ...]:
"""Multisyllabic keys across all candidate pronunciations, anchored at
the last stressed vowel AND the first primary stress — KET-a-mine can
@@ -285,6 +304,9 @@ def multi_keys(word: str) -> tuple[str, ...]:
k = _multi_key(vs)
if k:
out.append(k)
k2 = _m2_key(pl[a:]) # joinable by consonant-supported phrases
if k2:
out.append(k2)
return tuple(dict.fromkeys(out))
@@ -501,7 +523,7 @@ def analyze(draft: Draft):
# (placement / creation both carry EY AH; orange / door hinge both
# carry AO + schwa). Single-vowel assonance is too noisy to flag
# mid-line, so it stays end-of-line only (pass 2).
group_by_multi = gmap_for("multi")
group_by_multi = {**gmap_for("multi"), **gmap_for("multi2")}
by_multi = defaultdict(list)
for t in tokens:
if id(t) in grouped:
@@ -532,7 +554,12 @@ def analyze(draft: Draft):
if any(s < p["end"] and p["start"] < e
for s, e in grouped_spans[p["line"]]):
continue
key = _multi_key(p["vowels"])
vs = p["vowels"]
if len(vs) >= 3 or (len(vs) == 2 and vs[1] not in REDUCED):
key = _multi_key(vs)
else:
# V+schwa phrases must bring consonant support
key = _m2_key(p["rime"].split())
if key:
attach_or_collect(p, key, by_multi, group_by_multi)
@@ -743,21 +770,61 @@ POS_NAMES = {"n": "noun", "v": "verb", "a": "adjective",
def synonyms_for(w: str, limit: int) -> list[dict]:
    """Return WordNet word associations for *w*, grouped into sections.

    The query is lemmatized with ``wn.morphy`` before the synset lookup,
    trying noun, verb, adjective and adverb in that order and keeping the
    first hit, so 'keys' resolves to 'key'.  Because noun is tried first,
    a form that is itself a noun lemma (e.g. 'feeling') is expected to be
    kept as is rather than reduced to a verb.

    Args:
        w: The query word (callers pass it stripped and lower-cased).
        limit: Upper bound on the number of words in any one section.
            Each section also has its own built-in cap; the smaller of
            the two applies.  Values below zero are treated as zero.

    Returns:
        A list of ``{"label": ..., "words": [{"word": ..., "pos": ...}]}``
        dicts in the order synonyms, opposites, broader, related.  Empty
        sections are omitted, so an unknown word yields ``[]``.  Words in
        a section are sorted by descending ``zipf_frequency`` and then
        alphabetically.
    """
    wn = get_wordnet()

    base = w
    for pos in ("n", "v", "a", "r"):
        m = wn.morphy(w, pos)
        if m:
            base = m
            break

    sections: dict[str, dict[str, str]] = {
        "synonyms": {}, "opposites": {}, "broader": {}, "related": {}}

    def add(bucket, lemma, pos):
        # Skip the query itself (raw or lemmatized) and any name that is
        # not plain letters, apostrophes, spaces or hyphens; the first
        # part of speech recorded for a name wins.
        name = lemma.name().replace("_", " ").lower()
        if name not in (w, base) and re.fullmatch(r"[a-z' -]+", name):
            sections[bucket].setdefault(name, POS_NAMES.get(pos, pos))

    for ss in wn.synsets(base):
        pos = ss.pos()
        for lemma in ss.lemmas():
            add("synonyms", lemma, pos)
            for ant in lemma.antonyms():
                add("opposites", ant, pos)
            for dr in lemma.derivationally_related_forms():
                add("related", dr, dr.synset().pos())
        if pos in ("a", "s"):  # adjectives: the satellite clusters
            for sim in ss.similar_tos():
                for lemma in sim.lemmas():
                    add("related", lemma, pos)
        for hyper in ss.hypernyms():
            for lemma in hyper.lemmas():
                add("broader", lemma, pos)
        for hypo in ss.hyponyms()[:8]:
            for lemma in hypo.lemmas():
                add("related", lemma, pos)
        for other in ss.also_sees() + ss.attributes():
            for lemma in other.lemmas():
                add("related", lemma, other.pos())

    # a word belongs to its strongest section only
    caps = {"synonyms": 30, "opposites": 10, "broader": 12, "related": 20}
    ceiling = max(limit, 0)
    taken: set[str] = set()
    out = []
    for label in ("synonyms", "opposites", "broader", "related"):
        items = {n: p for n, p in sections[label].items() if n not in taken}
        # Claim every candidate before trimming: a word cut by this
        # section's cap must not resurface under a weaker section.
        taken.update(items)
        ranked = sorted(items.items(),
                        key=lambda kv: (-zipf_frequency(kv[0], "en"), kv[0]))
        # Honor the caller's limit as well as the section's own cap.
        ranked = ranked[:min(caps[label], ceiling)]
        if ranked:
            out.append({"label": label,
                        "words": [{"word": n, "pos": p} for n, p in ranked]})
    return out
def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
@@ -781,8 +848,9 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
w = word.strip().lower()
if mode == "syn":
words = synonyms_for(w, limit)
return {"word": w, "mode": mode, "known": bool(words), "words": words}
sections = synonyms_for(w, limit)
return {"word": w, "mode": mode, "known": bool(sections),
"sections": sections}
phones = phones_for(w)
if not phones:
return {"word": w, "mode": mode, "known": False, "words": []}
+7 -11
View File
@@ -578,7 +578,7 @@ async function doLookup(){
resultsBox.innerHTML = `<p class="muted">No synonyms found for “${esc(word)}”.</p>`;
return;
}
renderByPos(word, data.words);
renderSections(word, data.sections);
}else{
if(!data.known){
resultsBox.innerHTML = `<p class="muted">“${esc(word)}” isn't in the pronunciation dictionary.</p>`;
@@ -644,17 +644,13 @@ function renderChips(label, words){
resultsBox.innerHTML = `<div class="res-label">${label}</div>` + chipHtml(words.slice(0,50));
wireChips();
}
// Render the synonym-mode result: one labelled chip group per section.
// Sections other than 'synonyms' pass 'near' as chipHtml's second argument.
function renderSections(word, sections){
  if(!sections.length){
    resultsBox.innerHTML = `<p class="muted">No synonyms for “${esc(word)}”.</p>`;
    return;
  }
  const parts = sections.map(s=>{
    const heading = `<div class="res-label">${esc(s.label)}</div>`;
    const chips = chipHtml(s.words.map(d=>d.word), s.label === 'synonyms' ? '' : 'near');
    return heading + chips;
  });
  resultsBox.innerHTML = parts.join('');
  wireChips();
}
+18 -4
View File
@@ -300,10 +300,16 @@ def test_perfect_subgroup_fuses_with_slant_family():
def test_lookup_synonyms_wordnet():
    """Synonym mode for "happy" lists "glad" in some section and returns
    both a synonyms and an opposites section."""
    sections = lookup("happy", mode="syn")["sections"]

    words = set()
    for section in sections:
        for entry in section["words"]:
            words.add(entry["word"])
    assert "glad" in words

    labels = {section["label"] for section in sections}
    assert "synonyms" in labels
    assert "opposites" in labels
def test_lookup_synonyms_lemmatized():
    """A plural query is still reported as known in synonym mode
    (morphy: keys -> key)."""
    known = lookup("keys", mode="syn")["known"]
    assert known is True
def test_lookup_synonyms_unknown_word():
@@ -322,3 +328,11 @@ def test_rhyme_mode_includes_near():
data = lookup("hold", mode="rhyme")
assert "gold" in {w["word"] for w in data["words"]}
assert "home" in {w["word"] for w in data["near"]}
def test_schwa_phrase_needs_consonant_support():
    """"sloth hugs" (OW-TH + schwa) must not be highlighted with the
    over/shoulder family just because the vowels rhyme; "door hinge" only
    gets in because it shares orange's R."""
    couplet = ("Looming over your shoulder, like a sloth hugs a tree,\n"
               "Thinking it won't fall, yet there it goes. Damn, it's free.")
    marked = highlighted(couplet)
    assert "sloth hugs" not in marked