WordNet synonyms + double-click opens definitions

- mode=syn now served locally by NLTK WordNet (already a g2p-en dep),
  sense-grouped by part of speech and frequency-ranked; Datamuse gone
- Double-clicking a word in the draft opens its definition card; the
  card grew a Rhymes button for the old jump
- NLTK wordnet data baked into the Docker image and CI

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 02:09:00 -04:00
parent e33bf58b11
commit 69b8ab4e93
6 changed files with 105 additions and 16 deletions
+1 -1
View File
@@ -18,5 +18,5 @@ jobs:
# under both its old and new (nltk>=3.9) names
- run: |
uv run python -c "import nltk;
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]"
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict','wordnet','omw-1.4')]"
- run: uv run pytest
+2 -2
View File
@@ -16,9 +16,9 @@ RUN uv sync --frozen --no-dev --no-install-project
COPY app.py ./
COPY static ./static
# Pre-warm NLTK data so the g2p-en fallback never downloads at request time
# Pre-warm NLTK data so g2p-en and WordNet never download at request time
RUN uv run python -c "import nltk;\
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]" || true
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict','wordnet','omw-1.4')]" || true
EXPOSE 8000
+7 -5
View File
@@ -31,9 +31,11 @@ Then open <http://127.0.0.1:8000>.
## API
- `POST /api/analyze` `{"text": "..."}` → token spans, rhyme groups,
per-stanza schemes
- `GET /api/lookup?word=light&mode=rhyme|near` → frequency-ranked
rhymes / near rhymes, grouped by syllable count
per-stanza schemes, per-line meter
- `GET /api/lookup?word=light&mode=rhyme|near|syn` → frequency-ranked
rhymes / near rhymes (by syllable count) and WordNet synonyms (by
part of speech)
Synonyms come from the free [Datamuse](https://www.datamuse.com/api/)
API, client-side.
Dictionary definitions in the UI come from the free
[dictionaryapi.dev](https://dictionaryapi.dev/); everything else is
served locally.
+47
View File
@@ -27,6 +27,10 @@ async def lifespan(app: FastAPI):
g2p_phones("warmup")
except Exception:
pass # model unavailable; spelling fallbacks still work
try:
get_wordnet()
except Exception:
pass
get_slant_index()
yield
@@ -675,6 +679,46 @@ def get_slant_index() -> dict[str, set[str]]:
return _slant_index
_wordnet = None


def get_wordnet():
    """Return the NLTK WordNet corpus reader, fetching its data if missing.

    The reader is cached in the module-level ``_wordnet`` after the first
    call.  On that first call a probe lookup is made; if it raises
    ``LookupError`` the ``wordnet`` and ``omw-1.4`` NLTK packages are
    downloaded before the reader is cached.
    """
    global _wordnet
    if _wordnet is not None:
        return _wordnet

    # Imported lazily so nltk is only touched when the reader is first needed.
    from nltk.corpus import wordnet as corpus

    try:
        corpus.synsets("test")  # probe: raises LookupError when data is absent
    except LookupError:
        import nltk

        for package in ("wordnet", "omw-1.4"):
            nltk.download(package, quiet=True)

    _wordnet = corpus
    return _wordnet
# WordNet part-of-speech tags -> display names ("s" is a satellite adjective).
POS_NAMES = {"n": "noun", "v": "verb", "a": "adjective",
             "s": "adjective", "r": "adverb"}


def synonyms_for(w: str, limit: int) -> list[dict]:
    """Sense-grouped synonyms from WordNet, frequency-ranked.

    Args:
        w: Word or phrase to look up.  Candidates equal to ``w`` are dropped
            after being lower-cased, so callers should pass it lower-cased.
        limit: Maximum number of entries to return.

    Returns:
        ``{"word": ..., "pos": ...}`` dicts ordered by descending
        ``zipf_frequency`` (ties broken alphabetically).  ``pos`` is the part
        of speech of the first synset that produced the word, spelled out via
        ``POS_NAMES``.  The list is empty when no synset matches ``w``.
    """
    wn = get_wordnet()
    seen: dict[str, str] = {}
    # WordNet stores multi-word lemmas with underscores ("ice_cream"), so a
    # phrase typed with spaces must be converted or it never matches.
    for ss in wn.synsets(w.replace(" ", "_")):
        pos = ss.pos()
        label = POS_NAMES.get(pos, pos)
        lemmas = list(ss.lemmas())
        if pos in ("a", "s"):  # adjectives: also pull in similar-to synsets
            for sim in ss.similar_tos():
                lemmas.extend(sim.lemmas())
        for lemma in lemmas:
            name = lemma.name().replace("_", " ").lower()
            # Skip the query itself and anything with digits or odd symbols.
            if name != w and re.fullmatch(r"[a-z' -]+", name):
                # First sense wins, so a word keeps the POS it was first seen with.
                seen.setdefault(name, label)
    ranked = sorted(seen.items(),
                    key=lambda kv: (-zipf_frequency(kv[0], "en"), kv[0]))
    return [{"word": n, "pos": p} for n, p in ranked[:limit]]
def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
scored = []
for w in set(words):
@@ -695,6 +739,9 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
@app.get("/api/lookup")
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
w = word.strip().lower()
if mode == "syn":
words = synonyms_for(w, limit)
return {"word": w, "mode": mode, "known": bool(words), "words": words}
phones = phones_for(w)
if not phones:
return {"word": w, "mode": mode, "known": False, "words": []}
+37 -8
View File
@@ -200,6 +200,7 @@
text-transform: uppercase; letter-spacing: .06em; margin-right: 4px;
}
.btn.small { padding: 5px 10px; font-size: 12px; }
.def-actions { display: flex; gap: 8px; }
.chip {
font-size: 13px; background: var(--panel-2); border: 1px solid var(--line);
color: var(--ink); padding: 5px 10px; border-radius: 999px; cursor: pointer;
@@ -277,7 +278,7 @@ Double-click any word to look it up on the right."></textarea>
</div>
<div id="defBox"></div>
<div id="lookupResults">
<p class="muted">Type a word and hit Go, or double-click a word in your draft. Click any result to see its definition.</p>
<p class="muted">Type a word and hit Go for rhymes. Double-click a word in your draft for its definition. Click any result chip for its definition too.</p>
</div>
<div class="legend">
<div class="res-label">Current stanza</div>
@@ -544,7 +545,11 @@ editor.addEventListener('click', ()=>{ buildReadout(); buildLegend(); });
editor.addEventListener('dblclick', ()=>{
const sel = editor.value.slice(editor.selectionStart, editor.selectionEnd)
.trim().replace(/[^A-Za-z']/g,'');
if(sel){ document.getElementById('lookupInput').value = sel; doLookup(); }
if(sel){
document.getElementById('lookupInput').value = sel;
tab('lookup');
showDefinition(sel);
}
});
/* ---------- insert at cursor ---------- */
@@ -582,13 +587,15 @@ async function doLookup(){
defBox.innerHTML = '';
resultsBox.innerHTML = '<p class="muted">Searching…</p>';
try{
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
const data = await r.json();
if(mode === 'syn'){
const r = await fetch(`https://api.datamuse.com/words?rel_syn=${encodeURIComponent(word)}&ml=${encodeURIComponent(word)}&max=60`);
const data = await r.json();
renderChips(`Synonyms / related for “${esc(word)}`, data.map(d=>d.word));
if(!data.known){
resultsBox.innerHTML = `<p class="muted">No synonyms found for “${esc(word)}”.</p>`;
return;
}
renderByPos(word, data.words);
}else{
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
const data = await r.json();
if(!data.known){
resultsBox.innerHTML = `<p class="muted">“${esc(word)}” isn't in the pronunciation dictionary.</p>`;
return;
@@ -619,9 +626,16 @@ function defCard(word, inner){
<div class="defhead"><b>${esc(word)}</b><span class="phon" id="defPhon"></span>
<span class="defx" title="Dismiss">×</span></div>
${inner}
<div class="def-actions"><button class="btn small" id="defInsert">Insert at cursor</button></div>
<div class="def-actions">
<button class="btn small" id="defInsert">Insert at cursor</button>
<button class="btn small" id="defRhymes">Rhymes</button>
</div>
</div>`;
defBox.querySelector('#defInsert').addEventListener('click', ()=> insertAtCursor(word));
defBox.querySelector('#defRhymes').addEventListener('click', ()=>{
document.getElementById('lookupInput').value = word;
doLookup();
});
defBox.querySelector('.defx').addEventListener('click', ()=>{ defBox.innerHTML=''; });
}
async function showDefinition(word){
@@ -647,6 +661,21 @@ function renderChips(label, words){
resultsBox.innerHTML = `<div class="res-label">${label}</div>` + chipHtml(words.slice(0,50));
wireChips();
}
/* Render synonym chips for `word`, one labelled section per part of speech.
   Each item is {word, pos}; items without a pos are filed under "related". */
function renderByPos(word, items){
  if(!items.length){
    resultsBox.innerHTML = `<p class="muted">No synonyms for “${esc(word)}”.</p>`;
    return;
  }

  // Bucket the words by their part-of-speech label.
  const groups = {};
  for(const item of items){
    const key = item.pos || 'related';
    if(!groups[key]) groups[key] = [];
    groups[key].push(item.word);
  }

  let html = `<div class="res-label">Synonyms for “${esc(word)}”</div>`;

  // Known labels first, in a fixed order; each is removed once rendered.
  const knownOrder = ['noun','verb','adjective','adverb','related'];
  for(const pos of knownOrder){
    if(!groups[pos]) continue;
    html += `<div class="res-label">${pos}s</div>` + chipHtml(groups[pos]);
    delete groups[pos];
  }

  // Whatever is left carries an unrecognised label; show it escaped, as-is.
  for(const pos of Object.keys(groups)){
    html += `<div class="res-label">${esc(pos)}</div>` + chipHtml(groups[pos]);
  }

  resultsBox.innerHTML = html;
  wireChips();
}
function renderBySyllable(word, items, label){
if(!items.length){ resultsBox.innerHTML = `<p class="muted">No results for “${esc(word)}”.</p>`; return; }
const bySyl = {};
+11
View File
@@ -297,3 +297,14 @@ def test_perfect_subgroup_fuses_with_slant_family():
"Unplugged toaster")
group_with(text, "soldier", "shoulder", "older", "colder", "holster",
"coaster", "roaster", "poster", "toaster")
def test_lookup_synonyms_wordnet():
    """mode=syn returns synonyms, each tagged with a part of speech."""
    happy_result = lookup("happy", mode="syn")
    happy_words = {entry["word"] for entry in happy_result["words"]}
    assert "glad" in happy_words

    big_result = lookup("big", mode="syn")
    pos_by_word = {entry["word"]: entry["pos"] for entry in big_result["words"]}
    assert "large" in pos_by_word
    assert pos_by_word["large"] == "adjective"
def test_lookup_synonyms_unknown_word():
    """A word that yields no synonyms is reported as not known."""
    result = lookup("xqzzqx", mode="syn")
    assert result["known"] is False