diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ba3c2a..14f3aa1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,5 +18,5 @@ jobs: # under both its old and new (nltk>=3.9) names - run: | uv run python -c "import nltk; - [nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]" + [nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict','wordnet','omw-1.4')]" - run: uv run pytest diff --git a/Dockerfile b/Dockerfile index e1ed684..4b7a743 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,9 +16,9 @@ RUN uv sync --frozen --no-dev --no-install-project COPY app.py ./ COPY static ./static -# Pre-warm NLTK data so the g2p-en fallback never downloads at request time +# Pre-warm NLTK data so g2p-en and WordNet never download at request time RUN uv run python -c "import nltk;\ -[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]" || true +[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict','wordnet','omw-1.4')]" || true EXPOSE 8000 diff --git a/README.md b/README.md index b6d8174..74544fb 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,11 @@ Then open . ## API - `POST /api/analyze` `{"text": "..."}` → token spans, rhyme groups, - per-stanza schemes -- `GET /api/lookup?word=light&mode=rhyme|near` → frequency-ranked - rhymes / near rhymes, grouped by syllable count + per-stanza schemes, per-line meter +- `GET /api/lookup?word=light&mode=rhyme|near|syn` → frequency-ranked + rhymes / near rhymes (by syllable count) and WordNet synonyms (by + part of speech) -Synonyms come from the free [Datamuse](https://www.datamuse.com/api/) -API, client-side. +Dictionary definitions in the UI come from the free +[dictionaryapi.dev](https://dictionaryapi.dev/); everything else is +served locally. 
diff --git a/app.py b/app.py
index eb48fdf..5b75ab9 100644
--- a/app.py
+++ b/app.py
@@ -27,6 +27,10 @@ async def lifespan(app: FastAPI):
         g2p_phones("warmup")
     except Exception:
         pass  # model unavailable; spelling fallbacks still work
+    try:
+        get_wordnet()
+    except Exception:
+        pass  # WordNet warm-up failed; keep starting up without it
     get_slant_index()
     yield
 
@@ -675,6 +679,66 @@ def get_slant_index() -> dict[str, set[str]]:
     return _slant_index
 
 
+_wordnet = None
+
+
+def get_wordnet():
+    """Return NLTK's WordNet reader, fetching its corpus data if missing.
+
+    NLTK is already a g2p-en dependency. One probe lookup is made; on
+    ``LookupError`` the ``wordnet`` and ``omw-1.4`` packages are downloaded.
+    The reader is cached in ``_wordnet`` whether or not that download
+    succeeded, so later calls skip both the probe and the download.
+    """
+    global _wordnet
+    if _wordnet is None:
+        from nltk.corpus import wordnet as wn
+        try:
+            wn.synsets("test")
+        except LookupError:
+            import nltk
+            nltk.download("wordnet", quiet=True)
+            nltk.download("omw-1.4", quiet=True)
+        _wordnet = wn
+    return _wordnet
+
+
+POS_NAMES = {"n": "noun", "v": "verb", "a": "adjective",
+             "s": "adjective", "r": "adverb"}
+
+
+def synonyms_for(w: str, limit: int) -> list[dict]:
+    """Return WordNet synonyms of *w*, most frequent first, tagged by POS.
+
+    Args:
+        w: Word or phrase to look up, expected lower-cased. Spaces are
+            sent to WordNet as underscores, its multi-word separator.
+        limit: Maximum number of results; zero or negative yields ``[]``.
+
+    Returns:
+        ``{"word", "pos"}`` dicts sorted by descending ``zipf_frequency``,
+        then alphabetically. ``pos`` is the ``POS_NAMES`` label of the
+        first synset in which the word was seen.
+    """
+    wn = get_wordnet()
+    seen: dict[str, str] = {}
+    # WordNet joins multi-word lemmas with "_", so "ice cream" must be
+    # queried as "ice_cream" or it matches nothing.
+    for ss in wn.synsets(w.replace(" ", "_")):
+        lemmas = list(ss.lemmas())
+        if ss.pos() in ("a", "s"):  # adjectives: pull in the satellites
+            for sim in ss.similar_tos():
+                lemmas.extend(sim.lemmas())
+        for lemma in lemmas:
+            name = lemma.name().replace("_", " ").lower()
+            if name != w and re.fullmatch(r"[a-z' -]+", name):
+                seen.setdefault(name, POS_NAMES.get(ss.pos(), ss.pos()))
+    ranked = sorted(seen.items(),
+                    key=lambda kv: (-zipf_frequency(kv[0], "en"), kv[0]))
+    # Clamp: a negative limit would otherwise slice items off the tail.
+    return [{"word": n, "pos": p} for n, p in ranked[:max(limit, 0)]]
+
+
 def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
     scored = []
     for w in set(words):
@@ -695,6 +759,9 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
 @app.get("/api/lookup")
 def lookup(word: str, mode: str = "rhyme", limit: int = 60):
     w = word.strip().lower()
+    if mode == "syn":
+        words = synonyms_for(w, limit)
+        return {"word": w, "mode": mode, "known": bool(words), "words": words}
     phones = phones_for(w)
     if not 
phones: return {"word": w, "mode": mode, "known": False, "words": []} diff --git a/static/index.html b/static/index.html index 780586d..df488f2 100644 --- a/static/index.html +++ b/static/index.html @@ -200,6 +200,7 @@ text-transform: uppercase; letter-spacing: .06em; margin-right: 4px; } .btn.small { padding: 5px 10px; font-size: 12px; } + .def-actions { display: flex; gap: 8px; } .chip { font-size: 13px; background: var(--panel-2); border: 1px solid var(--line); color: var(--ink); padding: 5px 10px; border-radius: 999px; cursor: pointer; @@ -277,7 +278,7 @@ Double-click any word to look it up on the right.">
-

Type a word and hit Go, or double-click a word in your draft. Click any result to see its definition.

+

Type a word and hit Go for rhymes. Double-click a word in your draft for its definition. Click any result chip for its definition too.

Current stanza
@@ -544,7 +545,11 @@ editor.addEventListener('click', ()=>{ buildReadout(); buildLegend(); }); editor.addEventListener('dblclick', ()=>{ const sel = editor.value.slice(editor.selectionStart, editor.selectionEnd) .trim().replace(/[^A-Za-z']/g,''); - if(sel){ document.getElementById('lookupInput').value = sel; doLookup(); } + if(sel){ + document.getElementById('lookupInput').value = sel; + tab('lookup'); + showDefinition(sel); + } }); /* ---------- insert at cursor ---------- */ @@ -582,13 +587,15 @@ async function doLookup(){ defBox.innerHTML = ''; resultsBox.innerHTML = '

Searching…

'; try{ + const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`); + const data = await r.json(); if(mode === 'syn'){ - const r = await fetch(`https://api.datamuse.com/words?rel_syn=${encodeURIComponent(word)}&ml=${encodeURIComponent(word)}&max=60`); - const data = await r.json(); - renderChips(`Synonyms / related for “${esc(word)}”`, data.map(d=>d.word)); + if(!data.known){ + resultsBox.innerHTML = `

No synonyms found for “${esc(word)}”.

`; + return; + } + renderByPos(word, data.words); }else{ - const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`); - const data = await r.json(); if(!data.known){ resultsBox.innerHTML = `

“${esc(word)}” isn't in the pronunciation dictionary.

`; return; @@ -619,9 +626,16 @@ function defCard(word, inner){
${esc(word)} ×
${inner} -
+
+ + +
`; defBox.querySelector('#defInsert').addEventListener('click', ()=> insertAtCursor(word)); + defBox.querySelector('#defRhymes').addEventListener('click', ()=>{ + document.getElementById('lookupInput').value = word; + doLookup(); + }); defBox.querySelector('.defx').addEventListener('click', ()=>{ defBox.innerHTML=''; }); } async function showDefinition(word){ @@ -647,6 +661,21 @@ function renderChips(label, words){ resultsBox.innerHTML = `
${label}
` + chipHtml(words.slice(0,50)); wireChips(); } +function renderByPos(word, items){ + if(!items.length){ resultsBox.innerHTML = `

No synonyms for “${esc(word)}”.

`; return; } + const byPos = {}; + items.forEach(d=>{ (byPos[d.pos || 'related'] ||= []).push(d.word); }); + let h = `
Synonyms for “${esc(word)}”
`; + ['noun','verb','adjective','adverb','related'].forEach(pos=>{ + if(!byPos[pos]) return; + h += `
${pos}s
` + chipHtml(byPos[pos]); + delete byPos[pos]; + }); + Object.keys(byPos).forEach(pos=>{ h += `
${esc(pos)}
` + chipHtml(byPos[pos]); }); + resultsBox.innerHTML = h; + wireChips(); +} + function renderBySyllable(word, items, label){ if(!items.length){ resultsBox.innerHTML = `

No results for “${esc(word)}”.

`; return; } const bySyl = {}; diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py index 0a478e6..ed13804 100644 --- a/tests/test_rhymes.py +++ b/tests/test_rhymes.py @@ -297,3 +297,14 @@ def test_perfect_subgroup_fuses_with_slant_family(): "Unplugged toaster") group_with(text, "soldier", "shoulder", "older", "colder", "holster", "coaster", "roaster", "poster", "toaster") + + +def test_lookup_synonyms_wordnet(): + words = {w["word"] for w in lookup("happy", mode="syn")["words"]} + assert "glad" in words + big = {w["word"]: w["pos"] for w in lookup("big", mode="syn")["words"]} + assert "large" in big and big["large"] == "adjective" + + +def test_lookup_synonyms_unknown_word(): + assert lookup("xqzzqx", mode="syn")["known"] is False