mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
WordNet synonyms + double-click opens definitions
- mode=syn is now served locally by NLTK WordNet (already a g2p-en dependency), sense-grouped by part of speech and frequency-ranked; Datamuse is gone.
- Double-clicking a word in the draft opens its definition card; the card gained a Rhymes button for the old jump to rhymes.
- NLTK WordNet data is baked into the Docker image and CI.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,5 +18,5 @@ jobs:
|
||||
# under both its old and new (nltk>=3.9) names
|
||||
- run: |
|
||||
uv run python -c "import nltk;
|
||||
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]"
|
||||
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict','wordnet','omw-1.4')]"
|
||||
- run: uv run pytest
|
||||
|
||||
+2
-2
@@ -16,9 +16,9 @@ RUN uv sync --frozen --no-dev --no-install-project
|
||||
COPY app.py ./
|
||||
COPY static ./static
|
||||
|
||||
# Pre-warm NLTK data so the g2p-en fallback never downloads at request time
|
||||
# Pre-warm NLTK data so g2p-en and WordNet never download at request time
|
||||
RUN uv run python -c "import nltk;\
|
||||
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]" || true
|
||||
[nltk.download(p, quiet=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict','wordnet','omw-1.4')]" || true
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
|
||||
@@ -31,9 +31,11 @@ Then open <http://127.0.0.1:8000>.
|
||||
## API
|
||||
|
||||
- `POST /api/analyze` `{"text": "..."}` → token spans, rhyme groups,
|
||||
per-stanza schemes
|
||||
- `GET /api/lookup?word=light&mode=rhyme|near` → frequency-ranked
|
||||
rhymes / near rhymes, grouped by syllable count
|
||||
per-stanza schemes, per-line meter
|
||||
- `GET /api/lookup?word=light&mode=rhyme|near|syn` → frequency-ranked
|
||||
rhymes / near rhymes (by syllable count) and WordNet synonyms (by
|
||||
part of speech)
|
||||
|
||||
Synonyms come from the free [Datamuse](https://www.datamuse.com/api/)
|
||||
API, client-side.
|
||||
Dictionary definitions in the UI come from the free
|
||||
[dictionaryapi.dev](https://dictionaryapi.dev/); everything else is
|
||||
served locally.
|
||||
|
||||
@@ -27,6 +27,10 @@ async def lifespan(app: FastAPI):
|
||||
g2p_phones("warmup")
|
||||
except Exception:
|
||||
pass # model unavailable; spelling fallbacks still work
|
||||
try:
|
||||
get_wordnet()
|
||||
except Exception:
|
||||
pass
|
||||
get_slant_index()
|
||||
yield
|
||||
|
||||
@@ -675,6 +679,46 @@ def get_slant_index() -> dict[str, set[str]]:
|
||||
return _slant_index
|
||||
|
||||
|
||||
_wordnet = None


def get_wordnet():
    """Return the NLTK WordNet corpus object, downloading its data if needed.

    The object is cached in the module-level ``_wordnet`` after the first
    call, so the probe (and any download) runs at most once per process.
    """
    global _wordnet
    if _wordnet is not None:
        return _wordnet

    from nltk.corpus import wordnet as corpus

    try:
        # Probe query; a LookupError is treated as "data not installed".
        corpus.synsets("test")
    except LookupError:
        import nltk

        for package in ("wordnet", "omw-1.4"):
            nltk.download(package, quiet=True)
    _wordnet = corpus
    return _wordnet
|
||||
|
||||
|
||||
# WordNet part-of-speech tags -> display names used in lookup results.
# Both "a" and "s" are shown as "adjective".
POS_NAMES = {
    "n": "noun",
    "v": "verb",
    "a": "adjective",
    "s": "adjective",
    "r": "adverb",
}
|
||||
|
||||
|
||||
def synonyms_for(w: str, limit: int) -> list[dict]:
    """Sense-grouped synonyms from WordNet, frequency-ranked.

    Args:
        w: Query word or phrase. It is compared against lower-cased lemma
            names, so it is expected to be lower-cased already.
        limit: Maximum number of results; negative values yield no results.

    Returns:
        A list of ``{"word": ..., "pos": ...}`` dicts sorted by descending
        ``zipf_frequency`` and then alphabetically. Each word carries the
        part of speech of the first synset that produced it.
    """
    wn = get_wordnet()
    seen: dict[str, str] = {}
    # WordNet stores multi-word lemmas with underscores ("ice_cream"), so
    # map spaces in the query the same way lemma names are un-mapped below.
    for ss in wn.synsets(w.replace(" ", "_")):
        pos = POS_NAMES.get(ss.pos(), ss.pos())
        lemmas = list(ss.lemmas())
        if ss.pos() in ("a", "s"):  # adjectives: pull in the satellites
            for sim in ss.similar_tos():
                lemmas.extend(sim.lemmas())
        for lemma in lemmas:
            name = lemma.name().replace("_", " ").lower()
            # Skip the query itself and anything with digits/odd characters.
            if name != w and re.fullmatch(r"[a-z' -]+", name):
                seen.setdefault(name, pos)
    ranked = sorted(seen.items(),
                    key=lambda kv: (-zipf_frequency(kv[0], "en"), kv[0]))
    # Clamp so a negative limit cannot slice from the end of the ranking.
    return [{"word": n, "pos": p} for n, p in ranked[:max(limit, 0)]]
|
||||
|
||||
|
||||
def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
|
||||
scored = []
|
||||
for w in set(words):
|
||||
@@ -695,6 +739,9 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
|
||||
@app.get("/api/lookup")
|
||||
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
|
||||
w = word.strip().lower()
|
||||
if mode == "syn":
|
||||
words = synonyms_for(w, limit)
|
||||
return {"word": w, "mode": mode, "known": bool(words), "words": words}
|
||||
phones = phones_for(w)
|
||||
if not phones:
|
||||
return {"word": w, "mode": mode, "known": False, "words": []}
|
||||
|
||||
+37
-8
@@ -200,6 +200,7 @@
|
||||
text-transform: uppercase; letter-spacing: .06em; margin-right: 4px;
|
||||
}
|
||||
.btn.small { padding: 5px 10px; font-size: 12px; }
|
||||
.def-actions { display: flex; gap: 8px; }
|
||||
.chip {
|
||||
font-size: 13px; background: var(--panel-2); border: 1px solid var(--line);
|
||||
color: var(--ink); padding: 5px 10px; border-radius: 999px; cursor: pointer;
|
||||
@@ -277,7 +278,7 @@ Double-click any word to look it up on the right."></textarea>
|
||||
</div>
|
||||
<div id="defBox"></div>
|
||||
<div id="lookupResults">
|
||||
<p class="muted">Type a word and hit Go, or double-click a word in your draft. Click any result to see its definition.</p>
|
||||
<p class="muted">Type a word and hit Go for rhymes. Double-click a word in your draft for its definition. Click any result chip for its definition too.</p>
|
||||
</div>
|
||||
<div class="legend">
|
||||
<div class="res-label">Current stanza</div>
|
||||
@@ -544,7 +545,11 @@ editor.addEventListener('click', ()=>{ buildReadout(); buildLegend(); });
|
||||
editor.addEventListener('dblclick', ()=>{
|
||||
const sel = editor.value.slice(editor.selectionStart, editor.selectionEnd)
|
||||
.trim().replace(/[^A-Za-z']/g,'');
|
||||
if(sel){ document.getElementById('lookupInput').value = sel; doLookup(); }
|
||||
if(sel){
|
||||
document.getElementById('lookupInput').value = sel;
|
||||
tab('lookup');
|
||||
showDefinition(sel);
|
||||
}
|
||||
});
|
||||
|
||||
/* ---------- insert at cursor ---------- */
|
||||
@@ -582,13 +587,15 @@ async function doLookup(){
|
||||
defBox.innerHTML = '';
|
||||
resultsBox.innerHTML = '<p class="muted">Searching…</p>';
|
||||
try{
|
||||
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
|
||||
const data = await r.json();
|
||||
if(mode === 'syn'){
|
||||
const r = await fetch(`https://api.datamuse.com/words?rel_syn=${encodeURIComponent(word)}&ml=${encodeURIComponent(word)}&max=60`);
|
||||
const data = await r.json();
|
||||
renderChips(`Synonyms / related for “${esc(word)}”`, data.map(d=>d.word));
|
||||
if(!data.known){
|
||||
resultsBox.innerHTML = `<p class="muted">No synonyms found for “${esc(word)}”.</p>`;
|
||||
return;
|
||||
}
|
||||
renderByPos(word, data.words);
|
||||
}else{
|
||||
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
|
||||
const data = await r.json();
|
||||
if(!data.known){
|
||||
resultsBox.innerHTML = `<p class="muted">“${esc(word)}” isn't in the pronunciation dictionary.</p>`;
|
||||
return;
|
||||
@@ -619,9 +626,16 @@ function defCard(word, inner){
|
||||
<div class="defhead"><b>${esc(word)}</b><span class="phon" id="defPhon"></span>
|
||||
<span class="defx" title="Dismiss">×</span></div>
|
||||
${inner}
|
||||
<div class="def-actions"><button class="btn small" id="defInsert">Insert at cursor</button></div>
|
||||
<div class="def-actions">
|
||||
<button class="btn small" id="defInsert">Insert at cursor</button>
|
||||
<button class="btn small" id="defRhymes">Rhymes</button>
|
||||
</div>
|
||||
</div>`;
|
||||
defBox.querySelector('#defInsert').addEventListener('click', ()=> insertAtCursor(word));
|
||||
defBox.querySelector('#defRhymes').addEventListener('click', ()=>{
|
||||
document.getElementById('lookupInput').value = word;
|
||||
doLookup();
|
||||
});
|
||||
defBox.querySelector('.defx').addEventListener('click', ()=>{ defBox.innerHTML=''; });
|
||||
}
|
||||
async function showDefinition(word){
|
||||
@@ -647,6 +661,21 @@ function renderChips(label, words){
|
||||
resultsBox.innerHTML = `<div class="res-label">${label}</div>` + chipHtml(words.slice(0,50));
|
||||
wireChips();
|
||||
}
|
||||
// Render synonym chips into resultsBox, one labelled section per part of speech.
// Known parts of speech come first in a fixed order; any other keys follow.
function renderByPos(word, items){
  if(!items.length){
    resultsBox.innerHTML = `<p class="muted">No synonyms for “${esc(word)}”.</p>`;
    return;
  }
  // Bucket words by their part of speech; entries without one go to 'related'.
  const groups = {};
  for(const item of items){
    const key = item.pos || 'related';
    if(!groups[key]) groups[key] = [];
    groups[key].push(item.word);
  }
  const sections = [`<div class="res-label">Synonyms for “${esc(word)}”</div>`];
  for(const pos of ['noun','verb','adjective','adverb','related']){
    if(!groups[pos]) continue;
    sections.push(`<div class="res-label">${pos}s</div>` + chipHtml(groups[pos]));
    delete groups[pos];
  }
  // Whatever is left has an unexpected label, so escape it before display.
  for(const pos of Object.keys(groups)){
    sections.push(`<div class="res-label">${esc(pos)}</div>` + chipHtml(groups[pos]));
  }
  resultsBox.innerHTML = sections.join('');
  wireChips();
}
|
||||
|
||||
function renderBySyllable(word, items, label){
|
||||
if(!items.length){ resultsBox.innerHTML = `<p class="muted">No results for “${esc(word)}”.</p>`; return; }
|
||||
const bySyl = {};
|
||||
|
||||
@@ -297,3 +297,14 @@ def test_perfect_subgroup_fuses_with_slant_family():
|
||||
"Unplugged toaster")
|
||||
group_with(text, "soldier", "shoulder", "older", "colder", "holster",
|
||||
"coaster", "roaster", "poster", "toaster")
|
||||
|
||||
|
||||
def test_lookup_synonyms_wordnet():
    """mode=syn returns synonyms, each tagged with a part of speech."""
    happy = lookup("happy", mode="syn")
    happy_words = {entry["word"] for entry in happy["words"]}
    assert "glad" in happy_words

    pos_by_word = {}
    for entry in lookup("big", mode="syn")["words"]:
        pos_by_word[entry["word"]] = entry["pos"]
    assert "large" in pos_by_word and pos_by_word["large"] == "adjective"
|
||||
|
||||
|
||||
def test_lookup_synonyms_unknown_word():
    """A word with no synonyms is reported as not known."""
    result = lookup("xqzzqx", mode="syn")
    assert result["known"] is False
|
||||
|
||||
Reference in New Issue
Block a user