diff --git a/app.py b/app.py
index 274fdb8..410aff4 100644
--- a/app.py
+++ b/app.py
@@ -579,6 +579,17 @@ def analyze(draft: Draft):
     claimed: set[int] = set()
     for key, toks in sorted(by_rime.items(), key=lambda kv: (-len(kv[1]), kv[0])):
         toks = [t for t in toks if id(t) not in claimed]
+        # a SECONDARY pronunciation only reaches nearby partners (the
+        # verb pred-i-KATE shouldn't hand "predicate" to a hook's "eight"
+        # forty lines away when the noun rhymes with its neighbor)
+        kept = []
+        for t in toks:
+            if "rime" in t or len(rime_keys(t["word"])) == 1:
+                kept.append(t)  # phrases and unambiguous words: anywhere
+            elif any(abs(o["line"] - t["line"]) <= 4
+                     for o in toks if o is not t):
+                kept.append(t)
+        toks = kept
         if len(toks) < 2:
             continue
         # distinctness by anchor word, so "fire burns" can't pose as a
@@ -1086,6 +1097,101 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
     return out
 
 
+_phones_index: dict[str, list[str]] | None = None
+_rime_index: dict[str, list[str]] | None = None
+
+
+SQUEEZABLE = {"AH", "IH", "EH", "UH", "ER"}  # vowels that reduce unstressed
+
+
+def _squeeze(phones: str) -> str:
+    """Swap each SQUEEZABLE phone in a phone string for the wildcard "x"."""
+    return " ".join("x" if p in SQUEEZABLE else p for p in phones.split())
+
+
+def get_mosaic_indexes() -> tuple[dict[str, list[str]], dict[str, list[str]]]:
+    """full-phones -> words (exact and vowel-squeezed) and rime -> words,
+    normalized, built once."""
+    global _phones_index, _rime_index
+    if _phones_index is None:
+        pronouncing.init_cmu()
+        pidx: dict[str, list[str]] = defaultdict(list)
+        ridx: dict[str, list[str]] = defaultdict(list)
+        for w, phones in pronouncing.pronunciations:
+            if not w.isalpha() or zipf_frequency(w, "en") < 3.0:
+                continue
+            ph = _norm_r(phones)
+            bare = DIGITS.sub("", ph)
+            pidx[bare].append(w)
+            squeezed = _squeeze(bare)
+            if squeezed != bare:
+                pidx[squeezed].append(w)
+            ridx[DIGITS.sub("", pronouncing.rhyming_part(ph))].append(w)
+        _phones_index, _rime_index = pidx, ridx
+    return _phones_index, _rime_index
+
+
+def mosaics_for(w: str, limit: int) -> list[dict]:
+    """Two-word phrases that rhyme with w perfectly: split its rime at
+    every consonant boundary and search both halves (placement ->
+    race + meant).
+
+    Returns up to limit dicts {"word": "<a> <b>", "syl": None}, best
+    score first (summed zipf frequency, +2.0 for an exact tail match,
+    ties alphabetical); [] when phones_for(w) finds nothing."""
+    phones = phones_for(w)
+    if not phones:
+        return []
+    rime = DIGITS.sub("", pronouncing.rhyming_part(phones)).split()
+    pidx, ridx = get_mosaic_indexes()
+    pairs: dict[str, float] = {}
+    for i in range(1, len(rime)):
+        left, right = " ".join(rime[:i]), " ".join(rime[i:])
+        if not any(p in ARPA_VOWELS for p in rime[:i]):
+            continue
+        if not any(p in ARPA_VOWELS for p in rime[i:]):
+            continue
+        # exact tail match, plus squeezed: where the target carries a
+        # reducible vowel, the tail word's own vowel may squeeze to fit
+        # (placement -> place + meant, the EH collapsing to a schwa)
+        # the exact key goes last so that, when the tail has nothing to
+        # squeeze and both keys coincide, the 2.0 exact bonus survives
+        tails = {_squeeze(right): 0.0, right: 2.0}
+        for a in ridx.get(left, []):
+            if len(a) < 2:
+                continue
+            for tail_key, bonus in tails.items():
+                for b in pidx.get(tail_key, []):
+                    if a == w or b == w:
+                        continue
+                    phrase = f"{a} {b}"
+                    score = (zipf_frequency(a, "en")
+                             + zipf_frequency(b, "en") + bonus)
+                    if score > pairs.get(phrase, 0):
+                        pairs[phrase] = score
+    ranked = sorted(pairs.items(), key=lambda kv: (-kv[1], kv[0]))
+    # a negative limit would otherwise slice from the end of the ranking
+    return [{"word": p, "syl": None} for p, _ in ranked[:max(limit, 0)]]
+
+
+@app.get("/api/word")
+def word_info(word: str):
+    """Phonetic anatomy of a word: phones, syllables, stress, rime."""
+    w = word.strip().lower()[:64]
+    phones = phones_for(w)
+    if not phones:
+        return {"word": w, "known": False}
+    pl = phones.split()
+    # one char per digit-bearing phone: stress digits 1 and 2 -> "1", else "0"
+    stress = "".join("1" if p[-1] in "12" else "0"
+                     for p in pl if p[-1].isdigit())
+    rime = DIGITS.sub("", pronouncing.rhyming_part(phones))
+    return {"word": w, "known": True,
+            "phones": DIGITS.sub("", phones), "syl": len(stress),
+            "stress": stress, "rime": rime,
+            "zipf": round(zipf_frequency(w, "en"), 1)}
+
+
 @app.get("/api/lookup")
 def lookup(word: str, mode: str = "rhyme", limit: int = 60):
     w = word.strip().lower()[:64]
@@ -1094,6 +1200,9 @@ def lookup(word: str, mode: str = "rhyme", limit: int =
60): sections = synonyms_for(w, limit) return {"word": w, "mode": mode, "known": bool(sections), "sections": sections} + if mode == "mosaic": + words = mosaics_for(w, limit) + return {"word": w, "mode": mode, "known": bool(words), "words": words} phones = phones_for(w) if not phones: return {"word": w, "mode": mode, "known": False, "words": []} diff --git a/static/index.html b/static/index.html index 19996e0..6ef2155 100644 --- a/static/index.html +++ b/static/index.html @@ -209,6 +209,8 @@ } .btn.small { padding: 5px 10px; font-size: 12px; } .def-actions { display: flex; gap: 8px; } + .defphon { font-size: 12px; margin: 0 0 10px; line-height: 1.6; } + .defphon i { color: var(--accent-2); font-style: normal; } .chip { font-size: 13px; background: var(--panel-2); border: 1px solid var(--line); color: var(--ink); padding: 5px 10px; border-radius: 999px; cursor: pointer; @@ -296,6 +298,7 @@ Double-click any word to look it up on the right.">
+
@@ -650,7 +653,13 @@ async function doLookup(){ try{ const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`); const data = await r.json(); - if(mode === 'syn'){ + if(mode === 'mosaic'){ + if(!data.known){ + resultsBox.innerHTML = `

No two-word mosaics for “${esc(word)}” — try a word with two or more syllables.

`; + return; + } + renderMosaics(word, data.words); + }else if(mode === 'syn'){ if(!data.known){ resultsBox.innerHTML = `

No synonyms found for “${esc(word)}”.

`; return; @@ -698,8 +707,37 @@ function defCard(word, inner){ defBox.querySelector('#defInsert').addEventListener('click', ()=> insertAtCursor(word)); defBox.querySelector('.defx').addEventListener('click', ()=>{ defBox.innerHTML=''; }); } +function draftMates(word){ + if(!analysis) return []; + const lw = word.toLowerCase(); + const byG = {}; + analysis.tokens.forEach(t=>{ + if(t.ph) return; + const w = analysis.lines[t.l].slice(t.s, t.e).toLowerCase(); + (byG[t.g] ||= new Set()).add(w); + }); + const mates = new Set(); + Object.values(byG).forEach(set=>{ + if(set.has(lw)) set.forEach(w=>{ if(w !== lw) mates.add(w); }); + }); + return [...mates].slice(0, 8); +} + async function showDefinition(word){ defCard(word, '

Looking it up…

'); + // phonetic anatomy + draft cross-reference, fetched alongside + fetch(`/api/word?word=${encodeURIComponent(word)}`).then(r=>r.json()).then(info=>{ + const box = defBox.querySelector('.defcard'); + if(!box || !info.known) return; + const dots = [...info.stress].map(s=>s==='1' ? '●' : '○').join(''); + const mates = draftMates(word); + const div = document.createElement('div'); + div.className = 'muted defphon'; + div.innerHTML = `/${esc(info.phones.toLowerCase())}/ · ${info.syl} syl ${dots}` + + ` · rhymes on ${esc(info.rime.toLowerCase())}` + + (mates.length ? `
in your draft: ${mates.map(esc).join(', ')}` : ''); + box.insertBefore(div, box.querySelector('.def-actions')); + }).catch(()=>{}); try{ const r = await fetch(`https://api.dictionaryapi.dev/api/v2/entries/en/${encodeURIComponent(word)}`); if(!r.ok) throw new Error('no entry'); @@ -721,6 +759,15 @@ function renderChips(label, words){ resultsBox.innerHTML = `
${label}
` + chipHtml(words.slice(0,50)); wireChips(); } +function renderMosaics(word, items){ + let h = `
two-word rhymes for “${esc(word)}” — click to insert
`; + h += chipHtml(items.map(d=>d.word)); + resultsBox.innerHTML = h; + resultsBox.querySelectorAll('.chip').forEach(c=>{ + c.addEventListener('click', ()=> insertAtCursor(c.dataset.w)); + }); +} + function renderSections(word, sections){ if(!sections.length){ resultsBox.innerHTML = `

No synonyms for “${esc(word)}”.

`; return; } let h = ''; diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py index a7dff9a..6e96282 100644 --- a/tests/test_rhymes.py +++ b/tests/test_rhymes.py @@ -505,3 +505,30 @@ def test_consonance_is_local(): filler = "la la la\n" * 7 text = "I gave a dumber speech\n" + filler + "I always keep my word" assert "keep" not in highlighted(text) + + +def test_secondary_pronunciation_stays_local(): + # predicate's verb reading (pred-i-KATE) must not hand it to the + # hook's "eight" far away when the noun rhymes with its neighbor + filler = "nothing on this line\n" * 8 + text = ("Six-foot, seven-foot, eight-foot bunch\n" + filler + + "You niggas are gelatin, peanuts to an elephant\n" + "I got through that sentence like a subject and a predicate") + group_with(text, "gelatin", "elephant", "predicate") + + +# -------------------------------------------------------------- new tools + +def test_mosaic_generator(): + from app import mosaics_for + words = {m["word"] for m in mosaics_for("placement", 20)} + assert "place meant" in words + creation = {m["word"] for m in mosaics_for("creation", 20)} + assert "way shun" in creation + + +def test_word_info(): + from app import word_info + info = word_info(word="tonight") + assert info["syl"] == 2 and info["stress"] == "01" + assert info["rime"] == "AY T"