Sidebar exploration tools: mosaics, phonetics, draft cross-reference

- Mosaics submode: two-word phrases that rhyme with a word perfectly,
  built by splitting its rime and searching both halves with reduced-
  vowel squeezing (placement -> place meant, creation -> way shun)
- Definition cards show phonetic anatomy (/phones/ · syllables ·
  stress dots · the rhyming tail) and what the word already rhymes
  with in your draft
- Secondary pronunciations only claim partners within 4 lines, so the
  verb pred-i-KATE can't hand predicate to a distant hook's "eight"

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 04:17:45 -04:00
parent 79dee968fa
commit 43bbd527a4
3 changed files with 175 additions and 1 deletions
+100
View File
@@ -579,6 +579,17 @@ def analyze(draft: Draft):
claimed: set[int] = set()
for key, toks in sorted(by_rime.items(), key=lambda kv: (-len(kv[1]), kv[0])):
toks = [t for t in toks if id(t) not in claimed]
# a SECONDARY pronunciation only reaches nearby partners (the
# verb pred-i-KATE shouldn't hand "predicate" to a hook's "eight"
# forty lines away when the noun rhymes with its neighbor)
kept = []
for t in toks:
if "rime" in t or len(rime_keys(t["word"])) == 1:
kept.append(t) # phrases and unambiguous words: anywhere
elif any(abs(o["line"] - t["line"]) <= 4
for o in toks if o is not t):
kept.append(t)
toks = kept
if len(toks) < 2:
continue
# distinctness by anchor word, so "fire burns" can't pose as a
@@ -1086,6 +1097,92 @@ def _ranked(words, exclude: set[str], limit: int) -> list[dict]:
return out
# Module-level caches for the mosaic search. Both start as None and are
# filled together by get_mosaic_indexes() the first time it is called.
_phones_index: dict[str, list[str]] | None = None
_rime_index: dict[str, list[str]] | None = None
SQUEEZABLE = {"AH", "IH", "EH", "UH", "ER"} # vowels that reduce unstressed
def _squeeze(phones: str) -> str:
return " ".join("x" if p in SQUEEZABLE else p for p in phones.split())
def get_mosaic_indexes():
    """Return ``(phones_index, rime_index)``, building both on first call.

    phones_index maps a digit-stripped phone string to the words
    pronounced that way; each word is also filed under its vowel-squeezed
    form when that differs. rime_index maps a digit-stripped rhyming part
    to its words. Only purely alphabetic words with a zipf frequency of at
    least 3.0 are indexed. The result is cached in the module globals, so
    the dictionary is walked only once per process.
    """
    global _phones_index, _rime_index
    if _phones_index is not None:
        return _phones_index, _rime_index

    pronouncing.init_cmu()
    by_phones: dict[str, list[str]] = defaultdict(list)
    by_rime: dict[str, list[str]] = defaultdict(list)
    for word, raw_phones in pronouncing.pronunciations:
        if not word.isalpha():
            continue
        if zipf_frequency(word, "en") < 3.0:
            continue
        # NOTE(review): _norm_r is defined elsewhere; presumably it
        # normalizes r-colored vowels -- confirm against its definition.
        normalized = _norm_r(raw_phones)
        stressless = DIGITS.sub("", normalized)
        by_phones[stressless].append(word)
        reduced = _squeeze(stressless)
        if reduced != stressless:
            by_phones[reduced].append(word)
        rime_key = DIGITS.sub("", pronouncing.rhyming_part(normalized))
        by_rime[rime_key].append(word)

    _phones_index = by_phones
    _rime_index = by_rime
    return _phones_index, _rime_index
def mosaics_for(w: str, limit: int) -> list[dict]:
    """Two-word phrases that rhyme with w perfectly.

    The rime of ``w`` is split at every position that leaves at least one
    vowel on each side. The left half is looked up as a rime (the first
    word only has to end with it); the right half is looked up as a full
    pronunciation, both exactly and vowel-squeezed (placement ->
    place + meant, the EH collapsing to a schwa).

    Each phrase scores zipf(first) + zipf(second), plus 2.0 when the
    second word matches the tail exactly rather than through squeezing.
    A phrase reachable several ways keeps its best score.

    Returns at most ``limit`` dicts of the form
    ``{"word": "<first> <second>", "syl": None}``, best score first, ties
    broken alphabetically. Returns ``[]`` when ``w`` has no known
    pronunciation.
    """
    phones = phones_for(w)
    if not phones:
        return []
    rime = DIGITS.sub("", pronouncing.rhyming_part(phones)).split()
    pidx, ridx = get_mosaic_indexes()
    pairs: dict[str, float] = {}
    for i in range(1, len(rime)):
        # both halves need a vowel, otherwise neither can be a word
        if not any(p in ARPA_VOWELS for p in rime[:i]):
            continue
        if not any(p in ARPA_VOWELS for p in rime[i:]):
            continue
        left, right = " ".join(rime[:i]), " ".join(rime[i:])

        # Squeezed key first, exact key last: when the tail has no
        # reducible vowel the two keys are identical, and the later entry
        # wins in a dict display -- so the exact-match bonus must come
        # last or it is silently overwritten by 0.0.
        tails = {_squeeze(right): 0.0, right: 2.0}

        # The head-word checks and frequencies do not depend on the tail,
        # so resolve them once per split.
        heads = [(a, zipf_frequency(a, "en"))
                 for a in ridx.get(left, [])
                 if len(a) >= 2 and a != w]
        if not heads:
            continue

        for tail_key, bonus in tails.items():
            for b in pidx.get(tail_key, []):
                if b == w:
                    continue
                b_freq = zipf_frequency(b, "en")
                for a, a_freq in heads:
                    phrase = f"{a} {b}"
                    score = a_freq + b_freq + bonus
                    if score > pairs.get(phrase, 0):
                        pairs[phrase] = score
    ranked = sorted(pairs.items(), key=lambda kv: (-kv[1], kv[0]))
    return [{"word": p, "syl": None} for p, _ in ranked[:limit]]
@app.get("/api/word")
def word_info(word: str):
    """Phonetic anatomy of a word: phones, syllables, stress, rime.

    The input is trimmed, lower-cased and capped at 64 characters. When no
    pronunciation is found the reply is ``{"word": w, "known": False}``.
    Otherwise it carries the digit-stripped phones, a stress string with
    one character per digit-marked phone ("1" for a phone ending in 1 or
    2, "0" otherwise), the syllable count (the length of that string), the
    digit-stripped rhyming part, and the zipf frequency rounded to one
    decimal place.
    """
    w = word.strip().lower()[:64]
    phones = phones_for(w)
    if not phones:
        return {"word": w, "known": False}

    # only phones ending in a digit carry a stress mark
    marks = [p[-1] for p in phones.split() if p[-1].isdigit()]
    stress = "".join("1" if mark in "12" else "0" for mark in marks)
    rime = DIGITS.sub("", pronouncing.rhyming_part(phones))
    return {
        "word": w,
        "known": True,
        "phones": DIGITS.sub("", phones),
        "syl": len(stress),
        "stress": stress,
        "rime": rime,
        "zipf": round(zipf_frequency(w, "en"), 1),
    }
@app.get("/api/lookup")
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
w = word.strip().lower()[:64]
@@ -1094,6 +1191,9 @@ def lookup(word: str, mode: str = "rhyme", limit: int = 60):
sections = synonyms_for(w, limit)
return {"word": w, "mode": mode, "known": bool(sections),
"sections": sections}
if mode == "mosaic":
words = mosaics_for(w, limit)
return {"word": w, "mode": mode, "known": bool(words), "words": words}
phones = phones_for(w)
if not phones:
return {"word": w, "mode": mode, "known": False, "words": []}
+48 -1
View File
@@ -209,6 +209,8 @@
}
.btn.small { padding: 5px 10px; font-size: 12px; }
.def-actions { display: flex; gap: 8px; }
.defphon { font-size: 12px; margin: 0 0 10px; line-height: 1.6; }
.defphon i { color: var(--accent-2); font-style: normal; }
.chip {
font-size: 13px; background: var(--panel-2); border: 1px solid var(--line);
color: var(--ink); padding: 5px 10px; border-radius: 999px; cursor: pointer;
@@ -296,6 +298,7 @@ Double-click any word to look it up on the right."></textarea>
<div class="seg" id="modeSeg">
<button class="active" data-mode="rhyme">Rhymes</button>
<button data-mode="syn">Synonyms</button>
<button data-mode="mosaic" title="Two-word phrases that rhyme with it">Mosaics</button>
</div>
<div id="defBox"></div>
<div id="lookupResults">
@@ -650,7 +653,13 @@ async function doLookup(){
try{
const r = await fetch(`/api/lookup?word=${encodeURIComponent(word)}&mode=${mode}`);
const data = await r.json();
if(mode === 'syn'){
if(mode === 'mosaic'){
if(!data.known){
resultsBox.innerHTML = `<p class="muted">No two-word mosaics for “${esc(word)}” — try a word with two or more syllables.</p>`;
return;
}
renderMosaics(word, data.words);
}else if(mode === 'syn'){
if(!data.known){
resultsBox.innerHTML = `<p class="muted">No synonyms found for “${esc(word)}”.</p>`;
return;
@@ -698,8 +707,37 @@ function defCard(word, inner){
defBox.querySelector('#defInsert').addEventListener('click', ()=> insertAtCursor(word));
defBox.querySelector('.defx').addEventListener('click', ()=>{ defBox.innerHTML=''; });
}
// Up to 8 other words from the analysed draft that share a group with `word`.
// Returns [] when no analysis is loaded. Matching is case-insensitive.
function draftMates(word){
  if(!analysis) return [];
  const target = word.toLowerCase();

  // group id -> set of lower-cased words sliced out of the draft lines
  const groups = {};
  analysis.tokens.forEach(tok=>{
    // NOTE(review): `ph` presumably flags phrase tokens — confirm against the analysis payload
    if(tok.ph) return;
    const text = analysis.lines[tok.l].slice(tok.s, tok.e).toLowerCase();
    if(!groups[tok.g]) groups[tok.g] = new Set();
    groups[tok.g].add(text);
  });

  const found = new Set();
  for(const members of Object.values(groups)){
    if(!members.has(target)) continue;
    for(const other of members){
      if(other !== target) found.add(other);
    }
  }
  return Array.from(found).slice(0, 8);
}
async function showDefinition(word){
defCard(word, '<p class="muted">Looking it up…</p>');
// phonetic anatomy + draft cross-reference, fetched alongside
fetch(`/api/word?word=${encodeURIComponent(word)}`).then(r=>r.json()).then(info=>{
const box = defBox.querySelector('.defcard');
if(!box || !info.known) return;
const dots = [...info.stress].map(s=>s==='1' ? '●' : '○').join('');
const mates = draftMates(word);
const div = document.createElement('div');
div.className = 'muted defphon';
div.innerHTML = `/${esc(info.phones.toLowerCase())}/ · ${info.syl} syl ${dots}` +
` · rhymes on <i>${esc(info.rime.toLowerCase())}</i>` +
(mates.length ? `<br>in your draft: ${mates.map(esc).join(', ')}` : '');
box.insertBefore(div, box.querySelector('.def-actions'));
}).catch(()=>{});
try{
const r = await fetch(`https://api.dictionaryapi.dev/api/v2/entries/en/${encodeURIComponent(word)}`);
if(!r.ok) throw new Error('no entry');
@@ -721,6 +759,15 @@ function renderChips(label, words){
resultsBox.innerHTML = `<div class="res-label">${label}</div>` + chipHtml(words.slice(0,50));
wireChips();
}
// Show the mosaic phrases for `word` as chips; clicking a chip inserts it at the cursor.
function renderMosaics(word, items){
  const label = `<div class="res-label">two-word rhymes for “${esc(word)}” — click to insert</div>`;
  const phrases = items.map(item=>item.word);
  resultsBox.innerHTML = label + chipHtml(phrases);
  // the inserted text is read from each chip's data-w attribute
  for(const chip of resultsBox.querySelectorAll('.chip')){
    chip.addEventListener('click', ()=> insertAtCursor(chip.dataset.w));
  }
}
function renderSections(word, sections){
if(!sections.length){ resultsBox.innerHTML = `<p class="muted">No synonyms for “${esc(word)}”.</p>`; return; }
let h = '';
+27
View File
@@ -505,3 +505,30 @@ def test_consonance_is_local():
filler = "la la la\n" * 7
text = "I gave a dumber speech\n" + filler + "I always keep my word"
assert "keep" not in highlighted(text)
def test_secondary_pronunciation_stays_local():
    """A secondary pronunciation must not pull a word toward a far-off rhyme.

    predicate's verb reading (pred-i-KATE) must not hand it to the hook's
    "eight" eight filler lines earlier when the noun reading rhymes with
    its neighbours.
    """
    padding = "nothing on this line\n" * 8
    pieces = [
        "Six-foot, seven-foot, eight-foot bunch\n",
        padding,
        "You niggas are gelatin, peanuts to an elephant\n",
        "I got through that sentence like a subject and a predicate",
    ]
    group_with("".join(pieces), "gelatin", "elephant", "predicate")
# -------------------------------------------------------------- new tools
def test_mosaic_generator():
    """The top 20 mosaics for each target include the expected phrase."""
    from app import mosaics_for

    expected = (
        ("placement", "place meant"),
        ("creation", "way shun"),
    )
    for target, phrase in expected:
        found = {m["word"] for m in mosaics_for(target, 20)}
        assert phrase in found
def test_word_info():
    """word_info reports syllable count, stress pattern and rime for "tonight"."""
    from app import word_info

    info = word_info(word="tonight")
    assert info["syl"] == 2
    assert info["stress"] == "01"
    assert info["rime"] == "AY T"