Multis lookup and per-line syllable counts

- Multis: dictionary search by vowel skeleton — single words and
  two-word combos matching the target's run from its stressed anchor
  (charisma -> little/women/business; vacation plans -> information
  back). Perfect rhymes excluded; phrases supported as targets; new
  "multis" row in the rhymes section. Indexes warm at boot.
- Counts toggle: syllables per line in dim digits at the editor's
  right edge, wrap-safe and live; carried into the PNG export.
- Suffix fusion: multi families whose vowel keys are end-aligned
  containments merge (back pocket / rap profit / off it / still in
  office = one Em chain), with trailing schwas stripped before
  comparison; the longest end token now owns the scheme slot.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 13:23:29 -04:00
parent f59e059f0d
commit b204c8d9b9
3 changed files with 242 additions and 26 deletions
+173 -24
View File
@@ -32,6 +32,7 @@ async def lifespan(app: FastAPI):
except Exception:
pass
get_slant_index()
get_multi_indexes()
yield
@@ -901,25 +902,63 @@ def analyze(draft: Draft):
# (shoulder/older/colder) shouldn't split colors with the slant family
# it lives inside (soldier/holster/coaster). Perfect members keep the
# strong styling; the slant side keeps per-token slant marks.
by_family: dict[str, dict] = {}
fused: list[dict] = []
for g in raw_groups:
mk = founding_projections(g["key"]).get("multi")
tgt = by_family.get(mk) if mk else None
if tgt is None:
if mk:
by_family[mk] = g
fused.append(g)
mkeys = [founding_projections(g["key"]).get("multi") for g in raw_groups]
mparent = list(range(len(raw_groups)))
def mfind(i):
    """Return the root of ``i`` in the ``mparent`` forest.

    While walking up, each visited node is re-pointed at its grandparent
    so later lookups take a shorter path.
    """
    while True:
        parent = mparent[i]
        if parent == i:
            return i
        # shortcut this node past its parent, then continue from there
        mparent[i] = mparent[parent]
        i = mparent[i]
def _tail_of(short, long):
return len(short) <= len(long) and long[len(long) - len(short):] == short
def _sig(key):
vs = key[2:].split()
while vs and vs[-1] == "x":
vs.pop() # trailing schwas fall off the beat on both sides
return vs
for ai in range(len(raw_groups)):
if not mkeys[ai]:
continue
if g["slant"]:
for t in g["toks"]:
t["slant"] = True
elif tgt["slant"]:
for t in tgt["toks"]:
t["slant"] = True
tgt["slant"] = False
tgt["key"] = g["key"]
tgt["toks"].extend(g["toks"])
va = _sig(mkeys[ai])
if not va:
continue
for bi in range(ai + 1, len(raw_groups)):
if not mkeys[bi]:
continue
vb = _sig(mkeys[bi])
if not vb:
continue
# equal keys fuse; so do END-ALIGNED containments — a family
# rhyming on AA-x is the tail of one rhyming on AE-AA-x
# (back pocket / rap profit / office), the longer just
# carries lead syllables
if va == vb or _tail_of(va, vb) or _tail_of(vb, va):
mparent[mfind(ai)] = mfind(bi)
mclusters = defaultdict(list)
for gi in range(len(raw_groups)):
mclusters[mfind(gi)].append(gi)
fused: list[dict] = []
for members in mclusters.values():
if len(members) == 1:
fused.append(raw_groups[members[0]])
continue
hub = max(members, key=lambda gi: (not raw_groups[gi]["slant"],
len(raw_groups[gi]["toks"])))
core = raw_groups[hub]
for gi in members:
if gi == hub:
continue
g = raw_groups[gi]
if g["slant"]:
for t in g["toks"]:
t["slant"] = True
core["toks"].extend(g["toks"])
fused.append(core)
raw_groups = fused
# fuse single-vowel perfect families whose coda classes NEST — the
@@ -1029,12 +1068,17 @@ def analyze(draft: Draft):
t["gid"] = gid
groups_out.append({"id": gid, "color": gid % COLORS, "slant": g["slant"]})
# stanza rhyme schemes from line-ending groups
# (a grouped end word wins over a grouped end phrase)
end_gid: dict[int, int] = {}
# stanza rhyme schemes from line-ending groups: the token covering
# the most of the line's tail owns the slot — Em rhymes "-cock it",
# not "it"
end_best: dict[int, tuple[int, int]] = {}
for t in [*tokens, *phrases]:
if t["is_end"] and t["gid"] is not None:
end_gid.setdefault(t["line"], t["gid"])
span = t["end"] - t["start"]
cur = end_best.get(t["line"])
if cur is None or span > cur[0]:
end_best[t["line"]] = (span, t["gid"])
end_gid = {ln: gid for ln, (sp, gid) in end_best.items()}
last_tok = {}
for t in tokens:
if t["is_end"]:
@@ -1340,6 +1384,109 @@ def word_info(word: str):
"zipf": round(zipf_frequency(w, "en"), 1)}
_multi_left: dict[str, list[str]] | None = None
_multi_right: dict[str, list[str]] | None = None
def _squeeze_vs(vs: list[str]) -> str:
"""Vowel skeleton: first vowel exact, later reduced vowels merge to x
— the same equivalence the multi detection passes use."""
return " ".join([vs[0]] + ["x" if v in REDUCED else v for v in vs[1:]])
def get_multi_indexes():
    """Return the ``(left, right)`` skeleton indexes, building them once.

    ``left`` maps a squeezed tail-vowel skeleton to words (used as the
    left half of a combo); ``right`` maps a squeezed all-vowel skeleton
    to words (right halves and whole-word matches). Both are cached in
    the module globals after the first call.
    """
    global _multi_left, _multi_right
    if _multi_left is not None:
        return _multi_left, _multi_right

    pronouncing.init_cmu()
    by_tail: dict[str, list[str]] = defaultdict(list)
    by_full: dict[str, list[str]] = defaultdict(list)
    visited = set()
    for word, phones in pronouncing.pronunciations:
        # only the first listed pronunciation of a word is indexed
        if word in visited:
            continue
        # keep alphabetic words of 3+ letters that are reasonably common
        if not word.isalpha() or len(word) < 3:
            continue
        if zipf_frequency(word, "en") < 3.0:
            continue
        visited.add(word)

        normed = _norm_r(phones)
        tail_vowels = _tail_vowels(normed)
        if tail_vowels:
            by_tail[_squeeze_vs(tail_vowels)].append(word)

        every_vowel = _all_vowels(normed)
        # the right index only takes words with a phone marked 1 or 2
        if every_vowel and any(p[-1] in "12" for p in normed.split()):
            by_full[_squeeze_vs(every_vowel)].append(word)

    _multi_left, _multi_right = by_tail, by_full
    return _multi_left, _multi_right
def target_skeleton(w: str) -> list[str] | None:
"""Vowel run from the first primary stress — where a rap multi
anchors (e-LE-va-tor reads from its EH)."""
if " " in w:
parts = w.split()
pa = phones_for(parts[0])
rest = [phones_for(p) for p in parts[1:]]
if not pa or not all(rest):
return None
vs = _tail_vowels(pa)
for ph in rest:
vs += _all_vowels(ph)
return vs if len(vs) >= 2 else None
ph = phones_for(w)
if not ph:
return None
pl = ph.split()
vi = next((i for i, p in enumerate(pl) if p[-1] == "1"),
next((i for i, p in enumerate(pl) if p[-1].isdigit()), None))
if vi is None:
return None
vs = [DIGITS.sub("", p) for p in pl[vi:] if p[-1].isdigit()]
return vs if len(vs) >= 2 else None
def multis_for(w: str, exclude: set, limit: int = 14) -> list[str]:
    """Multisyllabic rhymes: single words and two-word combos whose
    vowel skeleton matches the target's (elevator -> hella paper).

    Args:
        w: target word or phrase.
        exclude: candidates to leave out; the target and each of its
            words are always left out as well.
        limit: maximum number of suggestions; ``<= 0`` yields ``[]``.

    Returns:
        Unique suggestions, best score first. Single words score their
        zipf frequency; a combo scores the sum of both halves'
        frequencies (each capped at 5.0) minus 4.0. Ties go to the
        longer string, then alphabetical order.
    """
    if limit <= 0:
        # the cap used to be checked only after the first append, so a
        # non-positive limit still leaked one suggestion
        return []
    vs = target_skeleton(w)
    if not vs:
        return []
    skel = _squeeze_vs(vs)
    left, right = get_multi_indexes()
    avoid = set(exclude) | set(w.split()) | {w}

    def freq(word: str) -> float:
        return zipf_frequency(word, "en")

    def top_halves(index, key):
        """Up to eight usable words for ``key`` as (freq, word), most frequent first."""
        pairs = [(freq(c), c) for c in index.get(key, [])
                 if c not in STOPWORDS and c not in avoid]
        pairs.sort(key=lambda fc: -fc[0])  # stable: ties keep index order
        return pairs[:8]

    # whole-word matches on the full skeleton
    scored: list[tuple[float, str]] = [
        (freq(cand), cand) for cand in right.get(skel, []) if cand not in avoid
    ]

    # two-word combos: split the skeleton at every interior boundary
    parts = skel.split()
    for i in range(1, len(parts)):
        if all(v == "x" for v in parts[i:]):
            continue  # the right half must carry a full vowel
        lefts = top_halves(left, " ".join(parts[:i]))
        rights = top_halves(right, " ".join(parts[i:]))
        for fa, a in lefts:
            za = min(fa, 5.0)
            for fb, b in rights:
                if b == a:
                    continue
                scored.append((za + min(fb, 5.0) - 4.0, f"{a} {b}"))

    scored.sort(key=lambda t: (-t[0], -len(t[1]), t[1]))
    # dict.fromkeys keeps first-seen order, so this dedupes without reordering
    return list(dict.fromkeys(c for _, c in scored))[:limit]
@app.get("/api/lookup")
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
w = word.strip().lower()[:64]
@@ -1361,11 +1508,13 @@ def lookup(word: str, mode: str = "rhyme", limit: int = 60):
if mode == "near":
words = _ranked(near_cands, {w}, limit)
return {"word": w, "mode": mode, "known": True, "words": words}
# rhyme mode carries both: perfect in "words", slant in "near"
# rhyme mode carries it all: perfect, slant, and multis
words = _ranked(perfect, {w}, limit)
near = _ranked(near_cands, {w}, limit // 2)
target = word.strip().lower()[:64] if rhyme_on else w
multis = multis_for(target, perfect)
return {"word": w, "mode": mode, "known": True, "words": words,
"near": near, "rhyme_on": rhyme_on}
"near": near, "rhyme_on": rhyme_on, "multis": multis}
app.mount("/", StaticFiles(directory=Path(__file__).parent / "static",
+41 -2
View File
@@ -114,6 +114,11 @@
z-index: 1;
}
#highlight .anno { color: #6a5f52; }
.sylcount {
position: absolute; right: 9px;
font-size: 10px; line-height: 1.6; color: #5d5347;
pointer-events: none;
}
#editor::selection { background: rgba(232,129,74,0.22); color: transparent; }
#stresslayer {
pointer-events: none;
@@ -289,6 +294,7 @@ Double-click any word to look it up on the right."></textarea>
<label class="mtoggle" title="Color-code rhyme families"><input type="checkbox" id="rhymeToggle" checked> rhyme</label>
<label class="mtoggle" title="Underline words that share an initial sound"><input type="checkbox" id="allitToggle"> alliteration</label>
<label class="mtoggle" title="Sheet music for your flow — syllable emphasis dots under each word"><input type="checkbox" id="stressToggle"> rhythm</label>
<label class="mtoggle" title="Syllables per line, at the right edge"><input type="checkbox" id="countsToggle"> counts</label>
<div class="scheme-readout" id="schemeReadout"></div>
</div>
</div>
@@ -361,6 +367,9 @@ allitToggle.addEventListener('change', render);
const rhymeToggle = document.getElementById('rhymeToggle');
rhymeToggle.checked = true;
rhymeToggle.addEventListener('change', render);
const countsToggle = document.getElementById('countsToggle');
countsToggle.checked = false;
countsToggle.addEventListener('change', render);
const schemeReadout = document.getElementById('schemeReadout');
const COLORS = 12;
@@ -579,9 +588,11 @@ function render(){
h += `<span class="hseg" style="${style}">${text}</span>`;
}
}
html += (/^\s*[#([]/.test(line) ? `<span class="anno">${h}</span>` : h) + '\n';
const lcls = 'lmark' + (/^\s*[#([]/.test(line) ? ' anno' : '');
html += (line ? `<span class="${lcls}" data-l="${i}">${h}</span>` : '') + '\n';
});
highlight.innerHTML = html;
renderCounts(lines);
renderStress(lines);
highlight.scrollTop = editor.scrollTop;
highlight.scrollLeft = editor.scrollLeft;
@@ -608,6 +619,21 @@ function cadenceColors(){
return map;
}
// Paint a per-line syllable count at the right edge of the highlight layer.
// `lines` is the editor's current text split into lines; a count is only
// drawn for a line whose text still matches what the analysis saw.
function renderCounts(lines){
  // start clean: drop every count left over from the previous render
  for(const stale of highlight.querySelectorAll('.sylcount')) stale.remove();

  const ready = countsToggle.checked && analysis && analysis.meter;
  if(!ready) return;

  for(const entry of analysis.meter){
    const idx = entry.l;
    if(analysis.lines[idx] !== lines[idx]) continue; // line being edited
    const anchor = highlight.querySelector(`.lmark[data-l="${idx}"]`);
    if(!anchor) continue;
    const badge = document.createElement('div');
    badge.className = 'sylcount';
    badge.textContent = entry.syl;
    // align with the top of the line's marker span
    badge.style.top = (anchor.offsetTop + 5) + 'px';
    highlight.appendChild(badge);
  }
}
function renderStress(lines){
if(!stressToggle.checked){ stresslayer.innerHTML = ''; return; }
const byLine = {};
@@ -826,6 +852,8 @@ function paintSections(){
});
const near = e.rhyme.near || [];
if(near.length) h += `<div class="res-label sub">near</div>` + chipHtml(near, 'near');
const multis = e.rhyme.multis || [];
if(multis.length) h += `<div class="res-label sub">multis</div>` + chipHtml(multis);
}
if(e.syn && e.syn.known && e.syn.sections.length){
h += `<div class="res-label">synonyms</div>`;
@@ -857,7 +885,7 @@ document.getElementById('exportBtn').addEventListener('click', async ()=>{
const probe = document.createElement('canvas').getContext('2d');
probe.font = font;
const w = Math.ceil(Math.max(220, ...lines.map(l=>probe.measureText(l).width)) + PAD * 2);
const w = Math.ceil(Math.max(220, ...lines.map(l=>probe.measureText(l).width)) + PAD * 2 + (countsToggle.checked ? 28 : 0));
const h = Math.ceil(lines.length * LH + PAD * 2 + 18);
const canvas = document.createElement('canvas');
canvas.width = w * S; canvas.height = h * S;
@@ -906,6 +934,17 @@ document.getElementById('exportBtn').addEventListener('click', async ()=>{
}
x.fillStyle = /^\s*[#([]/.test(line) ? '#6a5f52' : ink;
x.fillText(line, PAD, y);
if(countsToggle.checked && fresh && analysis.meter){
const cm = analysis.meter.find(mm=>mm.l===i);
if(cm){
x.font = "10px 'Spline Sans Mono', monospace";
x.textAlign = 'right';
x.fillStyle = '#5d5347';
x.fillText(String(cm.syl), w - 10, y);
x.textAlign = 'left';
x.font = font;
}
}
if(rhythm && fresh){
const spans = (analysis.stress.filter(s=>s.l===i)).sort((a,b)=>a.s-b.s);
x.font = "8px 'Spline Sans Mono', monospace";
+28
View File
@@ -642,3 +642,31 @@ def test_end_dominated_vowel_families_fuse():
"The top is not enough\n"
"No choice, sellin' drugs")
group_with(text, "love", "blood", "thugs", "mud", "enough", "drugs")
def test_em_back_pocket_quintet_is_one_family():
    """Suffix fusion puts all five line endings in one rhyme family."""
    # AE-AA mosaics, plain AA endings, and IH-x-AA triples all share the
    # vowel tail — suffix fusion reads them as Em wrote them: one chain
    verse = ("got a laptop in my back pocket\n"
             "My pen'll go off when I half-cock it\n"
             "Got a fat knot from that rap profit\n"
             "Made a livin' and a killin' off it\n"
             "Ever since Bill Clinton was still in office")
    one_family = ("back pocket", "rap profit", "off it",
                  "still in office", "laptop", "pocket", "profit", "office")
    group_with(verse, *one_family)
    assert scheme(verse) == "aaaaa"
# ------------------------------------------------------------------ multis
def test_multis_generator():
    """multis_for yields skeleton matches, including two-word combos."""
    from app import multis_for

    charisma_hits = multis_for("charisma", set())
    # IH-x skeleton, not a perfect rhyme
    assert "little" in charisma_hits

    elevator_hits = multis_for("elevator", set())
    # two-word combos exist
    assert any(" " in hit for hit in elevator_hits)
def test_multis_in_lookup_response():
    """Rhyme-mode lookup returns a non-empty multis list without perfect rhymes."""
    multis = lookup("placement", mode="rhyme")["multis"]
    assert multis
    # perfects excluded
    assert "basement" not in multis