mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
Multis lookup and per-line syllable counts
- Multis: dictionary search by vowel skeleton — single words and two-word combos matching the target's run from its stressed anchor (charisma -> little/women/business; vacation plans -> information back). Perfect rhymes are excluded; phrases are supported as targets; there is a new "multis" row in the rhymes section. Indexes warm at boot.
- Counts toggle: syllables per line in dim digits at the editor's right edge, wrap-safe and live; carried into the PNG export.
- Suffix fusion: multi families whose vowel keys are end-aligned containments merge (back pocket / rap profit / off it / still in office = one Em chain), with trailing schwas stripped before comparison; the longest end token now owns the scheme slot.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -32,6 +32,7 @@ async def lifespan(app: FastAPI):
|
||||
except Exception:
|
||||
pass
|
||||
get_slant_index()
|
||||
get_multi_indexes()
|
||||
yield
|
||||
|
||||
|
||||
@@ -901,25 +902,63 @@ def analyze(draft: Draft):
|
||||
# (shoulder/older/colder) shouldn't split colors with the slant family
|
||||
# it lives inside (soldier/holster/coaster). Perfect members keep the
|
||||
# strong styling; the slant side keeps per-token slant marks.
|
||||
by_family: dict[str, dict] = {}
|
||||
fused: list[dict] = []
|
||||
for g in raw_groups:
|
||||
mk = founding_projections(g["key"]).get("multi")
|
||||
tgt = by_family.get(mk) if mk else None
|
||||
if tgt is None:
|
||||
if mk:
|
||||
by_family[mk] = g
|
||||
fused.append(g)
|
||||
mkeys = [founding_projections(g["key"]).get("multi") for g in raw_groups]
|
||||
mparent = list(range(len(raw_groups)))
|
||||
|
||||
def mfind(i):
    # Union-find root lookup over `mparent` (the enclosing scope's parent
    # table). Each visited node is re-pointed at its grandparent on the
    # way up, so later lookups walk a shorter chain.
    node = i
    while (up := mparent[node]) != node:
        mparent[node] = mparent[up]
        node = mparent[node]
    return node
|
||||
|
||||
def _tail_of(short, long):
|
||||
return len(short) <= len(long) and long[len(long) - len(short):] == short
|
||||
|
||||
def _sig(key):
|
||||
vs = key[2:].split()
|
||||
while vs and vs[-1] == "x":
|
||||
vs.pop() # trailing schwas fall off the beat on both sides
|
||||
return vs
|
||||
|
||||
for ai in range(len(raw_groups)):
|
||||
if not mkeys[ai]:
|
||||
continue
|
||||
if g["slant"]:
|
||||
for t in g["toks"]:
|
||||
t["slant"] = True
|
||||
elif tgt["slant"]:
|
||||
for t in tgt["toks"]:
|
||||
t["slant"] = True
|
||||
tgt["slant"] = False
|
||||
tgt["key"] = g["key"]
|
||||
tgt["toks"].extend(g["toks"])
|
||||
va = _sig(mkeys[ai])
|
||||
if not va:
|
||||
continue
|
||||
for bi in range(ai + 1, len(raw_groups)):
|
||||
if not mkeys[bi]:
|
||||
continue
|
||||
vb = _sig(mkeys[bi])
|
||||
if not vb:
|
||||
continue
|
||||
# equal keys fuse; so do END-ALIGNED containments — a family
|
||||
# rhyming on AA-x is the tail of one rhyming on AE-AA-x
|
||||
# (back pocket / rap profit / office), the longer just
|
||||
# carries lead syllables
|
||||
if va == vb or _tail_of(va, vb) or _tail_of(vb, va):
|
||||
mparent[mfind(ai)] = mfind(bi)
|
||||
|
||||
mclusters = defaultdict(list)
|
||||
for gi in range(len(raw_groups)):
|
||||
mclusters[mfind(gi)].append(gi)
|
||||
fused: list[dict] = []
|
||||
for members in mclusters.values():
|
||||
if len(members) == 1:
|
||||
fused.append(raw_groups[members[0]])
|
||||
continue
|
||||
hub = max(members, key=lambda gi: (not raw_groups[gi]["slant"],
|
||||
len(raw_groups[gi]["toks"])))
|
||||
core = raw_groups[hub]
|
||||
for gi in members:
|
||||
if gi == hub:
|
||||
continue
|
||||
g = raw_groups[gi]
|
||||
if g["slant"]:
|
||||
for t in g["toks"]:
|
||||
t["slant"] = True
|
||||
core["toks"].extend(g["toks"])
|
||||
fused.append(core)
|
||||
raw_groups = fused
|
||||
|
||||
# fuse single-vowel perfect families whose coda classes NEST — the
|
||||
@@ -1029,12 +1068,17 @@ def analyze(draft: Draft):
|
||||
t["gid"] = gid
|
||||
groups_out.append({"id": gid, "color": gid % COLORS, "slant": g["slant"]})
|
||||
|
||||
# stanza rhyme schemes from line-ending groups
|
||||
# (a grouped end word wins over a grouped end phrase)
|
||||
end_gid: dict[int, int] = {}
|
||||
# stanza rhyme schemes from line-ending groups: the token covering
|
||||
# the most of the line's tail owns the slot — Em rhymes "-cock it",
|
||||
# not "it"
|
||||
end_best: dict[int, tuple[int, int]] = {}
|
||||
for t in [*tokens, *phrases]:
|
||||
if t["is_end"] and t["gid"] is not None:
|
||||
end_gid.setdefault(t["line"], t["gid"])
|
||||
span = t["end"] - t["start"]
|
||||
cur = end_best.get(t["line"])
|
||||
if cur is None or span > cur[0]:
|
||||
end_best[t["line"]] = (span, t["gid"])
|
||||
end_gid = {ln: gid for ln, (sp, gid) in end_best.items()}
|
||||
last_tok = {}
|
||||
for t in tokens:
|
||||
if t["is_end"]:
|
||||
@@ -1340,6 +1384,109 @@ def word_info(word: str):
|
||||
"zipf": round(zipf_frequency(w, "en"), 1)}
|
||||
|
||||
|
||||
_multi_left: dict[str, list[str]] | None = None
|
||||
_multi_right: dict[str, list[str]] | None = None
|
||||
|
||||
|
||||
def _squeeze_vs(vs: list[str]) -> str:
    """Collapse a vowel list into its space-joined skeleton key.

    The first vowel is kept exactly; every later vowel found in REDUCED
    is written as "x" — the same equivalence the multi detection passes
    use. Callers must pass a non-empty list.
    """
    skeleton = [vs[0]]
    for vowel in vs[1:]:
        skeleton.append("x" if vowel in REDUCED else vowel)
    return " ".join(skeleton)
|
||||
|
||||
|
||||
def get_multi_indexes():
    """Return the (left, right) skeleton indexes, building them on first use.

    left maps a tail skeleton to words (left halves of a combo); right
    maps a full-word skeleton to words (right halves / whole-word
    matches). Both are cached in module globals, so the pronunciation
    data is only scanned once.
    """
    global _multi_left, _multi_right
    if _multi_left is not None:
        return _multi_left, _multi_right

    pronouncing.init_cmu()
    by_tail: dict[str, list[str]] = defaultdict(list)
    by_full: dict[str, list[str]] = defaultdict(list)
    taken = set()
    for word, phones in pronouncing.pronunciations:
        # only the first listed pronunciation of each word is indexed
        if word in taken:
            continue
        # skip non-alphabetic entries, very short words, and rare words
        if not word.isalpha() or len(word) < 3:
            continue
        if zipf_frequency(word, "en") < 3.0:
            continue
        taken.add(word)

        normed = _norm_r(phones)
        tail = _tail_vowels(normed)
        if tail:
            by_tail[_squeeze_vs(tail)].append(word)
        full = _all_vowels(normed)
        # whole-word entries need at least one phone ending in 1 or 2
        if full and any(p[-1] in "12" for p in normed.split()):
            by_full[_squeeze_vs(full)].append(word)

    _multi_left, _multi_right = by_tail, by_full
    return _multi_left, _multi_right
|
||||
|
||||
|
||||
def target_skeleton(w: str) -> list[str] | None:
|
||||
"""Vowel run from the first primary stress — where a rap multi
|
||||
anchors (e-LE-va-tor reads from its EH)."""
|
||||
if " " in w:
|
||||
parts = w.split()
|
||||
pa = phones_for(parts[0])
|
||||
rest = [phones_for(p) for p in parts[1:]]
|
||||
if not pa or not all(rest):
|
||||
return None
|
||||
vs = _tail_vowels(pa)
|
||||
for ph in rest:
|
||||
vs += _all_vowels(ph)
|
||||
return vs if len(vs) >= 2 else None
|
||||
ph = phones_for(w)
|
||||
if not ph:
|
||||
return None
|
||||
pl = ph.split()
|
||||
vi = next((i for i, p in enumerate(pl) if p[-1] == "1"),
|
||||
next((i for i, p in enumerate(pl) if p[-1].isdigit()), None))
|
||||
if vi is None:
|
||||
return None
|
||||
vs = [DIGITS.sub("", p) for p in pl[vi:] if p[-1].isdigit()]
|
||||
return vs if len(vs) >= 2 else None
|
||||
|
||||
|
||||
def multis_for(w: str, exclude: set, limit: int = 14) -> list[str]:
    """Multisyllabic rhymes: single words and two-word combos whose
    vowel skeleton matches the target's (elevator -> hella paper).

    w is the target word or phrase; exclude holds words that must not
    be suggested (the target and its own words are always avoided).
    At most `limit` suggestions are returned, best-scored first; a
    non-positive limit yields an empty list.
    """
    if limit <= 0:
        return []  # previously one result slipped through before the cap
    vs = target_skeleton(w)
    if not vs:
        return []
    skel = _squeeze_vs(vs)
    left, right = get_multi_indexes()
    avoid = set(exclude) | set(w.split()) | {w}

    def top_halves(pool):
        # the eight most frequent usable words from one index bucket
        usable = (w2 for w2 in pool
                  if w2 not in STOPWORDS and w2 not in avoid)
        return sorted(usable, key=lambda w2: -zipf_frequency(w2, "en"))[:8]

    # whole-word matches score by raw frequency
    scored: list[tuple[float, str]] = [
        (zipf_frequency(cand, "en"), cand)
        for cand in right.get(skel, [])
        if cand not in avoid
    ]

    # two-word combos: try every split point of the skeleton
    parts = skel.split()
    for i in range(1, len(parts)):
        if not any(v != "x" for v in parts[i:]):
            continue  # the right half must carry a full vowel
        lk, rk = " ".join(parts[:i]), " ".join(parts[i:])
        lefts = top_halves(left.get(lk, []))
        # the capped right-half score is the same for every left word,
        # so look it up once per split rather than once per pair
        rights = [(b, min(zipf_frequency(b, "en"), 5.0))
                  for b in top_halves(right.get(rk, []))]
        for a in lefts:
            za = min(zipf_frequency(a, "en"), 5.0)
            for b, zb in rights:
                if b == a:
                    continue
                # each half is capped at 5.0 and the pair pays a flat
                # 4.0 penalty against single-word matches
                scored.append((za + zb - 4.0, f"{a} {b}"))

    scored.sort(key=lambda t: (-t[0], -len(t[1]), t[1]))
    out, seen = [], set()
    for _, c in scored:
        if c in seen:
            continue  # the same combo can arise from different splits
        seen.add(c)
        out.append(c)
        if len(out) >= limit:
            break
    return out
|
||||
|
||||
|
||||
@app.get("/api/lookup")
|
||||
def lookup(word: str, mode: str = "rhyme", limit: int = 60):
|
||||
w = word.strip().lower()[:64]
|
||||
@@ -1361,11 +1508,13 @@ def lookup(word: str, mode: str = "rhyme", limit: int = 60):
|
||||
if mode == "near":
|
||||
words = _ranked(near_cands, {w}, limit)
|
||||
return {"word": w, "mode": mode, "known": True, "words": words}
|
||||
# rhyme mode carries both: perfect in "words", slant in "near"
|
||||
# rhyme mode carries it all: perfect, slant, and multis
|
||||
words = _ranked(perfect, {w}, limit)
|
||||
near = _ranked(near_cands, {w}, limit // 2)
|
||||
target = word.strip().lower()[:64] if rhyme_on else w
|
||||
multis = multis_for(target, perfect)
|
||||
return {"word": w, "mode": mode, "known": True, "words": words,
|
||||
"near": near, "rhyme_on": rhyme_on}
|
||||
"near": near, "rhyme_on": rhyme_on, "multis": multis}
|
||||
|
||||
|
||||
app.mount("/", StaticFiles(directory=Path(__file__).parent / "static",
|
||||
|
||||
+41
-2
@@ -114,6 +114,11 @@
|
||||
z-index: 1;
|
||||
}
|
||||
#highlight .anno { color: #6a5f52; }
|
||||
.sylcount {
|
||||
position: absolute; right: 9px;
|
||||
font-size: 10px; line-height: 1.6; color: #5d5347;
|
||||
pointer-events: none;
|
||||
}
|
||||
#editor::selection { background: rgba(232,129,74,0.22); color: transparent; }
|
||||
#stresslayer {
|
||||
pointer-events: none;
|
||||
@@ -289,6 +294,7 @@ Double-click any word to look it up on the right."></textarea>
|
||||
<label class="mtoggle" title="Color-code rhyme families"><input type="checkbox" id="rhymeToggle" checked> rhyme</label>
|
||||
<label class="mtoggle" title="Underline words that share an initial sound"><input type="checkbox" id="allitToggle"> alliteration</label>
|
||||
<label class="mtoggle" title="Sheet music for your flow — syllable emphasis dots under each word"><input type="checkbox" id="stressToggle"> rhythm</label>
|
||||
<label class="mtoggle" title="Syllables per line, at the right edge"><input type="checkbox" id="countsToggle"> counts</label>
|
||||
<div class="scheme-readout" id="schemeReadout"></div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -361,6 +367,9 @@ allitToggle.addEventListener('change', render);
|
||||
const rhymeToggle = document.getElementById('rhymeToggle');
|
||||
rhymeToggle.checked = true;
|
||||
rhymeToggle.addEventListener('change', render);
|
||||
const countsToggle = document.getElementById('countsToggle');
|
||||
countsToggle.checked = false;
|
||||
countsToggle.addEventListener('change', render);
|
||||
const schemeReadout = document.getElementById('schemeReadout');
|
||||
const COLORS = 12;
|
||||
|
||||
@@ -579,9 +588,11 @@ function render(){
|
||||
h += `<span class="hseg" style="${style}">${text}</span>`;
|
||||
}
|
||||
}
|
||||
html += (/^\s*[#([]/.test(line) ? `<span class="anno">${h}</span>` : h) + '\n';
|
||||
const lcls = 'lmark' + (/^\s*[#([]/.test(line) ? ' anno' : '');
|
||||
html += (line ? `<span class="${lcls}" data-l="${i}">${h}</span>` : '') + '\n';
|
||||
});
|
||||
highlight.innerHTML = html;
|
||||
renderCounts(lines);
|
||||
renderStress(lines);
|
||||
highlight.scrollTop = editor.scrollTop;
|
||||
highlight.scrollLeft = editor.scrollLeft;
|
||||
@@ -608,6 +619,21 @@ function cadenceColors(){
|
||||
return map;
|
||||
}
|
||||
|
||||
// Paint the per-line syllable counts into the highlight layer.
// `lines` is the editor's current text split into lines; a count is
// only drawn when that line still matches the text that was analyzed.
function renderCounts(lines){
  // clear whatever the previous pass drew
  for(const stale of highlight.querySelectorAll('.sylcount')) stale.remove();

  const ready = countsToggle.checked && analysis && analysis.meter;
  if(!ready) return;

  analysis.meter.forEach(entry=>{
    const row = entry.l;
    if(analysis.lines[row] !== lines[row]) return; // line being edited
    const anchor = highlight.querySelector(`.lmark[data-l="${row}"]`);
    if(!anchor) return; // no line marker was rendered for this row
    const badge = document.createElement('div');
    badge.className = 'sylcount';
    badge.textContent = entry.syl;
    // align with the first visual row of the line
    badge.style.top = `${anchor.offsetTop + 5}px`;
    highlight.appendChild(badge);
  });
}
|
||||
|
||||
function renderStress(lines){
|
||||
if(!stressToggle.checked){ stresslayer.innerHTML = ''; return; }
|
||||
const byLine = {};
|
||||
@@ -826,6 +852,8 @@ function paintSections(){
|
||||
});
|
||||
const near = e.rhyme.near || [];
|
||||
if(near.length) h += `<div class="res-label sub">near</div>` + chipHtml(near, 'near');
|
||||
const multis = e.rhyme.multis || [];
|
||||
if(multis.length) h += `<div class="res-label sub">multis</div>` + chipHtml(multis);
|
||||
}
|
||||
if(e.syn && e.syn.known && e.syn.sections.length){
|
||||
h += `<div class="res-label">synonyms</div>`;
|
||||
@@ -857,7 +885,7 @@ document.getElementById('exportBtn').addEventListener('click', async ()=>{
|
||||
|
||||
const probe = document.createElement('canvas').getContext('2d');
|
||||
probe.font = font;
|
||||
const w = Math.ceil(Math.max(220, ...lines.map(l=>probe.measureText(l).width)) + PAD * 2);
|
||||
const w = Math.ceil(Math.max(220, ...lines.map(l=>probe.measureText(l).width)) + PAD * 2 + (countsToggle.checked ? 28 : 0));
|
||||
const h = Math.ceil(lines.length * LH + PAD * 2 + 18);
|
||||
const canvas = document.createElement('canvas');
|
||||
canvas.width = w * S; canvas.height = h * S;
|
||||
@@ -906,6 +934,17 @@ document.getElementById('exportBtn').addEventListener('click', async ()=>{
|
||||
}
|
||||
x.fillStyle = /^\s*[#([]/.test(line) ? '#6a5f52' : ink;
|
||||
x.fillText(line, PAD, y);
|
||||
if(countsToggle.checked && fresh && analysis.meter){
|
||||
const cm = analysis.meter.find(mm=>mm.l===i);
|
||||
if(cm){
|
||||
x.font = "10px 'Spline Sans Mono', monospace";
|
||||
x.textAlign = 'right';
|
||||
x.fillStyle = '#5d5347';
|
||||
x.fillText(String(cm.syl), w - 10, y);
|
||||
x.textAlign = 'left';
|
||||
x.font = font;
|
||||
}
|
||||
}
|
||||
if(rhythm && fresh){
|
||||
const spans = (analysis.stress.filter(s=>s.l===i)).sort((a,b)=>a.s-b.s);
|
||||
x.font = "8px 'Spline Sans Mono', monospace";
|
||||
|
||||
@@ -642,3 +642,31 @@ def test_end_dominated_vowel_families_fuse():
|
||||
"The top is not enough\n"
|
||||
"No choice, sellin' drugs")
|
||||
group_with(text, "love", "blood", "thugs", "mud", "enough", "drugs")
|
||||
|
||||
|
||||
def test_em_back_pocket_quintet_is_one_family():
    # AE-AA mosaics, plain AA endings, and IH-x-AA triples all share the
    # vowel tail — suffix fusion reads them as Em wrote them: one chain
    text = ("got a laptop in my back pocket\n"
            "My pen'll go off when I half-cock it\n"
            "Got a fat knot from that rap profit\n"
            "Made a livin' and a killin' off it\n"
            "Ever since Bill Clinton was still in office")
    family = ("back pocket", "rap profit", "off it",
              "still in office", "laptop", "pocket", "profit", "office")
    group_with(text, *family)
    # all five line endings land in the same scheme slot
    assert scheme(text) == "aaaaa"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ multis
|
||||
|
||||
def test_multis_generator():
    from app import multis_for

    # IH-x skeleton, not a perfect rhyme
    charisma_hits = multis_for("charisma", set())
    assert "little" in charisma_hits

    # two-word combos exist
    elevator_hits = multis_for("elevator", set())
    assert any(" " in hit for hit in elevator_hits)
|
||||
|
||||
|
||||
def test_multis_in_lookup_response():
    # rhyme-mode lookups carry a non-empty "multis" list...
    multis = lookup("placement", mode="rhyme")["multis"]
    assert multis
    # ...from which perfect rhymes are excluded
    assert "basement" not in multis
|
||||
|
||||
Reference in New Issue
Block a user