mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
c60b9d6d43
Phrases now compete in the perfect-rhyme pass anchored at their first
word's stressed vowel ("stir up" = ER AH P = "syrup"), render as
underlines while words render as fills, and can overlap in different
colors — the syrup/cup knot finally displays both schemes. Stopword-
anchored phrases attach only on exact rime identity. Segment-based
highlight renderer replaces the span-per-token one.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
267 lines
9.3 KiB
Python
267 lines
9.3 KiB
Python
"""Regression suite for RhymePad's rhyme detection.
|
|
|
|
Every case here came from a real verse that exposed a gap during
|
|
development — Lil Wayne ("6 Foot 7 Foot"), Eminem ("Godzilla"),
|
|
Kanye ("Power"), and assorted hand-rolled edge cases.
|
|
"""
|
|
|
|
from collections import defaultdict
|
|
|
|
import pytest
|
|
|
|
from app import Draft, analyze, lookup
|
|
|
|
|
|
def groups(text: str) -> list[set[str]]:
    """Collect the highlighted spans of *text*, bucketed by rhyme group.

    Runs ``analyze`` on a ``Draft`` built from *text*.  For every entry in
    the result's ``"tokens"``, the span ``[s:e]`` is sliced out of line
    ``l``, lowercased, and filed under the token's ``"g"`` value.

    Returns one set per distinct ``"g"``, in first-seen order.
    """
    result = analyze(Draft(text=text))
    buckets: dict = {}
    for tok in result["tokens"]:
        bucket = buckets.setdefault(tok["g"], set())
        line = result["lines"][tok["l"]]
        bucket.add(line[tok["s"]:tok["e"]].lower())
    return list(buckets.values())
|
|
|
|
|
|
def group_with(text: str, *words: str) -> set[str]:
    """Return the first rhyme group of *text* that contains ``words[0]``.

    All *words* are compared lowercased.  An assertion fails if any of
    them is absent from that group; the test is failed through
    ``pytest.fail`` when no group contains ``words[0]`` at all.
    """
    for candidate in groups(text):
        if words[0].lower() not in candidate:
            continue
        absent = {w.lower() for w in words} - candidate
        assert not absent, f"{absent} not grouped with {words[0]!r} in {candidate}"
        return candidate
    pytest.fail(f"{words[0]!r} not highlighted at all")
|
|
|
|
|
|
def highlighted(text: str) -> set[str]:
    """Return every highlighted span of *text*, flattened across all groups.

    The result is the union of the sets produced by ``groups``; it is an
    empty set when there are no groups.
    """
    spans: set[str] = set()
    for group in groups(text):
        spans |= group
    return spans
|
|
|
|
|
|
def scheme(text: str) -> str:
    """Return the ``"scheme"`` entry of the first stanza reported for *text*."""
    result = analyze(Draft(text=text))
    first_stanza = result["stanzas"][0]
    return first_stanza["scheme"]
|
|
|
|
|
|
# ---------------------------------------------------------------- perfect
|
|
|
|
def test_perfect_end_rhymes():
    """tonight/light/flight/write share one group and the scheme is "aaaa"."""
    verse = (
        "I keep the cadence tucked beneath my tongue tonight\n"
        "the city hums in amber under fading light\n"
        "I trace a melody that never quite takes flight\n"
        "and let the silence answer everything I write"
    )
    # the mid-line "quite" is expected in the same group as the line ends
    group_with(verse, "tonight", "light", "flight", "write", "quite")
    assert scheme(verse) == "aaaa"
|
|
|
|
|
|
def test_internal_perfect_rhymes():
    """Words rhyming inside the lines are grouped, not only the line ends."""
    verse = (
        "no time to waste, I climb in haste and find my place\n"
        "the rhythm shows, the river flows, wherever it goes"
    )
    expected_groups = (
        ("time", "climb"),
        ("waste", "haste"),
        ("shows", "flows", "goes"),
    )
    for members in expected_groups:
        group_with(verse, *members)
|
|
|
|
|
|
def test_say_weigh():
    """"say" and "weigh" land in the same group despite different spellings."""
    line = "say this weigh that"
    group_with(line, "say", "weigh")
|
|
|
|
|
|
def test_abab_scheme():
    """A cat/blue/hat/true quatrain is reported with scheme "abab"."""
    verse = "the cat\nso blue\na hat\nso true"
    assert scheme(verse) == "abab"
|
|
|
|
|
|
# ------------------------------------------------------------------ slant
|
|
|
|
def test_end_slant_joins_existing_perfect_group():
    """A slant end rhyme joins a perfect group that already exists.

    "time" (AY M) slant-joins the mind/find (AY N D) perfect group.
    """
    verse = (
        "the placement of creation\n"
        "eternal precipice of mind\n"
        "where it goes\n"
        "i can't find the time"
    )
    for members in (("mind", "find", "time"), ("placement", "creation")):
        group_with(verse, *members)
    assert scheme(verse) == "abcb"
|
|
|
|
|
|
def test_kanye_hours_power():
    """hours/power and ticking/tripping group; the couplet scheme is "aa"."""
    couplet = (
        "The clock's ticking, I just count the hours\n"
        "Stop tripping, I'm tripping off the power"
    )
    expected_groups = (
        ("hours", "power"),
        ("ticking", "tripping"),
        # the multi-word pass catches the full phrase pair too
        ("clock's ticking", "stop tripping"),
    )
    for members in expected_groups:
        group_with(couplet, *members)
    assert scheme(couplet) == "aa"
|
|
|
|
|
|
def test_reduced_vowel_merge_vodka_lasagna():
    """"vodka" and "lasagna" are grouped.

    AA + schwa, with different reduced vowels — the Wayne chain.
    """
    verse = (
        "vodka with a spritzer\n"
        "real Gs move in silence like lasagna"
    )
    group_with(verse, "vodka", "lasagna")
|
|
|
|
|
|
def test_consonance_bliss_exist():
    """bliss/whisps/exist are grouped.

    They share vowel + first coda consonant (IH S) while the full codas
    differ; "whisps" is OOV and needs the g2p model.
    """
    line = "bliss whisps in darker nights exist"
    members = ("bliss", "whisps", "exist")
    group_with(line, *members)
|
|
|
|
|
|
# ------------------------------------------------- unknown words / repairs
|
|
|
|
def test_compound_split_heresay_neigh():
    """The misspelled "heresay" is still expected to group with "neigh"."""
    verse = (
        "the rhythm and the gotcha of the heresay\n"
        "neigh"
    )
    group_with(verse, "heresay", "neigh")
|
|
|
|
|
|
def test_multiword_orange_door_hinge_porage():
    """"orange", the phrase "door hinge" and "porage" share one group."""
    verse = (
        "an orange door hinge\n"
        "porage"
    )
    members = ("orange", "door hinge", "porage")
    group_with(verse, *members)
|
|
|
|
|
|
def test_cmu_variant_get_rhymes_with_hit():
    """"hit" and "get" are grouped.

    CMU's secondary pronunciation of "get" is "git".
    """
    couplet = "take the hit\nthat's what you get"
    group_with(couplet, "hit", "get")
|
|
|
|
|
|
# ------------------------------------------------------------------ noise
|
|
|
|
def test_no_transitive_chaining():
    """Two groups linked only through one ambiguous word stay separate.

    peanuts rhymes with what's via one pronunciation (AH T S) and with
    people via another (IY + schwa) — the two groups must not merge.
    """
    verse = (
        "So misunderstood, but what's a world without enigma?\n"
        "You niggas are gelatin, peanuts to an elephant\n"
        "Sleep is the cousin, what a fuckin' family picture\n"
        "People say I'm borderline crazy, sorta, kinda\n"
        "Woman of my dreams, I don't sleep, so I can't find her"
    )
    whats_group = group_with(verse, "what's", "peanuts")
    assert whats_group.isdisjoint({"sleep", "people"})
    sleep_group = group_with(verse, "sleep", "people")
    assert "what's" not in sleep_group
|
|
|
|
|
|
def test_filler_words_not_highlighted_midline():
    """The repeated "I'm" is never highlighted; king/sting still group."""
    verse = (
        "I'm a monster and I'm heartless, I'm the king\n"
        "and I'm the sting"
    )
    lit = highlighted(verse)
    assert "i'm" not in lit
    group_with(verse, "king", "sting")
|
|
|
|
|
|
def test_em_counts_at_line_end_only():
    """'em takes part in the end rhyme, but mid-line 'em tokens stay dark.

    Every token whose span lowercases to "em" must carry a truthy ``"end"``.
    """
    verse = (
        "Fill 'em with the venom and eliminate 'em\n"
        "I told 'em owls fly, I Minute Maid 'em"
    )
    assert scheme(verse) == "aa"  # ...ate 'em / Maid 'em

    # but the mid-line 'em's stay dark
    result = analyze(Draft(text=verse))
    # NOTE(review): this check is vacuously true when no token span equals
    # "em" (e.g. if spans include the apostrophe) — confirm against analyze.
    em_end_flags = [
        tok["end"]
        for tok in result["tokens"]
        if result["lines"][tok["l"]][tok["s"]:tok["e"]].lower() == "em"
    ]
    assert all(em_end_flags)
|
|
|
|
|
|
def test_repeated_word_midline_is_not_a_rhyme():
    """A word repeated mid-line ("fire") produces no highlights at all."""
    line = "the fire burns the fire down"
    assert not highlighted(line)
|
|
|
|
|
|
# ----------------------------------------------------------------- lookup
|
|
|
|
def test_lookup_rhymes():
    """Rhyme lookup for "light" has night/tonight but not "light" itself."""
    response = lookup("light", mode="rhyme")
    found = {entry["word"] for entry in response["words"]}
    assert {"night", "tonight"} <= found
    assert "light" not in found
|
|
|
|
|
|
def test_lookup_near_rhymes_exclude_perfect():
    """Near-rhyme lookup for "hold" includes "home" and excludes "gold"."""
    response = lookup("hold", mode="near")
    found = {entry["word"] for entry in response["words"]}
    assert "home" in found  # shared vowel, different coda
    assert "gold" not in found  # perfect rhymes live in the other mode
|
|
|
|
|
|
# ------------------------------------------------------------------ shape
|
|
|
|
def test_stanzas_and_legend():
    """One four-line stanza whose legend starts with distinct a/b entries.

    The first two legend entries must be labelled "a" and "b", each with
    a non-None color, and the two colors must differ.
    """
    result = analyze(Draft(text="the cat\nso blue\na hat\nso true"))
    [stanza] = result["stanzas"]  # exactly one stanza expected
    assert stanza["lines"] == [0, 1, 2, 3]

    legend = stanza["legend"]
    first, second = legend[0], legend[1]
    assert (first["ch"], second["ch"]) == ("a", "b")
    assert first["color"] is not None
    assert second["color"] is not None
    assert first["color"] != second["color"]
|
|
|
|
|
|
def test_blank_lines_split_stanzas():
    """A blank line separates the text into two stanzas, each scheme "aa"."""
    text = "the cat\na hat\n\nso blue\nso true"
    stanzas = analyze(Draft(text=text))["stanzas"]
    schemes = [stanza["scheme"] for stanza in stanzas]
    assert schemes == ["aa", "aa"]
|
|
|
|
|
|
# ------------------------------------------------------------------ meter
|
|
|
|
def meter_of(line: str) -> dict:
    """Return the first ``"meter"`` entry that ``analyze`` reports for *line*."""
    return analyze(Draft(text=line))["meter"][0]
|
|
|
|
|
|
def test_iambic_pentameter():
    """The Shakespeare line scans as 10 syllables of iambic pentameter."""
    meter = meter_of("Shall I compare thee to a summer's day")
    assert meter["syl"] == 10
    assert meter["label"] == "iambic pentameter"
|
|
|
|
|
|
def test_trochaic_tetrameter():
    """The Blake line is labelled trochaic tetrameter with 7 syllables."""
    meter = meter_of("Tyger Tyger burning bright")
    # catalectic — final unstressed syllable dropped
    assert meter["syl"] == 7
    assert meter["label"] == "trochaic tetrameter"
|
|
|
|
|
|
def test_syllables_always_reported():
    """The line reports 12 syllables and a non-empty ``"stress"`` value."""
    meter = meter_of("the city hums in amber under fading light")
    assert meter["syl"] == 12
    assert meter["stress"]
|
|
|
|
|
|
def test_trailing_schwa_trimmed_militia_commissioner():
    """patricia/militia/commissioner are grouped.

    commissioner has one extra reduced syllable (IH-AH-ER vs IH-AH).
    """
    verse = (
        "Swagger down pat, call my shit Patricia\n"
        "Young Money militia, and I am the commissioner"
    )
    members = ("patricia", "militia", "commissioner")
    group_with(verse, *members)
|
|
|
|
|
|
def test_meter_coaching_flags_outlier_line():
    """Only the long fourth line is flagged ``"off"``, with target 8."""
    verse = (
        "I walk the lonely road tonight\n"
        "I hold the heavy stone of light\n"
        "I call the fading stars to fight\n"
        "and everybody wonders where the time has gone"
    )
    meter = analyze(Draft(text=verse))["meter"]
    off_lines = [entry["l"] for entry in meter if entry["off"]]
    assert off_lines == [3]
    assert meter[3]["target"] == 8
|
|
|
|
|
|
def test_meter_coaching_needs_a_pattern_to_break():
    """No line is flagged ``"off"`` when there is no pattern to break.

    Two lines of wildly different lengths: no dominant pattern, no flags.
    """
    text = "short line here\na very much longer line that runs on and on"
    meter = analyze(Draft(text=text))["meter"]
    flags = [entry["off"] for entry in meter]
    assert not any(flags)
|
|
|
|
|
|
# ------------------------------------------------------------------ layers
|
|
|
|
def test_layered_phrase_rides_second_group():
    """A phrase and one of its own words can sit in different groups.

    Em's syrup knot: "up" fills with the cup chain, while "stir up"
    rides the syrup/burden group as a second layer.
    """
    verse = (
        "Maybe your cup is full of syrup and lean\n"
        "fill the cup up to the brim\n"
        "Maybe I need to stir up shit\n"
        "the burden is mine"
    )
    group_with(verse, "syrup", "burden", "stir up")
    cup_group = group_with(verse, "cup", "up")
    assert cup_group.isdisjoint({"stir up", "syrup"})
|
|
|
|
|
|
def test_weak_phrase_attaches_but_never_founds():
    """A weak phrase may join an existing group but cannot start one."""
    # "were up" (stopword tail) joins an existing ER group...
    verse = (
        "Maybe your cup is full of syrup and lean\n"
        "fill the cup up to the brim\n"
        "shake the world up if it were up to me\n"
        "the burden is mine"
    )
    group_with(verse, "syrup", "burden", "were up")

    # ...but two weak phrases alone can't create a group
    weak_only = "it were up to him\nit were up to her"
    assert "were up" not in highlighted(weak_only)
|