mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
42d48b2c42
Whole rhyming word fills dimly; a bright underline marks just the rhyming part (tonight -> under 'ight'), so the precise interlock shows without the fill looking ragged when the phoneme->letter guess is a letter off. Removed 'scheme' from the toolbar readout. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
749 lines
28 KiB
Python
749 lines
28 KiB
Python
"""Regression suite for RhymePad's rhyme detection.
|
|
|
|
Every case here came from a real verse that exposed a gap during
|
|
development — Lil Wayne ("6 Foot 7 Foot"), Eminem ("Godzilla"),
|
|
Kanye ("Power"), and assorted hand-rolled edge cases.
|
|
"""
|
|
|
|
from collections import defaultdict
|
|
|
|
import pytest
|
|
|
|
from app import Draft, analyze, lookup
|
|
|
|
|
|
def groups(text: str) -> list[set[str]]:
    """Collect the highlighted spans of *text*, one set per rhyme group.

    Analyzes a ``Draft`` built from *text*, slices every token's span out
    of its line, lowercases it, and files it under the token's group id
    (the ``"g"`` field). Returns the per-group sets as a list.
    """
    analysis = analyze(Draft(text=text))
    buckets: dict = {}
    for tok in analysis["tokens"]:
        members = buckets.setdefault(tok["g"], set())
        span = analysis["lines"][tok["l"]][tok["s"]:tok["e"]]
        members.add(span.lower())
    return list(buckets.values())
|
|
|
|
|
|
def group_with(text: str, *words: str) -> set[str]:
    """Return the first rhyme group containing ``words[0]``.

    Asserts that every one of *words* (lowercased) sits in that same
    group. When the lead word is not in any group, the test is failed
    through ``pytest.fail``.
    """
    candidates = groups(text)
    lead = words[0]
    hit = next((g for g in candidates if lead.lower() in g), None)
    if hit is None:
        pytest.fail(f"{lead!r} not highlighted at all")
    absent = {w.lower() for w in words} - hit
    assert not absent, f"{absent} not grouped with {lead!r} in {hit}"
    return hit
|
|
|
|
|
|
def highlighted(text: str) -> set[str]:
    """Return every highlighted span of *text*, flattened across groups."""
    spans: set[str] = set()
    for group in groups(text):
        spans |= group
    return spans
|
|
|
|
|
|
def scheme(text: str) -> str:
    """Return the ``"scheme"`` string of the first stanza of *text*."""
    first_stanza = analyze(Draft(text=text))["stanzas"][0]
    return first_stanza["scheme"]
|
|
|
|
|
|
# ---------------------------------------------------------------- perfect
|
|
|
|
def test_perfect_end_rhymes():
    """Four line endings plus the mid-line "quite" share one group."""
    verse = "\n".join([
        "I keep the cadence tucked beneath my tongue tonight",
        "the city hums in amber under fading light",
        "I trace a melody that never quite takes flight",
        "and let the silence answer everything I write",
    ])
    group_with(verse, "tonight", "light", "flight", "write", "quite")
    assert scheme(verse) == "aaaa"
|
|
|
|
|
|
def test_internal_perfect_rhymes():
    """Rhymes inside a line are grouped, not only the line endings."""
    verse = "\n".join([
        "no time to waste, I climb in haste and find my place",
        "the rhythm shows, the river flows, wherever it goes",
    ])
    expected_groups = [
        ("time", "climb"),
        ("waste", "haste"),
        ("shows", "flows", "goes"),
    ]
    for members in expected_groups:
        group_with(verse, *members)
|
|
|
|
|
|
def test_say_weigh():
    """The words say and weigh land in the same group."""
    line = "say this weigh that"
    group_with(line, "say", "weigh")
|
|
|
|
|
|
def test_abab_scheme():
    """Alternating cat/blue/hat/true endings are lettered ``abab``."""
    quatrain = "\n".join(["the cat", "so blue", "a hat", "so true"])
    assert scheme(quatrain) == "abab"
|
|
|
|
|
|
# ------------------------------------------------------------------ slant
|
|
|
|
def test_end_slant_joins_existing_perfect_group():
    """A slant line ending attaches to an already-formed perfect group.

    "time" (AY M) slant-joins the mind/find (AY N D) perfect group.
    """
    verse = "\n".join([
        "the placement of creation",
        "eternal precipice of mind",
        "where it goes",
        "i can't find the time",
    ])
    group_with(verse, "mind", "find", "time")
    group_with(verse, "placement", "creation")
    assert scheme(verse) == "abcb"
|
|
|
|
|
|
def test_kanye_hours_power():
    """hours/power and ticking/tripping pair up, as words and as phrases."""
    couplet = "\n".join([
        "The clock's ticking, I just count the hours",
        "Stop tripping, I'm tripping off the power",
    ])
    group_with(couplet, "hours", "power")
    group_with(couplet, "ticking", "tripping")
    # The multi-word pass is expected to pair the full phrases as well.
    group_with(couplet, "clock's ticking", "stop tripping")
    assert scheme(couplet) == "aa"
|
|
|
|
|
|
def test_reduced_vowel_merge_vodka_lasagna():
    """AA + schwa words with different reduced vowels still merge.

    This is the Wayne chain: vodka / lasagna.
    """
    lines = [
        "vodka with a spritzer",
        "real Gs move in silence like lasagna",
    ]
    group_with("\n".join(lines), "vodka", "lasagna")
|
|
|
|
|
|
def test_consonance_bliss_exist():
    """Vowel + first coda consonant (IH S) is enough when full codas differ.

    "whisps" is out-of-vocabulary and needs the g2p model.
    """
    line = "bliss whisps in darker nights exist"
    group_with(line, "bliss", "whisps", "exist")
|
|
|
|
|
|
# ------------------------------------------------- unknown words / repairs
|
|
|
|
def test_compound_split_heresay_neigh():
    """The unknown word "heresay" must group with "neigh"."""
    lines = [
        "the rhythm and the gotcha of the heresay",
        "neigh",
    ]
    group_with("\n".join(lines), "heresay", "neigh")
|
|
|
|
|
|
def test_multiword_orange_door_hinge_porage():
    """orange, the phrase "door hinge", and "porage" share one group."""
    lines = ["an orange door hinge", "porage"]
    group_with("\n".join(lines), "orange", "door hinge", "porage")
|
|
|
|
|
|
def test_cmu_variant_get_rhymes_with_hit():
    """CMU's secondary pronunciation of "get" is "git", so it pairs with hit."""
    couplet = "\n".join(["take the hit", "that's what you get"])
    group_with(couplet, "hit", "get")
|
|
|
|
|
|
# ------------------------------------------------------------------ noise
|
|
|
|
def test_no_transitive_chaining():
    """Two groups bridged by one word's alternate readings stay separate.

    peanuts rhymes with what's via one pronunciation (AH T S) and with
    people via another (IY + schwa); the two groups must not be merged.
    """
    verse = "\n".join([
        "So misunderstood, but what's a world without enigma?",
        "You niggas are gelatin, peanuts to an elephant",
        "Sleep is the cousin, what a fuckin' family picture",
        "People say I'm borderline crazy, sorta, kinda",
        "Woman of my dreams, I don't sleep, so I can't find her",
    ])
    whats_group = group_with(verse, "what's", "peanuts")
    assert "sleep" not in whats_group
    assert "people" not in whats_group
    sleep_group = group_with(verse, "sleep", "people")
    assert "what's" not in sleep_group
|
|
|
|
|
|
def test_filler_words_not_highlighted_midline():
    """The repeated filler "I'm" is never highlighted; king/sting still pair."""
    couplet = "\n".join([
        "I'm a monster and I'm heartless, I'm the king",
        "and I'm the sting",
    ])
    lit = highlighted(couplet)
    assert "i'm" not in lit
    group_with(couplet, "king", "sting")
|
|
|
|
|
|
def test_em_counts_at_line_end_only():
    """'em may carry a rhyme at the line end but stays dark mid-line."""
    couplet = "\n".join([
        "Fill 'em with the venom and eliminate 'em",
        "I told 'em owls fly, I Minute Maid 'em",
    ])
    # The endings "...ate 'em" / "Maid 'em" earn the same scheme letter.
    assert scheme(couplet) == "aa"

    # Any token whose span is exactly "em" must be flagged as a line ending.
    # NOTE(review): this passes vacuously if no token span equals "em"
    # (e.g. if spans keep the leading apostrophe) -- confirm against app.
    analysis = analyze(Draft(text=couplet))
    for tok in analysis["tokens"]:
        span = analysis["lines"][tok["l"]][tok["s"]:tok["e"]]
        if span.lower() == "em":
            assert tok["end"]
|
|
|
|
|
|
def test_repeated_word_midline_is_not_a_rhyme():
    """A word repeated within one line produces no highlights at all."""
    lit = highlighted("the fire burns the fire down")
    assert lit == set()
|
|
|
|
|
|
# ----------------------------------------------------------------- lookup
|
|
|
|
def test_lookup_rhymes():
    """Rhyme lookup for "light" lists night/tonight but not the query itself."""
    response = lookup("light", mode="rhyme")
    found = {entry["word"] for entry in response["words"]}
    assert "night" in found
    assert "tonight" in found
    assert "light" not in found
|
|
|
|
|
|
def test_lookup_near_rhymes_exclude_perfect():
    """Near mode returns shared-vowel words and leaves perfect rhymes out."""
    response = lookup("hold", mode="near")
    found = {entry["word"] for entry in response["words"]}
    # "home" shares the vowel but has a different coda.
    assert "home" in found
    # "gold" is a perfect rhyme, which belongs to the other mode.
    assert "gold" not in found
|
|
|
|
|
|
# ------------------------------------------------------------------ shape
|
|
|
|
def test_stanzas_and_legend():
    """One stanza of four lines; its legend has distinct colored a/b entries."""
    analysis = analyze(Draft(text="the cat\nso blue\na hat\nso true"))
    [stanza] = analysis["stanzas"]
    assert stanza["lines"] == [0, 1, 2, 3]

    legend = stanza["legend"]
    first, second = legend[0], legend[1]
    assert first["ch"] == "a"
    assert second["ch"] == "b"
    assert first["color"] is not None
    assert second["color"] is not None
    assert first["color"] != second["color"]
|
|
|
|
|
|
def test_blank_lines_split_stanzas():
    """A blank line starts a new stanza, and scheme letters restart."""
    analysis = analyze(Draft(text="the cat\na hat\n\nso blue\nso true"))
    schemes = [stanza["scheme"] for stanza in analysis["stanzas"]]
    assert schemes == ["aa", "aa"]
|
|
|
|
|
|
# ------------------------------------------------------------------ meter
|
|
|
|
def meter_of(line: str) -> dict:
    """Return the first ``"meter"`` entry from analyzing *line*."""
    meter_rows = analyze(Draft(text=line))["meter"]
    return meter_rows[0]
|
|
|
|
|
|
def test_iambic_pentameter():
    """The Sonnet 18 opening scans as ten syllables of iambic pentameter."""
    meter = meter_of("Shall I compare thee to a summer's day")
    assert (meter["syl"], meter["label"]) == (10, "iambic pentameter")
|
|
|
|
|
|
def test_trochaic_tetrameter():
    """A catalectic trochaic line is still labelled tetrameter.

    Seven syllables: the final unstressed syllable is dropped.
    """
    meter = meter_of("Tyger Tyger burning bright")
    assert (meter["syl"], meter["label"]) == (7, "trochaic tetrameter")
|
|
|
|
|
|
def test_syllables_always_reported():
    """The meter entry carries a syllable count and a non-empty stress value."""
    meter = meter_of("the city hums in amber under fading light")
    syllables, stress = meter["syl"], meter["stress"]
    assert syllables == 12
    assert stress
|
|
|
|
|
|
def test_trailing_schwa_trimmed_militia_commissioner():
    """An extra reduced syllable does not keep commissioner out of the group.

    commissioner has one more reduced syllable than militia
    (IH-AH-ER vs IH-AH).
    """
    couplet = "\n".join([
        "Swagger down pat, call my shit Patricia",
        "Young Money militia, and I am the commissioner",
    ])
    group_with(couplet, "patricia", "militia", "commissioner")
|
|
|
|
|
|
def test_meter_coaching_flags_outlier_line():
    """Only the overlong fourth line is flagged, with a target of 8."""
    verse = "\n".join([
        "I walk the lonely road tonight",
        "I hold the heavy stone of light",
        "I call the fading stars to fight",
        "and everybody wonders where the time has gone",
    ])
    meter_rows = analyze(Draft(text=verse))["meter"]
    off_lines = []
    for row in meter_rows:
        if row["off"]:
            off_lines.append(row["l"])
    assert off_lines == [3]
    assert meter_rows[3]["target"] == 8
|
|
|
|
|
|
def test_meter_coaching_needs_a_pattern_to_break():
    """Two lines of wildly different lengths set no pattern, so no flags."""
    draft = Draft(
        text="short line here\na very much longer line that runs on and on"
    )
    analysis = analyze(draft)
    for row in analysis["meter"]:
        assert not row["off"]
|
|
|
|
|
|
# ------------------------------------------------------------------ layers
|
|
|
|
def test_layered_phrase_rides_second_group():
    """A phrase may ride one group while a word inside it fills another.

    Em's syrup knot: "up" fills with the cup chain, while "stir up" rides
    the syrup group as a second layer.
    """
    verse = "\n".join([
        "Maybe your cup is full of syrup and lean",
        "fill the cup up to the brim",
        "Maybe I need to stir up shit",
        "the burden is mine",
    ])
    # syrup / "stir up" is a perfect ER-AH-P match.
    syrup_group = group_with(verse, "syrup", "stir up")
    # burden is ER-schwa with a mismatched coda, which is too loose.
    assert "burden" not in syrup_group

    cup_group = group_with(verse, "cup", "up")
    assert "stir up" not in cup_group
    assert "syrup" not in cup_group
|
|
|
|
|
|
def test_weak_phrase_attaches_but_never_founds():
    """A stopword-tailed phrase can join a group but cannot create one."""
    verse = "\n".join([
        "Maybe your cup is full of syrup and lean",
        "fill the cup up to the brim",
        "shake the world up if it were up to me",
        "the burden is mine",
    ])
    # "were up" joins the existing group: a perfect ER-AH-P phrase match.
    group_with(verse, "syrup", "were up")

    # Two weak phrases on their own are not enough to found a group.
    weak_only = "it were up to him\nit were up to her"
    assert "were up" not in highlighted(weak_only)
|
|
|
|
|
|
def test_annotation_lines_ignored():
    """Section tags, ``#`` notes and parenthesized lines are not analyzed."""
    draft_text = "\n".join([
        "[Chorus]",
        "the cat",
        "a hat",
        "# note: tighten this verse",
        "(yeah)",
        "so blue",
        "so true",
    ])
    analysis = analyze(Draft(text=draft_text))
    for annotation_word in ("chorus", "note"):
        assert annotation_word not in highlighted(draft_text)

    # Annotations neither split the stanza nor earn scheme letters.
    [stanza] = analysis["stanzas"]
    assert stanza["lines"] == [1, 2, 5, 6]
    assert stanza["scheme"] == "aabb"
|
|
|
|
|
|
def test_perfect_subgroup_fuses_with_slant_family():
    """A perfect subgroup shares one group with its wider slant family.

    shoulder/older/colder (perfect, OW L D ER) live inside the bigger
    OW-schwa family, so the whole column gets one color.
    """
    verse = "\n".join([
        "Skunk, bug, soldier",
        "Tongue, shrub, shoulder",
        "One month older",
        "Sponge, mob, colder",
        "Nun, rug, holster",
        "Lug nut, coaster",
        "Lung, jug, roaster",
        "Young Thug poster",
        "Unplugged toaster",
    ])
    column = ["soldier", "shoulder", "older", "colder", "holster",
              "coaster", "roaster", "poster", "toaster"]
    group_with(verse, *column)
|
|
|
|
|
|
def test_lookup_synonyms_wordnet():
    """Synonym lookup for "happy" has synonym/opposite sections with "glad"."""
    sections = lookup("happy", mode="syn")["sections"]
    all_words = set()
    for section in sections:
        for entry in section["words"]:
            all_words.add(entry["word"])
    assert "glad" in all_words

    section_labels = [section["label"] for section in sections]
    assert "synonyms" in section_labels
    assert "opposites" in section_labels
|
|
|
|
|
|
def test_lookup_synonyms_lemmatized():
    """The plural "keys" is reported as known (morphy: keys -> key)."""
    response = lookup("keys", mode="syn")
    known = response["known"]
    assert known is True
|
|
|
|
|
|
def test_lookup_synonyms_unknown_word():
    """A nonsense word is reported with ``known`` set to False."""
    response = lookup("xqzzqx", mode="syn")
    assert response["known"] is False
|
|
|
|
|
|
def test_ine_ending_gets_een_candidate():
    """Unknown "-ine" words also get an "-een" reading.

    g2p reads out-of-vocabulary "-ine" like valentine (AY N), while Em
    says ketamine with an -een. Both candidates compete and the perfect
    pass picks the one that matches.
    """
    couplet = "\n".join([
        "Even ketamine or methamphetamine with the minithin",
        "It better be at least seventy or three-hundred milligram",
    ])
    group_with(couplet, "ketamine", "methamphetamine")
|
|
|
|
|
|
def test_rhyme_mode_includes_near():
    """Rhyme mode returns perfect rhymes in "words" and near ones in "near"."""
    response = lookup("hold", mode="rhyme")
    perfect = {entry["word"] for entry in response["words"]}
    assert "gold" in perfect
    near = {entry["word"] for entry in response["near"]}
    assert "home" in near
|
|
|
|
|
|
def test_schwa_phrase_needs_consonant_support():
    """A vowel-only phrase match is not enough to join a word family.

    "sloth hugs" (OW-TH + schwa) must not ride the over/shoulder family
    just because the vowels rhyme; "door hinge" only gets in on
    orange's R.
    """
    couplet = "\n".join([
        "Looming over your shoulder, like a sloth hugs a tree,",
        "Thinking it won't fall, yet there it goes. Damn, it's free.",
    ])
    lit = highlighted(couplet)
    assert "sloth hugs" not in lit
|
|
|
|
|
|
def test_phrase_with_one_new_half_is_not_suppressed():
    """Phrase pairs survive when only one half already rhymes as a word.

    though/mode already rhyme as words, but beast/sleep/seats rhyme only
    through the phrase pairs, so those phrases must be kept.
    """
    verse = "\n".join([
        "Look, I woke up in beast mode",
        "With my girl, that's beauty and the beast though",
        "Been top 5, these niggas sleep though",
        "Only thing that sold out is the seats though",
    ])
    phrases = ("beast mode", "beast though", "sleep though", "seats though")
    group_with(verse, *phrases)
|
|
|
|
|
|
def test_phrase_joins_slant_group_via_coda_consensus():
    """A phrase joins a slant group when two or more members share its coda.

    orange + pourage found their group on vowels alone (end slant);
    "door hinge" joins because 2+ members agree on the AO-R coda.
    """
    verse = "\n".join([
        "Pick up an orange",
        "It's by the door hinge",
        "eating pourage",
    ])
    group_with(verse, "orange", "door hinge", "pourage")
|
|
|
|
|
|
def test_the_full_orange_verse():
    """The Eminem demonstration: every line ties back to orange."""
    verse = "\n".join([
        "I put my orange",
        "four-inch",
        "door hinge",
        "in storage,",
        "and ate porridge with George",
    ])
    orange_family = ("orange", "storage", "porridge", "four-inch",
                     "door hinge")
    group_with(verse, *orange_family)

    # four/door/george keep their own thread (george sits outside any
    # phrase). inch/hinge only mirror the grouped phrases, so the compound
    # reading wins and they are painted as part of those phrases.
    group_with(verse, "four", "door", "george")
    # "hinge" never appears as a standalone span; only "door hinge" does.
    lit = highlighted(verse)
    assert "hinge" not in lit
|
|
|
|
|
|
def test_mosaic_triples_kanye_power():
    """Three-syllable mosaic endings from "Power" fall into one group."""
    verse = "\n".join([
        "I'm living in that 21st century",
        "Doing something mean to it",
        "Do it better than anybody you ever seen do it",
        "Screams from the haters, got a nice ring to it",
        "I guess every superhero need his theme music",
    ])
    triples = ("mean to it", "seen do it", "theme music")
    group_with(verse, *triples)
|
|
|
|
|
|
def test_mosaic_finds_perfect_members_other_anchor():
    """A mosaic can match a perfect-group member on its other anchor.

    Gambino: mastermind founds a perfect group with rind (AY N D), but
    its first-stress run (AE..AY) is what "pass the time" rhymes with.
    """
    couplet = "\n".join([
        "Gambino is a mastermind, fuck a bitch to pass the time",
        "Mass appeal, orange rind, smoke your green, I'm spendin' mine",
    ])
    group_with(couplet, "mastermind", "rind", "pass the time")
|
|
|
|
|
|
def test_eliot_is_it_visit():
    """Prufrock's signature mosaic: phone-for-phone identical rimes."""
    couplet = "\n".join([
        'Oh, do not ask, "What is it?"',
        "Let us go and make our visit.",
    ])
    group_with(couplet, "is it", "visit")
    assert scheme(couplet) == "aa"
|
|
|
|
|
|
def test_near_vowel_neutralized_before_r():
    """IH and IY before R count as the same (NEAR) vowel.

    CMU has fear = F IH1 R and hear = HH IY1 R, yet they rhyme in every
    dialect of English.
    """
    verse = "\n".join([
        "i fear the sounds above my head",
        "as i sit here calmly in bed",
        "i hear a rumble oh so loud",
        "sounds like mania and a crowd",
    ])
    for members in (("fear", "here", "hear"),
                    ("head", "bed"),
                    ("loud", "crowd")):
        group_with(verse, *members)
    assert scheme(verse) == "aabb"
|
|
|
|
|
|
def test_nasal_codas_merge():
    """M/N/NG share a coda class, as Em delivers damn/hand/plans.

    The engine is also expected to find the fuller phrase mosaic.
    """
    couplet = "\n".join([
        "If you never gave a damn, raise your hand",
        "'Cause I'm about to set trip, vacation plans",
    ])
    group_with(couplet, "gave a damn", "vacation plans")
    group_with(couplet, "hand", "plans")
|
|
|
|
|
|
def test_analyze_rejects_oversized_drafts():
    """A 200,000-character draft must make ``analyze`` raise HTTPException."""
    from fastapi import HTTPException

    # pytest is already imported at module level; no private alias needed.
    with pytest.raises(HTTPException):
        analyze(Draft(text="a" * 200_000))
|
|
|
|
|
|
def test_weak_ending_rhymes_at_line_end():
    """A weak final syllable can carry the rhyme at the line end.

    Lateralus: stress sits on IN-fancy, but the unstressed last syllable
    is what rhymes.
    """
    verse = "\n".join([
        "All I see",
        "In my infancy",
        "Red and yellow then came to be",
        "Reaching out to me",
    ])
    group_with(verse, "see", "infancy", "be", "me")
|
|
|
|
|
|
def test_cmu_override_stasis():
    """CMU transcribes stasis as "STAH-seez"; everyone says STAY-sis."""
    phrase = "Oasis of stasis"
    group_with(phrase, "oasis", "stasis")
|
|
|
|
|
|
def test_weak_ending_requires_matching_coda():
    """A weak-ending family rejects a word with the same vowel, other coda.

    divinity ends open (..tee) while screams ends IY M Z: same vowel,
    different coda, so not the same family.
    """
    verse = "\n".join([
        "Roots surrounding our entirety",
        "Tangled in divinity",
        "Weaving heaven among hellish screams",
    ])
    family = group_with(verse, "entirety", "divinity")
    assert "screams" not in family
|
|
|
|
|
|
def test_cot_caught_merger():
    """thought (AO T) and lot (AA T) are identical in merged dialects."""
    couplet = "\n".join([
        "I thought about it a lot",
        "I gave it everything I got",
    ])
    group_with(couplet, "thought", "lot", "got")
|
|
|
|
|
|
def test_end_phrase_pure_vowel_rhyme():
    """Assonance is allowed for phrases at the line boundary.

    forgotten / "off of" are AA-schwa at both line ends, which is
    accepted there just as it is for single words.
    """
    couplet = "\n".join([
        "I realize shit and write it down to be forgotten",
        "Building a database to reference my life off of",
    ])
    group_with(couplet, "forgotten", "off of")
|
|
|
|
|
|
def test_weak_endings_never_split_rich_chains():
    """The weak -er ending must not pull commissioner out of its family.

    commissioner rhymes multisyllabically with the militia family and
    has to stay there.
    """
    verse = "\n".join([
        "Ahem, excuse my charisma, vodka with a spritzer",
        "Swagger down pat; call my shit Patricia",
        "Young Money militia and I am the commissioner",
        "You no wan' start Weezy 'cause the F is for finisher",
    ])
    chain = ("charisma", "patricia", "militia", "commissioner",
             "finisher", "spritzer")
    group_with(verse, *chain)
|
|
|
|
|
|
def test_schwa_heavy_phrase_cannot_join_word_family():
    """"Two bitches" (UW + schwas) must not ride the Buddha/scuba chain."""
    couplet = "\n".join([
        "doin' it like Buddha, scuba, Cuba",
        "Two bitches at the same time",
    ])
    lit = highlighted(couplet)
    assert "two bitches" not in lit
|
|
|
|
|
|
def test_swimmers_twist_her_finisher_one_family():
    """swimmers, finisher, commissioner and the "... her" phrases are one group."""
    verse = "\n".join([
        "Young Money militia and I am the commissioner",
        "You no wan' start Weezy cause the F is for finisher",
        "Two bitches at the same time, synchronized swimmers",
        "Got the girl twisted cause she open when you twist her",
        "Never met the bitch, but I fuck her like I missed her",
    ])
    family = ("swimmers", "finisher", "commissioner", "twist her",
              "missed her")
    group_with(verse, *family)
|
|
|
|
|
|
def test_refrain_words_go_dark_midline():
    """A word repeated across lines is not highlighted; the endings are."""
    verse = "\n".join([
        "the bitch is cold",
        "the bitch is old",
        "a bitch like this",
        "a bitch like that",
        "bitch got gold",
    ])
    group_with(verse, "cold", "old", "gold")
    lit = highlighted(verse)
    assert "bitch" not in lit
|
|
|
|
|
|
def test_consonance_is_local():
    """A coda match eight lines apart is not a rhyme.

    speech sits on line 1 and keep on line 9.
    """
    lines = ["I gave a dumber speech"]
    lines += ["la la la"] * 7
    lines.append("I always keep my word")
    lit = highlighted("\n".join(lines))
    assert "keep" not in lit
|
|
|
|
|
|
def test_secondary_pronunciation_stays_local():
    """A secondary reading must not win over a rhyme with a neighbor.

    predicate's verb reading (pred-i-KATE) must not hand it to the
    hook's "eight" far away when the noun rhymes with the line next to it.
    """
    lines = ["Six-foot, seven-foot, eight-foot bunch"]
    lines += ["nothing on this line"] * 8
    lines += [
        "You niggas are gelatin, peanuts to an elephant",
        "I got through that sentence like a subject and a predicate",
    ]
    group_with("\n".join(lines), "gelatin", "elephant", "predicate")
|
|
|
|
|
|
# -------------------------------------------------------------- new tools
|
|
|
|
def test_word_info():
    """``word_info`` reports syllables, stress pattern and rime for tonight."""
    from app import word_info

    details = word_info(word="tonight")
    assert details["syl"] == 2
    assert details["stress"] == "01"
    assert details["rime"] == "AY T"
|
|
|
|
|
|
def test_word_inside_grouped_phrase_still_rhymes():
    """A word inside a grouped phrase still pairs at word level.

    "all this time" rides a mosaic with "lost in my", but the word time
    must still pair with mind.
    """
    couplet = "\n".join([
        "lost in my mind for a while",
        "wastin' all this time with style",
    ])
    for pair in (("mind", "time"), ("while", "style")):
        group_with(couplet, *pair)
|
|
|
|
|
|
def test_inline_adlibs_excluded():
    """A trailing ad-lib is delivery, not text.

    "(yeah)" does not take the line-ending slot; bunch keeps it.
    """
    couplet = "\n".join([
        "Six-foot, seven-foot, eight-foot bunch (yeah)",
        "I roll with the gang, throw a punch (what)",
    ])
    assert scheme(couplet) == "aa"
    lit = highlighted(couplet)
    assert "yeah" not in lit
    group_with(couplet, "bunch", "punch")
|
|
|
|
|
|
def test_alliteration_detected():
    """All six P-words are reported as one alliteration group."""
    analysis = analyze(
        Draft(text="Peter Piper picked a peck of pickled peppers")
    )
    spans = set()
    group_ids = set()
    for tok in analysis["allit"]:
        spans.add(analysis["lines"][tok["l"]][tok["s"]:tok["e"]].lower())
        group_ids.add(tok["g"])
    expected = {"peter", "piper", "picked", "peck", "pickled", "peppers"}
    assert expected <= spans
    assert len(group_ids) == 1
|
|
|
|
|
|
def test_alliteration_needs_three_and_locality():
    """Two same-onset words far apart are coincidence, not craft."""
    draft_text = "\n".join([
        "big dog barks",
        "nothing here",
        "nothing there",
        "bright day",
    ])
    analysis = analyze(Draft(text=draft_text))
    assert analysis["allit"] == []
|
|
|
|
|
|
def test_word_senses():
    """``word_info`` reports at least ten senses for "light"."""
    from app import word_info

    sense_count = word_info(word="light")["senses"]
    assert sense_count >= 10
|
|
|
|
|
|
def test_unanswered_endings_reported():
    """Unrhymed endings show up under "open"; rhymed endings do not."""
    draft_text = "\n".join(["the cat", "so blue", "a hat", "the end"])
    analysis = analyze(Draft(text=draft_text))
    unanswered = set()
    for item in analysis["open"]:
        line = analysis["lines"][item["l"]]
        unanswered.add(line[item["s"]:item["e"]])
    assert "blue" in unanswered
    assert "end" in unanswered
    assert "hat" not in unanswered
    assert "cat" not in unanswered
|
|
|
|
|
|
def test_hook_coda_chain_fuses():
    """Nested codas fuse around a hub word into one group.

    Logic's hook: wrist (IH S T) is the hub; this (IH S) and shit (IH T)
    ride it as slant satellites. One color, kinda-rhymes kept.
    """
    hook = "\n".join([
        "Yeah I've been killin' this shit",
        "Yeah I've been hard in the paint, not a single assist",
        "Yeah I've been flickin' that wrist",
        "Yeah I've been cookin' that shit, now they fuckin' with this",
    ])
    group_with(hook, "shit", "assist", "wrist", "this")
|
|
|
|
|
|
def test_vamonos_dominoes():
    """Final s/z voicing neutralizes: -nos rhymes with -noes."""
    verse = "\n".join([
        "my members go quicker than vamonos",
        "He dead, she dead, he in jail",
        "Everyone fallin' like dominoes",
    ])
    group_with(verse, "vamonos", "dominoes")
|
|
|
|
|
|
def test_repeated_this_joins_the_hook_chain():
    """A repeated "this" still joins the shit/assist/wrist chain.

    The unstressed CMU variant (DH IH0 S) used to found "this" on a
    consonant-led key that the coda-nest fuse couldn't parse.
    """
    hook = "\n".join([
        "Yeah I've been killin' this shit",
        "not a single assist",
        "flickin' that wrist",
        "now they fuckin' with this",
        "killin' this shit",
        "not a single assist",
        "flickin' that wrist",
        "now they fuckin' with this",
    ])
    group_with(hook, "shit", "assist", "wrist", "this")
|
|
|
|
|
|
def test_repetition_alone_does_not_color():
    """A line repeated verbatim highlights nothing."""
    refrain = "Six-foot, seven-foot, eight-foot bunch"
    doubled = "\n".join([refrain, refrain])
    assert highlighted(doubled) == set()
    # Refrains still share a scheme letter.
    assert scheme(doubled) == "aa"
    # A repeated unrhymed ending is not flagged as unanswered either.
    analysis = analyze(Draft(text=doubled))
    assert analysis["open"] == []
|
|
|
|
|
|
def test_repetition_colors_once_a_differing_word_joins():
    """A repeated word is grouped once a different rhyming word joins it."""
    verse = "\n".join(["it was Tammy", "pure whammy", "still Tammy"])
    group_with(verse, "tammy", "whammy")
|
|
|
|
|
|
def test_paren_words_highlight_but_never_end():
    """Parenthesized words can rhyme internally but never own the ending.

    "(... justify greed)" rhymes with need/breed, yet the line's ending
    slot belongs to "tragedies", which sits outside the parentheses.
    """
    verse = "\n".join([
        "How can we still succeed taking what we don't need?",
        "Telling lies, alibis, selling all the hate that we breed,",
        "Super-size our tragedies (you can't define me, or justify greed),",
    ])
    group_with(verse, "need", "breed", "greed", "succeed")

    analysis = analyze(Draft(text=verse))
    ending_spans = set()
    for tok in analysis["tokens"]:
        # Only tokens flagged "end" whose "ph" field is falsy are counted.
        if tok["end"] and not tok["ph"]:
            line = analysis["lines"][tok["l"]]
            ending_spans.add(line[tok["s"]:tok["e"]].lower())
    assert "greed" not in ending_spans
|
|
|
|
|
|
def test_end_dominated_vowel_families_fuse():
    """Same-vowel families fuse when both live at line ends.

    The Kanye chorus chain: blood/mud (AH D) and thugs/drugs (AH G Z)
    are one AH family.
    """
    chorus = "\n".join([
        "Shower us with your love",
        "Wash us in the blood",
        "Drop this for the thugs",
        "Know I grew up in the mud",
        "The top is not enough",
        "No choice, sellin' drugs",
    ])
    endings = ("love", "blood", "thugs", "mud", "enough", "drugs")
    group_with(chorus, *endings)
|
|
|
|
|
|
def test_em_back_pocket_quintet_is_one_family():
    """Phrases and words sharing a vowel tail fuse into one chain.

    AE-AA mosaics, plain AA endings, and IH-x-AA triples all share the
    vowel tail; suffix fusion reads them as Em wrote them.
    """
    verse = "\n".join([
        "got a laptop in my back pocket",
        "My pen'll go off when I half-cock it",
        "Got a fat knot from that rap profit",
        "Made a livin' and a killin' off it",
        "Ever since Bill Clinton was still in office",
    ])
    chain = ("back pocket", "rap profit", "off it", "still in office",
             "laptop", "pocket", "profit", "office")
    group_with(verse, *chain)
    assert scheme(verse) == "aaaaa"
|
|
|
|
|
|
# ------------------------------------------------------------------ multis
|
|
|
|
def test_multis_generator():
    """``multis_for`` yields skeleton matches, including two-word combos."""
    from app import multis_for

    charisma_multis = multis_for("charisma", set())
    # "little" matches the IH-x skeleton without being a perfect rhyme.
    assert "little" in charisma_multis

    elevator_multis = multis_for("elevator", set())
    # At least one result contains a space, i.e. a two-word combination.
    assert any(" " in candidate for candidate in elevator_multis)
|
|
|
|
|
|
def test_multis_in_lookup_response():
    """Rhyme lookups carry a non-empty "multis" list without perfect rhymes."""
    response = lookup("placement", mode="rhyme")
    assert response["multis"]
    # Perfect rhymes such as "basement" are excluded from the multis.
    assert "basement" not in response["multis"]
|
|
|
|
|
|
def test_forever_does_not_glue_to_sequential():
    """Shared EH-x alone does not glue forever to sequential.

    forever ends open while sequential closes on L, and the repeated
    "forever (forever" phrase carpets obey refrain muting.
    """
    verse = "\n".join([
        "Diamonds are forever (forever, forever)",
        "Diamonds are forever (forever, forever, forever)",
        "Forever ever? Forever ever? Ever, ever?",
        "this is a question of writing style",
        "going the extra mile",
        "quintessential",
        "sequential",
    ])
    family = group_with(verse, "quintessential", "sequential")
    for outsider in ("forever", "ever", "question", "extra"):
        assert outsider not in family
    group_with(verse, "style", "mile")
|
|
|
|
|
|
def test_vowel_families_are_local():
    """A 100-line gap can't chain a vowel family.

    Baby/Daddy/loan/being used to be one teal blob across all of
    "All Me"; here baby and daddy sit 100 filler lines apart and must
    not share a group.
    """
    text = "Baby girl I see you\n" + ("filler line here\n" * 100) + "my old daddy\n"
    # Use the shared groups() helper rather than rebuilding the same
    # gid -> spans mapping inline.
    assert not any({"baby", "daddy"} <= members for members in groups(text))
|
|
|
|
|
|
def test_garbage_javascript_dont_attach():
    """Words with non-nesting codas do not join the dollar/scholar group.

    garbage and javascript are both AA-x, but garbage ends JH and
    javascript ends PT; neither nests with dollar/scholar's open coda.
    """
    text = ("Garbage collected greatness, a dollar-sign disease\n"
            "your JavaScript lost at sea, the notebook of the scholar")
    # group_with() already returns the first group containing "dollar",
    # which is the group the original re-derived by hand from a second
    # analyze() call.
    dollar_family = group_with(text, "dollar", "scholar")
    assert "garbage" not in dollar_family
    assert "javascript" not in dollar_family
|
|
|
|
|
|
def test_weak_ending_needs_a_full_vowel():
    """A shared schwa+L tail is too weak to rhyme; a full vowel is not."""
    schwa_pair = "\n".join([
        "Wednesday has given up and admitted it's the middle",
        "slightly apologetic and entirely unavoidable",
    ])
    # middle and unavoidable share only a schwa+L tail.
    lit = highlighted(schwa_pair)
    assert "middle" not in lit

    # A full-vowel feminine ending still pairs (infancy/see on IY).
    full_vowel_pair = "\n".join(["all I can see", "in my infancy"])
    group_with(full_vowel_pair, "see", "infancy")
|
|
|
|
|
|
def test_no_matching_across_stanzas():
    """Each stanza is its own rhyme world.

    light/night in stanza 1 must not group with bright/sight in
    stanza 2, even though all four words rhyme.
    """
    text = ("the city light\nshines every night\n\n"
            "the future's bright\nit's in my sight")
    # Use the shared groups() helper rather than rebuilding the same
    # gid -> spans mapping inline.
    all_groups = groups(text)
    first_stanza = next(g for g in all_groups if "light" in g)
    assert "bright" not in first_stanza and "sight" not in first_stanza
    # Within each stanza the words still rhyme.
    assert "night" in first_stanza
    assert any({"bright", "sight"} <= g for g in all_groups)
|
|
|
|
|
|
def test_rhyme_char_start():
    """``rhyme_char_start`` gives the index where the rhyming letters begin."""
    from app import rhyme_char_start

    # tonight -> 'ight'
    tonight_start = rhyme_char_start("tonight")
    assert tonight_start == 4
    # write -> 'ite' (silent-e skipped)
    write_start = rhyme_char_start("write")
    assert write_start == 2
|