"""Regression suite for RhymePad's rhyme detection.

Every case here came from a real verse that exposed a gap during
development — Lil Wayne ("6 Foot 7 Foot"), Eminem ("Godzilla"), Kanye
("Power"), and assorted hand-rolled edge cases.
"""
from collections import defaultdict

import pytest

from app import Draft, analyze, lookup


def _span(res, tok) -> str:
    """Lowercased slice of the analysed line that token *tok* covers.

    ``tok["l"]`` indexes ``res["lines"]``; ``tok["s"]``/``tok["e"]`` are the
    slice bounds within that line.
    """
    return res["lines"][tok["l"]][tok["s"]:tok["e"]].lower()


def groups(text: str) -> list[set[str]]:
    """Rhyme groups as sets of (lowercased) highlighted spans."""
    res = analyze(Draft(text=text))
    by_gid = defaultdict(set)
    for t in res["tokens"]:
        # tokens sharing a "g" value belong to the same rhyme group
        by_gid[t["g"]].add(_span(res, t))
    return list(by_gid.values())


def group_with(text: str, *words: str) -> set[str]:
    """The group containing words[0]; asserts all words share it.

    Only the first group that contains ``words[0]`` is inspected. Fails the
    test (via ``pytest.fail``) when ``words[0]`` is not in any group.
    """
    for g in groups(text):
        if words[0].lower() in g:
            missing = {w.lower() for w in words} - g
            assert not missing, f"{missing} not grouped with {words[0]!r} in {g}"
            return g
    pytest.fail(f"{words[0]!r} not highlighted at all")


def highlighted(text: str) -> set[str]:
    """Every highlighted span in *text*, across all rhyme groups."""
    # set().union() with no arguments is already the empty set, so no
    # special case is needed when nothing is highlighted.
    return set().union(*groups(text))


def scheme(text: str) -> str:
    """Rhyme-scheme string reported for the first stanza of *text*."""
    return analyze(Draft(text=text))["stanzas"][0]["scheme"]


# ---------------------------------------------------------------- perfect

def test_perfect_end_rhymes():
    text = ("I keep the cadence tucked beneath my tongue tonight\n"
            "the city hums in amber under fading light\n"
            "I trace a melody that never quite takes flight\n"
            "and let the silence answer everything I write")
    group_with(text, "tonight", "light", "flight", "write", "quite")
    assert scheme(text) == "aaaa"


def test_internal_perfect_rhymes():
    text = ("no time to waste, I climb in haste and find my place\n"
            "the rhythm shows, the river flows, wherever it goes")
    group_with(text, "time", "climb")
    group_with(text, "waste", "haste")
    group_with(text, "shows", "flows", "goes")


def test_say_weigh():
    group_with("say this weigh that", "say", "weigh")


def test_abab_scheme():
    assert scheme("the cat\nso blue\na hat\nso true") == "abab"


# ------------------------------------------------------------------ slant

def test_end_slant_joins_existing_perfect_group():
    # "time" (AY M) slant-joins the mind/find (AY N D) perfect group
    text = ("the placement of creation\n"
            "eternal precipice of mind\n"
            "where it goes\n"
            "i can't find the time")
    group_with(text, "mind", "find", "time")
    group_with(text, "placement", "creation")
    assert scheme(text) == "abcb"


def test_kanye_hours_power():
    text = ("The clock's ticking, I just count the hours\n"
            "Stop tripping, I'm tripping off the power")
    group_with(text, "hours", "power")
    group_with(text, "ticking", "tripping")
    # the multi-word pass catches the full phrase pair too
    group_with(text, "clock's ticking", "stop tripping")
    assert scheme(text) == "aa"


def test_reduced_vowel_merge_vodka_lasagna():
    # AA + schwa, with different reduced vowels — the Wayne chain
    text = ("vodka with a spritzer\n"
            "real Gs move in silence like lasagna")
    group_with(text, "vodka", "lasagna")


def test_consonance_bliss_exist():
    # vowel + first coda consonant (IH S), full codas differ;
    # "whisps" is OOV and needs the g2p model
    group_with("bliss whisps in darker nights exist",
               "bliss", "whisps", "exist")


# ------------------------------------------------- unknown words / repairs

def test_compound_split_heresay_neigh():
    text = ("the rhythm and the gotcha of the heresay\n"
            "neigh")
    group_with(text, "heresay", "neigh")


def test_multiword_orange_door_hinge_porage():
    text = ("an orange door hinge\n"
            "porage")
    group_with(text, "orange", "door hinge", "porage")


def test_cmu_variant_get_rhymes_with_hit():
    # CMU's secondary pronunciation of "get" is "git"
    group_with("take the hit\nthat's what you get", "hit", "get")


# ------------------------------------------------------------------ noise

def test_no_transitive_chaining():
    # peanuts rhymes with what's via one pronunciation (AH T S) and with
    # people via another (IY + schwa) — the two groups must stay separate
    text = ("So misunderstood, but what's a world without enigma?\n"
            "You niggas are gelatin, peanuts to an elephant\n"
            "Sleep is the cousin, what a fuckin' family picture\n"
            "People say I'm borderline crazy, sorta, kinda\n"
            "Woman of my dreams, I don't sleep, so I can't find her")
    whats = group_with(text, "what's", "peanuts")
    assert "sleep" not in whats and "people" not in whats
    sleep = group_with(text, "sleep", "people")
    assert "what's" not in sleep


def test_filler_words_not_highlighted_midline():
    text = ("I'm a monster and I'm heartless, I'm the king\n"
            "and I'm the sting")
    assert "i'm" not in highlighted(text)
    group_with(text, "king", "sting")


def test_em_counts_at_line_end_only():
    text = ("Fill 'em with the venom and eliminate 'em\n"
            "I told 'em owls fly, I Minute Maid 'em")
    assert scheme(text) == "aa"  # ...ate 'em / Maid 'em
    # but the mid-line 'em's stay dark
    res = analyze(Draft(text=text))
    em_tokens = [t for t in res["tokens"] if _span(res, t) == "em"]
    # NOTE(review): all() is vacuously true when em_tokens is empty — confirm
    # the analyzer emits bare "em" spans (without the apostrophe) here.
    assert all(t["end"] for t in em_tokens)


def test_repeated_word_midline_is_not_a_rhyme():
    assert highlighted("the fire burns the fire down") == set()


# ----------------------------------------------------------------- lookup

def test_lookup_rhymes():
    words = {w["word"] for w in lookup("light", mode="rhyme")["words"]}
    assert "night" in words and "tonight" in words
    assert "light" not in words


def test_lookup_near_rhymes_exclude_perfect():
    words = {w["word"] for w in lookup("hold", mode="near")["words"]}
    assert "home" in words  # shared vowel, different coda
    assert "gold" not in words  # perfect rhymes live in the other mode


# ------------------------------------------------------------------ shape

def test_stanzas_and_legend():
    res = analyze(Draft(text="the cat\nso blue\na hat\nso true"))
    (st,) = res["stanzas"]
    assert st["lines"] == [0, 1, 2, 3]
    a, b = st["legend"][0], st["legend"][1]
    assert a["ch"] == "a" and b["ch"] == "b"
    assert a["color"] is not None and b["color"] is not None
    assert a["color"] != b["color"]


def test_blank_lines_split_stanzas():
    res = analyze(Draft(text="the cat\na hat\n\nso blue\nso true"))
    assert [s["scheme"] for s in res["stanzas"]] == ["aa", "aa"]


# ------------------------------------------------------------------ meter

def meter_of(line: str) -> dict:
    """Meter entry that ``analyze`` reports for the first line of *line*."""
    res = analyze(Draft(text=line))
    return res["meter"][0]


def test_iambic_pentameter():
    m = meter_of("Shall I compare thee to a summer's day")
    assert m["syl"] == 10
    assert m["label"] == "iambic pentameter"


def test_trochaic_tetrameter():
    m = meter_of("Tyger Tyger burning bright")
    assert m["syl"] == 7  # catalectic — final unstressed syllable dropped
    assert m["label"] == "trochaic tetrameter"


def test_syllables_always_reported():
    m = meter_of("the city hums in amber under fading light")
    assert m["syl"] == 12
    assert m["stress"]


# NOTE(review): this is a rhyme-grouping case sitting in the meter section;
# left in place to keep the suite's ordering unchanged.
def test_trailing_schwa_trimmed_militia_commissioner():
    # commissioner has one extra reduced syllable (IH-AH-ER vs IH-AH)
    text = ("Swagger down pat, call my shit Patricia\n"
            "Young Money militia, and I am the commissioner")
    group_with(text, "patricia", "militia", "commissioner")


def test_meter_coaching_flags_outlier_line():
    text = ("I walk the lonely road tonight\n"
            "I hold the heavy stone of light\n"
            "I call the fading stars to fight\n"
            "and everybody wonders where the time has gone")
    res = analyze(Draft(text=text))
    flagged = [m["l"] for m in res["meter"] if m["off"]]
    assert flagged == [3]
    assert res["meter"][3]["target"] == 8


def test_meter_coaching_needs_a_pattern_to_break():
    # two lines of wildly different lengths: no dominant pattern, no flags
    res = analyze(Draft(text="short line here\n"
                             "a very much longer line that runs on and on"))
    assert not any(m["off"] for m in res["meter"])


# ------------------------------------------------------------------ layers

def test_layered_phrase_rides_second_group():
    # Em's syrup knot: "up" fills with the cup chain, while "stir up"
    # rides the syrup/burden group as a second layer
    text = ("Maybe your cup is full of syrup and lean\n"
            "fill the cup up to the brim\n"
            "Maybe I need to stir up shit\n"
            "the burden is mine")
    # called for its assertion only; the returned group is not needed
    group_with(text, "syrup", "burden", "stir up")
    cups = group_with(text, "cup", "up")
    assert "stir up" not in cups and "syrup" not in cups


def test_weak_phrase_attaches_but_never_founds():
    # "were up" (stopword tail) joins an existing ER group...
    text = ("Maybe your cup is full of syrup and lean\n"
            "fill the cup up to the brim\n"
            "shake the world up if it were up to me\n"
            "the burden is mine")
    group_with(text, "syrup", "burden", "were up")
    # ...but two weak phrases alone can't create a group
    assert "were up" not in highlighted("it were up to him\nit were up to her")


def test_annotation_lines_ignored():
    text = ("[Chorus]\n"
            "the cat\n"
            "a hat\n"
            "# note: tighten this verse\n"
            "(yeah)\n"
            "so blue\n"
            "so true")
    res = analyze(Draft(text=text))
    assert "chorus" not in highlighted(text)
    assert "note" not in highlighted(text)
    # annotations don't split the stanza or earn scheme letters
    (st,) = res["stanzas"]
    assert st["lines"] == [1, 2, 5, 6]
    assert st["scheme"] == "aabb"


def test_perfect_subgroup_fuses_with_slant_family():
    # shoulder/older/colder (perfect, OW L D ER) live inside the bigger
    # OW-schwa family — one color for the whole column
    text = ("Skunk, bug, soldier\n"
            "Tongue, shrub, shoulder\n"
            "One month older\n"
            "Sponge, mob, colder\n"
            "Nun, rug, holster\n"
            "Lug nut, coaster\n"
            "Lung, jug, roaster\n"
            "Young Thug poster\n"
            "Unplugged toaster")
    group_with(text, "soldier", "shoulder", "older", "colder", "holster",
               "coaster", "roaster", "poster", "toaster")