diff --git a/app.py b/app.py index 04a2b8a..274fdb8 100644 --- a/app.py +++ b/app.py @@ -551,13 +551,18 @@ def analyze(draft: Draft): "weak": False, }) + # words the draft leans on as refrain/filler (4+ uses) stop lighting + # up mid-line — their line-end uses still count + counts = Counter(t["word"].lower() for t in tokens) + refrain = {w for w, c in counts.items() if c >= 4} + # pass 1: perfect rhymes (shared rime), anywhere in a line — this is # what catches internal rhymes. Phrases compete too, so "stir up" # perfect-rhymes "syrup" even while its "up" rhymes with "cup". by_rime = defaultdict(list) for t in tokens: w = t["word"].lower() - if not t["is_end"] and (w in STOPWORDS or len(w) < 2): + if not t["is_end"] and (w in STOPWORDS or w in refrain or len(w) < 2): continue for key in rime_keys(t["word"]): by_rime[key].append(t) @@ -689,7 +694,7 @@ def analyze(draft: Draft): if id(t) in grouped: continue w = t["word"].lower() - if not t["is_end"] and (w in STOPWORDS or len(w) < 2): + if not t["is_end"] and (w in STOPWORDS or w in refrain or len(w) < 2): continue keys = multi_keys(t["word"]) for key in keys: # join an existing family if any anchor fits @@ -796,15 +801,29 @@ def analyze(draft: Draft): for s, e in phrase_spans[t["line"]]): continue w = t["word"].lower() - if not t["is_end"] and (w in STOPWORDS or len(w) < 2): + if not t["is_end"] and (w in STOPWORDS or w in refrain or len(w) < 2): continue key = vc_key(t["word"]) if key: attach_or_collect(t, key, by_vc, group_by_vc) + def flush_cluster(cluster, key): + if (len(cluster) >= 2 + and len({t["word"].lower() for t in cluster}) >= 2): + raw_groups.append({"toks": list(cluster), "slant": True, + "key": key}) + grouped.update(id(t) for t in cluster) for (sid, key), toks in by_vc.items(): - if len(toks) >= 2 and len({t["word"].lower() for t in toks}) >= 2: - raw_groups.append({"toks": toks, "slant": True, "key": key}) - grouped.update(id(t) for t in toks) + # consonance is local evidence: a coda match ten lines away isn't + # a rhyme, so clusters break on gaps of more than two lines + toks = sorted((t for t in toks if id(t) not in grouped), + key=lambda t: t["line"]) + cluster = [] + for t in toks: + if cluster and t["line"] - cluster[-1]["line"] > 2: + flush_cluster(cluster, key) + cluster = [] + cluster.append(t) + flush_cluster(cluster, key) # pass 5: weak endings, the LAST resort — infancy rhymes see on its # unstressed final syllable, but only after every richer reading diff --git a/tests/test_rhymes.py b/tests/test_rhymes.py index dcf6955..a7dff9a 100644 --- a/tests/test_rhymes.py +++ b/tests/test_rhymes.py @@ -477,3 +477,31 @@ def test_schwa_heavy_phrase_cannot_join_word_family(): text = ("doin' it like Buddha, scuba, Cuba\n" "Two bitches at the same time") assert "two bitches" not in highlighted(text) + + +def test_swimmers_twist_her_finisher_one_family(): + text = ("Young Money militia and I am the commissioner\n" + "You no wan' start Weezy cause the F is for finisher\n" + "Two bitches at the same time, synchronized swimmers\n" + "Got the girl twisted cause she open when you twist her\n" + "Never met the bitch, but I fuck her like I missed her") + group_with(text, "swimmers", "finisher", "commissioner", + "twist her", "missed her") + + +def test_refrain_words_go_dark_midline(): + text = ("the bitch is cold\n" + "the bitch is old\n" + "a bitch like this\n" + "a bitch like that\n" + "bitch got gold") + group_with(text, "cold", "old", "gold") + assert "bitch" not in highlighted(text) + + +def test_consonance_is_local(): + # speech (line 1) and keep (line 9): a coda match eight lines apart + # is not a rhyme + filler = "la la la\n" * 7 + text = "I gave a dumber speech\n" + filler + "I always keep my word" + assert "keep" not in highlighted(text)