mirror of
https://github.com/kennethreitz/rhymepad.org.git
synced 2026-06-11 17:08:33 +00:00
Schwa-heavy phrases pair only with parallel phrases
A phrase run with <2 full vowels can't barge into a word family
("Two bitches" -> the Tuna chain) but may pair with a structurally
parallel phrase (clock's ticking / stop tripping).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -709,10 +709,12 @@ def analyze(draft: Draft):
|
||||
for t in g["toks"]:
|
||||
if " " not in t["word"]:
|
||||
grouped_spans[t["line"]].append((t["start"], t["end"], gi))
|
||||
by_par = defaultdict(list)
|
||||
for p in phrases:
|
||||
if p["weak"]:
|
||||
continue
|
||||
vs = p["vowels"]
|
||||
full = sum(1 for v in vs if v not in REDUCED)
|
||||
if len(vs) >= 3 or (len(vs) == 2 and vs[1] not in REDUCED):
|
||||
key = _multi_key(vs)
|
||||
else:
|
||||
@@ -725,6 +727,12 @@ def analyze(draft: Draft):
|
||||
# anchor + schwa-tails ("Sleep is the") prove nothing
|
||||
if sum(v != "x" for v in key[2:].split()) < 2:
|
||||
continue
|
||||
if full < 2 and not key.startswith("m2:"):
|
||||
# a schwa-heavy run can't barge into a word family ("Two
|
||||
# bitches" -> the Tuna chain), but parallel phrases may pair
|
||||
# with each other (clock's ticking / stop tripping)
|
||||
by_par[(p["sid"], key)].append(p)
|
||||
continue
|
||||
spans = grouped_spans[p["line"]]
|
||||
a_gi = next((gi for s, e, gi in spans if s <= p["start"] < e), None)
|
||||
b_gi = next((gi for s, e, gi in spans if s < p["end"] <= e), None)
|
||||
@@ -746,6 +754,12 @@ def analyze(draft: Draft):
|
||||
continue
|
||||
attach_or_collect(p, key, by_multi, group_by_multi)
|
||||
|
||||
for (sid, key), toks in by_par.items():
|
||||
toks = [t for t in toks if id(t) not in grouped]
|
||||
if len(toks) >= 2 and len({t["word"].split()[0] for t in toks}) >= 2:
|
||||
raw_groups.append({"toks": toks, "slant": True, "key": key})
|
||||
grouped.update(id(t) for t in toks)
|
||||
|
||||
# biggest buckets claim first (a token may sit in several via its
|
||||
# anchors); distinctness by anchor word, so the phrase "fire burns"
|
||||
# can't pose as a different word than the "fire" it starts with
|
||||
|
||||
@@ -470,3 +470,10 @@ def test_weak_endings_never_split_rich_chains():
|
||||
"You no wan' start Weezy 'cause the F is for finisher")
|
||||
group_with(text, "charisma", "patricia", "militia",
|
||||
"commissioner", "finisher", "spritzer")
|
||||
|
||||
|
||||
def test_schwa_heavy_phrase_cannot_join_word_family():
    """A schwa-heavy phrase must not be pulled into a word family.

    "Two bitches" (UW + schwas) sits on the line after the
    Buddha/scuba/Cuba chain and must not show up among the highlighted
    entries for this lyric.
    """
    lyric_lines = (
        "doin' it like Buddha, scuba, Cuba",
        "Two bitches at the same time",
    )
    marked = highlighted("\n".join(lyric_lines))
    assert "two bitches" not in marked
|
||||
|
||||
Reference in New Issue
Block a user