Compound reading wins: mirror word groups dissolve into phrase families

four/door and inch/hinge were pure mirrors of «four-inch»/«door
hinge», yet their word fills painted over the orange-family phrase
color. A word group that lives entirely inside one family's phrases now
dissolves, so the phrase color paints the whole compound.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 12:21:16 -04:00
parent faa3522956
commit d7fed935f0
2 changed files with 35 additions and 1 deletions
+31
View File
@@ -980,6 +980,37 @@ def analyze(draft: Draft):
fused2.append(core)
raw_groups = fused2
# a word group living ENTIRELY inside one family's phrases is a
# mirror of that family — four/door inside «four-inch»/«door hinge»:
# the compound reading wins and the mirror dissolves, so the phrase
# color paints the words too
grouped_phrase_spans = []
for gi, g in enumerate(raw_groups):
for t in g["toks"]:
if " " in t["word"]:
grouped_phrase_spans.append(
(t["line"], t["start"], t["end"], gi))
if grouped_phrase_spans:
kept_groups = []
for gi, g in enumerate(raw_groups):
if any(" " in t["word"] for t in g["toks"]):
kept_groups.append(g)
continue
covers: set[int] = set()
all_covered = True
for t in g["toks"]:
f = {pg for (pl, ps, pe, pg) in grouped_phrase_spans
if pl == t["line"] and ps <= t["start"]
and t["end"] <= pe and pg != gi}
if not f:
all_covered = False
break
covers |= f
if all_covered and len(covers) == 1:
continue
kept_groups.append(g)
raw_groups = kept_groups
# stable colors: order groups by first appearance
raw_groups.sort(key=lambda g: min((t["line"], t["start"]) for t in g["toks"]))
groups_out = []
+4 -1
View File
@@ -364,8 +364,11 @@ def test_the_full_orange_verse():
"and ate porridge with George")
group_with(text, "orange", "storage", "porridge",
"four-inch", "door hinge")
# four/door/george keep their own thread (george is outside any
# phrase), but inch/hinge are pure mirrors of the grouped phrases —
# the compound reading wins and paints them orange
group_with(text, "four", "door", "george")
group_with(text, "inch", "hinge")
assert "hinge" not in highlighted(text) # only «door hinge» paints
def test_mosaic_triples_kanye_power():