Smart paste: normalize typographic punctuation, strip lyrics-site cruft

Curly apostrophes broke phonetic tokenization (don't -> don + t);
pasted and dropped-in text now normalizes quotes/nbsp and drops
Genius-style cruft lines (Contributors, "... Lyrics" headers,
Translations, You might also like, Source:, Embed).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 12:39:13 -04:00
parent e865fc17e9
commit a7cb3ef4ad
+41 -1
View File
@@ -955,6 +955,46 @@ document.getElementById('exportBtn').addEventListener('click', async ()=>{
});
});
/* ============================================================
SMART PASTE — typographic quotes break phonetic tokenization
(don’t -> don + t), and lyrics sites ship cruft lines. Pasted
and dropped-in text gets normalized and de-crufted on the way in.
============================================================ */
/* Normalize typographic punctuation in `t` and drop lyrics-site cruft lines.
   - U+2018 / U+2019 / U+02BC become a straight apostrophe, U+201C / U+201D
     become a straight double quote, and U+00A0 becomes a plain space.
   - Each line is tested in trimmed form; lines that survive are kept
     untrimmed, exactly as they came in.
   - Only within the first four lines: "<N> Contributors…" and any line
     shorter than 60 characters ending in "Lyrics".
   - Anywhere: "Translations", "You might also like…", "Source: …" and
     "[N]Embed".
   Returns the remaining lines joined with '\n'. */
function cleanPaste(t){
  const normalized = t
    .replace(/[\u2018\u2019\u02BC]/g, "'")
    .replace(/[\u201C\u201D]/g, '"')
    .replace(/\u00A0/g, ' ');

  // Cruft that is removed wherever it shows up in the text.
  const anywhere = [
    /^Translations$/i,
    /^You might also like/i,
    /^Source:\s/i,
    /^\d*\s*Embed$/,
  ];

  const isCruft = (line, idx)=>{
    const s = line.trim();
    // Header cruft only counts near the top of the paste.
    if(idx < 4){
      if(/^\d+\s*Contributors/i.test(s)) return true;
      if(s.length < 60 && /Lyrics$/.test(s)) return true;
    }
    return anywhere.some(re => re.test(s));
  };

  return normalized
    .split('\n')
    .filter((line, idx) => !isCruft(line, idx))
    .join('\n');
}
/* Editor paste: run the clipboard's plain text through cleanPaste. If nothing
   changed, the browser's default paste proceeds untouched; otherwise the
   default is cancelled, the cleaned text replaces the current selection
   (caret left at its end), and render() / analyzeSoon() are called. */
editor.addEventListener('paste', evt=>{
  const raw = evt.clipboardData?.getData('text/plain');
  if(!raw) return;
  const tidy = cleanPaste(raw);
  if(tidy !== raw){
    evt.preventDefault();
    const {selectionStart, selectionEnd} = editor;
    editor.setRangeText(tidy, selectionStart, selectionEnd, 'end');
    render();
    analyzeSoon();
  }
});
/* Lookup-field paste: straighten typographic apostrophes so a pasted word
   matches the same way a typed one does. The character set mirrors the
   apostrophe set cleanPaste normalizes (U+2018, U+2019 and U+02BC).
   When the text contains none of them, the browser's default paste runs. */
lookupInput.addEventListener('paste', e=>{
  const text = e.clipboardData?.getData('text/plain');
  if(!text) return;
  const fixed = text.replace(/[\u2018\u2019\u02BC]/g, "'");
  if(fixed === text) return;
  e.preventDefault();
  // NOTE(review): setRangeText does not fire an `input` event by itself; if the
  // lookup field reacts to `input`, confirm whether one should be dispatched here.
  lookupInput.setRangeText(fixed,
    lookupInput.selectionStart, lookupInput.selectionEnd, 'end');
});
/* ============================================================
FILES IN, FILES OUT — drafts shouldn't be hostage to localStorage
============================================================ */
@@ -979,7 +1019,7 @@ document.addEventListener('drop', async e=>{
const files = [...(e.dataTransfer?.files || [])].slice(0, 8);
for(const f of files){
if(f.size > 1024 * 1024) continue;
const text = await f.text();
const text = cleanPaste(await f.text());
if(!text.trim()) continue;
const id = newId();
docsState.docs.push({id, title: titleOf(text)});