From a7cb3ef4ad0a03f46e675bfe62aaa3f9240baf24 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Jun 2026 12:39:13 -0400 Subject: [PATCH] Smart paste: normalize typographic punctuation, strip lyrics-site cruft Curly apostrophes broke phonetic tokenization (don't -> don + t); pasted and dropped-in text now normalizes quotes/nbsp and drops Genius-style cruft lines (Contributors, "... Lyrics" headers, Translations, You might also like, Source:, Embed). Co-Authored-By: Claude Opus 4.8 (1M context) --- static/index.html | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/static/index.html b/static/index.html index 5de2ff7..6f9e1ab 100644 --- a/static/index.html +++ b/static/index.html @@ -955,6 +955,46 @@ document.getElementById('exportBtn').addEventListener('click', async ()=>{ }); }); +/* ============================================================ + SMART PASTE — typographic quotes break phonetic tokenization + (don’t -> don + t), and lyrics sites ship cruft lines. Pasted + text gets normalized and de-crufted on the way in. 
+============================================================ */
+/* Typographic quotes and no-break spaces -> plain ASCII. Shared by
+   cleanPaste() and the lookup field so both accept the same characters. */
+function normalizeTypography(t){
+  return t.replace(/[\u2018\u2019\u201A\u201B\u02BC]/g, "'")
+          .replace(/[\u201C\u201D\u201E\u201F]/g, '"')
+          .replace(/[\u00A0\u2007\u202F]/g, ' ');
+}
+
+/* Header cruft is only looked for in this many leading lines, so a lyric
+   line further down that happens to end in "Lyrics" is kept. */
+const CRUFT_HEAD_LINES = 4;
+
+/* True when `s` (one trimmed line, at 0-based index `i` of the text)
+   matches one of the lyrics-site cruft patterns. */
+function isCruftLine(s, i){
+  if(i < CRUFT_HEAD_LINES){
+    if(/^\d+\s*Contributors/i.test(s)) return true;       // "12 Contributors..."
+    if(/Lyrics$/.test(s) && s.length < 60) return true;   // short "... Lyrics" header
+  }
+  return /^Translations$/i.test(s)
+      || /^You might also like/i.test(s)
+      || /^Source:\s/i.test(s)
+      || /^\d*\s*Embed$/.test(s);                         // "Embed" / "123Embed"
+}
+
+/* Normalize typography in `t` (a string) and drop cruft lines; returns the
+   cleaned string. Lines are trimmed for matching only: the ones that
+   survive keep their own leading/trailing whitespace. */
+function cleanPaste(t){
+  return normalizeTypography(t)
+    .split('\n')
+    .filter((l, i)=> !isCruftLine(l.trim(), i))
+    .join('\n');
+}
+
+editor.addEventListener('paste', e=>{
+  const text = e.clipboardData?.getData('text/plain');
+  if(!text) return;
+  const cleaned = cleanPaste(text);
+  // Nothing to fix: leave the browser's default paste untouched.
+  if(cleaned === text) return;
+  e.preventDefault();
+  // Clipboard text can carry CR/CRLF line endings. setRangeText() places the
+  // 'end' caret from the replacement's length, so insert LF-only text to keep
+  // that position exact (assumes `editor` is a <textarea>, whose value is
+  // newline-normalized - TODO confirm).
+  const insert = cleaned.replace(/\r\n?/g, '\n');
+  editor.setRangeText(insert, editor.selectionStart, editor.selectionEnd, 'end');
+  render(); analyzeSoon();
+});
+
+lookupInput.addEventListener('paste', e=>{
+  const text = e.clipboardData?.getData('text/plain');
+  if(!text) return;
+  // Same character set as cleanPaste(), without the line-level cruft filter.
+  const cleaned = normalizeTypography(text);
+  if(cleaned === text) return;
+  e.preventDefault();
+  // NOTE(review): setRangeText() does not fire `input` the way the default
+  // paste does - confirm nothing listens for `input` on lookupInput.
+  lookupInput.setRangeText(cleaned, lookupInput.selectionStart, lookupInput.selectionEnd, 'end');
+});
+
 /* ============================================================ FILES IN, FILES OUT — drafts shouldn't be hostage to localStorage ============================================================ */ @@ -979,7 +1019,7 @@ document.addEventListener('drop', async e=>{ const files = [...(e.dataTransfer?.files || [])].slice(0, 8); for(const f of files){ if(f.size > 1024 * 1024) continue; - const text = await f.text(); + const text = cleanPaste(await f.text()); if(!text.trim()) continue; const id = newId(); docsState.docs.push({id, title: titleOf(text)});