From a62dce399a8ea14dd25b71450d303d6bcd1ad3d8 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz
Date: Sun, 7 Jun 2026 01:53:15 -0400
Subject: [PATCH] CI: pre-download NLTK data for g2p-en
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same pre-warm as the Dockerfile — without the tagger the g2p fallback
dies and the OOV-word tests fail.

The downloads pass raise_on_error=True so that a failed fetch fails the
pre-warm step itself rather than surfacing later as test failures.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
NOTE(review): this patch was recovered from a whitespace-collapsed copy.
The indentation of the context lines below is reconstructed from the
usual GitHub Actions step layout — confirm against ci.yml before applying.

 .github/workflows/ci.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 56e817d..3ba3c2a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,4 +14,10 @@ jobs:
         with:
           enable-cache: true
       - run: uv sync --frozen
+      # same NLTK pre-warm as the Dockerfile: g2p-en needs the tagger
+      # under both its old and new (nltk>=3.9) names; raise_on_error makes
+      # a failed download fail this step instead of passing silently
+      - run: |
+          uv run python -c "import nltk;
+          [nltk.download(p, quiet=True, raise_on_error=True) for p in ('averaged_perceptron_tagger','averaged_perceptron_tagger_eng','cmudict')]"
       - run: uv run pytest