Integrate and Enhance Linting with Ruff (#295)

Co-authored-by: Jason Liu <jxnl@users.noreply.github.com>
2026-06-05 06:46:15 +00:00 · 2023-12-29 04:21:22 +08:00
parent 8ac5518b10
commit b583309818
10 changed files with 127 additions and 29 deletions
@@ -1,4 +1,4 @@
-name: Continuous Integration
+name: MyPy

 on:
  push:
@@ -24,6 +24,7 @@ jobs:
        uses: actions/setup-python@v4
        with:
          python-version: 3.9
+          cache: "pip"
      - name: Install dev dependencies
        run: |
          python3 -m pip install --upgrade pip setuptools wheel
@@ -1,8 +1,42 @@
 name: Ruff
-on: [push, pull_request]
+
+on:
+  push:
+  pull_request:
+    branches: [ main ]
+
+env:
+  WORKING_DIRECTORY: "."
+  RUFF_OUTPUT_FILENAME: "ruff.log"
+  CUSTOM_FLAGS: ""
+  CUSTOM_PACKAGES: "instructor examples tests"
+
 jobs:
-  ruff:
-    runs-on: ubuntu-latest
+  Ruff:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
    steps:
-      - uses: actions/checkout@v3
-      - uses: chartboost/ruff-action@v1
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Environment setup
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+          cache: "pip"
+      - name: Install dev dependencies
+        run: |
+          python3 -m pip install --upgrade pip setuptools wheel
+          python3 -m pip install -r requirements.txt
+      - name: Run Continuous Integration Action
+        run: |
+          export CUSTOM_PACKAGES="${{ env.CUSTOM_PACKAGES }}" &&
+          export CUSTOM_FLAGS="${{ env.CUSTOM_FLAGS }}" &&
+          curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/lint_ruff.sh |
+          bash | tee ${{ env.WORKING_DIRECTORY }}/${{ env.RUFF_OUTPUT_FILENAME }}
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: ruff-log
+          path: ${{ env.WORKING_DIRECTORY }}/${{ env.RUFF_OUTPUT_FILENAME }}
@@ -1,21 +1,21 @@
 repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: v0.1.7
+    rev: v0.1.7 # Ruff version
    hooks:
-      # Run the linter.
-      - id: ruff
+      - id: ruff # Run the linter.
+        name: Run Linter Check (Ruff)
        args: [ --fix ]
-      # Run the formatter.
-      - id: ruff-format
+        files: ^(instructor|tests|examples)/
+      - id: ruff-format       # Run the formatter.
+        name: Run Formatter (Ruff)
  - repo: local
    hooks:
      - id: ci_type_mypy
-        name: Static Type Check
+        name: Run Type Check (Mypy)
        entry: >
            bash -c 'set -o pipefail;
            export CUSTOM_PACKAGES="instructor/cli/cli.py instructor/cli/usage.py" &&
-            export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty" &&
+            export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty --follow-imports=skip" &&
            curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/type_mypy.sh |
            bash'
        language: system
@@ -0,0 +1,62 @@
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "venv",
+]
+
+# Same as Black.
+line-length = 88
+output-format = "grouped"
+
+target-version = "py39"
+
+[lint]
+select = [
+  # bugbear rules
+  "B",
+  # remove unused imports
+  "F401",
+  # bare except statements
+  "E722",
+  # unused arguments
+  "ARG",
+]
+ignore = [
+  # mutable defaults (B006) and useless expressions (B018)
+  "B006",
+  "B018",
+]
+
+unfixable = [
+  # disable auto fix for print statements
+  "T201",
+  "T203",
+]
+ignore-init-module-imports = true
+
+[extend-per-file-ignores]
+"instructor/distil.py" = ["ARG002"]
+"tests/test_distil.py" = ["ARG001"]
+"tests/test_patch.py" = ["ARG001"]
+"examples/task_planner/task_planner_topological_sort.py" = ["ARG002"]
+"examples/citation_with_extraction/main.py" = ["ARG001"]
+
@@ -115,7 +115,7 @@ def summarize_article(article: str, summary_steps: int = 3):
        max_retries=2,
    )
    summary_chain.append(summary.summary)
-    for i in range(summary_steps):
+    for _i in range(summary_steps):
        new_summary: RewrittenSummary = client.chat.completions.create(
            model="gpt-4-0613",
            messages=[
@@ -127,7 +127,7 @@ def summarize_article(article: str, summary_steps: int = 3):

                Perform the following two tasks
                - Identify 1-3 informative entities from the following article which is missing from the previous summary
-                - Write a new denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities 
+                - Write a new denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities

                Guidelines
                - Make every word count: re-write the previous summary to improve flow and make space for additional entities
@@ -48,5 +48,5 @@ def distil_summarization(text: str) -> GeneratedSummary:
 with open("test.csv", "r") as file:
    reader = csv.reader(file)
    next(reader)  # Skip the header
-    for article, summary in reader:
+    for article, _summary in reader:
        distil_summarization(article)
@@ -125,7 +125,7 @@ def get_api_key(request: Request):

 # Route to handle SSE events and return users
@app.post("/extract", response_class=StreamingResponse)
-async def extract(question: Question, openai_key=Depends(get_api_key)):
+async def extract(question: Question, openai_key: str = Depends(get_api_key)):
    raise Exception(
        "The 'openai.api_key' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(api_key=openai_key)'"
    )
@@ -18,7 +18,7 @@ from jinja2 import Template
 from models import {{title}}

 import openai
-import instructor 
+import instructor

 instructor.patch()

@@ -59,7 +59,7 @@ def load_json_schema(json_schema_path: str) -> dict:
        with open(json_schema_path) as f:
            return json.load(f)
    except Exception as e:
-        raise ValueError(f"Failed to load JSON schema: {e}")
+        raise ValueError(f"Failed to load JSON schema: {e}") from e


 def generate_pydantic_model(json_schema_path: str):
@@ -1,10 +1,10 @@
 """
 Proof of Concept for a task planning and execution system using
-OpenAIs Functions and topological sort, based on the idea in 
+OpenAI's Functions and topological sort, based on the idea in
 query_planner_execution.py.

-Additionally: There are also cases where the "pure" recursive approach has advantages; 
-If subtasks for different parent tasks that start in parallel have different runtimes, 
+Additionally: There are also cases where the "pure" recursive approach has advantages;
+If subtasks for different parent tasks that start in parallel have different runtimes,
 we will wait unnecessarily with my current implementation.

 Added by Jan Philipp Harries / @jpdus
@@ -39,14 +39,14 @@ class Task(BaseModel):
    id: int = Field(..., description="Unique id of the task")
    task: str = Field(
        ...,
-        description="""Contains the task in text form. If there are multiple tasks, 
+        description="""Contains the task in text form. If there are multiple tasks,
        this task can only be executed when all dependant subtasks have been answered.""",
    )
    subtasks: List[int] = Field(
        default_factory=list,
-        description="""List of the IDs of subtasks that need to be answered before 
-        we can answer the main question. Use a subtask when anything may be unknown 
-        and we need to ask multiple questions to get the answer. 
+        description="""List of the IDs of subtasks that need to be answered before
+        we can answer the main question. Use a subtask when anything may be unknown
+        and we need to ask multiple questions to get the answer.
        Dependencies must only be other tasks.""",
    )

@@ -1,11 +1,12 @@
 mkdocs-material
 pillow
 cairosvg
-mkdocstrings 
+mkdocstrings
 mkdocstrings-python
 openai
 pydantic
-pytest
+pytest>=7.4.0,<8.0.0
+pytest-asyncio>=0.21.1
 aiohttp==3.9.1
 yarl==1.8.1
 frozenlist==1.3.1