diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 875a7c5..3e76d25 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -1,4 +1,4 @@ -name: Continuous Integration +name: MyPy on: push: @@ -24,6 +24,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.9 + cache: "pip" - name: Install dev dependencies run: | python3 -m pip install --upgrade pip setuptools wheel diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index e8133f2..5b8cf5e 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -1,8 +1,42 @@ name: Ruff -on: [push, pull_request] + +on: + push: + pull_request: + branches: [ main ] + +env: + WORKING_DIRECTORY: "." + RUFF_OUTPUT_FILENAME: "ruff.log" + CUSTOM_FLAGS: "" + CUSTOM_PACKAGES: "instructor examples tests" + jobs: - ruff: - runs-on: ubuntu-latest + Ruff: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] steps: - - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 \ No newline at end of file + - name: Checkout code + uses: actions/checkout@v3 + - name: Environment setup + uses: actions/setup-python@v4 + with: + python-version: 3.9 + cache: "pip" + - name: Install dev dependencies + run: | + python3 -m pip install --upgrade pip setuptools wheel + python3 -m pip install -r requirements.txt + - name: Run Continuous Integration Action + run: | + export CUSTOM_PACKAGES="${{ env.CUSTOM_PACKAGES }}" && + export CUSTOM_FLAGS="${{ env.CUSTOM_FLAGS }}" && + curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/lint_ruff.sh | + bash | tee ${{ env.WORKING_DIRECTORY }}/${{ env.RUFF_OUTPUT_FILENAME }} + - name: Upload Artifacts + uses: actions/upload-artifact@v3 + with: + name: ruff-log + path: ${{ env.WORKING_DIRECTORY }}/${{ env.RUFF_OUTPUT_FILENAME }} \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 77331c0..98dae55 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,21 +1,21 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.1.7 + rev: v0.1.7 # Ruff version hooks: - # Run the linter. - - id: ruff + - id: ruff # Run the linter. + name: Run Linter Check (Ruff) args: [ --fix ] - # Run the formatter. - - id: ruff-format + files: ^(instructor|tests|examples)/ + - id: ruff-format # Run the formatter. + name: Run Formatter (Ruff) - repo: local hooks: - id: ci_type_mypy - name: Static Type Check + name: Run Type Check (Mypy) entry: > bash -c 'set -o pipefail; export CUSTOM_PACKAGES="instructor/cli/cli.py instructor/cli/usage.py" && - export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty" && + export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty --follow-imports=skip" && curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/continuous-integration/scripts/devops/continuous-integration/type_mypy.sh | bash' language: system diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..e6a022d --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,62 @@ +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] + +# Same as Black. +line-length = 88 +output-format = "grouped" + +target-version = "py39" + +[lint] +select = [ + # bugbear rules + "B", + # remove unused imports + "F401", + # bare except statements + "E722", + # unused arguments + "ARG", +] +ignore = [ + # mutable defaults + "B006", + "B018", +] + +unfixable = [ + # disable auto fix for print statements + "T201", + "T203", +] +ignore-init-module-imports = true + +[extend-per-file-ignores] +"instructor/distil.py" = ["ARG002"] +"tests/test_distil.py" = ["ARG001"] +"tests/test_patch.py" = ["ARG001"] +"examples/task_planner/task_planner_topological_sort.py" = ["ARG002"] +"examples/citation_with_extraction/main.py" = ["ARG001"] + diff --git a/examples/chain-of-density/chain_of_density.py b/examples/chain-of-density/chain_of_density.py index 706373a..a0d73ed 100644 --- a/examples/chain-of-density/chain_of_density.py +++ b/examples/chain-of-density/chain_of_density.py @@ -115,7 +115,7 @@ def summarize_article(article: str, summary_steps: int = 3): max_retries=2, ) summary_chain.append(summary.summary) - for i in range(summary_steps): + for _i in range(summary_steps): new_summary: RewrittenSummary = client.chat.completions.create( model="gpt-4-0613", messages=[ @@ -127,7 +127,7 @@ def summarize_article(article: str, summary_steps: int = 3): Perform the following two tasks - Identify 1-3 informative entities from the following article which is missing from the previous summary - - Write a new denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities + - Write a new denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities Guidelines - Make every word count: re-write the previous summary to improve flow and make space for additional entities diff --git a/examples/chain-of-density/finetune.py b/examples/chain-of-density/finetune.py index 85dbce5..17ba8fe 100644 --- a/examples/chain-of-density/finetune.py +++ b/examples/chain-of-density/finetune.py @@ -48,5 +48,5 @@ def distil_summarization(text: str) -> GeneratedSummary: with open("test.csv", "r") as file: reader = csv.reader(file) next(reader) # Skip the header - for article, summary in reader: + for article, _summary in reader: distil_summarization(article) diff --git a/examples/citation_with_extraction/main.py b/examples/citation_with_extraction/main.py index ebc7ecb..342ac4a 100644 --- a/examples/citation_with_extraction/main.py +++ b/examples/citation_with_extraction/main.py @@ -125,7 +125,7 @@ def get_api_key(request: Request): # Route to handle SSE events and return users @app.post("/extract", response_class=StreamingResponse) -async def extract(question: Question, openai_key=Depends(get_api_key)): +async def extract(question: Question, openai_key: str = Depends(get_api_key)): raise Exception( "The 'openai.api_key' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(api_key=openai_key)'" ) diff --git a/examples/codegen-from-schema/create_fastapi_app.py b/examples/codegen-from-schema/create_fastapi_app.py index 205ee22..b3ec85c 100644 --- a/examples/codegen-from-schema/create_fastapi_app.py +++ b/examples/codegen-from-schema/create_fastapi_app.py @@ -18,7 +18,7 @@ from jinja2 import Template from models import {{title}} import openai -import instructor +import instructor instructor.patch() @@ -59,7 +59,7 @@ def load_json_schema(json_schema_path: str) -> dict: with open(json_schema_path) as f: return json.load(f) except Exception as e: - raise ValueError(f"Failed to load JSON schema: {e}") + raise ValueError(f"Failed to load JSON schema: {e}") from e def generate_pydantic_model(json_schema_path: str): diff --git a/examples/task_planner/task_planner_topological_sort.py b/examples/task_planner/task_planner_topological_sort.py index 78b0ed4..064d30f 100644 --- a/examples/task_planner/task_planner_topological_sort.py +++ b/examples/task_planner/task_planner_topological_sort.py @@ -1,10 +1,10 @@ """ Proof of Concept for a task planning and execution system using -OpenAIs Functions and topological sort, based on the idea in +OpenAIs Functions and topological sort, based on the idea in query_planner_execution.py.py. -Additionally: There are also cases where the "pure" recursive approach has advantages; -If subtasks for different parent tasks that start in parallel have different runtimes, +Additionally: There are also cases where the "pure" recursive approach has advantages; +If subtasks for different parent tasks that start in parallel have different runtimes, we will wait unnecessarily with my current implementation. Added by Jan Philipp Harries / @jpdus @@ -39,14 +39,14 @@ class Task(BaseModel): id: int = Field(..., description="Unique id of the task") task: str = Field( ..., - description="""Contains the task in text form. If there are multiple tasks, + description="""Contains the task in text form. If there are multiple tasks, this task can only be executed when all dependant subtasks have been answered.""", ) subtasks: List[int] = Field( default_factory=list, - description="""List of the IDs of subtasks that need to be answered before - we can answer the main question. Use a subtask when anything may be unknown - and we need to ask multiple questions to get the answer. + description="""List of the IDs of subtasks that need to be answered before + we can answer the main question. Use a subtask when anything may be unknown + and we need to ask multiple questions to get the answer. Dependencies must only be other tasks.""", ) diff --git a/requirements-doc.txt b/requirements-doc.txt index d47fc80..c1cc2a7 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -1,11 +1,12 @@ mkdocs-material pillow cairosvg -mkdocstrings +mkdocstrings mkdocstrings-python openai pydantic -pytest +pytest>=7.4.0,<8.0.0 +pytest-asyncio>=0.21.1 aiohttp==3.9.1 yarl==1.8.1 frozenlist==1.3.1