From 62919b2cf25813d1fc366c89d62e77dbc55b3854 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 19 Feb 2024 21:02:00 -0500 Subject: [PATCH] docs: update code snippets and text across multiple documentation files (#450) --- README.md | 2 +- docs/concepts/caching.md | 4 +- docs/concepts/lists.md | 10 +- docs/concepts/maybe.md | 2 +- docs/concepts/models.md | 2 +- docs/concepts/parallel.md | 4 +- docs/concepts/partial.md | 142 +++---------- docs/concepts/raw_response.md | 25 ++- docs/concepts/reask_validation.md | 2 +- docs/concepts/types.md | 324 +++++++++++++++++++++++++++++- docs/concepts/usage.md | 2 +- docs/index.md | 30 +-- mkdocs.yml | 4 +- poetry.lock | 131 +++++++++++- pyproject.toml | 1 + tests/openai/docs/test_docs.py | 26 +-- 16 files changed, 535 insertions(+), 176 deletions(-) diff --git a/README.md b/README.md index b823ca1..126a085 100644 --- a/README.md +++ b/README.md @@ -200,7 +200,7 @@ except ValidationError as e: """ 1 validation error for QuestionAnswer answer - Assertion failed, The statement promotes objectionable behavior. [type=assertion_error, input_value='The meaning of life is to be evil and steal', input_type=str] + Assertion failed, The statement promotes objectionable behavior by encouraging evil and stealing, which goes against the rule of not saying objectionable things. 
[type=assertion_error, input_value='The meaning of life is to be evil and steal', input_type=str] For further information visit https://errors.pydantic.dev/2.6/v/assertion_error """ ``` diff --git a/docs/concepts/caching.md b/docs/concepts/caching.md index 35c91d6..25b519c 100644 --- a/docs/concepts/caching.md +++ b/docs/concepts/caching.md @@ -33,12 +33,12 @@ def extract(data) -> UserDetail: start = time.perf_counter() # (1) model = extract("Extract jason is 25 years old") print(f"Time taken: {time.perf_counter() - start}") -#> Time taken: 0.41433916706591845 +#> Time taken: 0.8392175831831992 start = time.perf_counter() model = extract("Extract jason is 25 years old") # (2) print(f"Time taken: {time.perf_counter() - start}") -#> Time taken: 1.7080456018447876e-06 +#> Time taken: 8.33999365568161e-07 ``` 1. Using `time.perf_counter()` to measure the time taken to run the function is better than using `time.time()` because it's more accurate and less susceptible to system clock changes. 
diff --git a/docs/concepts/lists.md b/docs/concepts/lists.md index 366d1e7..ad98ce7 100644 --- a/docs/concepts/lists.md +++ b/docs/concepts/lists.md @@ -79,10 +79,8 @@ users = client.chat.completions.create( ) for user in users: print(user) - #> ('tasks', [User(name='Jason', age=10), User(name='John', age=30)]) - -#> name="Jason" "age"=10 -#> name="John" "age"=10 + #> name='Jason' age=10 + #> name='John' age=30 ``` ## Streaming Tasks @@ -159,8 +157,8 @@ async def print_iterable_results(): ) async for m in model: print(m) - #> name='John Doe' age=32 - #> name='Jane Smith' age=28 + #> name='John Smith' age=30 + #> name='Mary Jane' age=28 import asyncio diff --git a/docs/concepts/maybe.md b/docs/concepts/maybe.md index f25f48f..183340b 100644 --- a/docs/concepts/maybe.md +++ b/docs/concepts/maybe.md @@ -89,7 +89,7 @@ print(user2.model_dump_json(indent=2)) { "result": null, "error": false, - "message": "Unknown user" + "message": null } """ ``` diff --git a/docs/concepts/models.md b/docs/concepts/models.md index 293eabe..7a6d1e4 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -150,7 +150,7 @@ class SearchQuery(BaseModel): def execute(self): print(f"Searching for {self.query} of type {self.query_type}") - #> Searching for cat of type image + #> Searching for cat pictures of type image return "Results for cat" diff --git a/docs/concepts/parallel.md b/docs/concepts/parallel.md index 4ff493d..2152533 100644 --- a/docs/concepts/parallel.md +++ b/docs/concepts/parallel.md @@ -44,9 +44,9 @@ function_calls = client.chat.completions.create( for fc in function_calls: print(fc) - #> location='Toronto' units='metric' + #> location='Toronto' units='imperial' #> location='Dallas' units='imperial' - #> query='super bowl winner' + #> query='who won the super bowl' ``` 1. Set the mode to `PARALLEL_TOOLS` to enable parallel function calling. 
diff --git a/docs/concepts/partial.md b/docs/concepts/partial.md index 8805c8e..7a543e7 100644 --- a/docs/concepts/partial.md +++ b/docs/concepts/partial.md @@ -99,9 +99,31 @@ for extraction in extraction_stream: console.clear() console.print(obj) -print(extraction.model_dump_json()) +print(extraction.model_dump_json(indent=2)) """ -{"users":[{"name":"John Doe","email":"johndoe@email.com","twitter":"@TechGuru44"},{"name":"Jane Smith","email":"janesmith@email.com","twitter":"@DigitalDiva88"},{"name":"Alex Johnson","email":"alexj@email.com","twitter":"@CodeMaster2023"}],"date":"2024-03-15","location":"Grand Tech Arena located at 4521 Innovation Drive","budget":50000,"deadline":"2024-02-20"} +{ + "users": [ + { + "name": "John Doe", + "email": "johndoe@email.com", + "twitter": "@TechGuru44" + }, + { + "name": "Jane Smith", + "email": "janesmith@email.com", + "twitter": "@DigitalDiva88" + }, + { + "name": "Alex Johnson", + "email": "alexj@email.com", + "twitter": "@CodeMaster2023" + } + ], + "date": "2024-03-15", + "location": "Grand Tech Arena located at 4521 Innovation Drive", + "budget": 50000, + "deadline": "2024-02-20" +} """ ``` @@ -118,10 +140,10 @@ import instructor from openai import AsyncOpenAI from pydantic import BaseModel -client = instructor.apatch(AsyncOpenAI()) +client = instructor.patch(AsyncOpenAI()) -class UserExtract(BaseModel): +class User(BaseModel): name: str age: int @@ -129,117 +151,19 @@ class UserExtract(BaseModel): async def print_partial_results(): user = await client.chat.completions.create( model="gpt-4-turbo-preview", - response_model=instructor.Partial[UserExtract], + response_model=instructor.Partial[User], max_retries=2, stream=True, messages=[ - {"role": "user", "content": "Jason Liu is 12 years old"}, + {"role": "user", "content": "Jason is 12 years old"}, ], ) async for m in user: - print(m.model_dump_json(indent=2)) - """ - { - "name": null, - "age": null - } - """ - """ - { - "name": "", - "age": null - } - """ - """ - { - 
"name": "Jason", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": 12 - } - """ - """ - { - "name": "", - "age": null - } - """ - """ - { - "name": "Jason", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": 12 - } - """ - """ - { - "name": "", - "age": null - } - """ - """ - { - "name": "Jason", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": 12 - } - """ - """ - { - "name": "", - "age": null - } - """ - """ - { - "name": "Jason", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": null - } - """ - """ - { - "name": "Jason Liu", - "age": 12 - } - """ + print(m) + #> name=None age=None + #> name='' age=None + #> name='Jason' age=None + #> name='Jason' age=12 import asyncio diff --git a/docs/concepts/raw_response.md b/docs/concepts/raw_response.md index 13fa632..7248282 100644 --- a/docs/concepts/raw_response.md +++ b/docs/concepts/raw_response.md @@ -25,7 +25,7 @@ user: UserExtract = client.chat.completions.create( print(user._raw_response) """ ChatCompletion( - id='chatcmpl-8pOAsSOIHAmngMBBki3BLN3p552L0', + id='chatcmpl-8u9bsrmmf5YjZyfCtQymoZV8LK1qg', choices=[ Choice( finish_reason='stop', @@ -34,19 +34,24 @@ ChatCompletion( message=ChatCompletionMessage( content=None, role='assistant', - function_call=FunctionCall( - arguments='{\n "name": "Jason",\n "age": 25\n}', - name='UserExtract', - ), - tool_calls=None, + function_call=None, + tool_calls=[ + ChatCompletionMessageToolCall( + id='call_O5rpXf47YgXiYrYWv45yZUeM', + function=Function( + arguments='{"name":"Jason","age":25}', name='UserExtract' + ), + type='function', + ) + ], ), ) ], - created=1707258346, - model='gpt-3.5-turbo-0613', + created=1708394000, + model='gpt-3.5-turbo-0125', object='chat.completion', - system_fingerprint=None, - 
usage=CompletionUsage(completion_tokens=16, prompt_tokens=73, total_tokens=89), + system_fingerprint='fp_69829325d0', + usage=CompletionUsage(completion_tokens=9, prompt_tokens=82, total_tokens=91), ) """ ``` diff --git a/docs/concepts/reask_validation.md b/docs/concepts/reask_validation.md index 9f176c4..dc21efe 100644 --- a/docs/concepts/reask_validation.md +++ b/docs/concepts/reask_validation.md @@ -91,7 +91,7 @@ except ValidationError as e: """ 1 validation error for QuestionAnswer answer - Assertion failed, The statement promotes objectionable behavior. [type=assertion_error, input_value='The meaning of life is to be evil and steal', input_type=str] + Assertion failed, The statement promotes objectionable behavior by encouraging evil and theft. [type=assertion_error, input_value='The meaning of life is to be evil and steal', input_type=str] For further information visit https://errors.pydantic.dev/2.6/v/assertion_error """ ``` diff --git a/docs/concepts/types.md b/docs/concepts/types.md index 106d4cd..cd62fad 100644 --- a/docs/concepts/types.md +++ b/docs/concepts/types.md @@ -1,3 +1,323 @@ -!!! warning "This page is a work in progress" +# Support for Simple Types - This page is a work in progress. Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/types/) +Aside from the recommended `pydantic.BaseModel`, and [Iterable](lists.md), and [Partial](partial.md), + +Instructor supports simple types like `str`, `int`, `float`, `bool`, `Union`, `Literal`, out of the box. You can use these types directly in your response models. + +To add more descriptions you can also use `typing.Annotated` to include more information about the type. + +## What happens behind the scenes? 
+ +We will actually wrap the response model with a `pydantic.BaseModel` of the following form: + +```python +from typing import Annotated +from pydantic import create_model, Field, BaseModel + +typehint = Annotated[bool, Field(description="Sample Description")] + +model = create_model("Response", content=(typehint, ...), __base__=BaseModel) + +print(model.model_json_schema()) +""" +{ + 'properties': { + 'content': { + 'description': 'Sample Description', + 'title': 'Content', + 'type': 'boolean', + } + }, + 'title': 'Response', + 'type': 'object', +} +""" +``` + +## Primitive Types (str, int, float, bool) + +```python +import instructor +import openai + +client = instructor.patch(openai.OpenAI()) + +# Response model with simple types like str, int, float, bool +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=bool, + messages=[ + { + "role": "user", + "content": "Is it true that Paris is the capital of France?", + }, + ], +) +assert resp is True, "Paris is the capital of France" +print(resp) +#> True +``` + +## Annotated + +Annotations can be used to add more information about the type. This can be useful for adding descriptions to the type, along with more complex information like field names, and more. + +```python +import instructor +import openai +from typing import Annotated +from pydantic import Field + +client = instructor.patch(openai.OpenAI()) + +UpperCaseStr = Annotated[str, Field(description="string must be upper case")] + +# Response model with simple types like str, int, float, bool +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=UpperCaseStr, + messages=[ + { + "role": "user", + "content": "What is the capital of france?", + }, + ], +) +assert resp == "PARIS", "Paris is the capital of France" +print(resp) +#> PARIS +``` + +## Literal + +For simple classification tasks, Literals work quite well; they support literal values of type string, int, and bool. 
+ +```python +import instructor +import openai +from typing import Literal + +client = instructor.patch(openai.OpenAI()) + +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=Literal["BILLING", "SHIPPING"], + messages=[ + { + "role": "user", + "content": "Classify the following messages: 'I am having trouble with my billing'", + }, + ], +) +assert resp == "BILLING" +print(resp) +#> BILLING +``` + +## Enum + +Enums are harder to get right without some additional prompting, but are useful if these are values that are shared across the application. + +```python +import instructor +import openai +from enum import Enum + + +class Label(str, Enum): + BILLING = "BILLING" + SHIPPING = "SHIPPING" + + +client = instructor.patch(openai.OpenAI()) + +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=Label, + messages=[ + { + "role": "user", + "content": "Classify the following messages: 'I am having trouble with my billing'", + }, + ], +) +assert resp == Label.BILLING +print(resp) +#> BILLING +``` + +## List + +```python +import instructor +import openai +from typing import List + +client = instructor.patch(openai.OpenAI()) + +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=List[int], + messages=[ + { + "role": "user", + "content": "Give me the first 5 prime numbers", + }, + ], +) + +assert resp == [2, 3, 5, 7, 11] +print(resp) +#> [2, 3, 5, 7, 11] +``` + +## Union + +Union is a great way to handle multiple types of responses, similar to multiple function calls but not limited to the function calling api, like in JSON_SCHEMA modes. 
+ +```python +import instructor +import openai +from pydantic import BaseModel +from typing import Union + +client = instructor.patch(openai.OpenAI()) + + +class Add(BaseModel): + a: int + b: int + + +class Weather(BaseModel): + location: str + + +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=Union[Add, Weather], + messages=[ + { + "role": "user", + "content": "What is 5 + 5?", + }, + ], +) + +assert resp == Add(a=5, b=5) +print(resp) +#> a=5 b=5 +``` + +## Complex Types + +### Pandas DataFrame + +This is a more complex example, where we use a custom type to convert markdown to a pandas DataFrame. + +```python +from io import StringIO +from typing import Annotated, Any +from pydantic import BeforeValidator, PlainSerializer, InstanceOf, WithJsonSchema +import pandas as pd +import instructor +import openai + + +def md_to_df(data: Any) -> Any: + # Convert markdown to DataFrame + if isinstance(data, str): + return ( + pd.read_csv( + StringIO(data), # Process data + sep="|", + index_col=1, + ) + .dropna(axis=1, how="all") + .iloc[1:] + .applymap(lambda x: x.strip()) + ) + return data + + +MarkdownDataFrame = Annotated[ + # Validates final type + InstanceOf[pd.DataFrame], + # Converts markdown to DataFrame + BeforeValidator(md_to_df), + # Converts DataFrame to markdown on model_dump_json + PlainSerializer(lambda df: df.to_markdown()), + # Adds a description to the type + WithJsonSchema( + { + "type": "string", + "description": """ + The markdown representation of the table, + each one should be tidy, do not try to join + tables that should be seperate""", + } + ), +] + + +client = instructor.patch(openai.OpenAI()) + +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=MarkdownDataFrame, + messages=[ + { + "role": "user", + "content": "Jason is 20, Sarah is 30, and John is 40", + }, + ], +) + +assert isinstance(resp, pd.DataFrame) +print(resp) +""" + Age + Name +Jason 20 +Sarah 30 +John 40 +""" +``` + +### 
Lists of Unions + +Just like Unions we can use List of Unions to represent multiple types of responses. This will feel similar to the parallel function calls but not limited to the function calling api, like in JSON_SCHEMA modes. + +```python +import instructor +import openai +from pydantic import BaseModel +from typing import Union, List + +client = instructor.patch(openai.OpenAI()) + + +class Weather(BaseModel, frozen=True): + location: str + + +class Add(BaseModel, frozen=True): + a: int + b: int + + +resp = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=List[Union[Add, Weather]], + messages=[ + { + "role": "user", + "content": "Add 5 and 5, and also whats the weather in Toronto?", + }, + ], +) + +assert resp == [Add(a=5, b=5), Weather(location="Toronto")] +print(resp) +#> [Add(a=5, b=5), Weather(location='Toronto')] +``` diff --git a/docs/concepts/usage.md b/docs/concepts/usage.md index c1d068d..6eed360 100644 --- a/docs/concepts/usage.md +++ b/docs/concepts/usage.md @@ -23,5 +23,5 @@ user: UserExtract = client.chat.completions.create( ) print(user._raw_response.usage) -#> CompletionUsage(completion_tokens=16, prompt_tokens=73, total_tokens=89) +#> CompletionUsage(completion_tokens=9, prompt_tokens=82, total_tokens=91) ``` diff --git a/docs/index.md b/docs/index.md index e85ff78..ffe30a8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -127,7 +127,7 @@ print(response.model_dump_json(indent=2)) print(user._raw_response.model_dump_json(indent=2)) """ { - "id": "chatcmpl-8pOAKwq8OXZVvOCMw4dv713oKplLF", + "id": "chatcmpl-8u9e2TV3ehCgLsRxNLLeAbzpEmBuZ", "choices": [ { "finish_reason": "stop", @@ -136,22 +136,28 @@ print(response.model_dump_json(indent=2)) "message": { "content": null, "role": "assistant", - "function_call": { - "arguments": "{\n \"name\": \"Jason\",\n \"age\": 25\n}", - "name": "UserDetail" - }, - "tool_calls": null + "function_call": null, + "tool_calls": [ + { + "id": "call_3ZuQhfteTLEy7CUokjwnLBHr", + "function": { + 
"arguments": "{\"name\":\"Jason\",\"age\":25}", + "name": "UserDetail" + }, + "type": "function" + } + ] } } ], - "created": 1707258312, - "model": "gpt-3.5-turbo-0613", + "created": 1708394134, + "model": "gpt-3.5-turbo-0125", "object": "chat.completion", - "system_fingerprint": null, + "system_fingerprint": "fp_69829325d0", "usage": { - "completion_tokens": 16, - "prompt_tokens": 72, - "total_tokens": 88 + "completion_tokens": 9, + "prompt_tokens": 81, + "total_tokens": 90 } } """ diff --git a/mkdocs.yml b/mkdocs.yml index a2710c5..9c5e865 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,6 +128,8 @@ nav: - Philosophy: 'concepts/philosophy.md' - Models: 'concepts/models.md' - Fields: 'concepts/fields.md' + - Types: 'concepts/types.md' + - Validators: "concepts/reask_validation.md" - Usage Tokens: 'concepts/usage.md' - Missing: "concepts/maybe.md" - Patching: 'concepts/patching.md' @@ -138,9 +140,7 @@ nav: - Raw Response: 'concepts/raw_response.md' - FastAPI: 'concepts/fastapi.md' - Caching: 'concepts/caching.md' - - Validators: "concepts/reask_validation.md" - Distillation: "concepts/distillation.md" - - Types: 'concepts/types.md' - Union: 'concepts/union.md' - Alias: 'concepts/alias.md' - Enums: 'concepts/enums.md' diff --git a/poetry.lock b/poetry.lock index a169820..e210db8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1871,6 +1871,51 @@ files = [ {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = 
"numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = 
"sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + [[package]] name = "openai" version = "1.12.0" @@ -1915,6 +1960,78 @@ files = [ {file = "paginate-0.5.6.tar.gz", hash = "sha256:5e6007b6a9398177a7e1648d04fdd9f8c9766a1a945bceac82f1929e8c78af2d"}, ] +[[package]] +name = "pandas" +version = "2.2.0" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, + {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"}, + {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"}, + {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"}, + {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"}, + {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"}, + {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"}, + {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"}, + {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"}, + {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"}, + {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"}, + {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"}, + {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"}, + {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"}, + {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"}, + {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"}, + {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"}, + {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"}, + {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"}, + {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"}, + {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"}, + {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"}, + {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"}, + {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"}, + {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"}, + {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"}, + {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"}, + {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"}, + {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = "python_version == 
\"3.11\""}, + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", 
"psycopg2 (>=2.9.6)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + [[package]] name = "pandocfilters" version = "1.5.1" @@ -2372,6 +2489,17 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + [[package]] name = "pywin32" version = "306" @@ -2420,7 +2548,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -3300,4 +3427,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions 
= "^3.10" -content-hash = "feb8e2f78640f5be62972a35f24179c09e206ff2d765c764eba7a390561705fd" +content-hash = "8a8af75e70138f0eb799bbb22def7e4d69e29a3b85b66f7d5b50c1096114cf79" diff --git a/pyproject.toml b/pyproject.toml index 32b562d..7fe6e9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ mkdocs-minify-plugin = "^0.8.0" fastapi = "^0.109.2" redis = "^5.0.1" diskcache = "^5.6.3" +pandas = "^2.2.0" [build-system] requires = ["poetry-core"] diff --git a/tests/openai/docs/test_docs.py b/tests/openai/docs/test_docs.py index cb6337c..ab388c3 100644 --- a/tests/openai/docs/test_docs.py +++ b/tests/openai/docs/test_docs.py @@ -12,27 +12,6 @@ def test_readme(example: CodeExample, eval_example: EvalExample): eval_example.run(example) -@pytest.mark.parametrize("example", find_examples("docs/index.md"), ids=str) -def test_index(example: CodeExample, eval_example: EvalExample): - if eval_example.update_examples: - eval_example.format(example) - eval_example.run_print_update(example) - else: - eval_example.lint(example) - eval_example.run(example) - - -@pytest.mark.skip("Blogs have too many small examples") -@pytest.mark.parametrize("example", find_examples("docs/blog"), ids=str) -def test_format_blog(example: CodeExample, eval_example: EvalExample): - if eval_example.update_examples: - eval_example.format(example) - eval_example.run_print_update(example) - else: - eval_example.lint(example) - eval_example.run(example) - - @pytest.mark.parametrize("example", find_examples("docs/concepts"), ids=str) def test_format_concepts(example: CodeExample, eval_example: EvalExample): if eval_example.update_examples: @@ -43,9 +22,8 @@ def test_format_concepts(example: CodeExample, eval_example: EvalExample): eval_example.run(example) -@pytest.mark.skip("Examples are too long") -@pytest.mark.parametrize("example", find_examples("docs/examples"), ids=str) -def test_format_examples(example: CodeExample, eval_example: EvalExample): +@pytest.mark.parametrize("example", 
find_examples("docs/index.md"), ids=str) +def test_index(example: CodeExample, eval_example: EvalExample): if eval_example.update_examples: eval_example.format(example) eval_example.run_print_update(example)