From b2a38ffe105dcfd4fb7f65f1bdcbadf2459a93aa Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 13 Dec 2023 00:05:54 -0500 Subject: [PATCH] Support JSON_OBJECT mode from Anyscale (#275) --- instructor/cli/usage.py | 2 +- instructor/dsl/multitask.py | 4 +- instructor/dsl/validators.py | 6 +-- instructor/function_calls.py | 17 ++----- instructor/patch.py | 34 +++++++------- tests/openai/{evals => }/conftest.py | 17 +++++++ .../openai/evals/test_classification_enums.py | 3 +- .../evals/test_classification_literals.py | 3 +- tests/openai/evals/test_entities.py | 5 +-- tests/openai/evals/test_extract_users.py | 3 +- tests/openai/test_modes.py | 21 +++++---- tests/openai/test_multitask.py | 20 ++++----- tests/openai/test_patch.py | 44 +++++++++---------- tests/openai/test_validators.py | 16 ++++--- tests/openai/util.py | 18 ++++++++ 15 files changed, 115 insertions(+), 98 deletions(-) rename tests/openai/{evals => }/conftest.py (61%) create mode 100644 tests/openai/util.py diff --git a/instructor/cli/usage.py b/instructor/cli/usage.py index cf2f64c..6d58b48 100644 --- a/instructor/cli/usage.py +++ b/instructor/cli/usage.py @@ -146,7 +146,7 @@ def group_and_sum_by_date_and_snapshot(usage_data: List[Dict[str, Any]]) -> Tabl return table -@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore +@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore def list( n: int = typer.Option(0, help="Number of days."), ) -> None: diff --git a/instructor/dsl/multitask.py b/instructor/dsl/multitask.py index 2e0552d..727936d 100644 --- a/instructor/dsl/multitask.py +++ b/instructor/dsl/multitask.py @@ -59,7 +59,7 @@ class MultiTaskBase: if mode == Mode.FUNCTIONS: if json_chunk := chunk.choices[0].delta.function_call.arguments: yield json_chunk - elif mode in {Mode.JSON, Mode.MD_JSON}: + elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}: if json_chunk := chunk.choices[0].delta.content: yield json_chunk elif mode == Mode.TOOLS: @@ -79,7 +79,7 @@ class MultiTaskBase: if mode == Mode.FUNCTIONS: if json_chunk := chunk.choices[0].delta.function_call.arguments: yield json_chunk - elif mode in {Mode.JSON, Mode.MD_JSON}: + elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}: if json_chunk := chunk.choices[0].delta.content: yield json_chunk elif mode == Mode.TOOLS: diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py index c5f7739..7b309c9 100644 --- a/instructor/dsl/validators.py +++ b/instructor/dsl/validators.py @@ -67,12 +67,9 @@ def llm_validator( openai_client (OpenAI): The OpenAI client to use (default: None) """ - openai_client = openai_client or OpenAI() - def llm(v): resp = openai_client.chat.completions.create( - functions=[Validator.openai_schema], - function_call={"name": Validator.openai_schema["name"]}, + response_model=Validator, messages=[ { "role": "system", @@ -86,7 +83,6 @@ def llm_validator( model=model, temperature=temperature, ) # type: ignore - resp = Validator.from_response(resp) # If the response is not valid, return the reason, this could be used in # the future to generate a better response, via reasking mechanism. diff --git a/instructor/function_calls.py b/instructor/function_calls.py index 1826707..3def9a0 100644 --- a/instructor/function_calls.py +++ b/instructor/function_calls.py @@ -12,6 +12,7 @@ class Mode(enum.Enum): TOOLS: str = "tool_call" JSON: str = "json_mode" MD_JSON: str = "markdown_json_mode" + JSON_SCHEMA: str = "json_schema_mode" class OpenAISchema(BaseModel): @@ -147,13 +148,7 @@ class OpenAISchema(BaseModel): context=validation_context, strict=strict, ) - elif mode == Mode.JSON: - return cls.model_validate_json( - message.content, - context=validation_context, - strict=strict, - ) - elif mode == Mode.MD_JSON: + elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}: return cls.model_validate_json( message.content, context=validation_context, @@ -211,13 +206,7 @@ class OpenAISchema(BaseModel): context=validation_context, strict=strict, ) - elif mode == Mode.JSON: - return cls.model_validate_json( - message.content, - context=validation_context, - strict=strict, - ) - elif mode == Mode.MD_JSON: + elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}: return cls.model_validate_json( message.content, context=validation_context, diff --git a/instructor/patch.py b/instructor/patch.py index 418c6c1..bb24fdf 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -91,26 +91,28 @@ def handle_response_model( "type": "function", "function": {"name": response_model.openai_schema["name"]}, } - elif mode == Mode.JSON or mode == Mode.MD_JSON: + elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}: + # If its a JSON Mode we need to massage the prompt a bit + # in order to get the response we want in a json format + message = f""" + As a genius expert, your task is to understand the content and provide + the parsed objects in json that match the following json_schema:\n + {response_model.model_json_schema()['properties']} + """ + # Check for nested models + if "$defs" in response_model.model_json_schema(): + message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}" + if mode == Mode.JSON: new_kwargs["response_format"] = {"type": "json_object"} - message = f"""Make sure that your response to any message matches the json_schema below, - do not deviate at all: \n{response_model.model_json_schema()['properties']} - """ - # Check for nested models - if "$defs" in response_model.model_json_schema(): - message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}" - else: - message = f""" - As a genius expert, your task is to understand the content and provide - the parsed objects in json that match the following json_schema (do not deviate at all and its okay if you cant be exact):\n - {response_model.model_json_schema()['properties']} - """ - # Check for nested models - if "$defs" in response_model.model_json_schema(): - message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}" + elif mode == Mode.JSON_SCHEMA: + new_kwargs["response_format"] = { + "type": "json_object", + "schema": response_model.model_json_schema(), + } + elif mode == Mode.MD_JSON: new_kwargs["messages"].append( { "role": "assistant", diff --git a/tests/openai/evals/conftest.py b/tests/openai/conftest.py similarity index 61% rename from tests/openai/evals/conftest.py rename to tests/openai/conftest.py index 6039834..b41f260 100644 --- a/tests/openai/evals/conftest.py +++ b/tests/openai/conftest.py @@ -22,6 +22,15 @@ def client(): base_url="https://braintrustproxy.com/v1", ) ) + elif ( + os.environ.get("OPENAI_BASE_URL", None) + == "https://api.endpoints.anyscale.com/v1" + ): + yield OpenAI( + api_key=os.environ["OPENAI_API_KEY"], + base_url="https://api.endpoints.anyscale.com/v1", + ) + else: yield OpenAI() @@ -35,5 +44,13 @@ def aclient(): base_url="https://braintrustproxy.com/v1", ) ) + elif ( + os.environ.get("OPENAI_BASE_URL", None) + == "https://api.endpoints.anyscale.com/v1" + ): + yield AsyncOpenAI( + api_key=os.environ["OPENAI_API_KEY"], + base_url="https://api.endpoints.anyscale.com/v1", + ) else: yield AsyncOpenAI() diff --git a/tests/openai/evals/test_classification_enums.py b/tests/openai/evals/test_classification_enums.py index a29d391..e9d7597 100644 --- a/tests/openai/evals/test_classification_enums.py +++ b/tests/openai/evals/test_classification_enums.py @@ -8,6 +8,7 @@ import instructor from pydantic import BaseModel from instructor.function_calls import Mode +from tests.openai.util import models, modes class Labels(str, enum.Enum): @@ -23,8 +24,6 @@ class SinglePrediction(BaseModel): class_label: Labels -models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"] -modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS] data = [ ( "I am a spammer", diff --git a/tests/openai/evals/test_classification_literals.py b/tests/openai/evals/test_classification_literals.py index 2a53baa..5c00e75 100644 --- a/tests/openai/evals/test_classification_literals.py +++ b/tests/openai/evals/test_classification_literals.py @@ -7,6 +7,7 @@ import instructor from pydantic import BaseModel from instructor.function_calls import Mode +from tests.openai.util import models, modes class SinglePrediction(BaseModel): @@ -17,8 +18,6 @@ class SinglePrediction(BaseModel): class_label: Literal["spam", "not_spam"] -models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"] -modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS] data = [ ("I am a spammer", "spam"), ("I am not a spammer", "not_spam"), diff --git a/tests/openai/evals/test_entities.py b/tests/openai/evals/test_entities.py index b162479..8c74ab0 100644 --- a/tests/openai/evals/test_entities.py +++ b/tests/openai/evals/test_entities.py @@ -6,6 +6,7 @@ import pytest import instructor from instructor.function_calls import Mode +from tests.openai.util import models, modes class Property(BaseModel): @@ -84,10 +85,6 @@ The contract can be terminated with a 30-day notice, unless there are outstandin """ -models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"] -modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS] - - @pytest.mark.parametrize("model, mode", product(models, modes)) def test_extract(model, mode, client): client = instructor.patch(client, mode=mode) diff --git a/tests/openai/evals/test_extract_users.py b/tests/openai/evals/test_extract_users.py index 18ef2b4..969fca3 100644 --- a/tests/openai/evals/test_extract_users.py +++ b/tests/openai/evals/test_extract_users.py @@ -3,6 +3,7 @@ from itertools import product from pydantic import BaseModel import instructor from instructor.function_calls import Mode +from tests.openai.util import models, modes class UserDetails(BaseModel): @@ -11,13 +12,11 @@ class UserDetails(BaseModel): # Lists for models, test data, and modes -models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"] test_data = [ ("Jason is 10", "Jason", 10), ("Alice is 25", "Alice", 25), ("Bob is 35", "Bob", 35), ] -modes = [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS] @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) diff --git a/tests/openai/test_modes.py b/tests/openai/test_modes.py index f38ca2b..bedddfd 100644 --- a/tests/openai/test_modes.py +++ b/tests/openai/test_modes.py @@ -1,11 +1,11 @@ -from openai import OpenAI +from itertools import product from pydantic import BaseModel, Field from typing import List import pytest import instructor -from instructor.function_calls import Mode +from tests.openai.util import models, modes class Item(BaseModel): @@ -18,10 +18,9 @@ class Order(BaseModel): customer: str -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON]) -def test_nested(mode): - client = instructor.patch(OpenAI(), mode=mode) - +@pytest.mark.parametrize("model, mode", product(models, modes)) +def test_nested(model, mode, client): + client = instructor.patch(client, mode=mode) content = """ Order Details: Customer: Jason @@ -33,7 +32,7 @@ def test_nested(mode): """ resp = client.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, response_model=Order, messages=[ { @@ -62,9 +61,9 @@ class LibraryRecord(BaseModel): library_id: str -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON]) -def test_complex_nested_model(mode): - client = instructor.patch(OpenAI(), mode=mode) +@pytest.mark.parametrize("model, mode", product(models, modes)) +def test_complex_nested_model(model, mode, client): + client = instructor.patch(client, mode=mode) content = """ Library visit details: @@ -76,7 +75,7 @@ def test_complex_nested_model(mode): """ resp = client.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, response_model=LibraryRecord, messages=[ { diff --git a/tests/openai/test_multitask.py b/tests/openai/test_multitask.py index 13dbc8d..23b869f 100644 --- a/tests/openai/test_multitask.py +++ b/tests/openai/test_multitask.py @@ -1,10 +1,10 @@ +from itertools import product from typing import Iterable -from openai import OpenAI, AsyncOpenAI from pydantic import BaseModel import pytest import instructor -from instructor.function_calls import Mode +from tests.openai.util import models, modes class User(BaseModel): @@ -15,13 +15,13 @@ class User(BaseModel): Users = Iterable[User] -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON]) -def test_multi_user(mode): - client = instructor.patch(OpenAI(), mode=mode) +@pytest.mark.parametrize("model, mode", product(models, modes)) +def test_multi_user(model, mode, client): + client = instructor.patch(client, mode=mode) def stream_extract(input: str) -> Iterable[User]: return client.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, stream=True, response_model=Users, messages=[ @@ -50,13 +50,13 @@ def test_multi_user(mode): @pytest.mark.asyncio -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON]) -async def test_multi_user_tools_mode_async(mode): - client = instructor.patch(AsyncOpenAI(), mode=mode) +@pytest.mark.parametrize("model, mode", product(models, modes)) +async def test_multi_user_tools_mode_async(model, mode, aclient): + client = instructor.patch(aclient, mode=mode) async def stream_extract(input: str) -> Iterable[User]: return await client.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, stream=True, response_model=Users, messages=[ diff --git a/tests/openai/test_patch.py b/tests/openai/test_patch.py index 5d353fc..e56bcdc 100644 --- a/tests/openai/test_patch.py +++ b/tests/openai/test_patch.py @@ -1,13 +1,9 @@ +from itertools import product from pydantic import BaseModel, field_validator import pytest import instructor -from openai import OpenAI, AsyncOpenAI - -from instructor.function_calls import Mode - -aclient = instructor.patch(AsyncOpenAI()) -client = instructor.patch(OpenAI()) +from tests.openai.util import models, modes class UserExtract(BaseModel): @@ -15,11 +11,11 @@ class UserExtract(BaseModel): age: int -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON]) -def test_runmodel(mode): - client = instructor.patch(OpenAI(), mode=mode) +@pytest.mark.parametrize("model, mode", product(models, modes)) +def test_runmodel(model, mode, client): + client = instructor.patch(client, mode=mode) model = client.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, response_model=UserExtract, max_retries=2, messages=[ @@ -34,12 +30,12 @@ def test_runmodel(mode): ), "The raw response should be available from OpenAI" -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON]) +@pytest.mark.parametrize("model, mode", product(models, modes)) @pytest.mark.asyncio -async def test_runmodel_async(mode): - aclient = instructor.patch(AsyncOpenAI(), mode=mode) +async def test_runmodel_async(model, mode, aclient): + aclient = instructor.patch(aclient, mode=mode) model = await aclient.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, response_model=UserExtract, max_retries=2, messages=[ @@ -62,15 +58,17 @@ class UserExtractValidated(BaseModel): @classmethod def validate_name(cls, v): if v.upper() != v: - raise ValueError("Name should be uppercase") + raise ValueError( + "Name should be uppercase, make sure to use the `uppercase` version of the name" + ) return v -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON]) -def test_runmodel_validator(mode): - client = instructor.patch(OpenAI(), mode=mode) +@pytest.mark.parametrize("model, mode", product(models, modes)) +def test_runmodel_validator(model, mode, client): + client = instructor.patch(client, mode=mode) model = client.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, response_model=UserExtractValidated, max_retries=2, messages=[ @@ -84,12 +82,12 @@ def test_runmodel_validator(mode): ), "The raw response should be available from OpenAI" -@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON]) +@pytest.mark.parametrize("model, mode", product(models, modes)) @pytest.mark.asyncio -async def test_runmodel_async_validator(mode): - aclient = instructor.patch(AsyncOpenAI(), mode=mode) +async def test_runmodel_async_validator(model, mode, aclient): + aclient = instructor.patch(aclient, mode=mode) model = await aclient.chat.completions.create( - model="gpt-3.5-turbo-1106", + model=model, response_model=UserExtractValidated, max_retries=2, messages=[ diff --git a/tests/openai/test_validators.py b/tests/openai/test_validators.py index ef62836..b5f46a6 100644 --- a/tests/openai/test_validators.py +++ b/tests/openai/test_validators.py @@ -1,17 +1,16 @@ +from itertools import product import pytest import instructor from typing_extensions import Annotated from pydantic import BaseModel, AfterValidator, BeforeValidator, ValidationError -from openai import OpenAI from instructor.dsl.validators import llm_validator - -client = instructor.patch(OpenAI()) +from tests.openai.util import models, modes -def test_patch_completes_successfully(): +def test_patch_completes_successfully(client): class Response(BaseModel): message: Annotated[ str, AfterValidator(instructor.openai_moderation(client=client)) @@ -21,13 +20,18 @@ def test_patch_completes_successfully(): Response(message="I want to make them suffer the consequences") -def test_runmodel_validator_error(): +@pytest.mark.parametrize("model, mode", product(models, modes)) +def test_runmodel_validator_error(model, mode, client): + client = instructor.patch(client, mode=mode) + class QuestionAnswerNoEvil(BaseModel): question: str answer: Annotated[ str, BeforeValidator( - llm_validator("don't say objectionable things", openai_client=client) + llm_validator( + "don't say objectionable things", model=model, openai_client=client + ) ), ] diff --git a/tests/openai/util.py b/tests/openai/util.py new file mode 100644 index 0000000..e6a8e62 --- /dev/null +++ b/tests/openai/util.py @@ -0,0 +1,18 @@ +import os +import instructor + +if os.getenv("OPENAI_BASE_URL", None) == "https://api.endpoints.anyscale.com/v1": + models = ["mistralai/Mistral-7B-Instruct-v0.1"] + modes = [instructor.Mode.JSON_SCHEMA] +else: + models = ["gpt-3.5-turbo-1106", "gpt-4", "gpt-4-1106-preview"] + modes = [ + instructor.Mode.FUNCTIONS, + instructor.Mode.JSON, + instructor.Mode.TOOLS, + instructor.Mode.MD_JSON, + ] + +if __name__ == "__main__": + print(models) + print(modes)