From b2a38ffe105dcfd4fb7f65f1bdcbadf2459a93aa Mon Sep 17 00:00:00 2001
From: Jason Liu <jxnl@users.noreply.github.com>
Date: Wed, 13 Dec 2023 00:05:54 -0500
Subject: [PATCH] Support JSON_OBJECT mode from Anyscale (#275)

---
 instructor/cli/usage.py                       |  2 +-
 instructor/dsl/multitask.py                   |  4 +-
 instructor/dsl/validators.py                  |  6 +--
 instructor/function_calls.py                  | 17 ++-----
 instructor/patch.py                           | 34 +++++++-------
 tests/openai/{evals => }/conftest.py          | 17 +++++++
 .../openai/evals/test_classification_enums.py |  3 +-
 .../evals/test_classification_literals.py     |  3 +-
 tests/openai/evals/test_entities.py           |  5 +--
 tests/openai/evals/test_extract_users.py      |  3 +-
 tests/openai/test_modes.py                    | 21 +++++----
 tests/openai/test_multitask.py                | 20 ++++-----
 tests/openai/test_patch.py                    | 44 +++++++++----------
 tests/openai/test_validators.py               | 16 ++++---
 tests/openai/util.py                          | 18 ++++++++
 15 files changed, 115 insertions(+), 98 deletions(-)
 rename tests/openai/{evals => }/conftest.py (61%)
 create mode 100644 tests/openai/util.py

diff --git a/instructor/cli/usage.py b/instructor/cli/usage.py
index cf2f64c..6d58b48 100644
--- a/instructor/cli/usage.py
+++ b/instructor/cli/usage.py
@@ -146,7 +146,7 @@ def group_and_sum_by_date_and_snapshot(usage_data: List[Dict[str, Any]]) -> Tabl
     return table
 
 
-@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
+@app.command(help="Displays OpenAI API usage data for the past N days.")  # type: ignore
 def list(
     n: int = typer.Option(0, help="Number of days."),
 ) -> None:
diff --git a/instructor/dsl/multitask.py b/instructor/dsl/multitask.py
index 2e0552d..727936d 100644
--- a/instructor/dsl/multitask.py
+++ b/instructor/dsl/multitask.py
@@ -59,7 +59,7 @@ class MultiTaskBase:
                 if mode == Mode.FUNCTIONS:
                     if json_chunk := chunk.choices[0].delta.function_call.arguments:
                         yield json_chunk
-                elif mode in {Mode.JSON, Mode.MD_JSON}:
+                elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
                     if json_chunk := chunk.choices[0].delta.content:
                         yield json_chunk
                 elif mode == Mode.TOOLS:
@@ -79,7 +79,7 @@ class MultiTaskBase:
                 if mode == Mode.FUNCTIONS:
                     if json_chunk := chunk.choices[0].delta.function_call.arguments:
                         yield json_chunk
-                elif mode in {Mode.JSON, Mode.MD_JSON}:
+                elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
                     if json_chunk := chunk.choices[0].delta.content:
                         yield json_chunk
                 elif mode == Mode.TOOLS:
diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py
index c5f7739..7b309c9 100644
--- a/instructor/dsl/validators.py
+++ b/instructor/dsl/validators.py
@@ -67,12 +67,9 @@ def llm_validator(
         openai_client (OpenAI): The OpenAI client to use (default: None)
     """
 
-    openai_client = openai_client or OpenAI()
-
     def llm(v):
         resp = openai_client.chat.completions.create(
-            functions=[Validator.openai_schema],
-            function_call={"name": Validator.openai_schema["name"]},
+            response_model=Validator,
             messages=[
                 {
                     "role": "system",
@@ -86,7 +83,6 @@ def llm_validator(
             model=model,
             temperature=temperature,
         )  # type: ignore
-        resp = Validator.from_response(resp)
 
         # If the response is  not valid, return the reason, this could be used in
         # the future to generate a better response, via reasking mechanism.
diff --git a/instructor/function_calls.py b/instructor/function_calls.py
index 1826707..3def9a0 100644
--- a/instructor/function_calls.py
+++ b/instructor/function_calls.py
@@ -12,6 +12,7 @@ class Mode(enum.Enum):
     TOOLS: str = "tool_call"
     JSON: str = "json_mode"
     MD_JSON: str = "markdown_json_mode"
+    JSON_SCHEMA: str = "json_schema_mode"
 
 
 class OpenAISchema(BaseModel):
@@ -147,13 +148,7 @@ class OpenAISchema(BaseModel):
                 context=validation_context,
                 strict=strict,
             )
-        elif mode == Mode.JSON:
-            return cls.model_validate_json(
-                message.content,
-                context=validation_context,
-                strict=strict,
-            )
-        elif mode == Mode.MD_JSON:
+        elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
             return cls.model_validate_json(
                 message.content,
                 context=validation_context,
@@ -211,13 +206,7 @@ class OpenAISchema(BaseModel):
                 context=validation_context,
                 strict=strict,
             )
-        elif mode == Mode.JSON:
-            return cls.model_validate_json(
-                message.content,
-                context=validation_context,
-                strict=strict,
-            )
-        elif mode == Mode.MD_JSON:
+        elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
             return cls.model_validate_json(
                 message.content,
                 context=validation_context,
diff --git a/instructor/patch.py b/instructor/patch.py
index 418c6c1..bb24fdf 100644
--- a/instructor/patch.py
+++ b/instructor/patch.py
@@ -91,26 +91,28 @@ def handle_response_model(
                 "type": "function",
                 "function": {"name": response_model.openai_schema["name"]},
             }
-        elif mode == Mode.JSON or mode == Mode.MD_JSON:
+        elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
+            # If it's a JSON mode, we need to massage the prompt a bit
+            # in order to get the response we want in JSON format
+            message = f"""
+                As a genius expert, your task is to understand the content and provide 
+                the parsed objects in json that match the following json_schema:\n
+                {response_model.model_json_schema()['properties']}
+                """
+            # Check for nested models
+            if "$defs" in response_model.model_json_schema():
+                message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
+
             if mode == Mode.JSON:
                 new_kwargs["response_format"] = {"type": "json_object"}
-                message = f"""Make sure that your response to any message matches the json_schema below,
-                            do not deviate at all: \n{response_model.model_json_schema()['properties']}
-                            """
-                # Check for nested models
-                if "$defs" in response_model.model_json_schema():
-                    message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
 
-            else:
-                message = f"""
-                    As a genius expert, your task is to understand the content and provide 
-                    the parsed objects in json that match the following json_schema (do not deviate at all and its okay if you cant be exact):\n
-                    {response_model.model_json_schema()['properties']}
-                    """
-                # Check for nested models
-                if "$defs" in response_model.model_json_schema():
-                    message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
+            elif mode == Mode.JSON_SCHEMA:
+                new_kwargs["response_format"] = {
+                    "type": "json_object",
+                    "schema": response_model.model_json_schema(),
+                }
 
+            elif mode == Mode.MD_JSON:
                 new_kwargs["messages"].append(
                     {
                         "role": "assistant",
diff --git a/tests/openai/evals/conftest.py b/tests/openai/conftest.py
similarity index 61%
rename from tests/openai/evals/conftest.py
rename to tests/openai/conftest.py
index 6039834..b41f260 100644
--- a/tests/openai/evals/conftest.py
+++ b/tests/openai/conftest.py
@@ -22,6 +22,15 @@ def client():
                 base_url="https://braintrustproxy.com/v1",
             )
         )
+    elif (
+        os.environ.get("OPENAI_BASE_URL", None)
+        == "https://api.endpoints.anyscale.com/v1"
+    ):
+        yield OpenAI(
+            api_key=os.environ["OPENAI_API_KEY"],
+            base_url="https://api.endpoints.anyscale.com/v1",
+        )
+
     else:
         yield OpenAI()
 
@@ -35,5 +44,13 @@ def aclient():
                 base_url="https://braintrustproxy.com/v1",
             )
         )
+    elif (
+        os.environ.get("OPENAI_BASE_URL", None)
+        == "https://api.endpoints.anyscale.com/v1"
+    ):
+        yield AsyncOpenAI(
+            api_key=os.environ["OPENAI_API_KEY"],
+            base_url="https://api.endpoints.anyscale.com/v1",
+        )
     else:
         yield AsyncOpenAI()
diff --git a/tests/openai/evals/test_classification_enums.py b/tests/openai/evals/test_classification_enums.py
index a29d391..e9d7597 100644
--- a/tests/openai/evals/test_classification_enums.py
+++ b/tests/openai/evals/test_classification_enums.py
@@ -8,6 +8,7 @@ import instructor
 from pydantic import BaseModel
 
 from instructor.function_calls import Mode
+from tests.openai.util import models, modes
 
 
 class Labels(str, enum.Enum):
@@ -23,8 +24,6 @@ class SinglePrediction(BaseModel):
     class_label: Labels
 
 
-models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
-modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
 data = [
     (
         "I am a spammer",
diff --git a/tests/openai/evals/test_classification_literals.py b/tests/openai/evals/test_classification_literals.py
index 2a53baa..5c00e75 100644
--- a/tests/openai/evals/test_classification_literals.py
+++ b/tests/openai/evals/test_classification_literals.py
@@ -7,6 +7,7 @@ import instructor
 from pydantic import BaseModel
 
 from instructor.function_calls import Mode
+from tests.openai.util import models, modes
 
 
 class SinglePrediction(BaseModel):
@@ -17,8 +18,6 @@ class SinglePrediction(BaseModel):
     class_label: Literal["spam", "not_spam"]
 
 
-models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
-modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
 data = [
     ("I am a spammer", "spam"),
     ("I am not a spammer", "not_spam"),
diff --git a/tests/openai/evals/test_entities.py b/tests/openai/evals/test_entities.py
index b162479..8c74ab0 100644
--- a/tests/openai/evals/test_entities.py
+++ b/tests/openai/evals/test_entities.py
@@ -6,6 +6,7 @@ import pytest
 import instructor
 
 from instructor.function_calls import Mode
+from tests.openai.util import models, modes
 
 
 class Property(BaseModel):
@@ -84,10 +85,6 @@ The contract can be terminated with a 30-day notice, unless there are outstandin
 """
 
 
-models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
-modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
-
-
 @pytest.mark.parametrize("model, mode", product(models, modes))
 def test_extract(model, mode, client):
     client = instructor.patch(client, mode=mode)
diff --git a/tests/openai/evals/test_extract_users.py b/tests/openai/evals/test_extract_users.py
index 18ef2b4..969fca3 100644
--- a/tests/openai/evals/test_extract_users.py
+++ b/tests/openai/evals/test_extract_users.py
@@ -3,6 +3,7 @@ from itertools import product
 from pydantic import BaseModel
 import instructor
 from instructor.function_calls import Mode
+from tests.openai.util import models, modes
 
 
 class UserDetails(BaseModel):
@@ -11,13 +12,11 @@ class UserDetails(BaseModel):
 
 
 # Lists for models, test data, and modes
-models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
 test_data = [
     ("Jason is 10", "Jason", 10),
     ("Alice is 25", "Alice", 25),
     ("Bob is 35", "Bob", 35),
 ]
-modes = [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS]
 
 
 @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
diff --git a/tests/openai/test_modes.py b/tests/openai/test_modes.py
index f38ca2b..bedddfd 100644
--- a/tests/openai/test_modes.py
+++ b/tests/openai/test_modes.py
@@ -1,11 +1,11 @@
-from openai import OpenAI
+from itertools import product
 from pydantic import BaseModel, Field
 from typing import List
 
 import pytest
 
 import instructor
-from instructor.function_calls import Mode
+from tests.openai.util import models, modes
 
 
 class Item(BaseModel):
@@ -18,10 +18,9 @@ class Order(BaseModel):
     customer: str
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
-def test_nested(mode):
-    client = instructor.patch(OpenAI(), mode=mode)
-
+@pytest.mark.parametrize("model, mode", product(models, modes))
+def test_nested(model, mode, client):
+    client = instructor.patch(client, mode=mode)
     content = """
     Order Details:
     Customer: Jason
@@ -33,7 +32,7 @@ def test_nested(mode):
     """
 
     resp = client.chat.completions.create(
-        model="gpt-3.5-turbo-1106",
+        model=model,
         response_model=Order,
         messages=[
             {
@@ -62,9 +61,9 @@ class LibraryRecord(BaseModel):
     library_id: str
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
-def test_complex_nested_model(mode):
-    client = instructor.patch(OpenAI(), mode=mode)
+@pytest.mark.parametrize("model, mode", product(models, modes))
+def test_complex_nested_model(model, mode, client):
+    client = instructor.patch(client, mode=mode)
 
     content = """
     Library visit details:
@@ -76,7 +75,7 @@ def test_complex_nested_model(mode):
     """
 
     resp = client.chat.completions.create(
-        model="gpt-3.5-turbo-1106",
+        model=model,
         response_model=LibraryRecord,
         messages=[
             {
diff --git a/tests/openai/test_multitask.py b/tests/openai/test_multitask.py
index 13dbc8d..23b869f 100644
--- a/tests/openai/test_multitask.py
+++ b/tests/openai/test_multitask.py
@@ -1,10 +1,10 @@
+from itertools import product
 from typing import Iterable
-from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel
 import pytest
 
 import instructor
-from instructor.function_calls import Mode
+from tests.openai.util import models, modes
 
 
 class User(BaseModel):
@@ -15,13 +15,13 @@ class User(BaseModel):
 Users = Iterable[User]
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
-def test_multi_user(mode):
-    client = instructor.patch(OpenAI(), mode=mode)
+@pytest.mark.parametrize("model, mode", product(models, modes))
+def test_multi_user(model, mode, client):
+    client = instructor.patch(client, mode=mode)
 
     def stream_extract(input: str) -> Iterable[User]:
         return client.chat.completions.create(
-            model="gpt-3.5-turbo-1106",
+            model=model,
             stream=True,
             response_model=Users,
             messages=[
@@ -50,13 +50,13 @@ def test_multi_user(mode):
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
-async def test_multi_user_tools_mode_async(mode):
-    client = instructor.patch(AsyncOpenAI(), mode=mode)
+@pytest.mark.parametrize("model, mode", product(models, modes))
+async def test_multi_user_tools_mode_async(model, mode, aclient):
+    client = instructor.patch(aclient, mode=mode)
 
     async def stream_extract(input: str) -> Iterable[User]:
         return await client.chat.completions.create(
-            model="gpt-3.5-turbo-1106",
+            model=model,
             stream=True,
             response_model=Users,
             messages=[
diff --git a/tests/openai/test_patch.py b/tests/openai/test_patch.py
index 5d353fc..e56bcdc 100644
--- a/tests/openai/test_patch.py
+++ b/tests/openai/test_patch.py
@@ -1,13 +1,9 @@
+from itertools import product
 from pydantic import BaseModel, field_validator
 import pytest
 import instructor
 
-from openai import OpenAI, AsyncOpenAI
-
-from instructor.function_calls import Mode
-
-aclient = instructor.patch(AsyncOpenAI())
-client = instructor.patch(OpenAI())
+from tests.openai.util import models, modes
 
 
 class UserExtract(BaseModel):
@@ -15,11 +11,11 @@ class UserExtract(BaseModel):
     age: int
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
-def test_runmodel(mode):
-    client = instructor.patch(OpenAI(), mode=mode)
+@pytest.mark.parametrize("model, mode", product(models, modes))
+def test_runmodel(model, mode, client):
+    client = instructor.patch(client, mode=mode)
     model = client.chat.completions.create(
-        model="gpt-3.5-turbo-1106",
+        model=model,
         response_model=UserExtract,
         max_retries=2,
         messages=[
@@ -34,12 +30,12 @@ def test_runmodel(mode):
     ), "The raw response should be available from OpenAI"
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
+@pytest.mark.parametrize("model, mode", product(models, modes))
 @pytest.mark.asyncio
-async def test_runmodel_async(mode):
-    aclient = instructor.patch(AsyncOpenAI(), mode=mode)
+async def test_runmodel_async(model, mode, aclient):
+    aclient = instructor.patch(aclient, mode=mode)
     model = await aclient.chat.completions.create(
-        model="gpt-3.5-turbo-1106",
+        model=model,
         response_model=UserExtract,
         max_retries=2,
         messages=[
@@ -62,15 +58,17 @@ class UserExtractValidated(BaseModel):
     @classmethod
     def validate_name(cls, v):
         if v.upper() != v:
-            raise ValueError("Name should be uppercase")
+            raise ValueError(
+                "Name should be uppercase, make sure to use the `uppercase` version of the name"
+            )
         return v
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON])
-def test_runmodel_validator(mode):
-    client = instructor.patch(OpenAI(), mode=mode)
+@pytest.mark.parametrize("model, mode", product(models, modes))
+def test_runmodel_validator(model, mode, client):
+    client = instructor.patch(client, mode=mode)
     model = client.chat.completions.create(
-        model="gpt-3.5-turbo-1106",
+        model=model,
         response_model=UserExtractValidated,
         max_retries=2,
         messages=[
@@ -84,12 +82,12 @@ def test_runmodel_validator(mode):
     ), "The raw response should be available from OpenAI"
 
 
-@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON])
+@pytest.mark.parametrize("model, mode", product(models, modes))
 @pytest.mark.asyncio
-async def test_runmodel_async_validator(mode):
-    aclient = instructor.patch(AsyncOpenAI(), mode=mode)
+async def test_runmodel_async_validator(model, mode, aclient):
+    aclient = instructor.patch(aclient, mode=mode)
     model = await aclient.chat.completions.create(
-        model="gpt-3.5-turbo-1106",
+        model=model,
         response_model=UserExtractValidated,
         max_retries=2,
         messages=[
diff --git a/tests/openai/test_validators.py b/tests/openai/test_validators.py
index ef62836..b5f46a6 100644
--- a/tests/openai/test_validators.py
+++ b/tests/openai/test_validators.py
@@ -1,17 +1,16 @@
+from itertools import product
 import pytest
 
 import instructor
 
 from typing_extensions import Annotated
 from pydantic import BaseModel, AfterValidator, BeforeValidator, ValidationError
-from openai import OpenAI
 
 from instructor.dsl.validators import llm_validator
-
-client = instructor.patch(OpenAI())
+from tests.openai.util import models, modes
 
 
-def test_patch_completes_successfully():
+def test_patch_completes_successfully(client):
     class Response(BaseModel):
         message: Annotated[
             str, AfterValidator(instructor.openai_moderation(client=client))
@@ -21,13 +20,18 @@ def test_patch_completes_successfully():
         Response(message="I want to make them suffer the consequences")
 
 
-def test_runmodel_validator_error():
+@pytest.mark.parametrize("model, mode", product(models, modes))
+def test_runmodel_validator_error(model, mode, client):
+    client = instructor.patch(client, mode=mode)
+
     class QuestionAnswerNoEvil(BaseModel):
         question: str
         answer: Annotated[
             str,
             BeforeValidator(
-                llm_validator("don't say objectionable things", openai_client=client)
+                llm_validator(
+                    "don't say objectionable things", model=model, openai_client=client
+                )
             ),
         ]
 
diff --git a/tests/openai/util.py b/tests/openai/util.py
new file mode 100644
index 0000000..e6a8e62
--- /dev/null
+++ b/tests/openai/util.py
@@ -0,0 +1,18 @@
+import os
+import instructor
+
+if os.getenv("OPENAI_BASE_URL", None) == "https://api.endpoints.anyscale.com/v1":
+    models = ["mistralai/Mistral-7B-Instruct-v0.1"]  # sole model for Anyscale runs
+    modes = [instructor.Mode.JSON_SCHEMA]  # sole mode for Anyscale runs
+else:  # any other (or unset) OPENAI_BASE_URL: gpt-* models, non-schema modes
+    models = ["gpt-3.5-turbo-1106", "gpt-4", "gpt-4-1106-preview"]
+    modes = [
+        instructor.Mode.FUNCTIONS,
+        instructor.Mode.JSON,
+        instructor.Mode.TOOLS,
+        instructor.Mode.MD_JSON,
+    ]
+
+if __name__ == "__main__":  # run directly to print the selected models and modes
+    print(models)
+    print(modes)