Support JSON_OBJECT mode from Anyscale (#275)

This commit is contained in:
Jason Liu
2023-12-13 00:05:54 -05:00
committed by GitHub
parent cb5385c313
commit b2a38ffe10
15 changed files with 115 additions and 98 deletions
+1 -1
View File
@@ -146,7 +146,7 @@ def group_and_sum_by_date_and_snapshot(usage_data: List[Dict[str, Any]]) -> Tabl
return table
@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
def list(
n: int = typer.Option(0, help="Number of days."),
) -> None:
+2 -2
View File
@@ -59,7 +59,7 @@ class MultiTaskBase:
if mode == Mode.FUNCTIONS:
if json_chunk := chunk.choices[0].delta.function_call.arguments:
yield json_chunk
elif mode in {Mode.JSON, Mode.MD_JSON}:
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
if json_chunk := chunk.choices[0].delta.content:
yield json_chunk
elif mode == Mode.TOOLS:
@@ -79,7 +79,7 @@ class MultiTaskBase:
if mode == Mode.FUNCTIONS:
if json_chunk := chunk.choices[0].delta.function_call.arguments:
yield json_chunk
elif mode in {Mode.JSON, Mode.MD_JSON}:
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
if json_chunk := chunk.choices[0].delta.content:
yield json_chunk
elif mode == Mode.TOOLS:
+1 -5
View File
@@ -67,12 +67,9 @@ def llm_validator(
openai_client (OpenAI): The OpenAI client to use (default: None)
"""
openai_client = openai_client or OpenAI()
def llm(v):
resp = openai_client.chat.completions.create(
functions=[Validator.openai_schema],
function_call={"name": Validator.openai_schema["name"]},
response_model=Validator,
messages=[
{
"role": "system",
@@ -86,7 +83,6 @@ def llm_validator(
model=model,
temperature=temperature,
) # type: ignore
resp = Validator.from_response(resp)
# If the response is not valid, return the reason. This could be used in
# the future to generate a better response via a re-asking mechanism.
+3 -14
View File
@@ -12,6 +12,7 @@ class Mode(enum.Enum):
TOOLS: str = "tool_call"
JSON: str = "json_mode"
MD_JSON: str = "markdown_json_mode"
JSON_SCHEMA: str = "json_schema_mode"
class OpenAISchema(BaseModel):
@@ -147,13 +148,7 @@ class OpenAISchema(BaseModel):
context=validation_context,
strict=strict,
)
elif mode == Mode.JSON:
return cls.model_validate_json(
message.content,
context=validation_context,
strict=strict,
)
elif mode == Mode.MD_JSON:
elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
return cls.model_validate_json(
message.content,
context=validation_context,
@@ -211,13 +206,7 @@ class OpenAISchema(BaseModel):
context=validation_context,
strict=strict,
)
elif mode == Mode.JSON:
return cls.model_validate_json(
message.content,
context=validation_context,
strict=strict,
)
elif mode == Mode.MD_JSON:
elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
return cls.model_validate_json(
message.content,
context=validation_context,
+18 -16
View File
@@ -91,26 +91,28 @@ def handle_response_model(
"type": "function",
"function": {"name": response_model.openai_schema["name"]},
}
elif mode == Mode.JSON or mode == Mode.MD_JSON:
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
# If it's a JSON mode, we need to massage the prompt a bit
# in order to get the response we want in JSON format
message = f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n
{response_model.model_json_schema()['properties']}
"""
# Check for nested models
if "$defs" in response_model.model_json_schema():
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
if mode == Mode.JSON:
new_kwargs["response_format"] = {"type": "json_object"}
message = f"""Make sure that your response to any message matches the json_schema below,
do not deviate at all: \n{response_model.model_json_schema()['properties']}
"""
# Check for nested models
if "$defs" in response_model.model_json_schema():
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
else:
message = f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema (do not deviate at all and its okay if you cant be exact):\n
{response_model.model_json_schema()['properties']}
"""
# Check for nested models
if "$defs" in response_model.model_json_schema():
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
elif mode == Mode.JSON_SCHEMA:
new_kwargs["response_format"] = {
"type": "json_object",
"schema": response_model.model_json_schema(),
}
elif mode == Mode.MD_JSON:
new_kwargs["messages"].append(
{
"role": "assistant",
@@ -22,6 +22,15 @@ def client():
base_url="https://braintrustproxy.com/v1",
)
)
elif (
os.environ.get("OPENAI_BASE_URL", None)
== "https://api.endpoints.anyscale.com/v1"
):
yield OpenAI(
api_key=os.environ["OPENAI_API_KEY"],
base_url="https://api.endpoints.anyscale.com/v1",
)
else:
yield OpenAI()
@@ -35,5 +44,13 @@ def aclient():
base_url="https://braintrustproxy.com/v1",
)
)
elif (
os.environ.get("OPENAI_BASE_URL", None)
== "https://api.endpoints.anyscale.com/v1"
):
yield AsyncOpenAI(
api_key=os.environ["OPENAI_API_KEY"],
base_url="https://api.endpoints.anyscale.com/v1",
)
else:
yield AsyncOpenAI()
@@ -8,6 +8,7 @@ import instructor
from pydantic import BaseModel
from instructor.function_calls import Mode
from tests.openai.util import models, modes
class Labels(str, enum.Enum):
@@ -23,8 +24,6 @@ class SinglePrediction(BaseModel):
class_label: Labels
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
data = [
(
"I am a spammer",
@@ -7,6 +7,7 @@ import instructor
from pydantic import BaseModel
from instructor.function_calls import Mode
from tests.openai.util import models, modes
class SinglePrediction(BaseModel):
@@ -17,8 +18,6 @@ class SinglePrediction(BaseModel):
class_label: Literal["spam", "not_spam"]
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
data = [
("I am a spammer", "spam"),
("I am not a spammer", "not_spam"),
+1 -4
View File
@@ -6,6 +6,7 @@ import pytest
import instructor
from instructor.function_calls import Mode
from tests.openai.util import models, modes
class Property(BaseModel):
@@ -84,10 +85,6 @@ The contract can be terminated with a 30-day notice, unless there are outstandin
"""
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_extract(model, mode, client):
client = instructor.patch(client, mode=mode)
+1 -2
View File
@@ -3,6 +3,7 @@ from itertools import product
from pydantic import BaseModel
import instructor
from instructor.function_calls import Mode
from tests.openai.util import models, modes
class UserDetails(BaseModel):
@@ -11,13 +12,11 @@ class UserDetails(BaseModel):
# Lists for models, test data, and modes
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
test_data = [
("Jason is 10", "Jason", 10),
("Alice is 25", "Alice", 25),
("Bob is 35", "Bob", 35),
]
modes = [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS]
@pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
+10 -11
View File
@@ -1,11 +1,11 @@
from openai import OpenAI
from itertools import product
from pydantic import BaseModel, Field
from typing import List
import pytest
import instructor
from instructor.function_calls import Mode
from tests.openai.util import models, modes
class Item(BaseModel):
@@ -18,10 +18,9 @@ class Order(BaseModel):
customer: str
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
def test_nested(mode):
client = instructor.patch(OpenAI(), mode=mode)
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_nested(model, mode, client):
client = instructor.patch(client, mode=mode)
content = """
Order Details:
Customer: Jason
@@ -33,7 +32,7 @@ def test_nested(mode):
"""
resp = client.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
response_model=Order,
messages=[
{
@@ -62,9 +61,9 @@ class LibraryRecord(BaseModel):
library_id: str
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
def test_complex_nested_model(mode):
client = instructor.patch(OpenAI(), mode=mode)
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_complex_nested_model(model, mode, client):
client = instructor.patch(client, mode=mode)
content = """
Library visit details:
@@ -76,7 +75,7 @@ def test_complex_nested_model(mode):
"""
resp = client.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
response_model=LibraryRecord,
messages=[
{
+10 -10
View File
@@ -1,10 +1,10 @@
from itertools import product
from typing import Iterable
from openai import OpenAI, AsyncOpenAI
from pydantic import BaseModel
import pytest
import instructor
from instructor.function_calls import Mode
from tests.openai.util import models, modes
class User(BaseModel):
@@ -15,13 +15,13 @@ class User(BaseModel):
Users = Iterable[User]
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
def test_multi_user(mode):
client = instructor.patch(OpenAI(), mode=mode)
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_multi_user(model, mode, client):
client = instructor.patch(client, mode=mode)
def stream_extract(input: str) -> Iterable[User]:
return client.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
stream=True,
response_model=Users,
messages=[
@@ -50,13 +50,13 @@ def test_multi_user(mode):
@pytest.mark.asyncio
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
async def test_multi_user_tools_mode_async(mode):
client = instructor.patch(AsyncOpenAI(), mode=mode)
@pytest.mark.parametrize("model, mode", product(models, modes))
async def test_multi_user_tools_mode_async(model, mode, aclient):
client = instructor.patch(aclient, mode=mode)
async def stream_extract(input: str) -> Iterable[User]:
return await client.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
stream=True,
response_model=Users,
messages=[
+21 -23
View File
@@ -1,13 +1,9 @@
from itertools import product
from pydantic import BaseModel, field_validator
import pytest
import instructor
from openai import OpenAI, AsyncOpenAI
from instructor.function_calls import Mode
aclient = instructor.patch(AsyncOpenAI())
client = instructor.patch(OpenAI())
from tests.openai.util import models, modes
class UserExtract(BaseModel):
@@ -15,11 +11,11 @@ class UserExtract(BaseModel):
age: int
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
def test_runmodel(mode):
client = instructor.patch(OpenAI(), mode=mode)
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_runmodel(model, mode, client):
client = instructor.patch(client, mode=mode)
model = client.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
response_model=UserExtract,
max_retries=2,
messages=[
@@ -34,12 +30,12 @@ def test_runmodel(mode):
), "The raw response should be available from OpenAI"
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
@pytest.mark.parametrize("model, mode", product(models, modes))
@pytest.mark.asyncio
async def test_runmodel_async(mode):
aclient = instructor.patch(AsyncOpenAI(), mode=mode)
async def test_runmodel_async(model, mode, aclient):
aclient = instructor.patch(aclient, mode=mode)
model = await aclient.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
response_model=UserExtract,
max_retries=2,
messages=[
@@ -62,15 +58,17 @@ class UserExtractValidated(BaseModel):
@classmethod
def validate_name(cls, v):
if v.upper() != v:
raise ValueError("Name should be uppercase")
raise ValueError(
"Name should be uppercase, make sure to use the `uppercase` version of the name"
)
return v
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON])
def test_runmodel_validator(mode):
client = instructor.patch(OpenAI(), mode=mode)
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_runmodel_validator(model, mode, client):
client = instructor.patch(client, mode=mode)
model = client.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
response_model=UserExtractValidated,
max_retries=2,
messages=[
@@ -84,12 +82,12 @@ def test_runmodel_validator(mode):
), "The raw response should be available from OpenAI"
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON])
@pytest.mark.parametrize("model, mode", product(models, modes))
@pytest.mark.asyncio
async def test_runmodel_async_validator(mode):
aclient = instructor.patch(AsyncOpenAI(), mode=mode)
async def test_runmodel_async_validator(model, mode, aclient):
aclient = instructor.patch(aclient, mode=mode)
model = await aclient.chat.completions.create(
model="gpt-3.5-turbo-1106",
model=model,
response_model=UserExtractValidated,
max_retries=2,
messages=[
+10 -6
View File
@@ -1,17 +1,16 @@
from itertools import product
import pytest
import instructor
from typing_extensions import Annotated
from pydantic import BaseModel, AfterValidator, BeforeValidator, ValidationError
from openai import OpenAI
from instructor.dsl.validators import llm_validator
client = instructor.patch(OpenAI())
from tests.openai.util import models, modes
def test_patch_completes_successfully():
def test_patch_completes_successfully(client):
class Response(BaseModel):
message: Annotated[
str, AfterValidator(instructor.openai_moderation(client=client))
@@ -21,13 +20,18 @@ def test_patch_completes_successfully():
Response(message="I want to make them suffer the consequences")
def test_runmodel_validator_error():
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_runmodel_validator_error(model, mode, client):
client = instructor.patch(client, mode=mode)
class QuestionAnswerNoEvil(BaseModel):
question: str
answer: Annotated[
str,
BeforeValidator(
llm_validator("don't say objectionable things", openai_client=client)
llm_validator(
"don't say objectionable things", model=model, openai_client=client
)
),
]
+18
View File
@@ -0,0 +1,18 @@
import os
import instructor
# Pick the `models` / `modes` lists exported by this module based on the
# OPENAI_BASE_URL environment variable. Only an exact match with the Anyscale
# endpoint URL selects the Mistral model with JSON_SCHEMA mode; any other
# value (or an unset variable) selects the OpenAI models and modes.
# NOTE(review): presumably these lists feed the `product(models, modes)`
# parametrization in the test modules — confirm against the importers.
if os.environ.get("OPENAI_BASE_URL") != "https://api.endpoints.anyscale.com/v1":
    models = ["gpt-3.5-turbo-1106", "gpt-4", "gpt-4-1106-preview"]
    modes = [
        instructor.Mode.FUNCTIONS,
        instructor.Mode.JSON,
        instructor.Mode.TOOLS,
        instructor.Mode.MD_JSON,
    ]
else:
    models = ["mistralai/Mistral-7B-Instruct-v0.1"]
    modes = [instructor.Mode.JSON_SCHEMA]

if __name__ == "__main__":
    # Running the module directly prints the selected lists, one per line.
    print(models, modes, sep="\n")