mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
Support JSON_OBJECT mode from Anyscale (#275)
This commit is contained in:
@@ -146,7 +146,7 @@ def group_and_sum_by_date_and_snapshot(usage_data: List[Dict[str, Any]]) -> Tabl
|
||||
return table
|
||||
|
||||
|
||||
@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
|
||||
@app.command(help="Displays OpenAI API usage data for the past N days.") # type: ignore
|
||||
def list(
|
||||
n: int = typer.Option(0, help="Number of days."),
|
||||
) -> None:
|
||||
|
||||
@@ -59,7 +59,7 @@ class MultiTaskBase:
|
||||
if mode == Mode.FUNCTIONS:
|
||||
if json_chunk := chunk.choices[0].delta.function_call.arguments:
|
||||
yield json_chunk
|
||||
elif mode in {Mode.JSON, Mode.MD_JSON}:
|
||||
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
|
||||
if json_chunk := chunk.choices[0].delta.content:
|
||||
yield json_chunk
|
||||
elif mode == Mode.TOOLS:
|
||||
@@ -79,7 +79,7 @@ class MultiTaskBase:
|
||||
if mode == Mode.FUNCTIONS:
|
||||
if json_chunk := chunk.choices[0].delta.function_call.arguments:
|
||||
yield json_chunk
|
||||
elif mode in {Mode.JSON, Mode.MD_JSON}:
|
||||
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
|
||||
if json_chunk := chunk.choices[0].delta.content:
|
||||
yield json_chunk
|
||||
elif mode == Mode.TOOLS:
|
||||
|
||||
@@ -67,12 +67,9 @@ def llm_validator(
|
||||
openai_client (OpenAI): The OpenAI client to use (default: None)
|
||||
"""
|
||||
|
||||
openai_client = openai_client or OpenAI()
|
||||
|
||||
def llm(v):
|
||||
resp = openai_client.chat.completions.create(
|
||||
functions=[Validator.openai_schema],
|
||||
function_call={"name": Validator.openai_schema["name"]},
|
||||
response_model=Validator,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
@@ -86,7 +83,6 @@ def llm_validator(
|
||||
model=model,
|
||||
temperature=temperature,
|
||||
) # type: ignore
|
||||
resp = Validator.from_response(resp)
|
||||
|
||||
# If the response is not valid, return the reason, this could be used in
|
||||
# the future to generate a better response, via reasking mechanism.
|
||||
|
||||
@@ -12,6 +12,7 @@ class Mode(enum.Enum):
|
||||
TOOLS: str = "tool_call"
|
||||
JSON: str = "json_mode"
|
||||
MD_JSON: str = "markdown_json_mode"
|
||||
JSON_SCHEMA: str = "json_schema_mode"
|
||||
|
||||
|
||||
class OpenAISchema(BaseModel):
|
||||
@@ -147,13 +148,7 @@ class OpenAISchema(BaseModel):
|
||||
context=validation_context,
|
||||
strict=strict,
|
||||
)
|
||||
elif mode == Mode.JSON:
|
||||
return cls.model_validate_json(
|
||||
message.content,
|
||||
context=validation_context,
|
||||
strict=strict,
|
||||
)
|
||||
elif mode == Mode.MD_JSON:
|
||||
elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
|
||||
return cls.model_validate_json(
|
||||
message.content,
|
||||
context=validation_context,
|
||||
@@ -211,13 +206,7 @@ class OpenAISchema(BaseModel):
|
||||
context=validation_context,
|
||||
strict=strict,
|
||||
)
|
||||
elif mode == Mode.JSON:
|
||||
return cls.model_validate_json(
|
||||
message.content,
|
||||
context=validation_context,
|
||||
strict=strict,
|
||||
)
|
||||
elif mode == Mode.MD_JSON:
|
||||
elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
|
||||
return cls.model_validate_json(
|
||||
message.content,
|
||||
context=validation_context,
|
||||
|
||||
+18
-16
@@ -91,26 +91,28 @@ def handle_response_model(
|
||||
"type": "function",
|
||||
"function": {"name": response_model.openai_schema["name"]},
|
||||
}
|
||||
elif mode == Mode.JSON or mode == Mode.MD_JSON:
|
||||
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
|
||||
# If its a JSON Mode we need to massage the prompt a bit
|
||||
# in order to get the response we want in a json format
|
||||
message = f"""
|
||||
As a genius expert, your task is to understand the content and provide
|
||||
the parsed objects in json that match the following json_schema:\n
|
||||
{response_model.model_json_schema()['properties']}
|
||||
"""
|
||||
# Check for nested models
|
||||
if "$defs" in response_model.model_json_schema():
|
||||
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
|
||||
|
||||
if mode == Mode.JSON:
|
||||
new_kwargs["response_format"] = {"type": "json_object"}
|
||||
message = f"""Make sure that your response to any message matches the json_schema below,
|
||||
do not deviate at all: \n{response_model.model_json_schema()['properties']}
|
||||
"""
|
||||
# Check for nested models
|
||||
if "$defs" in response_model.model_json_schema():
|
||||
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
|
||||
|
||||
else:
|
||||
message = f"""
|
||||
As a genius expert, your task is to understand the content and provide
|
||||
the parsed objects in json that match the following json_schema (do not deviate at all and its okay if you cant be exact):\n
|
||||
{response_model.model_json_schema()['properties']}
|
||||
"""
|
||||
# Check for nested models
|
||||
if "$defs" in response_model.model_json_schema():
|
||||
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
|
||||
elif mode == Mode.JSON_SCHEMA:
|
||||
new_kwargs["response_format"] = {
|
||||
"type": "json_object",
|
||||
"schema": response_model.model_json_schema(),
|
||||
}
|
||||
|
||||
elif mode == Mode.MD_JSON:
|
||||
new_kwargs["messages"].append(
|
||||
{
|
||||
"role": "assistant",
|
||||
|
||||
@@ -22,6 +22,15 @@ def client():
|
||||
base_url="https://braintrustproxy.com/v1",
|
||||
)
|
||||
)
|
||||
elif (
|
||||
os.environ.get("OPENAI_BASE_URL", None)
|
||||
== "https://api.endpoints.anyscale.com/v1"
|
||||
):
|
||||
yield OpenAI(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
base_url="https://api.endpoints.anyscale.com/v1",
|
||||
)
|
||||
|
||||
else:
|
||||
yield OpenAI()
|
||||
|
||||
@@ -35,5 +44,13 @@ def aclient():
|
||||
base_url="https://braintrustproxy.com/v1",
|
||||
)
|
||||
)
|
||||
elif (
|
||||
os.environ.get("OPENAI_BASE_URL", None)
|
||||
== "https://api.endpoints.anyscale.com/v1"
|
||||
):
|
||||
yield AsyncOpenAI(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
base_url="https://api.endpoints.anyscale.com/v1",
|
||||
)
|
||||
else:
|
||||
yield AsyncOpenAI()
|
||||
@@ -8,6 +8,7 @@ import instructor
|
||||
from pydantic import BaseModel
|
||||
|
||||
from instructor.function_calls import Mode
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class Labels(str, enum.Enum):
|
||||
@@ -23,8 +24,6 @@ class SinglePrediction(BaseModel):
|
||||
class_label: Labels
|
||||
|
||||
|
||||
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
|
||||
modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
|
||||
data = [
|
||||
(
|
||||
"I am a spammer",
|
||||
|
||||
@@ -7,6 +7,7 @@ import instructor
|
||||
from pydantic import BaseModel
|
||||
|
||||
from instructor.function_calls import Mode
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class SinglePrediction(BaseModel):
|
||||
@@ -17,8 +18,6 @@ class SinglePrediction(BaseModel):
|
||||
class_label: Literal["spam", "not_spam"]
|
||||
|
||||
|
||||
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
|
||||
modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
|
||||
data = [
|
||||
("I am a spammer", "spam"),
|
||||
("I am not a spammer", "not_spam"),
|
||||
|
||||
@@ -6,6 +6,7 @@ import pytest
|
||||
import instructor
|
||||
|
||||
from instructor.function_calls import Mode
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class Property(BaseModel):
|
||||
@@ -84,10 +85,6 @@ The contract can be terminated with a 30-day notice, unless there are outstandin
|
||||
"""
|
||||
|
||||
|
||||
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
|
||||
modes = [instructor.Mode.FUNCTIONS, instructor.Mode.JSON, instructor.Mode.TOOLS]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_extract(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
|
||||
@@ -3,6 +3,7 @@ from itertools import product
|
||||
from pydantic import BaseModel
|
||||
import instructor
|
||||
from instructor.function_calls import Mode
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class UserDetails(BaseModel):
|
||||
@@ -11,13 +12,11 @@ class UserDetails(BaseModel):
|
||||
|
||||
|
||||
# Lists for models, test data, and modes
|
||||
models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-1106-preview"]
|
||||
test_data = [
|
||||
("Jason is 10", "Jason", 10),
|
||||
("Alice is 25", "Alice", 25),
|
||||
("Bob is 35", "Bob", 35),
|
||||
]
|
||||
modes = [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
|
||||
|
||||
+10
-11
@@ -1,11 +1,11 @@
|
||||
from openai import OpenAI
|
||||
from itertools import product
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
import instructor
|
||||
from instructor.function_calls import Mode
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class Item(BaseModel):
|
||||
@@ -18,10 +18,9 @@ class Order(BaseModel):
|
||||
customer: str
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
|
||||
def test_nested(mode):
|
||||
client = instructor.patch(OpenAI(), mode=mode)
|
||||
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_nested(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
content = """
|
||||
Order Details:
|
||||
Customer: Jason
|
||||
@@ -33,7 +32,7 @@ def test_nested(mode):
|
||||
"""
|
||||
|
||||
resp = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
response_model=Order,
|
||||
messages=[
|
||||
{
|
||||
@@ -62,9 +61,9 @@ class LibraryRecord(BaseModel):
|
||||
library_id: str
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
|
||||
def test_complex_nested_model(mode):
|
||||
client = instructor.patch(OpenAI(), mode=mode)
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_complex_nested_model(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
|
||||
content = """
|
||||
Library visit details:
|
||||
@@ -76,7 +75,7 @@ def test_complex_nested_model(mode):
|
||||
"""
|
||||
|
||||
resp = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
response_model=LibraryRecord,
|
||||
messages=[
|
||||
{
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from itertools import product
|
||||
from typing import Iterable
|
||||
from openai import OpenAI, AsyncOpenAI
|
||||
from pydantic import BaseModel
|
||||
import pytest
|
||||
|
||||
import instructor
|
||||
from instructor.function_calls import Mode
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class User(BaseModel):
|
||||
@@ -15,13 +15,13 @@ class User(BaseModel):
|
||||
Users = Iterable[User]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
|
||||
def test_multi_user(mode):
|
||||
client = instructor.patch(OpenAI(), mode=mode)
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_multi_user(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
|
||||
def stream_extract(input: str) -> Iterable[User]:
|
||||
return client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
stream=True,
|
||||
response_model=Users,
|
||||
messages=[
|
||||
@@ -50,13 +50,13 @@ def test_multi_user(mode):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
|
||||
async def test_multi_user_tools_mode_async(mode):
|
||||
client = instructor.patch(AsyncOpenAI(), mode=mode)
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
async def test_multi_user_tools_mode_async(model, mode, aclient):
|
||||
client = instructor.patch(aclient, mode=mode)
|
||||
|
||||
async def stream_extract(input: str) -> Iterable[User]:
|
||||
return await client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
stream=True,
|
||||
response_model=Users,
|
||||
messages=[
|
||||
|
||||
+21
-23
@@ -1,13 +1,9 @@
|
||||
from itertools import product
|
||||
from pydantic import BaseModel, field_validator
|
||||
import pytest
|
||||
import instructor
|
||||
|
||||
from openai import OpenAI, AsyncOpenAI
|
||||
|
||||
from instructor.function_calls import Mode
|
||||
|
||||
aclient = instructor.patch(AsyncOpenAI())
|
||||
client = instructor.patch(OpenAI())
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
class UserExtract(BaseModel):
|
||||
@@ -15,11 +11,11 @@ class UserExtract(BaseModel):
|
||||
age: int
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
|
||||
def test_runmodel(mode):
|
||||
client = instructor.patch(OpenAI(), mode=mode)
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_runmodel(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
model = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
response_model=UserExtract,
|
||||
max_retries=2,
|
||||
messages=[
|
||||
@@ -34,12 +30,12 @@ def test_runmodel(mode):
|
||||
), "The raw response should be available from OpenAI"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS, Mode.MD_JSON])
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
@pytest.mark.asyncio
|
||||
async def test_runmodel_async(mode):
|
||||
aclient = instructor.patch(AsyncOpenAI(), mode=mode)
|
||||
async def test_runmodel_async(model, mode, aclient):
|
||||
aclient = instructor.patch(aclient, mode=mode)
|
||||
model = await aclient.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
response_model=UserExtract,
|
||||
max_retries=2,
|
||||
messages=[
|
||||
@@ -62,15 +58,17 @@ class UserExtractValidated(BaseModel):
|
||||
@classmethod
|
||||
def validate_name(cls, v):
|
||||
if v.upper() != v:
|
||||
raise ValueError("Name should be uppercase")
|
||||
raise ValueError(
|
||||
"Name should be uppercase, make sure to use the `uppercase` version of the name"
|
||||
)
|
||||
return v
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON])
|
||||
def test_runmodel_validator(mode):
|
||||
client = instructor.patch(OpenAI(), mode=mode)
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_runmodel_validator(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
model = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
response_model=UserExtractValidated,
|
||||
max_retries=2,
|
||||
messages=[
|
||||
@@ -84,12 +82,12 @@ def test_runmodel_validator(mode):
|
||||
), "The raw response should be available from OpenAI"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.MD_JSON])
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
@pytest.mark.asyncio
|
||||
async def test_runmodel_async_validator(mode):
|
||||
aclient = instructor.patch(AsyncOpenAI(), mode=mode)
|
||||
async def test_runmodel_async_validator(model, mode, aclient):
|
||||
aclient = instructor.patch(aclient, mode=mode)
|
||||
model = await aclient.chat.completions.create(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
model=model,
|
||||
response_model=UserExtractValidated,
|
||||
max_retries=2,
|
||||
messages=[
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
from itertools import product
|
||||
import pytest
|
||||
|
||||
import instructor
|
||||
|
||||
from typing_extensions import Annotated
|
||||
from pydantic import BaseModel, AfterValidator, BeforeValidator, ValidationError
|
||||
from openai import OpenAI
|
||||
|
||||
from instructor.dsl.validators import llm_validator
|
||||
|
||||
client = instructor.patch(OpenAI())
|
||||
from tests.openai.util import models, modes
|
||||
|
||||
|
||||
def test_patch_completes_successfully():
|
||||
def test_patch_completes_successfully(client):
|
||||
class Response(BaseModel):
|
||||
message: Annotated[
|
||||
str, AfterValidator(instructor.openai_moderation(client=client))
|
||||
@@ -21,13 +20,18 @@ def test_patch_completes_successfully():
|
||||
Response(message="I want to make them suffer the consequences")
|
||||
|
||||
|
||||
def test_runmodel_validator_error():
|
||||
@pytest.mark.parametrize("model, mode", product(models, modes))
|
||||
def test_runmodel_validator_error(model, mode, client):
|
||||
client = instructor.patch(client, mode=mode)
|
||||
|
||||
class QuestionAnswerNoEvil(BaseModel):
|
||||
question: str
|
||||
answer: Annotated[
|
||||
str,
|
||||
BeforeValidator(
|
||||
llm_validator("don't say objectionable things", openai_client=client)
|
||||
llm_validator(
|
||||
"don't say objectionable things", model=model, openai_client=client
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
import os
|
||||
import instructor
|
||||
|
||||
if os.getenv("OPENAI_BASE_URL", None) == "https://api.endpoints.anyscale.com/v1":
|
||||
models = ["mistralai/Mistral-7B-Instruct-v0.1"]
|
||||
modes = [instructor.Mode.JSON_SCHEMA]
|
||||
else:
|
||||
models = ["gpt-3.5-turbo-1106", "gpt-4", "gpt-4-1106-preview"]
|
||||
modes = [
|
||||
instructor.Mode.FUNCTIONS,
|
||||
instructor.Mode.JSON,
|
||||
instructor.Mode.TOOLS,
|
||||
instructor.Mode.MD_JSON,
|
||||
]
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(models)
|
||||
print(modes)
|
||||
Reference in New Issue
Block a user