fix: black and synthetic examples

This commit is contained in:
Jason Liu
2024-03-04 15:27:52 -05:00
parent ed2a70254a
commit edc5ef684b
7 changed files with 155 additions and 33 deletions
+63
View File
@@ -0,0 +1,63 @@
# How should I include examples?
To make your model and prompt clearer and easier to use, we recommend putting examples directly in the `json_schema_extra` of your Pydantic model. This keeps the examples next to the fields they illustrate, so they are easy to find and to understand in the context of your model's schema.
```python
import openai
import instructor
from typing import Iterable
from pydantic import BaseModel, Field, ConfigDict
# Wrap the OpenAI client with instructor.patch; the wrapped client is the one
# called with `response_model=` in get_synthetic_data below.
client = instructor.patch(openai.OpenAI())
class SyntheticQA(BaseModel):
    # One question/answer pair.
    # NOTE: documented with comments rather than a docstring so that the
    # generated JSON schema is left exactly as it was.

    # The pairs below are supplied through `json_schema_extra`, under the
    # "examples" key.
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "question": "What is the capital of France?",
                    "answer": "Paris",
                },
                {
                    "question": "What is the largest planet in our solar system?",
                    "answer": "Jupiter",
                },
                {
                    "question": "Who wrote 'To Kill a Mockingbird'?",
                    "answer": "Harper Lee",
                },
                {
                    "question": "What element does 'O' represent on the periodic table?",
                    "answer": "Oxygen",
                },
            ]
        }
    )

    question: str
    answer: str
def get_synthetic_data() -> Iterable[SyntheticQA]:
    """Ask the chat model to repeat the examples attached to ``SyntheticQA``.

    Returns:
        The result of ``client.chat.completions.create`` when called with
        ``response_model=Iterable[SyntheticQA]``.
    """
    system_message = {"role": "system", "content": "Generate synthetic examples"}
    user_message = {
        "role": "user",
        "content": "Generate the exact examples you see in the examples of this prompt. ",
    }
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        response_model=Iterable[SyntheticQA],
    )  # type: ignore
if __name__ == "__main__":
    # Print every generated pair, one per line.
    for qa_pair in get_synthetic_data():
        print(qa_pair)

    # Sample output recorded with the example:
    #
    # question='What is the capital of France?' answer='Paris'
    # question='What is the largest planet in our solar system?' answer='Jupiter'
    # question="Who wrote 'To Kill a Mockingbird'?" answer='Harper Lee'
    # question="What element does 'O' represent on the periodic table?" answer='Oxygen'
```
+1 -1
View File
@@ -23,4 +23,4 @@ resp = patched_chat(
},
],
)
print(resp)
print(resp)
+56
View File
@@ -0,0 +1,56 @@
import openai
import instructor
from typing import Iterable
from pydantic import BaseModel, ConfigDict
# Wrap the OpenAI client with instructor.patch; the wrapped client is the one
# called with `response_model=` in get_synthetic_data below.
client = instructor.patch(openai.OpenAI())
class SyntheticQA(BaseModel):
    # One question/answer pair.
    # NOTE: documented with comments rather than a docstring so that the
    # generated JSON schema is left exactly as it was.
    question: str
    answer: str

    # Sample pairs supplied through `json_schema_extra`, under the "examples"
    # key. Each (question, answer) tuple expands to one example dict.
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"question": sample_question, "answer": sample_answer}
                for sample_question, sample_answer in (
                    ("What is the capital of France?", "Paris"),
                    ("What is the largest planet in our solar system?", "Jupiter"),
                    ("Who wrote 'To Kill a Mockingbird'?", "Harper Lee"),
                    (
                        "What element does 'O' represent on the periodic table?",
                        "Oxygen",
                    ),
                )
            ]
        }
    )
def get_synthetic_data() -> Iterable[SyntheticQA]:
    """Request the examples attached to ``SyntheticQA`` back from the chat model.

    Returns:
        The result of ``client.chat.completions.create`` when called with
        ``response_model=Iterable[SyntheticQA]``.
    """
    # Gather the request arguments first so the call itself stays short.
    request = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "Generate synthetic examples"},
            {
                "role": "user",
                "content": "Generate the exact examples you see in the examples of this prompt. ",
            },
        ],
        "response_model": Iterable[SyntheticQA],
    }
    return client.chat.completions.create(**request)  # type: ignore
if __name__ == "__main__":
    # Print every generated pair, one per line.
    for qa_pair in get_synthetic_data():
        print(qa_pair)

    # Sample output recorded with the example:
    #
    # question='What is the capital of France?' answer='Paris'
    # question='What is the largest planet in our solar system?' answer='Jupiter'
    # question="Who wrote 'To Kill a Mockingbird'?" answer='Harper Lee'
    # question="What element does 'O' represent on the periodic table?" answer='Oxygen'
+8 -8
View File
@@ -12,13 +12,14 @@ import instructor
load_dotenv(find_dotenv())
IMAGE_FILE = "image-file.txt" # file with all the images to be processed
IMAGE_FILE = "image-file.txt" # file with all the images to be processed
# Add logger
logging.basicConfig()
logger = logging.getLogger("app")
logger.setLevel("INFO")
class Competitor(BaseModel):
name: str
features: Optional[List[str]]
@@ -30,13 +31,12 @@ class Industry(BaseModel):
Represents competitors from a specific industry extracted from an image using AI.
"""
name: str = Field(
description="The name of the industry"
)
name: str = Field(description="The name of the industry")
competitor_list: List[Competitor] = Field(
description="A list of competitors for this industry"
)
class Competition(BaseModel):
"""
Represents competitors extracted from an image using AI.
@@ -49,10 +49,10 @@ class Competition(BaseModel):
description="A list of industries and their competitors"
)
# Define clients
client_image = instructor.patch(
OpenAI(), mode=instructor.Mode.MD_JSON
)
client_image = instructor.patch(OpenAI(), mode=instructor.Mode.MD_JSON)
# Define functions
def read_images(image_urls: List[str]) -> Competition:
@@ -85,7 +85,6 @@ def read_images(image_urls: List[str]) -> Competition:
)
def process_and_identify_competitors():
"""
Main function to process the image list file and identify competitors.
@@ -121,6 +120,7 @@ def process_and_identify_competitors():
indent=4,
)
if __name__ == "__main__":
process_and_identify_competitors()
+22 -22
View File
@@ -53,27 +53,27 @@ MODEL_COSTS: Dict[
ModelNames,
Union[Dict[str, float], float],
] = {
"gpt-4-0125-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4-turbo-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4-1106-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4-vision-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
"gpt-4-0314": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
"gpt-4-0613": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
"gpt-4-32k": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
"gpt-4-32k-0314": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
"gpt-4-32k-0613": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
"gpt-3.5-turbo": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000},
"gpt-3.5-turbo-16k": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000},
"gpt-3.5-turbo-0301": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000},
"gpt-3.5-turbo-0613": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000},
"gpt-3.5-turbo-1106": {"prompt": 0.0010 / 1000, "completion": 0.0020 / 1000},
"gpt-3.5-turbo-0125": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000},
"gpt-3.5-turbo-16k-0613": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000},
"gpt-3.5-turbo-instruct": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000},
"text-embedding-3-small": 0.00002 / 1000,
"text-embedding-3-large": 0.00013 / 1000,
"text-embedding-ada-002": 0.00010 / 1000,
"gpt-4-0125-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4-turbo-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4-1106-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4-vision-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000},
"gpt-4": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
"gpt-4-0314": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
"gpt-4-0613": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
"gpt-4-32k": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
"gpt-4-32k-0314": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
"gpt-4-32k-0613": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
"gpt-3.5-turbo": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000},
"gpt-3.5-turbo-16k": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000},
"gpt-3.5-turbo-0301": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000},
"gpt-3.5-turbo-0613": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000},
"gpt-3.5-turbo-1106": {"prompt": 0.0010 / 1000, "completion": 0.0020 / 1000},
"gpt-3.5-turbo-0125": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000},
"gpt-3.5-turbo-16k-0613": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000},
"gpt-3.5-turbo-instruct": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000},
"text-embedding-3-small": 0.00002 / 1000,
"text-embedding-3-large": 0.00013 / 1000,
"text-embedding-ada-002": 0.00010 / 1000,
}
@@ -83,7 +83,7 @@ def get_model_cost(
"""Get the cost details for a given model."""
if model in MODEL_COSTS:
return MODEL_COSTS[model]
if model.startswith("gpt-3.5-turbo-16k"):
return MODEL_COSTS["gpt-3.5-turbo-16k"]
elif model.startswith("gpt-3.5-turbo"):
+4 -2
View File
@@ -134,11 +134,13 @@ def handle_response_model(
elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
# If its a JSON Mode we need to massage the prompt a bit
# in order to get the response we want in a json format
message = dedent(f"""
message = dedent(
f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n
{response_model.model_json_schema()['properties']}
""")
"""
)
# Check for nested models
if "$defs" in response_model.model_json_schema():
message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}"
+1
View File
@@ -163,6 +163,7 @@ nav:
- Image to Ad Copy: 'examples/image_to_ad_copy.md'
- Ollama: 'examples/ollama.md'
- SQLModel Integration: 'examples/sqlmodel.md'
- Including Examples in Prompt: 'examples/examples.md'
- Hub:
- Introducing Instructor Hub: 'hub/index.md'
- Single Classification Model: 'hub/single_classification.md'