diff --git a/docs/examples/examples.md b/docs/examples/examples.md new file mode 100644 index 0000000..10ef635 --- /dev/null +++ b/docs/examples/examples.md @@ -0,0 +1,63 @@ +# How should I include examples? + +To enhance the clarity and usability of your model and prompt, we highly recommend incorporating examples directly into the `json_schema_extra` of your Pydantic model's `model_config`. This approach not only streamlines the integration of practical examples but also ensures that they are easily accessible and understandable within the context of your model's schema. + + +```python +import openai +import instructor +from typing import Iterable +from pydantic import BaseModel, Field, ConfigDict + +client = instructor.patch(openai.OpenAI()) + + +class SyntheticQA(BaseModel): + question: str + answer: str + + model_config = ConfigDict( + json_schema_extra={ + "examples": [ + {"question": "What is the capital of France?", "answer": "Paris"}, + { + "question": "What is the largest planet in our solar system?", + "answer": "Jupiter", + }, + { + "question": "Who wrote 'To Kill a Mockingbird'?", + "answer": "Harper Lee", + }, + { + "question": "What element does 'O' represent on the periodic table?", + "answer": "Oxygen", + }, + ] + } + ) + + +def get_synthetic_data() -> Iterable[SyntheticQA]: + return client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "Generate synthetic examples"}, + { + "role": "user", + "content": "Generate the exact examples you see in the examples of this prompt. ", + }, + ], + response_model=Iterable[SyntheticQA], + ) # type: ignore + + +if __name__ == "__main__": + for example in get_synthetic_data(): + print(example) + """ + question='What is the capital of France?' answer='Paris' + question='What is the largest planet in our solar system?' answer='Jupiter' + question="Who wrote 'To Kill a Mockingbird'?" answer='Harper Lee' + question="What element does 'O' represent on the periodic table?" 
answer='Oxygen' + """ +``` \ No newline at end of file diff --git a/examples/mistral/mistral.py b/examples/mistral/mistral.py index 25fcf09..a58ddff 100644 --- a/examples/mistral/mistral.py +++ b/examples/mistral/mistral.py @@ -23,4 +23,4 @@ resp = patched_chat( }, ], ) -print(resp) \ No newline at end of file +print(resp) diff --git a/examples/synethic-data/run.py b/examples/synethic-data/run.py new file mode 100644 index 0000000..2ed06b3 --- /dev/null +++ b/examples/synethic-data/run.py @@ -0,0 +1,56 @@ +import openai +import instructor +from typing import Iterable +from pydantic import BaseModel, ConfigDict + +client = instructor.patch(openai.OpenAI()) + + +class SyntheticQA(BaseModel): + question: str + answer: str + + model_config = ConfigDict( + json_schema_extra={ + "examples": [ + {"question": "What is the capital of France?", "answer": "Paris"}, + { + "question": "What is the largest planet in our solar system?", + "answer": "Jupiter", + }, + { + "question": "Who wrote 'To Kill a Mockingbird'?", + "answer": "Harper Lee", + }, + { + "question": "What element does 'O' represent on the periodic table?", + "answer": "Oxygen", + }, + ] + } + ) + + +def get_synthetic_data() -> Iterable[SyntheticQA]: + return client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "Generate synthetic examples"}, + { + "role": "user", + "content": "Generate the exact examples you see in the examples of this prompt. ", + }, + ], + response_model=Iterable[SyntheticQA], + ) # type: ignore + + +if __name__ == "__main__": + for example in get_synthetic_data(): + print(example) + """ + question='What is the capital of France?' answer='Paris' + question='What is the largest planet in our solar system?' answer='Jupiter' + question="Who wrote 'To Kill a Mockingbird'?" answer='Harper Lee' + question="What element does 'O' represent on the periodic table?" 
answer='Oxygen' + """ \ No newline at end of file diff --git a/examples/vision/slides.py b/examples/vision/slides.py index 4a342c9..0eb884c 100644 --- a/examples/vision/slides.py +++ b/examples/vision/slides.py @@ -12,13 +12,14 @@ import instructor load_dotenv(find_dotenv()) -IMAGE_FILE = "image-file.txt" # file with all the images to be processed +IMAGE_FILE = "image-file.txt" # file with all the images to be processed # Add logger logging.basicConfig() logger = logging.getLogger("app") logger.setLevel("INFO") + class Competitor(BaseModel): name: str features: Optional[List[str]] @@ -30,13 +31,12 @@ class Industry(BaseModel): Represents competitors from a specific industry extracted from an image using AI. """ - name: str = Field( - description="The name of the industry" - ) + name: str = Field(description="The name of the industry") competitor_list: List[Competitor] = Field( description="A list of competitors for this industry" ) + class Competition(BaseModel): """ Represents competitors extracted from an image using AI. @@ -49,10 +49,10 @@ class Competition(BaseModel): description="A list of industries and their competitors" ) + # Define clients -client_image = instructor.patch( - OpenAI(), mode=instructor.Mode.MD_JSON -) +client_image = instructor.patch(OpenAI(), mode=instructor.Mode.MD_JSON) + # Define functions def read_images(image_urls: List[str]) -> Competition: @@ -85,7 +85,6 @@ def read_images(image_urls: List[str]) -> Competition: ) - def process_and_identify_competitors(): """ Main function to process the image list file and identify competitors. 
@@ -121,6 +120,7 @@ def process_and_identify_competitors(): indent=4, ) + if __name__ == "__main__": process_and_identify_competitors() diff --git a/instructor/cli/usage.py b/instructor/cli/usage.py index 87d2e63..c029606 100644 --- a/instructor/cli/usage.py +++ b/instructor/cli/usage.py @@ -53,27 +53,27 @@ MODEL_COSTS: Dict[ ModelNames, Union[Dict[str, float], float], ] = { - "gpt-4-0125-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, - "gpt-4-turbo-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, - "gpt-4-1106-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, - "gpt-4-vision-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, - "gpt-4": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000}, - "gpt-4-0314": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000}, - "gpt-4-0613": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000}, - "gpt-4-32k": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000}, - "gpt-4-32k-0314": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000}, - "gpt-4-32k-0613": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000}, - "gpt-3.5-turbo": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000}, - "gpt-3.5-turbo-16k": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000}, - "gpt-3.5-turbo-0301": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000}, - "gpt-3.5-turbo-0613": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000}, - "gpt-3.5-turbo-1106": {"prompt": 0.0010 / 1000, "completion": 0.0020 / 1000}, - "gpt-3.5-turbo-0125": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000}, - "gpt-3.5-turbo-16k-0613": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000}, - "gpt-3.5-turbo-instruct": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000}, - "text-embedding-3-small": 0.00002 / 1000, - "text-embedding-3-large": 0.00013 / 1000, - "text-embedding-ada-002": 0.00010 / 1000, + "gpt-4-0125-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, + "gpt-4-turbo-preview": {"prompt": 0.01 / 1000, 
"completion": 0.03 / 1000}, + "gpt-4-1106-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, + "gpt-4-vision-preview": {"prompt": 0.01 / 1000, "completion": 0.03 / 1000}, + "gpt-4": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000}, + "gpt-4-0314": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000}, + "gpt-4-0613": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000}, + "gpt-4-32k": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000}, + "gpt-4-32k-0314": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000}, + "gpt-4-32k-0613": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000}, + "gpt-3.5-turbo": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000}, + "gpt-3.5-turbo-16k": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000}, + "gpt-3.5-turbo-0301": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000}, + "gpt-3.5-turbo-0613": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000}, + "gpt-3.5-turbo-1106": {"prompt": 0.0010 / 1000, "completion": 0.0020 / 1000}, + "gpt-3.5-turbo-0125": {"prompt": 0.0005 / 1000, "completion": 0.0015 / 1000}, + "gpt-3.5-turbo-16k-0613": {"prompt": 0.0030 / 1000, "completion": 0.0040 / 1000}, + "gpt-3.5-turbo-instruct": {"prompt": 0.0015 / 1000, "completion": 0.0020 / 1000}, + "text-embedding-3-small": 0.00002 / 1000, + "text-embedding-3-large": 0.00013 / 1000, + "text-embedding-ada-002": 0.00010 / 1000, } @@ -83,7 +83,7 @@ def get_model_cost( """Get the cost details for a given model.""" if model in MODEL_COSTS: return MODEL_COSTS[model] - + if model.startswith("gpt-3.5-turbo-16k"): return MODEL_COSTS["gpt-3.5-turbo-16k"] elif model.startswith("gpt-3.5-turbo"): diff --git a/instructor/patch.py b/instructor/patch.py index 5ce7050..c81e571 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -134,11 +134,13 @@ def handle_response_model( elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}: # If its a JSON Mode we need to massage the prompt a bit # in order to get the response we want in a json format - message = 
dedent(f""" + message = dedent( + f""" As a genius expert, your task is to understand the content and provide the parsed objects in json that match the following json_schema:\n {response_model.model_json_schema()['properties']} - """) + """ + ) # Check for nested models if "$defs" in response_model.model_json_schema(): message += f"\nHere are some more definitions to adhere too:\n{response_model.model_json_schema()['$defs']}" diff --git a/mkdocs.yml b/mkdocs.yml index aa5df9e..7a796c4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -163,6 +163,7 @@ nav: - Image to Ad Copy: 'examples/image_to_ad_copy.md' - Ollama: 'examples/ollama.md' - SQLModel Integration: 'examples/sqlmodel.md' + - Including Examples in Prompt: 'examples/examples.md' - Hub: - Introducing Instructor Hub: 'hub/index.md' - Single Classification Model: 'hub/single_classification.md'