From fac2171162f141826c2ca6964cb0ca6ab10649a5 Mon Sep 17 00:00:00 2001 From: Ivan Leo Date: Sun, 12 Nov 2023 23:01:46 +0800 Subject: [PATCH 01/40] Added support for model suffix and added migrations for new OpenAI SDK (#169) --- instructor/cli/jobs.py | 55 ++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/instructor/cli/jobs.py b/instructor/cli/jobs.py index d378cd5..a289173 100644 --- a/instructor/cli/jobs.py +++ b/instructor/cli/jobs.py @@ -7,6 +7,8 @@ from rich.live import Live from rich.table import Table from rich.console import Console from datetime import datetime +from typing import cast +from openai.types.fine_tuning import FineTuningJob client = OpenAI() app = typer.Typer() @@ -15,7 +17,8 @@ console = Console() def generate_table(jobs): # Sorting the jobs by creation time - jobs = sorted(jobs, key=lambda x: x["created_at"], reverse=True) + jobs = sorted(jobs, key=lambda x: (cast(FineTuningJob, x)).created_at, reverse=True) + jobs = cast(List[FineTuningJob], jobs) table = Table( title="OpenAI Fine Tuning Job Monitoring", @@ -37,23 +40,21 @@ def generate_table(jobs): "succeeded": "✅", "failed": "❌", "cancelled": "🚫", - }.get(job["status"], "❓") + }.get(job.status, "❓") finished_at = ( - str(datetime.fromtimestamp(job["finished_at"])) - if job["finished_at"] - else "N/A" + str(datetime.fromtimestamp(job.finished_at)) if job.finished_at else "N/A" ) table.add_row( - job["id"], - f"{status_emoji} [{status_color(job['status'])}]{job['status']}[/]", - str(datetime.fromtimestamp(job["created_at"])), + job.id, + f"{status_emoji} [{status_color(job.status)}]{job.status}[/]", + str(datetime.fromtimestamp(job.created_at)), finished_at, - job["fine_tuned_model"], - job["training_file"], - str(job["hyperparameters"]["n_epochs"]), - job["model"], + job.fine_tuned_model, + job.training_file, + str(job.hyperparameters.n_epochs), + job.model, ) return table @@ -66,12 +67,12 @@ def status_color(status: str) -> str: def 
get_jobs(limit: int = 5) -> List: - return client.fine_tuning.list(limit=limit)["data"] + return client.fine_tuning.jobs.list(limit=limit).data def get_file_status(file_id: str) -> str: response = client.files.retrieve(file_id) - return response["status"] + return response.status @app.command( @@ -124,7 +125,7 @@ def create_from_id( with console.status( f"[bold green]Creating fine-tuning job from ID {id}...", spinner="dots" ): - job = client.fine_tuning.create( + job = client.fine_tuning.jobs.create( training_file=id, model=model, hyperparameters=hyperparameters_dict if hyperparameters_dict else None, @@ -151,6 +152,7 @@ def create_from_file( None, help="Learning rate multiplier for fine-tuning", show_default=False ), validation_file: str = typer.Option(None, help="Path to the validation file"), + model_suffix: str = typer.Option(None, help="Suffix to identify the model"), ): hyperparameters_dict = {} if n_epochs is not None: @@ -163,13 +165,13 @@ def create_from_file( with open(file, "rb") as file: response = client.files.create(file=file, purpose="fine-tune") - file_id = response["id"] + file_id = response.id validation_file_id = None if validation_file: with open(validation_file, "rb") as val_file: val_response = client.files.create(file=val_file, purpose="fine-tune") - validation_file_id = val_response["id"] + validation_file_id = val_response.id with console.status(f"Monitoring upload: {file_id} before finetuning...") as status: status.spinner_style = "dots" @@ -190,19 +192,26 @@ def create_from_file( time.sleep(poll) - job = client.fine_tuning.create( + additional_params = {} + if hyperparameters_dict: + additional_params["hyperparameters"] = hyperparameters_dict + if validation_file: + additional_params["validation_file"] = validation_file + if model_suffix: + additional_params["suffix"] = model_suffix + + job = client.fine_tuning.jobs.create( training_file=file_id, model=model, - hyperparameters=hyperparameters_dict if hyperparameters_dict else None, - 
validation_file=validation_file_id if validation_file else None, + **additional_params, ) if validation_file_id: console.log( - f"[bold green]Fine-tuning job created with ID: {job['id']} from file ID: {file_id} and validation_file ID: {validation_file_id}" + f"[bold green]Fine-tuning job created with ID: {job.id} from file ID: {file_id} and validation_file ID: {validation_file_id}" ) else: console.log( - f"[bold green]Fine-tuning job created with ID: {job['id']} from file ID: {file_id}" + f"[bold green]Fine-tuning job created with ID: {job.id} from file ID: {file_id}" ) watch(limit=5, poll=poll, screen=False) @@ -213,7 +222,7 @@ def create_from_file( def cancel(id: str = typer.Argument(..., help="ID of the fine-tuning job to cancel")): with console.status(f"[bold red]Cancelling job {id}...", spinner="dots"): try: - client.fine_tuning.cancel(id) + client.fine_tuning.jobs.cancel(id) console.log(f"[bold red]Job {id} cancelled successfully!") except Exception as e: console.log(f"[bold red]Error cancelling job {id}: {e}") From 913985710c9c3ea41e87291399a5b053b7be2a86 Mon Sep 17 00:00:00 2001 From: Ivan Leo Date: Sun, 12 Nov 2023 23:12:58 +0800 Subject: [PATCH 02/40] Chain of density (#135) Co-authored-by: Jason --- docs/blog/posts/chain-of-density.md | 467 ++++++++++++++++++ docs/blog/posts/img/chain-of-density.png | Bin 0 -> 67547 bytes examples/chain-of-density/Readme.md | 31 ++ examples/chain-of-density/chain_of_density.py | 151 ++++++ examples/chain-of-density/finetune.py | 48 ++ examples/chain-of-density/requirements.txt | 5 + examples/chain-of-density/run.py | 230 --------- mkdocs.yml | 15 + 8 files changed, 717 insertions(+), 230 deletions(-) create mode 100644 docs/blog/posts/chain-of-density.md create mode 100644 docs/blog/posts/img/chain-of-density.png create mode 100644 examples/chain-of-density/Readme.md create mode 100644 examples/chain-of-density/chain_of_density.py create mode 100644 examples/chain-of-density/finetune.py create mode 100644 
examples/chain-of-density/requirements.txt delete mode 100644 examples/chain-of-density/run.py diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md new file mode 100644 index 0000000..5e07816 --- /dev/null +++ b/docs/blog/posts/chain-of-density.md @@ -0,0 +1,467 @@ +--- +draft: False +date: 2023-11-05 +tags: + - pydantic + - validation + - chain of density + - finetuning + - gpt-3.5-turbo + - distillation +authors: + - ivanleomk + - jxnl +--- + +# Better Summaries by Finetuning Chain of Density + +> Discover how to distil an iterative method like chain of density into a single finetune. + +In this article, we'll guide you through implementing the original Chain of Density method using Instructor, then show how to distil a GPT 3.5 model to match GPT-4's iterative summarization capabilities. Using these methods, we were able to decrease latency by 20x, reduce costs by 50x and maintain entity density. + +By the end, you'll have a GPT 3.5 model (fine-tuned using Instructor's great tooling) capable of producing summaries that rival the effectiveness of Chain of Density. As always, all code is readily available in our `examples/chain-of-density` folder in our repo for your reference. + +??? abstract "Datasets and Colab Notebook" + + We've also uploaded all our generated data to Hugging Face [here](https://huggingface.co/datasets/ivanleomk/gpt4-chain-of-density) for you to use if you'd like to try reproducing these experiments. We've also added a [Colab Instance](https://colab.research.google.com/drive/1iBkrEh2G5U8yh8RmI8EkWxjLq6zIIuVm?usp=sharing) for you to check our generated values. + +## Part 1) Chain of Density + +Summarizing extensive texts with AI can be challenging, often relying on inconsistent techniques. Salesforce AI Research's novel method, chain of density, enhances AI-based text summarization, outperforming human-generated summaries. 
+ +Initially, an AI produces a summary, then refines it through multiple iterations, adding missing article entities. Each iteration adds new article entities to the summary, keeping length consistent, leading to an entity-dense, informative summary called Chain Of Density. + +The method was first introduced by Salesforce's AI Research wing in their paper - [From Sparse to Dense: GPT-4 Summarization with Chain of Density Prompting](https://arxiv.org/abs/2309.04269). The team found that this method consistently beats similar summaries written by human annotators. + +??? info "Implementation Details" + + Note that our implementation uses a validator to ensure that the rewritten summary has a minimum length rather than a prompt. We also perform just 3 and not 5 rounds of rewrites, resulting in a lower final entity density. + +### Original Prompt + +We can implement the original prompt using `pip install instructor` by breaking down the entire process into smaller API calls. This allows us to introduce validation at each step to ensure that we're getting the results that we want. + +??? note "Original Chain of Density Prompt" + + ``` + Article: {{ARTICLE}} + + You will generate increasingly concise, entity-dense summaries of the + above Article. + + Repeat the following 2 steps 5 times. + + Step 1. Identify 1-3 informative Entities (";" delimited) from the + Article which are missing from the previously generated summary. + Step 2. Write a new, denser summary of identical length which covers + every entity and detail from the previous summary plus the Missing + Entities. + + A Missing Entity is: + - Relevant: to the main story. + - Specific: descriptive yet concise (5 words or fewer). + - Novel: not in the previous summary. + - Faithful: present in the Article. + - Anywhere: located anywhere in the Article. 
+ + Guidelines: + - The first summary should be long (4-5 sentences, ~80 words) yet + highly non-specific, containing little information beyond the + entities marked as missing. Use overly verbose language and fillers + (e.g., "this article discusses") to reach ~80 words. + - Make every word count: re-write the previous summary to improve + flow and make space for additional entities. + - Make space with fusion, compression, and removal of uninformative + phrases like "the article discusses". + - The summaries should become highly dense and concise yet + self-contained, e.g., easily understood without the Article. + - Missing entities can appear anywhere in the new summary. + - Never drop entities from the previous summary. If space cannot be + made, add fewer new entities. + + Remember, use the exact same number of words for each summary. + + Answer in JSON. The JSON should be a list (length 5) of dictionaries + whose keys are "Missing_Entities" and "Denser_Summary". + ``` + +
+ ![RAG](img/chain-of-density.png) +
Improved process with Instructor
+
+ +### Data Modelling + +#### Initial Summary + +Let's start by walking through some of the data models that we'll be using as the `response_model` for our OpenAI function calls. + +Firstly, we'll need a data model for the initial summary that we will be generating. We'll take the description of this class straight from the original prompt. It's important to note that these docstrings serve a purpose: they are directly used by the LLM when generating the outputs. + +```py +class InitialSummary(BaseModel): + """ + This is an initial summary which should be long ( 4-5 sentences, ~80 words) + yet highly non-specific, containing little information beyond the entities marked as missing. + Use overly verbose languages and fillers (Eg. This article discusses) to reach ~80 words. + """ + + summary: str = Field( + ..., + description="This is a summary of the article provided which is overly verbose and uses fillers. It should be roughly 80 words in length", + ) +``` + +#### Rewritten Summary + +We'll also need one additional class to help model the rewritten schema. + +```py +class RewrittenSummary(BaseModel): + """ + This is a new, denser summary of identical length which covers every entity + and detail from the previous summary plus the Missing Entities. + + Guidelines + - Make every word count : Rewrite the previous summary to improve flow and make space for additional entities + - Never drop entities from the previous summary. If space cannot be made, add fewer new entities. + - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article. + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" + - Missing entities can appear anywhere in the new summary + + An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title. 
+ """ + + summary: str = Field( + ..., + description="This is a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities. It should have the same length ( ~ 80 words ) as the previous summary and should be easily understood without the Article", + ) + absent: List[str] = Field( + ..., + default_factory=list, + description="this is a list of Entities found absent from the new summary that were present in the previous summary", + ) + missing: List[str] = Field( + default_factory=list, + description="This is a list of 1-3 informative Entities from the Article that are missing from the new summary which should be included in the next generated summary.", + ) +``` + +!!! tip "Using Pydantic Validators with Instructor" + + For a more in-depth walkthrough on how to use `Pydantic` validators with the `Instructor` + library, we recommend checking out our previous article on LLM + validation - [Good LLM Validation is just Good Validation](/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) + +Ideally, we'd like for `Missing` to have a length between 1 and 3, `Absent` to be an empty list and for our rewritten summaries to keep a minimum entity density. With `Instructor`, we can implement this logic using native `Pydantic` validators that are simply declared as part of the class itself. + +```py hl_lines="8 40 44" +import nltk +import spacy + +nlp = spacy.load("en_core_web_sm") + +@field_validator("summary") +def min_length(cls, v: str): + tokens = nltk.word_tokenize(v) #(1)! + num_tokens = len(tokens) + if num_tokens < 60: + raise ValueError( + "The current summary is too short. Please make sure that you generate a new summary that is around 80 words long." 
+ ) + return v + +@field_validator("missing") +def has_missing_entities(cls, missing_entities: List[str]): + if len(missing_entities) == 0: + raise ValueError( + "You must identify 1-3 informative Entities from the Article which are missing from the previously generated summary to be used in a new summary" + ) + return missing_entities + +@field_validator("absent") +def has_no_absent_entities(cls, absent_entities: List[str]): + absent_entity_string = ",".join(absent_entities) + if len(absent_entities) > 0: + print(f"Detected absent entities of {absent_entity_string}") + raise ValueError( + f"Do not omit the following Entities {absent_entity_string} from the new summary" + ) + return absent_entities + +@field_validator("summary") + def min_entity_density(cls, v: str): + tokens = nltk.word_tokenize(v) + num_tokens = len(tokens) + + # Extract Entities + doc = nlp(v) #(2)! + num_entities = len(doc.ents) + + density = num_entities / num_tokens + if density < 0.08: #(3)! + raise ValueError( + f"The summary of {v} has too few entities. Please regenerate a new summary with more new entities added to it. Remember that new entities can be added at any point of the summary." + ) + + return v +``` + +1. Similar to the original paper, we utilize the `NLTK` word tokenizer to count the number of tokens within our generated sentences. + We aim for at least 60 tokens in our generated summary so that we don't lose information. + +2. We also use the spaCy library to calculate the entity density of the generated summary. + +3. We also implement a minimum entity density so that we stay within a given range. 0.08 is arbitrarily chosen in this case + +### Putting it all Together + +Now that we have our models and the rough flow figured out, let's implement a function to summarize a piece of text using `Chain Of Density` summarization. + +```py hl_lines="4 9-24 38-68" +from openai import OpenAI +import instructor + +client = instructor.patch(OpenAI()) #(1)! 
+ +def summarize_article(article: str, summary_steps: int = 3): + summary_chain = [] + # We first generate an initial summary + summary: InitialSummary = client.chat.completions.create( # (2)! + model="gpt-4-0613", + response_model=InitialSummary, + messages=[ + { + "role": "system", + "content": "Write a summary about the article that is long (4-5 sentences) yet highly non-specific. Use overly, verbose language and fillers(eg.,'this article discusses') to reach ~80 words", + }, + {"role": "user", "content": f"Here is the Article: {article}"}, + { + "role": "user", + "content": "The generated summary should be about 80 words.", + }, + ], + max_retries=2, + ) + prev_summary = None + summary_chain.append(summary.summary) + for i in range(summary_steps): + missing_entity_message = ( + [] + if prev_summary is None + else [ + { + "role": "user", + "content": f"Please include these Missing Entities: {','.join(prev_summary.missing)}", + }, + ] + ) + new_summary: RewrittenSummary = client.chat.completions.create( # (3)! + model="gpt-4-0613", + messages=[ + { + "role": "system", + "content": """ + You are going to generate an increasingly concise,entity-dense summary of the following article. + + Perform the following two tasks + - Identify 1-3 informative entities from the following article which is missing from the previous summary + - Write a new denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities + + Guidelines + - Make every word count: re-write the previous summary to improve flow and make space for additional entities + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses". + - The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article. + - Missing entities can appear anywhere in the new summary + - Never drop entities from the previous summary. 
If space cannot be made, add fewer new entities. + """, + }, + {"role": "user", "content": f"Here is the Article: {article}"}, + { + "role": "user", + "content": f"Here is the previous summary: {summary_chain[-1]}", + }, + *missing_entity_message, + ], + max_retries=3, #(4)! + max_tokens=1000, + response_model=RewrittenSummary, + ) + summary_chain.append(new_summary.summary) + prev_summary = new_summary + + return summary_chain +``` + +1. We need to apply a `patch` function on the `OpenAI` client for us to get all + of the benefits that `Instructor` provides. With a simple `patch`, we can get + **automatic type coercion of our outputs and automatic retries for invalid outputs** + out of the box! + +2. We first generate an initial summary. Note here that we explicitly ask for a summary that has + 80 words and is lengthy with overly verbose fillers in the system prompt. + +3. We slightly modify the original system prompt used in the original paper to perform a rewrite of the summary. + Using `Instructor`, we also get validation of the generated output with our `field_validator`s that we defined above. + +4. If you've chosen a minimum entity density that is larger than 0.08, make sure to increase `max_retries` in case you need to do multiple rewrites. + +This summarization function yields a result which triples the number of entities while maintaining the same number of tokens. We can also see that stylistically, the summary is a lot more natural. + +**First Iteration** + +> This article discusses the highly-anticipated boxing match between Manny Pacquiao and Floyd Mayweather. The article revolves around Manny Pacquiao's statements about his upcoming fight and his preparations for the same. A portion of the article provides details about the financial stipulations of the match and its significance in the sporting arena. Quotes from Pacquiao illustrating his determination and his battle strategy are highlighted. 
The tone of the article is largely centered around creating a build-up to the upcoming mega event. + +**Final Iteration** + +> Manny Pacquiao, the Filipino boxer, anticipates the forthcoming May 2 showdown at the MGM Grand as the fight of his life, against the undefeated American Floyd Mayweather, in a $300m bout. Despite being seen as the underdog in this high-stakes Las Vegas match, Pacquiao is confident, promising a warrior's spirit and assuring the fans who have been awaiting this encounter for a decade, that it will indeed be the biggest sporting spectacle in history worthy of their anticipation + +## Part 2) Fine-Tuning + +In this section, we'll look into how to fine-tune a GPT 3.5 model so that it is able to perform at an equivalent level as a GPT-4 model. We'll then compare the performance of our model against that of `GPT-4` and `GPT-4-Turbo` to see how it stacks up. + +### Creating a Training Set + +Let's first segregate our train and test set so that we don't have any sort of contamination - this corresponds to our `train.csv` and `test.csv` in our [Hugging Face Dataset](https://huggingface.co/datasets/ivanleomk/gpt4-chain-of-density). Now, we just need to import the `Instructions` module from the `Instructor` package which allows you to generate a nicely formatted `.jsonl` file to be used for fine-tuning + +```py hl_lines="2 9 11-18 37 40" +from typing import List +from chain_of_density import summarize_article #(1)! +import csv +import logging +import instructor +from pydantic import BaseModel + +logging.basicConfig(level=logging.INFO) #(2)! + +instructions = instructor.Instructions( #(3)! + name="Chain Of Density", + finetune_format="messages", + # log handler is used to save the data to a file + # you can imagine saving it to a database or other storage + # based on your needs! 
+ log_handlers=[logging.FileHandler("generated.jsonl")], +) + +class GeneratedSummary(BaseModel): + """ + This represents a highly concise summary that includes as many entities as possible from the original source article. + + An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title. + + Guidelines + - Make every word count + - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article. + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" + """ + + summary: str = Field( + ..., + description="This represents the final summary generated that captures the meaning of the original article which is as concise as possible. ", + ) + +@instructions.distil #(4)! +def distil_summarization(text: str) -> GeneratedSummary: + summary_chain: List[str] = summarize_article(text) + return GeneratedSummary(summary=summary_chain[-1]) #(5)! + +with open("train.csv", "r") as file: + reader = csv.reader(file) + next(reader) # Skip the header + for article, summary in reader: + # Run distillation to generate the values + distil_summarization(article) +``` + +1. In this example, we're using the `summarize_article` function that we defined above. We saved it in a local file called `chain_of_density.py`, + hence the import. + +2. We also need to configure logging at the `INFO` level. This is very important: if this is not configured, your output will not be generated. + +3. We instantiate an `Instructions` object which will help us handle the conversion of our function calls into a valid `.jsonl` file. We also define + the name of the `.jsonl` file in the `log_handlers` parameter. + +4. We add in an `instructions.distil` annotation so that we automatically capture the input and output of the function we'd like to + fine-tune our model to output. + +5. We return a `Pydantic` object which matches the annotation that we use on our function. 
Note that we must specify a `Pydantic` object to + be returned when using the `instructions.distil` annotation. + +!!! warning "Rate Limiting" + + We recommend running this script on a small subset of the dataset first to test you've got everything configured nicely. + Don't forget to add in rate limiting error handling with `tenacity` and set the `OPENAI_API_KEY` shell environment variable + before running any subsequent commands. + +### Creating Fine-Tuning Jobs + +Once we run this script, we'll have a new file called `generated.jsonl` in our local repository. Now all that's left is to run the command below to start fine-tuning your first model! + +```sh +instructor jobs create-from-file generated.jsonl +``` + +??? notes "Finetuning Reference" + + Check out our [Finetuning CLI](/instructor/cli/finetune/) to learn about other hyperparameters that you can tune to improve your model's performance. + +Once the job is complete, all we need to do is change the annotation on the `distil_summarization` function in our original file above to start using our new model. + +```py +@instructions.distil(model='gpt-3.5-turbo:finetuned-123', mode="dispatch") #(1)! +def distil_summarization(text: str) -> GeneratedSummary: + summary_chain: List[str] = summarize_article(text) + return GeneratedSummary(summary=summary_chain[-1]) +``` + +1. Don't forget to replace this with your new model id. OpenAI identifies fine-tuned models with an id of + ft:gpt-3.5-turbo-0613:personal:: under the Fine-tuning tab on their dashboard. + +With that, you've now got your own fine-tuned model ready to go and serve data in production. We've seen how Instructor can make your life easier, from fine-tuning to distillation. + +## Results and Benchmarks + +We fine-tuned a total of 3 different models, giving each 20, 50 and 76 samples respectively to see if more data improved the models. 
We then compared the output of these fine-tuned models to GPT-4 and GPT-3 summaries that were generated using chain-of-density methods. + +We'll be comparing these models in three main ways: + +- Entity Density : This is entities per token, the higher the better for density. +- Latency : Time to last token generated in seconds +- Costs : How much does the entire experiment cost + +We used a total of 20 articles as a validation set which our fine-tuned models had not seen before. This was the overall performance that we observed. + +| Model | Mean Latency (s) | Mean Entity Count | Mean Entity Density | Tokens | +| ------------------- | ---------------- | ----------------- | ------------------- | ------ | +| GPT-4 (COD) | 49.5 | 11.3 | 0.138 | 81.65 | +| GPT-3 (COD) | 145.94 | 11.05 | 0.105 | 105.7 | +| 3.5 Finetuned (20) | 2.25 | 14.7 | 0.154 | 95.45 | +| 3.5 Finetuned (50) | 2.09 | 12.4 | 0.140 | 88.35 | +| 3.5 Finetuned (76) | 2.17 | 11.65 | 0.142 | 82.05 | + +??? notes "Finetuning Datasets" + + For our finetuned models, we did a few optimisations to raise the performance. + + We only included summaries that had a minimum density of 0.15 in the dataset, took the summary in the entire chain with the highest density as the final one, forced every regenerated summary to have a minimum density of 0.12 and regenerated summaries up to three times if they didn't meet these requirements. **This is a much more expensive strategy and can cost up to 2.5x or more what we do in this tutorial** + + This resulted in a total cost of $63.46 to generate just 75 examples due to the stringent requirements, translating to about $0.85 per generated summary example. + +Using the OpenAI Usage Dashboard, we can calculate the cost of generating 20 summaries as seen below. 
+ +| Model | Training Cost ($) | Inference Cost ($) | Tokens Used | Total Cost ($) | +| ------------------- | ----------------- | ------------------ | ----------- | -------------- | +| 3.5 Finetuned (20) | 0.664 | 0.207 | 56,573 | 0.817 | +| 3.5 Finetuned (50) | 1.368 | 0.165 | 49,057 | 1.266 | +| 3.5 Finetuned (76) | 1.824 | 0.174 | 51,583 | 2.481 | +| GPT-4 (COD) | - | 12.9 | 409,062 | 12.9 | +| GPT-3 (COD) | - | 0.45 | 290,164 | 0.45 | + + +Here, we can see that `GPT-4` has an approximate inference cost of `0.65` per summary while our finetuned models have an inference cost of `0.0091` per summary which is ~ `72x` cheaper. + +## Conclusions + +Finetuning this iterative method was 20-40x faster while improving overall performance, resulting in massive efficiency gains by finetuning and distilling capabilities into specialized models. + +We've seen how `Instructor` can make your life easier, from data modeling to distillation and finetuning. If you enjoy the content or want to try out `instructor`, check out the [GitHub](https://github.com/jxnl/instructor) and don't forget to give us a star! 
diff --git a/docs/blog/posts/img/chain-of-density.png b/docs/blog/posts/img/chain-of-density.png new file mode 100644 index 0000000000000000000000000000000000000000..75e361a00477d48ec40f78b1eaf979eeac5a1d7c GIT binary patch literal 67547 zcmd4(byU>t8$OC6APikfHxf#B4I!mSBhnxp(mix{DjkvvqNE@V(nzOtBO%g_fb3_i z*YEF~fA-qzoVDLIEa#p1#PfOLey;nvu6x3j6(3-slcFOaAYjQplu|`NK%4`APa!Da zlRTwoGzbVcv@9hhm1QL*sg)h=%q*=<5fGT3yTmohwJG1~P^wg|lqQ!V3Js8BJ`R+@ z>zclg{Cr6JQuB^?ba7n!1ALWhgyO)87$HHSCv_15&O~a+j2%C2ND#5^H_0fxgDpAw zl4I{uU=QIb3r-**V2+lLI8{7P{QTizSqN_Hh77Vw;uN%y3B#o2)F$gY#T^H`2OT<9 z@1NfLh*`G3G#Wo`^!1?hqm9QwTK|U6Pz;Bypx-TebyspWSaQumu5$Z>01V13v;lWr zTaToXyUX_+sF>fnFQYEQN~GB<%W~{0N-=4@HMuKD>KZF0KT-QzzM?TkPgtbdH5pBB zB@{R@FeJEnYf0HkR#m{+qWzGgg(F130g<5c{l=M_Tay`avwTvM(9wruq{ZcsAI7p|UVAjp#}QD5-V>iF|JKBc)IvN|<2H91@r`f5f8fF5+aGA2$R;;_ zWc4C~zE(HYlr>XOK)46qLlAC+S|T8WcQ?QvDfmM`Kzb2^fC~P_2Y*u8h=0CCoXbY~ z`~E5X!26FSWo5y?kBuEoO>Lbl?3{`Am|uZQja#Z|I%_J(3mMzluo;@z8JV)dZ0zBu zAV6V4;H{0RvmrIi#@g0N2qr@N^9UjE9{w>qE%nbsoUKG?H5HVpCG8wdsrlGA*f?lK z(W$Aap^heILaI_SzfTAMCqirC>})T@&hF;s#^%P&X6Iqwb889Q3qJ6qb>Qp2xnXk_Q&EJ8~QztP|SeznsSX8GTpY@L2T z7I;8*_&4mFY#i)=uMJLx!ao&KwuG5lYe`w!fbIeJ5ar|GhyFbN|Kpqg?)cA{n*W{2 z&&~bsng4wAf1dfo$<$HO&Ia7lS@ge=`F--gU;aK3$_{_>e|q9qH~;(;^s^{Bl>P6R ziK44_a_S-=h$F~K-B*L%*m{YYajWB`3Zvta-~5(JDPA9wQS-YL?t5!Nf&pkw%`6dU zNcYfDI1D>8kYxHMGLY6_Pvj>!o{FsdnA`4dz0xytEij)yyx3M$a&vJxek{Um_fqJ| zXBq<5Pg3H@|GwVJ7`%VGS`;PyzaK~WNmWxKB=r+oYQ%qD!^P(R zd+J+x@!a|zzr;pJnBRY|htA(f{=FKm|2=}ZjS?;cQ}%zahKN7sf%0$rq_TUE@6^4+ z)SQa^=W1b~d71yT4P5#EKh1Mtth+v2iM1|%e>L&c*L1;3@_%}BI#66MY{xv)odJ`W zgPP_2C)ymHYTKpi1t0FDtMA7cZ6R1z1SJ!?P70IH-W5-O(XI>ouZzOO>mbhZ8V=VQ z(@%ZQ>1RE68x7C)mXq464rmDZ(DDoR3K`xf-oH`f35=4QBfsC;GR+|YvWHAS7zj;EWSdnKUXPqwXS zJyK1R&=@o9CC*M}>=cyO$4}p=qb#>E+44bC8F@e+>IohU>JY&EA)@{gt+xM7p?5XUmag!!l&S z4=CN+r37f*zke;j@%?dbUXkW~zM8CHUs~3RzO+-ni|zgW2rbL&K$aRMBkmS5fB6!` z<6zD>RwQlpdHu?ImM?aKEW`h-j9O&XyLAih>o5K6J0ZK1U|`x_y3Lai>R6ZCcih&P z^tpcU1`@(V35Au};7|ySNgY=Vijt}8yPIOj8lDhH9oFwQSy@8(X)3?eti)ijH|$BA 
zfvIfco8yZ)p7HE$Av(pl&wtAu*B^x;u4nHB9-T}Om|#LhwQFfP=7X7hFnpChd3ww; z=eC(`Bu2Z2{Z0tO)Pa7z-@=At8=pxo(c#<@khejKC1m<(c?I9Zd&{6k3YU!VQ?BY*|eY9 z)^D5bFG+@AmfuCIKs&U?O>6a&PyJ`X`9Z$mKU_`lujN zM>U^x)*uk86$}OCxQ?9&hKl=6?fRDYM2Q|N1G)c?Q6(`gU0BX#{KW!nC7R>>OZ*I? zNcYt=;lE+Yzm9OO<85+>V;GY8B7H>+Oa0b+!|e|fRbNq(HI2~7EVQ<~C z;`0kM5F9MZ+l0UrjpIV>zFDNI?tEM0_h7z9R%Nckn=pUrqJAQy`4Zo_$p-YgxHs);6VxM-Ye&`J%SUZoir`#DT2JSf?a<{)NNm684T-J=!^*zfX4K3G!w<0@k*} zO6OF+%g(Ie)`toGD%KnNS;xio_@LYTJ?P@nrsVjhOyd0}FzkqobZG16DW&IPA(c@G zSbK78we5-$esW&bQCX_L)@~)Mzgn0rGQVC=qsQ@!N!yuqNeu%3G{(hco!0ubcaLt~ zf?qa-LHRIu=4N$+Ce~HP=9+Xi(+vqF*GWTE48dVHX%#ggrk_1Hq=@`XC|@U_{$(b* zo;tFsjGukQaNdLTz5EUQ`MdALfAt8dpLoAV-B+&8qm?NHr5sxo*QtSK!b?T`SnS4dvDaTni!!Z*cV8}zeGafOnH-Ld&f zsOpEvPsBl6(`cLhzJ0lz=Y1k@Tju8oWE-N5z1ldjY$mNzcU}QgpqN2i6YX&%{xjO& zbIo6yM%ihJev`B$@qy$juKy9fxP66Dv4s4HPUyW__zD>#^G`c+=4Pz^IRUlhno(7H zX6Cd5gp_{WVg&h_qH{&NFlXxjG5?4Vc^T_?5#H)kvd)D)n(Rp?Xm_-<9|v#iXOrKvSQ!KP*&}D} zjqt*zKOg;qBFakeFD3{y=j03#!Pe$o1Hy3IQ{Ss60?%XqLm}c6cmQ(b^~m=YH|(`y zEG0ZvMB8mV)z^+Fa?DTgLoq9;TvoG&x8sc@03t4$g&%z6Qsp}%&JG8~COlN0i2k?K z-ax=Qi!iXhmKXhTwsOKQl}dkhvHP)CUhCY%a#`)XUL)r3+S``Srjb4c0mEh#zoc^H z(GrNK&)83H*YB*viczX-nI^_1l^luvIBVPSp0+F~CJuDudPG*>xFc)%ck>>(pPeJc zj|ecqDYlMnttssK>a>k9%j-pq_C1xJ1)qz4)rKG6ztO9XcP@b*m_r{K&#fnB%nWv! 
z(BlVz@wyP};l1ELu}@qNZ$neV?eQ10J=@);iyinD4;hu`R?u|n=WWaM+#y*@F^-`% z|NJ8d4n<~QH2@>DUY#ybHC-O`4uLsoKkLvHd|O>JXU#TO1UmOxpr3EX+Fz#EswN>*$ zgr_ElE&7j#?KLcT9riabG#n0)CFweLbF3yj{+^TO;SzfY_C1Yb$Hd<75aJWCm22oZ z6KMPxd#t3Tx1_f_Rbl>FQ0+US*wxYF6b?g)6wjU7wm$Ab-LG4TS-&5CjKaSThuxti zn(+{PgZWrV#O3U0R7p<#&b%hH3$(xL3X2jdK<3fBMFytOhgJXeOMto{aMV@-1sgC0 zhB=L^*My!M7Hnn?mzB)6#!%Dee6>y6s`xgcp7XH%K{um$>3tz&!Sx2vTQqL@IQerc z)5NDCHXpA}){3Qvu&6|#ER9DaRSVa*nPUs?9zUwh0z2K_<>6qp5G7?Pw~XGM=*}*H zS8&z!Ts{v~+buB2gL7xL8s>3C&kO`kCXL&`{`ed!vg=x#(a`dkrSqfj^^c{EyjWXD z3PR^;o9ek%fCxuTe0RupQZ4bd(lX92{E<*YdfDn(GTi3f6<)L+r`t7IzAq)U^Fm2A znTH-it)Na+UVHZsl;dqk%k=79KiFryPSU=&1xDe7MHf7x;Yu_=_I!McEDy(q%CZyF zpJ)=ofa~jJ)k*38<+=RjaEYdSuKg#eQoDSBeN%D7E;*demO^JdPddsX*Cc|DuNq^QnJT&&(oGJdjQG;z~ChL22&mRi5bVidUPXS2FrSitSMyaQ&1%=E6 ztXN@TO0oy30Ip6uRb2nLFqerYHND82tX{vaUC*pIz4QbZ-y%8WI!N1fuLB^zIBM(x zIhA3NY_ItpzN8}TyOA^C5zM~2cg{>NN=^NVry#^`PLFzzD)4Isq7UjOv*}6f&NoWl zLao{bF%YME)=@4jR~Wtxuhq6$g-hp{&Rwf02QxV%(x6WQ5WM{v74hWT1pA__o_)(r zn~zzsZtUpHkrV)xecOgYt0zDHIA%XYmU&KSmf_Hed(&6obk?a2%vM6%nkAG--hu`q zJ!h3>Ny=bVYF4BxkOtHjRl7yp&4)0T)a2^5wec9WwUj)+DE&2*R5y#=w*k%==WyOo zHkLTgLl_yotk20EqHgr+>HgM)@AhsPwO4AVS+3$iGc#>I$($G70 z^R#ihYDT+8;gjX?T~j!yA(9+{2sJ#dTJUivlby%xf41G<57s`a9R#$44D+?bcIf6-dY-8Y2l`n_Nf6Q-=^m_ciW)vP|SLgLym}y*f1}e5B-RN z++|FKhOeZ^Z5?E0=qe^Uq}N@CQ7ysd3D6~}#-YQqS;yX+nF=gEEn524+?f)8@`Y&m z>=J2y&I0275zQmQG-}B=24SW_J1wYG*1LEf5Z+yMDq&Y^P2a0y*2_R-%%*nDPIAdO zZe=_bMSaLK77w}YW02f_C~;|}$1QHhqI-nPPrQ1+tQK!ZPP-=vLdtC>uL;7r^xfnfmDL1OV!iQ-laYa9gU-r zlsJOv6-&!uGN<(%$|ktlM+42GP}dzR{PyI#+npZ};eCyJ#D z@sl{K8RoDr|8`-PVu&*1{JftGWn-z3r2v|#l%qj()y|^?w!iTWd%_IK735gETA9=r zrX3fLys2&RoM6XGp&GYKx+iUnn($V)x-8B(Rv;nOWb{IfRY`28O12cf+hlYCoNU=o(``GYMjoDnI$ks#>(2%`&PrZhvQ2teQrp`k6G zvHLiuzn{Q$ff`~yA}L}0U^xq?N^;Bth7e$Sv@yKqAU)eBxSqj~kE>R@mMO{p;6Q*D zTM~K6;f@;OaL_4Pxs0*jjg(|v$M9grxTO1juUedBVaP_f!<9o47{P<07k#889X*ni z9er&S#0_C@BFfv77Vyn3Z@I7Sg|s!VmllMg=6i)GiX5xeU&FKJ 
z&6n%x=96T!KCGGqwl6ZGgF6YLAOFdWd4&AyPMo3rZR-cRQP~|<+lPH$A41Y<5Ed;R<_-y3j}6Oh@f$Mpu|=lcABSnkC3cbF zWoqm&OT?uZwi6=W^2g*hd%f}bU9yc6CIbhXDP`JX`2kNDZqRwF+8zSIH90C~hyYWm z?0C>KT&n0vx{RP2ONnsUYrKHfPHm#)_J*~Lz%WI0-+}e1mKh~tOd6RWWcMFDem%UT zUVR1#eG{l{qc4SODt@*TwuLFjbR35gMFJWqEB%>pb=WdiuQ7RL)8 zGp64SmO;z9QL-LQE5&;pu}(@eAcDs@;F-A_$=)P@%NJmtwZ0DEOr=dG^UhqG?u%2! zHh&pBclSv(xwAWmBUOKpD49QY)LKzux)B0drdW`p4ypdLcGB|lAW>@duiqaeaNHy{ z3gbXAa>Ppxx_>7)H9643)3s7iIuS~S2ZEuRmR^e1uG5p zFooq3TKnOrY-!Br{$*xn&%b2zwBs3(?&@(hjVtz?cX@TSx6-nS%-+C@)nvpB*siM7 zW>(s1T(SAM7=R()?lFmlNL;J8jcC?a-F>CoG-eq@2Gtzy|G++QIPi^v^R*_q<@ zGV>PclUmx3yyu@s1jF1>yj9J4IHr5=)XhXTlraBnPF$>bmzOY^4b|MCtRyn0FtYDX zi}ampDRLTt0kP{#zv}0f^AL=^v)wNrrk~Z1_r!ZHzy{F_mvDv^YET_EUX$|Ip6^l4 z6B{yrE(5v7`1{unaz4ng2c1e)^2KV4#9ESl{XnfN_h&1o-Fpf$QTj~ex=zN|o*57iR1c6NQS~Jib!+Qt?*`|3(_^swVV+5a*_i+>;6Xp zUDHz!7c1nc|y6r6wl?Ukd8Ed^RF)Q?1i6{TTxP8tmt%74V#)| zJ9*wf=w34t9C4xxZ&yCnk~jmAqwMvf)8XCID>xaB#XqaOfA+<(^l85{M9Zjj)PMye z)>3qbXDq4b7xzSs_(4HDS9lx;qx06Ge8uc3V1|iOXxf4`%b&gU*eq<*v1@$7B0Fph zG=d6N&L6(lr@jg!=j5LXGCiKv063mbo<7D8Pm~3|#7VZsD~V3AFeg3vxZg?E`^^5V z8}`+353r%%**xh>XD^<;$i8W}0}?^|FV7!Lzs;&G5CPdUYg!sde0LB?NvRt^5;ApR z@DJ1yVhdvS_zlIQ2kf*a9AB<=vNg_PIyarFjCJWiSO_*U%y(WsM_ zisk&J?n!zB5Hb4v_S&54zm?R7P`FOEyuYvZ{-K;z^y)H6^SUd*NGM5f0dSr0PHsHe zp5d1VNJQb}w&x+xA*NdCufI=xwAS)yHAvBq^|xbhJbDp4SytB9!(2LhhfLkQ0R#hy z0>zhG!~tpU<>zhVAf?9$O?B+KyDfrv1;?v4KUe`Zv;@c7AXBzBph*W=(s)dZ#e(-K z7TEI@7WHS@0Er7v)n_VwS#X|y3c;bG)Q2qvmjUg8Ps{s7W+a8%?C{!1KFg%=ZiBI@ zLCjtU(bL&{!mX)_0t(N=e*W$630Ub4og~G>+<@ko^4!czxxLck&RQszq#VlINDohS z9Ev+>#X5E#bp2(3?kT}hJq_st3Kuo_W*9!M>jf$QadnXnX-E3TZAFcX=Z%DljoHio zvEgNi=C_g8a^ikhpq&gOUbPPRiw-ZXN@`o-B!!Uc7BTKBvBVDGc+fsx|L_z5a88mF z&yum-7p6z$xm8{UkasX+CaiT}o!Cjr4p0va`e^P<8uoNA1_js2b!g{GTfW`a&^2sL z(8%)r9?d`hkb!gXM;IO*qFlww*L6|cN+kd`_~e7!tHZs`Y^|X)Z18>5Mt>WSWMbAp zWHQ_s%1NsBtjhuvjo*W`VC&g8s9DBj`5jRQ)OC3M-Si81D+yg>vOy?#@v-H8*sDvs zpc^qkYHVddD!v&GO(Bi*Z2PvGdkK=!iV(C$aww74P^8NwvPhdXhDh+D0IkWj9S-_EA5Ni+yo z?BHtCYvHZ3HpBqWS%7hrt$}p3Y%l 
z;X!`{qF`~U^a#Sb!ri($W2x*)ceG?GmzSjD=xhZCjcXf4p-0HECF!MSVW+bq{XQ(G zE`T;F*XtglCRLqfcDJpbbLBJV2}rkktdua?xH&5v%?$kvyA1y(cCl4aoFhrOvt(xQ z?Z?Hg9hLy8$WfZT-Ku>@Z|qQ4#Am8TR#=VfH6X%t1T8m)viHo#xrW95@SXl@_;0L+ zw`uKwc7Y8Y1?#_Io zl?b;6wJV2(_OHeqostVc2=#QefC{k7W&8f6d<*X=Pvp*6A(wCJKSN%u~yaS0DZ=Iob`-k{Ua0EW0Y_$4L@S9^-%9>{(Vx6&Iu&SG$6jxS!(h*K<4m{Zds z64U6!&|C_d?=7~d>q{{sJw-#xiP>Onj@-@>Da+%OGL8s&P=4_Jaux_HuXx!W#wr)g zhQX*Tv?8nuzvpGTtG_WOMX^Of!GzE~h&ihE_zfX$;63E_Cl=G?CPaO1$~O;NiAZEb ze>Qh@*d1o8Ou8`pdZQK zTInBHsdJD$u+|?&vR;=`a&diH9!;$NBD{6{g;cB)1Ny1_(ZiVwrZXYs0CyaOBU`m>p@y4WE@vWvZ$$ zU0gKJ@;2MO(*BXQJ8PDDbxq+QdyZLjsk_M=;VDeoI9VkY5^_sOq?eQLPTK4604PDn z$JP5+U7CmT)*F)-8_`uqtM;rC4J?M!8vMs{v+rm8Q7J9-)u8!7r1orG`=ZN*G_v_H<$LlXrS9i#RTp>K#Ymw@^-(=2Kf(LEuu78J7Rz-HRc#QtGSb?NQ9 zlK1By^M)!eyOa0u_*=OlcG(Rw!d&suPG4~wE& zPh1__H&_4~LKRsiApase-n3!TRb_axPeAmRG6k|6xx+?8E-N9%2dt0XyzYDgY*ITTlzKvRWA z()#8BDJ72@y!kmxT5mu7q(Db7obSerFnSxEUm{OcBfBFll3> zo0a0W#%O+m!%vDIHUqFA$2ZFp3Qr;evaD2IeDAq(CQmMYHem^;*ZB9}w%#_z% zAnAE;blkOdSDxUL^K)As^sxKpaixQHJ6%WG83+!Sed>?9WxV#8H{bkDKW{N8Kh8}z z;))NGa$7=w_nI!ux8XW$+(c8+s^^p6LytRNYYF3DR5r;y*o>fc?&sbz$*Sv>T@D_Y zMh*4u^}J59->eXZqtC?`$JbwR{|3qiBWPnL^89nH@245X`?#GfmEJBg47_w7j2}ID zva*wk9yfWRO*lKtFOe?oai`|p{w*e@KLC*mE**N8>0c}kJ(&qa2ox&E;+xDCVJo#7 zzkB0pS)rnxQZ@arEQ||&=;KbMj$(ux%QgF(N!tGJ5tCI3v1I(t5C5b*aJ^8~nL9AN z$N|CvoEcRc@+-6DJC&>uj(0<1*XM&(H3=nuv~YT~v03p)%eB0LvfUo<-XY5ut~*&x ze2)HVHXZGUqEbcXp{Mz4+z|ZJ4xLw0X-jkh?_!K;QtTuxmb5*xS=Xui@&NzFb*Ml4#b1b4*@<&*VBEO=K`do)i!!!!DcEMD(JXHF~&j z&iO`_a@W;M!r2nmw)8MXrv^C(b~TR>yr90T=ms&SNa3U4^87Hc=e;tzxzrsw!clgH=9qC(ApZ@JU;}&!(mu(7~w@j}@sCb7z=u+7XF$ z+ICZ!E)-~h+c@Lz$?(2D4f(+TmcD9KtKi-Tc)#;-`Kv|l$duH0)VkpiC_`-<$KLGx zwI@+i`KP_);&^KK;g^p6$GSH15ad$PAM&~W#v^d4R}moPd;3-{`0qXj#_u5=5Po^a z(|TBb&ct5=blg3V+ZDcLyRH8FS>c^T9SxjoqncLtDt^xgxDqY?7WgU(-qxvU{+{*l zH@$%0HKR;H>01w>p{GQ}LZM%VI@dz0D?&385h0i~m909lQ38$wq&*TmkK& zGyiIi+rQ62tL%VC!8Lp@^BvbMe?r`i*;*&KHj6Q!toq-59unPfuN!^x^j`5lEfPlu zmn$e6Q8W4PKrrq=#aGnuF0B0bCU@_E&T4)1`1${ImNXbuz2ny-5q}>XjA|TsNOWce 
z>3^L%;P&A=&YzPX{bG53kE#Udq`O5tx3&HqRXzBq{=c%Jl^!eyfS=Hgt$ywKH|@r~ zK?i6b)cgND0$7Uwk3E{xFPq=@vd!$6D&P2NUij2R6p(jV;pGC|I_h<#aJ%D?b{=WR-4lra)n}6qg znE3y#L!CcKy_4zed5r9c2K~N~H>KC@AHUKSTYV+sarB8&$(G!ykF&e_AFG@H7<^6Y zCBCj`hU@^^h6W%2=YHEoAbHtVvDX>{Q`7%>{6{(GV~(ufDn6Gp{jmZ^KLbh&cwRq@rpl>Q>5>hD?~d6Zp(s`_KUi)zZl z@S^nNkDjIpnHhrS8Ng^dC3_pX*Y?W14*DA}conmEq)vf9f&c}R7?)7XIryIaHIY>6~va}Dl$a<=E zF8-U1)Go-IwD}jwPkqpUaAHR+>IH)V;-C#~Wx*CX8sb=q;tsi6kTG&^6^N`@c1`DI z)T{w1aR08;lY3jM$xna!dlu95>YUNwW(Hyr4`*PCL2U)3ly;(3KKen-*ayXSUb=FjbsEDNZW+I6%jC`TXPXiYUnxyhtC5FP8bRp z0mH4ya&J}f+mS@=7n{q*RONz!FP5V|D;n?} z%uyuV41K^TWEja>RR*0>$^?vsPKsd&37nTo>`yliE&?`|aa?hGDEafwOV;z*?Jr#z1%mvN;^Uq{3i;rECS_Jb|LX zQqbOZ^+E{X6HBzP0D!&+SJep}4}eCm3F`s0CkcaM1~Kpmlj-|jxd@{w2{|lHI1%27qDKt`>p zy#Nfhvip%_HkHrOY$X(MyMg!18*U;?f5Npt0oeh*DVkXzj5C}8`jP+xr}k@py1YHX zb#%5l#}`V^tzni}FJa>5bHKl%9GHu*rdgHTZFz~XI=|D-HUlh0?DREGIU9cs_Trmp zuk@Wi0*^R@bz%73{1jTaNnvR~co!4&vnF+>u%&1)^`R6FC3F%S3s{4pv+bS)*b)!X zE3&A+c$ZNvF$5)%yUjW*1rzB*sC+LSxMs{|=wzZG?NO&&Q{2=)IFZjlb0=7A=ypHx z$A{;mS$|=aIA!~dfR5#;k2&WcvtYeUL%YLt!4r4%sf{#xo?7=WZJ+qZV>*)yI+_T; zS+x0mkQ8#ltvmi-QG{JKO#)I0QeBYSR{9P4;e_M7XJ1WY_jz)+iFNiOhy;XNL7 zh}?bWUEl>V927al?4tCv9?*(DC$w|hkq|^@w(K*%@5cJ4DT>s`nLB~lLZ8}M0&f8U z*BWCL21qN!KoVd-ZnwUoX$C6IVcF)}ciLyIz(->hNSiuHw)6uy@uqLNz}}RD;4v*B zUp4)V`7n(xCH36oQydLLeg(<8we!Xh%@)}7T-gFV>$j^gCiFdv(OgknxQcTIDKed& zNj52XRK?F=p-J-pX%4M&HGE>23VvuC3kXVBbYReZ<+eNTO8OYfwyXt(;nT$bI*V<- zw*Yze&LyDX2#NFuK0K6U!k9QpmAizlLF~SY%ZpX(KNWV{v98XiU-}^#jm;Rk8jltenfehpEVJPWKM%`OR}h z{$Al&I@=IBS6s*^p%Es?a7XH|1oaaP71PIkPuUrthY7B;{(pBKTd;zChxSX@vDsz| z+&A~|9>qT(1c2OS>KJFiNB4@;@a?KKvyyd;vLI12;9R}fX$uS614$GzDoD= zEvd!0wm#h$Zc|TIIDJ`o^y|^1C8$5-UI3XI8@|PP7&pBzaJMbeB_Qz1 z5Ahc97m8&H34ZNUZy!=W$t7!1NOmmMZ@g3W?o64<3Z707E#S{v7KtSOM46je;?Jb2 zF>Kw-H?MQXC?dqI&~QE8w_yuwehP15)|OLQ|Mj$Lf#Ua!>ga3UsT_lSdNUSb$t>?d zEZM$fjQnhVR3Vu&pK7pD)aYI{q*CO@DU7a}cPDTA{9DAY4(LZ4 zYuGSz4L~Dc;9;*l0i6{lj)y4PHauBw9QkU5=J&u@kRkFm($b5iiBf3JfD9enU^5s2 
zsU-f;bE4P#tLU#Sa6~pcU%FZeDM_&so{x!p;!_s|6-uM=KL0aHo&;~E_V8V>s!#KB zzD0_EN29LMDp#jr_*yDA=MM+}uU?8Pc-E!H>-?O7#q^7TOsm_rtoL935`>lS!kyAu z%P&*@4=+DBuRp>T%kR-e-s^)%@oijUep}t`TQc-`Ke3U;L-=`xSi7XQpVEdE07O*_ z@@pNt7od^~e`G(1Jgt8k;aoh-PDl?v!YuO9_Pl>~*h)tKv|DqS9!783|9tf8B(U#- zFFrs{;2Sq{ai!|n#K#|>OFD!>lxf44W5ENFYA|9_2&v@?tK5LQ*sGp9j;TDjxtut~e|0n(3WPl<^k3Py1^`USt{4g(U8?Zj zCob`EFTmtlD7kNJ2&mDjc~V#F+SR1-8I^EESjs3IJ%(>3g#Jmd#RJ6K!fhoUl>rDO z2rSn046g8<=2SFfcgYU(GcjiW6K)?(Ip68Z!YD>J3 zS{bJ1lKP!z0bHk=tW6KZX??CJne}8mZpuUv8&cX1&Jf9HS(kV8twc|p!K;pXS*vwn zK$dF*AdtYZAc46+6ngEbU%L*knfqzVr@B516+wDEV4Tfjn&+_5>GTi>6|3ZRvi`LG z)#R%txYm}m0HU>Mq1IQgKYNBhIb#14UBv(o??_1Ni=3O?p7c>cle^M{T6f^Pwr3cL z{EL%Mbg@AgY&FfxhG4RH*zdKVhPtn%cD=yG4yci`SkddqYXn=m=}rFD?ha^$3=3$E zLP>BogEMj)L5x8gBsm2LsP8TJy^P@HNT50`Fj#&v<_u?WG? z=b`(A)645iSRL=+09J4HRV%g>8zkj7w^Y40WSgJiNAPjq{px{ zk(N-yXNZ)3H(5@Y#s_6B0CRuJ%&FPOcM(Ch`1mklg;ScNr0M%+6~d?D>iK69y=+I< z*PJQ1<%*C;oWpuW4Ul_LGM_*uV-$@lKz)E)3xO{8?f1~W*>7H!9^h?-E;@lcW{vXM zNz>Q|&Ef=0f^g$qBJ^mSp zj5jmIP#WR&kV`-oUsAonE}SFZCpK8|qEy8Qjo`Yb8q~J6k%k0$*|RNi)&`K_c?}4z zBj?ACCZ-qE1-W)85l{}S&S*m3_P{S31+zy8Bd&02Yh?lZO=1F zw&32dkO5%tH`qP?%R^=h`iG=9w&QUmk{i)Z;C4f7gNEVGI)EfnZ!fk7)YI7G&4!su z+`=6PBC|G3n|)#f;>2x)%6E1K+0Fd0{a##_5(&R2#YHRdHcM+NGwQ8 z2HHtz2e3*KZac(0Xi7=+WNjD*nPaT!&8MUYql&`CXwt}ZtIc8QgXUq!PJ<%VPmb^V zVEReh(}||u3^hl<;geZGT-rUx=I@beXgUqjY{kE}Xof}cp00?{b&tX$Owr;Fa5Ly# zBx>k$wkQc&FuBj&8SI_QkNWZvSTKS>(E^Hx;GR+GhjHzxz1Q~xQBoHf2I1~_qm^z( zekdcqT_e#i^21+-aZl{_l))Y1ln_`yZ@!77)KW`3T_h7-6{)eUwZxr0mQ3J`sfry! 
zY`x`iv|)G%qw^44Kz&W)f!hdCMCO^-L^-ZKFS-R$;3s+q3PV0@w^~J{NFf-4-7m%c zGoBFUo);+Y=pXIpB(ktxS?!F09(-DKpNfHAKT1rVQt{%T$|a)-;b==q!l9c*dM+E~ z1vKP-q*xAVastUl;6Bl|i0};Geg%%#!?+WLHnTKJKWpy8-X)%k~md zP>r%}5L0GgJEC8aO(BRCBVbpr;UfwjnuPO?o(Ef1{4_S*KO zQ5ciy>yp*u2ckhd)RzsIu%~ljc3mK8w zARnsLGs_GNqF8q{&v+7`fjpTU9(H`w7Ifb%bKJ~9BDmy-gw2%btw8a>eiV|wbEnDb z?K3rE_%8HD2RM{Z1`cM40~ygq?FV@?A0Z=4p!oZTEhkbL1|jcPLU^H5mJvn#!Ap1{?{56{r}UM+vHOrfvL)>9GUA1-3+=ZJJh%2Rx%c95pL@w@p_%XB$a>{4 z$TY8qQ`qfh>-eUgcpvx0IFPzu&0z@IDG3FPSedw4Kr-& zy~VLV-OtKj?uUhUx#)C;`@-elVErvXG4Xw-qD>{XT0>o*%4t|7ooK!2yYdAkKK>1W zKr+VAxPRx+nr_P-J7q9^J*D&3;5n$}(m3G49 zxL=5KsW@e|!bA#kmPKxMAEMhN*Aa@`f_#IAKUqSUMYyq>4zS2ZA<+==I6}h3H|jPR zkWmbG0@4qAigDTn2QhNTM4El{^b6_C0$YqlJ&r8WF)|NmB2t%b=2bTny(*`m|IvaU zh%IPr~f=}imx{bi^J5eKm*)-roJMUep8ke(PGa^o~4rNAvv(*ecN=v7-}CQqv= zg#MYrq&!zv)$GaO?FlK^sBzHxue=H5cAu$-Xk3f0`I|qG)>d;VL7ibqc(TB~=?0Lk z=f+rJd^l2&K~snA#>oSDy0JYr2CSvbz7L$h<^9r4{RrjB3ltpdrYUlW+^3hd z0S$Y|1hn^)`Il#~@Y=FMO{rbqa1njZWIIj7CFzsA>lmASG2h$U{FoGfmTWm@57N$O zNU$@-;PdjncUXmED+zjHPKFiU&SIdl2vyBq^3fJ|0k0QYpg|P@w5@w3X!*<9V_R?P zs(oGKK5QhSARhU>ac<*oz&x1`at7MEi_Y$S-vyK36vJpBuLz>wFn7ea_*{mGAzElD zHbnM7G~V)&&0&LzkAxX)d=5rvk-=$@*on?U!Yr0T)%lE0K0cIk#SQ2$}^-kNQz~xQesLBrpaAnJMa&PmFDjd&K3J zTS*!=4!ty%r(w2uu@Lum`jE5Tx%%p32#i=^x`g-cDm}{v;E6Cm>6<;@Jn8+2y;sTY zlDai0Mua#5B6#s5OsNS6ikr2wNUdp|n9(f!q-8wf8(RCsPGaNprNL7C7kCqxj;tp5 z3&Ec(KK@z@azF{IPSDIef}Mr!cg*PZAiZ2airCGK%-C^kfXOPE(z6F`Frd85^m`Ur zRMntvMZ)viCrhOh6ZVO#pO=))%Oi1`$Fih+p67iJ{ z#Yi=VrZSwM!sUDos?V^82J-*;VxOxgU-U^xzo` zaT=>`)G~J+^lzvQxc|DHjV$1bSS_ntJ`jJBO~Du?fbU}w1P1q{+MVyxVeKy*-T82b z<+WOqkbVXNH>eg-f78+3%#-}noBPHTXnd?i6lU)XZnu8v!>ZNdD5jDNAkYyh?2Jh6 zW7RL&1}q_;%~Q|*3{_jfL{RbVkAPS=xkaxpgY1^an%Q=g-Zzw>tI_PPc9D4VT^Znw z-+y`SWjgVN{EA*F0xf0!2$Y$!Fj_{AmQ^8Ps6FY`{4haO^6V=w!|H63zT8)kkEa3o z*S06`i!h(h}nYk3v%?Bt6Breeq#m%ow2 z9J#T7jts%<*aYAWNHt{>?>^KqVv$&r5OAmbM#7=tBX|P*mO5M>+nT7^De-N+{0__Q zj_(KZ4SmjYBtxe!52jw;o~~I-v$F1(oZZrl_5Xm)@EJm9;&tDuXs9;vo{m+|E2n!8 
z48ZS@_)k@ue2~q~N%Ak1x?N@g>E!Vb?n}00?+R3B!a1oA4ZqC=H(uC0@Uwpb60dgB ziD+U)M>q`u82Orx=JNqk0eb^Y)5IrfM@J&SacVeQ<3L!Tr`C~XmL=qZ-VVCg_e?65KC%J?2seMq zK3PtW$F+8wVIA%El|$l2_ur|iZ_GYQEFL>{X{bZqXf(`eq)*b!ts_$1DW;`Xa>FRE zMJ>=Q%DIM|C3|47KUWxdu?Oaa|0ccw$jZ_?TnH3fBWn5v`+E|j&0tAddv9KMHe9u9 zhynV;U(>2OlCAEOx&^CSIU90WXp%0dh^zX3?{IOQ+wIXEqTHV37;li>8Bc=EPoV*B zs!g(m2;XrXk0i&ecH~Ms-HeR)Ae0K0vX5izyHsc|vjsg#p6E4k_m$_$7?OXK&OF(xS^)!Q)$<1H@RxeZWe!t5wUv zp42%?=;<=NO_^n6=>n?$v$>EVj1IeehV)^!gc;9X;l=|NvaeNW@3QKSZggc(9Q8CB z##caoEDFaf*TYL}b?!qp;1&+@U071;shB0?6A&=wU4J^NL1pp)CBs%sgKXe~K}nUp zl6smFDvu4CLj$L#(xk|To*;SJ_JC!9I`JompBM*^q4q_%K3ryf#5yKX`&_`x{(;K& zB;pgpQCuhb=|@ocIJGzcCcRL4(ypWW39DJ~q!%bB_RmyD>2=x-qa{wN;1d^IPkLMh zDszQEttp9mL6pw2lgcv@NRK?J0PcI9Z$%X$pkp~A;lH_1m6ds8XZuZF43{h5fEtQ@ zygt!{^5|e7ga8#s1zs!J2J*GI{$Bo#yE!T@fCMp@W;yM8HoTYHB9WH{eSuVH$>d=? zc=)KPHRFJYyEW7?)t=pcF;(9Zp@D1iVaAyWA{nC<*!*vOqC`?@13 z%qB-?L!X#U$_!gLdb+jAye8rk<>$cD&k`>p*7Jo%PD&*yJepNl@f@yu%*5nQ$Mbkb>wn%7u?0+3>Py$@ zLansPly0dd^9%6%G_9DJihu^Su2Z#c*ebtayl$q%Vw^Wt`?{yoE|WW&MykpikT}T= zTwA%CeeL?#!lBSy$cMvR@Tt+2l58x{>ESP2zuu)J~AP2@2zGCxc0d5yGKj# zdKL>Tc>Z5hon=&&PuQ<13F(mTRO#+6QCg)N>F!N;cXxx}U!+4)x*L=R>F(|`+xIvs*kM^7ec?CHL%(5)c3Cj*mGP7nC>li?GlQ?F(FF7$MDJ@=lN zkBU)E5jnf^)lgfW)%`3gV5Pyp9iSlF|@IC*;kat~jQE$d5 zgZvs5*q?RUMrl;P+V(6LLaz`yS4lD};2V=-w=)Sr!k5gf9^<%v(KTycW&9L7MmCeb zY|vq*sKyV6PNFA~MfStBDJP;wZ|c9I@ATAmll6v(xV&01MW^;S*DY*hnv>?clLecL zH?TGP46!xQseymEwr)4KjFF)AgghgJlX`YC1FEit-ga&wsc0jwYeVMfwzIByMyGu7T$HPpj*~_)WlSX%|0oHvFNBs3BaMVP7LBcg zg4?|BLvWRZCL|K+fyAj2Z~k-<5(Ab4oZZZ)YBm+EHSH_C8P7=zRWSC=}YmZeTTQN;Mi{8GlUSnmjesa-NVr z6Yu=k>25$TsjkchIYg!rIl_U|r>yUTro7``>+d$J9;1W^g^ zeq3^7$DpdvNJ;_ho-hT3H=VC!ii#(aAG}9e)koxH|6 zy%z~&md-L;# z11TL7L9}09I`}q~Bp@e6h*?>ywU*62jQy;AYBUyS3eC2D&NL;3<909M>4hP~4iK1# zHDk344s5q*2$?kFmAxm}^hB2wqyChZ9CXZ#;PB@B8|*30r1B}D`r0QVrd~HTBV=r> zF4tksHlV7=z&f#NqUko<_Zt8L1 zZp%TofJ?PPn^mKF5YfyRN}=O>>G-hM;0zyM5caN>q| z3P?mLYbv_?UETz=b}$$@GvoP=UvKzjwsx8IqNp(8gXvl33tXenkx+$kyz2b@RlO>vaZSQu8*<`rA>pV1-bWC9TbgsDL 
zFXP0SwSCsCJ;hC?bqf}J%nX4f+c`-r9nxDIv*?aw8CXUB9PuZJ-Ww#X06ik&j3iiQ zzwIoupkB>MRJ-&ZU#_?2Nh&;}J&T@pb}t{!dldOwx5Ud^XWN^_cYpZ0@MZ`{xS z9Gcr06VCAwVdT3f2WFH7dYLS8if4KJpS7;3l9ctfmZSf@nj)hxi2NKuF?%4~hcteL z-6dt6LgHZ3Er<;tjEyOahPQhIPznf7{d(xM(oi?WrZw>jg|B?|ec1jPpX6DrzB7KG zw8+40+wA9LVTMLgPbz4?I!QV&EA6P7i|awpw&Ozk>hnj6Uq}NG9Z*hskH&VZVyjo! zFqMF{GIaf`PrW8FO>Z)JD8wm@nL(fMgcxbtWPRZOo?(@=c^3$M0^-bT>%`^^8`-Mt z=+!MshsBdi0s23gUW-QS-Dgp7E9C=D-JE#R=JY&XnMIdQXRR{q7TdD2DLu@by%GDJ z#OrfZN>|by>ESus!+`Bc;0NVqW*(Ayq3ft05$FL2bYZ8laEraT)}Satv{oN}zSY=M zs+}Xe)+*eaDKHOGkeuvZ)00x$S;^7Wd6+-(ud{1sPii3c6~>cWU**1D+mw$y(lTEu zYPf$h9_$qE(?g29rzbN;m{rm=+Ogq5-z_8%>hgBXx{p^NhHb(Frg1H|v`=y*Gzv3b)Q>jGkdDVb(ECZUGxaUJ%>(>!C{@fx{rOE?B2O>2mY&dG zw1>Y)fvmD3QeLlb&N?1YM^?^Zug^|k*DA6wGfloim%0kbpR}A4-D!~RNa5tFu$#+j zs2foJ!r5VMvqU7yVQFk+TOBB2L`5#Il<`UX-MCQ}uzRp%qQ`uAxv_ING_t%b%j(KQ z;d)}tX29z4#PQVJ{xnNtvC{D**zVRS_yNnL7;W{{y17Dk58+?=1L?YtH#P%g(|xdr zM~0i#RLtlvebfuoCw&TM)yp>p8#Lz3&2L}?eYyw_ka&F-4Ded(HudHw`7sInALyy! z*3sDKLJ%&;?6@{9qgCjMQPAEMW$D1{Aul_Yjk~yGJdPdH@c!C(;`>8smtDwhPJ5<# z;$Pf&d*ey$H!*Z<7un|6`h_N>+nkUXmT+c7c+^Kk-D|XxaL^=>&AXD0n6@`#yA@MV z{CSXm+&^g((!IcePDSk8G0v>D-OrCtpvgVVm%1(dD2+w8Y1Z#eQ$gjd zK%GYE>oDevK*vi!c{Kgr$)Qz@u$5RNSSR@Dl2{$fhTk-_qgLb@T26$hyTRFX5SCad z7+Q^8BhwhhUZfpmYqK^VcAEkkzCa(h!r)AU+o1X8Tqyhv{j#>2FT#%FHg6bgeb5Uj zq)3Hp8S{Wpj6W;71Z{=0ag`l`4_~qMIv@;bC)&GIV!>l9UO~!K5^Q0=ws@0;P>|kZ zI5K&;K2lJXnRcP`*4mR(v}--P>Grv4)C<{l1wknOV>hAvq4YJiz+{wSDYJHG(Oj;Y z;c! 
zM)UYJ!!QTCIj33>ZBQ10ZBx3@`8=N{5$83a|BtBnO(|xMOf05Gfj~Ssb!byE+iDa@ zWd>X#E6Ls_c`!7gMC{MvS!&3~gv@t~D*I?^2KLH$b389SbKJKEFn*p4^YuzSK&FXm z{Z>azd+KMCalJadWIn8unc1TRTBN<`L9%kh2=rK6|Fib3nNOV_E?Tj@bu#(FS@0u1 zlHrMZvFE0cZ!4gHxLEvmoH=7fKkB7$ew1HNct4iSTeM>vyJzS@E0Z?$d7#uZN;!dV zr%y!QdI;2XKmso8GRvTUK6rs>6Ho1Tg8=GtI-?1J#$ZD0?k(e`Ne4+EU#%^6JLm?-UVPMIX% z1l0&JceoK zb`5N8lZZ>fzvWqw@29DoQOlxWr?&$Mn+sE>-Sd-aUD~-;Or>21pPAG?4n>AnPJUer zaV{r=r>1~@s-Zf<;fSjo65B9mN}|1+_U%~XP2wA4q9lE*-cQaBR7qNlDT6d{ zE(&?3(r(8bOD7NNz+Ub4HoZsaN35VRFf>BdV%K_n<2kzbs1RIlKfu1Hb zrsid0W3h*?nYNZgku|jZL*oq0EPke>X~r3JJL>_|{+s(ln)O2IN_7pV7mzSjJbOAgs&| z(vP%EmF;k@S>orQEcQ!5bRXU}GkdmIFbuahtw0Uj9pkRh1We7HG0lT{ovzFNd+Kor z0%6ip$_kB6q*7yKh{4~2GIekA$%1w<|vy~hs7<6ilE8=+CL$Lj75uk=m?sZUGPubrsEgcGD z-4MqL6NZh4!gz_S8y%;SWT&EC?X{70$7_F=*BYYKx$cj&CI6Wy(_PAqVZlG8f2MEL zlWOy-g|7Kb5G>W4n;S|^8VaYu38Ov{g$r_LYp?o&zB=L(eyn)j$scBgf87~KraoI2 z+-wjYN~l7jzEPW=hGhlKDfY0X4tcC~hu33iatNamLKOl$@31>a)z|+)8$98xuY2K4 zt;(uo5?^$kNcANL{b=!l+otw-!&u-_oD7nv3Gf(vl%(MInnb=g)iS(wGL45;7Kgtp zwOoO2fBdU@9rz)#+DPQyuRuZI$$W7Puvqk#OxW9&zOaQ^b8{Y^O^@5E4l>I1p z7bt(u4I7C>D2d$A?E^5O6R0v_?jxb~KU;!qhPu=mu-usSVP`|W-vb2L{9T7@Nw(|D zja44Ujd!?<$i^PT$oW43essVVa0}YmD8{`}Ro6*%bb317Od%yCEQ`+|A>Rq?zd6}w zSW0eCyV{X8E@Y|7_hFslN^55Y+N0OI0Pf%-O&kd~aM0LMk@AsVtj2~x?SEZ>115rs z-nPf%+7Z!-zhs5YZSkFMg5%c2jRScGKR~nTx0?0{zIF56-ggE2T<1wSfB!0gAJro? 
zU{8u-mooWhyjohMU$&Ky|EH`PSA`Qvo9MCoFjgVu(bhG|=gVtk10Wr=fBL^b&}><- zuOzKgO62|h>EoOTCicJ=4rYm7G-|ng3`VI#=t4X!RQ8j{a@Z(f2$oyw!?ppsApqDF z&Q$i`+X6Em?@Awb{*?plC$kH z8pmb}G<;e!aKD#hFZ2**-l@SRVfgbI79Mf7?nGkBGeAA7x-ysCA5`_`iHVWzh~e#9 zKt?}`gnf%EgB%cI_XXgIX%5yy5gts>x7q3H+#wpn7O&0FDO{dX_c60)v!4F48e6~G%Bv|gj3 zBYeG}!K1MVQ)6Y+)4l%6FtuTOKkdh4y@mcl4ZpV^Q}R&I5D7gIM1o5!hwJT_bpY$UX?HQ}9N}YG``hvgZpfp!t!(kglAjy0w-q zZMFClKgVtmB@_UPi%0vQDZ^R8T{e*Es4Th^^q+mDc8_N1+NR;!}wqrTMoL!-q4Mu(9{^>toM;*rMEgt-c3pw85o1N4%_Hw<_ z$d4v8v#~H{a;En%wh_dTAGSwt*s&OqVowWqvRZH`$L^x%m>9B_E8y(C>I*hM@Fvws zA7Su_;8f9=zvnZ_HA@d)C75&SXZ6^s>5H(DTFnTLZj>i&F~254GB#Zko1!o2{)WtK zceZ&9JgWWOY#Hv-O_Ny9HD>-N2wVAA zd=^LOb_bImSZ%H~j;0|&TxAm;S=ken%o+t=J!bxblXR0&t4T%%c>Z55M16qQKk!TdM1yxLz|5r5g9zjJR z(ep^$GpJvv6wQA+%+_`$nAahrZrnyE{jD+!xJeS1KLoo<@L|VEop9r z-MxcO7L?K9AdS1pUHiA-Lo~-@7Ofbbkf*U=K?g5>8$I0bH)g*D8)ZKZa>M!p>3(<%m{Q{17*1pOTR2Zr}s_B{R`RU zcd8cPNJ{0Af^PGsvMTYvca?-YI{edeX@jhr3QbD<|2ty*pz>?D2V>ndUce9k5+^8{ zm4^t9$&5j9={x8_$$Nuvi3U9wV%|`#b;aHE68PUVHtbg{@NA2D`>6ry5FZC$m89v; z>UayA5Zc^)#vTvge>Ofn_I(zFDt(_9@|mGu<{dWp&Ul|wBHn@*K@hw;NSQGyIzge1 zCVE&n zVDBq|z9-$#DR7o*WU&5vsQxnl1rFg6Se|rKDwT57N3WpDRNL0eKaL6YZ%06T@YN`o zbf{%(e_*I9!q7^nPdQRCd83e5m%BR2-=P08{OHQKf)RSxip?Qt6rTYc`mLZh9S+oF znGYg4h68k|Xu_UXX#WUh(2Ueh=5~(;k;}P8Kt(CGCq9EGDwkkN^EzOx8+3v3HJTfa z9OUvAY3Hs1Zk=PJl*Py1T#@JXV0`~eaL0wF!olC5Ze+l>F5pCvd#HDPr^P4sF zlvBbz)3vuO0UeQK<|;Jo)&=r*HZ#yZOee^mk^JNE!EC7hJ*DasnDy~MNp?eE&#mB;xcHt z=(GsDS&+6i#H8)Bb+_-Eos)e16RSY0=WSLdwl%I`BO< z3L9$`=sZ)3!s-_%z$V+AYNi9*t7}jWWG&nfYG#taHs%L`EWhz|usk^@sBK@4=9%yB z-U87&J3XfaUC`j%N+uJ6*o*n4a7Yz}rl1jXy@qbXqR^5va1IzXBu_z*2xV|1SR@B9 z0tB=VYsjymU@yW`K_m+?5RvPgw5TGb#fKyIft5j`g2&*Sk|0o2FzECT3BCKJmj3i0F!1mXoIROm(7pQ@Uhf(b!%M_XkJGcZpn3SO9PQ3Q1xgT7D z+#fa^F7D*le=P7D1B^IF`s9w9SfH~%eAW>)U|Eu{ul@|$n$if*7vxz=XsBx+wbIW-EyPI|fy`a-__st))U0Z-;%j22N6#^b6KKsKm zm{I50K(YC`^~_LBQs=PZ86vgEwi zjLH9)4s`g#IoEu27F*yJXGzgbT(69XHesA{U9d?F7ydlA4z+&io&)CIE`eeuBKqi{ zO@egr7R`)PZWj^W_}_`UnS_R>@aX+^WJv4kdib>N#h1YAwY@EZ2t?aw!3H)=zW{OB 
zwMRKlea((AIvVR|WE|_7TJWEuagr{&IJHY}YwEl`@zc62!~a z&k*8w^KDL@m@3n{I$Rg{j;;nG7Qqf2w3+lPG6zJJOYA;N-8#0!#Kcca1fCpG@YDqU zfmEt|F%SuzE_TJ-w2|KPSgN-vEFPpaeeZ3$e?kNqdKVTLXHEs$zyhr19?J^=7BQ`| zUY>Iio4%bW3>qGzK!C1mBN%e{<)v;-mVg-BG8kw6oG^aY&-pW?Q7QxtOkas$m#{z{ zTZ}5Eafi+E=%Pbjp!4lTg{b6P$=cOsk2xt6o`Gai%^h?r9M0ok&uun|vIj3Jw;PoU zIo)kUc#}}JxD0@FTf*iz_O2IF9_4X$^FQ07kub;o;H^-X#D`Akh1}6tI{^cRrLg zG+@B`peSEQ!ueSq6n&m}big%F^;D{Tx8q-@DU19|({0H`@?o`s#b)#kgKwu=0u{?? zsIVv53Li9027=mH1*zN@yHgC>DKTV`vX+Y6zdxnCs33PcbU)k67wC=$WBwJ@%lVUT zT&18LX-qV+;78Eh$KPoa1ytw1t!#k2zS0^$=U3?R2BRmB!)QoJw8Vmh-cOOLg9BfX zo?yBJSvBj^bCs+R#)=jWDJp{AD5Au~k5dgvf)TDmoDrg)bT3XHg3AZ0R9#>QdhwG! z>gS~F<1>yyixaPD@IBeBvtY;M`DK={$))%(4OK;crk*e$uFJ5xhDBEgFnQUt8{-*J zkVN9IE9nK%VH;t;_!;f_S4*$2XHce9gZdCWXqkS z24f6jJv?u=9l^I`dH`F=6?AdZNGJdH<>3i<6fV?={hS5s4l>sjZ#Ptm)miZMjkZtb zgh68Juqlf9wHlJwR-|_bbCCV?`fZ&D=~Su8v$poYP@Gmjw0@L7Da1~ zA;@>5Cc#f^BbrV2#v!aEUngs76@-FJQWx6QLlh}LM;XZNU8x^rNRn@5O$tD2$D+yqfEg`FCj z+eT`iWh6S9MKG|q3Wj=x|Gv7JmjGS*`-*+7%%)NZhGwuW}65x|igy*Zpz?%^M1@ zIV-Eu*f%MuF~l1{z9IuD>f#}7eL}L9lF&%Keu*Y<=p-|(+-CU!Dj00Iw?zF*oY6KNJeX6N91MtX)Fd49{fcC3;=rcCgMyT z$xo%BJQhCXh_Y{4lauo!2i*j02)2Cq>eUu>R33=d-A!P`7YLD&cDLN~$|2Q6TpX_r z3>oV|t<_x%k(qFe**pAmXa2m|>=)e}*{SCm%rtpqE;KmvUwaljIAcs;eegZ6I#}#8 z!kxs7O*Q%q#(BDwY=j6tXF}W|SZB4xMPKKiN`2RkOy_Fn&XK7^9XRZ3&WruZR9iyW zv^s8C$jNq4W{f+TX)%kmxd&i>?YL8L8Ve!Kw{e5U}a?5z#S%gfo zK!Lurl1&g@!reYd&go8FBhDX}(o`78l1vPpiTv8(oGjqQ_|c4g8j!OR=9%w%c0D0- zR#4u8Zxt>0X>nNtxi2c%)4lL_joyf2GJ_lI90pWkMhky+cw z&?Ve|$95MT3+VUKGtaeYpIFriB{_^n?|n$8vRRg|UURRu=d{~U%JItCvAfvWX)~#UcHO5eoBZ`JOvumB-4StM)=UeHWh^@>Ch;!5 zLXN@uhkCW7-bsn|FSmkdV&(!F#PqweOgm)DXX2 zW0KMv&Yod-hv|6ZPaN}Kq^b_=#Otcls|;}eGolHiRz<&<>yNn^rm}$hNc&MHDAMlq z8j^QA%vO`cn?v=7`Y*SvXDg_1($V}8@$4K9lm2l!_NDE3{b}tfA4BNb9pyt2^R*T_ zlBxTD-XTMqOHA9+O_{|d^F=w)VokRyTooFhyEXFy+Qx-meV%&jkF{fgu=l9ByL-j; z!RH+l;X!2_T6L^Yx6mIHW-Igh%wpzNT_t3^$n&_zG~T%xOUVZ%gnr+ zw0LHzybUYEy~4JmVJRGQ%L&=9c5xAX%GAa5By!MTBoQZ*y{uYGzVl6I_!gGn?%H0E 
zTh`qDPJH*5U6RK9QL^gpI_Xqk*`#SiWl8im?XTNLDry^J0k2(q_REup zel_2^Zu^)LrCnLe?hlRoABI)TGyVG*3+Ts`V1M>;5%){JHeVV47W|YDAtsUDOM9Wo zjT#W`le}x(5>_@WBdBe?v&n|+VTQ&Ln{P&o&$5v%4m;m>q!njFjXx`q*8S4exdzMj zEB)zTx4yy6F=vO?sN_c}+nv2nR;_zWxSKdK7{XQZ$jGo%ln5E{u<=J*I}E?8U%~~= z1URoxU`&os(@K!#PaLiM@=Yd9Nc@CT#h1X>sS)IV#@py?O&lPNT)9}B5c*Ce%qgmb zq=u}^%YoRVshr67{BJp&hIKpIHxd!vC8{Nu-a1d|J=l$D{ z+!Tf1ifwf1mdr^sb-KUexf{FswT-YU59!;b8s|`$;=E_JwI)EDRs>%#NKA=}(p^r8 z>YLXdI05OD83&M8y)S+GNmbz3gyxvui-$OZ(!?uQ>Bx&XRhD zo%cdc&U(20zlX~Ax01_^43<1Nbr0r zXZDgI{8`l>;s5=APeoI>SYj~WCl_r}*fnR@-`3ZRvFCE~Dr=DmsV}as55l^L9X!Ho zK0JIjs@h^Ux+8|Yoa%m2k0tA4y-|rJnRFj6`u>TY^K}3B8E-;Pdf)hzW3CO?-@cyb zQrwyFTgwHUdxtvkjR`m>_jaSX-1<`pyEek-XP?@`6{~Qqxsr-!cJMDETy=cnE_Vt;3Bt{Ws&y1l=88DUTTS{yo;XzdS#)I}RjRJ5v4h_BWLj%Q>>e&~o-%r)(c}CtS9!U+d}mKu9~xLwG{iYF;HY|5kzr-Mnp?MiRN40DaIIu;Lz^`QI0S#KKA>9ll!yhlgY#i z>~CL!N+=s&$CzaO3NSZB+=hUuM>VC5o??(7~16`KsS(c8}BorO8KVC3BW%*1WW%I>p1? z)cd63k94Q*x0jKL;jJSXh^7k2>@uRSMD=wCn!jHsX@?ElCV}R*u6m74+I^aIxLK9q z+u{gt2ag*ghdEypCJmOBV3pEMzWY`kmWYZX$>L`nsK87)ZjAslXGEg??&Ss#EnLts zDh~F<2V}CaeE86^Dowas36a@O;!a}JH&h;VTNEzz6_htS@or#y$X{QY4Fx7lX)yb@ z!RjE$IntA{wKoJYo;MH?`3s>`20DIR>^KG@gLNfL_?PecT*C%I@ko=gEuV)( zb9rKn*D32+mw|NNi=BZ3vtN|NhOOU#P0Za|%usLmi(p4FG*UrF7OzyS4mM4pu`#qq zU@u&m{SN|4bLxvYMW*eHw49sX}rsbf`$HXoUr1iL^4D*RM2Y3Fz0ht!<(|dWyoCyQNVrtyjUt&J zo*ZxQdpMrqJo{NU4Im#n0$@?$NhGk>mdtqhCY3RT*2T|;z8scFUPtq0F3m}##)@RQ zPOe-q+7wev^e^C#l+T3(O|^mE_56M}r+6#nagstS8+DP`D3K03B#h^_2meW3s7oS= z(6xqy&A_iA!^bF}E;kuc?}-{?i5gENe?I)NM2mp;`jf6;Bs{$2#m(^9MzpQ~(Zebm zWX84o3x!_}8)70KnRT<<^J0yY5p&47e;w7Z->Lgw&+?S9FU;=O&ctmMH;S8r|D{Nu z3+uL>;lgp#80d+lW0?~ojF%EO9y)q6J=N_yfNAO@hI>#wLG*v_-8%5{a5PpqBrVO+Qv5Cx;)? 
zjKl_P5w7Hp{ZxA7k9e-2O?!9QED-`qA)#Wr4^e@vJpYz^;JwVV3X3m2=0H?G zM8Nql3;vmHp{&1do(CSMq#8(Y#;4M`zUBFDQ_p+tNk8t{@HkEU82j+;jwF6GMa%My zouGg_iM6x&_xLI_c{uGH?#$2jI*?xL`->=Uc%GVsIsDV&h#mV;{7zSh{$BJ5&(Heh zB4s_6T=6|@)1aX-0$l;@YY9r9JasEjk5AEr_mZ=4M%B|dz z)+~3Kw!a+MwY#hQb3EA@Er10l90oJx0A|>1$S1{`4_#B~gW_TGsX7T|>D_to?qOb3 zc@LFFamtD0-Nl`$LXnJ`rD^*1vect}vA)c=PMA_@6uSMqqOT3(?ls2yW8pU@geIQ? zJ39xp&<=j4QmCJ&5ZLDD^dO3dQ|zdUx)%9@nglAUn7clUZ+S8&8~nN>q6Kn-nTk2b zl(#2|(O)bVCKpkJytIeOKkEA4zZ0EWr!l#`Kcmh?cHljB#@?oA+iLCgoJ9<;FfXkErs{)gwq%_6~oVk6Lm~d3d>R zCeL#mE!pv@6Ip%HzD9_O{vPBW`)S;DjS6p@gN^^~+#M0MEQPC%DX3l(v4X*7vk^{^ zt>#kLN94IxR`;jc?tKxySCR~}GZL8Q9p#mHdXmDn?KDd^dA3M+XHk)VO3Z>Jq5GJh zuALd<1O|wdE9SnEy{9z6B1A^1c<{r=bi6B*Xbg~7-QXmeMqM-Bhsl!KNSHp18~b2K zWD{H|-YIXnv8*(;dCvs5LH8^#VJjbbAk|kS6#P~bw8;B7(dNF$ya}TX93&?kc>KRE z07|zDw;#R2UNUDgw^R%!de+b`d{VZv$N)gLMpfHjG~EU8L0Pr4q>J&;PlX@v{_;VeWbfCip&QewKx z2uo6rUfqdWi5QWz4g-g(xxT#f{3{uK#oOs*$N!Q)NKqmRv-caS(VAZNV=_oHTf9zf zlAI@EBL&6rr=0XEw(|Ips}TQJoa%q^@N3k1#uRjBzC+goQcd?SEs{Ut!nLyN>V52Z zPm7R&1&b_eN2KQy=2#`Jb4pD>QUl|T$ug7P<7ZB_PR+WP`P@ZgE8xjc(Q}*ZspJE# znd|okqQat2DlDU^of?En!Qu1@LlOfY;#q_SHwRa}AZ2nCIY#s1dYkshIAmu`M?JWa zNaT@ClZaoibj&D2C&-s2aRSQ-Foimz#Nn?Kc@Qxmm&RhaIC#h`oVW%OT1O_7Dv>Wj z774uH`TQOXR(4W)!dx^KI{}?G{@qdiZNA(PO{{qw?{3N zMMslD&K-iDDWCCo!ZA$MSgI~At1_Mtm0Z(nI6wk_9LMl0K4TqL`ipErQ~^z{AvjIy z=QLiCB{0EwdLQ<_0RMDZsE}NtM9aOFA?iiDHD%A51T|W6mw!t%>0PHm=d((V`wVJi zfh=gC)B@oO9ZkhROGjIKdT;@uP1oyG>1M)RjV~i`wu$vO1@HDGYU!gYQ@0*3ujXO8 z#;|h|-?1`=wnRB#FQD(v(+x#%>Mom-syxeGv7Rfu`*# zMS6%w`P7&iJvjNp`BtVKKE4F5fB1_nrl7AF7wnnpRezV>)tO0u%5TAT2y4?f3a9g$ zcso{w9v~F!B|wJi-zFng)LenB?*4C9OD^G8O4e!;XBbxL4iUs-2d%4H&vQfNHKt5ELaxH zXk45YeUN4kNpMJpq$A8V?fSQapR!RAg8-U|s&6Ir6Q5LiTrQUPd>dH(-`eJ5S%mQv z{b|`HA5D6ENqN7ea&0awL^GWWixi06Z|>t8pc=j1#0+=oL!0#G5=pYtyZVDgvE#0p zD-wMz{popQt^k`d)t7JX-yStfQ$hr$Fmh;Ug9*1CL z=@Is##Y7i(F3W=k3E=!;^rLuw94?3Rm8gR?V2HHeNhl!|mgf-VH;CyuB6gn>ACMSs zC=tX^`9mmA?YE%CiT>Nq>OxIHI3{X-$FHJ} z2}iMCV#Zcm)MSs}^%VF9Eqxh>K`GXq<}^x8?Sf4}lH>Unmgs|AJwTUrApx;*aFmW^ 
zl#0LWG1a2qwTDv(n_Odyk^W7##k_F=wo!3V;=eSh6#uDQS&Da;P><7@DEv}h_UJPB zraJzW-iD=2HVuN6h|M_qyxIrileL_+CJxR{$`Jd~dc=O=`CG@czKaSuORw=H(F$WK zWlS)%?E5zjGDZ5mbJH|;7Dsj#%VT}#77Ca7fL@KobKM9UOW9HxA$;k7<~DJ}?ISbI zGgiB|KGY*u-Kh3lW0@7b?!J^&6kl{7l>CZ~lMq3)V-!mX!`?U42E4KK+D_v%?r(9a z#>o(B`4!#9r&|AAn*vnrwZbF;{g#pjphc_rP_=LIBji&jK3{F9J7c_9SX+8S29ZMB%i|Fp^Kdm6D`mBLK{2b$sOLxvJvD_+dX(c0+XDW$eQ z!DAb1gS?(0$(2|f^+q3`0UH)Zp)h#qJ$}ky1FWpm++au_(wKP_Htb&@S(6Q&%9dML zm1*7OL|`h?sn*4~FKcmSM%L^H=Vy09asE}Hsm(Q*1teR`PJc0E%B(~A&ka=P-Q`eS zEd_bPvDAUG&9vt$=jI14m;w~6Ww4tcg%bW4wXXb2af@_>G-2*@4sMW zZ93d}xdN1pfm?u+>{S^vWIInfHsFooZoLe6pT^-_2lXxYq4YYD_wpBzV;x19Gu3nE zgti(bA}tJzJyYAX%#T_P)T>{Cc{#~$%$DgGh#Y{N1p=!OoiML>BuCWIDaGU4_zAzK#Z+DE#i-0 z3P~z18jI+bV3Zr`(c!2UZKi%JOex17ecgTgxt16X>qC?R>1;=IaG*Rpj&`b(by@os z7Ig2i#E&f%K-6f|bkd3Xh^fR%+n9ijo05IV^6SBI1tq4oEXJ$@BSa@Sn&zab<`!PS za)Sv`2VLU}DOy>)Lho+zWPZqJup26=2>HY9V$=3jY`-n_V4$ zl!Q7YL-qxF`$4O4T4b=yMb?+(wuM(cDeYJiJg)6do&e;bEpVt@g32)u%>O=WfNZ&r zhd6ORZxBIef2}uN=tV273;O8c?Ckjb0!z z#Y-b<&=1}M1NGBswXFB-W9$;lsWT^Pe6Hr}&q&$d$zTy3TN|kq54ly|L(6oaQi=vx zJ2yn;VjFXWfKL*Rr{w@+KIJ}i?7QklB6E(j<44Q?R!f=49L4^QdD)gn-$2mHwSHX> znfJEm;u*-GkM%5C`e#9FCVTe%9SA)*6CtW%w~WPIstW$RRk*^e;5jtr{Tap?l?88s zj4xn?`~U=X*~@jh_c^{#?!S6}op5mS{Y_)e0Q=y|tte*H|2>s;(zVhuMa%IzldHhw zd@+qa_5@rDd+wC%9p}*v*<`0-cwOxQ>3)Q45Q)cqJh+4u=a(#W_;BFFaN)sGFQNY? 
z`%m5t3wcub;pF*QBXwRaqsI(u0~DGDxW`n;i;``MPXDNJY>*HVThr3SX}n|1(V=CA zCT@p>XMKoKvmsKV?fa2#gk4S}t>-Zk!KAP*j~jMuIfn^b?2B43^x+8$vx}=aHREO?h_!fi;E_sSgFk^W{9=ie!T-CVFxUVU-RrX0c3Dj(yNy#Oh61RFA3b1Kih`@ZguW{%P>j1wMdgANU!avbc1 zSmx?kp;MAA^_ePZH3(O6=Q1BNT#I_2AK}FDgVv-I9UwQuf3-~iFMCe$PUj9iE+_UD=2(v+QdPiypnssuZ`06St zhWcjVN8fKf@Ru1cVO{T|u#yx$JA79ApMgq%68L72_%3ArcQNp8Xo=4|KjYsCYKrs| zT3V7;zD9)(X&%_H?v0+3W8mztNgC?2aQ@QGAXVY{ zj}X5##|Ev~|AfJTtrdhzNUgocRSvzXIYrkFk*th-e*0N=>L7W1qE1ahj0*0$A!*Hzx-tiBuscHs0`9bqICfk{zG^4EEDS5}STkaJ zm+J;l{jSy+cj*ybp*0tc066L3QL$(s`V@xuZotwCodU%>^b$2>t-rD;Jc6Z)*Ym zRt&Nf9EX65L^%cF`1T5q(aOkQ(7>9w zUwSrLGnna2g;MyWr2rE$-{;dyV90ZtRKTOzV+S+P5@6nBkLDPB-L)9%Mvj^wA|JrP zK>1;Ds2yHnx;9mo1iD!z5*aE}rC0{= z4ODU6jZ8rm-kx@Ak-FIS_ls2b94vE?KYKr&A^A)w8BKiq4w8&aD!OSF-S9~m(VF6 za9TS{Sw`S$sK?l}3A|}&qbL@G+UrW~aT5OQEk>*9P*zocEQ04M6Hww)^O}Lr`r}dzBp^HpVLHe@xEMpH6AZVUp)qImPWVE>e3(guaLBTN_ zfUKIBdUjJ^2^rS2DZ;5QM5q&5nQux93KZxzJ9%b^>jq|bAPXc5?!GWwM|-`l%AqwK z5E9qmn$ECMRAYo%WoxC}-VpB$;k;Ql}HHkMF-1KqY;+94(MEOeSqwFvPdj zY_c1e_yL4h!{CIQkW|e?CF*EyYEVB6_FHKmK<>({V>;mOm1a?YGU%nryOnM>=iAK4T#EA2kml%8fwa|2l4PRJb4|A)aUTV^<&vz0>!{g_ln2fG{Nm7DPm7_JX zO+ErUQxj1>qvQ=e11pa&DSMyxOY4NVY+Iah>ac!-k#y>YM9-f4Pel1VOwR~Myv(~5 z-@4fOT2PIikOn)S}QP! 
zf!^@;n7;5M{FIu#@zJep&hIG`l3oc<1vVwXftiJ3 z#dWvncy}4B*-h2iTJ=6!avXMG@weA+G7IUkDJ^&)Cc1Ty$Gies{5Konzw0dPtzb{% z(wQ)=h)&MV__{OOBs;(HwIeu|z${TESHvQ6Z=)lD$AY2F2#5xMqcRgU`k%h z;A`WV$lK7nGvTTAb0ds2UtHbRTr6fSqAKBi!e zE-UH3!sWf(oVKY_#;a`aWvg5{6M!qLcDj+Gfkzdh6s>%SnX6`o8>?DFU+0>syP3&J z$l*nx1RLwU39nJ-Bh8?sl!t;!&fM2WcP_Q(zUGd@bEgqKvT8kv z*dBRs>3UM6;Z2F%d8soIq_3B(3HjLLMSkpYY%Gdff;F70D8f>^45r|VZnoPF3L~ER zaf=<%LO-#FvY7>o)v(VZUOQN-1nm-7XGqLSIyL5dYguEzd|~4ELx%-gR9{W;;gA*i zvd8j6b#h6hb5#*v7hUz9f7oH^AZgiSjC}>7>t?6n*K$1_B1~X#E6dB67^3=hdb|3q z7XFFgCTbQI5=ImY#^`KkG~A7o>N~SNbcmb%zTXjS@{Qhd(J1y^ZLbaH4dic5z52SJ zjT^zaiO0>**r~`A@!(;lZ>j%85I56>XPPcIgJe$noSCE=dW|v%J_g|x-|-^g(T`w1 zdiLSW9PRhkXvphpf^@RG^%-hgyhpaob-{BgsxbVaP3gJaCvh|tT=@Pjv+XB-zr=9O zLy7HIMBkGS1MIJHkxu$T#;0Cx4<*XstIv807xi+Q*+^aXQ+UrbNiNKp7e>6xp)gHs z*pl_v3u~Y=zqBGN>gfB7@DMj_!iyBW$TGST6s2OVWD_*H zp}o;?zdHf-Qd#8hA2Bf!SbVBG{>^pTAtf1Sh(3Wr^24zHu}h2A`*K8LC`F%^P5TOW z%*mtn05276|0G-$Uv_-X9GMSx+p3V`eZawCd3_;@n`&~O!E@rzj|W*tF#ED|WhVgL zB<7MqIqW)7w{9fC;PfR=-(9v^E3I_tmqfIeB{ju8qMfOHAqR#r&5&rRN8#OR^k!h> zMxaJ_Z~j{)_8&$YlO-;!OLtej-ki~)noBlt*m~Ui79h>WL>R`J@>@%&@MD}be}}7z zDTik>C1uDBB5``>6`NpoVa18NFv*)B1?$Hf z+V|~K?i6xr-%gI7-xc(CZFuY|3swnyxwSm_gM%tU|v) z#>T&Q>qv(7@noY2reHKl2sK)?b`b-e;wRT15_dXexYQ(HAqa>Ahe$K#W@|lGwK)?5 zerOv;(UWH!+RrdKqh$^)t2bH-UVb+4$BiYs8htaB9IagYD4<~kg4SP#RH?osYrF0= zD-`cUNN9aX?bqfYXTo=SgQBmC{CfkFX6MCwzqRUnH*ZZm>hQNDe2)2;{N$DomLjjK zF^u2uNzMwV8t;cG`H6qG6DD%yTv+DD7RTg5;ftd^a94D*izU}wKTZ)EDV*_XUdS~k zxJt*A#nU4n{ai|GI;}lOoTldhhJcq(6ly(cKiyuLbgSqg&@?=Vj{n`iDI(NUo}0_z zK|rVyTKn`lThBGBPfrjRW$#JjXBc$kqR}@=gQ@Km-?rpnJa7D#fFWfn_;rEcM%b18 z^b$YXShE2oFM?;fuH+lM>vkL2jlu1u&ZoYdCc2K&!?S9x>^@phjY}Yi zc@L+toq!A7ulT3IPDaXEm)o!-iNy`b1N_^VHQC=cRmg~9qPT2YC6AGv%KQ-~ z2=z&yVh*T&54q^7FO-i`G);Qe(6ZRr1y_nKJfe*Ja>=6JsSLRjXQ)V{ZkSHDA<-Mt zKk^H|i!7<>`8i_n1S#_pPC29doBw!$x0vT{_|=v?=Ce_B6_i!-m?*2}hvd|knmKcNjqNp^goHE^S~%TuSyMDJn_K#BrYrt4 z+~f=d??)On4&!}sFFCrBF4(SrEztZWp}-!@d55} z_<~b;Q+0Ul>N^dqa*7ih$+K$GO1ZfTp|=GJB-U9YWLJ~<@_K2#zSUE|4p*LA?eutT 
zShOmc=5w=f!B4K~n}8RWg}2!ux*k9I+nH`-I#Gwsj+Cxsx3SIn7g2dsntg(#nZ@-) zCB9K{spTtP{*p;XfG5N{i4Q#Z>{rHz-S=1> zhs84&j+?^K#cZb}p7a@HMHmvZH~&uLb$z}l)IB`OUA*3dlNNg0wB&B8?Z)&&yLpkq z@A#V>dk0gLC$wobw=Jxr=Pkvj`Pw`^y0se)`$FQ$CBFZ}&QVyutJzv56u5Y?`l8Fz zbq&5Zj0JS4%3qATc*pdmvL4v;G1WUnCS0~P$)rpbG2QxuHEzr?Q_(c~)tRNl-GwA3 zU<${S-@S^CpF<3V{ZMOPDw%+54omJe#sFB@dy|42JAu(CwWpM6SjQ4%_}P$ zw9l7(SV^dPM(|l&2{`(%RR8F#{o#jqK+iGtC454pwAV#wOEWs>amxj5gh;EC^`L zX0u;~AHCC|b?)~+g0S(KW7c>$^lkhW!%er@sN6}Jtlq z_6vMn+|6`B1XCR|we4QWbv19k3wTM2^A0P=G`OkszuI(+WX$FrEZ&RBSDua12{O-w zQIU%TKguksy#1L7(4{Mcy;iDh+ZiM02gc;nl1g!<&AUKTog?dyO0Sj&$$(3ZR^%pN z)ajl?*jk~p>MKXM?|uUa_TW}M|19UQ(Ub$*@cM`EiL0f7c7h-Hlb7&GD<1*4H!kC2 z&fIqUAJ*Y6Ey}E{d0WAUxNB*0#`-b2RXj&GKO^AVS3>E%JJ*yK%XQux4^kso9<{IL zy0T`E2Cqe>PM**pDf}BHv9CtoGZJSJ!3uN5^oQ5sp8#28ymaMOyv1%y`@q;AiFb#9 z1ork~OP9yI;;)LNieQdmF#1v*gsJxmk>M>8nF}8w}iv)Cx8#j!B*et zkxvyM4CxX05ZWV{!o**YK+Yv3x@!8t`!vr;9XEVs!u{J%AAc32^>@J#sP{xaM-eRO z5QaJVif<4r80`?{5{1ze3EK$Rl3T~(pfIdUp_7}B{uuT0>oTD))GQ(Vxxe~!R*#jY zm+}l_k#k({Q_HC3?J}^+VR>F#`k`?EP^;ytt)fi29u!*J>ObtCidX6_JJdn}Yb8XD z2#{`ywb3}7ft)>zVbXS+tX2glBGj6w3uy=YzvN08iWCK8KlZiFrl6qhSY@yKSS3ZM za5tcjt$LK@nFh;I->^!kfmosgwv?AezxQFvB@r36-iF~%H?5alEl;fONB}Q>b^G^M zoqNtz_dCw%rQUDY{IjyRv4zw-jt!@nU(k)~x5y47x27Ijx>#y|5^c{#&YWQ?;;G_P zvV^!{{F6n{W~$=ShcF^z-J-t$<1$2ferODJ#}-HL5EwECh@-m$B1#gY8dBn3kAhX6 zki0cp0-$6r)W!@SO8ajwM$3WqE{)TJLulA5{$wTVEx@QN0qZZACAOt>osA)R63Z%o z48@^SqbGztmH$X7ef;uO8d1cS+m&;u92FIBei6TMtWE#D!h-Oi40b6Ss)rB~W@Q1a z2fj?E=Ob=%b5k18blLKTJzp$ZyKrP&7s0Xk4eKwkw!AiZ{2Lt0{^-v`td&4f(pMN6 zjHZS1zZ8xk7p|L_C$yQ1fNAWBBhk6rFon!gJ^b@+Fm3V=Kn7nX6wuQ(40!&094x}X z>V{&W0Lmc#enqAO&yA?fUhkRft>X}G7WKXuC=_|(nu`eRO?lBbXwxywPUw5d+YW4B zP%aE}nKn{(&Cn1hDo|lNmN7AO5ZY9CyHp~Pj*bA4iCHKZM@-#Q*e!xEwtaf+`_!=n z)jcpBT}xsUB0RnXaOn+>irH8TxZrz75XG?W2oZW&5TVyFu|>e>4Z>&lS|ZNyAtbXd zngt(1_L%k(TaSQ1`$YgKOJ$yt;lH8XNE*%M{ z{CH0{^8-u31mIHhN_~$?=r=*mv~x2C;$ah)ix|H0bqgE&2J}EEn`(c>A~-%Mf~&ET zC@{HWM>T)~xt$@Xl*MGdZ|WKVE%SxP{)o|$=8OZVt6eDwy)u&7Ar*8rbl%OSPb3~z 
z0Jx=izCrY3E)`7FnwcnBhro68{=`mS>u%lqc*|PHI>#n2JAN}TO#aH_qS;}yv^T=R z#^|Vs9fMdWu=USA2tE7UXPSx8`mN?GsZJJCq;N+17ll;WYxQYfhu0xUS~RfIIKsX5oU%l^LPJ16h6L%(y8gHm*r-n%Pw^3&&*s&Ef~cyzx;%2QGphx5F? zU;vfBMJG?|HpVNLFIw`U-A9l>s0t66L9jAV*`ILsoB@P5@>AmfwNStDVhvL7I3FEA zDCF-;tXk!v3>?0{qw6?IkM%2OFft5V}9KyY8sVcB3R{+@JW?*`JfgdpTyyVq7La4W7==Ow#;ewV$HDhT836jQsv4JuXY$V07YgAm(@zp}+1*p;OB0_a_WA z?D%UlBkO4j_Vz1f4pP-Q<41d@RWF@g(IHEcy-xV|OZ9lQEVlCtgDI1~n!1YFATx+i zN$o_^IYGJ~zC>_4ue_gGwe}E;J43z19({VyG^ND(cgX4-<6T;wo1DT zsZ{QrLx8jYa277Oh4uaZ9L%Q1pPsCF55+#PfQMq?CVEIG%2fROEHar@lOIKT9;~+N z_UCIr8WViZ*!%~M8|#P5g?ltzh|N3eSwIEjk2S#hAHFR1vemlQ97JHHid^P&FKl`? znMTG&y*kOe?dL~gs*rB2Bt#`%4el8h$rd?Ez$p~j(kcJUEBIL1;e1EV%nQbGb}S~q zWHZSE@W!>t!Be1~YVd)3u}}MPGw?GfDone3jleoG0I>zV_7Q(R1P;HQA)T_RxTSyx z6^JzUHU%nPQFL*Sb_&}9q+i~#%KrooAi02_zqk7h(7Q63uANLJn4$G6P+7n1wfzX0 z8iC6-t{0|qJ)WD3!TbKkBlA&n^fcnG8^{rrC)BLOf-V|X57ce!Dzw|%2<+6tq8*?? zAtUMTL0~>*^5nJS!@n%&4W{0*)Kw4)jKnP{7$+4+nVF_J{A_Hm*okANNX)0!Q*e1X@Z4rRuJMVv*E&n#XF2NIqe=y0pemC;9j3?Sl!3D^hd#vrjL{}(Vu{a53Zr{&R6!{6q1 zQ^m%?trb9C3JZ`IIEl2$lc>DZ`~!9=LOIYZ@I}C?d7yK#6@iREViyam*7euH3tLUy z8T}v1MsC2zH(?Zxppz&^!?H#)p>c4$)uRUdhSyZ>aRV?+DXK7|>xitNl!9U4;EcDa z)IrO)P_IDov2oyJ5x?@s?tZNOavMk`c*ng=nO@hD5p09Q`Mq6{fBp~}9 zHL{XXuqv~6LH4`Xi+2Rak4xiF`e3PB_7|(Jgo^W7S5K5#f5utXj7?wU!;!e_7xU{&1g=ne_gzoFNHI>euGb{H_4>&1AG#Sli8p zrhY2wknSWQo+<~x(yq|-zj8169j{|LchU(E@(lZGu4;}u*RT6|D=c3(c#**h*~8-o zcsHkp@{CKbdrL%aJ%LNfl0>xg2d-2{r#iI~@4%G1J~$(}PElhi9G4*JiqB>wgpCxW z`KXkiD17JJ+BOw@*RIU@(i7?wuM;p-=tSu1PE*2SB>cPzmVlFEZ~}kYrodfeTOxkmOmu612WlT?)mNSvWSefV z`H5u}p=d!YdX=kAIqmOJLrw^pY2`hnw?%gRsb2_vytqge);-nw>kQ~vQX7;sNEdG| zVmsBnOgwntBbhrc@dW%U%*Sfe-16}{ZvB0YDY1OZUL_pELMPe6bALs@0n&W3xaS(; zx*BitfA>BdIFk#=jo(sut-C5MZg}LYR#>x}>@Lxc1xYD=zZ}{QxJjMyz;th*6_Zw8 zsRF1~lFch0XAWAzM9q8Y_cRbj8zbrfvR0O8+uFF!Zdd&B&)rP-$G3lT2!!~u(mM8# zQ5!zu4HEF{LIqwiDP%uDCd-7rM?k_SAAJFJHfDZzAObznNrqiPDbV&JvOjr%m zH>rI!^A$ivyrA}^$a;B=Cyo5?w1zqnwiUYQ*XQZ?Qxs})orVR*d8nRmyGSMFhU#0! 
zJVtZ{{XTCCx$)I4JC-d`u^brXNQ;ZQK+fv{(R)Waq@qZ4Dy?W|bs#4X1#}8}$ug)R zKs-n$GITI9 zhW3QhTDBGylYI7#8HOL80=QSmy-hRfD_|}4p7mwNKDhT2f@+rb`kY<0aQ#vGNzN)> z^RsSFzLLT9ulX4o2SmLjD`V^`@p2xW1j1d}bj ztQZ)hYmk_iLK)R{H`%hXaW_k;);druxRUojRzC>5YemRkUF{MTS|S0pJu|kA$R#d2 z7d|E$C*~;bxaP}@36s_V(lJt3qjsfh*UN%Tg;v@2D9J$*g^P(#y^wXO(9CS)6l~A3 zkq$i!mO=N}c4lI2*Eqj(?Ue2YP4`SgU~};W!XCCgK4~7UKrWY?&f!zTE%dvSC`zrG z0hFZr?&AaAF76wca@RImf@3%p1!rT8+qf^`S47RDu=g&@g_oPWA>btlqhxx2XS;+%oW`mDasH;kCPebIM1-;>|!SmzIs!a+9AD zI|wp#6s@pe+j)J9`8vxS`%iyzgwtEaQJ^HUN7-yuuZ3)vO0%)jDq72GjYgG&=vyIk zu!HN-e~7*_ur1oasFGc_w>FV>mDUxb|3wU|Fe61f7v)aamD8liA2lVo(!vUiO@4Q= zuO9d?(du3$h?cHCz~!b7B%j~H9BwwJ3y2LYVV4aMo{P}(qt76`Ej~P@f@K`SHf~2M zI^E7%&GegzYD&^nws8gzbDM0o`X{-#58fp?3gzf{8RCF1q?d3mqFulYf}$>;;9@yl z_`3E?glWBBE+v7ee2NT3&jP8A)l>l8W+cyK$S@h{R*Qr3nL~tG6mk@rC}yOM{gT}z zPjX-3HbyuqZIf&uRWZY zb|l2Zb|&QfD95?6t}3=t3XUP8eR^_Fc^FI5xiH1BS$Knczq0{Y z7oqC}882k=EwCQ%&`)o3h&~(V`{wJ2HjIrHZX7YNneyY7E&6e?$|DGaSHAJVNMi!oZx4558$(?q1i?2zdI@a6_?a$Ga)D?W@;5 zm(md2FyRtkf5XAnukKtrbj*{8BPZ&wAweg~{qdC>Mw8}y%_X-|j?&G(p05^6H1q8w zjVfn;cgD+uZg~KFp&W2wmJ8G9!12HsTX@{L&7qTYw$OP#F^L@W;(;D3LtmRmZ>~RH z&%^9_#)(}Iy`NVxMQ{rPM652)90lxL{*23iXE3Sfr3rI(be{;)*+AEWgFce%=u9f@ zSnI;OB_Fi-yiIUqMf~2X)%ciFlu&Bh4f#dv;ftvnSu_j11ssO&v;Vl2(nJVfBG6jU z4wW9df2ME;?d?rtl?m?p1rFo84L;%TL?w1Sf^L-;_*aZSwRBb#vTnS}w)kQEUMY}f1YVDx-i<2Dd^lft zrOW8H7==PvUrG>`)b^;vL;QO-(Mk_DWA`3tdBzX~P%YlCG5vLz5phHA?~PrGk{*3p zS%oGc@1CERb7LW~D(;cm#J6cd($L+jQ{6*FF{7!FjYLPXM8u9$@^e8UfZ9~$=OVEH zW!PQ0p|J(0!fY>j_WR=C4RN3HpUZ@gsI;*p#@Ns+eJ&6wz_1a1)ZRF5F~#=O!l2f- z`sw?o>Xij!Yp7N=_LuZ+9DhY*sjR&Ux}pB}_uq+ugw<&c^vQyN5L<(|!Cf z&d}RUEOMHBE(-;ZsYbRxa#p{Oulop1@%g=JV}fxDT0EGFl9Z-zpo|y}#$3Oo!rHnNmkE=*J>=p;$$+VZ48AkCZyV+N3E{gU7X8 zC!2K;ANgo$2N0xw*&#t!+?7I?ga#(I>iQ|y*H5<^?KG8dys;33nR<@ObIm8T+XzIS zzM`}zM<6jPMqI3^9PdGPd-_(1waW(1%+X+n)E3dj8Ac&lDx&dS5DsA>YLV4a`$K|wO4 zicB!9wY57L`Ifx$6u=!8JORUX)hO}Jkl=P>^(#pTfOh^E(xZ>y`IcQBJKJxS>IXN% zWaw|JWWDR4V-A%POgQ*va2l&X{y3me?vD!*>b(Se~G(WP54QgN8{9 
zN*C7Qoyk6_-i=}Ls=clz-j;}(YqA)ur(l}-8M5iocYwX!|| zV8C4n(Vjl6itCa?99p-bw>(FYtwCh2n!kabX4iIcGBi3dRx@Oyrg1z1= z#9*?~{eMnQ3pu$OsJajE9j#RTu$*FUl0LH+0a>{7f{+P|CZUx06e2NBhKgLzYcp5s zr0(h|GrI7&r4A@xVPY(c-ypwy_qWi}FfWVsCCMEBoOl@}9z14JMpC(-D|^wL7j zf1a*nDBE=v+fC_Qd>D}GKi~O*%jc~cK?DldQDmf?f2g6wjDQ5HS;(|aU62X7k)nRv zHN@={kq93Mw5@`A`Cc1w%|4*TZ%hF;3K$Z!MbObk6^dx1tK8%VgHMHi+4+;$w5@=i zcW!UG-Sks^z3+2FcB|%5y88qpg0K;?U<+Zg@Q_Th+Mn4}F0#-GBMI#SkJ~=rqCW?Z zc3IBBkq(WS$DO>&yzN&ddnsU$ccxVTd0GI1=iB1e`30s?U z!GB-)WDMUj)-k|wgZ8dm6$`%^xlrOSFH6o4301w>2pH@4gVub8aslo~;}a@)+Usm1 zBZw;?I@9P`1W5iOnUQpjl#9FbqI|At&h)(qcChFc$H90_n&_HMI%c&TX@5_|}9 zF(pNgr#wp`rXGN$Ic(DH{Q=v`MfkJzfGz`$$K;k8oXcA$Xh|waehT7(nNq8`H2fycRa)q0dS! zj@F*cZdsQM=Ffg0`Kp_Nq2V3V_%_t)hKaL9^e>C$#vA?GVad2!^vMBy2e;hK39^!R zOWth&LZAk~V6lNQ5lhP?<9p(uESwDdm?$?5OEyhjZY^*DCf0Ub@~1rky|Gf8+4}6a z|Jx}5_GQN|t;tP^Otvl?evKR1N@-twTTtyV`MrT!n#5(nI*YoBJ6Y%uwcEt3c$wu0Uhq#_{ES2KIA6OzcADx_I=>P0dCG@>qdN1z#DD?L~SM6 zSo~c8K_}Oz02ijOZB#{7!NB+Ip)*l5M86><3M?OrR@nX~FUp`xKdyZ$J1yf@`U4SV z69`?U8^-BVTfdR{M!`LTmbylma#MU`{?tg$&6kzDX;flLD5fd?UoU_Fl0!q(6M*m- z<6piA@Cy$r3t;%{dwE7LN4dJ2-4s`j7>(9d`Nrcm&sin;%N_aIAKLDm!;QdfxgKR& zktV~gSlcA;9lGphZGgTn`FQ1Ko{ZU1Mn?SK>O(q{9!4(FBz##8L2Rr9B&Ds)B|O)l zPZ>pX!hgW@+1^%AbV1zWYaCvYsa*1Lqe6&CR_Yl25#%-wzw`{8%=GTIt-{~9WfobD zab2sc&@JOb#E9Tvfh~-@tm;%hO#Zv_I^z45 z87K~ml;6m0q0)Kp^wHziVgbm_)Q+86=|}$14ln#5v9{h8%=h}Y2!KbjSN6698vMN! 
z7q#f_mnW22LI2$1Z}dQJbZIR9r8=C65X0!Il)zQ8e@cj6OYHBj97_h$|8nc<*q<)_ zYfA;=zKlQ`(D;11aQpukQs z(+yyWUJq=9x$SynF{t^s#2Z2Nt!!DueEgied8vji`;qJ!vJPmY{}gywbx+WGFK-$- zFW>lE2whTlx>Kk`q^x}^qK+0QUC6lprS62o8q(Bjw1as6?;L2*msBf54gEgqgk7&1 zhycw)sSv97+0n!()rHUcf4`QJ7CB_X#!*k$Fa*Rk^V8vSM+9Hcx3Pk85!>pHJcWO+ z=V_Rj4G3&{w?EQ014H)n41-|Ul#NJDn@$Y8_vlG97R)N1X z@NEW&B)RLW9in%U=FQ)C%~wG-ok4u;0%~824#q}&YgG&_(cn#Neu(t>6*0_}|LX63 zf2m-&-=DkeAjqGz=`w`n{ptpKG6o9Ks_EkOLG{1qfC^syV-IeqRd0B-9=-d7Y;fiV z=!qC%H-}4opGcU6Zmwr2ZyQdXK0rZUyL>+$LxF zJ%>m5|DLfMoo|qqqE=p>GBZhVg3CC$RORE5U6JVC0zz{dbR;;^fLo#vBHSU}*geyVP>d~tUm#g)T$%BbLUl@pt_YKhxl-^<_^na>Nh_+1rAk0e0wl06} z?^ms#Ve?;E)W|&ttj<5-{AQcVr$3g(6l8MAFZoaT0Y86-0fXV%5I2C-)aO;@dEwiO z2V-1SD5*9pdg#6Dg zdt5NN!tg)eF%IwZ7w5Broqw;=`2l$Pz$7)D&wIxI91hgsRB)WTtKR=UZGV55P5^Ux zt7y>=1O9tf((X)fY~v?|KmP5K&g=PY6j)9?FBHG{|J6Nwin%Qu+i2C=} z2!XT)(f=IVN(3C6`<{2xms8-Qt@lOq{r>awcgPI5K!D>v*ulTY???iNZiD)6^grbi zas>t8iwhXCmjCk!NKu~xSH+n`dW`eG*NPrJAI97D@EG*|=fV=Aj>)GRnEvPzcdQ~@ z5I|>vvD0nvli$`p?f7RkI6n$cUl<_t^eyiA-&40lPQCJIaKD?;4s;pWK%3{4e7XMr zE7aaO+rOiX2A75q=x)#NbOoVwsmv z;??OhD1&ppn4$R`kvv%1!7CW!;PvVZmn{jqwpgCC~i_Ah&Ury~}HZXz4v4G~%h&kvcs&jrx> z`pHhAw+ey8_FYi`DuOEJA&Db5%|G3_g?{TNaF=nZZcEo$m2od3f}L&?bzTnt3xB{g zTp7A_ciFS5rThZXDKB7?`Z36KCx@yI3w2LHJL3GP2V*rzCqRc}*1M_|vdI1N56S^yK9|1&a{TJmT{^4A2kQx_}+1(@n#`D;jqF5x1zu z*mjYw6`6#512ztMFs1Zx8B6IaaMPuwz&RZ8>?IV#XSJxQfto9ulfc1deX;>(?P{vc zA8?{liauJ*N#T@9uY3v`9?x@P9E9l)*6rX?Up&`n_+sv4QD-se6=`p--L5@wmxOMM zf1{aXUTx4ryb+tpWwi2krEU2Xror|Xh&FhBEp8a)@HyPV6}#HPaqeZmR+n&|7DaHG ziy;$(AyTJnUZ9A-16`$hz7refA+hZNK%ANb^Ka!I3)F_nF!#`TGM(>Rr6kB?t-9@( zXF6m+r?4gi0HKHy2$XSC`!A%)m|r(Jo1r_iqv$|bCS0x1yS^?YeNHo;l5>g17Y}6o z5QHh=CAthUw|QxZ?Kgqz{2^SfwMA!W2)1EhklC>k-U&|z0{rN9cIbC3Ksuo0eKb1J zaa&h0tkkEv6*4LOz9bwmOo3=hD`O?vCd8WAKUzk5qn=L_r-1oPQD^DOX2?8^n)`L* zAXMdO#%R+BfgkJ7&)I*J+j&RR)Z^cw7h)Wb_h6>POHB9Y)c0uPvxh3kxtw5Z8xaw3 z)@M|UZ3YX@UujQ5!&|q(CGP=xXVb9p!%<{@Y?IGcXBT>${h}DW zo-M7`r4m-PK* zT}(`8X)6SChhIK;wjs6Dyo0d*e6qh=!TdMQ$wkebhe;uIT+tzTIY(u0>vd_8PiNaB 
z)36O*CXLkY(h;vnduzfei!Ks_woL8QzyM}^W!ag7;d-OQ0+6G9GT^h2e-0$x5zp_@ z_dawmvbfsxOWnh7o8_!MT-i+FdyN4Ct+y*1#U;tNZDn*p-Ig+Q2>Joi;vy7ZgD)6G zfeuK|oCZYa9`9j-yQ}^?GFNgU&MXe+byz9(B@8LOzw^Dn?@p#=LR2?oYR(&n{zRd+ z1b$W>tu}j0kNt&`g7;|pK%x3jgwXiUwR!Sma9mNZEBk)`&J*zg@o&0(jA?ydpE7Yr z%+TH6CQ!W1I8CXP z<|_a2qU4g)HZx_8{$mI@VgeU$b5mq@npp$p1KJ(5;Wh*!Tih1{23v}S7%!-s))k-9 zB8z$9aKpnEKr);)4$U9;xt1EYmknOD|ZHcf`ZV9a6aJv(EA^joXg|u#96; zgoZG&uRaMvQ7OX{CO*)}XX_aTKy;xe56`h5p7{Bm%TN07|L=a}RE@b+M~qfM=)7pV z45wkLLfZ3SX{H$;t1*D9Xxq{4sfc}%xmqMqL^Z_t_M5wAK<%gL?p@R}ZlIhE#dy_5 zff&B9DoEp1$d25*bJ`~`dj*~Dg8=a*BdRlyO*y0bN0M!Za)9vflx$DKExTKDwksWl zI_I_hZ7G5neGtmk(BR%Be@%bKM_zVT0sP1OUI&T4Bg?w+Uib9tW};$`Kfd4>o~^5QfGgA6@J5EpTCUN5=*taUsK1G^5X;s%KFYI$T0fL21H`{$a=3INlJftjp6VdF5a^^~*T? zRy0OJ^b>4q4G!-R2F$mzXD?*zWaT2_k=5VnzB0Bvdq{j;-ewer{l8wRj?ru(M18vS zy6oZ@SdL9D_7+&QCy3syrp~;2qp~D~po?3B)4NV{&cKiWC6gN09-95}ZKMIYX=Hp1s@Bsg!5uE|cskX-pb0ooPUB5)Q#V zt8Yv-e0@=Aw|a}>Sck%_;pMr*!um`_n2)IW%+$w_m}AH?VL3%p8TTMZ#Uq~lF3&(c zE=i9AJLK-Fubr3K9HG)xix%I<=Va@>dfiJK1#hk<_roxIom}nPm76J509I5*zCu^{ zAuf?@>^SkgwK~_#n2Y{mW9#Z=9-& z%XN0AB6~AR{7y|qfdU%ciui1_T($V&_0B9}FA2M-t?ww-j?9^og62!n&DFn?lr(q` zqwEj(uHW1R6vpjeYt&C*jjBLD^B_n+$&rQT#==BJh+vxAy*m3K)A8!B6g^j516ZiV zZ+spyeJox>wD7oCFfVvMuF&`@udTzBYlistXenLhnXnn{hT>}B!k~z)eL*MY%a0LJ z=(<$7G4Jf&m@$fX#bA-E=R#O)b6#v=n{0I50jDzCES_ z?tX(!$wMM7f)UG9hSUm|)%?#1*RG0_e09o6zkiZgcwxt=h1Ky`*LaJbY0x5+Ilhf` z$2jW&1ImrsHqXgNdA8AI=_jUwf%8{!tq#}*gA_CW3d9SLSN*BPh|!Ag2#=HZ+hzi+K{ zj-Ls}b*!CYtx(yJmitad4UGP^_o>)oYGX!qex;h`TRt1xIR|4PjRCBynMZCE3?R)U zCGS`-IyT`Bx~@5x3K@Nd4cwIq|r3TM*sOM=&tXe;HpJPNB((54&V|K z@aM*%(qq(i$Xldsq#0b^ABZ$2;NqA_A@^>gOvqW6XbBYw)>IbS~I=H!@(4{RsuH z<;QZ3g_W%Q1H3YUI_SAbs50M!=XkBPi*3BlT6JhqL_%FIl4q=pAYZ}l@ppZjcmE36 z1Ek%NJFI{5;fEjt>YwtEqLL@Jl=>^z&}uedMB-I%ERwTaqh+PHWt#qYw47D2V9`PZ zFQui|8L`L{o%BKLM|SGjRGBU$yZ0k}Z)KTY;6FI)1mYCx8TRG2r7Zf-)#MMu!f8c?-%EKbz zwiO`Lk^%XaP!j?_pDaYpL`D@$P^tG(#;s3QE{B^`O4evNLn2&M`tTHR1=co-`;kxq zbH4$K5@(uP{tD_597z`7@oZh74Gaxo1m-Ab{sde@5Zymx4KPPB)`!3{Su&2Do$l2Q 
zAS4}vt0m;054IqE2c+64A{pRu`!Mv~x~w3!?gp*nT%i4Jnrd*rSp8Z9$A2*a%8W=^ zo%%U|EcUGXF5&!{gqHgbQ#iNm>4V@I&|3E|4^Ek^;t6|!1{>)}14~MyB&YN-uZ3|| znlC}--;`TLtu9bp#NOlqt_!_yCGC8!ZDH}`dLspntUhkz0@GovBnu5D82$mdD=G8o zI^SWjT?q{$7z~7OJF-4kV1x4O4Kf;2iYS^=Q;jioOU)2rIH=OgxN;<52kF0-8GgUq z;NQD(6+3ZHl?9!W)ED%#B7oi>Rq9C%^qMM!^D@0Y?Lg=l0|5|`j|~l^eBrGE`x8E< zx2NOS&l%EG1c$HkP5)kt5O+;QMit6Hg6iCsCSC^BAA;p?mX=|-D2Of@W!>OoOh(9$ zK!?|LaM>l+0QDK)0za5+*mAc2fI-o>aF6HLGJjERO}lxv4PasqLzngEiTyF~uTPK? zVgB3BKGb5vykSPM!}aO9Wl65q5)uc!YNZUeaL`o-LYtbxDx#o&+2S=TyUUN1sJtES zpsKF!`ra^0=G6n}%PR`su5d!L(g{q4VIW$k50b~n1`xk5ibOj)Pn1+J54U@(jNh@t zOg_qZU+q#3G(s+>v|e0npSWEW8Mu}@T|3gxu0UZp-uPnFbIzW2(2X?F-x7R=bdZju76k#vPM)S{`8Ao6L9Rff2*-<5KR<41b9@WD4 z8b}QERYUS)H2P)lZedbf?0*t_vCd?C?hhzzcelV7C%z5_Wu5%1VVEbRI*Ccv&>yQl7a@9* zlslxfE`vog>mOiloM&He1QT4TLbC(nixipxAE1m?y$$|$!2V}`AyOu<=1%tsP_LtJ zq2{Y^5bh?ps7$poFYFD)uuJka)}@Prx@OT=NcUUa_45$(q(v|wh7qA<4B|RihfQ+2 zcE6Q5Rn31hY~r6x>Jr~~?_eu~XLPLaO4_`HQwcWDFu-7s@8dNmQ8N$vaqN%_qESOnR*si%0y@e z0}+AtdDyLO>8KtYI5gvsYGcwOzM6X&J@>@fnk|IBv<~c~OtLT6 zHB(^#L-laIqZ=l$S9t>WK{XP;`aAGyi8)vJ*W~C&1MIAVc~b;EJgIj2d&6dQtVM`c z=x}u6G#(T?!UnH}0mse_Wl6q9^o?rM);^}KxQR{QTD;uFE3_t}=x5J%f1tbOPn?S2 z=9JfOn@(Ph-?dfbCg)2Jy~al9jw!;Dt3;^U#?yC~e!P^7K%fM>lhB9BKLQZ&k($%j znfSQz_3^%!u99VR$jagpoI!uIrvI32CRu5yo86oL*WPyqV%djp8@Ejg+50vsdt~nsA)}HN zSsB?fQ@HINk-bupqGW|KON6BCl}*S_M(8~+rT6)Nd_TQk{?A9x-F5x0>-vr3JkH}d zPYB_#r|5ABJDXXf{d}48mXHd>zFmk582rHm z82?k2=>;`U?O(H*8yj&KsN{XK57sguAtN}!5v7tHGqb4ZXiI1+SE_n&%Qo(zoX^EI zSLL9MO{<%X9Ng;XSvP&ot)VF0P{IU}t^_dBBd>x!nfb+tF|7s0$`!m-6=6x+6q=mKzCIT&wFIvEWMM6A6hZ zG3QtO^wJ}EB=UpIK7v0{ymN~0lRQs{N+3xHlM?g57p?>LAd;3J0@<6ByL0X160WD^ zUTpmZMgs^i^7>tVXX#d=$1B+?oI|_>c^^oJiJ%_MFN-|UKTJ0q{axWTJ3z?ivM;>; zXV!`i$y(u85Ffg?{v=3YX&WG^n~J$!=l_|tQU(hBQ|=W-Q`NwMzQ5bRVa`WX1TOOr zW&gY9B_xYSHTp>7Pm29-0`UL!$9(l@P7{%q$~MNbQIBfA#~Ib=jM`-j_&l@%OR8U_Zf7dd2if4|qm0!;UefcLJsaC?oJVk~>=h*>RB6fEGZ$pt)^W z>5rg!5nsYj5c_O7fKXsX$-dV`5)gMcm@DH!FO`ooF9%M@dasQ5w=D}U?}?R;BF?MYvQe9Ao&#<)7LmFF4n@3z(Fr?>H0OxK 
z&&)!WDzvhoaqES8rJZ%SLN&59{E9r0Tj!V>qnk>qxQkf>ZX73GW2`u%qE5A; z+Cmic8CQJ}ryn$x!fSNzRfRc5D2h^jaCy@2_v`z-uY!Kn%Un$iL>&%*^%p8V(JsPEzwe}lqSn$l=r_)d-ld<#DZn%mDw1( zzq?7IC0+%t!gq>yGh48f2O9FCcM%l5QAcNANh-;8ijy5*A;(kMQnQPe8kt-1% zT+}$=3`LYU-_d6_qO=pnJ-ygXti>1c*!o~^$IXiZUxs44=(X1pJvFc4@fweL3cfSl z1v+=fykVo0*k{FIauKU@wc4Z{fM|N7FT3fPKg<%a;Wvsdn>G`|_u zrmWhn_QU%#69Z9K$T3lE9^xprzLWT->-m}w%TdOajz&vCHr3B2R-^yS83ml{D!F;W zqh$6eUQ0vG_NbT1w2wJ;QwRC43OMn-Nk94(#r7Cc>bx1zTevMA{zd31QtVlG+?KPs z>NN&+v2vK#8yEZ5zh9@Zc5&DU7-ex}h`TT|17p|gOVs{6IFN2xYP?Dl7B4bqFr{qf z`L7+*+oCj(_55cVCGlX(j?N6Z_nG|rlX|Wr;i;PT{KBFq|3(w9P)(uhk>b#9d)m^rBQ7f9@lQK zHBZ1!Od?|2*+JDSh_iErY|0cX1F|n|UODtb??9 zUe`Hua#}%Fu@IvdTswY0@;P+5F+V&U+LBIVXt*Eyj+`9#w7gkxcsJ`nh5WgBt}4v9pqLobnV5FZSNFE z7fj;ZnMd&vi-!Y=KZ}!=S*y~RRgn|9nV?Wf)Fo;AhT!*#;NLyTh$CL|bF3|;W+dG* z@4?lgL{zCJE78jIuTJL+#c79Qy3CI0h;+tRf83~de}MIC8H%IJ6^FFkX1s#!&wOvC z$8`ACq$P|4&TOOSUPNOPB@toojYhZ-n5CQ#HoAa(gOf_Em(rZ~Qel&xPMLG+(-?;& zA2G;zZyHv)S`@cbJ@mpMeIhz+D$TNWxPdp!DYIV6%fRT)(AGn4=`94kR0z}LUR3AV zNsB>%!QOgGJ!$>UM;GBMW$RClbw^XzX%)!aaN;{$G`WS8tx4ZzSu_qBJB%;u>xaV4 zL^`shU)l8{<@8s9_Yc3`o_umNN6a76*K04g5C^(gHSY-woX;{{lE(L|;23U#=>GMY z0@1dY0ige|l0=-e50eb+L+yA9%c-~gk$WW(V6-Bg(+cM{*H?C*^M-z=UpoN8DtzqV=c@_u7(A_ZxL*(BX_){zz-(sY%w|Xe zS}hDV_LH^yLx#c(_+p_@K;+Rk-)9b13rT!tZ+tvW^Vr)wsS~5@Y+NqFLQohy-kSNX zlek*Q>b**UB;9vpq!!>JCj+War^9D!v0nh`5x@6q>~~zRst3p$!X~1x?OA5^>a={y z5EqDV=6HC1hwOv_BHy;%r*F8?a|Z zwJ2iK5z*qxA8`672*?Hf_qKmjn6-;J4TZ=og=S*&-ry&H>Q|e~qta0;O)*md@+eDE%*A&=R>Kr0weVyW znBoxlgmuP}^X-6`D3Marb+ai5^+4q}wl^#O(22ADVKxtG`Qq>^m)zizQc^$J=&O(4 zQu7&8A;7N$sJBoEU#;$UhSE&_1Q`iosN>?95IR{OC9L1zqJ8#Ud8!V$IVq@MEX2e3 z3gBiqNV8-c?m(9!LO3?H8OsSZ-&3uMKD1e-3Z@gX6FHE$8>k2`rU}#`rb|g6p5VPb zK|&S~p(2*O{a(CIulQ=6xhI=d{n~qmmI59gM%|XMfP;;k2~Y&Y=%^v3Cw!)%gZtb5 zx}Oz!wv8E;i2FHu`P2Hjp=in94qmCHS8W%$4mW}e847RjD^kt`EsX=HBDBsyB=>oF z@kKwq>wk@^2Vy5jq`}8;SB-Q8p2SUh@UkPR$}8v5rs)rfZy^+*FPjC<-f1LTZuQ0( z5CjwVH2EVnpX>3qmUAblzo$-6WR*y}0Ki+j@D)p1Cw>-Dkuah3tmIe7Xf`1=Hof_;$v4`rf%t;Ms4}6|IWtw~fNo3A9(R+eAPBnwnDed|F=Z*JB 
z1MV`*vTF+O684K$m90wrg9V0oIFf7kB3nOVUGxMI{GDab55CpUkRB^f5fZSoW^bH8 zKB8sDQOGTgl3Sm~%*R6i_rd-%Bm#QsHuYYA=AAq56vR#+1x_QBdr^gBTYF@!Ay><~ z+Mwi$cg0Qj!LFwxto2W?R!Ta5GBTU=MVwC+8#k{++?ebA$!V(}+28h&+;npz=eJD2 zex{2|0CE2i?9)?qBe`FmvK*|2?@YN*^7RSz?jSve4&F4~_t170OKAG2+J7_X^#XwG zyZx=#^PlYKb^MOOcP2YAYRtaY(-*%hh4PYodJvbi31z9D zyBhOfV%eH`=TmVeU7U#$>%^by%~1LR_Jz z_W+^>*MpJgrmwGD$dRihu51n(Q{3HePQydw2>3qDqw- zw7g@F5qsh0@wdN&n2?1TNV#}Q4YrSlO%Hv(bNQ4Frcgeg020TGu_Wa?i@lLaSPJPT zNCfvw00@P`sUl~C{;+US>}UAMU9M{;MP!Bs_h=Y1@QvEDW&PfBrWvzym#okARNQl9 zHnebO3iENXzC`C3!4$IHEZ~2ltj*eo<@RdhHU}0yYAeZ++WP()TBNniHC?W>*?RlK zII>9B-qgsowocyRR?f1aVEd*0sIlNGiL?C92c1c;(h1bT&by2i!;4YZJqHb1OII_0 zn4Ib}JIw=ekV902VpytTymi~nptDZTJ* z3Gw&WZhcW2jjS2U(ei9d-=GQ~qc+=0#}sq+pN`6W!tiyoN82!x^ayj&Yb$&<4QA@Y z!Vxm*V8?8tQ`}-(NSS3M_`3-#3G4a3nyZkolVh8R5Vv1nc2+?rv?sN2T5dpVg$SiH z7jzZqJeYRc_dVQx%Zg?l}oX4MN}|vx1=ZE%huxqn#W6fuz7G2+JBr| z@u`usOfOB7#ry07yy>|IX)5Bu+J!XKcq)u}37AquBT{Zg3Wl9dZLcaPkGwicY>wG(;L{&-Sq?+;g zm1_P?$z#$KLUvubw+T3$PpxCK;5U82JP-+T!uxgNk_JUN!3g0jtq!)1|HJy--WQWx zQuZpQOLtl4RHds6#6RaFQS3SYfk0v>^xl>37=AEqDgXE+VuNXwy%3=(e-ty?(PFE5 zlR0mjeJB#1B1}`0d4;+uiPl){Vb^l64F87BpILz#b8Cxt0a{}1O}f8ho3@ehG4r34 zD#Dl&8-CZ)0zx^NF>A*Yji|BiYSM8lzjR?i+qsCA@vRG!Op4*#R|hryB!nXQ`sl8P zf6Ck-T8JrSUl;+O~27g1Z0+Em>uh9tqxor+>z7bz@4 zWtq_)yt`+E1$?d0N~I;8<@%jcCq%w8rr1QCpF#H!wQ;mg3U83$>N%9g5KPO}t>1s~ zuJ>K$v6Ba@a5RcILPpMw<*U;|<7R<&7Ck{S*Ms!7)0NhpUXxJxE>hE<+qz|5;SsN- z=d^dALCzTU*L8l=ns*MYUs#}`#lX&kz}<5BdOj)b9+vE)4N)$#AF45wGR@{;%o6U5 zMo=9=Wh(s7d7j&d>T_I8yYNG$6zRgH%4nxnnIivAjlQVG37rHGYZ{IyADJ38LiXh2 zrwwtxVBHanaOws zYcxCeg>@7|bJqw{=lPrN#hhlge8(KbK!T@~M9137zaCMqYq*xL{GF9Cd_dV1J=;p0 zn8q6;I?C8?O{VaUGGd>j?S$R{SxE{j;b01b9sP^OO`;IiO>~JqYxWn%(~eU^)_sYa zfld|8K*ceXRaw6z38oV+xhROopm^?8zQlFuytp(`9|{F?A;qE%CTSG!6|TLh$lp8a znG)o$wLdmYDi&ZpBj3uBqGwF0qTY?~W?U_K=xm)_ox;&ZVwCUjSiAk(=Z8~{d=U<* z!VjU{m=l#q^B%u>W{}uZ9oCw8#;#85dnZP;^wS*Ch8hE5pXkT-%I&*xvXavqFU5?E z4>oj%)C{pilQJp4IT*nB1O*mz4qB&n%Xn6=2$ChaV^8K1pJwS3d`Z(NGGPiYu@ 
zB8x}fagzlzmtx?I_G&-ds($ap7__F&YpEW%Gfu=k98rvotzuKX8p;>JP=qtU*6Q3x z+Sc+VsB6mbtD{khDel_kqRjM}zD=fR+&pR_&2XFtQR;JYFW<2e&byjlQ{WI;VrQ?_ zCG6MtcluQ#%gRBN%^tt0wk;%)v`8_pD!iuIsP^$!*va4%Y1!nTWA6m*=0Az$^rHRF zSAHUkRd}%3hke1GrikttMTF_Qvs=l$g7defgp)pqhTD&GWZ~wES-Wymr&|lKN1O7J zn6+XQiv^44y=_4+7P`~o?kncCj37tJWGX4t=F+sF9JWR5R(1hiXci&9>zB_D zHsbO7-TAVwc&vPtJ$8KdO;LEB^dg`2KC+WuW`iR{cG@}6y6q3!*I-RYAWf%aP^e&)4DaXAEaPAIC;iUhaUs0GEq z!5f5TRMvkH2~ZhBKzTTzr@V-If)#F|uirA#mepQ6esu1hi4V+sjl(c%;w*!M7@{*j za%$pAGpsE0p8t1ecI3|N_L3*hv(^@tvAcZD0P^GT(8>LZ>hS1ixKHpO-bFpv0!BkN zHEZ-A?u6orIK0(bJ>T4;fC>4Z^yd^lj|w_xbN(g64!N9Ll&-AT`JR}8j8^b^Thc1% zPQu_cmO}&(7C^eD-P-rz)4yi10dzJkGi>aFww;+>Z+tfzhAF$T8h}*_wfUGHiiF}r zzDOP0?IxPEuaQ zXayQpcjwpatJm!R_nmYJBEuaycb7SfW+Y-j?#}LXSyN~@B4`z&tV1Ff6^AIup>Rcw z!8?&49tS50H*r70OvErs3cG#Iw=Ne0-r+Q`$GWG37d-XvjC0_H0S@00#z%Mo~sK~j(aJhD2e28#`hpn zU`wutcub#X4lF#;W8%;^=*IB=|9pg9f*Y>rb|ml7QjGLrq7|{xV!df9f;>hQ(PvXy zBww>91w4V>1v(iYSE?T6@MEz!G|$>V2g8gfPQYFh32d9dRnR$G>}{`EEc6tv8KU+e z(Q+Iq!@4hM|K*VnuaP}!3&Ly0Pe`rYw@_A=UK%@Mw<%S^&2sM+NI=37-R_TtULLJf zK{j0-UJ;eWzT&BScY(AI2fvQ2O&_AJ-t#gzLRH8-{R2+vCgk@f$T1zFFMcuu`8)uT z+*@!UQb0-|Rel%Ys0Orw7fl;OtOFrPIS2n0h==`uA&sN1*~%x!|6CdlClenLgfo+8 z=MD)qFChqzS_9^s!X1%jCsSf^rf+=Qst_j7{9G;|*=6i!#Mdd%=VAy652);GO<#N2 zU%I*sM<1q0Ji+XG=PuNceaB8pFKUOE<2^F*7rr1QXu}ywT@M}HpP;7FJm{+*K_C|wMw3*ePc-LG*ljWO){MHEOnfz_vqR9$IqWWJtHGZ z#q0-!=Q*~I*w+1YqmrA#&==GKKh9m)ewbMVFtng(k71J3+5Nj zE1t{1M4}e0ANGU2Na1H%DMAr@*nJzRzKP_#r@Y^7f&PjG$dA}mC$#|=a5lKH{G0a# z27GVR3AUIN2JBcY8+>x%AxG}iK@|B}T=WNHbQomdb50qr!2WP5{-vk6KHI(D^q9_u z_h4NTcDBvsVrTp$(ZU}10-6xHb}16z36DgWmoB6H9g8?SqLhASrHVNjUp>YsA!w{E z2DKVf6_FtHE$B1ozyb5cE@2|~vbM~GJPT>t?Qsl&<()#cC`4XgT$A*7r-e^yhr)4~ zE+Xr#C5zRyQ;stKd93t9N70&Z%lb_FTNnt1GvH&p*Xo-VLvrt2A#(4|zt4|o`|^Us zvka^1U1gU9u2L`CrZ>P!6VN_sD8dt4F%1prND)U;k$f=vwa5HL!^@vHfsi}L^AWZ? 
zLyvj}PF;RfjrR(3&R0RP#(yWZf{lu};OcoAW4=G;s>oy_D3-mf8`Mu3b&!t)(NsSToNr+)3wxvP8YWf-&(V=Sz3k@D<-jEvT6TxjvyL zzpJ-@?B}UJ7gmTQwASz_P}H$`s?oFiyISkdxoQ7mxtF7>3Ka&s_1Kf4s>jBE(-h+Y z==j1x&8m}T>>I#1UbkFm_)X@tllgIRF%6edrxu396>p|Aa~U%;64i&`K80?M_(Co5 zh@F;uX#`*7voBEkRW9EE9!KBR_SyNO3%G=yze(SW9LS?hoUEJ<_p9;WMAjEQrk1Yw{Gz-ch+0rqXf$QfUhJNVr=7JPV4TW2Zct9l2e z?2T@pB`LsJge54$@$uHZIPcTgL}22KSftV-XjNFGs#}mvaD~6YU5vPOq1xYZON#M% z7PHc;VUPnTC!*>s>VBXct`I)l>DBFgb443@2ba*gUU{zz&jxP&8Q((r4sI(3e^CkCDt8)i2B+FG-cjBQCg+)0fHPl=M}EP>A8p{LBmsJS zNbpnqH)~ii8tB|_&yOhizcW(<+2!4*&2;~Bi!T74c#4~Aul?uW-h+)bg|V9Ee}{LN l(D>iC_xBC_|C@Wi32Gk_)7H5wJ;#Fo&Z}yxJXbOg`afpP$CUs8 literal 0 HcmV?d00001 diff --git a/examples/chain-of-density/Readme.md b/examples/chain-of-density/Readme.md new file mode 100644 index 0000000..6aac999 --- /dev/null +++ b/examples/chain-of-density/Readme.md @@ -0,0 +1,31 @@ +# Introduction + +This is a simple example which shows how to perform Chain Of Density summarization using GPT-3.5 and utilise the generated output to fine-tune a 3.5 model for production usage. All of our data referenced in this file is located [here](https://huggingface.co/datasets/ivanleomk/gpt4-chain-of-density) on hugging face + +Check out our blog post [here](https://jxnl.github.io/instructor/blog/2023/11/05/implementing-chain-of-density/) where we have a detailed explanation of the code and a [colab notebook](https://colab.research.google.com/drive/1iBkrEh2G5U8yh8RmI8EkWxjLq6zIIuVm?usp=sharing) walking you through how we perform our calculations. + +## Instructions + +1. First, install all of the required dependencies by running the command below. We recommend using a virtual environment to install these so that it does not affect your system installation. + +> We use NLTK to ensure that our summaries are of a certain token length. In order to do so, you'll need to download the `punkt` package to compute the token metrics. 
You can do so by running the command `nltk.download('punkt')` + +``` +pip3 install -r requirements.txt +``` + +2. Download the `test.csv` file and the `summarization.jsonl` file that you want to use for finetuning. We provide one with `20` examples, `50` examples and `100` examples to be used for testing. Let's now run a simple finetuning job with the following command. + +> Don't forget to set your `OPENAI_API_KEY` as an environment variable in your shell before running these commands + +``` +instructor jobs create-from-file summarization.jsonl +``` + +3. Once the job is complete, you'll end up with a new GPT 3.5 model that's capable of producing high quality summaries with a high entity density. You can run it by simply changing our `finetune.py` file's `instructions.distil` annotator as + +``` +@instructions.distil(model=,mode="dispatch") +def distil_summarization(text: str) -> GeneratedSummary: +// rest of code goes here +``` \ No newline at end of file diff --git a/examples/chain-of-density/chain_of_density.py b/examples/chain-of-density/chain_of_density.py new file mode 100644 index 0000000..706373a --- /dev/null +++ b/examples/chain-of-density/chain_of_density.py @@ -0,0 +1,151 @@ +from pydantic import BaseModel, Field, field_validator +from typing import List +import instructor +import nltk +from openai import OpenAI +import spacy + +client = instructor.patch(OpenAI()) +nlp = spacy.load("en_core_web_sm") + + +class InitialSummary(BaseModel): + """ + This is an initial summary which should be long ( 4-5 sentences, ~80 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose languages and fillers (Eg. This article discusses) to reach ~80 words. + """ + + summary: str = Field( + ..., + description="This is a summary of the article provided which is overly verbose and uses fillers. 
It should be roughly 80 words in length", + ) + + +class RewrittenSummary(BaseModel): + """ + This is a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities. + + Guidelines + - Make every word count : Rewrite the previous summary to improve flow and make space for additional entities + - Never drop entities from the previous summary. If space cannot be made, add fewer new entities. + - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article. + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" + - Missing entities can appear anywhere in the new summary + + An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title. + """ + + summary: str = Field( + ..., + description="This is a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities. It should have the same length ( ~ 80 words ) as the previous summary and should be easily understood without the Article", + ) + absent: List[str] = Field( + ..., + default_factory=list, + description="this is a list of Entities found absent from the new summary that were present in the previous summary", + ) + missing: List[str] = Field( + default_factory=list, + description="This is a list of 1-3 informative Entities from the Article that are missing from the new summary which should be included in the next generated summary.", + ) + + @field_validator("summary") + def min_entity_density(cls, v: str): + # We want to make sure we have a minimum density of 0.12 whenever we do a rewrite. 
This ensures that the summary quality is always going up + tokens = nltk.word_tokenize(v) + num_tokens = len(tokens) + + # Extract Entities + doc = nlp(v) + num_entities = len(doc.ents) + + density = num_entities / num_tokens + if density < 0.08: + raise ValueError( + f"The summary of {v} has too few entities. Please regenerate a new summary with more new entities added to it. Remember that new entities can be added at any point of the summary." + ) + + return v + + @field_validator("summary") + def min_length(cls, v: str): + tokens = nltk.word_tokenize(v) + num_tokens = len(tokens) + if num_tokens < 60: + raise ValueError( + "The current summary is too short. Please make sure that you generate a new summary that is around 80 words long." + ) + return v + + @field_validator("missing") + def has_missing_entities(cls, missing_entities: List[str]): + if len(missing_entities) == 0: + raise ValueError( + "You must identify 1-3 informative Entities from the Article which are missing from the previously generated summary to be used in a new summary" + ) + return missing_entities + + @field_validator("absent") + def has_no_absent_entities(cls, absent_entities: List[str]): + absent_entity_string = ",".join(absent_entities) + if len(absent_entities) > 0: + print(f"Detected absent entities of {absent_entity_string}") + raise ValueError( + f"Do not omit the following Entities {absent_entity_string} from the new summary" + ) + return absent_entities + + +def summarize_article(article: str, summary_steps: int = 3): + summary_chain = [] + # We first generate an initial summary + summary: InitialSummary = client.chat.completions.create( + model="gpt-4-0613", + response_model=InitialSummary, + messages=[ + { + "role": "system", + "content": "Write a summary about the article that is long (4-5 sentences) yet highly non-specific. Use overly, verbose language and fillers(eg.,'this article discusses') to reach ~80 words. 
", + }, + {"role": "user", "content": f"Here is the Article: {article}"}, + { + "role": "user", + "content": "The generated summary should be about 80 words.", + }, + ], + max_retries=2, + ) + summary_chain.append(summary.summary) + for i in range(summary_steps): + new_summary: RewrittenSummary = client.chat.completions.create( + model="gpt-4-0613", + messages=[ + { + "role": "system", + "content": f""" + Article: {article} + You are going to generate an increasingly concise,entity-dense summary of the following article. + + Perform the following two tasks + - Identify 1-3 informative entities from the following article which is missing from the previous summary + - Write a new denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities + + Guidelines + - Make every word count: re-write the previous summary to improve flow and make space for additional entities + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses". + - The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article. + - Missing entities can appear anywhere in the new summary + - Never drop entities from the previous summary. If space cannot be made, add fewer new entities. 
+ """, + }, + { + "role": "user", + "content": f"Here is the previous summary: {summary_chain[-1]}", + }, + ], + max_retries=5, + max_tokens=1000, + response_model=RewrittenSummary, + ) + summary_chain.append(new_summary.summary) + + return summary_chain diff --git a/examples/chain-of-density/finetune.py b/examples/chain-of-density/finetune.py new file mode 100644 index 0000000..45d509c --- /dev/null +++ b/examples/chain-of-density/finetune.py @@ -0,0 +1,48 @@ +from typing import List +from chain_of_density import summarize_article +import csv +import logging +import instructor +from pydantic import BaseModel, Field + +logging.basicConfig(level=logging.INFO) + +instructions = instructor.Instructions( + name="Chain Of Density", + finetune_format="messages", + # log handler is used to save the data to a file + # you can imagine saving it to a database or other storage + # based on your needs! + log_handlers=[logging.FileHandler("generated.jsonl")], +) + + +class GeneratedSummary(BaseModel): + """ + This represents a highly concise summary that includes as many entities as possible from the original source article. + + An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title. + + Guidelines + - Make every word count + - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article. + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" + """ + + summary: str = Field( + ..., + description="This represents the final summary generated that captures the meaning of the original article which is as concise as possible. 
", + ) + + +@instructions.distil +def distil_summarization(text: str) -> GeneratedSummary: + summary_chain: List[str] = summarize_article(text) + return GeneratedSummary(summary=summary_chain[-1]) + + +with open("test.csv", "r") as file: + reader = csv.reader(file) + next(reader) # Skip the header + for article, summary in reader: + distil_summarization(article) diff --git a/examples/chain-of-density/requirements.txt b/examples/chain-of-density/requirements.txt new file mode 100644 index 0000000..8cc8d88 --- /dev/null +++ b/examples/chain-of-density/requirements.txt @@ -0,0 +1,5 @@ +openai +pydantic +instructor +nltk +rich \ No newline at end of file diff --git a/examples/chain-of-density/run.py b/examples/chain-of-density/run.py deleted file mode 100644 index 0dfbf80..0000000 --- a/examples/chain-of-density/run.py +++ /dev/null @@ -1,230 +0,0 @@ -import instructor -from openai import OpenAI - -from pydantic import BaseModel, Field - -from pprint import pprint -from typing import List - -client = instructor.patch(OpenAI()) - - -class Summary(BaseModel): - """Represents a summary entry in the list. - - Guidelines: - - The first summary should be long (4-5 sentences, ~80 words) yet highly non-specific, - containing little information beyond the entities marked as missing. Use overly verbose - language and fillers (e.g., "this article discusses") to reach ~80 words. - - Make every word count: rewrite the previous summary to improve flow and make space for - additional entities. - - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses." - - The summaries should become highly dense and concise yet self-contained, i.e., easily understood - without the article. - - Missing entities can appear anywhere in the new summary. - - Never drop entities from the previous summary. If space cannot be made, add fewer new entities. 
- """ - - index: int = Field(..., description="Index of the summary in the chain.") - denser_summary: str = Field(..., description="Concise yet self-contained summary.") - included_entities: List[str] = Field( - ..., description="Correct list of Entities found in the summary." - ) - missing_entities: List[str] = Field( - ..., - description="Correct list of Entities found absent from the summary that should be included in the next summary attempt.", - ) - - -# This multitask helper will be used to generate a chain of summaries. -# Allows us to extract data via streaming to see resuls faster -ChainOfDenseSummaries = instructor.MultiTask( - Summary, - name="chain-of-dense-summaries", - description=""" - Repeat the following 2 steps 5 times. - - Step 1. Identify 1-3 informative entities (";" delimited) from the article which are missing from the previously generated summary. - - Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the missing entities. - - A missing entity is: - - - relevant to the main story, - - specific yet concise (5 words or fewer), - - novel (not in the previous summary), - - faithful (present in the article), - - anywhere (can be located anywhere in the article). 
- - Remember, use the exact same number of words for each summary.""", -) - - -def summarize_article(article: str, n_summaries: int = 5, stream: bool = True): - completion = client.chat.completions.create( - model="gpt-3.5-turbo-16k", - stream=stream, - messages=[ - { - "role": "system", - "content": """Summarize the following article with {n_summary} chain of summaries with increasing density:""", - }, - {"role": "user", "content": article}, - ], - functions=[ChainOfDenseSummaries.openai_schema], - function_call={"name": ChainOfDenseSummaries.openai_schema["name"]}, - ) - if stream: - return ChainOfDenseSummaries.from_streaming_response(completion) - return ChainOfDenseSummaries.from_response(completion) - - -if __name__ == "__main__": - example = { - "text": "The people of the State of California do enact as follows:\n\n\nSECTION 1.\nSection 10295.35 is added to the Public Contract Code, to read:\n10295.35.\n(a) (1) Notwithstanding any other law, a state agency shall not enter into any contract for the acquisition of goods or services in the amount of one hundred thousand dollars ($100,000) or more with a contractor that, in the provision of benefits, discriminates between employees on the basis of an employee’s or dependent’s actual or perceived gender identity, including, but not limited to, the employee’s or dependent’s identification as transgender.\n(2) For purposes of this section, “contract” includes contracts with a cumulative amount of one hundred thousand dollars ($100,000) or more per contractor in each fiscal year.\n(3) For purposes of this section, an employee health plan is discriminatory if the plan is not consistent with Section 1365.5 of the Health and Safety Code and Section 10140 of the Insurance Code.\n(4) The requirements of this section shall apply only to those portions of a contractor’s operations that occur under any of the following conditions:\n(A) Within the state.\n(B) On real property outside the state if the property is owned by the 
state or if the state has a right to occupy the property, and if the contractor’s presence at that location is connected to a contract with the state.\n(C) Elsewhere in the United States where work related to a state contract is being performed.\n(b) Contractors shall treat as confidential, to the maximum extent allowed by law or by the requirement of the contractor’s insurance provider, any request by an employee or applicant for employment benefits or any documentation of eligibility for benefits submitted by an employee or applicant for employment.\n(c) After taking all reasonable measures to find a contractor that complies with this section, as determined by the state agency, the requirements of this section may be waived under any of the following circumstances:\n(1) There is only one prospective contractor willing to enter into a specific contract with the state agency.\n(2) The contract is necessary to respond to an emergency, as determined by the state agency, that endangers the public health, welfare, or safety, or the contract is necessary for the provision of essential services, and no entity that complies with the requirements of this section capable of responding to the emergency is immediately available.\n(3) The requirements of this section violate, or are inconsistent with, the terms or conditions of a grant, subvention, or agreement, if the agency has made a good faith attempt to change the terms or conditions of any grant, subvention, or agreement to authorize application of this section.\n(4) The contractor is providing wholesale or bulk water, power, or natural gas, the conveyance or transmission of the same, or ancillary services, as required for ensuring reliable services in accordance with good utility practice, if the purchase of the same cannot practically be accomplished through the standard competitive bidding procedures and the contractor is not providing direct retail services to end users.\n(d) (1) A contractor shall not be deemed to 
discriminate in the provision of benefits if the contractor, in providing the benefits, pays the actual costs incurred in obtaining the benefit.\n(2) If a contractor is unable to provide a certain benefit, despite taking reasonable measures to do so, the contractor shall not be deemed to discriminate in the provision of benefits.\n(e) (1) Every contract subject to this chapter shall contain a statement by which the contractor certifies that the contractor is in compliance with this section.\n(2) The department or other contracting agency shall enforce this section pursuant to its existing enforcement powers.\n(3) (A) If a contractor falsely certifies that it is in compliance with this section, the contract with that contractor shall be subject to Article 9 (commencing with Section 10420), unless, within a time period specified by the department or other contracting agency, the contractor provides to the department or agency proof that it has complied, or is in the process of complying, with this section.\n(B) The application of the remedies or penalties contained in Article 9 (commencing with Section 10420) to a contract subject to this chapter shall not preclude the application of any existing remedies otherwise available to the department or other contracting agency under its existing enforcement powers.\n(f) Nothing in this section is intended to regulate the contracting practices of any local jurisdiction.\n(g) This section shall be construed so as not to conflict with applicable federal laws, rules, or regulations. In the event that a court or agency of competent jurisdiction holds that federal law, rule, or regulation invalidates any clause, sentence, paragraph, or section of this code or the application thereof to any person or circumstances, it is the intent of the state that the court or agency sever that clause, sentence, paragraph, or section so that the remainder of this section shall remain in effect.\nSEC. 
2.\nSection 10295.35 of the Public Contract Code shall not be construed to create any new enforcement authority or responsibility in the Department of General Services or any other contracting agency.\nSEC. 3.\nNo reimbursement is required by this act pursuant to Section 6 of Article XIII\u2009B of the California Constitution because the only costs that may be incurred by a local agency or school district will be incurred because this act creates a new crime or infraction, eliminates a crime or infraction, or changes the penalty for a crime or infraction, within the meaning of Section 17556 of the Government Code, or changes the definition of a crime within the meaning of Section 6 of Article XIII\u2009B of the California Constitution.", - } - - # Generate a chain of summaries, however we can also stream the results - # to see the results faster - for summary in summarize_article(example["text"]): - pprint(summary.model_dump()) - - """ - {'denser_summary': 'State agencies in California cannot enter into contracts ' - 'worth $100,000 or more with contractors that discriminate ' - 'in benefits based on gender identity. The requirement ' - 'applies to contractors operating within the state, on ' - 'state-owned or occupied property outside the state, and ' - 'elsewhere in the United States where work related to a ' - 'state contract is being performed. Contractors must treat ' - 'employee benefit requests and eligibility documentation as ' - 'confidential. Exceptions to the requirement can be made in ' - 'certain circumstances. Contractors can avoid being seen as ' - 'discriminatory if they pay the actual costs of benefits or ' - 'if they are unable to provide certain benefits despite ' - 'reasonable efforts. 
Contracts must include a certification ' - 'of compliance with the requirement.', - 'included_entities': ['California', - 'contracts', - 'discrimination', - 'benefits', - 'gender identity', - 'state agencies', - 'state-owned property', - 'confidential', - 'exceptions'], - 'index': 0, - 'missing_entities': []} - {'denser_summary': 'State agencies in California cannot enter into contracts ' - 'worth $100,000 or more with contractors that discriminate ' - 'in benefits based on gender identity. The requirement ' - 'applies to contractors operating within the state, on ' - 'state-owned or occupied property outside the state, and ' - 'elsewhere in the United States where work related to a ' - 'state contract is being performed. Contractors must treat ' - 'employee benefit requests and eligibility documentation as ' - 'confidential. Exceptions to the requirement can be made in ' - 'certain circumstances, such as when there is only one ' - 'prospective contractor available or when the contract is ' - 'necessary to respond to an emergency. Contractors can ' - 'avoid being seen as discriminatory if they pay the actual ' - 'costs of benefits or if they are unable to provide certain ' - 'benefits despite reasonable efforts. Contracts must ' - 'include a certification of compliance with the ' - 'requirement, and false certification can result in ' - 'penalties.', - 'included_entities': ['California', - 'contracts', - 'discrimination', - 'benefits', - 'gender identity', - 'state agencies', - 'state-owned property', - 'confidential', - 'exceptions', - 'prospective contractor', - 'emergency', - 'actual costs', - 'penalties'], - 'index': 1, - 'missing_entities': ['availability', 'false certification']} - {'denser_summary': 'State agencies in California are prohibited from entering ' - 'into contracts worth $100,000 or more with contractors ' - 'that discriminate in benefits based on gender identity. 
' - 'This requirement applies to contractors operating within ' - 'the state, on state-owned or occupied property outside the ' - 'state, and elsewhere in the United States where work ' - 'related to a state contract is being performed. ' - 'Contractors must keep employee benefit requests and ' - 'eligibility documentation confidential. There are ' - 'exceptions to this requirement, such as when there is only ' - 'one available contractor or when an emergency situation ' - 'requires immediate contracting. Contractors can avoid ' - 'being seen as discriminatory by paying the actual costs of ' - 'benefits or if they are unable to provide certain benefits ' - 'despite reasonable efforts. Contracts must include a ' - 'certification of compliance with this requirement, and ' - 'false certification can lead to penalties and the ' - 'application of other existing remedies.', - 'included_entities': ['California', - 'contracts', - 'discrimination', - 'benefits', - 'gender identity', - 'state agencies', - 'state-owned property', - 'confidential', - 'exceptions', - 'contractors', - 'availability', - 'emergency', - 'actual costs', - 'false certification', - 'penalties'], - 'index': 2, - 'missing_entities': ['contracting practices', 'federal laws']} - {'denser_summary': 'State agencies in California are prohibited from entering ' - 'into contracts worth $100,000 or more with contractors ' - 'that discriminate in benefits based on gender identity. ' - 'This requirement applies to contractors operating within ' - 'the state, on state-owned or occupied property outside the ' - 'state, and elsewhere in the United States where work ' - 'related to a state contract is being performed. ' - 'Contractors must keep employee benefit requests and ' - 'eligibility documentation confidential. There are ' - 'exceptions to this requirement, such as when there is only ' - 'one available contractor or when an emergency situation ' - 'requires immediate contracting. 
Contractors can avoid ' - 'being seen as discriminatory by paying the actual costs of ' - 'benefits or if they are unable to provide certain benefits ' - 'despite reasonable efforts. Contracts must include a ' - 'certification of compliance with this requirement, and ' - 'false certification can lead to penalties and the ' - 'application of other existing remedies. This section of ' - 'the Public Contract Code does not regulate the contracting ' - 'practices of local jurisdictions, and it is intended to be ' - 'consistent with applicable federal laws, rules, and ' - 'regulations.', - 'included_entities': ['California', - 'contracts', - 'discrimination', - 'benefits', - 'gender identity', - 'state agencies', - 'state-owned property', - 'confidential', - 'exceptions', - 'contractors', - 'availability', - 'emergency', - 'actual costs', - 'false certification', - 'penalties', - 'Public Contract Code', - 'local jurisdictions', - 'federal laws', - 'federal rules', - 'federal regulations'], - 'index': 3, - 'missing_entities': []} - """ diff --git a/mkdocs.yml b/mkdocs.yml index b6de72e..036bfe8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,6 +12,20 @@ theme: repo: fontawesome/brands/github edit: material/pencil view: material/eye + theme: + admonition: + note: octicons/tag-16 + abstract: octicons/checklist-16 + info: octicons/info-16 + tip: octicons/squirrel-16 + success: octicons/check-16 + question: octicons/question-16 + warning: octicons/alert-16 + failure: octicons/x-circle-16 + danger: octicons/zap-16 + bug: octicons/bug-16 + example: octicons/beaker-16 + quote: octicons/quote-16 features: - announce.dismiss - content.action.edit @@ -59,6 +73,7 @@ theme: markdown_extensions: - abbr - admonition + - pymdownx.details - attr_list - def_list - footnotes From cf23edfe7d33f771a004df05bf7b8d5ae5bb008d Mon Sep 17 00:00:00 2001 From: Ivan Leo Date: Mon, 13 Nov 2023 23:30:47 +0800 Subject: [PATCH 03/40] Chain of density edits (#171) --- docs/blog/posts/chain-of-density.md | 
133 ++++++++++++++++++++------ examples/chain-of-density/finetune.py | 4 + 2 files changed, 109 insertions(+), 28 deletions(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index 5e07816..b39f1f6 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -15,9 +15,9 @@ authors: # Better Summaries by Finetuning Chain of Density -> Discover how to distil an interative method like chain of density into a single finetune. +> Discover how to distil an iterative method like chain of Chain Of Density into a single finetune. -In this article, we'll guide you through implementing the original Chain of Density method using Instructor, then show how to distile a GPT 3.5 model to match GPT-4's iterative summarization capabilities. Using these methods were able to increase latency by 20x, reduce costs by 50x and maintain entity density. +In this article, we'll guide you through implementing the original Chain of Density method using Instructor, then show how to distile a GPT 3.5 model to match GPT-4's iterative summarization capabilities. Using these methods were able to decrease latency by 20x, reduce costs by 50x and maintain entity density. By the end you'll end up with a GPT 3.5 model, (fine-tuned using Instructor's great tooling), capable of producing summaries that rival the effectiveness of Chain of Density. As always, all code is readily available in our `examples/chain-of-density` folder in our repo for your reference. @@ -27,7 +27,7 @@ By the end you'll end up with a GPT 3.5 model, (fine-tuned using Instructor's gr ## Part 1) Chain of Density -Summarizing extensive texts with AI can be challenging, often relying on inconsistent techniques. Salesforce AI Research's novel method, chain of density, enhances AI-based text summarization, outperforming human-generated summaries. +Summarizing extensive texts with AI can be challenging, often relying on inconsistent techniques. 
Salesforce AI Research's novel method, Chain Of Density, enhances AI-based text summarization, outperforming human-generated summaries. Initially, an AI produces a summary, then refines it through multiple iterations, adding missing article entities. Each iteration adds new article entities to the summary, keeping length consistent, leading to an entity-dense, informative summary called Chain Of Density. @@ -39,7 +39,7 @@ First introduced by Salesforce's AI Research wing in their paper - [From Sparse ### Original Prompt -We can implement the original prompt using `pip install instructor` by breaking down the entire process into smaller api calls. This allows us to introduce validation at each step to ensure that we're getting the results that we want. +We can break down the original process into smaller api calls. This allows us to introduce validation at each step to ensure that we're getting the results that we want. ??? note "Original Chain of Density Prompt" @@ -92,11 +92,71 @@ We can implement the original prompt using `pip install instructor` by breaking ### Data Modelling +Before we begin modelling the data, let's make sure we install all of our dependencies + +``` +pip install instructor aiohttp rich +``` + #### Initial Summary Let's start by walking through some of the data models that we'll be using as the `response_model` for our open ai function calls -Firstly, we'll need a data model for the initial summary that we will be generating. We'll take the description of this class straight from the original prompt. Its important to note that these docstrings serve a purpose, they are directly used by the LLM when generating the outputs. +Firstly, we'll need a data model for the initial summary that we will be generating. We'll take the description of this class straight from the original prompt. It's important to note that these docstrings serve a purpose, they are **directly used by the LLM when generating the outputs**. + +??? 
note "A quick note on Docstrings" + + Under the hood, Instructor parses the `response_model` that you give us into a function call for OpenAI to execute. This means that the final output will be closely linked to the Pydantic model you specify. + + For instance, this simple model that we later use in fine-tuning. + + ```py + class GeneratedSummary(BaseModel): + """ + This represents a highly concise summary that includes as many entities as possible from the original source article. + + An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title. + + Guidelines + - Make every word count + - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article. + - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" + """ + + summary: str = Field( + ..., + description="This represents the final summary generated that captures the meaning of the original article which is as concise as possible. ", + ) + ``` + + We eventually transform it into an OpenAI function call as seen below. + + ``` + { + "functions": [ + { + "name": "GeneratedSummary", + "description": "This represents a highly concise summary that includes as many entities as possible from the original source article.\n\nAn Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title.\n\nGuidelines\n- Make every word count\n- The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article.\n- Make space with fusion, compression, and removal of uninformative phrases like \"the article discusses\"", + "parameters": { + "properties": { + "summary": { + "description": "This represents the final summary generated that captures the meaning of the original article which is as concise as possible. 
", + "title": "Summary", + "type": "string" + } + }, + "required": [ + "summary" + ], + "type": "object" + } + } + ] + } + } + ``` + + Therefore this means that the more elaborate and detailed your descriptions are, the better the outputs you will be able to get back. But we don't just stop there, since it's all Pydantic under the hood, you can validate and parse the resulting output to make sure it is **exactly what you specify**. It's all python all the way down. ```py class InitialSummary(BaseModel): @@ -306,7 +366,7 @@ def summarize_article(article: str, summary_steps: int = 3): 4. If you've chosen a value that is larger than 0.08, make sure to increase this value in case you need to do multiple rewrites -This summarization function yields a result which triples the number of entities while mantaining the same number of tokens. We can also see that stylistically, the summary is a lot more natural. +This summarization function yields a result which triples the number of entities while maintaining the same number of tokens. We can also see that stylistically, the summary is a lot more natural. **First Iteration** @@ -318,29 +378,33 @@ This summarization function yields a result which triples the number of entities ## Part 2) Fine-Tuning -In this section, we'll look into how to fine-tune a GPT 3.5 model so that it is able to perform at an equivalent level as a GPT-4 model. We'll then compare the performance of our model against that of `GPT-4` and `GPT-4-Turbo` to see how it stacks up. +In this section, we'll look into how to fine-tune a GPT 3.5 model so that it is able to perform at an equivalent level as a GPT-4 model. We'll then compare the performance of our model against that of `GPT-4` to see how it stacks up. 
### Creating a Training Set -Let's first segregate our train and test set so that we don't have any sort of contamination - this corresponds to our `train.csv` and `test.csv` in our [Hugging Face Dataset](https://huggingface.co/datasets/ivanleomk/gpt4-chain-of-density). Now, we just need to import the `Instructions` module from the `Instructor` package which allows you to generate a nicely formatted `.jsonl` file to be used for fine-tuning +In order to prevent any contamination of data during testing, we randomly sampled 120 articles from the `griffin/chain-of-density` dataset and split these articles into a `train.csv` and a `test.csv` file which we uploaded to [Hugging Face](https://huggingface.co/datasets/ivanleomk/gpt4-chain-of-density). Now, we just need to import the `Instructions` module from the `Instructor` package which allows you to generate a nicely formatted `.jsonl` file to be used for fine-tuning -```py hl_lines="2 9 11-18 37 40" +```py hl_lines="2 9 11 13-21 40 43" from typing import List from chain_of_density import summarize_article #(1)! import csv import logging import instructor from pydantic import BaseModel +from openai import OpenAI -logging.basicConfig(level=logging.INFO) #(2)! +client = instructor.patch(OpenAI()) # (2)! -instructions = instructor.Instructions( #(3)! +logging.basicConfig(level=logging.INFO) #(3)! + +instructions = instructor.Instructions( #(4)! name="Chain Of Density", finetune_format="messages", # log handler is used to save the data to a file # you can imagine saving it to a database or other storage # based on your needs! log_handlers=[logging.FileHandler("generated.jsonl")], + openai_client=client, ) class GeneratedSummary(BaseModel): @@ -376,15 +440,17 @@ with open("train.csv", "r") as file: 1. In this example, we're using the summarize_article that we defined up above. We saved it in a local file called `chain_of_density.py`, hence the import -2. We also need to configure logging at the `INFO` level. 
This is very important, if this is not configured, your output will not be generated. +2. We patch the default OpenAI client so that we can use the Instructor library with it -3. We instantiate a `Instruction` object which will help us handle the conversion of our function calls into a valid `.jsonl` file. We also define +3. We also need to configure logging at the `INFO` level. This is very important, if this is not configured, your output will not be generated. + +4. We instantiate a `Instruction` object which will help us handle the conversion of our function calls into a valid `.jsonl` file. We also define the name of the `.jsonl` file in the `log_handlers` parameter -4. We add in an `instructions.distil` annotation so that we automatically capture the input and output of the function we'd like to +5. We add in an `instructions.distil` annotation so that we automatically capture the input and output of the function we'd like to fine-tune our model to output -5. We return a `Pydantic` object which matches the annotation that we use on our function. Note that we must specify a `Pydantic` object to +6. We return a `Pydantic` object which matches the annotation that we use on our function. Note that we must specify a `Pydantic` object to be returned when using the `instructions.distil` annotation !!! warning "Rate Limiting" @@ -421,23 +487,32 @@ With that, you've now got your own fine-tuned model ready to go and serve data i ## Results and Benchmarks -We fine-tuned a total of 3 different models, giving each 20, 50 and 76 samples respectively to see if more data improved the models. We then compared the output of these fine tuned models to GPT-4 and GPT-3 summaries that were generated using chain-of-density methods. - -We'll be comparing these models in three main ways +We'll be comparing the following models in 3 ways using 20 articles that were not used for fine-tuning. - Entity Density : This is entities per token, the higher the better for density. 
- Latency : Time to last token generated in seconds -- Costs : How much does the entire experiment cost +- Costs : Total cost to generate outputs - we break down the cost into training and inference costs for easy reference -We used a total of 20 articles as a validation set which our fine tuned models had not seen before. This was the overall performance that we observed. +`3.5 Finetuned (n) ` -| Model | Mean Latency (s) | Mean Entity Count | Mean Entity Density | Tokens | -| ------------------- | ---------------- | ----------------- | ------------------- | ------ | -| GPT-4 (COD) | 49.5 | 11.3 | 0.138 | 81.65 | -| GPT-3 (COD) | 145.94 | 11.05 | 0.105 | 105.7 | -| 3.5 Finetuned (20) | 2.25 | 14.7 | 0.154 | 95.45 | -| 3.5 Finetuned (50) | 2.09 | 12.4 | 0.140 | 88.35 | -| 3.5 Finetuned (76) | 2.17 | 11.65 | 0.142 | 82.05 | +: This is a GPT 3.5 model that we fine-tuned on `n` examples. Each model was finetuned for 4-5 epochs ( This was automatically decided by the OpenAI scheduler ) + +`GPT-4 (COD)` + +: This is a GPT4 model which we applied 3 rounds of Chain Of Density rewrites to generate a summary with using the methodology above + +`GPT-3 (Vanilla)` + +: This is a GPT 3.5 model that we asked to generate entity-dense summaries which were concise. Summaries were generated in a single pass + + +| Model | Mean Latency (s) | Mean Entity Count | Mean Entity Density | Mean Tokens | +| ------------------- | ---------------- | ----------------- | ------------------- | ----------- | +| GPT-4 (COD) | 49.5 | 11.3 | 0.138 | 81.65 | +| GPT-3.5 (Vanilla) | 16.8 | 11.95 | 0.122 | 98.35 | +| 3.5 Finetuned (20) | 2.25 | 14.7 | 0.154 | 95.45 | +| 3.5 Finetuned (50) | 2.09 | 12.4 | 0.140 | 88.35 | +| 3.5 Finetuned (76) | 2.17 | 11.65 | 0.142 | 82.05 | ??? 
notes "Finetuning Datasets" @@ -455,11 +530,13 @@ Using the OpenAI Usage Dashboard, we can calculate the cost of generating 20 sum | 3.5 Finetuned (50) | 1.368 | 0.165 | 49,057 | 1.266 | | 3.5 Finetuned (76) | 1.824 | 0.174 | 51,583 | 2.481 | | GPT-4 (COD) | - | 12.9 | 409,062 | 12.9 | -| GPT-3 (COD) | - | 0.45 | 290,164 | 0.45 | +| GPT-3.5 (Vanilla) | - | 0.20 | 51,162 | 0.2 | Here, we can see that `GPT-4` has an approximate inference cost of `0.65` per summary while our finetuned models have an inference cost of `0.0091` per summary which is ~ `72x` cheaper. +Interestingly, the model finetuned with the least examples seems to outperform the others. While the reason for this is unknown, a few potential reasons could be that either we didn't train for sufficient epochs ( We chose the default 5 epochs ) or that the models started learning to imitate other behaviour such as more abstract writing styles from the larger variety of samples, resulting in a decrease in entity density. + ## Conclusions Finetuning this iterative method was 20-40x faster while improving overall performance, resulting in massive efficiency gains by finetuning and distilling capabilities into specialized models. diff --git a/examples/chain-of-density/finetune.py b/examples/chain-of-density/finetune.py index 45d509c..85dbce5 100644 --- a/examples/chain-of-density/finetune.py +++ b/examples/chain-of-density/finetune.py @@ -1,4 +1,5 @@ from typing import List +from openai import OpenAI from chain_of_density import summarize_article import csv import logging @@ -7,6 +8,8 @@ from pydantic import BaseModel, Field logging.basicConfig(level=logging.INFO) +client = instructor.patch(OpenAI()) + instructions = instructor.Instructions( name="Chain Of Density", finetune_format="messages", @@ -14,6 +17,7 @@ instructions = instructor.Instructions( # you can imagine saving it to a database or other storage # based on your needs! 
log_handlers=[logging.FileHandler("generated.jsonl")], + openai_client=client, ) From 38c8959631ba26ec65cee380d873bf10c383e5be Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 11:02:01 -0500 Subject: [PATCH 04/40] update --- docs/blog/posts/chain-of-density.md | 54 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index b39f1f6..74cc176 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -13,11 +13,11 @@ authors: - jxnl --- -# Better Summaries by Finetuning Chain of Density +# Finetuning Faster Smarter Summaries with OpenAI's GPT-3.5-turbo -> Discover how to distil an iterative method like chain of Chain Of Density into a single finetune. +> Discover how to distil an iterative method like Chain Of Density into a single finetuned model using Instructor -In this article, we'll guide you through implementing the original Chain of Density method using Instructor, then show how to distile a GPT 3.5 model to match GPT-4's iterative summarization capabilities. Using these methods were able to decrease latency by 20x, reduce costs by 50x and maintain entity density. +In this article, we'll guide you through implementing the original Chain of Density method using Instructor, then show how to distile a GPT 3.5 model to match GPT-4's iterative summarization capabilities. Using these methods were able to decrease latency by 20x, reduce costs by 50x and maintain entity density. By the end you'll end up with a GPT 3.5 model, (fine-tuned using Instructor's great tooling), capable of producing summaries that rival the effectiveness of Chain of Density. As always, all code is readily available in our `examples/chain-of-density` folder in our repo for your reference. @@ -106,7 +106,7 @@ Firstly, we'll need a data model for the initial summary that we will be generat ??? 
note "A quick note on Docstrings" - Under the hood, Instructor parses the `response_model` that you give us into a function call for OpenAI to execute. This means that the final output will be closely linked to the Pydantic model you specify. + Under the hood, Instructor parses the `response_model` that you give us into a function call for OpenAI to execute. This means that the final output will be closely linked to the Pydantic model you specify. For instance, this simple model that we later use in fine-tuning. @@ -156,7 +156,7 @@ Firstly, we'll need a data model for the initial summary that we will be generat } ``` - Therefore this means that the more elaborate and detailed your descriptions are, the better the outputs you will be able to get back. But we don't just stop there, since it's all Pydantic under the hood, you can validate and parse the resulting output to make sure it is **exactly what you specify**. It's all python all the way down. + Therefore this means that the more elaborate and detailed your descriptions are, the better the outputs you will be able to get back. But we don't just stop there, since it's all Pydantic under the hood, you can validate and parse the resulting output to make sure it is **exactly what you specify**. It's all python all the way down. ```py class InitialSummary(BaseModel): @@ -270,7 +270,7 @@ def has_no_absent_entities(cls, absent_entities: List[str]): 1. Similar to the original paper, we utilize the `NLTK` word tokenizer to count the number of tokens within our generated sentences. We aim for at least 60 tokens in our generated summary so that we don't lose information. -2. We also use the spaCy library to calculate the entity density of the generated summary. +2. We also use the spaCy library to calculate the entity density of the generated summary. 3. We also implement a minimum entity density so that we stay within a given range. 
0.08 is arbitrarily chosen in this case @@ -495,47 +495,45 @@ We'l be comparing the following models in 3 ways using 20 articles that were not `3.5 Finetuned (n) ` -: This is a GPT 3.5 model that we fine-tuned on `n` examples. Each model was finetuned for 4-5 epochs ( This was automatically decided by the OpenAI scheduler ) +: This is a GPT 3.5 model that we fine-tuned on `n` examples. Each model was finetuned for 4-5 epochs ( This was automatically decided by the OpenAI scheduler ) `GPT-4 (COD)` -: This is a GPT4 model which we applied 3 rounds of Chain Of Density rewrites to generate a summary with using the methodology above +: This is a GPT4 model which we applied 3 rounds of Chain Of Density rewrites to generate a summary with using the methodology above `GPT-3 (Vanilla)` -: This is a GPT 3.5 model that we asked to generate entity-dense summaries which were concise. Summaries were generated in a single pass +: This is a GPT 3.5 model that we asked to generate entity-dense summaries which were concise. Summaries were generated in a single pass - -| Model | Mean Latency (s) | Mean Entity Count | Mean Entity Density | Mean Tokens | -| ------------------- | ---------------- | ----------------- | ------------------- | ----------- | -| GPT-4 (COD) | 49.5 | 11.3 | 0.138 | 81.65 | -| GPT-3.5 (Vanilla) | 16.8 | 11.95 | 0.122 | 98.35 | -| 3.5 Finetuned (20) | 2.25 | 14.7 | 0.154 | 95.45 | -| 3.5 Finetuned (50) | 2.09 | 12.4 | 0.140 | 88.35 | -| 3.5 Finetuned (76) | 2.17 | 11.65 | 0.142 | 82.05 | +| Model | Mean Latency (s) | Mean Entity Count | Mean Entity Density | Mean Tokens | +| ------------------ | ---------------- | ----------------- | ------------------- | ----------- | +| GPT-4 (COD) | 49.5 | 11.3 | 0.138 | 81.65 | +| GPT-3.5 (Vanilla) | 16.8 | 11.95 | 0.122 | 98.35 | +| 3.5 Finetuned (20) | 2.25 | 14.7 | 0.154 | 95.45 | +| 3.5 Finetuned (50) | 2.09 | 12.4 | 0.140 | 88.35 | +| 3.5 Finetuned (76) | 2.17 | 11.65 | 0.142 | 82.05 | ??? 
notes "Finetuning Datasets" For our finetuned models, we did a few optimisations to raise the performance. - + We only included summaries that had a minimum density of 0.15 in the dataset, took the summary in the entire chain with the highest density as the final one, forced every regenerated summary to have a minimum density of 0.12 and regenerated summaries up to three times if they didn't meet the summaries. **This is a much more expensive strategy and can cost up to 2.5x or more what we do in this tutorial** This resulted in the total cost of $63.46 to generate just 75 examples due to the stringent requirements, translating to about $0.85 per generated summary example. Using the OpenAI Usage Dashboard, we can calculate the cost of generating 20 summaries as seen below. -| Model | Training Cost ($) | Inference Cost ($) | Tokens Used | Total Cost ($) | -| ------------------- | ----------------- | ------------------ | ----------- | -------------- | -| 3.5 Finetuned (20) | 0.664 | 0.207 | 56,573 | 0.817 | -| 3.5 Finetuned (50) | 1.368 | 0.165 | 49,057 | 1.266 | -| 3.5 Finetuned (76) | 1.824 | 0.174 | 51,583 | 2.481 | -| GPT-4 (COD) | - | 12.9 | 409,062 | 12.9 | -| GPT-3.5 (Vanilla) | - | 0.20 | 51,162 | 0.2 | +| Model | Training Cost ($) | Inference Cost ($) | Tokens Used | Total Cost ($) | +| ------------------ | ----------------- | ------------------ | ----------- | -------------- | +| 3.5 Finetuned (20) | 0.664 | 0.207 | 56,573 | 0.817 | +| 3.5 Finetuned (50) | 1.368 | 0.165 | 49,057 | 1.266 | +| 3.5 Finetuned (76) | 1.824 | 0.174 | 51,583 | 2.481 | +| GPT-4 (COD) | - | 12.9 | 409,062 | 12.9 | +| GPT-3.5 (Vanilla) | - | 0.20 | 51,162 | 0.2 | +Here, we can see that `GPT-4` has an approximate inference cost of `0.65` per summary while our finetuned models have an inference cost of `0.0091` per summary which is ~ `72x` cheaper. 
-Here, we can see that `GPT-4` has an approximate inference cost of `0.65` per summary while our finetuned models have an inference cost of `0.0091` per summary which is ~ `72x` cheaper. - -Interestingly, the model finetuned with the least examples seems to outperform the others. While the reason for this is unknown, a few potential reasons could be that either we didn't train for sufficient epochs ( We chose the default 5 epochs ) or that the models started learning to imitate other behaviour such as more abstract writing styles from the larger variety of samples, resulting in a decrease in entity density. +Interestingly, the model finetuned with the fewest examples seems to outperform the others. While the reason for this is unknown, a few potential reasons could be that either we didn't train for sufficient epochs ( We chose the default 5 epochs ) or that the models started learning to imitate other behaviour such as more abstract writing styles from the larger variety of samples, resulting in a decrease in entity density. 
## Conclusions From 430b5f6a0a7e6d65bf1234ef665ea1656fbbfed8 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 11:09:07 -0500 Subject: [PATCH 05/40] add slug --- docs/blog/posts/chain-of-density.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index 74cc176..cdae20a 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -1,6 +1,7 @@ --- draft: False date: 2023-11-05 +slug: chain-of-density tags: - pydantic - validation @@ -13,7 +14,7 @@ authors: - jxnl --- -# Finetuning Faster Smarter Summaries with OpenAI's GPT-3.5-turbo +# Smarter Summaries w/ Finetuning GPT-3.5 and Chain of Density > Discover how to distil an iterative method like Chain Of Density into a single finetuned model using Instructor From 3391e1869e4be5be96f3d98cfdd2bda601aaa9e3 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 11:39:36 -0500 Subject: [PATCH 06/40] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f17742b..89ca690 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "instructor" -version = "0.3.2" +version = "0.3.3" description = "Helper functions that allow us to improve openai's function_call ergonomics" authors = ["Jason Liu "] license = "MIT" From c678af1166e890d72f3ca2e68cfbf182e8f09ac5 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 15:15:06 -0500 Subject: [PATCH 07/40] better citation --- docs/blog/posts/chain-of-density.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index cdae20a..a582994 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -20,7 +20,7 @@ authors: In this article, we'll guide you through implementing the original Chain of 
Density method using Instructor, then show how to distile a GPT 3.5 model to match GPT-4's iterative summarization capabilities. Using these methods were able to decrease latency by 20x, reduce costs by 50x and maintain entity density. -By the end you'll end up with a GPT 3.5 model, (fine-tuned using Instructor's great tooling), capable of producing summaries that rival the effectiveness of Chain of Density. As always, all code is readily available in our `examples/chain-of-density` folder in our repo for your reference. +By the end you'll end up with a GPT 3.5 model, (fine-tuned using Instructor's great tooling), capable of producing summaries that rival the effectiveness of Chain of Density [[Adams et al. (2023)]](https://arxiv.org/abs/2309.04269). As always, all code is readily available in our `examples/chain-of-density` folder in our repo for your reference. ??? abstract "Datasets and Colab Notebook" @@ -28,11 +28,11 @@ By the end you'll end up with a GPT 3.5 model, (fine-tuned using Instructor's gr ## Part 1) Chain of Density -Summarizing extensive texts with AI can be challenging, often relying on inconsistent techniques. Salesforce AI Research's novel method, Chain Of Density, enhances AI-based text summarization, outperforming human-generated summaries. +Summarizing extensive texts with AI can be challenging, often relying on inconsistent techniques. Their novel method, Chain Of Density prompting, enhances AI-based text summarization, outperforming human-generated summaries. Initially, an AI produces a summary, then refines it through multiple iterations, adding missing article entities. Each iteration adds new article entities to the summary, keeping length consistent, leading to an entity-dense, informative summary called Chain Of Density. -First introduced by Salesforce's AI Research wing in their paper - [From Sparse to Dense: GPT-4 Summarization with Chain of Density Prompting](https://arxiv.org/abs/2309.04269). 
The team has found that this method is able to consistently beats similar summaries written by human annotators. +First introduced in the paper - [From Sparse to Dense: GPT-4 Summarization with Chain of Density Prompting](https://arxiv.org/abs/2309.04269). The team has found that this method is able to consistently beats similar summaries written by human annotators. ??? info "Implementation Details" From 426ebfb2b209769b08d083b07d5bea1b1b6cb1fb Mon Sep 17 00:00:00 2001 From: Xeophon <104866563+Xeophon@users.noreply.github.com> Date: Mon, 13 Nov 2023 23:16:48 +0100 Subject: [PATCH 08/40] Fix typo (#173) --- docs/blog/posts/chain-of-density.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index a582994..1c2c454 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -502,7 +502,7 @@ We'l be comparing the following models in 3 ways using 20 articles that were not : This is a GPT4 model which we applied 3 rounds of Chain Of Density rewrites to generate a summary with using the methodology above -`GPT-3 (Vanilla)` +`GPT-3.5 (Vanilla)` : This is a GPT 3.5 model that we asked to generate entity-dense summaries which were concise. Summaries were generated in a single pass From 55efa8cad0abaeebcab02c66b3bc50d88cbe1fee Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 17:23:59 -0500 Subject: [PATCH 09/40] mention-raw-response --- docs/index.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/index.md b/docs/index.md index 62e2135..d73cae8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -152,6 +152,24 @@ assert user.name == "Jason" assert user.age == 25 ``` +!!! note "Accessing the original response" + + If you want to access anything like usage or other metadata, the original response is available on the `Model._raw_response` attribute. 
+ + ```python + user: UserDetail = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=UserDetail, + messages=[ + {"role": "user", "content": "Extract Jason is 25 years old"}, + ] + ) + + from openai.types.chat.chat_completion import ChatCompletion + + assert isinstance(user._raw_response, ChatCompletion) + ``` + ## Pydantic Validation Validation can also be plugged into the same Pydantic model. Here, if the answer attribute contains content that violates the rule "don't say objectionable things," Pydantic will raise a validation error. From 209c06d22b8dcae037177551ef5d91afda9aa8a8 Mon Sep 17 00:00:00 2001 From: Braden Kinard Date: Mon, 13 Nov 2023 18:48:24 -0500 Subject: [PATCH 10/40] fixes retry_async message unpacking (#175) --- instructor/patch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instructor/patch.py b/instructor/patch.py index bdf9507..fa45c45 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -89,7 +89,7 @@ async def retry_async( None, ) except (ValidationError, JSONDecodeError) as e: - kwargs["messages"].append(dict(**response.choices[0].message)) # type: ignore + kwargs["messages"].append(response.choices[0].message) # type: ignore kwargs["messages"].append( { "role": "user", From e65ccc6f7ee8ebca95b27cc29fe6ad51be218281 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 18:49:42 -0500 Subject: [PATCH 11/40] add asyncio test --- tests/test_patch.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_patch.py b/tests/test_patch.py index 3029a49..3847511 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -75,3 +75,33 @@ def test_runmodel_validator(): assert hasattr( model, "_raw_response" ), "The raw response should be available from OpenAI" + +@pytest.mark.asyncio +async def test_async_runmodel_validator(): + aclient = instructor.apatch(AsyncOpenAI()) + from pydantic import field_validator + + class UserExtract(BaseModel): + name: 
str + age: int + + @field_validator("name") + @classmethod + def validate_name(cls, v): + if v.upper() != v: + raise ValueError("Name should be uppercase") + return v + + model = await aclient.chat.completions.create( + model="gpt-3.5-turbo", + response_model=UserExtract, + max_retries=2, + messages=[ + {"role": "user", "content": "Extract jason is 25 years old"}, + ], + ) + assert isinstance(model, UserExtract), "Should be instance of UserExtract" + assert model.name == "JASON" + assert hasattr( + model, "_raw_response" + ), "The raw response should be available from OpenAI" From 5e320650d51693a17cb6d6cfa3887322f7fd7243 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 18:49:55 -0500 Subject: [PATCH 12/40] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 89ca690..0d5dbcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "instructor" -version = "0.3.3" +version = "0.3.4" description = "Helper functions that allow us to improve openai's function_call ergonomics" authors = ["Jason Liu "] license = "MIT" From e6709b591b0628b77d8195f5ae88e1fab0a80b60 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 22:51:40 -0500 Subject: [PATCH 13/40] clean up tables --- docs/blog/posts/chain-of-density.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index 1c2c454..0144978 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -494,7 +494,7 @@ We'l be comparing the following models in 3 ways using 20 articles that were not - Latency : Time to last token generated in seconds - Costs : Total cost to generate outputs - we break down the cost into training and inference costs for easy reference -`3.5 Finetuned (n) ` +`3.5 Finetuned (n)` : This is a GPT 3.5 model that we 
fine-tuned on `n` examples. Each model was finetuned for 4-5 epochs ( This was automatically decided by the OpenAI scheduler ) @@ -504,15 +504,15 @@ We'l be comparing the following models in 3 ways using 20 articles that were not `GPT-3.5 (Vanilla)` -: This is a GPT 3.5 model that we asked to generate entity-dense summaries which were concise. Summaries were generated in a single pass +: This is a GPT 3.5 model that we asked to generate entity-dense summaries which were concise. Summaries were generated in a single pass targeting about 80-90 tokens. -| Model | Mean Latency (s) | Mean Entity Count | Mean Entity Density | Mean Tokens | -| ------------------ | ---------------- | ----------------- | ------------------- | ----------- | -| GPT-4 (COD) | 49.5 | 11.3 | 0.138 | 81.65 | -| GPT-3.5 (Vanilla) | 16.8 | 11.95 | 0.122 | 98.35 | -| 3.5 Finetuned (20) | 2.25 | 14.7 | 0.154 | 95.45 | -| 3.5 Finetuned (50) | 2.09 | 12.4 | 0.140 | 88.35 | -| 3.5 Finetuned (76) | 2.17 | 11.65 | 0.142 | 82.05 | +| Model | Mean Latency (s) | Mean Entity Density | +| ------------------ | ---------------- | ------------------- | +| 3.5 Finetuned (20) | 2.1 | 0.15 | +| 3.5 Finetuned (50) | 2.1 | 0.14 | +| 3.5 Finetuned (76) | 2.1 | 0.14 | +| GPT-3.5 (Vanilla) | 16.8 | 0.12 | +| GPT-4 (COD) | 49.5 | 0.15 | ??? 
notes "Finetuning Datasets" @@ -526,11 +526,11 @@ Using the OpenAI Usage Dashboard, we can calculate the cost of generating 20 sum | Model | Training Cost ($) | Inference Cost ($) | Tokens Used | Total Cost ($) | | ------------------ | ----------------- | ------------------ | ----------- | -------------- | -| 3.5 Finetuned (20) | 0.664 | 0.207 | 56,573 | 0.817 | -| 3.5 Finetuned (50) | 1.368 | 0.165 | 49,057 | 1.266 | -| 3.5 Finetuned (76) | 1.824 | 0.174 | 51,583 | 2.481 | -| GPT-4 (COD) | - | 12.9 | 409,062 | 12.9 | | GPT-3.5 (Vanilla) | - | 0.20 | 51,162 | 0.2 | +| 3.5 Finetuned (20) | 0.7 | 0.20 | 56,573 | 0.8 | +| 3.5 Finetuned (50) | 1.4 | 0.17 | 49,057 | 1.3 | +| 3.5 Finetuned (76) | 1.8 | 0.17 | 51,583 | 2.5 | +| GPT-4 (COD) | - | 12.9 | 409,062 | 12.9 | Here, we can see that `GPT-4` has an approximate inference cost of `0.65` per summary while our finetuned models have an inference cost of `0.0091` per summary which is ~ `72x` cheaper. From 33b04c5c707d0c356700eaecdadb335065f9d3de Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 23:22:14 -0500 Subject: [PATCH 14/40] Update philosophy --- docs/philosophy.md | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/docs/philosophy.md b/docs/philosophy.md index 7f65870..3f5c1f8 100644 --- a/docs/philosophy.md +++ b/docs/philosophy.md @@ -1,13 +1,31 @@ # Philosophy -The philosophy behind this library is to provide a **lightweight** and **flexible** approach to leveraging language models (LLMs) to do **structured output without imposing unnecessary dependencies or abstractions.** +The instructor library embodies a philosophy of simplicity and flexibility in leveraging language models (LLMs). It offers a streamlined approach for structured output, avoiding unnecessary dependencies or complex abstractions. 
-The `instructor` library serves as a bridge from text-based language model interaction to Object-Oriented Programming, seamlessly integrating LLMs into the programming paradigms we're familiar with. By treating LLMs as callable functions that return typed objects, `instructor` demystifies their complexity, making them more accessible for everyday projects. This approach maintains the flexibility and power of Python, letting you write custom code without unnecessary constraints. +## The Bridge to Object-Oriented Programming + +`instructor` acts as a bridge converting text-based LLM interactions into a familiar object-oriented format. Its integration with Pydantic provides type hints, runtime validation, and robust IDE support; love and supported by many in the Python ecosystem. By treating LLMs as callable functions returning typed objects, instructor makes language models backwards compatible with code, making them practical for everyday use while being complex enough for advanced applications. + +## The zen of `instructor` + +Maintain the flexibility and power of Python, without unnecessary constraints. + +Begin with a function and a return type hint – simplicity is key. With my experience maintaining a large enterprise framework at my previous job over many years, I've learned that the goal of making a useful framework is minimizing regret, both for the author and hopefully for the user. 1. Define a Schema `#!python class StructuredData(BaseModel):` -2. Encapsulate all your LLM logic into a function `#!python def extract(a) -> StructuredData:` -3. Define typed computations against your data with `#!python def compute(data: StructuredData):` +2. Define validators and methods on your schema. +3. Encapsulate all your LLM logic into a function `#!python def extract(a) -> StructuredData:` +4. 
Define typed computations against your data with `#!python def compute(data: StructuredData):` or call methods on your schema `#!python data.compute()` -Please note that the library is designed to be adaptable and open-ended, allowing you to customize and extend its functionality based on your specific requirements. +It should be that simple. -If you have any further questions or ideas hit me up on [twitter](https://twitter.com/jxnlco) +## My Goals + +The goal for the library, documentation, and blog, is to help you be a better python programmer and as a result a better AI engineer. + +- The library is a result of my desire for simplicity. +- The library should help maintain simplicity in your codebase. +- I won't try to write prompts for you, +- I don't try to create indirections or abstractions that make it hard to debug in the future + +Please note that the library is designed to be adaptable and open-ended, allowing you to customize and extend its functionality based on your specific requirements. If you have any further questions or ideas hit me up on [twitter](https://twitter.com/jxnlco) From 5f188c00b848de3c4a82abb0e59ed4bcf56c0283 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 13 Nov 2023 23:26:15 -0500 Subject: [PATCH 15/40] bump --- docs/philosophy.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/philosophy.md b/docs/philosophy.md index 3f5c1f8..2ebf685 100644 --- a/docs/philosophy.md +++ b/docs/philosophy.md @@ -1,10 +1,12 @@ # Philosophy -The instructor library embodies a philosophy of simplicity and flexibility in leveraging language models (LLMs). It offers a streamlined approach for structured output, avoiding unnecessary dependencies or complex abstractions. +The instructor values [simplicity](https://eugeneyan.com/writing/simplicity/) and flexibility in leveraging language models (LLMs). It offers a streamlined approach for structured output, avoiding unnecessary dependencies or complex abstractions. 
Let [Pydantic](https://docs.pydantic.dev/latest/) do the heavy lifting. + +> “Simplicity is a great virtue but it requires hard work to achieve it and education to appreciate it. And to make matters worse: complexity sells better.” — Edsger Dijkstra ## The Bridge to Object-Oriented Programming -`instructor` acts as a bridge converting text-based LLM interactions into a familiar object-oriented format. Its integration with Pydantic provides type hints, runtime validation, and robust IDE support; love and supported by many in the Python ecosystem. By treating LLMs as callable functions returning typed objects, instructor makes language models backwards compatible with code, making them practical for everyday use while being complex enough for advanced applications. +`instructor` acts as a bridge converting text-based LLM interactions into a familiar object-oriented format. Its integration with Pydantic provides type hints, runtime validation, and robust IDE support; loved and supported by many in the Python ecosystem. By treating LLMs as callable functions returning typed objects, instructor makes [language models backwards compatible with code](https://www.youtube.com/watch?v=yj-wSRJwrrc), making them practical for everyday use while being complex enough for advanced applications. ## The zen of `instructor` @@ -21,7 +23,7 @@ It should be that simple. ## My Goals -The goal for the library, documentation, and blog, is to help you be a better python programmer and as a result a better AI engineer. +The goal for the library, [documentation](https://jxnl.github.io/instructor/), and [blog](https://jxnl.github.io/instructor/blog/), is to help you be a better Python programmer and as a result a better AI engineer. - The library is a result of my desire for simplicity. - The library should help maintain simplicity in your codebase. 
@@ -29,3 +31,5 @@ The goal for the library, documentation, and blog, is to help you be a better py - I don't try to create indirections or abstractions that make it hard to debug in the future Please note that the library is designed to be adaptable and open-ended, allowing you to customize and extend its functionality based on your specific requirements. If you have any further questions or ideas hit me up on [twitter](https://twitter.com/jxnlco) + +Cheers! From 66bd355657aaf8b64ed69ecb53a45e88459db634 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Tue, 14 Nov 2023 17:51:26 -0500 Subject: [PATCH 16/40] update docs --- docs/examples/index.md | 9 ++++++--- docs/{ => examples}/maybe.md | 0 docs/{ => examples}/multitask.md | 0 mkdocs.yml | 13 ++++++------- 4 files changed, 12 insertions(+), 10 deletions(-) rename docs/{ => examples}/maybe.md (100%) rename docs/{ => examples}/multitask.md (100%) diff --git a/docs/examples/index.md b/docs/examples/index.md index 79eff7d..84d79f0 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -2,6 +2,10 @@ ## Quick Links +- [Streaming Lists](multitask.md): Stream lists of objects from the same prompt. + +- [Missing Objects](maybe.md): Handle missing objects with `Maybe` and `Optional`. + - [Classifying Text](classification.md): Single and multi-label classification using enums. - [Self-Assessment via Validators](self_critique.md): Implement AI self-assessment with `llm_validator`. @@ -18,13 +22,12 @@ - [Working with Recursive Schemas](recursive.md): Implement and understand recursive schemas. - - [Table Extraction from Text](autodataframe.md): Extract tables, potentially multiple, automatically from textual data. -- [Multi-File Code Generation](gpt-engineer.md): Generate multi-file programs with contents and paths. +- [Multi-File Code Generation](gpt-engineer.md): Generate multi-file programs with contents and paths. 
- [PII Data Sanitization](pii.md): Extract and sanitize Personally Identifiable Information (PII) from documents. - [Action Item and Dependency Mapping](action_items.md): Generate action items and their dependencies from transcripts. -Happy exploring! \ No newline at end of file +Happy exploring! diff --git a/docs/maybe.md b/docs/examples/maybe.md similarity index 100% rename from docs/maybe.md rename to docs/examples/maybe.md diff --git a/docs/multitask.md b/docs/examples/multitask.md similarity index 100% rename from docs/multitask.md rename to docs/examples/multitask.md diff --git a/mkdocs.yml b/mkdocs.yml index 036bfe8..1efdbe0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -121,14 +121,15 @@ markdown_extensions: - pymdownx.tilde nav: - Introduction: - - Getting Started: 'index.md' + - Quick Start: 'index.md' + - Validators: "reask_validation.md" + - Distillation: "distillation.md" - Prompt Engineering Tips: 'tips/index.md' - - Using Validations: "reask_validation.md" - - Streaming Lists: "multitask.md" - - Handling Missing Content: "maybe.md" - - Philosophy: 'philosophy.md' + - Philosophy: 'philosophy.md' - Cookbook: - Overview: 'examples/index.md' + - Streaming Lists: "examples/multitask.md" + - Handling Missing Content: "examples/maybe.md" - Text Classification: 'examples/classification.md' - Self Critique: 'examples/self_critique.md' - Citations: 'examples/exact_citations.md' @@ -141,8 +142,6 @@ nav: - Action Item and Dependency Mapping: 'examples/action_items.md' - Multi-File Code Generation: 'examples/gpt-engineer.md' - PII Data Sanitization: 'examples/pii.md' - - Distillation: - - Distilation: "distillation.md" - CLI Reference: - "Introduction": "cli/index.md" - "Finetuning GPT-3.5": "cli/finetune.md" From d42ad0e7b6ced2ae51275b70942838b4c7c1038c Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Tue, 14 Nov 2023 17:51:53 -0500 Subject: [PATCH 17/40] organize docs --- mkdocs.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/mkdocs.yml b/mkdocs.yml index 1efdbe0..40ae12e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -121,11 +121,11 @@ markdown_extensions: - pymdownx.tilde nav: - Introduction: - - Quick Start: 'index.md' - - Validators: "reask_validation.md" - - Distillation: "distillation.md" - - Prompt Engineering Tips: 'tips/index.md' - - Philosophy: 'philosophy.md' + - Quick Start: 'index.md' + - Validators: "reask_validation.md" + - Distillation: "distillation.md" + - Prompt Engineering Tips: 'tips/index.md' + - Philosophy: 'philosophy.md' - Cookbook: - Overview: 'examples/index.md' - Streaming Lists: "examples/multitask.md" From f8583974a5326283124a805bd439dc00f90ab483 Mon Sep 17 00:00:00 2001 From: mitch-36 <91511669+mitch-36@users.noreply.github.com> Date: Wed, 15 Nov 2023 10:19:05 +1100 Subject: [PATCH 18/40] Spelling fix - chain-of-density.md (#176) Co-authored-by: Jason Liu --- docs/blog/posts/chain-of-density.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index 0144978..864c760 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -488,7 +488,7 @@ With that, you've now got your own fine-tuned model ready to go and serve data i ## Results and Benchmarks -We'l be comparing the following models in 3 ways using 20 articles that were not used for fine-tuning. +We'll be comparing the following models in 3 ways using 20 articles that were not used for fine-tuning. - Entity Density : This is entities per token, the higher the better for density. 
- Latency : Time to last token generated in seconds From 78bf56921fcb7e3068830a3456e4672577291355 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Tue, 14 Nov 2023 18:50:44 -0500 Subject: [PATCH 19/40] Fix llm_validator (#179) --- docs/reask_validation.md | 13 ++++++++--- examples/validators/llm_validator.py | 34 ++++++++++++++++++---------- instructor/dsl/validators.py | 1 - tests/test_patch.py | 27 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 20 deletions(-) diff --git a/docs/reask_validation.md b/docs/reask_validation.md index ee6c22d..c2368b7 100644 --- a/docs/reask_validation.md +++ b/docs/reask_validation.md @@ -61,15 +61,22 @@ name LLM-based validation can also be plugged into the same Pydantic model. Here, if the answer attribute contains content that violates the rule "don't say objectionable things," Pydantic will raise a validation error. ```python hl_lines="9 15" +import instructor + +from openai import OpenAI +from instructor import llm_validator from pydantic import BaseModel, ValidationError, BeforeValidator from typing_extensions import Annotated -from instruct import llm_validator + +# Apply the patch to the OpenAI client +client = instructor.patch(OpenAI()) + class QuestionAnswer(BaseModel): question: str answer: Annotated[ str, - BeforeValidator(llm_validator("don't say objectionable things")) + BeforeValidator(llm_validator("don't say objectionable things", openai_client=client)) ] try: @@ -148,7 +155,7 @@ Behind the scenes, the `instructor.patch()` method adds a `max_retries` paramete try: ... 
except (ValidationError, JSONDecodeError) as e: - kwargs["messages"].append(dict(**response.choices[0].message)) + kwargs["messages"].append(response.choices[0].message) kwargs["messages"].append( { "role": "user", diff --git a/examples/validators/llm_validator.py b/examples/validators/llm_validator.py index 7e82ef7..adafe53 100644 --- a/examples/validators/llm_validator.py +++ b/examples/validators/llm_validator.py @@ -1,15 +1,12 @@ -from typing_extensions import Annotated -from pydantic import ( - BaseModel, - BeforeValidator, -) +import instructor -from instructor import llm_validator, patch from openai import OpenAI +from instructor import llm_validator +from pydantic import BaseModel, ValidationError, BeforeValidator +from typing_extensions import Annotated -client = OpenAI() - -patch() +# Apply the patch to the OpenAI client +client = instructor.patch(OpenAI()) class QuestionAnswer(BaseModel): @@ -46,15 +43,28 @@ After validation with `llm_validator` """ + + class QuestionAnswerNoEvil(BaseModel): question: str answer: Annotated[ str, - BeforeValidator( - llm_validator("don't say objectionable things", allow_override=True) - ), + BeforeValidator(llm_validator("don't say objectionable things", openai_client=client)) ] +try: + qa = QuestionAnswerNoEvil( + question="What is the meaning of life?", + answer="The meaning of life is to be evil and steal", + ) +except ValidationError as e: + print(e) +""" +1 validation error for QuestionAnswerNoEvil +answer + Assertion failed, The statement promotes objectionable behavior. 
[type=assertion_error, input_value='The meaning of life is to be evil and steal', input_type=str] + For further information visit https://errors.pydantic.dev/2.4/v/assertion_error +""" try: qa: QuestionAnswerNoEvil = client.chat.completions.create( diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py index effb95c..5c951b8 100644 --- a/instructor/dsl/validators.py +++ b/instructor/dsl/validators.py @@ -1,4 +1,3 @@ -import openai from pydantic import Field from typing import Optional from openai import OpenAI diff --git a/tests/test_patch.py b/tests/test_patch.py index 3847511..8a586c0 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -1,10 +1,12 @@ import pytest - -from pydantic import BaseModel -from openai import OpenAI, AsyncOpenAI - import instructor +from pydantic import BaseModel, ValidationError, BeforeValidator +from openai import OpenAI, AsyncOpenAI +from instructor import llm_validator +from typing_extensions import Annotated + + client = instructor.patch(OpenAI()) aclient = instructor.apatch(AsyncOpenAI()) @@ -105,3 +107,20 @@ async def test_async_runmodel_validator(): assert hasattr( model, "_raw_response" ), "The raw response should be available from OpenAI" + + +def test_runmodel_validator_error(): + + + class QuestionAnswerNoEvil(BaseModel): + question: str + answer: Annotated[ + str, + BeforeValidator(llm_validator("don't say objectionable things", openai_client=client)) + ] + + with pytest.raises(ValidationError): + QuestionAnswerNoEvil( + question="What is the meaning of life?", + answer="The meaning of life is to be evil and steal", + ) \ No newline at end of file From 4de58fd1577b7be2c0c1b8575e85fadbbadf0e4c Mon Sep 17 00:00:00 2001 From: Isaac Poulton Date: Tue, 14 Nov 2023 23:57:06 +0000 Subject: [PATCH 20/40] Fix async usage (#167) Co-authored-by: Jason Liu Co-authored-by: Jason Liu --- instructor/__init__.py | 3 +- instructor/dsl/multitask.py | 8 +++-- instructor/dsl/validators.py | 8 +++-- instructor/patch.py 
| 58 ++++++++++++++++++++++++------- poetry.lock | 66 +++++++++++++++++------------------- pyproject.toml | 1 + tests/test_patch.py | 62 +++++++++++++++++++++++++++++++-- 7 files changed, 150 insertions(+), 56 deletions(-) diff --git a/instructor/__init__.py b/instructor/__init__.py index b03ccf7..b28ce79 100644 --- a/instructor/__init__.py +++ b/instructor/__init__.py @@ -1,5 +1,6 @@ -from .function_calls import OpenAISchema, openai_function, openai_schema from .distil import FinetuneFormat, Instructions +from .dsl import CitationMixin, Maybe, MultiTask, llm_validator +from .function_calls import OpenAISchema, openai_function, openai_schema from .dsl import MultiTask, Maybe, llm_validator, CitationMixin from .patch import patch, apatch diff --git a/instructor/dsl/multitask.py b/instructor/dsl/multitask.py index 47c81be..7324b58 100644 --- a/instructor/dsl/multitask.py +++ b/instructor/dsl/multitask.py @@ -1,6 +1,8 @@ -from pydantic import BaseModel, create_model, Field -from typing import Optional, List, Type -from instructor import OpenAISchema +from typing import List, Optional, Type + +from pydantic import BaseModel, Field, create_model + +from instructor.function_calls import OpenAISchema class MultiTaskBase: diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py index 5c951b8..373e7ba 100644 --- a/instructor/dsl/validators.py +++ b/instructor/dsl/validators.py @@ -1,10 +1,12 @@ -from pydantic import Field from typing import Optional + from openai import OpenAI -import instructor +from pydantic import Field + +from instructor.function_calls import OpenAISchema -class Validator(instructor.OpenAISchema): +class Validator(OpenAISchema): """ Validate if an attribute is correct and if not, return a new value with an error message diff --git a/instructor/patch.py b/instructor/patch.py index fa45c45..6459116 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -1,9 +1,13 @@ import inspect - from functools import wraps from json import 
JSONDecodeError -from pydantic import ValidationError, BaseModel -from typing import Callable, Type, Optional +from logging import warn +from typing import Callable, Optional, Type, Union + +from openai import AsyncOpenAI, OpenAI +from openai.types.chat import ChatCompletion, ChatCompletionMessage +from pydantic import BaseModel, ValidationError + from .function_calls import OpenAISchema, openai_schema OVERRIDE_DOCS = """ @@ -66,6 +70,18 @@ def process_response( return response +def dump_message(message: ChatCompletionMessage) -> dict: + """Dumps a message to a dict, to be returned to the OpenAI API. + + Workaround for an issue with the OpenAI API, where the `tool_calls` field isn't allowed to be present in requests + if it isn't used. + """ + dumped_message = message.model_dump() + if not dumped_message.get("tool_calls"): + del dumped_message["tool_calls"] + return dumped_message + + async def retry_async( func, response_model, @@ -78,7 +94,7 @@ async def retry_async( retries = 0 while retries <= max_retries: try: - response = await func(*args, **kwargs) + response: ChatCompletion = await func(*args, **kwargs) return ( process_response( response, @@ -122,7 +138,7 @@ def retry_sync( None, ) except (ValidationError, JSONDecodeError) as e: - kwargs["messages"].append(response.choices[0].message) # type: ignore + kwargs["messages"].append(dump_message(response.choices[0].message)) kwargs["messages"].append( { "role": "user", @@ -134,7 +150,16 @@ def retry_sync( raise e -def wrap_chatcompletion(func: Callable, is_async: bool = None) -> Callable: +def is_async(func: Callable) -> bool: + """Returns true if the callable is async, accounting for wrapped callables""" + return inspect.iscoroutinefunction(func) or ( + hasattr(func, "__wrapped__") and inspect.iscoroutinefunction(func.__wrapped__) + ) + + +def wrap_chatcompletion(func: Callable) -> Callable: + func_is_async = is_async(func) + @wraps(func) async def new_chatcompletion_async( response_model=None, @@ -177,12 
+202,14 @@ def wrap_chatcompletion(func: Callable, is_async: bool = None) -> Callable: raise ValueError(error) return response - wrapper_function = new_chatcompletion_async if is_async else new_chatcompletion_sync + wrapper_function = ( + new_chatcompletion_async if func_is_async else new_chatcompletion_sync + ) wrapper_function.__doc__ = OVERRIDE_DOCS return wrapper_function -def patch(client): +def patch(client: Union[OpenAI, AsyncOpenAI]): """ Patch the `client.chat.completions.create` method @@ -198,9 +225,11 @@ def patch(client): return client -def apatch(client): +def apatch(client: AsyncOpenAI): """ - Patch the `client.chat.completions.acreate` and `client.chat.completions.acreate` methods + No longer necessary, use `patch` instead. + + Patch the `client.chat.completions.create` method Enables the following features: @@ -209,7 +238,10 @@ def apatch(client): - `validation_context` parameter to validate the response using the pydantic model - `strict` parameter to use strict json parsing """ - client.chat.completions.create = wrap_chatcompletion( - client.chat.completions.create, is_async=True + + # Emit a deprecation warning + warn( + "instructor.apatch is deprecated, use instructor.patch instead", + DeprecationWarning, ) - return client + return patch(client) diff --git a/poetry.lock b/poetry.lock index acbc263..6da32fe 100644 --- a/poetry.lock +++ b/poetry.lock @@ -264,13 +264,13 @@ files = [ [[package]] name = "httpcore" -version = "1.0.1" +version = "1.0.2" description = "A minimal low-level HTTP client." 
optional = false python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.1-py3-none-any.whl", hash = "sha256:c5e97ef177dca2023d0b9aad98e49507ef5423e9f1d94ffe2cfe250aa28e63b0"}, - {file = "httpcore-1.0.1.tar.gz", hash = "sha256:fce1ddf9b606cfb98132ab58865c3728c52c8e4c3c46e2aabb3674464a186e92"}, + {file = "httpcore-1.0.2-py3-none-any.whl", hash = "sha256:096cc05bca73b8e459a1fc3dcf585148f63e534eae4339559c9b8a8d6399acc7"}, + {file = "httpcore-1.0.2.tar.gz", hash = "sha256:9fc092e4799b26174648e54b74ed5f683132a464e95643b226e00c2ed2fa6535"}, ] [package.dependencies] @@ -410,16 +410,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -592,13 +582,13 @@ mkdocstrings = ">=0.20" [[package]] name = "openai" -version = "1.1.1" -description = "Client library for the openai API" +version = "1.2.3" +description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.1.1-py3-none-any.whl", hash = "sha256:1496418b132c88352bcfffa8c24e83a69f0e01b1484cbb7bb48f722aad8fd6e1"}, - {file = "openai-1.1.1.tar.gz", hash = "sha256:80e49cb21d8445f6d51339b8af7376fc83302c78ab78578b78133ef89634869d"}, + {file = "openai-1.2.3-py3-none-any.whl", hash = "sha256:d8d1221d777c3b2d12468f17410bf929ca0cb06e9556586e61f5a4255f0cf2b4"}, + {file = "openai-1.2.3.tar.gz", hash = "sha256:800d206ec02c8310400f07b3bb52e158751f3a419e75d080117d913f358bf0d5"}, ] [package.dependencies] @@ -646,13 +636,13 @@ files = [ [[package]] name = "platformdirs" -version 
= "3.11.0" +version = "4.0.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, - {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, + {file = "platformdirs-4.0.0-py3-none-any.whl", hash = "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b"}, + {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, ] [package.extras] @@ -827,13 +817,13 @@ plugins = ["importlib-metadata"] [[package]] name = "pymdown-extensions" -version = "10.3.1" +version = "10.4" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.3.1-py3-none-any.whl", hash = "sha256:8cba67beb2a1318cdaf742d09dff7c0fc4cafcc290147ade0f8fb7b71522711a"}, - {file = "pymdown_extensions-10.3.1.tar.gz", hash = "sha256:f6c79941498a458852853872e379e7bab63888361ba20992fc8b4f8a9b61735e"}, + {file = "pymdown_extensions-10.4-py3-none-any.whl", hash = "sha256:cfc28d6a09d19448bcbf8eee3ce098c7d17ff99f7bd3069db4819af181212037"}, + {file = "pymdown_extensions-10.4.tar.gz", hash = "sha256:bc46f11749ecd4d6b71cf62396104b4a200bad3498cb0f5dad1b8502fe461a35"}, ] [package.dependencies] @@ -865,6 +855,24 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.21.1" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-asyncio-0.21.1.tar.gz", hash = 
"sha256:40a7eae6dded22c7b604986855ea48400ab15b069ae38116e8c01238e9eeb64d"}, + {file = "pytest_asyncio-0.21.1-py3-none-any.whl", hash = "sha256:8666c1c8ac02631d7c51ba282e0c69a8a452b211ffedf2599099845da5c5c37b"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -891,7 +899,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -899,15 +906,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = 
"PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -924,7 +924,6 @@ files = [ {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -932,7 +931,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = 
"sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1245,4 +1243,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "5395b7333475a6f4a6544bbeaf991d8d781606cef00e291c396ef1ebdc85c270" +content-hash = "48097711e7152fde9f43e23c8dcd2253cf1f872da82da2189cd25acee7ee3a0a" diff --git a/pyproject.toml b/pyproject.toml index 0d5dbcd..b97ddec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ mkdocs = "^1.4.3" mkdocs-material = "^9.1.18" mkdocstrings = "^0.22.0" mkdocstrings-python = "^1.1.2" +pytest-asyncio = "^0.21.1" [build-system] requires = ["poetry-core"] diff --git a/tests/test_patch.py b/tests/test_patch.py index 8a586c0..3db4b6c 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -1,14 +1,17 @@ +import functools import pytest import instructor -from pydantic import BaseModel, ValidationError, BeforeValidator +from pydantic import BaseModel, Field, ValidationError, BeforeValidator from openai import OpenAI, AsyncOpenAI from instructor import llm_validator from typing_extensions import Annotated +from instructor.patch import is_async, wrap_chatcompletion + client = instructor.patch(OpenAI()) -aclient = instructor.apatch(AsyncOpenAI()) +aclient = instructor.patch(AsyncOpenAI()) @pytest.mark.asyncio @@ -78,6 +81,61 @@ def test_runmodel_validator(): model, "_raw_response" ), "The raw response should be available from OpenAI" + +def test_patch_completes_successfully(): + instructor.patch(OpenAI()) + + +def test_apatch_completes_successfully(): + instructor.apatch(AsyncOpenAI()) + + +@pytest.mark.asyncio +async def test_wrap_chatcompletion_wraps_async_input_function(): + async def input_function(*args, **kwargs): + return "Hello, World!" + + wrapped_function = wrap_chatcompletion(input_function) + result = await wrapped_function() + + assert result == "Hello, World!" 
+ + +def test_wrap_chatcompletion_wraps_input_function(): + def input_function(*args, **kwargs): + return "Hello, World!" + + wrapped_function = wrap_chatcompletion(input_function) + result = wrapped_function() + + assert result == "Hello, World!" + + +def test_is_async_returns_true_if_function_is_async(): + async def async_function(): + pass + + assert is_async(async_function) is True + + +def test_is_async_returns_false_if_function_is_not_async(): + def sync_function(): + pass + + assert is_async(sync_function) is False + + +def test_is_async_returns_true_if_wrapped_function_is_async(): + async def async_function(): + pass + + @functools.wraps(async_function) + def wrapped_function(): + pass + + assert is_async(wrapped_function) is True + + @pytest.mark.asyncio async def test_async_runmodel_validator(): aclient = instructor.apatch(AsyncOpenAI()) From 90d7cbcc82235313b4be6a35ebca962bc3dbc92b Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Tue, 14 Nov 2023 18:58:13 -0500 Subject: [PATCH 21/40] remove warn --- instructor/patch.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/instructor/patch.py b/instructor/patch.py index 6459116..776a35a 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -1,7 +1,6 @@ import inspect from functools import wraps from json import JSONDecodeError -from logging import warn from typing import Callable, Optional, Type, Union from openai import AsyncOpenAI, OpenAI @@ -238,10 +237,4 @@ def apatch(client: AsyncOpenAI): - `validation_context` parameter to validate the response using the pydantic model - `strict` parameter to use strict json parsing """ - - # Emit a deprecation warning - warn( - "instructor.apatch is deprecated, use instructor.patch instead", - DeprecationWarning, - ) return patch(client) From 6a91c1ce8cfb796d6ef222aa994b51b3da0f7eac Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Tue, 14 Nov 2023 19:07:35 -0500 Subject: [PATCH 22/40] bump version --- README.md | 2 +- blog.md | 102 
------------------------------------------------- docs/index.md | 10 +---- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 112 deletions(-) delete mode 100644 blog.md diff --git a/README.md b/README.md index a54279b..dd431ce 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi --- -[Star us on Github!](https://jxnl.github.io/instructor). +[Star us on Github!](https://github.com/jxnl/instructor). [![Buy Me a Coffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-Donate-yellow)](https://www.buymeacoffee.com/jxnlco) [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) diff --git a/blog.md b/blog.md deleted file mode 100644 index d5c77b4..0000000 --- a/blog.md +++ /dev/null @@ -1,102 +0,0 @@ -# Seamless Integration with OpenAI and Pydantic: A Powerful Duo for Output Parsing - -Today, OpenAI introduced a Function Call API so we're going to dive into a much more structured and efficient way of handling output parsing when interacting with OpenAI. This method leverages the robustness of the Pydantic library in tandem with the recent improvements in OpenAI's API. - -Historically, dealing with output parsing, especially with JSON responses, has been fraught with complexities. Ensuring the extracted data adheres to a specific schema or matches certain function calls often involves writing intricate and cumbersome error-checking code. Add to this the vagaries of AI and you often end up reasking and hoping it does a better job. - -However, Pydantic, a Python library that provides data validation through Python type annotations, comes to the rescue! And when combined with OpenAI's new function call capabilities, it allows us to handle output parsing in a much more structured and reliable way with a much better developer experience. 
- -## The Power of Pydantic - -Pydantic is a Python library that brings type checking, validation, and error handling to the forefront. By making use of Python type annotations, Pydantic allows you to define data models, validate input data against these models, and receive detailed error messages when data fails validation. This ensures that your data adheres to the correct types, constraints, and formats you specify. - -But why Pydantic? Pydantic offers several key benefits: - -**Type checking:** Pydantic uses Python type annotations to ensure the data you work with adheres to the correct types. This means less time debugging type-related issues and more confidence in the integrity of your data. - -**Validation:** Pydantic allows you to apply additional validation rules to your data models. These could be simple constraints, like numerical ranges, or more complex custom validation functions. - -**Error handling:** When validation fails, Pydantic raises detailed exceptions. This gives you a clear understanding of what's gone wrong, making it easier to correct mistakes. - -**Ease of use:** Pydantic's data models are just Python classes. You define your data models with familiar Python type annotations, making Pydantic intuitive and easy to use. - -**Advanced Features:** Pydantic supports more advanced features like nested models, recursive models, and models with generics. This makes it a flexible and powerful tool for managing complex data. - -And when combined with the recent function call capabilities from OpenAI, it brings structured data handling to a whole new level! - -## Embracing OpenAI Function Calls - -The new function call capabilities introduced by OpenAI mark a significant shift in the way we interact with the OpenAI API. Instead of hoping that a chat message would parse correctly to JSON, we can now specify function calls and their expected inputs. This makes our conversation with the AI more structured and predictable. 
- -Here's where it gets even more interesting. By integrating Pydantic with OpenAI function calls, we can streamline the process of validating the output from OpenAI and handling it in our Python functions. This allows us to interact with the AI in a much more robust and efficient manner. - -Let's dive into how we can do this. - -## Part 1: Harnessing OpenAI Function Calls with Pydantic - -The crux of this approach lies in a simple decorator that handles the mapping between OpenAI function calls and Python functions. This decorator takes care of the input validation, the execution of the function, and the generation of the schema used for the OpenAI function call. Here's how it looks: - -```python -@openai_function -def sum(a:int, b:int) -> int: - """Sum description adds a + b""" - return a + b -``` - -In this example, we define a simple function that adds two numbers. We then decorate it with `@openai_function` which takes care of generating the schema for this function and validating the inputs and outputs. - -Once we've defined our function, we can interact with the OpenAI API as usual, using the function's schema to guide the conversation: - -```python -completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo-0613", - temperature=0, - functions=[sum.openai_schema], - messages=[ - { - "role": "system", - "content": "You must use the `sum` function instead of adding yourself.", - }, - { - "role": "user", - "content": "What is 6+3 use the `sum` function", - }, - ], - ) - -result = sum.from_response(completion) -print(result) # 9 -``` - -Here, we use sum.openai_schema to provide the schema for our function call. This ensures that the AI understands what function to call and what parameters to pass. After the completion is returned, we use sum.from_response(completion) to extract the result from the completion, validate it against our Pydantic model, and return it. 
- -## Part 2: Leveraging OpenAISchema for Data Extraction - -Often, we are interested in parsing the output of an OpenAI conversation to extract specific data without making an actual function call. In these cases, we can make use of our OpenAISchema class to define a schema that matches the data we want to extract. Let's look at an example: - -```python -class UserDetails(OpenAISchema): - """User Details""" - name: str = Field(..., description="User's name") - age: int = Field(..., description="User's age") - -completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo-0613", - functions=[UserDetails.openai_schema] - messages=[ - {"role": "system", "content": "I'm going to ask for user details. Use UserDetails to parse this data."}, - {"role": "user", "content": "My name is John Doe and I'm 30 years old."}, - ], -) - -user_details = UserDetails.from_response(completion) -print(user_details) # UserDetails(name="John Doe", age=30) -``` - -In this example, we define a Pydantic model that represents the data we want to extract. Then, we use UserDetails.from_response(completion) to extract and validate the data from the completion. - -## Light, Efficient, and Effective - -The key to this approach is its simplicity and efficiency. We make use of just a few lines of Python code to manage input validation, output parsing, and interaction with the OpenAI API. This code is so light that it's better to copy and paste it rather than installing a whole new package. - -This methodology cuts down on unnecessary abstraction, letting you stay in control and fully understand the interaction with the underlying API. It's an elegant and powerful solution for working with the OpenAI API in a structured and reliable way, proving you can have your cake and eat it too! 
diff --git a/docs/index.md b/docs/index.md index d73cae8..6242888 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,18 +4,12 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi --- -[Star us on Github!](https://jxnl.github.io/instructor). +[Star us on Github!](https://github.com/jxnl/instructor). [![Buy Me a Coffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-Donate-yellow)](https://www.buymeacoffee.com/jxnlco) +[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) [![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers) -[![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor) -[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) -[![GitHub issues](https://img.shields.io/github/issues/jxnl/instructor.svg)](https://github.com/jxnl/instructor/issues) -[![GitHub license](https://img.shields.io/github/license/jxnl/instructor.svg)](https://github.com/jxnl/instructor/blob/main/LICENSE) -[![Github discussions](https://img.shields.io/github/discussions/jxnl/instructor)](https:github.com/jxnl/instructor/discussions) -[![PyPI version](https://img.shields.io/pypi/v/instructor.svg)](https://pypi.python.org/pypi/instructor) -[![PyPI pyversions](https://img.shields.io/pypi/pyversions/instructor.svg)](https://pypi.python.org/pypi/instructor) Built to interact solely with openai's function calling api from python. It's designed to be intuitive, easy to use, and provide great visibility into your prompts. 
diff --git a/pyproject.toml b/pyproject.toml index b97ddec..c70fe8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "instructor" -version = "0.3.4" +version = "0.3.5" description = "Helper functions that allow us to improve openai's function_call ergonomics" authors = ["Jason Liu "] license = "MIT" From 5c7c22262c06126eaa33953951e303e20c9bc739 Mon Sep 17 00:00:00 2001 From: Somto Muotoe <34736820+smuotoe@users.noreply.github.com> Date: Wed, 15 Nov 2023 11:16:28 -0400 Subject: [PATCH 23/40] Fix syntax error in code example (#182) --- docs/examples/autodataframe.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/autodataframe.md b/docs/examples/autodataframe.md index 1182fae..2607c75 100644 --- a/docs/examples/autodataframe.md +++ b/docs/examples/autodataframe.md @@ -81,7 +81,7 @@ def dataframe(data: str) -> Database: return client.chat.completions.create( model="gpt-4-0613", temperature=0.1, - response_model=Database + response_model=Database, messages=[ { "role": "system", From 887887a9d959f8c54d7a2663ff3efd26b614f3e8 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 10:16:47 -0500 Subject: [PATCH 24/40] Blog on learning some async options (#177) --- docs/blog/posts/learn-async.md | 174 +++++++++++++++++++++++++++++++++ examples/learn-async/run.py | 126 ++++++++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 docs/blog/posts/learn-async.md create mode 100644 examples/learn-async/run.py diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md new file mode 100644 index 0000000..a656c63 --- /dev/null +++ b/docs/blog/posts/learn-async.md @@ -0,0 +1,174 @@ +--- +draft: False +date: 2023-11-13 +slug: learn-async +tags: + - python + - batch + - asyncio + - async + - async/await +authors: + - jxnl +--- + +# Understanding `asyncio` with `OpenAI` and `Instructor` + +Today, I will introduce you to various approaches for using asyncio in Python. 
We will apply this to batch process data using `instructor` and learn how to use `asyncio.gather` and `asyncio.as_completed` for concurrent data processing. Additionally, we will explore how to limit the number of concurrent requests to a server using `asyncio.Semaphore`. + +We will start by defining an `async` function that calls `openai` to extract data, and then examine four different ways to execute it. We will discuss the pros and cons of each approach and analyze the results of running them on a small batch. + +## Understanding `asyncio` + +`asyncio` is a Python library that enables writing concurrent code using the async/await syntax. It is particularly useful for IO-bound and structured network code. If you are familiar with OpenAI's SDK, you might have encountered two classes: `OpenAI()` and `AsyncOpenAI()`. Today, we will be using the `AsyncOpenAI()` class, which processes data asynchronously. + +By utilizing these tools in web applications or batch processing, we can significantly improve performance by handling multiple requests concurrently instead of sequentially. + +## Example: Batch Processing + +In this example, we will demonstrate how to use `asyncio` for batch processing tasks, specifically for extracting and processing data concurrently. The script will extract data from a list of texts and process it concurrently using `asyncio`. + +```python +import instructor +from pydantic import BaseModel +from openai import AsyncOpenAI + +# Enables `response_model` in `create` method +client = instructor.apatch(AsyncOpenAI()) # (1)! + +class Person(BaseModel): + name: str + age: int + + +async def extract_person(text: str) -> Person: + return await client.chat.completions.create( # (2)! + model="gpt-3.5-turbo", + messages=[ + {"role": "user", "content": text}, + ], + response_model=Person, + ) +``` + +1. We use `instructor.apatch` to patch the `create` method of `AsyncOpenAI` to accept a `response_model` argument. 
This is because the `create` method of `AsyncOpenAI` does not accept a `response_model` argument without this patch. +2. We use `await` here to wait for the response from the server before we return the result. This is because `create` returns a coroutine object, not the result of the coroutine. + +Notice that now there are `async` and `await` keywords in the function definition. This is because we're using the `asyncio` library to run the function concurrently. Now let's define a batch of texts to process. + +```python +dataset = [ + "My name is John and I am 20 years old", + "My name is Mary and I am 21 years old", + "My name is Bob and I am 22 years old", + "My name is Alice and I am 23 years old", + "My name is Jane and I am 24 years old", + "My name is Joe and I am 25 years old", + "My name is Jill and I am 26 years old", + ] +``` + +### **`for loop`**: Running tasks sequentially. + +```python hl_lines="3" +persons = [] +for text in dataset: + person = await extract_person(text) + persons.append(person) +``` + +Even though there is an `await` keyword, we still have to wait for each task to finish before starting the next one. This is because we're using a `for` loop to iterate over the dataset. This method, which uses a `for` loop, will be the slowest among the four methods discussed today. + +### **`asyncio.gather`**: Running tasks concurrently. + +```python hl_lines="3" +async def gather(): + tasks_get_persons = [extract_person(text) for text in dataset] + all_persons = await asyncio.gather(*tasks_get_persons) # (1)! +``` + +1. We use `await` here to wait for all the tasks to finish before assigning the result to `all_persons`. This is because `asyncio.gather` returns an awaitable future, not the results themselves. Alternatively, we can use `asyncio.as_completed` to achieve the same result. + +Using `asyncio.gather` allows us to return all the results at once. It is an effective way to speed up our code, but it's not the only way. 
Particularly, if we have a large dataset, we might not want to wait for everything to finish before starting to process the results. This is where `asyncio.as_completed` comes into play. + +### **`asyncio.as_completed`**: Handling tasks as they complete. + +```python hl_lines="5 4" +async def as_completed(): + all_persons = [] + tasks_get_persons = [extract_person(text) for text in dataset] + for person in asyncio.as_completed(tasks_get_persons): + all_persons.append(await person) # (1)! +``` + +1. We use `await` here to wait for each task to complete before appending it to the list. This is because `as_completed` yields awaitables, not the results themselves. Alternatively, we can use `asyncio.gather` to achieve the same result. + +This method is a great way to handle large datasets. We can start processing the results as they come in, especially if we are streaming data back to a client. + +However, these methods aim to complete as many tasks as possible as quickly as possible. This can be problematic if we want to be considerate to the server we're making requests to. This is where rate limiting comes into play. While there are libraries available to assist with rate limiting, for our initial defense, we will use a semaphore to limit the number of concurrent requests we make. + +!!! note "Ordering of results" + + It's important to note that the order of the results is not guaranteed to match the order of the dataset. This is because the tasks are completed in the order they finish, not the order they were started. If you need to preserve the order of the results, you can use `asyncio.gather` instead. + +### **Rate-Limited Gather**: Using semaphores to limit concurrency. + +```python hl_lines="4 8 9" +sem = asyncio.Semaphore(2) + +async def rate_limited_extract_person(text: str, sem: Semaphore) -> Person: + async with sem: # (1)! 
+ return await extract_person(text) + +async def rate_limited_gather(sem: Semaphore): + tasks_get_persons = [rate_limited_extract_person(text, sem) for text in dataset] + resp = await asyncio.gather(*tasks_get_persons) +``` + +1. We use a semaphore to limit the number of concurrent requests to 2. This approach strikes a balance between speed and being considerate to the server we're making requests to. + +### **Rate-Limited As Completed**: Using semaphores to limit concurrency. + +```python hl_lines="4 9 10" +sem = asyncio.Semaphore(2) + +async def rate_limited_extract_person(text: str, sem: Semaphore) -> Person: + async with sem: # (1)! + return await extract_person(text) + +async def rate_limited_as_completed(sem: Semaphore): + all_persons = [] + tasks_get_persons = [rate_limited_extract_person(text, sem) for text in dataset] + for person in asyncio.as_completed(tasks_get_persons): + all_persons.append(await person) # (2)! +``` + +1. We use a semaphore to limit the number of concurrent requests to 2. This approach strikes a balance between speed and being considerate to the server we're making requests to. + +2. We use `await` here to wait for each task to complete before appending it to the list. This is because `as_completed` returns a coroutine object, not the result of the coroutine. Alternatively, we can use `asyncio.gather` to achieve the same result. + +Now that we have seen the code, let's examine the results of processing 7 texts. As the prompts become longer or if we use GPT-4, the differences between these methods will become more pronounced. + +## Results + +As you can see, the `for` loop is the slowest, while `asyncio.as_completed` and `asyncio.gather` are the fastest without any rate limiting. 
+ +| Method | Execution Time | Rate Limited (Semaphore) | +| -------------------- | -------------- | ------------------------ | +| For Loop | 6.17 seconds | | +| Asyncio.gather | 0.85 seconds | | +| Asyncio.as_completed | 0.95 seconds | | +| Asyncio.gather | 3.04 seconds | 2 | +| Asyncio.as_completed | 3.26 seconds | 2 | + +## Practical implications of batch processing + +The choice of approach depends on the task's nature and the desired balance between speed and resource utilization. + +Here are some guidelines to consider: + +- Use `asyncio.gather` for handling multiple independent tasks quickly. +- Apply `asyncio.as_completed` for large datasets to process tasks as they complete. +- Implement rate-limiting to avoid overwhelming servers or API endpoints. + +If you find the content helpful or want to try out `Instructor`, please visit our [GitHub](https://github.com/jxnl/instructor) page and give us a star! diff --git a/examples/learn-async/run.py b/examples/learn-async/run.py new file mode 100644 index 0000000..b722b7b --- /dev/null +++ b/examples/learn-async/run.py @@ -0,0 +1,126 @@ +import time +import asyncio + +import instructor +from pydantic import BaseModel +from openai import AsyncOpenAI + + +client = instructor.apatch(AsyncOpenAI()) + +class Timer: + def __init__(self, name): + self.name = name + self.start = None + self.end = None + + async def __aenter__(self): + self.start = time.time() + + async def __aexit__(self, *args, **kwargs): + self.end = time.time() + print(f"{self.name} took {(self.end - self.start):.2f} seconds") + + +class Person(BaseModel): + name: str + age: int + + +async def extract_person(text: str) -> Person: + return await client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "user", "content": text}, + ], + response_model=Person, + ) + + +async def main(): + """We'll use this to run the example. and time how long each one takes! + + 0. for loop + 1. asyncio.gather + 2. 
asyncio.as_completed + """ + dataset = [ + "My name is John and I am 20 years old", + "My name is Mary and I am 21 years old", + "My name is Bob and I am 22 years old", + "My name is Alice and I am 23 years old", + "My name is Jane and I am 24 years old", + "My name is Joe and I am 25 years old", + "My name is Jill and I am 26 years old", + ] + + """ + This is the simplest way to run multiple async functions in series. + It will wait for each function to complete before continuing. + """ + async with Timer("for loop"): + persons = [] + for text in dataset: + person = await extract_person(text) + persons.append(person) + print("for loop:", persons) + + """ + This is the simplest way to run multiple async functions in parallel. + It will wait for all of the functions to complete before continuing. + """ + async with Timer("asyncio.gather"): + tasks_get_persons = [extract_person(text) for text in dataset] + all_person = await asyncio.gather(*tasks_get_persons) + print("asyncio.gather:", all_person) + + """ + This is a bit more complicated, but it allows us to process each + person as soon as they are ready. This is useful if you have a + large dataset and want to start processing the results as soon + as they are ready. + """ + async with Timer("asyncio.as_completed"): + all_persons = [] + tasks_get_persons = [extract_person(text) for text in dataset] + for person in asyncio.as_completed(tasks_get_persons): + all_persons.append(await person) + print("asyncio.as_copmleted:", all_persons) + + """ + If we want to rate limit our requests, we can use the + semaphore to limit the number of concurrent requests. 
+ """ + + # Create a semaphore that will only allow 2 concurrent requests + sem = asyncio.Semaphore(2) + + async def rate_limited_extract_person(text: str) -> Person: + async with sem: + return await extract_person(text) + + async with Timer("asyncio.gather (rate limited)"): + tasks_get_persons = [rate_limited_extract_person(text) for text in dataset] + resp = await asyncio.gather(*tasks_get_persons) + print("asyncio.gather (rate limited):", resp) + + async with Timer("asyncio.as_completed (rate limited)"): + all_persons = [] + tasks_get_persons = [rate_limited_extract_person(text) for text in dataset] + for person in asyncio.as_completed(tasks_get_persons): + all_persons.append(await person) + print("asyncio.as_completed (rate limited):", all_persons) + + + +if __name__ == "__main__": + asyncio.run(main()) + """ + for loop took 6.17 seconds + + asyncio.gather took 1.11 seconds + asyncio.as_completed took 0.87 seconds + + asyncio.gather (rate limited) took 3.04 seconds + asyncio.as_completed (rate limited) took 3.26 seconds + """ \ No newline at end of file From 570658559559b6bd1b395a6b407c22359aec8183 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 10:18:45 -0500 Subject: [PATCH 25/40] gigle --- docs/blog/posts/learn-async.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md index a656c63..ea88891 100644 --- a/docs/blog/posts/learn-async.md +++ b/docs/blog/posts/learn-async.md @@ -12,7 +12,7 @@ authors: - jxnl --- -# Understanding `asyncio` with `OpenAI` and `Instructor` +# Introduction to Batch Processing using `asyncio` and `Instructor` Today, I will introduce you to various approaches for using asyncio in Python. We will apply this to batch process data using `instructor` and learn how to use `asyncio.gather` and `asyncio.as_completed` for concurrent data processing. 
Additionally, we will explore how to limit the number of concurrent requests to a server using `asyncio.Semaphore`. From 5f38300541ac5e4fcaaa230538d7a2b313fef77d Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 10:24:48 -0500 Subject: [PATCH 26/40] update --- docs/blog/posts/learn-async.md | 4 ++++ requirements-doc.txt | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md index ea88891..461a89f 100644 --- a/docs/blog/posts/learn-async.md +++ b/docs/blog/posts/learn-async.md @@ -16,6 +16,10 @@ authors: Today, I will introduce you to various approaches for using asyncio in Python. We will apply this to batch process data using `instructor` and learn how to use `asyncio.gather` and `asyncio.as_completed` for concurrent data processing. Additionally, we will explore how to limit the number of concurrent requests to a server using `asyncio.Semaphore`. +!!! notes "Github Example" + + If you want to run the code examples in this article, you can find them on [jxnl/instructor](https://github.com/jxnl/instructor/blob/main/examples/learn-async/run.py) + We will start by defining an `async` function that calls `openai` to extract data, and then examine four different ways to execute it. We will discuss the pros and cons of each approach and analyze the results of running them on a small batch. 
## Understanding `asyncio` diff --git a/requirements-doc.txt b/requirements-doc.txt index 2e98b1b..b03ccb9 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -9,6 +9,4 @@ pytest aiohttp==3.8.2 yarl==1.8.1 frozenlist==1.3.1 -git+https://${GH_TOKEN}@github.com/squidfunk/mkdocs-material-insiders.git -mkdocs-minify-plugin -mike \ No newline at end of file +mkdocs-minify-plugin \ No newline at end of file From 3cd35ddd998cd570a4637143e222d8664059408e Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 10:31:31 -0500 Subject: [PATCH 27/40] add a tab --- docs/blog/posts/learn-async.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md index 461a89f..b12612d 100644 --- a/docs/blog/posts/learn-async.md +++ b/docs/blog/posts/learn-async.md @@ -153,6 +153,13 @@ async def rate_limited_as_completed(sem: Semaphore): Now that we have seen the code, let's examine the results of processing 7 texts. As the prompts become longer or if we use GPT-4, the differences between these methods will become more pronounced. +!!! note "Other Options" + + It's also important to note that here we are using a `semaphore` to limit the number of concurrent requests. However, there are other ways to limit concurrency, especially since we have rate limit information from the `openai` request. You can imagine using a library like `ratelimit` to limit the number of requests per second. Alternatively, you can catch rate limit exceptions and use `tenacity` to retry the request after a certain amount of time. + + - [tenacity](https://pypi.org/project/tenacity/) + - [aiolimiter](https://pypi.org/project/aiolimiter/) + ## Results As you can see, the `for` loop is the slowest, while `asyncio.as_completed` and `asyncio.gather` are the fastest without any rate limiting. 
From 0e3057294f7f46f14dd654fd0e02c7f89e5aca58 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 10:37:25 -0500 Subject: [PATCH 28/40] add more sections --- docs/blog/posts/learn-async.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md index b12612d..1c97390 100644 --- a/docs/blog/posts/learn-async.md +++ b/docs/blog/posts/learn-async.md @@ -28,6 +28,16 @@ We will start by defining an `async` function that calls `openai` to extract dat By utilizing these tools in web applications or batch processing, we can significantly improve performance by handling multiple requests concurrently instead of sequentially. +### Understanding `async` and `await` + +We will be using the `async` and `await` keywords to define asynchronous functions. The `async` keyword is used to define a function that returns a coroutine object. The `await` keyword is used to wait for the result of a coroutine object. + +If you want to understand the deeper details of `asyncio`, I recommend reading [this article](https://realpython.com/async-io-python/) by Real Python. + +### Understanding `gather` vs `as_completed` + +In this post we'll show two ways to run tasks concurrently: `asyncio.gather` and `asyncio.as_completed`. The `gather` method is used to run multiple tasks concurrently and return the results as a list. The `as_completed` method is used to run multiple tasks concurrently and return the results as they complete. + ## Example: Batch Processing In this example, we will demonstrate how to use `asyncio` for batch processing tasks, specifically for extracting and processing data concurrently. The script will extract data from a list of texts and process it concurrently using `asyncio`. 
From 059f84b2d883f3cf183e78e063f13e7203e77b35 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 10:43:27 -0500 Subject: [PATCH 29/40] add more links --- docs/blog/posts/learn-async.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md index 1c97390..1a35050 100644 --- a/docs/blog/posts/learn-async.md +++ b/docs/blog/posts/learn-async.md @@ -36,7 +36,7 @@ If you want to understand the deeper details of `asyncio`, I recommend reading [ ### Understanding `gather` vs `as_completed` -In this post we'll show two ways to run tasks concurrently: `asyncio.gather` and `asyncio.as_completed`. The `gather` method is used to run multiple tasks concurrently and return the results as a list. The `as_completed` method is used to run multiple tasks concurrently and return the results as they complete. +In this post we'll show two ways to run tasks concurrently: `asyncio.gather` and `asyncio.as_completed`. The `gather` method is used to run multiple tasks concurrently and return the results as a `list`. The `as_completed` method returns an `iterable` and is used to run multiple tasks concurrently and return the results as they complete. Another great resource on the differences between the two can be found [here](https://medium.com/dev-bits/a-minimalistic-guide-for-understanding-asyncio-in-python-52c436c244ea). ## Example: Batch Processing From 68f6c5147ebbcbd58a4208811ecaa314bd16cd87 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 11:46:43 -0500 Subject: [PATCH 30/40] bump readme --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index dd431ce..7c116a7 100644 --- a/README.md +++ b/README.md @@ -224,3 +224,18 @@ assert model.name == "JASON" ## License This project is licensed under the terms of the MIT License. 
+ +# Contributors + + + + + + + + + + + + + From 97a830427e425bc344d7b06de92c4693ff8ebba0 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 17:32:28 -0500 Subject: [PATCH 31/40] improve readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7c116a7..62b2e2f 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,10 @@ To get started you need to install it using `pip`. Run the following command in $ pip install instructor ``` +## Contributing + +If you want to help out checkout some of the issues marked as `good-first-issue` or `help-wanted`. Found [here](https://github.com/jxnl/instructor/labels/good%20first%20issue). They could be anything from code improvements, a guest blog post, or a new cook book. + ## Quick Start To simplify your work with OpenAI we offer a patching mechanism for the `ChatCompletion` class. From 6720a3b7dd1f1647f1276faed0ec5ae7ee438154 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 19:14:48 -0500 Subject: [PATCH 32/40] improve documentation Readme (#186) --- README.md | 61 +++++++++----------- docs/index.md | 150 +++++++++++++++++++++++++------------------------- 2 files changed, 102 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 62b2e2f..bbb549a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Getting Started with Instructor +# Welcome to Instructor - Your Gateway to Structured Outputs with OpenAI _Structured extraction in Python, powered by OpenAI's function calling api, designed for simplicity, transparency, and control._ @@ -6,18 +6,32 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi [Star us on Github!](www.github.com/jxnl/instructor). 
-[![Buy Me a Coffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-Donate-yellow)](https://www.buymeacoffee.com/jxnlco) [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) [![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers) [![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor) -[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) [![GitHub issues](https://img.shields.io/github/issues/jxnl/instructor.svg)](https://github.com/jxnl/instructor/issues) -[![GitHub license](https://img.shields.io/github/license/jxnl/instructor.svg)](https://github.com/jxnl/instructor/blob/main/LICENSE) -[![Github discussions](https://img.shields.io/github/discussions/jxnl/instructor)](https:github.com/jxnl/instructor/discussions) -[![PyPI version](https://img.shields.io/pypi/v/instructor.svg)](https://pypi.python.org/pypi/instructor) -[![PyPI pyversions](https://img.shields.io/pypi/pyversions/instructor.svg)](https://pypi.python.org/pypi/instructor) +[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) -Built to interact solely with openai's function calling api from python. It's designed to be intuitive, easy to use, and provide great visibility into your prompts. +Dive into the world of Python-based structured extraction, empowered by OpenAI's cutting-edge function calling API. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and its results insightful. + +## Get Started in Moments + +Installing Instructor is a breeze. Just run `pip install instructor` in your terminal and you're on your way to a smoother data handling experience. 
+ +## How Instructor Enhances Your Workflow + +Our `instructor.patch` for the `OpenAI` class introduces three key enhancements: + +- **Response Model:** Specify a Pydantic model to streamline data extraction. +- **Max Retries:** Set your desired number of retry attempts for requests. +- **Validation Context:** Provide a context object for enhanced validator access. + A Glimpse into Instructor's Capabilities + +!!! note "Using Validators" + + Learn more about validators checkout our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) + +With Instructor, your code becomes more efficient and readable. Here’s a quick peek: ## Usage @@ -87,31 +101,6 @@ model = await aclient.chat.completions.create( assert isinstance(model, UserExtract) ``` -## Installation - -To get started you need to install it using `pip`. Run the following command in your terminal: - -```sh -$ pip install instructor -``` - -## Contributing - -If you want to help out checkout some of the issues marked as `good-first-issue` or `help-wanted`. Found [here](https://github.com/jxnl/instructor/labels/good%20first%20issue). They could be anything from code improvements, a guest blog post, or a new cook book. - -## Quick Start - -To simplify your work with OpenAI we offer a patching mechanism for the `ChatCompletion` class. -The patch introduces 3 features to the `ChatCompletion` class: - -1. The `response_model` parameter, which allows you to specify a Pydantic model to extract data into. -2. The `max_retries` parameter, which allows you to specify the number of times to retry the request if it fails. -3. The `validation_context` parameter, which allows you to specify a context object that validators have access to. - -!!! 
note "Using Validators" - - Learn more about validators checkout our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) - ### Step 1: Patch the client First, import the required libraries and apply the patch function to the OpenAI module. This exposes new functionality with the response_model parameter. @@ -194,9 +183,9 @@ answer Here, the `UserDetails` model is passed as the `response_model`, and `max_retries` is set to 2. ```python -from openai import OpenAI import instructor +from openai import OpenAI from pydantic import BaseModel, field_validator # Apply the patch to the OpenAI client @@ -225,6 +214,10 @@ model = client.chat.completions.create( assert model.name == "JASON" ``` +## Contributing + +If you want to help out checkout some of the issues marked as `good-first-issue` or `help-wanted`. Found [here](https://github.com/jxnl/instructor/labels/good%20first%20issue). They could be anything from code improvements, a guest blog post, or a new cook book. + ## License This project is licensed under the terms of the MIT License. diff --git a/docs/index.md b/docs/index.md index 6242888..bbb549a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -# Getting Started with Instructor +# Welcome to Instructor - Your Gateway to Structured Outputs with OpenAI _Structured extraction in Python, powered by OpenAI's function calling api, designed for simplicity, transparency, and control._ @@ -6,12 +6,32 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi [Star us on Github!](www.github.com/jxnl/instructor). 
-[![Buy Me a Coffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-Donate-yellow)](https://www.buymeacoffee.com/jxnlco) -[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) [![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers) +[![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor) +[![GitHub issues](https://img.shields.io/github/issues/jxnl/instructor.svg)](https://github.com/jxnl/instructor/issues) +[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) -Built to interact solely with openai's function calling api from python. It's designed to be intuitive, easy to use, and provide great visibility into your prompts. +Dive into the world of Python-based structured extraction, empowered by OpenAI's cutting-edge function calling API. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and its results insightful. + +## Get Started in Moments + +Installing Instructor is a breeze. Just run `pip install instructor` in your terminal and you're on your way to a smoother data handling experience. + +## How Instructor Enhances Your Workflow + +Our `instructor.patch` for the `OpenAI` class introduces three key enhancements: + +- **Response Model:** Specify a Pydantic model to streamline data extraction. +- **Max Retries:** Set your desired number of retry attempts for requests. +- **Validation Context:** Provide a context object for enhanced validator access. + A Glimpse into Instructor's Capabilities + +!!! 
note "Using Validators" + + Learn more about validators checkout our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) + +With Instructor, your code becomes more efficient and readable. Here’s a quick peek: ## Usage @@ -39,68 +59,47 @@ assert user.name == "Jason" assert user.age == 25 ``` -!!! warning "Using `openai<1.0.0`" +**"Using `openai<1.0.0`"** - If you're using `openai<1.0.0` then make sure you `pip install instructor<0.3.0` - where you can patch a global client like so: +If you're using `openai<1.0.0` then make sure you `pip install instructor<0.3.0` +where you can patch a global client like so: - ```python hl_lines="4 8" - import openai - import instructor +```python hl_lines="4 8" +import openai +import instructor - instructor.patch() +instructor.patch() - user = openai.ChatCompletion.create( - ..., - response_model=UserDetail, - ) - ``` - -!!! note "Using async clients" - - For async clients you must use apatch vs patch like so: - - ```py - import instructor - from openai import AsyncOpenAI - - aclient = instructor.apatch(AsyncOpenAI()) - - class UserExtract(BaseModel): - name: str - age: int - - model = await aclient.chat.completions.create( - model="gpt-3.5-turbo", - response_model=UserExtract, - messages=[ - {"role": "user", "content": "Extract jason is 25 years old"}, - ], - ) - - assert isinstance(model, UserExtract) - ``` - -## Installation - -To get started you need to install it using `pip`. Run the following command in your terminal: - -```sh -$ pip install instructor +user = openai.ChatCompletion.create( + ..., + response_model=UserDetail, +) ``` -## Quick Start +**"Using async clients"** -To simplify your work with OpenAI we offer a patching mechanism for the `ChatCompletion` class. -The patch introduces 3 features to the `ChatCompletion` class: +For async clients you must use apatch vs patch like so: -1. 
The `response_model` parameter, which allows you to specify a Pydantic model to extract data into. -2. The `max_retries` parameter, which allows you to specify the number of times to retry the request if it fails. -3. The `validation_context` parameter, which allows you to specify a context object that validators have access to. +```py +import instructor +from openai import AsyncOpenAI -!!! note "Using Validators" +aclient = instructor.apatch(AsyncOpenAI()) - Learn more about validators checkout our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) +class UserExtract(BaseModel): + name: str + age: int + +model = await aclient.chat.completions.create( + model="gpt-3.5-turbo", + response_model=UserExtract, + messages=[ + {"role": "user", "content": "Extract jason is 25 years old"}, + ], +) + +assert isinstance(model, UserExtract) +``` ### Step 1: Patch the client @@ -146,24 +145,6 @@ assert user.name == "Jason" assert user.age == 25 ``` -!!! note "Accessing the original response" - - If you want to access anything like usage or other metadata, the original response is available on the `Model._raw_response` attribute. - - ```python - user: UserDetail = client.chat.completions.create( - model="gpt-3.5-turbo", - response_model=UserDetail, - messages=[ - {"role": "user", "content": "Extract Jason is 25 years old"}, - ] - ) - - from openai.types.chat.chat_completion import ChatCompletion - - assert isinstance(user._raw_response, ChatCompletion) - ``` - ## Pydantic Validation Validation can also be plugged into the same Pydantic model. Here, if the answer attribute contains content that violates the rule "don't say objectionable things," Pydantic will raise a validation error. @@ -202,9 +183,9 @@ answer Here, the `UserDetails` model is passed as the `response_model`, and `max_retries` is set to 2. 
```python -from openai import OpenAI import instructor +from openai import OpenAI from pydantic import BaseModel, field_validator # Apply the patch to the OpenAI client @@ -233,6 +214,25 @@ model = client.chat.completions.create( assert model.name == "JASON" ``` +## Contributing + +If you want to help out checkout some of the issues marked as `good-first-issue` or `help-wanted`. Found [here](https://github.com/jxnl/instructor/labels/good%20first%20issue). They could be anything from code improvements, a guest blog post, or a new cook book. + ## License This project is licensed under the terms of the MIT License. + +# Contributors + + + + + + + + + + + + + From 03363f152e132ea7f9f38b4b462fee9135e864a6 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 15 Nov 2023 19:54:12 -0500 Subject: [PATCH 33/40] clean up blog index --- docs/blog/index.md | 18 ++++++++++++++++++ docs/blog/posts/chain-of-density.md | 2 +- mkdocs.yml | 8 ++++++++ requirements-doc.txt | 3 ++- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/blog/index.md b/docs/blog/index.md index e69de29..5058819 100644 --- a/docs/blog/index.md +++ b/docs/blog/index.md @@ -0,0 +1,18 @@ +# Welcome to the Instructor Blog + +The goal of the blog is to capture some content that does not neatly fit within documentation or the cookbooks. 
+ +## Advanced Topics + +- [Query Understanding and Expansion for RAG](posts/rag-and-beyond.md) +- [GPT-4 Level summarization with GPT3.5 Finetuning](posts/chain-of-density.md) +- [Deepdive on LLM Guardrails / Validation](posts/validation-part1.md) +- [A Guide to Fine-Tuning and Distillation](posts/distilation-part1.md) + +## Learning Python + +- [Understanding Batch Processing with async](posts/learn-async.md) + +## Talks + +- [AI Engineering Summit 2023](posts/aisummit-2023.md) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index 864c760..bcb406d 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -470,7 +470,7 @@ instructor jobs create-from-file generated.jsonl ??? notes "Finetuning Reference" - Checking out our [Finetuning CLI](/instructor/cli/finetune/) to learn about other hyperparameters that you can tune to improve your model's performance. + Checking out our [Finetuning CLI](cli/finetune/) to learn about other hyperparameters that you can tune to improve your model's performance. Once the job is complete, all we need to do is to then change the annotation in the function call to `distil_summarization` in our original file above to start using our new model. 
diff --git a/mkdocs.yml b/mkdocs.yml index 40ae12e..bfaa2ec 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -177,6 +177,14 @@ plugins: post_date_format: yyyy/MM/dd post_url_format: "{date}/{slug}" authors_file: "{blog}/.authors.yml" + - rss: + match_path: blog/posts/.* + date_from_meta: + as_creation: date + categories: + - categories + - tags + enabled: !ENV [CI, false] extra: analytics: provider: google diff --git a/requirements-doc.txt b/requirements-doc.txt index b03ccb9..dc65fbe 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -9,4 +9,5 @@ pytest aiohttp==3.8.2 yarl==1.8.1 frozenlist==1.3.1 -mkdocs-minify-plugin \ No newline at end of file +mkdocs-minify-plugin +mkdocs-rss-plugin \ No newline at end of file From fc3e609de0bdc0c8cbec5f9302d576125aa452de Mon Sep 17 00:00:00 2001 From: A <31348710+daaniyaan@users.noreply.github.com> Date: Thu, 16 Nov 2023 05:54:18 +0330 Subject: [PATCH 34/40] Correct typo (#170) Co-authored-by: Jason Liu --- docs/blog/posts/chain-of-density.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index bcb406d..8e3e5f7 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -8,7 +8,7 @@ tags: - chain of density - finetuneing - gpt-3.5-turbo - - distilation + - distillation authors: - ivanleomk - jxnl @@ -540,4 +540,4 @@ Interestingly, the model finetuned with the least examples seems to outperform t Finetuning this iterative method was 20-40x faster while improving overall performance, resulting in massive efficiency gains by finetuning and distilling capabilities into specialized models. -We've seen how `Instructor` can make your life easier, from data modeling to distilation and finetuning. If you enjoy the content or want to try out `instructor` check out the [github](https://github.com/jxnl/instructor) and don't forget to give us a star! 
+We've seen how `Instructor` can make your life easier, from data modeling to distillation and finetuning. If you enjoy the content or want to try out `instructor` check out the [github](https://github.com/jxnl/instructor) and don't forget to give us a star! From 4054720004fa533b1e5531bbbb26ba0a23d9ea49 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Fri, 17 Nov 2023 14:09:59 -0500 Subject: [PATCH 35/40] add pydantic --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bbb549a..f6a2403 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi [Star us on Github!](www.github.com/jxnl/instructor). +[![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev) [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) [![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers) [![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor) -[![GitHub issues](https://img.shields.io/github/issues/jxnl/instructor.svg)](https://github.com/jxnl/instructor/issues) [![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) Dive into the world of Python-based structured extraction, empowered by OpenAI's cutting-edge function calling API. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and its results insightful. 
From b6c1bb05d5cad9b9489f0f9e3d0f02e18a8027f5 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Fri, 17 Nov 2023 14:39:47 -0500 Subject: [PATCH 36/40] clean up maybe --- docs/examples/maybe.md | 89 +++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/docs/examples/maybe.md b/docs/examples/maybe.md index 2fbb7bf..183becf 100644 --- a/docs/examples/maybe.md +++ b/docs/examples/maybe.md @@ -1,12 +1,16 @@ -# Error Handling Using Maybe Pattern +# Handling Missing Data with `Maybe` -## Introduction +In this post, we will demonstrate how to use the `Maybe` pattern to manage missing data and employ pattern matching to handle errors in a structured manner. -The `Maybe` pattern is a functional programming concept used for error handling. Instead of raising exceptions or returning `None`, you can use a `Maybe` type to encapsulate both the result and possible errors. +## What is `Maybe`? -## Define Models with Pydantic +The `Maybe` pattern is a concept in functional programming used for error handling. Instead of raising exceptions or returning `None`, you can use a `Maybe` type to encapsulate both the result and potential errors. This pattern is particularly useful when making OpenAI API calls, as providing language models with an escape mechanism effectively reduces hallucinations. Consequently, we can construct a prompt that closely resembles regular programming. -Using Pydantic, define the `UserDetail` and `MaybeUser` classes. +Towards the end, we will demonstrate how to use `Maybe` instances in pattern matching, which offers an excellent approach for handling errors in a structured manner. + +## Defining the Model + +Using Pydantic, we'll first define the `UserDetail` and `MaybeUser` classes. 
```python from pydantic import BaseModel, Field, Optional @@ -25,30 +29,32 @@ class MaybeUser(BaseModel): return self.result is not None ``` -## Implementing `Maybe` Pattern with `instructor` +Notice that `MaybeUser` has a `result` field that is an optional `UserDetail` instance where the extracted data will be stored. The `error` field is a boolean that indicates whether an error occurred, and the `message` field is an optional string that contains the error message. -You can use `instructor` to generalize the `Maybe` pattern. +## Defining the function + +Once we have the model defined, we can create a function that uses the `Maybe` pattern to extract the data. ```python -import instructor - -MaybeUser = instructor.Maybe(UserDetail) -``` - -## Function Example: `get_user_detail` - -Here's a function example that returns a `MaybeUser` instance. The function simulates an API call to get user details. - -```python -from typing import Optional import random +import instructor +from openai import OpenAI +from typing import Optional -def get_user_detail(string: str) -> MaybeUser: - ... - return +# This enables the `response_model` keyword +client = instructor.patch(OpenAI()) -# Example usage -user1 = get_user_detail("Jason is a 25 years old scientist") +def extract(content: str) -> MaybeUser: + return client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=MaybeUser, + messages=[ + {"role": "user", "content": f"Extract `{content}`"}, + ], + ) + +user1 = extract("Jason is a 25-year-old scientist") +# output: { "result": { "age": 25, @@ -59,8 +65,8 @@ user1 = get_user_detail("Jason is a 25 years old scientist") "message": null } - -user2 = get_user_detail("Unknown user") +user2 = extract("Unknown user") +# output: { "result": null, "error": true, @@ -68,6 +74,35 @@ user2 = get_user_detail("Unknown user") } ``` -## Conclusion +As you can see, when the data is extracted successfully, the `result` field contains the `UserDetail` instance.
When an error occurs, the `error` field is set to `True`, and the `message` field contains the error message. -The `Maybe` pattern enables a more structured approach to error handling. This example illustrated its implementation using Python and Pydantic. \ No newline at end of file +## Handle the result + +There are a few ways we can handle the result. Normally, we can just access the individual fields. + +```python +def process_user_detail(maybe_user: MaybeUser): + if not maybe_user.error: + user = maybe_user.result + print(f"User {user.name} is {user.age} years old") + else: + print(f"Not found: {maybe_user.message}") +``` + +## Pattern Matching + +We can also use pattern matching to handle the result. This is a great way to handle errors in a structured way. + +```python +def process_user_detail(maybe_user: MaybeUser): + match maybe_user: + case MaybeUser(error=True, message=msg): + print(f"Error: {msg}") + case MaybeUser(result=user_detail) if user_detail: + assert isinstance(user_detail, UserDetail) + print(f"User {user_detail.name} is {user_detail.age} years old") + case _: + print("Unknown error") +``` + +If you want to learn more about pattern matching, check out Pydantic's docs on [Structural Pattern Matching](https://docs.pydantic.dev/latest/concepts/models/#structural-pattern-matching) From ab6f9996197989c5a5e7a2c795aac7a84db9c8d4 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Fri, 17 Nov 2023 16:14:06 -0500 Subject: [PATCH 37/40] remove group --- mkdocs.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index bfaa2ec..ee828f1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -164,11 +164,6 @@ plugins: members_order: alphabetical allow_inspection: true show_bases: true - - group: - enabled: !ENV CI - plugins: - - optimize - - minify - blog: enabled: !ENV CI blog_dir: "blog" From 556d36a68c180c06d1083e8d76e1efa78b1f28a5 Mon Sep 17 00:00:00 2001 From: Matthew Date: Sat, 18 Nov 2023 14:32:42 -0500 Subject: [PATCH 38/40] Creative acts in
documentation (#188) --- docs/index.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/docs/index.md b/docs/index.md index bbb549a..b846679 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,10 +4,8 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi --- -[Star us on Github!](www.github.com/jxnl/instructor). - [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) -[![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers) +![Star us on Github!](https://img.shields.io/github/stars/jxnl/instructor.svg?style=social) [![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor) [![GitHub issues](https://img.shields.io/github/issues/jxnl/instructor.svg)](https://github.com/jxnl/instructor/issues) [![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) @@ -36,10 +34,12 @@ With Instructor, your code becomes more efficient and readable. Here’s a quick ## Usage ```py hl_lines="5 13" -from openai import OpenAI import instructor +from openai import OpenAI +from pydantic import BaseModel -# Enables `response_model` +# This enables response_model keyword +# from client.chat.completions.create client = instructor.patch(OpenAI()) class UserDetail(BaseModel): @@ -103,7 +103,7 @@ assert isinstance(model, UserExtract) ### Step 1: Patch the client -First, import the required libraries and apply the patch function to the OpenAI module. This exposes new functionality with the response_model parameter. +First, import the required libraries and apply the patch function to the OpenAI module. This allows us to parse the raw JSON from our OpenAI completions into Pydantic output. 
```python import instructor @@ -117,11 +117,9 @@ client = instructor.patch(OpenAI()) ### Step 2: Define the Pydantic Model -Create a Pydantic model to define the structure of the data you want to extract. This model will map directly to the information in the prompt. +Create a Pydantic model to define the structure of the data extracted from the OpenAI response. This model will map directly to the information in the prompt. ```python -from pydantic import BaseModel - class UserDetail(BaseModel): name: str age: int @@ -129,8 +127,7 @@ class UserDetail(BaseModel): ### Step 3: Extract -Use the `client.chat.completions.create` method to send a prompt and extract the data into the Pydantic object. The response_model parameter specifies the Pydantic model to use for extraction. Its helpful to annotate the variable with the type of the response model. -which will help your IDE provide autocomplete and spell check. +Use the `client.chat.completions.create` method to generate a completion and extract response data into the Pydantic object. The response_model parameter enables autocomplete and spell check in your IDE. ```python user: UserDetail = client.chat.completions.create( @@ -170,7 +167,7 @@ except ValidationError as e: print(e) ``` -Its important to not here that the error message is generated by the LLM, not the code, so it'll be helpful for re asking the model. +Note, the error message is generated by the LLM, not the code, so it'll be helpful for re asking the model. 
```plaintext 1 validation error for QuestionAnswer From 1aca756eb0f5d579396c4f6443a656c77e88ad8f Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Sat, 18 Nov 2023 15:32:13 -0500 Subject: [PATCH 39/40] Improve Documentation (#189) --- docs/blog/posts/chain-of-density.md | 4 +- docs/{ => concepts}/distillation.md | 0 docs/{ => concepts}/philosophy.md | 0 docs/{tips/index.md => concepts/prompting.md} | 7 +- docs/{ => concepts}/reask_validation.md | 30 +-- docs/contributing.md | 34 ++++ docs/help.md | 27 +++ docs/index.md | 171 ++---------------- docs/installation.md | 14 ++ docs/why.md | 151 ++++++++++++++++ mkdocs.yml | 15 +- requirements.txt | 1 - 12 files changed, 264 insertions(+), 190 deletions(-) rename docs/{ => concepts}/distillation.md (100%) rename docs/{ => concepts}/philosophy.md (100%) rename docs/{tips/index.md => concepts/prompting.md} (98%) rename docs/{ => concepts}/reask_validation.md (75%) create mode 100644 docs/contributing.md create mode 100644 docs/help.md create mode 100644 docs/installation.md create mode 100644 docs/why.md diff --git a/docs/blog/posts/chain-of-density.md b/docs/blog/posts/chain-of-density.md index 8e3e5f7..ab87c15 100644 --- a/docs/blog/posts/chain-of-density.md +++ b/docs/blog/posts/chain-of-density.md @@ -212,7 +212,7 @@ class RewrittenSummary(BaseModel): For a more in-depth walkthrough on how to use `Pydantic` validators with the `Instructor` library, we recommend checking out our previous article on LLM - validation - [Good LLM Validation is just Good Validation](/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) + validation - [Good LLM Validation is just Good Validation](../posts/validation-part1.md) Ideally, we'd like for `Missing` to have a length between 1 and 3, `Absent` to be an empty list and for our rewritten summaries to keep a minimum entity density. With `Instructor`, we can implement this logic using native `Pydantic` validators that are simply declared as part of the class itself. 
@@ -470,7 +470,7 @@ instructor jobs create-from-file generated.jsonl ??? notes "Finetuning Reference" - Checking out our [Finetuning CLI](cli/finetune/) to learn about other hyperparameters that you can tune to improve your model's performance. + Checking out our [Finetuning CLI](../../cli/finetune.md) to learn about other hyperparameters that you can tune to improve your model's performance. Once the job is complete, all we need to do is to then change the annotation in the function call to `distil_summarization` in our original file above to start using our new model. diff --git a/docs/distillation.md b/docs/concepts/distillation.md similarity index 100% rename from docs/distillation.md rename to docs/concepts/distillation.md diff --git a/docs/philosophy.md b/docs/concepts/philosophy.md similarity index 100% rename from docs/philosophy.md rename to docs/concepts/philosophy.md diff --git a/docs/tips/index.md b/docs/concepts/prompting.md similarity index 98% rename from docs/tips/index.md rename to docs/concepts/prompting.md index 97cdbd6..1b61275 100644 --- a/docs/tips/index.md +++ b/docs/concepts/prompting.md @@ -1,6 +1,4 @@ -# Prompt Engineering for Function Calling - -The overarching theme of using instructor and pydantic for function calling is to make the models as self-descriptive, modular, and flexible as possible, while maintaining data integrity and ease of use. +The overarching theme of using Instructor and Pydantic for function calling is to make the models as self-descriptive, modular, and flexible as possible, while maintaining data integrity and ease of use. - **Modularity**: Design self-contained components for reuse. - **Self-Description**: Use Pydantic's `Field` for clear field descriptions. 
@@ -39,7 +37,6 @@ class UserDetail(BaseModel): age: int name: str role: Optional[str] = Field(default=None) - ``` ## Handling Errors Within Function Calls @@ -121,7 +118,6 @@ class UserDetail(BaseModel): age: int name: str role: Role - ``` ## Handle Arbitrary Properties @@ -139,7 +135,6 @@ class UserDetail(BaseModel): age: int name: str properties: List[Property] = Field(..., description="Extract any other properties that might be relevant.") - ``` ## Limiting the Length of Lists diff --git a/docs/reask_validation.md b/docs/concepts/reask_validation.md similarity index 75% rename from docs/reask_validation.md rename to docs/concepts/reask_validation.md index c2368b7..67f0018 100644 --- a/docs/reask_validation.md +++ b/docs/concepts/reask_validation.md @@ -1,30 +1,18 @@ -# Validation and Reask with LLMs and Pydantic +# Validation and Reasking -Instead of framing "self-critique" or "self-reflection" in AI as new concepts, we can view them as validation errors with clear error messages that the systen can use to self heal. +Instead of framing "self-critique" or "self-reflection" in AI as new concepts, we can view them as validation errors with clear error messages that the system can use to self-correct. -## Pythonic Validation with Pydantic and Instructor +## Pydantic -1. **Uniform Validation API**: Pydantic provides identical developer experience, whether using code-based or LLM-based validation. -2. **Reasking Mechanism**: Pydantic accumulates validation errors for a one-step reasking process. -3. **Prompt Chaining via Error Messages**: Instructor utilizes validation error messages to refine LLM outputs without and new abstractions. +Pydantic offers a customizable and expressive validation framework for Python. Instructor leverages Pydantic's validation framework to provide a uniform developer experience for both code-based and LLM-based validation, as well as a reasking mechanism for correcting LLM outputs based on validation errors.
To learn more check out the [Pydantic docs](https://docs.pydantic.dev/latest/concepts/validators/) on validators. -## Uniform Validation: Code-Based vs. LLM +!!! note "Good llm validation is just good validation" -Validation is crucial when using Large Language Models (LLMs) for data extraction. It ensures data integrity, ensuring both quantitative and qualititave correctness with code and llm validations. - -!!! note "Pydantic Validation Docs" - - Pydantic supports validation individual fields or the whole model dict all at once. - - - [Field-Level Validation](https://docs.pydantic.dev/latest/usage/validators/) - - [Model-Level Validation](https://docs.pydantic.dev/latest/usage/validators/#model-validators) - - To see the most up to date examples check out our repo [jxnl/instructor/examples/validators](https://github.com/jxnl/instructor/tree/main/examples/validators) + If you want to see some more examples on validators checkout our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) ### Code-Based Validation Example -!!! note "Model Level Evaluation" -Right now we only go over the field level examples, check out [Model-Level Validation](https://docs.pydantic.dev/latest/usage/validators/#model-validators) if you want to see how to do model level evaluation +First define a Pydantic model with a validator using the `Annotation` class from `typing_extensions`. Enforce a naming rule using Pydantic's built-in validation: @@ -56,6 +44,8 @@ name Value error, name must contain a space (type=value_error) ``` +As we can see, Pydantic raises a validation error when the name attribute does not contain a space. This is a simple example, but it demonstrates how Pydantic can be used to validate attributes of a model. + ### LLM-Based Validation Example LLM-based validation can also be plugged into the same Pydantic model. 
Here, if the answer attribute contains content that violates the rule "don't say objectionable things," Pydantic will raise a validation error. @@ -166,7 +156,7 @@ except (ValidationError, JSONDecodeError) as e: ## Advanced Validation Techniques -The docs are currently incomplete, but we have a few advanced validation techniques that we're working on documenting better, for a example of model level validation, and using a validation context check out our example on [verifying citations](examples/exact_citations.md) which covers +The docs are currently incomplete, but we have a few advanced validation techniques that we're working on documenting better, for a example of model level validation, and using a validation context check out our example on [verifying citations](../examples/exact_citations.md) which covers 1. Validate the entire object with all attributes rather than one attribute at a time 2. Using some 'context' to validate the object, in this case we use the `context` to check if the citation existed in the original text. diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..66ca952 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,34 @@ +We would love for you to contribute to `Instructor`. + +## Issues + +If you find a bug, please file an issue on [our issue tracker on GitHub](https://github.com/jxnl/instructor/issues). + +To help us reproduce the bug, please provide a minimal reproducible example, including a code snippet and the full error message. + +1. The `response_model` you are using. +2. The `messages` you are using. +3. The `model` you are using. + +## Pull Requests + +We welcome pull requests! There is plenty to do, and we are happy to discuss any contributions you would like to make. + +If it is not a small change, please start by [filing an issue](https://github.com/jxnl/instructor/issues) first. 
+ +If you need ideas, you can check out the [help wanted](https://github.com/jxnl/instructor/labels/help%20wanted) or [good first issue](https://github.com/jxnl/instructor/labels/good%20first%20issue) labels. + +# Contributors + + + + + + + + + + + + + diff --git a/docs/help.md b/docs/help.md new file mode 100644 index 0000000..20bcd4d --- /dev/null +++ b/docs/help.md @@ -0,0 +1,27 @@ +# Getting help with Instructor + +If you need help getting started with Instructor or with advanced usage, the following sources may be useful. + +## :material-creation: Concepts + +The [concepts](concepts/index.md) section explains the core concepts of Instructor and how to prompt with models. + +## :material-chef-hat: Cookbooks + +The [cookbooks](examples/index.md) are a great place to start. They contain a variety of examples that demonstrate how to use Instructor in different scenarios. + +## :material-book: Blog + +The [blog](blog/index.md) contains articles that explain how to use Instructor in different scenarios. + +## :material-github: GitHub Discussions + +[GitHub discussions](https://github.com/jxnl/instructor/discussions) are useful for asking questions, your question and the answer will help everyone. + +## :material-github: GitHub Issues + +[GitHub issues](https://github.com/jxnl/instructor/issues) are useful for reporting bugs or requesting new features. + +## :material-twitter: Twitter + +You can also reach out to me on [Twitter](https://twitter.com/jxnlco) if you have any questions or ideas. 
diff --git a/docs/index.md b/docs/index.md index b846679..7b3e7b2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,35 +1,16 @@ -# Welcome to Instructor - Your Gateway to Structured Outputs with OpenAI +# Instructor _Structured extraction in Python, powered by OpenAI's function calling api, designed for simplicity, transparency, and control._ --- +[![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev) +[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) [![Downloads](https://img.shields.io/pypi/dm/instructor.svg)](https://pypi.python.org/pypi/instructor) -![Star us on Github!](https://img.shields.io/github/stars/jxnl/instructor.svg?style=social) [![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor) [![GitHub issues](https://img.shields.io/github/issues/jxnl/instructor.svg)](https://github.com/jxnl/instructor/issues) -[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco) -Dive into the world of Python-based structured extraction, empowered by OpenAI's cutting-edge function calling API. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and its results insightful. - -## Get Started in Moments - -Installing Instructor is a breeze. Just run `pip install instructor` in your terminal and you're on your way to a smoother data handling experience. - -## How Instructor Enhances Your Workflow - -Our `instructor.patch` for the `OpenAI` class introduces three key enhancements: - -- **Response Mode:** Specify a Pydantic model to streamline data extraction. -- **Max Retries:** Set your desired number of retry attempts for requests. 
-- **Validation Context:** Provide a context object for enhanced validator access. - A Glimpse into Instructor's Capabilities - -!!! note "Using Validators" - - Learn more about validators checkout our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) - -With Instructor, your code becomes more efficient and readable. Here’s a quick peek: +Dive into the world of Python-based structured extraction, by OpenAI's function calling API and Pydantic, the most widely used data validation library for Python. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and steerable. ## Usage @@ -59,24 +40,7 @@ assert user.name == "Jason" assert user.age == 25 ``` -**"Using `openai<1.0.0`"** - -If you're using `openai<1.0.0` then make sure you `pip install instructor<0.3.0` -where you can patch a global client like so: - -```python hl_lines="4 8" -import openai -import instructor - -instructor.patch() - -user = openai.ChatCompletion.create( - ..., - response_model=UserDetail, -) -``` - -**"Using async clients"** +**Using async clients** For async clients you must use apatch vs patch like so: @@ -101,115 +65,25 @@ model = await aclient.chat.completions.create( assert isinstance(model, UserExtract) ``` -### Step 1: Patch the client +## Why use Instructor? -First, import the required libraries and apply the patch function to the OpenAI module. This allows us to parse the raw JSON from our OpenAI completions into Pydantic output. +The question of using Instructor is fundamentally a question of why to use Pydantic. -```python -import instructor -from openai import OpenAI -from pydantic import BaseModel +1. **Powered by type hints** — Instructor is powered by Pydantic, which is powered by type hints. 
Schema validation and prompting are controlled by type annotations; less to learn, less code to write, and it integrates with your IDE. -# This enables response_model keyword -# from client.chat.completions.create -client = instructor.patch(OpenAI()) -``` +2. **Powered by OpenAI** — Instructor is powered by OpenAI's function calling API. This means you can use the same API for both prompting and extraction. -### Step 2: Define the Pydantic Model +3. **Customizable** — Pydantic is highly customizable. You can define your own validators, custom error messages, and more. -Create a Pydantic model to define the structure of the data extracted from the OpenAI response. This model will map directly to the information in the prompt. +4. **Ecosystem** — Pydantic is the most widely used data validation library for Python. It's used by FastAPI, Typer, and many other popular libraries. -```python -class UserDetail(BaseModel): - name: str - age: int -``` +5. **Battle Tested** — Pydantic is downloaded over 100M times per month, and supported by a large community of contributors. -### Step 3: Extract +## More Examples -Use the `client.chat.completions.create` method to generate a completion and extract response data into the Pydantic object. The response_model parameter enables autocomplete and spell check in your IDE. +If you'd like to see more check out our [cookbook](examples/index.md). -```python -user: UserDetail = client.chat.completions.create( - model="gpt-3.5-turbo", - response_model=UserDetail, - messages=[ - {"role": "user", "content": "Extract Jason is 25 years old"}, - ] -) - -assert user.name == "Jason" -assert user.age == 25 -``` - -## Pydantic Validation - -Validation can also be plugged into the same Pydantic model. Here, if the answer attribute contains content that violates the rule "don't say objectionable things," Pydantic will raise a validation error.
- -```python hl_lines="9 15" -from pydantic import BaseModel, ValidationError, BeforeValidator -from typing_extensions import Annotated -from instructor import llm_validator - -class QuestionAnswer(BaseModel): - question: str - answer: Annotated[ - str, - BeforeValidator(llm_validator("don't say objectionable things")) - ] - -try: - qa = QuestionAnswer( - question="What is the meaning of life?", - answer="The meaning of life is to be evil and steal", - ) -except ValidationError as e: - print(e) -``` - -Note, the error message is generated by the LLM, not the code, so it'll be helpful for re asking the model. - -```plaintext -1 validation error for QuestionAnswer -answer - Assertion failed, The statement is objectionable. (type=assertion_error) -``` - -## Reask on validation error - -Here, the `UserDetails` model is passed as the `response_model`, and `max_retries` is set to 2. - -```python -import instructor - -from openai import OpenAI -from pydantic import BaseModel, field_validator - -# Apply the patch to the OpenAI client -client = instructor.patch(OpenAI()) - -class UserDetails(BaseModel): - name: str - age: int - - @field_validator("name") - @classmethod - def validate_name(cls, v): - if v.upper() != v: - raise ValueError("Name must be in uppercase.") - return v - -model = client.chat.completions.create( - model="gpt-3.5-turbo", - response_model=UserDetails, - max_retries=2, - messages=[ - {"role": "user", "content": "Extract jason is 25 years old"}, - ], -) - -assert model.name == "JASON" -``` +[Installing Instructor](installation.md) is a breeze. Just run `pip install instructor`. ## Contributing @@ -218,18 +92,3 @@ If you want to help out checkout some of the issues marked as `good-first-issue` ## License This project is licensed under the terms of the MIT License. 
- -# Contributors - - - - - - - - - - - - - diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..8046b82 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,14 @@ +Installation is as simple as: + +```bash +pip install instructor +``` + +Instructor has a few dependencies: + +- [`openai`](https://pypi.org/project/openai/): OpenAI's Python client. +- [`typer`](https://pypi.org/project/typer/): Build great CLIs. Easy to code. Based on Python type hints. +- [`docstring-parser`](https://pypi.org/project/docstring-parser/): A parser for Python docstrings, to improve the experience of working with docstrings in jsonschema. +- [`pydantic`](https://pypi.org/project/pydantic/): Data validation and settings management using python type annotations. + +If you've got Python 3.9+ and `pip` installed, you're good to go. diff --git a/docs/why.md b/docs/why.md new file mode 100644 index 0000000..5036c6f --- /dev/null +++ b/docs/why.md @@ -0,0 +1,151 @@ +# Why use Instructor? + +??? question "Why use Pydantic?" + + It's hard to answer the question of why use Instructor without first answering [why use Pydantic.](https://docs.pydantic.dev/latest/why/): + + + - **Powered by type hints** — with Pydantic, schema validation and serialization are controlled by type annotations; less to learn, less code to write, and integration with your IDE and static analysis tools. + + - **Speed** — Pydantic's core validation logic is written in Rust. As a result, Pydantic is among the fastest data validation libraries for Python. + + - **JSON Schema** — Pydantic models can emit JSON Schema, allowing for easy integration with other tools. [Learn more…] + + - **Customisation** — Pydantic allows custom validators and serializers to alter how data is processed in many powerful ways. + + - **Ecosystem** — around 8,000 packages on PyPI use Pydantic, including massively popular libraries like + _FastAPI_, _huggingface_, _Django Ninja_, _SQLModel_, & _LangChain_.
+ + - **Battle tested** — Pydantic is downloaded over 70M times/month and is used by all FAANG companies and 20 of the 25 largest companies on NASDAQ. If you're trying to do something with Pydantic, someone else has probably already done it. + +Our `instructor.patch` for the `OpenAI` class introduces three key enhancements: + +- **Response Model:** Specify a Pydantic model to streamline data extraction. +- **Max Retries:** Set your desired number of retry attempts for requests. +- **Validation Context:** Provide a context object for enhanced validator access. + A Glimpse into Instructor's Capabilities + +!!! note "Using Validators" + + To learn more about validators, check out our blog post [Good llm validation is just good validation](https://jxnl.github.io/instructor/blog/2023/10/23/good-llm-validation-is-just-good-validation/) + +With Instructor, your code becomes more efficient and readable. Here’s a quick peek: + +## Understanding the `patch` + +Let's go over the `patch` function and see how we can leverage it to make use of Instructor. + +### Step 1: Patch the client + +First, import the required libraries and apply the patch function to the OpenAI client. This exposes new functionality with the response_model parameter. + +```python +import instructor +from openai import OpenAI +from pydantic import BaseModel + +# This enables response_model keyword +# from client.chat.completions.create +client = instructor.patch(OpenAI()) +``` + +### Step 2: Define the Pydantic Model + +Create a Pydantic model to define the structure of the data you want to extract. This model will map directly to the information in the prompt. + +```python +from pydantic import BaseModel + +class UserDetail(BaseModel): + name: str + age: int +``` + +### Step 3: Extract + +Use the `client.chat.completions.create` method to send a prompt and extract the data into the Pydantic object. The response_model parameter specifies the Pydantic model to use for extraction.
It's helpful to annotate the variable with the type of the response model, +which will help your IDE provide autocomplete and spell check. + +```python +user: UserDetail = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=UserDetail, + messages=[ + {"role": "user", "content": "Extract Jason is 25 years old"}, + ] +) + +assert user.name == "Jason" +assert user.age == 25 +``` + +## Understanding Validation + +Validation can also be plugged into the same Pydantic model. Here, if the answer attribute contains content that violates the rule "don't say objectionable things," Pydantic will raise a validation error. + +```python hl_lines="9 15" +from pydantic import BaseModel, ValidationError, BeforeValidator +from typing_extensions import Annotated +from instructor import llm_validator + +class QuestionAnswer(BaseModel): + question: str + answer: Annotated[ + str, + BeforeValidator(llm_validator("don't say objectionable things")) + ] + +try: + qa = QuestionAnswer( + question="What is the meaning of life?", + answer="The meaning of life is to be evil and steal", + ) +except ValidationError as e: + print(e) +``` + +It's important to note here that the error message is generated by the LLM, not the code, so it'll be helpful for re-asking the model. + +```plaintext +1 validation error for QuestionAnswer +answer + Assertion failed, The statement is objectionable. (type=assertion_error) +``` + +## Self Correcting on Validation Error + +Here, the `UserDetails` model is passed as the `response_model`, and `max_retries` is set to 2.
+ +```python +import instructor + +from openai import OpenAI +from pydantic import BaseModel, field_validator + +# Apply the patch to the OpenAI client +client = instructor.patch(OpenAI()) + +class UserDetails(BaseModel): + name: str + age: int + + @field_validator("name") + @classmethod + def validate_name(cls, v): + if v.upper() != v: + raise ValueError("Name must be in uppercase.") + return v + +model = client.chat.completions.create( + model="gpt-3.5-turbo", + response_model=UserDetails, + max_retries=2, + messages=[ + {"role": "user", "content": "Extract jason is 25 years old"}, + ], +) + +assert model.name == "JASON" +``` + +As you can see, we've baked a self-correcting mechanism into the model. This is a powerful way to make your models more robust and less brittle without including a lot of extra code or prompts. diff --git a/mkdocs.yml b/mkdocs.yml index ee828f1..c835d45 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -121,11 +121,16 @@ markdown_extensions: - pymdownx.tilde nav: - Introduction: - - Quick Start: 'index.md' - - Validators: "reask_validation.md" - - Distillation: "distillation.md" - - Prompt Engineering Tips: 'tips/index.md' - - Philosophy: 'philosophy.md' + - Welcome To Instructor: 'index.md' + - Why use Instructor?: 'why.md' + - Help with Instructor: 'help.md' + - Installation: 'installation.md' + - Contributing: 'contributing.md' + - Concepts: + - Schema Engineering: 'concepts/prompting.md' + - Validators: "concepts/reask_validation.md" + - Distillation: "concepts/distillation.md" + - Philosophy: 'concepts/philosophy.md' - Cookbook: - Overview: 'examples/index.md' - Streaming Lists: "examples/multitask.md" diff --git a/requirements.txt b/requirements.txt index 9575d18..bf8687a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ openai>=1.1.0 pydantic -pytest docstring-parser \ No newline at end of file From c48f8e7f3f2552b990fd7f285c6435ae04811505 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Sat, 18 Nov 2023 18:01:47 -0500
Subject: [PATCH 40/40] move concepts --- docs/{examples => concepts}/maybe.md | 0 docs/{examples => concepts}/multitask.md | 6 ++---- docs/examples/index.md | 4 ---- docs/help.md | 2 +- mkdocs.yml | 4 ++-- 5 files changed, 5 insertions(+), 11 deletions(-) rename docs/{examples => concepts}/maybe.md (100%) rename docs/{examples => concepts}/multitask.md (98%) diff --git a/docs/examples/maybe.md b/docs/concepts/maybe.md similarity index 100% rename from docs/examples/maybe.md rename to docs/concepts/maybe.md diff --git a/docs/examples/multitask.md b/docs/concepts/multitask.md similarity index 98% rename from docs/examples/multitask.md rename to docs/concepts/multitask.md index 9fcca73..9eda36c 100644 --- a/docs/examples/multitask.md +++ b/docs/concepts/multitask.md @@ -22,18 +22,16 @@ Defining a task and creating a list of classes is a common enough pattern that w By using multitask you get a very convient class with prompts and names automatically defined. You get `from_response` just like any other `BaseModel` you're able to extract the list of objects data you want with `MultTask.tasks`. -```python hl_lines="13" +```python import instructor from openai import OpenAI client = instructor.patch(OpenAI()) - class User(BaseModel): name: str age: int - MultiUser = instructor.MultiTask(User) completion = client.chat.completions.create( @@ -70,7 +68,7 @@ Lets look at an example in action with the same class MultiUser = instructor.MultiTask(User) completion = client.chat.completions.create( - model="gpt-4-0613", + model="gpt-4", temperature=0.1, stream=True, response_model=MultiUser, diff --git a/docs/examples/index.md b/docs/examples/index.md index 84d79f0..971bcec 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -2,10 +2,6 @@ ## Quick Links -- [Streaming Lists](multitask.md): Stream lists of objects from the same prompt. - -- [Missing Objects](maybe.md): Handle missing objects with `Maybe` and `Optional`. 
- - [Classifying Text](classification.md): Single and multi-label classification using enums. - [Self-Assessment via Validators](self_critique.md): Implement AI self-assessment with `llm_validator`. diff --git a/docs/help.md b/docs/help.md index 20bcd4d..7aec1e9 100644 --- a/docs/help.md +++ b/docs/help.md @@ -4,7 +4,7 @@ If you need help getting started with Instructor or with advanced usage, the fol ## :material-creation: Concepts -The [concepts](concepts/index.md) section explains the core concepts of Instructor and how to prompt with models. +The [concepts](concepts/prompting.md) section explains the core concepts of Instructor and how to prompt with models. ## :material-chef-hat: Cookbooks diff --git a/mkdocs.yml b/mkdocs.yml index c835d45..e4477cb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,13 +128,13 @@ nav: - Contributing: 'contributing.md' - Concepts: - Schema Engineering: 'concepts/prompting.md' + - Lists: "concepts/multitask.md" + - Missing Content: "concepts/maybe.md" - Validators: "concepts/reask_validation.md" - Distillation: "concepts/distillation.md" - Philosophy: 'concepts/philosophy.md' - Cookbook: - Overview: 'examples/index.md' - - Streaming Lists: "examples/multitask.md" - - Handling Missing Content: "examples/maybe.md" - Text Classification: 'examples/classification.md' - Self Critique: 'examples/self_critique.md' - Citations: 'examples/exact_citations.md'