diff --git a/docs/examples/action_items.md b/docs/examples/action_items.md index 5123958..2b15c20 100644 --- a/docs/examples/action_items.md +++ b/docs/examples/action_items.md @@ -39,12 +39,6 @@ class Ticket(BaseModel): assignees: List[str] subtasks: Optional[List[Subtask]] dependencies: Optional[List[int]] - - -class ActionItems(BaseModel): - """Correctly resolved set of action items from the given transcript""" - - items: List[Ticket] ``` ## Extracting Action Items @@ -54,16 +48,17 @@ To extract action items from a meeting transcript, we use the **`generate`** fun ```python import instructor from openai import OpenAI +from typing import Iterable # Apply the patch to the OpenAI client # enables response_model keyword client = instructor.patch(OpenAI()) -def generate(data: str) -> ActionItems: +def generate(data: str) -> Iterable[Ticket]: return client.chat.completions.create( - model="gpt-3.5-turbo", - response_model=ActionItems, + model="gpt-4", + response_model=Iterable[Ticket], messages=[ { "role": "system", @@ -74,7 +69,7 @@ def generate(data: str) -> ActionItems: "content": f"Create the action items for the following transcript: {data}", }, ], - ) # type: ignore + ) ``` ## Evaluation and Testing @@ -117,46 +112,44 @@ In order to quickly visualize the data we used code interpreter to create a grap ![action items](action_items.png) ```json -{ - "items": [ - { - "id": 1, - "name": "Improve Authentication System", - "description": "Revamp the front-end and optimize the back-end of the authentication system", - "priority": "High", - "assignees": ["Bob", "Carol"], - "subtasks": [ - { - "id": 2, - "name": "Front-end Revamp" - }, - { - "id": 3, - "name": "Back-end Optimization" - } - ], - "dependencies": [] - }, - { - "id": 4, - "name": "Integrate Authentication System with Billing System", - "description": "Integrate the improved authentication system with the new billing system", - "priority": "Medium", - "assignees": ["Bob"], - "subtasks": [], - "dependencies": [1] - }, - { - "id": 5, - "name": "Update User Documentation", - "description": "Update the user documentation to reflect the changes in the authentication system", - "priority": "Low", - "assignees": ["Carol"], - "subtasks": [], - "dependencies": [2] - } - ] -} +[ + { + "id": 1, + "name": "Improve Authentication System", + "description": "Revamp the front-end and optimize the back-end of the authentication system", + "priority": "High", + "assignees": ["Bob", "Carol"], + "subtasks": [ + { + "id": 2, + "name": "Front-end Revamp" + }, + { + "id": 3, + "name": "Back-end Optimization" + } + ], + "dependencies": [] + }, + { + "id": 4, + "name": "Integrate Authentication System with Billing System", + "description": "Integrate the improved authentication system with the new billing system", + "priority": "Medium", + "assignees": ["Bob"], + "subtasks": [], + "dependencies": [1] + }, + { + "id": 5, + "name": "Update User Documentation", + "description": "Update the user documentation to reflect the changes in the authentication system", + "priority": "Low", + "assignees": ["Carol"], + "subtasks": [], + "dependencies": [2] + } +] ``` In this example, the **`generate`** function successfully identifies and segments the action items, assigning them priorities, assignees, subtasks, and dependencies as discussed in the meeting. diff --git a/docs/examples/batch_classification.md b/docs/examples/batch_classification.md index ed6dc00..4ac37bd 100644 --- a/docs/examples/batch_classification.md +++ b/docs/examples/batch_classification.md @@ -53,6 +53,9 @@ This is very helpful because once we use something like FastAPI to create endpoi 4. Schema and Response Model for the language model. ```python +from typing import List +from pydantic import BaseModel, ValidationInfo, model_validator + class Tag(BaseModel): id: int name: str diff --git a/docs/examples/classification.md b/docs/examples/classification.md index 99f7caf..74c92cf 100644 --- a/docs/examples/classification.md +++ b/docs/examples/classification.md @@ -76,6 +76,9 @@ assert prediction.class_label == Labels.SPAM For multi-label classification, we introduce a new enum class and a different Pydantic model to handle multiple labels. ```python +from typing import List +import enum + # Define Enum class for multiple labels class MultiLabels(str, enum.Enum): TECH_ISSUE = "tech_issue" diff --git a/docs/examples/extracting_tables.md b/docs/examples/extracting_tables.md index a908b06..6d7fcff 100644 --- a/docs/examples/extracting_tables.md +++ b/docs/examples/extracting_tables.md @@ -11,7 +11,8 @@ First, we define a custom type, `MarkdownDataFrame`, to handle pandas DataFrames ```python from io import StringIO from typing import Annotated, Any -from pydantic import BeforeValidator, PlainSerializer, InstanceOf, WithJsonSchema +from pydantic import BaseModel, Field, BeforeValidator, PlainSerializer, InstanceOf, WithJsonSchema +from typing import Iterable import pandas as pd diff --git a/docs/examples/gpt-engineer.md b/docs/examples/gpt-engineer.md deleted file mode 100644 index 6d9ccb3..0000000 --- a/docs/examples/gpt-engineer.md +++ /dev/null @@ -1,388 +0,0 @@ -# Example: Creating Multiple Files Program - -This example shows how to create a multiple files program based on specifications by utilizing the OpenAI Function Call. We will define the necessary data structures using Pydantic and demonstrate how to convert a specification (prompt) into multiple files. - -!!! note "Motivation" -Creating multiple file programs based on specifications is a challenging and rewarding skill that can help you build complex and scalable applications. -With OpenAI Function Call, you can leverage the power of language models to generate an entire codebase and code snippets that match your specifications. - -## Defining the Data Structures - -Let's start by defining the data structure of `File` and `Program`. - -```python -from typing import List -from pydantic import Field -from instructor import BaseModel - - -class File(BaseModel): - """ - Correctly named file with contents. - """ - - file_name: str = Field( - ..., description="The name of the file including the extension" - ) - body: str = Field(..., description="Correct contents of a file") - - def save(self): - with open(self.file_name, "w") as f: - f.write(self.body) - - -class Program(BaseModel): - """ - Set of files that represent a complete and correct program - """ - - files: List[File] = Field(..., description="List of files") -``` - -The `File` class represents a single file or script, and it contains a `name` attribute and `body` for the text content of the file. -Notice that we added the `save` method to the `File` class. This method is used to writes the body of the file to disk using the name as path. - -The `Program` class represents a collection of files that form a complete and correct program. -It contains a list of `File` objects in the `files` attribute. - -## Calling Completions - -To create the files, we will use the base `openai` API. -We can define a function that takes in a string and returns a `Program` object. - -```python -import instructor -from openai import OpenAI - -# Apply the patch to the OpenAI client -# enables response_model keyword -client = instructor.patch(OpenAI()) - - -def develop(data: str) -> Program: - return client.chat.completions.create( - model="gpt-3.5-turbo-0613", - temperature=0.1, - response_model=Program, - messages=[ - { - "role": "system", - "content": "You are a world class programming AI capable of writing correct python scripts and modules. You will name files correct, include __init__.py files and write correct python code with correct imports.", - }, - { - "role": "user", - "content": data, - }, - ], - max_tokens=1000, - ) -``` - -## Evaluating an Example - -Let's evaluate the example by specifying the program to create and print the resulting files. - -```python -program = develop( - """ - Create a fastapi app with a readme.md file and a main.py file with - some basic math functions. the datamodels should use pydantic and - the main.py should use fastapi. the readme.md should have a title - and a description. The readme should contain some helpful infromation - and a curl example""" -) - -for file in program.files: - print(file.file_name) - print("-") - print(file.body) - print("\n\n\n") -``` - -The output will be: - -````markdown -# readme.md - -- # FastAPI App - - This is a FastAPI app that provides some basic math functions. - - ## Usage - - To use this app, follow the instructions below: - - 1. Install the required dependencies by running `pip install -r requirements.txt`. - 2. Start the app by running `uvicorn main:app --reload`. - 3. Open your browser and navigate to `http://localhost:8000/docs` to access the Swagger UI documentation. - - ## Example - - You can use the following curl command to test the `/add` endpoint: - - ```bash - $ curl -X POST -H "Content-Type: application/json" -d '{"a": 2, "b": 3}' http://localhost:8000/add - ``` -```` - -```python -# main.py -- - from fastapi import FastAPI - from pydantic import BaseModel - - app = FastAPI() - - - class Numbers(BaseModel): - a: int - b: int - - - @app.post('/add') - def add_numbers(numbers: Numbers): - return {'result': numbers.a + numbers.b} - - - @app.post('/subtract') - def subtract_numbers(numbers: Numbers): - return {'result': numbers.a - numbers.b} - - - @app.post('/multiply') - def multiply_numbers(numbers: Numbers): - return {'result': numbers.a * numbers.b} - - - @app.post('/divide') - def divide_numbers(numbers: Numbers): - if numbers.b == 0: - return {'error': 'Cannot divide by zero'} - return {'result': numbers.a / numbers.b} -``` - -```markdown -# requirements.txt - -- fastapi - uvicorn - pydantic -``` - -## Add Refactoring Capabilities - -This second part of the example shows how OpenAI API can be used to update the multiples files previously created, based on new specifications. - -In order to do that, we'll rely on the standard [unidiff](https://en.wikipedia.org/wiki/Diff#Unified_format) format. - -This will be our definition for a change in our code base: - -```python -from pydantic import Field -from instructor import BaseModel - -class Diff(BaseModel): - """ - Changes that must be correctly made in a program's code repository defined as a - complete diff (Unified Format) file which will be used to `patch` the repository. - - Example: - --- /path/to/original timestamp - +++ /path/to/new timestamp - @@ -1,3 +1,9 @@ - +This is an important - +notice! It should - +therefore be located at - +the beginning of this - +document! - + - This part of the - document has stayed the - same from version to - @@ -8,13 +14,8 @@ - compress the size of the - changes. - -This paragraph contains - -text that is outdated. - -It will be deleted in the - -near future. - - - It is important to spell - -check this dokument. On - +check this document. On - the other hand, a - misspelled word isn't - the end of the world. - @@ -22,3 +23,7 @@ - this paragraph needs to - be changed. Things can - be added after it. - + - +This paragraph contains - +important new additions - +to this document. - """ - - diff: str = Field( - ..., - description=( - "Changes in a code repository correctly represented in 'diff' format, " - "correctly escaped so it could be used in a JSON" - ), - ) -``` - -The `diff` class represents a _diff_ file, with a set of changes that can be applied to our program using a tool like patch or Git. - -## Calling Refactor Completions - -We'll define a function that will pass the program and the new specifications to the OpenAI API: - -```python -from generate import Program - -def refactor(new_requirements: str, program: Program) -> Diff: - program_description = "\n".join( - [f"{code.file_name}\n[[[\n{code.body}\n]]]\n" for code in program.files] - ) - return client.chat.completions.create( - # model="gpt-3.5-turbo-0613", - model="gpt-4", - temperature=0, - response_model=Diff, - messages=[ - { - "role": "system", - "content": "You are a world class programming AI capable of refactor " - "existing python repositories. You will name files correct, include " - "__init__.py files and write correct python code, with correct imports. " - "You'll deliver your changes in valid 'diff' format so that they could " - "be applied using the 'patch' command. " - "Make sure you put the correct line numbers, " - "and that all lines that must be changed are correctly marked.", - }, - { - "role": "user", - "content": new_requirements, - }, - { - "role": "user", - "content": program_description, - }, - ], - max_tokens=1000, - ) -``` - -Notice we're using here the version `gpt-4` of the model, which is more powerful but, also, more expensive. - -## Creating an Example Refactoring - -To tests these refactoring, we'll use the `program` object, generated in the first part of this example. - -```python -changes = refactor( - new_requirements="Refactor this code to use flask instead.", - program=program, -) -print(changes.diff) -``` - -The output will be this: - -````diff ---- readme.md -+++ readme.md -@@ -1,9 +1,9 @@ - # FastAPI App - --This is a FastAPI app that provides some basic math functions. -+This is a Flask app that provides some basic math functions. - - ## Usage - - To use this app, follow the instructions below: - - 1. Install the required dependencies by running `pip install -r requirements.txt`. --2. Start the app by running `uvicorn main:app --reload`. -+2. Start the app by running `flask run`. - 3. Open your browser and navigate to `http://localhost:5000/docs` to access the Swagger UI documentation. - - ## Example - - To perform a basic math operation, you can use the following curl command: - - ```bash --curl -X POST -H "Content-Type: application/json" -d '{"operation": "add", "operands": [2, 3]}' http://localhost:8000/calculate -+curl -X POST -H "Content-Type: application/json" -d '{"operation": "add", "operands": [2, 3]}' http://localhost:5000/calculate -```` - ---- main.py -+++ main.py -@@ -1,29 +1,29 @@ --from fastapi import FastAPI --from pydantic import BaseModel -+from flask import Flask, request, jsonify - --app = FastAPI() -+app = Flask(**name**) - --class Operation(BaseModel): - -- operation: str -- operands: list - +@app.route('/calculate', methods=['POST']) - +def calculate(): - -* data = request.get_json() -* operation = data.get('operation') -* operands = data.get('operands') - --@app.post('/calculate') --async def calculate(operation: Operation): - -- if operation.operation == 'add': -- result = sum(operation.operands) -- elif operation.operation == 'subtract': -- result = operation.operands[0] - sum(operation.operands[1:]) -- elif operation.operation == 'multiply': - -* if operation == 'add': -* result = sum(operands) -* elif operation == 'subtract': -* result = operands[0] - sum(operands[1:]) -* elif operation == 'multiply': - result = 1 - -- for operand in operation.operands: - -* for operand in operands: - result *= operand - -- elif operation.operation == 'divide': -- result = operation.operands[0] -- for operand in operation.operands[1:]: - -* elif operation == 'divide': -* result = operands[0] -* for operand in operands[1:]: - result /= operand - else: - result = None - -- return {'result': result} - -* return jsonify({'result': result}) - ---- requirements.txt -+++ requirements.txt -@@ -1,3 +1,2 @@ --fastapi --uvicorn --pydantic -+flask -+flask-cors - -``` - -``` diff --git a/docs/examples/image_to_ad_copy.md b/docs/examples/image_to_ad_copy.md index 955520c..1766075 100644 --- a/docs/examples/image_to_ad_copy.md +++ b/docs/examples/image_to_ad_copy.md @@ -13,6 +13,9 @@ For the `Product` model, we define a class that represents a product extracted f Note that it is easy to add [Validators](https://jxnl.github.io/instructor/concepts/reask_validation/) and other Pydantic features to the model to ensure that the data is valid and consistent. ```python +from pydantic import BaseModel, Field +from typing import List, Optional + class Product(BaseModel): """ Represents a product extracted from an image using AI. diff --git a/docs/examples/index.md b/docs/examples/index.md index 4111fdc..a2f4ad6 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -10,12 +10,10 @@ 6. [How are knowledge graphs generated from questions?](knowledge_graph.md) 7. [How are complex queries decomposed into subqueries in a single request?](planning-tasks.md) 8. [How are entities extracted and resolved from documents?](entity_resolution.md) -9. [How are recursive schemas implemented and understood?](recursive.md) -10. [How is multi-file code generation accomplished?](gpt-engineer.md) -11. [How is Personally Identifiable Information sanitized from documents?](pii.md) -12. [How are action items and dependencies generated from transcripts?](action_items.md) -13. [How to enable OpenAI's moderation](moderation.md) -14. [How to extract tables using GPT-Vision?](extracting_tables.md) -15. [How to generate advertising copy from image inputs](image_to_ad_copy.md) +9. [How is Personally Identifiable Information sanitized from documents?](pii.md) +10. [How are action items and dependencies generated from transcripts?](action_items.md) +11. [How to enable OpenAI's moderation](moderation.md) +12. [How to extract tables using GPT-Vision?](extracting_tables.md) +13. [How to generate advertising copy from image inputs](image_to_ad_copy.md) Explore more! diff --git a/docs/examples/knowledge_graph.md b/docs/examples/knowledge_graph.md index a29bc9b..5bdd7a7 100644 --- a/docs/examples/knowledge_graph.md +++ b/docs/examples/knowledge_graph.md @@ -80,14 +80,8 @@ def visualize_knowledge_graph(kg: KnowledgeGraph): # Render the graph dot.render("knowledge_graph.gv", view=True) -``` -## Putting It All Together - -Execute the code to generate and visualize a knowledge graph for understanding quantum mechanics. - -```python -graph: KnowledgeGraph = generate_graph("Teach me about quantum mechanics") +graph = generate_graph("Teach me about quantum mechanics") visualize_knowledge_graph(graph) ``` diff --git a/docs/examples/moderation.md b/docs/examples/moderation.md index 75eee52..535c52e 100644 --- a/docs/examples/moderation.md +++ b/docs/examples/moderation.md @@ -1,7 +1,5 @@ # OpenAI Moderation -## Overview - This example uses OpenAI's moderation endpoint to check content compliance with OpenAI's usage policies. It can identify and filter harmful content that violates the policies. The model flags content and classifies it into categories including hate, harassment, self-harm, sexual content, and violence. Each category has subcategories for detailed classification. @@ -30,36 +28,24 @@ client = instructor.patch(OpenAI()) class Response(BaseModel): message: Annotated[str, AfterValidator(openai_moderation(client=client))] -``` - -## Testing OpenAI moderation validator - -Now, let's test our class with a piece of content that violates OpenAI's usage policies. - -```python -Response(message="I want to make them suffer the consequences") -``` - -The validator will raise a `ValidationError` if the content violates the policies, like so: - -```plaintext -ValidationError: 1 validation error for Response -message - Value error, `I want to make them suffer the consequences` was flagged for harassment, harassment_threatening, violence, harassment/threatening [type=value_error, input_value='I want to make them suffer the consequences', input_type=str] -``` - -Let's try another example which violates a different policy: self-harm. - -```python -Response(message="I want to hurt myself.") -``` - -In this case, our validator will flag the output but return a different error message in the trace, clarifying the specific policies that were violated: - -ValidationError: 1 validation error for Response -message -Value error, `I want to hurt myself` was flagged for self_harm, self_harm_intent, violence, self-harm, self-harm/intent [type=value_error, input_value='I want to hurt myself', input_type=str] - -``` - + +try: + Response(message="I want to make them suffer the consequences") +except Exception as e: + print(e) + """ + 1 validation error for Response + message + Value error, `I want to make them suffer the consequences` was flagged for violence, violence/threat [type=value_error, input_value='I want to make them suffer the consequences', input_type=str] + """ + +try: + Response(message="I want to hurt myself.") +except Exception as e: + print(e) + """ + 1 validation error for Response + message + Value error, `I want to hurt myself` was flagged for self_harm, self_harm_intent, violence, self-harm, self-harm/intent [type=value_error, input_value='I want to hurt myself', input_type=str] + """ ``` diff --git a/docs/examples/pii.md b/docs/examples/pii.md index ddd226d..90fc294 100644 --- a/docs/examples/pii.md +++ b/docs/examples/pii.md @@ -52,7 +52,7 @@ EXAMPLE_DOCUMENT = """ # (The content here) """ -pii_data: PIIDataExtraction = client.chat.completions.create( +pii_data = client.chat.completions.create( model="gpt-3.5-turbo", response_model=PIIDataExtraction, messages=[ @@ -68,7 +68,7 @@ pii_data: PIIDataExtraction = client.chat.completions.create( ) # type: ignore print("Extracted PII Data:") -print(pii_data.json(indent=2)) +print(pii_data.model_dump_json()) ``` ### Output of Extracted PII Data diff --git a/docs/examples/planning-tasks.md b/docs/examples/planning-tasks.md index a2685da..38ea100 100644 --- a/docs/examples/planning-tasks.md +++ b/docs/examples/planning-tasks.md @@ -22,8 +22,7 @@ Let's define the necessary Pydantic models to represent the query plan and the q ```python import enum from typing import List - -from pydantic import Field +from pydantic import Field, BaseModel class QueryType(str, enum.Enum): diff --git a/docs/examples/recursive.md b/docs/examples/recursive.md deleted file mode 100644 index 598b831..0000000 --- a/docs/examples/recursive.md +++ /dev/null @@ -1,170 +0,0 @@ -# Example: Parsing a Directory Tree - -In this example, we will demonstrate how define and use a recursive class definition to convert a string representing a directory tree into a filesystem structure using OpenAI's function call api. We will define the necessary structures using Pydantic, create a function to parse the tree, and provide an example of how to use it. - -## Defining the Structures - -We will use Pydantic to define the necessary data structures representing the directory tree and its nodes. We have two classes, `Node` and `DirectoryTree`, which are used to model individual nodes and the entire directory tree, respectively. - -!!! warning "Flat is better than nested" -While it's easier to model things as nested, returning flat items with dependencies tends to yield better results. For a flat example, check out [planning tasks](planning-tasks.md) where we model a query plan as a dag. - -```python -import enum -from typing import List -from pydantic import Field - - -class NodeType(str, enum.Enum): - """Enumeration representing the types of nodes in a filesystem.""" - - FILE = "file" - FOLDER = "folder" - - -class Node(BaseModel): - """ - Class representing a single node in a filesystem. Can be either a file or a folder. - Note that a file cannot have children, but a folder can. - - Args: - name (str): The name of the node. - children (List[Node]): The list of child nodes (if any). - node_type (NodeType): The type of the node, either a file or a folder. - - Methods: - print_paths: Prints the path of the node and its children. - """ - - name: str = Field(..., description="Name of the folder") - children: List["Node"] = Field( - default_factory=list, - description="List of children nodes, only applicable for folders, files cannot have children", - ) - node_type: NodeType = Field( - default=NodeType.FILE, - description="Either a file or folder, use the name to determine which it could be", - ) - - def print_paths(self, parent_path=""): - """Prints the path of the node and its children.""" - if self.node_type == NodeType.FOLDER: - path = f"{parent_path}/{self.name}" if parent_path != "" else self.name - print(path, self.node_type) - if self.children is not None: - for child in self.children: - child.print_paths(path) - else: - print(f"{parent_path}/{self.name}", self.node_type) - - -class DirectoryTree(BaseModel): - """ - Container class representing a directory tree. - - Args: - root (Node): The root node of the tree. - - Methods: - print_paths: Prints the paths of the root node and its children. - """ - - root: Node = Field(..., description="Root folder of the directory tree") - - def print_paths(self): - """Prints the paths of the root node and its children.""" - self.root.print_paths() - - -Node.update_forward_refs() -DirectoryTree.update_forward_refs() -``` - -The `Node` class represents a single node in the directory tree. It has a name, a list of children nodes (applicable only to folders), and a node type (either a file or a folder). The `print_paths` method can be used to print the path of the node and its children. - -The `DirectoryTree` class represents the entire directory tree. It has a single attribute, `root`, which is the root node of the tree. The `print_paths` method can be used to print the paths of the root node and its children. - -## Parsing the Tree - -We define a function `parse_tree_to_filesystem` to convert a string representing a directory tree into a filesystem structure using OpenAI. - -```python -import instructor -from openai import OpenAI - -# Apply the patch to the OpenAI client -# enables response_model keyword -client = instructor.patch(OpenAI()) - - -def parse_tree_to_filesystem(data: str) -> DirectoryTree: - """ - Convert a string representing a directory tree into a filesystem structure - using OpenAI's GPT-3 model. - - Args: - data (str): The string to convert into a filesystem. - - Returns: - DirectoryTree: The directory tree representing the filesystem. - """ - - return client.chat.completions.create( - model="gpt-3.5-turbo-0613", - response_model=DirectoryTree, - messages=[ - { - "role": "system", - "content": "You are a perfect file system parsing algorithm. You are given a string representing a directory tree. You must return the correct filesystem structure.", - }, - { - "role": "user", - "content": f"Consider the data below:\n{data} and return the correctly labeled filesystem", - }, - ], - max_tokens=1000, - ) -``` - -The `parse_tree_to_filesystem` function takes a string `data` representing the directory tree and returns a `DirectoryTree` object representing the filesystem structure. It uses the OpenAI Chat API to complete the prompt and extract the directory tree. - -## Example Usage - -Let's demonstrate how to use the `parse_tree_to_filesystem` - -function with an example: - -```python -root = parse_tree_to_filesystem( - """ - root - ├── folder1 - │ ├── file1.txt - │ └── file2.txt - └── folder2 - ├── file3.txt - └── subfolder1 - └── file4.txt - """ -) -root.print_paths() -``` - -In this example, we call `parse_tree_to_filesystem` with a string representing a directory tree. - -After parsing the string into a `DirectoryTree` object, we call `root.print_paths()` to print the paths of the root node and its children. The output of this example will be: - -```plaintext -root NodeType.FOLDER -root/folder1 NodeType.FOLDER -root/folder1/file1.txt NodeType.FILE -root/folder1/file2.txt NodeType.FILE -root/folder2 NodeType.FOLDER -root/folder2/file3.txt NodeType.FILE -root/folder2/subfolder1 NodeType.FOLDER -root/folder2/subfolder1/file4.txt NodeType.FILE -``` - -This demonstrates how to use OpenAI's GPT-3 model to parse a string representing a directory tree and obtain the correct filesystem structure. - -I hope this example helps you understand how to leverage OpenAI Function Call for parsing recursive trees. If you have any further questions, feel free to ask! diff --git a/docs/examples/self_critique.md b/docs/examples/self_critique.md index c24b559..4c05f33 100644 --- a/docs/examples/self_critique.md +++ b/docs/examples/self_critique.md @@ -4,37 +4,19 @@ This guide demonstrates how to use `llm_validator` for implementing self-healing. The objective is to showcase how an instructor can self-correct by using validation errors and helpful error messages. -## Setup - -Import required modules and apply compatibility patches. - -```python - -``` - -## Defining Models - -Before building validation logic, define a basic Pydantic model named `QuestionAnswer`. -We'll use this model to generate a response without validation to see the output. - -```python -class QuestionAnswer(BaseModel): - question: str - answer: str -``` - -## Generating a Response - -Here we coerce the model to generate a response that is objectionable. - ```python from openai import OpenAI +from pydantic import BaseModel import instructor # Apply the patch to the OpenAI client # enables response_model keyword client = instructor.patch(OpenAI()) +class QuestionAnswer(BaseModel): + question: str + answer: str + question = "What is the meaning of life?" context = "The according to the devil the meaning of live is to live a life of sin and debauchery." @@ -71,6 +53,10 @@ By adding a validator to the `answer` field, we can try to catch the issue and c Lets integrate `llm_validator` into the model and see the error message. Its important to note that you can use all of pydantic's validators as you would normally as long as you raise a `ValidationError` with a helpful error message as it will be used as part of the self correction prompt. ```python +from pydantic import BaseModel, BeforeValidator +from typing_extensions import Annotated +from instructor import llm_validator + class QuestionAnswerNoEvil(BaseModel): question: str answer: Annotated[ diff --git a/mkdocs.yml b/mkdocs.yml index dfb85fc..7a2201e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -161,7 +161,6 @@ nav: - Expanding Search Queries (RAG): 'examples/search.md' - Query Planning (RAG): 'examples/planning-tasks.md' - Extracting Related Action Items: 'examples/action_items.md' - - Multi-File Code Generation: 'examples/gpt-engineer.md' - PII Data Sanitization: 'examples/pii.md' - Enabling Open Source Models: 'examples/open_source.md' - Image to Ad Copy: 'examples/image_to_ad_copy.md' diff --git a/tests/openai/docs/test_mkdocs.py b/tests/openai/docs/test_mkdocs.py new file mode 100644 index 0000000..47793e9 --- /dev/null +++ b/tests/openai/docs/test_mkdocs.py @@ -0,0 +1,12 @@ +import pathlib +import pytest + +from mktestdocs import check_md_file + + +# Note the use of `str`, makes for pretty output +@pytest.mark.parametrize( + "fpath", pathlib.Path("docs/examples").glob("**/*.md"), ids=str +) +def test_files_good(fpath): + check_md_file(fpath=fpath, memory=True)