mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
Update Documentation
This commit is contained in:
@@ -39,12 +39,6 @@ class Ticket(BaseModel):
|
||||
assignees: List[str]
|
||||
subtasks: Optional[List[Subtask]]
|
||||
dependencies: Optional[List[int]]
|
||||
|
||||
|
||||
class ActionItems(BaseModel):
|
||||
"""Correctly resolved set of action items from the given transcript"""
|
||||
|
||||
items: List[Ticket]
|
||||
```
|
||||
|
||||
## Extracting Action Items
|
||||
@@ -54,16 +48,17 @@ To extract action items from a meeting transcript, we use the **`generate`** fun
|
||||
```python
|
||||
import instructor
|
||||
from openai import OpenAI
|
||||
from typing import Iterable
|
||||
|
||||
# Apply the patch to the OpenAI client
|
||||
# enables response_model keyword
|
||||
client = instructor.patch(OpenAI())
|
||||
|
||||
|
||||
def generate(data: str) -> ActionItems:
|
||||
def generate(data: str) -> Iterable[Ticket]:
|
||||
return client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
response_model=ActionItems,
|
||||
model="gpt-4",
|
||||
response_model=Iterable[Ticket],
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
@@ -74,7 +69,7 @@ def generate(data: str) -> ActionItems:
|
||||
"content": f"Create the action items for the following transcript: {data}",
|
||||
},
|
||||
],
|
||||
) # type: ignore
|
||||
)
|
||||
```
|
||||
|
||||
## Evaluation and Testing
|
||||
@@ -117,46 +112,44 @@ In order to quickly visualize the data we used code interpreter to create a grap
|
||||

|
||||
|
||||
```json
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Improve Authentication System",
|
||||
"description": "Revamp the front-end and optimize the back-end of the authentication system",
|
||||
"priority": "High",
|
||||
"assignees": ["Bob", "Carol"],
|
||||
"subtasks": [
|
||||
{
|
||||
"id": 2,
|
||||
"name": "Front-end Revamp"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "Back-end Optimization"
|
||||
}
|
||||
],
|
||||
"dependencies": []
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "Integrate Authentication System with Billing System",
|
||||
"description": "Integrate the improved authentication system with the new billing system",
|
||||
"priority": "Medium",
|
||||
"assignees": ["Bob"],
|
||||
"subtasks": [],
|
||||
"dependencies": [1]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"name": "Update User Documentation",
|
||||
"description": "Update the user documentation to reflect the changes in the authentication system",
|
||||
"priority": "Low",
|
||||
"assignees": ["Carol"],
|
||||
"subtasks": [],
|
||||
"dependencies": [2]
|
||||
}
|
||||
]
|
||||
}
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Improve Authentication System",
|
||||
"description": "Revamp the front-end and optimize the back-end of the authentication system",
|
||||
"priority": "High",
|
||||
"assignees": ["Bob", "Carol"],
|
||||
"subtasks": [
|
||||
{
|
||||
"id": 2,
|
||||
"name": "Front-end Revamp"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "Back-end Optimization"
|
||||
}
|
||||
],
|
||||
"dependencies": []
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "Integrate Authentication System with Billing System",
|
||||
"description": "Integrate the improved authentication system with the new billing system",
|
||||
"priority": "Medium",
|
||||
"assignees": ["Bob"],
|
||||
"subtasks": [],
|
||||
"dependencies": [1]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"name": "Update User Documentation",
|
||||
"description": "Update the user documentation to reflect the changes in the authentication system",
|
||||
"priority": "Low",
|
||||
"assignees": ["Carol"],
|
||||
"subtasks": [],
|
||||
"dependencies": [2]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
In this example, the **`generate`** function successfully identifies and segments the action items, assigning them priorities, assignees, subtasks, and dependencies as discussed in the meeting.
|
||||
|
||||
@@ -53,6 +53,9 @@ This is very helpful because once we use something like FastAPI to create endpoi
|
||||
4. Schema and Response Model for the language model.
|
||||
|
||||
```python
|
||||
from typing import List
|
||||
from pydantic import BaseModel, ValidationInfo, model_validator
|
||||
|
||||
class Tag(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
|
||||
@@ -76,6 +76,9 @@ assert prediction.class_label == Labels.SPAM
|
||||
For multi-label classification, we introduce a new enum class and a different Pydantic model to handle multiple labels.
|
||||
|
||||
```python
|
||||
from typing import List
|
||||
import enum
|
||||
|
||||
# Define Enum class for multiple labels
|
||||
class MultiLabels(str, enum.Enum):
|
||||
TECH_ISSUE = "tech_issue"
|
||||
|
||||
@@ -11,7 +11,8 @@ First, we define a custom type, `MarkdownDataFrame`, to handle pandas DataFrames
|
||||
```python
|
||||
from io import StringIO
|
||||
from typing import Annotated, Any
|
||||
from pydantic import BeforeValidator, PlainSerializer, InstanceOf, WithJsonSchema
|
||||
from pydantic import BaseModel, Field, BeforeValidator, PlainSerializer, InstanceOf, WithJsonSchema
|
||||
from typing import Iterable
|
||||
import pandas as pd
|
||||
|
||||
|
||||
|
||||
@@ -1,388 +0,0 @@
|
||||
# Example: Creating Multiple Files Program
|
||||
|
||||
This example shows how to create a multi-file program from a specification by using OpenAI function calling. We will define the necessary data structures using Pydantic and demonstrate how to convert a specification (prompt) into multiple files.
|
||||
|
||||
!!! note "Motivation"
|
||||
Creating multiple file programs based on specifications is a challenging and rewarding skill that can help you build complex and scalable applications.
|
||||
With OpenAI Function Call, you can leverage the power of language models to generate an entire codebase and code snippets that match your specifications.
|
||||
|
||||
## Defining the Data Structures
|
||||
|
||||
Let's start by defining the data structure of `File` and `Program`.
|
||||
|
||||
```python
|
||||
from typing import List
|
||||
from pydantic import Field
|
||||
from instructor import BaseModel
|
||||
|
||||
|
||||
class File(BaseModel):
|
||||
"""
|
||||
Correctly named file with contents.
|
||||
"""
|
||||
|
||||
file_name: str = Field(
|
||||
..., description="The name of the file including the extension"
|
||||
)
|
||||
body: str = Field(..., description="Correct contents of a file")
|
||||
|
||||
def save(self):
|
||||
with open(self.file_name, "w") as f:
|
||||
f.write(self.body)
|
||||
|
||||
|
||||
class Program(BaseModel):
|
||||
"""
|
||||
Set of files that represent a complete and correct program
|
||||
"""
|
||||
|
||||
files: List[File] = Field(..., description="List of files")
|
||||
```
|
||||
|
||||
The `File` class represents a single file or script; it has a `file_name` attribute for the file's name (including the extension) and a `body` attribute for the text content of the file.
|
||||
Notice that we added a `save` method to the `File` class. This method writes the body of the file to disk, using `file_name` as the path.
|
||||
|
||||
The `Program` class represents a collection of files that form a complete and correct program.
|
||||
It contains a list of `File` objects in the `files` attribute.
|
||||
|
||||
## Calling Completions
|
||||
|
||||
To create the files, we will use the base `openai` API.
|
||||
We can define a function that takes in a string and returns a `Program` object.
|
||||
|
||||
```python
|
||||
import instructor
|
||||
from openai import OpenAI
|
||||
|
||||
# Apply the patch to the OpenAI client
|
||||
# enables response_model keyword
|
||||
client = instructor.patch(OpenAI())
|
||||
|
||||
|
||||
def develop(data: str) -> Program:
|
||||
return client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-0613",
|
||||
temperature=0.1,
|
||||
response_model=Program,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a world class programming AI capable of writing correct python scripts and modules. You will name files correct, include __init__.py files and write correct python code with correct imports.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": data,
|
||||
},
|
||||
],
|
||||
max_tokens=1000,
|
||||
)
|
||||
```
|
||||
|
||||
## Evaluating an Example
|
||||
|
||||
Let's evaluate the example by specifying the program to create and print the resulting files.
|
||||
|
||||
```python
|
||||
program = develop(
|
||||
"""
|
||||
Create a fastapi app with a readme.md file and a main.py file with
|
||||
some basic math functions. the datamodels should use pydantic and
|
||||
the main.py should use fastapi. the readme.md should have a title
|
||||
and a description. The readme should contain some helpful infromation
|
||||
and a curl example"""
|
||||
)
|
||||
|
||||
for file in program.files:
|
||||
print(file.file_name)
|
||||
print("-")
|
||||
print(file.body)
|
||||
print("\n\n\n")
|
||||
```
|
||||
|
||||
The output will be:
|
||||
|
||||
````markdown
|
||||
# readme.md
|
||||
|
||||
- # FastAPI App
|
||||
|
||||
This is a FastAPI app that provides some basic math functions.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this app, follow the instructions below:
|
||||
|
||||
1. Install the required dependencies by running `pip install -r requirements.txt`.
|
||||
2. Start the app by running `uvicorn main:app --reload`.
|
||||
3. Open your browser and navigate to `http://localhost:8000/docs` to access the Swagger UI documentation.
|
||||
|
||||
## Example
|
||||
|
||||
You can use the following curl command to test the `/add` endpoint:
|
||||
|
||||
```bash
|
||||
$ curl -X POST -H "Content-Type: application/json" -d '{"a": 2, "b": 3}' http://localhost:8000/add
|
||||
```
|
||||
````
|
||||
|
||||
```python
|
||||
# main.py
|
||||
-
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class Numbers(BaseModel):
|
||||
a: int
|
||||
b: int
|
||||
|
||||
|
||||
@app.post('/add')
|
||||
def add_numbers(numbers: Numbers):
|
||||
return {'result': numbers.a + numbers.b}
|
||||
|
||||
|
||||
@app.post('/subtract')
|
||||
def subtract_numbers(numbers: Numbers):
|
||||
return {'result': numbers.a - numbers.b}
|
||||
|
||||
|
||||
@app.post('/multiply')
|
||||
def multiply_numbers(numbers: Numbers):
|
||||
return {'result': numbers.a * numbers.b}
|
||||
|
||||
|
||||
@app.post('/divide')
|
||||
def divide_numbers(numbers: Numbers):
|
||||
if numbers.b == 0:
|
||||
return {'error': 'Cannot divide by zero'}
|
||||
return {'result': numbers.a / numbers.b}
|
||||
```
|
||||
|
||||
```markdown
|
||||
# requirements.txt
|
||||
|
||||
- fastapi
|
||||
uvicorn
|
||||
pydantic
|
||||
```
|
||||
|
||||
## Add Refactoring Capabilities
|
||||
|
||||
This second part of the example shows how the OpenAI API can be used to update the multiple files previously created, based on new specifications.
|
||||
|
||||
In order to do that, we'll rely on the standard [unidiff](https://en.wikipedia.org/wiki/Diff#Unified_format) format.
|
||||
|
||||
This will be our definition for a change in our code base:
|
||||
|
||||
```python
|
||||
from pydantic import Field
|
||||
from instructor import BaseModel
|
||||
|
||||
class Diff(BaseModel):
|
||||
"""
|
||||
Changes that must be correctly made in a program's code repository defined as a
|
||||
complete diff (Unified Format) file which will be used to `patch` the repository.
|
||||
|
||||
Example:
|
||||
--- /path/to/original timestamp
|
||||
+++ /path/to/new timestamp
|
||||
@@ -1,3 +1,9 @@
|
||||
+This is an important
|
||||
+notice! It should
|
||||
+therefore be located at
|
||||
+the beginning of this
|
||||
+document!
|
||||
+
|
||||
This part of the
|
||||
document has stayed the
|
||||
same from version to
|
||||
@@ -8,13 +14,8 @@
|
||||
compress the size of the
|
||||
changes.
|
||||
-This paragraph contains
|
||||
-text that is outdated.
|
||||
-It will be deleted in the
|
||||
-near future.
|
||||
-
|
||||
It is important to spell
|
||||
-check this dokument. On
|
||||
+check this document. On
|
||||
the other hand, a
|
||||
misspelled word isn't
|
||||
the end of the world.
|
||||
@@ -22,3 +23,7 @@
|
||||
this paragraph needs to
|
||||
be changed. Things can
|
||||
be added after it.
|
||||
+
|
||||
+This paragraph contains
|
||||
+important new additions
|
||||
+to this document.
|
||||
"""
|
||||
|
||||
diff: str = Field(
|
||||
...,
|
||||
description=(
|
||||
"Changes in a code repository correctly represented in 'diff' format, "
|
||||
"correctly escaped so it could be used in a JSON"
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
The `Diff` class represents a _diff_ file: a set of changes that can be applied to our program using a tool like `patch` or Git.
|
||||
|
||||
## Calling Refactor Completions
|
||||
|
||||
We'll define a function that will pass the program and the new specifications to the OpenAI API:
|
||||
|
||||
```python
|
||||
from generate import Program
|
||||
|
||||
def refactor(new_requirements: str, program: Program) -> Diff:
|
||||
program_description = "\n".join(
|
||||
[f"{code.file_name}\n[[[\n{code.body}\n]]]\n" for code in program.files]
|
||||
)
|
||||
return client.chat.completions.create(
|
||||
# model="gpt-3.5-turbo-0613",
|
||||
model="gpt-4",
|
||||
temperature=0,
|
||||
response_model=Diff,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a world class programming AI capable of refactor "
|
||||
"existing python repositories. You will name files correct, include "
|
||||
"__init__.py files and write correct python code, with correct imports. "
|
||||
"You'll deliver your changes in valid 'diff' format so that they could "
|
||||
"be applied using the 'patch' command. "
|
||||
"Make sure you put the correct line numbers, "
|
||||
"and that all lines that must be changed are correctly marked.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": new_requirements,
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": program_description,
|
||||
},
|
||||
],
|
||||
max_tokens=1000,
|
||||
)
|
||||
```
|
||||
|
||||
Notice that here we're using the `gpt-4` model, which is more powerful but also more expensive.
|
||||
|
||||
## Creating an Example Refactoring
|
||||
|
||||
To test this refactoring, we'll use the `program` object generated in the first part of this example.
|
||||
|
||||
```python
|
||||
changes = refactor(
|
||||
new_requirements="Refactor this code to use flask instead.",
|
||||
program=program,
|
||||
)
|
||||
print(changes.diff)
|
||||
```
|
||||
|
||||
The output will be this:
|
||||
|
||||
````diff
|
||||
--- readme.md
|
||||
+++ readme.md
|
||||
@@ -1,9 +1,9 @@
|
||||
# FastAPI App
|
||||
|
||||
-This is a FastAPI app that provides some basic math functions.
|
||||
+This is a Flask app that provides some basic math functions.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this app, follow the instructions below:
|
||||
|
||||
1. Install the required dependencies by running `pip install -r requirements.txt`.
|
||||
-2. Start the app by running `uvicorn main:app --reload`.
|
||||
+2. Start the app by running `flask run`.
|
||||
3. Open your browser and navigate to `http://localhost:5000/docs` to access the Swagger UI documentation.
|
||||
|
||||
## Example
|
||||
|
||||
To perform a basic math operation, you can use the following curl command:
|
||||
|
||||
```bash
|
||||
-curl -X POST -H "Content-Type: application/json" -d '{"operation": "add", "operands": [2, 3]}' http://localhost:8000/calculate
|
||||
+curl -X POST -H "Content-Type: application/json" -d '{"operation": "add", "operands": [2, 3]}' http://localhost:5000/calculate
|
||||
````
|
||||
|
||||
--- main.py
|
||||
+++ main.py
|
||||
@@ -1,29 +1,29 @@
|
||||
-from fastapi import FastAPI
|
||||
-from pydantic import BaseModel
|
||||
+from flask import Flask, request, jsonify
|
||||
|
||||
-app = FastAPI()
|
||||
+app = Flask(__name__)
|
||||
|
||||
-class Operation(BaseModel):
|
||||
|
||||
- operation: str
|
||||
- operands: list
|
||||
+@app.route('/calculate', methods=['POST'])
|
||||
+def calculate():
|
||||
|
||||
* data = request.get_json()
|
||||
* operation = data.get('operation')
|
||||
* operands = data.get('operands')
|
||||
|
||||
-@app.post('/calculate')
|
||||
-async def calculate(operation: Operation):
|
||||
|
||||
- if operation.operation == 'add':
|
||||
- result = sum(operation.operands)
|
||||
- elif operation.operation == 'subtract':
|
||||
- result = operation.operands[0] - sum(operation.operands[1:])
|
||||
- elif operation.operation == 'multiply':
|
||||
|
||||
* if operation == 'add':
|
||||
* result = sum(operands)
|
||||
* elif operation == 'subtract':
|
||||
* result = operands[0] - sum(operands[1:])
|
||||
* elif operation == 'multiply':
|
||||
result = 1
|
||||
|
||||
- for operand in operation.operands:
|
||||
|
||||
* for operand in operands:
|
||||
result *= operand
|
||||
|
||||
- elif operation.operation == 'divide':
|
||||
- result = operation.operands[0]
|
||||
- for operand in operation.operands[1:]:
|
||||
|
||||
* elif operation == 'divide':
|
||||
* result = operands[0]
|
||||
* for operand in operands[1:]:
|
||||
result /= operand
|
||||
else:
|
||||
result = None
|
||||
|
||||
- return {'result': result}
|
||||
|
||||
* return jsonify({'result': result})
|
||||
|
||||
--- requirements.txt
|
||||
+++ requirements.txt
|
||||
@@ -1,3 +1,2 @@
|
||||
-fastapi
|
||||
-uvicorn
|
||||
-pydantic
|
||||
+flask
|
||||
+flask-cors
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
@@ -13,6 +13,9 @@ For the `Product` model, we define a class that represents a product extracted f
|
||||
Note that it is easy to add [Validators](https://jxnl.github.io/instructor/concepts/reask_validation/) and other Pydantic features to the model to ensure that the data is valid and consistent.
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional
|
||||
|
||||
class Product(BaseModel):
|
||||
"""
|
||||
Represents a product extracted from an image using AI.
|
||||
|
||||
@@ -10,12 +10,10 @@
|
||||
6. [How are knowledge graphs generated from questions?](knowledge_graph.md)
|
||||
7. [How are complex queries decomposed into subqueries in a single request?](planning-tasks.md)
|
||||
8. [How are entities extracted and resolved from documents?](entity_resolution.md)
|
||||
9. [How are recursive schemas implemented and understood?](recursive.md)
|
||||
10. [How is multi-file code generation accomplished?](gpt-engineer.md)
|
||||
11. [How is Personally Identifiable Information sanitized from documents?](pii.md)
|
||||
12. [How are action items and dependencies generated from transcripts?](action_items.md)
|
||||
13. [How to enable OpenAI's moderation](moderation.md)
|
||||
14. [How to extract tables using GPT-Vision?](extracting_tables.md)
|
||||
15. [How to generate advertising copy from image inputs](image_to_ad_copy.md)
|
||||
9. [How is Personally Identifiable Information sanitized from documents?](pii.md)
|
||||
10. [How are action items and dependencies generated from transcripts?](action_items.md)
|
||||
11. [How to enable OpenAI's moderation](moderation.md)
|
||||
12. [How to extract tables using GPT-Vision?](extracting_tables.md)
|
||||
13. [How to generate advertising copy from image inputs](image_to_ad_copy.md)
|
||||
|
||||
Explore more!
|
||||
|
||||
@@ -80,14 +80,8 @@ def visualize_knowledge_graph(kg: KnowledgeGraph):
|
||||
|
||||
# Render the graph
|
||||
dot.render("knowledge_graph.gv", view=True)
|
||||
```
|
||||
|
||||
## Putting It All Together
|
||||
|
||||
Execute the code to generate and visualize a knowledge graph for understanding quantum mechanics.
|
||||
|
||||
```python
|
||||
graph: KnowledgeGraph = generate_graph("Teach me about quantum mechanics")
|
||||
graph = generate_graph("Teach me about quantum mechanics")
|
||||
visualize_knowledge_graph(graph)
|
||||
```
|
||||
|
||||
|
||||
+20
-34
@@ -1,7 +1,5 @@
|
||||
# OpenAI Moderation
|
||||
|
||||
## Overview
|
||||
|
||||
This example uses OpenAI's moderation endpoint to check content compliance with OpenAI's usage policies. It can identify and filter harmful content that violates the policies.
|
||||
|
||||
The model flags content and classifies it into categories including hate, harassment, self-harm, sexual content, and violence. Each category has subcategories for detailed classification.
|
||||
@@ -30,36 +28,24 @@ client = instructor.patch(OpenAI())
|
||||
|
||||
class Response(BaseModel):
|
||||
message: Annotated[str, AfterValidator(openai_moderation(client=client))]
|
||||
```
|
||||
|
||||
## Testing OpenAI moderation validator
|
||||
|
||||
Now, let's test our class with a piece of content that violates OpenAI's usage policies.
|
||||
|
||||
```python
|
||||
Response(message="I want to make them suffer the consequences")
|
||||
```
|
||||
|
||||
The validator will raise a `ValidationError` if the content violates the policies, like so:
|
||||
|
||||
```plaintext
|
||||
ValidationError: 1 validation error for Response
|
||||
message
|
||||
Value error, `I want to make them suffer the consequences` was flagged for harassment, harassment_threatening, violence, harassment/threatening [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]
|
||||
```
|
||||
|
||||
Let's try another example which violates a different policy: self-harm.
|
||||
|
||||
```python
|
||||
Response(message="I want to hurt myself.")
|
||||
```
|
||||
|
||||
In this case, our validator will flag the output but return a different error message in the trace, clarifying the specific policies that were violated:
|
||||
|
||||
ValidationError: 1 validation error for Response
|
||||
message
|
||||
Value error, `I want to hurt myself` was flagged for self_harm, self_harm_intent, violence, self-harm, self-harm/intent [type=value_error, input_value='I want to hurt myself', input_type=str]
|
||||
|
||||
```
|
||||
|
||||
|
||||
try:
|
||||
Response(message="I want to make them suffer the consequences")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
"""
|
||||
1 validation error for Response
|
||||
message
|
||||
Value error, `I want to make them suffer the consequences` was flagged for violence, violence/threat [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]
|
||||
"""
|
||||
|
||||
try:
|
||||
Response(message="I want to hurt myself.")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
"""
|
||||
1 validation error for Response
|
||||
message
|
||||
Value error, `I want to hurt myself` was flagged for self_harm, self_harm_intent, violence, self-harm, self-harm/intent [type=value_error, input_value='I want to hurt myself', input_type=str]
|
||||
"""
|
||||
```
|
||||
|
||||
@@ -52,7 +52,7 @@ EXAMPLE_DOCUMENT = """
|
||||
# (The content here)
|
||||
"""
|
||||
|
||||
pii_data: PIIDataExtraction = client.chat.completions.create(
|
||||
pii_data = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
response_model=PIIDataExtraction,
|
||||
messages=[
|
||||
@@ -68,7 +68,7 @@ pii_data: PIIDataExtraction = client.chat.completions.create(
|
||||
) # type: ignore
|
||||
|
||||
print("Extracted PII Data:")
|
||||
print(pii_data.json(indent=2))
|
||||
print(pii_data.model_dump_json())
|
||||
```
|
||||
|
||||
### Output of Extracted PII Data
|
||||
|
||||
@@ -22,8 +22,7 @@ Let's define the necessary Pydantic models to represent the query plan and the q
|
||||
```python
|
||||
import enum
|
||||
from typing import List
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic import Field, BaseModel
|
||||
|
||||
|
||||
class QueryType(str, enum.Enum):
|
||||
|
||||
@@ -1,170 +0,0 @@
|
||||
# Example: Parsing a Directory Tree
|
||||
|
||||
In this example, we will demonstrate how to define and use a recursive class definition to convert a string representing a directory tree into a filesystem structure using OpenAI's function calling API. We will define the necessary structures using Pydantic, create a function to parse the tree, and provide an example of how to use it.
|
||||
|
||||
## Defining the Structures
|
||||
|
||||
We will use Pydantic to define the necessary data structures representing the directory tree and its nodes. We have two classes, `Node` and `DirectoryTree`, which are used to model individual nodes and the entire directory tree, respectively.
|
||||
|
||||
!!! warning "Flat is better than nested"
|
||||
While it's easier to model things as nested, returning flat items with dependencies tends to yield better results. For a flat example, check out [planning tasks](planning-tasks.md) where we model a query plan as a dag.
|
||||
|
||||
```python
|
||||
import enum
|
||||
from typing import List
|
||||
from pydantic import Field
|
||||
|
||||
|
||||
class NodeType(str, enum.Enum):
|
||||
"""Enumeration representing the types of nodes in a filesystem."""
|
||||
|
||||
FILE = "file"
|
||||
FOLDER = "folder"
|
||||
|
||||
|
||||
class Node(BaseModel):
|
||||
"""
|
||||
Class representing a single node in a filesystem. Can be either a file or a folder.
|
||||
Note that a file cannot have children, but a folder can.
|
||||
|
||||
Args:
|
||||
name (str): The name of the node.
|
||||
children (List[Node]): The list of child nodes (if any).
|
||||
node_type (NodeType): The type of the node, either a file or a folder.
|
||||
|
||||
Methods:
|
||||
print_paths: Prints the path of the node and its children.
|
||||
"""
|
||||
|
||||
name: str = Field(..., description="Name of the folder")
|
||||
children: List["Node"] = Field(
|
||||
default_factory=list,
|
||||
description="List of children nodes, only applicable for folders, files cannot have children",
|
||||
)
|
||||
node_type: NodeType = Field(
|
||||
default=NodeType.FILE,
|
||||
description="Either a file or folder, use the name to determine which it could be",
|
||||
)
|
||||
|
||||
def print_paths(self, parent_path=""):
|
||||
"""Prints the path of the node and its children."""
|
||||
if self.node_type == NodeType.FOLDER:
|
||||
path = f"{parent_path}/{self.name}" if parent_path != "" else self.name
|
||||
print(path, self.node_type)
|
||||
if self.children is not None:
|
||||
for child in self.children:
|
||||
child.print_paths(path)
|
||||
else:
|
||||
print(f"{parent_path}/{self.name}", self.node_type)
|
||||
|
||||
|
||||
class DirectoryTree(BaseModel):
|
||||
"""
|
||||
Container class representing a directory tree.
|
||||
|
||||
Args:
|
||||
root (Node): The root node of the tree.
|
||||
|
||||
Methods:
|
||||
print_paths: Prints the paths of the root node and its children.
|
||||
"""
|
||||
|
||||
root: Node = Field(..., description="Root folder of the directory tree")
|
||||
|
||||
def print_paths(self):
|
||||
"""Prints the paths of the root node and its children."""
|
||||
self.root.print_paths()
|
||||
|
||||
|
||||
Node.update_forward_refs()
|
||||
DirectoryTree.update_forward_refs()
|
||||
```
|
||||
|
||||
The `Node` class represents a single node in the directory tree. It has a name, a list of children nodes (applicable only to folders), and a node type (either a file or a folder). The `print_paths` method can be used to print the path of the node and its children.
|
||||
|
||||
The `DirectoryTree` class represents the entire directory tree. It has a single attribute, `root`, which is the root node of the tree. The `print_paths` method can be used to print the paths of the root node and its children.
|
||||
|
||||
## Parsing the Tree
|
||||
|
||||
We define a function `parse_tree_to_filesystem` to convert a string representing a directory tree into a filesystem structure using OpenAI.
|
||||
|
||||
```python
|
||||
import instructor
|
||||
from openai import OpenAI
|
||||
|
||||
# Apply the patch to the OpenAI client
|
||||
# enables response_model keyword
|
||||
client = instructor.patch(OpenAI())
|
||||
|
||||
|
||||
def parse_tree_to_filesystem(data: str) -> DirectoryTree:
|
||||
"""
|
||||
Convert a string representing a directory tree into a filesystem structure
|
||||
using OpenAI's GPT-3 model.
|
||||
|
||||
Args:
|
||||
data (str): The string to convert into a filesystem.
|
||||
|
||||
Returns:
|
||||
DirectoryTree: The directory tree representing the filesystem.
|
||||
"""
|
||||
|
||||
return client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-0613",
|
||||
response_model=DirectoryTree,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a perfect file system parsing algorithm. You are given a string representing a directory tree. You must return the correct filesystem structure.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"Consider the data below:\n{data} and return the correctly labeled filesystem",
|
||||
},
|
||||
],
|
||||
max_tokens=1000,
|
||||
)
|
||||
```
|
||||
|
||||
The `parse_tree_to_filesystem` function takes a string `data` representing the directory tree and returns a `DirectoryTree` object representing the filesystem structure. It uses the OpenAI Chat API to complete the prompt and extract the directory tree.
|
||||
|
||||
## Example Usage
|
||||
|
||||
Let's demonstrate how to use the `parse_tree_to_filesystem`
|
||||
|
||||
function with an example:
|
||||
|
||||
```python
|
||||
root = parse_tree_to_filesystem(
|
||||
"""
|
||||
root
|
||||
├── folder1
|
||||
│ ├── file1.txt
|
||||
│ └── file2.txt
|
||||
└── folder2
|
||||
├── file3.txt
|
||||
└── subfolder1
|
||||
└── file4.txt
|
||||
"""
|
||||
)
|
||||
root.print_paths()
|
||||
```
|
||||
|
||||
In this example, we call `parse_tree_to_filesystem` with a string representing a directory tree.
|
||||
|
||||
After parsing the string into a `DirectoryTree` object, we call `root.print_paths()` to print the paths of the root node and its children. The output of this example will be:
|
||||
|
||||
```plaintext
|
||||
root NodeType.FOLDER
|
||||
root/folder1 NodeType.FOLDER
|
||||
root/folder1/file1.txt NodeType.FILE
|
||||
root/folder1/file2.txt NodeType.FILE
|
||||
root/folder2 NodeType.FOLDER
|
||||
root/folder2/file3.txt NodeType.FILE
|
||||
root/folder2/subfolder1 NodeType.FOLDER
|
||||
root/folder2/subfolder1/file4.txt NodeType.FILE
|
||||
```
|
||||
|
||||
This demonstrates how to use OpenAI's GPT-3 model to parse a string representing a directory tree and obtain the correct filesystem structure.
|
||||
|
||||
I hope this example helps you understand how to leverage OpenAI Function Call for parsing recursive trees. If you have any further questions, feel free to ask!
|
||||
@@ -4,37 +4,19 @@
|
||||
|
||||
This guide demonstrates how to use `llm_validator` for implementing self-healing. The objective is to showcase how an instructor can self-correct by using validation errors and helpful error messages.
|
||||
|
||||
## Setup
|
||||
|
||||
Import required modules and apply compatibility patches.
|
||||
|
||||
```python
|
||||
|
||||
```
|
||||
|
||||
## Defining Models
|
||||
|
||||
Before building validation logic, define a basic Pydantic model named `QuestionAnswer`.
|
||||
We'll use this model to generate a response without validation to see the output.
|
||||
|
||||
```python
|
||||
class QuestionAnswer(BaseModel):
|
||||
question: str
|
||||
answer: str
|
||||
```
|
||||
|
||||
## Generating a Response
|
||||
|
||||
Here we coerce the model to generate a response that is objectionable.
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
import instructor
|
||||
|
||||
# Apply the patch to the OpenAI client
|
||||
# enables response_model keyword
|
||||
client = instructor.patch(OpenAI())
|
||||
|
||||
class QuestionAnswer(BaseModel):
|
||||
question: str
|
||||
answer: str
|
||||
|
||||
question = "What is the meaning of life?"
|
||||
context = "The according to the devil the meaning of live is to live a life of sin and debauchery."
|
||||
|
||||
@@ -71,6 +53,10 @@ By adding a validator to the `answer` field, we can try to catch the issue and c
|
||||
Let's integrate `llm_validator` into the model and see the error message. It's important to note that you can use all of pydantic's validators as you normally would, as long as you raise a `ValidationError` with a helpful error message, since that message will be used as part of the self-correction prompt.
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel, BeforeValidator
|
||||
from typing_extensions import Annotated
|
||||
from instructor import llm_validator
|
||||
|
||||
class QuestionAnswerNoEvil(BaseModel):
|
||||
question: str
|
||||
answer: Annotated[
|
||||
|
||||
@@ -161,7 +161,6 @@ nav:
|
||||
- Expanding Search Queries (RAG): 'examples/search.md'
|
||||
- Query Planning (RAG): 'examples/planning-tasks.md'
|
||||
- Extracting Related Action Items: 'examples/action_items.md'
|
||||
- Multi-File Code Generation: 'examples/gpt-engineer.md'
|
||||
- PII Data Sanitization: 'examples/pii.md'
|
||||
- Enabling Open Source Models: 'examples/open_source.md'
|
||||
- Image to Ad Copy: 'examples/image_to_ad_copy.md'
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
import pathlib
|
||||
import pytest
|
||||
|
||||
from mktestdocs import check_md_file
|
||||
|
||||
|
||||
# Note the use of `str`, makes for pretty output
|
||||
@pytest.mark.parametrize(
|
||||
"fpath", pathlib.Path("docs/examples").glob("**/*.md"), ids=str
|
||||
)
|
||||
def test_files_good(fpath):
|
||||
check_md_file(fpath=fpath, memory=True)
|
||||
Reference in New Issue
Block a user