update docs

This commit is contained in:
Jason
2023-09-13 21:51:29 -04:00
parent d88f23321f
commit 9651a09b17
11 changed files with 285 additions and 26 deletions
+13
View File
@@ -0,0 +1,13 @@
# API Reference
::: instructor.patch
::: instructor.dsl.validators
::: instructor.dsl.citation
::: instructor.dsl.multitask
::: instructor.dsl.maybe
::: instructor.function_calls
-3
View File
@@ -1,3 +0,0 @@
# API: MultiTask
::: instructor.dsl.multitask
+2 -1
View File
@@ -1,10 +1,11 @@
from .function_calls import OpenAISchema, openai_function, openai_schema
from .dsl import MultiTask, Maybe, Validator, llm_validator
from .dsl import MultiTask, Maybe, Validator, llm_validator, CitationMixin
from .patch import patch
__all__ = [
"OpenAISchema",
"openai_function",
"CitationMixin",
"MultiTask",
"Maybe",
"openai_schema",
+3 -2
View File
@@ -2,13 +2,14 @@ from .completion import ChatCompletion
from .messages import *
from .multitask import MultiTask
from .maybe import Maybe
from .validators import Validator, llm_validator
from .validators import llm_validator
from .citation import CitationMixin
__all__ = [
"ChatCompletion",
"CitationMixin",
"MultiTask",
"messages",
"Maybe",
"Validator",
"llm_validator",
]
+96
View File
@@ -0,0 +1,96 @@
from pydantic import BaseModel, Field, FieldValidationInfo, model_validator
from typing import List
class CitationMixin(BaseModel):
    """
    Helpful mixin that can use `validation_context={"context": context}` in
    `from_response` to find the span of each substring quote in the context.

    ## Usage

    ```python
    from pydantic import Field
    from instructor import CitationMixin

    class User(CitationMixin):
        name: str = Field(description="The name of the person")
        age: int = Field(description="The age of the person")
        role: str = Field(description="The role of the person")

    context = "Betty was a student. Jason was a student. Jason is 20 years old"

    user = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                "content": f"Extract jason from {context}",
            },
        ],
        response_model=User,
        validation_context={"context": context},
    )

    for quote in user.substring_quotes:
        assert quote in context

    print(user.model_dump())
    ```

    ## Result

    ```
    {
        "name": "Jason Liu",
        "age": 20,
        "role": "student",
        "substring_quotes": [
            "Jason was a student",
            "Jason is 20 years old",
        ]
    }
    ```
    """

    substring_quotes: List[str] = Field(
        description="List of unique and specific substrings of the quote that was used to answer the question.",
    )

    @model_validator(mode="after")
    def validate_sources(self, info: FieldValidationInfo) -> "CitationMixin":
        """
        Replace every entry of `substring_quotes` with the text actually found
        in the validation context.

        For each quote, the (fuzzy) span of the quote inside
        `info.context["context"]` is located and the quote is replaced by the
        exact slice of the context. Quotes for which no span is found are
        dropped from the list.

        Validation is skipped (the model is returned unchanged) when no
        validation context was supplied or when it has no `"context"` entry.

        Parameters:
            info (FieldValidationInfo): Validation info carrying the optional
                `context` mapping passed by the caller.

        Returns:
            CitationMixin: This instance, with `substring_quotes` rewritten
                when a context was available.
        """
        # Function-scope import: this module defines no module-level logger.
        import logging

        log = logging.getLogger(__name__)

        if info.context is None:
            log.info("No context found, skipping validation")
            return self

        # The text the quotes must be grounded in.
        text_chunks = info.context.get("context", None)
        if text_chunks is None:
            # Without a text to search there is nothing to validate against;
            # treat it the same as a missing validation context.
            log.info("No context found, skipping validation")
            return self

        # Materialise the new list before assigning, because `get_spans`
        # iterates over the current `substring_quotes`.
        self.substring_quotes = [
            text_chunks[start:end] for start, end in self.get_spans(text_chunks)
        ]
        return self

    def _get_span(self, quote: str, context: str, errs: int = 5):
        """
        Yield the `(start, end)` span(s) of `quote` inside `context`.

        An exact match is tried first; if that fails, the number of allowed
        fuzzy-match errors is raised one at a time up to `errs`. Nothing is
        yielded when no match is found within that budget.

        Parameters:
            quote (str): The text to look for; matched literally.
            context (str): The text to search in.
            errs (int): Maximum number of fuzzy-match errors to tolerate.
        """
        import regex

        # Escape the quote so that characters such as `.`, `(` or `?` are
        # matched literally instead of being interpreted as regex syntax.
        literal = regex.escape(quote)

        # Always try the exact match (0 errors), even for a negative `errs`.
        for allowed in range(max(errs, 0) + 1):
            match = regex.search(f"({literal}){{e<={allowed}}}", context)
            if match is not None:
                yield from match.spans()
                return

    def get_spans(self, context: str):
        """
        Yield the span of every entry of `substring_quotes` found in `context`.

        Quotes that cannot be located contribute no span.
        """
        for quote in self.substring_quotes:
            yield from self._get_span(quote, context)
+31 -1
View File
@@ -3,6 +3,10 @@ from typing import Type, Optional
class MaybeBase(BaseModel):
"""
Extract a result from a model, if any, otherwise set the error and message fields.
"""
result: Optional[BaseModel]
error: bool = Field(default=False)
message: Optional[str]
@@ -13,7 +17,33 @@ class MaybeBase(BaseModel):
def Maybe(model: Type[BaseModel]) -> MaybeBase:
"""
Create a Maybe model for a given Pydantic model.
Create a Maybe model for a given Pydantic model. This allows you to return a model that includes fields for `result`, `error`, and `message` for situations where the data may not be present in the context.
## Usage
```python
from pydantic import BaseModel, Field
from instructor import Maybe
class User(BaseModel):
name: str = Field(description="The name of the person")
age: int = Field(description="The age of the person")
role: str = Field(description="The role of the person")
MaybeUser = Maybe(User)
```
## Result
```python
class MaybeUser(BaseModel):
result: Optional[User]
error: bool = Field(default=False)
message: Optional[str]
def __bool__(self):
return self.result is not None
```
Parameters:
model (Type[BaseModel]): The Pydantic model to wrap with Maybe.
+32 -14
View File
@@ -58,21 +58,39 @@ def MultiTask(
for a specific task, names and descriptions are automatically generated. However
they can be overridden.
Note:
Using this function is equivalent to creating a class that inherits from
OpenAISchema and has a list of the subtask class as a field.
## Usage
```python
class MultiTask(OpenAISchema):
\"""
Correct segmentation of `{subtask_class.__name__}` tasks
\"""
tasks: List[subtask_class] = Field(
default_factory=list,
repr=False,
description=f"Correctly segmented list of `{subtask_class.__name__}` tasks",
)
```
```python
from pydantic import BaseModel, Field
from instructor import MultiTask
class User(BaseModel):
name: str = Field(description="The name of the person")
age: int = Field(description="The age of the person")
role: str = Field(description="The role of the person")
MultiUser = MultiTask(User)
```
## Result
```python
class MultiUser(OpenAISchema, MultiTaskBase):
tasks: List[User] = Field(
default_factory=list,
repr=False,
description="Correctly segmented list of `User` tasks",
)
@classmethod
def from_streaming_response(cls, completion) -> Generator[User]:
'''
Parse the streaming response from OpenAI and yield a `User` object
for each task in the response
'''
json_chunks = cls.extract_json(completion)
yield from cls.tasks_from_chunks(json_chunks)
```
Parameters:
subtask_class (Type[OpenAISchema]): The base class to use for the MultiTask
+24
View File
@@ -33,6 +33,30 @@ def llm_validator(
"""
Create a validator that uses the LLM to validate an attribute
## Usage
```python
from typing import Annotated
from instructor import llm_validator
from pydantic import BaseModel, Field, ValidationError, field_validator
class User(BaseModel):
name: Annotated[str, llm_validator("The name must be a full name all lowercase")]
age: int = Field(description="The age of the person")
try:
user = User(name="Jason Liu", age=20)
except ValidationError as e:
print(e)
```
```
1 validation error for User
name
The name is valid but not all lowercase (type=value_error.llm_validator)
```
Note that here, the error message is written by the LLM, and the error type is `value_error.llm_validator`.
Parameters:
statement (str): The statement to validate
model (str): The LLM to use for validation (default: "gpt-3.5-turbo-0613")
+40 -1
View File
@@ -123,6 +123,46 @@ class openai_function:
class OpenAISchema(BaseModel):
"""
Augments a Pydantic model with OpenAI's schema for function calling
This class augments a Pydantic model with OpenAI's schema for function calling. The schema is generated from the model's signature and docstring. The schema can be used to validate the response from OpenAI's API and extract the function call.
## Usage
```python
from instructor import OpenAISchema
class User(OpenAISchema):
name: str
age: int
completion = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[{
"content": "Jason is 20 years old",
"role": "user"
}],
functions=[User.openai_schema],
function_call={"name": User.openai_schema["name"]},
)
user = User.from_response(completion)
print(user.model_dump())
```
## Result
```
{
"name": "Jason Liu",
"age": 20,
}
```
"""
@classmethod
@property
def openai_schema(cls):
@@ -161,7 +201,6 @@ class OpenAISchema(BaseModel):
)
_remove_a_key(parameters, "additionalProperties")
_remove_a_key(parameters, "title")
return {
"name": schema["title"],
"description": schema["description"],
+41
View File
@@ -155,6 +155,47 @@ original_chatcompletion_async = openai.ChatCompletion.acreate
def patch():
"""
Patch the `openai.ChatCompletion.create` and `openai.ChatCompletion.acreate` methods to support the `response_model` parameter.
## Usage
```python
from pydantic import BaseModel, Field
import instructor
instructor.patch()
class User(BaseModel):
name: str = Field(description="The name of the person")
age: int = Field(description="The age of the person")
role: str = Field(description="The role of the person")
user = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "Jason is 20 years old",
},
],
response_model=User,
)
print(user.model_dump())
```
## Result
```
{
"name": "Jason Liu",
"age": 20,
"role": "student",
}
```
"""
openai.ChatCompletion.create = wrap_chatcompletion(original_chatcompletion)
openai.ChatCompletion.acreate = wrap_chatcompletion(original_chatcompletion_async)
+3 -4
View File
@@ -80,10 +80,9 @@ nav:
- "Usage Tracking": "cli/usage.md"
- "Finetuning GPT": "cli/finetune.md"
- API Reference:
- 'OpenAISchema': 'openai_schema.md'
- 'MultiTask': 'api_multitask.md'
- "Introduction: Writing Prompts": "writing-prompts.md"
- "Prompting Templates": "chat-completion.md"
- 'Core Library': 'api.md'
- "Prompting DSL: Intro": "writing-prompts.md"
- "Prompting DSL Reference": "chat-completion.md"
- Blog:
- "blog/index.md"