From 9651a09b17c163dc91cd37aade52975a1618cbf3 Mon Sep 17 00:00:00 2001 From: Jason Date: Wed, 13 Sep 2023 21:51:29 -0400 Subject: [PATCH] update docs --- docs/api.md | 13 +++++ docs/api_multitask.md | 3 -- instructor/__init__.py | 3 +- instructor/dsl/__init__.py | 5 +- instructor/dsl/citation.py | 96 ++++++++++++++++++++++++++++++++++++ instructor/dsl/maybe.py | 32 +++++++++++- instructor/dsl/multitask.py | 46 +++++++++++------ instructor/dsl/validators.py | 24 +++++++++ instructor/function_calls.py | 41 ++++++++++++++- instructor/patch.py | 41 +++++++++++++++ mkdocs.yml | 7 ++- 11 files changed, 285 insertions(+), 26 deletions(-) create mode 100644 docs/api.md delete mode 100644 docs/api_multitask.md create mode 100644 instructor/dsl/citation.py diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..dbf1323 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,13 @@ +# API Reference + +::: instructor.patch + +::: instructor.dsl.validators + +::: instructor.dsl.citation + +::: instructor.dsl.multitask + +::: instructor.dsl.maybe + +::: instructor.function_calls \ No newline at end of file diff --git a/docs/api_multitask.md b/docs/api_multitask.md deleted file mode 100644 index 45b51cb..0000000 --- a/docs/api_multitask.md +++ /dev/null @@ -1,3 +0,0 @@ -# API: MultiTask - -::: instructor.dsl.multitask \ No newline at end of file diff --git a/instructor/__init__.py b/instructor/__init__.py index 18a6a26..50f734f 100644 --- a/instructor/__init__.py +++ b/instructor/__init__.py @@ -1,10 +1,11 @@ from .function_calls import OpenAISchema, openai_function, openai_schema -from .dsl import MultiTask, Maybe, Validator, llm_validator +from .dsl import MultiTask, Maybe, Validator, llm_validator, CitationMixin from .patch import patch __all__ = [ "OpenAISchema", "openai_function", + "CitationMixin", "MultiTask", "Maybe", "openai_schema", diff --git a/instructor/dsl/__init__.py b/instructor/dsl/__init__.py index 441128d..8c5f540 100644 --- a/instructor/dsl/__init__.py 
+++ b/instructor/dsl/__init__.py @@ -2,13 +2,14 @@ from .completion import ChatCompletion from .messages import * from .multitask import MultiTask from .maybe import Maybe -from .validators import Validator, llm_validator +from .validators import llm_validator +from .citation import CitationMixin __all__ = [ "ChatCompletion", + "CitationMixin", "MultiTask", "messages", "Maybe", - "Validator", "llm_validator", ] diff --git a/instructor/dsl/citation.py b/instructor/dsl/citation.py new file mode 100644 index 0000000..78c9b7d --- /dev/null +++ b/instructor/dsl/citation.py @@ -0,0 +1,96 @@ +from pydantic import BaseModel, Field, FieldValidationInfo, model_validator +from typing import List + + +class CitationMixin(BaseModel): + """ + Helpful mixin that can use `validation_context={"context": context}` in `from_response` to find the span of the substring_phrase in the context. + + ## Usage + + ```python + from pydantic import BaseModel, Field + from instructor import CitationMixin + + class User(CitationMixin): + name: str = Field(description="The name of the person") + age: int = Field(description="The age of the person") + role: str = Field(description="The role of the person") + + + context = "Betty was a student. Jason was a student. 
Jason is 20 years old" + + user = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + { + "role": "user", + "content": f"Extract jason from {context}", + }, + ], + response_model=User, + validation_context={"context": context}, + ) + + for quote in user.substring_quotes: + assert quote in context + + print(user.model_dump()) + ``` + + ## Result + ``` + { + "name": "Jason Liu", + "age": 20, + "role": "student", + "substring_quotes": [ + "Jason was a student", + "Jason is 20 years old", + ] + } + ``` + + """ + + substring_quotes: List[str] = Field( + description="List of unique and specific substrings of the quote that was used to answer the question.", + ) + + @model_validator(mode="after") + def validate_sources(self, info: FieldValidationInfo) -> "CitationMixin": + """ + For each substring_phrase, find the span of the substring_phrase in the context. + If the span is not found, remove the substring_phrase from the list. + """ + if info.context is None: + logger.info("No context found, skipping validation") + return self + + # Get the context from the info + text_chunks = info.context.get("context", None) + + # Get the spans of the substring_phrase in the context + spans = list(self.get_spans(text_chunks)) + # Replace the substring_phrase with the actual substring + self.substring_quotes = [text_chunks[span[0] : span[1]] for span in spans] + return self + + def _get_span(self, quote, context, errs=5): + import regex + + minor = quote + major = context + + errs_ = 0 + s = regex.search(f"({minor}){{e<={errs_}}}", major) + while s is None and errs_ <= errs: + errs_ += 1 + s = regex.search(f"({minor}){{e<={errs_}}}", major) + + if s is not None: + yield from s.spans() + + def get_spans(self, context): + for quote in self.substring_quotes: + yield from self._get_span(quote, context) diff --git a/instructor/dsl/maybe.py b/instructor/dsl/maybe.py index 5337643..b2780a1 100644 --- a/instructor/dsl/maybe.py +++ b/instructor/dsl/maybe.py @@ -3,6 +3,10 @@ from 
typing import Type, Optional class MaybeBase(BaseModel): + """ + Extract a result from a model, if any, otherwise set the error and message fields. + """ + result: Optional[BaseModel] error: bool = Field(default=False) message: Optional[str] @@ -13,7 +17,33 @@ class MaybeBase(BaseModel): def Maybe(model: Type[BaseModel]) -> MaybeBase: """ - Create a Maybe model for a given Pydantic model. + Create a Maybe model for a given Pydantic model. This allows you to return a model that includes fields for `result`, `error`, and `message` for situations where the data may not be present in the context. + + ## Usage + + ```python + from pydantic import BaseModel, Field + from instructor import Maybe + + class User(BaseModel): + name: str = Field(description="The name of the person") + age: int = Field(description="The age of the person") + role: str = Field(description="The role of the person") + + MaybeUser = Maybe(User) + ``` + + ## Result + + ```python + class MaybeUser(BaseModel): + result: Optional[User] + error: bool = Field(default=False) + message: Optional[str] + + def __bool__(self): + return self.result is not None + ``` Parameters: model (Type[BaseModel]): The Pydantic model to wrap with Maybe. diff --git a/instructor/dsl/multitask.py b/instructor/dsl/multitask.py index 4149e7b..15897ca 100644 --- a/instructor/dsl/multitask.py +++ b/instructor/dsl/multitask.py @@ -58,21 +58,39 @@ def MultiTask( for a specific task, names and descriptions are automatically generated. However they can be overridden. - Note: - Using this function is equivalent to creating a class that inherits from - OpenAISchema and has a list of the subtask class as a field. 
+ ## Usage - ```python - class MultiTask(OpenAISchema): - \""" - Correct segmentation of `{subtask_class.__name__}` tasks - \""" - tasks: List[subtask_class] = Field( - default_factory=list, - repr=False, - description=f"Correctly segmented list of `{subtask_class.__name__}` tasks", - ) - ``` + ```python + from pydantic import BaseModel, Field + from instructor import MultiTask + + class User(BaseModel): + name: str = Field(description="The name of the person") + age: int = Field(description="The age of the person") + role: str = Field(description="The role of the person") + + MultiUser = MultiTask(User) + ``` + + ## Result + + ```python + class MultiUser(OpenAISchema, MultiTaskBase): + tasks: List[User] = Field( + default_factory=list, + repr=False, + description="Correctly segmented list of `User` tasks", + ) + + @classmethod + def from_streaming_response(cls, completion) -> Generator[User]: + ''' + Parse the streaming response from OpenAI and yield a `User` object + for each task in the response + ''' + json_chunks = cls.extract_json(completion) + yield from cls.tasks_from_chunks(json_chunks) + ``` Parameters: subtask_class (Type[OpenAISchema]): The base class to use for the MultiTask diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py index 7e55d06..8af1baf 100644 --- a/instructor/dsl/validators.py +++ b/instructor/dsl/validators.py @@ -33,6 +33,30 @@ def llm_validator( """ Create a validator that uses the LLM to validate an attribute + ## Usage + + ```python + from instructor import llm_validator + from pydantic import BaseModel, Field, field_validator + + class User(BaseModel): + name: str = Annotated[str, llm_validator("The name must be a full name all lowercase")] + age: int = Field(description="The age of the person") + + try: + user = User(name="Jason Liu", age=20) + except ValidationError as e: + print(e) + ``` + + ``` + 1 validation error for User + name + The name is valid but not all lowercase (type=value_error.llm_validator) + 
``` + + Note that here, the error message is written by the LLM, and the error type is `value_error.llm_validator`. + Parameters: statement (str): The statement to validate model (str): The LLM to use for validation (default: "gpt-3.5-turbo-0613") diff --git a/instructor/function_calls.py b/instructor/function_calls.py index bce0571..97938c1 100644 --- a/instructor/function_calls.py +++ b/instructor/function_calls.py @@ -123,6 +123,46 @@ class openai_function: class OpenAISchema(BaseModel): + """ + Augments a Pydantic model with OpenAI's schema for function calling + + This class augments a Pydantic model with OpenAI's schema for function calling. The schema is generated from the model's signature and docstring. The schema can be used to validate the response from OpenAI's API and extract the function call. + + ## Usage + + ```python + from instructor import OpenAISchema + + class User(OpenAISchema): + name: str + age: int + + completion = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{ + "content": "Jason is 20 years old", + "role": "user" + }], + functions=[User.openai_schema], + function_call={"name": User.openai_schema["name"]}, + ) + + user = User.from_response(completion) + + print(user.model_dump()) + ``` + ## Result + + ``` + { + "name": "Jason Liu", + "age": 20, + } + ``` + + + """ + @classmethod @property def openai_schema(cls): @@ -161,7 +201,6 @@ class OpenAISchema(BaseModel): ) _remove_a_key(parameters, "additionalProperties") - _remove_a_key(parameters, "title") return { "name": schema["title"], "description": schema["description"], diff --git a/instructor/patch.py b/instructor/patch.py index b5b2b66..28f46b9 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -155,6 +155,47 @@ original_chatcompletion_async = openai.ChatCompletion.acreate def patch(): + """ + Patch the `openai.ChatCompletion.create` and `openai.ChatCompletion.acreate` methods to support the `response_model` parameter. 
+ + ## Usage + + ```python + from pydantic import BaseModel, Field + import instructor + + instructor.patch() + + class User(BaseModel): + name: str = Field(description="The name of the person") + age: int = Field(description="The age of the person") + role: str = Field(description="The role of the person") + + user = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + { + "role": "user", + "content": "Jason is 20 years old", + }, + ], + response_model=User, + ) + + print(user.model_dump()) + ``` + + ## Result + ``` + { + "name": "Jason Liu", + "age": 20, + "role": "student", + } + ``` + + + """ openai.ChatCompletion.create = wrap_chatcompletion(original_chatcompletion) openai.ChatCompletion.acreate = wrap_chatcompletion(original_chatcompletion_async) diff --git a/mkdocs.yml b/mkdocs.yml index 77332fe..d386c21 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,10 +80,9 @@ nav: - "Usage Tracking": "cli/usage.md" - "Finetuning GPT": "cli/finetune.md" - API Reference: - - 'OpenAISchema': 'openai_schema.md' - - 'MultiTask': 'api_multitask.md' - - "Introduction: Writing Prompts": "writing-prompts.md" - - "Prompting Templates": "chat-completion.md" + - 'Core Library': 'api.md' + - "Prompting DSL: Intro": "writing-prompts.md" + - "Prompting DSL Reference": "chat-completion.md" - Blog: - "blog/index.md"