update docs

2026-06-05 22:50:18 +00:00 · 2023-09-13 21:51:29 -04:00
parent d88f23321f
commit 9651a09b17
11 changed files with 285 additions and 26 deletions
@@ -0,0 +1,13 @@
+# API Reference
+
+::: instructor.patch
+
+::: instructor.dsl.validators
+
+::: instructor.dsl.citation
+
+::: instructor.dsl.multitask
+
+::: instructor.dsl.maybe
+
+::: instructor.function_calls
@@ -1,3 +0,0 @@
-# API: MultiTask
-
-::: instructor.dsl.multitask
@@ -1,10 +1,11 @@
 from .function_calls import OpenAISchema, openai_function, openai_schema
-from .dsl import MultiTask, Maybe, Validator, llm_validator
+from .dsl import MultiTask, Maybe, Validator, llm_validator, CitationMixin
 from .patch import patch

 __all__ = [
    "OpenAISchema",
    "openai_function",
+    "CitationMixin",
    "MultiTask",
    "Maybe",
    "openai_schema",
@@ -2,13 +2,14 @@ from .completion import ChatCompletion
 from .messages import *
 from .multitask import MultiTask
 from .maybe import Maybe
-from .validators import Validator, llm_validator
+from .validators import llm_validator
+from .citation import CitationMixin

 __all__ = [
    "ChatCompletion",
+    "CitationMixin",
    "MultiTask",
    "messages",
    "Maybe",
-    "Validator",
    "llm_validator",
 ]
@@ -0,0 +1,96 @@
+from pydantic import BaseModel, Field, FieldValidationInfo, model_validator
+from typing import List
+
+
+class CitationMixin(BaseModel):
+    """
+    Helpful mixin that can use `validation_context={"context": context}` in `from_response` to find the span of the substring_phrase in the context.
+
+    ## Usage
+
+    ```python
+    from pydantic import BaseModel, Field
+    from instructor import CitationMixin
+
+    class User(CitationMixin):
+        name: str = Field(description="The name of the person")
+        age: int = Field(description="The age of the person")
+        role: str = Field(description="The role of the person")
+
+
+    context = "Betty was a student. Jason was a student. Jason is 20 years old"
+
+    user = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": f"Extract jason from {context}",
+            },
+        ],
+        response_model=User,
+        validation_context={"context": context},
+    )
+
+    for quote in user.substring_quotes:
+        assert quote in context
+
+    print(user.model_dump())
+    ```
+
+    ## Result
+    ```
+    {
+        "name": "Jason Liu",
+        "age": 20,
+        "role": "student",
+        "substring_quotes": [
+            "Jason was a student",
+            "Jason is 20 years old",
+        ]
+    }
+    ```
+
+    """
+
+    substring_quotes: List[str] = Field(
+        description="List of unique and specific substrings of the quote that was used to answer the question.",
+    )
+
+    @model_validator(mode="after")
+    def validate_sources(self, info: FieldValidationInfo) -> "CitationMixin":
+        """
+        For each substring_phrase, find the span of the substring_phrase in the context.
+        If the span is not found, remove the substring_phrase from the list.
+        """
+        if info.context is None:
+            logger.info("No context found, skipping validation")
+            return self
+
+        # Get the context from the info
+        text_chunks = info.context.get("context", None)
+
+        # Get the spans of the substring_phrase in the context
+        spans = list(self.get_spans(text_chunks))
+        # Replace the substring_phrase with the actual substring
+        self.substring_quotes = [text_chunks[span[0] : span[1]] for span in spans]
+        return self
+
+    def _get_span(self, quote, context, errs=5):
+        import regex
+
+        minor = quote
+        major = context
+
+        errs_ = 0
+        s = regex.search(f"({minor}){{e<={errs_}}}", major)
+        while s is None and errs_ <= errs:
+            errs_ += 1
+            s = regex.search(f"({minor}){{e<={errs_}}}", major)
+
+        if s is not None:
+            yield from s.spans()
+
+    def get_spans(self, context):
+        for quote in self.substring_quotes:
+            yield from self._get_span(quote, context)
@@ -3,6 +3,10 @@ from typing import Type, Optional


 class MaybeBase(BaseModel):
+    """
+    Extract a result from a model, if any, otherwise set the error and message fields.
+    """
+
    result: Optional[BaseModel]
    error: bool = Field(default=False)
    message: Optional[str]
@@ -13,7 +17,33 @@ class MaybeBase(BaseModel):

 def Maybe(model: Type[BaseModel]) -> MaybeBase:
    """
-    Create a Maybe model for a given Pydantic model.
+    Create a Maybe model for a given Pydantic model. This allows you to return a model that includes fields for `result`, `error`, and `message` for situations where the data may not be present in the context.
+
+    ## Usage
+
+    ```python
+    from pydantic import BaseModel, Field
+    from instructor import Maybe
+
+    class User(BaseModel):
+        name: str = Field(description="The name of the person")
+        age: int = Field(description="The age of the person")
+        role: str = Field(description="The role of the person")
+
+    MaybeUser = Maybe(User)
+    ```
+
+    ## Result
+
+    ```python
+    class MaybeUser(BaseModel):
+        result: Optional[User]
+        error: bool = Field(default=False)
+        message: Optional[str]
+
+        def __bool__(self):
+            return self.result is not None
+    ```

    Parameters:
        model (Type[BaseModel]): The Pydantic model to wrap with Maybe.
@@ -58,21 +58,39 @@ def MultiTask(
    for a specific task, names and descriptions are automatically generated. However
    they can be overridden.

-    Note:
-        Using this function is equivalent to creating a class that inherits from
-        OpenAISchema and has a list of the subtask class as a field.
+    ## Usage

-        ```python
-        class MultiTask(OpenAISchema):
-            \"""
-            Correct segmentation of `{subtask_class.__name__}` tasks
-            \"""
-            tasks: List[subtask_class] = Field(
-                default_factory=list,
-                repr=False,
-                description=f"Correctly segmented list of `{subtask_class.__name__}` tasks",
-            )
-        ```
+    ```python
+    from pydantic import BaseModel, Field
+    from instructor import MultiTask
+
+    class User(BaseModel):
+        name: str = Field(description="The name of the person")
+        age: int = Field(description="The age of the person")
+        role: str = Field(description="The role of the person")
+
+    MultiUser = MultiTask(User)
+    ```
+
+    ## Result
+
+    ```python
+    class MultiUser(OpenAISchema, MultiTaskBase):
+        tasks: List[User] = Field(
+            default_factory=list,
+            repr=False,
+            description="Correctly segmented list of `User` tasks",
+        )
+
+        @classmethod
+        def from_streaming_response(cls, completion) -> Generator[User]:
+            '''
+            Parse the streaming response from OpenAI and yield a `User` object
+            for each task in the response
+            '''
+            json_chunks = cls.extract_json(completion)
+            yield from cls.tasks_from_chunks(json_chunks)
+    ```

    Parameters:
        subtask_class (Type[OpenAISchema]): The base class to use for the MultiTask
@@ -33,6 +33,30 @@ def llm_validator(
    """
    Create a validator that uses the LLM to validate an attribute

+    ## Usage
+
+    ```python
+    from instructor import llm_validator
+    from pydantic import BaseModel, Field, field_validator
+
+    class User(BaseModel):
+        name: str = Annotated[str, llm_validator("The name must be a full name all lowercase")]
+        age: int = Field(description="The age of the person")
+
+    try:
+        user = User(name="Jason Liu", age=20)
+    except ValidationError as e:
+        print(e)
+    ```
+
+    ```
+    1 validation error for User
+    name
+      The name is valid but not all lowercase (type=value_error.llm_validator)
+    ```
+
+    Note that here, the error message is written by the LLM, and the error type is `value_error.llm_validator`.
+
    Parameters:
        statement (str): The statement to validate
        model (str): The LLM to use for validation (default: "gpt-3.5-turbo-0613")
@@ -123,6 +123,46 @@ class openai_function:


 class OpenAISchema(BaseModel):
+    """
+    Augments a Pydantic model with OpenAI's schema for function calling
+
+    This class augments a Pydantic model with OpenAI's schema for function calling. The schema is generated from the model's signature and docstring. The schema can be used to validate the response from OpenAI's API and extract the function call.
+
+    ## Usage
+
+    ```python
+    from instructor import OpenAISchema
+
+    class User(OpenAISchema):
+        name: str
+        age: int
+
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[{
+            "content": "Jason is 20 years old",
+            "role": "user"
+        }],
+        functions=[User.openai_schema],
+        function_call={"name": User.openai_schema["name"]},
+    )
+
+    user = User.from_response(completion)
+
+    print(user.model_dump())
+    ```
+    ## Result
+
+    ```
+    {
+        "name": "Jason Liu",
+        "age": 20,
+    }
+    ```
+
+
+    """
+
    @classmethod
    @property
    def openai_schema(cls):
@@ -161,7 +201,6 @@ class OpenAISchema(BaseModel):
                )

        _remove_a_key(parameters, "additionalProperties")
-        _remove_a_key(parameters, "title")
        return {
            "name": schema["title"],
            "description": schema["description"],
@@ -155,6 +155,47 @@ original_chatcompletion_async = openai.ChatCompletion.acreate


 def patch():
+    """
+    Patch the `openai.ChatCompletion.create` and `openai.ChatCompletion.acreate` methods to support the `response_model` parameter.
+
+    ## Usage
+
+    ```python
+    from pydantic import BaseModel, Field
+    import instructor
+
+    instructor.patch()
+
+    class User(BaseModel):
+        name: str = Field(description="The name of the person")
+        age: int = Field(description="The age of the person")
+        role: str = Field(description="The role of the person")
+
+    user = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": "Jason is 20 years old",
+            },
+        ],
+        response_model=User,
+    )
+
+    print(user.model_dump())
+    ```
+
+    ## Result
+    ```
+    {
+        "name": "Jason Liu",
+        "age": 20,
+        "role": "student",
+    }
+    ```
+
+
+    """
    openai.ChatCompletion.create = wrap_chatcompletion(original_chatcompletion)
    openai.ChatCompletion.acreate = wrap_chatcompletion(original_chatcompletion_async)

@@ -80,10 +80,9 @@ nav:
      - "Usage Tracking": "cli/usage.md"
      - "Finetuning GPT": "cli/finetune.md"
  - API Reference:
-      - 'OpenAISchema': 'openai_schema.md'
-      - 'MultiTask': 'api_multitask.md'
-      - "Introduction: Writing Prompts": "writing-prompts.md"
-      - "Prompting Templates": "chat-completion.md"
+      - 'Core Library': 'api.md'
+      - "Prompting DSL: Intro": "writing-prompts.md"
+      - "Prompting DSL Reference": "chat-completion.md"
  - Blog:
    - "blog/index.md"