Docstring parsing using docstring-parser (#76)

* adds description for params

* add test for missing param description

* adds docstring parsing to OpenAISchema

* docs

---------

Co-authored-by: Jason Liu <jxnl@users.noreply.github.com>
This commit is contained in:
Alexander Dicke
2023-08-20 08:40:59 +02:00
committed by GitHub
parent 1ebb7b95c1
commit 76ac9b4d12
6 changed files with 658 additions and 573 deletions
+22 -8
View File
@@ -130,20 +130,29 @@ from instructor import OpenAISchema
from pydantic import Field
class UserDetails(OpenAISchema):
"Correctly extracted user information"
"""
Correctly extracted user information
:param age: age of the user
"""
name: str = Field(..., description="User's full name")
age: int
```
In this updated schema, we use the `Field` class from `pydantic` to add descriptions to the `name` field. The description provides information about the field, giving even more context to the language model.
In this updated schema, we use the `Field` class from `pydantic` to add descriptions to the `name` field. Moreover, we use the docstring to add information for the parameter `age`.
In both cases, the description provides information about the fields, giving even more context to the language model.
Information from the docstring is extracted using [docstring-parser](https://github.com/rr-/docstring_parser) which supports different docstring styles.
Note that if both the `Field` and the docstring contain a description for a parameter, the `Field`'s description is used.
!!! note "Code, schema, and prompt"
We can run `openai_schema` to see exactly what the API will see, notice how the docstrings, attributes, types, and field descriptions are now part of the schema. This describes on this library's core philosophies.
We can run `openai_schema` to see exactly what the API will see. Notice how the docstrings, attributes, types, and parameter descriptions are now part of the schema. This describes one of this library's core philosophies.
```python hl_lines="2 3"
class UserDetails(OpenAISchema):
"Correctly extracted user information"
"""
Correctly extracted user information
:param name: the user's full name
:param age: age of the user
"""
name: str = Field(..., description="User's full name")
age: int
@@ -158,11 +167,12 @@ In this updated schema, we use the `Field` class from `pydantic` to add descript
"type": "object",
"properties": {
"name": {
"description": "User's full name",
"type": "string"
"type": "string",
"description": "User's full name"
},
"age": {
"type": "integer",
"description": "age of the user"
}
},
"required": [
@@ -182,7 +192,11 @@ from instructor import OpenAISchema
from pydantic import Field
class UserDetails(OpenAISchema):
"Correctly extracted user information"
"""
Correctly extracted user information
:param name: the user's full name
:param age: age of the user
"""
name: str = Field(..., description="User's full name")
age: int
+14 -5
View File
@@ -21,6 +21,7 @@
# SOFTWARE.
import json
from docstring_parser import parse
from functools import wraps
from typing import Any, Callable
from pydantic import BaseModel, validate_arguments
@@ -65,12 +66,17 @@ class openai_function:
def __init__(self, func: Callable) -> None:
self.func = func
self.validate_func = validate_arguments(func)
self.docstring = parse(self.func.__doc__)
parameters = self.validate_func.model.model_json_schema()
parameters["properties"] = {
k: v
for k, v in parameters["properties"].items()
if k not in ("v__duplicate_kwargs", "args", "kwargs")
}
for param in self.docstring.params:
if (name := param.arg_name) in parameters["properties"] and (description := param.description):
parameters["properties"][name]["description"] = description
parameters["required"] = sorted(
k for k, v in parameters["properties"].items() if not "default" in v
)
@@ -78,7 +84,7 @@ class openai_function:
_remove_a_key(parameters, "title")
self.openai_schema = {
"name": self.func.__name__,
"description": self.func.__doc__,
"description": self.docstring.short_description,
"parameters": parameters,
}
self.model = self.validate_func.model
@@ -128,17 +134,20 @@ class OpenAISchema(BaseModel):
model_json_schema (dict): A dictionary in the format of OpenAI's schema as jsonschema
"""
schema = cls.model_json_schema()
docstring = parse(cls.__doc__)
parameters = {
k: v for k, v in schema.items() if k not in ("title", "description")
}
for param in docstring.params:
if (name := param.arg_name) in parameters["properties"] and (description := param.description):
if "description" not in parameters["properties"][name]:
parameters["properties"][name]["description"] = description
parameters["required"] = sorted(
k for k, v in parameters["properties"].items() if not "default" in v
)
if "description" not in schema:
schema[
"description"
] = f"Correctly extracted `{cls.__name__}` with all the required parameters with correct types"
schema["description"] = docstring.short_description
_remove_a_key(parameters, "additionalProperties")
_remove_a_key(parameters, "title")
Generated
+571 -558
View File
File diff suppressed because it is too large Load Diff
+1
View File
@@ -12,6 +12,7 @@ repository = "https://github.com/jxnl/instructor"
python = "^3.9"
openai = "^0.27.8"
pydantic = "^2.0.2"
docstring-parser = "^0.15"
[tool.poetry.group.dev.dependencies]
+2 -1
View File
@@ -1,3 +1,4 @@
openai
pydantic
pytest
pytest
docstring-parser
+48 -1
View File
@@ -1,7 +1,9 @@
from typing import Literal
import pytest
from pydantic import BaseModel
from instructor import openai_schema, OpenAISchema
from instructor import openai_schema, OpenAISchema, openai_function
def test_openai_schema():
@@ -40,3 +42,48 @@ def test_no_docstring():
Dummy.openai_schema["description"]
== "Correctly extracted `Dummy` with all the required parameters with correct types"
)
def test_openai_function():
    """Verify that `openai_function` copies parameter docs into the schema.

    Two functions are decorated: one documents both parameters, the other
    documents only `location`. The assertions target the second one and
    check that the documented parameter receives its description, that the
    `Literal` annotation is exposed as an enum, and that the undocumented
    parameter carries no "description" key.
    """

    # Decorated only to exercise schema construction with every parameter
    # documented; no assertion below reads its schema.
    @openai_function
    def get_current_weather(
        location: str, format: Literal["celsius", "fahrenheit"] = "celsius"
    ):
        """
        Gets the current weather in a given location, use this function for any questions related to the weather

        Parameters
        ----------
        location
            The city to get the weather, e.g. San Francisco. Guess the location from user messages
        format
            A string with the full content of what the given role said
        """

    @openai_function
    def get_current_weather_no_format_docstring(
        location: str, format: Literal["celsius", "fahrenheit"] = "celsius"
    ):
        """
        Gets the current weather in a given location, use this function for any questions related to the weather

        Parameters
        ----------
        location
            The city to get the weather, e.g. San Francisco. Guess the location from user messages
        """

    schema = get_current_weather_no_format_docstring.openai_schema
    properties = schema["parameters"]["properties"]

    expected_location_doc = (
        "The city to get the weather, e.g. San Francisco. "
        "Guess the location from user messages"
    )
    assert properties["location"]["description"] == expected_location_doc
    assert properties["format"]["enum"] == ["celsius", "fahrenheit"]

    # `format` has no docstring entry, so the lookup must fail on the
    # "description" key specifically.
    with pytest.raises(KeyError, match="description"):
        properties["format"]["description"]