Revert "Docstring parsing using docstring-parser (#76)" (#81)

This reverts commit 76ac9b4d12.
This commit is contained in:
Jason Liu
2023-08-19 23:50:06 -07:00
committed by GitHub
parent 76ac9b4d12
commit 964c17ca1a
6 changed files with 573 additions and 658 deletions
+8 -22
View File
@@ -130,29 +130,20 @@ from instructor import OpenAISchema
from pydantic import Field
class UserDetails(OpenAISchema):
""""
Correctly extracted user information
:param age: age of the user
"""
"Correctly extracted user information"
name: str = Field(..., description="User's full name")
age: int
```
In this updated schema, we use the `Field` class from `pydantic` to add descriptions to the `name` field. Moreover, we use the docstring to add information for the parameter `age`.
In both cases, the description provides information about the fields, giving even more context to the language model.
Information from the docstring is extracted using [docstring-parser](https://github.com/rr-/docstring_parser) which supports different docstring styles.
Note that if both the `Field` and the docstring provide a description for the same parameter, the `Field`'s description is used.
In this updated schema, we use the `Field` class from `pydantic` to add descriptions to the `name` field. The description provides information about the field, giving even more context to the language model.
!!! note "Code, schema, and prompt"
We can run `openai_schema` to see exactly what the API will see. Notice how the docstrings, attributes, types, and parameter descriptions are now part of the schema. This describes one of this library's core philosophies.
We can run `openai_schema` to see exactly what the API will see. Notice how the docstrings, attributes, types, and field descriptions are now part of the schema. This describes one of this library's core philosophies.
```python hl_lines="2 3"
class UserDetails(OpenAISchema):
"""
Correctly extracted user information
:param name: the user's full name
:param age: age of the user
"""
"Correctly extracted user information"
name: str = Field(..., description="User's full name")
age: int
@@ -167,12 +158,11 @@ We can run `openai_schema` to see exactly what the API will see, notice how the
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "User's full name"
"description": "User's full name",
"type": "string"
},
"age": {
"type": "integer"
"description": "age of the user"
}
},
"required": [
@@ -192,11 +182,7 @@ from instructor import OpenAISchema
from pydantic import Field
class UserDetails(OpenAISchema):
"""
Correctly extracted user information
:param name: the user's full name
:param age: age of the user
"""
"Correctly extracted user information"
name: str = Field(..., description="User's full name")
age: int
+5 -14
View File
@@ -21,7 +21,6 @@
# SOFTWARE.
import json
from docstring_parser import parse
from functools import wraps
from typing import Any, Callable
from pydantic import BaseModel, validate_arguments
@@ -66,17 +65,12 @@ class openai_function:
def __init__(self, func: Callable) -> None:
self.func = func
self.validate_func = validate_arguments(func)
self.docstring = parse(self.func.__doc__)
parameters = self.validate_func.model.model_json_schema()
parameters["properties"] = {
k: v
for k, v in parameters["properties"].items()
if k not in ("v__duplicate_kwargs", "args", "kwargs")
}
for param in self.docstring.params:
if (name := param.arg_name) in parameters["properties"] and (description := param.description):
parameters["properties"][name]["description"] = description
parameters["required"] = sorted(
k for k, v in parameters["properties"].items() if not "default" in v
)
@@ -84,7 +78,7 @@ class openai_function:
_remove_a_key(parameters, "title")
self.openai_schema = {
"name": self.func.__name__,
"description": self.docstring.short_description,
"description": self.func.__doc__,
"parameters": parameters,
}
self.model = self.validate_func.model
@@ -134,20 +128,17 @@ class OpenAISchema(BaseModel):
model_json_schema (dict): A dictionary in the format of OpenAI's schema as jsonschema
"""
schema = cls.model_json_schema()
docstring = parse(cls.__doc__)
parameters = {
k: v for k, v in schema.items() if k not in ("title", "description")
}
for param in docstring.params:
if (name := param.arg_name) in parameters["properties"] and (description := param.description):
if "description" not in parameters["properties"][name]:
parameters["properties"][name]["description"] = description
parameters["required"] = sorted(
k for k, v in parameters["properties"].items() if not "default" in v
)
schema["description"] = docstring.short_description
if "description" not in schema:
schema[
"description"
] = f"Correctly extracted `{cls.__name__}` with all the required parameters with correct types"
_remove_a_key(parameters, "additionalProperties")
_remove_a_key(parameters, "title")
Generated
+558 -571
View File
File diff suppressed because it is too large Load Diff
-1
View File
@@ -12,7 +12,6 @@ repository = "https://github.com/jxnl/instructor"
python = "^3.9"
openai = "^0.27.8"
pydantic = "^2.0.2"
docstring-parser = "^0.15"
[tool.poetry.group.dev.dependencies]
+1 -2
View File
@@ -1,4 +1,3 @@
openai
pydantic
pytest
docstring-parser
pytest
+1 -48
View File
@@ -1,9 +1,7 @@
from typing import Literal
import pytest
from pydantic import BaseModel
from instructor import openai_schema, OpenAISchema, openai_function
from instructor import openai_schema, OpenAISchema
def test_openai_schema():
@@ -42,48 +40,3 @@ def test_no_docstring():
Dummy.openai_schema["description"]
== "Correctly extracted `Dummy` with all the required parameters with correct types"
)
def test_openai_function():
@openai_function
def get_current_weather(
location: str, format: Literal["celsius", "fahrenheit"] = "celsius"
):
"""
Gets the current weather in a given location, use this function for any questions related to the weather
Parameters
----------
location
The city to get the weather, e.g. San Francisco. Guess the location from user messages
format
A string with the full content of what the given role said
"""
@openai_function
def get_current_weather_no_format_docstring(
location: str, format: Literal["celsius", "fahrenheit"] = "celsius"
):
"""
Gets the current weather in a given location, use this function for any questions related to the weather
Parameters
----------
location
The city to get the weather, e.g. San Francisco. Guess the location from user messages
"""
scheme_missing_param = get_current_weather_no_format_docstring.openai_schema
assert (
scheme_missing_param["parameters"]["properties"]["location"]["description"]
==
"The city to get the weather, e.g. San Francisco. Guess the location from user messages"
)
assert (
scheme_missing_param["parameters"]["properties"]["format"]["enum"]
==
["celsius", "fahrenheit"]
)
with pytest.raises(KeyError, match="description"):
scheme_missing_param["parameters"]["properties"]["format"]["description"]