diff --git a/docs/concepts/logging.md b/docs/concepts/logging.md index 9b13de2..75db7d1 100644 --- a/docs/concepts/logging.md +++ b/docs/concepts/logging.md @@ -28,27 +28,12 @@ user = client.chat.completions.create( ) # type: ignore """ -DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False -DEBUG:httpx:load_verify_locations cafile='/Users/jasonliu/dev/instructor/.venv/lib/python3.11/site-packages/certifi/cacert.pem' -DEBUG:instructor:Patching `client.chat.completions.create` with mode= +... +DEBUG:instructor:Patching `client.chat.completions.create` with mode= +DEBUG:instructor:Instructor Request: mode.value='tool_call', response_model=, new_kwargs={'model': 'gpt-3.5-turbo', 'messages': [{'role': 'user', 'content': 'Extract Jason is 25 years old'}], 'tools': [{'type': 'function', 'function': {'name': 'UserDetail', 'description': 'Correctly extracted `UserDetail` with all the required parameters with correct types', 'parameters': {'properties': {'name': {'title': 'Name', 'type': 'string'}, 'age': {'title': 'Age', 'type': 'integer'}}, 'required': ['age', 'name'], 'type': 'object'}}}], 'tool_choice': {'type': 'function', 'function': {'name': 'UserDetail'}}} DEBUG:instructor:max_retries: 1 -DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'Extract Jason is 25 years old'}], 'model': 'gpt-3.5-turbo', 'function_call': {'name': 'UserDetail'}, 'functions': [{'name': 'UserDetail', 'description': 'Correctly extracted `UserDetail` with all the required parameters with correct types', 'parameters': {'properties': {'name': {'title': 'Name', 'type': 'string'}, 'age': {'title': 'Age', 'type': 'integer'}}, 'required': ['age', 'name'], 'type': 'object'}}]}} -DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=5.0 socket_options=None -DEBUG:httpcore.connection:connect_tcp.complete return_value= -DEBUG:httpcore.connection:start_tls.started ssl_context= server_hostname='api.openai.com' timeout=5.0 -DEBUG:httpcore.connection:start_tls.complete return_value= -DEBUG:httpcore.http11:send_request_headers.started request= -DEBUG:httpcore.http11:send_request_headers.complete -DEBUG:httpcore.http11:send_request_body.started request= -DEBUG:httpcore.http11:send_request_body.complete -DEBUG:httpcore.http11:receive_response_headers.started request= -DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Mon, 12 Feb 2024 14:55:45 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'Cache-Control', b'no-cache, must-revalidate'), (b'openai-model', b'gpt-3.5-turbo-0613'), (b'openai-organization', b'scribe-ai'), (b'openai-processing-ms', b'483'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'2000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'1999975'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_f0fa476897ae165fc50fa90b7968595b'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=e2_yCrwo4frh6Oq4ZufCEhNJ4lSGJ2.MMtk45X8lrMM-1707749745-1-AfWk8CyACc7aZo6GpCI82FBfI/wmPEFZLNO/Cr3eavTW3xKVFCS7G9jvwYTFLXjJr0cttYsXeLAnjwipw18R0Vo=; path=/; expires=Mon, 12-Feb-24 15:25:45 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Set-Cookie', b'_cfuvid=PyVVCGSMxTg1p.woYvHVVC9E3n69faOs5FOxaDdjXOM-1707749745711-0-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8545aca30c1fa22f-YYZ'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')]) -INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" -DEBUG:httpcore.http11:receive_response_body.started request= -DEBUG:httpcore.http11:receive_response_body.complete -DEBUG:httpcore.http11:response_closed.started -DEBUG:httpcore.http11:response_closed.complete -DEBUG:openai._base_client:HTTP Request: POST https://api.openai.com/v1/chat/completions "200 OK" +... +DEBUG:instructor:Instructor Pre-Response: ChatCompletion(id='chatcmpl-8zBxMxsOqm5Sj6yeEI38PnU2r6ncC', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_E1cftF5U0zEjzIbWt3q0ZLbN', function=Function(arguments='{"name":"Jason","age":25}', name='UserDetail'), type='function')]))], created=1709594660, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint='fp_2b778c6b35', usage=CompletionUsage(completion_tokens=9, prompt_tokens=81, total_tokens=90)) DEBUG:httpcore.connection:close.started DEBUG:httpcore.connection:close.complete """ diff --git a/instructor/dsl/citation.py b/instructor/dsl/citation.py index df72b24..fcf46ad 100644 --- a/instructor/dsl/citation.py +++ b/instructor/dsl/citation.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel, Field, FieldValidationInfo, model_validator +from pydantic import BaseModel, Field, model_validator, ValidationInfo from typing import Generator, List, Tuple @@ -58,7 +58,7 @@ class CitationMixin(BaseModel): # type: ignore[misc] ) @model_validator(mode="after") # type: ignore[misc] - def validate_sources(self, info: FieldValidationInfo) -> "CitationMixin": + def validate_sources(self, info: ValidationInfo) -> "CitationMixin": """ For each substring_phrase, find the span of the substring_phrase in the context. If the span is not found, remove the substring_phrase from the list. diff --git a/instructor/dsl/iterable.py b/instructor/dsl/iterable.py index fd2c9e2..3027bb2 100644 --- a/instructor/dsl/iterable.py +++ b/instructor/dsl/iterable.py @@ -191,7 +191,7 @@ def IterableModel( new_cls = create_model( name, tasks=list_tasks, - __base__=(OpenAISchema, IterableBase), + __base__=(OpenAISchema, IterableBase), # type: ignore ) # set the class constructor BaseModel new_cls.task_type = subtask_class diff --git a/instructor/dsl/parallel.py b/instructor/dsl/parallel.py index 831c1c0..1ab0e20 100644 --- a/instructor/dsl/parallel.py +++ b/instructor/dsl/parallel.py @@ -12,7 +12,7 @@ from typing import ( get_origin, ) from types import UnionType # type: ignore[attr-defined] - +from pydantic import BaseModel from instructor.function_calls import OpenAISchema, Mode, openai_schema from collections.abc import Iterable @@ -32,7 +32,7 @@ class ParallelBase: mode: Mode, validation_context: Optional[Any] = None, strict: Optional[bool] = None, - ) -> Generator[T, None, None]: + ) -> Generator[BaseModel, None, None]: #! We expect this from the OpenAISchema class, We should address #! this with a protocol or an abstract class... @jxnlco assert mode == Mode.PARALLEL_TOOLS, "Mode must be PARALLEL_TOOLS" @@ -44,7 +44,7 @@ class ParallelBase: ) -def get_types_array(typehint: Type[Iterable[Union[T]]]) -> Tuple[Type[T], ...]: +def get_types_array(typehint: Type[Iterable[T]]) -> Tuple[Type[T], ...]: should_be_iterable = get_origin(typehint) if should_be_iterable is not Iterable: raise TypeError(f"Model should be with Iterable instead if {typehint}") @@ -63,7 +63,7 @@ def get_types_array(typehint: Type[Iterable[Union[T]]]) -> Tuple[Type[T], ...]: return get_args(typehint) -def handle_parallel_model(typehint: Type[Iterable[Union[T]]]) -> List[Dict[str, Any]]: +def handle_parallel_model(typehint: Type[Iterable[T]]) -> List[Dict[str, Any]]: the_types = get_types_array(typehint) return [ {"type": "function", "function": openai_schema(model).openai_schema} @@ -71,6 +71,6 @@ def handle_parallel_model(typehint: Type[Iterable[Union[T]]]) -> List[Dict[str, ] -def ParallelModel(typehint: Type[Iterable[Union[T]]]) -> ParallelBase: +def ParallelModel(typehint: Type[Iterable[T]]) -> ParallelBase: the_types = get_types_array(typehint) return ParallelBase(*[model for model in the_types]) diff --git a/instructor/dsl/partial.py b/instructor/dsl/partial.py index cb4cc02..393fddd 100644 --- a/instructor/dsl/partial.py +++ b/instructor/dsl/partial.py @@ -19,6 +19,7 @@ from typing import ( NoReturn, Optional, TypeVar, + Type, ) from copy import deepcopy @@ -26,29 +27,28 @@ from instructor.function_calls import Mode from instructor.dsl.partialjson import JSONParser parser = JSONParser() - -Model = TypeVar("Model", bound=BaseModel) +T_Model = TypeVar("T_Model", bound=BaseModel) -class PartialBase: +class PartialBase(Generic[T_Model]): @classmethod def from_streaming_response( cls, completion: Iterable[Any], mode: Mode, **kwargs: Any - ) -> Generator[Model, None, None]: + ) -> Generator[T_Model, None, None]: json_chunks = cls.extract_json(completion, mode) yield from cls.model_from_chunks(json_chunks, **kwargs) @classmethod async def from_streaming_response_async( cls, completion: AsyncGenerator[Any, None], mode: Mode, **kwargs: Any - ) -> AsyncGenerator[Model, None]: + ) -> AsyncGenerator[T_Model, None]: json_chunks = cls.extract_json_async(completion, mode) return cls.model_from_chunks_async(json_chunks, **kwargs) @classmethod def model_from_chunks( cls, json_chunks: Iterable[Any], **kwargs: Any - ) -> Generator[Model, None, None]: + ) -> Generator[T_Model, None, None]: prev_obj = None potential_object = "" for chunk in json_chunks: @@ -70,7 +70,7 @@ class PartialBase: @classmethod async def model_from_chunks_async( cls, json_chunks: AsyncGenerator[str, None], **kwargs: Any - ) -> AsyncGenerator[Model, None]: + ) -> AsyncGenerator[T_Model, None]: potential_object = "" prev_obj = None async for chunk in json_chunks: @@ -136,7 +136,7 @@ class PartialBase: pass -class Partial(Generic[Model]): +class Partial(Generic[T_Model]): """Generate a new class with all attributes optionals. Notes: @@ -151,7 +151,7 @@ class Partial(Generic[Model]): cls, *args: object, # noqa :ARG003 **kwargs: object, # noqa :ARG003 - ) -> "Partial[Model]": + ) -> "Partial[T_Model]": """Cannot instantiate. Raises: @@ -173,8 +173,8 @@ class Partial(Generic[Model]): def __class_getitem__( # type: ignore[override] cls, - wrapped_class: type[Model], - ) -> type[Model]: + wrapped_class: type[T_Model], + ) -> type[T_Model]: """Convert model to a partial model with all fields being optionals.""" def _make_field_optional( @@ -199,7 +199,9 @@ class Partial(Generic[Model]): ) # Reconstruct the generic type with modified arguments - tmp_field.annotation = Optional[generic_base[modified_args]] + tmp_field.annotation = ( + Optional[generic_base[modified_args]] if generic_base else None + ) tmp_field.default = None # If the field is a BaseModel, then recursively convert it's # attributes to optionals. @@ -211,12 +213,12 @@ class Partial(Generic[Model]): tmp_field.default = None return tmp_field.annotation, tmp_field - return create_model( # type: ignore[no-any-return, call-overload] - f"Partial{wrapped_class.__name__}", + return create_model( + __model_name=f"Partial{wrapped_class.__name__}", __base__=(wrapped_class, PartialBase), __module__=wrapped_class.__module__, **{ field_name: _make_field_optional(field_info) - for field_name, field_info in wrapped_class.model_fields.items() + for field_name, field_info in wrapped_class.__fields__.items() }, - ) + ) # type: ignore[all] diff --git a/instructor/dsl/partialjson.py b/instructor/dsl/partialjson.py index 3e215e3..2a773a0 100644 --- a/instructor/dsl/partialjson.py +++ b/instructor/dsl/partialjson.py @@ -139,7 +139,7 @@ class JSONParser: if "." in num_str or "e" in num_str or "E" in num_str else int(num_str) ) - except ValueError as e: + except json.JSONDecodeError as e: raise e return num, s diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py index 90365d8..fba60c9 100644 --- a/instructor/dsl/validators.py +++ b/instructor/dsl/validators.py @@ -32,7 +32,7 @@ def llm_validator( allow_override: bool = False, model: str = "gpt-3.5-turbo", temperature: float = 0, - openai_client: OpenAI = None, + openai_client: Optional[OpenAI] = None, ) -> Callable[[str], str]: """ Create a validator that uses the LLM to validate an attribute @@ -85,7 +85,7 @@ def llm_validator( ], model=model, temperature=temperature, - ) + ) # type: ignore[all] # If the response is not valid, return the reason, this could be used in # the future to generate a better response, via reasking mechanism. diff --git a/instructor/function_calls.py b/instructor/function_calls.py index ce5467f..eeebf01 100644 --- a/instructor/function_calls.py +++ b/instructor/function_calls.py @@ -5,20 +5,24 @@ from pydantic import BaseModel, create_model from instructor.exceptions import IncompleteOutputException import enum import warnings +import logging +from openai.types.chat import ChatCompletion T = TypeVar("T") +logger = logging.getLogger("instructor") + class Mode(enum.Enum): """The mode to use for patching the client""" - FUNCTIONS: str = "function_call" - PARALLEL_TOOLS: str = "parallel_tool_call" - TOOLS: str = "tool_call" - MISTRAL_TOOLS: str = "mistral_tools" - JSON: str = "json_mode" - MD_JSON: str = "markdown_json_mode" - JSON_SCHEMA: str = "json_schema_mode" + FUNCTIONS = "function_call" + PARALLEL_TOOLS = "parallel_tool_call" + TOOLS = "tool_call" + MISTRAL_TOOLS = "mistral_tools" + JSON = "json_mode" + MD_JSON = "markdown_json_mode" + JSON_SCHEMA = "json_schema_mode" def __new__(cls, value: str) -> "Mode": member = object.__new__(cls) @@ -82,11 +86,11 @@ class OpenAISchema(BaseModel): # type: ignore[misc] @classmethod def from_response( cls, - completion: T, + completion: ChatCompletion, validation_context: Optional[Dict[str, Any]] = None, strict: Optional[bool] = None, mode: Mode = Mode.TOOLS, - ) -> Dict[str, Any]: + ) -> BaseModel: """Execute the function from the response of an openai chat completion Parameters: @@ -102,41 +106,46 @@ class OpenAISchema(BaseModel): # type: ignore[misc] assert hasattr(completion, "choices") if completion.choices[0].finish_reason == "length": + logger.error("Incomplete output detected, should increase max_tokens") raise IncompleteOutputException() + # If Anthropic, this should be different message = completion.choices[0].message if mode == Mode.FUNCTIONS: assert ( message.function_call.name == cls.openai_schema["name"] # type: ignore[index] ), "Function name does not match" - return cls.model_validate_json( - message.function_call.arguments, + model_response = cls.model_validate_json( + message.function_call.arguments, # type: ignore[attr-defined] context=validation_context, strict=strict, ) elif mode in {Mode.TOOLS, Mode.MISTRAL_TOOLS}: assert ( - len(message.tool_calls) == 1 + len(message.tool_calls or []) == 1 ), "Instructor does not support multiple tool calls, use List[Model] instead." - tool_call = message.tool_calls[0] + tool_call = message.tool_calls[0] # type: ignore assert ( tool_call.function.name == cls.openai_schema["name"] # type: ignore[index] ), "Tool name does not match" - return cls.model_validate_json( + model_response = cls.model_validate_json( tool_call.function.arguments, context=validation_context, strict=strict, ) elif mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}: - return cls.model_validate_json( - message.content, + model_response = cls.model_validate_json( + message.content, # type: ignore context=validation_context, strict=strict, ) else: raise ValueError(f"Invalid patch mode: {mode}") + # TODO: add logging or response handler + return model_response + def openai_schema(cls: Type[BaseModel]) -> OpenAISchema: if not issubclass(cls, BaseModel): @@ -147,4 +156,4 @@ def openai_schema(cls: Type[BaseModel]) -> OpenAISchema: cls.__name__, __base__=(cls, OpenAISchema), ) - ) + ) # type: ignore[all] diff --git a/instructor/patch.py b/instructor/patch.py index c81e571..737b54d 100644 --- a/instructor/patch.py +++ b/instructor/patch.py @@ -1,3 +1,4 @@ +# type: ignore[all] import inspect import json import logging @@ -8,6 +9,7 @@ from tenacity import Retrying, AsyncRetrying, stop_after_attempt, RetryError from json import JSONDecodeError from typing import ( Callable, + Generator, Optional, ParamSpec, Protocol, @@ -45,6 +47,15 @@ T_ParamSpec = ParamSpec("T_ParamSpec") T = TypeVar("T") +def update_total_usage(response, total_usage): + if isinstance(response, ChatCompletion) and response.usage is not None: + total_usage.completion_tokens += response.usage.completion_tokens or 0 + total_usage.prompt_tokens += response.usage.prompt_tokens or 0 + total_usage.total_tokens += response.usage.total_tokens or 0 + response.usage = total_usage # Replace each response usage with the total usage + return response + + def dump_message(message: ChatCompletionMessage) -> ChatCompletionMessageParam: """Dumps a message to a dict, to be returned to the OpenAI API. Workaround for an issue with the OpenAI API, where the `tool_calls` field isn't allowed to be present in requests @@ -56,7 +67,11 @@ def dump_message(message: ChatCompletionMessage) -> ChatCompletionMessageParam: } if hasattr(message, "tool_calls") and message.tool_calls is not None: ret["tool_calls"] = message.model_dump()["tool_calls"] - if hasattr(message, "function_call") and message.function_call is not None: + if ( + hasattr(message, "function_call") + and message.function_call is not None + and ret["content"] + ): ret["content"] += json.dumps(message.model_dump()["function_call"]) return ret @@ -177,18 +192,29 @@ def handle_response_model( new_kwargs["messages"][0]["content"] += f"\n\n{message}" else: raise ValueError(f"Invalid patch mode: {mode}") + + logger.debug( + f"Instructor Request: {mode.value=}, {response_model=}, {new_kwargs=}", + extra={ + "mode": mode.value, + "response_model": response_model.__name__ + if response_model is not None + else None, + "new_kwargs": new_kwargs, + }, + ) return response_model, new_kwargs def process_response( - response: T, + response: T_Model, *, - response_model: Type[T_Model], + response_model: Type[OpenAISchema | BaseModel], stream: bool, - validation_context: dict = None, + validation_context: Optional[dict] = None, strict=None, mode: Mode = Mode.TOOLS, -) -> Union[T_Model, T]: +) -> T_Model | Generator[T_Model, None, None]: """Processes a OpenAI response with the response model, if available. Args: @@ -202,7 +228,13 @@ def process_response( Returns: Union[T_Model, T]: The parsed response, if a response model is available, otherwise the response as is from the SDK """ + + logger.debug( + f"Instructor Raw Response: {response}", + ) + if response_model is None: + logger.debug("No response model, returning response as is") return response if ( @@ -244,12 +276,12 @@ def process_response( async def process_response_async( response: ChatCompletion, *, - response_model: Type[T_Model], + response_model: Type[T_Model | OpenAISchema | BaseModel], stream: bool = False, - validation_context: dict = None, + validation_context: Optional[dict] = None, strict: Optional[bool] = None, mode: Mode = Mode.TOOLS, -) -> T: +) -> T_Model | ChatCompletion: """Processes a OpenAI response with the response model, if available. It can use `validation_context` and `strict` to validate the response via the pydantic model @@ -261,6 +293,10 @@ async def process_response_async( validation_context (dict, optional): The validation context to use for validating the response. Defaults to None. strict (bool, optional): Whether to use strict json parsing. Defaults to None. """ + + logger.debug( + f"Instructor Raw Response: {response}", + ) if response_model is None: return response @@ -329,18 +365,9 @@ async def retry_async( logger.debug(f"Retrying, attempt: {attempt}") with attempt: try: - response: ChatCompletion = await func(*args, **kwargs) + response: ChatCompletion = await func(*args, **kwargs) # type: ignore stream = kwargs.get("stream", False) - if ( - isinstance(response, ChatCompletion) - and response.usage is not None - ): - total_usage.completion_tokens += ( - response.usage.completion_tokens or 0 - ) - total_usage.prompt_tokens += response.usage.prompt_tokens or 0 - total_usage.total_tokens += response.usage.total_tokens or 0 - response.usage = total_usage # Replace each response usage with the total usage + response = update_total_usage(response, total_usage) return await process_response_async( response, response_model=response_model, @@ -348,9 +375,9 @@ async def retry_async( validation_context=validation_context, strict=strict, mode=mode, - ) + ) # type: ignore[all] except (ValidationError, JSONDecodeError) as e: - logger.debug(f"Error response: {response}") + logger.debug(f"Error response: {response}", e) kwargs["messages"].append(dump_message(response.choices[0].message)) # type: ignore if mode == Mode.TOOLS: kwargs["messages"].append( @@ -413,16 +440,7 @@ def retry_sync( try: response = func(*args, **kwargs) stream = kwargs.get("stream", False) - if ( - isinstance(response, ChatCompletion) - and response.usage is not None - ): - total_usage.completion_tokens += ( - response.usage.completion_tokens or 0 - ) - total_usage.prompt_tokens += response.usage.prompt_tokens or 0 - total_usage.total_tokens += response.usage.total_tokens or 0 - response.usage = total_usage # Replace each response usage with the total usage + response = update_total_usage(response, total_usage) return process_response( response, response_model=response_model, diff --git a/pyproject.toml b/pyproject.toml index b7f6966..4311b11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "instructor" -version = "0.6.2" +version = "0.6.3" description = "structured outputs for llm" authors = ["Jason Liu "] license = "MIT"