mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
clean up distil example
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(396, b=414, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":396,\"b\":414,\"result\":810}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(812, b=108, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":812,\"b\":108,\"result\":920}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(923, b=561, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":923,\"b\":561,\"result\":1484}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(933, b=756, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":933,\"b\":756,\"result\":1689}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(807, b=389, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":807,\"b\":389,\"result\":1196}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(509, b=776, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":509,\"b\":776,\"result\":1285}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(165, b=442, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":165,\"b\":442,\"result\":607}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(843, b=304, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":843,\"b\":304,\"result\":1147}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(512, b=791, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":512,\"b\":791,\"result\":1303}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(133, b=539, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":133,\"b\":539,\"result\":672}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
|
||||
@@ -0,0 +1,79 @@
|
||||
import logging
|
||||
|
||||
from pydantic import BaseModel
|
||||
from instructor.distil import Instructions
|
||||
|
||||
logging.basicConfig(level=logging.INFO)

# Usage: build the distillation tracker. The FileHandler below receives the
# records logged for functions decorated with `instructions.distil`, so they
# end up in math_finetunes.jsonl.
# NOTE(review): the name says "multiply" but `fn` in this file adds its
# operands -- confirm which of the two is intended.
# NOTE(review): `finetune_format` is given as a plain string; presumably it
# compares equal to FinetuneFormat.MESSAGES -- verify against instructor.distil.
instructions = Instructions(
    name="three_digit_multiply",
    finetune_format="messages",
    log_handlers=[
        logging.FileHandler("math_finetunes.jsonl"),
    ],
)
|
||||
|
||||
|
||||
class Response(BaseModel):
    # Documented with comments rather than a docstring on purpose: a class
    # docstring would presumably be picked up as the schema description and
    # change the logged fine-tuning records -- verify before adding one.
    a: int  # first operand, echoed back from the call
    b: int  # second operand, echoed back from the call
    result: int  # value computed from `a` and `b`
|
||||
|
||||
|
||||
@instructions.distil
def fn(a: int, b: int, c: str) -> Response:
    """_summary_

    Args:
        a (int): _description_
        b (int): _description_
        c (str): _description_

    Returns:
        Response: _description_
    """
    # The placeholder docstring above is kept verbatim: its text shows up in
    # the "system" message of the logged records (see the sample log line in
    # the __main__ block), so rewording it changes the generated data.
    # `c` is accepted but not used in the computation.
    resp = a + b
    return Response(a=a, b=b, result=resp)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import random

    # A log will look like this:
    # (illustrative only -- `log_line` is never read; it mirrors one line of
    # the JSONL output produced by the loop below)
    log_line = {
        "messages": [
            {
                "role": "system",
                "content": 'Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n"""\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n"""',
            },
            {"role": "user", "content": 'Return fn(133, b=539, c="hello")'},
            {
                "role": "assistant",
                "function_call": {
                    "name": "Response",
                    "arguments": '{"a":133,"b":539,"result":672}',
                },
            },
        ],
        "functions": [
            {
                "name": "Response",
                "description": "Correctly extracted `Response` with all the required parameters with correct types",
                "parameters": {
                    "properties": {
                        "a": {"type": "integer"},
                        "b": {"type": "integer"},
                        "result": {"type": "integer"},
                    },
                    "required": ["a", "b", "result"],
                    "type": "object",
                },
            }
        ],
    }

    # Call the tracked function ten times with random three-digit operands.
    for _ in range(10):
        a = random.randint(100, 999)
        b = random.randint(100, 999)
        print("returning", fn(a, b=b, c="hello"))
|
||||
@@ -1,70 +0,0 @@
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
import requests
|
||||
import logging
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from instructor.distil import Instructions
|
||||
|
||||
|
||||
class DatasetHandler(logging.Handler):
    """Logging handler that POSTs each JSON log record to an HTTP endpoint.

    The endpoint URL is read from the ``INSTRUCTOR_URL`` environment variable
    and an API key from ``INSTRUCTOR_KEY`` when the handler is constructed.

    Args:
        dataset_name: Label stored with every forwarded record.
        timeout: Seconds to wait for the HTTP request before giving up.
    """

    def __init__(self, dataset_name=None, timeout=10.0):
        super().__init__()
        self.url = os.environ.get("INSTRUCTOR_URL")
        # Read here but not attached to the request in `emit`.
        self.api_key = os.environ.get("INSTRUCTOR_KEY")
        self.formatter = logging.Formatter("%(message)s")
        self.dataset_name = dataset_name
        self.timeout = timeout
        # One id per handler instance; sent as "batch_id" with each record.
        self.uuid = str(uuid.uuid4())

    def emit(self, record: logging.LogRecord) -> None:
        """Wrap the record's JSON message in an envelope and POST it.

        Any failure (message is not JSON, request error, non-200 status) is
        routed to ``handleError`` instead of propagating to the code that
        issued the log call.
        """
        try:
            # Parsing happens inside the try so that a non-JSON message does
            # not raise out of the logging call.
            log_entry = json.loads(self.format(record))

            new_entry = {
                "record": log_entry,
                "dataset_name": self.dataset_name,
                "batch_id": self.uuid,
                "created_at": datetime.datetime.now().isoformat(),
            }

            # A timeout keeps a stalled endpoint from blocking the logger.
            response = requests.post(
                self.url, data=json.dumps(new_entry), timeout=self.timeout
            )
            if response.status_code != 200:
                self.handleError(record)
        except Exception:
            self.handleError(record)
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO)

# Usage: build the distillation tracker with two handlers, one writing to
# the local file finetunes.jsonl and one DatasetHandler posting to the
# endpoint configured through INSTRUCTOR_URL.
instructions = Instructions(
    name="test_distil",
    log_handlers=[
        logging.FileHandler("finetunes.jsonl"),
        DatasetHandler("finetunes_fo_test_distil"),
    ],
)
|
||||
|
||||
|
||||
class Response(BaseModel):
    # Comments are used instead of a docstring on purpose: a class docstring
    # would presumably surface in the generated schema description -- verify
    # before adding one.
    a: int  # first operand, echoed back from the call
    b: int  # second operand, echoed back from the call
    result: int  # value computed from `a` and `b`
|
||||
|
||||
|
||||
# Adds the two operands and returns them together with the sum.
# Kept free of a docstring: presumably the decorator includes the function's
# docstring in the logged prompt -- verify before adding one.
@instructions.distil
def fn(a: int, b: int) -> Response:
    resp = a + b
    return Response(a=a, b=b, result=resp)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import random

    # Call the tracked function ten times with random three-digit operands.
    for _ in range(10):
        a = random.randint(100, 999)
        b = random.randint(100, 999)
        print("returning", fn(a, b))
|
||||
+26
-16
@@ -3,7 +3,6 @@ import functools
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from typing import Any, Callable, List, Optional
|
||||
import uuid
|
||||
@@ -79,10 +78,12 @@ class Instructions:
|
||||
name: str = None,
|
||||
id: str = None,
|
||||
log_handlers: List[logging.Handler] = None,
|
||||
finetune_format: FinetuneFormat = FinetuneFormat.MESSAGES,
|
||||
):
|
||||
self.name = name
|
||||
self.id = id or str(uuid.uuid4())
|
||||
self.unique_id = str(uuid.uuid4())
|
||||
self.finetune_format = finetune_format
|
||||
|
||||
self.logger = logging.getLogger(self.name)
|
||||
for handler in log_handlers or []:
|
||||
@@ -93,7 +94,7 @@ class Instructions:
|
||||
*args,
|
||||
name: str = None,
|
||||
mode: str = "distil",
|
||||
fine_tune_format: FinetuneFormat = FinetuneFormat.MESSAGES,
|
||||
fine_tune_format: FinetuneFormat = None,
|
||||
):
|
||||
"""
|
||||
Decorator to track the function call and response, supports distillation and dispatch modes.
|
||||
@@ -118,6 +119,9 @@ class Instructions:
|
||||
assert mode in allowed_modes, f"Must be in {allowed_modes}"
|
||||
assert mode == "distil", "Only distil mode is supported at the moment."
|
||||
|
||||
if fine_tune_format is None:
|
||||
fine_tune_format = self.finetune_format
|
||||
|
||||
def _wrap_distil(fn):
|
||||
msg = f"Return type hint for {fn} must subclass `pydantic.BaseModel'"
|
||||
assert is_return_type_base_model_or_instance(fn), msg
|
||||
@@ -161,30 +165,25 @@ class Instructions:
|
||||
name = name if name else fn.__name__
|
||||
base_model: BaseModel = type(resp)
|
||||
|
||||
if finetune_format == FinetuneFormat.RAW:
|
||||
function_body = dict(
|
||||
fn_name=name,
|
||||
fn_repr=format_function(fn),
|
||||
args=args,
|
||||
kwargs=kwargs,
|
||||
resp=resp.model_dump(),
|
||||
schema=base_model.model_json_schema(),
|
||||
)
|
||||
self.logger.info(json.dumps(function_body))
|
||||
|
||||
if finetune_format == FinetuneFormat.MESSAGES:
|
||||
# This is the format that OpenAI's API expects for a finetune call
|
||||
openai_function_call = openai_schema(base_model).openai_schema
|
||||
function_definition = get_signature_from_fn(fn).replace(fn.__name__, name)
|
||||
|
||||
str_args = ", ".join(map(str, args))
|
||||
str_kwargs = (
|
||||
", ".join(f"{k}={json.dumps(v)}" for k, v in kwargs.items()) or None
|
||||
)
|
||||
call_args = ", ".join(filter(None, [str_args, str_kwargs]))
|
||||
|
||||
function_body = {
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Return the response from the function call.\n\n {function_definition}",
|
||||
"content": f"Predict the results of this function:\n\n{function_definition}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"Return the results of the function with the following arguments:\n\n {name}(*{args}, **{kwargs})",
|
||||
"content": f"Return {name}({call_args})",
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
@@ -197,3 +196,14 @@ class Instructions:
|
||||
"functions": [openai_function_call],
|
||||
}
|
||||
self.logger.info(json.dumps(function_body))
|
||||
|
||||
if finetune_format == FinetuneFormat.RAW:
|
||||
function_body = dict(
|
||||
fn_name=name,
|
||||
fn_repr=format_function(fn),
|
||||
args=args,
|
||||
kwargs=kwargs,
|
||||
resp=resp.model_dump(),
|
||||
schema=base_model.model_json_schema(),
|
||||
)
|
||||
self.logger.info(json.dumps(function_body))
|
||||
|
||||
Reference in New Issue
Block a user