clean up distil example

This commit is contained in:
Jason Liu
2023-10-14 15:13:05 -04:00
parent 79d872dd91
commit e98f777ef5
4 changed files with 115 additions and 86 deletions
@@ -0,0 +1,10 @@
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(396, b=414, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":396,\"b\":414,\"result\":810}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(812, b=108, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":812,\"b\":108,\"result\":920}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(923, b=561, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":923,\"b\":561,\"result\":1484}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(933, b=756, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":933,\"b\":756,\"result\":1689}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(807, b=389, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":807,\"b\":389,\"result\":1196}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(509, b=776, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":509,\"b\":776,\"result\":1285}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(165, b=442, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":165,\"b\":442,\"result\":607}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(843, b=304, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":843,\"b\":304,\"result\":1147}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(512, b=791, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":512,\"b\":791,\"result\":1303}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(133, b=539, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":133,\"b\":539,\"result\":672}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
+79
View File
@@ -0,0 +1,79 @@
import logging
from pydantic import BaseModel
from instructor.distil import Instructions
# Configure the root logger at INFO; the distillation records are written
# with `logger.info(...)`, so a higher threshold would drop them.
logging.basicConfig(level=logging.INFO)
# Usage
# Build the `Instructions` object whose `distil` decorator is applied to `fn`
# below. Each handler listed in `log_handlers` receives the records it logs.
instructions = Instructions(
# NOTE(review): the name says "multiply" but `fn` below computes `a + b` — confirm which is intended.
name="three_digit_multiply",
# NOTE(review): a plain string is passed here, while `Instructions.__init__`
# annotates this parameter as `FinetuneFormat` — confirm the string compares
# equal to `FinetuneFormat.MESSAGES`.
finetune_format="messages",
log_handlers=[
# One JSON document per line is written to this file.
logging.FileHandler("math_finetunes.jsonl"),
],
)
# Return type of `fn`: echoes both operands together with the computed value.
# Documented with `#` comments only; the example log in this file shows a
# default schema description, so a class docstring is deliberately not added
# in case it would change the emitted schema.
class Response(BaseModel):
    a: int  # first operand, echoed back
    b: int  # second operand, echoed back
    result: int  # value computed by `fn` from `a` and `b`
# Example function tracked by `instructions.distil`. The placeholder docstring
# is kept verbatim: the example log record in this file shows it embedded in
# the system prompt, so editing it would change the generated training data.
@instructions.distil
def fn(a: int, b: int, c: str) -> Response:
    """_summary_

    Args:
        a (int): _description_
        b (int): _description_
        c (str): _description_

    Returns:
        Response: _description_
    """
    total = a + b
    return Response(a=a, b=b, result=total)
if __name__ == "__main__":
    import random

    # A log will look like this:
    # (illustrative only; the variable is not read anywhere below)
    log_line = {
        "messages": [
            {
                "role": "system",
                "content": 'Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n"""\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n"""',
            },
            {"role": "user", "content": 'Return fn(133, b=539, c="hello")'},
            {
                "role": "assistant",
                "function_call": {
                    "name": "Response",
                    "arguments": '{"a":133,"b":539,"result":672}',
                },
            },
        ],
        "functions": [
            {
                "name": "Response",
                "description": "Correctly extracted `Response` with all the required parameters with correct types",
                "parameters": {
                    "properties": {
                        "a": {"type": "integer"},
                        "b": {"type": "integer"},
                        "result": {"type": "integer"},
                    },
                    "required": ["a", "b", "result"],
                    "type": "object",
                },
            }
        ],
    }

    # Call the decorated function ten times with random three-digit operands.
    for _ in range(10):
        a, b = random.randint(100, 999), random.randint(100, 999)
        outcome = fn(a, b=b, c="hello")
        print("returning", outcome)
-70
View File
@@ -1,70 +0,0 @@
import datetime
import json
import os
import uuid
import requests
import logging
from pydantic import BaseModel
from instructor.distil import Instructions
class DatasetHandler(logging.Handler):
    """Logging handler that POSTs each JSON-formatted log record to an HTTP endpoint.

    The endpoint URL is read from the ``INSTRUCTOR_URL`` environment variable
    and an API key from ``INSTRUCTOR_KEY`` when the handler is constructed.
    Every record uploaded by one handler instance carries the same
    ``batch_id``, generated once in ``__init__``.

    Args:
        dataset_name: Label attached to every uploaded record.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled endpoint would block the logging
            call, and therefore the application, indefinitely.
    """

    def __init__(self, dataset_name=None, timeout=10.0):
        super().__init__()
        self.url = os.environ.get("INSTRUCTOR_URL")
        # NOTE(review): the key is stored but never sent with the request —
        # confirm whether the endpoint expects it (e.g. as a header).
        self.api_key = os.environ.get("INSTRUCTOR_KEY")
        # Records are expected to carry a JSON document as their message, so
        # the formatter emits the bare message with no prefix.
        self.formatter = logging.Formatter("%(message)s")
        self.dataset_name = dataset_name
        self.timeout = timeout
        self.uuid = str(uuid.uuid4())

    def emit(self, record: logging.LogRecord) -> None:
        """Upload one log record; failures are routed to ``handleError``.

        The record's message is parsed as JSON, wrapped with the dataset
        name, batch id and a creation timestamp, and POSTed to ``self.url``.
        Any exception (non-JSON message, missing URL, network error,
        timeout) or a non-200 response is reported through
        ``self.handleError(record)`` rather than raised to the caller.
        """
        try:
            # Parsing happens inside the ``try`` so that a non-JSON message
            # is reported via ``handleError`` instead of propagating out of
            # the caller's logging statement.
            new_entry = {
                "record": json.loads(self.format(record)),
                "dataset_name": self.dataset_name,
                "batch_id": self.uuid,
                "created_at": datetime.datetime.now().isoformat(),
            }
            response = requests.post(
                self.url,
                data=json.dumps(new_entry),
                timeout=self.timeout,
            )
        except Exception:
            self.handleError(record)
            return

        if response.status_code != 200:
            self.handleError(record)
# Configure the root logger at INFO; the distillation records are written
# with `logger.info(...)`, so a higher threshold would drop them.
logging.basicConfig(level=logging.INFO)
# Usage
# Build the `Instructions` object whose `distil` decorator is applied to `fn`
# below. Every logged record goes to both handlers.
instructions = Instructions(
name="test_distil",
log_handlers=[
# Local copy: one JSON document per line.
logging.FileHandler("finetunes.jsonl"),
# Remote copy: POSTed to the URL taken from the INSTRUCTOR_URL environment variable.
DatasetHandler("finetunes_fo_test_distil"),
],
)
# Return type of `fn`: echoes both operands together with the computed value.
# Documented with `#` comments only, in case a class docstring would change
# the schema generated from this model.
class Response(BaseModel):
    a: int  # first operand, echoed back
    b: int  # second operand, echoed back
    result: int  # value computed by `fn` from `a` and `b`
# Example function tracked by `instructions.distil`: adds the two operands.
# No docstring is added on purpose, since the function's definition is what
# gets rendered into the logged prompt.
@instructions.distil
def fn(a: int, b: int) -> Response:
    return Response(a=a, b=b, result=a + b)
if __name__ == "__main__":
    import random

    # Call the decorated function ten times with random three-digit operands.
    for _ in range(10):
        a, b = random.randint(100, 999), random.randint(100, 999)
        outcome = fn(a, b)
        print("returning", outcome)
+26 -16
View File
@@ -3,7 +3,6 @@ import functools
import inspect
import json
import logging
import os
from typing import Any, Callable, List, Optional
import uuid
@@ -79,10 +78,12 @@ class Instructions:
name: str = None,
id: str = None,
log_handlers: List[logging.Handler] = None,
finetune_format: FinetuneFormat = FinetuneFormat.MESSAGES,
):
self.name = name
self.id = id or str(uuid.uuid4())
self.unique_id = str(uuid.uuid4())
self.finetune_format = finetune_format
self.logger = logging.getLogger(self.name)
for handler in log_handlers or []:
@@ -93,7 +94,7 @@ class Instructions:
*args,
name: str = None,
mode: str = "distil",
fine_tune_format: FinetuneFormat = FinetuneFormat.MESSAGES,
fine_tune_format: FinetuneFormat = None,
):
"""
Decorator to track the function call and response, supports distillation and dispatch modes.
@@ -118,6 +119,9 @@ class Instructions:
assert mode in allowed_modes, f"Must be in {allowed_modes}"
assert mode == "distil", "Only distil mode is supported at the moment."
if fine_tune_format is None:
fine_tune_format = self.finetune_format
def _wrap_distil(fn):
msg = f"Return type hint for {fn} must subclass `pydantic.BaseModel'"
assert is_return_type_base_model_or_instance(fn), msg
@@ -161,30 +165,25 @@ class Instructions:
name = name if name else fn.__name__
base_model: BaseModel = type(resp)
if finetune_format == FinetuneFormat.RAW:
function_body = dict(
fn_name=name,
fn_repr=format_function(fn),
args=args,
kwargs=kwargs,
resp=resp.model_dump(),
schema=base_model.model_json_schema(),
)
self.logger.info(json.dumps(function_body))
if finetune_format == FinetuneFormat.MESSAGES:
# This is the format that OpenAI's API expects for a finetune call
openai_function_call = openai_schema(base_model).openai_schema
function_definition = get_signature_from_fn(fn).replace(fn.__name__, name)
str_args = ", ".join(map(str, args))
str_kwargs = (
", ".join(f"{k}={json.dumps(v)}" for k, v in kwargs.items()) or None
)
call_args = ", ".join(filter(None, [str_args, str_kwargs]))
function_body = {
"messages": [
{
"role": "system",
"content": f"Return the response from the function call.\n\n {function_definition}",
"content": f"Predict the results of this function:\n\n{function_definition}",
},
{
"role": "user",
"content": f"Return the results of the function with the following arguments:\n\n {name}(*{args}, **{kwargs})",
"content": f"Return {name}({call_args})",
},
{
"role": "assistant",
@@ -197,3 +196,14 @@ class Instructions:
"functions": [openai_function_call],
}
self.logger.info(json.dumps(function_body))
if finetune_format == FinetuneFormat.RAW:
function_body = dict(
fn_name=name,
fn_repr=format_function(fn),
args=args,
kwargs=kwargs,
resp=resp.model_dump(),
schema=base_model.model_json_schema(),
)
self.logger.info(json.dumps(function_body))