mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
Clean examples(#26)
This commit is contained in:
@@ -0,0 +1,75 @@
|
||||
from openai_function_call import OpenAISchema
|
||||
from pydantic import Field
|
||||
from typing import List, Any
|
||||
import openai
|
||||
|
||||
|
||||
class RowData(OpenAISchema):
    # One record of the table. The values are positional and are expected to
    # line up with the column names declared on the enclosing Dataframe.
    row: List[Any] = Field(default=..., description="The values for each row")
|
||||
|
||||
|
||||
class Dataframe(OpenAISchema):
    """
    Class representing a dataframe. This class is used to convert
    data into a frame that can be used by pandas.
    """

    # NOTE(review): the docstring above and the Field descriptions below
    # presumably end up in the function schema given to the model, so their
    # wording is kept exactly as it was.
    data: List[RowData] = Field(
        default=...,
        description="Correct rows of data aligned to column names, Nones are allowed",
    )
    columns: List[str] = Field(
        default=...,
        description="Column names relevant from source data, should be in snake_case",
    )

    def to_pandas(self):
        """Return a ``pandas.DataFrame`` with one row per ``RowData`` entry."""
        # Local import: pandas is only required when this method is called.
        import pandas as pd

        return pd.DataFrame(
            data=[record.row for record in self.data],
            columns=self.columns,
        )
|
||||
|
||||
|
||||
def dataframe(data: str) -> Dataframe:
    """Ask the chat model to restructure free-form text as a ``Dataframe``.

    Args:
        data: Raw text to be mapped into rows and columns.

    Returns:
        The ``Dataframe`` parsed from the model's function-call response.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.1,
        # Offer only the Dataframe schema and request that exact function.
        functions=[Dataframe.openai_schema],
        function_call={"name": Dataframe.openai_schema["name"]},
        messages=[
            {
                "role": "system",
                # Adjacent literals instead of a triple-quoted string: the old
                # literal broke the word "and" across a newline
                # ("dataframe a\nnd correctly ...").
                "content": (
                    "Map this data into a dataframe "
                    "and correctly define the correct columns and rows"
                ),
            },
            {"role": "user", "content": str(data)},
        ],
        max_tokens=1000,
    )
    return Dataframe.from_response(completion)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Free-form description of four people; the model is asked to turn it
    # into a single table.
    sample_text = """My name is John and I am 25 years old. I live in
New York and I like to play basketball. His name is
Mike and he is 30 years old. He lives in San Francisco
and he likes to play baseball. Sarah is 20 years old
and she lives in Los Angeles. She likes to play tennis.
Her name is Mary and she is 35 years old.
She lives in Chicago."""

    df = dataframe(sample_text)
    print(df.to_pandas())

    # Example of the printed output (kept as an inert string literal).
    """
name age location hobby
0 John 25 New York basketball
1 Mike 30 San Francisco baseball
2 Sarah 20 Los Angeles tennis
3 Mary 35 Chicago None
"""
|
||||
@@ -0,0 +1,102 @@
|
||||
from openai_function_call import OpenAISchema
|
||||
from pydantic import Field
|
||||
from typing import List, Any
|
||||
import openai
|
||||
|
||||
|
||||
class RowData(OpenAISchema):
    # One record of a table: the positional cell values plus the passage of
    # the source text that the record was taken from.
    row: List[Any] = Field(default=..., description="The values for each row")
    citation: str = Field(
        default=...,
        description="The citation for this row from the original source data",
    )
|
||||
|
||||
|
||||
class Dataframe(OpenAISchema):
    """
    Class representing a dataframe. This class is used to convert
    data into a frame that can be used by pandas.
    """

    # NOTE(review): the docstring above and the Field descriptions below
    # presumably end up in the function schema given to the model, so their
    # wording is kept exactly as it was.
    name: str = Field(default=..., description="The name of the dataframe")
    data: List[RowData] = Field(
        default=...,
        description="Correct rows of data aligned to column names, Nones are allowed",
    )
    columns: List[str] = Field(
        default=...,
        description="Column names relevant from source data, should be in snake_case",
    )

    def to_pandas(self):
        """Return a ``pandas.DataFrame`` with a trailing ``citation`` column.

        Each row's citation is appended after its regular values so it lines
        up with the extra ``"citation"`` column name.
        """
        # Local import: pandas is only required when this method is called.
        import pandas as pd

        return pd.DataFrame(
            data=[[*record.row, record.citation] for record in self.data],
            columns=[*self.columns, "citation"],
        )
|
||||
|
||||
|
||||
class Database(OpenAISchema):
    """
    A set of correct named and defined tables as dataframes
    """

    # Container for every table extracted from one piece of source text.
    tables: List[Dataframe] = Field(
        default=...,
        description="List of tables in the database",
    )
|
||||
|
||||
|
||||
def dataframe(data: str) -> Database:
    """Ask the chat model to restructure free-form text as a ``Database``.

    Args:
        data: Raw text that may describe one or more tables.

    Returns:
        The ``Database`` parsed from the model's function-call response.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-4-0613",
        temperature=0.1,
        # Offer only the Database schema and request that exact function.
        functions=[Database.openai_schema],
        function_call={"name": Database.openai_schema["name"]},
        messages=[
            {
                "role": "system",
                # Adjacent literals instead of a triple-quoted string: the old
                # literal broke the word "and" across a newline
                # ("dataframe a\nnd correctly ...").
                "content": (
                    "Map this data into a dataframe "
                    "and correctly define the correct columns and rows"
                ),
            },
            {"role": "user", "content": str(data)},
        ],
        max_tokens=1000,
    )
    return Database.from_response(completion)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Sample text that mixes two unrelated tables (people and teams).
    sample_text = """My name is John and I am 25 years old. I live in
New York and I like to play basketball. His name is
Mike and he is 30 years old. He lives in San Francisco
and he likes to play baseball. Sarah is 20 years old
and she lives in Los Angeles. She likes to play tennis.
Her name is Mary and she is 35 years old.
She lives in Chicago.

On one team 'Tigers' the captan is John and there are 12 players.
On the other team 'Lions' the captan is Mike and there are 10 players.
"""

    dfs = dataframe(sample_text)

    # Print each extracted table under its name.
    for df in dfs.tables:
        print(df.name)
        print(df.to_pandas())

    # Example of the printed output (kept as an inert string literal).
    """
People
Name Age City Favorite Sport
0 John 25 New York Basketball
1 Mike 30 San Francisco Baseball
2 Sarah 20 Los Angeles Tennis
3 Mary 35 Chicago None

Teams
Team Name Captain Number of Players
0 Tigers John 12
1 Lions Mike 10
"""
|
||||
@@ -0,0 +1,134 @@
|
||||
from typing import List
|
||||
|
||||
import openai
|
||||
from pydantic import Field, BaseModel
|
||||
|
||||
from openai_function_call import OpenAISchema
|
||||
|
||||
|
||||
class Fact(BaseModel):
    """
    Class representing single statement.
    Each fact has a body and a list of sources.
    If there are multiple facts make sure to break them apart such that each one only uses a set of sources that are relevant to it.
    """

    # NOTE(review): the docstring above and the Field descriptions below
    # presumably feed the schema shown to the model, so their wording is
    # left untouched.
    fact: str = Field(..., description="Body of the sentence, as part of a response")
    substring_quote: List[str] = Field(
        ...,
        description="Each source should be a direct quote from the context, as a substring of the original content",
    )

    def _get_span(self, quote: str, context: str, errs: int = 100):
        """Yield the ``(start, end)`` span where ``quote`` matches ``context``.

        The quote is searched for with the ``regex`` module's fuzzy matching,
        starting with zero allowed errors and raising the budget one step at
        a time until a match is found or ``errs`` is reached.

        Args:
            quote: Text expected to appear (approximately) in ``context``.
            context: Text to search in.
            errs: Maximum number of errors the fuzzy match may use.

        Yields:
            The span(s) of the first successful match; nothing if no match
            is found within the error budget or if ``quote`` is empty.
        """
        import regex

        # An empty quote would trivially match at position 0 and produce a
        # meaningless (0, 0) span.
        if not quote:
            return

        # Escape the quote so punctuation such as "(", "?" or "." is matched
        # literally instead of being interpreted as regex syntax.
        literal = regex.escape(quote)

        # Try budgets 0..errs inclusive (the previous loop overshot to errs + 1).
        for allowed in range(max(errs, 0) + 1):
            found = regex.search(f"({literal}){{e<={allowed}}}", context)
            if found is not None:
                yield from found.spans()
                return

    def get_spans(self, context: str):
        """Yield the matched span for every quote in ``substring_quote``.

        Args:
            context: Text the quotes are expected to come from.
        """
        for quote in self.substring_quote:
            yield from self._get_span(quote, context)
|
||||
|
||||
|
||||
class QuestionAnswer(OpenAISchema):
    """
    Class representing a question and its answer as a list of facts each one should have a soruce.
    each sentence contains a body and a list of sources."""

    # NOTE(review): the docstring above and the Field descriptions below
    # presumably feed the schema shown to the model, so they are reproduced
    # verbatim (including spelling).
    question: str = Field(default=..., description="Question that was asked")
    answer: List[Fact] = Field(
        default=...,
        description="Body of the answer, each fact should be its seperate object with a body and a list of sources",
    )
|
||||
|
||||
|
||||
def ask_ai(question: str, context: str) -> QuestionAnswer:
    """Ask the chat model a question about ``context`` and parse a cited answer.

    Args:
        question: The question to ask the AI.
        context: The context for the question.

    Returns:
        The ``QuestionAnswer`` built from the model's function-call response.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.2,
        max_tokens=1000,
        # Offer only the QuestionAnswer schema and request that exact function.
        functions=[QuestionAnswer.openai_schema],
        function_call={"name": QuestionAnswer.openai_schema["name"]},
        messages=[
            {
                "role": "system",
                "content": "You are a world class algorithm to answer questions with correct and exact citations. ",
            },
            {"role": "user", "content": "Answer question using the following context"},
            {"role": "user", "content": str(context)},
            {"role": "user", "content": f"Question: {question}"},
            {
                "role": "user",
                "content": "Tips: Make sure to cite your sources, and use the exact words from the context.",
            },
        ],
    )

    # Build the typed answer object from the completion response.
    return QuestionAnswer.from_response(completion)
|
||||
|
||||
|
||||
# Sample question and the passage it has to be answered from.
question = "What did the author do during college?"

# The passage starts and ends with a newline, one sentence per line.
context = "\n".join(
    [
        "",
        "My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.",
        "I went to an arts highschool but in university I studied Computational Mathematics and physics.",
        "As part of coop I worked at many companies including Stitchfix, Facebook.",
        "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.",
        "",
    ]
)
|
||||
|
||||
|
||||
def highlight(text, span, *, before=50, after=20):
    """Return a one-line excerpt of ``text`` with ``span`` highlighted.

    The excerpt has the form ``...<lead><quoted> <tail>...``, where the quoted
    part is wrapped in angle brackets and ANSI colour codes. Newlines are
    removed from every part so the excerpt stays on one line.

    Args:
        text: Full text the span refers to.
        span: Sequence whose first two items are the start and end offsets
            of the passage to highlight.
        before: Number of characters of leading context to show.
        after: Number of characters of trailing context to show.

    Returns:
        The formatted excerpt string.
    """
    start, end = span[0], span[1]

    # Clamp at 0: a negative slice start would wrap around to the end of the
    # text and drop or truncate the leading context for early matches.
    lead = text[max(0, start - before) : start]
    quoted = text[start:end]
    tail = text[end : end + after]

    return (
        "..."
        + lead.replace("\n", "")
        + "\033[91m"  # ANSI: bright red foreground
        + "<"
        + quoted.replace("\n", "")
        + "> "
        + "\033[0m"  # ANSI: reset attributes
        + tail.replace("\n", "")
        + "..."
    )
|
||||
|
||||
|
||||
# Guarded so that importing this module (e.g. only to reuse QuestionAnswer)
# does not send a request to the API or print anything.
if __name__ == "__main__":
    answer = ask_ai(question, context)

    print("Question:", question)
    print()
    for fact in answer.answer:
        print("Statement:", fact.fact)
        # Show every quote of this fact highlighted inside the context.
        for span in fact.get_spans(context):
            print("Citation:", highlight(context, span))
        print()

    # Example of the printed output (kept as an inert string literal).
    """
Question: What did the author do during college?

Statement: The author studied Computational Mathematics and physics in university.
Citation: ...s born in China.I went to an arts highschool but <in university I studied Computational Mathematics and physics> . As part of coop I...

Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.
Citation: ...y companies including Stitchfix, Facebook.I also <started the Data Science club at the University of Waterloo> and I was the presi...
Citation: ... club at the University of Waterloo and I was the <president of the club for 2 years> ...
"""
|
||||
@@ -0,0 +1,6 @@
|
||||
import erdantic as erd
|
||||
|
||||
from citation_fuzzy_match import QuestionAnswer
|
||||
|
||||
# Render the diagram for the QuestionAnswer model and write it next to the
# example as an image.
SCHEMA_IMAGE_PATH = "examples/citation_fuzzy_match/schema.png"

diagram = erd.create(QuestionAnswer)
diagram.draw(SCHEMA_IMAGE_PATH)
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 17 KiB |
Reference in New Issue
Block a user