Clean examples(#26)

This commit is contained in:
Jason Liu
2023-06-26 17:39:51 +08:00
committed by GitHub
parent 319ddd69f8
commit 6c2e01f967
5 changed files with 317 additions and 0 deletions
+75
View File
@@ -0,0 +1,75 @@
from openai_function_call import OpenAISchema
from pydantic import Field
from typing import List, Any
import openai
# Schema for a single table row. Documented with comments rather than a class
# docstring so the generated function schema is left exactly as before.
class RowData(OpenAISchema):
    # Cell values of this row; element types are unconstrained (List[Any]).
    row: List[Any] = Field(
        ...,
        description="The values for each row",
    )
class Dataframe(OpenAISchema):
    """
    Class representing a dataframe. This class is used to convert
    data into a frame that can be used by pandas.
    """

    # Row payloads; each entry wraps one list of cell values.
    data: List[RowData] = Field(
        ...,
        description="Correct rows of data aligned to column names, Nones are allowed",
    )
    # Header names passed to pandas as the column labels.
    columns: List[str] = Field(
        ...,
        description="Column names relevant from source data, should be in snake_case",
    )

    def to_pandas(self):
        """Return the rows and column names as a ``pandas.DataFrame``."""
        # Imported lazily so that defining the schema does not require pandas.
        import pandas as pd

        return pd.DataFrame(
            data=[entry.row for entry in self.data],
            columns=self.columns,
        )
def dataframe(data: str) -> Dataframe:
    """Ask the chat model to restructure free text into a ``Dataframe``.

    Args:
        data: Raw text containing the records to tabulate.

    Returns:
        The ``Dataframe`` parsed from the model's function-call response.
    """
    schema = Dataframe.openai_schema
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.1,
        functions=[schema],
        # Name the function explicitly so the reply is a call to this schema.
        function_call={"name": schema["name"]},
        messages=[
            {
                "role": "system",
                # The previous literal split the word "and" across a line
                # break ("a\nnd"), sending a garbled instruction to the model.
                "content": (
                    "Map this data into a dataframe and "
                    "correctly define the correct columns and rows"
                ),
            },
            {
                "role": "user",
                "content": f"{data}",
            },
        ],
        max_tokens=1000,
    )
    return Dataframe.from_response(completion)
if __name__ == "__main__":
    # Demo: extract a small table of people from a free-text paragraph.
    sample_text = """My name is John and I am 25 years old. I live in
New York and I like to play basketball. His name is
Mike and he is 30 years old. He lives in San Francisco
and he likes to play baseball. Sarah is 20 years old
and she lives in Los Angeles. She likes to play tennis.
Her name is Mary and she is 35 years old.
She lives in Chicago."""
    df = dataframe(sample_text)
    print(df.to_pandas())
    # Sample output from a previous run (kept as an inert string literal).
    """
    name age location hobby
    0 John 25 New York basketball
    1 Mike 30 San Francisco baseball
    2 Sarah 20 Los Angeles tennis
    3 Mary 35 Chicago None
    """
@@ -0,0 +1,102 @@
from openai_function_call import OpenAISchema
from pydantic import Field
from typing import List, Any
import openai
# Schema for a single table row plus the source text backing it. Documented
# with comments rather than a class docstring so the generated function schema
# is left exactly as before.
class RowData(OpenAISchema):
    # Cell values of this row; element types are unconstrained (List[Any]).
    row: List[Any] = Field(
        ...,
        description="The values for each row",
    )
    # Supporting text for the row, appended as an extra column by
    # Dataframe.to_pandas.
    citation: str = Field(
        ...,
        description="The citation for this row from the original source data",
    )
class Dataframe(OpenAISchema):
    """
    Class representing a dataframe. This class is used to convert
    data into a frame that can be used by pandas.
    """

    # Label for the table; printed by the demo before each frame.
    name: str = Field(..., description="The name of the dataframe")
    # Row payloads; each entry carries its values and a citation.
    data: List[RowData] = Field(
        ...,
        description="Correct rows of data aligned to column names, Nones are allowed",
    )
    # Header names for the value columns (the citation column is added later).
    columns: List[str] = Field(
        ...,
        description="Column names relevant from source data, should be in snake_case",
    )

    def to_pandas(self):
        """Return the table as a ``pandas.DataFrame`` with a trailing ``citation`` column."""
        # Imported lazily so that defining the schema does not require pandas.
        import pandas as pd

        header = [*self.columns, "citation"]
        records = [[*entry.row, entry.citation] for entry in self.data]
        return pd.DataFrame(data=records, columns=header)
class Database(OpenAISchema):
    """
    A set of correct named and defined tables as dataframes
    """

    # Every table extracted from the input, each as its own Dataframe.
    tables: List[Dataframe] = Field(
        ...,
        description="List of tables in the database",
    )
def dataframe(data: str) -> Database:
    """Ask the chat model to restructure free text into a ``Database`` of tables.

    Args:
        data: Raw text containing the records to tabulate.

    Returns:
        The ``Database`` parsed from the model's function-call response.
    """
    schema = Database.openai_schema
    completion = openai.ChatCompletion.create(
        model="gpt-4-0613",
        temperature=0.1,
        functions=[schema],
        # Name the function explicitly so the reply is a call to this schema.
        function_call={"name": schema["name"]},
        messages=[
            {
                "role": "system",
                # The previous literal split the word "and" across a line
                # break ("a\nnd"), sending a garbled instruction to the model.
                "content": (
                    "Map this data into a dataframe and "
                    "correctly define the correct columns and rows"
                ),
            },
            {
                "role": "user",
                "content": f"{data}",
            },
        ],
        max_tokens=1000,
    )
    return Database.from_response(completion)
if __name__ == "__main__":
    # Demo: the text mixes two kinds of records (people and teams), so the
    # model is expected to return more than one table.
    sample_text = """My name is John and I am 25 years old. I live in
New York and I like to play basketball. His name is
Mike and he is 30 years old. He lives in San Francisco
and he likes to play baseball. Sarah is 20 years old
and she lives in Los Angeles. She likes to play tennis.
Her name is Mary and she is 35 years old.
She lives in Chicago.
On one team 'Tigers' the captan is John and there are 12 players.
On the other team 'Lions' the captan is Mike and there are 10 players.
"""
    dfs = dataframe(sample_text)
    for df in dfs.tables:
        print(df.name)
        print(df.to_pandas())
    # Sample output from a previous run (kept as an inert string literal).
    """
    People
    Name Age City Favorite Sport
    0 John 25 New York Basketball
    1 Mike 30 San Francisco Baseball
    2 Sarah 20 Los Angeles Tennis
    3 Mary 35 Chicago None
    Teams
    Team Name Captain Number of Players
    0 Tigers John 12
    1 Lions Mike 10
    """
@@ -0,0 +1,134 @@
from typing import List
import openai
from pydantic import Field, BaseModel
from openai_function_call import OpenAISchema
class Fact(BaseModel):
    """
    Class representing single statement.
    Each fact has a body and a list of sources.
    If there are multiple facts make sure to break them apart such that each one only uses a set of sources that are relevant to it.
    """

    fact: str = Field(..., description="Body of the sentence, as part of a response")
    substring_quote: List[str] = Field(
        ...,
        description="Each source should be a direct quote from the context, as a substring of the original content",
    )

    def _get_span(self, quote, context, errs=100):
        """Yield the span(s) where ``quote`` best fuzzy-matches ``context``.

        The search starts with zero permitted errors and relaxes the budget
        one error at a time, stopping at the first budget that matches.

        Args:
            quote: Text to locate; treated literally, not as a pattern.
            context: Text to search in.
            errs: Largest number of errors to permit before giving up.

        Yields:
            The ``(start, end)`` spans reported by the first successful match;
            nothing if no match is found within ``errs`` errors.
        """
        import regex

        # Escape the quote: it is model output and may contain characters such
        # as "(", "?" or "." that would otherwise be parsed as regex syntax.
        literal = regex.escape(quote)
        # range(errs + 1) tries budgets 0..errs inclusive; the earlier loop
        # overshot and also tried errs + 1.
        for allowed in range(errs + 1):
            found = regex.search(f"({literal}){{e<={allowed}}}", context)
            if found is not None:
                yield from found.spans()
                return

    def get_spans(self, context):
        """Yield the matched spans in ``context`` for every quote of this fact."""
        for quote in self.substring_quote:
            yield from self._get_span(quote, context)
class QuestionAnswer(OpenAISchema):
    """
    Class representing a question and its answer as a list of facts each one should have a soruce.
    each sentence contains a body and a list of sources."""

    # The question text, echoed back in the structured response.
    question: str = Field(
        ...,
        description="Question that was asked",
    )
    # The answer, broken into individually cited Fact objects.
    answer: List[Fact] = Field(
        ...,
        description="Body of the answer, each fact should be its seperate object with a body and a list of sources",
    )
def ask_ai(question: str, context: str) -> QuestionAnswer:
    """
    Ask the chat model a question about a context and parse the cited answer.

    Args:
        question (str): The question to ask the AI.
        context (str): The context for the question.

    Returns:
        QuestionAnswer: The answer parsed from the model's function-call response.
    """
    schema = QuestionAnswer.openai_schema
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.2,
        max_tokens=1000,
        functions=[schema],
        # Name the function explicitly so the reply is a call to this schema.
        function_call={"name": schema["name"]},
        messages=[
            {
                "role": "system",
                "content": "You are a world class algorithm to answer questions with correct and exact citations. ",
            },
            {"role": "user", "content": "Answer question using the following context"},
            {"role": "user", "content": f"{context}"},
            {"role": "user", "content": f"Question: {question}"},
            {
                "role": "user",
                "content": "Tips: Make sure to cite your sources, and use the exact words from the context.",
            },
        ],
    )
    # Build the QuestionAnswer object from the completion response.
    return QuestionAnswer.from_response(completion)
# Example question passed to ask_ai by the script further down.
question = "What did the author do during college?"
# Source text the answer is drawn from; also searched for citation spans and
# used by highlight() to print the text around each citation.
context = """
My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.
I went to an arts highschool but in university I studied Computational Mathematics and physics.
As part of coop I worked at many companies including Stitchfix, Facebook.
I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
"""
def highlight(text, span):
return (
"..."
+ text[span[0] - 50 : span[0]].replace("\n", "")
+ "\033[91m"
+ "<"
+ text[span[0] : span[1]].replace("\n", "")
+ "> "
+ "\033[0m"
+ text[span[1] : span[1] + 20].replace("\n", "")
+ "..."
)
# Guarded so that importing this module (for example to reuse QuestionAnswer)
# does not send a request to the OpenAI API as a side effect.
if __name__ == "__main__":
    answer = ask_ai(question, context)
    print("Question:", question)
    print()
    for fact in answer.answer:
        print("Statement:", fact.fact)
        # Each quote attached to the fact is located in the context and shown
        # with a little surrounding text.
        for span in fact.get_spans(context):
            print("Citation:", highlight(context, span))
        print()
    # Sample output from a previous run (kept as an inert string literal).
    """
    Question: What did the author do during college?
    Statement: The author studied Computational Mathematics and physics in university.
    Citation: ...s born in China.I went to an arts highschool but <in university I studied Computational Mathematics and physics> . As part of coop I...
    Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.
    Citation: ...y companies including Stitchfix, Facebook.I also <started the Data Science club at the University of Waterloo> and I was the presi...
    Citation: ... club at the University of Waterloo and I was the <president of the club for 2 years> ...
    """
@@ -0,0 +1,6 @@
import erdantic as erd
from citation_fuzzy_match import QuestionAnswer
# Build a diagram object for the QuestionAnswer model via erdantic
# (presumably an entity-relationship view of its fields; confirm in erdantic docs).
diagram = erd.create(QuestionAnswer)
# Write the diagram to this path. NOTE(review): the path is relative, so this
# looks like it expects to be run from the repository root; confirm.
diagram.draw("examples/citation_fuzzy_match/schema.png")
Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB