diff --git a/examples/auto_dataframe/auto_dataframe.py b/examples/auto_dataframe/auto_dataframe.py
new file mode 100644
index 0000000..bf0b26e
--- /dev/null
+++ b/examples/auto_dataframe/auto_dataframe.py
@@ -0,0 +1,89 @@
+from openai_function_call import OpenAISchema
+from pydantic import Field
+from typing import List, Any
+import openai
+
+
+class RowData(OpenAISchema):
+    # One table row; values are expected in the same order as the columns.
+    row: List[Any] = Field(..., description="The values for each row")
+
+
+class Dataframe(OpenAISchema):
+    """
+    Class representing a dataframe. This class is used to convert
+    data into a frame that can be used by pandas.
+    """
+
+    data: List[RowData] = Field(
+        ...,
+        description="Correct rows of data aligned to column names, Nones are allowed",
+    )
+    columns: List[str] = Field(
+        ...,
+        description="Column names relevant from source data, should be in snake_case",
+    )
+
+    def to_pandas(self):
+        """Build a pandas DataFrame from the extracted rows and column names."""
+        # Imported here so pandas is only required when a frame is built.
+        import pandas as pd
+
+        return pd.DataFrame(
+            data=[row.row for row in self.data],
+            columns=self.columns,
+        )
+
+
+def dataframe(data: str) -> Dataframe:
+    """Ask the chat model to turn free-form text into a single table.
+
+    Args:
+        data: Unstructured text describing the records to tabulate.
+
+    Returns:
+        The Dataframe parsed from the completion via ``from_response``.
+    """
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo-0613",
+        temperature=0.1,
+        functions=[Dataframe.openai_schema],
+        # Name the function explicitly so the reply uses the Dataframe schema.
+        function_call={"name": Dataframe.openai_schema["name"]},
+        messages=[
+            {
+                "role": "system",
+                "content": (
+                    "Map this data into a dataframe and "
+                    "correctly define the correct columns and rows"
+                ),
+            },
+            {
+                "role": "user",
+                "content": f"{data}",
+            },
+        ],
+        max_tokens=1000,
+    )
+    return Dataframe.from_response(completion)
+
+
+if __name__ == "__main__":
+    df = dataframe(
+        """My name is John and I am 25 years old. I live in
+        New York and I like to play basketball. His name is
+        Mike and he is 30 years old. He lives in San Francisco
+        and he likes to play baseball. Sarah is 20 years old
+        and she lives in Los Angeles. She likes to play tennis.
+        Her name is Mary and she is 35 years old.
+        She lives in Chicago."""
+    )
+
+    print(df.to_pandas())
+    """
+    name age location hobby
+    0 John 25 New York basketball
+    1 Mike 30 San Francisco baseball
+    2 Sarah 20 Los Angeles tennis
+    3 Mary 35 Chicago None
+    """
diff --git a/examples/auto_dataframe/auto_multi_dataframe.py b/examples/auto_dataframe/auto_multi_dataframe.py
new file mode 100644
index 0000000..2aad173
--- /dev/null
+++ b/examples/auto_dataframe/auto_multi_dataframe.py
@@ -0,0 +1,116 @@
+from openai_function_call import OpenAISchema
+from pydantic import Field
+from typing import List, Any
+import openai
+
+
+class RowData(OpenAISchema):
+    # One table row plus the source text that supports it.
+    row: List[Any] = Field(..., description="The values for each row")
+    citation: str = Field(
+        ..., description="The citation for this row from the original source data"
+    )
+
+
+class Dataframe(OpenAISchema):
+    """
+    Class representing a dataframe. This class is used to convert
+    data into a frame that can be used by pandas.
+    """
+
+    name: str = Field(..., description="The name of the dataframe")
+    data: List[RowData] = Field(
+        ...,
+        description="Correct rows of data aligned to column names, Nones are allowed",
+    )
+    columns: List[str] = Field(
+        ...,
+        description="Column names relevant from source data, should be in snake_case",
+    )
+
+    def to_pandas(self):
+        """Build a pandas DataFrame with a trailing ``citation`` column."""
+        # Imported here so pandas is only required when a frame is built.
+        import pandas as pd
+
+        return pd.DataFrame(
+            data=[row.row + [row.citation] for row in self.data],
+            columns=self.columns + ["citation"],
+        )
+
+
+class Database(OpenAISchema):
+    """
+    A set of correct named and defined tables as dataframes
+    """
+
+    tables: List[Dataframe] = Field(
+        ...,
+        description="List of tables in the database",
+    )
+
+
+def dataframe(data: str) -> Database:
+    """Ask the chat model to turn free-form text into one or more named tables.
+
+    Args:
+        data: Unstructured text describing the records to tabulate.
+
+    Returns:
+        The Database parsed from the completion via ``from_response``.
+    """
+    completion = openai.ChatCompletion.create(
+        model="gpt-4-0613",
+        temperature=0.1,
+        functions=[Database.openai_schema],
+        # Name the function explicitly so the reply uses the Database schema.
+        function_call={"name": Database.openai_schema["name"]},
+        messages=[
+            {
+                "role": "system",
+                "content": (
+                    "Map this data into a dataframe and "
+                    "correctly define the correct columns and rows"
+                ),
+            },
+            {
+                "role": "user",
+                "content": f"{data}",
+            },
+        ],
+        max_tokens=1000,
+    )
+    return Database.from_response(completion)
+
+
+if __name__ == "__main__":
+    dfs = dataframe(
+        """My name is John and I am 25 years old. I live in
+        New York and I like to play basketball. His name is
+        Mike and he is 30 years old. He lives in San Francisco
+        and he likes to play baseball. Sarah is 20 years old
+        and she lives in Los Angeles. She likes to play tennis.
+        Her name is Mary and she is 35 years old.
+        She lives in Chicago.
+
+        On one team 'Tigers' the captan is John and there are 12 players.
+        On the other team 'Lions' the captan is Mike and there are 10 players.
+        """
+    )
+
+    for df in dfs.tables:
+        print(df.name)
+        print(df.to_pandas())
+        """
+        People
+        Name Age City Favorite Sport
+        0 John 25 New York Basketball
+        1 Mike 30 San Francisco Baseball
+        2 Sarah 20 Los Angeles Tennis
+        3 Mary 35 Chicago None
+
+        Teams
+        Team Name Captain Number of Players
+        0 Tigers John 12
+        1 Lions Mike 10
+        """
diff --git a/examples/citation_with_fuzzy_matching/citation_fuzzy_match.py b/examples/citation_with_fuzzy_matching/citation_fuzzy_match.py
new file mode 100644
index 0000000..69f8c82
--- /dev/null
+++ b/examples/citation_with_fuzzy_matching/citation_fuzzy_match.py
@@ -0,0 +1,160 @@
+from typing import List
+
+import openai
+from pydantic import Field, BaseModel
+
+from openai_function_call import OpenAISchema
+
+
+class Fact(BaseModel):
+    """
+    Class representing single statement.
+    Each fact has a body and a list of sources.
+    If there are multiple facts make sure to break them apart such that each one only uses a set of sources that are relevant to it.
+    """
+
+    fact: str = Field(..., description="Body of the sentence, as part of a response")
+    substring_quote: List[str] = Field(
+        ...,
+        description="Each source should be a direct quote from the context, as a substring of the original content",
+    )
+
+    def _get_span(self, quote, context, errs=100):
+        """Yield the span(s) of the first fuzzy match of ``quote`` in ``context``.
+
+        The permitted edit count starts at 0 and grows by one until a match is
+        found or ``errs`` is reached; nothing is yielded when there is no match.
+        """
+        import regex
+
+        if not quote:
+            # An empty quote would match at offset 0 and cite nothing useful.
+            return
+
+        # Escape the quote so its punctuation ("?", "(", ".") is matched
+        # literally instead of being interpreted as regex syntax.
+        pattern = regex.escape(quote)
+
+        match = None
+        for max_errs in range(errs + 1):
+            match = regex.search(f"({pattern}){{e<={max_errs}}}", context)
+            if match is not None:
+                break
+
+        if match is not None:
+            yield from match.spans()
+
+    def get_spans(self, context):
+        """Yield the matched span of every quote in ``substring_quote``."""
+        for quote in self.substring_quote:
+            yield from self._get_span(quote, context)
+
+
+class QuestionAnswer(OpenAISchema):
+    """
+    Class representing a question and its answer as a list of facts each one should have a soruce.
+    each sentence contains a body and a list of sources."""
+
+    question: str = Field(..., description="Question that was asked")
+    answer: List[Fact] = Field(
+        ...,
+        description="Body of the answer, each fact should be its seperate object with a body and a list of sources",
+    )
+
+
+def ask_ai(question: str, context: str) -> QuestionAnswer:
+    """
+    Ask the chat model a question about ``context`` and parse the cited answer.
+
+    Args:
+        question (str): The question to ask the AI.
+        context (str): The context for the question.
+
+    Returns:
+        QuestionAnswer: The parsed answer, as facts with supporting quotes.
+    """
+
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo-0613",
+        temperature=0.2,
+        max_tokens=1000,
+        functions=[QuestionAnswer.openai_schema],
+        # Name the function explicitly so the reply uses the QuestionAnswer schema.
+        function_call={"name": QuestionAnswer.openai_schema["name"]},
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a world class algorithm to answer questions with correct and exact citations.",
+            },
+            {"role": "user", "content": "Answer question using the following context"},
+            {"role": "user", "content": f"{context}"},
+            {"role": "user", "content": f"Question: {question}"},
+            {
+                "role": "user",
+                "content": "Tips: Make sure to cite your sources, and use the exact words from the context.",
+            },
+        ],
+    )
+
+    return QuestionAnswer.from_response(completion)
+
+
+question = "What did the author do during college?"
+context = """
+My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.
+I went to an arts highschool but in university I studied Computational Mathematics and physics.
+As part of coop I worked at many companies including Stitchfix, Facebook.
+I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
+"""
+
+
+def highlight(text, span):
+    """Return ``span`` of ``text`` highlighted in red with surrounding context.
+
+    Args:
+        text: The full source text.
+        span: ``(start, end)`` offsets of the match within ``text``.
+
+    Returns:
+        Up to 50 characters before the match, the match wrapped in ``<...>``
+        and ANSI red, and up to 20 characters after it, with newlines removed.
+    """
+    start, end = span
+    # Clamp at 0: a negative slice start would count from the end of the text
+    # and drop the leading context for matches in the first 50 characters.
+    before = text[max(0, start - 50) : start]
+    return (
+        "..."
+        + before.replace("\n", "")
+        + "\033[91m"
+        + "<"
+        + text[start:end].replace("\n", "")
+        + "> "
+        + "\033[0m"
+        + text[end : end + 20].replace("\n", "")
+        + "..."
+    )
+
+
+if __name__ == "__main__":
+    # Guarded so that importing this module (e.g. for its schema classes)
+    # does not trigger an API call.
+    answer = ask_ai(question, context)
+
+    print("Question:", question)
+    print()
+    for fact in answer.answer:
+        print("Statement:", fact.fact)
+        for span in fact.get_spans(context):
+            print("Citation:", highlight(context, span))
+        print()
+    """
+    Question: What did the author do during college?
+
+    Statement: The author studied Computational Mathematics and physics in university.
+    Citation: ...s born in China.I went to an arts highschool but . As part of coop I...
+
+    Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.
+    Citation: ...y companies including Stitchfix, Facebook.I also and I was the presi...
+    Citation: ... club at the University of Waterloo and I was the ...
+    """
diff --git a/examples/citation_with_fuzzy_matching/diagram.py b/examples/citation_with_fuzzy_matching/diagram.py
new file mode 100644
index 0000000..87ed23c
--- /dev/null
+++ b/examples/citation_with_fuzzy_matching/diagram.py
@@ -0,0 +1,7 @@
+import erdantic as erd
+
+from citation_fuzzy_match import QuestionAnswer
+
+# Draw the QuestionAnswer schema; the output path assumes the repository root as cwd.
+diagram = erd.create(QuestionAnswer)
+diagram.draw("examples/citation_with_fuzzy_matching/schema.png")
diff --git a/examples/citation_with_fuzzy_matching/schema.png b/examples/citation_with_fuzzy_matching/schema.png
new file mode 100644
index 0000000..66bd66c
Binary files /dev/null and b/examples/citation_with_fuzzy_matching/schema.png differ