Clean examples(#26)

2026-06-05 22:50:18 +00:00 · 2023-06-26 17:39:51 +08:00
parent 319ddd69f8
commit 6c2e01f967
5 changed files with 317 additions and 0 deletions
@@ -0,0 +1,75 @@
+from openai_function_call import OpenAISchema
+from pydantic import Field
+from typing import List, Any
+import openai
+
+
+# Schema for one row of the extracted table.
+# NOTE(review): presumably the field description is forwarded to the model
+# through `openai_schema` — confirm before rewording it.
+class RowData(OpenAISchema):
+    # Cell values for this row; elements are untyped (`Any`).
+    row: List[Any] = Field(..., description="The values for each row")
+
+
+class Dataframe(OpenAISchema):
+    """
+    Class representing a dataframe. This class is used to convert
+    data into a frame that can be used by pandas.
+    """
+
+    data: List[RowData] = Field(
+        ...,
+        description="Correct rows of data aligned to column names, Nones are allowed",
+    )
+    columns: List[str] = Field(
+        ...,
+        description="Column names relevant from source data, should be in snake_case",
+    )
+
+    def to_pandas(self):
+        """Return a pandas DataFrame built from ``data`` rows and ``columns``."""
+        # Imported inside the method, so pandas is only needed when this runs.
+        from pandas import DataFrame as PandasFrame
+
+        return PandasFrame(
+            data=[entry.row for entry in self.data],
+            columns=self.columns,
+        )
+
+
+def dataframe(data: str) -> Dataframe:
+    """
+    Ask the chat model to turn free-form text into a ``Dataframe``.
+
+    The request passes ``Dataframe.openai_schema`` as the only function and
+    names it in ``function_call``; the completion is then parsed with
+    ``Dataframe.from_response``.
+
+    Args:
+        data (str): Source text to extract rows and columns from.
+
+    Returns:
+        Dataframe: The object built from the completion.
+    """
+    # The prompt is assembled from adjacent literals so that wrapping the
+    # source line cannot inject a newline and indentation into the text.
+    system_prompt = (
+        "Map this data into a dataframe and "
+        "correctly define the correct columns and rows"
+    )
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo-0613",
+        temperature=0.1,
+        functions=[Dataframe.openai_schema],
+        function_call={"name": Dataframe.openai_schema["name"]},
+        messages=[
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": f"{data}",
+            },
+        ],
+        max_tokens=1000,
+    )
+    return Dataframe.from_response(completion)
+
+
+if __name__ == "__main__":
+    # Unstructured prose describing four people; the model is asked to
+    # turn it into a table.
+    passage = """My name is John and I am 25 years old. I live in 
+        New York and I like to play basketball. His name is 
+        Mike and he is 30 years old. He lives in San Francisco 
+        and he likes to play baseball. Sarah is 20 years old 
+        and she lives in Los Angeles. She likes to play tennis.
+        Her name is Mary and she is 35 years old. 
+        She lives in Chicago."""
+    df = dataframe(passage)
+
+    frame = df.to_pandas()
+    print(frame)
+    # Sample of the printed result, kept as a bare string literal:
+    """
+        name  age       location       hobby
+    0   John   25       New York  basketball
+    1   Mike   30  San Francisco    baseball
+    2  Sarah   20    Los Angeles      tennis
+    3   Mary   35        Chicago        None
+    """
@@ -0,0 +1,102 @@
+from openai_function_call import OpenAISchema
+from pydantic import Field
+from typing import List, Any
+import openai
+
+
+# Schema for one row of an extracted table, together with its supporting
+# citation.
+# NOTE(review): presumably the field descriptions are forwarded to the model
+# through `openai_schema` — confirm before rewording them.
+class RowData(OpenAISchema):
+    # Cell values for this row; elements are untyped (`Any`).
+    row: List[Any] = Field(..., description="The values for each row")
+    # Text from the source data backing this row; `Dataframe.to_pandas`
+    # appends it as an extra "citation" column.
+    citation: str = Field(
+        ..., description="The citation for this row from the original source data"
+    )
+
+
+class Dataframe(OpenAISchema):
+    """
+    Class representing a dataframe. This class is used to convert
+    data into a frame that can be used by pandas.
+    """
+
+    name: str = Field(..., description="The name of the dataframe")
+    data: List[RowData] = Field(
+        ...,
+        description="Correct rows of data aligned to column names, Nones are allowed",
+    )
+    columns: List[str] = Field(
+        ...,
+        description="Column names relevant from source data, should be in snake_case",
+    )
+
+    def to_pandas(self):
+        """Return a pandas DataFrame with a trailing ``citation`` column."""
+        # Imported inside the method, so pandas is only needed when this runs.
+        from pandas import DataFrame as PandasFrame
+
+        # Each row gets its citation appended to line up with the extra header.
+        header = [*self.columns, "citation"]
+        records = [[*entry.row, entry.citation] for entry in self.data]
+
+        return PandasFrame(data=records, columns=header)
+
+
+# Top-level schema requested from the model: a collection of named tables.
+# NOTE(review): presumably the docstring and field description below are sent
+# to the model as part of the function schema — confirm before rewording.
+class Database(OpenAISchema):
+    """
+    A set of correct named and defined tables as dataframes
+    """
+
+    # Every extracted table, each one a `Dataframe`.
+    tables: List[Dataframe] = Field(
+        ...,
+        description="List of tables in the database",
+    )
+
+
+def dataframe(data: str) -> Database:
+    """
+    Ask the chat model to turn free-form text into a ``Database`` of tables.
+
+    The request passes ``Database.openai_schema`` as the only function and
+    names it in ``function_call``; the completion is then parsed with
+    ``Database.from_response``.
+
+    Args:
+        data (str): Source text to extract tables from.
+
+    Returns:
+        Database: The object built from the completion.
+    """
+    # The prompt is assembled from adjacent literals so that wrapping the
+    # source line cannot inject a newline and indentation into the text.
+    system_prompt = (
+        "Map this data into a dataframe and "
+        "correctly define the correct columns and rows"
+    )
+    completion = openai.ChatCompletion.create(
+        model="gpt-4-0613",
+        temperature=0.1,
+        functions=[Database.openai_schema],
+        function_call={"name": Database.openai_schema["name"]},
+        messages=[
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": f"{data}",
+            },
+        ],
+        max_tokens=1000,
+    )
+    return Database.from_response(completion)
+
+
+if __name__ == "__main__":
+    # Prose mixing two kinds of records (people and teams); the model is
+    # asked to split it into separate tables.
+    passage = """My name is John and I am 25 years old. I live in 
+        New York and I like to play basketball. His name is 
+        Mike and he is 30 years old. He lives in San Francisco 
+        and he likes to play baseball. Sarah is 20 years old 
+        and she lives in Los Angeles. She likes to play tennis.
+        Her name is Mary and she is 35 years old. 
+        She lives in Chicago.
+
+        On one team 'Tigers' the captan is John and there are 12 players.
+        On the other team 'Lions' the captan is Mike and there are 10 players.
+        """
+    dfs = dataframe(passage)
+
+    # Print each table's name followed by its contents.
+    for table in dfs.tables:
+        print(table.name)
+        print(table.to_pandas())
+    # Sample of the printed result, kept as a bare string literal:
+    """
+    People
+    Name  Age           City Favorite Sport
+    0   John   25       New York     Basketball
+    1   Mike   30  San Francisco       Baseball
+    2  Sarah   20    Los Angeles         Tennis
+    3   Mary   35        Chicago           None
+
+    Teams
+    Team Name Captain  Number of Players
+    0    Tigers    John                 12
+    1     Lions    Mike                 10
+    """
@@ -0,0 +1,134 @@
+from typing import List
+
+import openai
+from pydantic import Field, BaseModel
+
+from openai_function_call import OpenAISchema
+
+
+class Fact(BaseModel):
+    """
+    Class representing single statement.
+    Each fact has a body and a list of sources.
+    If there are multiple facts make sure to break them apart such that each one only uses a set of sources that are relevant to it.
+    """
+
+    fact: str = Field(..., description="Body of the sentence, as part of a response")
+    substring_quote: List[str] = Field(
+        ...,
+        description="Each source should be a direct quote from the context, as a substring of the original content",
+    )
+
+    def _get_span(self, quote, context, errs=100):
+        """
+        Yield the span(s) of the closest fuzzy match of ``quote`` in ``context``.
+
+        The search starts with zero allowed errors and raises the error
+        budget one step at a time, stopping at the first budget that
+        matches or once ``errs`` has been tried.
+
+        Args:
+            quote: Text expected to appear (approximately) in ``context``.
+            context: Text to search in.
+            errs: Largest error budget to try.
+
+        Yields:
+            Spans reported by the match object's ``spans()``.
+        """
+        import regex
+
+        # An empty quote would trivially match at position 0; skip it.
+        if not quote:
+            return
+
+        # The quote is literal text, so escape it: otherwise characters such
+        # as "." or "(" would be read as regex syntax.
+        literal = regex.escape(quote)
+
+        for budget in range(errs + 1):
+            found = regex.search(f"({literal}){{e<={budget}}}", context)
+            if found is not None:
+                yield from found.spans()
+                return
+
+    def get_spans(self, context):
+        """Yield the matched spans in ``context`` for every quote of this fact."""
+        for quote in self.substring_quote:
+            yield from self._get_span(quote, context)
+
+
+# Top-level schema requested from the model: the question plus an answer
+# broken into individually cited `Fact` objects.
+# NOTE(review): presumably the docstring and field descriptions below are
+# sent to the model as part of the function schema — confirm before rewording.
+class QuestionAnswer(OpenAISchema):
+    """
+    Class representing a question and its answer as a list of facts each one should have a soruce.
+    each sentence contains a body and a list of sources."""
+
+    # The question being answered.
+    question: str = Field(..., description="Question that was asked")
+    # The answer as a list of facts, each with its own quotes.
+    answer: List[Fact] = Field(
+        ...,
+        description="Body of the answer, each fact should be its seperate object with a body and a list of sources",
+    )
+
+
+def ask_ai(question: str, context: str) -> QuestionAnswer:
+    """
+    Ask the chat model a question about ``context`` and parse the cited answer.
+
+    The request passes ``QuestionAnswer.openai_schema`` as the only function
+    and names it in ``function_call``; the completion is then parsed with
+    ``QuestionAnswer.from_response``.
+
+    Args:
+        question (str): The question to ask the AI.
+        context (str): The context for the question.
+
+    Returns:
+        QuestionAnswer: The object built from the completion.
+    """
+
+    completion = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo-0613",
+        temperature=0.2,
+        max_tokens=1000,
+        functions=[QuestionAnswer.openai_schema],
+        function_call={"name": QuestionAnswer.openai_schema["name"]},
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a world class algorithm to answer questions with correct and exact citations. ",
+            },
+            {"role": "user", "content": "Answer question using the following context"},
+            {"role": "user", "content": f"{context}"},
+            {"role": "user", "content": f"Question: {question}"},
+            {
+                "role": "user",
+                "content": "Tips: Make sure to cite your sources, and use the exact words from the context.",
+            },
+        ],
+    )
+
+    return QuestionAnswer.from_response(completion)
+
+
+# Sample question passed to `ask_ai` by this example.
+question = "What did the author do during college?"
+# Source passage for the example; citation spans are located in this exact
+# text, so its characters and line breaks matter.
+context = """
+My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.
+I went to an arts highschool but in university I studied Computational Mathematics and physics. 
+As part of coop I worked at many companies including Stitchfix, Facebook.
+I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
+"""
+
+
+def highlight(text, span):
+    """
+    Return a one-line excerpt of ``text`` with the ``span`` region highlighted.
+
+    The excerpt shows up to 50 characters before the span and up to 20 after
+    it, wraps the span in ``<`` / ``> `` with ANSI colour codes around it,
+    removes newlines from every piece, and puts ``...`` at both ends.
+
+    Args:
+        text: The full text the span refers to.
+        span: ``(start, end)`` offsets into ``text``.
+
+    Returns:
+        The formatted excerpt string.
+    """
+    start, end = span[0], span[1]
+
+    # Clamp at 0: a negative start index would wrap to the end of ``text``
+    # and drop (or corrupt) the leading context for early matches.
+    lead_start = max(0, start - 50)
+
+    before = text[lead_start:start].replace("\n", "")
+    quoted = text[start:end].replace("\n", "")
+    after = text[end : end + 20].replace("\n", "")
+
+    return (
+        "..."
+        + before
+        + "\033[91m"
+        + "<"
+        + quoted
+        + "> "
+        + "\033[0m"
+        + after
+        + "..."
+    )
+
+
+if __name__ == "__main__":
+    # Guarded so that importing this module (for example to reuse
+    # ``QuestionAnswer``) does not send an API request as a side effect.
+    answer = ask_ai(question, context)
+
+    print("Question:", question)
+    print()
+    # One statement per fact, followed by each quote located in the context.
+    for fact in answer.answer:
+        print("Statement:", fact.fact)
+        for span in fact.get_spans(context):
+            print("Citation:", highlight(context, span))
+        print()
+
+    # Sample of the printed result, kept as a bare string literal:
+    """
+    Question: What did the author do during college?
+
+    Statement: The author studied Computational Mathematics and physics in university.
+    Citation: ...s born in China.I went to an arts highschool but <in university I studied Computational Mathematics and physics> . As part of coop I...
+
+    Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.
+    Citation: ...y companies including Stitchfix, Facebook.I also <started the Data Science club at the University of Waterloo>  and I was the presi...
+    Citation: ... club at the University of Waterloo and I was the <president of the club for 2 years> ...
+    """
@@ -0,0 +1,6 @@
+import erdantic as erd
+
+from citation_fuzzy_match import QuestionAnswer
+
+# Create a diagram of the QuestionAnswer model and write it to a PNG file.
+SCHEMA_IMAGE_PATH = "examples/citation_fuzzy_match/schema.png"
+
+diagram = erd.create(QuestionAnswer)
+diagram.draw(SCHEMA_IMAGE_PATH)