mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
remove bad examples
This commit is contained in:
@@ -1,77 +0,0 @@
|
||||
from instructor import OpenAISchema
|
||||
from pydantic import Field
|
||||
from typing import List, Any
|
||||
from openai import OpenAI
|
||||
|
||||
# Module-level OpenAI client; dataframe() sends its chat-completion request through it.
client = OpenAI()
|
||||
|
||||
|
||||
class RowData(OpenAISchema):
    # One record of the table. Values are positional and untyped (List[Any]).
    # A `#` comment is used instead of a docstring so the generated schema
    # text is left exactly as it was.
    row: List[Any] = Field(
        ...,
        description="The values for each row",
    )
|
||||
|
||||
|
||||
class Dataframe(OpenAISchema):
    """
    Class representing a dataframe. This class is used to convert
    data into a frame that can be used by pandas.
    """

    # Row payloads; `to_pandas` reads the `.row` list of every entry.
    data: List[RowData] = Field(
        ..., description="Correct rows of data aligned to column names, Nones are allowed"
    )
    # Header labels handed to pandas as the frame's column names.
    columns: List[str] = Field(
        ..., description="Column names relevant from source data, should be in snake_case"
    )

    def to_pandas(self):
        """Build a pandas DataFrame from `data` (one row per entry) and `columns`."""
        # pandas is imported inside the method rather than at module level.
        import pandas as pd

        records = [entry.row for entry in self.data]
        return pd.DataFrame(data=records, columns=self.columns)
|
||||
|
||||
|
||||
def dataframe(data: str) -> Dataframe:
    """Ask the chat model to restructure free text into a `Dataframe`.

    The request advertises `Dataframe.openai_schema` as the only function and
    pins `function_call` to that function's name, then parses the completion
    with `Dataframe.from_response`.

    Args:
        data: Source text to be mapped onto rows and columns.

    Returns:
        The `Dataframe` parsed from the model's function-call response.
    """
    schema = Dataframe.openai_schema
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.1,
        functions=[schema],
        function_call={"name": schema["name"]},
        messages=[
            {
                "role": "system",
                # The original literal was wrapped in the middle of the word
                # "and", so the prompt read "a<newline>nd". It is now one
                # intact sentence.
                "content": (
                    "Map this data into a dataframe "
                    "and correctly define the correct columns and rows"
                ),
            },
            {
                "role": "user",
                "content": f"{data}",
            },
        ],
        max_tokens=1000,
    )
    return Dataframe.from_response(completion)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: extract a table from a short free-text passage and print it.
    df = dataframe(
        "My name is John and I am 25 years old. I live in\n"
        "New York and I like to play basketball. His name is\n"
        "Mike and he is 30 years old. He lives in San Francisco\n"
        "and he likes to play baseball. Sarah is 20 years old\n"
        "and she lives in Los Angeles. She likes to play tennis.\n"
        "Her name is Mary and she is 35 years old.\n"
        "She lives in Chicago."
    )

    print(df.to_pandas())
    # Sample output recorded by the author:
    #   name age location hobby
    #   0 John 25 New York basketball
    #   1 Mike 30 San Francisco baseball
    #   2 Sarah 20 Los Angeles tennis
    #   3 Mary 35 Chicago None
|
||||
@@ -1,97 +0,0 @@
|
||||
from instructor import OpenAISchema
|
||||
from pydantic import Field
|
||||
from typing import List, Any
|
||||
from openai import OpenAI
|
||||
|
||||
# Module-level OpenAI client; dataframe() sends its chat-completion request through it.
client = OpenAI()
|
||||
|
||||
|
||||
class RowData(OpenAISchema):
    # A single table row held as a positional list of untyped values.
    # Documented with a `#` comment so the generated schema text is unchanged.
    row: List[Any] = Field(
        ...,
        description="Correct values for each row",
    )
|
||||
|
||||
|
||||
class Dataframe(OpenAISchema):
    # One named table: `columns` is the header, `data` the row payloads.
    # No class docstring is added on purpose, so the generated schema keeps
    # its current description.
    name: str = Field(
        ...,
        description="The name of the dataframe",
    )
    data: List[RowData] = Field(
        ..., description="Correct rows of data aligned to column names, Nones are allowed"
    )
    columns: List[str] = Field(
        ..., description="Column names relevant from source data, should be in snake_case"
    )

    def to_pandas(self):
        """Return a pandas DataFrame built from this table's rows and column names."""
        # Function-scope import, as in the original.
        import pandas as pd

        header = self.columns
        body = [item.row for item in self.data]
        return pd.DataFrame(data=body, columns=header)
|
||||
|
||||
|
||||
class Database(OpenAISchema):
    """
    A set of correct named and defined tables as dataframes
    Each one should have the right number of columns and correct
    values for each.
    """

    # Every extracted table, each represented as its own Dataframe.
    tables: List[Dataframe] = Field(..., description="List of tables in the database")
|
||||
|
||||
|
||||
def dataframe(data: str) -> Database:
    """Ask the chat model to restructure free text into a `Database` of tables.

    The request advertises `Database.openai_schema` as the only function and
    pins `function_call` to that function's name, then parses the completion
    with `Database.from_response`.

    Args:
        data: Source text to be mapped onto one or more tables.

    Returns:
        The `Database` parsed from the model's function-call response.
    """
    schema = Database.openai_schema
    completion = client.chat.completions.create(
        model="gpt-4-0613",
        temperature=0.0,
        functions=[schema],
        function_call={"name": schema["name"]},
        messages=[
            {
                "role": "system",
                # The original literal was wrapped in the middle of the word
                # "and", so the prompt read "a<newline>nd". It is now one
                # intact sentence.
                "content": (
                    "Map this data into a dataframe "
                    "and correctly define the correct columns and rows"
                ),
            },
            {
                "role": "user",
                "content": f"{data}",
            },
        ],
        max_tokens=1000,
    )
    return Database.from_response(completion)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: the passage describes people and teams, so more than one
    # table may come back.
    dfs = dataframe(
        "My name is John and I am 25 years old. I live in\n"
        "New York and I like to play basketball. His name is\n"
        "Mike and he is 30 years old. He lives in San Francisco\n"
        "and he likes to play baseball. Sarah is 20 years old\n"
        "and she lives in Los Angeles. She likes to play tennis.\n"
        "Her name is Mary and she is 35 years old.\n"
        "She lives in Chicago.\n"
        "\n"
        "On one team 'Tigers' the captan is John and there are 12 players.\n"
        "On the other team 'Lions' the captan is Mike and there are 10 players.\n"
    )

    # Print each table's name followed by its pandas rendering.
    for df in dfs.tables:
        print(df.name)
        print(df.to_pandas())
    # Sample output recorded by the author:
    #   People
    #   ID Name Age City Favorite Sport
    #   0 1 John 25 New York Basketball
    #   1 2 Mike 30 San Francisco Baseball
    #   2 3 Sarah 20 Los Angeles Tennis
    #   3 4 Mary 35 Chicago None
    #   Teams
    #   ID Team Name Captain Number of Players
    #   0 1 Tigers John 12
    #   1 2 Lions Mike 10
|
||||
@@ -0,0 +1,94 @@
|
||||
from openai import OpenAI
|
||||
from io import StringIO
|
||||
from typing import Annotated, Any, Iterable
|
||||
from openai import OpenAI
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
BeforeValidator,
|
||||
PlainSerializer,
|
||||
InstanceOf,
|
||||
WithJsonSchema,
|
||||
)
|
||||
import pandas as pd
|
||||
from tomlkit import table
|
||||
import instructor
|
||||
|
||||
|
||||
# Patch an OpenAI client with instructor, selecting MD_JSON mode.
# NOTE(review): `client` is assigned again further down with
# `instructor.patch(OpenAI())` and no mode, and that later binding is the one
# the request actually uses. Confirm which mode was intended.
client = instructor.patch(OpenAI(), mode=instructor.function_calls.Mode.MD_JSON)
|
||||
|
||||
|
||||
def md_to_df(data: Any) -> Any:
    """Parse a markdown pipe table held in a string into a pandas DataFrame.

    Non-string input is returned unchanged, so an existing DataFrame passes
    straight through.

    For string input the text is read with `pd.read_csv` using `|` as the
    separator and the second parsed column as the index. Columns that are
    entirely NaN are dropped, the first parsed data row is discarded
    (presumably the markdown `|---|---|` separator row), and surrounding
    whitespace is stripped from every string cell. Column labels and index
    labels are left exactly as parsed.

    Args:
        data: A markdown table as text, or any other object.

    Returns:
        The parsed DataFrame for string input, otherwise `data` itself.

    Raises:
        Whatever `pd.read_csv` raises for text it cannot parse.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        .dropna(axis=1, how="all")
        .iloc[1:]
    )

    def _strip(cell: Any) -> Any:
        # Empty cells are parsed as NaN (a float); calling .strip() on them
        # used to raise AttributeError, so only real strings are stripped.
        return cell.strip() if isinstance(cell, str) else cell

    # DataFrame.map only exists in pandas >= 2.1; older releases spell the
    # same element-wise operation `applymap`. The lookup is on the class so a
    # column that happens to be called "map" cannot shadow the method.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return elementwise(frame, _strip)
|
||||
|
||||
|
||||
# Pydantic field type for a pandas DataFrame that travels as a markdown string:
#   - validation: input is first passed through md_to_df, and the result must
#     be a pd.DataFrame instance;
#   - serialization: the frame is rendered with DataFrame.to_markdown();
#   - JSON schema: advertised as a plain string with the description below.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(lambda frame: frame.to_markdown()),
    WithJsonSchema(
        {
            "type": "string",
            "description": (
                "\n"
                "The markdown representation of the table,\n"
                "each one should be tidy, do not try to join tables\n"
                "that should be seperate"
            ),
        }
    ),
]
|
||||
|
||||
|
||||
class Table(BaseModel):
    # One extracted table. Documented with `#` comments rather than a
    # docstring so the model's JSON schema is unchanged.
    caption: str  # printed before the frame by the script's output loop
    dataframe: MarkdownDataFrame  # markdown text is parsed into a DataFrame during validation
|
||||
|
||||
|
||||
# NOTE(review): this rebinding replaces the MD_JSON-mode client created near
# the top of the file with one patched in instructor's default mode. Confirm
# which mode is intended.
client = instructor.patch(OpenAI())

# Request an iterable of Table objects extracted from the passage below.
tables = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=Iterable[Table],
    messages=[
        {
            "role": "system",
            "content": "Please extract the tables from the following text, merge as much as possible:",
        },
        {
            "role": "user",
            "content": (
                "\n"
                "My name is John and I am 25 years old. I live in\n"
                "New York and I like to play basketball. His name is\n"
                "Mike and he is 30 years old. He lives in San Francisco\n"
                "and he likes to play baseball. Sarah is 20 years old\n"
                "and she lives in Los Angeles. She likes to play tennis.\n"
                "Her name is Mary and she is 35 years old.\n"
                "She lives in Chicago.\n"
            ),
        },
    ],
)

# Print each table's caption, then its parsed frame, then a blank line.
for table in tables:
    print(table.caption)
    print(table.dataframe)
    print()
# Sample output recorded by the author:
#   People
#   Age City Hobby
#   Name
#   John 25 New York Basketball
#   Mike 30 San Francisco Baseball
#   Sarah 20 Los Angeles Tennis
#   Mary 35 Chicago N/A
|
||||
@@ -1,11 +1,61 @@
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI()
|
||||
from io import StringIO
|
||||
from typing import Annotated, Any, Iterable
|
||||
from openai import OpenAI
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
BeforeValidator,
|
||||
PlainSerializer,
|
||||
InstanceOf,
|
||||
WithJsonSchema,
|
||||
)
|
||||
import pandas as pd
|
||||
import instructor
|
||||
|
||||
|
||||
response = client.chat.completions.create(
|
||||
client = instructor.patch(OpenAI(), mode=instructor.function_calls.Mode.MD_JSON)
|
||||
|
||||
|
||||
def md_to_df(data: Any) -> Any:
    """Parse a markdown pipe table held in a string into a pandas DataFrame.

    Non-string input is returned unchanged, so an existing DataFrame passes
    straight through.

    For string input the text is read with `pd.read_csv` using `|` as the
    separator and the second parsed column as the index. Columns that are
    entirely NaN are dropped, the first parsed data row is discarded
    (presumably the markdown `|---|---|` separator row), and surrounding
    whitespace is stripped from every string cell. Column labels and index
    labels are left exactly as parsed.

    Args:
        data: A markdown table as text, or any other object.

    Returns:
        The parsed DataFrame for string input, otherwise `data` itself.

    Raises:
        Whatever `pd.read_csv` raises for text it cannot parse.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        .dropna(axis=1, how="all")
        .iloc[1:]
    )

    def _strip(cell: Any) -> Any:
        # Empty cells are parsed as NaN (a float); calling .strip() on them
        # used to raise AttributeError, so only real strings are stripped.
        return cell.strip() if isinstance(cell, str) else cell

    # DataFrame.map only exists in pandas >= 2.1; older releases spell the
    # same element-wise operation `applymap`. The lookup is on the class so a
    # column that happens to be called "map" cannot shadow the method.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return elementwise(frame, _strip)
|
||||
|
||||
|
||||
# Pydantic field type for a pandas DataFrame that travels as a markdown string:
#   - validation: input is first passed through md_to_df, and the result must
#     be a pd.DataFrame instance;
#   - serialization: the frame is rendered with DataFrame.to_markdown();
#   - JSON schema: advertised as a plain string with the description below.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(lambda frame: frame.to_markdown()),
    WithJsonSchema(
        {
            "type": "string",
            "description": (
                "\n"
                "The markdown representation of the table,\n"
                "each one should be tidy, do not try to join tables\n"
                "that should be seperate"
            ),
        }
    ),
]
|
||||
|
||||
|
||||
class Table(BaseModel):
    # One extracted table. Documented with `#` comments rather than a
    # docstring so the model's JSON schema is unchanged.
    caption: str  # printed before the frame by the script's output loop
    dataframe: MarkdownDataFrame  # markdown text is parsed into a DataFrame during validation
|
||||
|
||||
|
||||
tables = client.chat.completions.create(
|
||||
model="gpt-4-vision-preview",
|
||||
max_tokens=1000,
|
||||
response_model=Iterable[Table],
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
@@ -38,4 +88,17 @@ response = client.chat.completions.create(
|
||||
],
|
||||
)
|
||||
|
||||
print(response.choices[0].message.content)
|
||||
for table in tables:
|
||||
print(table.caption)
|
||||
print(table.dataframe)
|
||||
print()
|
||||
"""
|
||||
D1 App Retention Rates July 2023 (Ireland & U.K.)
|
||||
Ireland UK
|
||||
Category
|
||||
Education 14% 12%
|
||||
Entertainment 13% 11%
|
||||
Games 26% 25%
|
||||
Social 27% 18%
|
||||
Utilities 11% 9%
|
||||
"""
|
||||
Reference in New Issue
Block a user