mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
120 lines
3.9 KiB
Python
120 lines
3.9 KiB
Python
from io import StringIO
|
|
from typing import Annotated, Any, Iterable
|
|
from openai import OpenAI
|
|
from pydantic import (
|
|
BaseModel,
|
|
BeforeValidator,
|
|
PlainSerializer,
|
|
InstanceOf,
|
|
WithJsonSchema,
|
|
)
|
|
import pandas as pd
|
|
import instructor
|
|
|
|
|
|
# Module-wide OpenAI client wrapped by `instructor.patch` in MD_JSON mode.
# `extract_table` relies on the patched `chat.completions.create` accepting
# the extra `response_model` keyword.
client = instructor.patch(OpenAI(), mode=instructor.function_calls.Mode.MD_JSON)
|
|
|
|
|
|
def to_markdown(df: pd.DataFrame) -> str:
    """Serialize *df* by delegating to its own ``to_markdown`` method."""
    rendered = df.to_markdown()
    return rendered
|
|
|
|
|
|
def _strip_text(value: Any) -> Any:
    """Return *value* without surrounding whitespace if it is a string."""
    return value.strip() if isinstance(value, str) else value


def md_to_df(data: Any) -> Any:
    """Parse a pipe-delimited markdown table into a ``pandas.DataFrame``.

    Non-string input is returned untouched, so values that are already
    DataFrames (or anything else) pass straight through to later validation.

    For string input the text is read as ``|``-separated CSV, using the
    second field (the first real column after the leading pipe) as the
    index. Columns that are entirely empty are dropped (these come from the
    leading and trailing pipes), the first data row is dropped (the
    ``|---|---|`` separator), and surrounding whitespace is removed from
    every string cell, column label, index label and the index name.

    Args:
        data: Markdown table text, or any other object.

    Returns:
        A DataFrame when *data* is a string; otherwise *data* itself.

    Raises:
        Whatever ``pandas.read_csv`` raises for text it cannot parse.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        .dropna(axis=1, how="all")  # leading/trailing pipes yield empty columns
        .iloc[1:]  # skip the |---|---| separator row
    )

    # DataFrame.map only exists on pandas >= 2.1; older releases spell it
    # applymap. The class-level check avoids confusion with a column
    # that happens to be called "map".
    elementwise = frame.map if hasattr(pd.DataFrame, "map") else frame.applymap
    # _strip_text leaves non-string cells (e.g. NaN from an empty `||` cell)
    # alone instead of failing on a missing .strip attribute.
    frame = elementwise(_strip_text)

    # Cell padding is not the only whitespace: headers and index labels are
    # padded by the markdown layout too.
    frame.columns = [_strip_text(label) for label in frame.columns]
    frame.index = pd.Index(
        [_strip_text(label) for label in frame.index],
        name=_strip_text(frame.index.name),
    )
    return frame
|
|
|
|
|
|
# JSON schema advertised for the field: a plain string carrying the markdown
# rendering of one table.
_MARKDOWN_TABLE_SCHEMA = {
    "type": "string",
    "description": """
                The markdown representation of the table,
                each one should be tidy, do not try to join tables
                that should be seperate""",
}

# Pydantic field type for a DataFrame that travels as markdown text:
#   * the validated value must be a pandas DataFrame instance,
#   * incoming values are first passed through md_to_df,
#   * serialization goes through to_markdown,
#   * the JSON schema is overridden with the string schema above.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(to_markdown),
    WithJsonSchema(_MARKDOWN_TABLE_SCHEMA),
]
|
|
|
|
|
|
# One extracted table: a caption plus the table body.
class Table(BaseModel):
    # Text caption for the table.
    caption: str
    # Table body, typed as MarkdownDataFrame (a DataFrame that is validated
    # from / serialized to markdown text).
    dataframe: MarkdownDataFrame
|
|
|
|
|
|
def extract_table(
    url: str,
    *,
    model: str = "gpt-4-vision-preview",
    max_tokens: int = 1800,
) -> Iterable[Table]:
    """Ask the vision model to extract every table shown in an image.

    Sends a single user message made of a text instruction plus the image
    URL through the module-level ``client`` and requests the response as
    ``Iterable[Table]``.

    Args:
        url: Location of the image, passed as the ``image_url`` content part.
        model: Name of the chat model to call. The default is kept from the
            original hard-coded value.
            NOTE(review): this is a preview model name and may no longer be
            served; confirm against the provider's current model list and
            pass ``model=`` to override.
        max_tokens: Upper bound on completion tokens for the request.

    Returns:
        The result of the patched ``chat.completions.create`` call, requested
        with ``response_model=Iterable[Table]``.
    """
    return client.chat.completions.create(
        model=model,
        response_model=Iterable[Table],
        max_tokens=max_tokens,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """Extract the table from the image, and describe it.
                        Each table should be tidy, do not try to join tables that
                        should be seperately described.""",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": url},
                    },
                ],
            }
        ],
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: extract the tables from a sample chart image and print each
    # caption followed by its DataFrame.
    image_url = "https://a.storyblok.com/f/47007/2400x2000/bf383abc3c/231031_uk-ireland-in-three-charts_table_v01_b.png"
    for table in extract_table(image_url):
        print(table.caption)
        print(table.dataframe)
    # Sample output from one run, kept for reference.
    """
    Top 10 grossing apps in October 2023 (Ireland) for Android platforms, listing the rank, app name, and category.

          App Name                        Category
    Rank
    1     Google One                      Productivity
    2     Disney+                         Entertainment
    3     TikTok - Videos, Music & LIVE   Entertainment
    4     Candy Crush Saga                Games
    5     Tinder: Dating, Chat & Friends  Social networking
    6     Coin Master                     Games
    7     Roblox                          Games
    8     Bumble - Dating & Make Friends  Dating
    9     Royal Match                     Games
    10    Spotify: Music and Podcasts     Music & Audio

    Top 10 grossing apps in October 2023 (Ireland) for iOS platforms, listing the rank, app name, and category.

          App Name                        Category
    Rank
    1     Tinder: Dating, Chat & Friends  Social networking
    2     Disney+                         Entertainment
    3     YouTube: Watch, Listen, Stream  Entertainment
    4     Audible: Audio Entertainment    Entertainment
    5     Candy Crush Saga                Games
    6     TikTok - Videos, Music & LIVE   Entertainment
    7     Bumble - Dating & Make Friends  Dating
    8     Roblox                          Games
    9     LinkedIn: Job Search & News     Business
    10    Duolingo - Language Lessons     Education
    """
|