Files
2023-12-18 18:27:04 -05:00

120 lines
3.9 KiB
Python

from io import StringIO
from typing import Annotated, Any, Iterable
from openai import OpenAI
from pydantic import (
BaseModel,
BeforeValidator,
PlainSerializer,
InstanceOf,
WithJsonSchema,
)
import pandas as pd
import instructor
# Module-wide chat client: an OpenAI client wrapped by instructor, which is
# what lets the call in extract_table pass `response_model`.
# NOTE(review): MD_JSON presumably makes the model reply with JSON inside a
# markdown block — confirm against the instructor documentation.
client = instructor.patch(
    OpenAI(),
    mode=instructor.function_calls.Mode.MD_JSON,
)
def to_markdown(df: pd.DataFrame) -> str:
    """Render *df* as markdown text.

    Thin wrapper around ``DataFrame.to_markdown`` so it can be handed to
    pydantic as a plain serializer function.
    """
    rendered = df.to_markdown()
    return rendered
def _strip_text(value: Any) -> Any:
    """Return *value* without surrounding whitespace when it is a string."""
    return value.strip() if isinstance(value, str) else value


def md_to_df(data: Any) -> Any:
    """Parse a pipe-delimited markdown table into a ``DataFrame``.

    Args:
        data: Markdown table text. Any non-string value (for example an
            already-built ``DataFrame``) is returned unchanged.

    Returns:
        For string input, a ``DataFrame`` indexed by the table's first column,
        with the padding whitespace removed from cells, column labels, index
        labels and the index name. Empty cells stay missing (NaN). For any
        other input, ``data`` itself.
    """
    if not isinstance(data, str):
        return data

    frame = (
        # Each row starts and ends with "|", so splitting on it yields an empty
        # field on both sides; field 1 is the table's first real column.
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        # Drop the all-empty columns produced by the outer pipes.
        .dropna(axis=1, how="all")
        # Row 0 is the markdown header separator ("---"), not data.
        .iloc[1:]
    )

    # Strip cell padding column by column. Series.map exists in every pandas
    # version (DataFrame.map only since 2.1), and _strip_text leaves
    # non-string cells such as NaN alone instead of raising AttributeError.
    frame = frame.apply(lambda column: column.map(_strip_text))

    # Labels carry the same padding as the cells; strip them as well so
    # columns and rows can be looked up by their plain names.
    frame.columns = [_strip_text(label) for label in frame.columns]
    frame.index = pd.Index(
        [_strip_text(label) for label in frame.index],
        name=_strip_text(frame.index.name),
    )
    return frame
# JSON schema attached to the field type below: the value is described to
# schema consumers as a plain string holding a markdown table.
_MARKDOWN_TABLE_SCHEMA = {
    "type": "string",
    "description": """
The markdown representation of the table,
each one should be tidy, do not try to join tables
that should be seperate""",
}

# Field type for a pandas DataFrame that is exchanged as markdown text.
# The Annotated metadata wires up, in order:
#   - InstanceOf[pd.DataFrame]: the underlying type is a DataFrame
#   - BeforeValidator(md_to_df): incoming strings are parsed by md_to_df
#   - PlainSerializer(to_markdown): serialization goes through to_markdown
#   - WithJsonSchema(...): the schema above replaces the generated one
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(to_markdown),
    WithJsonSchema(_MARKDOWN_TABLE_SCHEMA),
]
# Response model for one extracted table; extract_table requests an iterable
# of these from the patched client.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic may surface a model docstring in the generated schema — confirm
# before converting these comments to a docstring.
class Table(BaseModel):
    # Text accompanying the table (the prompt asks the model to describe it).
    caption: str
    # Table body; validated and serialized through MarkdownDataFrame.
    dataframe: MarkdownDataFrame
def extract_table(
    url: str,
    *,
    model: str = "gpt-4-vision-preview",
    max_tokens: int = 1800,
) -> Iterable[Table]:
    """Ask a vision-capable chat model to extract the tables shown in an image.

    Args:
        url: Location of the image; forwarded unchanged as the message's
            ``image_url``.
        model: Name of the chat model to query. The default is the value that
            used to be hard-coded; pass another vision-capable model when that
            preview model is not available.
        max_tokens: Upper bound on the completion length sent with the
            request. Defaults to the previously hard-coded 1800.

    Returns:
        The result of the patched client's ``chat.completions.create`` call
        made with ``response_model=Iterable[Table]``.
    """
    # Prompt text is kept verbatim; it is part of the request payload.
    instructions = {
        "type": "text",
        "text": """Extract the table from the image, and describe it.
Each table should be tidy, do not try to join tables that
should be seperately described.""",
    }
    image = {
        "type": "image_url",
        "image_url": {"url": url},
    }
    return client.chat.completions.create(
        model=model,
        response_model=Iterable[Table],
        max_tokens=max_tokens,
        messages=[
            {
                "role": "user",
                "content": [instructions, image],
            }
        ],
    )
if __name__ == "__main__":
    # Demo: extract the tables from a sample PNG and print each caption
    # followed by its parsed DataFrame.
    url = "https://a.storyblok.com/f/47007/2400x2000/bf383abc3c/231031_uk-ireland-in-three-charts_table_v01_b.png"
    for table in extract_table(url):
        print(table.caption)  # print() already terminates the line with "\n"
        print(table.dataframe)
"""
Top 10 grossing apps in October 2023 (Ireland) for Android platforms, listing the rank, app name, and category.
App Name Category
Rank
1 Google One Productivity
2 Disney+ Entertainment
3 TikTok - Videos, Music & LIVE Entertainment
4 Candy Crush Saga Games
5 Tinder: Dating, Chat & Friends Social networking
6 Coin Master Games
7 Roblox Games
8 Bumble - Dating & Make Friends Dating
9 Royal Match Games
10 Spotify: Music and Podcasts Music & Audio
Top 10 grossing apps in October 2023 (Ireland) for iOS platforms, listing the rank, app name, and category.
App Name Category
Rank
1 Tinder: Dating, Chat & Friends Social networking
2 Disney+ Entertainment
3 YouTube: Watch, Listen, Stream Entertainment
4 Audible: Audio Entertainment Entertainment
5 Candy Crush Saga Games
6 TikTok - Videos, Music & LIVE Entertainment
7 Bumble - Dating & Make Friends Dating
8 Roblox Games
9 LinkedIn: Job Search & News Business
10 Duolingo - Language Lessons Education
"""