Files
instructor/examples/vision/run.py
T
Anmol Jawandha ae59ed434f Markdown JSON Mode (#246)
Co-authored-by: Jason Liu <jxnl@users.noreply.github.com>
2023-12-01 19:52:37 -05:00

63 lines
1.9 KiB
Python

import instructor
from openai import OpenAI
from typing import Iterable
from pydantic import BaseModel
import base64
# Patch the OpenAI client with instructor in MD_JSON mode; the patched client
# is the one that accepts the `response_model` argument used further down.
client = instructor.patch(
    OpenAI(),
    mode=instructor.function_calls.Mode.MD_JSON,
)
# Structured answer requested from the model: the centre and colour of one
# circle. It is passed as `response_model` to the chat completion call, and the
# returned instance's fields are printed by the script.
# NOTE: documented with comments rather than a docstring on purpose -- pydantic
# copies a model's docstring into its JSON schema description.
class Circle(BaseModel):
    x: int  # x coordinate of the circle centre (presumably image pixels -- confirm)
    y: int  # y coordinate of the circle centre (presumably image pixels -- confirm)
    color: str  # colour of the circle as reported by the model
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode, base64-encoded, and the encoded bytes
    are decoded as UTF-8 so that the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def draw_circle(image_size, num_circles, path, radius=100):
    """Draw randomly placed, randomly coloured circles and save the image.

    A white RGB canvas of ``image_size`` is created, ``num_circles`` filled
    circles with a black outline are drawn on it, and the result is written
    to ``path``. The centre and colour of each circle are printed as it is
    generated.

    Args:
        image_size: ``(width, height)`` of the canvas.
        num_circles: Number of circles to draw.
        path: Destination passed to ``Image.save``.
        radius: Requested circle radius. Defaults to 100, the value that was
            previously hard-coded. It is reduced when necessary so that a
            whole circle always fits inside the canvas.
    """
    # Kept as function-local imports, as in the original.
    from PIL import Image, ImageDraw
    import random

    palette = ("red", "black", "blue", "green")
    width, height = image_size

    # random.randint(radius, size - radius) raises ValueError when
    # radius > size - radius, i.e. on canvases narrower than the circle's
    # diameter. Clamp the radius so small canvases work instead of crashing.
    radius = max(0, min(radius, width // 2, height // 2))

    image = Image.new("RGB", image_size, "white")
    draw = ImageDraw.Draw(image)
    for _ in range(num_circles):
        # The centre is chosen so the full circle lies within the canvas.
        x = random.randint(radius, width - radius)
        y = random.randint(radius, height - radius)
        color = random.choice(palette)
        bounding_box = (x - radius, y - radius, x + radius, y + radius)
        print(f"Generating circle at {x, y} with color {color}")
        draw.ellipse(bounding_box, fill=color, outline="black")
    image.save(path)
# Generate a 1024x1024 test image containing one random circle, then read it
# back as base64 so it can be embedded in the request.
img_path = "circle.jpg"
draw_circle((1024, 1024), 1, img_path)
base64_image = encode_image(img_path)

# The user message has two parts: the text instruction and the image, which is
# sent inline as a JPEG data URL.
text_part = {"type": "text", "text": "find the circle"}
image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
}

# `response_model=Circle` asks the patched client for a Circle instance, whose
# fields are read below.
response = client.chat.completions.create(
    model="gpt-4-vision-preview",
    max_tokens=1800,
    response_model=Circle,
    messages=[{"role": "user", "content": [text_part, image_part]}],
)
print(
    f"Found circle with center at x: {response.x}, y: {response.y} "
    f"and color: {response.color}"
)