From ae59ed434ff53a9821cac141e7b3569335ea9867 Mon Sep 17 00:00:00 2001
From: Anmol Jawandha <anmoljawandha6@gmail.com>
Date: Fri, 1 Dec 2023 16:52:37 -0800
Subject: [PATCH] Markdown JSON Mode (#246)

Co-authored-by: Jason Liu <jxnl@users.noreply.github.com>
---
 docs/concepts/patching.md    | 14 ++++++++
 examples/vision/run.py       | 63 ++++++++++++++++++++++++++++++++++++
 instructor/function_calls.py |  8 +++++
 instructor/patch.py          | 43 +++++++++++++++++++-----
 tests/openai/test_modes.py   | 21 ++++++++++++
 5 files changed, 141 insertions(+), 8 deletions(-)
 create mode 100644 examples/vision/run.py

diff --git a/docs/concepts/patching.md b/docs/concepts/patching.md
index 2b7b812..e4f1cd7 100644
--- a/docs/concepts/patching.md
+++ b/docs/concepts/patching.md
@@ -40,6 +40,20 @@ from openai import OpenAI
 client = instructor.patch(OpenAI(), mode=Mode.JSON)
 ```
 
+## Markdown JSON Mode
+
+!!! warning "Experimental"
+
+    This is not recommended, and may not be supported in the future, this is just left to support vision models.
+
+```python
+import instructor
+from instructor import Mode
+from openai import OpenAI
+
+client = instructor.patch(OpenAI(), mode=Mode.MD_JSON)
+
+```
 ### Schema Integration
 
 In JSON Mode, the schema is part of the system message:
diff --git a/examples/vision/run.py b/examples/vision/run.py
new file mode 100644
index 0000000..ce74985
--- /dev/null
+++ b/examples/vision/run.py
@@ -0,0 +1,63 @@
+import instructor
+from openai import OpenAI
+from typing import Iterable
+from pydantic import BaseModel
+import base64
+
+client = instructor.patch(OpenAI(), mode=instructor.function_calls.Mode.MD_JSON)
+
+class Circle(BaseModel):
+    x: int
+    y: int
+    color: str
+
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+
+def draw_circle(image_size, num_circles, path):
+
+    from PIL import Image, ImageDraw
+    import random
+
+    image = Image.new("RGB", image_size, "white")
+
+    draw = ImageDraw.Draw(image)
+    for _ in range(num_circles):
+        # Randomize the circle properties
+        radius = 100#random.randint(10, min(image_size)//5)  # Radius between 10 and 1/5th of the smallest dimension
+        x = random.randint(radius, image_size[0] - radius)
+        y = random.randint(radius, image_size[1] - radius)
+        color = ['red', 'black', 'blue', 'green'][random.randint(0, 3)]
+
+        circle_position = (x - radius, y - radius, x + radius, y + radius)
+        print(f"Generating circle at {x, y} with color {color}")
+        draw.ellipse(circle_position, fill=color, outline="black")
+
+    image.save(path)
+
+img_path = 'circle.jpg'
+draw_circle((1024,1024), 1, img_path)
+base64_image = encode_image(img_path)
+
+response = client.chat.completions.create(
+    model="gpt-4-vision-preview",
+    max_tokens=1800,
+    response_model=Circle,
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": 'find the circle'},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    },
+                },
+            ],
+        }
+    ],
+)
+
+print(f"Found circle with center at x: {response.x}, y: {response.y} and color: {response.color}")
\ No newline at end of file
diff --git a/instructor/function_calls.py b/instructor/function_calls.py
index 8936da9..a16e116 100644
--- a/instructor/function_calls.py
+++ b/instructor/function_calls.py
@@ -1,4 +1,5 @@
 import json
+import re 
 from docstring_parser import parse
 from functools import wraps
 from typing import Any, Callable
@@ -13,6 +14,7 @@ class Mode(enum.Enum):
     FUNCTIONS: str = "function_call"
     TOOLS: str = "tool_call"
     JSON: str = "json_mode"
+    MD_JSON: str = "markdown_json_mode"
 
 
 class openai_function:
@@ -237,6 +239,12 @@ class OpenAISchema(BaseModel):
                 context=validation_context,
                 strict=strict,
             )
+        elif mode == Mode.MD_JSON:
+            return cls.model_validate_json(
+                message.content,
+                context=validation_context,
+                strict=strict,
+            )
         else:
             raise ValueError(f"Invalid patch mode: {mode}")
 
diff --git a/instructor/patch.py b/instructor/patch.py
index 3e364e9..446ac67 100644
--- a/instructor/patch.py
+++ b/instructor/patch.py
@@ -89,13 +89,27 @@ def handle_response_model(
                 "type": "function",
                 "function": {"name": response_model.openai_schema["name"]},
             }
-        elif mode == Mode.JSON:
-            new_kwargs["response_format"] = {"type": "json_object"}
-
-            # check that the first message is a system message
-            # if it is not, add a system message to the beginning
-            message = f"Make sure that your response to any message matches the json_schema below, do not deviate at all: \n{response_model.model_json_schema()['properties']}"
-
+        elif mode == Mode.JSON or mode == Mode.MD_JSON:
+            if mode == Mode.JSON:
+                new_kwargs["response_format"] = {"type": "json_object"}
+                # check that the first message is a system message
+                # if it is not, add a system message to the beginning
+                message = f"""Make sure that your response to any message matches the json_schema below,
+                        do not deviate at all: \n{response_model.model_json_schema()['properties']}
+                        """
+            else:
+                message = f"""
+                    As a genius expert, your task is to understand the content and provide 
+                    the parsed objects in json that match the following json_schema (do not deviate at all and its okay if you cant be exact):\n
+                    {response_model.model_json_schema()['properties']}
+                    """
+                new_kwargs["messages"].append(
+                    {
+                        "role": "assistant",
+                        "content": "```json",
+                    },
+                )
+                new_kwargs["stop"] = "```"
             if new_kwargs["messages"][0]["role"] != "system":
                 new_kwargs["messages"].insert(
                     0,
@@ -110,7 +124,6 @@ def handle_response_model(
                 new_kwargs["messages"][0]["content"] += f"\n\n{message}"
         else:
             raise ValueError(f"Invalid patch mode: {mode}")
-
     return response_model, new_kwargs
 
 
@@ -182,6 +195,13 @@ async def retry_async(
                     "content": f"Recall the function correctly, exceptions found\n{e}",
                 }
             )
+            if mode == Mode.MD_JSON:
+                kwargs["messages"].append(
+                    {
+                        "role": "assistant",
+                        "content": "```json",
+                    },
+                )
             retries += 1
             if retries > max_retries:
                 raise e
@@ -219,6 +239,13 @@ def retry_sync(
                     "content": f"Recall the function correctly, exceptions found\n{e}",
                 }
             )
+            if mode == Mode.MD_JSON:
+                kwargs["messages"].append(
+                    {
+                        "role": "assistant",
+                        "content": "```json",
+                    },
+                )
             retries += 1
             if retries > max_retries:
                 raise e
diff --git a/tests/openai/test_modes.py b/tests/openai/test_modes.py
index 3b6d358..92a1040 100644
--- a/tests/openai/test_modes.py
+++ b/tests/openai/test_modes.py
@@ -62,6 +62,27 @@ def test_json_mode():
     assert user.age == 25
 
 
+
+def test_markdown_json_mode():
+    response = client.chat.completions.create(
+        model="gpt-3.5-turbo-1106",
+        response_format={"type": "json_object"},
+        messages=[
+            {
+                "role": "system",
+                "content": f"Make sure that your response to any message matchs the json_schema below, do not deviate at all: \n{UserExtract.model_json_schema()['properties']}",
+            },
+            {
+                "role": "user",
+                "content": "Extract jason is 25 years old",
+            },
+        ],
+    )
+    user = UserExtract.from_response(response, mode=Mode.MD_JSON)
+    assert user.name.lower() == "jason"
+    assert user.age == 25
+
+
 @pytest.mark.parametrize("mode", [Mode.FUNCTIONS, Mode.JSON, Mode.TOOLS])
 def test_mode(mode):
     client = instructor.patch(OpenAI(), mode=mode)