add ollama

2026-06-05 22:50:18 +00:00 · 2024-02-08 16:33:12 -05:00
parent 571603cbfb
commit 727deeb158
5 changed files with 183 additions and 147 deletions
@@ -0,0 +1,90 @@
+---
+draft: False
+date: 2024-02-08
+slug: ollama
+tags:
+  - patching
+  - open source
+authors:
+  - jxnl
+---
+
+# Structured Outputs with Ollama
+
+Open-source LLMs are gaining popularity, and the release of Ollama's OpenAI compatibility layer has made it possible to obtain structured outputs using JSON schema.
+
+By the end of this blog post, you will learn how to use Instructor with Ollama. But before we proceed, let's first explore the concept of patching.
+
+<!-- more -->
+
+## Patching
+
+Instructor's patch enhances an OpenAI API client with the following features:
+
+- `response_model` in `create` calls that returns a pydantic model
+- `max_retries` in `create` calls that retries the call if it fails by using a backoff strategy
+
+!!! note "Learn More"
+
+    To learn more, please refer to the [docs](../../index.md). To understand the benefits of using Pydantic with Instructor, visit the tips and tricks section of the [why use Pydantic](../../why.md) page.
+
+## Ollama
+
+Start by downloading [Ollama](https://ollama.ai/download), and then pull a model such as Llama 2 or Mistral.
+
+!!! tip "Make sure you update your `ollama` to the latest version!"
+
+```bash
+ollama pull llama2
+```
+
+```python
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from typing import List
+
+import instructor
+
+
+class Character(BaseModel):
+    name: str
+    age: int
+    fact: List[str] = Field(..., description="A list of facts about the character")
+
+
+# enables `response_model` in create call
+client = instructor.patch(
+    OpenAI(
+        base_url="http://localhost:11434/v1",
+        api_key="ollama",  # required, but unused
+    ),
+    mode=instructor.Mode.JSON,
+)
+
+resp = client.chat.completions.create(
+    model="llama2",
+    messages=[
+        {
+            "role": "user",
+            "content": "Tell me about the Harry Potter",
+        }
+    ],
+    response_model=Character,
+)
+print(resp.model_dump_json(indent=2))
+"""
+{
+  "name": "Harry James Potter",
+  "age": 37,
+  "fact": [
+    "He is the chosen one.",
+    "He has a lightning-shaped scar on his forehead.",
+    "He is the son of James and Lily Potter.",
+    "He attended Hogwarts School of Witchcraft and Wizardry.",
+    "He is a skilled wizard and sorcerer.",
+    "He fought against Lord Voldemort and his followers.",
+    "He has a pet owl named Snowy."
+  ]
+}
+"""
+```
@@ -1,59 +1,77 @@
-# Running a Local Ollama Model
+# Structured Outputs with Ollama

-Here are some instructions on using Ollamo and Litellm.
+Open-source LLMs are gaining popularity, and the release of Ollama's OpenAI compatibility layer has made it possible to obtain structured outputs using JSON schema.

-## Instructions
+By the end of this blog post, you will learn how to use Instructor with Ollama. But before we proceed, let's first explore the concept of patching.

-1. Install Ollama by visiting the website [https://ollama.ai/download](https://ollama.ai/download) and selecting the appropriate operating system.
+## Patching

-2. Once installed, open the Ollama app, which should be running in your taskbar.
+Instructor's patch enhances an OpenAI API client with the following features:

-3. Open the terminal and download a model. For example, to download the llama2 model, run the command:
+- `response_model` in `create` calls that returns a pydantic model
+- `max_retries` in `create` calls that retries the call if it fails by using a backoff strategy
+
+!!! note "Learn More"
+
+    To learn more, please refer to the [docs](../../index.md). To understand the benefits of using Pydantic with Instructor, visit the tips and tricks section of the [why use Pydantic](../../why.md) page.
+
+## Ollama
+
+Start by downloading [Ollama](https://ollama.ai/download), and then pull a model such as Llama 2 or Mistral.
+
+!!! tip "Make sure you update your `ollama` to the latest version!"

-```bash
-ollama run llama2
 ```
-
-4. In your terminal, start your virtual environment and install the 'litellm[proxy]' package using poetry you can run the command:
-
-```bash
-pip install 'litellm[proxy]'
+ollama pull llama2
 ```

-Then you should be able to patch using the wrap completion API.
-Since it's just going to use regular prompting and not... Function Calling. You'll need to have a lot more instructions in the system message to ask it to output JSON.
-
 ```python
-from litellm import completion
-from pydantic import BaseModel
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from typing import List

 import instructor
-from instructor.patch import wrap_chatcompletion
-
-completion = wrap_chatcompletion(completion, mode=instructor.Mode.MD_JSON)


-class UserExtract(BaseModel):
+class Character(BaseModel):
    name: str
    age: int
+    fact: List[str] = Field(..., description="A list of facts about the character")


-user = completion(
-    model="ollama/llama2",
-    response_model=UserExtract,
-    messages=[
-        {
-            "role": "system",
-            "content": "You are a JSON extractor. Please extract the following JSON, No Talk.",
-        },
-        {
-            "role": "user",
-            "content": "Extract `My name is Jason and I am 25 years old`",
-        },
-    ],
+# enables `response_model` in create call
+client = instructor.patch(
+    OpenAI(
+        base_url="http://localhost:11434/v1",
+        api_key="ollama",  # required, but unused
+    ),
+    mode=instructor.Mode.JSON,
 )

-assert isinstance(user, UserExtract), "Should be instance of UserExtract"
-assert user.name.lower() == "jason"
-assert user.age == 25
+resp = client.chat.completions.create(
+    model="llama2",
+    messages=[
+        {
+            "role": "user",
+            "content": "Tell me about the Harry Potter",
+        }
+    ],
+    response_model=Character,
+)
+print(resp.model_dump_json(indent=2))
+"""
+{
+  "name": "Harry James Potter",
+  "age": 37,
+  "fact": [
+    "He is the chosen one.",
+    "He has a lightning-shaped scar on his forehead.",
+    "He is the son of James and Lily Potter.",
+    "He attended Hogwarts School of Witchcraft and Wizardry.",
+    "He is a skilled wizard and sorcerer.",
+    "He fought against Lord Voldemort and his followers.",
+    "He has a pet owl named Snowy."
+  ]
+}
+"""
 ```
@@ -1,39 +0,0 @@
-# Running a Local Ollama Model
-
-## Dependencies
-
- ollama
- litellm
- setuptools
-
-## Instructions
-
-1. Install Ollama by visiting the website [https://ollama.ai/download](https://ollama.ai/download) and selecting the appropriate operating system.
-
-2. Once installed, open the Ollama app, which should be running in your taskbar.
-
-3. Open the terminal and download a model. For example, to download the llama2 model, run the command:
-
-```bash
-ollama run llama2
-```
-
-4. In your terminal, start your virtual environment and install the 'litellm[proxy]' package using poetry you can run the command:
-
-```bash
-poetry add 'litellm[proxy]'
-```
-
-5. Next, install setuptools using the command:
-
-```bash
-poetry add setuptools
-```
-
-6. Lastly, start the litellm server with the command: `litellm --model ollama/llama2`. This will expose the port on your local machine.
-
-```bash
-litellm --model ollama/llama2
-```
-
-7. Now you can run the completion!
@@ -1,35 +0,0 @@
-from litellm import completion, provider_list
-from pydantic import BaseModel
-
-import instructor
-from instructor.patch import wrap_chatcompletion
-
-completion = wrap_chatcompletion(func=completion, mode=instructor.Mode.MD_JSON)
-
-
-class UserExtract(BaseModel):
-    name: str
-    age: int
-
-
-user = completion(
-    model="ollama/llama2",
-    response_model=UserExtract,
-    messages=[
-        {
-            "role": "system",
-            "content": "You are a JSON extractor. Please extract the following JSON, No Talk. You must return JSON right after the Codeblock",
-        },
-        {
-            "role": "user",
-            "content": "Extract `My name is Jason and I am 25 years old`",
-        },
-    ],
-)
-
-print(user.model_dump_json(indent=2))
-assert isinstance(user, UserExtract), "Should be instance of UserExtract"
-assert user.name.lower() == "jason"
-assert user.age == 25
-assert hasattr(user, "_raw_response")
-assert any(provider in user._raw_response.model for provider in provider_list)
@@ -1,46 +1,48 @@
-import instructor
-import openai
-from pydantic import BaseModel
+from openai import OpenAI
+from pydantic import BaseModel, Field
 from typing import List

-client = instructor.patch(openai.Client())
+import instructor


-class Analysis(BaseModel):
-    pros: List[str]
-    cons: List[str]
+class Character(BaseModel):
+    name: str
+    age: int
+    fact: List[str] = Field(..., description="A list of facts about the character")


-analysis = client.chat.completions.create(
-    model="gpt-3.5-turbo",
-    response_model=Analysis,
-    messages=[
-        {
-            "role": "system",
-            "content": "You are a perfect entity extraction system",
-        },
-        {
-            "role": "user",
-            "content": "Give me a pro-con analysis of joining South Park Commons. ",
-        },
-    ],
+# enables `response_model` in create call
+client = instructor.patch(
+    OpenAI(
+        base_url="http://localhost:11434/v1",
+        api_key="ollama",  # required, but unused
+    ),
+    mode=instructor.Mode.JSON,
 )

-print(analysis.model_dump_json(indent=2))
-"""{
-  "pros": [
-    "Access to a supportive community of like-minded individuals",
-    "Opportunities for collaboration and networking",
-    "Access to shared resources and knowledge",
-    "Exposure to diverse perspectives and ideas",
-    "Potential for personal and professional growth"
-  ],
-  "cons": [
-    "Membership fees and financial commitment",
-    "Limited autonomy and flexibility",
-    "Possible conflicts or disagreements within the community",
-    "Adherence to community rules and guidelines",
-    "Time commitment for participation in community activities"
+resp = client.chat.completions.create(
+    model="llama2",
+    messages=[
+        {
+            "role": "user",
+            "content": "Tell me about the Harry Potter",
+        }
+    ],
+    response_model=Character,
+)
+print(resp.model_dump_json(indent=2))
+""" 
+{
+  "name": "Harry James Potter",
+  "age": 37,
+  "fact": [
+    "He is the chosen one.",
+    "He has a lightning-shaped scar on his forehead.",
+    "He is the son of James and Lily Potter.",
+    "He attended Hogwarts School of Witchcraft and Wizardry.",
+    "He is a skilled wizard and sorcerer.",
+    "He fought against Lord Voldemort and his followers.",
+    "He has a pet owl named Snowy."
  ]
 }
 """