mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
Add ollama run.py with completion functionality (#352)
Co-authored-by: Jason Liu <jason@jxnl.co>
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
# Running a Local Ollama Model
|
||||
|
||||
## Dependencies
|
||||
|
||||
- ollama
|
||||
- litellm
|
||||
- setuptools
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Download Ollama from [https://ollama.ai/download](https://ollama.ai/download), selecting the installer for your operating system, and install it.
|
||||
|
||||
2. Once installed, open the Ollama app; you should then see it running in your taskbar.
|
||||
|
||||
3. Open the terminal and download a model. For example, to download the llama2 model, run the command:
|
||||
|
||||
```bash
|
||||
ollama run llama2
|
||||
```
|
||||
|
||||
4. In your terminal, activate your virtual environment and install the `litellm[proxy]` package. If you use Poetry, run the command:
|
||||
|
||||
```bash
|
||||
poetry add 'litellm[proxy]'
|
||||
```
|
||||
|
||||
5. Next, install setuptools using the command:
|
||||
|
||||
```bash
|
||||
poetry add setuptools
|
||||
```
|
||||
|
||||
6. Lastly, start the litellm server with the command `litellm --model ollama/llama2`. This serves the model on a port on your local machine.
|
||||
|
||||
```bash
|
||||
litellm --model ollama/llama2
|
||||
```
|
||||
|
||||
7. Now you can run the completion script (`run.py`)!
|
||||
@@ -0,0 +1,35 @@
|
||||
from litellm import completion, provider_list
|
||||
from pydantic import BaseModel
|
||||
|
||||
import instructor
|
||||
from instructor.patch import wrap_chatcompletion
|
||||
|
||||
# Wrap litellm's `completion` through instructor in MD_JSON mode. The wrapped
# callable is the one that receives the `response_model` argument below.
patched_completion = wrap_chatcompletion(
    func=completion,
    mode=instructor.Mode.MD_JSON,
)


# NOTE: documented with comments rather than a class docstring on purpose --
# a docstring on a pydantic model becomes part of its JSON schema.
class UserExtract(BaseModel):
    # Fields the model is expected to pull out of the user message.
    name: str
    age: int


# Prompts are kept as named constants so the request below stays readable.
SYSTEM_PROMPT = "You are a JSON extractor. Please extract the following JSON, No Talk. You must return JSON right after the Codeblock"
USER_PROMPT = "Extract `My name is Jason and I am 25 years old`"

user = patched_completion(
    model="ollama/llama2",
    response_model=UserExtract,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT},
    ],
)

print(user.model_dump_json(indent=2))

# Sanity checks on the parsed result and on the raw response attached to it.
assert isinstance(user, UserExtract), "Should be instance of UserExtract"
assert user.name.lower() == "jason"
assert user.age == 25
assert hasattr(user, "_raw_response")
assert any(
    known_provider in user._raw_response.model for known_provider in provider_list
)
|
||||
+1
-1
@@ -119,7 +119,7 @@ def handle_response_model(
|
||||
new_kwargs["messages"].append(
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "```json",
|
||||
"content": "Here is the perfectly correctly formatted JSON\n```json",
|
||||
},
|
||||
)
|
||||
new_kwargs["stop"] = "```"
|
||||
|
||||
Reference in New Issue
Block a user