From 9cf9e5365903ed9511dcdd07a132632224e7949c Mon Sep 17 00:00:00 2001 From: Anmol Jawandha Date: Sat, 16 Dec 2023 19:00:28 -0800 Subject: [PATCH] Third party models (#284) Co-authored-by: Jason Liu Co-authored-by: Jason Liu --- docs/blog/index.md | 3 +- docs/blog/posts/anyscale.md | 75 +++++++++++++++++++++++++++++++++ docs/blog/posts/introduction.md | 12 ++++-- docs/cli/finetune.md | 68 +++++++++++++++--------------- docs/cli/index.md | 1 - docs/cli/usage.md | 7 ++- instructor/function_calls.py | 1 - 7 files changed, 122 insertions(+), 45 deletions(-) create mode 100644 docs/blog/posts/anyscale.md diff --git a/docs/blog/index.md b/docs/blog/index.md index ad55ac4..fd386c5 100644 --- a/docs/blog/index.md +++ b/docs/blog/index.md @@ -9,12 +9,13 @@ The goal of the blog is to capture some content that does not neatly fit within 3. [What are the basics of Guardrails and Validation in AI models?](posts/validation-part1.md) 4. [How does one validate citations in AI-generated content?](posts/citations.md) 5. [What are the methods and benefits of fine-tuning and distillation in AI models?](posts/distilation-part1.md) +6. 
[How can I use Anyscale with Instructor?](posts/anyscale.md) ## Learning Python - [How can I effectively cache my functions in Python?](posts/caching.md) - [What are the fundamentals of batch processing with async in Python?](posts/learn-async.md) -- [How can I stream models to improve latency?](posts/generators.md) +- [How can I stream models to improve latency?](posts/generator.md) ## Talks diff --git a/docs/blog/posts/anyscale.md b/docs/blog/posts/anyscale.md new file mode 100644 index 0000000..d0c5d03 --- /dev/null +++ b/docs/blog/posts/anyscale.md @@ -0,0 +1,75 @@ +--- +draft: False +date: 2023-12-15 +slug: patching +tags: + - patching + - open source +authors: + - anmol + - jxnl +--- + +# Structured Outputs with Anyscale + +Open-source LLMs are gaining popularity, and the release of Anyscale's Mistral model has made it possible to obtain structured outputs using JSON schema at any scale. Instead of relying on a model's default output mode, you can utilize JSON schema to obtain structured outputs. This approach is a time-saving alternative to extensive prompt engineering. + +By the end of this blog post, you will learn how to effectively utilize the instructor at any scale. But before we proceed, let's first explore the concept of patching. + +## Patching + +Instructor's patch enhances the OpenAI API with the following features: + +- `response_model` in `create` calls that returns a pydantic model +- `max_retries` in `create` calls that retries the call if it fails by using a backoff strategy + +!!! note "Learn More" + + To learn more check out the [docs](../../index.md) + +## Anyscale + +The good news is that Anyscale employs the same OpenAI client, and its models support some of these output modes too! + +Let's explore one of the models available in Anyscale's extensive collection! 
+ +```python +from openai import OpenAI +from pydantic import BaseModel + +import instructor + + +class UserDetails(BaseModel): + name: str + age: int + +# enables `response_model` in create call +client = instructor.patch( + OpenAI( + base_url="https://api.endpoints.anyscale.com/v1", + api_key="YOUR_ANYSCALE_API_KEY" + ), + # This uses Anyscale's json schema output mode + mode=instructor.Mode.JSON_SCHEMA +) + +resp = client.chat.completions.create( + model="mistralai/Mixtral-8x7B-Instruct-v0.1", + messages=[ + { + "role": "system", + "content": "You are a world class extractor" + }, + { + "role": "user", + "content": 'Extract the following entities: "Jason is 20"' + }, + ], + response_model=UserDetails, +) +print(resp) +>>> name='Jason' age=20 +``` + +You can find more information about Anyscale's output mode support [here](https://docs.endpoints.anyscale.com/). diff --git a/docs/blog/posts/introduction.md b/docs/blog/posts/introduction.md index 9b15eb6..3defd39 100644 --- a/docs/blog/posts/introduction.md +++ b/docs/blog/posts/introduction.md @@ -1,5 +1,5 @@ --- -draft: False +draft: False date: 2023-09-11 tags: - Introduction @@ -38,7 +38,7 @@ client = instructor.patch(OpenAI()) class UserDetail(pydantic.BaseModel): name: str age: int - + def introduce(self): return f"Hello I'm {self.name} and I'm {self.age} years old" @@ -77,6 +77,7 @@ class QuestionAnswerNoEvil(BaseModel): Pydantic allows for modular output schemas. This leads to more organized code. 
### Composition of Schemas + ```python class UserDetails(BaseModel): name: str @@ -87,6 +88,7 @@ class UserWithAddress(UserDetails): ``` ### Defining Relationships + ```python class UserDetail(BaseModel): id: int @@ -99,6 +101,7 @@ class UserRelationships(BaseModel): ``` ### Using Enums + ```python from enum import Enum, auto @@ -115,6 +118,7 @@ class UserDetail(BaseModel): ``` ### Flexible Schemas + ```python from typing import List @@ -129,6 +133,7 @@ class UserDetail(BaseModel): ``` ### Chain of Thought + ```python class TimeRange(BaseModel): chain_of_thought: str @@ -168,6 +173,7 @@ def extract_user(str) -> UserDetails: ``` ### Response Modeling + ```python class MaybeUser(BaseModel): result: Optional[UserDetail] @@ -179,4 +185,4 @@ class MaybeUser(BaseModel): Instructor, with Pydantic, simplifies interaction with language models. It is usable for both experienced and new developers. -If you enjoy the content or want to try out `instructor` please check out the [github](https://github.com/jxnl/instructor) and give us a star! \ No newline at end of file +If you enjoy the content or want to try out `instructor` please check out the [github](https://github.com/jxnl/instructor) and give us a star! diff --git a/docs/cli/finetune.md b/docs/cli/finetune.md index c5e510c..48a13d6 100644 --- a/docs/cli/finetune.md +++ b/docs/cli/finetune.md @@ -1,8 +1,9 @@ # Using the Command Line Interface + The instructor CLI provides functionalities for managing fine-tuning jobs on OpenAI. !!! warning "Incomplete API" - The CLI is still under development and does not yet support all features of the API. If you would like to use a feature that is not yet supported, please consider using the contributing to our library [jxnl/instructor](https://www.github.com/jxnl/instructor) instead. +The CLI is still under development and does not yet support all features of the API. 
If you would like to use a feature that is not yet supported, please consider contributing to our library [jxnl/instructor](https://www.github.com/jxnl/instructor) instead. !!! note "Low hanging fruit" @@ -15,12 +16,12 @@ The instructor CLI provides functionalities for managing fine-tuning jobs on Ope ### View Jobs Options ```sh -$ instructor jobs --help - - Usage: instructor jobs [OPTIONS] COMMAND [ARGS]... - - Monitor and create fine tuning jobs - +$ instructor jobs --help + + Usage: instructor jobs [OPTIONS] COMMAND [ARGS]... + + Monitor and create fine tuning jobs + ╭─ Options ───────────────────────────────────────────────────────────────────────────────╮ │ --help Display the help message. │ ╰─────────────────────────────────────────────────────────────────────────────────────────╯ @@ -39,11 +40,11 @@ The create-from-file command uploads and trains a model in a single step. ```sh ❯ instructor jobs create-from-file --help - -Usage: instructor jobs create-from-file [OPTIONS] FILE - - Create a fine-tuning job from a file. - + +Usage: instructor jobs create-from-file [OPTIONS] FILE + + Create a fine-tuning job from a file. + ╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────╮ │ * file TEXT Path to the file for fine-tuning [default: None] [required] │ ╰───────────────────────────────────────────────────────────────────────────────────────────────────╯ @@ -71,11 +72,11 @@ The create-from-id command uses an uploaded file and trains a model ```sh ❯ instructor jobs create-from-id --help - - Usage: instructor jobs create-from-id [OPTIONS] ID - - Create a fine-tuning job from an existing ID. 
+ ╭─ Arguments ───────────────────────────────────────────────────────────────────────────╮ │ * id TEXT ID of the existing fine-tuning job [default: None] [required] │ ╰───────────────────────────────────────────────────────────────────────────────────────╯ @@ -95,21 +96,20 @@ The create-from-id command uses an uploaded file and trains a model ```sh $ instructor files upload transformed_data.jsonl -$ instructor files upload validation_data.jsonl +$ instructor files upload validation_data.jsonl $ instructor files list ... $ instructor jobs create_from_id --validation_file --n_epochs 3 --batch_size 16 --learning_rate_multiplier 0.5 ``` - ### Viewing Files and Jobs #### Viewing Jobs ```sh -$ instructor jobs list +$ instructor jobs list -OpenAI Fine Tuning Job Monitoring +OpenAI Fine Tuning Job Monitoring ┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ ┃ ┃ ┃ Completion ┃ ┃ ┃ ┃ ┃ ┃ Job ID ┃ Status ┃ Creation Time ┃ Time ┃ Model Name ┃ File ID ┃ Epochs ┃ Base Model ┃ @@ -126,25 +126,23 @@ OpenAI Fine Tuning Job Monitoring Automatically refreshes every 5 seconds, press Ctrl+C to exit ``` - #### Viewing Files ```sh -$ instructor files list +$ instructor files list -OpenAI Files -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓ -┃ File ID ┃ Size (bytes) ┃ Creation Time ┃ Filename ┃ Purpose ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩ -│ file-0lw2BSNRUlXZXRRu2beCCWjl │ 369523 │ 2023-08-23 23:31:57 │ file │ fine-tune │ -│ file-IHaUXcMEykmFUp1kt2puCDEq │ 369523 │ 2023-08-23 23:09:35 │ file │ fine-tune │ -│ file-ja9vRBf0FydEOTolaa3BMqES │ 369523 │ 2023-08-23 22:42:29 │ file │ fine-tune │ -│ file-F7lJg6Z47CREvmx4kyvyZ6Sn │ 369523 │ 2023-08-23 22:42:03 │ file │ fine-tune │ -│ file-YUxqZPyJRl5GJCUTw3cNmA46 │ 369523 │ 2023-08-23 22:29:10 │ file │ fine-tune │ 
-└───────────────────────────────┴──────────────┴─────────────────────┴──────────┴───────────┘ +OpenAI Files +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ File ID ┃ Size (bytes) ┃ Creation Time ┃ Filename ┃ Purpose ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩ +│ file-0lw2BSNRUlXZXRRu2beCCWjl │ 369523 │ 2023-08-23 23:31:57 │ file │ fine-tune │ +│ file-IHaUXcMEykmFUp1kt2puCDEq │ 369523 │ 2023-08-23 23:09:35 │ file │ fine-tune │ +│ file-ja9vRBf0FydEOTolaa3BMqES │ 369523 │ 2023-08-23 22:42:29 │ file │ fine-tune │ +│ file-F7lJg6Z47CREvmx4kyvyZ6Sn │ 369523 │ 2023-08-23 22:42:03 │ file │ fine-tune │ +│ file-YUxqZPyJRl5GJCUTw3cNmA46 │ 369523 │ 2023-08-23 22:29:10 │ file │ fine-tune │ +└───────────────────────────────┴──────────────┴─────────────────────┴──────────┴───────────┘ ``` -# Contributions +# Contributions We aim to provide a light wrapper around the API rather than offering a complete CLI. Contributions are welcome! Please feel free to make an issue at [jxnl/instructor/issues](https://github.com/jxnl/instructor/issues) or submit a pull request. - diff --git a/docs/cli/index.md b/docs/cli/index.md index da6ca90..0757042 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -25,7 +25,6 @@ pip install instructor ## Features - **API Usage Monitoring**: Keep tabs on your API usage right from the terminal. Track token counts, total requests, and even calculate the costs. To learn more, consult the [Usage Guide](usage.md). - - **Model Fine-Tuning**: Optimize your models to meet your specific requirements using our fine-tuning app. For more details, check out the [Fine-Tuning Guide](finetune.md). 
--- diff --git a/docs/cli/usage.md b/docs/cli/usage.md index 5d67254..6c91875 100644 --- a/docs/cli/usage.md +++ b/docs/cli/usage.md @@ -9,9 +9,9 @@ The OpenAI API Usage CLI tool provides functionalities for monitoring your OpenA ```sh $ instructor usage --help - Usage: instructor usage [OPTIONS] COMMAND [ARGS]... + Usage: instructor usage [OPTIONS] COMMAND [ARGS]... - Check OpenAI API usage data + Check OpenAI API usage data ╭─ Options ───────────────────────────────────────────────────────╮ │ --help Show this message and exit. │ @@ -51,7 +51,6 @@ To display the API usage for today, simply run: $ instructor usage list ``` -# Contributions +# Contributions We aim to provide a light wrapper around the API rather than offering a complete CLI. Contributions are welcome! Please feel free to make an issue at [jxnl/instructor/issues](https://github.com/jxnl/instructor/issues) or submit a pull request. - diff --git a/instructor/function_calls.py b/instructor/function_calls.py index 4fc6321..1bb87dd 100644 --- a/instructor/function_calls.py +++ b/instructor/function_calls.py @@ -174,7 +174,6 @@ class OpenAISchema(BaseModel): Parameters: completion (openai.ChatCompletion): The response from an openai chat completion - throw_error (bool): Whether to throw an error if the function call is not detected validation_context (dict): The validation context to use for validating the response strict (bool): Whether to use strict json parsing mode (Mode): The openai completion mode