Implement CLI for finetuning (#85)

* add cli * remove hint * fix * updat docs * lock
2026-06-05 22:50:18 +00:00 · 2023-08-23 23:53:22 -07:00
parent 964c17ca1a
commit 64e7f51601
9 changed files with 1275 additions and 589 deletions
@@ -123,37 +123,37 @@ In this updated schema, we use the `Field` class from `pydantic` to add descript
 !!! note "Code, schema, and prompt"
     We can run `openai_schema` to see exactly what the API will see. Notice how the docstrings, attributes, types, and field descriptions are now part of the schema. This illustrates one of this library's core philosophies.

-    ```python hl_lines="2 3"
-    class UserDetails(OpenAISchema):
-        "Correctly extracted user information"
-        name: str = Field(..., description="User's full name")
-        age: int
+```python hl_lines="2 3"
+class UserDetails(OpenAISchema):
+    "Correctly extracted user information"
+    name: str = Field(..., description="User's full name")
+    age: int

-    UserDetails.openai_schema
-    ```
+UserDetails.openai_schema
+```

-    ```json hl_lines="3 8"
-    {
-    "name": "UserDetails",
-    "description": "Correctly extracted user information",
-    "parameters": {
-        "type": "object",
-        "properties": {
-        "name": {
-            "description": "User's full name",
-            "type": "string"
-        },
-        "age": {
-            "type": "integer"
-        }
-        },
-        "required": [
-        "age",
-        "name"
-        ]
+```json hl_lines="3 8"
+{
+"name": "UserDetails",
+"description": "Correctly extracted user information",
+"parameters": {
+    "type": "object",
+    "properties": {
+    "name": {
+        "description": "User's full name",
+        "type": "string"
+    },
+    "age": {
+        "type": "integer"
    }
-    }
-    ```
+    },
+    "required": [
+    "age",
+    "name"
+    ]
+}
+}
+```

 ### Section 3: Calling the ChatCompletion

@@ -0,0 +1,89 @@
+# Using the Command Line Interface
+The instructor CLI provides functionalities for managing fine-tuning jobs on OpenAI.
+
+## Creating a Fine-Tuning Job
+
+### View Jobs Options
+
+```sh
+$ instructor jobs --help          
+
+Usage: instructor jobs [OPTIONS] COMMAND [ARGS]...
+
+  Monitor and create fine tuning jobs
+
+Options:
+  --help  Show this message and exit.
+
+Commands:
+  cancel            Cancel a fine-tuning job.
+  create-from-file  Create a fine-tuning job from a file.
+  create-from-id    Create a fine-tuning job from an existing ID.
+  list              Monitor the status of the most recent fine-tuning jobs.
+```
+
+### Create from File
+
+The create-from-file command uploads and trains a model in a single step:
+
+```sh
+$ instructor jobs create-from-file transformed_data.jsonl 
+```
+
+### Create from ID
+
+The create-from-id command trains a model from a file that has already been uploaded:
+
+```sh
+$ instructor files upload transformed_data.jsonl 
+$ instructor files list
+...
+$ instructor jobs create-from-id <file_id>
+```
+
+
+### Viewing Files and Jobs
+
+#### Viewing Jobs
+
+```sh
+$ instructor jobs list 
+
+OpenAI Fine Tuning Job Monitoring                                                
+┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
+┃                ┃              ┃                ┃     Completion ┃                 ┃                ┃        ┃                 ┃
+┃ Job ID         ┃ Status       ┃  Creation Time ┃           Time ┃ Model Name      ┃ File ID        ┃ Epochs ┃ Base Model      ┃
+┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
+│ ftjob-PWo6uwk… │ 🚫 cancelled │     2023-08-23 │            N/A │                 │ file-F7lJg6Z4… │ 3      │ gpt-3.5-turbo-… │
+│                │              │       23:10:54 │                │                 │                │        │                 │
+│ ftjob-1whjva8… │ 🚫 cancelled │     2023-08-23 │            N/A │                 │ file-F7lJg6Z4… │ 3      │ gpt-3.5-turbo-… │
+│                │              │       22:47:05 │                │                 │                │        │                 │
+│ ftjob-wGoBDld… │ 🚫 cancelled │     2023-08-23 │            N/A │                 │ file-F7lJg6Z4… │ 3      │ gpt-3.5-turbo-… │
+│                │              │       22:44:12 │                │                 │                │        │                 │
+│ ftjob-yd5aRTc… │ ✅ succeeded │     2023-08-23 │     2023-08-23 │ ft:gpt-3.5-tur… │ file-IQxAUDqX… │ 3      │ gpt-3.5-turbo-… │
+│                │              │       14:26:03 │       15:02:29 │                 │                │        │                 │
+└────────────────┴──────────────┴────────────────┴────────────────┴─────────────────┴────────────────┴────────┴─────────────────┘
+                                    Automatically refreshes every 5 seconds, press Ctrl+C to exit
+```
+
+
+#### Viewing Files
+
+```sh
+$ instructor files list 
+
+OpenAI Files                                                      
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓                         
+┃ File ID                       ┃ Size (bytes) ┃ Creation Time       ┃ Filename ┃ Purpose   ┃                         
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩                         
+│ file-0lw2BSNRUlXZXRRu2beCCWjl │       369523 │ 2023-08-23 23:31:57 │ file     │ fine-tune │                         
+│ file-IHaUXcMEykmFUp1kt2puCDEq │       369523 │ 2023-08-23 23:09:35 │ file     │ fine-tune │                         
+│ file-ja9vRBf0FydEOTolaa3BMqES │       369523 │ 2023-08-23 22:42:29 │ file     │ fine-tune │                         
+│ file-F7lJg6Z47CREvmx4kyvyZ6Sn │       369523 │ 2023-08-23 22:42:03 │ file     │ fine-tune │                         
+│ file-YUxqZPyJRl5GJCUTw3cNmA46 │       369523 │ 2023-08-23 22:29:10 │ file     │ fine-tune │                         
+└───────────────────────────────┴──────────────┴─────────────────────┴──────────┴───────────┘   
+```
+
+
+## Conclusion
+The instructor CLI offers an intuitive interface for managing OpenAI's fine-tuning jobs and related files. By utilizing simple commands, you can create, monitor, and manage your fine-tuning tasks with ease. Feel free to explore further options and parameters by using the --help flag with any command.
@@ -0,0 +1,11 @@
+import typer
+import instructor.cli.jobs as jobs
+import instructor.cli.files as files
+
+# Root Typer application for the CLI.
+app = typer.Typer(name="instructor-ft", help="A CLI for fine-tuning OpenAI's models")
+
+# Mount the sub-applications as the `jobs` and `files` command groups.
+app.add_typer(
+    jobs.app,
+    name="jobs",
+    help="Monitor and create fine tuning jobs",
+)
+app.add_typer(
+    files.app,
+    name="files",
+    help="Manage files on OpenAI's servers",
+)
@@ -0,0 +1,123 @@
+from typing import List
+from typing_extensions import Annotated
+from rich.live import Live
+from rich.table import Table
+from rich.spinner import Spinner
+from rich.console import Console
+
+from datetime import datetime
+import openai
+import typer
+import time
+
+# Typer application that collects the file-management commands defined below.
+app = typer.Typer()
+# Shared rich console used by every command in this module for status and log output.
+console = Console()
+
+
+def generate_file_table(files: List[openai.File]) -> Table:
+    """Render file records as a rich table titled "OpenAI Files".
+
+    Each record is read by key (``id``, ``bytes``, ``created_at``,
+    ``filename``, ``purpose``) and becomes one row, in the order given.
+    ``created_at`` is converted with ``datetime.fromtimestamp``.
+    """
+    file_table = Table(title="OpenAI Files")
+
+    # Column header paired with the keyword options passed to ``add_column``.
+    column_specs = (
+        ("File ID", {"style": "dim"}),
+        ("Size (bytes)", {"justify": "right"}),
+        ("Creation Time", {}),
+        ("Filename", {}),
+        ("Purpose", {}),
+    )
+    for header, options in column_specs:
+        file_table.add_column(header, **options)
+
+    for record in files:
+        file_table.add_row(
+            record["id"],
+            str(record["bytes"]),
+            str(datetime.fromtimestamp(record["created_at"])),
+            record["filename"],
+            record["purpose"],
+        )
+
+    return file_table
+
+
+def get_files(limit: int = 5) -> List[openai.File]:
+    """Return at most ``limit`` file records, most recently created first."""
+    listing = openai.File.list(limit=limit)  # type: ignore
+    newest_first = sorted(
+        listing["data"],
+        key=lambda record: record["created_at"],
+        reverse=True,
+    )
+    # Slice again after sorting so no more than ``limit`` records are returned.
+    return newest_first[:limit]
+
+
+def get_file_status(file_id: str) -> str:
+    """Retrieve the file identified by ``file_id`` and return its ``status`` field."""
+    return openai.File.retrieve(file_id)["status"]
+
+
+@app.command(
+    help="Upload a file to OpenAI's servers, will monitor the upload status until it is processed",
+)
+def upload(
+    filepath: str = typer.Argument(..., help="Path to the file to upload"),
+    purpose: str = typer.Option("fine-tune", help="Purpose of the file"),
+    poll: int = typer.Option(5, help="Polling interval in seconds"),
+):
+    """Upload ``filepath`` and poll its status until processing finishes.
+
+    The file is opened in binary mode and sent with the given ``purpose``.
+    Its status is then fetched every ``poll`` seconds. The command logs a
+    success message once the status is ``processed``. If the file reaches a
+    state from which it cannot become processed, an error is logged and the
+    command exits with code 1 instead of polling forever.
+    """
+    with open(filepath, "rb") as file:
+        response = openai.File.create(file=file, purpose=purpose)
+    file_id = response["id"]
+
+    # NOTE(review): failure states taken from the OpenAI File object's
+    # documented ``status`` values — confirm against the API version in use.
+    failure_states = ("error", "deleting", "deleted")
+
+    # "dots" is a spinner name, so it is passed as ``spinner`` rather than
+    # assigned to the status object's style attribute.
+    with console.status(f"Monitoring upload: {file_id}...", spinner="dots"):
+        while True:
+            file_status = get_file_status(file_id)
+            if file_status == "processed":
+                console.log(f"[bold green]File {file_id} uploaded successfully!")
+                break
+            if file_status in failure_states:
+                console.log(
+                    f"[bold red]File {file_id} could not be processed (status: {file_status})"
+                )
+                raise typer.Exit(code=1)
+            time.sleep(poll)
+
+
+@app.command(
+    help="Download a file from OpenAI's servers",
+)
+def download(
+    file_id: str = typer.Argument(..., help="ID of the file to download"),
+    output: str = typer.Argument(..., help="Output path for the downloaded file"),
+):
+    """Fetch the content of ``file_id`` and write it to ``output`` in binary mode."""
+    progress_message = f"[bold green]Downloading file {file_id}..."
+    with console.status(progress_message, spinner="dots"):
+        payload = openai.File.download(file_id)
+        with open(output, "wb") as destination:
+            destination.write(payload)
+        console.log(f"[bold green]File {file_id} downloaded successfully!")
+
+
+@app.command(
+    help="Delete a file from OpenAI's servers",
+)
+def delete(file_id: str = typer.Argument(..., help="ID of the file to delete")):
+    """Delete the file identified by ``file_id``.
+
+    Logs a confirmation on success. If the delete call raises, the error is
+    logged and the command exits with code 1 so that scripts can detect the
+    failure.
+    """
+    with console.status(f"[bold red]Deleting file {file_id}...", spinner="dots"):
+        try:
+            openai.File.delete(file_id)
+        except Exception as e:
+            console.log(f"[bold red]Error deleting file {file_id}: {e}")
+            # Report the failure through the exit code as well as the log line.
+            raise typer.Exit(code=1)
+        console.log(f"[bold red]File {file_id} deleted successfully!")
+
+
+@app.command(
+    help="Monitor the status of a file on OpenAI's servers",
+)
+def status(
+    file_id: str = typer.Argument(..., help="ID of the file to check the status of")
+):
+    """Poll a file's status every five seconds and report where it settles.
+
+    The spinner text is updated with each observed status. Because the
+    spinner display is transient, the last observed status is also written
+    with ``console.log`` after polling stops so it stays on screen.
+    """
+    # Bound to ``spinner`` rather than ``status`` so the command function's
+    # own name is not shadowed inside its body.
+    with console.status(f"Monitoring status of file {file_id}...") as spinner:
+        while True:
+            file_status = get_file_status(file_id)
+            spinner.update(f"File status: {file_status}")
+            # "error" is treated as terminal so a failed file does not poll forever.
+            # NOTE(review): "pending" also stops the watch, as in the original
+            # code — confirm that this is intended.
+            if file_status in ("pending", "processed", "error"):
+                break
+            time.sleep(5)
+    console.log(f"File {file_id} status: {file_status}")
+
+
+@app.command(
+    help="List the files on OpenAI's servers",
+)
+def list(limit: int = typer.Option(5, help="Limit the number of files to list")):
+    """Log a table of up to ``limit`` files, built by ``generate_file_table``."""
+    # NOTE: this function's name shadows the builtin ``list`` for the rest of
+    # the module, so the builtin must not be called by that name here.
+    recent_files = get_files(limit=limit)
+    file_table = generate_file_table(recent_files)
+    console.log(file_table)
@@ -0,0 +1,155 @@
+from typing import List
+import openai
+import typer
+import time
+
+from rich.live import Live
+from rich.table import Table
+from rich.console import Console
+from datetime import datetime
+
+# Typer application that collects the fine-tuning job commands defined below.
+app = typer.Typer()
+# Shared rich console used by the commands in this module for status and log output.
+console = Console()
+
+
+def generate_table(jobs):
+    """Build the rich table used to monitor fine-tuning jobs.
+
+    Jobs are listed newest first by ``created_at``. Each row shows the job
+    ID, an emoji plus colour-tagged status, creation and completion times,
+    the fine-tuned model name, training file ID, epoch count and base model.
+    A falsy ``finished_at`` is shown as "N/A".
+    """
+    newest_first = sorted(jobs, key=lambda entry: entry["created_at"], reverse=True)
+
+    # Statuses missing from this mapping fall back to a question mark.
+    emoji_by_status = {
+        "running": "⏳",
+        "succeeded": "✅",
+        "failed": "❌",
+        "cancelled": "🚫",
+    }
+
+    monitor = Table(
+        title="OpenAI Fine Tuning Job Monitoring",
+        caption="Automatically refreshes every 5 seconds, press Ctrl+C to exit",
+    )
+    monitor.add_column("Job ID", style="dim")
+    monitor.add_column("Status")
+    for heading in ("Creation Time", "Completion Time"):
+        monitor.add_column(heading, justify="right")
+    for heading in ("Model Name", "File ID", "Epochs", "Base Model"):
+        monitor.add_column(heading)
+
+    for entry in newest_first:
+        state = entry["status"]
+        marker = emoji_by_status.get(state, "❓")
+
+        if entry["finished_at"]:
+            completed = str(datetime.fromtimestamp(entry["finished_at"]))
+        else:
+            completed = "N/A"
+
+        monitor.add_row(
+            entry["id"],
+            f"{marker} [{status_color(state)}]{state}[/]",
+            str(datetime.fromtimestamp(entry["created_at"])),
+            completed,
+            entry["fine_tuned_model"],
+            entry["training_file"],
+            str(entry["hyperparameters"]["n_epochs"]),
+            entry["model"],
+        )
+
+    return monitor
+
+
+def status_color(status: str) -> str:
+    """Map a job status to its display colour, defaulting to "white"."""
+    palette = {"running": "yellow", "succeeded": "green", "failed": "red"}
+    try:
+        return palette[status]
+    except KeyError:
+        # Any status without a dedicated colour is shown in white.
+        return "white"
+
+
+def get_jobs(limit: int = 5) -> List[openai.FineTuningJob]:
+    """Return the ``data`` list from a fine-tuning job listing capped at ``limit``."""
+    listing = openai.FineTuningJob.list(limit=limit)
+    return listing["data"]
+
+
+def get_file_status(file_id: str) -> str:
+    """Retrieve the file identified by ``file_id`` and return its ``status`` field."""
+    return openai.File.retrieve(file_id)["status"]
+
+
+@app.command(
+    name="list",
+    help="Monitor the status of the most recent fine-tuning jobs.",
+)
+def watch(
+    limit: int = typer.Option(5, help="Limit the number of jobs to monitor"),
+    poll: int = typer.Option(5, help="Polling interval in seconds"),
+    screen: bool = typer.Option(False, help="Enable or disable screen output"),
+):
+    """
+    Show a live table of the latest fine-tuning jobs.
+
+    The table is re-fetched and redrawn every ``poll`` seconds, and the loop
+    has no exit condition of its own.
+    """
+    initial_view = generate_table(get_jobs(limit=limit))
+    with Live(initial_view, refresh_per_second=2, screen=screen) as live_table:
+        while True:
+            latest_jobs = get_jobs(limit=limit)
+            live_table.update(generate_table(latest_jobs))
+            time.sleep(poll)
+
+
+@app.command(
+    help="Create a fine-tuning job from an existing ID.",
+)
+def create_from_id(
+    id: str = typer.Argument(..., help="ID of the already-uploaded training file"),
+    model: str = typer.Option("gpt-3.5-turbo", help="Model to use for fine-tuning"),
+):
+    """Start a fine-tuning job from an already-uploaded file, then monitor jobs.
+
+    ``id`` is passed to the API as ``training_file``, so it must be a file
+    ID rather than a job ID. After the job is created, control passes to
+    ``watch`` with a two-second polling interval.
+    """
+    with console.status(
+        f"[bold green]Creating fine-tuning job from ID {id}...", spinner="dots"
+    ):
+        job = openai.FineTuningJob.create(training_file=id, model=model)
+        # Key access matches how ``create_from_file`` reads the job ID.
+        console.log(f"[bold green]Fine-tuning job created with ID: {job['id']}")
+    watch(limit=5, poll=2, screen=False)
+
+
+@app.command(
+    help="Create a fine-tuning job from a file.",
+)
+def create_from_file(
+    file: str = typer.Argument(..., help="Path to the file for fine-tuning"),
+    model: str = typer.Option("gpt-3.5-turbo", help="Model to use for fine-tuning"),
+    poll: int = typer.Option(2, help="Polling interval in seconds"),
+):
+    """Upload a training file, wait for it to be processed, then fine-tune.
+
+    The file at ``file`` is uploaded with purpose ``fine-tune`` and its
+    status is polled every ``poll`` seconds. Once the status is
+    ``processed``, a fine-tuning job is created for ``model`` and control
+    passes to ``watch``. If the file reaches a state from which it cannot
+    become processed, an error is logged and the command exits with code 1
+    instead of polling forever.
+    """
+    # A separate name for the handle keeps the ``file`` path argument from being rebound.
+    with open(file, "rb") as training_data:
+        response = openai.File.create(file=training_data, purpose="fine-tune")
+
+    file_id = response["id"]
+
+    # NOTE(review): failure states taken from the OpenAI File object's
+    # documented ``status`` values — confirm against the API version in use.
+    failure_states = ("error", "deleting", "deleted")
+
+    # "dots" is a spinner name, so it is passed as ``spinner`` rather than
+    # assigned to the status object's style attribute.
+    with console.status(
+        f"Monitoring upload: {file_id} before finetuning...", spinner="dots"
+    ):
+        while True:
+            file_status = get_file_status(file_id)
+
+            if file_status == "processed":
+                console.log(f"[bold green]File {file_id} uploaded successfully!")
+                break
+
+            if file_status in failure_states:
+                console.log(
+                    f"[bold red]File {file_id} could not be processed (status: {file_status})"
+                )
+                raise typer.Exit(code=1)
+
+            time.sleep(poll)
+
+    job = openai.FineTuningJob.create(training_file=file_id, model=model)
+    console.log(
+        f"[bold green]Fine-tuning job created with ID: {job['id']} from file ID: {file_id}"
+    )
+    watch(limit=5, poll=poll, screen=False)
+
+
+@app.command(
+    help="Cancel a fine-tuning job.",
+)
+def cancel(id: str = typer.Argument(..., help="ID of the fine-tuning job to cancel")):
+    """Cancel the fine-tuning job identified by ``id``.
+
+    Logs a confirmation on success. If the cancel call raises, the error is
+    logged and the command exits with code 1 so that scripts can detect the
+    failure.
+    """
+    with console.status(f"[bold red]Cancelling job {id}...", spinner="dots"):
+        try:
+            openai.FineTuningJob.cancel(id)
+        except Exception as e:
+            console.log(f"[bold red]Error cancelling job {id}: {e}")
+            # Report the failure through the exit code as well as the log line.
+            raise typer.Exit(code=1)
+        console.log(f"[bold red]Job {id} cancelled successfully!")
+
+
+# Run the jobs command group directly when this module is executed as a script.
+if __name__ == "__main__":
+    app()
@@ -60,6 +60,8 @@ nav:
      - 'MultiTask': 'api_multitask.md'
      - "Introduction: Writing Prompts": "writing-prompts.md"
      - "Prompting Templates": "chat-completion.md"
+  - CLI Reference:
+      - "Finetuning": finetune.md
 extra:
  analytics:
    provider: google
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "instructor"
-version = "0.2.4"
+version = "0.2.5"
 description = "Helper functions that allow us to improve openai's function_call ergonomics"
 authors = ["Jason <jason@jxnl.co>"]
 license = "MIT"
@@ -12,7 +12,10 @@ repository = "https://github.com/jxnl/instructor"
 python = "^3.9"
 openai = "^0.27.8"
 pydantic = "^2.0.2"
+typer = "^0.4.0"

+[tool.poetry.scripts]
+instructor = "instructor.cli.cli:app"

 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.0"