mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
clean up wandb
This commit is contained in:
@@ -74,7 +74,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -196,7 +196,7 @@
|
||||
"extractions = client.chat.completions.create(\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" stream=True,\n",
|
||||
" response_model=Iterable[Extraction],\n",
|
||||
" response_model=s,\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
@@ -374,15 +374,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import wandb\n",
|
||||
"import instructor\n",
|
||||
"\n",
|
||||
"from openai import AsyncOpenAI\n",
|
||||
"from helpers import dicts_to_df\n",
|
||||
"from datetime import date\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class DateRange(BaseModel):\n",
|
||||
@@ -407,7 +411,6 @@
|
||||
" return dct\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from openai import AsyncOpenAI\n",
|
||||
"\n",
|
||||
"# We'll use a different client for async calls\n",
|
||||
"# To highlight the difference and how we can use both\n",
|
||||
@@ -433,7 +436,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -464,7 +467,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Run data is saved locally in <code>/Users/jasonliu/dev/instructor/tutorials/wandb/run-20231224_212738-tq55vci1</code>"
|
||||
"Run data is saved locally in <code>/Users/jasonliu/dev/instructor/tutorials/wandb/run-20231227_202003-7c9dxnfl</code>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -476,7 +479,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Syncing run <strong><a href='https://wandb.ai/instructor/query-understanding/runs/tq55vci1' target=\"_blank\">cool-sponge-25</a></strong> to <a href='https://wandb.ai/instructor/query-understanding' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
|
||||
"Syncing run <strong><a href='https://wandb.ai/instructor/query/runs/7c9dxnfl' target=\"_blank\">blooming-firefly-4</a></strong> to <a href='https://wandb.ai/instructor/query' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -488,7 +491,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" View project at <a href='https://wandb.ai/instructor/query-understanding' target=\"_blank\">https://wandb.ai/instructor/query-understanding</a>"
|
||||
" View project at <a href='https://wandb.ai/instructor/query' target=\"_blank\">https://wandb.ai/instructor/query</a>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -500,7 +503,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" View run at <a href='https://wandb.ai/instructor/query-understanding/runs/tq55vci1' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/tq55vci1</a>"
|
||||
" View run at <a href='https://wandb.ai/instructor/query/runs/7c9dxnfl' target=\"_blank\">https://wandb.ai/instructor/query/runs/7c9dxnfl</a>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -509,6 +512,20 @@
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "d73fb8a832254b32a938572fd27eca62",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
@@ -516,24 +533,10 @@
|
||||
"wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5497ec4b72e24f9baa3fd23e49fe2403",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"W&B sync reduced upload amount by 8.6% "
|
||||
"W&B sync reduced upload amount by 6.6% "
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -545,7 +548,12 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" View run <strong style=\"color:#cdcd00\">cool-sponge-25</strong> at: <a href='https://wandb.ai/instructor/query-understanding/runs/tq55vci1' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/tq55vci1</a><br/>Synced 4 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)"
|
||||
"<style>\n",
|
||||
" table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
|
||||
" .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
|
||||
" .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
|
||||
" </style>\n",
|
||||
"<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>average duration (s)</td><td>▁</td></tr><tr><td>duration (s)</td><td>▁</td></tr><tr><td>n_queries</td><td>▁</td></tr><tr><td>usage_completion_tokens</td><td>▁</td></tr><tr><td>usage_prompt_tokens</td><td>▁</td></tr><tr><td>usage_total_tokens</td><td>▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>average duration (s)</td><td>2.28692</td></tr><tr><td>duration (s)</td><td>9.14768</td></tr><tr><td>n_queries</td><td>4</td></tr><tr><td>usage_completion_tokens</td><td>359</td></tr><tr><td>usage_prompt_tokens</td><td>780</td></tr><tr><td>usage_total_tokens</td><td>1139</td></tr></table><br/></div></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -557,7 +565,19 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Find logs at: <code>./wandb/run-20231224_212738-tq55vci1/logs</code>"
|
||||
" View run <strong style=\"color:#cdcd00\">blooming-firefly-4</strong> at: <a href='https://wandb.ai/instructor/query/runs/7c9dxnfl' target=\"_blank\">https://wandb.ai/instructor/query/runs/7c9dxnfl</a><br/>Synced 4 W&B file(s), 2 media file(s), 5 artifact file(s) and 0 other file(s)"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Find logs at: <code>./wandb/run-20231227_202003-7c9dxnfl/logs</code>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -570,12 +590,15 @@
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"import time\n",
|
||||
"import pandas as pd\n",
|
||||
"import wandb\n",
|
||||
"\n",
|
||||
"model = \"gpt-4-1106-preview\"\n",
|
||||
"temp = 0\n",
|
||||
"\n",
|
||||
"run = wandb.init(\n",
|
||||
" project=\"query-understanding\",\n",
|
||||
" project=\"query\",\n",
|
||||
" config={\"model\": model, \"temp\": temp},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"test_queries = [\n",
|
||||
@@ -588,11 +611,7 @@
|
||||
"queries = await asyncio.gather(\n",
|
||||
" *[expand_query(q, model=model, temp=temp) for q in test_queries]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"run.config.update({\"duration (s)\": time.perf_counter() - start})\n",
|
||||
"run.config.update({\"n_queries\": len(queries)})\n",
|
||||
"run.config.update({\"model\": model})\n",
|
||||
"run.config.update({\"temp\": temp})\n",
|
||||
"duration = time.perf_counter() - start\n",
|
||||
"\n",
|
||||
"with open(\"schema.json\", \"w+\") as f:\n",
|
||||
" schema = Query.model_json_schema()\n",
|
||||
@@ -606,11 +625,26 @@
|
||||
"df[\"input\"] = test_queries\n",
|
||||
"df.to_csv(\"results.csv\")\n",
|
||||
"\n",
|
||||
"run.config.update({\"usage_total_tokens\": df[\"usage_total_tokens\"].sum()})\n",
|
||||
"run.config.update({\"usage_completion_tokens\": df[\"usage_completion_tokens\"].sum()})\n",
|
||||
"run.config.update({\"usage_prompt_tokens\": df[\"usage_prompt_tokens\"].sum()})\n",
|
||||
"\n",
|
||||
"run.log({\"results\": wandb.Table(dataframe=df)})\n",
|
||||
"run.log({\"schema\": wandb.Table(dataframe=pd.DataFrame([{\"schema\": schema}]))})\n",
|
||||
"\n",
|
||||
"run.log(\n",
|
||||
" {\n",
|
||||
" \"usage_total_tokens\": df[\"usage_total_tokens\"].sum(),\n",
|
||||
" \"usage_completion_tokens\": df[\"usage_completion_tokens\"].sum(),\n",
|
||||
" \"usage_prompt_tokens\": df[\"usage_prompt_tokens\"].sum(),\n",
|
||||
" \"duration (s)\": duration,\n",
|
||||
" \"average duration (s)\": duration / len(queries),\n",
|
||||
" \"n_queries\": len(queries),\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"run.log(\n",
|
||||
" {\n",
|
||||
" \"results\": wandb.Table(dataframe=df),\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"files = wandb.Artifact(\"data\", type=\"dataset\")\n",
|
||||
"\n",
|
||||
@@ -638,7 +672,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -670,9 +704,42 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"queries\": [\n",
|
||||
" {\n",
|
||||
" \"query\": \"\",\n",
|
||||
" \"keywords\": [],\n",
|
||||
" \"email\": \"jason@work.com\",\n",
|
||||
" \"source\": \"calendar\",\n",
|
||||
" \"date_range\": {\n",
|
||||
" \"chain_of_thought\": \"\",\n",
|
||||
" \"start\": \"2023-12-26\",\n",
|
||||
" \"end\": \"2023-12-26\"\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"query\": \"is:unread\",\n",
|
||||
" \"keywords\": [],\n",
|
||||
" \"email\": \"jason@work.com\",\n",
|
||||
" \"source\": \"gmail\",\n",
|
||||
" \"date_range\": {\n",
|
||||
" \"chain_of_thought\": \"\",\n",
|
||||
" \"start\": \"2023-12-26\",\n",
|
||||
" \"end\": \"2023-12-26\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retrival = client.chat.completions.create(\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
@@ -684,7 +751,7 @@
|
||||
" He has two emails jason@work.com jason@personal.com \n",
|
||||
" Today is {date.today()}\"\"\",\n",
|
||||
" },\n",
|
||||
" {\"role\": \"user\", \"content\": \"What do I have today?\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"What do I have today for work? any new emails?\"},\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"print(retrival.model_dump_json(indent=4))"
|
||||
@@ -699,7 +766,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -709,33 +776,32 @@
|
||||
"{\n",
|
||||
" \"queries\": [\n",
|
||||
" {\n",
|
||||
" \"query\": \"meetings\",\n",
|
||||
" \"query\": \"meeting today\",\n",
|
||||
" \"keywords\": [\n",
|
||||
" \"meetings\",\n",
|
||||
" \"appointments\",\n",
|
||||
" \"schedule\",\n",
|
||||
" \"calendar\"\n",
|
||||
" \"meeting\"\n",
|
||||
" ],\n",
|
||||
" \"email\": \"user@email.com\",\n",
|
||||
" \"email\": \"jason@work.com\",\n",
|
||||
" \"source\": \"calendar\",\n",
|
||||
" \"date_range\": {\n",
|
||||
" \"start\": \"2023-11-18\",\n",
|
||||
" \"end\": \"2023-11-18\"\n",
|
||||
" \"chain_of_thought\": \"Since today's date is 2023-12-26, I will retrieve calendar events specifically from this date.\",\n",
|
||||
" \"start\": \"2023-12-26\",\n",
|
||||
" \"end\": \"2023-12-26\"\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"query\": \"important emails\",\n",
|
||||
" \"query\": \"important\",\n",
|
||||
" \"keywords\": [\n",
|
||||
" \"important\",\n",
|
||||
" \"priority\",\n",
|
||||
" \"urgent\",\n",
|
||||
" \"follow-up\"\n",
|
||||
" \"ASAP\",\n",
|
||||
" \"high priority\"\n",
|
||||
" ],\n",
|
||||
" \"email\": \"user@email.com\",\n",
|
||||
" \"email\": \"jason@work.com\",\n",
|
||||
" \"source\": \"gmail\",\n",
|
||||
" \"date_range\": {\n",
|
||||
" \"start\": \"2023-11-18\",\n",
|
||||
" \"end\": \"2023-11-18\"\n",
|
||||
" \"chain_of_thought\": \"Since today's date is 2023-12-26, I will search for emails that are marked as important or convey urgency, received recently that may require Jason's attention today.\",\n",
|
||||
" \"start\": \"2023-12-24\",\n",
|
||||
" \"end\": \"2023-12-26\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
|
||||
Reference in New Issue
Block a user