From 8ac5518b1098363ccc0dc77bfdc89be2c0d3a0db Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 27 Dec 2023 20:21:59 -0500 Subject: [PATCH] clean up wandb --- tutorials/3.0.applications-rag.ipynb | 178 ++++++++++++++++++--------- 1 file changed, 122 insertions(+), 56 deletions(-) diff --git a/tutorials/3.0.applications-rag.ipynb b/tutorials/3.0.applications-rag.ipynb index 13bdfab..e62129a 100644 --- a/tutorials/3.0.applications-rag.ipynb +++ b/tutorials/3.0.applications-rag.ipynb @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -196,7 +196,7 @@ "extractions = client.chat.completions.create(\n", " model=\"gpt-4-1106-preview\",\n", " stream=True,\n", - " response_model=Iterable[Extraction],\n", + " response_model=s,\n", " messages=[\n", " {\n", " \"role\": \"system\",\n", @@ -374,15 +374,19 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import json\n", - "import wandb\n", + "import instructor\n", + "\n", + "from openai import AsyncOpenAI\n", "from helpers import dicts_to_df\n", + "from datetime import date\n", + "from pydantic import BaseModel, Field\n", "\n", "\n", "class DateRange(BaseModel):\n", @@ -407,7 +411,6 @@ " return dct\n", "\n", "\n", - "from openai import AsyncOpenAI\n", "\n", "# We'll use a different client for async calls\n", "# To highlight the difference and how we can use both\n", @@ -433,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -464,7 +467,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/jasonliu/dev/instructor/tutorials/wandb/run-20231224_212738-tq55vci1" + "Run data is saved locally in /Users/jasonliu/dev/instructor/tutorials/wandb/run-20231227_202003-7c9dxnfl" ], "text/plain": [ "" @@ -476,7 +479,7 @@ { "data": { "text/html": [ - "Syncing run cool-sponge-25 to Weights & Biases (docs)
" + "Syncing run blooming-firefly-4 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -488,7 +491,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/instructor/query-understanding" + " View project at https://wandb.ai/instructor/query" ], "text/plain": [ "" @@ -500,7 +503,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/instructor/query-understanding/runs/tq55vci1" + " View run at https://wandb.ai/instructor/query/runs/7c9dxnfl" ], "text/plain": [ "" @@ -509,6 +512,20 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d73fb8a832254b32a938572fd27eca62", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", @@ -516,24 +533,10 @@ "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n" ] }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5497ec4b72e24f9baa3fd23e49fe2403", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ - "W&B sync reduced upload amount by 8.6% " + "W&B sync reduced upload amount by 6.6% " ], "text/plain": [ "" @@ -545,7 +548,12 @@ { "data": { "text/html": [ - " View run cool-sponge-25 at: https://wandb.ai/instructor/query-understanding/runs/tq55vci1
Synced 4 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)" + "\n", + "

Run history:


average duration (s)
duration (s)
n_queries
usage_completion_tokens
usage_prompt_tokens
usage_total_tokens

Run summary:


average duration (s)2.28692
duration (s)9.14768
n_queries4
usage_completion_tokens359
usage_prompt_tokens780
usage_total_tokens1139

" ], "text/plain": [ "" @@ -557,7 +565,19 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20231224_212738-tq55vci1/logs" + " View run blooming-firefly-4 at: https://wandb.ai/instructor/query/runs/7c9dxnfl
Synced 4 W&B file(s), 2 media file(s), 5 artifact file(s) and 0 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20231227_202003-7c9dxnfl/logs" ], "text/plain": [ "" @@ -570,12 +590,15 @@ "source": [ "import asyncio\n", "import time\n", + "import pandas as pd\n", + "import wandb\n", "\n", "model = \"gpt-4-1106-preview\"\n", "temp = 0\n", "\n", "run = wandb.init(\n", - " project=\"query-understanding\",\n", + " project=\"query\",\n", + " config={\"model\": model, \"temp\": temp},\n", ")\n", "\n", "test_queries = [\n", @@ -588,11 +611,7 @@ "queries = await asyncio.gather(\n", " *[expand_query(q, model=model, temp=temp) for q in test_queries]\n", ")\n", - "\n", - "run.config.update({\"duration (s)\": time.perf_counter() - start})\n", - "run.config.update({\"n_queries\": len(queries)})\n", - "run.config.update({\"model\": model})\n", - "run.config.update({\"temp\": temp})\n", + "duration = time.perf_counter() - start\n", "\n", "with open(\"schema.json\", \"w+\") as f:\n", " schema = Query.model_json_schema()\n", @@ -606,11 +625,26 @@ "df[\"input\"] = test_queries\n", "df.to_csv(\"results.csv\")\n", "\n", - "run.config.update({\"usage_total_tokens\": df[\"usage_total_tokens\"].sum()})\n", - "run.config.update({\"usage_completion_tokens\": df[\"usage_completion_tokens\"].sum()})\n", - "run.config.update({\"usage_prompt_tokens\": df[\"usage_prompt_tokens\"].sum()})\n", "\n", - "run.log({\"results\": wandb.Table(dataframe=df)})\n", + "run.log({\"schema\": wandb.Table(dataframe=pd.DataFrame([{\"schema\": schema}]))})\n", + "\n", + "run.log(\n", + " {\n", + " \"usage_total_tokens\": df[\"usage_total_tokens\"].sum(),\n", + " \"usage_completion_tokens\": df[\"usage_completion_tokens\"].sum(),\n", + " \"usage_prompt_tokens\": df[\"usage_prompt_tokens\"].sum(),\n", + " \"duration (s)\": duration,\n", + " \"average duration (s)\": duration / len(queries),\n", + " \"n_queries\": len(queries),\n", + " }\n", + ")\n", + "\n", + "\n", + "run.log(\n", + " {\n", + " \"results\": wandb.Table(dataframe=df),\n", + " }\n", + ")\n", "\n", "files = wandb.Artifact(\"data\", type=\"dataset\")\n", "\n", @@ -638,7 +672,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -670,9 +704,42 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"queries\": [\n", + " {\n", + " \"query\": \"\",\n", + " \"keywords\": [],\n", + " \"email\": \"jason@work.com\",\n", + " \"source\": \"calendar\",\n", + " \"date_range\": {\n", + " \"chain_of_thought\": \"\",\n", + " \"start\": \"2023-12-26\",\n", + " \"end\": \"2023-12-26\"\n", + " }\n", + " },\n", + " {\n", + " \"query\": \"is:unread\",\n", + " \"keywords\": [],\n", + " \"email\": \"jason@work.com\",\n", + " \"source\": \"gmail\",\n", + " \"date_range\": {\n", + " \"chain_of_thought\": \"\",\n", + " \"start\": \"2023-12-26\",\n", + " \"end\": \"2023-12-26\"\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] + } + ], "source": [ "retrival = client.chat.completions.create(\n", " model=\"gpt-3.5-turbo\",\n", @@ -684,7 +751,7 @@ " He has two emails jason@work.com jason@personal.com \n", " Today is {date.today()}\"\"\",\n", " },\n", - " {\"role\": \"user\", \"content\": \"What do I have today?\"},\n", + " {\"role\": \"user\", \"content\": \"What do I have today for work? any new emails?\"},\n", " ],\n", ")\n", "print(retrival.model_dump_json(indent=4))" @@ -699,7 +766,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -709,33 +776,32 @@ "{\n", " \"queries\": [\n", " {\n", - " \"query\": \"meetings\",\n", + " \"query\": \"meeting today\",\n", " \"keywords\": [\n", - " \"meetings\",\n", - " \"appointments\",\n", - " \"schedule\",\n", - " \"calendar\"\n", + " \"meeting\"\n", " ],\n", - " \"email\": \"user@email.com\",\n", + " \"email\": \"jason@work.com\",\n", " \"source\": \"calendar\",\n", " \"date_range\": {\n", - " \"start\": \"2023-11-18\",\n", - " \"end\": \"2023-11-18\"\n", + " \"chain_of_thought\": \"Since today's date is 2023-12-26, I will retrieve calendar events specifically from this date.\",\n", + " \"start\": \"2023-12-26\",\n", + " \"end\": \"2023-12-26\"\n", " }\n", " },\n", " {\n", - " \"query\": \"important emails\",\n", + " \"query\": \"important\",\n", " \"keywords\": [\n", " \"important\",\n", - " \"priority\",\n", " \"urgent\",\n", - " \"follow-up\"\n", + " \"ASAP\",\n", + " \"high priority\"\n", " ],\n", - " \"email\": \"user@email.com\",\n", + " \"email\": \"jason@work.com\",\n", " \"source\": \"gmail\",\n", " \"date_range\": {\n", - " \"start\": \"2023-11-18\",\n", - " \"end\": \"2023-11-18\"\n", + " \"chain_of_thought\": \"Since today's date is 2023-12-26, I will search for emails that are marked as important or convey urgency, received recently that may require Jason's attention today.\",\n", + " \"start\": \"2023-12-24\",\n", + " \"end\": \"2023-12-26\"\n", " }\n", " }\n", " ]\n",