diff --git a/tutorials/3.0.applications-rag.ipynb b/tutorials/3.0.applications-rag.ipynb index 2307db3..13bdfab 100644 --- a/tutorials/3.0.applications-rag.ipynb +++ b/tutorials/3.0.applications-rag.ipynb @@ -374,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "metadata": { "scrolled": true }, @@ -414,9 +414,12 @@ "aclient = instructor.patch(AsyncOpenAI())\n", "\n", "\n", - "async def expand_query(q) -> Query:\n", + "async def expand_query(\n", + " q, *, model: str = \"gpt-4-1106-preview\", temp: float = 0\n", + ") -> Query:\n", " return await aclient.chat.completions.create(\n", - " model=\"gpt-4-1106-preview\",\n", + " model=model,\n", + " temperature=temp,\n", " response_model=Query,\n", " messages=[\n", " {\n", @@ -430,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -461,7 +464,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/jasonliu/dev/instructor/tutorials/wandb/run-20231222_152028-opuq58lr" + "Run data is saved locally in /Users/jasonliu/dev/instructor/tutorials/wandb/run-20231224_212738-tq55vci1" ], "text/plain": [ "" @@ -473,7 +476,7 @@ { "data": { "text/html": [ - "Syncing run major-firebrand-21 to Weights & Biases (docs)
" + "Syncing run cool-sponge-25 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -497,7 +500,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/instructor/query-understanding/runs/opuq58lr" + " View run at https://wandb.ai/instructor/query-understanding/runs/tq55vci1" ], "text/plain": [ "" @@ -510,34 +513,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Retrying, exception: 1 validation error for Query\n", - "rewritten_query\n", - " Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n", - " For further information visit https://errors.pydantic.dev/2.5/v/missing\n", - "Traceback (most recent call last):\n", - " File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 231, in retry_async\n", - " return await process_response_async(\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 201, in process_response_async\n", - " model = await response_model.from_response_async(\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/Users/jasonliu/dev/instructor/instructor/function_calls.py\", line 198, in from_response_async\n", - " return cls.model_validate_json(\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/Users/jasonliu/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py\", line 532, in model_validate_json\n", - " return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "pydantic_core._pydantic_core.ValidationError: 1 validation error for Query\n", - "rewritten_query\n", - " Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n", - " For further information visit https://errors.pydantic.dev/2.5/v/missing\n", "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "96b112129c944465a35156a6ffbdfe54", + "model_id": "5497ec4b72e24f9baa3fd23e49fe2403", "version_major": 2, "version_minor": 0 }, @@ -551,7 +533,7 @@ { "data": { "text/html": [ - "W&B sync reduced upload amount by 7.9% " + "W&B sync reduced upload amount by 8.6% " ], "text/plain": [ "" @@ -563,7 +545,7 @@ { "data": { "text/html": [ - " View run major-firebrand-21 at: https://wandb.ai/instructor/query-understanding/runs/opuq58lr
Synced 5 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)" + " View run cool-sponge-25 at: https://wandb.ai/instructor/query-understanding/runs/tq55vci1
Synced 4 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)" ], "text/plain": [ "" @@ -575,7 +557,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20231222_152028-opuq58lr/logs" + "Find logs at: ./wandb/run-20231224_212738-tq55vci1/logs" ], "text/plain": [ "" @@ -587,7 +569,10 @@ ], "source": [ "import asyncio\n", + "import time\n", "\n", + "model = \"gpt-4-1106-preview\"\n", + "temp = 0\n", "\n", "run = wandb.init(\n", " project=\"query-understanding\",\n", @@ -599,8 +584,15 @@ " \"quantum computing advancements last 2 months\",\n", " \"biotechnology updates last 10 days\",\n", "]\n", + "start = time.perf_counter()\n", + "queries = await asyncio.gather(\n", + " *[expand_query(q, model=model, temp=temp) for q in test_queries]\n", + ")\n", "\n", - "queries = await asyncio.gather(*[expand_query(q) for q in test_queries])\n", + "run.config.update({\"duration (s)\": time.perf_counter() - start})\n", + "run.config.update({\"n_queries\": len(queries)})\n", + "run.config.update({\"model\": model})\n", + "run.config.update({\"temp\": temp})\n", "\n", "with open(\"schema.json\", \"w+\") as f:\n", " schema = Query.model_json_schema()\n", @@ -614,6 +606,10 @@ "df[\"input\"] = test_queries\n", "df.to_csv(\"results.csv\")\n", "\n", + "run.config.update({\"usage_total_tokens\": df[\"usage_total_tokens\"].sum()})\n", + "run.config.update({\"usage_completion_tokens\": df[\"usage_completion_tokens\"].sum()})\n", + "run.config.update({\"usage_prompt_tokens\": df[\"usage_prompt_tokens\"].sum()})\n", + "\n", "run.log({\"results\": wandb.Table(dataframe=df)})\n", "\n", "files = wandb.Artifact(\"data\", type=\"dataset\")\n", @@ -622,6 +618,7 @@ "files.add_file(\"results.jsonlines\")\n", "files.add_file(\"results.csv\")\n", "\n", + "\n", "run.log_artifact(files)\n", "run.finish()" ]