diff --git a/.gitignore b/.gitignore index ea3a62a..f58c18b 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,8 @@ cython_debug/ examples/citation_with_extraction/fly.toml my_cache_directory/ tutorials/wandb/* +tutorials/results.csv +tutorials/results.jsonl +tutorials/results.jsonlines +tutorials/schema.json +wandb/settings diff --git a/tutorials/3.0.applications-rag.ipynb b/tutorials/3.0.applications-rag.ipynb index 6d1c024..2307db3 100644 --- a/tutorials/3.0.applications-rag.ipynb +++ b/tutorials/3.0.applications-rag.ipynb @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -107,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -125,75 +125,43 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'hypothetical_questions': ['How does a simple RAG Model work?',\n", - " 'What are the key challenges faced by simple RAG '\n", - " 'systems?',\n", - " 'Can a simple RAG model handle complex queries '\n", - " 'effectively?'],\n", + "{'hypothetical_questions': ['What is the most basic implementation of '\n", + " 'Retrieval-Augmented Generation?',\n", + " 'Why might simple RAG not be adequate for complex '\n", + " 'user queries?'],\n", " 'keywords': ['Retrieval-Augmented Generation',\n", " 'RAG',\n", - " 'user query',\n", " 'vector database',\n", - " 'embeddings',\n", - " 'search limitations'],\n", - " 'summary': 'The simple Retrieval-Augmented Generation (RAG) model is an '\n", - " 'approach that uses a vector database to embed and search for user '\n", - " 'queries, such as Wikipedia articles. It provides answers by '\n", - " 'aligning query and document embeddings. 
However, it has several '\n", - " 'limitations, including query-document mismatch, reliance on a '\n", - " 'monolithic search backend, text search limitations, and a limited '\n", - " 'planning ability.',\n", - " 'topic': 'Simple RAG'}\n", - "{'hypothetical_questions': ['What is a query-document mismatch in the context '\n", - " 'of RAG models?',\n", - " 'Why might a simple RAG model struggle with '\n", - " 'specific user queries?'],\n", - " 'keywords': ['query-document mismatch',\n", - " 'RAG limitation',\n", - " 'embedding alignment'],\n", - " 'summary': \"A limitation where the simple RAG system's query and document \"\n", - " 'embeddings may not align properly, leading to ineffective '\n", - " \"retrieval of information specific to a user's query about, for \"\n", - " \"example, 'climate change effects on marine life'.\",\n", - " 'topic': 'Query-Document Mismatch'}\n", - "{'hypothetical_questions': ['Why is depending on a monolithic search backend a '\n", - " 'limitation for simple RAG?',\n", - " 'How can a monolithic search backend affect the '\n", - " 'quality of search results?'],\n", - " 'keywords': ['monolithic search backend', 'RAG system', 'data sources'],\n", - " 'summary': 'In simple RAG, the reliance on a single search method and backend '\n", - " \"can limit the system's ability to access diverse or specialized \"\n", - " \"data sources, such as when searching for 'latest research in \"\n", - " \"quantum computing'.\",\n", - " 'topic': 'Monolithic Search Backend'}\n", - "{'hypothetical_questions': ['How do text search limitations impact the '\n", - " 'effectiveness of simple RAG?',\n", - " 'Can simple RAG models understand the context of '\n", - " 'search terms?'],\n", - " 'keywords': ['text search limitations', 'RAG', 'advanced search'],\n", - " 'summary': 'The simple RAG model is limited to straightforward text queries '\n", - " 'without advanced search capabilities, failing to resolve nuanced '\n", - " \"queries like 'what problems did we fix last 
week' due to the \"\n", - " \"presence of generic terms such as 'problem' and 'last week' \"\n", - " 'throughout documents.',\n", - " 'topic': 'Text Search Limitations'}\n", - "{'hypothetical_questions': ['What does limited planning ability imply for a '\n", - " \"RAG model's search results?\",\n", - " 'Can simple RAG models provide context-specific '\n", - " 'information effectively?'],\n", - " 'keywords': ['limited planning ability', 'contextual information', 'RAG'],\n", - " 'summary': 'Simple RAG models struggle to incorporate additional context in '\n", - " 'their searches, which may result in less relevant or overly '\n", - " 'general responses to queries that require specific insights, like '\n", - " \"'Tips for first-time Europe travelers'.\",\n", - " 'topic': 'Limited Planning Ability'}\n" + " 'simple implementation'],\n", + " 'summary': 'The simplest form of RAG involves embedding a user query and '\n", + " 'performing a single search in a vector database, such as a '\n", + " 'Wikipedia article store. 
This method, however, often fails with '\n", + " 'complex queries and diverse data sources.',\n", + " 'topic': 'Simple Retrieval-Augmented Generation (RAG)'}\n", + "{'hypothetical_questions': ['What are the main drawbacks of the simple RAG '\n", + " 'model?',\n", + " 'How does Query-Document Mismatch affect search '\n", + " 'results in simple RAG?',\n", + " 'Why is relying on a monolithic search backend '\n", + " 'problematic for RAG?'],\n", + " 'keywords': ['limitations',\n", + " 'query-document mismatch',\n", + " 'monolithic search backend',\n", + " 'text search limitations',\n", + " 'limited planning ability'],\n", + " 'summary': 'The limitations of simple RAG include the Query-Document Mismatch '\n", + " 'which assumes a perfect alignment of embeddings, the reliance on '\n", + " 'a Monolithic Search Backend which limits flexibility, Text Search '\n", + " 'Limitations that impede nuanced search, and a Limited Planning '\n", + " 'Ability that overlooks additional context for refining results.',\n", + " 'topic': 'Limitations of Simple RAG'}\n" ] } ], @@ -261,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -296,16 +264,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 21)))" + "Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 22)))" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -325,7 +293,8 @@ " )\n", "\n", "\n", - "expand_query(\"What are some recent developments in AI?\")" + "query = expand_query(\"What are some recent developments in AI?\")\n", + "query" ] }, { @@ -398,55 +367,71 @@ "\n", "1. 
Save input and output pairs for later\n", "2. Save the JSON schema for the response_model\n", - "3. Having snapshots of the model and data allow us to compare results over time, and as we make changes to the model we can see how the results change.\n" + "3. Having snapshots of the model and data allows us to compare results over time, and as we make changes to the model we can see how the results change.\n", + "\n", + "This is particularly useful when we might want to blend a mix of synthetic and real data to evaluate our model. We can use the `wandb` library to track our experiments and save the results to a dashboard.\n" ] }, { "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "\n", - "def flatten_dict(d, parent_key=\"\", sep=\"_\"):\n", - " \"\"\"\n", - " Flatten a nested dictionary.\n", - "\n", - " :param d: The nested dictionary to flatten.\n", - " :param parent_key: The base key to use for the flattened keys.\n", - " :param sep: Separator to use between keys.\n", - " :return: A flattened dictionary.\n", - " \"\"\"\n", - " items = []\n", - " for k, v in d.items():\n", - " new_key = f\"{parent_key}{sep}{k}\" if parent_key else k\n", - " if isinstance(v, dict):\n", - " items.extend(flatten_dict(v, new_key, sep=sep).items())\n", - " else:\n", - " items.append((new_key, v))\n", - " return dict(items)\n", - "\n", - "\n", - "def dicts_to_df(list_of_dicts):\n", - " \"\"\"\n", - " Convert a list of dictionaries to a pandas DataFrame.\n", - "\n", - " :param list_of_dicts: List of dictionaries, potentially nested.\n", - " :return: A pandas DataFrame representing the flattened data.\n", - " \"\"\"\n", - " # Flatten each dictionary and create a DataFrame\n", - " flattened_data = [flatten_dict(d) for d in list_of_dicts]\n", - " return pd.DataFrame(flattened_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, + "execution_count": 18, "metadata": { "scrolled": true }, 
"outputs": [], + "source": [ + "import json\n", + "import wandb\n", + "from helpers import dicts_to_df\n", + "\n", + "\n", + "class DateRange(BaseModel):\n", + " chain_of_thought: str = Field(\n", + " description=\"Think step by step to plan what is the best time range to search in\"\n", + " )\n", + " start: date\n", + " end: date\n", + "\n", + "\n", + "class Query(BaseModel):\n", + " rewritten_query: str = Field(\n", + " description=\"Rewrite the query to make it more specific\"\n", + " )\n", + " published_daterange: DateRange = Field(\n", + " description=\"Effective date range to search in\"\n", + " )\n", + "\n", + " def report(self):\n", + " dct = self.model_dump()\n", + " dct[\"usage\"] = self._raw_response.usage.model_dump()\n", + " return dct\n", + "\n", + "\n", + "from openai import AsyncOpenAI\n", + "\n", + "# We'll use a different client for async calls\n", + "# To highlight the difference and how we can use both\n", + "aclient = instructor.patch(AsyncOpenAI())\n", + "\n", + "\n", + "async def expand_query(q) -> Query:\n", + " return await aclient.chat.completions.create(\n", + " model=\"gpt-4-1106-preview\",\n", + " response_model=Query,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\",\n", + " },\n", + " {\"role\": \"user\", \"content\": f\"query: {q}\"},\n", + " ],\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, "outputs": [ { "data": { @@ -476,7 +461,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/jasonliu/dev/instructor/tutorials/wandb/run-20231221_153734-idscpy5k" + "Run data is saved locally in /Users/jasonliu/dev/instructor/tutorials/wandb/run-20231222_152028-opuq58lr" ], "text/plain": [ "" @@ -488,7 +473,7 @@ { "data": { "text/html": [ - "Syncing run easy-feather-16 to Weights & Biases (docs)
" + "Syncing run major-firebrand-21 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -512,7 +497,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/instructor/query-understanding/runs/idscpy5k" + " View run at https://wandb.ai/instructor/query-understanding/runs/opuq58lr" ], "text/plain": [ "" @@ -525,18 +510,39 @@ "name": "stderr", "output_type": "stream", "text": [ + "Retrying, exception: 1 validation error for Query\n", + "rewritten_query\n", + " Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n", + " For further information visit https://errors.pydantic.dev/2.5/v/missing\n", + "Traceback (most recent call last):\n", + " File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 231, in retry_async\n", + " return await process_response_async(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 201, in process_response_async\n", + " model = await response_model.from_response_async(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/jasonliu/dev/instructor/instructor/function_calls.py\", line 198, in from_response_async\n", + " return cls.model_validate_json(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/jasonliu/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py\", line 532, in model_validate_json\n", + " return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "pydantic_core._pydantic_core.ValidationError: 1 validation error for Query\n", + "rewritten_query\n", + " Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n", + " For further information visit https://errors.pydantic.dev/2.5/v/missing\n", "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. 
A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6440cf236ba24c3b839d1256cfada604", + "model_id": "96b112129c944465a35156a6ffbdfe54", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…" + "VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…" ] }, "metadata": {}, @@ -545,7 +551,7 @@ { "data": { "text/html": [ - "W&B sync reduced upload amount by 9.0% " + "W&B sync reduced upload amount by 7.9% " ], "text/plain": [ "" @@ -557,7 +563,7 @@ { "data": { "text/html": [ - " View run easy-feather-16 at: https://wandb.ai/instructor/query-understanding/runs/idscpy5k
Synced 4 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)" + " View run major-firebrand-21 at: https://wandb.ai/instructor/query-understanding/runs/opuq58lr
Synced 5 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)" ], "text/plain": [ "" @@ -569,7 +575,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20231221_153734-idscpy5k/logs" + "Find logs at: ./wandb/run-20231222_152028-opuq58lr/logs" ], "text/plain": [ "" @@ -580,39 +586,7 @@ } ], "source": [ - "import json\n", - "import wandb\n", - "\n", - "\n", - "class DateRange(BaseModel):\n", - " chain_of_thought: str = Field(\n", - " description=\"Think step by step to plan what is the best time range to search in\"\n", - " )\n", - " start: date\n", - " end: date\n", - "\n", - "\n", - "class Query(BaseModel):\n", - " rewritten_query: str = Field(\n", - " description=\"Rewrite the query to make it more specific\"\n", - " )\n", - " published_daterange: DateRange = Field(\n", - " description=\"Effective date range to search in\"\n", - " )\n", - "\n", - "\n", - "def expand_query(q) -> Query:\n", - " return client.chat.completions.create(\n", - " model=\"gpt-4-1106-preview\",\n", - " response_model=Query,\n", - " messages=[\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. 
Here are some tips: ...\",\n", - " },\n", - " {\"role\": \"user\", \"content\": f\"query: {q}\"},\n", - " ],\n", - " )\n", + "import asyncio\n", "\n", "\n", "run = wandb.init(\n", @@ -626,7 +600,7 @@ " \"biotechnology updates last 10 days\",\n", "]\n", "\n", - "queries = [expand_query(q) for q in test_queries]\n", + "queries = await asyncio.gather(*[expand_query(q) for q in test_queries])\n", "\n", "with open(\"schema.json\", \"w+\") as f:\n", " schema = Query.model_json_schema()\n", @@ -636,7 +610,7 @@ " for query in queries:\n", " f.write(query.model_dump_json() + \"\\n\")\n", "\n", - "df = dicts_to_df([q.model_dump() for q in queries])\n", + "df = dicts_to_df([q.report() for q in queries])\n", "df[\"input\"] = test_queries\n", "df.to_csv(\"results.csv\")\n", "\n", @@ -652,13 +626,6 @@ "run.finish()" ] }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -674,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -682,7 +649,7 @@ "\n", "\n", "class SearchClient(BaseModel):\n", - " query: str\n", + " query: str = Field(description=\"The search query that will go into the search bar\")\n", " keywords: List[str]\n", " email: str\n", " source: Literal[\"gmail\", \"calendar\"]\n", @@ -706,43 +673,19 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"queries\": [\n", - " {\n", - " \"query\": \"schedule\",\n", - " \"keywords\": [\n", - " \"appointments\",\n", - " \"meetings\",\n", - " \"schedule\",\n", - " \"events\"\n", - " ],\n", - " \"email\": \"jason.assistant@busybot.com\",\n", - " \"source\": \"calendar\",\n", - " \"date_range\": {\n", - " \"start\": \"2023-11-18\",\n", - " \"end\": \"2023-11-18\"\n", - " }\n", - " }\n", - " ]\n", - "}\n" - ] - } - ], 
+ "outputs": [], "source": [ "retrival = client.chat.completions.create(\n", - " model=\"gpt-4-1106-preview\",\n", + " model=\"gpt-3.5-turbo\",\n", " response_model=Retrival,\n", " messages=[\n", " {\n", " \"role\": \"system\",\n", - " \"content\": f\"You are Jason's personal assistant. Today is {date.today()}\",\n", + " \"content\": f\"\"\"You are Jason's personal assistant.\n", + " He has two emails jason@work.com jason@personal.com \n", + " Today is {date.today()}\"\"\",\n", " },\n", " {\"role\": \"user\", \"content\": \"What do I have today?\"},\n", " ],\n", @@ -810,11 +753,13 @@ " messages=[\n", " {\n", " \"role\": \"system\",\n", - " \"content\": f\"You are Jason's personal assistant. Today is {date.today()}\",\n", + " \"content\": f\"\"\"You are Jason's personal assistant.\n", + " He has two emails jason@work.com jason@personal.com \n", + " Today is {date.today()}\"\"\",\n", " },\n", " {\n", " \"role\": \"user\",\n", - " \"content\": \"What meetings do I have today and are there any important emails I should be aware of?\",\n", + " \"content\": \"What meetings do I have today and are there any important emails I should be aware of\",\n", " },\n", " ],\n", ")\n", diff --git a/tutorials/helpers.py b/tutorials/helpers.py new file mode 100644 index 0000000..3d7d5fe --- /dev/null +++ b/tutorials/helpers.py @@ -0,0 +1,32 @@ +import pandas as pd + + +def flatten_dict(d, parent_key="", sep="_"): + """ + Flatten a nested dictionary. + + :param d: The nested dictionary to flatten. + :param parent_key: The base key to use for the flattened keys. + :param sep: Separator to use between keys. + :return: A flattened dictionary. + """ + items = [] + for k, v in d.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + if isinstance(v, dict): + items.extend(flatten_dict(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) + + +def dicts_to_df(list_of_dicts): + """ + Convert a list of dictionaries to a pandas DataFrame. 
+ + :param list_of_dicts: List of dictionaries, potentially nested. + :return: A pandas DataFrame representing the flattened data. + """ + # Flatten each dictionary and create a DataFrame + flattened_data = [flatten_dict(d) for d in list_of_dicts] + return pd.DataFrame(flattened_data) diff --git a/wandb/settings b/wandb/settings new file mode 100644 index 0000000..fe5e2d8 --- /dev/null +++ b/wandb/settings @@ -0,0 +1,5 @@ +[default] +entity = instructor +project = query-understanding +base_url = https://api.wandb.ai +