updates

2026-06-05 22:50:18 +00:00 · 2023-12-20 16:21:25 -05:00
parent 7a6ce1e748
commit a5c7965d3a
2 changed files with 136 additions and 210 deletions
@@ -24,7 +24,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -40,30 +40,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Jason is 10\n",
-      "None is 10\n",
-      "Next year Jason will be 11 years old\n"
-     ]
-    },
-    {
-     "ename": "TypeError",
-     "evalue": "can only concatenate str (not \"int\") to str",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb Cell 5\u001b[0m line \u001b[0;36m9\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#W4sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m name \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mfirst_name\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#W4sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m age \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mage\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#W4sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNext year \u001b[39m\u001b[39m{\u001b[39;00mname\u001b[39m}\u001b[39;00m\u001b[39m will be \u001b[39m\u001b[39m{\u001b[39;00mage\u001b[39m+\u001b[39;49m\u001b[39m1\u001b[39;49m\u001b[39m}\u001b[39;00m\u001b[39m years old\u001b[39m\u001b[39m\"\u001b[39m)\n",
-      "\u001b[0;31mTypeError\u001b[0m: can only concatenate str (not \"int\") to str"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "for obj in data:\n",
    "    name = obj.get(\"first_name\")\n",
@@ -94,20 +73,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Person(name='Sam', age=30)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "from pydantic import BaseModel, Field\n",
    "\n",
@@ -123,20 +91,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Person(name='Sam', age=30)"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Data is correctly casted to the right type\n",
    "person = Person.model_validate({\"name\": \"Sam\", \"age\": \"30\"})\n",
@@ -145,21 +102,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AssertionError",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb Cell 10\u001b[0m line \u001b[0;36m2\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#X12sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39massert\u001b[39;00m person\u001b[39m.\u001b[39mname \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mSam\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#X12sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39massert\u001b[39;00m person\u001b[39m.\u001b[39mage \u001b[39m==\u001b[39m \u001b[39m20\u001b[39m\n",
-      "\u001b[0;31mAssertionError\u001b[0m: "
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "assert person.name == \"Sam\"\n",
    "assert person.age == 20"
@@ -167,25 +112,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "ename": "ValidationError",
-     "evalue": "1 validation error for Person\nage\n  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='30.2', input_type=str]\n    For further information visit https://errors.pydantic.dev/2.5/v/int_parsing",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mValidationError\u001b[0m                           Traceback (most recent call last)",
-      "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb Cell 11\u001b[0m line \u001b[0;36m2\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#X13sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m# Data is validated to get better error messages\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#X13sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m person \u001b[39m=\u001b[39m Person\u001b[39m.\u001b[39;49mmodel_validate({\u001b[39m\"\u001b[39;49m\u001b[39mname\u001b[39;49m\u001b[39m\"\u001b[39;49m: \u001b[39m\"\u001b[39;49m\u001b[39mSam\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mage\u001b[39;49m\u001b[39m\"\u001b[39;49m: \u001b[39m\"\u001b[39;49m\u001b[39m30.2\u001b[39;49m\u001b[39m\"\u001b[39;49m})\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb#X13sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m person\n",
-      "File \u001b[0;32m~/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py:503\u001b[0m, in \u001b[0;36mBaseModel.model_validate\u001b[0;34m(cls, obj, strict, from_attributes, context)\u001b[0m\n\u001b[1;32m    501\u001b[0m \u001b[39m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m    502\u001b[0m __tracebackhide__ \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m--> 503\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39;49m\u001b[39m.\u001b[39;49m__pydantic_validator__\u001b[39m.\u001b[39;49mvalidate_python(\n\u001b[1;32m    504\u001b[0m     obj, strict\u001b[39m=\u001b[39;49mstrict, from_attributes\u001b[39m=\u001b[39;49mfrom_attributes, context\u001b[39m=\u001b[39;49mcontext\n\u001b[1;32m    505\u001b[0m )\n",
-      "\u001b[0;31mValidationError\u001b[0m: 1 validation error for Person\nage\n  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='30.2', input_type=str]\n    For further information visit https://errors.pydantic.dev/2.5/v/int_parsing"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Data is validated to get better error messages\n",
-    "person = Person.model_validate({\"name\": \"Sam\", \"age\": \"30.2\"})\n",
+    "person = Person.model_validate({\"first_name\": \"Sam\", \"age\": \"30.2\"})\n",
    "person"
   ]
  },
@@ -202,41 +134,38 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Asking for JSON from OpenAI\n"
+    "## Fundamental problem with asking for JSON from OpenAI\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{\n",
-      "   \"Jason\": {\n",
-      "      \"age\": 10\n",
-      "   }\n",
-      "}\n",
-      "Here is the JSON representation of `jason is 10` as a JSON object:\n",
-      "\n",
-      "```\n",
-      "{\n",
-      "  \"name\": \"Jason\",\n",
-      "  \"age\": 10\n",
-      "}\n",
-      "```\n",
-      "Here is the JSON object representation of \"Jason is 10\":\n",
-      "\n",
-      "```json\n",
-      "{\n",
-      "  \"name\": \"Jason\",\n",
-      "  \"age\": 10\n",
-      "}\n",
-      "```\n",
-      "\n",
-      "In this JSON object, the key \"name\" corresponds to the value \"Jason\" and the key \"age\" corresponds to the value 10.\n"
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n",
+      "correctly parsed person=Person(name='Jason', age=10)\n"
     ]
    }
   ],
@@ -248,7 +177,7 @@
    "resp = client.chat.completions.create(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    messages=[\n",
-    "        {\"role\": \"user\", \"content\": \"Please give me jason is 10 as a json object\"},\n",
+    "        {\"role\": \"user\", \"content\": \"Please give me jason is 10 as a json object ```json\\n\"},\n",
    "    ],\n",
    "    n=20,\n",
    "    temperature=1,\n",
@@ -257,8 +186,10 @@
    "for choice in resp.choices:\n",
    "    json = choice.message.content\n",
    "    try:\n",
-    "        Person.model_validate_json(json)\n",
+    "        person = Person.model_validate_json(json)\n",
+    "        print(f\"correctly parsed {person=}\")\n",
    "    except Exception as e:\n",
+    "        print(\"error!!\")\n",
    "        print(json)"
   ]
  },
@@ -277,16 +208,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "PersonBirthday(name='Jason Liu', age=30, birthday=datetime.date(2023, 11, 30))"
+       "PersonBirthday(name='Jason Liu', age=30, birthday=datetime.date(2023, 12, 19))"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -335,7 +266,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
@@ -349,7 +280,7 @@
       " 'type': 'object'}"
      ]
     },
-     "execution_count": 14,
+     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -367,7 +298,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
@@ -390,7 +321,7 @@
       " 'type': 'object'}"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -425,14 +356,14 @@
   "source": [
    "# The core idea around Instructor\n",
    "\n",
-    "1. Using function calling allows us to specify the schema we want\n",
-    "2. Pydantic can be used to define the schema and documentation AND validate the response at runtime\n",
+    "1. Using function calling allows us to use an LLM that is fine-tuned to use JSON Schema and output JSON.\n",
+    "2. Pydantic can be used to define the object, schema, and validation in a single class, allowing us to encapsulate everything neatly\n",
    "3. As a library with 100M downloads, we can leverage pydantic to do all the heavy lifting for us and fit nicely with the python ecosystem\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
@@ -441,7 +372,7 @@
       "PersonAddress(name='Jason Liu', age=30, address=Address(address='123 Main St', city='San Francisco', state='CA'))"
      ]
     },
-     "execution_count": 16,
+     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -451,10 +382,10 @@
    "import datetime\n",
    "\n",
    "# patch the client to add `response_model` to the `create` method\n",
-    "client = instructor.patch(client)\n",
+    "client = instructor.patch(OpenAI(), mode=instructor.Mode.MD_JSON)\n",
    "\n",
    "resp = client.chat.completions.create(\n",
-    "    model=\"gpt-3.5-turbo\",\n",
+    "    model=\"gpt-3.5-turbo-1106\",\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"user\",\n",
@@ -484,7 +415,7 @@
   "source": [
    "## Is instructor the only way to do this?\n",
    "\n",
-    "No. Libraries like Marvin, Langchain, and Llamaindex all now leverage the pydantic object in similar ways however they all have different approaches to how they do it. With instructor the goal is to be as light weight as possible, get you as close as possible to the openai api, and then get out of your way.\n",
+    "No. Libraries like Marvin, Langchain, and Llamaindex all now leverage the Pydantic object in similar ways. With Instructor, the goal is to be as lightweight as possible, get you as close as possible to the OpenAI API, and then get out of your way.\n",
    "\n",
    "More importantly, we've also added straight forward validation and reasking to the mix.\n",
    "\n",
@@ -496,6 +427,13 @@
    "- [Langchain](https://python.langchain.com/docs/modules/model_io/output_parsers/pydantic)\n",
    "- [LlamaIndex](https://gpt-index.readthedocs.io/en/latest/examples/output_parsing/openai_pydantic_program.html)\n"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@@ -74,7 +74,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -107,7 +107,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -120,46 +120,39 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{'hypothetical_questions': ['What is the basic concept behind Simple RAG?',\n",
-      "                            'Why might Simple RAG not perform well with '\n",
-      "                            'complex queries?',\n",
-      "                            \"In what ways does Simple RAG's embedding search \"\n",
-      "                            'fall short?'],\n",
-      " 'keywords': ['Simple RAG',\n",
-      "              'Retriever-Augmented Generation',\n",
-      "              'user queries',\n",
-      "              'embedding search',\n",
-      "              'vector database',\n",
-      "              'query-document mismatch'],\n",
-      " 'summary': 'Simple RAG is an implementation that embeds user queries for a '\n",
-      "            'single embedding search in a vector database. Although '\n",
-      "            'straightforward, it struggles with complex queries and varied '\n",
-      "            'data sources because of its basic framework and query-document '\n",
-      "            'mismatch issues.',\n",
-      " 'topic': 'Simple Retriever-Augmented Generation (RAG)'}\n",
-      "{'hypothetical_questions': ['What kind of limitations does Simple RAG face?',\n",
-      "                            'How does a monolithic search backend affect '\n",
-      "                            \"Simple RAG's performance?\",\n",
-      "                            'Can Simple RAG handle complex, context-specific '\n",
-      "                            'queries effectively?'],\n",
-      " 'keywords': ['limitations',\n",
-      "              'Simple RAG',\n",
-      "              'query-document mismatch',\n",
+      "{'hypothetical_questions': [],\n",
+      " 'keywords': ['RAG',\n",
+      "              'Retrieval-Augmented Generation',\n",
+      "              'embedding',\n",
+      "              'query',\n",
+      "              'vector database'],\n",
+      " 'summary': 'The simplest implementation of RAG (Retrieval-Augmented '\n",
+      "            'Generation) involves embedding a user query and conducting a '\n",
+      "            'single embedding search in a vector database, often containing a '\n",
+      "            'vector store of Wikipedia articles. This approach is intended to '\n",
+      "            'retrieve relevant documents to aid in generating responses.',\n",
+      " 'topic': 'Simple RAG Description'}\n",
+      "{'hypothetical_questions': ['What are the common challenges encountered with '\n",
+      "                            'the simple RAG approach?',\n",
+      "                            'How does the monolithic search backend limit the '\n",
+      "                            'simple RAG?'],\n",
+      " 'keywords': ['query-document mismatch',\n",
      "              'monolithic search backend',\n",
      "              'text search limitations',\n",
      "              'limited planning ability'],\n",
-      " 'summary': 'The limitations of Simple RAG include query-document mismatch, '\n",
-      "            'reliance on a monolithic search backend, restrictions to simple '\n",
-      "            'text searches, and limited planning ability, which results in '\n",
-      "            'suboptimal outcomes when handling nuanced or context-specific '\n",
-      "            'queries.',\n",
+      " 'summary': 'Simple RAG has several limitations including query-document '\n",
+      "            'mismatch where embeddings may not align, reliance on a monolithic '\n",
+      "            'search backend which reduces flexibility, text search limitations '\n",
+      "            \"that can't grasp nuances of complex queries, and limited planning \"\n",
+      "            'ability that fails to consider additional contextual information '\n",
+      "            'for refining search results.',\n",
      " 'topic': 'Limitations of Simple RAG'}\n"
     ]
    }
@@ -227,7 +220,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -260,40 +253,38 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\n",
-      "    \"rewritten_query\": \"recent developments in AI\",\n",
-      "    \"published_daterange\": {\n",
-      "        \"start\": \"2023-01-01\",\n",
-      "        \"end\": \"2023-11-30\"\n",
-      "    }\n",
-      "}\n"
-     ]
+     "data": {
+      "text/plain": [
+       "Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 20)))"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
    }
   ],
   "source": [
-    "query = client.chat.completions.create(\n",
-    "    model=\"gpt-3.5-turbo\",\n",
-    "    response_model=Query,\n",
-    "    messages=[\n",
-    "        {\n",
-    "            \"role\": \"system\", \n",
-    "            \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\"\n",
-    "        },\n",
-    "        {\n",
-    "            \"role\": \"user\", \n",
-    "            \"content\": \"query: What are some recent developments in AI?\"\n",
-    "        }\n",
-    "    ],\n",
-    ")\n",
+    "def expand_query(q) -> Query:\n",
+    "    return client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo\",\n",
+    "        response_model=Query,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"system\", \n",
+    "                \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"role\": \"user\", \n",
+    "                \"content\": f\"query: {q}\"\n",
+    "            }\n",
+    "        ],\n",
+    "    )\n",
    "\n",
-    "print(query.model_dump_json(indent=4)) # Printing the Json dump of the model"
+    "expand_query(\"What are some recent developments in AI?\")"
   ]
  },
  {
@@ -305,22 +296,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\n",
-      "    \"rewritten_query\": \"recent developments in artificial intelligence\",\n",
-      "    \"published_daterange\": {\n",
-      "        \"chain_of_thought\": \"Given that it's currently late 2023, a recent timeframe would ideally be within the last year to ensure the developments are current. Therefore, a suitable date range for recent AI developments could start from late 2022 to the present date in 2023.\",\n",
-      "        \"start\": \"2022-11-18\",\n",
-      "        \"end\": \"2023-11-18\"\n",
-      "    }\n",
-      "}\n"
-     ]
+     "data": {
+      "text/plain": [
+       "Query(rewritten_query='latest advancements in artificial intelligence', published_daterange=DateRange(chain_of_thought=\"Given that 'recent' suggests developments within the last few months to a year, the best time range to search would be from the beginning of the current year to the current date. This will ensure that the developments are timely and up-to-date.\", start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 20)))"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
    }
   ],
   "source": [
@@ -336,22 +323,23 @@
    "    published_daterange: DateRange\n",
    "\n",
    "\n",
-    "query = client.chat.completions.create(\n",
-    "    model=\"gpt-4-1106-preview\",\n",
-    "    response_model=Query,\n",
-    "    messages=[\n",
-    "        {\n",
-    "            \"role\": \"system\", \n",
-    "            \"content\": f\"You're a query understanding system for a search engine. Today is {date.today()}.\"\n",
-    "        },\n",
-    "        {\n",
-    "            \"role\": \"user\", \n",
-    "            \"content\": \"What are some recent developments in AI?\"\n",
-    "        }\n",
-    "    ],\n",
-    ")\n",
+    "def expand_query(q) -> Query:\n",
+    "    return client.chat.completions.create(\n",
+    "        model=\"gpt-4-1106-preview\",\n",
+    "        response_model=Query,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"system\", \n",
+    "                \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"role\": \"user\", \n",
+    "                \"content\": f\"query: {q}\"\n",
+    "            }\n",
+    "        ],\n",
+    "    )\n",
    "\n",
-    "print(query.model_dump_json(indent=4)) # Printing the Json dump of the model"
+    "expand_query(\"What are some recent developments in AI?\")"
   ]
  },
  {