clean up notebooks

2026-06-05 22:50:18 +00:00 · 2023-12-22 15:23:52 -05:00
parent c2fc90265f
commit feb2a532ca
4 changed files with 180 additions and 193 deletions
@@ -162,3 +162,8 @@ cython_debug/
 examples/citation_with_extraction/fly.toml
 my_cache_directory/
 tutorials/wandb/*
+tutorials/results.csv
+tutorials/results.jsonl
+tutorials/results.jsonlines
+tutorials/schema.json
+wandb/settings
@@ -74,7 +74,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -107,7 +107,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -125,75 +125,43 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{'hypothetical_questions': ['How does a simple RAG Model work?',\n",
-      "                            'What are the key challenges faced by simple RAG '\n",
-      "                            'systems?',\n",
-      "                            'Can a simple RAG model handle complex queries '\n",
-      "                            'effectively?'],\n",
+      "{'hypothetical_questions': ['What is the most basic implementation of '\n",
+      "                            'Retrieval-Augmented Generation?',\n",
+      "                            'Why might simple RAG not be adequate for complex '\n",
+      "                            'user queries?'],\n",
      " 'keywords': ['Retrieval-Augmented Generation',\n",
      "              'RAG',\n",
-      "              'user query',\n",
      "              'vector database',\n",
-      "              'embeddings',\n",
-      "              'search limitations'],\n",
-      " 'summary': 'The simple Retrieval-Augmented Generation (RAG) model is an '\n",
-      "            'approach that uses a vector database to embed and search for user '\n",
-      "            'queries, such as Wikipedia articles. It provides answers by '\n",
-      "            'aligning query and document embeddings. However, it has several '\n",
-      "            'limitations, including query-document mismatch, reliance on a '\n",
-      "            'monolithic search backend, text search limitations, and a limited '\n",
-      "            'planning ability.',\n",
-      " 'topic': 'Simple RAG'}\n",
-      "{'hypothetical_questions': ['What is a query-document mismatch in the context '\n",
-      "                            'of RAG models?',\n",
-      "                            'Why might a simple RAG model struggle with '\n",
-      "                            'specific user queries?'],\n",
-      " 'keywords': ['query-document mismatch',\n",
-      "              'RAG limitation',\n",
-      "              'embedding alignment'],\n",
-      " 'summary': \"A limitation where the simple RAG system's query and document \"\n",
-      "            'embeddings may not align properly, leading to ineffective '\n",
-      "            \"retrieval of information specific to a user's query about, for \"\n",
-      "            \"example, 'climate change effects on marine life'.\",\n",
-      " 'topic': 'Query-Document Mismatch'}\n",
-      "{'hypothetical_questions': ['Why is depending on a monolithic search backend a '\n",
-      "                            'limitation for simple RAG?',\n",
-      "                            'How can a monolithic search backend affect the '\n",
-      "                            'quality of search results?'],\n",
-      " 'keywords': ['monolithic search backend', 'RAG system', 'data sources'],\n",
-      " 'summary': 'In simple RAG, the reliance on a single search method and backend '\n",
-      "            \"can limit the system's ability to access diverse or specialized \"\n",
-      "            \"data sources, such as when searching for 'latest research in \"\n",
-      "            \"quantum computing'.\",\n",
-      " 'topic': 'Monolithic Search Backend'}\n",
-      "{'hypothetical_questions': ['How do text search limitations impact the '\n",
-      "                            'effectiveness of simple RAG?',\n",
-      "                            'Can simple RAG models understand the context of '\n",
-      "                            'search terms?'],\n",
-      " 'keywords': ['text search limitations', 'RAG', 'advanced search'],\n",
-      " 'summary': 'The simple RAG model is limited to straightforward text queries '\n",
-      "            'without advanced search capabilities, failing to resolve nuanced '\n",
-      "            \"queries like 'what problems did we fix last week' due to the \"\n",
-      "            \"presence of generic terms such as 'problem' and 'last week' \"\n",
-      "            'throughout documents.',\n",
-      " 'topic': 'Text Search Limitations'}\n",
-      "{'hypothetical_questions': ['What does limited planning ability imply for a '\n",
-      "                            \"RAG model's search results?\",\n",
-      "                            'Can simple RAG models provide context-specific '\n",
-      "                            'information effectively?'],\n",
-      " 'keywords': ['limited planning ability', 'contextual information', 'RAG'],\n",
-      " 'summary': 'Simple RAG models struggle to incorporate additional context in '\n",
-      "            'their searches, which may result in less relevant or overly '\n",
-      "            'general responses to queries that require specific insights, like '\n",
-      "            \"'Tips for first-time Europe travelers'.\",\n",
-      " 'topic': 'Limited Planning Ability'}\n"
+      "              'simple implementation'],\n",
+      " 'summary': 'The simplest form of RAG involves embedding a user query and '\n",
+      "            'performing a single search in a vector database, such as a '\n",
+      "            'Wikipedia article store. This method, however, often fails with '\n",
+      "            'complex queries and diverse data sources.',\n",
+      " 'topic': 'Simple Retrieval-Augmented Generation (RAG)'}\n",
+      "{'hypothetical_questions': ['What are the main drawbacks of the simple RAG '\n",
+      "                            'model?',\n",
+      "                            'How does Query-Document Mismatch affect search '\n",
+      "                            'results in simple RAG?',\n",
+      "                            'Why is relying on a monolithic search backend '\n",
+      "                            'problematic for RAG?'],\n",
+      " 'keywords': ['limitations',\n",
+      "              'query-document mismatch',\n",
+      "              'monolithic search backend',\n",
+      "              'text search limitations',\n",
+      "              'limited planning ability'],\n",
+      " 'summary': 'The limitations of simple RAG include the Query-Document Mismatch '\n",
+      "            'which assumes a perfect alignment of embeddings, the reliance on '\n",
+      "            'a Monolithic Search Backend which limits flexibility, Text Search '\n",
+      "            'Limitations that impede nuanced search, and a Limited Planning '\n",
+      "            'Ability that overlooks additional context for refining results.',\n",
+      " 'topic': 'Limitations of Simple RAG'}\n"
     ]
    }
   ],
@@ -261,7 +229,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -296,16 +264,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 21)))"
+       "Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 22)))"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -325,7 +293,8 @@
    "    )\n",
    "\n",
    "\n",
-    "expand_query(\"What are some recent developments in AI?\")"
+    "query = expand_query(\"What are some recent developments in AI?\")\n",
+    "query"
   ]
  },
  {
@@ -398,55 +367,71 @@
    "\n",
    "1. Save input and output pairs for later\n",
    "2. Save the JSON schema for the response_model\n",
-    "3. Having snapshots of the model and data allow us to compare results over time, and as we make changes to the model we can see how the results change.\n"
+    "3. Having snapshots of the model and data allows us to compare results over time and to see how the results change as we make changes to the model.\n",
+    "\n",
+    "This is particularly useful when we might want to blend a mix of synthetic and real data to evaluate our model. We can use the `wandb` library to track our experiments and save the results to a dashboard.\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "def flatten_dict(d, parent_key=\"\", sep=\"_\"):\n",
-    "    \"\"\"\n",
-    "    Flatten a nested dictionary.\n",
-    "\n",
-    "    :param d: The nested dictionary to flatten.\n",
-    "    :param parent_key: The base key to use for the flattened keys.\n",
-    "    :param sep: Separator to use between keys.\n",
-    "    :return: A flattened dictionary.\n",
-    "    \"\"\"\n",
-    "    items = []\n",
-    "    for k, v in d.items():\n",
-    "        new_key = f\"{parent_key}{sep}{k}\" if parent_key else k\n",
-    "        if isinstance(v, dict):\n",
-    "            items.extend(flatten_dict(v, new_key, sep=sep).items())\n",
-    "        else:\n",
-    "            items.append((new_key, v))\n",
-    "    return dict(items)\n",
-    "\n",
-    "\n",
-    "def dicts_to_df(list_of_dicts):\n",
-    "    \"\"\"\n",
-    "    Convert a list of dictionaries to a pandas DataFrame.\n",
-    "\n",
-    "    :param list_of_dicts: List of dictionaries, potentially nested.\n",
-    "    :return: A pandas DataFrame representing the flattened data.\n",
-    "    \"\"\"\n",
-    "    # Flatten each dictionary and create a DataFrame\n",
-    "    flattened_data = [flatten_dict(d) for d in list_of_dicts]\n",
-    "    return pd.DataFrame(flattened_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import wandb\n",
+    "from helpers import dicts_to_df\n",
+    "\n",
+    "\n",
+    "class DateRange(BaseModel):\n",
+    "    chain_of_thought: str = Field(\n",
+    "        description=\"Think step by step to plan what is the best time range to search in\"\n",
+    "    )\n",
+    "    start: date\n",
+    "    end: date\n",
+    "\n",
+    "\n",
+    "class Query(BaseModel):\n",
+    "    rewritten_query: str = Field(\n",
+    "        description=\"Rewrite the query to make it more specific\"\n",
+    "    )\n",
+    "    published_daterange: DateRange = Field(\n",
+    "        description=\"Effective date range to search in\"\n",
+    "    )\n",
+    "\n",
+    "    def report(self):\n",
+    "        dct = self.model_dump()\n",
+    "        dct[\"usage\"] = self._raw_response.usage.model_dump()\n",
+    "        return dct\n",
+    "\n",
+    "\n",
+    "from openai import AsyncOpenAI\n",
+    "\n",
+    "# We'll use a different client for async calls\n",
+    "# To highlight the difference and how we can use both\n",
+    "aclient = instructor.patch(AsyncOpenAI())\n",
+    "\n",
+    "\n",
+    "async def expand_query(q) -> Query:\n",
+    "    return await aclient.chat.completions.create(\n",
+    "        model=\"gpt-4-1106-preview\",\n",
+    "        response_model=Query,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"system\",\n",
+    "                \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\",\n",
+    "            },\n",
+    "            {\"role\": \"user\", \"content\": f\"query: {q}\"},\n",
+    "        ],\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -476,7 +461,7 @@
    {
     "data": {
      "text/html": [
-       "Run data is saved locally in <code>/Users/jasonliu/dev/instructor/tutorials/wandb/run-20231221_153734-idscpy5k</code>"
+       "Run data is saved locally in <code>/Users/jasonliu/dev/instructor/tutorials/wandb/run-20231222_152028-opuq58lr</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -488,7 +473,7 @@
    {
     "data": {
      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/instructor/query-understanding/runs/idscpy5k' target=\"_blank\">easy-feather-16</a></strong> to <a href='https://wandb.ai/instructor/query-understanding' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/instructor/query-understanding/runs/opuq58lr' target=\"_blank\">major-firebrand-21</a></strong> to <a href='https://wandb.ai/instructor/query-understanding' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -512,7 +497,7 @@
    {
     "data": {
      "text/html": [
-       " View run at <a href='https://wandb.ai/instructor/query-understanding/runs/idscpy5k' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/idscpy5k</a>"
+       " View run at <a href='https://wandb.ai/instructor/query-understanding/runs/opuq58lr' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/opuq58lr</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -525,18 +510,39 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
+      "Retrying, exception: 1 validation error for Query\n",
+      "rewritten_query\n",
+      "  Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n",
+      "    For further information visit https://errors.pydantic.dev/2.5/v/missing\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 231, in retry_async\n",
+      "    return await process_response_async(\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 201, in process_response_async\n",
+      "    model = await response_model.from_response_async(\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/Users/jasonliu/dev/instructor/instructor/function_calls.py\", line 198, in from_response_async\n",
+      "    return cls.model_validate_json(\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/Users/jasonliu/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py\", line 532, in model_validate_json\n",
+      "    return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "pydantic_core._pydantic_core.ValidationError: 1 validation error for Query\n",
+      "rewritten_query\n",
+      "  Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n",
+      "    For further information visit https://errors.pydantic.dev/2.5/v/missing\n",
      "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6440cf236ba24c3b839d1256cfada604",
+       "model_id": "96b112129c944465a35156a6ffbdfe54",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
-       "VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…"
+       "VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…"
      ]
     },
     "metadata": {},
@@ -545,7 +551,7 @@
    {
     "data": {
      "text/html": [
-       "W&B sync reduced upload amount by 9.0%             "
+       "W&B sync reduced upload amount by 7.9%             "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -557,7 +563,7 @@
    {
     "data": {
      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">easy-feather-16</strong> at: <a href='https://wandb.ai/instructor/query-understanding/runs/idscpy5k' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/idscpy5k</a><br/>Synced 4 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">major-firebrand-21</strong> at: <a href='https://wandb.ai/instructor/query-understanding/runs/opuq58lr' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/opuq58lr</a><br/>Synced 5 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -569,7 +575,7 @@
    {
     "data": {
      "text/html": [
-       "Find logs at: <code>./wandb/run-20231221_153734-idscpy5k/logs</code>"
+       "Find logs at: <code>./wandb/run-20231222_152028-opuq58lr/logs</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -580,39 +586,7 @@
    }
   ],
   "source": [
-    "import json\n",
-    "import wandb\n",
-    "\n",
-    "\n",
-    "class DateRange(BaseModel):\n",
-    "    chain_of_thought: str = Field(\n",
-    "        description=\"Think step by step to plan what is the best time range to search in\"\n",
-    "    )\n",
-    "    start: date\n",
-    "    end: date\n",
-    "\n",
-    "\n",
-    "class Query(BaseModel):\n",
-    "    rewritten_query: str = Field(\n",
-    "        description=\"Rewrite the query to make it more specific\"\n",
-    "    )\n",
-    "    published_daterange: DateRange = Field(\n",
-    "        description=\"Effective date range to search in\"\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "def expand_query(q) -> Query:\n",
-    "    return client.chat.completions.create(\n",
-    "        model=\"gpt-4-1106-preview\",\n",
-    "        response_model=Query,\n",
-    "        messages=[\n",
-    "            {\n",
-    "                \"role\": \"system\",\n",
-    "                \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\",\n",
-    "            },\n",
-    "            {\"role\": \"user\", \"content\": f\"query: {q}\"},\n",
-    "        ],\n",
-    "    )\n",
+    "import asyncio\n",
    "\n",
    "\n",
    "run = wandb.init(\n",
@@ -626,7 +600,7 @@
    "    \"biotechnology updates last 10 days\",\n",
    "]\n",
    "\n",
-    "queries = [expand_query(q) for q in test_queries]\n",
+    "queries = await asyncio.gather(*[expand_query(q) for q in test_queries])\n",
    "\n",
    "with open(\"schema.json\", \"w+\") as f:\n",
    "    schema = Query.model_json_schema()\n",
@@ -636,7 +610,7 @@
    "    for query in queries:\n",
    "        f.write(query.model_dump_json() + \"\\n\")\n",
    "\n",
-    "df = dicts_to_df([q.model_dump() for q in queries])\n",
+    "df = dicts_to_df([q.report() for q in queries])\n",
    "df[\"input\"] = test_queries\n",
    "df.to_csv(\"results.csv\")\n",
    "\n",
@@ -652,13 +626,6 @@
    "run.finish()"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -674,7 +641,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -682,7 +649,7 @@
    "\n",
    "\n",
    "class SearchClient(BaseModel):\n",
-    "    query: str\n",
+    "    query: str = Field(description=\"The search query that will go into the search bar\")\n",
    "    keywords: List[str]\n",
    "    email: str\n",
    "    source: Literal[\"gmail\", \"calendar\"]\n",
@@ -706,43 +673,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 27,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\n",
-      "    \"queries\": [\n",
-      "        {\n",
-      "            \"query\": \"schedule\",\n",
-      "            \"keywords\": [\n",
-      "                \"appointments\",\n",
-      "                \"meetings\",\n",
-      "                \"schedule\",\n",
-      "                \"events\"\n",
-      "            ],\n",
-      "            \"email\": \"jason.assistant@busybot.com\",\n",
-      "            \"source\": \"calendar\",\n",
-      "            \"date_range\": {\n",
-      "                \"start\": \"2023-11-18\",\n",
-      "                \"end\": \"2023-11-18\"\n",
-      "            }\n",
-      "        }\n",
-      "    ]\n",
-      "}\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "retrival = client.chat.completions.create(\n",
-    "    model=\"gpt-4-1106-preview\",\n",
+    "    model=\"gpt-3.5-turbo\",\n",
    "    response_model=Retrival,\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
-    "            \"content\": f\"You are Jason's personal assistant. Today is {date.today()}\",\n",
+    "            \"content\": f\"\"\"You are Jason's personal assistant.\n",
+    "                He has two emails jason@work.com jason@personal.com \n",
+    "                Today is {date.today()}\"\"\",\n",
    "        },\n",
    "        {\"role\": \"user\", \"content\": \"What do I have today?\"},\n",
    "    ],\n",
@@ -810,11 +753,13 @@
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
-    "            \"content\": f\"You are Jason's personal assistant. Today is {date.today()}\",\n",
+    "            \"content\": f\"\"\"You are Jason's personal assistant.\n",
+    "                He has two emails jason@work.com jason@personal.com \n",
+    "                Today is {date.today()}\"\"\",\n",
    "        },\n",
    "        {\n",
    "            \"role\": \"user\",\n",
-    "            \"content\": \"What meetings do I have today and are there any important emails I should be aware of?\",\n",
+    "            \"content\": \"What meetings do I have today and are there any important emails I should be aware of\",\n",
    "        },\n",
    "    ],\n",
    ")\n",
@@ -0,0 +1,32 @@
+import pandas as pd
+
+
+def flatten_dict(d, parent_key="", sep="_"):
+    """
+    Collapse a nested dictionary into a single level.
+
+    :param d: Dictionary to collapse; values that are dicts are recursed into.
+    :param parent_key: Prefix for every produced key ("" means no prefix).
+    :param sep: String placed between the prefix and each nested key.
+    :return: A new flat dict; nested keys are joined as prefix + sep + key.
+    """
+    flat = {}
+    for key, value in d.items():
+        path = key if not parent_key else f"{parent_key}{sep}{key}"
+        if not isinstance(value, dict):
+            flat[path] = value
+        else:
+            flat.update(flatten_dict(value, parent_key=path, sep=sep))
+    return flat
+
+
+def dicts_to_df(list_of_dicts):
+    """
+    Build a pandas DataFrame with one row per input dictionary.
+
+    :param list_of_dicts: Iterable of dicts; nested dicts are flattened first.
+    :return: DataFrame constructed from the flattened dictionaries.
+    """
+    # Each record is flattened with flatten_dict's defaults before framing.
+    rows = list(map(flatten_dict, list_of_dicts))
+    return pd.DataFrame(rows)
@@ -0,0 +1,5 @@
+[default]
+entity = instructor
+project = query-understanding
+base_url = https://api.wandb.ai
+