mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
clean up notebooks
This commit is contained in:
@@ -162,3 +162,8 @@ cython_debug/
|
||||
examples/citation_with_extraction/fly.toml
|
||||
my_cache_directory/
|
||||
tutorials/wandb/*
|
||||
tutorials/results.csv
|
||||
tutorials/results.jsonl
|
||||
tutorials/results.jsonlines
|
||||
tutorials/schema.json
|
||||
wandb/settings
|
||||
|
||||
@@ -74,7 +74,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -107,7 +107,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -125,75 +125,43 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'hypothetical_questions': ['How does a simple RAG Model work?',\n",
|
||||
" 'What are the key challenges faced by simple RAG '\n",
|
||||
" 'systems?',\n",
|
||||
" 'Can a simple RAG model handle complex queries '\n",
|
||||
" 'effectively?'],\n",
|
||||
"{'hypothetical_questions': ['What is the most basic implementation of '\n",
|
||||
" 'Retrieval-Augmented Generation?',\n",
|
||||
" 'Why might simple RAG not be adequate for complex '\n",
|
||||
" 'user queries?'],\n",
|
||||
" 'keywords': ['Retrieval-Augmented Generation',\n",
|
||||
" 'RAG',\n",
|
||||
" 'user query',\n",
|
||||
" 'vector database',\n",
|
||||
" 'embeddings',\n",
|
||||
" 'search limitations'],\n",
|
||||
" 'summary': 'The simple Retrieval-Augmented Generation (RAG) model is an '\n",
|
||||
" 'approach that uses a vector database to embed and search for user '\n",
|
||||
" 'queries, such as Wikipedia articles. It provides answers by '\n",
|
||||
" 'aligning query and document embeddings. However, it has several '\n",
|
||||
" 'limitations, including query-document mismatch, reliance on a '\n",
|
||||
" 'monolithic search backend, text search limitations, and a limited '\n",
|
||||
" 'planning ability.',\n",
|
||||
" 'topic': 'Simple RAG'}\n",
|
||||
"{'hypothetical_questions': ['What is a query-document mismatch in the context '\n",
|
||||
" 'of RAG models?',\n",
|
||||
" 'Why might a simple RAG model struggle with '\n",
|
||||
" 'specific user queries?'],\n",
|
||||
" 'keywords': ['query-document mismatch',\n",
|
||||
" 'RAG limitation',\n",
|
||||
" 'embedding alignment'],\n",
|
||||
" 'summary': \"A limitation where the simple RAG system's query and document \"\n",
|
||||
" 'embeddings may not align properly, leading to ineffective '\n",
|
||||
" \"retrieval of information specific to a user's query about, for \"\n",
|
||||
" \"example, 'climate change effects on marine life'.\",\n",
|
||||
" 'topic': 'Query-Document Mismatch'}\n",
|
||||
"{'hypothetical_questions': ['Why is depending on a monolithic search backend a '\n",
|
||||
" 'limitation for simple RAG?',\n",
|
||||
" 'How can a monolithic search backend affect the '\n",
|
||||
" 'quality of search results?'],\n",
|
||||
" 'keywords': ['monolithic search backend', 'RAG system', 'data sources'],\n",
|
||||
" 'summary': 'In simple RAG, the reliance on a single search method and backend '\n",
|
||||
" \"can limit the system's ability to access diverse or specialized \"\n",
|
||||
" \"data sources, such as when searching for 'latest research in \"\n",
|
||||
" \"quantum computing'.\",\n",
|
||||
" 'topic': 'Monolithic Search Backend'}\n",
|
||||
"{'hypothetical_questions': ['How do text search limitations impact the '\n",
|
||||
" 'effectiveness of simple RAG?',\n",
|
||||
" 'Can simple RAG models understand the context of '\n",
|
||||
" 'search terms?'],\n",
|
||||
" 'keywords': ['text search limitations', 'RAG', 'advanced search'],\n",
|
||||
" 'summary': 'The simple RAG model is limited to straightforward text queries '\n",
|
||||
" 'without advanced search capabilities, failing to resolve nuanced '\n",
|
||||
" \"queries like 'what problems did we fix last week' due to the \"\n",
|
||||
" \"presence of generic terms such as 'problem' and 'last week' \"\n",
|
||||
" 'throughout documents.',\n",
|
||||
" 'topic': 'Text Search Limitations'}\n",
|
||||
"{'hypothetical_questions': ['What does limited planning ability imply for a '\n",
|
||||
" \"RAG model's search results?\",\n",
|
||||
" 'Can simple RAG models provide context-specific '\n",
|
||||
" 'information effectively?'],\n",
|
||||
" 'keywords': ['limited planning ability', 'contextual information', 'RAG'],\n",
|
||||
" 'summary': 'Simple RAG models struggle to incorporate additional context in '\n",
|
||||
" 'their searches, which may result in less relevant or overly '\n",
|
||||
" 'general responses to queries that require specific insights, like '\n",
|
||||
" \"'Tips for first-time Europe travelers'.\",\n",
|
||||
" 'topic': 'Limited Planning Ability'}\n"
|
||||
" 'simple implementation'],\n",
|
||||
" 'summary': 'The simplest form of RAG involves embedding a user query and '\n",
|
||||
" 'performing a single search in a vector database, such as a '\n",
|
||||
" 'Wikipedia article store. This method, however, often fails with '\n",
|
||||
" 'complex queries and diverse data sources.',\n",
|
||||
" 'topic': 'Simple Retrieval-Augmented Generation (RAG)'}\n",
|
||||
"{'hypothetical_questions': ['What are the main drawbacks of the simple RAG '\n",
|
||||
" 'model?',\n",
|
||||
" 'How does Query-Document Mismatch affect search '\n",
|
||||
" 'results in simple RAG?',\n",
|
||||
" 'Why is relying on a monolithic search backend '\n",
|
||||
" 'problematic for RAG?'],\n",
|
||||
" 'keywords': ['limitations',\n",
|
||||
" 'query-document mismatch',\n",
|
||||
" 'monolithic search backend',\n",
|
||||
" 'text search limitations',\n",
|
||||
" 'limited planning ability'],\n",
|
||||
" 'summary': 'The limitations of simple RAG include the Query-Document Mismatch '\n",
|
||||
" 'which assumes a perfect alignment of embeddings, the reliance on '\n",
|
||||
" 'a Monolithic Search Backend which limits flexibility, Text Search '\n",
|
||||
" 'Limitations that impede nuanced search, and a Limited Planning '\n",
|
||||
" 'Ability that overlooks additional context for refining results.',\n",
|
||||
" 'topic': 'Limitations of Simple RAG'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -261,7 +229,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -296,16 +264,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 21)))"
|
||||
"Query(rewritten_query='recent developments in AI', published_daterange=DateRange(start=datetime.date(2023, 1, 1), end=datetime.date(2023, 12, 22)))"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -325,7 +293,8 @@
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"expand_query(\"What are some recent developments in AI?\")"
|
||||
"query = expand_query(\"What are some recent developments in AI?\")\n",
|
||||
"query"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -398,55 +367,71 @@
|
||||
"\n",
|
||||
"1. Save input and output pairs for later\n",
|
||||
"2. Save the JSON schema for the response_model\n",
|
||||
"3. Having snapshots of the model and data allow us to compare results over time, and as we make changes to the model we can see how the results change.\n"
|
||||
"3. Having snapshots of the model and data allows us to compare results over time, and as we make changes to the model, we can see how the results change.\n",
|
||||
"\n",
|
||||
"This is particularly useful when we might want to blend a mix of synthetic and real data to evaluate our model. We can use the `wandb` library to track our experiments and save the results to a dashboard.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def flatten_dict(d, parent_key=\"\", sep=\"_\"):\n",
|
||||
" \"\"\"\n",
|
||||
" Flatten a nested dictionary.\n",
|
||||
"\n",
|
||||
" :param d: The nested dictionary to flatten.\n",
|
||||
" :param parent_key: The base key to use for the flattened keys.\n",
|
||||
" :param sep: Separator to use between keys.\n",
|
||||
" :return: A flattened dictionary.\n",
|
||||
" \"\"\"\n",
|
||||
" items = []\n",
|
||||
" for k, v in d.items():\n",
|
||||
" new_key = f\"{parent_key}{sep}{k}\" if parent_key else k\n",
|
||||
" if isinstance(v, dict):\n",
|
||||
" items.extend(flatten_dict(v, new_key, sep=sep).items())\n",
|
||||
" else:\n",
|
||||
" items.append((new_key, v))\n",
|
||||
" return dict(items)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def dicts_to_df(list_of_dicts):\n",
|
||||
" \"\"\"\n",
|
||||
" Convert a list of dictionaries to a pandas DataFrame.\n",
|
||||
"\n",
|
||||
" :param list_of_dicts: List of dictionaries, potentially nested.\n",
|
||||
" :return: A pandas DataFrame representing the flattened data.\n",
|
||||
" \"\"\"\n",
|
||||
" # Flatten each dictionary and create a DataFrame\n",
|
||||
" flattened_data = [flatten_dict(d) for d in list_of_dicts]\n",
|
||||
" return pd.DataFrame(flattened_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import wandb\n",
|
||||
"from helpers import dicts_to_df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class DateRange(BaseModel):\n",
|
||||
" chain_of_thought: str = Field(\n",
|
||||
" description=\"Think step by step to plan what is the best time range to search in\"\n",
|
||||
" )\n",
|
||||
" start: date\n",
|
||||
" end: date\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Query(BaseModel):\n",
|
||||
" rewritten_query: str = Field(\n",
|
||||
" description=\"Rewrite the query to make it more specific\"\n",
|
||||
" )\n",
|
||||
" published_daterange: DateRange = Field(\n",
|
||||
" description=\"Effective date range to search in\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def report(self):\n",
|
||||
" dct = self.model_dump()\n",
|
||||
" dct[\"usage\"] = self._raw_response.usage.model_dump()\n",
|
||||
" return dct\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from openai import AsyncOpenAI\n",
|
||||
"\n",
|
||||
"# We'll use a different client for async calls\n",
|
||||
"# To highlight the difference and how we can use both\n",
|
||||
"aclient = instructor.patch(AsyncOpenAI())\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def expand_query(q) -> Query:\n",
|
||||
" return await aclient.chat.completions.create(\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" response_model=Query,\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\",\n",
|
||||
" },\n",
|
||||
" {\"role\": \"user\", \"content\": f\"query: {q}\"},\n",
|
||||
" ],\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -476,7 +461,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Run data is saved locally in <code>/Users/jasonliu/dev/instructor/tutorials/wandb/run-20231221_153734-idscpy5k</code>"
|
||||
"Run data is saved locally in <code>/Users/jasonliu/dev/instructor/tutorials/wandb/run-20231222_152028-opuq58lr</code>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -488,7 +473,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Syncing run <strong><a href='https://wandb.ai/instructor/query-understanding/runs/idscpy5k' target=\"_blank\">easy-feather-16</a></strong> to <a href='https://wandb.ai/instructor/query-understanding' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
|
||||
"Syncing run <strong><a href='https://wandb.ai/instructor/query-understanding/runs/opuq58lr' target=\"_blank\">major-firebrand-21</a></strong> to <a href='https://wandb.ai/instructor/query-understanding' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -512,7 +497,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" View run at <a href='https://wandb.ai/instructor/query-understanding/runs/idscpy5k' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/idscpy5k</a>"
|
||||
" View run at <a href='https://wandb.ai/instructor/query-understanding/runs/opuq58lr' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/opuq58lr</a>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -525,18 +510,39 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Retrying, exception: 1 validation error for Query\n",
|
||||
"rewritten_query\n",
|
||||
" Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n",
|
||||
" For further information visit https://errors.pydantic.dev/2.5/v/missing\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 231, in retry_async\n",
|
||||
" return await process_response_async(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/Users/jasonliu/dev/instructor/instructor/patch.py\", line 201, in process_response_async\n",
|
||||
" model = await response_model.from_response_async(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/Users/jasonliu/dev/instructor/instructor/function_calls.py\", line 198, in from_response_async\n",
|
||||
" return cls.model_validate_json(\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
" File \"/Users/jasonliu/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py\", line 532, in model_validate_json\n",
|
||||
" return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)\n",
|
||||
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||
"pydantic_core._pydantic_core.ValidationError: 1 validation error for Query\n",
|
||||
"rewritten_query\n",
|
||||
" Field required [type=missing, input_value={'rewitten_query': 'recen...', 'end': '2023-12-22'}}, input_type=dict]\n",
|
||||
" For further information visit https://errors.pydantic.dev/2.5/v/missing\n",
|
||||
"wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6440cf236ba24c3b839d1256cfada604",
|
||||
"model_id": "96b112129c944465a35156a6ffbdfe54",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…"
|
||||
"VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded (0.001 MB deduped)\\r'), FloatProgress(value=1.0, max…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
@@ -545,7 +551,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"W&B sync reduced upload amount by 9.0% "
|
||||
"W&B sync reduced upload amount by 7.9% "
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -557,7 +563,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" View run <strong style=\"color:#cdcd00\">easy-feather-16</strong> at: <a href='https://wandb.ai/instructor/query-understanding/runs/idscpy5k' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/idscpy5k</a><br/>Synced 4 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)"
|
||||
" View run <strong style=\"color:#cdcd00\">major-firebrand-21</strong> at: <a href='https://wandb.ai/instructor/query-understanding/runs/opuq58lr' target=\"_blank\">https://wandb.ai/instructor/query-understanding/runs/opuq58lr</a><br/>Synced 5 W&B file(s), 1 media file(s), 4 artifact file(s) and 0 other file(s)"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -569,7 +575,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"Find logs at: <code>./wandb/run-20231221_153734-idscpy5k/logs</code>"
|
||||
"Find logs at: <code>./wandb/run-20231222_152028-opuq58lr/logs</code>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
@@ -580,39 +586,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import wandb\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class DateRange(BaseModel):\n",
|
||||
" chain_of_thought: str = Field(\n",
|
||||
" description=\"Think step by step to plan what is the best time range to search in\"\n",
|
||||
" )\n",
|
||||
" start: date\n",
|
||||
" end: date\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Query(BaseModel):\n",
|
||||
" rewritten_query: str = Field(\n",
|
||||
" description=\"Rewrite the query to make it more specific\"\n",
|
||||
" )\n",
|
||||
" published_daterange: DateRange = Field(\n",
|
||||
" description=\"Effective date range to search in\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def expand_query(q) -> Query:\n",
|
||||
" return client.chat.completions.create(\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" response_model=Query,\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": f\"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...\",\n",
|
||||
" },\n",
|
||||
" {\"role\": \"user\", \"content\": f\"query: {q}\"},\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"run = wandb.init(\n",
|
||||
@@ -626,7 +600,7 @@
|
||||
" \"biotechnology updates last 10 days\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"queries = [expand_query(q) for q in test_queries]\n",
|
||||
"queries = await asyncio.gather(*[expand_query(q) for q in test_queries])\n",
|
||||
"\n",
|
||||
"with open(\"schema.json\", \"w+\") as f:\n",
|
||||
" schema = Query.model_json_schema()\n",
|
||||
@@ -636,7 +610,7 @@
|
||||
" for query in queries:\n",
|
||||
" f.write(query.model_dump_json() + \"\\n\")\n",
|
||||
"\n",
|
||||
"df = dicts_to_df([q.model_dump() for q in queries])\n",
|
||||
"df = dicts_to_df([q.report() for q in queries])\n",
|
||||
"df[\"input\"] = test_queries\n",
|
||||
"df.to_csv(\"results.csv\")\n",
|
||||
"\n",
|
||||
@@ -652,13 +626,6 @@
|
||||
"run.finish()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -674,7 +641,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -682,7 +649,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"class SearchClient(BaseModel):\n",
|
||||
" query: str\n",
|
||||
" query: str = Field(description=\"The search query that will go into the search bar\")\n",
|
||||
" keywords: List[str]\n",
|
||||
" email: str\n",
|
||||
" source: Literal[\"gmail\", \"calendar\"]\n",
|
||||
@@ -706,43 +673,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"queries\": [\n",
|
||||
" {\n",
|
||||
" \"query\": \"schedule\",\n",
|
||||
" \"keywords\": [\n",
|
||||
" \"appointments\",\n",
|
||||
" \"meetings\",\n",
|
||||
" \"schedule\",\n",
|
||||
" \"events\"\n",
|
||||
" ],\n",
|
||||
" \"email\": \"jason.assistant@busybot.com\",\n",
|
||||
" \"source\": \"calendar\",\n",
|
||||
" \"date_range\": {\n",
|
||||
" \"start\": \"2023-11-18\",\n",
|
||||
" \"end\": \"2023-11-18\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retrival = client.chat.completions.create(\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
" response_model=Retrival,\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": f\"You are Jason's personal assistant. Today is {date.today()}\",\n",
|
||||
" \"content\": f\"\"\"You are Jason's personal assistant.\n",
|
||||
" He has two emails jason@work.com jason@personal.com \n",
|
||||
" Today is {date.today()}\"\"\",\n",
|
||||
" },\n",
|
||||
" {\"role\": \"user\", \"content\": \"What do I have today?\"},\n",
|
||||
" ],\n",
|
||||
@@ -810,11 +753,13 @@
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": f\"You are Jason's personal assistant. Today is {date.today()}\",\n",
|
||||
" \"content\": f\"\"\"You are Jason's personal assistant.\n",
|
||||
" He has two emails jason@work.com jason@personal.com \n",
|
||||
" Today is {date.today()}\"\"\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"What meetings do I have today and are there any important emails I should be aware of?\",\n",
|
||||
" \"content\": \"What meetings do I have today and are there any important emails I should be aware of\",\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def flatten_dict(d, parent_key="", sep="_"):
    """
    Flatten a nested mapping into a single-level dictionary.

    Keys of nested mappings are joined to their parent key with ``sep``,
    so ``{"a": {"b": 1}}`` becomes ``{"a_b": 1}``. Any
    ``collections.abc.Mapping`` value is descended into, not only ``dict``
    instances. Every other value (lists, scalars, ``None`` ...) is copied
    through unchanged.

    Notes on edge cases:

    * An empty nested mapping contributes no keys, so its key disappears
      from the result.
    * If two paths produce the same flattened key (for example ``"a_b"``
      and ``{"a": {"b": ...}}`` with the default separator), the value
      encountered last wins.

    :param d: The nested dictionary (or other mapping) to flatten.
    :param parent_key: The base key to use for the flattened keys. When it
        is empty (falsy) no prefix and no separator are added.
    :param sep: Separator to use between keys.
    :return: A new flattened dictionary; ``d`` itself is not modified.
    """
    # Imported locally so the block does not depend on a file-level import.
    from collections.abc import Mapping

    flat = {}

    def _walk(mapping, prefix):
        # Depth-first traversal writing straight into ``flat``; this avoids
        # building a temporary list and dict for every nesting level.
        for k, v in mapping.items():
            new_key = f"{prefix}{sep}{k}" if prefix else k
            if isinstance(v, Mapping):
                _walk(v, new_key)
            else:
                flat[new_key] = v

    _walk(d, parent_key)
    return flat
|
||||
|
||||
|
||||
def dicts_to_df(list_of_dicts):
    """
    Build a pandas DataFrame from a list of dictionaries.

    Each dictionary is passed through ``flatten_dict`` (with its default
    prefix and separator) and the flattened results are handed to the
    ``pandas.DataFrame`` constructor in their original order.

    :param list_of_dicts: List of dictionaries, potentially nested.
    :return: A pandas DataFrame representing the flattened data.
    """
    # Flatten every record up front so the constructor receives a list.
    rows = list(map(flatten_dict, list_of_dicts))
    return pd.DataFrame(rows)
|
||||
@@ -0,0 +1,5 @@
|
||||
[default]
|
||||
entity = instructor
|
||||
project = query-understanding
|
||||
base_url = https://api.wandb.ai
|
||||
|
||||
Reference in New Issue
Block a user