diff --git a/tutorials/1.introduction.ipynb b/tutorials/1.introduction.ipynb
index c4e3baf..d178bea 100644
--- a/tutorials/1.introduction.ipynb
+++ b/tutorials/1.introduction.ipynb
@@ -6,11 +6,11 @@
"source": [
"# Thinking with Types: Whats the problem?\n",
"\n",
- "If you seen my [talk](https://www.youtube.com/watch?v=yj-wSRJwrrc&t=1s) on this topic, you can skip this chapter.\n",
+ "If you've seen my [talk](https://www.youtube.com/watch?v=yj-wSRJwrrc&t=1s) on this topic, you can skip this chapter.\n",
"\n",
"Many times, when we want to use language models, its not to make chatbots, but to communicate with other computer systems. This commonly means we want to use a model to output structured data like JSON. However, working with raw json or dictionaries can be a pain. \n",
"\n",
- "In this section will go over introducing Pydantic as a tool we can leverage in our day to day programming, and then later use openai function calling to extract some simple data out of a string. Which will lay the ground work for introducing my library Instructor."
+ "This notebook highlights the core concepts of Pydantic and OpenAI function calling. With a foundational understanding of these two tools, we can lay the groundwork for introducing my library, Instructor."
]
},
{
@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -62,7 +62,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb Cell 5\u001b[0m line \u001b[0;36m5\n\u001b[1;32m 3\u001b[0m age \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mage\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mname\u001b[39m}\u001b[39;00m\u001b[39m is \u001b[39m\u001b[39m{\u001b[39;00mage\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNext year he will be \u001b[39m\u001b[39m{\u001b[39;00mage\u001b[39m+\u001b[39;49m\u001b[39m1\u001b[39;49m\u001b[39m}\u001b[39;00m\u001b[39m years old\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "\u001b[1;32m/home/m/cookbook/instructor/tutorials/1.introduction.ipynb Cell 5\u001b[0m line \u001b[0;36m5\n\u001b[1;32m 3\u001b[0m age \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mage\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mname\u001b[39m}\u001b[39;00m\u001b[39m is \u001b[39m\u001b[39m{\u001b[39;00mage\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNext year he will be \u001b[39m\u001b[39m{\u001b[39;00mage\u001b[39m+\u001b[39;49m\u001b[39m1\u001b[39;49m\u001b[39m}\u001b[39;00m\u001b[39m years old\u001b[39m\u001b[39m\"\u001b[39m)\n",
"\u001b[0;31mTypeError\u001b[0m: can only concatenate str (not \"int\") to str"
]
}
@@ -93,7 +93,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -102,7 +102,7 @@
"Person(name='Sam', age=30)"
]
},
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -121,7 +121,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -143,7 +143,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -153,7 +153,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/1.introduction.ipynb Cell 10\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39massert\u001b[39;00m person\u001b[39m.\u001b[39mname \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mSam\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m \u001b[39massert\u001b[39;00m person\u001b[39m.\u001b[39mage \u001b[39m==\u001b[39m \u001b[39m20\u001b[39m\n",
+ "\u001b[1;32m/home/m/cookbook/instructor/tutorials/1.introduction.ipynb Cell 10\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39massert\u001b[39;00m person\u001b[39m.\u001b[39mname \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mSam\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m \u001b[39massert\u001b[39;00m person\u001b[39m.\u001b[39mage \u001b[39m==\u001b[39m \u001b[39m20\u001b[39m\n",
"\u001b[0;31mAssertionError\u001b[0m: "
]
}
@@ -165,7 +165,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -203,7 +203,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -212,7 +212,7 @@
"Person(name='Jason', age=25)"
]
},
- "execution_count": 7,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -234,7 +234,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -263,21 +263,33 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "But what happens if I want describe specifically how the schema should look? what if i want full_name and age and birthday as a datetime?"
+ "But what happens if I want to describe specifically how the schema should look? What if I want full_name and age and birthday as a datetime?"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 11,
"metadata": {},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"name\": \"Jason Liu\",\n",
+ " \"age\": 30,\n",
+ " \"birthday\": \"2023-11-17\"\n",
+ "}\n",
+ "name='Jason Liu' age=30\n"
+ ]
+ },
{
"data": {
"text/plain": [
- "Person(name='Jason Liu', age=30)"
+ "PersonBirthday(name='Jason Liu', age=30, birthday=datetime.date(2023, 11, 17))"
]
},
- "execution_count": 9,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -292,11 +304,13 @@
"resp = client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[\n",
- " {\"role\": \"user\", \"content\": f\"Extract `Jason Liu is thirty years old his birthday is yesturday` into json today is {datetime.date.today()}\"},\n",
+ " {\"role\": \"user\", \"content\": f\"Extract `Jason Liu is thirty years old his birthday is yesterday` into json. Today is {datetime.date.today()}\"},\n",
" ]\n",
")\n",
"\n",
- "Person.model_validate_json(resp.choices[0].message.content)"
+ "print(resp.choices[0].message.content)\n",
+ "print(Person.model_validate_json(resp.choices[0].message.content))\n",
+ "PersonBirthday.model_validate_json(resp.choices[0].message.content)"
]
},
{
@@ -305,7 +319,7 @@
"source": [
"## Introduction to Function Calling \n",
"\n",
- "The json could be anything! we could add more and more into a prompt and hope it works, or we can use something called function calling to directly specify the schema we want. \n",
+ "The JSON could be anything! We could add more and more into a prompt and hope it works, or we can use something called function calling to directly specify the schema we want. \n",
"\n",
"\n",
"**Function Calling**\n",
@@ -315,7 +329,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -365,9 +379,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'properties': {'name': {'title': 'Name', 'type': 'string'},\n",
+ " 'age': {'title': 'Age', 'type': 'integer'},\n",
+ " 'birthday': {'format': 'date', 'title': 'Birthday', 'type': 'string'}},\n",
+ " 'required': ['name', 'age', 'birthday'],\n",
+ " 'title': 'PersonBirthday',\n",
+ " 'type': 'object'}"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"PersonBirthday.model_json_schema()"
]
@@ -381,9 +411,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'$defs': {'Address': {'properties': {'address': {'description': 'Full street address',\n",
+ " 'title': 'Address',\n",
+ " 'type': 'string'},\n",
+ " 'city': {'title': 'City', 'type': 'string'},\n",
+ " 'state': {'title': 'State', 'type': 'string'}},\n",
+ " 'required': ['address', 'city', 'state'],\n",
+ " 'title': 'Address',\n",
+ " 'type': 'object'}},\n",
+ " 'description': 'A Person with an address',\n",
+ " 'properties': {'name': {'title': 'Name', 'type': 'string'},\n",
+ " 'age': {'title': 'Age', 'type': 'integer'},\n",
+ " 'address': {'$ref': '#/$defs/Address'}},\n",
+ " 'required': ['name', 'age', 'address'],\n",
+ " 'title': 'PersonAddress',\n",
+ " 'type': 'object'}"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"class Address(BaseModel):\n",
" address: str = Field(description=\"Full street address\")\n",
@@ -409,9 +464,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PersonAddress(name='Jason Liu', age=30, address=Address(address='123 Main St', city='San Francisco', state='CA'))"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import instructor\n",
"\n",
@@ -446,7 +512,7 @@
"source": [
"## Is instructor the only way to do this?\n",
"\n",
- "No. Libraries like Marvin, Langchain, and LLamaindex all now leverage the pydantic object in similar ways however they all have different approaches to how they do it. With instructor the goal is to be as light weight as spossible, get you as close as possible to the openai api, and then get out of your way.\n",
+ "No. Libraries like Marvin, LangChain, and LlamaIndex all now leverage the Pydantic object in similar ways; however, they all have different approaches to how they do it. With Instructor, the goal is to be as lightweight as possible, get you as close as possible to the OpenAI API, and then get out of your way.\n",
"\n",
"More importantly, we've also added straight forward validation and reasking to the mix.\n",
"\n",
@@ -470,7 +536,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.6"
+ "version": "3.10.12"
}
},
"nbformat": 4,