From ff5e547ffd2b8a91a1fe20ec37532750749d3f3d Mon Sep 17 00:00:00 2001
From: Jason Liu <jason@jxnl.co>
Date: Tue, 6 Feb 2024 17:27:52 -0500
Subject: [PATCH] docs: refresh example outputs and add usage tokens page

---
 docs/concepts/caching.md      |  4 ++--
 docs/concepts/lists.md        |  4 ++--
 docs/concepts/maybe.md        |  2 +-
 docs/concepts/parallel.md     |  2 +-
 docs/concepts/partial.md      | 24 ++++++++++++++++++++++++
 docs/concepts/raw_response.md |  4 ++--
 docs/concepts/usage.md        | 27 +++++++++++++++++++++++++++
 docs/index.md                 |  4 ++--
 mkdocs.yml                    |  1 +
 9 files changed, 62 insertions(+), 10 deletions(-)
 create mode 100644 docs/concepts/usage.md

diff --git a/docs/concepts/caching.md b/docs/concepts/caching.md
index 98b4a33..35c91d6 100644
--- a/docs/concepts/caching.md
+++ b/docs/concepts/caching.md
@@ -33,12 +33,12 @@ def extract(data) -> UserDetail:
 start = time.perf_counter()  # (1)
 model = extract("Extract jason is 25 years old")
 print(f"Time taken: {time.perf_counter() - start}")
-#> Time taken: 0.44677383406087756
+#> Time taken: 0.41433916706591845
 
 start = time.perf_counter()
 model = extract("Extract jason is 25 years old")  # (2)
 print(f"Time taken: {time.perf_counter() - start}")
-#> Time taken: 1.00000761449337e-06
+#> Time taken: 1.7080456018447876e-06
 ```
 
 1. Using `time.perf_counter()` to measure the time taken to run the function is better than using `time.time()` because it's more accurate and less susceptible to system clock changes.
diff --git a/docs/concepts/lists.md b/docs/concepts/lists.md
index 77275db..366d1e7 100644
--- a/docs/concepts/lists.md
+++ b/docs/concepts/lists.md
@@ -159,8 +159,8 @@ async def print_iterable_results():
     )
     async for m in model:
         print(m)
-        #> name='John Doe' age=30
-        #> name='Jane Doe' age=28
+        #> name='John Doe' age=32
+        #> name='Jane Smith' age=28
 
 
 import asyncio
diff --git a/docs/concepts/maybe.md b/docs/concepts/maybe.md
index 8b201ff..f25f48f 100644
--- a/docs/concepts/maybe.md
+++ b/docs/concepts/maybe.md
@@ -88,7 +88,7 @@ print(user2.model_dump_json(indent=2))
 """
 {
   "result": null,
-  "error": true,
+  "error": true,
   "message": "Unknown user"
 }
 """
diff --git a/docs/concepts/parallel.md b/docs/concepts/parallel.md
index 8afe1dc..4ff493d 100644
--- a/docs/concepts/parallel.md
+++ b/docs/concepts/parallel.md
@@ -46,7 +46,7 @@ for fc in function_calls:
     print(fc)
     #> location='Toronto' units='metric'
     #> location='Dallas' units='imperial'
-    #> query='Super Bowl winner'
+    #> query='super bowl winner'
 ```
 
 1. Set the mode to `PARALLEL_TOOLS` to enable parallel function calling.
diff --git a/docs/concepts/partial.md b/docs/concepts/partial.md
index b7afc03..8805c8e 100644
--- a/docs/concepts/partial.md
+++ b/docs/concepts/partial.md
@@ -216,6 +216,30 @@ async def print_partial_results():
           "age": 12
         }
         """
+        """
+        {
+          "name": "",
+          "age": null
+        }
+        """
+        """
+        {
+          "name": "Jason",
+          "age": null
+        }
+        """
+        """
+        {
+          "name": "Jason Liu",
+          "age": null
+        }
+        """
+        """
+        {
+          "name": "Jason Liu",
+          "age": 12
+        }
+        """
 
 
 import asyncio
diff --git a/docs/concepts/raw_response.md b/docs/concepts/raw_response.md
index 25b2148..13fa632 100644
--- a/docs/concepts/raw_response.md
+++ b/docs/concepts/raw_response.md
@@ -25,7 +25,7 @@ user: UserExtract = client.chat.completions.create(
 print(user._raw_response)
 """
 ChatCompletion(
-    id='chatcmpl-8p1AMuoml2bkdXK4284rbPmNRNNwF',
+    id='chatcmpl-8pOAsSOIHAmngMBBki3BLN3p552L0',
     choices=[
         Choice(
             finish_reason='stop',
@@ -42,7 +42,7 @@ ChatCompletion(
             ),
         )
     ],
-    created=1707169902,
+    created=1707258346,
     model='gpt-3.5-turbo-0613',
     object='chat.completion',
     system_fingerprint=None,
diff --git a/docs/concepts/usage.md b/docs/concepts/usage.md
new file mode 100644
index 0000000..c1d068d
--- /dev/null
+++ b/docs/concepts/usage.md
@@ -0,0 +1,27 @@
+The easiest way to get token usage for non-streaming requests is to read the `usage` attribute of the raw response.
+
+```python
+import instructor
+
+from openai import OpenAI
+from pydantic import BaseModel
+
+client = instructor.patch(OpenAI())
+
+
+class UserExtract(BaseModel):
+    name: str
+    age: int
+
+
+user: UserExtract = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    response_model=UserExtract,
+    messages=[
+        {"role": "user", "content": "Extract jason is 25 years old"},
+    ],
+)
+
+print(user._raw_response.usage)
+#> CompletionUsage(completion_tokens=16, prompt_tokens=73, total_tokens=89)
+```
diff --git a/docs/index.md b/docs/index.md
index 3780a53..6b45735 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -120,7 +120,7 @@ print(response.model_dump_json(indent=2))
     print(user._raw_response.model_dump_json(indent=2))
     """
     {
-      "id": "chatcmpl-8p19pXZ5BIqGtaPhVLo184UuQNH3v",
+      "id": "chatcmpl-8pOAKwq8OXZVvOCMw4dv713oKplLF",
       "choices": [
         {
           "finish_reason": "stop",
@@ -137,7 +137,7 @@ print(response.model_dump_json(indent=2))
           }
         }
       ],
-      "created": 1707169869,
+      "created": 1707258312,
       "model": "gpt-3.5-turbo-0613",
       "object": "chat.completion",
       "system_fingerprint": null,
diff --git a/mkdocs.yml b/mkdocs.yml
index 7a2201e..822c2a6 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -132,6 +132,7 @@ nav:
     - Philosophy: 'concepts/philosophy.md'
     - Models: 'concepts/models.md'
     - Fields: 'concepts/fields.md'
+    - Usage Tokens: 'concepts/usage.md'
     - Missing: "concepts/maybe.md"
     - Patching: 'concepts/patching.md'
     - Retrying: 'concepts/retrying.md'