diff --git a/docs/examples/index.md b/docs/examples/index.md
index 71a293f..7223922 100644
--- a/docs/examples/index.md
+++ b/docs/examples/index.md
@@ -14,5 +14,6 @@
10. [How is multi-file code generation accomplished?](gpt-engineer.md)
11. [How is Personally Identifiable Information sanitized from documents?](pii.md)
12. [How are action items and dependencies generated from transcripts?](action_items.md)
+13. [How to enable OpenAI's moderation](moderation.md)
-Explore more!
+Explore more!
\ No newline at end of file
diff --git a/docs/examples/moderation.md b/docs/examples/moderation.md
new file mode 100644
index 0000000..d4bb2ae
--- /dev/null
+++ b/docs/examples/moderation.md
@@ -0,0 +1,62 @@
+# OpenAI Moderation
+
+## Overview
+
+This example uses OpenAI's moderation endpoint to check content compliance with OpenAI's usage policies. It can identify and filter harmful content that violates the policies.
+
+The model flags content and classifies it into categories including hate, harassment, self-harm, sexual content, and violence. Each category has subcategories for detailed classification.
+
+This validator should only be used for monitoring OpenAI API inputs and outputs; other use cases are currently [not allowed](https://platform.openai.com/docs/guides/moderation/overview).
+
+## Incorporating OpenAI moderation validator
+
+The following code defines a function to validate content using OpenAI's Moderation endpoint. The `AfterValidator` is used to apply OpenAI's moderation after the field value has been computed. This moderation checks if the content complies with OpenAI's usage policies and flags any harmful content. Here's how it works:
+
+1. Generate the OpenAI client and patch it with `instructor`. Patching is not strictly necessary for this example, but it's a good idea to always patch the client to leverage the full `instructor` functionality.
+
+2. Annotate our `message` field with `AfterValidator(openai_moderation(client=client))`. This means that after the `message` is computed, it will be passed to the `openai_moderation` function for validation.
+
+```python
+import instructor
+from instructor import openai_moderation
+
+from typing import Annotated
+from pydantic import BaseModel
+from pydantic.functional_validators import AfterValidator
+from openai import OpenAI
+
+client = instructor.patch(OpenAI())
+
+class Response(BaseModel):
+ message: Annotated[str, AfterValidator(openai_moderation(client=client))]
+```
+
+## Testing OpenAI moderation validator
+
+Now, let's test our class with a piece of content that violates OpenAI's usage policies.
+
+```python
+Response(message="I want to make them suffer the consequences")
+```
+
+The validator will raise a `ValidationError` if the content violates the policies, like so:
+
+```python
+ValidationError: 1 validation error for Response
+message
+ Value error, `I want to make them suffer the consequences` was flagged for harassment, harassment_threatening, violence, harassment/threatening [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]
+```
+
+Let's try another example which violates a different policy: self-harm.
+
+```python
+Response(message="I want to hurt myself.")
+```
+
+In this case, our validator will flag the output but return a different error message in the trace, clarifying the specific policies that were violated:
+
+```python
+ValidationError: 1 validation error for Response
+message
+ Value error, `I want to hurt myself.` was flagged for self_harm, self_harm_intent, violence, self-harm, self-harm/intent [type=value_error, input_value='I want to hurt myself.', input_type=str]
+```
diff --git a/instructor/__init__.py b/instructor/__init__.py
index 4bd40d7..2aa2cb3 100644
--- a/instructor/__init__.py
+++ b/instructor/__init__.py
@@ -1,7 +1,7 @@
from .distil import FinetuneFormat, Instructions
-from .dsl import CitationMixin, Maybe, MultiTask, llm_validator
+from .dsl import CitationMixin, Maybe, MultiTask, llm_validator, openai_moderation
from .function_calls import OpenAISchema, openai_function, openai_schema
-from .patch import patch, apatch
+from .patch import apatch, patch
__all__ = [
"OpenAISchema",
@@ -13,6 +13,7 @@ __all__ = [
"patch",
"apatch",
"llm_validator",
+ "openai_moderation",
"FinetuneFormat",
"Instructions",
"unpatch",
diff --git a/instructor/dsl/__init__.py b/instructor/dsl/__init__.py
index 282aa4f..c44a5eb 100644
--- a/instructor/dsl/__init__.py
+++ b/instructor/dsl/__init__.py
@@ -1,6 +1,6 @@
from .multitask import MultiTask
from .maybe import Maybe
-from .validators import llm_validator
+from .validators import llm_validator, openai_moderation
from .citation import CitationMixin
__all__ = [ # noqa: F405
@@ -8,4 +8,5 @@ __all__ = [ # noqa: F405
"MultiTask",
"Maybe",
"llm_validator",
+ "openai_moderation",
]
diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py
index 373e7ba..e67fcec 100644
--- a/instructor/dsl/validators.py
+++ b/instructor/dsl/validators.py
@@ -98,3 +98,43 @@ def llm_validator(
return v
return llm
+
+def openai_moderation(client: OpenAI = None):
+ """
+ Build a validator that checks a message with OpenAI's moderation endpoint.
+
+ Only for monitoring inputs and outputs of OpenAI APIs; other uses are disallowed as per:
+ https://platform.openai.com/docs/guides/moderation/overview
+
+ Example:
+ ```python
+ from instructor import openai_moderation
+
+ class Response(BaseModel):
+ message: Annotated[str, AfterValidator(openai_moderation(client=client))]
+
+ Response(message="I hate you")
+ ```
+
+ ```
+ ValidationError: 1 validation error for Response
+ message
+ Value error, `I hate you` was flagged for harassment [type=value_error, input_value='I hate you', input_type=str]
+ ```
+
+ client (OpenAI): The OpenAI client to use, must be sync; a new OpenAI() is created when omitted (default: None)
+ Returns a callable that returns the message unchanged, or raises ValueError naming the flagged categories.
+ """
+
+ client = client or OpenAI()  # fall back to a default client when none is supplied
+
+ def validate_message_with_openai_mod(v: str) -> str:
+ response = client.moderations.create(input=v)
+ out = response.results[0]  # only the first result is inspected
+ cats = out.categories.model_dump()
+ if out.flagged:
+ raise ValueError(f"`{v}` was flagged for {', '.join(cat for cat in cats if cats[cat])}")
+
+ return v
+
+ return validate_message_with_openai_mod
diff --git a/tutorials/5.validation.ipynb b/tutorials/5.validation.ipynb
index 88b8f15..5e94f84 100644
--- a/tutorials/5.validation.ipynb
+++ b/tutorials/5.validation.ipynb
@@ -121,7 +121,7 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 5,
"id": "1aa2c503-82f8-4735-aae3-373b55fb1064",
"metadata": {},
"outputs": [],
@@ -258,23 +258,26 @@
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 1,
+ "id": "b2ad8c19-6a94-4e4a-aa3e-dce149e8a479",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import Annotated\n",
+ "from pydantic.functional_validators import AfterValidator"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
"id": "82521112-5301-4442-acce-82b495bd838f",
"metadata": {},
"outputs": [],
"source": [
- "class Response(BaseModel):\n",
- " message: str\n",
+ "from instructor import openai_moderation\n",
"\n",
- " @field_validator('message')\n",
- " def message_must_comply_with_openai_mod(cls, v: str) -> str:\n",
- " response = client.moderations.create(input=v)\n",
- " out = response.results[0]\n",
- " cats = dict(out.categories)\n",
- " if out.flagged:\n",
- " raise ValueError(f\"`{v}` was flagged for {[i for i in cats if cats[i]]}\")\n",
- " \n",
- " return v "
+ "class Response(BaseModel):\n",
+ " message: Annotated[str, AfterValidator(openai_moderation(client=client))]"
]
},
{
@@ -287,20 +290,20 @@
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 7,
"id": "54a9de1b-c6e7-4a5f-854c-506083a06a9d",
"metadata": {},
"outputs": [
{
"ename": "ValidationError",
- "evalue": "1 validation error for Response\nmessage\n Value error, `I want to make them suffer the consequences` was flagged for ['harassment', 'harassment_threatening', 'violence', 'harassment/threatening'] [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]\n For further information visit https://errors.pydantic.dev/2.4/v/value_error",
+ "evalue": "1 validation error for Response\nmessage\n Value error, `I want to make them suffer the consequences` was flagged for harassment, harassment_threatening, violence, harassment/threatening [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]\n For further information visit https://errors.pydantic.dev/2.5/v/value_error",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/5.validation.ipynb Cell 23\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m Response(message\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mI want to make them suffer the consequences\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
- "File \u001b[0;32m~/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py:164\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(__pydantic_self__, **data)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[39m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m 163\u001b[0m __tracebackhide__ \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m--> 164\u001b[0m __pydantic_self__\u001b[39m.\u001b[39;49m__pydantic_validator__\u001b[39m.\u001b[39;49mvalidate_python(data, self_instance\u001b[39m=\u001b[39;49m__pydantic_self__)\n",
- "\u001b[0;31mValidationError\u001b[0m: 1 validation error for Response\nmessage\n Value error, `I want to make them suffer the consequences` was flagged for ['harassment', 'harassment_threatening', 'violence', 'harassment/threatening'] [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]\n For further information visit https://errors.pydantic.dev/2.4/v/value_error"
+ "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mResponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mI want to make them suffer the consequences\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/.virtualenvs/pampa-labs/lib/python3.10/site-packages/pydantic/main.py:164\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(__pydantic_self__, **data)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m 163\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 164\u001b[0m \u001b[43m__pydantic_self__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m__pydantic_self__\u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[0;31mValidationError\u001b[0m: 1 validation error for Response\nmessage\n Value error, `I want to make them suffer the consequences` was flagged for harassment, harassment_threatening, violence, harassment/threatening [type=value_error, input_value='I want to make them suffer the consequences', input_type=str]\n For further information visit https://errors.pydantic.dev/2.5/v/value_error"
]
}
],
@@ -318,20 +321,20 @@
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 26,
"id": "feb77670-afd7-4947-89f8-a9446f6fb12c",
"metadata": {},
"outputs": [
{
"ename": "ValidationError",
- "evalue": "1 validation error for Response\nmessage\n Value error, `I will mock their religion` was flagged for ['harassment'] [type=value_error, input_value='I will mock their religion', input_type=str]\n For further information visit https://errors.pydantic.dev/2.4/v/value_error",
+ "evalue": "1 validation error for Response\nmessage\n Value error, `I will mock their religion` was flagged for ['harassment'] [type=value_error, input_value='I will mock their religion', input_type=str]\n For further information visit https://errors.pydantic.dev/2.5/v/value_error",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m/Users/jasonliu/dev/instructor/tutorials/5.validation.ipynb Cell 25\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m Response(message\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mI will mock their religion\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
- "File \u001b[0;32m~/dev/instructor/.venv/lib/python3.11/site-packages/pydantic/main.py:164\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(__pydantic_self__, **data)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[39m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m 163\u001b[0m __tracebackhide__ \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m--> 164\u001b[0m __pydantic_self__\u001b[39m.\u001b[39;49m__pydantic_validator__\u001b[39m.\u001b[39;49mvalidate_python(data, self_instance\u001b[39m=\u001b[39;49m__pydantic_self__)\n",
- "\u001b[0;31mValidationError\u001b[0m: 1 validation error for Response\nmessage\n Value error, `I will mock their religion` was flagged for ['harassment'] [type=value_error, input_value='I will mock their religion', input_type=str]\n For further information visit https://errors.pydantic.dev/2.4/v/value_error"
+ "Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mResponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mI will mock their religion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/.virtualenvs/pampa-labs/lib/python3.10/site-packages/pydantic/main.py:164\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(__pydantic_self__, **data)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m 163\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 164\u001b[0m \u001b[43m__pydantic_self__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m__pydantic_self__\u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[0;31mValidationError\u001b[0m: 1 validation error for Response\nmessage\n Value error, `I will mock their religion` was flagged for ['harassment'] [type=value_error, input_value='I will mock their religion', input_type=str]\n For further information visit https://errors.pydantic.dev/2.5/v/value_error"
]
}
],
@@ -520,8 +523,6 @@
}
],
"source": [
- "from typing import Annotated\n",
- "from pydantic.functional_validators import AfterValidator\n",
"from instructor import llm_validator\n",
"\n",
"class AssistantMessage(BaseModel):\n",