From c83156d0e07589cb8f8050bbeebe03490dc2e639 Mon Sep 17 00:00:00 2001 From: PrettyWood Date: Mon, 26 Oct 2020 12:09:28 +0100 Subject: [PATCH] feat: make pydantic `dataclass` decorator support built-in `dataclass` (#1817) * feat: pydantic dataclasses support built-in ones closes #744 * feat: improve dataclass typing * feat: add support for nested dataclasses closes #1743 * feat: support dataclass schema with nested dataclasses * refactor: remove `_dataclass_with_validation` function * docs: add docstring for `make_dataclass_validator` * refactor: rename DataclassType into Dataclass The name `DataclassType` was misleading as it's not a `Type` per se. * refactor: change global `dataclass` import to local pydantic import time was improved in https://github.com/samuelcolvin/pydantic/pull/1132 by keeping `dataclass` import local. So let's keep it that way! * test: add extra nested case with BaseModel * chore: s/pydantic/_pydantic_/g * docs: add some documentation --- changes/744-PrettyWood.md | 3 + .../dataclasses_stdlib_to_pydantic.py | 35 ++++++ .../dataclasses_stdlib_with_basemodel.py | 27 +++++ docs/usage/dataclasses.md | 26 ++++- pydantic/dataclasses.py | 106 ++++++++++++++--- pydantic/schema.py | 10 +- pydantic/types.py | 4 +- pydantic/utils.py | 4 +- pydantic/validators.py | 5 + tests/mypy/outputs/plugin-fail-strict.txt | 5 +- tests/mypy/outputs/plugin-fail.txt | 5 +- tests/test_dataclasses.py | 109 +++++++++++++++++- 12 files changed, 310 insertions(+), 29 deletions(-) create mode 100644 changes/744-PrettyWood.md create mode 100644 docs/examples/dataclasses_stdlib_to_pydantic.py create mode 100644 docs/examples/dataclasses_stdlib_with_basemodel.py diff --git a/changes/744-PrettyWood.md b/changes/744-PrettyWood.md new file mode 100644 index 0000000..94956ef --- /dev/null +++ b/changes/744-PrettyWood.md @@ -0,0 +1,3 @@ +`pydantic.dataclasses.dataclass` decorator now supports built-in `dataclasses.dataclass`. 
+It is hence possible to convert an existing `dataclass` easily to add *pydantic* validation. +Moreover nested dataclasses are also supported. \ No newline at end of file diff --git a/docs/examples/dataclasses_stdlib_to_pydantic.py b/docs/examples/dataclasses_stdlib_to_pydantic.py new file mode 100644 index 0000000..88686b8 --- /dev/null +++ b/docs/examples/dataclasses_stdlib_to_pydantic.py @@ -0,0 +1,35 @@ +import dataclasses +from datetime import datetime +from typing import Optional + +import pydantic + + +@dataclasses.dataclass +class Meta: + modified_date: Optional[datetime] + seen_count: int + + +@dataclasses.dataclass +class File(Meta): + filename: str + + +File = pydantic.dataclasses.dataclass(File) + +file = File( + filename=b'thefilename', + modified_date='2020-01-01T00:00', + seen_count='7', +) +print(file) + +try: + File( + filename=['not', 'a', 'string'], + modified_date=None, + seen_count=3, + ) +except pydantic.ValidationError as e: + print(e) diff --git a/docs/examples/dataclasses_stdlib_with_basemodel.py b/docs/examples/dataclasses_stdlib_with_basemodel.py new file mode 100644 index 0000000..35ff1e8 --- /dev/null +++ b/docs/examples/dataclasses_stdlib_with_basemodel.py @@ -0,0 +1,27 @@ +import dataclasses +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, ValidationError + + +@dataclasses.dataclass +class File: + filename: str + last_modification_time: Optional[datetime] + + +class Foo(BaseModel): + file: File + + +file = File( + filename=['not', 'a', 'string'], + last_modification_time='2020-01-01T00:00', +) # nothing is validated as expected +print(file) + +try: + Foo(file=file) +except ValidationError as e: + print(e) diff --git a/docs/usage/dataclasses.md b/docs/usage/dataclasses.md index 41b3fb0..5f56b58 100644 --- a/docs/usage/dataclasses.md +++ b/docs/usage/dataclasses.md @@ -1,4 +1,4 @@ -If you don't want to use pydantic's `BaseModel` you can instead get the same data validation on standard +If 
you don't want to use _pydantic_'s `BaseModel` you can instead get the same data validation on standard [dataclasses](https://docs.python.org/3/library/dataclasses.html) (introduced in python 3.7). Dataclasses work in python 3.6 using the [dataclasses backport package](https://github.com/ericvsmith/dataclasses). @@ -16,7 +16,7 @@ _(This script is complete, it should run "as is")_ For more information and discussion see [samuelcolvin/pydantic#710](https://github.com/samuelcolvin/pydantic/issues/710). -You can use all the standard pydantic field types, and the resulting dataclass will be identical to the one +You can use all the standard _pydantic_ field types, and the resulting dataclass will be identical to the one created by the standard library `dataclass` decorator. The underlying model and its schema can be accessed through `__pydantic_model__`. @@ -31,7 +31,7 @@ _(This script is complete, it should run "as is")_ keyword argument `config` which has the same meaning as [Config](model_config.md). !!! warning - After v1.2, [The Mypy plugin](/mypy_plugin.md) must be installed to type check pydantic dataclasses. + After v1.2, [The Mypy plugin](/mypy_plugin.md) must be installed to type check _pydantic_ dataclasses. For more information about combining validators with dataclasses, see [dataclass validators](validators.md#dataclass-validators). @@ -47,6 +47,24 @@ _(This script is complete, it should run "as is")_ Dataclasses attributes can be populated by tuples, dictionaries or instances of the dataclass itself. +## Stdlib dataclasses and _pydantic_ dataclasses + +Stdlib dataclasses (nested or not) can be easily converted into _pydantic_ dataclasses by just decorating +them with `pydantic.dataclasses.dataclass`. 
+ +```py +{!.tmp_examples/dataclasses_stdlib_to_pydantic.py!} +``` +_(This script is complete, it should run "as is")_ + +Bear in mind that stdlib dataclasses (nested or not) are **automatically converted** into _pydantic_ dataclasses +when mixed with `BaseModel`! + +```py +{!.tmp_examples/dataclasses_stdlib_with_basemodel.py!} +``` +_(This script is complete, it should run "as is")_ + ## Initialize hooks When you initialize a dataclass, it is possible to execute code *after* validation @@ -74,7 +92,7 @@ When substituting usage of `dataclasses.dataclass` with `pydantic.dataclasses.da ## JSON Dumping -Pydantic dataclasses do not feature a `.json()` function. To dump them as JSON, you will need to make use of the `pydantic_encoder` as follows: +_Pydantic_ dataclasses do not feature a `.json()` function. To dump them as JSON, you will need to make use of the `pydantic_encoder` as follows: ```py {!.tmp_examples/dataclasses_json_dumps.py!} diff --git a/pydantic/dataclasses.py b/pydantic/dataclasses.py index b5f9cb3..4fbdd63 100644 --- a/pydantic/dataclasses.py +++ b/pydantic/dataclasses.py @@ -1,23 +1,31 @@ -from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, Optional, Type, TypeVar, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Type, TypeVar, Union, overload from .class_validators import gather_all_validators from .error_wrappers import ValidationError from .errors import DataclassTypeError from .fields import Required from .main import create_model, validate_model +from .utils import ClassAttribute if TYPE_CHECKING: from .main import BaseModel # noqa: F401 + from .typing import CallableGenerator - DataclassT = TypeVar('DataclassT', bound='DataclassType') + DataclassT = TypeVar('DataclassT', bound='Dataclass') - class DataclassType: + class Dataclass: __pydantic_model__: Type[BaseModel] __initialised__: bool + __post_init_original__: Optional[Callable[..., None]] + __processed__: Optional[ClassAttribute] def __init__(self, *args: 
Any, **kwargs: Any) -> None: pass + @classmethod + def __get_validators__(cls: Type['Dataclass']) -> 'CallableGenerator': + pass + @classmethod def __validate__(cls: Type['DataclassT'], v: Any) -> 'DataclassT': pass @@ -33,15 +41,22 @@ def _validate_dataclass(cls: Type['DataclassT'], v: Any) -> 'DataclassT': return cls(*v) elif isinstance(v, dict): return cls(**v) + # In nested dataclasses, v can be of type `dataclasses.dataclass`. + # But to validate fields `cls` will be in fact a `pydantic.dataclasses.dataclass`, + # which inherits directly from the class of `v`. + elif is_builtin_dataclass(v) and cls.__bases__[0] is type(v): + import dataclasses + + return cls(**dataclasses.asdict(v)) else: raise DataclassTypeError(class_name=cls.__name__) -def _get_validators(cls: Type['DataclassT']) -> Generator[Any, None, None]: +def _get_validators(cls: Type['Dataclass']) -> 'CallableGenerator': yield cls.__validate__ -def setattr_validate_assignment(self: 'DataclassType', name: str, value: Any) -> None: +def setattr_validate_assignment(self: 'Dataclass', name: str, value: Any) -> None: if self.__initialised__: d = dict(self.__dict__) d.pop(name, None) @@ -54,6 +69,16 @@ def setattr_validate_assignment(self: 'DataclassType', name: str, value: Any) -> object.__setattr__(self, name, value) +def is_builtin_dataclass(_cls: Type[Any]) -> bool: + """ + `dataclasses.is_dataclass` is True if one of the class parents is a `dataclass`. 
+ This is why we also add a class attribute `__processed__` to only consider 'direct' built-in dataclasses + """ + import dataclasses + + return not hasattr(_cls, '__processed__') and dataclasses.is_dataclass(_cls) + + def _process_class( _cls: Type[Any], init: bool, @@ -63,7 +88,7 @@ def _process_class( unsafe_hash: bool, frozen: bool, config: Optional[Type[Any]], -) -> 'DataclassType': +) -> Type['Dataclass']: import dataclasses post_init_original = getattr(_cls, '__post_init__', None) @@ -74,7 +99,7 @@ def _process_class( post_init_post_parse = getattr(_cls, '__post_init_post_parse__', None) - def _pydantic_post_init(self: 'DataclassType', *initvars: Any) -> None: + def _pydantic_post_init(self: 'Dataclass', *initvars: Any) -> None: if post_init_original is not None: post_init_original(self, *initvars) d, _, validation_error = validate_model(self.__pydantic_model__, self.__dict__, cls=self.__class__) @@ -85,8 +110,23 @@ def _process_class( if post_init_post_parse is not None: post_init_post_parse(self, *initvars) - _cls.__post_init__ = _pydantic_post_init - cls = dataclasses._process_class(_cls, init, repr, eq, order, unsafe_hash, frozen) # type: ignore + # If the class is already a dataclass, __post_init__ will not be called automatically + # so no validation will be added. 
+ # We hence create dynamically a new dataclass: + # ``` + # @dataclasses.dataclass + # class NewClass(_cls): + # __post_init__ = _pydantic_post_init + # ``` + # with the exact same fields as the base dataclass + if is_builtin_dataclass(_cls): + _cls = type(_cls.__name__, (_cls,), {'__post_init__': _pydantic_post_init}) + else: + _cls.__post_init__ = _pydantic_post_init + cls: Type['Dataclass'] = dataclasses.dataclass( # type: ignore + _cls, init=init, repr=repr, eq=eq, order=order, unsafe_hash=unsafe_hash, frozen=frozen + ) + cls.__processed__ = ClassAttribute('__processed__', True) fields: Dict[str, Any] = {} for field in dataclasses.fields(cls): @@ -107,17 +147,46 @@ def _process_class( ) cls.__initialised__ = False - cls.__validate__ = classmethod(_validate_dataclass) - cls.__get_validators__ = classmethod(_get_validators) + cls.__validate__ = classmethod(_validate_dataclass) # type: ignore[assignment] + cls.__get_validators__ = classmethod(_get_validators) # type: ignore[assignment] if post_init_original: cls.__post_init_original__ = post_init_original if cls.__pydantic_model__.__config__.validate_assignment and not frozen: - cls.__setattr__ = setattr_validate_assignment + cls.__setattr__ = setattr_validate_assignment # type: ignore[assignment] return cls +@overload +def dataclass( + *, + init: bool = True, + repr: bool = True, + eq: bool = True, + order: bool = False, + unsafe_hash: bool = False, + frozen: bool = False, + config: Type[Any] = None, +) -> Callable[[Type[Any]], Type['Dataclass']]: + ... + + +@overload +def dataclass( + _cls: Type[Any], + *, + init: bool = True, + repr: bool = True, + eq: bool = True, + order: bool = False, + unsafe_hash: bool = False, + frozen: bool = False, + config: Type[Any] = None, +) -> Type['Dataclass']: + ... 
+ + def dataclass( _cls: Optional[Type[Any]] = None, *, @@ -128,7 +197,7 @@ def dataclass( unsafe_hash: bool = False, frozen: bool = False, config: Type[Any] = None, -) -> Union[Callable[[Type[Any]], 'DataclassType'], 'DataclassType']: +) -> Union[Callable[[Type[Any]], Type['Dataclass']], Type['Dataclass']]: """ Like the python standard lib dataclasses but with type validation. @@ -136,10 +205,19 @@ def dataclass( as Config.validate_assignment. """ - def wrap(cls: Type[Any]) -> 'DataclassType': + def wrap(cls: Type[Any]) -> Type['Dataclass']: return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, config) if _cls is None: return wrap return wrap(_cls) + + +def make_dataclass_validator(_cls: Type[Any], **kwargs: Any) -> 'CallableGenerator': + """ + Create a pydantic.dataclass from a builtin dataclass to add type validation + and yield the validators + """ + cls = dataclass(_cls, **kwargs) + yield from _get_validators(cls) diff --git a/pydantic/schema.py b/pydantic/schema.py index be2b454..a6a2777 100644 --- a/pydantic/schema.py +++ b/pydantic/schema.py @@ -61,7 +61,7 @@ from .typing import ForwardRef, Literal, get_args, get_origin, is_callable_type, from .utils import ROOT_KEY, get_model, lenient_issubclass, sequence_like if TYPE_CHECKING: - from .dataclasses import DataclassType # noqa: F401 + from .dataclasses import Dataclass # noqa: F401 from .main import BaseModel # noqa: F401 default_prefix = '#/definitions/' @@ -72,7 +72,7 @@ TypeModelSet = Set[TypeModelOrEnum] def schema( - models: Sequence[Union[Type['BaseModel'], Type['DataclassType']]], + models: Sequence[Union[Type['BaseModel'], Type['Dataclass']]], *, by_alias: bool = True, title: Optional[str] = None, @@ -125,7 +125,7 @@ def schema( def model_schema( - model: Union[Type['BaseModel'], Type['DataclassType']], + model: Union[Type['BaseModel'], Type['Dataclass']], by_alias: bool = True, ref_prefix: Optional[str] = None, ref_template: str = default_ref_template, @@ -342,10 +342,14 @@ def 
get_flat_models_from_field(field: ModelField, known_models: TypeModelSet) -> :param known_models: used to solve circular references :return: a set with the model used in the declaration for this field, if any, and all its sub-models """ + from .dataclasses import dataclass, is_builtin_dataclass from .main import BaseModel # noqa: F811 flat_models: TypeModelSet = set() + # Handle dataclass-based models + if is_builtin_dataclass(field.type_): + field.type_ = dataclass(field.type_) field_type = field.type_ if lenient_issubclass(getattr(field_type, '__pydantic_model__', None), BaseModel): field_type = field_type.__pydantic_model__ diff --git a/pydantic/types.py b/pydantic/types.py index 2090e92..9c462e1 100644 --- a/pydantic/types.py +++ b/pydantic/types.py @@ -96,12 +96,12 @@ OptionalIntFloatDecimal = Union[OptionalIntFloat, Decimal] StrIntFloat = Union[str, int, float] if TYPE_CHECKING: - from .dataclasses import DataclassType # noqa: F401 + from .dataclasses import Dataclass # noqa: F401 from .fields import ModelField from .main import BaseConfig, BaseModel # noqa: F401 from .typing import CallableGenerator - ModelOrDc = Type[Union['BaseModel', 'DataclassType']] + ModelOrDc = Type[Union['BaseModel', 'Dataclass']] class ConstrainedBytes(bytes): diff --git a/pydantic/utils.py b/pydantic/utils.py index 8019a19..96b792f 100644 --- a/pydantic/utils.py +++ b/pydantic/utils.py @@ -30,7 +30,7 @@ if TYPE_CHECKING: from inspect import Signature from pathlib import Path - from .dataclasses import DataclassType # noqa: F401 + from .dataclasses import Dataclass # noqa: F401 from .fields import ModelField # noqa: F401 from .main import BaseConfig, BaseModel # noqa: F401 from .typing import AbstractSetIntStr, DictIntStrAny, IntStr, MappingIntStrAny, ReprArgs # noqa: F401 @@ -251,7 +251,7 @@ def generate_model_signature( return Signature(parameters=list(merged_params.values()), return_annotation=None) -def get_model(obj: Union[Type['BaseModel'], Type['DataclassType']]) -> 
Type['BaseModel']: +def get_model(obj: Union[Type['BaseModel'], Type['Dataclass']]) -> Type['BaseModel']: from .main import BaseModel # noqa: F811 try: diff --git a/pydantic/validators.py b/pydantic/validators.py index 0a59f93..3fc9e43 100644 --- a/pydantic/validators.py +++ b/pydantic/validators.py @@ -573,6 +573,8 @@ _VALIDATORS: List[Tuple[Type[Any], List[Any]]] = [ def find_validators( # noqa: C901 (ignore complexity) type_: Type[Any], config: Type['BaseConfig'] ) -> Generator[AnyCallable, None, None]: + from .dataclasses import is_builtin_dataclass, make_dataclass_validator + if type_ is Any: return type_type = type_.__class__ @@ -590,6 +592,9 @@ def find_validators( # noqa: C901 (ignore complexity) if is_literal_type(type_): yield make_literal_validator(type_) return + if is_builtin_dataclass(type_): + yield from make_dataclass_validator(type_) + return if type_ is Enum: yield enum_validator return diff --git a/tests/mypy/outputs/plugin-fail-strict.txt b/tests/mypy/outputs/plugin-fail-strict.txt index 9df8e2f..6f46385 100644 --- a/tests/mypy/outputs/plugin-fail-strict.txt +++ b/tests/mypy/outputs/plugin-fail-strict.txt @@ -32,4 +32,7 @@ 185: error: Unexpected keyword argument "x" for "AliasGeneratorModel2" [call-arg] 186: error: Unexpected keyword argument "z" for "AliasGeneratorModel2" [call-arg] 189: error: Name 'Missing' is not defined [name-defined] -197: error: Argument "config" to "dataclass" has incompatible type "Dict[<nothing>, <nothing>]"; expected "Optional[Type[Any]]" [arg-type] \ No newline at end of file +197: error: No overload variant of "dataclass" matches argument type "Dict[<nothing>, <nothing>]" [call-overload] +197: note: Possible overload variant: +197: note: def dataclass(*, init: bool = ..., repr: bool = ..., eq: bool = ..., order: bool = ..., unsafe_hash: bool = ..., frozen: bool = ..., config: Optional[Type[Any]] = ...) 
-> Callable[[Type[Any]], Type[Dataclass]] +197: note: <1 more non-matching overload not shown> \ No newline at end of file diff --git a/tests/mypy/outputs/plugin-fail.txt b/tests/mypy/outputs/plugin-fail.txt index eeea727..227bec0 100644 --- a/tests/mypy/outputs/plugin-fail.txt +++ b/tests/mypy/outputs/plugin-fail.txt @@ -21,4 +21,7 @@ 175: error: unused 'type: ignore' comment 182: error: unused 'type: ignore' comment 189: error: Name 'Missing' is not defined [name-defined] -197: error: Argument "config" to "dataclass" has incompatible type "Dict[<nothing>, <nothing>]"; expected "Optional[Type[Any]]" [arg-type] \ No newline at end of file +197: error: No overload variant of "dataclass" matches argument type "Dict[<nothing>, <nothing>]" [call-overload] +197: note: Possible overload variant: +197: note: def dataclass(*, init: bool = ..., repr: bool = ..., eq: bool = ..., order: bool = ..., unsafe_hash: bool = ..., frozen: bool = ..., config: Optional[Type[Any]] = ...) -> Callable[[Type[Any]], Type[Dataclass]] +197: note: <1 more non-matching overload not shown> \ No newline at end of file diff --git a/tests/test_dataclasses.py b/tests/test_dataclasses.py index d799e35..6ad604d 100644 --- a/tests/test_dataclasses.py +++ b/tests/test_dataclasses.py @@ -356,9 +356,9 @@ def test_nested_dataclass(): def test_arbitrary_types_allowed(): - @dataclasses.dataclass class Button: - href: str + def __init__(self, href: str): + self.href = href class Config: arbitrary_types_allowed = True @@ -633,3 +633,108 @@ def test_hashable_optional(default): MyDataclass() MyDataclass(v=None) + + +def test_override_builtin_dataclass(): + @dataclasses.dataclass + class File: + hash: str + name: Optional[str] + size: int + content: Optional[bytes] = None + + FileChecked = pydantic.dataclasses.dataclass(File) + f = FileChecked(hash='xxx', name=b'whatever.txt', size='456') + assert f.name == 'whatever.txt' + assert f.size == 456 + + with pytest.raises(ValidationError) as e: + FileChecked(hash=[1], 
name='name', size=3) + assert 
e.value.errors() == [{'loc': ('hash',), 'msg': 'str type expected', 'type': 'type_error.str'}] + + +def test_override_builtin_dataclass_2(): + @dataclasses.dataclass + class Meta: + modified_date: Optional[datetime] + seen_count: int + + @pydantic.dataclasses.dataclass + @dataclasses.dataclass + class File(Meta): + filename: str + + f = File(filename=b'thefilename', modified_date='2020-01-01T00:00', seen_count='7') + assert f.filename == 'thefilename' + assert f.modified_date == datetime(2020, 1, 1, 0, 0) + assert f.seen_count == 7 + + +def test_override_builtin_dataclass_nested(): + @dataclasses.dataclass + class Meta: + modified_date: Optional[datetime] + seen_count: int + + @dataclasses.dataclass + class File: + filename: str + meta: Meta + + class Foo(BaseModel): + file: File + + FileChecked = pydantic.dataclasses.dataclass(File) + f = FileChecked(filename=b'thefilename', meta=Meta(modified_date='2020-01-01T00:00', seen_count='7')) + assert f.filename == 'thefilename' + assert f.meta.modified_date == datetime(2020, 1, 1, 0, 0) + assert f.meta.seen_count == 7 + + with pytest.raises(ValidationError) as e: + FileChecked(filename=b'thefilename', meta=Meta(modified_date='2020-01-01T00:00', seen_count=['7'])) + assert e.value.errors() == [ + {'loc': ('meta', 'seen_count'), 'msg': 'value is not a valid integer', 'type': 'type_error.integer'} + ] + + foo = Foo.parse_obj( + { + 'file': { + 'filename': b'thefilename', + 'meta': {'modified_date': '2020-01-01T00:00', 'seen_count': '7'}, + }, + } + ) + assert foo.file.filename == 'thefilename' + assert foo.file.meta.modified_date == datetime(2020, 1, 1, 0, 0) + assert foo.file.meta.seen_count == 7 + + +def test_override_builtin_dataclass_nested_schema(): + @dataclasses.dataclass + class Meta: + modified_date: Optional[datetime] + seen_count: int + + @dataclasses.dataclass + class File: + filename: str + meta: Meta + + FileChecked = pydantic.dataclasses.dataclass(File) + assert FileChecked.__pydantic_model__.schema() == { + 
'definitions': { + 'Meta': { + 'properties': { + 'modified_date': {'format': 'date-time', 'title': 'Modified ' 'Date', 'type': 'string'}, + 'seen_count': {'title': 'Seen Count', 'type': 'integer'}, + }, + 'required': ['modified_date', 'seen_count'], + 'title': 'Meta', + 'type': 'object', + } + }, + 'properties': {'filename': {'title': 'Filename', 'type': 'string'}, 'meta': {'$ref': '#/definitions/Meta'}}, + 'required': ['filename', 'meta'], + 'title': 'File', + 'type': 'object', + }