models
通过创建一个继承 BaseModel
的子类(model)来定义一类对象(defining objects)
模型(model)可以是强类型语言中的类型(types),也可以是 API 接口中的参数
未经验证的 raw data 可以被传入 model,model 会根据自身的字段定义对 untrusted data 进行验证
pydantic 不仅仅是一个验证模块(validation library),更是一个解析模块(parsing library),验证只提供对传入数据的类型与约束,而 pydantic 可以保证传出数据的类型与约束都是符合规则的。
pydantic is primarily a parsing library, not a validation library. Validation is a means to an end: building a model which conforms to the types and constraints provided.
In other words, pydantic guarantees the types and constraints of the output model, not the input data
基础使用
1
2
3
4
5
| from pydantic import BaseModel
class User(BaseModel):
id: int
name = 'Jane Doe' # has default value, not required then
|
name
字段的类型被它的默认值所指出,所以就不用显式地定义了
1
2
| user = User(id='123') # str was cast to an int
user_x = User(id='123.45') # float was cast to an int
|
聪明的实例化
1
| assert user.__fields_set__ == {'id'}
|
__fields_set__
指出了在实例化时传入的字段
<model>.dict()
和 dict(<model>)
能将 model 转换为字典;不过 <model>.dict()
可能会包含一些其他数据(比如递归解析出嵌套的 dict 字段)
model 常用的方法与属性
属性
model_fields
model_computed_fields
model_fields_set
方法
model_dump()
model_dump_json()
model_copy()
model_validate()
- 曾用
parse_obj()
- class method,将一个 dict 转换为 model 的方法
model_validate_json()
- 曾用
parse_raw()
- class method,将字符串转换为 model 的方法
model_construct()
嵌套 model
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
| from typing import Optional
from pydantic import BaseModel
class Foo(BaseModel):
count: int
size: Optional[float] = None
class Bar(BaseModel):
apple = 'x'
banana = 'y'
class Spam(BaseModel):
foo: Foo
bars: list[Bar]
m = Spam(foo={'count': 4}, bars=[{'apple': 'x1'}, {'apple': 'x2'}])
print(m)
#> foo=Foo(count=4, size=None) bars=[Bar(apple='x1', banana='y'),
#> Bar(apple='x2', banana='y')]
print(m.dict())
"""
{
'foo': {'count': 4, 'size': None},
'bars': [
{'apple': 'x1', 'banana': 'y'},
{'apple': 'x2', 'banana': 'y'},
],
}
"""
|
ORM Mode
models 支持 ORM 模式
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
| from typing import List
from sqlalchemy import Column, Integer, String
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.orm import declarative_base
from typing_extensions import Annotated
from pydantic import BaseModel, ConfigDict, StringConstraints
Base = declarative_base()
class CompanyOrm(Base):
__tablename__ = 'companies'
id = Column(Integer, primary_key=True, nullable=False)
public_key = Column(String(20), index=True, nullable=False, unique=True)
name = Column(String(63), unique=True)
domains = Column(ARRAY(String(255)))
class CompanyModel(BaseModel):
model_config = ConfigDict(from_attributes=True)
id: int
public_key: Annotated[str, StringConstraints(max_length=20)]
name: Annotated[str, StringConstraints(max_length=63)]
domains: List[Annotated[str, StringConstraints(max_length=255)]]
co_orm = CompanyOrm(
id=123,
public_key='foobar',
name='Testing',
domains=['example.com', 'foobar.com'],
)
print(co_orm)
#> <__main__.CompanyOrm object at 0x0123456789ab>
co_model = CompanyModel.model_validate(co_orm)
print(co_model)
"""
id=123 public_key='foobar' name='Testing' domains=['example.com', 'foobar.com']
"""
|
要点
- model_config 中的 from_attributes(v1 中为 Config 的 orm_mode)
必须为 True
- 使用 model_validate()(v1 中为 .from_orm())
类方法来创建 model
保留字
You may want to name a Column
after a reserved SQLAlchemy field. In that case, Field
aliases will be convenient:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
| import typing
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base
from pydantic import BaseModel, ConfigDict, Field
class MyModel(BaseModel):
model_config = ConfigDict(from_attributes=True)
metadata: typing.Dict[str, str] = Field(alias='metadata_')
Base = declarative_base()
class SQLModel(Base):
__tablename__ = 'my_table'
id = sa.Column('id', sa.Integer, primary_key=True)
# 'metadata' is reserved by SQLAlchemy, hence the '_'
metadata_ = sa.Column('metadata', sa.JSON)
sql_model = SQLModel(metadata_={'key': 'val'}, id=1)
pydantic_model = MyModel.model_validate(sql_model)
print(pydantic_model.model_dump())
#> {'metadata': {'key': 'val'}}
print(pydantic_model.model_dump(by_alias=True))
#> {'metadata_': {'key': 'val'}}
|
嵌套的属性
When using attributes to parse models, model instances will be created from both top-level attributes and deeper-nested attributes as appropriate.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
| from typing import List
from pydantic import BaseModel, ConfigDict
class PetCls:
def __init__(self, *, name: str, species: str):
self.name = name
self.species = species
class PersonCls:
def __init__(self, *, name: str, age: float = None, pets: List[PetCls]):
self.name = name
self.age = age
self.pets = pets
class Pet(BaseModel):
model_config = ConfigDict(from_attributes=True)
name: str
species: str
class Person(BaseModel):
model_config = ConfigDict(from_attributes=True)
name: str
age: float = None
pets: List[Pet]
bones = PetCls(name='Bones', species='dog')
orion = PetCls(name='Orion', species='cat')
anna = PersonCls(name='Anna', age=20, pets=[bones, orion])
anna_model = Person.model_validate(anna)
print(anna_model)
"""
name='Anna' age=20.0 pets=[Pet(name='Bones', species='dog'), Pet(name='Orion', species='cat')]
"""
|
错误处理
校验过程中,一旦出错,pydantic 将会抛出 ValidationError
错误
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
| from typing import List
from pydantic import BaseModel, ValidationError
class Model(BaseModel):
list_of_ints: List[int]
a_float: float
data = dict(
list_of_ints=['1', 2, 'bad'],
a_float='not a float',
)
try:
Model(**data)
except ValidationError as e:
print(e)
"""
2 validation errors for Model
list_of_ints
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='bad', input_type=str]
a_float
Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='not a float', input_type=str]
"""
|
解析数据的辅助函数
pydantic 为 model 提供两个 helper 类方法供其在 parsing data 时使用
model_validate()
- 跟
__init__
方法很像,只不过它接收一个 dict 而不是关键字参数(说实话,在 __init__
里用 **kwargs
传字典也能起到相同的作用)
model_validate_json()
- str / bytes 都能传,它首先会转换成 json,然后再把 json 传进
model_validate()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
| from datetime import datetime
from typing import Optional
from pydantic import BaseModel, ValidationError
class User(BaseModel):
id: int
name: str = 'John Doe'
signup_ts: Optional[datetime] = None
m = User.model_validate({'id': 123, 'name': 'James'})
print(m)
#> id=123 name='James' signup_ts=None
try:
User.model_validate(['not', 'a', 'dict'])
except ValidationError as e:
print(e)
"""
1 validation error for User
Input should be a valid dictionary or instance of User [type=model_type, input_value=['not', 'a', 'dict'], input_type=list]
"""
m = User.model_validate_json('{"id": 123, "name": "James"}')
print(m)
#> id=123 name='James' signup_ts=None
try:
m = User.model_validate_json('{"id": 123, "name": 123}')
except ValidationError as e:
print(e)
"""
1 validation error for User
name
Input should be a valid string [type=string_type, input_value=123, input_type=int]
"""
try:
m = User.model_validate_json('invalid JSON')
except ValidationError as e:
print(e)
"""
1 validation error for User
Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='invalid JSON', input_type=str]
"""
|
如果你想转换其他格式的数据,那先把它转成 dict,再调用 model_validate()
就好了
如果你在 model_validate()
里传了一个 model 实例,除非你设置了 revalidate_instances
,否则不会再验证一遍了:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| from pydantic import BaseModel, ConfigDict, ValidationError
class Model(BaseModel):
a: int
model_config = ConfigDict(revalidate_instances='always')
m = Model(a=0)
# note: setting `validate_assignment=True` in `model_config` can prevent this kind of misbehavior
m.a = 'not an int'
try:
m2 = Model.model_validate(m)
except ValidationError as e:
print(e)
"""
1 validation error for Model
a
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='not an int', input_type=str]
"""
|
跳过验证,创建 model
model_construct()(v1 中为 construct())
方法允许你不经验证就创建 model,当信息源绝对可靠时,这样做可以提高效率(==不验证比验证快 30 倍==)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
| from pydantic import BaseModel
class User(BaseModel):
id: int
age: int
name: str = 'John Doe'
original_user = User(id=123, age=32)
user_data = original_user.model_dump()
print(user_data)
#> {'id': 123, 'age': 32, 'name': 'John Doe'}
fields_set = original_user.model_fields_set
print(fields_set)
#> {'age', 'id'}
# ...
# pass user_data and fields_set to RPC or save to the database etc.
# ...
# you can then create a new instance of User without
# re-running validation which would be unnecessary at this point:
new_user = User.model_construct(_fields_set=fields_set, **user_data)
print(repr(new_user))
#> User(id=123, age=32, name='John Doe')
print(new_user.model_fields_set)
#> {'age', 'id'}
# construct can be dangerous, only use it with validated data!:
bad_user = User.model_construct(id='dog')
print(repr(bad_user))
#> User(id='dog', name='John Doe')
|
_fields_set
参数是可选的,它可以准确地区分出传入的和默认赋值的参数;如果它被忽略,model_fields_set
就是传入的所有字段了
For example, in the example above, if _fields_set
was not provided, new_user.model_fields_set
would be {'id', 'age', 'name'}
.
Generic Models 泛型类
为了方便 model 的复用,pydantic 支持创建 generic models
按照以下步骤去创建一个 generic model
- 声明一个或多个
typing.TypeVar
实例,用于参数化你的 model - 声明一个继承自
pydantic.BaseModel(v1 中为 pydantic.generics.GenericModel)
和 typing.Generic
的 pydantic model,并在其中传入你刚刚声明的 typing.TypeVar
实例作为 typing.Generic
的参数 - 在你想要的字段上,使用
typing.TypeVar
实例作为标注
Python 3.12 应该支持更简便的泛型语法,如 class Response[T](BaseModel)
以下是一个可复用的 HTTP 响应负载处理类(easily-reused HTTP response payload wrapper)的例子
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
| from typing import Generic, List, Optional, TypeVar
from pydantic import BaseModel, ValidationError
DataT = TypeVar('DataT')
class DataModel(BaseModel):
numbers: List[int]
people: List[str]
class Response(BaseModel, Generic[DataT]):
data: Optional[DataT] = None
print(Response[int](data=1))
#> data=1
print(Response[str](data='value'))
#> data='value'
print(Response[str](data='value').model_dump())
#> {'data': 'value'}
data = DataModel(numbers=[1, 2, 3], people=[])
print(Response[DataModel](data=data).model_dump())
#> {'data': {'numbers': [1, 2, 3], 'people': []}}
try:
Response[int](data='value')
except ValidationError as e:
print(e)
"""
1 validation error for Response[int]
data
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='value', input_type=str]
"""
|
如果在 generic model 定义时设置了 model_config,或者用了 @field_validator 以及其他 Pydantic 装饰器,它们也需要被泛型化
如果要继承一个 GenericModel 并且不替换 TypeVar
实例的话,这个子类也必须继承一个 typing.Generic
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
| from typing import Generic, TypeVar
from pydantic import BaseModel
TypeX = TypeVar('TypeX')
class BaseClass(BaseModel, Generic[TypeX]):
X: TypeX
class ChildClass(BaseClass[TypeX], Generic[TypeX]):
# Inherit from Generic[TypeX]
pass
# Replace TypeX by int
print(ChildClass[int](X=1))
#> X=1
|
当然,可以部分继承父类的 TypeVar
实例,也可以新增 TypeVar
实例
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
| from typing import Generic, TypeVar
from pydantic import BaseModel
TypeX = TypeVar('TypeX')
TypeY = TypeVar('TypeY')
TypeZ = TypeVar('TypeZ')
class BaseClass(BaseModel, Generic[TypeX, TypeY]):
x: TypeX
y: TypeY
class ChildClass(BaseClass[int, TypeY], Generic[TypeY, TypeZ]):
z: TypeZ
# Replace TypeY by str
print(ChildClass[str, int](x='1', y='y', z='3'))
#> x=1 y='y' z=3
|
甚至可以自定义泛型实例化后的名字 QAQ
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
| from typing import Any, Generic, Tuple, Type, TypeVar
from pydantic import BaseModel
DataT = TypeVar('DataT')
class Response(BaseModel, Generic[DataT]):
data: DataT
@classmethod
def model_parametrized_name(cls, params: Tuple[Type[Any], ...]) -> str:
return f'{params[0].__name__.title()}Response'
print(repr(Response[int](data=1)))
#> IntResponse(data=1)
print(repr(Response[str](data='a')))
#> StrResponse(data='a')
|
当然,可以将泛型 model 嵌套进其他 model 中去
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
| from typing import Generic, TypeVar
from pydantic import BaseModel
T = TypeVar('T')
class ResponseModel(BaseModel, Generic[T]):
content: T
class Product(BaseModel):
name: str
price: float
class Order(BaseModel):
id: int
product: ResponseModel[Product]
product = Product(name='Apple', price=0.5)
response = ResponseModel[Product](content=product)
order = Order(id=1, product=response)
print(repr(order))
"""
Order(id=1, product=ResponseModel[Product](content=Product(name='Apple', price=0.5)))
"""
|
在嵌套 model 中使用相同的 TypeVar
实例可以保证一致性
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
| from typing import Generic, TypeVar
from pydantic import BaseModel, ValidationError
T = TypeVar('T')
class InnerT(BaseModel, Generic[T]):
inner: T
class OuterT(BaseModel, Generic[T]):
outer: T
nested: InnerT[T]
nested = InnerT[int](inner=1)
print(OuterT[int](outer=1, nested=nested))
#> outer=1 nested=InnerT[int](inner=1)
try:
nested = InnerT[str](inner='a')
print(OuterT[int](outer='a', nested=nested))
except ValidationError as e:
print(e)
"""
2 validation errors for OuterT[int]
outer
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='a', input_type=str]
nested
Input should be a valid dictionary or instance of InnerT[int] [type=model_type, input_value=InnerT[str](inner='a'), input_type=InnerT[str]]
"""
|
与那些内置类型(如 List
和 Dict
)类似,GenericModel 的行为会被以下规定所定义:
- 如果在实例化(instantiate) generic model 之前没有指定类型参数(specify parameters),传参会按照
TypeVar 的上限(bound)
进行验证 - 如果
TypeVar
未设置任何上限(bounds),传参会被视为 Any
类型
类似于 List
和 Dict
,任何使用 TypeVar
传入 model 的参数都能被更具体的类型替换
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
| from typing import Generic, TypeVar
from pydantic import BaseModel, ValidationError
AT = TypeVar('AT')
BT = TypeVar('BT')
class Model(BaseModel, Generic[AT, BT]):
a: AT
b: BT
print(Model(a='a', b='a'))
#> a='a' b='a'
IntT = TypeVar('IntT', bound=int)
typevar_model = Model[int, IntT]
print(typevar_model(a=1, b=1))
#> a=1 b=1
try:
typevar_model(a='a', b='a')
except ValidationError as exc:
print(exc)
"""
2 validation errors for Model[int, TypeVar]
a
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='a', input_type=str]
b
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='a', input_type=str]
"""
concrete_model = typevar_model[int]
print(concrete_model(a=1, b=1))
#> a=1 b=1
|
动态定义 model
有些时候,model 的定义直到程序运行时才会完整,pydantic 提供 create_model
方法,允许 model 在运行时被定义
1
2
3
4
5
6
7
8
| from pydantic import BaseModel, create_model
DynamicFoobarModel = create_model('DynamicFoobarModel', foo=(str, ...), bar=123)
# same as below
class StaticFoobarModel(BaseModel):
foo: str
bar: int = 123
|
StaticFoobarModel
和 DynamicFoobarModel
在这里是完全一样的
动态 model 字段的定义,以下几种均可:
- (<type>, <default value>) 的元组格式
- (<type>, Field(...)) 的元组格式
- typing.Annotated[<type>, Field(...)]
__config__
和 __base__
这两个特殊参数可以用来自定义 model,比如添加额外字段来扩展已有的 model 之类的
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| from pydantic import BaseModel, create_model
class FooModel(BaseModel):
foo: str
bar: int = 123
BarModel = create_model(
'BarModel',
apple=(str, 'russet'),
banana=(str, 'yellow'),
__base__=FooModel,
)
print(BarModel)
#> <class '__main__.BarModel'>
print(BarModel.model_fields.keys())
#> dict_keys(['foo', 'bar', 'apple', 'banana'])
|
你甚至还能在 __validators__
参数中传入字典来实现 validator 的功能(震惊)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
| from pydantic import ValidationError, create_model, field_validator
def username_alphanumeric(cls, v):
assert v.isalnum(), 'must be alphanumeric'
return v
validators = {
'username_validator': field_validator('username')(username_alphanumeric)
# ...
}
UserModel = create_model(
'UserModel', username=(str, ...), __validators__=validators
)
user = UserModel(username='scolvin')
print(user)
#> username='scolvin'
try:
UserModel(username='scolvi%n')
except ValidationError as e:
print(e)
"""
1 validation error for UserModel
username
Assertion failed, must be alphanumeric [type=assertion_error, input_value='scolvi%n', input_type=str]
"""
|
自定义根类型与 RootModel
通过继承 RootModel
类,Pydantic models 可以被定义为一个根类型(custom root type)
根类型可以是任何 pydantic 支持的类型,被 RootModel
的类型所指定
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
| from typing import Dict, List
from pydantic import RootModel
Pets = RootModel[List[str]]
PetsByName = RootModel[Dict[str, str]]
print(Pets(['dog', 'cat']))
#> root=['dog', 'cat']
print(Pets(['dog', 'cat']).model_dump_json())
#> ["dog","cat"]
print(Pets.model_validate(['dog', 'cat']))
#> root=['dog', 'cat']
print(Pets.model_json_schema())
"""
{'items': {'type': 'string'}, 'title': 'RootModel[List[str]]', 'type': 'array'}
"""
print(PetsByName({'Otis': 'dog', 'Milo': 'cat'}))
#> root={'Otis': 'dog', 'Milo': 'cat'}
print(PetsByName({'Otis': 'dog', 'Milo': 'cat'}).model_dump_json())
#> {"Otis":"dog","Milo":"cat"}
print(PetsByName.model_validate({'Otis': 'dog', 'Milo': 'cat'}))
#> root={'Otis': 'dog', 'Milo': 'cat'}
"""
|
当然,支持实现 __iter__
和 __getitem__
方法,这个时候就要用到传统而正经的写法:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
| from typing import List
from pydantic import RootModel
class Pets(RootModel):
root: List[str]
def __iter__(self):
return iter(self.root)
def __getitem__(self, item):
return self.root[item]
pets = Pets.model_validate(['dog', 'cat'])
print(pets[0])
#> dog
print([pet for pet in pets])
#> ['dog', 'cat']
|
当然当然,直接继承一个指定类型的 RootModel
的写法也是合法的:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| from typing import List
from pydantic import RootModel
class Pets(RootModel[List[str]]):
def describe(self) -> str:
return f'Pets: {", ".join(self.root)}'
my_pets = Pets.model_validate(['dog', 'cat'])
print(my_pets.describe())
#> Pets: dog, cat
|
Calling the parse_obj
method on a dict with the single key "__root__"
for non-mapping custom root types is currently supported for backwards compatibility, but is not recommended and may be dropped in a future version.
如果你想要直接取到 __root__
字段中的值,或是想要迭代之,你可以实现对应的 __iter__
和 __getitem__
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| from pydantic import BaseModel
class Pets(BaseModel):
__root__: list[str]
def __iter__(self):
return iter(self.__root__)
def __getitem__(self, item):
return self.__root__[item]
pets = Pets.parse_obj(['dog', 'cat'])
print(pets[0])
#> dog
print([pet for pet in pets])
#> ['dog', 'cat']
|
“虚假” 的不可变性质
设置 model_config['frozen'] = True
可以让 model 不可变,所有对其值的修改都会抛出错误
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
| from pydantic import BaseModel, ConfigDict, ValidationError
class FooBarModel(BaseModel):
model_config = ConfigDict(frozen=True)
a: str
b: dict
foobar = FooBarModel(a='hello', b={'apple': 'pear'})
try:
foobar.a = 'different'
except ValidationError as e:
print(e)
"""
1 validation error for FooBarModel
a
Instance is frozen [type=frozen_instance, input_value='different', input_type=str]
"""
print(foobar.a)
#> hello
print(foobar.b)
#> {'apple': 'pear'}
foobar.b['apple'] = 'grape'
print(foobar.b)
#> {'apple': 'grape'}
|
a 不能变,但是字典类型的 b 是可以变的哦~
醒醒吧,都是假的,Python 里从来没有严格的不可变性
Immutability in Python is never strict. If developers are determined/stupid they can always modify a so-called “immutable” object.
继承自抽象基类 ABC
model 能跟抽象基类(Abstract Base Classes,ABC)一起使用
1
2
3
4
5
6
7
8
9
10
11
12
| import abc
from pydantic import BaseModel
class FooBarModel(BaseModel, abc.ABC):
a: str
b: int
@abc.abstractmethod
def my_abstract_method(self):
pass
|
字段顺序
字段顺序是非常重要的:
- 校验是按照字段顺序进行的
- 字段顺序在 model schema 中保留
- 字段顺序在校验错误中保留
- 字段顺序在
.model_dump()
和 .model_dump_json()
中保留
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
| from pydantic import BaseModel, ValidationError
class Model(BaseModel):
a: int
b: int = 2
c: int = 1
d: int = 0
e: float
print(Model.model_fields.keys())
#> dict_keys(['a', 'b', 'c', 'd', 'e'])
m = Model(e=2, a=1)
print(m.model_dump())
#> {'a': 1, 'b': 2, 'c': 1, 'd': 0, 'e': 2.0}
try:
Model(a='x', b='x', c='x', d='x', e='x')
except ValidationError as err:
error_locations = [e['loc'] for e in err.errors()]
print(error_locations)
#> [('a',), ('b',), ('c',), ('d',), ('e',)]
|
从上面的例子可以看出,把带注释和不带注释的字段混在一起,有时会造成意想不到的结果,这其实是 Python 的缺陷
即使默认值也能跟类型标注一样来保证顺序,我们还是建议给所有字段都带上类型标注
必填字段
给个类型标注就能让一个字段变成必要字段(required fields),或者加个 ...
省略
1
2
3
4
5
6
7
| from pydantic import BaseModel, Field
class Model(BaseModel):
a: int
b: int = ...
c: int = Field(..., alias='C')
|
以上三个字段都是必填的。不过,单用省略符号是不被推荐的,跟 mypy 也不是很契合
具有不可哈希默认值的字段
Fields with non-hashable default values
Python 中常见的一类 Bug 来源,是使用可变字段为默认值,然后在每次调用时都附上了新值。。。
The dataclasses
module actually raises an error in this case, indicating that you should use the default_factory
argument to dataclasses.field
.
当然 Pydantic 同样支持使用 default_factory
来处理这些可变的默认值,但这并不是必要的!因为 Pydantic 会自动将它们作深拷贝处理
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
| from typing import Dict, List
from pydantic import BaseModel
class Model(BaseModel):
item_counts: List[Dict[str, int]] = [{}]
m1 = Model()
m1.item_counts[0]['a'] = 1
print(m1.item_counts)
#> [{'a': 1}]
m2 = Model()
print(m2.item_counts)
#> [{}]
|
牛的
拥有动态默认值的字段
使用 default_factory
来创建一个拥有动态默认值的字段
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| from datetime import datetime, timezone
from uuid import UUID, uuid4
from pydantic import BaseModel, Field
def datetime_now() -> datetime:
return datetime.now(timezone.utc)
class Model(BaseModel):
uid: UUID = Field(default_factory=uuid4)
updated: datetime = Field(default_factory=datetime_now)
m1 = Model()
m2 = Model()
assert m1.uid != m2.uid
|
自动排除的属性
以下划线开头的类变量(被视为私有属性)和类型标注为 typing.ClassVar
的属性将被自动从 model 中排除
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
| from typing import ClassVar
from pydantic import BaseModel
class Model(BaseModel):
x: int = 2
y: ClassVar[int] = 1
m = Model()
print(m)
#> x=2
print(Model.y)
#> 1
|
As of Pydantic v2.1.0, you will receive a NameError if trying to use the Field
function with a private attribute. Because private attributes are not treated as fields, the Field() function cannot be applied.
如果你想要修改或操作 model 实例的内部属性,使用 PrivateAttr
来声明之
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
| from datetime import datetime
from random import randint
from pydantic import BaseModel, PrivateAttr
class TimeAwareModel(BaseModel):
_processed_at: datetime = PrivateAttr(default_factory=datetime.now)
_secret_value: str
def __init__(self, **data):
super().__init__(**data)
# this could also be done with default_factory
self._secret_value = randint(1, 5)
m = TimeAwareModel()
print(m._processed_at)
#> 2032-01-02 03:04:05.000006
print(m._secret_value)
#> 3
|
私有属性必须以下划线开头,_attr
行, __attr__
不行!
数据转换
pydantic 可能会改变 input data 以使其通过类型检查,但有时候,这种行为可能会导致信息的丢失
1
2
3
4
5
6
7
8
9
10
11
| from pydantic import BaseModel
class Model(BaseModel):
a: int
b: float
c: str
print(Model(a=3.000, b='2.72', c=b'binary data').model_dump())
#> {'a': 3, 'b': 2.72, 'c': 'binary data'}
|
这种行为是 pydantic 经过深思熟虑之后的结果,而且一般来说,蛮有用的
不过,严格的类型检查也是支持的
❤🔥 model 签名
所有的 model 都会根据其字段生成签名(signature)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| import inspect
from pydantic import BaseModel, Field
class FooModel(BaseModel):
id: int
name: str = None
description: str = 'Foo'
apple: int = Field(alias='pear')
print(inspect.signature(FooModel))
#> (*, id: int, name: str = None, description: str = 'Foo', pear: int) -> None
|
一个精确的签名对于像 FastAPI
和 hypothesis
这样经常进行对象自省的库是很有用的
生成的签名也将遵循自定义的 __init__
函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| import inspect
from pydantic import BaseModel
class MyModel(BaseModel):
id: int
info: str = 'Foo'
def __init__(self, id: int = 1, *, bar: str, **data) -> None:
"""My custom init!"""
super().__init__(id=id, bar=bar, **data)
print(inspect.signature(MyModel))
#> (id: int = 1, *, bar: str, info: str = 'Foo') -> None
|
为了被签名收纳,字段的名字(name)或者别名(alias)必须是一个合法的 python 标识符,pydantic 会优先选择别名,当别名不合法时则会选择名字
如果一个字段的别名和名称都是无效的标识符,那将添加一个 **data
参数。不过,如果 model_config['extra'] == 'allow'
,那么 **data
参数将总是出现在签名中。
结构化模式匹配
model 支持结构化模式匹配(structural pattern matching),这是 Python 3.10 的新特性
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| from pydantic import BaseModel
class Pet(BaseModel):
name: str
species: str
a = Pet(name='Bones', species='dog')
match a:
# match `species` to 'dog', declare and initialize `dog_name`
case Pet(species='dog', name=dog_name):
print(f'{dog_name} is a dog')
#> Bones is a dog
# default case
case _:
print('No dog matched')
|
match-case 语句看起来像是创建了一个全新的 model,然而并没有
它只是一个用来获取属性、比较它、声明它、初始化它的语法糖
属性复制
一般来说,传入 model 的参数属性都会被强制复制一份以完成验证工作
下面的示例表明,两次实例化的 model 的 list id 是不同的,是因为在传参时 arr_orig 被复制了
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
| from typing import List
from pydantic import BaseModel
class C1:
arr = []
def __init__(self, in_arr):
self.arr = in_arr
class C2(BaseModel):
arr: List[int]
arr_orig = [1, 9, 10, 3]
c1 = C1(arr_orig)
c2 = C2(arr=arr_orig)
print('id(c1.arr) == id(c2.arr):', id(c1.arr) == id(c2.arr))
#> id(c1.arr) == id(c2.arr): False
|
有这么一些情况下,属性不会被复制,比如传的参数是一个 model 实例,因为 model 参数默认不会被校验(不过可以设置 model_config['revalidate_instances'] = 'always'
强制校验 model 参数就是了)
默认情况下,如果传了一些额外的字段的话,Pydantic 校验是不会出错的,仅忽略之
1
2
3
4
5
6
7
8
9
| from pydantic import BaseModel
class Model(BaseModel):
x: int
m = Model(x=1, y='a')
assert m.model_dump() == {'x': 1}
|
当然,可以设置成报错的
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| from pydantic import BaseModel, ConfigDict, ValidationError
class Model(BaseModel):
x: int
model_config = ConfigDict(extra='forbid')
try:
Model(x=1, y='a')
except ValidationError as exc:
print(exc)
"""
1 validation error for Model
y
Extra inputs are not permitted [type=extra_forbidden, input_value='a', input_type=str]
"""
|
还可以选择保留这些额外的字段,存到 BaseModel.__pydantic_extra__
中
1
2
3
4
5
6
7
8
9
10
11
| from pydantic import BaseModel, ConfigDict
class Model(BaseModel):
x: int
model_config = ConfigDict(extra='allow')
m = Model(x=1, y='a')
assert m.__pydantic_extra__ == {'y': 'a'}
|
默认情况下,存在 __pydantic_extra__
里的字段不会触发验证。不过,你甚至可以给 __pydantic_extra__
定义类型。。。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
| from typing import Dict
from pydantic import BaseModel, ConfigDict, ValidationError
class Model(BaseModel):
__pydantic_extra__: Dict[str, int]
x: int
model_config = ConfigDict(extra='allow')
try:
Model(x=1, y='a')
except ValidationError as exc:
print(exc)
"""
1 validation error for Model
y
Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='a', input_type=str]
"""
m = Model(x=1, y='2')
assert m.x == 1
assert m.y == 2
assert m.model_dump() == {'x': 1, 'y': 2}
assert m.__pydantic_extra__ == {'y': 2}
|