Descriptors
Source: https://docs.python.org/3/howto/descriptor.html
Descriptors let objects customize attribute lookup, storage, and deletion.
Minimal useless example
class Ten:
def __get__(self, obj, objtype=None):
return 10
class A:
x = 5 # Regular class attribute
y = Ten() # Descriptor instance
>>> a = A() # Make an instance of class A
>>> a.x # Normal attribute lookup
5
>>> a.y # Descriptor lookup
10
Better example
import os
class DirectorySize:
# self is a DirectorySize instance
# obj is a Directory instance
def __get__(self, obj, objtype=None):
return len(os.listdir(obj.dirname))
class Directory:
size = DirectorySize() # Descriptor instance
def __init__(self, dirname):
self.dirname = dirname # Regular instance attribute
__set_name__
import logging
logging.basicConfig(level=logging.INFO)
class LoggedAccess:
def __set_name__(self, owner, name):
self.public_name = name
self.private_name = '_' + name
def __get__(self, obj, objtype=None):
value = getattr(obj, self.private_name)
logging.info('Accessing %r giving %r', self.public_name, value)
return value
def __set__(self, obj, value):
logging.info('Updating %r to %r', self.public_name, value)
setattr(obj, self.private_name, value)
class Person:
name = LoggedAccess() # First descriptor instance
age = LoggedAccess() # Second descriptor instance
def __init__(self, name, age):
self.name = name # Calls the first descriptor
self.age = age # Calls the second descriptor
def birthday(self):
self.age += 1
Closing Thoughts
-
A descriptor is what we call any object that defines get(), set(), or delete().
-
Optionally, descriptors can have a set_name() method. This is only used in cases where a descriptor needs to know either the class where it was created or the name of class variable it was assigned to. (This method, if present, is called even if the class is not a descriptor.)
-
Descriptors get invoked by the dot “operator” during attribute lookup. If a descriptor is accessed indirectly with vars(some_class)[descriptor_name], the descriptor instance is returned without invoking it.
-
Descriptors only work when used as class variables. When put in instances, they have no effect.
-
The main motivation for descriptors is to provide a hook allowing objects stored in class variables to control what happens during attribute lookup.
-
Traditionally, the calling class controls what happens during lookup. Descriptors invert that relationship and allow the data being looked-up to have a say in the matter.
-
Descriptors are used throughout the language. It is how functions turn into bound methods. Common tools like classmethod(), staticmethod(), property(), and functools.cached_property() are all implemented as descriptors.
Complete Practical Example: Validator
A validator is a descriptor for managed attribute access. Prior to storing any data, it verifies that the new value meets various type and range restrictions. If those restrictions aren’t met, it raises an exception to prevent data corruption at its source.
from abc import ABC, abstractmethod
class Validator(ABC):
def __set_name__(self, owner, name):
self.private_name = '_' + name
def __get__(self, obj, objtype=None):
return getattr(obj, self.private_name)
def __set__(self, obj, value):
self.validate(value)
setattr(obj, self.private_name, value)
@abstractmethod
def validate(self, value):
pass
Custom Validators
class OneOf(Validator):
def __init__(self, *options):
self.options = set(options)
def validate(self, value):
if value not in self.options:
raise ValueError(f'Expected {value!r} to be one of {self.options!r}')
class Number(Validator):
def __init__(self, minvalue=None, maxvalue=None):
self.minvalue = minvalue
self.maxvalue = maxvalue
def validate(self, value):
if not isinstance(value, (int, float)):
raise TypeError(f'Expected {value!r} to be an int or float')
if self.minvalue is not None and value < self.minvalue:
raise ValueError(
f'Expected {value!r} to be at least {self.minvalue!r}'
)
if self.maxvalue is not None and value > self.maxvalue:
raise ValueError(
f'Expected {value!r} to be no more than {self.maxvalue!r}'
)
class String(Validator):
def __init__(self, minsize=None, maxsize=None, predicate=None):
self.minsize = minsize
self.maxsize = maxsize
self.predicate = predicate
def validate(self, value):
if not isinstance(value, str):
raise TypeError(f'Expected {value!r} to be an str')
if self.minsize is not None and len(value) < self.minsize:
raise ValueError(
f'Expected {value!r} to be no smaller than {self.minsize!r}'
)
if self.maxsize is not None and len(value) > self.maxsize:
raise ValueError(
f'Expected {value!r} to be no bigger than {self.maxsize!r}'
)
if self.predicate is not None and not self.predicate(value):
raise ValueError(
f'Expected {self.predicate} to be true for {value!r}'
)
Practical application
class Component:
name = String(minsize=3, maxsize=10, predicate=str.isupper)
kind = OneOf('wood', 'metal', 'plastic')
quantity = Number(minvalue=0)
def __init__(self, name, kind, quantity):
self.name = name
self.kind = kind
self.quantity = quantity
>>> Component('Widget', 'metal', 5) # Blocked: 'Widget' is not all uppercase
Traceback (most recent call last):
...
ValueError: Expected <method 'isupper' of 'str' objects> to be true for 'Widget'
>>> Component('WIDGET', 'metle', 5) # Blocked: 'metle' is misspelled
Traceback (most recent call last):
...
ValueError: Expected 'metle' to be one of {'metal', 'plastic', 'wood'}
>>> Component('WIDGET', 'metal', -5) # Blocked: -5 is negative
Traceback (most recent call last):
...
ValueError: Expected -5 to be at least 0
>>> Component('WIDGET', 'metal', 'V') # Blocked: 'V' isn't a number
Traceback (most recent call last):
...
TypeError: Expected 'V' to be an int or float
>>> c = Component('WIDGET', 'metal', 5) # Allowed: The inputs are valid
Technical Tutorial
Descriptors are a powerful, general purpose protocol. They are the mechanism behind properties, methods, static methods, class methods, and super(). They are used throughout Python itself. Descriptors simplify the underlying C code and offer a flexible set of new tools for everyday Python programs.
Descriptor protocol
descr.__get__(self, obj, type=None) -> value
descr.__set__(self, obj, value) -> None
descr.__delete__(self, obj) -> None
- Data descriptors (doesn't define
__set__
and__delete__
) always override instance dictionaries. - Non-data descriptors may be overridden by instance dictionaries.
- The logic for a dotted lookup is in object.getattribute()
- if getattr() exists, it is called whenever getattribute() raises AttributeError (either directly or in one of the descriptor calls)
- object.getattribute() and type.getattribute() make different calls to get(). The first includes the instance and may include the class. The second puts in None for the instance and always includes the class.
- I a descriptor define
__set_name__(owner, name)
,type
metaclass will call it intype.__new__
(class creation), the owner is the class where the descriptor is used, and the name is the class variable the descriptor was assigned to
Attribute Lookup priority
data descriptor > instance variable > non-data descriptor > class variable
Emulate @property
class Property:
"Emulate PyProperty_Type() in Objects/descrobject.c"
def __init__(self, fget=None, fset=None, fdel=None, doc=None):
self.fget = fget
self.fset = fset
self.fdel = fdel
if doc is None and fget is not None:
doc = fget.__doc__
self.__doc__ = doc
def __get__(self, obj, objtype=None):
if obj is None:
return self
if self.fget is None:
raise AttributeError("unreadable attribute")
return self.fget(obj)
def __set__(self, obj, value):
if self.fset is None:
raise AttributeError("can't set attribute")
self.fset(obj, value)
def __delete__(self, obj):
if self.fdel is None:
raise AttributeError("can't delete attribute")
self.fdel(obj)
def getter(self, fget):
return type(self)(fget, self.fset, self.fdel, self.__doc__)
def setter(self, fset):
return type(self)(self.fget, fset, self.fdel, self.__doc__)
def deleter(self, fdel):
return type(self)(self.fget, self.fset, fdel, self.__doc__)
Functions and methods
Functions are non-data descriptors that returns the bound method (MethodType
below) when accessed from an instance (if the obj
param of __get__
is not None), the underlying function is returned as it is when access from the class (obj
is None)
Methods are callables that preprend self
to the function's arguments when called, self
is stored as __self__
and the underlying function as __func__
in the MethodType
objects
class MethodType:
"Emulate PyMethod_Type in Objects/classobject.c"
def __init__(self, func, obj):
self.__func__ = func
self.__self__ = obj
def __call__(self, *args, **kwargs):
func = self.__func__
obj = self.__self__
return func(obj, *args, **kwargs)
class Function:
...
def __get__(self, obj, objtype=None):
"Simulate func_descr_get() in Objects/funcobject.c"
if obj is None:
return self
return MethodType(self, obj)