1# Licensed under the LGPL: https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
2# For details: https://github.com/pylint-dev/astroid/blob/main/LICENSE
3# Copyright (c) https://github.com/pylint-dev/astroid/blob/main/CONTRIBUTORS.txt
4
5"""
6Astroid hook for the dataclasses library.
7
8Support built-in dataclasses, pydantic.dataclasses, and marshmallow_dataclass-annotated
9dataclasses. References:
10- https://docs.python.org/3/library/dataclasses.html
11- https://pydantic-docs.helpmanual.io/usage/dataclasses/
12- https://lovasoa.github.io/marshmallow_dataclass/
13"""
14
15from __future__ import annotations
16
17from collections.abc import Iterator
18from typing import Literal, Union
19
20from astroid import bases, context, nodes
21from astroid.builder import parse
22from astroid.const import PY310_PLUS, PY313_PLUS
23from astroid.exceptions import AstroidSyntaxError, InferenceError, UseInferenceDefault
24from astroid.inference_tip import inference_tip
25from astroid.manager import AstroidManager
26from astroid.typing import InferenceResult
27from astroid.util import Uninferable, UninferableBase, safe_infer
28
# Shape of the value returned by ``_get_field_default``: either ``None`` or a
# ``(keyword_name, node)`` pair naming the keyword the default came from.
_FieldDefaultReturn = Union[
    None,
    tuple[Literal["default"], nodes.NodeNG],
    tuple[Literal["default_factory"], nodes.Call],
]

# Decorator names that are matched when looking for a dataclass decorator.
DATACLASSES_DECORATORS = frozenset({"dataclass"})
# Name of the function whose calls are treated as dataclass field definitions.
FIELD_NAME = "field"
# Root module names accepted as providers of ``dataclass`` and ``field``.
DATACLASS_MODULES = frozenset(
    {"dataclasses", "marshmallow_dataclass", "pydantic.dataclasses"}
)
# Name of the sentinel used as parameter default for ``default_factory`` fields
# in the generated ``__init__``.
DEFAULT_FACTORY = "_HAS_DEFAULT_FACTORY"  # based on typing.py
41
42
def is_decorated_with_dataclass(
    node: nodes.ClassDef, decorator_names: frozenset[str] = DATACLASSES_DECORATORS
) -> bool:
    """Tell whether ``node`` is a class carrying a dataclass decorator.

    :param node: The node to inspect; anything that is not a decorated
        ``ClassDef`` yields ``False``.
    :param decorator_names: Decorator names accepted as dataclass decorators.
    :returns: ``True`` as soon as one decorator of the class looks like a
        dataclass decorator, ``False`` otherwise.
    """
    if isinstance(node, nodes.ClassDef) and node.decorators:
        for candidate in node.decorators.nodes:
            if _looks_like_dataclass_decorator(candidate, decorator_names):
                return True
    return False
54
55
def dataclass_transform(node: nodes.ClassDef) -> None:
    """Rewrite a dataclass to be easily understood by pylint.

    The transform:

    - flags ``node`` as a dataclass;
    - stores an ``Unknown`` placeholder in ``node.instance_attrs`` for every
      dataclass attribute found in the class body;
    - unless ``_check_generate_dataclass_init`` says otherwise, generates the
      source of an ``__init__`` method, parses it and stores the resulting
      function in ``node.locals``; the ``DEFAULT_FACTORY`` sentinel is added
      to the locals of the root node when it is not defined there yet.

    :param node: The dataclass-decorated class to transform (modified in place).
    """
    node.is_dataclass = True

    for assign_node in _get_dataclass_attributes(node):
        name = assign_node.target.name

        # The placeholder is parented to the AnnAssign statement it stands for.
        rhs_node = nodes.Unknown(
            lineno=assign_node.lineno,
            col_offset=assign_node.col_offset,
            parent=assign_node,
        )
        rhs_node = AstroidManager().visit_transforms(rhs_node)
        node.instance_attrs[name] = [rhs_node]

    if not _check_generate_dataclass_init(node):
        return

    kw_only_decorated = False
    if PY310_PLUS and node.decorators.nodes:
        for decorator in node.decorators.nodes:
            # Only a *called* dataclass decorator can carry ``kw_only``.
            # Other decorators are skipped rather than aborting the scan, so
            # that stacking an unrelated decorator on top of
            # ``@dataclass(kw_only=True)`` does not hide the setting.
            if not isinstance(decorator, nodes.Call):
                continue
            if not _looks_like_dataclass_decorator(decorator):
                continue
            for keyword in decorator.keywords:
                if keyword.arg == "kw_only":
                    # bool_value() may not return a real bool when the value
                    # cannot be inferred; only a definite True enables kw_only.
                    kw_only_decorated = keyword.value.bool_value() is True

    init_str = _generate_dataclass_init(
        node,
        list(_get_dataclass_attributes(node, init=True)),
        kw_only_decorated,
    )

    try:
        init_node = parse(init_str)["__init__"]
    except AstroidSyntaxError:
        # Best effort: if the generated source is not valid Python the class
        # is simply left without a synthesised __init__.
        pass
    else:
        init_node.parent = node
        init_node.lineno, init_node.col_offset = None, None
        node.locals["__init__"] = [init_node]

        # The generated signature may reference the DEFAULT_FACTORY sentinel,
        # so make sure the name exists at the root of the tree.
        root = node.root()
        if DEFAULT_FACTORY not in root.locals:
            new_assign = parse(f"{DEFAULT_FACTORY} = object()").body[0]
            new_assign.parent = root
            root.locals[DEFAULT_FACTORY] = [new_assign.targets[0]]
104
105
def _get_dataclass_attributes(
    node: nodes.ClassDef, init: bool = False
) -> Iterator[nodes.AnnAssign]:
    """Iterate over the annotated assignments that define dataclass attributes.

    Only ``AnnAssign`` statements located directly in the class body whose
    target is a plain name are considered. ClassVar annotations and the
    KW_ONLY sentinel are always left out.

    :param node: The dataclass to inspect.
    :param init: When True, InitVar-annotated statements are yielded as well;
        when False they are skipped.
    """
    for statement in node.body:
        is_named_annotation = isinstance(statement, nodes.AnnAssign) and isinstance(
            statement.target, nodes.AssignName
        )
        if not is_named_annotation:
            continue

        # Annotation is never None
        if _is_class_var(statement.annotation):  # type: ignore[arg-type]
            continue

        if _is_keyword_only_sentinel(statement.annotation):
            continue

        # Annotation is never None
        if init or not _is_init_var(statement.annotation):  # type: ignore[arg-type]
            yield statement
131
132
def _check_generate_dataclass_init(node: nodes.ClassDef) -> bool:
    """Decide whether an ``__init__`` method has to be generated for ``node``.

    Generation is wanted when both hold:
    - ``node`` does not already have ``__init__`` among its locals;
    - the dataclass decorator was not called with a falsy ``init`` keyword.

    When several called decorators look like a dataclass decorator, the last
    one in the decorator list is the one whose keywords are inspected.
    """
    if "__init__" in node.locals:
        return False

    dataclass_call = None
    for decorator in node.decorators.nodes:
        if isinstance(decorator, nodes.Call) and _looks_like_dataclass_decorator(
            decorator
        ):
            dataclass_call = decorator

    # A bare ``@dataclass`` (no call) cannot disable __init__ generation.
    if dataclass_call is None:
        return True

    # Look for a keyword argument of the form init=False
    for keyword in dataclass_call.keywords:
        if (
            keyword.arg == "init"
            and not keyword.value.bool_value()  # type: ignore[union-attr] # value is never None
        ):
            return False
    return True
161
162
def _find_arguments_from_base_classes(
    node: nodes.ClassDef,
) -> tuple[
    dict[str, tuple[str | None, str | None]], dict[str, tuple[str | None, str | None]]
]:
    """Collect the ``__init__`` argument data of every dataclass in the MRO.

    The MRO is walked in reverse, so data coming from classes earlier in the
    MRO overwrites data for the same argument name coming from later ones.

    :returns: Two mappings (positional arguments, keyword-only arguments) as
        produced by ``_get_arguments_data`` on each ``__init__``'s arguments.
    """
    pos_only_store: dict[str, tuple[str | None, str | None]] = {}
    kw_only_store: dict[str, tuple[str | None, str | None]] = {}

    for base in reversed(node.mro()):
        # Bases that are not dataclasses, or have no __init__ in their locals,
        # contribute nothing.
        if not base.is_dataclass or "__init__" not in base.locals:
            continue
        base_init: nodes.FunctionDef = base.locals["__init__"][0]

        inherited_pos, inherited_kw = base_init.args._get_arguments_data()
        # TODO: A positional argument without default following one with a
        # default should lead to an Uninferable, as this would raise a
        # TypeError at runtime. However, transforms can't return Uninferables
        # currently.
        pos_only_store.update(inherited_pos)
        kw_only_store.update(inherited_kw)

    return pos_only_store, kw_only_store
196
197
198def _parse_arguments_into_strings(
199 pos_only_store: dict[str, tuple[str | None, str | None]],
200 kw_only_store: dict[str, tuple[str | None, str | None]],
201) -> tuple[str, str]:
202 """Parse positional and keyword arguments into strings for an __init__ method."""
203 pos_only, kw_only = "", ""
204 for pos_arg, data in pos_only_store.items():
205 pos_only += pos_arg
206 if data[0]:
207 pos_only += ": " + data[0]
208 if data[1]:
209 pos_only += " = " + data[1]
210 pos_only += ", "
211 for kw_arg, data in kw_only_store.items():
212 kw_only += kw_arg
213 if data[0]:
214 kw_only += ": " + data[0]
215 if data[1]:
216 kw_only += " = " + data[1]
217 kw_only += ", "
218
219 return pos_only, kw_only
220
221
def _get_previous_field_default(node: nodes.ClassDef, name: str) -> nodes.NodeNG | None:
    """Look up the default of a field called ``name`` defined in the MRO.

    Dataclasses of the MRO are visited in reverse order. The first definition
    of ``name`` that is an annotated assignment of a dataclass field call with
    a usable default wins.

    :returns: The node of that default, or ``None`` when none is found.
    """
    for base in reversed(node.mro()):
        if not base.is_dataclass or name not in base.locals:
            continue
        for definition in base.locals[name]:
            statement = definition.parent
            if not isinstance(statement, nodes.AnnAssign):
                continue
            field_value = statement.value
            if not field_value or not isinstance(field_value, nodes.Call):
                continue
            if not _looks_like_dataclass_field_call(field_value):
                continue
            found = _get_field_default(field_value)
            if found:
                return found[1]
    return None
239
240
def _generate_dataclass_init(
    node: nodes.ClassDef, assigns: list[nodes.AnnAssign], kw_only_decorated: bool
) -> str:
    """Return the source code of an init method for a dataclass.

    :param node: The dataclass the method is generated for.
    :param assigns: The annotated assignments (fields and InitVars) that may
        become parameters of the method.
    :param kw_only_decorated: Whether the dataclass decorator requested
        keyword-only parameters; an explicit ``kw_only`` keyword on a field
        call takes precedence over it.
    :returns: A ``def __init__(...) -> None:`` definition as a string. Its
        parameters combine the arguments collected from dataclass bases with
        the ones derived from ``assigns``; its body assigns every non-InitVar
        parameter to ``self`` (or is ``pass`` when there is nothing to assign).
    """
    # pylint: disable = too-many-locals, too-many-branches, too-many-statements

    params: list[str] = []
    kw_only_params: list[str] = []
    assignments: list[str] = []

    prev_pos_only_store, prev_kw_only_store = _find_arguments_from_base_classes(node)

    for assign in assigns:
        name, annotation, value = assign.target.name, assign.annotation, assign.value

        # Check whether this assign is overridden by a property assignment
        property_node: nodes.FunctionDef | None = None
        for additional_assign in node.locals[name]:
            if not isinstance(additional_assign, nodes.FunctionDef):
                continue
            if not additional_assign.decorators:
                continue
            if "builtins.property" in additional_assign.decoratornames():
                property_node = additional_assign
                break

        is_field = isinstance(value, nodes.Call) and _looks_like_dataclass_field_call(
            value, check_scope=False
        )

        if is_field:
            # Skip any fields that have `init=False`
            if any(
                keyword.arg == "init" and not keyword.value.bool_value()
                for keyword in value.keywords  # type: ignore[union-attr] # value is never None
            ):
                # Also remove the name from the previous arguments to be inserted later
                prev_pos_only_store.pop(name, None)
                prev_kw_only_store.pop(name, None)
                continue

        if _is_init_var(annotation):  # type: ignore[arg-type] # annotation is never None
            init_var = True
            if isinstance(annotation, nodes.Subscript):
                annotation = annotation.slice
            else:
                # Cannot determine type annotation for parameter from InitVar
                annotation = None
            assignment_str = ""
        else:
            init_var = False
            assignment_str = f"self.{name} = {name}"

        ann_str, default_str = None, None
        if annotation is not None:
            ann_str = annotation.as_string()

        if value:
            if is_field:
                result = _get_field_default(value)  # type: ignore[arg-type]
                if result:
                    default_type, default_node = result
                    if default_type == "default":
                        default_str = default_node.as_string()
                    elif default_type == "default_factory":
                        # The parameter defaults to the sentinel and the
                        # factory is only called when no value was passed.
                        default_str = DEFAULT_FACTORY
                        assignment_str = (
                            f"self.{name} = {default_node.as_string()} "
                            f"if {name} is {DEFAULT_FACTORY} else {name}"
                        )
            else:
                default_str = value.as_string()
        elif property_node:
            # We set the result of the property call as default
            # This hides the fact that this would normally be a 'property object'
            # But we can't represent those as string
            try:
                # Call str to make sure also Uninferable gets stringified
                default_str = str(
                    next(property_node.infer_call_result(None)).as_string()
                )
            except (InferenceError, StopIteration):
                pass
        else:
            # Even with `init=False` the default value still can be propagated to
            # later assignments. Creating weird signatures like:
            # (self, a: str = 1) -> None
            previous_default = _get_previous_field_default(node, name)
            if previous_default:
                default_str = previous_default.as_string()

        # Construct the param string to add to the init if necessary
        param_str = name
        if ann_str is not None:
            param_str += f": {ann_str}"
        if default_str is not None:
            param_str += f" = {default_str}"

        # An explicit `kw_only` keyword on the field call decides where the
        # parameter goes, whether or not the class is kw_only decorated.
        explicit_kw_only = (
            [k for k in value.keywords if k.arg == "kw_only"]  # type: ignore[union-attr]
            if is_field
            else []
        )
        if explicit_kw_only:
            if explicit_kw_only[0].value.bool_value():
                kw_only_params.append(param_str)
            else:
                params.append(param_str)
        elif kw_only_decorated:
            # If kw_only decorated, we need to add all parameters to the kw_only_params
            if name in prev_kw_only_store:
                prev_kw_only_store[name] = (ann_str, default_str)
            else:
                kw_only_params.append(param_str)
        elif name in prev_pos_only_store:
            # If the name was previously seen, overwrite that data
            prev_pos_only_store[name] = (ann_str, default_str)
        elif name in prev_kw_only_store:
            params = [name, *params]
            prev_kw_only_store.pop(name)
        else:
            params.append(param_str)

        # Every non-InitVar parameter is assigned in the body, including the
        # fields that carry an explicit `kw_only` keyword.
        if not init_var:
            assignments.append(assignment_str)

    prev_pos_only, prev_kw_only = _parse_arguments_into_strings(
        prev_pos_only_store, prev_kw_only_store
    )

    # Construct the new init method parameter string
    # First we do the positional only parameters, making sure to add the
    # self parameter and the comma to allow adding keyword only parameters
    params_string = "" if "self" in prev_pos_only else "self, "
    params_string += prev_pos_only + ", ".join(params)
    if not params_string.endswith(", "):
        params_string += ", "

    # Then we add the keyword only parameters
    if prev_kw_only or kw_only_params:
        params_string += "*, "
    params_string += f"{prev_kw_only}{', '.join(kw_only_params)}"

    assignments_string = "\n ".join(assignments) if assignments else "pass"
    return f"def __init__({params_string}) -> None:\n {assignments_string}"
388
389
def infer_dataclass_attribute(
    node: nodes.Unknown, ctx: context.InferenceContext | None = None
) -> Iterator[InferenceResult]:
    """Inference tip for the Unknown placeholder of a dataclass attribute.

    The placeholder is expected to be the child of an ``AnnAssign``; otherwise
    ``Uninferable`` is the only result. For an ``AnnAssign`` parent, the
    results of inferring the assigned value (if any) come first, followed by
    what the annotation gives (``Uninferable`` if there is no annotation).
    """
    statement = node.parent
    if not isinstance(statement, nodes.AnnAssign):
        yield Uninferable
        return

    annotation = statement.annotation
    default_value = statement.value

    if default_value is not None:
        yield from default_value.infer(context=ctx)

    if annotation is None:
        yield Uninferable
    else:
        yield from _infer_instance_from_annotation(annotation, ctx=ctx)
411
412
def infer_dataclass_field_call(
    node: nodes.Call, ctx: context.InferenceContext | None = None
) -> Iterator[InferenceResult]:
    """Inference tip for dataclass field calls.

    The call is inferred as its default: the ``default`` node itself, or the
    result of calling ``default_factory`` without arguments. Without a usable
    default, ``Uninferable`` is yielded.

    :raises UseInferenceDefault: If the call is not the direct child of an
        ``AnnAssign`` or ``Assign`` statement.
    """
    if not isinstance(node.parent, (nodes.AnnAssign, nodes.Assign)):
        raise UseInferenceDefault

    field_default = _get_field_default(node)
    if not field_default:
        yield Uninferable
        return

    kind, default_node = field_default
    if kind == "default":
        yield from default_node.infer(context=ctx)
        return

    # default_factory: re-parse the factory call from its source and attach it
    # to the statement holding the field call before inferring it.
    factory_call = parse(default_node.as_string()).body[0].value
    factory_call.parent = node.parent
    yield from factory_call.infer(context=ctx)
430
431
def _looks_like_dataclass_decorator(
    node: nodes.NodeNG, decorator_names: frozenset[str] = DATACLASSES_DECORATORS
) -> bool:
    """Tell whether ``node`` looks like a dataclass decorator.

    For a decorator with arguments (a ``Call``), the called expression is the
    one examined. Inference is tried first: the decorator matches when it
    resolves to a function whose name is in ``decorator_names`` and whose root
    module is one of ``DATACLASS_MODULES``. When inference gives nothing
    usable, the check falls back to comparing the bare name (or attribute
    name) against ``decorator_names``.
    """
    target = node.func if isinstance(node, nodes.Call) else node

    try:
        inferred = next(target.infer())
    except (InferenceError, StopIteration):
        inferred = Uninferable

    if not isinstance(inferred, UninferableBase):
        return (
            isinstance(inferred, nodes.FunctionDef)
            and inferred.name in decorator_names
            and inferred.root().name in DATACLASS_MODULES
        )

    # Inference failed: match on the name as written in the source.
    if isinstance(target, nodes.Name):
        return target.name in decorator_names
    if isinstance(target, nodes.Attribute):
        return target.attrname in decorator_names
    return False
460
461
def _looks_like_dataclass_attribute(node: nodes.Unknown) -> bool:
    """Tell whether ``node`` is the child of an AnnAssign statement whose
    scope is a dataclass-decorated class.
    """
    parent = node.parent
    if not parent:
        return False

    enclosing_scope = parent.scope()
    if not isinstance(parent, nodes.AnnAssign):
        return False
    if not isinstance(enclosing_scope, nodes.ClassDef):
        return False
    return is_decorated_with_dataclass(enclosing_scope)
476
477
def _looks_like_dataclass_field_call(
    node: nodes.Call, check_scope: bool = True
) -> bool:
    """Tell whether ``node`` calls the ``field`` function of a dataclass module.

    :param node: The call to examine.
    :param check_scope: When True, the call must additionally belong to an
        ``AnnAssign`` statement with a value whose scope is a
        dataclass-decorated class. When False, that check is skipped.
    :returns: True if the called object is inferred as a function named
        ``FIELD_NAME`` whose root module is one of ``DATACLASS_MODULES``.
    """
    if check_scope:
        statement = node.statement()
        enclosing_scope = statement.scope()
        in_dataclass_body = (
            isinstance(statement, nodes.AnnAssign)
            and statement.value is not None
            and isinstance(enclosing_scope, nodes.ClassDef)
            and is_decorated_with_dataclass(enclosing_scope)
        )
        if not in_dataclass_body:
            return False

    try:
        callee = next(node.func.infer())
    except (InferenceError, StopIteration):
        return False

    return (
        isinstance(callee, nodes.FunctionDef)
        and callee.name == FIELD_NAME
        and callee.root().name in DATACLASS_MODULES
    )
506
507
508def _looks_like_dataclasses(node: nodes.Module) -> bool:
509 return node.qname() == "dataclasses"
510
511
512def _resolve_private_replace_to_public(node: nodes.Module) -> None:
513 """In python/cpython@6f3c138, a _replace() method was extracted from
514 replace(), and this indirection made replace() uninferable."""
515 if "_replace" in node.locals:
516 node.locals["replace"] = node.locals["_replace"]
517
518
519def _get_field_default(field_call: nodes.Call) -> _FieldDefaultReturn:
520 """Return a the default value of a field call, and the corresponding keyword
521 argument name.
522
523 field(default=...) results in the ... node
524 field(default_factory=...) results in a Call node with func ... and no arguments
525
526 If neither or both arguments are present, return ("", None) instead,
527 indicating that there is not a valid default value.
528 """
529 default, default_factory = None, None
530 for keyword in field_call.keywords:
531 if keyword.arg == "default":
532 default = keyword.value
533 elif keyword.arg == "default_factory":
534 default_factory = keyword.value
535
536 if default is not None and default_factory is None:
537 return "default", default
538
539 if default is None and default_factory is not None:
540 new_call = nodes.Call(
541 lineno=field_call.lineno,
542 col_offset=field_call.col_offset,
543 parent=field_call.parent,
544 end_lineno=field_call.end_lineno,
545 end_col_offset=field_call.end_col_offset,
546 )
547 new_call.postinit(func=default_factory, args=[], keywords=[])
548 return "default_factory", new_call
549
550 return None
551
552
553def _is_class_var(node: nodes.NodeNG) -> bool:
554 """Return True if node is a ClassVar, with or without subscripting."""
555 try:
556 inferred = next(node.infer())
557 except (InferenceError, StopIteration):
558 return False
559
560 return getattr(inferred, "name", "") == "ClassVar"
561
562
def _is_keyword_only_sentinel(node: nodes.NodeNG) -> bool:
    """Tell whether ``node`` is the KW_ONLY sentinel.

    Always False when ``PY310_PLUS`` is not set. Otherwise the node must be
    inferred as an instance whose qualified name is
    ``dataclasses._KW_ONLY_TYPE``.
    """
    if PY310_PLUS:
        inferred = safe_infer(node)
        if isinstance(inferred, bases.Instance):
            return inferred.qname() == "dataclasses._KW_ONLY_TYPE"
    return False
572
573
574def _is_init_var(node: nodes.NodeNG) -> bool:
575 """Return True if node is an InitVar, with or without subscripting."""
576 try:
577 inferred = next(node.infer())
578 except (InferenceError, StopIteration):
579 return False
580
581 return getattr(inferred, "name", "") == "InitVar"
582
583
584# Allowed typing classes for which we support inferring instances
585_INFERABLE_TYPING_TYPES = frozenset(
586 (
587 "Dict",
588 "FrozenSet",
589 "List",
590 "Set",
591 "Tuple",
592 )
593)
594
595
def _infer_instance_from_annotation(
    node: nodes.NodeNG, ctx: context.InferenceContext | None = None
) -> Iterator[UninferableBase | bases.Instance]:
    """Infer an instance corresponding to the type annotation represented by node.

    Currently has limited support for the typing module.

    Exactly one result is yielded:

    - ``Uninferable`` when the annotation cannot be inferred or its first
      inference result is not a class;
    - for classes rooted in ``typing``, ``_collections_abc`` or a module with
      an empty name, an instance only if the class name is listed in
      ``_INFERABLE_TYPING_TYPES``, ``Uninferable`` otherwise;
    - an instance of the class in every other case.
    """
    try:
        klass = next(node.infer(context=ctx))
    except (InferenceError, StopIteration):
        yield Uninferable
        # Stop here: falling through would report Uninferable a second time.
        return

    if not isinstance(klass, nodes.ClassDef):
        yield Uninferable
    elif klass.root().name in {
        "typing",
        "_collections_abc",
        "",
    }:  # "" because of synthetic nodes in brain_typing.py
        if klass.name in _INFERABLE_TYPING_TYPES:
            yield klass.instantiate_class()
        else:
            yield Uninferable
    else:
        yield klass.instantiate_class()
621
622
def register(manager: AstroidManager) -> None:
    """Register the dataclass transforms and inference tips on ``manager``."""
    if PY313_PLUS:
        manager.register_transform(
            nodes.Module,
            _resolve_private_replace_to_public,
            _looks_like_dataclasses,
        )

    # (node class, transform, predicate) triples, registered in this order.
    transforms = (
        (nodes.ClassDef, dataclass_transform, is_decorated_with_dataclass),
        (
            nodes.Call,
            inference_tip(infer_dataclass_field_call, raise_on_overwrite=True),
            _looks_like_dataclass_field_call,
        ),
        (
            nodes.Unknown,
            inference_tip(infer_dataclass_attribute, raise_on_overwrite=True),
            _looks_like_dataclass_attribute,
        ),
    )
    for node_class, transform, predicate in transforms:
        manager.register_transform(node_class, transform, predicate)