1# Licensed under the LGPL: https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
2# For details: https://github.com/pylint-dev/astroid/blob/main/LICENSE
3# Copyright (c) https://github.com/pylint-dev/astroid/blob/main/CONTRIBUTORS.txt
4
5"""
6Astroid hook for the dataclasses library.
7
8Support built-in dataclasses, pydantic.dataclasses, and marshmallow_dataclass-annotated
9dataclasses. References:
10- https://docs.python.org/3/library/dataclasses.html
11- https://pydantic-docs.helpmanual.io/usage/dataclasses/
12- https://lovasoa.github.io/marshmallow_dataclass/
13"""
14
15from __future__ import annotations
16
17from collections.abc import Iterator
18from typing import Literal
19
20from astroid import bases, context, nodes
21from astroid.brain.helpers import is_class_var
22from astroid.builder import parse
23from astroid.const import PY313_PLUS
24from astroid.exceptions import AstroidSyntaxError, InferenceError, UseInferenceDefault
25from astroid.inference_tip import inference_tip
26from astroid.manager import AstroidManager
27from astroid.typing import InferenceResult
28from astroid.util import Uninferable, UninferableBase, safe_infer
29
# Return type of ``_get_field_default``: ``None`` when a field call carries no
# usable default, otherwise a ``(kind, node)`` pair whose first item names the
# keyword (``"default"`` or ``"default_factory"``) the node was built from.
_FieldDefaultReturn = (
    None
    | tuple[Literal["default"], nodes.NodeNG]
    | tuple[Literal["default_factory"], nodes.Call]
)
35
# Decorator names that are treated as "the dataclass decorator".
DATACLASSES_DECORATORS = frozenset({"dataclass"})
# Name of the callable used to declare a dataclass field.
FIELD_NAME = "field"
# Modules whose decorator / ``field`` function are recognised by this brain.
DATACLASS_MODULES = frozenset(
    {"dataclasses", "marshmallow_dataclass", "pydantic.dataclasses"}
)
# Module-level name used as the placeholder default of ``default_factory``
# fields in the generated ``__init__`` signature.
DEFAULT_FACTORY = "_HAS_DEFAULT_FACTORY"  # based on typing.py
42
43
def is_decorated_with_dataclass(
    node: nodes.ClassDef, decorator_names: frozenset[str] = DATACLASSES_DECORATORS
) -> bool:
    """Tell whether ``node`` is a class carrying a `dataclass` decorator.

    Anything that is not a ``ClassDef``, or a class without decorators, is
    rejected outright. Otherwise each decorator is checked in turn against
    ``decorator_names``; the first match is enough.
    """
    if not isinstance(node, nodes.ClassDef):
        return False
    if not node.decorators:
        return False

    for candidate in node.decorators.nodes:
        if _looks_like_dataclass_decorator(candidate, decorator_names):
            return True
    return False
55
56
def dataclass_transform(node: nodes.ClassDef) -> None:
    """Rewrite a dataclass to be easily understood by pylint.

    The transform works in three steps:

    1. flag the class as a dataclass and register an ``Unknown`` placeholder
       node in ``instance_attrs`` for every dataclass attribute;
    2. unless the class opts out (own ``__init__`` or ``init=False``), build
       the source of an ``__init__`` method, parse it and attach it to the
       class locals;
    3. make sure the module defines the ``DEFAULT_FACTORY`` name that the
       generated signature may refer to.
    """
    node.is_dataclass = True

    for assign_node in _get_dataclass_attributes(node):
        name = assign_node.target.name

        rhs_node = nodes.Unknown(
            lineno=assign_node.lineno,
            col_offset=assign_node.col_offset,
            parent=assign_node,
        )
        rhs_node = AstroidManager().visit_transforms(rhs_node)
        node.instance_attrs[name] = [rhs_node]

    if not _check_generate_dataclass_init(node):
        return

    # Look for ``kw_only=...`` on the dataclass decorator call itself.
    # Decorators that are not calls, or that are not the dataclass decorator,
    # are skipped rather than aborting the search: stacking another decorator
    # on the class must not hide (or override) ``@dataclass(kw_only=True)``.
    kw_only_decorated = False
    for decorator in node.decorators.nodes:
        if not isinstance(decorator, nodes.Call):
            continue
        if not _looks_like_dataclass_decorator(decorator):
            continue
        for keyword in decorator.keywords:
            if keyword.arg == "kw_only":
                kw_only_decorated = keyword.value.bool_value() is True

    init_str = _generate_dataclass_init(
        node,
        list(_get_dataclass_attributes(node, init=True)),
        kw_only_decorated,
    )

    try:
        init_node = parse(init_str)["__init__"]
    except AstroidSyntaxError:
        # The generated source could not be parsed: leave the class without a
        # synthetic ``__init__`` instead of failing the whole transform.
        pass
    else:
        init_node.parent = node
        # The method does not exist in the analysed source, so it has no position.
        init_node.lineno, init_node.col_offset = None, None
        node.locals["__init__"] = [init_node]

        # The generated signature may use DEFAULT_FACTORY as a default value,
        # so that name has to be resolvable from the module.
        root = node.root()
        if DEFAULT_FACTORY not in root.locals:
            new_assign = parse(f"{DEFAULT_FACTORY} = object()").body[0]
            new_assign.parent = root
            root.locals[DEFAULT_FACTORY] = [new_assign.targets[0]]
105
106
def _get_dataclass_attributes(
    node: nodes.ClassDef, init: bool = False
) -> Iterator[nodes.AnnAssign]:
    """Yield the annotated assignments of ``node`` that are dataclass attributes.

    Only ``name: annotation [= value]`` statements found directly in the class
    body qualify. Class variables and the keyword-only sentinel are always
    left out; InitVars are left out too unless ``init`` is True.
    """
    for statement in node.body:
        if not isinstance(statement, nodes.AnnAssign):
            continue
        if not isinstance(statement.target, nodes.AssignName):
            continue

        # The annotation of an AnnAssign is never None
        annotation = statement.annotation
        excluded = (
            is_class_var(annotation)  # type: ignore[arg-type]
            or _is_keyword_only_sentinel(annotation)
            or (not init and _is_init_var(annotation))  # type: ignore[arg-type]
        )
        if not excluded:
            yield statement
133
134
def _check_generate_dataclass_init(node: nodes.ClassDef) -> bool:
    """Decide whether an ``__init__`` method has to be generated for ``node``.

    The answer is True when both hold:
    - node doesn't define its own __init__ method
    - the dataclass decorator was called *without* the keyword argument init=False
    """
    if "__init__" in node.locals:
        return False

    # Only decorators written as calls can carry an ``init=...`` keyword.
    dataclass_calls = [
        decorator
        for decorator in node.decorators.nodes
        if isinstance(decorator, nodes.Call)
        and _looks_like_dataclass_decorator(decorator)
    ]
    if not dataclass_calls:
        return True

    # When several decorators match, the last one listed is inspected.
    for keyword in dataclass_calls[-1].keywords:
        # keyword.value is never None
        if keyword.arg == "init" and keyword.value.bool_value() is False:  # type: ignore[union-attr]
            return False
    return True
163
164
def _find_arguments_from_base_classes(
    node: nodes.ClassDef,
) -> tuple[
    dict[str, tuple[str | None, str | None]], dict[str, tuple[str | None, str | None]]
]:
    """Collect the ``__init__`` arguments of the dataclasses in the MRO of ``node``.

    The MRO is walked from the most distant class towards ``node``, so data
    coming from a closer class replaces data recorded for the same argument
    name earlier. Two mappings are returned, the first for the positional
    arguments and the second for the keyword-only ones, both filled from
    ``Arguments._get_arguments_data()`` of each ``__init__`` found.
    """
    positional: dict[str, tuple[str | None, str | None]] = {}
    keyword_only: dict[str, tuple[str | None, str | None]] = {}

    for base in reversed(node.mro()):
        if not base.is_dataclass:
            continue
        try:
            base_init: nodes.FunctionDef = base.locals["__init__"][0]
        except KeyError:
            # This class contributes no __init__ of its own
            continue

        base_positional, base_keyword_only = base_init.args._get_arguments_data()
        # TODO: A positional argument without default that follows one with a
        # default raises a TypeError at runtime, so this should result in an
        # Uninferable. However, transforms can't return Uninferables currently.
        positional.update(base_positional)
        keyword_only.update(base_keyword_only)

    return positional, keyword_only
198
199
200def _parse_arguments_into_strings(
201 pos_only_store: dict[str, tuple[str | None, str | None]],
202 kw_only_store: dict[str, tuple[str | None, str | None]],
203) -> tuple[str, str]:
204 """Parse positional and keyword arguments into strings for an __init__ method."""
205 pos_only, kw_only = "", ""
206 for pos_arg, data in pos_only_store.items():
207 pos_only += pos_arg
208 if data[0]:
209 pos_only += ": " + data[0]
210 if data[1]:
211 pos_only += " = " + data[1]
212 pos_only += ", "
213 for kw_arg, data in kw_only_store.items():
214 kw_only += kw_arg
215 if data[0]:
216 kw_only += ": " + data[0]
217 if data[1]:
218 kw_only += " = " + data[1]
219 kw_only += ", "
220
221 return pos_only, kw_only
222
223
def _get_previous_field_default(node: nodes.ClassDef, name: str) -> nodes.NodeNG | None:
    """Look up the default given to field ``name`` by a dataclass in the MRO.

    The MRO of ``node`` is walked in reverse. Only annotated assignments whose
    value is a dataclass ``field(...)`` call are considered; the node of the
    first usable default found is returned, or None when there is none.
    """
    for base in reversed(node.mro()):
        if not base.is_dataclass or name not in base.locals:
            continue

        for assign in base.locals[name]:
            statement = assign.parent
            if not isinstance(statement, nodes.AnnAssign):
                continue
            if not statement.value:
                continue
            if not isinstance(statement.value, nodes.Call):
                continue
            if not _looks_like_dataclass_field_call(statement.value):
                continue

            found = _get_field_default(statement.value)
            if found:
                # Second item of the pair is the default node itself
                return found[1]
    return None
241
242
def _generate_dataclass_init(
    node: nodes.ClassDef, assigns: list[nodes.AnnAssign], kw_only_decorated: bool
) -> str:
    """Return the source of an init method for a dataclass given the targets.

    ``assigns`` are the annotated assignments (fields and InitVars) of ``node``
    that may become parameters. ``kw_only_decorated`` tells whether the class
    was declared keyword-only; an explicit ``kw_only`` keyword on a field takes
    precedence over it. Arguments of the ``__init__`` methods of dataclasses in
    the MRO are placed before the parameters contributed by ``node`` itself.

    The returned string is a complete ``def __init__(...) -> None:`` definition
    whose body assigns every non-InitVar parameter to ``self``, or is ``pass``
    when there is nothing to assign.
    """
    # pylint: disable = too-many-locals, too-many-branches, too-many-statements

    params: list[str] = []
    kw_only_params: list[str] = []
    assignments: list[str] = []

    prev_pos_only_store, prev_kw_only_store = _find_arguments_from_base_classes(node)

    for assign in assigns:
        name, annotation, value = assign.target.name, assign.annotation, assign.value

        # Check whether this assign is overridden by a property assignment
        property_node: nodes.FunctionDef | None = None
        for additional_assign in node.locals[name]:
            if not isinstance(additional_assign, nodes.FunctionDef):
                continue
            if not additional_assign.decorators:
                continue
            if "builtins.property" in additional_assign.decoratornames():
                property_node = additional_assign
                break

        is_field = isinstance(value, nodes.Call) and _looks_like_dataclass_field_call(
            value, check_scope=False
        )

        if is_field:
            # Skip any fields that have `init=False`
            if any(
                keyword.arg == "init" and (keyword.value.bool_value() is False)
                for keyword in value.keywords  # type: ignore[union-attr] # value is never None
            ):
                # Also remove the name from the previous arguments to be inserted later
                prev_pos_only_store.pop(name, None)
                prev_kw_only_store.pop(name, None)
                continue

        if _is_init_var(annotation):  # type: ignore[arg-type] # annotation is never None
            init_var = True
            if isinstance(annotation, nodes.Subscript):
                annotation = annotation.slice
            else:
                # Cannot determine type annotation for parameter from InitVar
                annotation = None
            assignment_str = ""
        else:
            init_var = False
            assignment_str = f"self.{name} = {name}"

        ann_str, default_str = None, None
        if annotation is not None:
            ann_str = annotation.as_string()

        if value:
            if is_field:
                result = _get_field_default(value)  # type: ignore[arg-type]
                if result:
                    default_type, default_node = result
                    if default_type == "default":
                        default_str = default_node.as_string()
                    elif default_type == "default_factory":
                        # The parameter defaults to the sentinel and the factory
                        # is only called when the sentinel was left in place.
                        default_str = DEFAULT_FACTORY
                        assignment_str = (
                            f"self.{name} = {default_node.as_string()} "
                            f"if {name} is {DEFAULT_FACTORY} else {name}"
                        )
            else:
                default_str = value.as_string()
        elif property_node:
            # We set the result of the property call as default
            # This hides the fact that this would normally be a 'property object'
            # But we can't represent those as string
            try:
                # Call str to make sure also Uninferable gets stringified
                default_str = str(
                    next(property_node.infer_call_result(None)).as_string()
                )
            except (InferenceError, StopIteration):
                pass
        else:
            # Even with `init=False` the default value still can be propagated to
            # later assignments. Creating weird signatures like:
            # (self, a: str = 1) -> None
            previous_default = _get_previous_field_default(node, name)
            if previous_default:
                default_str = previous_default.as_string()

        # Construct the param string to add to the init if necessary
        param_str = name
        if ann_str is not None:
            param_str += f": {ann_str}"
        if default_str is not None:
            param_str += f" = {default_str}"

        # An explicit `kw_only` keyword on the field decides where the parameter
        # goes, whether or not the class itself is kw_only decorated
        field_kw_only = (
            [k for k in value.keywords if k.arg == "kw_only"]  # type: ignore[union-attr]
            if is_field
            else []
        )
        if field_kw_only:
            if field_kw_only[0].value.bool_value() is True:
                kw_only_params.append(param_str)
            else:
                params.append(param_str)
        elif kw_only_decorated:
            # If kw_only decorated, we need to add all parameters to the kw_only_params
            if name in prev_kw_only_store:
                prev_kw_only_store[name] = (ann_str, default_str)
            else:
                kw_only_params.append(param_str)
        elif name in prev_pos_only_store:
            # If the name was previously seen, overwrite that data
            prev_pos_only_store[name] = (ann_str, default_str)
        elif name in prev_kw_only_store:
            # NOTE(review): only the bare name is re-inserted here, so the
            # annotation and default of this parameter are dropped from the
            # signature - confirm this is intended.
            params = [name, *params]
            prev_kw_only_store.pop(name)
        else:
            params.append(param_str)

        # This has to run for every parameter, including fields that carry an
        # explicit `kw_only` keyword: they are assigned to self like any other.
        if not init_var:
            assignments.append(assignment_str)

    prev_pos_only, prev_kw_only = _parse_arguments_into_strings(
        prev_pos_only_store, prev_kw_only_store
    )

    # Construct the new init method parameter string
    # First we do the positional only parameters, making sure to add the
    # self parameter and the comma to allow adding keyword only parameters
    params_string = "" if "self" in prev_pos_only else "self, "
    params_string += prev_pos_only + ", ".join(params)
    if not params_string.endswith(", "):
        params_string += ", "

    # Then we add the keyword only parameters
    if prev_kw_only or kw_only_params:
        params_string += "*, "
    params_string += f"{prev_kw_only}{', '.join(kw_only_params)}"

    assignments_string = "\n ".join(assignments) if assignments else "pass"
    return f"def __init__({params_string}) -> None:\n {assignments_string}"
390
391
def infer_dataclass_attribute(
    node: nodes.Unknown, ctx: context.InferenceContext | None = None
) -> Iterator[InferenceResult]:
    """Inference tip for the Unknown node standing in for a dataclass attribute.

    The parent of ``node`` is expected to be the annotated assignment the
    attribute comes from; for any other parent the result is Uninferable.
    The inference results of the default value, if there is one, come first,
    followed by what can be inferred from the annotation.
    """
    assign = node.parent
    if isinstance(assign, nodes.AnnAssign):
        default_value = assign.value
        declared_type = assign.annotation

        if default_value is not None:
            yield from default_value.infer(context=ctx)

        if declared_type is None:
            yield Uninferable
        else:
            yield from _infer_instance_from_annotation(declared_type, ctx=ctx)
    else:
        yield Uninferable
413
414
def infer_dataclass_field_call(
    node: nodes.Call, ctx: context.InferenceContext | None = None
) -> Iterator[InferenceResult]:
    """Inference tip for dataclass field calls.

    A call that is not the direct child of an (annotated) assignment is left
    to default inference. Otherwise the call infers to its ``default`` value,
    to the result of calling its ``default_factory``, or to Uninferable when
    it declares no usable default.
    """
    if not isinstance(node.parent, (nodes.AnnAssign, nodes.Assign)):
        raise UseInferenceDefault

    field_default = _get_field_default(node)
    if not field_default:
        yield Uninferable
        return

    kind, default_node = field_default
    if kind == "default":
        yield from default_node.infer(context=ctx)
        return

    # default_factory: re-parse the factory call from its source form and
    # infer it as if it stood where the field call stands.
    factory_call = parse(default_node.as_string()).body[0].value
    factory_call.parent = node.parent
    yield from factory_call.infer(context=ctx)
432
433
def _looks_like_dataclass_decorator(
    node: nodes.NodeNG, decorator_names: frozenset[str] = DATACLASSES_DECORATORS
) -> bool:
    """Tell whether ``node`` looks like a dataclass decorator.

    The decorator (or, for a decorator with arguments, the called expression)
    is inferred first: it matches when it is a function named in
    ``decorator_names`` that lives in one of the supported modules. Only when
    inference gives nothing usable is the written name compared instead.
    """
    # decorator with arguments: look at what is being called
    target = node.func if isinstance(node, nodes.Call) else node

    try:
        inferred = next(target.infer())
    except (InferenceError, StopIteration):
        inferred = Uninferable

    if not isinstance(inferred, UninferableBase):
        return (
            isinstance(inferred, nodes.FunctionDef)
            and inferred.name in decorator_names
            and inferred.root().name in DATACLASS_MODULES
        )

    # Inference failed: fall back to matching on the name as written
    if isinstance(target, nodes.Name):
        return target.name in decorator_names
    if isinstance(target, nodes.Attribute):
        return target.attrname in decorator_names
    return False
462
463
464def _looks_like_dataclass_attribute(node: nodes.Unknown) -> bool:
465 """Return True if node was dynamically generated as the child of an AnnAssign
466 statement.
467 """
468 parent = node.parent
469 if not parent:
470 return False
471
472 scope = parent.scope()
473 return (
474 isinstance(parent, nodes.AnnAssign)
475 and isinstance(scope, nodes.ClassDef)
476 and is_decorated_with_dataclass(scope)
477 )
478
479
def _looks_like_dataclass_field_call(
    node: nodes.Call, check_scope: bool = True
) -> bool:
    """Tell whether ``node`` calls the dataclass ``field`` function.

    With ``check_scope`` enabled the call additionally has to belong to an
    annotated assignment with a value, located directly in the body of a class
    decorated as a dataclass. With ``check_scope`` disabled only the called
    function is examined.
    """
    if check_scope:
        stmt = node.statement()
        scope = stmt.scope()
        in_dataclass_body = (
            isinstance(stmt, nodes.AnnAssign)
            and stmt.value is not None
            and isinstance(scope, nodes.ClassDef)
            and is_decorated_with_dataclass(scope)
        )
        if not in_dataclass_body:
            return False

    try:
        called = next(node.func.infer())
    except (InferenceError, StopIteration):
        return False

    return (
        isinstance(called, nodes.FunctionDef)
        and called.name == FIELD_NAME
        and called.root().name in DATACLASS_MODULES
    )
508
509
510def _looks_like_dataclasses(node: nodes.Module) -> bool:
511 return node.qname() == "dataclasses"
512
513
514def _resolve_private_replace_to_public(node: nodes.Module) -> None:
515 """In python/cpython@6f3c138, a _replace() method was extracted from
516 replace(), and this indirection made replace() uninferable."""
517 if "_replace" in node.locals:
518 node.locals["replace"] = node.locals["_replace"]
519
520
521def _get_field_default(field_call: nodes.Call) -> _FieldDefaultReturn:
522 """Return a the default value of a field call, and the corresponding keyword
523 argument name.
524
525 field(default=...) results in the ... node
526 field(default_factory=...) results in a Call node with func ... and no arguments
527
528 If neither or both arguments are present, return ("", None) instead,
529 indicating that there is not a valid default value.
530 """
531 default, default_factory = None, None
532 for keyword in field_call.keywords:
533 if keyword.arg == "default":
534 default = keyword.value
535 elif keyword.arg == "default_factory":
536 default_factory = keyword.value
537
538 if default is not None and default_factory is None:
539 return "default", default
540
541 if default is None and default_factory is not None:
542 new_call = nodes.Call(
543 lineno=field_call.lineno,
544 col_offset=field_call.col_offset,
545 parent=field_call.parent,
546 end_lineno=field_call.end_lineno,
547 end_col_offset=field_call.end_col_offset,
548 )
549 new_call.postinit(func=default_factory, args=[], keywords=[])
550 return "default_factory", new_call
551
552 return None
553
554
def _is_keyword_only_sentinel(node: nodes.NodeNG) -> bool:
    """Tell whether ``node`` infers to the KW_ONLY sentinel of dataclasses."""
    inferred = safe_infer(node)
    if not isinstance(inferred, bases.Instance):
        return False
    return inferred.qname() == "dataclasses._KW_ONLY_TYPE"
562
563
564def _is_init_var(node: nodes.NodeNG) -> bool:
565 """Return True if node is an InitVar, with or without subscripting."""
566 try:
567 inferred = next(node.infer())
568 except (InferenceError, StopIteration):
569 return False
570
571 return getattr(inferred, "name", "") == "InitVar"
572
573
574# Allowed typing classes for which we support inferring instances
575_INFERABLE_TYPING_TYPES = frozenset(
576 (
577 "Dict",
578 "FrozenSet",
579 "List",
580 "Set",
581 "Tuple",
582 )
583)
584
585
def _infer_instance_from_annotation(
    node: nodes.NodeNG, ctx: context.InferenceContext | None = None
) -> Iterator[UninferableBase | bases.Instance]:
    """Infer an instance corresponding to the type annotation represented by node.

    Exactly one result is yielded: an instance of the class the annotation
    infers to, or Uninferable when the annotation cannot be inferred, does
    not infer to a class, or is a typing class outside
    ``_INFERABLE_TYPING_TYPES``.

    Currently has limited support for the typing module.
    """
    try:
        klass = next(node.infer(context=ctx))
    except (InferenceError, StopIteration):
        yield Uninferable
        # Stop here: falling through would report Uninferable a second time
        return

    if not isinstance(klass, nodes.ClassDef):
        yield Uninferable
    elif klass.root().name in {
        "typing",
        "_collections_abc",
        "",
    }:  # "" because of synthetic nodes in brain_typing.py
        if klass.name in _INFERABLE_TYPING_TYPES:
            yield klass.instantiate_class()
        else:
            yield Uninferable
    else:
        yield klass.instantiate_class()
611
612
def register(manager: AstroidManager) -> None:
    """Register the dataclass transforms and inference tips on ``manager``."""
    if PY313_PLUS:
        # Only needed when running on the Python versions covered by PY313_PLUS
        manager.register_transform(
            nodes.Module, _resolve_private_replace_to_public, _looks_like_dataclasses
        )

    manager.register_transform(
        nodes.ClassDef, dataclass_transform, is_decorated_with_dataclass
    )

    field_call_tip = inference_tip(infer_dataclass_field_call, raise_on_overwrite=True)
    manager.register_transform(
        nodes.Call, field_call_tip, _looks_like_dataclass_field_call
    )

    attribute_tip = inference_tip(infer_dataclass_attribute, raise_on_overwrite=True)
    manager.register_transform(
        nodes.Unknown, attribute_tip, _looks_like_dataclass_attribute
    )