1# dialects/postgresql/array.py
2# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors
3# <see AUTHORS file>
4#
5# This module is part of SQLAlchemy and is released under
6# the MIT License: https://www.opensource.org/licenses/mit-license.php
7
8
9from __future__ import annotations
10
11import re
12from typing import Any as typing_Any
13from typing import Iterable
14from typing import Optional
15from typing import Sequence
16from typing import TYPE_CHECKING
17from typing import TypeVar
18from typing import Union
19
20from .operators import CONTAINED_BY
21from .operators import CONTAINS
22from .operators import OVERLAP
23from ... import types as sqltypes
24from ... import util
25from ...sql import expression
26from ...sql import operators
27from ...sql.visitors import InternalTraversal
28
29if TYPE_CHECKING:
30 from ...engine.interfaces import Dialect
31 from ...sql._typing import _ColumnExpressionArgument
32 from ...sql._typing import _TypeEngineArgument
33 from ...sql.elements import ColumnElement
34 from ...sql.elements import Grouping
35 from ...sql.expression import BindParameter
36 from ...sql.operators import OperatorType
37 from ...sql.selectable import _SelectIterable
38 from ...sql.type_api import _BindProcessorType
39 from ...sql.type_api import _LiteralProcessorType
40 from ...sql.type_api import _ResultProcessorType
41 from ...sql.type_api import TypeEngine
42 from ...sql.visitors import _TraverseInternalsType
43 from ...util.typing import Self
44
45
46_T = TypeVar("_T", bound=typing_Any)
47_CT = TypeVar("_CT", bound=typing_Any)
48
49
50def Any(
51 other: typing_Any,
52 arrexpr: _ColumnExpressionArgument[_T],
53 operator: OperatorType = operators.eq,
54) -> ColumnElement[bool]:
55 """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.any` method.
56 See that method for details.
57
58 """
59
60 return arrexpr.any(other, operator) # type: ignore[no-any-return, union-attr] # noqa: E501
61
62
63def All(
64 other: typing_Any,
65 arrexpr: _ColumnExpressionArgument[_T],
66 operator: OperatorType = operators.eq,
67) -> ColumnElement[bool]:
68 """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.all` method.
69 See that method for details.
70
71 """
72
73 return arrexpr.all(other, operator) # type: ignore[no-any-return, union-attr] # noqa: E501
74
75
76class array(expression.ExpressionClauseList[_T]):
77 """A PostgreSQL ARRAY literal.
78
79 This is used to produce ARRAY literals in SQL expressions, e.g.::
80
81 from sqlalchemy.dialects.postgresql import array
82 from sqlalchemy.dialects import postgresql
83 from sqlalchemy import select, func
84
85 stmt = select(array([1, 2]) + array([3, 4, 5]))
86
87 print(stmt.compile(dialect=postgresql.dialect()))
88
89 Produces the SQL:
90
91 .. sourcecode:: sql
92
93 SELECT ARRAY[%(param_1)s, %(param_2)s] ||
94 ARRAY[%(param_3)s, %(param_4)s, %(param_5)s]) AS anon_1
95
96 An instance of :class:`.array` will always have the datatype
97 :class:`_types.ARRAY`. The "inner" type of the array is inferred from the
98 values present, unless the :paramref:`_postgresql.array.type_` keyword
99 argument is passed::
100
101 array(["foo", "bar"], type_=CHAR)
102
103 When constructing an empty array, the :paramref:`_postgresql.array.type_`
104 argument is particularly important as PostgreSQL server typically requires
105 a cast to be rendered for the inner type in order to render an empty array.
106 SQLAlchemy's compilation for the empty array will produce this cast so
107 that::
108
109 stmt = array([], type_=Integer)
110 print(stmt.compile(dialect=postgresql.dialect()))
111
112 Produces:
113
114 .. sourcecode:: sql
115
116 ARRAY[]::INTEGER[]
117
118 As required by PostgreSQL for empty arrays.
119
120 .. versionadded:: 2.0.40 added support to render empty PostgreSQL array
121 literals with a required cast.
122
123 Multidimensional arrays are produced by nesting :class:`.array` constructs.
124 The dimensionality of the final :class:`_types.ARRAY`
125 type is calculated by
126 recursively adding the dimensions of the inner :class:`_types.ARRAY`
127 type::
128
129 stmt = select(
130 array(
131 [array([1, 2]), array([3, 4]), array([column("q"), column("x")])]
132 )
133 )
134 print(stmt.compile(dialect=postgresql.dialect()))
135
136 Produces:
137
138 .. sourcecode:: sql
139
140 SELECT ARRAY[
141 ARRAY[%(param_1)s, %(param_2)s],
142 ARRAY[%(param_3)s, %(param_4)s],
143 ARRAY[q, x]
144 ] AS anon_1
145
146 .. versionadded:: 1.3.6 added support for multidimensional array literals
147
148 .. seealso::
149
150 :class:`_postgresql.ARRAY`
151
152 """ # noqa: E501
153
154 __visit_name__ = "array"
155
156 stringify_dialect = "postgresql"
157
158 _traverse_internals: _TraverseInternalsType = [
159 ("clauses", InternalTraversal.dp_clauseelement_tuple),
160 ("type", InternalTraversal.dp_type),
161 ]
162
163 def __init__(
164 self,
165 clauses: Iterable[_T],
166 *,
167 type_: Optional[_TypeEngineArgument[_T]] = None,
168 **kw: typing_Any,
169 ):
170 r"""Construct an ARRAY literal.
171
172 :param clauses: iterable, such as a list, containing elements to be
173 rendered in the array
174 :param type\_: optional type. If omitted, the type is inferred
175 from the contents of the array.
176
177 """
178 super().__init__(operators.comma_op, *clauses, **kw)
179
180 main_type = (
181 type_
182 if type_ is not None
183 else self.clauses[0].type if self.clauses else sqltypes.NULLTYPE
184 )
185
186 if isinstance(main_type, ARRAY):
187 self.type = ARRAY(
188 main_type.item_type,
189 dimensions=(
190 main_type.dimensions + 1
191 if main_type.dimensions is not None
192 else 2
193 ),
194 ) # type: ignore[assignment]
195 else:
196 self.type = ARRAY(main_type) # type: ignore[assignment]
197
198 @property
199 def _select_iterable(self) -> _SelectIterable:
200 return (self,)
201
202 def _bind_param(
203 self,
204 operator: OperatorType,
205 obj: typing_Any,
206 type_: Optional[TypeEngine[_T]] = None,
207 _assume_scalar: bool = False,
208 ) -> BindParameter[_T]:
209 if _assume_scalar or operator is operators.getitem:
210 return expression.BindParameter(
211 None,
212 obj,
213 _compared_to_operator=operator,
214 type_=type_,
215 _compared_to_type=self.type,
216 unique=True,
217 )
218
219 else:
220 return array(
221 [
222 self._bind_param(
223 operator, o, _assume_scalar=True, type_=type_
224 )
225 for o in obj
226 ]
227 ) # type: ignore[return-value]
228
229 def self_group(
230 self, against: Optional[OperatorType] = None
231 ) -> Union[Self, Grouping[_T]]:
232 if against in (operators.any_op, operators.all_op, operators.getitem):
233 return expression.Grouping(self)
234 else:
235 return self
236
237
238class ARRAY(sqltypes.ARRAY[_T]):
239 """PostgreSQL ARRAY type.
240
241 The :class:`_postgresql.ARRAY` type is constructed in the same way
242 as the core :class:`_types.ARRAY` type; a member type is required, and a
243 number of dimensions is recommended if the type is to be used for more
244 than one dimension::
245
246 from sqlalchemy.dialects import postgresql
247
248 mytable = Table(
249 "mytable",
250 metadata,
251 Column("data", postgresql.ARRAY(Integer, dimensions=2)),
252 )
253
254 The :class:`_postgresql.ARRAY` type provides all operations defined on the
255 core :class:`_types.ARRAY` type, including support for "dimensions",
256 indexed access, and simple matching such as
257 :meth:`.types.ARRAY.Comparator.any` and
258 :meth:`.types.ARRAY.Comparator.all`. :class:`_postgresql.ARRAY`
259 class also
260 provides PostgreSQL-specific methods for containment operations, including
261 :meth:`.postgresql.ARRAY.Comparator.contains`
262 :meth:`.postgresql.ARRAY.Comparator.contained_by`, and
263 :meth:`.postgresql.ARRAY.Comparator.overlap`, e.g.::
264
265 mytable.c.data.contains([1, 2])
266
267 Indexed access is one-based by default, to match that of PostgreSQL;
268 for zero-based indexed access, set
269 :paramref:`_postgresql.ARRAY.zero_indexes`.
270
271 Additionally, the :class:`_postgresql.ARRAY`
272 type does not work directly in
273 conjunction with the :class:`.ENUM` type. For a workaround, see the
274 special type at :ref:`postgresql_array_of_enum`.
275
276 .. container:: topic
277
278 **Detecting Changes in ARRAY columns when using the ORM**
279
280 The :class:`_postgresql.ARRAY` type, when used with the SQLAlchemy ORM,
281 does not detect in-place mutations to the array. In order to detect
282 these, the :mod:`sqlalchemy.ext.mutable` extension must be used, using
283 the :class:`.MutableList` class::
284
285 from sqlalchemy.dialects.postgresql import ARRAY
286 from sqlalchemy.ext.mutable import MutableList
287
288
289 class SomeOrmClass(Base):
290 # ...
291
292 data = Column(MutableList.as_mutable(ARRAY(Integer)))
293
294 This extension will allow "in-place" changes such to the array
295 such as ``.append()`` to produce events which will be detected by the
296 unit of work. Note that changes to elements **inside** the array,
297 including subarrays that are mutated in place, are **not** detected.
298
299 Alternatively, assigning a new array value to an ORM element that
300 replaces the old one will always trigger a change event.
301
302 .. seealso::
303
304 :class:`_types.ARRAY` - base array type
305
306 :class:`_postgresql.array` - produces a literal array value.
307
308 """
309
310 def __init__(
311 self,
312 item_type: _TypeEngineArgument[_T],
313 as_tuple: bool = False,
314 dimensions: Optional[int] = None,
315 zero_indexes: bool = False,
316 ):
317 """Construct an ARRAY.
318
319 E.g.::
320
321 Column("myarray", ARRAY(Integer))
322
323 Arguments are:
324
325 :param item_type: The data type of items of this array. Note that
326 dimensionality is irrelevant here, so multi-dimensional arrays like
327 ``INTEGER[][]``, are constructed as ``ARRAY(Integer)``, not as
328 ``ARRAY(ARRAY(Integer))`` or such.
329
330 :param as_tuple=False: Specify whether return results
331 should be converted to tuples from lists. DBAPIs such
332 as psycopg2 return lists by default. When tuples are
333 returned, the results are hashable.
334
335 :param dimensions: if non-None, the ARRAY will assume a fixed
336 number of dimensions. This will cause the DDL emitted for this
337 ARRAY to include the exact number of bracket clauses ``[]``,
338 and will also optimize the performance of the type overall.
339 Note that PG arrays are always implicitly "non-dimensioned",
340 meaning they can store any number of dimensions no matter how
341 they were declared.
342
343 :param zero_indexes=False: when True, index values will be converted
344 between Python zero-based and PostgreSQL one-based indexes, e.g.
345 a value of one will be added to all index values before passing
346 to the database.
347
348 """
349 if isinstance(item_type, ARRAY):
350 raise ValueError(
351 "Do not nest ARRAY types; ARRAY(basetype) "
352 "handles multi-dimensional arrays of basetype"
353 )
354 if isinstance(item_type, type):
355 item_type = item_type()
356 self.item_type = item_type
357 self.as_tuple = as_tuple
358 self.dimensions = dimensions
359 self.zero_indexes = zero_indexes
360
361 class Comparator(sqltypes.ARRAY.Comparator[_CT]):
362 """Define comparison operations for :class:`_types.ARRAY`.
363
364 Note that these operations are in addition to those provided
365 by the base :class:`.types.ARRAY.Comparator` class, including
366 :meth:`.types.ARRAY.Comparator.any` and
367 :meth:`.types.ARRAY.Comparator.all`.
368
369 """
370
371 def contains(
372 self, other: typing_Any, **kwargs: typing_Any
373 ) -> ColumnElement[bool]:
374 """Boolean expression. Test if elements are a superset of the
375 elements of the argument array expression.
376
377 kwargs may be ignored by this operator but are required for API
378 conformance.
379 """
380 return self.operate(CONTAINS, other, result_type=sqltypes.Boolean)
381
382 def contained_by(self, other: typing_Any) -> ColumnElement[bool]:
383 """Boolean expression. Test if elements are a proper subset of the
384 elements of the argument array expression.
385 """
386 return self.operate(
387 CONTAINED_BY, other, result_type=sqltypes.Boolean
388 )
389
390 def overlap(self, other: typing_Any) -> ColumnElement[bool]:
391 """Boolean expression. Test if array has elements in common with
392 an argument array expression.
393 """
394 return self.operate(OVERLAP, other, result_type=sqltypes.Boolean)
395
396 comparator_factory = Comparator
397
398 @util.memoized_property
399 def _against_native_enum(self) -> bool:
400 return (
401 isinstance(self.item_type, sqltypes.Enum)
402 and self.item_type.native_enum
403 )
404
405 def literal_processor(
406 self, dialect: Dialect
407 ) -> Optional[_LiteralProcessorType[_T]]:
408 item_proc = self.item_type.dialect_impl(dialect).literal_processor(
409 dialect
410 )
411 if item_proc is None:
412 return None
413
414 def to_str(elements: Iterable[typing_Any]) -> str:
415 return f"ARRAY[{', '.join(elements)}]"
416
417 def process(value: Sequence[typing_Any]) -> str:
418 inner = self._apply_item_processor(
419 value, item_proc, self.dimensions, to_str
420 )
421 return inner
422
423 return process
424
425 def bind_processor(
426 self, dialect: Dialect
427 ) -> Optional[_BindProcessorType[Sequence[typing_Any]]]:
428 item_proc = self.item_type.dialect_impl(dialect).bind_processor(
429 dialect
430 )
431
432 def process(
433 value: Optional[Sequence[typing_Any]],
434 ) -> Optional[list[typing_Any]]:
435 if value is None:
436 return value
437 else:
438 return self._apply_item_processor(
439 value, item_proc, self.dimensions, list
440 )
441
442 return process
443
444 def result_processor(
445 self, dialect: Dialect, coltype: object
446 ) -> _ResultProcessorType[Sequence[typing_Any]]:
447 item_proc = self.item_type.dialect_impl(dialect).result_processor(
448 dialect, coltype
449 )
450
451 def process(
452 value: Sequence[typing_Any],
453 ) -> Optional[Sequence[typing_Any]]:
454 if value is None:
455 return value
456 else:
457 return self._apply_item_processor(
458 value,
459 item_proc,
460 self.dimensions,
461 tuple if self.as_tuple else list,
462 )
463
464 if self._against_native_enum:
465 super_rp = process
466 pattern = re.compile(r"^{(.*)}$")
467
468 def handle_raw_string(value: str) -> Sequence[Optional[str]]:
469 inner = pattern.match(value).group(1) # type: ignore[union-attr] # noqa: E501
470 return _split_enum_values(inner)
471
472 def process(
473 value: Sequence[typing_Any],
474 ) -> Optional[Sequence[typing_Any]]:
475 if value is None:
476 return value
477 # isinstance(value, str) is required to handle
478 # the case where a TypeDecorator for and Array of Enum is
479 # used like was required in sa < 1.3.17
480 return super_rp(
481 handle_raw_string(value)
482 if isinstance(value, str)
483 else value
484 )
485
486 return process
487
488
489def _split_enum_values(array_string: str) -> Sequence[Optional[str]]:
490 if '"' not in array_string:
491 # no escape char is present so it can just split on the comma
492 return [
493 r if r != "NULL" else None
494 for r in (array_string.split(",") if array_string else [])
495 ]
496
497 # handles quoted strings from:
498 # r'abc,"quoted","also\\\\quoted", "quoted, comma", "esc \" quot", qpr'
499 # returns
500 # ['abc', 'quoted', 'also\\quoted', 'quoted, comma', 'esc " quot', 'qpr']
501 text = array_string.replace(r"\"", "_$ESC_QUOTE$_")
502 text = text.replace(r"\\", "\\")
503 result = []
504 on_quotes = re.split(r'(")', text)
505 in_quotes = False
506 for tok in on_quotes:
507 if tok == '"':
508 in_quotes = not in_quotes
509 elif in_quotes:
510 result.append(tok.replace("_$ESC_QUOTE$_", '"'))
511 else:
512 # interpret NULL (without quotes!) as None
513 result.extend(
514 [
515 r if r != "NULL" else None
516 for r in re.findall(r"([^\s,]+),?", tok)
517 ]
518 )
519 return result