1# dialects/postgresql/array.py
2# Copyright (C) 2005-2024 the SQLAlchemy authors and contributors
3# <see AUTHORS file>
4#
5# This module is part of SQLAlchemy and is released under
6# the MIT License: https://www.opensource.org/licenses/mit-license.php
7
8import re
9
10from ... import types as sqltypes
11from ... import util
12from ...sql import coercions
13from ...sql import expression
14from ...sql import operators
15from ...sql import roles
16
17
18def Any(other, arrexpr, operator=operators.eq):
19 """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.any` method.
20 See that method for details.
21
22 """
23
24 return arrexpr.any(other, operator)
25
26
27def All(other, arrexpr, operator=operators.eq):
28 """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.all` method.
29 See that method for details.
30
31 """
32
33 return arrexpr.all(other, operator)
34
35
36class array(expression.ClauseList, expression.ColumnElement):
37
38 """A PostgreSQL ARRAY literal.
39
40 This is used to produce ARRAY literals in SQL expressions, e.g.::
41
42 from sqlalchemy.dialects.postgresql import array
43 from sqlalchemy.dialects import postgresql
44 from sqlalchemy import select, func
45
46 stmt = select(array([1,2]) + array([3,4,5]))
47
48 print(stmt.compile(dialect=postgresql.dialect()))
49
50 Produces the SQL::
51
52 SELECT ARRAY[%(param_1)s, %(param_2)s] ||
53 ARRAY[%(param_3)s, %(param_4)s, %(param_5)s]) AS anon_1
54
55 An instance of :class:`.array` will always have the datatype
56 :class:`_types.ARRAY`. The "inner" type of the array is inferred from
57 the values present, unless the ``type_`` keyword argument is passed::
58
59 array(['foo', 'bar'], type_=CHAR)
60
61 Multidimensional arrays are produced by nesting :class:`.array` constructs.
62 The dimensionality of the final :class:`_types.ARRAY`
63 type is calculated by
64 recursively adding the dimensions of the inner :class:`_types.ARRAY`
65 type::
66
67 stmt = select(
68 array([
69 array([1, 2]), array([3, 4]), array([column('q'), column('x')])
70 ])
71 )
72 print(stmt.compile(dialect=postgresql.dialect()))
73
74 Produces::
75
76 SELECT ARRAY[ARRAY[%(param_1)s, %(param_2)s],
77 ARRAY[%(param_3)s, %(param_4)s], ARRAY[q, x]] AS anon_1
78
79 .. versionadded:: 1.3.6 added support for multidimensional array literals
80
81 .. seealso::
82
83 :class:`_postgresql.ARRAY`
84
85 """
86
87 __visit_name__ = "array"
88
89 stringify_dialect = "postgresql"
90 inherit_cache = True
91
92 def __init__(self, clauses, **kw):
93 clauses = [
94 coercions.expect(roles.ExpressionElementRole, c) for c in clauses
95 ]
96
97 super(array, self).__init__(*clauses, **kw)
98
99 self._type_tuple = [arg.type for arg in clauses]
100 main_type = kw.pop(
101 "type_",
102 self._type_tuple[0] if self._type_tuple else sqltypes.NULLTYPE,
103 )
104
105 if isinstance(main_type, ARRAY):
106 self.type = ARRAY(
107 main_type.item_type,
108 dimensions=main_type.dimensions + 1
109 if main_type.dimensions is not None
110 else 2,
111 )
112 else:
113 self.type = ARRAY(main_type)
114
115 @property
116 def _select_iterable(self):
117 return (self,)
118
119 def _bind_param(self, operator, obj, _assume_scalar=False, type_=None):
120 if _assume_scalar or operator is operators.getitem:
121 return expression.BindParameter(
122 None,
123 obj,
124 _compared_to_operator=operator,
125 type_=type_,
126 _compared_to_type=self.type,
127 unique=True,
128 )
129
130 else:
131 return array(
132 [
133 self._bind_param(
134 operator, o, _assume_scalar=True, type_=type_
135 )
136 for o in obj
137 ]
138 )
139
140 def self_group(self, against=None):
141 if against in (operators.any_op, operators.all_op, operators.getitem):
142 return expression.Grouping(self)
143 else:
144 return self
145
146
147CONTAINS = operators.custom_op("@>", precedence=5, is_comparison=True)
148
149CONTAINED_BY = operators.custom_op("<@", precedence=5, is_comparison=True)
150
151OVERLAP = operators.custom_op("&&", precedence=5, is_comparison=True)
152
153
154class ARRAY(sqltypes.ARRAY):
155
156 """PostgreSQL ARRAY type.
157
158 .. versionchanged:: 1.1 The :class:`_postgresql.ARRAY` type is now
159 a subclass of the core :class:`_types.ARRAY` type.
160
161 The :class:`_postgresql.ARRAY` type is constructed in the same way
162 as the core :class:`_types.ARRAY` type; a member type is required, and a
163 number of dimensions is recommended if the type is to be used for more
164 than one dimension::
165
166 from sqlalchemy.dialects import postgresql
167
168 mytable = Table("mytable", metadata,
169 Column("data", postgresql.ARRAY(Integer, dimensions=2))
170 )
171
172 The :class:`_postgresql.ARRAY` type provides all operations defined on the
173 core :class:`_types.ARRAY` type, including support for "dimensions",
174 indexed access, and simple matching such as
175 :meth:`.types.ARRAY.Comparator.any` and
176 :meth:`.types.ARRAY.Comparator.all`. :class:`_postgresql.ARRAY`
177 class also
178 provides PostgreSQL-specific methods for containment operations, including
179 :meth:`.postgresql.ARRAY.Comparator.contains`
180 :meth:`.postgresql.ARRAY.Comparator.contained_by`, and
181 :meth:`.postgresql.ARRAY.Comparator.overlap`, e.g.::
182
183 mytable.c.data.contains([1, 2])
184
185 The :class:`_postgresql.ARRAY` type may not be supported on all
186 PostgreSQL DBAPIs; it is currently known to work on psycopg2 only.
187
188 Additionally, the :class:`_postgresql.ARRAY`
189 type does not work directly in
190 conjunction with the :class:`.ENUM` type. For a workaround, see the
191 special type at :ref:`postgresql_array_of_enum`.
192
193 .. container:: topic
194
195 **Detecting Changes in ARRAY columns when using the ORM**
196
197 The :class:`_postgresql.ARRAY` type, when used with the SQLAlchemy ORM,
198 does not detect in-place mutations to the array. In order to detect
199 these, the :mod:`sqlalchemy.ext.mutable` extension must be used, using
200 the :class:`.MutableList` class::
201
202 from sqlalchemy.dialects.postgresql import ARRAY
203 from sqlalchemy.ext.mutable import MutableList
204
205 class SomeOrmClass(Base):
206 # ...
207
208 data = Column(MutableList.as_mutable(ARRAY(Integer)))
209
210 This extension will allow "in-place" changes such to the array
211 such as ``.append()`` to produce events which will be detected by the
212 unit of work. Note that changes to elements **inside** the array,
213 including subarrays that are mutated in place, are **not** detected.
214
215 Alternatively, assigning a new array value to an ORM element that
216 replaces the old one will always trigger a change event.
217
218 .. seealso::
219
220 :class:`_types.ARRAY` - base array type
221
222 :class:`_postgresql.array` - produces a literal array value.
223
224 """
225
226 class Comparator(sqltypes.ARRAY.Comparator):
227
228 """Define comparison operations for :class:`_types.ARRAY`.
229
230 Note that these operations are in addition to those provided
231 by the base :class:`.types.ARRAY.Comparator` class, including
232 :meth:`.types.ARRAY.Comparator.any` and
233 :meth:`.types.ARRAY.Comparator.all`.
234
235 """
236
237 def contains(self, other, **kwargs):
238 """Boolean expression. Test if elements are a superset of the
239 elements of the argument array expression.
240
241 kwargs may be ignored by this operator but are required for API
242 conformance.
243 """
244 return self.operate(CONTAINS, other, result_type=sqltypes.Boolean)
245
246 def contained_by(self, other):
247 """Boolean expression. Test if elements are a proper subset of the
248 elements of the argument array expression.
249 """
250 return self.operate(
251 CONTAINED_BY, other, result_type=sqltypes.Boolean
252 )
253
254 def overlap(self, other):
255 """Boolean expression. Test if array has elements in common with
256 an argument array expression.
257 """
258 return self.operate(OVERLAP, other, result_type=sqltypes.Boolean)
259
260 comparator_factory = Comparator
261
262 def __init__(
263 self, item_type, as_tuple=False, dimensions=None, zero_indexes=False
264 ):
265 """Construct an ARRAY.
266
267 E.g.::
268
269 Column('myarray', ARRAY(Integer))
270
271 Arguments are:
272
273 :param item_type: The data type of items of this array. Note that
274 dimensionality is irrelevant here, so multi-dimensional arrays like
275 ``INTEGER[][]``, are constructed as ``ARRAY(Integer)``, not as
276 ``ARRAY(ARRAY(Integer))`` or such.
277
278 :param as_tuple=False: Specify whether return results
279 should be converted to tuples from lists. DBAPIs such
280 as psycopg2 return lists by default. When tuples are
281 returned, the results are hashable.
282
283 :param dimensions: if non-None, the ARRAY will assume a fixed
284 number of dimensions. This will cause the DDL emitted for this
285 ARRAY to include the exact number of bracket clauses ``[]``,
286 and will also optimize the performance of the type overall.
287 Note that PG arrays are always implicitly "non-dimensioned",
288 meaning they can store any number of dimensions no matter how
289 they were declared.
290
291 :param zero_indexes=False: when True, index values will be converted
292 between Python zero-based and PostgreSQL one-based indexes, e.g.
293 a value of one will be added to all index values before passing
294 to the database.
295
296 .. versionadded:: 0.9.5
297
298
299 """
300 if isinstance(item_type, ARRAY):
301 raise ValueError(
302 "Do not nest ARRAY types; ARRAY(basetype) "
303 "handles multi-dimensional arrays of basetype"
304 )
305 if isinstance(item_type, type):
306 item_type = item_type()
307 self.item_type = item_type
308 self.as_tuple = as_tuple
309 self.dimensions = dimensions
310 self.zero_indexes = zero_indexes
311
312 @property
313 def hashable(self):
314 return self.as_tuple
315
316 @property
317 def python_type(self):
318 return list
319
320 def compare_values(self, x, y):
321 return x == y
322
323 def _proc_array(self, arr, itemproc, dim, collection):
324 if dim is None:
325 arr = list(arr)
326 if (
327 dim == 1
328 or dim is None
329 and (
330 # this has to be (list, tuple), or at least
331 # not hasattr('__iter__'), since Py3K strings
332 # etc. have __iter__
333 not arr
334 or not isinstance(arr[0], (list, tuple))
335 )
336 ):
337 if itemproc:
338 return collection(itemproc(x) for x in arr)
339 else:
340 return collection(arr)
341 else:
342 return collection(
343 self._proc_array(
344 x,
345 itemproc,
346 dim - 1 if dim is not None else None,
347 collection,
348 )
349 for x in arr
350 )
351
352 @util.memoized_property
353 def _against_native_enum(self):
354 return (
355 isinstance(self.item_type, sqltypes.Enum)
356 and self.item_type.native_enum
357 )
358
359 def bind_expression(self, bindvalue):
360 return bindvalue
361
362 def bind_processor(self, dialect):
363 item_proc = self.item_type.dialect_impl(dialect).bind_processor(
364 dialect
365 )
366
367 def process(value):
368 if value is None:
369 return value
370 else:
371 return self._proc_array(
372 value, item_proc, self.dimensions, list
373 )
374
375 return process
376
377 def result_processor(self, dialect, coltype):
378 item_proc = self.item_type.dialect_impl(dialect).result_processor(
379 dialect, coltype
380 )
381
382 def process(value):
383 if value is None:
384 return value
385 else:
386 return self._proc_array(
387 value,
388 item_proc,
389 self.dimensions,
390 tuple if self.as_tuple else list,
391 )
392
393 if self._against_native_enum:
394 super_rp = process
395 pattern = re.compile(r"^{(.*)}$")
396
397 def handle_raw_string(value):
398 inner = pattern.match(value).group(1)
399 return _split_enum_values(inner)
400
401 def process(value):
402 if value is None:
403 return value
404 # isinstance(value, util.string_types) is required to handle
405 # the case where a TypeDecorator for and Array of Enum is
406 # used like was required in sa < 1.3.17
407 return super_rp(
408 handle_raw_string(value)
409 if isinstance(value, util.string_types)
410 else value
411 )
412
413 return process
414
415
416def _split_enum_values(array_string):
417
418 if '"' not in array_string:
419 # no escape char is present so it can just split on the comma
420 return array_string.split(",") if array_string else []
421
422 # handles quoted strings from:
423 # r'abc,"quoted","also\\\\quoted", "quoted, comma", "esc \" quot", qpr'
424 # returns
425 # ['abc', 'quoted', 'also\\quoted', 'quoted, comma', 'esc " quot', 'qpr']
426 text = array_string.replace(r"\"", "_$ESC_QUOTE$_")
427 text = text.replace(r"\\", "\\")
428 result = []
429 on_quotes = re.split(r'(")', text)
430 in_quotes = False
431 for tok in on_quotes:
432 if tok == '"':
433 in_quotes = not in_quotes
434 elif in_quotes:
435 result.append(tok.replace("_$ESC_QUOTE$_", '"'))
436 else:
437 result.extend(re.findall(r"([^\s,]+),?", tok))
438 return result