1# dialects/oracle/vector.py
2# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors
3# <see AUTHORS file>
4#
5# This module is part of SQLAlchemy and is released under
6# the MIT License: https://www.opensource.org/licenses/mit-license.php
7# mypy: ignore-errors
8
9
10from __future__ import annotations
11
12import array
13from dataclasses import dataclass
14from enum import Enum
15from typing import Optional
16
17import sqlalchemy.types as types
18from sqlalchemy.types import Float
19
20
class VectorIndexType(Enum):
    """Enumeration of the VECTOR index structure types.

    Each member's value is the same upper-case string as its name.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    HNSW = "HNSW"
    """The HNSW (Hierarchical Navigable Small World) index type."""

    IVF = "IVF"
    """The IVF (Inverted File Index) index type."""
38
39
class VectorDistanceType(Enum):
    """Enumeration of the vector distance metrics.

    Each member's value is the same upper-case string as its name.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    EUCLIDEAN = "EUCLIDEAN"
    """Euclidean distance (L2 norm): the straight-line distance between
    two vectors in space.
    """

    DOT = "DOT"
    """Dot product similarity: the algebraic similarity between two
    vectors.
    """

    COSINE = "COSINE"
    """Cosine similarity: the cosine of the angle between two vectors."""

    MANHATTAN = "MANHATTAN"
    """Manhattan distance (L1 norm): the sum of absolute differences
    across dimensions.
    """
69
70
class VectorStorageFormat(Enum):
    """Enumeration of the data formats used to store vector components.

    Each member's value is the same upper-case string as its name.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    INT8 = "INT8"
    """8-bit integer format."""

    BINARY = "BINARY"
    """Binary format."""

    FLOAT32 = "FLOAT32"
    """32-bit floating-point format."""

    FLOAT64 = "FLOAT64"
    """64-bit floating-point format."""
96
97
@dataclass
class VectorIndexConfig:
    """Define the configuration for Oracle VECTOR Index.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    :param index_type: Enum value from :class:`.VectorIndexType`
     Specifies the indexing method. For HNSW, this must be
     :attr:`.VectorIndexType.HNSW`. Defaults to
     :attr:`.VectorIndexType.HNSW`; the raw string value of a member
     (e.g. ``"IVF"``) is also accepted and converted to the enum.

    :param distance: Enum value from :class:`.VectorDistanceType`
     specifies the metric for calculating distance between VECTORS.

    :param accuracy: integer. Should be in the range 0 to 100
     Specifies the accuracy of the nearest neighbor search during
     query execution.

    :param parallel: integer. Specifies degree of parallelism.

    :param hnsw_neighbors: integer. Should be in the range 0 to
     2048. Specifies the number of nearest neighbors considered
     during the search. The attribute
     :attr:`.VectorIndexConfig.hnsw_neighbors` is HNSW index specific.

    :param hnsw_efconstruction: integer. Should be in the range 0
     to 65535. Controls the trade-off between indexing speed and
     recall quality during index construction. The attribute
     :attr:`.VectorIndexConfig.hnsw_efconstruction` is HNSW index
     specific.

    :param ivf_neighbor_partitions: integer. Should be in the range
     0 to 10,000,000. Specifies the number of partitions used to
     divide the dataset. The attribute
     :attr:`.VectorIndexConfig.ivf_neighbor_partitions` is IVF index
     specific.

    :param ivf_sample_per_partition: integer. Should be between 1
     and ``num_vectors / neighbor partitions``. Specifies the
     number of samples used per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_sample_per_partition` is IVF index
     specific.

    :param ivf_min_vectors_per_partition: integer. From 0 (no trimming)
     to the total number of vectors (results in 1 partition). Specifies
     the minimum number of vectors per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_min_vectors_per_partition`
     is IVF index specific.

    :raises ValueError: if ``index_type`` is not a member (or member
     value) of :class:`.VectorIndexType`.

    :raises TypeError: if any of the integer options is given a value
     that is neither ``None`` nor an ``int``. Only the type is checked
     here; the ranges listed above are not enforced by this class.

    """

    index_type: VectorIndexType = VectorIndexType.HNSW
    distance: Optional[VectorDistanceType] = None
    accuracy: Optional[int] = None
    hnsw_neighbors: Optional[int] = None
    hnsw_efconstruction: Optional[int] = None
    ivf_neighbor_partitions: Optional[int] = None
    ivf_sample_per_partition: Optional[int] = None
    ivf_min_vectors_per_partition: Optional[int] = None
    parallel: Optional[int] = None

    def __post_init__(self):
        """Normalize ``index_type`` and type-check the integer options."""
        # Enum lookup by value: lets callers pass either the member or
        # its string value, and raises ValueError for anything else.
        self.index_type = VectorIndexType(self.index_type)
        for field in (
            "hnsw_neighbors",
            "hnsw_efconstruction",
            "ivf_neighbor_partitions",
            "ivf_sample_per_partition",
            "ivf_min_vectors_per_partition",
            "parallel",
            "accuracy",
        ):
            value = getattr(self, field)
            if value is not None and not isinstance(value, int):
                # The trailing space after "if" matters: the two adjacent
                # f-strings are concatenated verbatim.
                raise TypeError(
                    f"{field} must be an integer if "
                    f"provided, got {type(value).__name__}"
                )
177
178
class VECTOR(types.TypeEngine):
    """Oracle VECTOR datatype.

    For complete background on using this type, see
    :ref:`oracle_vector_datatype`.

    .. versionadded:: 2.0.41

    """

    cache_ok = True
    __visit_name__ = "VECTOR"

    # array.array typecode used when converting a Python list for each
    # storage format.
    _typecode_map = {
        VectorStorageFormat.INT8: "b",  # signed char
        VectorStorageFormat.BINARY: "B",  # unsigned char
        VectorStorageFormat.FLOAT32: "f",  # float
        VectorStorageFormat.FLOAT64: "d",  # double
    }

    def __init__(self, dim=None, storage_format=None):
        """Construct a VECTOR.

        :param dim: integer. The dimension of the VECTOR datatype. This
         should be an integer value.

        :param storage_format: VectorStorageFormat. The VECTOR storage
         type format. This may be Enum values from
         :class:`.VectorStorageFormat` INT8, BINARY, FLOAT32, or FLOAT64.

        :raises TypeError: if ``dim`` is given and is not an ``int``, or
         if ``storage_format`` is given and is not a
         :class:`.VectorStorageFormat` member.

        """
        if dim is not None and not isinstance(dim, int):
            raise TypeError("dim must be an integer")
        if storage_format is not None and not isinstance(
            storage_format, VectorStorageFormat
        ):
            raise TypeError(
                "storage_format must be an enum of type VectorStorageFormat"
            )
        self.dim = dim
        self.storage_format = storage_format

    def _cached_bind_processor(self, dialect):
        """
        Convert a list to an array.array before binding it to the database.

        ``None`` and values that are already ``array.array`` are passed
        through unchanged; any other type raises ``TypeError``.
        """

        def process(value):
            if value is None or isinstance(value, array.array):
                return value

            # Convert list to an array.array whose typecode matches the
            # configured storage format ("d" when none is configured).
            elif isinstance(value, list):
                typecode = self._array_typecode(self.storage_format)
                value = array.array(typecode, value)
                return value

            else:
                raise TypeError("VECTOR accepts list or array.array()")

        return process

    def _cached_result_processor(self, dialect, coltype):
        """
        Convert an array.array fetched from the database into a list.

        Any other value, including ``None``, is returned unchanged.
        """

        def process(value):
            if isinstance(value, array.array):
                return list(value)
            # Pass everything else through rather than silently replacing
            # it with None.
            return value

        return process

    def _array_typecode(self, typecode):
        """
        Map storage format to array typecode.

        Despite its name, ``typecode`` is the
        :class:`.VectorStorageFormat` member (or ``None``) to look up.
        Unknown or missing formats fall back to ``"d"`` (double).
        """
        return self._typecode_map.get(typecode, "d")

    class comparator_factory(types.TypeEngine.Comparator):
        """Comparator adding vector distance operators to VECTOR columns.

        Each method renders a binary operator expression whose result
        type is :class:`.Float`.
        """

        # NOTE(review): the method names suggest Euclidean / dot product /
        # cosine semantics for these operators; confirm the exact meaning
        # (e.g. sign of "<#>") against the Oracle documentation.

        def l2_distance(self, other):
            """Return the expression ``self <-> other``."""
            return self.op("<->", return_type=Float)(other)

        def inner_product(self, other):
            """Return the expression ``self <#> other``."""
            return self.op("<#>", return_type=Float)(other)

        def cosine_distance(self, other):
            """Return the expression ``self <=> other``."""
            return self.op("<=>", return_type=Float)(other)