Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/oracle/vector.py: 57%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

111 statements  

1# dialects/oracle/vector.py 

2# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors 

3# <see AUTHORS file> 

4# 

5# This module is part of SQLAlchemy and is released under 

6# the MIT License: https://www.opensource.org/licenses/mit-license.php 

7# mypy: ignore-errors 

8 

9 

10from __future__ import annotations 

11 

12import array 

13from dataclasses import dataclass 

14from enum import Enum 

15from typing import Optional 

16from typing import Union 

17 

18from ... import types 

19from ...types import Float 

20 

21 

class VectorIndexType(Enum):
    """Kinds of index structure available for an Oracle VECTOR index.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    HNSW = "HNSW"
    """Hierarchical Navigable Small World (HNSW) index type."""

    IVF = "IVF"
    """Inverted File Index (IVF) index type."""

39 

40 

class VectorDistanceType(Enum):
    """Distance metrics that can be used when comparing vectors.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    EUCLIDEAN = "EUCLIDEAN"
    """Euclidean distance (L2 norm): the straight-line distance between
    two vectors in space.
    """

    DOT = "DOT"
    """Dot product similarity: the algebraic similarity between two
    vectors.
    """

    COSINE = "COSINE"
    """Cosine similarity: the cosine of the angle between two vectors."""

    MANHATTAN = "MANHATTAN"
    """Manhattan distance (L1 norm): the sum of absolute differences
    across dimensions.
    """

70 

71 

class VectorStorageFormat(Enum):
    """Element formats in which the components of a vector can be stored.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    INT8 = "INT8"
    """8-bit integer format."""

    BINARY = "BINARY"
    """Binary format."""

    FLOAT32 = "FLOAT32"
    """32-bit floating-point format."""

    FLOAT64 = "FLOAT64"
    """64-bit floating-point format."""

97 

98 

class VectorStorageType(Enum):
    """Whether a vector is stored as a sparse or a dense vector.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.43

    """

    SPARSE = "SPARSE"
    """A sparse vector: one whose value is zero in most of its
    dimensions.
    """

    DENSE = "DENSE"
    """A dense vector: one where most, if not all, elements hold
    meaningful values.
    """

118 

119 

@dataclass
class VectorIndexConfig:
    """Define the configuration for Oracle VECTOR Index.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    :param index_type: Enum value from :class:`.VectorIndexType`
     Specifies the indexing method. For HNSW, this must be
     :attr:`.VectorIndexType.HNSW`.

    :param distance: Enum value from :class:`.VectorDistanceType`
     specifies the metric for calculating distance between VECTORS.

    :param accuracy: integer. Should be in the range 0 to 100
     Specifies the accuracy of the nearest neighbor search during
     query execution.

    :param parallel: integer. Specifies degree of parallelism.

    :param hnsw_neighbors: integer. Should be in the range 0 to
     2048. Specifies the number of nearest neighbors considered
     during the search. The attribute :attr:`.VectorIndexConfig.hnsw_neighbors`
     is HNSW index specific.

    :param hnsw_efconstruction: integer. Should be in the range 0
     to 65535. Controls the trade-off between indexing speed and
     recall quality during index construction. The attribute
     :attr:`.VectorIndexConfig.hnsw_efconstruction` is HNSW index
     specific.

    :param ivf_neighbor_partitions: integer. Should be in the range
     0 to 10,000,000. Specifies the number of partitions used to
     divide the dataset. The attribute
     :attr:`.VectorIndexConfig.ivf_neighbor_partitions` is IVF index
     specific.

    :param ivf_sample_per_partition: integer. Should be between 1
     and ``num_vectors / neighbor partitions``. Specifies the
     number of samples used per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_sample_per_partition` is IVF index
     specific.

    :param ivf_min_vectors_per_partition: integer. From 0 (no trimming)
     to the total number of vectors (results in 1 partition). Specifies
     the minimum number of vectors per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_min_vectors_per_partition`
     is IVF index specific.

    :raises ValueError: if ``index_type`` is not a valid
     :class:`.VectorIndexType` member or member value.

    :raises TypeError: if any of the integer options is given a
     non-integer, non-``None`` value.

    """

    index_type: VectorIndexType = VectorIndexType.HNSW
    distance: Optional[VectorDistanceType] = None
    accuracy: Optional[int] = None
    hnsw_neighbors: Optional[int] = None
    hnsw_efconstruction: Optional[int] = None
    ivf_neighbor_partitions: Optional[int] = None
    ivf_sample_per_partition: Optional[int] = None
    ivf_min_vectors_per_partition: Optional[int] = None
    parallel: Optional[int] = None

    def __post_init__(self):
        """Normalize ``index_type`` and type-check the integer options.

        Only the *type* of each integer option is checked here; the
        documented value ranges are not enforced by this method.
        """
        # Enum lookup by value: accepts an existing member or its string
        # value ("HNSW" / "IVF") and raises ValueError for anything else.
        self.index_type = VectorIndexType(self.index_type)

        for name in (
            "hnsw_neighbors",
            "hnsw_efconstruction",
            "ivf_neighbor_partitions",
            "ivf_sample_per_partition",
            "ivf_min_vectors_per_partition",
            "parallel",
            "accuracy",
        ):
            value = getattr(self, name)
            if value is not None and not isinstance(value, int):
                # The trailing space matters: the two fragments are
                # concatenated and previously rendered as "ifprovided".
                raise TypeError(
                    f"{name} must be an integer if "
                    f"provided, got {type(value).__name__}"
                )

199 

200 

class SparseVector:
    """
    Minimal SQLAlchemy-level counterpart of ``oracledb.SparseVector``.

    Stores the declared number of dimensions together with two
    equal-length arrays, ``indices`` and ``values``.

    .. versionadded:: 2.0.43

    """

    def __init__(
        self,
        num_dimensions: int,
        indices: Union[list, array.array],
        values: Union[list, array.array],
    ):
        """Build a sparse vector.

        :param num_dimensions: stored as given on ``num_dimensions``.

        :param indices: a list or ``array.array``; anything other than an
         ``array.array`` with typecode ``"I"`` is copied into one.

        :param values: a list or ``array.array``; an existing
         ``array.array`` is kept as is (whatever its typecode), anything
         else is copied into an ``array.array`` with typecode ``"d"``.

        :raises TypeError: if ``indices`` and ``values`` differ in length.
        """
        already_uint_array = (
            isinstance(indices, array.array) and indices.typecode == "I"
        )
        if not already_uint_array:
            indices = array.array("I", indices)

        if isinstance(values, array.array):
            pass  # keep the caller's array and its typecode untouched
        else:
            values = array.array("d", values)

        if len(values) != len(indices):
            raise TypeError("indices and values must be of the same length!")

        self.num_dimensions = num_dimensions
        self.indices = indices
        self.values = values

    def __str__(self):
        """Return a short summary: dimensions, entry count and typecode."""
        size = len(self.indices)
        typecode = self.values.typecode
        return (
            f"SparseVector(num_dimensions={self.num_dimensions}, "
            f"size={size}, typecode={typecode})"
        )

232 

233 

class VECTOR(types.TypeEngine):
    """Oracle VECTOR datatype.

    For complete background on using this type, see
    :ref:`oracle_vector_datatype`.

    .. versionadded:: 2.0.41

    """

    cache_ok = True

    __visit_name__ = "VECTOR"

    # Storage format -> ``array.array`` typecode used when a Python list
    # is converted for binding.
    _typecode_map = {
        VectorStorageFormat.INT8: "b",  # Signed int
        VectorStorageFormat.BINARY: "B",  # Unsigned int
        VectorStorageFormat.FLOAT32: "f",  # Float
        VectorStorageFormat.FLOAT64: "d",  # Double
    }

    def __init__(self, dim=None, storage_format=None, storage_type=None):
        """Construct a VECTOR.

        :param dim: integer. The dimension of the VECTOR datatype. This
         should be an integer value.

        :param storage_format: VectorStorageFormat. The VECTOR storage
         type format. This should be Enum values from
         :class:`.VectorStorageFormat` INT8, BINARY, FLOAT32, or FLOAT64.

        :param storage_type: VectorStorageType. The Vector storage type. This
         should be Enum values from :class:`.VectorStorageType` SPARSE or
         DENSE.

        :raises TypeError: if any argument is given with the wrong type.

        """

        if dim is not None and not isinstance(dim, int):
            raise TypeError("dim must be an integer")
        if storage_format is not None and not isinstance(
            storage_format, VectorStorageFormat
        ):
            raise TypeError(
                "storage_format must be an enum of type VectorStorageFormat"
            )
        if storage_type is not None and not isinstance(
            storage_type, VectorStorageType
        ):
            raise TypeError(
                "storage_type must be an enum of type VectorStorageType"
            )

        self.dim = dim
        self.storage_format = storage_format
        self.storage_type = storage_type

    def _cached_bind_processor(self, dialect):
        """
        Return a callable that prepares a Python value for binding.

        ``None`` and ``array.array`` values are passed through unchanged,
        a ``list`` is converted to an ``array.array`` whose typecode is
        derived from ``storage_format``, and a SQLAlchemy-side
        :class:`.SparseVector` is converted to the driver's
        ``SparseVector``.  Any other value raises ``TypeError``.
        """

        def process(value):
            if value is None or isinstance(value, array.array):
                return value

            # Convert list to an array.array
            elif isinstance(value, list):
                typecode = self._array_typecode(self.storage_format)
                return array.array(typecode, value)

            # Convert SqlAlchemy SparseVector to oracledb SparseVector object
            elif isinstance(value, SparseVector):
                return dialect.dbapi.SparseVector(
                    value.num_dimensions,
                    value.indices,
                    value.values,
                )

            else:
                # Single-line message: a triple-quoted literal here would
                # embed newlines and source indentation in the error text.
                raise TypeError(
                    "Invalid input for VECTOR: expected a list, an "
                    "array.array, or a SparseVector object."
                )

        return process

    def _cached_result_processor(self, dialect, coltype):
        """
        Return a callable that converts database-returned values.

        ``None`` stays ``None``, an ``array.array`` is converted to a
        plain Python list, and the driver's ``SparseVector`` is converted
        to the SQLAlchemy-side :class:`.SparseVector`.  Any other value
        is returned unchanged.
        """

        def process(value):
            if value is None:
                return None

            elif isinstance(value, array.array):
                return list(value)

            # Convert Oracledb SparseVector to SqlAlchemy SparseVector object
            elif isinstance(value, dialect.dbapi.SparseVector):
                return SparseVector(
                    num_dimensions=value.num_dimensions,
                    indices=value.indices,
                    values=value.values,
                )

            # Unrecognized representation: hand it back as received
            # rather than implicitly returning None, which would make a
            # non-NULL column value look like NULL.
            return value

        return process

    def _array_typecode(self, typecode):
        """
        Map a storage format to an ``array.array`` typecode.

        Despite its name, ``typecode`` is the storage format to look up
        in ``_typecode_map``; ``"d"`` is returned when it is not present
        in the map (including ``None``).
        """
        return self._typecode_map.get(typecode, "d")

    class comparator_factory(types.TypeEngine.Comparator):
        """Comparison operators available on VECTOR expressions."""

        def l2_distance(self, other):
            """Produce ``self <-> other`` typed as :class:`.Float`."""
            return self.op("<->", return_type=Float)(other)

        def inner_product(self, other):
            """Produce ``self <#> other`` typed as :class:`.Float`."""
            return self.op("<#>", return_type=Float)(other)

        def cosine_distance(self, other):
            """Produce ``self <=> other`` typed as :class:`.Float`."""
            return self.op("<=>", return_type=Float)(other)

363 

364 def cosine_distance(self, other): 

365 return self.op("<=>", return_type=Float)(other)