Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/oracle/vector.py: 57%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

111 statements  

1# dialects/oracle/vector.py 

2# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors 

3# <see AUTHORS file> 

4# 

5# This module is part of SQLAlchemy and is released under 

6# the MIT License: https://www.opensource.org/licenses/mit-license.php 

7# mypy: ignore-errors 

8 

9 

10from __future__ import annotations 

11 

12import array 

13from dataclasses import dataclass 

14from enum import Enum 

15from typing import Optional 

16from typing import Union 

17 

18import sqlalchemy.types as types 

19from sqlalchemy.types import Float 

20 

21 

class VectorIndexType(Enum):
    """Kinds of index structure that can be built over a VECTOR column.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    HNSW = "HNSW"
    """Hierarchical Navigable Small World (HNSW) index."""

    IVF = "IVF"
    """Inverted File Index (IVF) index."""

39 

40 

class VectorDistanceType(Enum):
    """Distance metrics available when comparing vectors.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    EUCLIDEAN = "EUCLIDEAN"
    """Euclidean distance (L2 norm): the straight-line distance between
    two vectors in space.
    """

    DOT = "DOT"
    """Dot product similarity: the algebraic similarity between two
    vectors.
    """

    COSINE = "COSINE"
    """Cosine similarity: the cosine of the angle between two vectors."""

    MANHATTAN = "MANHATTAN"
    """Manhattan distance (L1 norm): the sum of absolute differences
    across dimensions.
    """

70 

71 

class VectorStorageFormat(Enum):
    """Data formats in which the components of a vector can be stored.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    INT8 = "INT8"
    """8-bit integer components."""

    BINARY = "BINARY"
    """Binary components."""

    FLOAT32 = "FLOAT32"
    """32-bit floating-point components."""

    FLOAT64 = "FLOAT64"
    """64-bit floating-point components."""

97 

98 

class VectorStorageType(Enum):
    """Whether a vector is stored in sparse or dense form.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.43

    """

    SPARSE = "SPARSE"
    """Sparse vector: zero-valued in most of its dimensions."""

    DENSE = "DENSE"
    """Dense vector: most, if not all, elements hold meaningful values."""

118 

119 

@dataclass
class VectorIndexConfig:
    """Define the configuration for Oracle VECTOR Index.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    :param index_type: Enum value from :class:`.VectorIndexType`
     Specifies the indexing method. For HNSW, this must be
     :attr:`.VectorIndexType.HNSW`.

    :param distance: Enum value from :class:`.VectorDistanceType`
     specifies the metric for calculating distance between VECTORS.

    :param accuracy: integer. Should be in the range 0 to 100
     Specifies the accuracy of the nearest neighbor search during
     query execution.

    :param parallel: integer. Specifies degree of parallelism.

    :param hnsw_neighbors: integer. Should be in the range 0 to
     2048. Specifies the number of nearest neighbors considered
     during the search. The attribute :attr:`.VectorIndexConfig.hnsw_neighbors`
     is HNSW index specific.

    :param hnsw_efconstruction: integer. Should be in the range 0
     to 65535. Controls the trade-off between indexing speed and
     recall quality during index construction. The attribute
     :attr:`.VectorIndexConfig.hnsw_efconstruction` is HNSW index
     specific.

    :param ivf_neighbor_partitions: integer. Should be in the range
     0 to 10,000,000. Specifies the number of partitions used to
     divide the dataset. The attribute
     :attr:`.VectorIndexConfig.ivf_neighbor_partitions` is IVF index
     specific.

    :param ivf_sample_per_partition: integer. Should be between 1
     and ``num_vectors / neighbor partitions``. Specifies the
     number of samples used per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_sample_per_partition` is IVF index
     specific.

    :param ivf_min_vectors_per_partition: integer. From 0 (no trimming)
     to the total number of vectors (results in 1 partition). Specifies
     the minimum number of vectors per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_min_vectors_per_partition`
     is IVF index specific.

    """

    index_type: VectorIndexType = VectorIndexType.HNSW
    distance: Optional[VectorDistanceType] = None
    accuracy: Optional[int] = None
    hnsw_neighbors: Optional[int] = None
    hnsw_efconstruction: Optional[int] = None
    ivf_neighbor_partitions: Optional[int] = None
    ivf_sample_per_partition: Optional[int] = None
    ivf_min_vectors_per_partition: Optional[int] = None
    parallel: Optional[int] = None

    def __post_init__(self):
        """Normalize ``index_type`` and type-check the numeric options.

        ``index_type`` is passed through the :class:`.VectorIndexType`
        constructor, so either a member or a member's value is accepted.
        Only the *type* of the numeric options is checked here; the
        ranges described in the class docstring are not enforced.

        :raises ValueError: if ``index_type`` is not a valid
         :class:`.VectorIndexType` member or value.
        :raises TypeError: if any numeric option is set to something
         other than an ``int``.
        """
        self.index_type = VectorIndexType(self.index_type)
        for name in (
            "hnsw_neighbors",
            "hnsw_efconstruction",
            "ivf_neighbor_partitions",
            "ivf_sample_per_partition",
            "ivf_min_vectors_per_partition",
            "parallel",
            "accuracy",
        ):
            value = getattr(self, name)
            if value is not None and not isinstance(value, int):
                # The trailing space after "if" matters: the two literals
                # are concatenated into a single message.
                raise TypeError(
                    f"{name} must be an integer if "
                    f"provided, got {type(value).__name__}"
                )

199 

200 

class SparseVector:
    """
    SQLAlchemy-side sparse vector value, modeled on
    ``oracledb.SparseVector``.

    .. versionadded:: 2.0.43

    """

    def __init__(
        self,
        num_dimensions: int,
        indices: Union[list, array.array],
        values: Union[list, array.array],
    ):
        """Store the dimension count with index / value arrays.

        :param num_dimensions: stored as given, without validation.
        :param indices: kept as-is when already an ``array.array`` with
         typecode ``"I"``; otherwise copied into a new ``"I"`` array.
        :param values: kept as-is when already an ``array.array`` of any
         typecode; otherwise copied into a new ``"d"`` array.
        :raises TypeError: if ``indices`` and ``values`` differ in length.
        """
        is_uint_array = (
            isinstance(indices, array.array) and indices.typecode == "I"
        )
        index_array = indices if is_uint_array else array.array("I", indices)

        if isinstance(values, array.array):
            value_array = values
        else:
            value_array = array.array("d", values)

        if len(index_array) != len(value_array):
            raise TypeError("indices and values must be of the same length!")

        self.num_dimensions = num_dimensions
        self.indices = index_array
        self.values = value_array

    def __str__(self):
        """Summarize dimension count, stored entry count and typecode."""
        dims = self.num_dimensions
        count = len(self.indices)
        code = self.values.typecode
        return (
            f"SparseVector(num_dimensions={dims}, "
            f"size={count}, typecode={code})"
        )

232 

233 

class VECTOR(types.TypeEngine):
    """Oracle VECTOR datatype.

    For complete background on using this type, see
    :ref:`oracle_vector_datatype`.

    .. versionadded:: 2.0.41

    """

    cache_ok = True
    __visit_name__ = "VECTOR"

    # array.array typecode used when a plain Python list is bound, keyed
    # by the storage format of the column.
    _typecode_map = {
        VectorStorageFormat.INT8: "b",  # Signed int
        VectorStorageFormat.BINARY: "B",  # Unsigned int
        VectorStorageFormat.FLOAT32: "f",  # Float
        VectorStorageFormat.FLOAT64: "d",  # Double
    }

    def __init__(self, dim=None, storage_format=None, storage_type=None):
        """Construct a VECTOR.

        :param dim: integer. The dimension of the VECTOR datatype. This
         should be an integer value.

        :param storage_format: VectorStorageFormat. The VECTOR storage
         type format. This should be Enum values from
         :class:`.VectorStorageFormat` INT8, BINARY, FLOAT32, or FLOAT64.

        :param storage_type: VectorStorageType. The Vector storage type. This
         should be Enum values from :class:`.VectorStorageType` SPARSE or
         DENSE.

        :raises TypeError: if ``dim`` is not an ``int``, or if
         ``storage_format`` / ``storage_type`` is not a member of its
         respective enum. ``None`` is accepted for all three.

        """

        if dim is not None and not isinstance(dim, int):
            raise TypeError("dim must be an integer")
        if storage_format is not None and not isinstance(
            storage_format, VectorStorageFormat
        ):
            raise TypeError(
                "storage_format must be an enum of type VectorStorageFormat"
            )
        if storage_type is not None and not isinstance(
            storage_type, VectorStorageType
        ):
            raise TypeError(
                "storage_type must be an enum of type VectorStorageType"
            )

        self.dim = dim
        self.storage_format = storage_format
        self.storage_type = storage_type

    def _cached_bind_processor(self, dialect):
        """
        Converts a Python-side SparseVector instance into an
        oracledb.SparseVector or a compatible array format before
        binding it to the database.

        The returned callable passes ``None`` and ``array.array`` values
        through unchanged, converts a ``list`` to an ``array.array`` whose
        typecode follows ``self.storage_format``, converts a
        :class:`.SparseVector` using ``dialect.dbapi.SparseVector``, and
        raises ``TypeError`` for anything else.
        """

        def process(value):
            if value is None or isinstance(value, array.array):
                return value

            # Convert list to a array.array
            if isinstance(value, list):
                typecode = self._array_typecode(self.storage_format)
                return array.array(typecode, value)

            # Convert SqlAlchemy SparseVector to oracledb SparseVector object
            if isinstance(value, SparseVector):
                return dialect.dbapi.SparseVector(
                    value.num_dimensions,
                    value.indices,
                    value.values,
                )

            # Adjacent string literals keep the message on one line, free
            # of the newlines and indentation a triple-quoted literal
            # would embed.
            raise TypeError(
                "Invalid input for VECTOR: expected a list, an array.array, "
                "or a SparseVector object."
            )

        return process

    def _cached_result_processor(self, dialect, coltype):
        """
        Converts database-returned values into Python-native representations.
        If the value is an oracledb.SparseVector, it is converted into the
        SQLAlchemy-side SparseVector class.
        If the value is a array.array, it is converted to a plain Python list.
        ``None`` stays ``None``; any other value is returned unchanged.

        """

        def process(value):
            if value is None:
                return None

            if isinstance(value, array.array):
                return list(value)

            # Convert Oracledb SparseVector to SqlAlchemy SparseVector object
            if isinstance(value, dialect.dbapi.SparseVector):
                return SparseVector(
                    num_dimensions=value.num_dimensions,
                    indices=value.indices,
                    values=value.values,
                )

            # Hand back anything else untouched rather than falling off
            # the end of the function, which would replace the fetched
            # value with None.
            return value

        return process

    def _array_typecode(self, typecode):
        """
        Map storage format to array typecode.

        Despite its name, ``typecode`` is the :class:`.VectorStorageFormat`
        to look up. Formats missing from the map, including ``None``,
        fall back to ``"d"``.
        """
        return self._typecode_map.get(typecode, "d")

    class comparator_factory(types.TypeEngine.Comparator):
        """Comparison operators available on VECTOR expressions.

        Each method builds a custom operator expression whose return
        type is ``Float``.
        """

        def l2_distance(self, other):
            """Build ``self <-> other``."""
            return self.op("<->", return_type=Float)(other)

        def inner_product(self, other):
            """Build ``self <#> other``."""
            return self.op("<#>", return_type=Float)(other)

        def cosine_distance(self, other):
            """Build ``self <=> other``."""
            return self.op("<=>", return_type=Float)(other)

362 

363 def cosine_distance(self, other): 

364 return self.op("<=>", return_type=Float)(other)