Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/sqlalchemy/dialects/oracle/vector.py: 65%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

83 statements  

1# dialects/oracle/vector.py 

2# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors 

3# <see AUTHORS file> 

4# 

5# This module is part of SQLAlchemy and is released under 

6# the MIT License: https://www.opensource.org/licenses/mit-license.php 

7# mypy: ignore-errors 

8 

9 

10from __future__ import annotations 

11 

12import array 

13from dataclasses import dataclass 

14from enum import Enum 

15from typing import Optional 

16 

17import sqlalchemy.types as types 

18from sqlalchemy.types import Float 

19 

20 

class VectorIndexType(Enum):
    """Enum representing different types of VECTOR index structures.

    Each member's value is the same upper-case string as its name, so a
    member can be looked up from its string form, e.g.
    ``VectorIndexType("HNSW")``.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    HNSW = "HNSW"
    """
    The HNSW (Hierarchical Navigable Small World) index type.
    """
    IVF = "IVF"
    """
    The IVF (Inverted File Index) index type.
    """

38 

39 

class VectorDistanceType(Enum):
    """Enum representing different types of vector distance metrics.

    Each member's value is the same upper-case string as its name.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    EUCLIDEAN = "EUCLIDEAN"
    """Euclidean distance (L2 norm).

    Measures the straight-line distance between two vectors in space.
    """
    DOT = "DOT"
    """Dot product similarity.

    Measures the algebraic similarity between two vectors.
    """
    COSINE = "COSINE"
    """Cosine similarity.

    Measures the cosine of the angle between two vectors.
    """
    MANHATTAN = "MANHATTAN"
    """Manhattan distance (L1 norm).

    Calculates the sum of absolute differences across dimensions.
    """

69 

70 

class VectorStorageFormat(Enum):
    """Enum representing the data format used to store vector components.

    Each member's value is the same upper-case string as its name.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    INT8 = "INT8"
    """
    8-bit integer format.
    """
    BINARY = "BINARY"
    """
    Binary format.
    """
    FLOAT32 = "FLOAT32"
    """
    32-bit floating-point format.
    """
    FLOAT64 = "FLOAT64"
    """
    64-bit floating-point format.
    """

96 

97 

@dataclass
class VectorIndexConfig:
    """Define the configuration for Oracle VECTOR Index.

    See :ref:`oracle_vector_datatype` for background.

    Only the *types* of the values are validated on construction (see
    ``__post_init__``); the numeric ranges documented below are not
    checked by this class.

    .. versionadded:: 2.0.41

    :param index_type: Enum value from :class:`.VectorIndexType`
     Specifies the indexing method. For HNSW, this must be
     :attr:`.VectorIndexType.HNSW`.

    :param distance: Enum value from :class:`.VectorDistanceType`
     specifies the metric for calculating distance between VECTORS.

    :param accuracy: integer. Should be in the range 0 to 100
     Specifies the accuracy of the nearest neighbor search during
     query execution.

    :param parallel: integer. Specifies degree of parallelism.

    :param hnsw_neighbors: integer. Should be in the range 0 to
     2048. Specifies the number of nearest neighbors considered
     during the search. The attribute
     :attr:`.VectorIndexConfig.hnsw_neighbors` is HNSW index specific.

    :param hnsw_efconstruction: integer. Should be in the range 0
     to 65535. Controls the trade-off between indexing speed and
     recall quality during index construction. The attribute
     :attr:`.VectorIndexConfig.hnsw_efconstruction` is HNSW index
     specific.

    :param ivf_neighbor_partitions: integer. Should be in the range
     0 to 10,000,000. Specifies the number of partitions used to
     divide the dataset. The attribute
     :attr:`.VectorIndexConfig.ivf_neighbor_partitions` is IVF index
     specific.

    :param ivf_sample_per_partition: integer. Should be between 1
     and ``num_vectors / neighbor partitions``. Specifies the
     number of samples used per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_sample_per_partition` is IVF index
     specific.

    :param ivf_min_vectors_per_partition: integer. From 0 (no trimming)
     to the total number of vectors (results in 1 partition). Specifies
     the minimum number of vectors per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_min_vectors_per_partition`
     is IVF index specific.

    :raises ValueError: if ``index_type`` is not a valid
     :class:`.VectorIndexType` member or member value.

    :raises TypeError: if any of the integer options is given a
     non-``None`` value that is not an ``int``.

    """

    index_type: VectorIndexType = VectorIndexType.HNSW
    distance: Optional[VectorDistanceType] = None
    accuracy: Optional[int] = None
    hnsw_neighbors: Optional[int] = None
    hnsw_efconstruction: Optional[int] = None
    ivf_neighbor_partitions: Optional[int] = None
    ivf_sample_per_partition: Optional[int] = None
    ivf_min_vectors_per_partition: Optional[int] = None
    parallel: Optional[int] = None

    def __post_init__(self):
        """Normalize ``index_type`` and type-check the integer options."""
        # Calling the enum accepts either a member or its string value and
        # raises ValueError for anything else.
        self.index_type = VectorIndexType(self.index_type)
        for name in (
            "hnsw_neighbors",
            "hnsw_efconstruction",
            "ivf_neighbor_partitions",
            "ivf_sample_per_partition",
            "ivf_min_vectors_per_partition",
            "parallel",
            "accuracy",
        ):
            value = getattr(self, name)
            if value is not None and not isinstance(value, int):
                # The trailing space keeps the two adjacent f-string
                # pieces from fusing into "ifprovided".
                raise TypeError(
                    f"{name} must be an integer if "
                    f"provided, got {type(value).__name__}"
                )

177 

178 

class VECTOR(types.TypeEngine):
    """Oracle VECTOR datatype.

    For complete background on using this type, see
    :ref:`oracle_vector_datatype`.

    .. versionadded:: 2.0.41

    """

    cache_ok = True
    __visit_name__ = "VECTOR"

    # array.array typecode used when a Python list is converted for binding.
    _typecode_map = {
        VectorStorageFormat.INT8: "b",  # Signed int
        VectorStorageFormat.BINARY: "B",  # Unsigned int
        VectorStorageFormat.FLOAT32: "f",  # Float
        VectorStorageFormat.FLOAT64: "d",  # Double
    }

    def __init__(self, dim=None, storage_format=None):
        """Construct a VECTOR.

        :param dim: integer. The dimension of the VECTOR datatype. This
         should be an integer value.

        :param storage_format: VectorStorageFormat. The VECTOR storage
         type format. This may be Enum values from
         :class:`.VectorStorageFormat` INT8, BINARY, FLOAT32, or FLOAT64.

        :raises TypeError: if ``dim`` is given and is not an ``int``, or
         if ``storage_format`` is given and is not a
         :class:`.VectorStorageFormat` member.

        """
        if dim is not None and not isinstance(dim, int):
            raise TypeError("dim must be an integer")
        if storage_format is not None and not isinstance(
            storage_format, VectorStorageFormat
        ):
            raise TypeError(
                "storage_format must be an enum of type VectorStorageFormat"
            )
        self.dim = dim
        self.storage_format = storage_format

    def _cached_bind_processor(self, dialect):
        """
        Convert a list to a array.array before binding it to the database.

        ``None`` and existing ``array.array`` values are passed through
        unchanged; any other type raises ``TypeError``.
        """

        def process(value):
            if value is None or isinstance(value, array.array):
                return value

            if isinstance(value, list):
                # The typecode follows the configured storage format and
                # falls back to "d" when no format was given.
                typecode = self._array_typecode(self.storage_format)
                return array.array(typecode, value)

            raise TypeError("VECTOR accepts list or array.array()")

        return process

    def _cached_result_processor(self, dialect, coltype):
        """
        Convert a array.array fetched from the database to a list.

        Any other value, including ``None``, is returned unchanged.
        """

        def process(value):
            if isinstance(value, array.array):
                return list(value)
            # Hand back anything else as-is instead of falling off the end
            # of the function, which would replace the value with None.
            return value

        return process

    def _array_typecode(self, typecode):
        """
        Map storage format to array typecode.

        :param typecode: despite its name, this is the
         :class:`.VectorStorageFormat` member (or ``None``) to look up.

        :return: the ``array.array`` typecode for that format, or ``"d"``
         when the format is not present in the map.
        """
        return self._typecode_map.get(typecode, "d")

    class comparator_factory(types.TypeEngine.Comparator):
        """Comparator adding the vector distance operators to expressions."""

        def l2_distance(self, other):
            """Render ``self <-> other``; the result is typed as Float."""
            return self.op("<->", return_type=Float)(other)

        def inner_product(self, other):
            """Render ``self <#> other``; the result is typed as Float."""
            return self.op("<#>", return_type=Float)(other)

        def cosine_distance(self, other):
            """Render ``self <=> other``; the result is typed as Float."""
            return self.op("<=>", return_type=Float)(other)