/src/mozilla-central/storage/mozStorageSQLFunctions.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
2 | | * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ : |
3 | | * This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "mozilla/ArrayUtils.h" |
8 | | |
9 | | #include "mozStorageSQLFunctions.h" |
10 | | #include "nsUnicharUtils.h" |
11 | | #include <algorithm> |
12 | | |
13 | | namespace mozilla { |
14 | | namespace storage { |
15 | | |
16 | | //////////////////////////////////////////////////////////////////////////////// |
17 | | //// Local Helper Functions |
18 | | |
19 | | namespace { |
20 | | |
21 | | /** |
22 | | * Performs the LIKE comparison of a string against a pattern. For more detail |
23 | | * see http://www.sqlite.org/lang_expr.html#like. |
24 | | * |
25 | | * @param aPatternItr |
26 | | * An iterator at the start of the pattern to check for. |
27 | | * @param aPatternEnd |
28 | | * An iterator at the end of the pattern to check for. |
29 | | * @param aStringItr |
30 | | * An iterator at the start of the string to check for the pattern. |
31 | | * @param aStringEnd |
32 | | * An iterator at the end of the string to check for the pattern. |
33 | | * @param aEscapeChar |
34 | | * The character to use for escaping symbols in the pattern. |
35 | | * @return 1 if the pattern is found, 0 otherwise. |
36 | | */ |
37 | | int |
38 | | likeCompare(nsAString::const_iterator aPatternItr, |
39 | | nsAString::const_iterator aPatternEnd, |
40 | | nsAString::const_iterator aStringItr, |
41 | | nsAString::const_iterator aStringEnd, |
42 | | char16_t aEscapeChar) |
43 | 0 | { |
44 | 0 | const char16_t MATCH_ALL('%'); |
45 | 0 | const char16_t MATCH_ONE('_'); |
46 | 0 |
|
47 | 0 | bool lastWasEscape = false; |
48 | 0 | while (aPatternItr != aPatternEnd) { |
49 | 0 | /** |
50 | 0 | * What we do in here is take a look at each character from the input |
51 | 0 | * pattern, and do something with it. There are 4 possibilities: |
52 | 0 | * 1) character is an un-escaped match-all character |
53 | 0 | * 2) character is an un-escaped match-one character |
54 | 0 | * 3) character is an un-escaped escape character |
55 | 0 | * 4) character is not any of the above |
56 | 0 | */ |
57 | 0 | if (!lastWasEscape && *aPatternItr == MATCH_ALL) { |
58 | 0 | // CASE 1 |
59 | 0 | /** |
60 | 0 | * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a |
61 | 0 | * MATCH_ALL character. For each MATCH_ONE character, skip one character |
62 | 0 | * in the pattern string. |
63 | 0 | */ |
64 | 0 | while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) { |
65 | 0 | if (*aPatternItr == MATCH_ONE) { |
66 | 0 | // If we've hit the end of the string we are testing, no match |
67 | 0 | if (aStringItr == aStringEnd) |
68 | 0 | return 0; |
69 | 0 | aStringItr++; |
70 | 0 | } |
71 | 0 | aPatternItr++; |
72 | 0 | } |
73 | 0 |
|
74 | 0 | // If we've hit the end of the pattern string, match |
75 | 0 | if (aPatternItr == aPatternEnd) |
76 | 0 | return 1; |
77 | 0 | |
78 | 0 | while (aStringItr != aStringEnd) { |
79 | 0 | if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd, |
80 | 0 | aEscapeChar)) { |
81 | 0 | // we've hit a match, so indicate this |
82 | 0 | return 1; |
83 | 0 | } |
84 | 0 | aStringItr++; |
85 | 0 | } |
86 | 0 |
|
87 | 0 | // No match |
88 | 0 | return 0; |
89 | 0 | } |
90 | 0 | else if (!lastWasEscape && *aPatternItr == MATCH_ONE) { |
91 | 0 | // CASE 2 |
92 | 0 | if (aStringItr == aStringEnd) { |
93 | 0 | // If we've hit the end of the string we are testing, no match |
94 | 0 | return 0; |
95 | 0 | } |
96 | 0 | aStringItr++; |
97 | 0 | lastWasEscape = false; |
98 | 0 | } |
99 | 0 | else if (!lastWasEscape && *aPatternItr == aEscapeChar) { |
100 | 0 | // CASE 3 |
101 | 0 | lastWasEscape = true; |
102 | 0 | } |
103 | 0 | else { |
104 | 0 | // CASE 4 |
105 | 0 | if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) { |
106 | 0 | // If we've hit a point where the strings don't match, there is no match |
107 | 0 | return 0; |
108 | 0 | } |
109 | 0 | aStringItr++; |
110 | 0 | lastWasEscape = false; |
111 | 0 | } |
112 | 0 |
|
113 | 0 | aPatternItr++; |
114 | 0 | } |
115 | 0 |
|
116 | 0 | return aStringItr == aStringEnd; |
117 | 0 | } |
118 | | |
119 | | /** |
120 | | * Compute the Levenshtein Edit Distance between two strings. |
121 | | * |
122 | | * @param aStringS |
123 | | * a string |
124 | | * @param aStringT |
125 | | * another string |
126 | | * @param _result |
127 | | * an outparam that will receive the edit distance between the arguments |
128 | | * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc. |
129 | | */ |
130 | | int |
131 | | levenshteinDistance(const nsAString &aStringS, |
132 | | const nsAString &aStringT, |
133 | | int *_result) |
134 | 0 | { |
135 | 0 | // Set the result to a non-sensical value in case we encounter an error. |
136 | 0 | *_result = -1; |
137 | 0 |
|
138 | 0 | const uint32_t sLen = aStringS.Length(); |
139 | 0 | const uint32_t tLen = aStringT.Length(); |
140 | 0 |
|
141 | 0 | if (sLen == 0) { |
142 | 0 | *_result = tLen; |
143 | 0 | return SQLITE_OK; |
144 | 0 | } |
145 | 0 | if (tLen == 0) { |
146 | 0 | *_result = sLen; |
147 | 0 | return SQLITE_OK; |
148 | 0 | } |
149 | 0 | |
150 | 0 | // Notionally, Levenshtein Distance is computed in a matrix. If we |
151 | 0 | // assume s = "span" and t = "spam", the matrix would look like this: |
152 | 0 | // s --> |
153 | 0 | // t s p a n |
154 | 0 | // | 0 1 2 3 4 |
155 | 0 | // V s 1 * * * * |
156 | 0 | // p 2 * * * * |
157 | 0 | // a 3 * * * * |
158 | 0 | // m 4 * * * * |
159 | 0 | // |
160 | 0 | // Note that the row width is sLen + 1 and the column height is tLen + 1, |
161 | 0 | // where sLen is the length of the string "s" and tLen is the length of "t". |
162 | 0 | // The first row and the first column are initialized as shown, and |
163 | 0 | // the algorithm computes the remaining cells row-by-row, and |
164 | 0 | // left-to-right within each row. The computation only requires that |
165 | 0 | // we be able to see the current row and the previous one. |
166 | 0 | |
167 | 0 | // Allocate memory for two rows. |
168 | 0 | AutoTArray<int, nsAutoString::kStorageSize> row1; |
169 | 0 | AutoTArray<int, nsAutoString::kStorageSize> row2; |
170 | 0 |
|
171 | 0 | // Declare the raw pointers that will actually be used to access the memory. |
172 | 0 | int *prevRow = row1.AppendElements(sLen + 1); |
173 | 0 | int *currRow = row2.AppendElements(sLen + 1); |
174 | 0 |
|
175 | 0 | // Initialize the first row. |
176 | 0 | for (uint32_t i = 0; i <= sLen; i++) |
177 | 0 | prevRow[i] = i; |
178 | 0 |
|
179 | 0 | const char16_t *s = aStringS.BeginReading(); |
180 | 0 | const char16_t *t = aStringT.BeginReading(); |
181 | 0 |
|
182 | 0 | // Compute the empty cells in the "matrix" row-by-row, starting with |
183 | 0 | // the second row. |
184 | 0 | for (uint32_t ti = 1; ti <= tLen; ti++) { |
185 | 0 |
|
186 | 0 | // Initialize the first cell in this row. |
187 | 0 | currRow[0] = ti; |
188 | 0 |
|
189 | 0 | // Get the character from "t" that corresponds to this row. |
190 | 0 | const char16_t tch = t[ti - 1]; |
191 | 0 |
|
192 | 0 | // Compute the remaining cells in this row, left-to-right, |
193 | 0 | // starting at the second column (and first character of "s"). |
194 | 0 | for (uint32_t si = 1; si <= sLen; si++) { |
195 | 0 |
|
196 | 0 | // Get the character from "s" that corresponds to this column, |
197 | 0 | // compare it to the t-character, and compute the "cost". |
198 | 0 | const char16_t sch = s[si - 1]; |
199 | 0 | int cost = (sch == tch) ? 0 : 1; |
200 | 0 |
|
201 | 0 | // ............ We want to calculate the value of cell "d" from |
202 | 0 | // ...ab....... the previously calculated (or initialized) cells |
203 | 0 | // ...cd....... "a", "b", and "c", where d = min(a', b', c'). |
204 | 0 | // ............ |
205 | 0 | int aPrime = prevRow[si - 1] + cost; |
206 | 0 | int bPrime = prevRow[si] + 1; |
207 | 0 | int cPrime = currRow[si - 1] + 1; |
208 | 0 | currRow[si] = std::min(aPrime, std::min(bPrime, cPrime)); |
209 | 0 | } |
210 | 0 |
|
211 | 0 | // Advance to the next row. The current row becomes the previous |
212 | 0 | // row and we recycle the old previous row as the new current row. |
213 | 0 | // We don't need to re-initialize the new current row since we will |
214 | 0 | // rewrite all of its cells anyway. |
215 | 0 | int *oldPrevRow = prevRow; |
216 | 0 | prevRow = currRow; |
217 | 0 | currRow = oldPrevRow; |
218 | 0 | } |
219 | 0 |
|
220 | 0 | // The final result is the value of the last cell in the last row. |
221 | 0 | // Note that that's now in the "previous" row, since we just swapped them. |
222 | 0 | *_result = prevRow[sLen]; |
223 | 0 | return SQLITE_OK; |
224 | 0 | } |
225 | | |
226 | | // This struct is used only by registerFunctions below, but ISO C++98 forbids |
227 | | // instantiating a template dependent on a locally-defined type. Boo-urns! |
228 | | struct Functions { |
229 | | const char *zName; |
230 | | int nArg; |
231 | | int enc; |
232 | | void *pContext; |
233 | | void (*xFunc)(::sqlite3_context*, int, sqlite3_value**); |
234 | | }; |
235 | | |
236 | | } // namespace |
237 | | |
238 | | //////////////////////////////////////////////////////////////////////////////// |
239 | | //// Exposed Functions |
240 | | |
241 | | int |
242 | | registerFunctions(sqlite3 *aDB) |
243 | 0 | { |
244 | 0 | Functions functions[] = { |
245 | 0 | {"lower", |
246 | 0 | 1, |
247 | 0 | SQLITE_UTF16, |
248 | 0 | 0, |
249 | 0 | caseFunction}, |
250 | 0 | {"lower", |
251 | 0 | 1, |
252 | 0 | SQLITE_UTF8, |
253 | 0 | 0, |
254 | 0 | caseFunction}, |
255 | 0 | {"upper", |
256 | 0 | 1, |
257 | 0 | SQLITE_UTF16, |
258 | 0 | (void*)1, |
259 | 0 | caseFunction}, |
260 | 0 | {"upper", |
261 | 0 | 1, |
262 | 0 | SQLITE_UTF8, |
263 | 0 | (void*)1, |
264 | 0 | caseFunction}, |
265 | 0 |
|
266 | 0 | {"like", |
267 | 0 | 2, |
268 | 0 | SQLITE_UTF16, |
269 | 0 | 0, |
270 | 0 | likeFunction}, |
271 | 0 | {"like", |
272 | 0 | 2, |
273 | 0 | SQLITE_UTF8, |
274 | 0 | 0, |
275 | 0 | likeFunction}, |
276 | 0 | {"like", |
277 | 0 | 3, |
278 | 0 | SQLITE_UTF16, |
279 | 0 | 0, |
280 | 0 | likeFunction}, |
281 | 0 | {"like", |
282 | 0 | 3, |
283 | 0 | SQLITE_UTF8, |
284 | 0 | 0, |
285 | 0 | likeFunction}, |
286 | 0 |
|
287 | 0 | {"levenshteinDistance", |
288 | 0 | 2, |
289 | 0 | SQLITE_UTF16, |
290 | 0 | 0, |
291 | 0 | levenshteinDistanceFunction}, |
292 | 0 | {"levenshteinDistance", |
293 | 0 | 2, |
294 | 0 | SQLITE_UTF8, |
295 | 0 | 0, |
296 | 0 | levenshteinDistanceFunction}, |
297 | 0 | }; |
298 | 0 |
|
299 | 0 | int rv = SQLITE_OK; |
300 | 0 | for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) { |
301 | 0 | struct Functions *p = &functions[i]; |
302 | 0 | rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext, |
303 | 0 | p->xFunc, nullptr, nullptr); |
304 | 0 | } |
305 | 0 |
|
306 | 0 | return rv; |
307 | 0 | } |
308 | | |
309 | | //////////////////////////////////////////////////////////////////////////////// |
310 | | //// SQL Functions |
311 | | |
312 | | void |
313 | | caseFunction(sqlite3_context *aCtx, |
314 | | int aArgc, |
315 | | sqlite3_value **aArgv) |
316 | 0 | { |
317 | 0 | NS_ASSERTION(1 == aArgc, "Invalid number of arguments!"); |
318 | 0 |
|
319 | 0 | nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); |
320 | 0 | bool toUpper = ::sqlite3_user_data(aCtx) ? true : false; |
321 | 0 |
|
322 | 0 | if (toUpper) |
323 | 0 | ::ToUpperCase(data); |
324 | 0 | else |
325 | 0 | ::ToLowerCase(data); |
326 | 0 |
|
327 | 0 | // Set the result. |
328 | 0 | ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT); |
329 | 0 | } |
330 | | |
331 | | /** |
332 | | * This implements the like() SQL function. This is used by the LIKE operator. |
333 | | * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is |
334 | | * an escape character, say E, it is implemented as 'like(B, A, E)'. |
335 | | */ |
336 | | void |
337 | | likeFunction(sqlite3_context *aCtx, |
338 | | int aArgc, |
339 | | sqlite3_value **aArgv) |
340 | 0 | { |
341 | 0 | NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!"); |
342 | 0 |
|
343 | 0 | if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) { |
344 | 0 | ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex", |
345 | 0 | SQLITE_TOOBIG); |
346 | 0 | return; |
347 | 0 | } |
348 | 0 | |
349 | 0 | if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1])) |
350 | 0 | return; |
351 | 0 | |
352 | 0 | nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]))); |
353 | 0 | nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); |
354 | 0 | NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!"); |
355 | 0 |
|
356 | 0 | char16_t E = 0; |
357 | 0 | if (3 == aArgc) |
358 | 0 | E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0]; |
359 | 0 |
|
360 | 0 | nsAString::const_iterator itrString, endString; |
361 | 0 | A.BeginReading(itrString); |
362 | 0 | A.EndReading(endString); |
363 | 0 | nsAString::const_iterator itrPattern, endPattern; |
364 | 0 | B.BeginReading(itrPattern); |
365 | 0 | B.EndReading(endPattern); |
366 | 0 | ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString, |
367 | 0 | endString, E)); |
368 | 0 | } |
369 | | |
370 | | void levenshteinDistanceFunction(sqlite3_context *aCtx, |
371 | | int aArgc, |
372 | | sqlite3_value **aArgv) |
373 | 0 | { |
374 | 0 | NS_ASSERTION(2 == aArgc, "Invalid number of arguments!"); |
375 | 0 |
|
376 | 0 | // If either argument is a SQL NULL, then return SQL NULL. |
377 | 0 | if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL || |
378 | 0 | ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) { |
379 | 0 | ::sqlite3_result_null(aCtx); |
380 | 0 | return; |
381 | 0 | } |
382 | 0 | |
383 | 0 | int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t); |
384 | 0 | const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])); |
385 | 0 |
|
386 | 0 | int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t); |
387 | 0 | const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])); |
388 | 0 |
|
389 | 0 | // Compute the Levenshtein Distance, and return the result (or error). |
390 | 0 | int distance = -1; |
391 | 0 | const nsDependentString A(a, aLen); |
392 | 0 | const nsDependentString B(b, bLen); |
393 | 0 | int status = levenshteinDistance(A, B, &distance); |
394 | 0 | if (status == SQLITE_OK) { |
395 | 0 | ::sqlite3_result_int(aCtx, distance); |
396 | 0 | } |
397 | 0 | else if (status == SQLITE_NOMEM) { |
398 | 0 | ::sqlite3_result_error_nomem(aCtx); |
399 | 0 | } |
400 | 0 | else { |
401 | 0 | ::sqlite3_result_error(aCtx, "User function returned error code", -1); |
402 | 0 | } |
403 | 0 | } |
404 | | |
405 | | } // namespace storage |
406 | | } // namespace mozilla |