/src/gdal/ogr/ogrsf_frmts/sqlite/ogrsqliteregexp.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: OpenGIS Simple Features Reference Implementation |
4 | | * Purpose: SQLite REGEXP function |
5 | | * Author: Even Rouault, even dot rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2012, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | /* WARNING: VERY IMPORTANT NOTE: This file MUST not be directly compiled as */ |
14 | | /* a standalone object. It must be included from ogrsqlitevirtualogr.cpp */ |
15 | | /* (actually from ogrsqlitesqlfunctions.cpp) */ |
16 | | #ifndef COMPILATION_ALLOWED |
17 | | #error See comment in file |
18 | | #endif |
19 | | |
20 | | /* This code originates from pcre.c from the sqlite3-pcre extension */ |
21 | | /* from http://laltromondo.dynalias.net/~iki/informatica/soft/sqlite3-pcre/ */ |
22 | | /* whose header is : */ |
23 | | /* |
24 | | * Written by Alexey Tourbin <at@altlinux.org>. |
25 | | * |
26 | | * The author has dedicated the code to the public domain. Anyone is free |
27 | | * to copy, modify, publish, use, compile, sell, or distribute the original |
28 | | * code, either in source code form or as a compiled binary, for any purpose, |
29 | | * commercial or non-commercial, and by any means. |
30 | | */ |
31 | | |
32 | | // The pcre2 variant has been ported from |
33 | | // https://github.com/pfmoore/sqlite-pcre2/blob/main/src/pcre.c which has the |
34 | | // same license as above. |
35 | | |
36 | | #include "ogrsqliteregexp.h" |
37 | | #include "sqlite3.h" |
38 | | |
39 | | #ifdef HAVE_PCRE2 |
40 | | |
41 | | #define PCRE2_CODE_UNIT_WIDTH 8 |
42 | | #include <pcre2.h> |
43 | | |
44 | | typedef struct |
45 | | { |
46 | | char *s; |
47 | | pcre2_code *p; |
48 | | } cache_entry; |
49 | | |
50 | | constexpr int CACHE_SIZE = 16; |
51 | | |
52 | | static pcre2_code *re_compile_with_cache(sqlite3_context *ctx, const char *re) |
53 | | { |
54 | | cache_entry *cache = static_cast<cache_entry *>(sqlite3_user_data(ctx)); |
55 | | |
56 | | CPLAssert(cache); |
57 | | |
58 | | bool found = false; |
59 | | int i; |
60 | | for (i = 0; i < CACHE_SIZE && cache[i].s; i++) |
61 | | if (strcmp(re, cache[i].s) == 0) |
62 | | { |
63 | | found = true; |
64 | | break; |
65 | | } |
66 | | |
67 | | if (found) |
68 | | { |
69 | | if (i > 0) |
70 | | { |
71 | | /* Get the found entry */ |
72 | | cache_entry c = cache[i]; |
73 | | /* Move 0..i-1 up one - args are (dest, src, size) */ |
74 | | memmove(cache + 1, cache, i * sizeof(cache_entry)); |
75 | | /* Put the found entry at the start */ |
76 | | cache[0] = c; |
77 | | } |
78 | | } |
79 | | else |
80 | | { |
81 | | /* Create a new entry */ |
82 | | int errorcode = 0; |
83 | | PCRE2_SIZE pos = 0; |
84 | | uint32_t has_jit = 0; |
85 | | PCRE2_UCHAR8 err_buff[256]; |
86 | | |
87 | | #ifdef HAVE_GCC_DIAGNOSTIC_PUSH |
88 | | #pragma GCC diagnostic push |
89 | | #pragma GCC diagnostic ignored "-Wold-style-cast" |
90 | | #endif |
91 | | pcre2_code *pat = |
92 | | pcre2_compile(reinterpret_cast<const PCRE2_UCHAR8 *>(re), |
93 | | PCRE2_ZERO_TERMINATED, 0, &errorcode, &pos, nullptr); |
94 | | #ifdef HAVE_GCC_DIAGNOSTIC_PUSH |
95 | | #pragma GCC diagnostic pop |
96 | | #endif |
97 | | if (!pat) |
98 | | { |
99 | | pcre2_get_error_message(errorcode, err_buff, sizeof(err_buff)); |
100 | | char *e2 = sqlite3_mprintf("%s: %s (offset %d)", re, err_buff, pos); |
101 | | sqlite3_result_error(ctx, e2, -1); |
102 | | sqlite3_free(e2); |
103 | | return nullptr; |
104 | | } |
105 | | pcre2_config(PCRE2_CONFIG_JIT, &has_jit); |
106 | | if (has_jit) |
107 | | { |
108 | | errorcode = pcre2_jit_compile(pat, 0); |
109 | | if (errorcode) |
110 | | { |
111 | | pcre2_get_error_message(errorcode, err_buff, sizeof(err_buff)); |
112 | | char *e2 = sqlite3_mprintf("%s: %s", re, err_buff); |
113 | | sqlite3_result_error(ctx, e2, -1); |
114 | | sqlite3_free(e2); |
115 | | pcre2_code_free(pat); |
116 | | return nullptr; |
117 | | } |
118 | | } |
119 | | /* Free the last cache entry if necessary */ |
120 | | i = CACHE_SIZE - 1; |
121 | | if (cache[i].s) |
122 | | { |
123 | | VSIFree(cache[i].s); |
124 | | CPLAssert(cache[i].p); |
125 | | pcre2_code_free(cache[i].p); |
126 | | } |
127 | | /* Move everything up to make space */ |
128 | | memmove(cache + 1, cache, i * sizeof(cache_entry)); |
129 | | cache[0].s = VSIStrdup(re); |
130 | | cache[0].p = pat; |
131 | | } |
132 | | |
133 | | return cache[0].p; |
134 | | } |
135 | | |
136 | | /************************************************************************/ |
137 | | /* OGRSQLiteREGEXPFunction() */ |
138 | | /************************************************************************/ |
139 | | |
140 | | static void OGRSQLiteREGEXPFunction(sqlite3_context *ctx, |
141 | | [[maybe_unused]] int argc, |
142 | | sqlite3_value **argv) |
143 | | { |
144 | | CPLAssert(argc == 2); |
145 | | |
146 | | const char *re = |
147 | | reinterpret_cast<const char *>(sqlite3_value_text(argv[0])); |
148 | | if (!re) |
149 | | { |
150 | | CPLDebug("SQLITE", "REGEXP: no regexp"); |
151 | | sqlite3_result_null(ctx); |
152 | | return; |
153 | | } |
154 | | |
155 | | if (sqlite3_value_type(argv[1]) == SQLITE_NULL) |
156 | | { |
157 | | sqlite3_result_null(ctx); |
158 | | return; |
159 | | } |
160 | | |
161 | | const char *str = |
162 | | reinterpret_cast<const char *>(sqlite3_value_text(argv[1])); |
163 | | if (!str) |
164 | | { |
165 | | CPLDebug("SQLITE", "REGEXP: no string"); |
166 | | sqlite3_result_null(ctx); |
167 | | return; |
168 | | } |
169 | | |
170 | | pcre2_code *p = re_compile_with_cache(ctx, re); |
171 | | if (!p) |
172 | | return; |
173 | | |
174 | | pcre2_match_data *md = pcre2_match_data_create_from_pattern(p, nullptr); |
175 | | if (!md) |
176 | | { |
177 | | sqlite3_result_error(ctx, "could not create match data block", -1); |
178 | | return; |
179 | | } |
180 | | |
181 | | #ifdef HAVE_GCC_DIAGNOSTIC_PUSH |
182 | | #pragma GCC diagnostic push |
183 | | #pragma GCC diagnostic ignored "-Wold-style-cast" |
184 | | #endif |
185 | | int rc = pcre2_match(p, reinterpret_cast<const PCRE2_UCHAR8 *>(str), |
186 | | PCRE2_ZERO_TERMINATED, 0, 0, md, nullptr); |
187 | | #ifdef HAVE_GCC_DIAGNOSTIC_PUSH |
188 | | #pragma GCC diagnostic pop |
189 | | #endif |
190 | | |
191 | | pcre2_match_data_free(md); |
192 | | |
193 | | sqlite3_result_int(ctx, rc >= 0); |
194 | | } |
195 | | |
196 | | #elif defined(HAVE_PCRE) |
197 | | |
198 | | #include <pcre.h> |
199 | | |
200 | | typedef struct |
201 | | { |
202 | | char *s; |
203 | | pcre *p; |
204 | | pcre_extra *e; |
205 | | } cache_entry; |
206 | | |
207 | | constexpr int CACHE_SIZE = 16; |
208 | | |
209 | | /************************************************************************/ |
210 | | /* OGRSQLiteREGEXPFunction() */ |
211 | | /************************************************************************/ |
212 | | |
213 | | static void OGRSQLiteREGEXPFunction(sqlite3_context *ctx, |
214 | | [[maybe_unused]] int argc, |
215 | | sqlite3_value **argv) |
216 | | { |
217 | | CPLAssert(argc == 2); |
218 | | |
219 | | const char *re = |
220 | | reinterpret_cast<const char *>(sqlite3_value_text(argv[0])); |
221 | | if (!re) |
222 | | { |
223 | | CPLDebug("SQLITE", "REGEXP: no regexp"); |
224 | | sqlite3_result_null(ctx); |
225 | | return; |
226 | | } |
227 | | |
228 | | if (sqlite3_value_type(argv[1]) == SQLITE_NULL) |
229 | | { |
230 | | sqlite3_result_null(ctx); |
231 | | return; |
232 | | } |
233 | | |
234 | | const char *str = |
235 | | reinterpret_cast<const char *>(sqlite3_value_text(argv[1])); |
236 | | if (!str) |
237 | | { |
238 | | CPLDebug("SQLITE", "REGEXP: no string"); |
239 | | sqlite3_result_null(ctx); |
240 | | return; |
241 | | } |
242 | | |
243 | | /* simple LRU cache */ |
244 | | cache_entry *cache = static_cast<cache_entry *>(sqlite3_user_data(ctx)); |
245 | | CPLAssert(cache); |
246 | | |
247 | | bool found = false; |
248 | | int i = 0; // Used after for. |
249 | | for (; i < CACHE_SIZE && cache[i].s; i++) |
250 | | { |
251 | | if (strcmp(re, cache[i].s) == 0) |
252 | | { |
253 | | found = true; |
254 | | break; |
255 | | } |
256 | | } |
257 | | |
258 | | if (found) |
259 | | { |
260 | | if (i > 0) |
261 | | { |
262 | | cache_entry c = cache[i]; |
263 | | memmove(cache + 1, cache, i * sizeof(cache_entry)); |
264 | | cache[0] = c; |
265 | | } |
266 | | } |
267 | | else |
268 | | { |
269 | | cache_entry c; |
270 | | const char *err = nullptr; |
271 | | int pos = 0; |
272 | | c.p = pcre_compile(re, 0, &err, &pos, nullptr); |
273 | | if (!c.p) |
274 | | { |
275 | | char *e2 = sqlite3_mprintf("%s: %s (offset %d)", re, err, pos); |
276 | | sqlite3_result_error(ctx, e2, -1); |
277 | | sqlite3_free(e2); |
278 | | return; |
279 | | } |
280 | | c.e = pcre_study(c.p, 0, &err); |
281 | | c.s = VSIStrdup(re); |
282 | | if (!c.s) |
283 | | { |
284 | | sqlite3_result_error(ctx, "strdup: ENOMEM", -1); |
285 | | pcre_free(c.p); |
286 | | pcre_free(c.e); |
287 | | return; |
288 | | } |
289 | | i = CACHE_SIZE - 1; |
290 | | if (cache[i].s) |
291 | | { |
292 | | CPLFree(cache[i].s); |
293 | | CPLAssert(cache[i].p); |
294 | | pcre_free(cache[i].p); |
295 | | pcre_free(cache[i].e); |
296 | | } |
297 | | memmove(cache + 1, cache, i * sizeof(cache_entry)); |
298 | | cache[0] = c; |
299 | | } |
300 | | pcre *p = cache[0].p; |
301 | | CPLAssert(p); |
302 | | pcre_extra *e = cache[0].e; |
303 | | |
304 | | int rc = |
305 | | pcre_exec(p, e, str, static_cast<int>(strlen(str)), 0, 0, nullptr, 0); |
306 | | sqlite3_result_int(ctx, rc >= 0); |
307 | | } |
308 | | |
309 | | #endif // HAVE_PCRE |
310 | | |
311 | | /************************************************************************/ |
312 | | /* OGRSQLiteRegisterRegExpFunction() */ |
313 | | /************************************************************************/ |
314 | | |
315 | | static void *OGRSQLiteRegisterRegExpFunction(sqlite3 * |
316 | | #if defined(HAVE_PCRE) || defined(HAVE_PCRE2) |
317 | | hDB |
318 | | #endif |
319 | | ) |
320 | 8.61k | { |
321 | | #if defined(HAVE_PCRE) || defined(HAVE_PCRE2) |
322 | | |
323 | | /* For debugging purposes mostly */ |
324 | | if (!CPLTestBool(CPLGetConfigOption("OGR_SQLITE_REGEXP", "YES"))) |
325 | | return nullptr; |
326 | | |
327 | | /* Check if we really need to define our own REGEXP function */ |
328 | | int rc = |
329 | | sqlite3_exec(hDB, "SELECT 'a' REGEXP 'a'", nullptr, nullptr, nullptr); |
330 | | if (rc == SQLITE_OK) |
331 | | { |
332 | | CPLDebug("SQLITE", "REGEXP already available"); |
333 | | return nullptr; |
334 | | } |
335 | | |
336 | | cache_entry *cache = |
337 | | static_cast<cache_entry *>(CPLCalloc(CACHE_SIZE, sizeof(cache_entry))); |
338 | | sqlite3_create_function(hDB, "REGEXP", 2, SQLITE_UTF8, cache, |
339 | | OGRSQLiteREGEXPFunction, nullptr, nullptr); |
340 | | |
341 | | /* To clear the error flag */ |
342 | | sqlite3_exec(hDB, "SELECT 1", nullptr, nullptr, nullptr); |
343 | | |
344 | | return cache; |
345 | | #else // HAVE_PCRE |
346 | 8.61k | return nullptr; |
347 | 8.61k | #endif // HAVE_PCRE |
348 | 8.61k | } ogrsqlitevirtualogr.cpp:OGRSQLiteRegisterRegExpFunction(sqlite3*) Line | Count | Source | 320 | 6.07k | { | 321 | | #if defined(HAVE_PCRE) || defined(HAVE_PCRE2) | 322 | | | 323 | | /* For debugging purposes mostly */ | 324 | | if (!CPLTestBool(CPLGetConfigOption("OGR_SQLITE_REGEXP", "YES"))) | 325 | | return nullptr; | 326 | | | 327 | | /* Check if we really need to define our own REGEXP function */ | 328 | | int rc = | 329 | | sqlite3_exec(hDB, "SELECT 'a' REGEXP 'a'", nullptr, nullptr, nullptr); | 330 | | if (rc == SQLITE_OK) | 331 | | { | 332 | | CPLDebug("SQLITE", "REGEXP already available"); | 333 | | return nullptr; | 334 | | } | 335 | | | 336 | | cache_entry *cache = | 337 | | static_cast<cache_entry *>(CPLCalloc(CACHE_SIZE, sizeof(cache_entry))); | 338 | | sqlite3_create_function(hDB, "REGEXP", 2, SQLITE_UTF8, cache, | 339 | | OGRSQLiteREGEXPFunction, nullptr, nullptr); | 340 | | | 341 | | /* To clear the error flag */ | 342 | | sqlite3_exec(hDB, "SELECT 1", nullptr, nullptr, nullptr); | 343 | | | 344 | | return cache; | 345 | | #else // HAVE_PCRE | 346 | 6.07k | return nullptr; | 347 | 6.07k | #endif // HAVE_PCRE | 348 | 6.07k | } |
ogrgeopackagedatasource.cpp:OGRSQLiteRegisterRegExpFunction(sqlite3*) Line | Count | Source | 320 | 2.54k | { | 321 | | #if defined(HAVE_PCRE) || defined(HAVE_PCRE2) | 322 | | | 323 | | /* For debugging purposes mostly */ | 324 | | if (!CPLTestBool(CPLGetConfigOption("OGR_SQLITE_REGEXP", "YES"))) | 325 | | return nullptr; | 326 | | | 327 | | /* Check if we really need to define our own REGEXP function */ | 328 | | int rc = | 329 | | sqlite3_exec(hDB, "SELECT 'a' REGEXP 'a'", nullptr, nullptr, nullptr); | 330 | | if (rc == SQLITE_OK) | 331 | | { | 332 | | CPLDebug("SQLITE", "REGEXP already available"); | 333 | | return nullptr; | 334 | | } | 335 | | | 336 | | cache_entry *cache = | 337 | | static_cast<cache_entry *>(CPLCalloc(CACHE_SIZE, sizeof(cache_entry))); | 338 | | sqlite3_create_function(hDB, "REGEXP", 2, SQLITE_UTF8, cache, | 339 | | OGRSQLiteREGEXPFunction, nullptr, nullptr); | 340 | | | 341 | | /* To clear the error flag */ | 342 | | sqlite3_exec(hDB, "SELECT 1", nullptr, nullptr, nullptr); | 343 | | | 344 | | return cache; | 345 | | #else // HAVE_PCRE | 346 | 2.54k | return nullptr; | 347 | 2.54k | #endif // HAVE_PCRE | 348 | 2.54k | } |
|
349 | | |
350 | | /************************************************************************/ |
351 | | /* OGRSQLiteFreeRegExpCache() */ |
352 | | /************************************************************************/ |
353 | | |
/* Release a pattern cache returned by OGRSQLiteRegisterRegExpFunction().
 *
 * hRegExpCache may be null, in which case nothing is done. Without
 * HAVE_PCRE and HAVE_PCRE2 the function is empty.
 */
static void OGRSQLiteFreeRegExpCache(void *
#if defined(HAVE_PCRE) || defined(HAVE_PCRE2)
                                         hRegExpCache
#endif
)
{
#if defined(HAVE_PCRE) || defined(HAVE_PCRE2)
    cache_entry *const pasEntries = static_cast<cache_entry *>(hRegExpCache);
    if (pasEntries == nullptr)
        return;

    for (int iSlot = 0; iSlot < CACHE_SIZE; ++iSlot)
    {
        cache_entry &sEntry = pasEntries[iSlot];
        /* The first slot without a pattern string ends the walk */
        if (sEntry.s == nullptr)
            break;

        CPLFree(sEntry.s);
        CPLAssert(sEntry.p);
#ifdef HAVE_PCRE2
        pcre2_code_free(sEntry.p);
#else
        pcre_free(sEntry.p);
        pcre_free(sEntry.e);
#endif
    }
    CPLFree(pasEntries);
#endif  // HAVE_PCRE || HAVE_PCRE2
}
ogrgeopackagedatasource.cpp:OGRSQLiteFreeRegExpCache(void*) Line | Count | Source | 359 | 2.54k | { | 360 | | #ifdef HAVE_PCRE2 | 361 | | if (hRegExpCache == nullptr) | 362 | | return; | 363 | | | 364 | | cache_entry *cache = static_cast<cache_entry *>(hRegExpCache); | 365 | | for (int i = 0; i < CACHE_SIZE && cache[i].s; i++) | 366 | | { | 367 | | CPLFree(cache[i].s); | 368 | | CPLAssert(cache[i].p); | 369 | | pcre2_code_free(cache[i].p); | 370 | | } | 371 | | CPLFree(cache); | 372 | | #elif defined(HAVE_PCRE) | 373 | | if (hRegExpCache == nullptr) | 374 | | return; | 375 | | | 376 | | cache_entry *cache = static_cast<cache_entry *>(hRegExpCache); | 377 | | for (int i = 0; i < CACHE_SIZE && cache[i].s; i++) | 378 | | { | 379 | | CPLFree(cache[i].s); | 380 | | CPLAssert(cache[i].p); | 381 | | pcre_free(cache[i].p); | 382 | | pcre_free(cache[i].e); | 383 | | } | 384 | | CPLFree(cache); | 385 | | #endif // HAVE_PCRE | 386 | 2.54k | } |
|