/src/liblouis/liblouis/metadata.c
Line | Count | Source |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Copyright (C) 2015 Bert Frees <bertfrees@gmail.com> |
4 | | |
5 | | This file is part of liblouis. |
6 | | |
7 | | liblouis is free software: you can redistribute it and/or modify it |
8 | | under the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 2.1 of the License, or |
10 | | (at your option) any later version. |
11 | | |
12 | | liblouis is distributed in the hope that it will be useful, but |
13 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | Lesser General Public License for more details. |
16 | | |
17 | | You should have received a copy of the GNU Lesser General Public |
18 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | /** |
22 | | * @file |
23 | | * @brief Find translation tables |
24 | | */ |
25 | | |
26 | | #include "config.h" |
27 | | |
28 | | #include <stdlib.h> |
29 | | #include <string.h> |
30 | | #include <strings.h> |
31 | | #ifdef _MSC_VER |
32 | | #include <windows.h> |
33 | | #else |
34 | | #include <dirent.h> |
35 | | #endif |
36 | | #include <sys/stat.h> |
37 | | #include "internal.h" |
38 | | |
39 | | /* =============================== LIST =================================== */ |
40 | | |
/**
 * Node of a singly linked list. Each node carries the callbacks that know how
 * to release and how to copy its own element.
 */
typedef struct List List;
struct List {
	void *head;            // element stored in this node
	void (*free)(void *);  // releases `head'; NULL means the node does not own it
	void *(*dup)(void *);  // copies `head'; NULL means copies share the pointer
	List *tail;            // remainder of the list, NULL at the end
};
47 | | |
48 | | /** |
49 | | * Returns a list with the element `x' added to `list'. Returns a sorted list |
50 | | * if `cmp' is not NULL and if `list' is also sorted. New elements replace |
51 | | * existing ones if they are equal according to `cmp'. If `cmp' is NULL, |
52 | | * elements are simply prepended to the list. The function `dup' is used to |
53 | | * duplicate elements when the list is copied. The `free' function is used to |
54 | | * free elements when they are removed from the list. The returned list must |
55 | | * be freed by the caller, using list_free. |
56 | | */ |
57 | | static List * |
58 | | list_conj(List *list, void *x, int (*cmp)(void *, void *), void *(*dup)(void *), |
59 | 0 | void (*free)(void *)) { |
60 | 0 | if (!list) { |
61 | 0 | list = malloc(sizeof(List)); |
62 | 0 | list->head = x; |
63 | 0 | list->free = free; |
64 | 0 | list->dup = dup; |
65 | 0 | list->tail = NULL; |
66 | 0 | return list; |
67 | 0 | } else if (!cmp) { |
68 | 0 | List *l = malloc(sizeof(List)); |
69 | 0 | l->head = x; |
70 | 0 | l->free = free; |
71 | 0 | l->dup = dup; |
72 | 0 | l->tail = list; |
73 | 0 | return l; |
74 | 0 | } else { |
75 | 0 | List *l1 = list; |
76 | 0 | List *l2 = NULL; |
77 | 0 | while (l1) { |
78 | 0 | int c = cmp(l1->head, x); |
79 | 0 | if (c > 0) |
80 | 0 | break; |
81 | 0 | else if (c < 0) { |
82 | 0 | l2 = l1; |
83 | 0 | l1 = l2->tail; |
84 | 0 | } else { |
85 | 0 | if (x != l1->head && !dup && free) free(x); |
86 | 0 | return list; |
87 | 0 | } |
88 | 0 | } |
89 | 0 | List *l3 = malloc(sizeof(List)); |
90 | 0 | l3->head = x; |
91 | 0 | l3->free = free; |
92 | 0 | l3->dup = dup; |
93 | 0 | l3->tail = l1; |
94 | 0 | if (!l2) |
95 | 0 | list = l3; |
96 | 0 | else |
97 | 0 | l2->tail = l3; |
98 | 0 | return list; |
99 | 0 | } |
100 | 0 | } |
101 | | |
102 | | /** |
103 | | * Free an instance of type List. |
104 | | */ |
105 | | static void |
106 | 437 | list_free(List *list) { |
107 | 437 | if (list) { |
108 | 0 | if (list->free) list->free(list->head); |
109 | 0 | list_free(list->tail); |
110 | 0 | free(list); |
111 | 0 | } |
112 | 437 | } |
113 | | |
114 | | /** |
115 | | * Duplicate an instance of type List. |
116 | | */ |
117 | | static List * |
118 | 0 | list_dup(List *list) { |
119 | 0 | if (!list) return list; |
120 | 0 | List *d = malloc(sizeof(List)); |
121 | 0 | d->head = list->dup ? list->dup(list->head) : list->head; |
122 | 0 | d->free = list->free; |
123 | 0 | d->dup = list->dup; |
124 | 0 | d->tail = list_dup(list->tail); |
125 | 0 | return d; |
126 | 0 | } |
127 | | |
128 | | /** |
129 | | * Sort a list based on a comparison function. |
130 | | * |
131 | | * This function returns a new list, however the input list should not be used after the |
132 | | * returned list is freed as the elements are not copied. |
133 | | */ |
134 | | static List * |
135 | 0 | list_sort(List *list, int (*cmp)(void *, void *)) { |
136 | 0 | List *newList = NULL; |
137 | 0 | List *l; |
138 | 0 | for (l = list; l; l = l->tail) { |
139 | 0 | newList = list_conj(newList, l->head, cmp, NULL, l->free); |
140 | 0 | l->free = NULL; |
141 | 0 | } |
142 | 0 | list_free(list); |
143 | 0 | return newList; |
144 | 0 | } |
145 | | |
146 | | /** |
147 | | * Get the size of a list. |
148 | | */ |
149 | | static int |
150 | 0 | list_size(List *list) { |
151 | 0 | int len = 0; |
152 | 0 | List *l; |
153 | 0 | for (l = list; l; l = l->tail) len++; |
154 | 0 | return len; |
155 | 0 | } |
156 | | |
157 | | /** |
158 | | * Convert a list into a NULL terminated array. |
159 | | */ |
160 | | static void ** |
161 | 0 | list_toArray(List *list, int deepCopy) { |
162 | 0 | void **array; |
163 | 0 | List *l; |
164 | 0 | int i; |
165 | 0 | array = malloc((1 + list_size(list)) * sizeof(void *)); |
166 | 0 | i = 0; |
167 | 0 | for (l = list; l; l = l->tail) |
168 | 0 | array[i++] = deepCopy && l->dup ? l->dup(l->head) : l->head; |
169 | 0 | array[i] = NULL; |
170 | 0 | return array; |
171 | 0 | } |
172 | | |
173 | | /* ============================== FEATURE ================================= */ |
174 | | |
/**
 * A key/value pair. The key is a heap allocated string (released in feat_free,
 * copied with strdup in feat_dup). How the value is released and copied is
 * decided by the `free' and `dup' callbacks, either of which may be NULL.
 */
typedef struct {
	char *key;
	void *val;
	void (*free)(void *);  // free val
	void *(*dup)(void *);  // dup val
} Feature;

/**
 * A feature of a query. `feature' is the first member; instances are handed to
 * functions that take a `Feature *' (feat_free, feat_dup, cmpKeys).
 */
typedef struct {
	Feature feature;
	int importance;  // assigned in parseQuery from the position in the list
} FeatureWithImportance;

/**
 * A feature found in a table file. `feature' is the first member; instances
 * are handed to functions that take a `Feature *'.
 */
typedef struct {
	Feature feature;
	int lineNumber;  // no line number (-1) means it is a default value
} FeatureWithLineNumber;

/**
 * Index entry for one table: the table name and the list of features that was
 * extracted from it. Both are released by meta_free.
 */
typedef struct {
	char *name;
	List *features;
} TableMeta;
196 | | |
197 | | /** |
198 | | * Create an instance of type Feature. |
199 | | * |
200 | | * The returned instance must be freed by the caller, using feat_free. The `key' string is |
201 | | * freed in feat_free and copied in feat_dup. What happens with `val' is determined by the |
202 | | * `dup' and `free' arguments. |
203 | | */ |
204 | | static Feature |
205 | 0 | feat_new(char *key, void *val, void *(*dup)(void *), void (*free)(void *)) { |
206 | 0 | Feature f; |
207 | 0 | f.key = key; |
208 | 0 | f.val = val; |
209 | 0 | f.dup = dup; |
210 | 0 | f.free = free; |
211 | 0 | return f; |
212 | 0 | } |
213 | | |
214 | | /** |
215 | | * Free an instance of type Feature. |
216 | | */ |
217 | | static void |
218 | 0 | feat_free(Feature *f) { |
219 | 0 | if (f) { |
220 | 0 | free(f->key); |
221 | 0 | if (f->free) f->free(f->val); |
222 | 0 | free(f); |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | | /** |
227 | | * Duplicate an instance of type Feature. |
228 | | */ |
229 | | static Feature * |
230 | 0 | feat_dup(Feature *f) { |
231 | 0 | if (!f) return NULL; |
232 | 0 | Feature *d = malloc(sizeof(Feature)); |
233 | 0 | d->key = strdup(f->key); |
234 | 0 | d->val = f->dup ? f->dup(f->val) : f->val; |
235 | 0 | d->free = f->free; |
236 | 0 | d->dup = f->dup; |
237 | 0 | return d; |
238 | 0 | } |
239 | | |
240 | | /** |
241 | | * Free an instance of type TableMeta. |
242 | | * |
243 | | * Both `name' string and `features' list are freed. |
244 | | */ |
245 | | static void |
246 | 0 | meta_free(TableMeta *m) { |
247 | 0 | if (m) { |
248 | 0 | free(m->name); |
249 | 0 | list_free(m->features); |
250 | 0 | free(m); |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | | /* =========================== LANGUAGE TAGS ============================== */ |
255 | | |
/**
 * Return true if the key we're parsing denotes a language tag, i.e. if it is
 * exactly "language", "region" or "locale" (compared case-insensitively).
 *
 * @param key the key; at most `len' characters of it are examined and the key
 *            is taken to end at the first NUL within that range
 * @param len upper bound on the length of `key'
 *
 * The length of the key is compared as well as its characters. Comparing only
 * the first `len' characters would also accept every prefix of the three
 * names (for instance "lang" with a length of 4, or an empty key).
 */
static int
isLanguageTag(const char *key, int len) {
	static const char *const names[] = { "language", "region", "locale" };
	size_t keyLen = 0;
	size_t i;
	if (!key || len <= 0) return 0;
	while (keyLen < (size_t)len && key[keyLen]) keyLen++;
	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		if (strlen(names[i]) == keyLen && strncasecmp(names[i], key, keyLen) == 0)
			return 1;
	return 0;
}
265 | | |
/**
 * Return true if a character matches [A-Za-z]
 */
static int
isAlpha(char c) {
	if (c >= 'a' && c <= 'z') return 1;
	if (c >= 'A' && c <= 'Z') return 1;
	return 0;
}
270 | | |
/**
 * Return true if a character matches [0-9A-Za-z]
 */
static int
isAlphaNum(char c) {
	const int digit = ('0' <= c && c <= '9');
	return digit || isAlpha(c);
}
275 | | |
276 | | /** |
277 | | * Parse language tag into a list of subtags. |
278 | | * |
279 | | * The returned list must be freed by the caller, using list_free. |
280 | | */ |
281 | | static List * |
282 | 0 | parseLanguageTag(const char *val) { |
283 | 0 | List *list = NULL; |
284 | 0 | List **tail = &list; |
285 | 0 | static char subtag[9]; |
286 | 0 | if (!*val) return NULL; |
287 | 0 | if (val[0] == '*') { |
288 | 0 | if (val[1] && val[1] != '-') return NULL; |
289 | 0 | *subtag = '\0'; |
290 | 0 | strncat(subtag, val, 1); |
291 | 0 | *tail = list_conj(NULL, strdup(subtag), NULL, (void *(*)(void *))strdup, free); |
292 | 0 | tail = &(*tail)->tail; |
293 | 0 | if (!val[1]) return list; |
294 | 0 | val = &val[2]; |
295 | 0 | } |
296 | 0 | while (1) { |
297 | 0 | int len = 0; |
298 | 0 | for (; len <= 8; len++) |
299 | 0 | if (!val[len] || !isAlphaNum(val[len]) || (!list && !isAlpha(val[len]))) |
300 | 0 | break; |
301 | 0 | if (len < 1 || len > 8 || (val[len] && val[len] != '-')) { |
302 | 0 | list_free(list); |
303 | 0 | return NULL; |
304 | 0 | } |
305 | 0 | *subtag = '\0'; |
306 | 0 | strncat(subtag, val, len); |
307 | 0 | *tail = list_conj(NULL, strdup(subtag), NULL, (void *(*)(void *))strdup, free); |
308 | 0 | tail = &(*tail)->tail; |
309 | 0 | if (!val[len]) return list; |
310 | 0 | val = &val[len + 1]; |
311 | 0 | } |
312 | 0 | return NULL; |
313 | 0 | } |
314 | | |
315 | | /** |
316 | | * Serialize language tag. |
317 | | * |
318 | | * The returned string must be freed by the caller. |
319 | | */ |
320 | | static char * |
321 | 0 | serializeLanguageTag(const List *tag) { |
322 | 0 | int len = 0; |
323 | 0 | const List *l; |
324 | 0 | for (l = tag; l; l = l->tail) len = len + 1 + strlen(l->head); |
325 | 0 | char *s = malloc(len * sizeof(char)); |
326 | 0 | s[0] = '\0'; |
327 | 0 | for (l = tag; l; l = l->tail) { |
328 | 0 | if (l != tag) s = strcat(s, "-"); |
329 | 0 | s = strcat(s, l->head); |
330 | 0 | } |
331 | 0 | return s; |
332 | 0 | } |
333 | | |
334 | | /* ======================================================================== */ |
335 | | |
336 | | /** |
337 | | * Sort features by their key (alphabetical order). |
338 | | */ |
339 | | static int |
340 | 0 | cmpKeys(Feature *f1, Feature *f2) { |
341 | 0 | return strcasecmp(f1->key, f2->key); |
342 | 0 | } |
343 | | |
344 | | /** |
345 | | * Sort features by their key and value (alphabetical order). |
346 | | */ |
347 | | static int |
348 | 0 | cmpFeatures(Feature *f1, Feature *f2) { |
349 | 0 | int r = strcasecmp(f1->key, f2->key); |
350 | 0 | if (r != 0) return r; |
351 | 0 | if (isLanguageTag(f1->key, MAXSTRING)) { |
352 | 0 | List *l1 = f1->val; |
353 | 0 | List *l2 = f2->val; |
354 | 0 | while (l1 && l2) { |
355 | 0 | r = strcasecmp(l1->head, l2->head); |
356 | 0 | if (r != 0) return r; |
357 | 0 | l1 = l1->tail; |
358 | 0 | l2 = l2->tail; |
359 | 0 | } |
360 | 0 | return l1 ? 1 : l2 ? -1 : 0; |
361 | 0 | } else |
362 | 0 | return strcasecmp(f1->val, f2->val); |
363 | 0 | } |
364 | | |
365 | | /** |
366 | | * Return a positive number if the given language tag matches the language range, |
367 | | * 0 otherwise. |
368 | | * |
369 | | * In case of a perfect match, return 10. Otherwise, for each extra subtag that |
370 | | * has no exact match in the range, subtract two. |
371 | | * |
372 | | * See also <https://datatracker.ietf.org/doc/html/rfc4647#section-3.3.2> |
373 | | */ |
374 | | static int |
375 | 0 | matchLanguageTags(const List *tag, const List *range) { |
376 | 0 | static const int POS_MATCH = 10; |
377 | 0 | static const int EXTRA = -2; |
378 | 0 | int q = POS_MATCH; |
379 | 0 | if (*((char *)range->head) == '*') |
380 | 0 | q += EXTRA; |
381 | 0 | else if (strcasecmp(tag->head, range->head) != 0) |
382 | 0 | return 0; |
383 | 0 | range = range->tail; |
384 | 0 | tag = tag->tail; |
385 | 0 | while (range) { |
386 | 0 | if (!tag) return 0; |
387 | 0 | if (strcasecmp(tag->head, range->head) == 0) { |
388 | 0 | range = range->tail; |
389 | 0 | tag = tag->tail; |
390 | 0 | continue; |
391 | 0 | } else if (strlen(tag->head) == 1) |
392 | 0 | return 0; |
393 | 0 | else |
394 | 0 | q += EXTRA; |
395 | 0 | tag = tag->tail; |
396 | 0 | } |
397 | 0 | while (tag) { |
398 | 0 | q += EXTRA; |
399 | 0 | tag = tag->tail; |
400 | 0 | } |
401 | 0 | return q; |
402 | 0 | } |
403 | | |
404 | | /** |
405 | | * Compute the match quotient of the features in a query against the features in a table's |
406 | | * metadata. |
407 | | * |
408 | | * The features are assumed to be sorted. The query's features must be |
409 | | * of type FeatureWithImportance and are assumed to have no duplicate |
410 | | * keys. How a feature contributes to the match quotient depends on |
411 | | * its importance, on whether the feature is undefined, defined with |
412 | | * the same value (positive match), or defined with a different value |
413 | | * (negative match), and on the `fuzzy' argument. If the `fuzzy' |
414 | | * argument evaluates to true, negative matches and undefined features |
415 | | * get a lower penalty. |
416 | | */ |
417 | | static int |
418 | 0 | matchFeatureLists(const List *query, const List *tableFeatures, int fuzzy) { |
419 | 0 | static const int POS_MATCH = 10; |
420 | 0 | static const int NEG_MATCH = -100; |
421 | 0 | static const int UNDEFINED = -20; |
422 | 0 | static const int EXTRA = -1; |
423 | 0 | static const int POS_MATCH_FUZZY = 10; |
424 | 0 | static const int NEG_MATCH_FUZZY = -25; |
425 | 0 | static const int UNDEFINED_FUZZY = -5; |
426 | 0 | static const int EXTRA_FUZZY = -1; |
427 | 0 | int posMatch, negMatch, undefined, extra; |
428 | 0 | if (!fuzzy) { |
429 | 0 | posMatch = POS_MATCH; |
430 | 0 | negMatch = NEG_MATCH; |
431 | 0 | undefined = UNDEFINED; |
432 | 0 | extra = EXTRA; |
433 | 0 | } else { |
434 | 0 | posMatch = POS_MATCH_FUZZY; |
435 | 0 | negMatch = NEG_MATCH_FUZZY; |
436 | 0 | undefined = UNDEFINED_FUZZY; |
437 | 0 | extra = EXTRA_FUZZY; |
438 | 0 | } |
439 | 0 | int quotient = 0; |
440 | 0 | const List *l1 = query; |
441 | 0 | const List *l2 = tableFeatures; |
442 | 0 | while (1) { |
443 | 0 | if (!l1) { |
444 | 0 | if (!l2) break; |
445 | 0 | quotient += extra; |
446 | 0 | const List *l = l2; |
447 | 0 | l = l->tail; |
448 | 0 | while (l && cmpKeys(l->head, l2->head) == 0) l = l->tail; |
449 | 0 | l2 = l; |
450 | 0 | } else if (!l2) { |
451 | 0 | quotient += undefined; |
452 | 0 | l1 = l1->tail; |
453 | 0 | } else { |
454 | 0 | int cmp = cmpKeys(l1->head, l2->head); |
455 | 0 | if (cmp < 0) { |
456 | 0 | quotient += undefined; |
457 | 0 | l1 = l1->tail; |
458 | 0 | } else if (cmp > 0) { |
459 | 0 | quotient += extra; |
460 | 0 | const List *l = l2; |
461 | 0 | l = l->tail; |
462 | 0 | while (l && cmpKeys(l->head, l2->head) == 0) l = l->tail; |
463 | 0 | l2 = l; |
464 | 0 | } else { |
465 | 0 | const List *l = l2; |
466 | 0 | char *k = ((Feature *)l->head)->key; |
467 | 0 | int best = negMatch; |
468 | 0 | if (isLanguageTag(k, MAXSTRING)) { |
469 | 0 | int extraLanguages = 0; |
470 | 0 | while (1) { |
471 | | // special handling of language tags: tags in the |
472 | | // table are intepreted as language ranges |
473 | 0 | List *v = ((Feature *)l->head)->val; |
474 | 0 | List *v1 = ((Feature *)l1->head)->val; |
475 | 0 | int q = matchLanguageTags(v1, v); |
476 | 0 | if (q > 0 && q > best) |
477 | 0 | best = q; |
478 | 0 | else if (!q) |
479 | 0 | extraLanguages += extra; |
480 | 0 | l = l->tail; |
481 | 0 | if (!l || cmpKeys(l->head, l2->head) != 0) break; |
482 | 0 | } |
483 | 0 | if (best > 0) |
484 | 0 | best += ((extraLanguages + 4) / |
485 | 0 | 5); // penalty for extra languages is lower than penalty |
486 | | // for fields that are not in query at all |
487 | 0 | } else { |
488 | 0 | while (1) { |
489 | 0 | if (best < 0) { |
490 | 0 | char *v = ((Feature *)l->head)->val; |
491 | 0 | char *v1 = ((Feature *)l1->head)->val; |
492 | 0 | if (strcasecmp(v1, v) == 0) |
493 | 0 | best = posMatch; |
494 | 0 | else if (strcasecmp(k, "unicode-range") == 0) { |
495 | | // special handling of unicode-range: ucs2 in |
496 | | // table also matches ucs4 in query |
497 | 0 | if (strcasecmp(v1, "ucs4") == 0 && |
498 | 0 | strcasecmp(v, "ucs2") == 0) { |
499 | 0 | best = posMatch; |
500 | 0 | best--; // add small penalty to favour ucs4 table |
501 | | // if it exists |
502 | 0 | } |
503 | 0 | } |
504 | 0 | } |
505 | 0 | l = l->tail; |
506 | 0 | if (!l || cmpKeys(l->head, l2->head) != 0) break; |
507 | 0 | } |
508 | 0 | } |
509 | 0 | quotient += best; |
510 | 0 | l1 = l1->tail; |
511 | 0 | l2 = l; |
512 | 0 | } |
513 | 0 | } |
514 | 0 | } |
515 | 0 | return quotient; |
516 | 0 | } |
517 | | |
/**
 * Return true if a character is allowed in a key or a value, i.e. if it
 * matches [0-9A-Za-z_.-]
 */
static int
isValidChar(char c) {
	switch (c) {
	case '-':
	case '.':
	case '_':
		return 1;
	default:
		return isAlphaNum(c);
	}
}
525 | | |
/**
 * Return true if a character is a space or a horizontal tab. Other
 * whitespace, such as a newline, is not matched.
 */
static int
isSpace(char c) {
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
533 | | |
534 | | /** |
535 | | * Parse a table query into a list of features. Features defined first get a |
536 | | * higher importance. |
537 | | * |
538 | | * The returned list must be freed by the caller, using list_free. |
539 | | */ |
540 | | static List * |
541 | 0 | parseQuery(const char *query) { |
542 | 0 | List *features = NULL; |
543 | 0 | const char *key = NULL; |
544 | 0 | const char *val = NULL; |
545 | 0 | size_t keySize = 0; |
546 | 0 | size_t valSize = 0; |
547 | 0 | const char *c; |
548 | 0 | int pos = 0; |
549 | 0 | int unicodeRange = 0; |
550 | 0 | while (1) { |
551 | 0 | c = &query[pos++]; |
552 | 0 | if (isSpace(*c) || (*c == '\n') || (*c == '\0')) { |
553 | 0 | if (key) { |
554 | 0 | char *v = NULL; |
555 | 0 | if (val) { |
556 | 0 | v = malloc(valSize + 1); |
557 | 0 | v[valSize] = '\0'; |
558 | 0 | memcpy(v, val, valSize); |
559 | 0 | } |
560 | 0 | if (!v) goto compile_error; |
561 | 0 | char *k = malloc(keySize + 1); |
562 | 0 | k[keySize] = '\0'; |
563 | 0 | memcpy(k, key, keySize); |
564 | 0 | if (isLanguageTag(k, keySize)) { |
565 | 0 | List *tag = parseLanguageTag(v); |
566 | 0 | if (!tag) { |
567 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Not a valid language tag: %s", v); |
568 | 0 | free(k); |
569 | 0 | free(v); |
570 | 0 | list_free(features); |
571 | 0 | return NULL; |
572 | 0 | } |
573 | 0 | if (strcasecmp(k, "locale") == 0) { |
574 | | // locale is shorthand for language + region |
575 | 0 | FeatureWithImportance f1 = { feat_new(strdup("language"), tag, |
576 | 0 | (void *(*)(void *))list_dup, |
577 | 0 | (void (*)(void *))list_free), |
578 | 0 | 0 }; |
579 | 0 | FeatureWithImportance f2 = { feat_new(strdup("region"), |
580 | 0 | list_dup(tag), |
581 | 0 | (void *(*)(void *))list_dup, |
582 | 0 | (void (*)(void *))list_free), |
583 | 0 | 0 }; |
584 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", |
585 | 0 | f1.feature.key, v); |
586 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", |
587 | 0 | f2.feature.key, v); |
588 | 0 | features = list_conj(features, |
589 | 0 | memcpy(malloc(sizeof(f1)), &f1, sizeof(f1)), NULL, |
590 | 0 | (void *(*)(void *))feat_dup, (void (*)(void *))feat_free); |
591 | 0 | features = list_conj(features, |
592 | 0 | memcpy(malloc(sizeof(f2)), &f2, sizeof(f2)), NULL, |
593 | 0 | (void *(*)(void *))feat_dup, (void (*)(void *))feat_free); |
594 | 0 | } else { |
595 | 0 | FeatureWithImportance f = { feat_new(strdup(k), tag, |
596 | 0 | (void *(*)(void *))list_dup, |
597 | 0 | (void (*)(void *))list_free), |
598 | 0 | 0 }; |
599 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", k, v); |
600 | 0 | features = list_conj(features, |
601 | 0 | memcpy(malloc(sizeof(f)), &f, sizeof(f)), NULL, |
602 | 0 | (void *(*)(void *))feat_dup, (void (*)(void *))feat_free); |
603 | 0 | } |
604 | 0 | } else { |
605 | 0 | FeatureWithImportance f = { feat_new(strdup(k), strdup(v), |
606 | 0 | (void *(*)(void *))strdup, |
607 | 0 | (void (*)(void *))free), |
608 | 0 | 0 }; |
609 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", k, v); |
610 | 0 | features = list_conj(features, |
611 | 0 | memcpy(malloc(sizeof(f)), &f, sizeof(f)), NULL, |
612 | 0 | (void *(*)(void *))feat_dup, (void (*)(void *))feat_free); |
613 | 0 | if (strcasecmp(k, "unicode-range") == 0) unicodeRange = 1; |
614 | 0 | } |
615 | 0 | free(k); |
616 | 0 | free(v); |
617 | 0 | key = val = NULL; |
618 | 0 | keySize = valSize = 0; |
619 | 0 | } |
620 | 0 | if (*c == '\0') break; |
621 | 0 | } else if (*c == ':') { |
622 | 0 | if (!key || val) |
623 | 0 | goto compile_error; |
624 | 0 | else { |
625 | 0 | c = &query[pos++]; |
626 | 0 | if (isValidChar(*c)) { |
627 | 0 | val = c; |
628 | 0 | valSize = 1; |
629 | 0 | } else |
630 | 0 | goto compile_error; |
631 | 0 | } |
632 | 0 | } else if (isValidChar(*c)) { |
633 | 0 | if (val) |
634 | 0 | valSize++; |
635 | 0 | else if (key) |
636 | 0 | keySize++; |
637 | 0 | else { |
638 | 0 | key = c; |
639 | 0 | keySize = 1; |
640 | 0 | } |
641 | 0 | } else |
642 | 0 | goto compile_error; |
643 | 0 | } |
644 | | // add defaults |
645 | 0 | if (!unicodeRange) { |
646 | | // default value of unicode-range is determined by CHARSIZE |
647 | 0 | static char value[5] = ""; |
648 | 0 | if (!*value) sprintf(value, "ucs%ld", CHARSIZE); |
649 | 0 | FeatureWithImportance *f = memcpy(malloc(sizeof(FeatureWithImportance)), |
650 | 0 | (&(FeatureWithImportance){ |
651 | 0 | feat_new(strdup("unicode-range"), strdup(value), |
652 | 0 | (void *(*)(void *))strdup, (void (*)(void *))free), |
653 | 0 | -1 }), |
654 | 0 | sizeof(FeatureWithImportance)); |
655 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", f->feature.key, |
656 | 0 | f->feature.val); |
657 | 0 | features = list_conj(features, f, NULL, (void *(*)(void *))feat_dup, |
658 | 0 | (void (*)(void *))feat_free); |
659 | 0 | } |
660 | | // attach importance to features |
661 | 0 | { |
662 | 0 | int k = 1; |
663 | 0 | List *l; |
664 | 0 | for (l = features; l; l = l->tail) { |
665 | 0 | FeatureWithImportance *f = l->head; |
666 | 0 | f->importance = k++; |
667 | 0 | } |
668 | 0 | } |
669 | | // sort features by key (alphabetical order) |
670 | 0 | return list_sort(features, (int (*)(void *, void *))cmpKeys); |
671 | 0 | compile_error: |
672 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Unexpected character '%c' at position %d", *c, pos); |
673 | 0 | list_free(features); |
674 | 0 | return NULL; |
675 | 0 | } |
676 | | |
677 | | /** |
678 | | * Convert a widechar string to a normal string. |
679 | | */ |
680 | | static char * |
681 | 0 | widestrToStr(const widechar *str, size_t n) { |
682 | 0 | char *result = malloc((1 + n) * sizeof(char)); |
683 | 0 | size_t k; |
684 | 0 | for (k = 0; k < n; k++) result[k] = (char)str[k]; |
685 | 0 | result[k] = '\0'; |
686 | 0 | return result; |
687 | 0 | } |
688 | | |
689 | | /** |
690 | | * Extract a list of features from a table. The features are of type |
691 | | * FeatureWithLineNumber. |
692 | | */ |
693 | | static List * |
694 | 0 | analyzeTable(const char *table, int activeOnly) { |
695 | 0 | static char fileName[MAXSTRING]; |
696 | 0 | List *features = NULL; |
697 | 0 | FileInfo info; |
698 | |
|
699 | 0 | { |
700 | 0 | char **resolved = _lou_resolveTable(table, NULL); |
701 | |
|
702 | 0 | if (resolved == NULL) { |
703 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", table); |
704 | 0 | return NULL; |
705 | 0 | } |
706 | | |
707 | 0 | sprintf(fileName, "%s", *resolved); |
708 | 0 | int k = 0; |
709 | |
|
710 | 0 | for (k = 0; resolved[k]; k += 1) free(resolved[k]); |
711 | 0 | free(resolved); |
712 | |
|
713 | 0 | if (k > 1) { |
714 | 0 | _lou_logMessage( |
715 | 0 | LOU_LOG_ERROR, "Table '%s' resolves to more than one file", table); |
716 | 0 | return NULL; |
717 | 0 | } |
718 | 0 | } |
719 | | |
720 | 0 | info.fileName = fileName; |
721 | 0 | info.encoding = noEncoding; |
722 | 0 | info.status = 0; |
723 | 0 | info.lineNumber = 0; |
724 | 0 | if ((info.in = fopen(info.fileName, "rb"))) { |
725 | 0 | FeatureWithLineNumber *region = NULL; |
726 | 0 | FeatureWithLineNumber *language = NULL; |
727 | 0 | int unicodeRange = 0; |
728 | 0 | while (_lou_getALine(&info)) { |
729 | 0 | if (info.linelen == 0) |
730 | 0 | ; |
731 | 0 | else if (info.line[0] == '#') { |
732 | 0 | if (info.linelen >= 2 && |
733 | 0 | (info.line[1] == '+' || |
734 | 0 | (!activeOnly && info.line[1] == '-' && |
735 | 0 | !(info.linelen > 2 && info.line[2] == '-')))) { |
736 | 0 | int active = (info.line[1] == '+'); |
737 | 0 | widechar *key = NULL; |
738 | 0 | widechar *val = NULL; |
739 | 0 | size_t keySize = 0; |
740 | 0 | size_t valSize = 0; |
741 | 0 | info.linepos = 2; |
742 | 0 | if (info.linepos < info.linelen && |
743 | 0 | isValidChar((char)info.line[info.linepos])) { |
744 | 0 | key = &info.line[info.linepos]; |
745 | 0 | keySize = 1; |
746 | 0 | info.linepos++; |
747 | 0 | while (info.linepos < info.linelen && |
748 | 0 | isValidChar((char)info.line[info.linepos])) { |
749 | 0 | keySize++; |
750 | 0 | info.linepos++; |
751 | 0 | } |
752 | 0 | char *k = widestrToStr(key, keySize); |
753 | 0 | int isLangTag = isLanguageTag(k, keySize); |
754 | 0 | if (info.linepos < info.linelen && |
755 | 0 | info.line[info.linepos] == ':') { |
756 | 0 | info.linepos++; |
757 | 0 | while (info.linepos < info.linelen && |
758 | 0 | isSpace((char)info.line[info.linepos])) |
759 | 0 | info.linepos++; |
760 | 0 | if (info.linepos < info.linelen && |
761 | 0 | (!active || |
762 | 0 | isValidChar((char)info.line[info.linepos]) || |
763 | 0 | (isLangTag && |
764 | 0 | '*' == info.line[info.linepos]))) { |
765 | 0 | val = &info.line[info.linepos]; |
766 | 0 | valSize = 1; |
767 | 0 | info.linepos++; |
768 | 0 | while (info.linepos < info.linelen && |
769 | 0 | (!active || |
770 | 0 | isValidChar( |
771 | 0 | (char)info.line[info.linepos]))) { |
772 | 0 | valSize++; |
773 | 0 | info.linepos++; |
774 | 0 | } |
775 | 0 | } else { |
776 | 0 | free(k); |
777 | 0 | goto compile_error; |
778 | 0 | } |
779 | 0 | } |
780 | 0 | if (info.linepos == info.linelen) { |
781 | 0 | char *v = val ? widestrToStr(val, valSize) : NULL; |
782 | 0 | if (!v) { |
783 | 0 | free(k); |
784 | 0 | goto compile_error; |
785 | 0 | } |
786 | 0 | if (!active) { |
787 | | // normalize space |
788 | 0 | int i = 0; |
789 | 0 | int j = 0; |
790 | 0 | int space = 1; |
791 | 0 | while (v[i]) { |
792 | 0 | if (isSpace(v[i])) { |
793 | 0 | if (!space) { |
794 | 0 | v[j++] = ' '; |
795 | 0 | space = 1; |
796 | 0 | } |
797 | 0 | } else { |
798 | 0 | v[j++] = v[i]; |
799 | 0 | space = 0; |
800 | 0 | } |
801 | 0 | i++; |
802 | 0 | } |
803 | 0 | if (j > 0 && v[j - 1] == ' ') j--; |
804 | 0 | v[j] = '\0'; |
805 | 0 | } |
806 | 0 | if (isLangTag) { |
807 | 0 | List *tag = parseLanguageTag(v); |
808 | 0 | if (!tag) { |
809 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
810 | 0 | "Not a valid language tag: %s (line %d)", v, |
811 | 0 | info.lineNumber); |
812 | 0 | list_free(features); |
813 | 0 | return NULL; |
814 | 0 | } |
815 | 0 | if (strcasecmp(k, "locale") == 0) { |
816 | 0 | FeatureWithLineNumber *f1 = memcpy( |
817 | 0 | malloc(sizeof(FeatureWithLineNumber)), |
818 | 0 | (&(FeatureWithLineNumber){ |
819 | 0 | feat_new(strdup("language"), tag, |
820 | 0 | (void *(*)(void *))list_dup, |
821 | 0 | (void (*)(void *))list_free), |
822 | 0 | info.lineNumber }), |
823 | 0 | sizeof(FeatureWithLineNumber)); |
824 | 0 | FeatureWithLineNumber *f2 = memcpy( |
825 | 0 | malloc(sizeof(FeatureWithLineNumber)), |
826 | 0 | (&(FeatureWithLineNumber){ |
827 | 0 | feat_new(strdup("region"), |
828 | 0 | list_dup(tag), |
829 | 0 | (void *(*)(void *))list_dup, |
830 | 0 | (void (*)(void *))list_free), |
831 | 0 | info.lineNumber }), |
832 | 0 | sizeof(FeatureWithLineNumber)); |
833 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
834 | 0 | "Table has feature '%s:%s'", f1->feature.key, |
835 | 0 | v); |
836 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
837 | 0 | "Table has feature '%s:%s'", f2->feature.key, |
838 | 0 | v); |
839 | 0 | features = list_conj(features, f1, NULL, |
840 | 0 | (void *(*)(void *))feat_dup, |
841 | 0 | (void (*)(void *))feat_free); |
842 | 0 | features = list_conj(features, f2, NULL, |
843 | 0 | (void *(*)(void *))feat_dup, |
844 | 0 | (void (*)(void *))feat_free); |
845 | 0 | if (!language) language = f1; |
846 | 0 | if (!region) region = f2; |
847 | 0 | } else { |
848 | 0 | FeatureWithLineNumber *f = memcpy( |
849 | 0 | malloc(sizeof(FeatureWithLineNumber)), |
850 | 0 | (&(FeatureWithLineNumber){ |
851 | 0 | feat_new(strdup(k), tag, |
852 | 0 | (void *(*)(void *))list_dup, |
853 | 0 | (void (*)(void *))list_free), |
854 | 0 | info.lineNumber }), |
855 | 0 | sizeof(FeatureWithLineNumber)); |
856 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
857 | 0 | "Table has feature '%s:%s'", k, v); |
858 | 0 | features = list_conj(features, f, NULL, |
859 | 0 | (void *(*)(void *))feat_dup, |
860 | 0 | (void (*)(void *))feat_free); |
861 | 0 | if (strcasecmp(k, "language") == 0) { |
862 | 0 | if (!language) language = f; |
863 | 0 | } else if (strcasecmp(k, "region") == 0) { |
864 | 0 | if (!region) region = f; |
865 | 0 | } |
866 | 0 | } |
867 | 0 | } else { |
868 | 0 | FeatureWithLineNumber *f = |
869 | 0 | memcpy(malloc(sizeof(FeatureWithLineNumber)), |
870 | 0 | (&(FeatureWithLineNumber){ |
871 | 0 | feat_new(strdup(k), strdup(v), |
872 | 0 | (void *(*)(void *))strdup, |
873 | 0 | (void (*)(void *))free), |
874 | 0 | info.lineNumber }), |
875 | 0 | sizeof(FeatureWithLineNumber)); |
876 | 0 | _lou_logMessage( |
877 | 0 | LOU_LOG_DEBUG, "Table has feature '%s:%s'", k, v); |
878 | 0 | features = list_conj(features, f, NULL, |
879 | 0 | (void *(*)(void *))feat_dup, |
880 | 0 | (void (*)(void *))feat_free); |
881 | 0 | if (strcasecmp(k, "unicode-range") == 0) unicodeRange = 1; |
882 | 0 | } |
883 | 0 | free(k); |
884 | 0 | free(v); |
885 | 0 | } else { |
886 | 0 | free(k); |
887 | 0 | goto compile_error; |
888 | 0 | } |
889 | 0 | } else |
890 | 0 | goto compile_error; |
891 | 0 | } |
892 | 0 | } else |
893 | 0 | break; |
894 | 0 | } |
895 | 0 | fclose(info.in); |
896 | | // add defaults |
897 | 0 | if (!region && language) { |
898 | 0 | region = memcpy(malloc(sizeof(FeatureWithLineNumber)), |
899 | 0 | (&(FeatureWithLineNumber){ |
900 | 0 | feat_new(strdup("region"), list_dup(language->feature.val), |
901 | 0 | (void *(*)(void *))list_dup, |
902 | 0 | (void (*)(void *))list_free), |
903 | 0 | -1 }), |
904 | 0 | sizeof(FeatureWithLineNumber)); |
905 | 0 | char *v = serializeLanguageTag(region->feature.val); |
906 | 0 | _lou_logMessage( |
907 | 0 | LOU_LOG_DEBUG, "Table has feature '%s:%s'", region->feature.key, v); |
908 | 0 | free(v); |
909 | 0 | features = list_conj(features, region, NULL, (void *(*)(void *))feat_dup, |
910 | 0 | (void (*)(void *))feat_free); |
911 | 0 | } |
912 | 0 | if (features && !unicodeRange) { |
913 | | // by default we assume unicode-range: ucs2 |
914 | 0 | FeatureWithLineNumber *f = memcpy(malloc(sizeof(FeatureWithLineNumber)), |
915 | 0 | (&(FeatureWithLineNumber){ |
916 | 0 | feat_new(strdup("unicode-range"), strdup("ucs2"), |
917 | 0 | (void *(*)(void *))strdup, (void (*)(void *))free), |
918 | 0 | -1 }), |
919 | 0 | sizeof(FeatureWithLineNumber)); |
920 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Table has feature '%s:%s'", f->feature.key, |
921 | 0 | f->feature.val); |
922 | 0 | features = list_conj(features, f, NULL, (void *(*)(void *))feat_dup, |
923 | 0 | (void (*)(void *))feat_free); |
924 | 0 | } |
925 | 0 | } else |
926 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", info.fileName); |
927 | 0 | return list_sort(features, (int (*)(void *, void *))cmpFeatures); |
928 | 0 | compile_error: |
929 | 0 | if (info.linepos < info.linelen) |
930 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Unexpected character '%c' on line %d, column %d", |
931 | 0 | info.line[info.linepos], info.lineNumber, info.linepos); |
932 | 0 | else |
933 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Unexpected newline on line %d", info.lineNumber); |
934 | 0 | list_free(features); |
935 | 0 | return NULL; |
936 | 0 | } |
937 | | |
/**
 * List of discoverable tables and corresponding metadata.
 *
 * Each element is a TableMeta. The list is rebuilt by lou_indexTables and is
 * freed by _lou_freeTableIndex, which is invoked by lou_free. It should not
 * be copied.
 *
 * A NULL value means that nothing is indexed at the moment; the lookup
 * functions in this file call indexTablePath in that case.
 */
static List *tableIndex = NULL;
945 | | |
946 | | void EXPORT_CALL |
947 | 0 | lou_indexTables(const char **tables) { |
948 | 0 | const char **table; |
949 | 0 | list_free(tableIndex); |
950 | 0 | tableIndex = NULL; |
951 | 0 | for (table = tables; *table; table++) { |
952 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Analyzing table %s", *table); |
953 | 0 | List *features = analyzeTable(*table, 1); |
954 | 0 | if (features) { |
955 | 0 | TableMeta m = { strdup(*table), features }; |
956 | 0 | tableIndex = list_conj(tableIndex, memcpy(malloc(sizeof(m)), &m, sizeof(m)), |
957 | 0 | NULL, NULL, (void (*)(void *))meta_free); |
958 | 0 | } |
959 | 0 | } |
960 | 0 | if (!tableIndex) _lou_logMessage(LOU_LOG_WARN, "No tables were indexed"); |
961 | 0 | } |
962 | | |
/**
 * Releases the table index and resets it to the empty state.
 *
 * Called by lou_free.
 */
void EXPORT_CALL
_lou_freeTableIndex(void) {
	list_free(tableIndex);
	// a NULL index makes the lookup functions in this file re-index on next use
	tableIndex = NULL;
}
969 | | |
970 | | /** |
971 | | * Returns the list of files found in a single directory. |
972 | | * |
973 | | * Must be freed by the caller, using list_free. |
974 | | */ |
975 | | #ifdef _MSC_VER |
976 | | static List * |
977 | | listDir(List *list, char *dirName) { |
978 | | static char glob[MAXSTRING]; |
979 | | static char fileName[MAXSTRING]; |
980 | | WIN32_FIND_DATAA ffd; |
981 | | HANDLE hFind; |
982 | | sprintf(glob, "%s%c%c", dirName, DIR_SEP, '*'); |
983 | | hFind = FindFirstFileA(glob, &ffd); |
984 | | if (hFind == INVALID_HANDLE_VALUE) { |
985 | | _lou_logMessage(LOU_LOG_WARN, "%s is not a directory", dirName); |
986 | | } else { |
987 | | do { |
988 | | if (!(ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { |
989 | | sprintf(fileName, "%s%c%s", dirName, DIR_SEP, ffd.cFileName); |
990 | | list = list_conj( |
991 | | list, strdup(fileName), NULL, (void *(*)(void *))strdup, free); |
992 | | } |
993 | | } while (FindNextFileA(hFind, &ffd)); |
994 | | FindClose(hFind); |
995 | | } |
996 | | return list; |
997 | | } |
998 | | #else /* !_MSC_VER */ |
999 | | static List * |
1000 | 0 | listDir(List *list, char *dirName) { |
1001 | 0 | static char fileName[MAXSTRING]; |
1002 | 0 | struct stat info; |
1003 | 0 | DIR *dir; |
1004 | 0 | struct dirent *file; |
1005 | 0 | if ((dir = opendir(dirName))) { |
1006 | 0 | while ((file = readdir(dir))) { |
1007 | 0 | sprintf(fileName, "%s%c%s", dirName, DIR_SEP, file->d_name); |
1008 | 0 | if (stat(fileName, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
1009 | 0 | list = list_conj( |
1010 | 0 | list, strdup(fileName), NULL, (void *(*)(void *))strdup, free); |
1011 | 0 | } |
1012 | 0 | } |
1013 | 0 | closedir(dir); |
1014 | 0 | } else { |
1015 | 0 | _lou_logMessage(LOU_LOG_WARN, "%s is not a directory", dirName); |
1016 | 0 | } |
1017 | 0 | return list; |
1018 | 0 | } |
1019 | | #endif /* !_MSC_VER */ |
1020 | | |
1021 | | /** |
1022 | | * Returns the list of files found on searchPath, where searchPath is a |
1023 | | * comma-separated list of directories. |
1024 | | */ |
1025 | | static List * |
1026 | 0 | listFiles(char *searchPath) { |
1027 | 0 | List *list = NULL; |
1028 | 0 | char *dirName; |
1029 | 0 | int pos = 0; |
1030 | 0 | int n; |
1031 | 0 | while (1) { |
1032 | 0 | for (n = 0; searchPath[pos + n] != '\0' && searchPath[pos + n] != ','; n++) |
1033 | 0 | ; |
1034 | 0 | dirName = malloc(n + 1); |
1035 | 0 | dirName[n] = '\0'; |
1036 | 0 | memcpy(dirName, &searchPath[pos], n); |
1037 | 0 | list = listDir(list, dirName); |
1038 | 0 | free(dirName); |
1039 | 0 | pos += n; |
1040 | 0 | if (searchPath[pos] == '\0') |
1041 | 0 | break; |
1042 | 0 | else |
1043 | 0 | pos++; |
1044 | 0 | } |
1045 | 0 | return list; |
1046 | 0 | } |
1047 | | |
1048 | | static void |
1049 | 0 | indexTablePath(void) { |
1050 | 0 | char *searchPath; |
1051 | 0 | List *tables; |
1052 | 0 | void *tablesArray; |
1053 | 0 | _lou_logMessage( |
1054 | 0 | LOU_LOG_WARN, "Tables have not been indexed yet. Indexing LOUIS_TABLEPATH."); |
1055 | 0 | searchPath = _lou_getTablePath(); |
1056 | 0 | tables = listFiles(searchPath); |
1057 | 0 | tablesArray = list_toArray(tables, 0); |
1058 | 0 | lou_indexTables(tablesArray); |
1059 | 0 | free(searchPath); |
1060 | 0 | list_free(tables); |
1061 | 0 | free(tablesArray); |
1062 | 0 | } |
1063 | | |
1064 | | char *EXPORT_CALL |
1065 | 0 | lou_findTable(const char *query) { |
1066 | 0 | if (!tableIndex) indexTablePath(); |
1067 | 0 | List *queryFeatures = parseQuery(query); |
1068 | 0 | int bestQuotient = 0; |
1069 | 0 | char *bestMatch = NULL; |
1070 | 0 | List *l; |
1071 | 0 | for (l = tableIndex; l; l = l->tail) { |
1072 | 0 | TableMeta *table = l->head; |
1073 | 0 | int q = matchFeatureLists(queryFeatures, table->features, 0); |
1074 | 0 | if (q > bestQuotient) { |
1075 | 0 | bestQuotient = q; |
1076 | 0 | if (bestMatch) free(bestMatch); |
1077 | 0 | bestMatch = strdup(table->name); |
1078 | 0 | } |
1079 | 0 | } |
1080 | 0 | list_free(queryFeatures); |
1081 | 0 | if (bestMatch) { |
1082 | 0 | _lou_logMessage(LOU_LOG_INFO, "Best match: %s (%d)", bestMatch, bestQuotient); |
1083 | 0 | return bestMatch; |
1084 | 0 | } else { |
1085 | 0 | _lou_logMessage(LOU_LOG_INFO, "No table could be found for query '%s'", query); |
1086 | 0 | return NULL; |
1087 | 0 | } |
1088 | 0 | } |
1089 | | |
/**
 * Releases `table' with free().
 *
 * NOTE(review): presumably the counterpart of lou_findTable, whose result is
 * allocated with strdup -- confirm against the public header.
 */
void EXPORT_CALL
lou_freeTableFile(char *table) {
	free(table);
}
1094 | | |
/**
 * A table name together with the quotient of its match against a query.
 */
typedef struct {
	char *name;
	int matchQuotient;
} TableMatch;

/**
 * Orders matches by descending quotient.
 *
 * Returns -1 if `m1' has a strictly higher quotient than `m2' and 1 in every
 * other case. The result is never 0, so two matches never compare as equal,
 * not even when their quotients are the same.
 */
static int
cmpMatches(TableMatch *m1, TableMatch *m2) {
	return (m1->matchQuotient > m2->matchQuotient) ? -1 : 1;
}
1107 | | |
1108 | | /** |
1109 | | * The returned array and strings must be freed by the caller. |
1110 | | */ |
1111 | | char **EXPORT_CALL |
1112 | 0 | lou_findTables(const char *query) { |
1113 | 0 | char **tablesArray; |
1114 | 0 | List *matches = NULL; |
1115 | 0 | if (!tableIndex) indexTablePath(); |
1116 | 0 | List *queryFeatures = parseQuery(query); |
1117 | 0 | List *l; |
1118 | 0 | for (l = tableIndex; l; l = l->tail) { |
1119 | 0 | TableMeta *table = l->head; |
1120 | 0 | int quotient = matchFeatureLists(queryFeatures, table->features, 0); |
1121 | 0 | if (quotient > 0) { |
1122 | 0 | TableMatch m = { strdup(table->name), quotient }; |
1123 | 0 | matches = list_conj(matches, memcpy(malloc(sizeof(m)), &m, sizeof(m)), |
1124 | 0 | (int (*)(void *, void *))cmpMatches, NULL, free); |
1125 | 0 | } |
1126 | 0 | } |
1127 | 0 | list_free(queryFeatures); |
1128 | 0 | if (matches) { |
1129 | 0 | _lou_logMessage(LOU_LOG_INFO, "%d matches found", list_size(matches)); |
1130 | 0 | int i = 0; |
1131 | 0 | tablesArray = malloc((1 + list_size(matches)) * sizeof(void *)); |
1132 | 0 | for (List *m = matches; m; m = m->tail) |
1133 | 0 | tablesArray[i++] = ((TableMatch *)m->head)->name; |
1134 | 0 | tablesArray[i] = NULL; |
1135 | 0 | list_free(matches); |
1136 | 0 | return tablesArray; |
1137 | 0 | } else { |
1138 | 0 | _lou_logMessage(LOU_LOG_INFO, "No table could be found for query '%s'", query); |
1139 | 0 | return NULL; |
1140 | 0 | } |
1141 | 0 | } |
1142 | | |
1143 | | char *EXPORT_CALL |
1144 | 0 | lou_getTableInfo(const char *table, const char *key) { |
1145 | 0 | char *value = NULL; |
1146 | 0 | List *features = analyzeTable(table, 0); |
1147 | 0 | List *l; |
1148 | 0 | int lineNumber = -1; // line number of first matching feature |
1149 | 0 | for (l = features; l; l = l->tail) { |
1150 | 0 | FeatureWithLineNumber *f = l->head; |
1151 | 0 | int cmp = strcasecmp(f->feature.key, key); |
1152 | 0 | if (cmp == 0) { |
1153 | 0 | if (lineNumber < 0 || lineNumber > f->lineNumber) { |
1154 | 0 | if (isLanguageTag(key, MAXSTRING)) |
1155 | 0 | value = serializeLanguageTag(f->feature.val); |
1156 | 0 | else |
1157 | 0 | value = strdup(f->feature.val); |
1158 | 0 | lineNumber = f->lineNumber; |
1159 | 0 | } |
1160 | 0 | } else if (cmp > 0) { |
1161 | 0 | break; |
1162 | 0 | } |
1163 | 0 | } |
1164 | 0 | list_free(features); |
1165 | 0 | return value; |
1166 | 0 | } |
1167 | | |
/**
 * Releases `info' with free().
 *
 * NOTE(review): presumably the counterpart of lou_getTableInfo, whose result
 * is heap-allocated -- confirm against the public header.
 */
void EXPORT_CALL
lou_freeTableInfo(char *info) {
	free(info);
}
1172 | | |
1173 | | char **EXPORT_CALL |
1174 | 0 | lou_listTables(void) { |
1175 | 0 | void *tablesArray; |
1176 | 0 | List *tables = NULL; |
1177 | 0 | List *l; |
1178 | 0 | if (!tableIndex) indexTablePath(); |
1179 | 0 | for (l = tableIndex; l; l = l->tail) { |
1180 | 0 | TableMeta *table = l->head; |
1181 | 0 | tables = list_conj( |
1182 | 0 | tables, strdup(table->name), (int (*)(void *, void *))strcmp, NULL, NULL); |
1183 | 0 | } |
1184 | 0 | tablesArray = list_toArray(tables, 0); |
1185 | 0 | list_free(tables); |
1186 | 0 | return tablesArray; |
1187 | 0 | } |