Coverage Report

Created: 2026-02-26 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/liblouis/liblouis/metadata.c
Line
Count
Source
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Copyright (C) 2015 Bert Frees <bertfrees@gmail.com>
4
5
   This file is part of liblouis.
6
7
   liblouis is free software: you can redistribute it and/or modify it
8
   under the terms of the GNU Lesser General Public License as published
9
   by the Free Software Foundation, either version 2.1 of the License, or
10
   (at your option) any later version.
11
12
   liblouis is distributed in the hope that it will be useful, but
13
   WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
   Lesser General Public License for more details.
16
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
/**
22
 * @file
23
 * @brief Find translation tables
24
 */
25
26
#include "config.h"
27
28
#include <stdlib.h>
29
#include <string.h>
30
#include <strings.h>
31
#ifdef _MSC_VER
32
#include <windows.h>
33
#else
34
#include <dirent.h>
35
#endif
36
#include <sys/stat.h>
37
#include "internal.h"
38
39
/* =============================== LIST =================================== */
40
41
/**
 * Node of a singly linked list of untyped elements.
 *
 * Every node carries its own element together with the callbacks used to
 * release and to copy that element. Either callback may be NULL, in which
 * case the element is not released / is shared instead of copied.
 */
typedef struct List {
  void *head;            // element stored in this node
  void (*free)(void *);  // releases `head'; may be NULL
  void *(*dup)(void *);  // copies `head'; may be NULL
  struct List *tail;     // rest of the list, NULL at the end
} List;
47
48
/**
49
 * Returns a list with the element `x' added to `list'. Returns a sorted list
50
 * if `cmp' is not NULL and if `list' is also sorted. New elements replace
51
 * existing ones if they are equal according to `cmp'. If `cmp' is NULL,
52
 * elements are simply prepended to the list. The function `dup' is used to
53
 * duplicate elements when the list is copied. The `free' function is used to
54
 * free elements when they are removed from the list. The returned list must
55
 * be freed by the caller, using list_free.
56
 */
57
static List *
58
list_conj(List *list, void *x, int (*cmp)(void *, void *), void *(*dup)(void *),
59
0
    void (*free)(void *)) {
60
0
  if (!list) {
61
0
    list = malloc(sizeof(List));
62
0
    list->head = x;
63
0
    list->free = free;
64
0
    list->dup = dup;
65
0
    list->tail = NULL;
66
0
    return list;
67
0
  } else if (!cmp) {
68
0
    List *l = malloc(sizeof(List));
69
0
    l->head = x;
70
0
    l->free = free;
71
0
    l->dup = dup;
72
0
    l->tail = list;
73
0
    return l;
74
0
  } else {
75
0
    List *l1 = list;
76
0
    List *l2 = NULL;
77
0
    while (l1) {
78
0
      int c = cmp(l1->head, x);
79
0
      if (c > 0)
80
0
        break;
81
0
      else if (c < 0) {
82
0
        l2 = l1;
83
0
        l1 = l2->tail;
84
0
      } else {
85
0
        if (x != l1->head && !dup && free) free(x);
86
0
        return list;
87
0
      }
88
0
    }
89
0
    List *l3 = malloc(sizeof(List));
90
0
    l3->head = x;
91
0
    l3->free = free;
92
0
    l3->dup = dup;
93
0
    l3->tail = l1;
94
0
    if (!l2)
95
0
      list = l3;
96
0
    else
97
0
      l2->tail = l3;
98
0
    return list;
99
0
  }
100
0
}
101
102
/**
103
 * Free an instance of type List.
104
 */
105
static void
106
437
list_free(List *list) {
107
437
  if (list) {
108
0
    if (list->free) list->free(list->head);
109
0
    list_free(list->tail);
110
0
    free(list);
111
0
  }
112
437
}
113
114
/**
115
 * Duplicate an instance of type List.
116
 */
117
static List *
118
0
list_dup(List *list) {
119
0
  if (!list) return list;
120
0
  List *d = malloc(sizeof(List));
121
0
  d->head = list->dup ? list->dup(list->head) : list->head;
122
0
  d->free = list->free;
123
0
  d->dup = list->dup;
124
0
  d->tail = list_dup(list->tail);
125
0
  return d;
126
0
}
127
128
/**
129
 * Sort a list based on a comparison function.
130
 *
131
 * This function returns a new list, however the input list should not be used after the
132
 * returned list is freed as the elements are not copied.
133
 */
134
static List *
135
0
list_sort(List *list, int (*cmp)(void *, void *)) {
136
0
  List *newList = NULL;
137
0
  List *l;
138
0
  for (l = list; l; l = l->tail) {
139
0
    newList = list_conj(newList, l->head, cmp, NULL, l->free);
140
0
    l->free = NULL;
141
0
  }
142
0
  list_free(list);
143
0
  return newList;
144
0
}
145
146
/**
147
 * Get the size of a list.
148
 */
149
static int
150
0
list_size(List *list) {
151
0
  int len = 0;
152
0
  List *l;
153
0
  for (l = list; l; l = l->tail) len++;
154
0
  return len;
155
0
}
156
157
/**
158
 * Convert a list into a NULL terminated array.
159
 */
160
static void **
161
0
list_toArray(List *list, int deepCopy) {
162
0
  void **array;
163
0
  List *l;
164
0
  int i;
165
0
  array = malloc((1 + list_size(list)) * sizeof(void *));
166
0
  i = 0;
167
0
  for (l = list; l; l = l->tail)
168
0
    array[i++] = deepCopy && l->dup ? l->dup(l->head) : l->head;
169
0
  array[i] = NULL;
170
0
  return array;
171
0
}
172
173
/* ============================== FEATURE ================================= */
174
175
/**
 * A key/value pair. `free' and `dup' say how `val' is released and copied;
 * both may be NULL.
 */
typedef struct {
  char *key;
  void *val;
  void (*free)(void *);  // free val
  void *(*dup)(void *);  // dup val
} Feature;

/**
 * A Feature of a query. `feature' is the first member, so a pointer to this
 * struct can be passed where a Feature pointer is expected.
 */
typedef struct {
  Feature feature;
  int importance;
} FeatureWithImportance;

/**
 * A Feature of a table. `feature' is the first member, so a pointer to this
 * struct can be passed where a Feature pointer is expected.
 */
typedef struct {
  Feature feature;
  int lineNumber;  // no line number (-1) means it is a default value
} FeatureWithLineNumber;
191
192
typedef struct {
193
  char *name;
194
  List *features;
195
} TableMeta;
196
197
/**
198
 * Create an instance of type Feature.
199
 *
200
 * The returned instance must be freed by the caller, using feat_free. The `key' string is
201
 * freed in feat_free and copied in feat_dup. What happens with `val' is determined by the
202
 * `dup' and `free' arguments.
203
 */
204
static Feature
205
0
feat_new(char *key, void *val, void *(*dup)(void *), void (*free)(void *)) {
206
0
  Feature f;
207
0
  f.key = key;
208
0
  f.val = val;
209
0
  f.dup = dup;
210
0
  f.free = free;
211
0
  return f;
212
0
}
213
214
/**
215
 * Free an instance of type Feature.
216
 */
217
static void
218
0
feat_free(Feature *f) {
219
0
  if (f) {
220
0
    free(f->key);
221
0
    if (f->free) f->free(f->val);
222
0
    free(f);
223
0
  }
224
0
}
225
226
/**
227
 * Duplicate an instance of type Feature.
228
 */
229
static Feature *
230
0
feat_dup(Feature *f) {
231
0
  if (!f) return NULL;
232
0
  Feature *d = malloc(sizeof(Feature));
233
0
  d->key = strdup(f->key);
234
0
  d->val = f->dup ? f->dup(f->val) : f->val;
235
0
  d->free = f->free;
236
0
  d->dup = f->dup;
237
0
  return d;
238
0
}
239
240
/**
241
 * Free an instance of type TableMeta.
242
 *
243
 * Both `name' string and `features' list are freed.
244
 */
245
static void
246
0
meta_free(TableMeta *m) {
247
0
  if (m) {
248
0
    free(m->name);
249
0
    list_free(m->features);
250
0
    free(m);
251
0
  }
252
0
}
253
254
/* =========================== LANGUAGE TAGS ============================== */
255
256
/**
 * Return true if the tag we're parsing is a language tag (language, region or
 * locale).
 *
 * `key' is compared case-insensitively. `len' is the number of characters of
 * `key' to consider; it may be larger than the actual length of a
 * NUL-terminated key (callers pass MAXSTRING for that), in which case the
 * comparison stops at the terminator.
 *
 * The key has to match a tag name completely: a key that is merely a prefix
 * of a name ("l", "lang", "re", or an empty key) is not a language tag.
 */
static int
isLanguageTag(const char *key, int len) {
  static const char *const names[] = { "language", "region", "locale" };
  size_t i;
  for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
    // strncasecmp alone would accept any prefix when len is shorter than
    // the name, so require at least the full name length first
    if ((size_t)len >= strlen(names[i]) &&
        strncasecmp(names[i], key, (size_t)len) == 0)
      return 1;
  }
  return 0;
}
265
266
/**
 * Return true if a character matches [A-Za-z]
 */
static int
isAlpha(char c) {
  if (c >= 'a' && c <= 'z') return 1;
  return c >= 'A' && c <= 'Z';
}
270
271
/**
 * Return true if a character matches [0-9A-Za-z]
 */
static int
isAlphaNum(char c) {
  if (c >= '0' && c <= '9') return 1;
  if (c >= 'A' && c <= 'Z') return 1;
  return c >= 'a' && c <= 'z';
}
275
276
/**
277
 * Parse language tag into a list of subtags.
278
 *
279
 * The returned list must be freed by the caller, using list_free.
280
 */
281
static List *
282
0
parseLanguageTag(const char *val) {
283
0
  List *list = NULL;
284
0
  List **tail = &list;
285
0
  static char subtag[9];
286
0
  if (!*val) return NULL;
287
0
  if (val[0] == '*') {
288
0
    if (val[1] && val[1] != '-') return NULL;
289
0
    *subtag = '\0';
290
0
    strncat(subtag, val, 1);
291
0
    *tail = list_conj(NULL, strdup(subtag), NULL, (void *(*)(void *))strdup, free);
292
0
    tail = &(*tail)->tail;
293
0
    if (!val[1]) return list;
294
0
    val = &val[2];
295
0
  }
296
0
  while (1) {
297
0
    int len = 0;
298
0
    for (; len <= 8; len++)
299
0
      if (!val[len] || !isAlphaNum(val[len]) || (!list && !isAlpha(val[len])))
300
0
        break;
301
0
    if (len < 1 || len > 8 || (val[len] && val[len] != '-')) {
302
0
      list_free(list);
303
0
      return NULL;
304
0
    }
305
0
    *subtag = '\0';
306
0
    strncat(subtag, val, len);
307
0
    *tail = list_conj(NULL, strdup(subtag), NULL, (void *(*)(void *))strdup, free);
308
0
    tail = &(*tail)->tail;
309
0
    if (!val[len]) return list;
310
0
    val = &val[len + 1];
311
0
  }
312
0
  return NULL;
313
0
}
314
315
/**
316
 * Serialize language tag.
317
 *
318
 * The returned string must be freed by the caller.
319
 */
320
static char *
321
0
serializeLanguageTag(const List *tag) {
322
0
  int len = 0;
323
0
  const List *l;
324
0
  for (l = tag; l; l = l->tail) len = len + 1 + strlen(l->head);
325
0
  char *s = malloc(len * sizeof(char));
326
0
  s[0] = '\0';
327
0
  for (l = tag; l; l = l->tail) {
328
0
    if (l != tag) s = strcat(s, "-");
329
0
    s = strcat(s, l->head);
330
0
  }
331
0
  return s;
332
0
}
333
334
/* ======================================================================== */
335
336
/**
337
 * Sort features by their key (alphabetical order).
338
 */
339
static int
340
0
cmpKeys(Feature *f1, Feature *f2) {
341
0
  return strcasecmp(f1->key, f2->key);
342
0
}
343
344
/**
345
 * Sort features by their key and value (alphabetical order).
346
 */
347
static int
348
0
cmpFeatures(Feature *f1, Feature *f2) {
349
0
  int r = strcasecmp(f1->key, f2->key);
350
0
  if (r != 0) return r;
351
0
  if (isLanguageTag(f1->key, MAXSTRING)) {
352
0
    List *l1 = f1->val;
353
0
    List *l2 = f2->val;
354
0
    while (l1 && l2) {
355
0
      r = strcasecmp(l1->head, l2->head);
356
0
      if (r != 0) return r;
357
0
      l1 = l1->tail;
358
0
      l2 = l2->tail;
359
0
    }
360
0
    return l1 ? 1 : l2 ? -1 : 0;
361
0
  } else
362
0
    return strcasecmp(f1->val, f2->val);
363
0
}
364
365
/**
366
 * Return a positive number if the given language tag matches the language range,
367
 * 0 otherwise.
368
 *
369
 * In case of a perfect match, return 10. Otherwise, for each extra subtag that
370
 * has no exact match in the range, subtract two.
371
 *
372
 * See also <https://datatracker.ietf.org/doc/html/rfc4647#section-3.3.2>
373
 */
374
static int
375
0
matchLanguageTags(const List *tag, const List *range) {
376
0
  static const int POS_MATCH = 10;
377
0
  static const int EXTRA = -2;
378
0
  int q = POS_MATCH;
379
0
  if (*((char *)range->head) == '*')
380
0
    q += EXTRA;
381
0
  else if (strcasecmp(tag->head, range->head) != 0)
382
0
    return 0;
383
0
  range = range->tail;
384
0
  tag = tag->tail;
385
0
  while (range) {
386
0
    if (!tag) return 0;
387
0
    if (strcasecmp(tag->head, range->head) == 0) {
388
0
      range = range->tail;
389
0
      tag = tag->tail;
390
0
      continue;
391
0
    } else if (strlen(tag->head) == 1)
392
0
      return 0;
393
0
    else
394
0
      q += EXTRA;
395
0
    tag = tag->tail;
396
0
  }
397
0
  while (tag) {
398
0
    q += EXTRA;
399
0
    tag = tag->tail;
400
0
  }
401
0
  return q;
402
0
}
403
404
/**
405
 * Compute the match quotient of the features in a query against the features in a table's
406
 * metadata.
407
 *
408
 * The features are assumed to be sorted. The query's features must be
409
 * of type FeatureWithImportance and are assumed to have no duplicate
410
 * keys. How a feature contributes to the match quotient depends on
411
 * its importance, on whether the feature is undefined, defined with
412
 * the same value (positive match), or defined with a different value
413
 * (negative match), and on the `fuzzy' argument. If the `fuzzy'
414
 * argument evaluates to true, negative matches and undefined features
415
 * get a lower penalty.
416
 */
417
static int
418
0
matchFeatureLists(const List *query, const List *tableFeatures, int fuzzy) {
419
0
  static const int POS_MATCH = 10;
420
0
  static const int NEG_MATCH = -100;
421
0
  static const int UNDEFINED = -20;
422
0
  static const int EXTRA = -1;
423
0
  static const int POS_MATCH_FUZZY = 10;
424
0
  static const int NEG_MATCH_FUZZY = -25;
425
0
  static const int UNDEFINED_FUZZY = -5;
426
0
  static const int EXTRA_FUZZY = -1;
427
0
  int posMatch, negMatch, undefined, extra;
428
0
  if (!fuzzy) {
429
0
    posMatch = POS_MATCH;
430
0
    negMatch = NEG_MATCH;
431
0
    undefined = UNDEFINED;
432
0
    extra = EXTRA;
433
0
  } else {
434
0
    posMatch = POS_MATCH_FUZZY;
435
0
    negMatch = NEG_MATCH_FUZZY;
436
0
    undefined = UNDEFINED_FUZZY;
437
0
    extra = EXTRA_FUZZY;
438
0
  }
439
0
  int quotient = 0;
440
0
  const List *l1 = query;
441
0
  const List *l2 = tableFeatures;
442
0
  while (1) {
443
0
    if (!l1) {
444
0
      if (!l2) break;
445
0
      quotient += extra;
446
0
      const List *l = l2;
447
0
      l = l->tail;
448
0
      while (l && cmpKeys(l->head, l2->head) == 0) l = l->tail;
449
0
      l2 = l;
450
0
    } else if (!l2) {
451
0
      quotient += undefined;
452
0
      l1 = l1->tail;
453
0
    } else {
454
0
      int cmp = cmpKeys(l1->head, l2->head);
455
0
      if (cmp < 0) {
456
0
        quotient += undefined;
457
0
        l1 = l1->tail;
458
0
      } else if (cmp > 0) {
459
0
        quotient += extra;
460
0
        const List *l = l2;
461
0
        l = l->tail;
462
0
        while (l && cmpKeys(l->head, l2->head) == 0) l = l->tail;
463
0
        l2 = l;
464
0
      } else {
465
0
        const List *l = l2;
466
0
        char *k = ((Feature *)l->head)->key;
467
0
        int best = negMatch;
468
0
        if (isLanguageTag(k, MAXSTRING)) {
469
0
          int extraLanguages = 0;
470
0
          while (1) {
471
            // special handling of language tags: tags in the
472
            // table are intepreted as language ranges
473
0
            List *v = ((Feature *)l->head)->val;
474
0
            List *v1 = ((Feature *)l1->head)->val;
475
0
            int q = matchLanguageTags(v1, v);
476
0
            if (q > 0 && q > best)
477
0
              best = q;
478
0
            else if (!q)
479
0
              extraLanguages += extra;
480
0
            l = l->tail;
481
0
            if (!l || cmpKeys(l->head, l2->head) != 0) break;
482
0
          }
483
0
          if (best > 0)
484
0
            best += ((extraLanguages + 4) /
485
0
                5);  // penalty for extra languages is lower than penalty
486
                   // for fields that are not in query at all
487
0
        } else {
488
0
          while (1) {
489
0
            if (best < 0) {
490
0
              char *v = ((Feature *)l->head)->val;
491
0
              char *v1 = ((Feature *)l1->head)->val;
492
0
              if (strcasecmp(v1, v) == 0)
493
0
                best = posMatch;
494
0
              else if (strcasecmp(k, "unicode-range") == 0) {
495
                // special handling of unicode-range: ucs2 in
496
                // table also matches ucs4 in query
497
0
                if (strcasecmp(v1, "ucs4") == 0 &&
498
0
                    strcasecmp(v, "ucs2") == 0) {
499
0
                  best = posMatch;
500
0
                  best--;  // add small penalty to favour ucs4 table
501
                       // if it exists
502
0
                }
503
0
              }
504
0
            }
505
0
            l = l->tail;
506
0
            if (!l || cmpKeys(l->head, l2->head) != 0) break;
507
0
          }
508
0
        }
509
0
        quotient += best;
510
0
        l1 = l1->tail;
511
0
        l2 = l;
512
0
      }
513
0
    }
514
0
  }
515
0
  return quotient;
516
0
}
517
518
/**
 * Return true if a character is an ASCII letter, an ASCII digit, '-', '.'
 * or '_'.
 */
static int
isValidChar(char c) {
  switch (c) {
  case '-':
  case '.':
  case '_':
    return 1;
  default:
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
        (c >= 'a' && c <= 'z');
  }
}
525
526
/**
 * Return true if a character is a space or a tab.
 */
static int
isSpace(char c) {
  switch (c) {
  case ' ':
  case '\t':
    return 1;
  default:
    return 0;
  }
}
533
534
/**
535
 * Parse a table query into a list of features. Features defined first get a
536
 * higher importance.
537
 *
538
 * The returned list must be freed by the caller, using list_free.
539
 */
540
static List *
541
0
parseQuery(const char *query) {
542
0
  List *features = NULL;
543
0
  const char *key = NULL;
544
0
  const char *val = NULL;
545
0
  size_t keySize = 0;
546
0
  size_t valSize = 0;
547
0
  const char *c;
548
0
  int pos = 0;
549
0
  int unicodeRange = 0;
550
0
  while (1) {
551
0
    c = &query[pos++];
552
0
    if (isSpace(*c) || (*c == '\n') || (*c == '\0')) {
553
0
      if (key) {
554
0
        char *v = NULL;
555
0
        if (val) {
556
0
          v = malloc(valSize + 1);
557
0
          v[valSize] = '\0';
558
0
          memcpy(v, val, valSize);
559
0
        }
560
0
        if (!v) goto compile_error;
561
0
        char *k = malloc(keySize + 1);
562
0
        k[keySize] = '\0';
563
0
        memcpy(k, key, keySize);
564
0
        if (isLanguageTag(k, keySize)) {
565
0
          List *tag = parseLanguageTag(v);
566
0
          if (!tag) {
567
0
            _lou_logMessage(LOU_LOG_ERROR, "Not a valid language tag: %s", v);
568
0
            free(k);
569
0
            free(v);
570
0
            list_free(features);
571
0
            return NULL;
572
0
          }
573
0
          if (strcasecmp(k, "locale") == 0) {
574
            // locale is shorthand for language + region
575
0
            FeatureWithImportance f1 = { feat_new(strdup("language"), tag,
576
0
                               (void *(*)(void *))list_dup,
577
0
                               (void (*)(void *))list_free),
578
0
              0 };
579
0
            FeatureWithImportance f2 = { feat_new(strdup("region"),
580
0
                               list_dup(tag),
581
0
                               (void *(*)(void *))list_dup,
582
0
                               (void (*)(void *))list_free),
583
0
              0 };
584
0
            _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'",
585
0
                f1.feature.key, v);
586
0
            _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'",
587
0
                f2.feature.key, v);
588
0
            features = list_conj(features,
589
0
                memcpy(malloc(sizeof(f1)), &f1, sizeof(f1)), NULL,
590
0
                (void *(*)(void *))feat_dup, (void (*)(void *))feat_free);
591
0
            features = list_conj(features,
592
0
                memcpy(malloc(sizeof(f2)), &f2, sizeof(f2)), NULL,
593
0
                (void *(*)(void *))feat_dup, (void (*)(void *))feat_free);
594
0
          } else {
595
0
            FeatureWithImportance f = { feat_new(strdup(k), tag,
596
0
                              (void *(*)(void *))list_dup,
597
0
                              (void (*)(void *))list_free),
598
0
              0 };
599
0
            _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", k, v);
600
0
            features = list_conj(features,
601
0
                memcpy(malloc(sizeof(f)), &f, sizeof(f)), NULL,
602
0
                (void *(*)(void *))feat_dup, (void (*)(void *))feat_free);
603
0
          }
604
0
        } else {
605
0
          FeatureWithImportance f = { feat_new(strdup(k), strdup(v),
606
0
                            (void *(*)(void *))strdup,
607
0
                            (void (*)(void *))free),
608
0
            0 };
609
0
          _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", k, v);
610
0
          features = list_conj(features,
611
0
              memcpy(malloc(sizeof(f)), &f, sizeof(f)), NULL,
612
0
              (void *(*)(void *))feat_dup, (void (*)(void *))feat_free);
613
0
          if (strcasecmp(k, "unicode-range") == 0) unicodeRange = 1;
614
0
        }
615
0
        free(k);
616
0
        free(v);
617
0
        key = val = NULL;
618
0
        keySize = valSize = 0;
619
0
      }
620
0
      if (*c == '\0') break;
621
0
    } else if (*c == ':') {
622
0
      if (!key || val)
623
0
        goto compile_error;
624
0
      else {
625
0
        c = &query[pos++];
626
0
        if (isValidChar(*c)) {
627
0
          val = c;
628
0
          valSize = 1;
629
0
        } else
630
0
          goto compile_error;
631
0
      }
632
0
    } else if (isValidChar(*c)) {
633
0
      if (val)
634
0
        valSize++;
635
0
      else if (key)
636
0
        keySize++;
637
0
      else {
638
0
        key = c;
639
0
        keySize = 1;
640
0
      }
641
0
    } else
642
0
      goto compile_error;
643
0
  }
644
  // add defaults
645
0
  if (!unicodeRange) {
646
    // default value of unicode-range is determined by CHARSIZE
647
0
    static char value[5] = "";
648
0
    if (!*value) sprintf(value, "ucs%ld", CHARSIZE);
649
0
    FeatureWithImportance *f = memcpy(malloc(sizeof(FeatureWithImportance)),
650
0
        (&(FeatureWithImportance){
651
0
            feat_new(strdup("unicode-range"), strdup(value),
652
0
                (void *(*)(void *))strdup, (void (*)(void *))free),
653
0
            -1 }),
654
0
        sizeof(FeatureWithImportance));
655
0
    _lou_logMessage(LOU_LOG_DEBUG, "Query has feature '%s:%s'", f->feature.key,
656
0
        f->feature.val);
657
0
    features = list_conj(features, f, NULL, (void *(*)(void *))feat_dup,
658
0
        (void (*)(void *))feat_free);
659
0
  }
660
  // attach importance to features
661
0
  {
662
0
    int k = 1;
663
0
    List *l;
664
0
    for (l = features; l; l = l->tail) {
665
0
      FeatureWithImportance *f = l->head;
666
0
      f->importance = k++;
667
0
    }
668
0
  }
669
  // sort features by key (alphabetical order)
670
0
  return list_sort(features, (int (*)(void *, void *))cmpKeys);
671
0
compile_error:
672
0
  _lou_logMessage(LOU_LOG_ERROR, "Unexpected character '%c' at position %d", *c, pos);
673
0
  list_free(features);
674
0
  return NULL;
675
0
}
676
677
/**
678
 * Convert a widechar string to a normal string.
679
 */
680
static char *
681
0
widestrToStr(const widechar *str, size_t n) {
682
0
  char *result = malloc((1 + n) * sizeof(char));
683
0
  size_t k;
684
0
  for (k = 0; k < n; k++) result[k] = (char)str[k];
685
0
  result[k] = '\0';
686
0
  return result;
687
0
}
688
689
/**
690
 * Extract a list of features from a table. The features are of type
691
 * FeatureWithLineNumber.
692
 */
693
static List *
694
0
analyzeTable(const char *table, int activeOnly) {
695
0
  static char fileName[MAXSTRING];
696
0
  List *features = NULL;
697
0
  FileInfo info;
698
699
0
  {
700
0
    char **resolved = _lou_resolveTable(table, NULL);
701
702
0
    if (resolved == NULL) {
703
0
      _lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", table);
704
0
      return NULL;
705
0
    }
706
707
0
    sprintf(fileName, "%s", *resolved);
708
0
    int k = 0;
709
710
0
    for (k = 0; resolved[k]; k += 1) free(resolved[k]);
711
0
    free(resolved);
712
713
0
    if (k > 1) {
714
0
      _lou_logMessage(
715
0
          LOU_LOG_ERROR, "Table '%s' resolves to more than one file", table);
716
0
      return NULL;
717
0
    }
718
0
  }
719
720
0
  info.fileName = fileName;
721
0
  info.encoding = noEncoding;
722
0
  info.status = 0;
723
0
  info.lineNumber = 0;
724
0
  if ((info.in = fopen(info.fileName, "rb"))) {
725
0
    FeatureWithLineNumber *region = NULL;
726
0
    FeatureWithLineNumber *language = NULL;
727
0
    int unicodeRange = 0;
728
0
    while (_lou_getALine(&info)) {
729
0
      if (info.linelen == 0)
730
0
        ;
731
0
      else if (info.line[0] == '#') {
732
0
        if (info.linelen >= 2 &&
733
0
            (info.line[1] == '+' ||
734
0
                (!activeOnly && info.line[1] == '-' &&
735
0
                    !(info.linelen > 2 && info.line[2] == '-')))) {
736
0
          int active = (info.line[1] == '+');
737
0
          widechar *key = NULL;
738
0
          widechar *val = NULL;
739
0
          size_t keySize = 0;
740
0
          size_t valSize = 0;
741
0
          info.linepos = 2;
742
0
          if (info.linepos < info.linelen &&
743
0
              isValidChar((char)info.line[info.linepos])) {
744
0
            key = &info.line[info.linepos];
745
0
            keySize = 1;
746
0
            info.linepos++;
747
0
            while (info.linepos < info.linelen &&
748
0
                isValidChar((char)info.line[info.linepos])) {
749
0
              keySize++;
750
0
              info.linepos++;
751
0
            }
752
0
            char *k = widestrToStr(key, keySize);
753
0
            int isLangTag = isLanguageTag(k, keySize);
754
0
            if (info.linepos < info.linelen &&
755
0
                info.line[info.linepos] == ':') {
756
0
              info.linepos++;
757
0
              while (info.linepos < info.linelen &&
758
0
                  isSpace((char)info.line[info.linepos]))
759
0
                info.linepos++;
760
0
              if (info.linepos < info.linelen &&
761
0
                  (!active ||
762
0
                      isValidChar((char)info.line[info.linepos]) ||
763
0
                      (isLangTag &&
764
0
                          '*' == info.line[info.linepos]))) {
765
0
                val = &info.line[info.linepos];
766
0
                valSize = 1;
767
0
                info.linepos++;
768
0
                while (info.linepos < info.linelen &&
769
0
                    (!active ||
770
0
                        isValidChar(
771
0
                            (char)info.line[info.linepos]))) {
772
0
                  valSize++;
773
0
                  info.linepos++;
774
0
                }
775
0
              } else {
776
0
                free(k);
777
0
                goto compile_error;
778
0
              }
779
0
            }
780
0
            if (info.linepos == info.linelen) {
781
0
              char *v = val ? widestrToStr(val, valSize) : NULL;
782
0
              if (!v) {
783
0
                free(k);
784
0
                goto compile_error;
785
0
              }
786
0
              if (!active) {
787
                // normalize space
788
0
                int i = 0;
789
0
                int j = 0;
790
0
                int space = 1;
791
0
                while (v[i]) {
792
0
                  if (isSpace(v[i])) {
793
0
                    if (!space) {
794
0
                      v[j++] = ' ';
795
0
                      space = 1;
796
0
                    }
797
0
                  } else {
798
0
                    v[j++] = v[i];
799
0
                    space = 0;
800
0
                  }
801
0
                  i++;
802
0
                }
803
0
                if (j > 0 && v[j - 1] == ' ') j--;
804
0
                v[j] = '\0';
805
0
              }
806
0
              if (isLangTag) {
807
0
                List *tag = parseLanguageTag(v);
808
0
                if (!tag) {
809
0
                  _lou_logMessage(LOU_LOG_ERROR,
810
0
                      "Not a valid language tag: %s (line %d)", v,
811
0
                      info.lineNumber);
812
0
                  list_free(features);
813
0
                  return NULL;
814
0
                }
815
0
                if (strcasecmp(k, "locale") == 0) {
816
0
                  FeatureWithLineNumber *f1 = memcpy(
817
0
                      malloc(sizeof(FeatureWithLineNumber)),
818
0
                      (&(FeatureWithLineNumber){
819
0
                          feat_new(strdup("language"), tag,
820
0
                              (void *(*)(void *))list_dup,
821
0
                              (void (*)(void *))list_free),
822
0
                          info.lineNumber }),
823
0
                      sizeof(FeatureWithLineNumber));
824
0
                  FeatureWithLineNumber *f2 = memcpy(
825
0
                      malloc(sizeof(FeatureWithLineNumber)),
826
0
                      (&(FeatureWithLineNumber){
827
0
                          feat_new(strdup("region"),
828
0
                              list_dup(tag),
829
0
                              (void *(*)(void *))list_dup,
830
0
                              (void (*)(void *))list_free),
831
0
                          info.lineNumber }),
832
0
                      sizeof(FeatureWithLineNumber));
833
0
                  _lou_logMessage(LOU_LOG_DEBUG,
834
0
                      "Table has feature '%s:%s'", f1->feature.key,
835
0
                      v);
836
0
                  _lou_logMessage(LOU_LOG_DEBUG,
837
0
                      "Table has feature '%s:%s'", f2->feature.key,
838
0
                      v);
839
0
                  features = list_conj(features, f1, NULL,
840
0
                      (void *(*)(void *))feat_dup,
841
0
                      (void (*)(void *))feat_free);
842
0
                  features = list_conj(features, f2, NULL,
843
0
                      (void *(*)(void *))feat_dup,
844
0
                      (void (*)(void *))feat_free);
845
0
                  if (!language) language = f1;
846
0
                  if (!region) region = f2;
847
0
                } else {
848
0
                  FeatureWithLineNumber *f = memcpy(
849
0
                      malloc(sizeof(FeatureWithLineNumber)),
850
0
                      (&(FeatureWithLineNumber){
851
0
                          feat_new(strdup(k), tag,
852
0
                              (void *(*)(void *))list_dup,
853
0
                              (void (*)(void *))list_free),
854
0
                          info.lineNumber }),
855
0
                      sizeof(FeatureWithLineNumber));
856
0
                  _lou_logMessage(LOU_LOG_DEBUG,
857
0
                      "Table has feature '%s:%s'", k, v);
858
0
                  features = list_conj(features, f, NULL,
859
0
                      (void *(*)(void *))feat_dup,
860
0
                      (void (*)(void *))feat_free);
861
0
                  if (strcasecmp(k, "language") == 0) {
862
0
                    if (!language) language = f;
863
0
                  } else if (strcasecmp(k, "region") == 0) {
864
0
                    if (!region) region = f;
865
0
                  }
866
0
                }
867
0
              } else {
868
0
                FeatureWithLineNumber *f =
869
0
                    memcpy(malloc(sizeof(FeatureWithLineNumber)),
870
0
                        (&(FeatureWithLineNumber){
871
0
                            feat_new(strdup(k), strdup(v),
872
0
                                (void *(*)(void *))strdup,
873
0
                                (void (*)(void *))free),
874
0
                            info.lineNumber }),
875
0
                        sizeof(FeatureWithLineNumber));
876
0
                _lou_logMessage(
877
0
                    LOU_LOG_DEBUG, "Table has feature '%s:%s'", k, v);
878
0
                features = list_conj(features, f, NULL,
879
0
                    (void *(*)(void *))feat_dup,
880
0
                    (void (*)(void *))feat_free);
881
0
                if (strcasecmp(k, "unicode-range") == 0) unicodeRange = 1;
882
0
              }
883
0
              free(k);
884
0
              free(v);
885
0
            } else {
886
0
              free(k);
887
0
              goto compile_error;
888
0
            }
889
0
          } else
890
0
            goto compile_error;
891
0
        }
892
0
      } else
893
0
        break;
894
0
    }
895
0
    fclose(info.in);
896
    // add defaults
897
0
    if (!region && language) {
898
0
      region = memcpy(malloc(sizeof(FeatureWithLineNumber)),
899
0
          (&(FeatureWithLineNumber){
900
0
              feat_new(strdup("region"), list_dup(language->feature.val),
901
0
                  (void *(*)(void *))list_dup,
902
0
                  (void (*)(void *))list_free),
903
0
              -1 }),
904
0
          sizeof(FeatureWithLineNumber));
905
0
      char *v = serializeLanguageTag(region->feature.val);
906
0
      _lou_logMessage(
907
0
          LOU_LOG_DEBUG, "Table has feature '%s:%s'", region->feature.key, v);
908
0
      free(v);
909
0
      features = list_conj(features, region, NULL, (void *(*)(void *))feat_dup,
910
0
          (void (*)(void *))feat_free);
911
0
    }
912
0
    if (features && !unicodeRange) {
913
      // by default we assume unicode-range: ucs2
914
0
      FeatureWithLineNumber *f = memcpy(malloc(sizeof(FeatureWithLineNumber)),
915
0
          (&(FeatureWithLineNumber){
916
0
              feat_new(strdup("unicode-range"), strdup("ucs2"),
917
0
                  (void *(*)(void *))strdup, (void (*)(void *))free),
918
0
              -1 }),
919
0
          sizeof(FeatureWithLineNumber));
920
0
      _lou_logMessage(LOU_LOG_DEBUG, "Table has feature '%s:%s'", f->feature.key,
921
0
          f->feature.val);
922
0
      features = list_conj(features, f, NULL, (void *(*)(void *))feat_dup,
923
0
          (void (*)(void *))feat_free);
924
0
    }
925
0
  } else
926
0
    _lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", info.fileName);
927
0
  return list_sort(features, (int (*)(void *, void *))cmpFeatures);
928
0
compile_error:
929
0
  if (info.linepos < info.linelen)
930
0
    _lou_logMessage(LOU_LOG_ERROR, "Unexpected character '%c' on line %d, column %d",
931
0
        info.line[info.linepos], info.lineNumber, info.linepos);
932
0
  else
933
0
    _lou_logMessage(LOU_LOG_ERROR, "Unexpected newline on line %d", info.lineNumber);
934
0
  list_free(features);
935
0
  return NULL;
936
0
}
937
938
/**
939
 * List of discoverable tables and corresponding metadata.
940
 *
941
 * The list is freed by _lou_freeTableIndex, which is invoked by lou_free. It should not
942
 * be copied.
943
 */
944
static List *tableIndex = NULL;
945
946
void EXPORT_CALL
947
0
lou_indexTables(const char **tables) {
948
0
  const char **table;
949
0
  list_free(tableIndex);
950
0
  tableIndex = NULL;
951
0
  for (table = tables; *table; table++) {
952
0
    _lou_logMessage(LOU_LOG_DEBUG, "Analyzing table %s", *table);
953
0
    List *features = analyzeTable(*table, 1);
954
0
    if (features) {
955
0
      TableMeta m = { strdup(*table), features };
956
0
      tableIndex = list_conj(tableIndex, memcpy(malloc(sizeof(m)), &m, sizeof(m)),
957
0
          NULL, NULL, (void (*)(void *))meta_free);
958
0
    }
959
0
  }
960
0
  if (!tableIndex) _lou_logMessage(LOU_LOG_WARN, "No tables were indexed");
961
0
}
962
963
// called by lou_free
964
void EXPORT_CALL
965
437
_lou_freeTableIndex(void) {
966
437
  list_free(tableIndex);
967
437
  tableIndex = NULL;
968
437
}
969
970
/**
971
 * Returns the list of files found in a single directory.
972
 *
973
 * Must be freed by the caller, using list_free.
974
 */
975
#ifdef _MSC_VER
976
/*
 * Windows variant: adds the path of every non-directory entry of `dirName'
 * to `list' and returns the resulting list. Entries whose full path does not
 * fit in MAXSTRING characters are skipped with a warning instead of
 * overflowing the path buffer.
 */
static List *
listDir(List *list, char *dirName) {
  static char glob[MAXSTRING];
  static char fileName[MAXSTRING];
  WIN32_FIND_DATAA ffd;
  HANDLE hFind;
  int len = snprintf(glob, sizeof(glob), "%s%c%c", dirName, DIR_SEP, '*');
  if (len < 0 || (size_t)len >= sizeof(glob)) {
    _lou_logMessage(LOU_LOG_WARN, "Directory name is too long: %s", dirName);
    return list;
  }
  hFind = FindFirstFileA(glob, &ffd);
  if (hFind == INVALID_HANDLE_VALUE) {
    _lou_logMessage(LOU_LOG_WARN, "%s is not a directory", dirName);
    return list;
  }
  do {
    if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) continue;
    len = snprintf(
        fileName, sizeof(fileName), "%s%c%s", dirName, DIR_SEP, ffd.cFileName);
    if (len < 0 || (size_t)len >= sizeof(fileName)) {
      _lou_logMessage(LOU_LOG_WARN, "Skipping file with too long path: %s%c%s",
          dirName, DIR_SEP, ffd.cFileName);
      continue;
    }
    // The list owns a private copy because fileName is reused per entry.
    list = list_conj(
        list, strdup(fileName), NULL, (void *(*)(void *))strdup, free);
  } while (FindNextFileA(hFind, &ffd));
  FindClose(hFind);
  return list;
}
998
#else  /* !_MSC_VER */
999
/*
 * POSIX variant: adds the path of every entry of `dirName' that stat() does
 * not report as a directory to `list' and returns the resulting list. Entries
 * whose full path does not fit in MAXSTRING characters are skipped with a
 * warning instead of overflowing the path buffer.
 */
static List *
listDir(List *list, char *dirName) {
  static char fileName[MAXSTRING];
  struct stat info;
  struct dirent *file;
  DIR *dir = opendir(dirName);
  if (!dir) {
    _lou_logMessage(LOU_LOG_WARN, "%s is not a directory", dirName);
    return list;
  }
  while ((file = readdir(dir))) {
    int len = snprintf(
        fileName, sizeof(fileName), "%s%c%s", dirName, DIR_SEP, file->d_name);
    if (len < 0 || (size_t)len >= sizeof(fileName)) {
      _lou_logMessage(LOU_LOG_WARN, "Skipping file with too long path: %s%c%s",
          dirName, DIR_SEP, file->d_name);
      continue;
    }
    // NOTE(review): the S_IFDIR bit test is kept as is; S_ISDIR() would be the
    // exact check but would change which entries are listed.
    if (stat(fileName, &info) == 0 && !(info.st_mode & S_IFDIR)) {
      // The list owns a private copy because fileName is reused per entry.
      list = list_conj(
          list, strdup(fileName), NULL, (void *(*)(void *))strdup, free);
    }
  }
  closedir(dir);
  return list;
}
1019
#endif /* !_MSC_VER */
1020
1021
/**
1022
 * Returns the list of files found on searchPath, where searchPath is a
1023
 * comma-separated list of directories.
1024
 */
1025
static List *
1026
0
listFiles(char *searchPath) {
1027
0
  List *list = NULL;
1028
0
  char *dirName;
1029
0
  int pos = 0;
1030
0
  int n;
1031
0
  while (1) {
1032
0
    for (n = 0; searchPath[pos + n] != '\0' && searchPath[pos + n] != ','; n++)
1033
0
      ;
1034
0
    dirName = malloc(n + 1);
1035
0
    dirName[n] = '\0';
1036
0
    memcpy(dirName, &searchPath[pos], n);
1037
0
    list = listDir(list, dirName);
1038
0
    free(dirName);
1039
0
    pos += n;
1040
0
    if (searchPath[pos] == '\0')
1041
0
      break;
1042
0
    else
1043
0
      pos++;
1044
0
  }
1045
0
  return list;
1046
0
}
1047
1048
static void
1049
0
indexTablePath(void) {
1050
0
  char *searchPath;
1051
0
  List *tables;
1052
0
  void *tablesArray;
1053
0
  _lou_logMessage(
1054
0
      LOU_LOG_WARN, "Tables have not been indexed yet. Indexing LOUIS_TABLEPATH.");
1055
0
  searchPath = _lou_getTablePath();
1056
0
  tables = listFiles(searchPath);
1057
0
  tablesArray = list_toArray(tables, 0);
1058
0
  lou_indexTables(tablesArray);
1059
0
  free(searchPath);
1060
0
  list_free(tables);
1061
0
  free(tablesArray);
1062
0
}
1063
1064
char *EXPORT_CALL
1065
0
lou_findTable(const char *query) {
1066
0
  if (!tableIndex) indexTablePath();
1067
0
  List *queryFeatures = parseQuery(query);
1068
0
  int bestQuotient = 0;
1069
0
  char *bestMatch = NULL;
1070
0
  List *l;
1071
0
  for (l = tableIndex; l; l = l->tail) {
1072
0
    TableMeta *table = l->head;
1073
0
    int q = matchFeatureLists(queryFeatures, table->features, 0);
1074
0
    if (q > bestQuotient) {
1075
0
      bestQuotient = q;
1076
0
      if (bestMatch) free(bestMatch);
1077
0
      bestMatch = strdup(table->name);
1078
0
    }
1079
0
  }
1080
0
  list_free(queryFeatures);
1081
0
  if (bestMatch) {
1082
0
    _lou_logMessage(LOU_LOG_INFO, "Best match: %s (%d)", bestMatch, bestQuotient);
1083
0
    return bestMatch;
1084
0
  } else {
1085
0
    _lou_logMessage(LOU_LOG_INFO, "No table could be found for query '%s'", query);
1086
0
    return NULL;
1087
0
  }
1088
0
}
1089
1090
void EXPORT_CALL
1091
0
lou_freeTableFile(char *table) {
1092
0
  free(table);
1093
0
}
1094
1095
/** A table name paired with the quotient it scored against a query. */
typedef struct {
  char *name;
  int matchQuotient;
} TableMatch;

/**
 * Ordering function for TableMatch records: higher quotients sort first.
 *
 * Deliberately never returns 0. It is used as the `cmp' argument of
 * list_conj, which replaces existing elements that compare equal, so two
 * tables with the same quotient must not be reported as equal.
 */
static int
cmpMatches(TableMatch *m1, TableMatch *m2) {
  return (m1->matchQuotient > m2->matchQuotient) ? -1 : 1;
}
1107
1108
/**
1109
 * The returned array and strings must be freed by the caller.
1110
 */
1111
char **EXPORT_CALL
1112
0
lou_findTables(const char *query) {
1113
0
  char **tablesArray;
1114
0
  List *matches = NULL;
1115
0
  if (!tableIndex) indexTablePath();
1116
0
  List *queryFeatures = parseQuery(query);
1117
0
  List *l;
1118
0
  for (l = tableIndex; l; l = l->tail) {
1119
0
    TableMeta *table = l->head;
1120
0
    int quotient = matchFeatureLists(queryFeatures, table->features, 0);
1121
0
    if (quotient > 0) {
1122
0
      TableMatch m = { strdup(table->name), quotient };
1123
0
      matches = list_conj(matches, memcpy(malloc(sizeof(m)), &m, sizeof(m)),
1124
0
          (int (*)(void *, void *))cmpMatches, NULL, free);
1125
0
    }
1126
0
  }
1127
0
  list_free(queryFeatures);
1128
0
  if (matches) {
1129
0
    _lou_logMessage(LOU_LOG_INFO, "%d matches found", list_size(matches));
1130
0
    int i = 0;
1131
0
    tablesArray = malloc((1 + list_size(matches)) * sizeof(void *));
1132
0
    for (List *m = matches; m; m = m->tail)
1133
0
      tablesArray[i++] = ((TableMatch *)m->head)->name;
1134
0
    tablesArray[i] = NULL;
1135
0
    list_free(matches);
1136
0
    return tablesArray;
1137
0
  } else {
1138
0
    _lou_logMessage(LOU_LOG_INFO, "No table could be found for query '%s'", query);
1139
0
    return NULL;
1140
0
  }
1141
0
}
1142
1143
char *EXPORT_CALL
1144
0
lou_getTableInfo(const char *table, const char *key) {
1145
0
  char *value = NULL;
1146
0
  List *features = analyzeTable(table, 0);
1147
0
  List *l;
1148
0
  int lineNumber = -1;  // line number of first matching feature
1149
0
  for (l = features; l; l = l->tail) {
1150
0
    FeatureWithLineNumber *f = l->head;
1151
0
    int cmp = strcasecmp(f->feature.key, key);
1152
0
    if (cmp == 0) {
1153
0
      if (lineNumber < 0 || lineNumber > f->lineNumber) {
1154
0
        if (isLanguageTag(key, MAXSTRING))
1155
0
          value = serializeLanguageTag(f->feature.val);
1156
0
        else
1157
0
          value = strdup(f->feature.val);
1158
0
        lineNumber = f->lineNumber;
1159
0
      }
1160
0
    } else if (cmp > 0) {
1161
0
      break;
1162
0
    }
1163
0
  }
1164
0
  list_free(features);
1165
0
  return value;
1166
0
}
1167
1168
void EXPORT_CALL
1169
0
lou_freeTableInfo(char *info) {
1170
0
  free(info);
1171
0
}
1172
1173
char **EXPORT_CALL
1174
0
lou_listTables(void) {
1175
0
  void *tablesArray;
1176
0
  List *tables = NULL;
1177
0
  List *l;
1178
0
  if (!tableIndex) indexTablePath();
1179
0
  for (l = tableIndex; l; l = l->tail) {
1180
0
    TableMeta *table = l->head;
1181
0
    tables = list_conj(
1182
0
        tables, strdup(table->name), (int (*)(void *, void *))strcmp, NULL, NULL);
1183
0
  }
1184
0
  tablesArray = list_toArray(tables, 0);
1185
0
  list_free(tables);
1186
0
  return tablesArray;
1187
0
}