Coverage Report

Created: 2025-12-14 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/liblouis/liblouis/utils.c
Line
Count
Source
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4
   BRLTTY Team
5
6
   Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7
   Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
9
   Copyright (C) 2016 Davy Kager, Dedicon
10
11
   This file is part of liblouis.
12
13
   liblouis is free software: you can redistribute it and/or modify it
14
   under the terms of the GNU Lesser General Public License as published
15
   by the Free Software Foundation, either version 2.1 of the License, or
16
   (at your option) any later version.
17
18
   liblouis is distributed in the hope that it will be useful, but
19
   WITHOUT ANY WARRANTY; without even the implied warranty of
20
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
   Lesser General Public License for more details.
22
23
   You should have received a copy of the GNU Lesser General Public
24
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
25
*/
26
27
/**
28
 * @file
29
 * @brief Common utility functions
30
 */
31
32
#include "config.h"
33
34
#include <stddef.h>
35
#include <stdlib.h>
36
#include <stdio.h>
37
#include <stdarg.h>
38
#include <string.h>
39
#include <ctype.h>
40
#include <sys/stat.h>
41
42
#include "internal.h"
43
44
/* Contributed by Michel Such <michel.such@free.fr> */
45
#ifdef _WIN32
46
47
/* Adapted from BRLTTY code (see sys_progs_windows.h) */
48
49
#include <shlobj.h>
50
51
/**
 * Resize a heap block, treating allocation failure as fatal.
 *
 * @param address block to resize (may be NULL)
 * @param size requested size in bytes
 * @return the pointer returned by realloc(); a NULL result is only passed
 * back to the caller when size is 0
 */
static void *
reallocWrapper(void *address, size_t size) {
  void *resized = realloc(address, size);

  /* A NULL result for a zero-sized request is not an error. */
  if (resized == NULL && size != 0) _lou_outOfMemory();
  return resized;
}
56
57
/**
 * Duplicate a string, treating allocation failure as fatal.
 *
 * @param string NUL-terminated string to copy
 * @return the heap copy produced by strdup()
 */
static char *
strdupWrapper(const char *string) {
  char *copy = strdup(string);

  if (copy == NULL) _lou_outOfMemory();
  return copy;
}
63
64
char *EXPORT_CALL
65
lou_getProgramPath(void) {
66
  char *path = NULL;
67
  HMODULE handle;
68
69
  if ((handle = GetModuleHandle(NULL))) {
70
    DWORD size = 0X80;
71
    char *buffer = NULL;
72
73
    while (1) {
74
      buffer = reallocWrapper(buffer, size <<= 1);
75
76
      {
77
        // As the "UNICODE" Windows define may have been set at compilation,
78
        // This call must be specifically GetModuleFilenameA as further code
79
        // expects it to be single byte chars.
80
        DWORD length = GetModuleFileNameA(handle, buffer, size);
81
82
        if (!length) {
83
          printf("GetModuleFileName\n");
84
          exit(3);
85
        }
86
87
        if (length < size) {
88
          buffer[length] = 0;
89
          path = strdupWrapper(buffer);
90
91
          while (length > 0)
92
            if (path[--length] == '\\') break;
93
94
          strncpy(path, path, length + 1);
95
          path[length + 1] = '\0';
96
          break;
97
        }
98
      }
99
    }
100
101
    free(buffer);
102
  } else {
103
    printf("GetModuleHandle\n");
104
    exit(3);
105
  }
106
107
  return path;
108
}
109
#endif
110
/* End of MS contribution */
111
112
static widechar
113
3.25M
toLowercase(widechar c, const TranslationTableHeader *table) {
114
3.25M
  static TranslationTableOffset offset;
115
3.25M
  static TranslationTableCharacter *character;
116
3.25M
  offset = table->characters[_lou_charHash(c)];
117
3.29M
  while (offset) {
118
1.77M
    character = (TranslationTableCharacter *)&table->ruleArea[offset];
119
1.77M
    if (character->value == c) {
120
1.73M
      if (character->mode & CTC_UpperCase) {
121
3.77k
        const TranslationTableCharacter *c = character;
122
3.77k
        if (c->basechar)
123
3.77k
          c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
124
3.77k
        while (1) {
125
3.77k
          if ((c->mode & (character->mode & ~CTC_UpperCase)) ==
126
3.77k
              (character->mode & ~CTC_UpperCase))
127
3.77k
            return c->value;
128
0
          if (!c->linked) break;
129
0
          c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
130
0
        }
131
3.77k
      }
132
1.73M
      return character->value;
133
1.73M
    }
134
38.7k
    offset = character->next;
135
38.7k
  }
136
1.51M
  return c;
137
3.25M
}
138
139
unsigned long int EXPORT_CALL
140
1.62M
_lou_stringHash(const widechar *c, int lowercase, const TranslationTableHeader *table) {
141
1.62M
  if (!lowercase)
142
731
    return (((unsigned long int)c[0] << 8) + (unsigned long int)c[1]) % HASHNUM;
143
1.62M
  else
144
1.62M
    return (((unsigned long int)toLowercase(c[0], table) << 8) +
145
1.62M
             (unsigned long int)toLowercase(c[1], table)) %
146
1.62M
        HASHNUM;
147
1.62M
}
148
149
unsigned long int EXPORT_CALL
150
205M
_lou_charHash(widechar c) {
151
205M
  return (unsigned long int)c % HASHNUM;
152
205M
}
153
154
const char *EXPORT_CALL
155
273k
_lou_showString(widechar const *chars, int length, int forceHex) {
156
  /* Translate a string of characters to the encoding used in character
157
   * operands */
158
273k
  static char scratchBuf[MAXSTRING];
159
273k
  int bufPos = 0;
160
273k
  scratchBuf[bufPos++] = '\'';
161
162
547k
  for (int charPos = 0; (charPos < length) && (bufPos < (MAXSTRING - 2));
163
273k
      charPos += 1) {
164
273k
    widechar c = chars[charPos];
165
166
273k
    if (!forceHex && isASCII(c)) {
167
687
      scratchBuf[bufPos++] = (char)c;
168
273k
    } else {
169
273k
      char hexbuf[20];
170
273k
      int hexLength;
171
273k
      char escapeLetter;
172
173
273k
      int leadingZeros;
174
273k
      int hexPos;
175
273k
      hexLength = sprintf(hexbuf, "%x", c);
176
273k
      switch (hexLength) {
177
25.3k
      case 1:
178
267k
      case 2:
179
268k
      case 3:
180
273k
      case 4:
181
273k
        escapeLetter = 'x';
182
273k
        leadingZeros = 4 - hexLength;
183
273k
        break;
184
0
      case 5:
185
0
        escapeLetter = 'y';
186
0
        leadingZeros = 0;
187
0
        break;
188
0
      case 6:
189
0
      case 7:
190
0
      case 8:
191
0
        escapeLetter = 'z';
192
0
        leadingZeros = 8 - hexLength;
193
0
        break;
194
0
      default:
195
0
        escapeLetter = '?';
196
0
        leadingZeros = 0;
197
0
        break;
198
273k
      }
199
273k
      if ((bufPos + leadingZeros + hexLength + 4) >= (MAXSTRING - 2)) break;
200
273k
      scratchBuf[bufPos++] = '\\';
201
273k
      scratchBuf[bufPos++] = escapeLetter;
202
834k
      for (hexPos = 0; hexPos < leadingZeros; hexPos++) scratchBuf[bufPos++] = '0';
203
805k
      for (hexPos = 0; hexPos < hexLength; hexPos++)
204
531k
        scratchBuf[bufPos++] = hexbuf[hexPos];
205
273k
    }
206
273k
  }
207
273k
  scratchBuf[bufPos++] = '\'';
208
273k
  scratchBuf[bufPos] = 0;
209
273k
  return scratchBuf;
210
273k
}
211
212
/**
213
 * Mapping between braille dot and textual representation as used in dots operands
214
 */
215
static const intCharTupple dotMapping[] = {
216
  { LOU_DOT_1, '1' },
217
  { LOU_DOT_2, '2' },
218
  { LOU_DOT_3, '3' },
219
  { LOU_DOT_4, '4' },
220
  { LOU_DOT_5, '5' },
221
  { LOU_DOT_6, '6' },
222
  { LOU_DOT_7, '7' },
223
  { LOU_DOT_8, '8' },
224
  { LOU_DOT_9, '9' },
225
  { LOU_DOT_10, 'A' },
226
  { LOU_DOT_11, 'B' },
227
  { LOU_DOT_12, 'C' },
228
  { LOU_DOT_13, 'D' },
229
  { LOU_DOT_14, 'E' },
230
  { LOU_DOT_15, 'F' },
231
  { 0, 0 },
232
};
233
234
/**
235
 * Print out dot numbers
236
 *
237
 * @return a string containing the dot numbers. The longest possible
238
 * output is "\123456789ABCDEF0/"
239
 */
240
const char *EXPORT_CALL
241
214k
_lou_unknownDots(widechar dots) {
242
214k
  static char buffer[20];
243
244
214k
  int k = 0;
245
214k
  buffer[k++] = '\\';
246
247
3.43M
  for (int mappingPos = 0; dotMapping[mappingPos].key; mappingPos++) {
248
3.22M
    if (dots & dotMapping[mappingPos].key) buffer[k++] = dotMapping[mappingPos].value;
249
3.22M
  }
250
251
214k
  if (k == 1) buffer[k++] = '0';
252
214k
  buffer[k++] = '/';
253
214k
  buffer[k] = 0;
254
214k
  return buffer;
255
214k
}
256
257
/**
258
 * Translate a sequence of dots to the encoding used in dots operands.
259
 */
260
const char *EXPORT_CALL
261
233
_lou_showDots(widechar const *dots, int length) {
262
233
  int bufPos = 0;
263
233
  static char scratchBuf[MAXSTRING];
264
466
  for (int dotsPos = 0; dotsPos < length && bufPos < (MAXSTRING - 1); dotsPos++) {
265
3.72k
    for (int mappingPos = 0; dotMapping[mappingPos].key; mappingPos++) {
266
3.49k
      if ((dots[dotsPos] & dotMapping[mappingPos].key) &&
267
235
          (bufPos < (MAXSTRING - 1)))
268
235
        scratchBuf[bufPos++] = dotMapping[mappingPos].value;
269
3.49k
    }
270
233
    if ((dots[dotsPos] == LOU_DOTS) && (bufPos < (MAXSTRING - 1)))
271
4
      scratchBuf[bufPos++] = '0';
272
233
    if ((dotsPos != length - 1) && (bufPos < (MAXSTRING - 1)))
273
0
      scratchBuf[bufPos++] = '-';
274
233
  }
275
233
  scratchBuf[bufPos] = 0;
276
233
  return scratchBuf;
277
233
}
278
279
/**
280
 * Mapping between character attribute and textual representation
281
 */
282
static const intCharTupple attributeMapping[] = {
283
  { CTC_Space, 's' },
284
  { CTC_Letter, 'l' },
285
  { CTC_Digit, 'd' },
286
  { CTC_Punctuation, 'p' },
287
  { CTC_UpperCase, 'U' },
288
  { CTC_LowerCase, 'u' },
289
  { CTC_Math, 'm' },
290
  { CTC_Sign, 'S' },
291
  { CTC_LitDigit, 'D' },
292
  { CTC_UserDefined9, 'w' },
293
  { CTC_UserDefined10, 'x' },
294
  { CTC_UserDefined11, 'y' },
295
  { CTC_UserDefined12, 'z' },
296
  { 0, 0 },
297
};
298
299
/**
300
 * Show attributes using the letters used after the $ in multipass
301
 * opcodes.
302
 */
303
char *EXPORT_CALL
304
0
_lou_showAttributes(TranslationTableCharacterAttributes a) {
305
0
  int bufPos = 0;
306
0
  static char scratchBuf[MAXSTRING];
307
0
  for (int mappingPos = 0; attributeMapping[mappingPos].key; mappingPos++) {
308
0
    if ((a & attributeMapping[mappingPos].key) && bufPos < (MAXSTRING - 1))
309
0
      scratchBuf[bufPos++] = attributeMapping[mappingPos].value;
310
0
  }
311
0
  scratchBuf[bufPos] = 0;
312
0
  return scratchBuf;
313
0
}
314
315
void EXPORT_CALL
316
0
_lou_outOfMemory(void) {
317
0
  _lou_logMessage(LOU_LOG_FATAL, "liblouis: Insufficient memory\n");
318
0
  exit(3);
319
0
}
320
321
#ifdef DEBUG
322
void EXPORT_CALL
323
_lou_debugHook(void) {
324
  char *hook = "debug hook";
325
  printf("%s\n", hook);
326
}
327
#endif
328
329
static const int validTranslationModes[] = { noContractions, compbrlAtCursor, dotsIO,
330
  compbrlLeftCursor, ucBrl, noUndefined, partialTrans };
331
332
int EXPORT_CALL
333
397
_lou_isValidMode(int mode) {
334
  // mask out all valid mode bits. If you end up with some bits set
335
  // then the input isn't valid. See
336
  // https://en.wikipedia.org/wiki/Material_nonimplication
337
3.17k
  for (int i = 0; i < (sizeof(validTranslationModes) / sizeof(*validTranslationModes));
338
2.77k
      i++)
339
2.77k
    mode &= ~validTranslationModes[i];
340
397
  return !mode;
341
397
}
342
343
/* Map char to dots according to North American Braille Computer Code (NABCC) */
344
widechar EXPORT_CALL
345
2.16M
_lou_charToFallbackDots(widechar c) {
346
2.16M
  static const unsigned char charToDots[] = {
347
    /* ASCII characters 0X00-0X1F - control characters.
348
     * These won't be referenced so we have room for data.
349
     * These groups must be in descending order.
350
     * Each group contains the following four bytes:
351
     * 1) The first character to which this block applies.
352
     * 2) The bits to remove from the character.
353
     * 3) The bits to add to the character.
354
     * 4) The dots to add to the braille pattern.
355
     */
356
    // clang-format off
357
2.16M
    0X7F, 0X20, 0X00, LOU_DOT_7,
358
2.16M
    0X60, 0X20, 0X00, 0,
359
2.16M
    0X5F, 0X00, 0X00, 0,
360
2.16M
    0X40, 0X00, 0X00, LOU_DOT_7,
361
2.16M
    0X20, 0X00, 0X00, 0,
362
2.16M
    0X00, 0X00, 0X40, LOU_DOT_7 | LOU_DOT_8,
363
364
    // ASCII characters 0X20-0X3F - digits and common symbols.
365
2.16M
    [' '] = 0,
366
2.16M
    ['!'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
367
2.16M
    ['"'] = LOU_DOT_5,
368
2.16M
    ['#'] = LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
369
2.16M
    ['$'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_6,
370
2.16M
    ['%'] = LOU_DOT_1 | LOU_DOT_4 | LOU_DOT_6,
371
2.16M
    ['&'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
372
2.16M
    ['\''] = LOU_DOT_3,
373
2.16M
    ['('] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
374
2.16M
    [')'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
375
2.16M
    ['*'] = LOU_DOT_1 | LOU_DOT_6,
376
2.16M
    ['+'] = LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
377
2.16M
    [','] = LOU_DOT_6,
378
2.16M
    ['-'] = LOU_DOT_3 | LOU_DOT_6,
379
2.16M
    ['.'] = LOU_DOT_4 | LOU_DOT_6,
380
2.16M
    ['/'] = LOU_DOT_3 | LOU_DOT_4,
381
2.16M
    ['0'] = LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
382
2.16M
    ['1'] = LOU_DOT_2,
383
2.16M
    ['2'] = LOU_DOT_2 | LOU_DOT_3,
384
2.16M
    ['3'] = LOU_DOT_2 | LOU_DOT_5,
385
2.16M
    ['4'] = LOU_DOT_2 | LOU_DOT_5 | LOU_DOT_6,
386
2.16M
    ['5'] = LOU_DOT_2 | LOU_DOT_6,
387
2.16M
    ['6'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5,
388
2.16M
    ['7'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
389
2.16M
    ['8'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_6,
390
2.16M
    ['9'] = LOU_DOT_3 | LOU_DOT_5,
391
2.16M
    [':'] = LOU_DOT_1 | LOU_DOT_5 | LOU_DOT_6,
392
2.16M
    [';'] = LOU_DOT_5 | LOU_DOT_6,
393
2.16M
    ['<'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_6,
394
2.16M
    ['='] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
395
2.16M
    ['>'] = LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
396
2.16M
    ['?'] = LOU_DOT_1 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
397
398
    // ASCII characters 0X40-0X5F - letters and other symbols.
399
2.16M
    ['@'] = LOU_DOT_4,
400
2.16M
    ['A'] = LOU_DOT_1,
401
2.16M
    ['B'] = LOU_DOT_1 | LOU_DOT_2,
402
2.16M
    ['C'] = LOU_DOT_1 | LOU_DOT_4,
403
2.16M
    ['D'] = LOU_DOT_1 | LOU_DOT_4 | LOU_DOT_5,
404
2.16M
    ['E'] = LOU_DOT_1 | LOU_DOT_5,
405
2.16M
    ['F'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4,
406
2.16M
    ['G'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5,
407
2.16M
    ['H'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_5,
408
2.16M
    ['I'] = LOU_DOT_2 | LOU_DOT_4,
409
2.16M
    ['J'] = LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5,
410
2.16M
    ['K'] = LOU_DOT_1 | LOU_DOT_3,
411
2.16M
    ['L'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3,
412
2.16M
    ['M'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4,
413
2.16M
    ['N'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
414
2.16M
    ['O'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_5,
415
2.16M
    ['P'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4,
416
2.16M
    ['Q'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
417
2.16M
    ['R'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5,
418
2.16M
    ['S'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4,
419
2.16M
    ['T'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
420
2.16M
    ['U'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_6,
421
2.16M
    ['V'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_6,
422
2.16M
    ['W'] = LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
423
2.16M
    ['X'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
424
2.16M
    ['Y'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
425
2.16M
    ['Z'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
426
2.16M
    ['['] = LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_6,
427
2.16M
    ['\\'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_5 | LOU_DOT_6,
428
2.16M
    [']'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
429
2.16M
    ['^'] = LOU_DOT_4 | LOU_DOT_5,
430
2.16M
    ['_'] = LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6
431
    // clang-format on
432
2.16M
  };
433
434
2.16M
  if (c >= 0X80) c = '?';
435
2.16M
  widechar dots = LOU_DOTS;
436
437
2.16M
  {
438
2.16M
    const unsigned char *p = charToDots;
439
9.52M
    while (*p > c) p += 4;
440
441
2.16M
    c &= ~*++p;
442
2.16M
    c |= *++p;
443
2.16M
    dots |= *++p;
444
2.16M
  }
445
446
2.16M
  dots |= charToDots[c];
447
2.16M
  return dots;
448
2.16M
}