Coverage Report

Created: 2025-07-18 06:54

/src/liblouis/liblouis/utils.c
Line
Count
Source (jump to first uncovered line)
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4
   BRLTTY Team
5
6
   Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7
   Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
9
   Copyright (C) 2016 Davy Kager, Dedicon
10
11
   This file is part of liblouis.
12
13
   liblouis is free software: you can redistribute it and/or modify it
14
   under the terms of the GNU Lesser General Public License as published
15
   by the Free Software Foundation, either version 2.1 of the License, or
16
   (at your option) any later version.
17
18
   liblouis is distributed in the hope that it will be useful, but
19
   WITHOUT ANY WARRANTY; without even the implied warranty of
20
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
   Lesser General Public License for more details.
22
23
   You should have received a copy of the GNU Lesser General Public
24
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
25
*/
26
27
/**
28
 * @file
29
 * @brief Common utility functions
30
 */
31
32
#include <config.h>
33
34
#include <stddef.h>
35
#include <stdlib.h>
36
#include <stdio.h>
37
#include <stdarg.h>
38
#include <string.h>
39
#include <ctype.h>
40
#include <sys/stat.h>
41
42
#include "internal.h"
43
44
/* Contributed by Michel Such <michel.such@free.fr> */
45
#ifdef _WIN32
46
47
/* Adapted from BRLTTY code (see sys_progs_windows.h) */
48
49
#include <shlobj.h>
50
51
/**
 * Resize a heap block with realloc(), calling _lou_outOfMemory() on failure.
 *
 * A NULL result is only treated as a failure when a non-zero size was
 * requested.
 *
 * @param address block to resize
 * @param size requested size in bytes
 * @return the pointer returned by realloc()
 */
static void *
reallocWrapper(void *address, size_t size) {
  void *resized = realloc(address, size);
  if (resized == NULL && size != 0) _lou_outOfMemory();
  return resized;
}
56
57
/**
 * Duplicate a string with strdup(), calling _lou_outOfMemory() when the
 * copy cannot be allocated.
 *
 * @param string string to duplicate
 * @return the pointer returned by strdup()
 */
static char *
strdupWrapper(const char *string) {
  char *copy = strdup(string);
  if (copy == NULL) _lou_outOfMemory();
  return copy;
}
63
64
char *EXPORT_CALL
65
lou_getProgramPath(void) {
66
  char *path = NULL;
67
  HMODULE handle;
68
69
  if ((handle = GetModuleHandle(NULL))) {
70
    DWORD size = 0X80;
71
    char *buffer = NULL;
72
73
    while (1) {
74
      buffer = reallocWrapper(buffer, size <<= 1);
75
76
      {
77
        // As the "UNICODE" Windows define may have been set at compilation,
78
        // This call must be specifically GetModuleFilenameA as further code
79
        // expects it to be single byte chars.
80
        DWORD length = GetModuleFileNameA(handle, buffer, size);
81
82
        if (!length) {
83
          printf("GetModuleFileName\n");
84
          exit(3);
85
        }
86
87
        if (length < size) {
88
          buffer[length] = 0;
89
          path = strdupWrapper(buffer);
90
91
          while (length > 0)
92
            if (path[--length] == '\\') break;
93
94
          strncpy(path, path, length + 1);
95
          path[length + 1] = '\0';
96
          break;
97
        }
98
      }
99
    }
100
101
    free(buffer);
102
  } else {
103
    printf("GetModuleHandle\n");
104
    exit(3);
105
  }
106
107
  return path;
108
}
109
#endif
110
/* End of MS contribution */
111
112
static widechar
113
0
toLowercase(widechar c, const TranslationTableHeader *table) {
114
0
  static TranslationTableOffset offset;
115
0
  static TranslationTableCharacter *character;
116
0
  offset = table->characters[_lou_charHash(c)];
117
0
  while (offset) {
118
0
    character = (TranslationTableCharacter *)&table->ruleArea[offset];
119
0
    if (character->value == c) {
120
0
      if (character->mode & CTC_UpperCase) {
121
0
        const TranslationTableCharacter *c = character;
122
0
        if (c->basechar)
123
0
          c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
124
0
        while (1) {
125
0
          if ((c->mode & (character->mode & ~CTC_UpperCase)) ==
126
0
              (character->mode & ~CTC_UpperCase))
127
0
            return c->value;
128
0
          if (!c->linked) break;
129
0
          c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
130
0
        }
131
0
      }
132
0
      return character->value;
133
0
    }
134
0
    offset = character->next;
135
0
  }
136
0
  return c;
137
0
}
138
139
unsigned long int EXPORT_CALL
140
0
_lou_stringHash(const widechar *c, int lowercase, const TranslationTableHeader *table) {
141
0
  if (!lowercase)
142
0
    return (((unsigned long int)c[0] << 8) + (unsigned long int)c[1]) % HASHNUM;
143
0
  else
144
0
    return (((unsigned long int)toLowercase(c[0], table) << 8) +
145
0
             (unsigned long int)toLowercase(c[1], table)) %
146
0
        HASHNUM;
147
0
}
148
149
unsigned long int EXPORT_CALL
150
11
_lou_charHash(widechar c) {
151
11
  return (unsigned long int)c % HASHNUM;
152
11
}
153
154
const char *EXPORT_CALL
155
0
_lou_showString(widechar const *chars, int length, int forceHex) {
156
  /* Translate a string of characters to the encoding used in character
157
   * operands */
158
0
  static char scratchBuf[MAXSTRING];
159
0
  int bufPos = 0;
160
0
  scratchBuf[bufPos++] = '\'';
161
162
0
  for (int charPos = 0; (charPos < length) && (bufPos < (MAXSTRING - 2));
163
0
      charPos += 1) {
164
0
    widechar c = chars[charPos];
165
166
0
    if (!forceHex && isASCII(c)) {
167
0
      scratchBuf[bufPos++] = (char)c;
168
0
    } else {
169
0
      char hexbuf[20];
170
0
      int hexLength;
171
0
      char escapeLetter;
172
173
0
      int leadingZeros;
174
0
      int hexPos;
175
0
      hexLength = sprintf(hexbuf, "%x", c);
176
0
      switch (hexLength) {
177
0
      case 1:
178
0
      case 2:
179
0
      case 3:
180
0
      case 4:
181
0
        escapeLetter = 'x';
182
0
        leadingZeros = 4 - hexLength;
183
0
        break;
184
0
      case 5:
185
0
        escapeLetter = 'y';
186
0
        leadingZeros = 0;
187
0
        break;
188
0
      case 6:
189
0
      case 7:
190
0
      case 8:
191
0
        escapeLetter = 'z';
192
0
        leadingZeros = 8 - hexLength;
193
0
        break;
194
0
      default:
195
0
        escapeLetter = '?';
196
0
        leadingZeros = 0;
197
0
        break;
198
0
      }
199
0
      if ((bufPos + leadingZeros + hexLength + 4) >= (MAXSTRING - 2)) break;
200
0
      scratchBuf[bufPos++] = '\\';
201
0
      scratchBuf[bufPos++] = escapeLetter;
202
0
      for (hexPos = 0; hexPos < leadingZeros; hexPos++) scratchBuf[bufPos++] = '0';
203
0
      for (hexPos = 0; hexPos < hexLength; hexPos++)
204
0
        scratchBuf[bufPos++] = hexbuf[hexPos];
205
0
    }
206
0
  }
207
0
  scratchBuf[bufPos++] = '\'';
208
0
  scratchBuf[bufPos] = 0;
209
0
  return scratchBuf;
210
0
}
211
212
/**
213
 * Mapping between braille dot and textual representation as used in dots operands
214
 */
215
static const intCharTupple dotMapping[] = {
216
  { LOU_DOT_1, '1' },
217
  { LOU_DOT_2, '2' },
218
  { LOU_DOT_3, '3' },
219
  { LOU_DOT_4, '4' },
220
  { LOU_DOT_5, '5' },
221
  { LOU_DOT_6, '6' },
222
  { LOU_DOT_7, '7' },
223
  { LOU_DOT_8, '8' },
224
  { LOU_DOT_9, '9' },
225
  { LOU_DOT_10, 'A' },
226
  { LOU_DOT_11, 'B' },
227
  { LOU_DOT_12, 'C' },
228
  { LOU_DOT_13, 'D' },
229
  { LOU_DOT_14, 'E' },
230
  { LOU_DOT_15, 'F' },
231
  { 0, 0 },
232
};
233
234
/**
235
 * Print out dot numbers
236
 *
237
 * @return a string containing the dot numbers. The longest possible
238
 * output is "\123456789ABCDEF0/"
239
 */
240
const char *EXPORT_CALL
241
0
_lou_unknownDots(widechar dots) {
242
0
  static char buffer[20];
243
244
0
  int k = 0;
245
0
  buffer[k++] = '\\';
246
247
0
  for (int mappingPos = 0; dotMapping[mappingPos].key; mappingPos++) {
248
0
    if (dots & dotMapping[mappingPos].key) buffer[k++] = dotMapping[mappingPos].value;
249
0
  }
250
251
0
  if (k == 1) buffer[k++] = '0';
252
0
  buffer[k++] = '/';
253
0
  buffer[k] = 0;
254
0
  return buffer;
255
0
}
256
257
/**
258
 * Translate a sequence of dots to the encoding used in dots operands.
259
 */
260
const char *EXPORT_CALL
261
0
_lou_showDots(widechar const *dots, int length) {
262
0
  int bufPos = 0;
263
0
  static char scratchBuf[MAXSTRING];
264
0
  for (int dotsPos = 0; dotsPos < length && bufPos < (MAXSTRING - 1); dotsPos++) {
265
0
    for (int mappingPos = 0; dotMapping[mappingPos].key; mappingPos++) {
266
0
      if ((dots[dotsPos] & dotMapping[mappingPos].key) &&
267
0
          (bufPos < (MAXSTRING - 1)))
268
0
        scratchBuf[bufPos++] = dotMapping[mappingPos].value;
269
0
    }
270
0
    if ((dots[dotsPos] == LOU_DOTS) && (bufPos < (MAXSTRING - 1)))
271
0
      scratchBuf[bufPos++] = '0';
272
0
    if ((dotsPos != length - 1) && (bufPos < (MAXSTRING - 1)))
273
0
      scratchBuf[bufPos++] = '-';
274
0
  }
275
0
  scratchBuf[bufPos] = 0;
276
0
  return scratchBuf;
277
0
}
278
279
/**
280
 * Mapping between character attribute and textual representation
281
 */
282
static const intCharTupple attributeMapping[] = {
283
  { CTC_Space, 's' },
284
  { CTC_Letter, 'l' },
285
  { CTC_Digit, 'd' },
286
  { CTC_Punctuation, 'p' },
287
  { CTC_UpperCase, 'U' },
288
  { CTC_LowerCase, 'u' },
289
  { CTC_Math, 'm' },
290
  { CTC_Sign, 'S' },
291
  { CTC_LitDigit, 'D' },
292
  { CTC_UserDefined9, 'w' },
293
  { CTC_UserDefined10, 'x' },
294
  { CTC_UserDefined11, 'y' },
295
  { CTC_UserDefined12, 'z' },
296
  { 0, 0 },
297
};
298
299
/**
300
 * Show attributes using the letters used after the $ in multipass
301
 * opcodes.
302
 */
303
char *EXPORT_CALL
304
0
_lou_showAttributes(TranslationTableCharacterAttributes a) {
305
0
  int bufPos = 0;
306
0
  static char scratchBuf[MAXSTRING];
307
0
  for (int mappingPos = 0; attributeMapping[mappingPos].key; mappingPos++) {
308
0
    if ((a & attributeMapping[mappingPos].key) && bufPos < (MAXSTRING - 1))
309
0
      scratchBuf[bufPos++] = attributeMapping[mappingPos].value;
310
0
  }
311
0
  scratchBuf[bufPos] = 0;
312
0
  return scratchBuf;
313
0
}
314
315
void EXPORT_CALL
316
0
_lou_outOfMemory(void) {
317
0
  _lou_logMessage(LOU_LOG_FATAL, "liblouis: Insufficient memory\n");
318
0
  exit(3);
319
0
}
320
321
#ifdef DEBUG
322
void EXPORT_CALL
323
_lou_debugHook(void) {
324
  char *hook = "debug hook";
325
  printf("%s\n", hook);
326
}
327
#endif
328
329
static const int validTranslationModes[] = { noContractions, compbrlAtCursor, dotsIO,
330
  compbrlLeftCursor, ucBrl, noUndefined, partialTrans };
331
332
int EXPORT_CALL
333
0
_lou_isValidMode(int mode) {
334
  // mask out all valid mode bits. If you end up with some bits set
335
  // then the input isn't valid. See
336
  // https://en.wikipedia.org/wiki/Material_nonimplication
337
0
  for (int i = 0; i < (sizeof(validTranslationModes) / sizeof(*validTranslationModes));
338
0
      i++)
339
0
    mode &= ~validTranslationModes[i];
340
0
  return !mode;
341
0
}
342
343
/* Map char to dots according to North American Braille Computer Code (NABCC) */
344
widechar EXPORT_CALL
345
0
_lou_charToFallbackDots(widechar c) {
346
0
  static const unsigned char charToDots[] = {
347
    /* ASCII characters 0X00-0X1F - control characters.
348
     * These won't be referenced so we have room for data.
349
     * These groups must be in descending order.
350
     * Each group contains the following four bytes:
351
     * 1) The first character to which this block applies.
352
     * 2) The bits to remove from the character.
353
     * 3) The bits to add to the character.
354
     * 4) The dots to add to the braille pattern.
355
     */
356
    // clang-format off
357
0
    0X7F, 0X20, 0X00, LOU_DOT_7,
358
0
    0X60, 0X20, 0X00, 0,
359
0
    0X5F, 0X00, 0X00, 0,
360
0
    0X40, 0X00, 0X00, LOU_DOT_7,
361
0
    0X20, 0X00, 0X00, 0,
362
0
    0X00, 0X00, 0X40, LOU_DOT_7 | LOU_DOT_8,
363
364
    // ASCII characters 0X20-0X3F - digits and common symbols.
365
0
    [' '] = 0,
366
0
    ['!'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
367
0
    ['"'] = LOU_DOT_5,
368
0
    ['#'] = LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
369
0
    ['$'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_6,
370
0
    ['%'] = LOU_DOT_1 | LOU_DOT_4 | LOU_DOT_6,
371
0
    ['&'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
372
0
    ['\''] = LOU_DOT_3,
373
0
    ['('] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
374
0
    [')'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
375
0
    ['*'] = LOU_DOT_1 | LOU_DOT_6,
376
0
    ['+'] = LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
377
0
    [','] = LOU_DOT_6,
378
0
    ['-'] = LOU_DOT_3 | LOU_DOT_6,
379
0
    ['.'] = LOU_DOT_4 | LOU_DOT_6,
380
0
    ['/'] = LOU_DOT_3 | LOU_DOT_4,
381
0
    ['0'] = LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
382
0
    ['1'] = LOU_DOT_2,
383
0
    ['2'] = LOU_DOT_2 | LOU_DOT_3,
384
0
    ['3'] = LOU_DOT_2 | LOU_DOT_5,
385
0
    ['4'] = LOU_DOT_2 | LOU_DOT_5 | LOU_DOT_6,
386
0
    ['5'] = LOU_DOT_2 | LOU_DOT_6,
387
0
    ['6'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5,
388
0
    ['7'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
389
0
    ['8'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_6,
390
0
    ['9'] = LOU_DOT_3 | LOU_DOT_5,
391
0
    [':'] = LOU_DOT_1 | LOU_DOT_5 | LOU_DOT_6,
392
0
    [';'] = LOU_DOT_5 | LOU_DOT_6,
393
0
    ['<'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_6,
394
0
    ['='] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
395
0
    ['>'] = LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
396
0
    ['?'] = LOU_DOT_1 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
397
398
    // ASCII characters 0X40-0X5F - letters and other symbols.
399
0
    ['@'] = LOU_DOT_4,
400
0
    ['A'] = LOU_DOT_1,
401
0
    ['B'] = LOU_DOT_1 | LOU_DOT_2,
402
0
    ['C'] = LOU_DOT_1 | LOU_DOT_4,
403
0
    ['D'] = LOU_DOT_1 | LOU_DOT_4 | LOU_DOT_5,
404
0
    ['E'] = LOU_DOT_1 | LOU_DOT_5,
405
0
    ['F'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4,
406
0
    ['G'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5,
407
0
    ['H'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_5,
408
0
    ['I'] = LOU_DOT_2 | LOU_DOT_4,
409
0
    ['J'] = LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5,
410
0
    ['K'] = LOU_DOT_1 | LOU_DOT_3,
411
0
    ['L'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3,
412
0
    ['M'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4,
413
0
    ['N'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
414
0
    ['O'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_5,
415
0
    ['P'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4,
416
0
    ['Q'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
417
0
    ['R'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_5,
418
0
    ['S'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4,
419
0
    ['T'] = LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5,
420
0
    ['U'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_6,
421
0
    ['V'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_3 | LOU_DOT_6,
422
0
    ['W'] = LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
423
0
    ['X'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_6,
424
0
    ['Y'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
425
0
    ['Z'] = LOU_DOT_1 | LOU_DOT_3 | LOU_DOT_5 | LOU_DOT_6,
426
0
    ['['] = LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_6,
427
0
    ['\\'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_5 | LOU_DOT_6,
428
0
    [']'] = LOU_DOT_1 | LOU_DOT_2 | LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6,
429
0
    ['^'] = LOU_DOT_4 | LOU_DOT_5,
430
0
    ['_'] = LOU_DOT_4 | LOU_DOT_5 | LOU_DOT_6
431
    // clang-format on
432
0
  };
433
434
0
  if (c >= 0X80) c = '?';
435
0
  widechar dots = LOU_DOTS;
436
437
0
  {
438
0
    const unsigned char *p = charToDots;
439
0
    while (*p > c) p += 4;
440
441
0
    c &= ~*++p;
442
0
    c |= *++p;
443
0
    dots |= *++p;
444
0
  }
445
446
0
  dots |= charToDots[c];
447
0
  return dots;
448
0
}