/src/liblouis/liblouis/compileTranslationTable.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The |
4 | | BRLTTY Team |
5 | | |
6 | | Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com |
7 | | Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com |
8 | | Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
9 | | Copyright (C) 2016 Davy Kager, Dedicon |
10 | | |
11 | | This file is part of liblouis. |
12 | | |
13 | | liblouis is free software: you can redistribute it and/or modify it |
14 | | under the terms of the GNU Lesser General Public License as published |
15 | | by the Free Software Foundation, either version 2.1 of the License, or |
16 | | (at your option) any later version. |
17 | | |
18 | | liblouis is distributed in the hope that it will be useful, but |
19 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | Lesser General Public License for more details. |
22 | | |
23 | | You should have received a copy of the GNU Lesser General Public |
24 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | /** |
28 | | * @file |
29 | | * @brief Read and compile translation tables |
30 | | */ |
31 | | |
32 | | #include <config.h> |
33 | | |
34 | | #include <stddef.h> |
35 | | #include <stdlib.h> |
36 | | #include <stdio.h> |
37 | | #include <stdarg.h> |
38 | | #include <string.h> |
39 | | #include <ctype.h> |
40 | | #include <sys/stat.h> |
41 | | |
42 | | #include "internal.h" |
43 | | |
44 | 4.46k | #define QUOTESUB 28 /* Stand-in for double quotes in strings */ |
45 | | |
46 | | /* needed to make debugging easier */ |
47 | | #ifdef DEBUG |
48 | | wchar_t wchar; |
49 | | #endif |
50 | | |
51 | | /* The following variables and functions make it possible to specify the |
52 | | * path on which all tables for liblouis and all files for liblouisutdml, |
53 | | * in their proper directories, will be found. |
54 | | */ |
55 | | |
/* Current data path as returned by lou_getDataPath(). lou_setDataPath() resets
 * it to NULL on every call and only points it at its internal buffer once the
 * new path has been accepted. */
static char *dataPathPtr;
57 | | |
58 | | char *EXPORT_CALL |
59 | 0 | lou_setDataPath(const char *path) { |
60 | 0 | _lou_logMessage(LOU_LOG_WARN, "warning: lou_setDataPath is deprecated."); |
61 | 0 | static char dataPath[MAXSTRING]; |
62 | 0 | dataPathPtr = NULL; |
63 | 0 | if (path == NULL || strlen(path) >= MAXSTRING) return NULL; |
64 | 0 | strcpy(dataPath, path); |
65 | 0 | dataPathPtr = dataPath; |
66 | 0 | return dataPathPtr; |
67 | 0 | } |
68 | | |
69 | | char *EXPORT_CALL |
70 | 0 | lou_getDataPath(void) { |
71 | 0 | _lou_logMessage(LOU_LOG_WARN, "warning: lou_getDataPath is deprecated."); |
72 | 0 | return dataPathPtr; |
73 | 0 | } |
74 | | |
75 | | /* End of dataPath code. */ |
76 | | |
77 | | static int |
78 | 11.9k | eqasc2uni(const unsigned char *a, const widechar *b, const int len) { |
79 | 11.9k | int k; |
80 | 25.5k | for (k = 0; k < len; k++) |
81 | 23.7k | if ((widechar)a[k] != b[k]) return 0; |
82 | 1.73k | return 1; |
83 | 11.9k | } |
84 | | |
/* A counted string of widechars with a fixed capacity of MAXSTRING elements.
 * getToken() stores the characters of a token in `chars`, their count in
 * `length`, and writes a 0 right after the last character. */
typedef struct CharsString {
	widechar length; /* number of characters currently held in chars */
	widechar chars[MAXSTRING];
} CharsString;
89 | | |
/* Incremented once per message reported through compileError(). */
static int errorCount;
/* Incremented once per message reported through compileWarning(). */
static int warningCount;
92 | | |
/* Node of the singly linked list of translation tables headed by
 * translationTableChain. allocateSpaceInTranslationTable() rewrites `table`
 * in every matching node when realloc() moves a table. */
typedef struct TranslationTableChainEntry {
	struct TranslationTableChainEntry *next; /* next node; NULL ends the list */
	TranslationTableHeader *table;
	int tableListLength; /* presumably the length of tableList -- TODO confirm */
	/* NOTE(review): looks like a variable-length trailing string ("struct
	 * hack"); confirm against the code that allocates these nodes. */
	char tableList[1];
} TranslationTableChainEntry;

/* Head of the list of translation table nodes; NULL while the list is empty. */
static TranslationTableChainEntry *translationTableChain = NULL;
101 | | |
/* Node of the singly linked list of display tables headed by
 * displayTableChain. allocateSpaceInDisplayTable() rewrites `table` in every
 * matching node when realloc() moves a table. */
typedef struct DisplayTableChainEntry {
	struct DisplayTableChainEntry *next; /* next node; NULL ends the list */
	DisplayTableHeader *table;
	int tableListLength; /* presumably the length of tableList -- TODO confirm */
	/* NOTE(review): looks like a variable-length trailing string ("struct
	 * hack"); confirm against the code that allocates these nodes. */
	char tableList[1];
} DisplayTableChainEntry;

/* Head of the list of display table nodes; NULL while the list is empty. */
static DisplayTableChainEntry *displayTableChain = NULL;
110 | | |
/* Names of the predefined character classes. The list is terminated by a
 * NULL entry. */
static const char *characterClassNames[] = {
	"space",
	"letter",
	"digit",
	"punctuation",
	"uppercase",
	"lowercase",
	"math",
	"sign",
	"litdigit",
	NULL,
};
124 | | |
// Names that may not be used for custom attributes. The list is terminated by
// a NULL entry.
static const char *reservedAttributeNames[] = {
	"numericnocontchars",
	"numericnocontchar",
	"numericnocont",
	"midendnumericmodechars",
	"midendnumericmodechar",
	"midendnumericmode",
	"numericmodechars",
	"numericmodechar",
	"numericmode",
	"capsmodechars",
	"capsmodechar",
	"capsmode",
	"emphmodechars",
	"emphmodechar",
	"emphmode",
	"noemphchars",
	"noemphchar",
	"noemph",
	"seqdelimiter",
	"seqbeforechars",
	"seqbeforechar",
	"seqbefore",
	"seqafterchars",
	"seqafterchar",
	"seqafter",
	"noletsign",
	"noletsignbefore",
	"noletsignafter",
	NULL,
};
157 | | |
/* Opcode names, one slot per opcode value below CTO_None.
 * NOTE(review): the entries presumably have to stay in the same order as the
 * opcode enumeration that ends in CTO_None -- confirm against its declaration
 * before adding, removing or reordering names. */
static const char *opcodeNames[CTO_None] = {
	"include",
	"locale",
	"undefined",
	"capsletter",
	"begcapsword",
	"endcapsword",
	"begcaps",
	"endcaps",
	"begcapsphrase",
	"endcapsphrase",
	"lencapsphrase",
	"modeletter",
	"begmodeword",
	"endmodeword",
	"begmode",
	"endmode",
	"begmodephrase",
	"endmodephrase",
	"lenmodephrase",
	"letsign",
	"noletsignbefore",
	"noletsign",
	"noletsignafter",
	"numsign",
	"nonumsign",
	"numericmodechars",
	"midendnumericmodechars",
	"numericnocontchars",
	"seqdelimiter",
	"seqbeforechars",
	"seqafterchars",
	"seqafterpattern",
	"seqafterexpression",
	"emphclass",
	"emphletter",
	"begemphword",
	"endemphword",
	"begemph",
	"endemph",
	"begemphphrase",
	"endemphphrase",
	"lenemphphrase",
	"capsmodechars",
	"emphmodechars",
	"noemphchars",
	"begcomp",
	"endcomp",
	"nocontractsign",
	"multind",
	"compdots",
	"comp6",
	"class",
	"after",
	"before",
	"noback",
	"nofor",
	"empmatchbefore",
	"empmatchafter",
	"swapcc",
	"swapcd",
	"swapdd",
	"space",
	"digit",
	"punctuation",
	"math",
	"sign",
	"letter",
	"uppercase",
	"lowercase",
	"grouping",
	"uplow",
	"litdigit",
	"display",
	"replace",
	"context",
	"correct",
	"pass2",
	"pass3",
	"pass4",
	"repeated",
	"repword",
	"rependword",
	"capsnocont",
	"always",
	"exactdots",
	"nocross",
	"syllable",
	"nocont",
	"compbrl",
	"literal",
	"largesign",
	"word",
	"partword",
	"joinnum",
	"joinword",
	"lowword",
	"contraction",
	"sufword",
	"prfword",
	"begword",
	"begmidword",
	"midword",
	"midendword",
	"endword",
	"prepunc",
	"postpunc",
	"begnum",
	"midnum",
	"endnum",
	"decpoint",
	"hyphen",
	// "apostrophe",
	// "initial",
	"nobreak",
	"match",
	"backmatch",
	"attribute",
	"base",
	"macro",
};
279 | | |
/* One slot per opcodeNames entry, all zero-initialized.
 * NOTE(review): presumably caches the string length of each opcode name and
 * is filled in on first use -- confirm where it is written. */
static short opcodeLengths[CTO_None] = { 0 };
281 | | |
282 | | static void |
283 | | compileError(const FileInfo *file, const char *format, ...); |
284 | | |
285 | | static void |
286 | | free_tablefiles(char **tables); |
287 | | |
/* Return the next character of `file`, or EOF when the input is exhausted or
 * its encoding cannot be determined.
 *
 * The encoding is detected from the first two bytes, which are kept in
 * file->checkencoding; file->status counts the bytes consumed during
 * detection:
 *   0xfe 0xff        -> bigEndian    (the two bytes are not returned)
 *   0xff 0xfe        -> littleEndian (the two bytes are not returned)
 *   both bytes < 128 -> ascii8       (both bytes are returned, one per call)
 *   anything else    -> compile error, EOF is returned
 * After detection an ascii8 file yields one byte per call, and the two
 * 16-bit encodings yield one character assembled from two bytes per call. */
static int
getAChar(FileInfo *file) {
	/* Read a big endian, little endian or ASCII 8 file and convert it to
	 * 16- or 32-bit unsigned integers */
	int ch1 = 0, ch2 = 0;
	widechar character;
	/* Detection already handed out the first byte of an ascii8 file; now hand
	 * out the buffered second byte. status moves past 2 so this runs once. */
	if (file->encoding == ascii8)
		if (file->status == 2) {
			file->status++;
			return file->checkencoding[1];
		}
	while ((ch1 = fgetc(file->in)) != EOF) {
		/* remember the first two bytes for encoding detection */
		if (file->status < 2) file->checkencoding[file->status] = ch1;
		file->status++;
		if (file->status == 2) {
			/* second byte just read: decide on the encoding */
			if (file->checkencoding[0] == 0xfe && file->checkencoding[1] == 0xff)
				file->encoding = bigEndian;
			else if (file->checkencoding[0] == 0xff && file->checkencoding[1] == 0xfe)
				file->encoding = littleEndian;
			else if (file->checkencoding[0] < 128 && file->checkencoding[1] < 128) {
				file->encoding = ascii8;
				return file->checkencoding[0];
			} else {
				compileError(file,
						"encoding is neither big-endian, little-endian nor ASCII 8.");
				ch1 = EOF;
				break;
				;
			}
			/* a byte order mark was consumed; go on with the first real
			 * character */
			continue;
		}
		switch (file->encoding) {
		case noEncoding:
			/* only the first byte has been read so far; fetch the second */
			break;
		case ascii8:
			return ch1;
			break;
		case bigEndian:
			ch2 = fgetc(file->in);
			if (ch2 == EOF) break;
			character = (widechar)(ch1 << 8) | ch2;
			return (int)character;
			break;
		case littleEndian:
			ch2 = fgetc(file->in);
			if (ch2 == EOF) break;
			character = (widechar)(ch2 << 8) | ch1;
			return (int)character;
			break;
		}
		/* input ended in the middle of a two-byte character */
		if (ch1 == EOF || ch2 == EOF) break;
	}
	return EOF;
}
342 | | |
343 | | int EXPORT_CALL |
344 | 1.97k | _lou_getALine(FileInfo *file) { |
345 | | /* Read a line of widechar's from an input file */ |
346 | 1.97k | int ch; |
347 | 1.97k | file->linelen = 0; |
348 | 132k | while ((ch = getAChar(file)) != EOF) { |
349 | 132k | if (ch == 13) continue; |
350 | 131k | if (ch == 10 || file->linelen >= MAXSTRING - 1) break; |
351 | 130k | file->line[file->linelen++] = (widechar)ch; |
352 | 130k | } |
353 | 1.97k | file->line[file->linelen] = 0; |
354 | 1.97k | file->linepos = 0; |
355 | 1.97k | if (ch == EOF && !file->linelen) return 0; |
356 | 1.72k | file->lineNumber++; |
357 | 1.72k | return 1; |
358 | 1.97k | } |
359 | | |
360 | | static inline int |
361 | 66.0k | atEndOfLine(const FileInfo *file) { |
362 | 66.0k | return file->linepos >= file->linelen; |
363 | 66.0k | } |
364 | | |
365 | | static inline int |
366 | 64.4k | atTokenDelimiter(const FileInfo *file) { |
367 | 64.4k | return file->line[file->linepos] <= 32; |
368 | 64.4k | } |
369 | | |
370 | | static int |
371 | 4.27k | getToken(FileInfo *file, CharsString *result, const char *description) { |
372 | | /* Find the next string of contiguous non-whitespace characters. If this |
373 | | * is the last token on the line, return 2 instead of 1. */ |
374 | 10.7k | while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
375 | 4.27k | result->length = 0; |
376 | 39.4k | while (!atEndOfLine(file) && !atTokenDelimiter(file)) { |
377 | 35.1k | int maxlen = MAXSTRING; |
378 | 35.1k | if (result->length >= maxlen) { |
379 | 0 | compileError(file, "more than %d characters (bytes)", maxlen); |
380 | 0 | return 0; |
381 | 0 | } else |
382 | 35.1k | result->chars[result->length++] = file->line[file->linepos++]; |
383 | 35.1k | } |
384 | 4.27k | if (!result->length) { |
385 | | /* Not enough tokens */ |
386 | 576 | if (description) compileError(file, "%s not specified.", description); |
387 | 576 | return 0; |
388 | 576 | } |
389 | 3.70k | result->chars[result->length] = 0; |
390 | 15.7k | while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
391 | 3.70k | return 1; |
392 | 4.27k | } |
393 | | |
394 | | static void |
395 | 4.69k | compileError(const FileInfo *file, const char *format, ...) { |
396 | 4.69k | #ifndef __SYMBIAN32__ |
397 | 4.69k | char buffer[MAXSTRING]; |
398 | 4.69k | va_list arguments; |
399 | 4.69k | va_start(arguments, format); |
400 | 4.69k | vsnprintf(buffer, sizeof(buffer), format, arguments); |
401 | 4.69k | va_end(arguments); |
402 | 4.69k | if (file) |
403 | 103 | _lou_logMessage(LOU_LOG_ERROR, "%s:%d: error: %s", file->fileName, |
404 | 103 | file->lineNumber, buffer); |
405 | 4.58k | else |
406 | 4.58k | _lou_logMessage(LOU_LOG_ERROR, "error: %s", buffer); |
407 | 4.69k | errorCount++; |
408 | 4.69k | #endif |
409 | 4.69k | } |
410 | | |
411 | | static void |
412 | 143k | compileWarning(const FileInfo *file, const char *format, ...) { |
413 | 143k | #ifndef __SYMBIAN32__ |
414 | 143k | char buffer[MAXSTRING]; |
415 | 143k | va_list arguments; |
416 | 143k | va_start(arguments, format); |
417 | 143k | vsnprintf(buffer, sizeof(buffer), format, arguments); |
418 | 143k | va_end(arguments); |
419 | 143k | if (file) |
420 | 27.6k | _lou_logMessage(LOU_LOG_WARN, "%s:%d: warning: %s", file->fileName, |
421 | 27.6k | file->lineNumber, buffer); |
422 | 116k | else |
423 | 116k | _lou_logMessage(LOU_LOG_WARN, "warning: %s", buffer); |
424 | 143k | warningCount++; |
425 | 143k | #endif |
426 | 143k | } |
427 | | |
428 | | static int |
429 | | allocateSpaceInTranslationTable(const FileInfo *file, TranslationTableOffset *offset, |
430 | 4.81k | int size, TranslationTableHeader **table) { |
431 | | /* allocate memory for table and expand previously allocated memory if necessary */ |
432 | 4.81k | int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
433 | 4.81k | TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
434 | 4.81k | TranslationTableOffset tableSize = (*table)->tableSize; |
435 | 4.81k | if (newTableSize > tableSize) { |
436 | 4 | TranslationTableHeader *newTable; |
437 | 4 | newTableSize += (newTableSize / OFFSETSIZE); |
438 | 4 | newTable = realloc(*table, newTableSize); |
439 | 4 | if (!newTable) { |
440 | 0 | compileError(file, "Not enough memory for translation table."); |
441 | 0 | _lou_outOfMemory(); |
442 | 0 | } |
443 | 4 | memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
444 | | /* update references to the old table */ |
445 | 4 | { |
446 | 4 | TranslationTableChainEntry *entry; |
447 | 4 | for (entry = translationTableChain; entry != NULL; entry = entry->next) |
448 | 0 | if (entry->table == *table) |
449 | 0 | entry->table = (TranslationTableHeader *)newTable; |
450 | 4 | } |
451 | 4 | newTable->tableSize = newTableSize; |
452 | 4 | *table = newTable; |
453 | 4 | } |
454 | 4.81k | if (offset != NULL) { |
455 | 4.81k | *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
456 | 4.81k | (*table)->bytesUsed += spaceNeeded; |
457 | 4.81k | } |
458 | 4.81k | return 1; |
459 | 4.81k | } |
460 | | |
461 | | static int |
462 | | allocateSpaceInDisplayTable(const FileInfo *file, TranslationTableOffset *offset, |
463 | 650 | int size, DisplayTableHeader **table) { |
464 | | /* allocate memory for table and expand previously allocated memory if necessary */ |
465 | 650 | int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
466 | 650 | TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
467 | 650 | TranslationTableOffset tableSize = (*table)->tableSize; |
468 | 650 | if (newTableSize > tableSize) { |
469 | 0 | DisplayTableHeader *newTable; |
470 | 0 | newTableSize += (newTableSize / OFFSETSIZE); |
471 | 0 | newTable = realloc(*table, newTableSize); |
472 | 0 | if (!newTable) { |
473 | 0 | compileError(file, "Not enough memory for display table."); |
474 | 0 | _lou_outOfMemory(); |
475 | 0 | } |
476 | 0 | memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
477 | | /* update references to the old table */ |
478 | 0 | { |
479 | 0 | DisplayTableChainEntry *entry; |
480 | 0 | for (entry = displayTableChain; entry != NULL; entry = entry->next) |
481 | 0 | if (entry->table == *table) entry->table = (DisplayTableHeader *)newTable; |
482 | 0 | } |
483 | 0 | newTable->tableSize = newTableSize; |
484 | 0 | *table = newTable; |
485 | 0 | } |
486 | 650 | if (offset != NULL) { |
487 | 650 | *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
488 | 650 | (*table)->bytesUsed += spaceNeeded; |
489 | 650 | } |
490 | 650 | return 1; |
491 | 650 | } |
492 | | |
493 | | static int |
494 | 273 | allocateTranslationTable(const FileInfo *file, TranslationTableHeader **table) { |
495 | | /* Allocate memory for the table and a guess on the number of rules */ |
496 | 273 | const TranslationTableOffset startSize = 2 * sizeof(**table); |
497 | 273 | if (*table) return 1; |
498 | 273 | TranslationTableOffset bytesUsed = |
499 | 273 | sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
500 | 273 | if (!(*table = malloc(startSize))) { |
501 | 0 | compileError(file, "Not enough memory"); |
502 | 0 | if (*table != NULL) free(*table); |
503 | 0 | *table = NULL; |
504 | 0 | _lou_outOfMemory(); |
505 | 0 | } |
506 | 273 | memset(*table, 0, startSize); |
507 | 273 | (*table)->tableSize = startSize; |
508 | 273 | (*table)->bytesUsed = bytesUsed; |
509 | 273 | return 1; |
510 | 273 | } |
511 | | |
512 | | static int |
513 | 273 | allocateDisplayTable(const FileInfo *file, DisplayTableHeader **table) { |
514 | | /* Allocate memory for the table and a guess on the number of rules */ |
515 | 273 | const TranslationTableOffset startSize = 2 * sizeof(**table); |
516 | 273 | if (*table) return 1; |
517 | 273 | TranslationTableOffset bytesUsed = |
518 | 273 | sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
519 | 273 | if (!(*table = malloc(startSize))) { |
520 | 0 | compileError(file, "Not enough memory"); |
521 | 0 | if (*table != NULL) free(*table); |
522 | 0 | *table = NULL; |
523 | 0 | _lou_outOfMemory(); |
524 | 0 | } |
525 | 273 | memset(*table, 0, startSize); |
526 | 273 | (*table)->tableSize = startSize; |
527 | 273 | (*table)->bytesUsed = bytesUsed; |
528 | 273 | return 1; |
529 | 273 | } |
530 | | |
531 | | /* Look up a character or dot pattern. Although the algorithms are almost identical, |
532 | | * different tables are needed for characters and dots because of the possibility of |
533 | | * conflicts. */ |
534 | | |
535 | | static TranslationTableCharacter * |
536 | | getChar(widechar c, TranslationTableHeader *table, |
537 | 18.6k | TranslationTableOffset *characterOffset) { |
538 | 18.6k | const TranslationTableOffset bucket = table->characters[_lou_charHash(c)]; |
539 | 18.6k | TranslationTableOffset offset = bucket; |
540 | 18.8k | while (offset) { |
541 | 16.2k | TranslationTableCharacter *character = |
542 | 16.2k | (TranslationTableCharacter *)&table->ruleArea[offset]; |
543 | 16.2k | if (character->value == c) { |
544 | 15.9k | if (characterOffset) *characterOffset = offset; |
545 | 15.9k | return character; |
546 | 15.9k | } |
547 | 297 | offset = character->next; |
548 | 297 | } |
549 | 2.66k | return NULL; |
550 | 18.6k | } |
551 | | |
552 | | static TranslationTableCharacter * |
553 | 1.29k | getDots(widechar d, TranslationTableHeader *table) { |
554 | 1.29k | const TranslationTableOffset bucket = table->dots[_lou_charHash(d)]; |
555 | 1.29k | TranslationTableOffset offset = bucket; |
556 | 1.29k | while (offset) { |
557 | 449 | TranslationTableCharacter *character = |
558 | 449 | (TranslationTableCharacter *)&table->ruleArea[offset]; |
559 | 449 | if (character->value == d) return character; |
560 | 7 | offset = character->next; |
561 | 7 | } |
562 | 849 | return NULL; |
563 | 1.29k | } |
564 | | |
565 | | static TranslationTableCharacter * |
566 | | putChar(const FileInfo *file, widechar c, TranslationTableHeader **table, |
567 | 16.1k | TranslationTableOffset *characterOffset, int ruleIndex) { |
568 | | /* See if a character is in the appropriate table. If not, insert it. In either case, |
569 | | * return a pointer to it. */ |
570 | 16.1k | TranslationTableCharacter *character; |
571 | 16.1k | TranslationTableOffset offset; |
572 | 16.1k | if ((character = getChar(c, *table, characterOffset))) return character; |
573 | 2.66k | if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
574 | 0 | return NULL; |
575 | 2.66k | character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
576 | 2.66k | memset(character, 0, sizeof(*character)); |
577 | 2.66k | character->sourceFile = file->sourceFile; |
578 | 2.66k | character->sourceLine = file->lineNumber; |
579 | 2.66k | character->ruleIndex = ruleIndex; |
580 | 2.66k | character->value = c; |
581 | 2.66k | const unsigned long int charHash = _lou_charHash(c); |
582 | 2.66k | const TranslationTableOffset bucket = (*table)->characters[charHash]; |
583 | 2.66k | if (!bucket) |
584 | 2.64k | (*table)->characters[charHash] = offset; |
585 | 17 | else { |
586 | 17 | TranslationTableCharacter *oldchar = |
587 | 17 | (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
588 | 17 | while (oldchar->next) |
589 | 0 | oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
590 | 17 | oldchar->next = offset; |
591 | 17 | } |
592 | 2.66k | if (characterOffset) *characterOffset = offset; |
593 | 2.66k | return character; |
594 | 2.66k | } |
595 | | |
596 | | static TranslationTableCharacter * |
597 | 952 | putDots(const FileInfo *file, widechar d, TranslationTableHeader **table, int ruleIndex) { |
598 | | /* See if a dot pattern is in the appropriate table. If not, insert it. In either |
599 | | * case, return a pointer to it. */ |
600 | 952 | TranslationTableCharacter *character; |
601 | 952 | TranslationTableOffset offset; |
602 | 952 | if ((character = getDots(d, *table))) return character; |
603 | 537 | if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
604 | 0 | return NULL; |
605 | 537 | character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
606 | 537 | memset(character, 0, sizeof(*character)); |
607 | 537 | character->sourceFile = file->sourceFile; |
608 | 537 | character->sourceLine = file->lineNumber; |
609 | 537 | character->ruleIndex = ruleIndex; |
610 | 537 | character->value = d; |
611 | 537 | const unsigned long int charHash = _lou_charHash(d); |
612 | 537 | const TranslationTableOffset bucket = (*table)->dots[charHash]; |
613 | 537 | if (!bucket) |
614 | 534 | (*table)->dots[charHash] = offset; |
615 | 3 | else { |
616 | 3 | TranslationTableCharacter *oldchar = |
617 | 3 | (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
618 | 3 | while (oldchar->next) |
619 | 0 | oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
620 | 3 | oldchar->next = offset; |
621 | 3 | } |
622 | 537 | return character; |
623 | 537 | } |
624 | | |
625 | | /* Look up a character-dots mapping in a display table. */ |
626 | | |
627 | | static CharDotsMapping * |
628 | 77.3k | getDotsForChar(widechar c, const DisplayTableHeader *table) { |
629 | 77.3k | if (table == NULL) return NULL; |
630 | 77.3k | CharDotsMapping *cdPtr; |
631 | 77.3k | const TranslationTableOffset bucket = table->charToDots[_lou_charHash(c)]; |
632 | 77.3k | TranslationTableOffset offset = bucket; |
633 | 77.3k | while (offset) { |
634 | 30.2k | cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
635 | 30.2k | if (cdPtr->lookFor == c) return cdPtr; |
636 | 0 | offset = cdPtr->next; |
637 | 0 | } |
638 | 47.1k | return NULL; |
639 | 77.3k | } |
640 | | |
641 | | static CharDotsMapping * |
642 | 4.65k | getCharForDots(widechar d, const DisplayTableHeader *table) { |
643 | 4.65k | if (table == NULL) return NULL; |
644 | 4.65k | CharDotsMapping *cdPtr; |
645 | 4.65k | const TranslationTableOffset bucket = table->dotsToChar[_lou_charHash(d)]; |
646 | 4.65k | TranslationTableOffset offset = bucket; |
647 | 4.65k | while (offset) { |
648 | 4.19k | cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
649 | 4.19k | if (cdPtr->lookFor == d) return cdPtr; |
650 | 0 | offset = cdPtr->next; |
651 | 0 | } |
652 | 461 | return NULL; |
653 | 4.65k | } |
654 | | |
655 | | widechar EXPORT_CALL |
656 | 76.9k | _lou_getDotsForChar(widechar c, const DisplayTableHeader *table) { |
657 | 76.9k | CharDotsMapping *cdPtr = getDotsForChar(c, table); |
658 | 76.9k | if (cdPtr) return cdPtr->found; |
659 | 46.7k | return LOU_DOTS; |
660 | 76.9k | } |
661 | | |
662 | | widechar EXPORT_CALL |
663 | 4.30k | _lou_getCharForDots(widechar d, const DisplayTableHeader *table) { |
664 | 4.30k | CharDotsMapping *cdPtr = getCharForDots(d, table); |
665 | 4.30k | if (cdPtr) return cdPtr->found; |
666 | 134 | return '\0'; |
667 | 4.30k | } |
668 | | |
669 | | static int |
670 | | putCharDotsMapping( |
671 | 348 | const FileInfo *file, widechar c, widechar d, DisplayTableHeader **table) { |
672 | 348 | if (!getDotsForChar(c, *table)) { |
673 | 323 | CharDotsMapping *cdPtr; |
674 | 323 | TranslationTableOffset offset; |
675 | 323 | if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
676 | 323 | cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
677 | 323 | cdPtr->next = 0; |
678 | 323 | cdPtr->lookFor = c; |
679 | 323 | cdPtr->found = d; |
680 | 323 | const unsigned long int charHash = _lou_charHash(c); |
681 | 323 | const TranslationTableOffset bucket = (*table)->charToDots[charHash]; |
682 | 323 | if (!bucket) |
683 | 323 | (*table)->charToDots[charHash] = offset; |
684 | 0 | else { |
685 | 0 | CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
686 | 0 | while (oldcdPtr->next) |
687 | 0 | oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
688 | 0 | oldcdPtr->next = offset; |
689 | 0 | } |
690 | 323 | } |
691 | 348 | if (!getCharForDots(d, *table)) { |
692 | 327 | CharDotsMapping *cdPtr; |
693 | 327 | TranslationTableOffset offset; |
694 | 327 | if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
695 | 327 | cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
696 | 327 | cdPtr->next = 0; |
697 | 327 | cdPtr->lookFor = d; |
698 | 327 | cdPtr->found = c; |
699 | 327 | const unsigned long int charHash = _lou_charHash(d); |
700 | 327 | const TranslationTableOffset bucket = (*table)->dotsToChar[charHash]; |
701 | 327 | if (!bucket) |
702 | 327 | (*table)->dotsToChar[charHash] = offset; |
703 | 0 | else { |
704 | 0 | CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
705 | 0 | while (oldcdPtr->next) |
706 | 0 | oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
707 | 0 | oldcdPtr->next = offset; |
708 | 0 | } |
709 | 327 | } |
710 | 348 | return 1; |
711 | 348 | } |
712 | | |
/* Name of a part of a multipass rule for use in messages: "action" when
 * `actionPart` is non-zero, "test" otherwise. */
static inline const char *
getPartName(int actionPart) {
	if (actionPart) return "action";
	return "test";
}
717 | | |
/* Scan the compiled multipass instructions in instructions[0..end) for the
 * first run of literal characters or dots that the test has to match.
 *
 * On return `*characters` points at that run inside `instructions` and
 * `*length` holds its length; both stay NULL / 0 when no such run can be
 * determined. A pending lookback is subtracted first, so the run starts after
 * the characters covered by the lookback.
 *
 * Returns 1 on success (with or without a run) and 0, after reporting an
 * error, when an unknown instruction is met. */
static int
passFindCharacters(const FileInfo *file, widechar *instructions, int end,
		widechar **characters, int *length) {
	int IC = 0; /* index of the instruction being examined */
	int lookback = 0; /* lookback amount not yet consumed by later operands */

	*characters = NULL;
	*length = 0;

	while (IC < end) {
		widechar instruction = instructions[IC];

		switch (instruction) {
		case pass_string:
		case pass_dots: {
			/* layout: opcode, count, then `count` characters */
			int count = instructions[IC + 1];
			IC += 2;
			if (count > lookback) {
				*characters = &instructions[IC + lookback];
				*length = count - lookback;
				return 1;
			} else {
				/* the whole run lies within the lookback; skip it */
				lookback -= count;
			}
			IC += count;
			continue;
		}

		case pass_attributes:
			/* the instruction occupies 7 slots. NOTE(review): the last two
			 * slots are presumably the minimum and maximum repeat count --
			 * confirm against the code that emits pass_attributes. Scanning
			 * only continues when both are equal and covered by the lookback. */
			IC += 7;
			if (instructions[IC - 2] == instructions[IC - 1] &&
					instructions[IC - 1] <= lookback) {
				lookback -= instructions[IC - 1];
				continue;
			}
			goto NO_CHARACTERS;

		case pass_swap:
			IC += 2;
			/* fall through */

		case pass_groupstart:
		case pass_groupend:
		case pass_groupreplace:
			IC += 3;

		/* shared exit for "no literal run found": the cases above fall into
		 * it, others jump here */
		NO_CHARACTERS : { return 1; }

		case pass_eq:
		case pass_lt:
		case pass_gt:
		case pass_lteq:
		case pass_gteq:
			IC += 3;
			continue;

		case pass_lookback:
			lookback += instructions[IC + 1];
			IC += 2;
			continue;

		case pass_not:
		case pass_startReplace:
		case pass_endReplace:
		case pass_first:
		case pass_last:
		case pass_copy:
		case pass_omit:
		case pass_plus:
		case pass_hyphen:
			IC += 1;
			continue;

		case pass_endTest:
			goto NO_CHARACTERS;

		default:
			compileError(file, "unhandled test suboperand: \\x%02x", instruction);
			return 0;
		}
	}
	/* ran off the end of the instructions without finding a run */
	goto NO_CHARACTERS;
}
801 | | |
802 | | static const char * |
803 | 26 | printSource(const char *currentFile, const char *sourceFile, int sourceLine) { |
804 | 26 | static char scratchBuf[MAXSTRING]; |
805 | 26 | if (sourceFile) { |
806 | 26 | if (currentFile && strcmp(currentFile, sourceFile) == 0) |
807 | 26 | snprintf(scratchBuf, MAXSTRING, "line %d", sourceLine); |
808 | 0 | else |
809 | 0 | snprintf(scratchBuf, MAXSTRING, "%s:%d", sourceFile, sourceLine); |
810 | 26 | } else |
811 | 0 | snprintf(scratchBuf, MAXSTRING, "source unknown"); |
812 | 26 | return scratchBuf; |
813 | 26 | } |
814 | | |
815 | | /* The following functions are called by addRule to handle various cases. */ |
816 | | |
817 | | static void |
818 | | addForwardRuleWithSingleChar(const FileInfo *file, TranslationTableOffset ruleOffset, |
819 | 481 | TranslationTableRule *rule, TranslationTableHeader **table) { |
820 | | /* direction = 0, rule->charslen = 1 */ |
821 | 481 | TranslationTableCharacter *character; |
822 | | // get the character from the table, or if the character is not defined yet, define it |
823 | | // (without adding attributes) |
824 | 481 | if (rule->opcode >= CTO_Pass2 && rule->opcode <= CTO_Pass4) { |
825 | 0 | character = putDots(file, rule->charsdots[0], table, rule->index); |
826 | | // putDots may have moved table, so make sure rule is still valid |
827 | 0 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
828 | 481 | } else if (rule->opcode == CTO_CompDots || rule->opcode == CTO_Comp6) { |
829 | 1 | character = putChar(file, rule->charsdots[0], table, NULL, rule->index); |
830 | | // putChar may have moved table, so make sure rule is still valid |
831 | 1 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
832 | 1 | character->compRule = ruleOffset; |
833 | 1 | return; |
834 | 480 | } else { |
835 | 480 | character = putChar(file, rule->charsdots[0], table, NULL, rule->index); |
836 | | // putChar may have moved table, so make sure rule is still valid |
837 | 480 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
838 | | // if the new rule is a character definition rule, set the main definition rule of |
839 | | // this character to it, but don't override existing character definitions rules |
840 | | // or base rules |
841 | | // adding the attributes to the character has already been done elsewhere |
842 | 480 | if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) { |
843 | 348 | if (character->definitionRule) { |
844 | 25 | TranslationTableRule *prevRule = |
845 | 25 | (TranslationTableRule *)&(*table) |
846 | 25 | ->ruleArea[character->definitionRule]; |
847 | 25 | char *prevOpcodeName = strdup(_lou_findOpcodeName(prevRule->opcode)); |
848 | 25 | char *newOpcodeName = strdup(_lou_findOpcodeName(rule->opcode)); |
849 | 25 | _lou_logMessage(LOU_LOG_DEBUG, |
850 | 25 | "%s:%d: Character already defined (%s). The existing %s rule " |
851 | 25 | "will take precedence over the new %s rule.", |
852 | 25 | file->fileName, file->lineNumber, |
853 | 25 | printSource(file->sourceFile, prevRule->sourceFile, |
854 | 25 | prevRule->sourceLine), |
855 | 25 | prevOpcodeName, newOpcodeName); |
856 | 25 | free(prevOpcodeName); |
857 | 25 | free(newOpcodeName); |
858 | 323 | } else { |
859 | 323 | character->definitionRule = ruleOffset; |
860 | 323 | } |
861 | 348 | } |
862 | 480 | } |
863 | | // add the new rule to the list of rules associated with this character |
864 | | // if the new rule is a character definition rule, it is inserted at the end of the |
865 | | // list, otherwise it is inserted before the first character definition rule |
866 | | // in other words, rules are considered in the order in which they are defined in the |
867 | | // table |
868 | 480 | TranslationTableOffset *otherRule = &character->otherRules; |
869 | 917 | while (*otherRule) { |
870 | 448 | TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule]; |
871 | 448 | if (r->charslen == 0) break; |
872 | 448 | if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow) |
873 | 144 | if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break; |
874 | 437 | otherRule = &r->charsnext; |
875 | 437 | } |
876 | 480 | rule->charsnext = *otherRule; |
877 | 480 | *otherRule = ruleOffset; |
878 | 480 | } |
879 | | |
880 | | static void |
881 | | addForwardRuleWithMultipleChars(TranslationTableOffset ruleOffset, |
882 | 184 | TranslationTableRule *rule, TranslationTableHeader *table) { |
883 | | /* direction = 0 rule->charslen > 1 */ |
884 | 184 | TranslationTableOffset *forRule = |
885 | 184 | &table->forRules[_lou_stringHash(&rule->charsdots[0], 0, NULL)]; |
886 | 276 | while (*forRule) { |
887 | 98 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forRule]; |
888 | 98 | if (rule->charslen > r->charslen) break; |
889 | 92 | if (rule->charslen == r->charslen) |
890 | 87 | if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
891 | 92 | forRule = &r->charsnext; |
892 | 92 | } |
893 | 184 | rule->charsnext = *forRule; |
894 | 184 | *forRule = ruleOffset; |
895 | 184 | } |
896 | | |
897 | | static void |
898 | | addBackwardRuleWithSingleCell(const FileInfo *file, widechar cell, |
899 | | TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
900 | 623 | TranslationTableHeader **table) { |
901 | | /* direction = 1, rule->dotslen = 1 */ |
902 | 623 | TranslationTableCharacter *dots; |
903 | 623 | if (rule->opcode == CTO_SwapCc || rule->opcode == CTO_Repeated) |
904 | 3 | return; /* too ambiguous */ |
905 | | // get the cell from the table, or if the cell is not defined yet, define it (without |
906 | | // adding attributes) |
907 | 620 | dots = putDots(file, cell, table, rule->index); |
908 | | // putDots may have moved table, so make sure rule is still valid |
909 | 620 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
910 | 620 | if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) |
911 | 337 | dots->definitionRule = ruleOffset; |
912 | 620 | TranslationTableOffset *otherRule = &dots->otherRules; |
913 | 927 | while (*otherRule) { |
914 | 318 | TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule]; |
915 | 318 | if (rule->charslen > r->charslen || r->dotslen == 0) break; |
916 | 308 | if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow) |
917 | 111 | if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break; |
918 | 307 | otherRule = &r->dotsnext; |
919 | 307 | } |
920 | 620 | rule->dotsnext = *otherRule; |
921 | 620 | *otherRule = ruleOffset; |
922 | 620 | } |
923 | | |
924 | | static void |
925 | | addBackwardRuleWithMultipleCells(widechar *cells, int dotslen, |
926 | | TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
927 | 153 | TranslationTableHeader *table) { |
928 | | /* direction = 1, dotslen > 1 */ |
929 | 153 | TranslationTableOffset *backRule = &table->backRules[_lou_stringHash(cells, 0, NULL)]; |
930 | 153 | if (rule->opcode == CTO_SwapCc) return; |
931 | 153 | int ruleLength = dotslen + rule->charslen; |
932 | 411 | while (*backRule) { |
933 | 258 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backRule]; |
934 | 258 | int rLength = r->dotslen + r->charslen; |
935 | 258 | if (ruleLength > rLength) break; |
936 | 258 | if (rLength == ruleLength) |
937 | 255 | if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
938 | 258 | backRule = &r->dotsnext; |
939 | 258 | } |
940 | 153 | rule->dotsnext = *backRule; |
941 | 153 | *backRule = ruleOffset; |
942 | 153 | } |
943 | | |
944 | | static int |
945 | | addForwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
946 | 170 | TranslationTableHeader *table) { |
947 | 170 | TranslationTableOffset *forPassRule; |
948 | 170 | switch (rule->opcode) { |
949 | 111 | case CTO_Correct: |
950 | 111 | forPassRule = &table->forPassRules[0]; |
951 | 111 | break; |
952 | 36 | case CTO_Context: |
953 | 36 | forPassRule = &table->forPassRules[1]; |
954 | 36 | break; |
955 | 6 | case CTO_Pass2: |
956 | 6 | forPassRule = &table->forPassRules[2]; |
957 | 6 | break; |
958 | 7 | case CTO_Pass3: |
959 | 7 | forPassRule = &table->forPassRules[3]; |
960 | 7 | break; |
961 | 10 | case CTO_Pass4: |
962 | 10 | forPassRule = &table->forPassRules[4]; |
963 | 10 | break; |
964 | 0 | default: |
965 | 0 | return 0; |
966 | 170 | } |
967 | 182 | while (*forPassRule) { |
968 | 13 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forPassRule]; |
969 | 13 | if (rule->charslen > r->charslen) break; |
970 | 12 | forPassRule = &r->charsnext; |
971 | 12 | } |
972 | 170 | rule->charsnext = *forPassRule; |
973 | 170 | *forPassRule = ruleOffset; |
974 | 170 | return 1; |
975 | 170 | } |
976 | | |
977 | | static int |
978 | | addBackwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
979 | 100 | TranslationTableHeader *table) { |
980 | 100 | TranslationTableOffset *backPassRule; |
981 | 100 | switch (rule->opcode) { |
982 | 19 | case CTO_Correct: |
983 | 19 | backPassRule = &table->backPassRules[0]; |
984 | 19 | break; |
985 | 1 | case CTO_Context: |
986 | 1 | backPassRule = &table->backPassRules[1]; |
987 | 1 | break; |
988 | 21 | case CTO_Pass2: |
989 | 21 | backPassRule = &table->backPassRules[2]; |
990 | 21 | break; |
991 | 31 | case CTO_Pass3: |
992 | 31 | backPassRule = &table->backPassRules[3]; |
993 | 31 | break; |
994 | 28 | case CTO_Pass4: |
995 | 28 | backPassRule = &table->backPassRules[4]; |
996 | 28 | break; |
997 | 0 | default: |
998 | 0 | return 0; |
999 | 100 | } |
1000 | 288 | while (*backPassRule) { |
1001 | 190 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backPassRule]; |
1002 | 190 | if (rule->charslen > r->charslen) break; |
1003 | 188 | backPassRule = &r->dotsnext; |
1004 | 188 | } |
1005 | 100 | rule->dotsnext = *backPassRule; |
1006 | 100 | *backPassRule = ruleOffset; |
1007 | 100 | return 1; |
1008 | 100 | } |
1009 | | |
1010 | | static int |
1011 | | addRule(const FileInfo *file, TranslationTableOpcode opcode, CharsString *ruleChars, |
1012 | | CharsString *ruleDots, TranslationTableCharacterAttributes after, |
1013 | | TranslationTableCharacterAttributes before, TranslationTableOffset *ruleOffset, |
1014 | | TranslationTableRule **rule, int noback, int nofor, |
1015 | 1.12k | TranslationTableHeader **table) { |
1016 | | /* Add a rule to the table, using the hash function to find the start of |
1017 | | * chains and chaining both the chars and dots strings */ |
1018 | 1.12k | TranslationTableOffset offset; |
1019 | 1.12k | int ruleSize = sizeof(TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE); |
1020 | 1.12k | if (ruleChars) ruleSize += CHARSIZE * ruleChars->length; |
1021 | 1.12k | if (ruleDots) ruleSize += CHARSIZE * ruleDots->length; |
1022 | 1.12k | if (!allocateSpaceInTranslationTable(file, &offset, ruleSize, table)) return 0; |
1023 | 1.12k | TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
1024 | 1.12k | if (rule) *rule = r; |
1025 | 1.12k | if (ruleOffset) *ruleOffset = offset; |
1026 | 1.12k | r->sourceFile = file->sourceFile; |
1027 | 1.12k | r->sourceLine = file->lineNumber; |
1028 | 1.12k | r->index = (*table)->ruleCounter++; |
1029 | 1.12k | r->opcode = opcode; |
1030 | 1.12k | r->after = after; |
1031 | 1.12k | r->before = before; |
1032 | 1.12k | r->nocross = 0; |
1033 | 1.12k | if (ruleChars) |
1034 | 962 | memcpy(&r->charsdots[0], &ruleChars->chars[0], |
1035 | 962 | CHARSIZE * (r->charslen = ruleChars->length)); |
1036 | 158 | else |
1037 | 158 | r->charslen = 0; |
1038 | 1.12k | if (ruleDots) |
1039 | 1.10k | memcpy(&r->charsdots[r->charslen], &ruleDots->chars[0], |
1040 | 1.10k | CHARSIZE * (r->dotslen = ruleDots->length)); |
1041 | 19 | else |
1042 | 19 | r->dotslen = 0; |
1043 | | |
1044 | | /* link new rule into table. */ |
1045 | 1.12k | if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd) return 1; |
1046 | 1.10k | if (opcode >= CTO_Context && opcode <= CTO_Pass4) |
1047 | 277 | if (!(opcode == CTO_Context && r->charslen > 0)) { |
1048 | 270 | if (!nofor) |
1049 | 170 | if (!addForwardPassRule(offset, r, *table)) return 0; |
1050 | 270 | if (!noback) |
1051 | 100 | if (!addBackwardPassRule(offset, r, *table)) return 0; |
1052 | 270 | return 1; |
1053 | 270 | } |
1054 | 837 | if (!nofor) { |
1055 | 826 | if (r->charslen == 1) { |
1056 | 481 | addForwardRuleWithSingleChar(file, offset, r, table); |
1057 | | // addForwardRuleWithSingleChar may have moved table, so make sure rule is |
1058 | | // still valid |
1059 | 481 | r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
1060 | 481 | if (rule) *rule = r; |
1061 | 481 | } else if (r->charslen > 1) |
1062 | 184 | addForwardRuleWithMultipleChars(offset, r, *table); |
1063 | 826 | } |
1064 | 837 | if (!noback) { |
1065 | 806 | widechar *cells; |
1066 | 806 | int dotslen; |
1067 | | |
1068 | 806 | if (r->opcode == CTO_Context) { |
1069 | 3 | cells = &r->charsdots[0]; |
1070 | 3 | dotslen = r->charslen; |
1071 | 803 | } else { |
1072 | 803 | cells = &r->charsdots[r->charslen]; |
1073 | 803 | dotslen = r->dotslen; |
1074 | 803 | } |
1075 | 806 | if (dotslen == 1) { |
1076 | 623 | addBackwardRuleWithSingleCell(file, *cells, offset, r, table); |
1077 | | // addBackwardRuleWithSingleCell may have moved table, so make sure rule is |
1078 | | // still valid |
1079 | 623 | r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
1080 | 623 | if (rule) *rule = r; |
1081 | 623 | } else if (dotslen > 1) |
1082 | 153 | addBackwardRuleWithMultipleCells(cells, dotslen, offset, r, *table); |
1083 | 806 | } |
1084 | 837 | return 1; |
1085 | 1.10k | } |
1086 | | |
1087 | | static const CharacterClass * |
1088 | 127 | findCharacterClass(const CharsString *name, const TranslationTableHeader *table) { |
1089 | | /* Find a character class, whether predefined or user-defined */ |
1090 | 127 | const CharacterClass *class = table->characterClasses; |
1091 | 904 | while (class) { |
1092 | 849 | if ((name->length == class->length) && |
1093 | 849 | (memcmp(&name->chars[0], class->name, CHARSIZE * name->length) == 0)) |
1094 | 72 | return class; |
1095 | 777 | class = class->next; |
1096 | 777 | } |
1097 | 55 | return NULL; |
1098 | 127 | } |
1099 | | |
1100 | | static TranslationTableCharacterAttributes |
1101 | 3 | getNextNumberedAttribute(TranslationTableHeader *table) { |
1102 | | /* Get the next attribute value for numbered attributes, or 0 if there is no more |
1103 | | * space in the table. */ |
1104 | 3 | TranslationTableCharacterAttributes next = table->nextNumberedCharacterClassAttribute; |
1105 | 3 | if (next > CTC_UserDefined8) return 0; |
1106 | 3 | table->nextNumberedCharacterClassAttribute <<= 1; |
1107 | 3 | return next; |
1108 | 3 | } |
1109 | | |
1110 | | static TranslationTableCharacterAttributes |
1111 | 896 | getNextAttribute(TranslationTableHeader *table) { |
1112 | | /* Get the next attribute value, or 0 if there is no more space in the table. */ |
1113 | 896 | TranslationTableCharacterAttributes next = table->nextCharacterClassAttribute; |
1114 | 896 | if (next) { |
1115 | 896 | if (next == CTC_LitDigit) |
1116 | 94 | table->nextCharacterClassAttribute = CTC_UserDefined9; |
1117 | 802 | else |
1118 | 802 | table->nextCharacterClassAttribute <<= 1; |
1119 | 896 | return next; |
1120 | 896 | } else |
1121 | 0 | return getNextNumberedAttribute(table); |
1122 | 896 | } |
1123 | | |
1124 | | static CharacterClass * |
1125 | | addCharacterClass(const FileInfo *file, const widechar *name, int length, |
1126 | 896 | TranslationTableHeader *table, int validate) { |
1127 | | /* Define a character class, Whether predefined or user-defined */ |
1128 | 896 | if (validate) { |
1129 | 507 | for (int i = 0; i < length; i++) { |
1130 | 457 | if (!((name[i] >= 'a' && name[i] <= 'z') || |
1131 | 457 | (name[i] >= 'A' && name[i] <= 'Z'))) { |
1132 | 17 | compileError(file, |
1133 | 17 | "Invalid attribute name: must be a digit between " |
1134 | 17 | "0 and 7 or a word containing only letters"); |
1135 | 17 | } |
1136 | 457 | } |
1137 | | // check that name is not reserved |
1138 | 50 | int k = 0; |
1139 | 1.45k | while (reservedAttributeNames[k]) { |
1140 | 1.40k | if (strlen(reservedAttributeNames[k]) == length) { |
1141 | 68 | int i; |
1142 | 140 | for (i = 0; i < length; i++) |
1143 | 140 | if (reservedAttributeNames[k][i] != name[i]) break; |
1144 | 68 | if (i == length) { |
1145 | 0 | compileError(file, "Attribute name is reserved: %s", |
1146 | 0 | reservedAttributeNames[k]); |
1147 | 0 | return NULL; |
1148 | 0 | } |
1149 | 68 | } |
1150 | 1.40k | k++; |
1151 | 1.40k | } |
1152 | 50 | } |
1153 | 896 | CharacterClass **classes = &table->characterClasses; |
1154 | 896 | TranslationTableCharacterAttributes attribute = getNextAttribute(table); |
1155 | 896 | CharacterClass *class; |
1156 | 896 | if (attribute) { |
1157 | 896 | if (!(class = malloc(sizeof(*class) + CHARSIZE * (length - 1)))) |
1158 | 0 | _lou_outOfMemory(); |
1159 | 896 | else { |
1160 | 896 | memset(class, 0, sizeof(*class)); |
1161 | 896 | memcpy(class->name, name, CHARSIZE * (class->length = length)); |
1162 | 896 | class->attribute = attribute; |
1163 | 896 | class->next = *classes; |
1164 | 896 | *classes = class; |
1165 | 896 | return class; |
1166 | 896 | } |
1167 | 896 | } |
1168 | 0 | compileError(file, "character class table overflow."); |
1169 | 0 | return NULL; |
1170 | 896 | } |
1171 | | |
1172 | | static void |
1173 | 94 | deallocateCharacterClasses(TranslationTableHeader *table) { |
1174 | 94 | CharacterClass **classes = &table->characterClasses; |
1175 | 990 | while (*classes) { |
1176 | 896 | CharacterClass *class = *classes; |
1177 | 896 | *classes = (*classes)->next; |
1178 | 896 | if (class) free(class); |
1179 | 896 | } |
1180 | 94 | } |
1181 | | |
1182 | | static int |
1183 | 94 | allocateCharacterClasses(TranslationTableHeader *table) { |
1184 | | /* Allocate memory for predefined character classes */ |
1185 | 94 | int k = 0; |
1186 | 94 | table->characterClasses = NULL; |
1187 | 94 | table->nextCharacterClassAttribute = 1; // CTC_Space |
1188 | 94 | table->nextNumberedCharacterClassAttribute = CTC_UserDefined1; |
1189 | 940 | while (characterClassNames[k]) { |
1190 | 846 | widechar wname[MAXSTRING]; |
1191 | 846 | int length = (int)strlen(characterClassNames[k]); |
1192 | 846 | int kk; |
1193 | 6.58k | for (kk = 0; kk < length; kk++) wname[kk] = (widechar)characterClassNames[k][kk]; |
1194 | 846 | if (!addCharacterClass(NULL, wname, length, table, 0)) { |
1195 | 0 | deallocateCharacterClasses(table); |
1196 | 0 | return 0; |
1197 | 0 | } |
1198 | 846 | k++; |
1199 | 846 | } |
1200 | 94 | return 1; |
1201 | 94 | } |
1202 | | |
1203 | | static TranslationTableOpcode |
1204 | 1.73k | getOpcode(const FileInfo *file, const CharsString *token) { |
1205 | 1.73k | static TranslationTableOpcode lastOpcode = 0; |
1206 | 1.73k | TranslationTableOpcode opcode = lastOpcode; |
1207 | | |
1208 | 93.1k | do { |
1209 | 93.1k | if (token->length == opcodeLengths[opcode]) |
1210 | 10.5k | if (eqasc2uni((unsigned char *)opcodeNames[opcode], &token->chars[0], |
1211 | 10.5k | token->length)) { |
1212 | 1.71k | lastOpcode = opcode; |
1213 | 1.71k | return opcode; |
1214 | 1.71k | } |
1215 | 91.3k | opcode++; |
1216 | 91.3k | if (opcode >= CTO_None) opcode = 0; |
1217 | 91.3k | } while (opcode != lastOpcode); |
1218 | 13 | return CTO_None; |
1219 | 1.73k | } |
1220 | | |
1221 | | TranslationTableOpcode EXPORT_CALL |
1222 | 0 | _lou_findOpcodeNumber(const char *toFind) { |
1223 | | /* Used by tools such as lou_debug */ |
1224 | 0 | static TranslationTableOpcode lastOpcode = 0; |
1225 | 0 | TranslationTableOpcode opcode = lastOpcode; |
1226 | 0 | int length = (int)strlen(toFind); |
1227 | 0 | do { |
1228 | 0 | if (length == opcodeLengths[opcode] && |
1229 | 0 | strcasecmp(toFind, opcodeNames[opcode]) == 0) { |
1230 | 0 | lastOpcode = opcode; |
1231 | 0 | return opcode; |
1232 | 0 | } |
1233 | 0 | opcode++; |
1234 | 0 | if (opcode >= CTO_None) opcode = 0; |
1235 | 0 | } while (opcode != lastOpcode); |
1236 | 0 | return CTO_None; |
1237 | 0 | } |
1238 | | |
1239 | | const char *EXPORT_CALL |
1240 | 50 | _lou_findOpcodeName(TranslationTableOpcode opcode) { |
1241 | 50 | static char scratchBuf[MAXSTRING]; |
1242 | | /* Used by tools such as lou_debug */ |
1243 | 50 | if (opcode < 0 || opcode >= CTO_None) { |
1244 | 0 | sprintf(scratchBuf, "%u", opcode); |
1245 | 0 | return scratchBuf; |
1246 | 0 | } |
1247 | 50 | return opcodeNames[opcode]; |
1248 | 50 | } |
1249 | | |
1250 | | static widechar |
1251 | 5.13k | hexValue(const FileInfo *file, const widechar *digits, int length) { |
1252 | 5.13k | int k; |
1253 | 5.13k | unsigned int binaryValue = 0; |
1254 | 18.9k | for (k = 0; k < length; k++) { |
1255 | 18.1k | unsigned int hexDigit = 0; |
1256 | 18.1k | if (digits[k] >= '0' && digits[k] <= '9') |
1257 | 3.17k | hexDigit = digits[k] - '0'; |
1258 | 14.9k | else if (digits[k] >= 'a' && digits[k] <= 'f') |
1259 | 4.16k | hexDigit = digits[k] - 'a' + 10; |
1260 | 10.7k | else if (digits[k] >= 'A' && digits[k] <= 'F') |
1261 | 6.49k | hexDigit = digits[k] - 'A' + 10; |
1262 | 4.28k | else { |
1263 | 4.28k | compileError(file, "invalid %d-digit hexadecimal number", length); |
1264 | 4.28k | return (widechar)0xffffffff; |
1265 | 4.28k | } |
1266 | 13.8k | binaryValue |= hexDigit << (4 * (length - 1 - k)); |
1267 | 13.8k | } |
1268 | 855 | return (widechar)binaryValue; |
1269 | 5.13k | } |
1270 | | |
/* Number of entries in first0Bit. */
#define MAXBYTES 7
/* Byte values with the top 1..7 bits set. parseChars compares a non-ASCII
 * lead byte against these entries, from the last one down, to decide how many
 * following bytes belong to the sequence, and uses 0xFF - first0Bit[n] as the
 * mask that keeps the lead byte's remaining low bits. */
static const unsigned int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC,
	0XFE };
1274 | | |
1275 | | static int |
1276 | 1.53k | parseChars(const FileInfo *file, CharsString *result, CharsString *token) { |
1277 | 1.53k | int in = 0; |
1278 | 1.53k | int out = 0; |
1279 | 1.53k | int lastOutSize = 0; |
1280 | 1.53k | int lastIn; |
1281 | 1.53k | unsigned int ch = 0; |
1282 | 1.53k | int numBytes = 0; |
1283 | 1.53k | unsigned int utf32 = 0; |
1284 | 1.53k | int k; |
1285 | 149k | while (in < token->length) { |
1286 | 147k | ch = token->chars[in++] & 0xff; |
1287 | 147k | if (ch < 128) { |
1288 | 114k | if (ch == '\\') { /* escape sequence */ |
1289 | 8.31k | switch (ch = token->chars[in]) { |
1290 | 347 | case '\\': |
1291 | 347 | break; |
1292 | 163 | case 'e': |
1293 | 163 | ch = 0x1b; |
1294 | 163 | break; |
1295 | 0 | case 'f': |
1296 | 0 | ch = 12; |
1297 | 0 | break; |
1298 | 497 | case 'n': |
1299 | 497 | ch = 10; |
1300 | 497 | break; |
1301 | 368 | case 'r': |
1302 | 368 | ch = 13; |
1303 | 368 | break; |
1304 | 4 | case 's': |
1305 | 4 | ch = ' '; |
1306 | 4 | break; |
1307 | 128 | case 't': |
1308 | 128 | ch = 9; |
1309 | 128 | break; |
1310 | 199 | case 'v': |
1311 | 199 | ch = 11; |
1312 | 199 | break; |
1313 | 1.01k | case 'w': |
1314 | 1.01k | ch = LOU_ENDSEGMENT; |
1315 | 1.01k | break; |
1316 | 108 | case 34: |
1317 | 108 | ch = QUOTESUB; |
1318 | 108 | break; |
1319 | 3.67k | case 'X': |
1320 | 3.67k | compileWarning(file, "\\Xhhhh (with a capital 'X') is deprecated."); |
1321 | 5.16k | case 'x': |
1322 | 5.16k | if (token->length - in > 4) { |
1323 | 5.13k | ch = hexValue(file, &token->chars[in + 1], 4); |
1324 | 5.13k | in += 4; |
1325 | 5.13k | } |
1326 | 5.16k | break; |
1327 | 0 | case 'Y': |
1328 | 0 | compileWarning(file, "\\Yhhhhh (with a capital 'Y') is deprecated."); |
1329 | 226 | case 'y': |
1330 | 226 | if (CHARSIZE == 2) { |
1331 | 307 | not32: |
1332 | 307 | compileError(file, |
1333 | 307 | "liblouis has not been compiled for 32-bit Unicode"); |
1334 | 307 | break; |
1335 | 226 | } |
1336 | 0 | if (token->length - in > 5) { |
1337 | 0 | ch = hexValue(file, &token->chars[in + 1], 5); |
1338 | 0 | in += 5; |
1339 | 0 | } |
1340 | 0 | break; |
1341 | 0 | case 'Z': |
1342 | 0 | compileWarning( |
1343 | 0 | file, "\\Zhhhhhhhh (with a capital 'Z') is deprecated."); |
1344 | 81 | case 'z': |
1345 | 81 | if (CHARSIZE == 2) goto not32; |
1346 | 0 | if (token->length - in > 8) { |
1347 | 0 | ch = hexValue(file, &token->chars[in + 1], 8); |
1348 | 0 | in += 8; |
1349 | 0 | } |
1350 | 0 | break; |
1351 | 8 | default: |
1352 | 8 | compileError(file, "invalid escape sequence '\\%c'", ch); |
1353 | 8 | result->length = lastOutSize; |
1354 | 8 | return 0; |
1355 | 8.31k | } |
1356 | 8.30k | in++; |
1357 | 8.30k | } |
1358 | 114k | if (out >= MAXSTRING - 1) { |
1359 | 3 | compileError(file, "Token too long"); |
1360 | 3 | result->length = MAXSTRING - 1; |
1361 | 3 | return 0; |
1362 | 3 | } |
1363 | 114k | result->chars[out++] = (widechar)ch; |
1364 | 114k | continue; |
1365 | 114k | } |
1366 | 33.1k | lastOutSize = out; |
1367 | 33.1k | lastIn = in; |
1368 | 89.7k | for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--) |
1369 | 82.8k | if (ch >= first0Bit[numBytes]) break; |
1370 | 33.1k | utf32 = ch & (0XFF - first0Bit[numBytes]); |
1371 | 174k | for (k = 0; k < numBytes; k++) { |
1372 | 141k | if (in >= MAXSTRING - 1 || in >= token->length) break; |
1373 | 141k | if (out >= MAXSTRING - 1) { |
1374 | 2 | compileError(file, "Token too long"); |
1375 | 2 | result->length = lastOutSize; |
1376 | 2 | return 0; |
1377 | 2 | } |
1378 | 141k | if (token->chars[in] < 128 || (token->chars[in] & 0x0040)) { |
1379 | 139k | compileWarning(file, "invalid UTF-8. Assuming Latin-1."); |
1380 | 139k | result->chars[out++] = token->chars[lastIn]; |
1381 | 139k | in = lastIn + 1; |
1382 | 139k | continue; |
1383 | 139k | } |
1384 | 1.14k | utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f); |
1385 | 1.14k | } |
1386 | 33.1k | if (out >= MAXSTRING - 1) { |
1387 | 2 | compileError(file, "Token too long"); |
1388 | 2 | result->length = lastOutSize; |
1389 | 2 | return 0; |
1390 | 2 | } |
1391 | 33.1k | if (CHARSIZE == 2 && utf32 > 0xffff) { |
1392 | 7 | compileError(file, "liblouis has not been compiled for 32-bit Unicode"); |
1393 | 7 | result->length = lastOutSize; |
1394 | 7 | return 0; |
1395 | 7 | } |
1396 | 33.1k | result->chars[out++] = (widechar)utf32; |
1397 | 33.1k | } |
1398 | 1.51k | result->length = out; |
1399 | 1.51k | return 1; |
1400 | 1.53k | } |
1401 | | |
1402 | | int EXPORT_CALL |
1403 | 237 | _lou_extParseChars(const char *inString, widechar *outString) { |
1404 | | /* Parse external character strings */ |
1405 | 237 | CharsString wideIn; |
1406 | 237 | CharsString result; |
1407 | 237 | int k; |
1408 | 165k | for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
1409 | 237 | wideIn.chars[k] = 0; |
1410 | 237 | wideIn.length = k; |
1411 | 237 | if (!parseChars(NULL, &result, &wideIn)) return 0; |
1412 | 210k | for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
1413 | 227 | return result.length; |
1414 | 237 | } |
1415 | | |
1416 | | static int |
1417 | 839 | parseDots(const FileInfo *file, CharsString *cells, const CharsString *token) { |
1418 | | /* get dot patterns */ |
1419 | 839 | widechar cell = 0; /* assembly place for dots */ |
1420 | 839 | int cellCount = 0; |
1421 | 839 | int index; |
1422 | 839 | int start = 0; |
1423 | | |
1424 | 6.39k | for (index = 0; index < token->length; index++) { |
1425 | 5.56k | int started = index != start; |
1426 | 5.56k | widechar character = token->chars[index]; |
1427 | 5.56k | switch (character) { /* or dots to make up Braille cell */ |
1428 | 0 | { |
1429 | 0 | int dot; |
1430 | 378 | case '1': |
1431 | 378 | dot = LOU_DOT_1; |
1432 | 378 | goto haveDot; |
1433 | 315 | case '2': |
1434 | 315 | dot = LOU_DOT_2; |
1435 | 315 | goto haveDot; |
1436 | 303 | case '3': |
1437 | 303 | dot = LOU_DOT_3; |
1438 | 303 | goto haveDot; |
1439 | 296 | case '4': |
1440 | 296 | dot = LOU_DOT_4; |
1441 | 296 | goto haveDot; |
1442 | 432 | case '5': |
1443 | 432 | dot = LOU_DOT_5; |
1444 | 432 | goto haveDot; |
1445 | 323 | case '6': |
1446 | 323 | dot = LOU_DOT_6; |
1447 | 323 | goto haveDot; |
1448 | 309 | case '7': |
1449 | 309 | dot = LOU_DOT_7; |
1450 | 309 | goto haveDot; |
1451 | 426 | case '8': |
1452 | 426 | dot = LOU_DOT_8; |
1453 | 426 | goto haveDot; |
1454 | 318 | case '9': |
1455 | 318 | dot = LOU_DOT_9; |
1456 | 318 | goto haveDot; |
1457 | 308 | case 'a': |
1458 | 320 | case 'A': |
1459 | 320 | dot = LOU_DOT_10; |
1460 | 320 | goto haveDot; |
1461 | 299 | case 'b': |
1462 | 302 | case 'B': |
1463 | 302 | dot = LOU_DOT_11; |
1464 | 302 | goto haveDot; |
1465 | 296 | case 'c': |
1466 | 301 | case 'C': |
1467 | 301 | dot = LOU_DOT_12; |
1468 | 301 | goto haveDot; |
1469 | 370 | case 'd': |
1470 | 382 | case 'D': |
1471 | 382 | dot = LOU_DOT_13; |
1472 | 382 | goto haveDot; |
1473 | 358 | case 'e': |
1474 | 434 | case 'E': |
1475 | 434 | dot = LOU_DOT_14; |
1476 | 434 | goto haveDot; |
1477 | 288 | case 'f': |
1478 | 292 | case 'F': |
1479 | 292 | dot = LOU_DOT_15; |
1480 | 5.13k | haveDot: |
1481 | 5.13k | if (started && !cell) goto invalid; |
1482 | 5.13k | if (cell & dot) { |
1483 | 4 | compileError(file, "dot specified more than once."); |
1484 | 4 | return 0; |
1485 | 4 | } |
1486 | 5.12k | cell |= dot; |
1487 | 5.12k | break; |
1488 | 5.13k | } |
1489 | 122 | case '0': /* blank */ |
1490 | 122 | if (started) goto invalid; |
1491 | 121 | break; |
1492 | 308 | case '-': /* got all dots for this cell */ |
1493 | 308 | if (!started) { |
1494 | 0 | compileError(file, "missing cell specification."); |
1495 | 0 | return 0; |
1496 | 0 | } |
1497 | 308 | cells->chars[cellCount++] = cell | LOU_DOTS; |
1498 | 308 | cell = 0; |
1499 | 308 | start = index + 1; |
1500 | 308 | break; |
1501 | 2 | default: |
1502 | 3 | invalid: |
1503 | 3 | compileError( |
1504 | 3 | file, "invalid dot number %s.", _lou_showString(&character, 1, 0)); |
1505 | 3 | return 0; |
1506 | 5.56k | } |
1507 | 5.56k | } |
1508 | 832 | if (index == start) { |
1509 | 24 | compileError(file, "missing cell specification."); |
1510 | 24 | return 0; |
1511 | 24 | } |
1512 | 808 | cells->chars[cellCount++] = cell | LOU_DOTS; /* last cell */ |
1513 | 808 | cells->length = cellCount; |
1514 | 808 | return 1; |
1515 | 832 | } |
1516 | | |
1517 | | int EXPORT_CALL |
1518 | 0 | _lou_extParseDots(const char *inString, widechar *outString) { |
1519 | | /* Parse external dot patterns */ |
1520 | 0 | CharsString wideIn; |
1521 | 0 | CharsString result; |
1522 | 0 | int k; |
1523 | 0 | for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
1524 | 0 | wideIn.chars[k] = 0; |
1525 | 0 | wideIn.length = k; |
1526 | 0 | parseDots(NULL, &result, &wideIn); |
1527 | 0 | if (errorCount) { |
1528 | 0 | errorCount = 0; |
1529 | 0 | return 0; |
1530 | 0 | } |
1531 | 0 | for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
1532 | 0 | outString[k] = 0; |
1533 | 0 | return result.length; |
1534 | 0 | } |
1535 | | |
1536 | | static int |
1537 | 120 | getCharacters(FileInfo *file, CharsString *characters) { |
1538 | | /* Get ruleChars string */ |
1539 | 120 | CharsString token; |
1540 | 120 | if (!getToken(file, &token, "characters")) return 0; |
1541 | 120 | return parseChars(file, characters, &token); |
1542 | 120 | } |
1543 | | |
1544 | | static int |
1545 | 737 | getRuleCharsText(FileInfo *file, CharsString *ruleChars) { |
1546 | 737 | CharsString token; |
1547 | 737 | if (!getToken(file, &token, "Characters operand")) return 0; |
1548 | 737 | return parseChars(file, ruleChars, &token); |
1549 | 737 | } |
1550 | | |
1551 | | static int |
1552 | 89 | getRuleDotsText(FileInfo *file, CharsString *ruleDots) { |
1553 | 89 | CharsString token; |
1554 | 89 | if (!getToken(file, &token, "characters")) return 0; |
1555 | 89 | return parseChars(file, ruleDots, &token); |
1556 | 89 | } |
1557 | | |
1558 | | static int |
1559 | 477 | getRuleDotsPattern(FileInfo *file, CharsString *ruleDots) { |
1560 | | /* Interpret the dets operand */ |
1561 | 477 | CharsString token; |
1562 | 477 | if (!getToken(file, &token, "Dots operand")) return 0; |
1563 | 474 | if (token.length == 1 && token.chars[0] == '=') { |
1564 | 1 | ruleDots->length = 0; |
1565 | 1 | return 1; |
1566 | 1 | } else |
1567 | 473 | return parseDots(file, ruleDots, &token); |
1568 | 474 | } |
1569 | | |
/* Forward declaration only; includeFile has internal linkage, so its
 * definition has to appear elsewhere in this file. */
static int
includeFile(const FileInfo *file, CharsString *includedFile,
		TranslationTableHeader **table, DisplayTableHeader **displayTable);
1573 | | |
1574 | | static TranslationTableOffset |
1575 | 202 | findRuleName(const CharsString *name, const TranslationTableHeader *table) { |
1576 | 202 | const RuleName *ruleName = table->ruleNames; |
1577 | 516 | while (ruleName) { |
1578 | 337 | if ((name->length == ruleName->length) && |
1579 | 337 | (memcmp(&name->chars[0], ruleName->name, CHARSIZE * name->length) == 0)) |
1580 | 23 | return ruleName->ruleOffset; |
1581 | 314 | ruleName = ruleName->next; |
1582 | 314 | } |
1583 | 179 | return 0; |
1584 | 202 | } |
1585 | | |
1586 | | static int |
1587 | | addRuleName(const FileInfo *file, CharsString *name, TranslationTableOffset ruleOffset, |
1588 | 23 | TranslationTableHeader *table) { |
1589 | 23 | int k; |
1590 | 23 | RuleName *ruleName; |
1591 | 23 | if (!(ruleName = malloc(sizeof(*ruleName) + CHARSIZE * (name->length - 1)))) { |
1592 | 0 | compileError(file, "not enough memory"); |
1593 | 0 | _lou_outOfMemory(); |
1594 | 0 | } |
1595 | 23 | memset(ruleName, 0, sizeof(*ruleName)); |
1596 | | // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
1597 | 159 | for (k = 0; k < name->length; k++) { |
1598 | 136 | widechar c = name->chars[k]; |
1599 | 136 | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) |
1600 | 136 | ruleName->name[k] = c; |
1601 | 0 | else { |
1602 | 0 | compileError(file, "a name may contain only letters"); |
1603 | 0 | free(ruleName); |
1604 | 0 | return 0; |
1605 | 0 | } |
1606 | 136 | } |
1607 | 23 | ruleName->length = name->length; |
1608 | 23 | ruleName->ruleOffset = ruleOffset; |
1609 | 23 | ruleName->next = table->ruleNames; |
1610 | 23 | table->ruleNames = ruleName; |
1611 | 23 | return 1; |
1612 | 23 | } |
1613 | | |
1614 | | static void |
1615 | 18 | deallocateRuleNames(TranslationTableHeader *table) { |
1616 | 18 | RuleName **ruleName = &table->ruleNames; |
1617 | 41 | while (*ruleName) { |
1618 | 23 | RuleName *rn = *ruleName; |
1619 | 23 | *ruleName = rn->next; |
1620 | 23 | free(rn); |
1621 | 23 | } |
1622 | 18 | } |
1623 | | |
1624 | | static int |
1625 | 16 | compileSwapDots(const FileInfo *file, CharsString *source, CharsString *dest) { |
1626 | 16 | int k = 0; |
1627 | 16 | int kk = 0; |
1628 | 16 | CharsString dotsSource; |
1629 | 16 | CharsString dotsDest; |
1630 | 16 | dest->length = 0; |
1631 | 16 | dotsSource.length = 0; |
1632 | 97 | while (k <= source->length) { |
1633 | 81 | if (source->chars[k] != ',' && k != source->length) |
1634 | 54 | dotsSource.chars[dotsSource.length++] = source->chars[k]; |
1635 | 27 | else { |
1636 | 27 | if (!parseDots(file, &dotsDest, &dotsSource)) return 0; |
1637 | 27 | dest->chars[dest->length++] = dotsDest.length + 1; |
1638 | 65 | for (kk = 0; kk < dotsDest.length; kk++) |
1639 | 38 | dest->chars[dest->length++] = dotsDest.chars[kk]; |
1640 | 27 | dotsSource.length = 0; |
1641 | 27 | } |
1642 | 81 | k++; |
1643 | 81 | } |
1644 | 16 | return 1; |
1645 | 16 | } |
1646 | | |
1647 | | static int |
1648 | | compileSwap(FileInfo *file, TranslationTableOpcode opcode, int noback, int nofor, |
1649 | 13 | TranslationTableHeader **table) { |
1650 | 13 | CharsString ruleChars; |
1651 | 13 | CharsString ruleDots; |
1652 | 13 | CharsString name; |
1653 | 13 | CharsString matches; |
1654 | 13 | CharsString replacements; |
1655 | 13 | TranslationTableOffset ruleOffset; |
1656 | 13 | if (!getToken(file, &name, "name operand")) return 0; |
1657 | 13 | if (!getToken(file, &matches, "matches operand")) return 0; |
1658 | 13 | if (!getToken(file, &replacements, "replacements operand")) return 0; |
1659 | 13 | if (opcode == CTO_SwapCc || opcode == CTO_SwapCd) { |
1660 | 5 | if (!parseChars(file, &ruleChars, &matches)) return 0; |
1661 | 8 | } else { |
1662 | 8 | if (!compileSwapDots(file, &matches, &ruleChars)) return 0; |
1663 | 8 | } |
1664 | 13 | if (opcode == CTO_SwapCc) { |
1665 | 5 | if (!parseChars(file, &ruleDots, &replacements)) return 0; |
1666 | 8 | } else { |
1667 | 8 | if (!compileSwapDots(file, &replacements, &ruleDots)) return 0; |
1668 | 8 | } |
1669 | 13 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, NULL, noback, |
1670 | 13 | nofor, table)) |
1671 | 0 | return 0; |
1672 | 13 | if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
1673 | 13 | return 1; |
1674 | 13 | } |
1675 | | |
1676 | | static int |
1677 | 18 | getNumber(widechar *string, widechar *number) { |
1678 | | /* Convert a string of wide character digits to an integer */ |
1679 | 18 | int k = 0; |
1680 | 18 | *number = 0; |
1681 | 51 | while (string[k] >= '0' && string[k] <= '9') |
1682 | 33 | *number = 10 * *number + (string[k++] - '0'); |
1683 | 18 | return k; |
1684 | 18 | } |
1685 | | |
1686 | | /* Start of multipass compiler */ |
1687 | | |
1688 | | static int |
1689 | | passGetAttributes(CharsString *passLine, int *passLinepos, |
1690 | 78 | TranslationTableCharacterAttributes *attributes, const FileInfo *file) { |
1691 | 78 | int more = 1; |
1692 | 78 | *attributes = 0; |
1693 | 347 | while (more) { |
1694 | 269 | switch (passLine->chars[*passLinepos]) { |
1695 | 39 | case pass_any: |
1696 | 39 | *attributes = 0xffffffff; |
1697 | 39 | break; |
1698 | 18 | case pass_digit: |
1699 | 18 | *attributes |= CTC_Digit; |
1700 | 18 | break; |
1701 | 2 | case pass_litDigit: |
1702 | 2 | *attributes |= CTC_LitDigit; |
1703 | 2 | break; |
1704 | 3 | case pass_letter: |
1705 | 3 | *attributes |= CTC_Letter; |
1706 | 3 | break; |
1707 | 0 | case pass_math: |
1708 | 0 | *attributes |= CTC_Math; |
1709 | 0 | break; |
1710 | 34 | case pass_punctuation: |
1711 | 34 | *attributes |= CTC_Punctuation; |
1712 | 34 | break; |
1713 | 13 | case pass_sign: |
1714 | 13 | *attributes |= CTC_Sign; |
1715 | 13 | break; |
1716 | 54 | case pass_space: |
1717 | 54 | *attributes |= CTC_Space; |
1718 | 54 | break; |
1719 | 0 | case pass_uppercase: |
1720 | 0 | *attributes |= CTC_UpperCase; |
1721 | 0 | break; |
1722 | 0 | case pass_lowercase: |
1723 | 0 | *attributes |= CTC_LowerCase; |
1724 | 0 | break; |
1725 | 6 | case pass_class1: |
1726 | 6 | *attributes |= CTC_UserDefined9; |
1727 | 6 | break; |
1728 | 3 | case pass_class2: |
1729 | 3 | *attributes |= CTC_UserDefined10; |
1730 | 3 | break; |
1731 | 17 | case pass_class3: |
1732 | 17 | *attributes |= CTC_UserDefined11; |
1733 | 17 | break; |
1734 | 2 | case pass_class4: |
1735 | 2 | *attributes |= CTC_UserDefined12; |
1736 | 2 | break; |
1737 | 78 | default: |
1738 | 78 | more = 0; |
1739 | 78 | break; |
1740 | 269 | } |
1741 | 269 | if (more) (*passLinepos)++; |
1742 | 269 | } |
1743 | 78 | if (!*attributes) { |
1744 | 0 | compileError(file, "missing attribute"); |
1745 | 0 | (*passLinepos)--; |
1746 | 0 | return 0; |
1747 | 0 | } |
1748 | 78 | return 1; |
1749 | 78 | } |
1750 | | |
1751 | | static int |
1752 | | passGetDots(CharsString *passLine, int *passLinepos, CharsString *dots, |
1753 | 88 | const FileInfo *file) { |
1754 | 88 | CharsString collectDots; |
1755 | 88 | collectDots.length = 0; |
1756 | 237 | while (*passLinepos < passLine->length && |
1757 | 237 | (passLine->chars[*passLinepos] == '-' || |
1758 | 218 | (passLine->chars[*passLinepos] >= '0' && |
1759 | 217 | passLine->chars[*passLinepos] <= '9') || |
1760 | 218 | ((passLine->chars[*passLinepos] | 32) >= 'a' && |
1761 | 75 | (passLine->chars[*passLinepos] | 32) <= 'f'))) |
1762 | 149 | collectDots.chars[collectDots.length++] = passLine->chars[(*passLinepos)++]; |
1763 | 88 | if (!parseDots(file, dots, &collectDots)) return 0; |
1764 | 59 | return 1; |
1765 | 88 | } |
1766 | | |
1767 | | static int |
1768 | | passGetString(CharsString *passLine, int *passLinepos, CharsString *string, |
1769 | 161 | const FileInfo *file) { |
1770 | 161 | string->length = 0; |
1771 | 4.51k | while (1) { |
1772 | 4.51k | if ((*passLinepos >= passLine->length) || !passLine->chars[*passLinepos]) { |
1773 | 4 | compileError(file, "unterminated string"); |
1774 | 4 | return 0; |
1775 | 4 | } |
1776 | 4.50k | if (passLine->chars[*passLinepos] == 34) break; |
1777 | 4.35k | if (passLine->chars[*passLinepos] == QUOTESUB) |
1778 | 8 | string->chars[string->length++] = 34; |
1779 | 4.34k | else |
1780 | 4.34k | string->chars[string->length++] = passLine->chars[*passLinepos]; |
1781 | 4.35k | (*passLinepos)++; |
1782 | 4.35k | } |
1783 | 157 | string->chars[string->length] = 0; |
1784 | 157 | (*passLinepos)++; |
1785 | 157 | return 1; |
1786 | 161 | } |
1787 | | |
1788 | | static int |
1789 | 423 | passGetNumber(CharsString *passLine, int *passLinepos, widechar *number) { |
1790 | | /* Convert a string of wide character digits to an integer */ |
1791 | 423 | *number = 0; |
1792 | 910 | while ((*passLinepos < passLine->length) && (passLine->chars[*passLinepos] >= '0') && |
1793 | 910 | (passLine->chars[*passLinepos] <= '9')) |
1794 | 487 | *number = 10 * (*number) + (passLine->chars[(*passLinepos)++] - '0'); |
1795 | 423 | return 1; |
1796 | 423 | } |
1797 | | |
1798 | | static int |
1799 | | passGetVariableNumber( |
1800 | 82 | const FileInfo *file, CharsString *passLine, int *passLinepos, widechar *number) { |
1801 | 82 | if (!passGetNumber(passLine, passLinepos, number)) { |
1802 | 0 | compileError(file, "missing variable number"); |
1803 | 0 | return 0; |
1804 | 0 | } |
1805 | 82 | if ((*number >= 0) && (*number < NUMVAR)) return 1; |
1806 | 0 | compileError(file, "variable number out of range"); |
1807 | 0 | return 0; |
1808 | 82 | } |
1809 | | |
1810 | | static int |
1811 | 202 | passGetName(CharsString *passLine, int *passLinepos, CharsString *name) { |
1812 | 202 | name->length = 0; |
1813 | | // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
1814 | 416 | do { |
1815 | 416 | widechar c = passLine->chars[*passLinepos]; |
1816 | 416 | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
1817 | 214 | name->chars[name->length++] = c; |
1818 | 214 | (*passLinepos)++; |
1819 | 214 | } else { |
1820 | 202 | break; |
1821 | 202 | } |
1822 | 416 | } while (*passLinepos < passLine->length); |
1823 | 0 | return 1; |
1824 | 202 | } |
1825 | | |
1826 | | static inline int |
1827 | 249 | wantsString(TranslationTableOpcode opcode, int actionPart, int nofor) { |
1828 | 249 | if (opcode == CTO_Correct) return 1; |
1829 | 93 | if (opcode != CTO_Context) return 0; |
1830 | 13 | return !nofor == !actionPart; |
1831 | 93 | } |
1832 | | |
1833 | | static int |
1834 | | verifyStringOrDots(const FileInfo *file, TranslationTableOpcode opcode, int isString, |
1835 | 249 | int actionPart, int nofor) { |
1836 | 249 | if (!wantsString(opcode, actionPart, nofor) == !isString) return 1; |
1837 | | |
1838 | 0 | compileError(file, "%s are not allowed in the %s part of a %s translation %s rule.", |
1839 | 0 | isString ? "strings" : "dots", getPartName(actionPart), |
1840 | 0 | nofor ? "backward" : "forward", _lou_findOpcodeName(opcode)); |
1841 | |
|
1842 | 0 | return 0; |
1843 | 249 | } |
1844 | | |
1845 | | static int |
1846 | | appendInstructionChar( |
1847 | 9.11k | const FileInfo *file, widechar *passInstructions, int *passIC, widechar ch) { |
1848 | 9.11k | if (*passIC >= MAXSTRING) { |
1849 | 0 | compileError(file, "multipass operand too long"); |
1850 | 0 | return 0; |
1851 | 0 | } |
1852 | 9.11k | passInstructions[(*passIC)++] = ch; |
1853 | 9.11k | return 1; |
1854 | 9.11k | } |
1855 | | |
1856 | | static int |
1857 | | compilePassOpcode(const FileInfo *file, TranslationTableOpcode opcode, int noback, |
1858 | 284 | int nofor, TranslationTableHeader **table) { |
1859 | 284 | static CharsString passRuleChars; |
1860 | 284 | static CharsString passRuleDots; |
1861 | | /* Compile the operands of a pass opcode */ |
1862 | 284 | widechar passSubOp; |
1863 | 284 | const CharacterClass *class; |
1864 | 284 | TranslationTableRule *rule = NULL; |
1865 | 284 | int k; |
1866 | 284 | int kk = 0; |
1867 | 284 | int endTest = 0; |
1868 | 284 | widechar *passInstructions = passRuleDots.chars; |
1869 | 284 | int passIC = 0; /* Instruction counter */ |
1870 | 284 | passRuleChars.length = 0; |
1871 | 284 | CharsString passHoldString; |
1872 | 284 | widechar passHoldNumber; |
1873 | 284 | CharsString passLine; |
1874 | 284 | int passLinepos = 0; |
1875 | 284 | TranslationTableCharacterAttributes passAttributes; |
1876 | 284 | int replacing = 0; |
1877 | 284 | passHoldString.length = 0; |
1878 | 25.8k | for (k = file->linepos; k < file->linelen; k++) |
1879 | 25.6k | passHoldString.chars[passHoldString.length++] = file->line[k]; |
1880 | 2.71k | #define SEPCHAR 0x0001 |
1881 | 2.71k | for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32; k++) |
1882 | 2.42k | ; |
1883 | 284 | if (k < passHoldString.length) |
1884 | 284 | passHoldString.chars[k] = SEPCHAR; |
1885 | 0 | else { |
1886 | 0 | compileError(file, "Invalid multipass operands"); |
1887 | 0 | return 0; |
1888 | 0 | } |
1889 | 284 | parseChars(file, &passLine, &passHoldString); |
1890 | | /* Compile test part */ |
1891 | 2.43k | for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++) |
1892 | 2.14k | ; |
1893 | 284 | endTest = k; |
1894 | 284 | passLine.chars[endTest] = pass_endTest; |
1895 | 284 | passLinepos = 0; |
1896 | 1.42k | while (passLinepos <= endTest) { |
1897 | 1.14k | switch ((passSubOp = passLine.chars[passLinepos])) { |
1898 | 227 | case pass_lookback: |
1899 | 227 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_lookback)) |
1900 | 0 | return 0; |
1901 | 227 | passLinepos++; |
1902 | 227 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
1903 | 227 | if (passHoldNumber == 0) passHoldNumber = 1; |
1904 | 227 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
1905 | 0 | return 0; |
1906 | 227 | break; |
1907 | 227 | case pass_not: |
1908 | 68 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_not)) |
1909 | 0 | return 0; |
1910 | 68 | passLinepos++; |
1911 | 68 | break; |
1912 | 2 | case pass_first: |
1913 | 2 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_first)) |
1914 | 0 | return 0; |
1915 | 2 | passLinepos++; |
1916 | 2 | break; |
1917 | 7 | case pass_last: |
1918 | 7 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_last)) |
1919 | 0 | return 0; |
1920 | 7 | passLinepos++; |
1921 | 7 | break; |
1922 | 200 | case pass_search: |
1923 | 200 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_search)) |
1924 | 0 | return 0; |
1925 | 200 | passLinepos++; |
1926 | 200 | break; |
1927 | 9 | case pass_string: |
1928 | 9 | if (!verifyStringOrDots(file, opcode, 1, 0, nofor)) { |
1929 | 0 | return 0; |
1930 | 0 | } |
1931 | 9 | passLinepos++; |
1932 | 9 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
1933 | 0 | return 0; |
1934 | 9 | passGetString(&passLine, &passLinepos, &passHoldString, file); |
1935 | 9 | if (passHoldString.length == 0) { |
1936 | 0 | compileError(file, "empty string in test part"); |
1937 | 0 | return 0; |
1938 | 0 | } |
1939 | 9 | goto testDoCharsDots; |
1940 | 19 | case pass_dots: |
1941 | 19 | if (!verifyStringOrDots(file, opcode, 0, 0, nofor)) { |
1942 | 0 | return 0; |
1943 | 0 | } |
1944 | 19 | passLinepos++; |
1945 | 19 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
1946 | 0 | return 0; |
1947 | 19 | passGetDots(&passLine, &passLinepos, &passHoldString, file); |
1948 | 19 | if (passHoldString.length == 0) { |
1949 | 0 | compileError(file, "expected dot pattern after @ operand in test part"); |
1950 | 0 | return 0; |
1951 | 0 | } |
1952 | 28 | testDoCharsDots: |
1953 | 28 | if (passIC >= MAXSTRING) { |
1954 | 0 | compileError( |
1955 | 0 | file, "@ operand in test part of multipass operand too long"); |
1956 | 0 | return 0; |
1957 | 0 | } |
1958 | 28 | if (!appendInstructionChar( |
1959 | 28 | file, passInstructions, &passIC, passHoldString.length)) |
1960 | 0 | return 0; |
1961 | 3.18k | for (kk = 0; kk < passHoldString.length; kk++) { |
1962 | 3.15k | if (passIC >= MAXSTRING) { |
1963 | 0 | compileError( |
1964 | 0 | file, "@ operand in test part of multipass operand too long"); |
1965 | 0 | return 0; |
1966 | 0 | } |
1967 | 3.15k | if (!appendInstructionChar( |
1968 | 3.15k | file, passInstructions, &passIC, passHoldString.chars[kk])) |
1969 | 0 | return 0; |
1970 | 3.15k | } |
1971 | 28 | break; |
1972 | 28 | case pass_startReplace: |
1973 | 23 | if (replacing) { |
1974 | 0 | compileError(file, "nested replacement statements"); |
1975 | 0 | return 0; |
1976 | 0 | } |
1977 | 23 | if (!appendInstructionChar( |
1978 | 23 | file, passInstructions, &passIC, pass_startReplace)) |
1979 | 0 | return 0; |
1980 | 23 | replacing = 1; |
1981 | 23 | passLinepos++; |
1982 | 23 | break; |
1983 | 21 | case pass_endReplace: |
1984 | 21 | if (!replacing) { |
1985 | 0 | compileError(file, "unexpected end of replacement"); |
1986 | 0 | return 0; |
1987 | 0 | } |
1988 | 21 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_endReplace)) |
1989 | 0 | return 0; |
1990 | 21 | replacing = 0; |
1991 | 21 | passLinepos++; |
1992 | 21 | break; |
1993 | 54 | case pass_variable: |
1994 | 54 | passLinepos++; |
1995 | 54 | if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
1996 | 0 | return 0; |
1997 | 54 | switch (passLine.chars[passLinepos]) { |
1998 | 44 | case pass_eq: |
1999 | 44 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
2000 | 0 | return 0; |
2001 | 44 | goto doComp; |
2002 | 44 | case pass_lt: |
2003 | 4 | if (passLine.chars[passLinepos + 1] == pass_eq) { |
2004 | 1 | passLinepos++; |
2005 | 1 | if (!appendInstructionChar( |
2006 | 1 | file, passInstructions, &passIC, pass_lteq)) |
2007 | 0 | return 0; |
2008 | 3 | } else if (!appendInstructionChar( |
2009 | 3 | file, passInstructions, &passIC, pass_lt)) |
2010 | 0 | return 0; |
2011 | 4 | goto doComp; |
2012 | 6 | case pass_gt: |
2013 | 6 | if (passLine.chars[passLinepos + 1] == pass_eq) { |
2014 | 2 | passLinepos++; |
2015 | 2 | if (!appendInstructionChar( |
2016 | 2 | file, passInstructions, &passIC, pass_gteq)) |
2017 | 0 | return 0; |
2018 | 4 | } else if (!appendInstructionChar( |
2019 | 4 | file, passInstructions, &passIC, pass_gt)) |
2020 | 0 | return 0; |
2021 | 54 | doComp: |
2022 | 54 | if (!appendInstructionChar( |
2023 | 54 | file, passInstructions, &passIC, passHoldNumber)) |
2024 | 0 | return 0; |
2025 | 54 | passLinepos++; |
2026 | 54 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2027 | 54 | if (!appendInstructionChar( |
2028 | 54 | file, passInstructions, &passIC, passHoldNumber)) |
2029 | 0 | return 0; |
2030 | 54 | break; |
2031 | 54 | default: |
2032 | 0 | compileError(file, "incorrect comparison operator"); |
2033 | 0 | return 0; |
2034 | 54 | } |
2035 | 54 | break; |
2036 | 78 | case pass_attributes: |
2037 | 78 | passLinepos++; |
2038 | 78 | if (!passGetAttributes(&passLine, &passLinepos, &passAttributes, file)) |
2039 | 0 | return 0; |
2040 | 78 | insertAttributes: |
2041 | 78 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_attributes)) |
2042 | 0 | return 0; |
2043 | 78 | if (!appendInstructionChar( |
2044 | 78 | file, passInstructions, &passIC, (passAttributes >> 48) & 0xffff)) |
2045 | 0 | return 0; |
2046 | 78 | if (!appendInstructionChar( |
2047 | 78 | file, passInstructions, &passIC, (passAttributes >> 32) & 0xffff)) |
2048 | 0 | return 0; |
2049 | 78 | if (!appendInstructionChar( |
2050 | 78 | file, passInstructions, &passIC, (passAttributes >> 16) & 0xffff)) |
2051 | 0 | return 0; |
2052 | 78 | if (!appendInstructionChar( |
2053 | 78 | file, passInstructions, &passIC, passAttributes & 0xffff)) |
2054 | 0 | return 0; |
2055 | 82 | getRange: |
2056 | 82 | if (passLine.chars[passLinepos] == pass_until) { |
2057 | 28 | passLinepos++; |
2058 | 28 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2059 | 28 | if (!appendInstructionChar(file, passInstructions, &passIC, 0xffff)) |
2060 | 0 | return 0; |
2061 | 28 | break; |
2062 | 28 | } |
2063 | 54 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2064 | 54 | if (passHoldNumber == 0) { |
2065 | 34 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2066 | 34 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2067 | 34 | break; |
2068 | 34 | } |
2069 | 20 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
2070 | 0 | return 0; |
2071 | 20 | if (passLine.chars[passLinepos] != pass_hyphen) { |
2072 | 20 | if (!appendInstructionChar( |
2073 | 20 | file, passInstructions, &passIC, passHoldNumber)) |
2074 | 0 | return 0; |
2075 | 20 | break; |
2076 | 20 | } |
2077 | 0 | passLinepos++; |
2078 | 0 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2079 | 0 | if (passHoldNumber == 0) { |
2080 | 0 | compileError(file, "invalid range"); |
2081 | 0 | return 0; |
2082 | 0 | } |
2083 | 0 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
2084 | 0 | return 0; |
2085 | 0 | break; |
2086 | 137 | case pass_groupstart: |
2087 | 148 | case pass_groupend: { |
2088 | 148 | passLinepos++; |
2089 | 148 | passGetName(&passLine, &passLinepos, &passHoldString); |
2090 | 148 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2091 | 148 | if (ruleOffset) |
2092 | 13 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2093 | 148 | if (rule && rule->opcode == CTO_Grouping) { |
2094 | 148 | if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
2095 | 0 | return 0; |
2096 | 148 | if (!appendInstructionChar( |
2097 | 148 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2098 | 0 | return 0; |
2099 | 148 | if (!appendInstructionChar( |
2100 | 148 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2101 | 0 | return 0; |
2102 | 148 | break; |
2103 | 148 | } else { |
2104 | 0 | compileError(file, "%s is not a grouping name", |
2105 | 0 | _lou_showString( |
2106 | 0 | &passHoldString.chars[0], passHoldString.length, 0)); |
2107 | 0 | return 0; |
2108 | 0 | } |
2109 | 0 | break; |
2110 | 148 | } |
2111 | 5 | case pass_swap: { |
2112 | 5 | passLinepos++; |
2113 | 5 | passGetName(&passLine, &passLinepos, &passHoldString); |
2114 | 5 | if ((class = findCharacterClass(&passHoldString, *table))) { |
2115 | 0 | passAttributes = class->attribute; |
2116 | 0 | goto insertAttributes; |
2117 | 0 | } |
2118 | 5 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2119 | 5 | if (ruleOffset) |
2120 | 4 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2121 | 5 | if (rule && |
2122 | 5 | (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
2123 | 4 | rule->opcode == CTO_SwapDd)) { |
2124 | 4 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
2125 | 0 | return 0; |
2126 | 4 | if (!appendInstructionChar( |
2127 | 4 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2128 | 0 | return 0; |
2129 | 4 | if (!appendInstructionChar( |
2130 | 4 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2131 | 0 | return 0; |
2132 | 4 | goto getRange; |
2133 | 4 | } |
2134 | 1 | compileError(file, "%s is neither a class name nor a swap name.", |
2135 | 1 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2136 | 1 | return 0; |
2137 | 5 | } |
2138 | 276 | case pass_endTest: |
2139 | 276 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_endTest)) |
2140 | 0 | return 0; |
2141 | 276 | if (replacing) { |
2142 | 0 | compileError(file, "expected end of replacement"); |
2143 | 0 | return 0; |
2144 | 0 | } |
2145 | 276 | passLinepos++; |
2146 | 276 | break; |
2147 | 3 | default: |
2148 | 3 | compileError(file, "incorrect operator '%c ' in test part", |
2149 | 3 | passLine.chars[passLinepos]); |
2150 | 3 | return 0; |
2151 | 1.14k | } |
2152 | | |
2153 | 1.14k | } /* Compile action part */ |
2154 | | |
2155 | | /* Compile action part */ |
2156 | 1.93k | while (passLinepos < passLine.length && passLine.chars[passLinepos] <= 32) |
2157 | 1.65k | passLinepos++; |
2158 | 826 | while (passLinepos < passLine.length && passLine.chars[passLinepos] > 32) { |
2159 | 548 | if (passIC >= MAXSTRING) { |
2160 | 0 | compileError(file, "Action part in multipass operand too long"); |
2161 | 0 | return 0; |
2162 | 0 | } |
2163 | 548 | switch ((passSubOp = passLine.chars[passLinepos])) { |
2164 | 152 | case pass_string: |
2165 | 152 | if (!verifyStringOrDots(file, opcode, 1, 1, nofor)) { |
2166 | 0 | return 0; |
2167 | 0 | } |
2168 | 152 | passLinepos++; |
2169 | 152 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
2170 | 0 | return 0; |
2171 | 152 | passGetString(&passLine, &passLinepos, &passHoldString, file); |
2172 | 152 | goto actionDoCharsDots; |
2173 | 69 | case pass_dots: |
2174 | 69 | if (!verifyStringOrDots(file, opcode, 0, 1, nofor)) { |
2175 | 0 | return 0; |
2176 | 0 | } |
2177 | 69 | passLinepos++; |
2178 | 69 | passGetDots(&passLine, &passLinepos, &passHoldString, file); |
2179 | 69 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
2180 | 0 | return 0; |
2181 | 69 | if (passHoldString.length == 0) { |
2182 | 0 | compileError(file, "expected dot pattern after @ operand in action part"); |
2183 | 0 | return 0; |
2184 | 0 | } |
2185 | 221 | actionDoCharsDots: |
2186 | 221 | if (passIC >= MAXSTRING) { |
2187 | 0 | compileError( |
2188 | 0 | file, "@ operand in action part of multipass operand too long"); |
2189 | 0 | return 0; |
2190 | 0 | } |
2191 | 221 | if (!appendInstructionChar( |
2192 | 221 | file, passInstructions, &passIC, passHoldString.length)) |
2193 | 0 | return 0; |
2194 | 3.00k | for (kk = 0; kk < passHoldString.length; kk++) { |
2195 | 2.78k | if (passIC >= MAXSTRING) { |
2196 | 0 | compileError(file, |
2197 | 0 | "@ operand in action part of multipass operand too long"); |
2198 | 0 | return 0; |
2199 | 0 | } |
2200 | 2.78k | if (!appendInstructionChar( |
2201 | 2.78k | file, passInstructions, &passIC, passHoldString.chars[kk])) |
2202 | 0 | return 0; |
2203 | 2.78k | } |
2204 | 221 | break; |
2205 | 221 | case pass_variable: |
2206 | 28 | passLinepos++; |
2207 | 28 | if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
2208 | 0 | return 0; |
2209 | 28 | switch (passLine.chars[passLinepos]) { |
2210 | 6 | case pass_eq: |
2211 | 6 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
2212 | 0 | return 0; |
2213 | 6 | if (!appendInstructionChar( |
2214 | 6 | file, passInstructions, &passIC, passHoldNumber)) |
2215 | 0 | return 0; |
2216 | 6 | passLinepos++; |
2217 | 6 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2218 | 6 | if (!appendInstructionChar( |
2219 | 6 | file, passInstructions, &passIC, passHoldNumber)) |
2220 | 0 | return 0; |
2221 | 6 | break; |
2222 | 21 | case pass_plus: |
2223 | 22 | case pass_hyphen: |
2224 | 22 | if (!appendInstructionChar(file, passInstructions, &passIC, |
2225 | 22 | passLine.chars[passLinepos++])) |
2226 | 0 | return 0; |
2227 | 22 | if (!appendInstructionChar( |
2228 | 22 | file, passInstructions, &passIC, passHoldNumber)) |
2229 | 0 | return 0; |
2230 | 22 | break; |
2231 | 22 | default: |
2232 | 0 | compileError(file, "incorrect variable operator in action part"); |
2233 | 0 | return 0; |
2234 | 28 | } |
2235 | 28 | break; |
2236 | 195 | case pass_copy: |
2237 | 195 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_copy)) |
2238 | 0 | return 0; |
2239 | 195 | passLinepos++; |
2240 | 195 | break; |
2241 | 53 | case pass_omit: |
2242 | 53 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_omit)) |
2243 | 0 | return 0; |
2244 | 53 | passLinepos++; |
2245 | 53 | break; |
2246 | 13 | case pass_groupreplace: |
2247 | 18 | case pass_groupstart: |
2248 | 22 | case pass_groupend: { |
2249 | 22 | passLinepos++; |
2250 | 22 | passGetName(&passLine, &passLinepos, &passHoldString); |
2251 | 22 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2252 | 22 | if (ruleOffset) |
2253 | 0 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2254 | 22 | if (rule && rule->opcode == CTO_Grouping) { |
2255 | 22 | if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
2256 | 0 | return 0; |
2257 | 22 | if (!appendInstructionChar( |
2258 | 22 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2259 | 0 | return 0; |
2260 | 22 | if (!appendInstructionChar( |
2261 | 22 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2262 | 0 | return 0; |
2263 | 22 | break; |
2264 | 22 | } |
2265 | 0 | compileError(file, "%s is not a grouping name", |
2266 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2267 | 0 | return 0; |
2268 | 22 | } |
2269 | 27 | case pass_swap: { |
2270 | 27 | passLinepos++; |
2271 | 27 | passGetName(&passLine, &passLinepos, &passHoldString); |
2272 | 27 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2273 | 27 | if (ruleOffset) |
2274 | 6 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2275 | 27 | if (rule && |
2276 | 27 | (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
2277 | 27 | rule->opcode == CTO_SwapDd)) { |
2278 | 27 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
2279 | 0 | return 0; |
2280 | 27 | if (!appendInstructionChar( |
2281 | 27 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2282 | 0 | return 0; |
2283 | 27 | if (!appendInstructionChar( |
2284 | 27 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2285 | 0 | return 0; |
2286 | 27 | break; |
2287 | 27 | } |
2288 | 0 | compileError(file, "%s is not a swap name.", |
2289 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2290 | 0 | return 0; |
2291 | 0 | break; |
2292 | 27 | } |
2293 | 2 | default: |
2294 | 2 | compileError(file, "incorrect operator in action part"); |
2295 | 2 | return 0; |
2296 | 548 | } |
2297 | 548 | } |
2298 | | |
2299 | | /* Analyze and add rule */ |
2300 | 278 | passRuleDots.length = passIC; |
2301 | | |
2302 | 278 | { |
2303 | 278 | widechar *characters; |
2304 | 278 | int length; |
2305 | 278 | int found = passFindCharacters( |
2306 | 278 | file, passInstructions, passRuleDots.length, &characters, &length); |
2307 | | |
2308 | 278 | if (!found) return 0; |
2309 | | |
2310 | 277 | if (characters) { |
2311 | 1.04k | for (k = 0; k < length; k += 1) passRuleChars.chars[k] = characters[k]; |
2312 | 19 | passRuleChars.length = k; |
2313 | 19 | } |
2314 | 277 | } |
2315 | | |
2316 | 277 | if (!addRule(file, opcode, &passRuleChars, &passRuleDots, 0, 0, NULL, NULL, noback, |
2317 | 277 | nofor, table)) |
2318 | 0 | return 0; |
2319 | 277 | return 1; |
2320 | 277 | } |
2321 | | |
2322 | | /* End of multipass compiler */ |
2323 | | |
2324 | | static int |
2325 | | compileBrailleIndicator(FileInfo *file, const char *ermsg, TranslationTableOpcode opcode, |
2326 | | TranslationTableOffset *ruleOffset, int noback, int nofor, |
2327 | 158 | TranslationTableHeader **table) { |
2328 | 158 | CharsString token; |
2329 | 158 | CharsString cells; |
2330 | 158 | if (!getToken(file, &token, ermsg)) return 0; |
2331 | 158 | if (!parseDots(file, &cells, &token)) return 0; |
2332 | 158 | return addRule( |
2333 | 158 | file, opcode, NULL, &cells, 0, 0, ruleOffset, NULL, noback, nofor, table); |
2334 | 158 | } |
2335 | | |
2336 | | static int |
2337 | 18 | compileNumber(FileInfo *file) { |
2338 | 18 | CharsString token; |
2339 | 18 | widechar number; |
2340 | 18 | if (!getToken(file, &token, "number")) return 0; |
2341 | 18 | getNumber(&token.chars[0], &number); |
2342 | 18 | if (!(number > 0)) { |
2343 | 0 | compileError(file, "a nonzero positive number is required"); |
2344 | 0 | return 0; |
2345 | 0 | } |
2346 | 18 | return number; |
2347 | 18 | } |
2348 | | |
2349 | | static int |
2350 | | compileGrouping(FileInfo *file, int noback, int nofor, TranslationTableHeader **table, |
2351 | 10 | DisplayTableHeader **displayTable) { |
2352 | 10 | int k; |
2353 | 10 | CharsString name; |
2354 | 10 | CharsString groupChars; |
2355 | 10 | CharsString groupDots; |
2356 | 10 | CharsString dotsParsed; |
2357 | 10 | if (!getToken(file, &name, "name operand")) return 0; |
2358 | 10 | if (!getRuleCharsText(file, &groupChars)) return 0; |
2359 | 10 | if (!getToken(file, &groupDots, "dots operand")) return 0; |
2360 | 30 | for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++) |
2361 | 20 | ; |
2362 | 10 | if (k == groupDots.length) { |
2363 | 0 | compileError(file, "Dots operand must consist of two cells separated by a comma"); |
2364 | 0 | return 0; |
2365 | 0 | } |
2366 | 10 | groupDots.chars[k] = '-'; |
2367 | 10 | if (!parseDots(file, &dotsParsed, &groupDots)) return 0; |
2368 | 10 | if (groupChars.length != 2 || dotsParsed.length != 2) { |
2369 | 0 | compileError(file, |
2370 | 0 | "two Unicode characters and two cells separated by a comma are needed."); |
2371 | 0 | return 0; |
2372 | 0 | } |
2373 | 10 | if (table) { |
2374 | 10 | TranslationTableOffset ruleOffset; |
2375 | 10 | TranslationTableCharacter *charsDotsPtr; |
2376 | 10 | charsDotsPtr = |
2377 | 10 | putChar(file, groupChars.chars[0], table, NULL, (*table)->ruleCounter); |
2378 | 10 | charsDotsPtr->attributes |= CTC_Math; |
2379 | 10 | charsDotsPtr = |
2380 | 10 | putChar(file, groupChars.chars[1], table, NULL, (*table)->ruleCounter); |
2381 | 10 | charsDotsPtr->attributes |= CTC_Math; |
2382 | 10 | charsDotsPtr = putDots(file, dotsParsed.chars[0], table, (*table)->ruleCounter); |
2383 | 10 | charsDotsPtr->attributes |= CTC_Math; |
2384 | 10 | charsDotsPtr = putDots(file, dotsParsed.chars[1], table, (*table)->ruleCounter); |
2385 | 10 | charsDotsPtr->attributes |= CTC_Math; |
2386 | 10 | if (!addRule(file, CTO_Grouping, &groupChars, &dotsParsed, 0, 0, &ruleOffset, |
2387 | 10 | NULL, noback, nofor, table)) |
2388 | 0 | return 0; |
2389 | 10 | if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
2390 | 10 | } |
2391 | 10 | if (displayTable) { |
2392 | 10 | putCharDotsMapping(file, groupChars.chars[0], dotsParsed.chars[0], displayTable); |
2393 | 10 | putCharDotsMapping(file, groupChars.chars[1], dotsParsed.chars[1], displayTable); |
2394 | 10 | } |
2395 | 10 | if (table) { |
2396 | 10 | widechar endChar; |
2397 | 10 | widechar endDots; |
2398 | 10 | endChar = groupChars.chars[1]; |
2399 | 10 | endDots = dotsParsed.chars[1]; |
2400 | 10 | groupChars.length = dotsParsed.length = 1; |
2401 | 10 | if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
2402 | 10 | nofor, table)) |
2403 | 0 | return 0; |
2404 | 10 | groupChars.chars[0] = endChar; |
2405 | 10 | dotsParsed.chars[0] = endDots; |
2406 | 10 | if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
2407 | 10 | nofor, table)) |
2408 | 0 | return 0; |
2409 | 10 | } |
2410 | 10 | return 1; |
2411 | 10 | } |
2412 | | |
2413 | | /* Functions for compiling hyphenation tables */ |
2414 | | |
2415 | | typedef struct HyphenDict { /* hyphenation dictionary: finite state machine */ |
2416 | | int numStates; /* number of states in use; also the index the next new state gets */ |
2417 | | HyphenationState *states; /* heap array of states, regrown by hyphenGetNewState() */ |
2418 | | } HyphenDict; |
2419 | | |
2420 | 21.0k | #define DEFAULTSTATE 0xffff /* "no such state": returned on a lookup miss, initial fallbackState */ |
2421 | 76.0k | #define HYPHENHASHSIZE 8191 /* number of buckets in a HyphenHashTab */ |
2422 | | |
2423 | | typedef struct HyphenHashEntry { /* one string -> state number mapping, chained per bucket */ |
2424 | | struct HyphenHashEntry *next; /* next entry in the same bucket, or NULL */ |
2425 | | CharsString *key; /* heap copy of the string made by hyphenHashInsert(), freed by hyphenHashFree() */ |
2426 | | int val; /* state number stored for key */ |
2427 | | } HyphenHashEntry; |
2428 | | |
2429 | | typedef struct HyphenHashTab { /* chained hash table from strings to state numbers */ |
2430 | | HyphenHashEntry *entries[HYPHENHASHSIZE]; /* bucket heads; NULL for an empty bucket */ |
2431 | | } HyphenHashTab; |
2432 | | |
2433 | | /* a hash function from ASU - adapted from Gtk+ */ |
2434 | | static unsigned int |
2435 | 10.4k | hyphenStringHash(const CharsString *s) { |
2436 | 10.4k | int k; |
2437 | 10.4k | unsigned int h = 0, g; |
2438 | 282k | for (k = 0; k < s->length; k++) { |
2439 | 271k | h = (h << 4) + s->chars[k]; |
2440 | 271k | if ((g = h & 0xf0000000)) { |
2441 | 201k | h = h ^ (g >> 24); |
2442 | 201k | h = h ^ g; |
2443 | 201k | } |
2444 | 271k | } |
2445 | 10.4k | return h; |
2446 | 10.4k | } |
2447 | | |
2448 | | static HyphenHashTab * |
2449 | 4 | hyphenHashNew(void) { |
2450 | 4 | HyphenHashTab *hashTab; |
2451 | 4 | if (!(hashTab = malloc(sizeof(HyphenHashTab)))) _lou_outOfMemory(); |
2452 | 4 | memset(hashTab, 0, sizeof(HyphenHashTab)); |
2453 | 4 | return hashTab; |
2454 | 4 | } |
2455 | | |
2456 | | static void |
2457 | 4 | hyphenHashFree(HyphenHashTab *hashTab) { |
2458 | 4 | int i; |
2459 | 4 | HyphenHashEntry *e, *next; |
2460 | 32.7k | for (i = 0; i < HYPHENHASHSIZE; i++) |
2461 | 33.2k | for (e = hashTab->entries[i]; e; e = next) { |
2462 | 439 | next = e->next; |
2463 | 439 | free(e->key); |
2464 | 439 | free(e); |
2465 | 439 | } |
2466 | 4 | free(hashTab); |
2467 | 4 | } |
2468 | | |
2469 | | /* assumes that key is not already present! */ |
2470 | | static void |
2471 | 439 | hyphenHashInsert(HyphenHashTab *hashTab, const CharsString *key, int val) { |
2472 | 439 | int i, j; |
2473 | 439 | HyphenHashEntry *e; |
2474 | 439 | i = hyphenStringHash(key) % HYPHENHASHSIZE; |
2475 | 439 | if (!(e = malloc(sizeof(HyphenHashEntry)))) _lou_outOfMemory(); |
2476 | 439 | e->next = hashTab->entries[i]; |
2477 | 439 | e->key = malloc((key->length + 1) * CHARSIZE); |
2478 | 439 | if (!e->key) _lou_outOfMemory(); |
2479 | 439 | e->key->length = key->length; |
2480 | 11.4k | for (j = 0; j < key->length; j++) e->key->chars[j] = key->chars[j]; |
2481 | 439 | e->val = val; |
2482 | 439 | hashTab->entries[i] = e; |
2483 | 439 | } |
2484 | | |
2485 | | /* return val if found, otherwise DEFAULTSTATE */ |
2486 | | static int |
2487 | 10.3k | hyphenHashLookup(HyphenHashTab *hashTab, const CharsString *key) { |
2488 | 10.3k | int i, j; |
2489 | 10.3k | HyphenHashEntry *e; |
2490 | 10.3k | if (key->length == 0) return 0; |
2491 | 10.0k | i = hyphenStringHash(key) % HYPHENHASHSIZE; |
2492 | 10.5k | for (e = hashTab->entries[i]; e; e = e->next) { |
2493 | 734 | if (key->length != e->key->length) continue; |
2494 | 1.75k | for (j = 0; j < key->length; j++) |
2495 | 1.54k | if (key->chars[j] != e->key->chars[j]) break; |
2496 | 222 | if (j == key->length) return e->val; |
2497 | 222 | } |
2498 | 9.84k | return DEFAULTSTATE; |
2499 | 10.0k | } |
2500 | | |
2501 | | static int |
2502 | 439 | hyphenGetNewState(HyphenDict *dict, HyphenHashTab *hashTab, const CharsString *string) { |
2503 | 439 | hyphenHashInsert(hashTab, string, dict->numStates); |
2504 | | /* predicate is true if dict->numStates is a power of two */ |
2505 | 439 | if (!(dict->numStates & (dict->numStates - 1))) |
2506 | 27 | dict->states = |
2507 | 27 | realloc(dict->states, (dict->numStates << 1) * sizeof(HyphenationState)); |
2508 | 439 | if (!dict->states) _lou_outOfMemory(); |
2509 | 439 | dict->states[dict->numStates].hyphenPattern = 0; |
2510 | 439 | dict->states[dict->numStates].fallbackState = DEFAULTSTATE; |
2511 | 439 | dict->states[dict->numStates].numTrans = 0; |
2512 | 439 | dict->states[dict->numStates].trans.pointer = NULL; |
2513 | 439 | return dict->numStates++; |
2514 | 439 | } |
2515 | | |
2516 | | /* add a transition from state1 to state2 through ch - assumes that the |
2517 | | * transition does not already exist */ |
2518 | | static void |
2519 | 439 | hyphenAddTrans(HyphenDict *dict, int state1, int state2, widechar ch) { |
2520 | 439 | int numTrans; |
2521 | 439 | numTrans = dict->states[state1].numTrans; |
2522 | 439 | if (numTrans == 0) |
2523 | 421 | dict->states[state1].trans.pointer = malloc(sizeof(HyphenationTrans)); |
2524 | 18 | else if (!(numTrans & (numTrans - 1))) |
2525 | 14 | dict->states[state1].trans.pointer = realloc(dict->states[state1].trans.pointer, |
2526 | 14 | (numTrans << 1) * sizeof(HyphenationTrans)); |
2527 | 439 | dict->states[state1].trans.pointer[numTrans].ch = ch; |
2528 | 439 | dict->states[state1].trans.pointer[numTrans].newState = state2; |
2529 | 439 | dict->states[state1].numTrans++; |
2530 | 439 | } |
2531 | | |
2532 | | static int |
2533 | | compileHyphenation( |
2534 | 4 | FileInfo *file, CharsString *encoding, TranslationTableHeader **table) { |
2535 | 4 | CharsString hyph; |
2536 | 4 | HyphenationTrans *holdPointer; |
2537 | 4 | HyphenHashTab *hashTab; |
2538 | 4 | CharsString word; |
2539 | 4 | char pattern[MAXSTRING + 1]; |
2540 | 4 | unsigned int stateNum = 0, lastState = 0; |
2541 | 4 | int i, j, k = encoding->length; |
2542 | 4 | widechar ch; |
2543 | 4 | int found; |
2544 | 4 | HyphenHashEntry *e; |
2545 | 4 | HyphenDict dict; |
2546 | 4 | TranslationTableOffset holdOffset; |
2547 | | /* Set aside enough space for hyphenation states and transitions in |
2548 | | * translation table. Must be done before anything else */ |
2549 | 4 | allocateSpaceInTranslationTable(file, NULL, 250000, table); |
2550 | 4 | hashTab = hyphenHashNew(); |
2551 | 4 | dict.numStates = 1; |
2552 | 4 | dict.states = malloc(sizeof(HyphenationState)); |
2553 | 4 | if (!dict.states) _lou_outOfMemory(); |
2554 | 4 | dict.states[0].hyphenPattern = 0; |
2555 | 4 | dict.states[0].fallbackState = DEFAULTSTATE; |
2556 | 4 | dict.states[0].numTrans = 0; |
2557 | 4 | dict.states[0].trans.pointer = NULL; |
2558 | 58 | do { |
2559 | 58 | if (encoding->chars[0] == 'I') { |
2560 | 3 | if (!getToken(file, &hyph, NULL)) continue; |
2561 | 55 | } else { |
2562 | | /* UTF-8 */ |
2563 | 55 | if (!getToken(file, &word, NULL)) continue; |
2564 | 47 | parseChars(file, &hyph, &word); |
2565 | 47 | } |
2566 | 50 | if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] == '%' || |
2567 | 50 | hyph.chars[0] == '<') |
2568 | 3 | continue; /* comment */ |
2569 | 47 | j = 0; |
2570 | 47 | pattern[j] = '0'; |
2571 | 879 | for (i = 0; i < hyph.length; i++) { |
2572 | 832 | if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9') |
2573 | 55 | pattern[j] = (char)hyph.chars[i]; |
2574 | 777 | else { |
2575 | 777 | word.chars[j] = hyph.chars[i]; |
2576 | 777 | pattern[++j] = '0'; |
2577 | 777 | } |
2578 | 832 | } |
2579 | 47 | word.chars[j] = 0; |
2580 | 47 | word.length = j; |
2581 | 47 | pattern[j + 1] = 0; |
2582 | 497 | for (i = 0; pattern[i] == '0'; i++) |
2583 | 450 | ; |
2584 | 47 | found = hyphenHashLookup(hashTab, &word); |
2585 | 47 | if (found != DEFAULTSTATE) |
2586 | 22 | stateNum = found; |
2587 | 25 | else |
2588 | 25 | stateNum = hyphenGetNewState(&dict, hashTab, &word); |
2589 | 47 | k = j + 2 - i; |
2590 | 47 | if (k > 0) { |
2591 | 47 | allocateSpaceInTranslationTable( |
2592 | 47 | file, &dict.states[stateNum].hyphenPattern, k, table); |
2593 | 47 | memcpy(&(*table)->ruleArea[dict.states[stateNum].hyphenPattern], &pattern[i], |
2594 | 47 | k); |
2595 | 47 | } |
2596 | | /* now, put in the prefix transitions */ |
2597 | 486 | while (found == DEFAULTSTATE) { |
2598 | 439 | lastState = stateNum; |
2599 | 439 | ch = word.chars[word.length-- - 1]; |
2600 | 439 | found = hyphenHashLookup(hashTab, &word); |
2601 | 439 | if (found != DEFAULTSTATE) |
2602 | 25 | stateNum = found; |
2603 | 414 | else |
2604 | 414 | stateNum = hyphenGetNewState(&dict, hashTab, &word); |
2605 | 439 | hyphenAddTrans(&dict, stateNum, lastState, ch); |
2606 | 439 | } |
2607 | 58 | } while (_lou_getALine(file)); |
2608 | | /* put in the fallback states */ |
2609 | 32.7k | for (i = 0; i < HYPHENHASHSIZE; i++) { |
2610 | 33.2k | for (e = hashTab->entries[i]; e; e = e->next) { |
2611 | 9.84k | for (j = 1; j <= e->key->length; j++) { |
2612 | 9.84k | word.length = 0; |
2613 | 259k | for (k = j; k < e->key->length; k++) |
2614 | 249k | word.chars[word.length++] = e->key->chars[k]; |
2615 | 9.84k | stateNum = hyphenHashLookup(hashTab, &word); |
2616 | 9.84k | if (stateNum != DEFAULTSTATE) break; |
2617 | 9.84k | } |
2618 | 439 | if (e->val) dict.states[e->val].fallbackState = stateNum; |
2619 | 439 | } |
2620 | 32.7k | } |
2621 | 4 | hyphenHashFree(hashTab); |
2622 | | /* Transfer hyphenation information to table */ |
2623 | 447 | for (i = 0; i < dict.numStates; i++) { |
2624 | 443 | if (dict.states[i].numTrans == 0) |
2625 | 22 | dict.states[i].trans.offset = 0; |
2626 | 421 | else { |
2627 | 421 | holdPointer = dict.states[i].trans.pointer; |
2628 | 421 | allocateSpaceInTranslationTable(file, &dict.states[i].trans.offset, |
2629 | 421 | dict.states[i].numTrans * sizeof(HyphenationTrans), table); |
2630 | 421 | memcpy(&(*table)->ruleArea[dict.states[i].trans.offset], holdPointer, |
2631 | 421 | dict.states[i].numTrans * sizeof(HyphenationTrans)); |
2632 | 421 | free(holdPointer); |
2633 | 421 | } |
2634 | 443 | } |
2635 | 4 | allocateSpaceInTranslationTable( |
2636 | 4 | file, &holdOffset, dict.numStates * sizeof(HyphenationState), table); |
2637 | 4 | (*table)->hyphenStatesArray = holdOffset; |
2638 | | /* Prevents segmentation fault if table is reallocated */ |
2639 | 4 | memcpy(&(*table)->ruleArea[(*table)->hyphenStatesArray], &dict.states[0], |
2640 | 4 | dict.numStates * sizeof(HyphenationState)); |
2641 | 4 | free(dict.states); |
2642 | 4 | return 1; |
2643 | 4 | } |
2644 | | |
2645 | | static int |
2646 | | compileCharDef(FileInfo *file, TranslationTableOpcode opcode, |
2647 | | TranslationTableCharacterAttributes attributes, int noback, int nofor, |
2648 | 329 | TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
2649 | 329 | CharsString ruleChars; |
2650 | 329 | CharsString ruleDots; |
2651 | 329 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
2652 | 329 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
2653 | 329 | if (ruleChars.length != 1) { |
2654 | 0 | compileError(file, "Exactly one character is required."); |
2655 | 0 | return 0; |
2656 | 0 | } |
2657 | 329 | if (ruleDots.length < 1) { |
2658 | 0 | compileError(file, "At least one cell is required."); |
2659 | 0 | return 0; |
2660 | 0 | } |
2661 | 329 | if (table) { |
2662 | 329 | TranslationTableCharacter *character; |
2663 | 329 | TranslationTableCharacter *cell = NULL; |
2664 | 329 | int k; |
2665 | 329 | if (attributes & (CTC_UpperCase | CTC_LowerCase)) attributes |= CTC_Letter; |
2666 | 329 | character = putChar(file, ruleChars.chars[0], table, NULL, (*table)->ruleCounter); |
2667 | 329 | character->attributes |= attributes; |
2668 | 667 | for (k = ruleDots.length - 1; k >= 0; k -= 1) { |
2669 | 338 | cell = getDots(ruleDots.chars[k], *table); |
2670 | 338 | if (!cell) |
2671 | 312 | cell = putDots(file, ruleDots.chars[k], table, (*table)->ruleCounter); |
2672 | 338 | } |
2673 | 329 | if (ruleDots.length == 1) cell->attributes |= attributes; |
2674 | 329 | } |
2675 | 329 | if (displayTable && ruleDots.length == 1) |
2676 | 328 | putCharDotsMapping(file, ruleChars.chars[0], ruleDots.chars[0], displayTable); |
2677 | 329 | if (table) |
2678 | 329 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, NULL, NULL, noback, nofor, |
2679 | 329 | table)) |
2680 | 0 | return 0; |
2681 | 329 | return 1; |
2682 | 329 | } |
2683 | | |
2684 | | static int |
2685 | 8 | compileBeforeAfter(FileInfo *file) { |
2686 | | /* 1=before, 2=after, 0=error */ |
2687 | 8 | CharsString token; |
2688 | 8 | CharsString tmp; |
2689 | 8 | if (!getToken(file, &token, "last word before or after")) return 0; |
2690 | 8 | if (!parseChars(file, &tmp, &token)) return 0; |
2691 | 8 | if (eqasc2uni((unsigned char *)"before", tmp.chars, 6)) |
2692 | 2 | return 1; |
2693 | 6 | else if (eqasc2uni((unsigned char *)"after", tmp.chars, 5)) |
2694 | 6 | return 2; |
2695 | 0 | return 0; |
2696 | 8 | } |
2697 | | |
2698 | | /** |
2699 | | * Macro: a named piece of table text with numbered argument slots. |
2699 | | * Built by compileMacro() via create_macro(), released with free_macro(). |
2700 | | */ |
2701 | | typedef struct { |
2702 | | const char *name; /* macro name; heap copy made by create_macro() */ |
2703 | | const widechar *definition; // fixed part: the body text without the "$N" markers, lines end in '\n' |
2704 | | int definition_length; /* number of characters in definition */ |
2705 | | const int *substitutions; // variable part: position and argument index of each |
2706 | | // variable substitution, stored as substitution_count pairs of ints |
2707 | | int substitution_count; /* number of pairs in substitutions */ |
2708 | | int argument_count; // number of expected arguments (the highest "$N" used in the body) |
2709 | | } Macro; |
2710 | | |
2711 | | /** |
2712 | | * List of in-scope macros: a singly linked list, NULL being the empty list. |
2712 | | * New macros are put in front with cons_macro(); free_macro_list() releases |
2712 | | * the nodes together with the macros they hold. |
2713 | | */ |
2714 | | typedef struct MacroList { |
2715 | | const Macro *head; /* macro held by this node */ |
2716 | | const struct MacroList *tail; /* rest of the list, or NULL */ |
2717 | | } MacroList; |
2718 | | |
2719 | | /** |
2720 | | * Create new macro. |
2721 | | */ |
2722 | | static const Macro * |
2723 | | create_macro(const char *name, const widechar *definition, int definition_length, |
2724 | 0 | const int *substitutions, int substitution_count, int argument_count) { |
2725 | 0 | Macro *m = malloc(sizeof(Macro)); |
2726 | 0 | m->name = strdup(name); |
2727 | 0 | widechar *definition_copy = malloc(definition_length * sizeof(widechar)); |
2728 | 0 | memcpy(definition_copy, definition, definition_length * sizeof(widechar)); |
2729 | 0 | m->definition = definition_copy; |
2730 | 0 | m->definition_length = definition_length; |
2731 | 0 | int *substitutions_copy = malloc(2 * substitution_count * sizeof(int)); |
2732 | 0 | memcpy(substitutions_copy, substitutions, 2 * substitution_count * sizeof(int)); |
2733 | 0 | m->substitutions = substitutions_copy; |
2734 | 0 | m->substitution_count = substitution_count; |
2735 | 0 | m->argument_count = argument_count; |
2736 | 0 | return m; |
2737 | 0 | } |
2738 | | |
2739 | | /** |
2740 | | * Create new macro list from "head" macro and "tail" list. |
2741 | | */ |
2742 | | static const MacroList * |
2743 | 0 | cons_macro(const Macro *head, const MacroList *tail) { |
2744 | 0 | MacroList *list = malloc(sizeof(MacroList)); |
2745 | 0 | list->head = head; |
2746 | 0 | list->tail = tail; |
2747 | 0 | return list; |
2748 | 0 | } |
2749 | | |
2750 | | /** |
2751 | | * Free macro returned by create_macro. |
2752 | | */ |
2753 | | static void |
2754 | 0 | free_macro(const Macro *macro) { |
2755 | 0 | if (macro) { |
2756 | 0 | free((char *)macro->name); |
2757 | 0 | free((char *)macro->definition); |
2758 | 0 | free((int *)macro->substitutions); |
2759 | 0 | free((Macro *)macro); |
2760 | 0 | } |
2761 | 0 | } |
2762 | | |
2763 | | /** |
2764 | | * Free macro list returned by cons_macro. |
2765 | | */ |
2766 | | static void |
2767 | 274 | free_macro_list(const MacroList *list) { |
2768 | 274 | if (list) { |
2769 | 0 | free_macro((Macro *)list->head); |
2770 | 0 | free_macro_list((MacroList *)list->tail); |
2771 | 0 | free((MacroList *)list); |
2772 | 0 | } |
2773 | 274 | } |
2774 | | |
2775 | | /** |
2776 | | * Compile macro |
2777 | | */ |
2778 | | static int |
2779 | 0 | compileMacro(FileInfo *file, const Macro **macro) { |
2780 | 0 |
|
2781 | 0 | // parse name |
2782 | 0 | CharsString token; |
2783 | 0 | if (!getToken(file, &token, "macro name")) return 0; |
2784 | 0 | switch (getOpcode(file, &token)) { |
2785 | 0 | case CTO_UpLow: // deprecated so "uplow" may be used as macro name |
2786 | 0 | case CTO_None: |
2787 | 0 | break; |
2788 | 0 | default: |
2789 | 0 | compileError(file, "Invalid macro name: already taken by an opcode"); |
2790 | 0 | return 0; |
2791 | 0 | } |
2792 | 0 | for (int i = 0; i < token.length; i++) { |
2793 | 0 | if (!((token.chars[i] >= 'a' && token.chars[i] <= 'z') || |
2794 | 0 | (token.chars[i] >= 'A' && token.chars[i] <= 'Z') || |
2795 | 0 | (token.chars[i] >= '0' && token.chars[i] <= '9'))) { |
2796 | 0 | compileError(file, |
2797 | 0 | "Invalid macro name: must be a word containing only letters and " |
2798 | 0 | "digits"); |
2799 | 0 | return 0; |
2800 | 0 | } |
2801 | 0 | } |
2802 | 0 | static char name[MAXSTRING + 1]; |
2803 | 0 | int name_length; |
2804 | 0 | for (name_length = 0; name_length < token.length; |
2805 | 0 | name_length++) // we know token can not be longer than MAXSTRING |
2806 | 0 | name[name_length] = (char)token.chars[name_length]; |
2807 | 0 | name[name_length] = '\0'; |
2808 | 0 |
|
2809 | 0 | // parse body |
2810 | 0 | static widechar definition[MAXSTRING]; |
2811 | 0 | static int substitutions[2 * MAX_MACRO_VAR]; |
2812 | 0 | int definition_length = 0; |
2813 | 0 | int substitution_count = 0; |
2814 | 0 | int argument_count = 0; |
2815 | 0 | int dollar = 0; |
2816 | 0 |
|
2817 | 0 | // ignore rest of line after name and read lines until "eom" is encountered |
2818 | 0 | while (_lou_getALine(file)) { |
2819 | 0 | if (file->linelen >= 3 && file->line[0] == 'e' && file->line[1] == 'o' && |
2820 | 0 | file->line[2] == 'm') { |
2821 | 0 | *macro = create_macro(name, definition, definition_length, substitutions, |
2822 | 0 | substitution_count, argument_count); |
2823 | 0 | return 1; |
2824 | 0 | } |
2825 | 0 | while (!atEndOfLine(file)) { |
2826 | 0 | widechar c = file->line[file->linepos++]; |
2827 | 0 | if (dollar) { |
2828 | 0 | dollar = 0; |
2829 | 0 | if (c >= '0' && c <= '9') { |
2830 | 0 | if (substitution_count >= MAX_MACRO_VAR) { |
2831 | 0 | compileError(file, |
2832 | 0 | "Macro can not have more than %d variable substitutions", |
2833 | 0 | MAXSTRING); |
2834 | 0 | return 0; |
2835 | 0 | } |
2836 | 0 | substitutions[2 * substitution_count] = definition_length; |
2837 | 0 | int arg = c - '0'; |
2838 | 0 | substitutions[2 * substitution_count + 1] = arg; |
2839 | 0 | if (arg > argument_count) argument_count = arg; |
2840 | 0 | substitution_count++; |
2841 | 0 | continue; |
2842 | 0 | } |
2843 | 0 | } else if (c == '$') { |
2844 | 0 | dollar = 1; |
2845 | 0 | continue; |
2846 | 0 | } |
2847 | 0 | if (definition_length >= MAXSTRING) { |
2848 | 0 | compileError(file, "Macro exceeds %d characters", MAXSTRING); |
2849 | 0 | return 0; |
2850 | 0 | } else |
2851 | 0 | definition[definition_length++] = c; |
2852 | 0 | } |
2853 | 0 | dollar = 0; |
2854 | 0 | if (definition_length >= MAXSTRING) { |
2855 | 0 | compileError(file, "Macro exceeds %d characters", MAXSTRING); |
2856 | 0 | return 0; |
2857 | 0 | } |
2858 | 0 | definition[definition_length++] = '\n'; |
2859 | 0 | } |
2860 | 0 | compileError(file, "macro must be terminated with 'eom'"); |
2861 | 0 | return 0; |
2862 | 0 | } |
2863 | | |
2864 | | static int |
2865 | | compileRule(FileInfo *file, TranslationTableHeader **table, |
2866 | 1.94k | DisplayTableHeader **displayTable, const MacroList **inScopeMacros) { |
2867 | 1.94k | CharsString token; |
2868 | 1.94k | TranslationTableOpcode opcode; |
2869 | 1.94k | CharsString ruleChars; |
2870 | 1.94k | CharsString ruleDots; |
2871 | 1.94k | CharsString cells; |
2872 | 1.94k | CharsString scratchPad; |
2873 | 1.94k | CharsString emphClass; |
2874 | 1.94k | TranslationTableCharacterAttributes after = 0; |
2875 | 1.94k | TranslationTableCharacterAttributes before = 0; |
2876 | 1.94k | int noback, nofor, nocross; |
2877 | 1.94k | noback = nofor = nocross = 0; |
2878 | 2.31k | doOpcode: |
2879 | 2.31k | if (!getToken(file, &token, NULL)) return 1; /* blank line */ |
2880 | 1.75k | if (token.chars[0] == '#' || token.chars[0] == '<') return 1; /* comment */ |
2881 | 1.71k | if (file->lineNumber == 1 && |
2882 | 1.71k | (eqasc2uni((unsigned char *)"ISO", token.chars, 3) || |
2883 | 685 | eqasc2uni((unsigned char *)"UTF-8", token.chars, 5))) { |
2884 | 4 | if (table) |
2885 | 4 | compileHyphenation(file, &token, table); |
2886 | 0 | else |
2887 | | /* ignore the whole file */ |
2888 | 0 | while (_lou_getALine(file)) |
2889 | 0 | ; |
2890 | 4 | return 1; |
2891 | 4 | } |
2892 | 1.71k | opcode = getOpcode(file, &token); |
2893 | 1.71k | switch (opcode) { |
2894 | 0 | case CTO_Macro: { |
2895 | 0 | const Macro *macro; |
2896 | | #ifdef ENABLE_MACROS |
2897 | | if (!inScopeMacros) { |
2898 | | compileError(file, "Defining macros only allowed in table files."); |
2899 | | return 0; |
2900 | | } |
2901 | | if (compileMacro(file, ¯o)) { |
2902 | | *inScopeMacros = cons_macro(macro, *inScopeMacros); |
2903 | | return 1; |
2904 | | } |
2905 | | return 0; |
2906 | | #else |
2907 | 0 | compileError(file, "Macro feature is disabled."); |
2908 | 0 | return 0; |
2909 | 0 | #endif |
2910 | 0 | } |
2911 | 1 | case CTO_IncludeFile: { |
2912 | 1 | CharsString includedFile; |
2913 | 1 | if (!getToken(file, &token, "include file name")) return 0; |
2914 | 1 | if (!parseChars(file, &includedFile, &token)) return 0; |
2915 | 1 | return includeFile(file, &includedFile, table, displayTable); |
2916 | 1 | } |
2917 | 238 | case CTO_NoBack: |
2918 | 238 | if (nofor) { |
2919 | 0 | compileError(file, "%s already specified.", _lou_findOpcodeName(CTO_NoFor)); |
2920 | 0 | return 0; |
2921 | 0 | } |
2922 | 238 | noback = 1; |
2923 | 238 | goto doOpcode; |
2924 | 117 | case CTO_NoFor: |
2925 | 117 | if (noback) { |
2926 | 0 | compileError(file, "%s already specified.", _lou_findOpcodeName(CTO_NoBack)); |
2927 | 0 | return 0; |
2928 | 0 | } |
2929 | 117 | nofor = 1; |
2930 | 117 | goto doOpcode; |
2931 | 273 | case CTO_Space: |
2932 | 273 | return compileCharDef( |
2933 | 273 | file, opcode, CTC_Space, noback, nofor, table, displayTable); |
2934 | 7 | case CTO_Digit: |
2935 | 7 | return compileCharDef( |
2936 | 7 | file, opcode, CTC_Digit, noback, nofor, table, displayTable); |
2937 | 1 | case CTO_LitDigit: |
2938 | 1 | return compileCharDef( |
2939 | 1 | file, opcode, CTC_LitDigit, noback, nofor, table, displayTable); |
2940 | 4 | case CTO_Punctuation: |
2941 | 4 | return compileCharDef( |
2942 | 4 | file, opcode, CTC_Punctuation, noback, nofor, table, displayTable); |
2943 | 0 | case CTO_Math: |
2944 | 0 | return compileCharDef(file, opcode, CTC_Math, noback, nofor, table, displayTable); |
2945 | 9 | case CTO_Sign: |
2946 | 9 | return compileCharDef(file, opcode, CTC_Sign, noback, nofor, table, displayTable); |
2947 | 9 | case CTO_Letter: |
2948 | 9 | return compileCharDef( |
2949 | 9 | file, opcode, CTC_Letter, noback, nofor, table, displayTable); |
2950 | 20 | case CTO_UpperCase: |
2951 | 20 | return compileCharDef( |
2952 | 20 | file, opcode, CTC_UpperCase, noback, nofor, table, displayTable); |
2953 | 6 | case CTO_LowerCase: |
2954 | 6 | return compileCharDef( |
2955 | 6 | file, opcode, CTC_LowerCase, noback, nofor, table, displayTable); |
2956 | 10 | case CTO_Grouping: |
2957 | 10 | return compileGrouping(file, noback, nofor, table, displayTable); |
2958 | 0 | case CTO_Display: |
2959 | 0 | if (!displayTable) return 1; // ignore |
2960 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
2961 | 0 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
2962 | 0 | if (ruleChars.length != 1 || ruleDots.length != 1) { |
2963 | 0 | compileError(file, "Exactly one character and one cell are required."); |
2964 | 0 | return 0; |
2965 | 0 | } |
2966 | 0 | return putCharDotsMapping( |
2967 | 0 | file, ruleChars.chars[0], ruleDots.chars[0], displayTable); |
2968 | 0 | case CTO_UpLow: |
2969 | 12 | case CTO_None: { |
2970 | | // check if token is a macro name |
2971 | 12 | if (inScopeMacros) { |
2972 | 12 | const MacroList *macros = *inScopeMacros; |
2973 | 12 | while (macros) { |
2974 | 0 | const Macro *m = macros->head; |
2975 | 0 | if (token.length == strlen(m->name) && |
2976 | 0 | eqasc2uni((unsigned char *)m->name, token.chars, token.length)) { |
2977 | 0 | if (!inScopeMacros) { |
2978 | 0 | compileError(file, "Calling macros only allowed in table files."); |
2979 | 0 | return 0; |
2980 | 0 | } |
2981 | 0 | FileInfo tmpFile; |
2982 | 0 | memset(&tmpFile, 0, sizeof(tmpFile)); |
2983 | 0 | tmpFile.fileName = file->fileName; |
2984 | 0 | tmpFile.sourceFile = file->sourceFile; |
2985 | 0 | tmpFile.lineNumber = file->lineNumber; |
2986 | 0 | tmpFile.encoding = noEncoding; |
2987 | 0 | tmpFile.status = 0; |
2988 | 0 | tmpFile.linepos = 0; |
2989 | 0 | tmpFile.linelen = 0; |
2990 | 0 | int argument_count = 0; |
2991 | 0 | CharsString *arguments = |
2992 | 0 | malloc(m->argument_count * sizeof(CharsString)); |
2993 | 0 | while (argument_count < m->argument_count) { |
2994 | 0 | if (getToken(file, &token, "macro argument")) |
2995 | 0 | arguments[argument_count++] = token; |
2996 | 0 | else |
2997 | 0 | break; |
2998 | 0 | } |
2999 | 0 | if (argument_count < m->argument_count) { |
3000 | 0 | compileError(file, "Expected %d arguments", m->argument_count); |
3001 | 0 | return 0; |
3002 | 0 | } |
3003 | 0 | int i = 0; |
3004 | 0 | int subst = 0; |
3005 | 0 | int next = subst < m->substitution_count ? m->substitutions[2 * subst] |
3006 | 0 | : m->definition_length; |
3007 | 0 | for (;;) { |
3008 | 0 | while (i < next) { |
3009 | 0 | widechar c = m->definition[i++]; |
3010 | 0 | if (c == '\n') { |
3011 | 0 | if (!compileRule(&tmpFile, table, displayTable, |
3012 | 0 | inScopeMacros)) { |
3013 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3014 | 0 | "result of macro expansion was: %s", |
3015 | 0 | _lou_showString( |
3016 | 0 | tmpFile.line, tmpFile.linelen, 0)); |
3017 | 0 | return 0; |
3018 | 0 | } |
3019 | 0 | tmpFile.linepos = 0; |
3020 | 0 | tmpFile.linelen = 0; |
3021 | 0 | } else if (tmpFile.linelen >= MAXSTRING) { |
3022 | 0 | compileError(file, |
3023 | 0 | "Line exceeds %d characters (post macro " |
3024 | 0 | "expansion)", |
3025 | 0 | MAXSTRING); |
3026 | 0 | return 0; |
3027 | 0 | } else |
3028 | 0 | tmpFile.line[tmpFile.linelen++] = c; |
3029 | 0 | } |
3030 | 0 | if (subst < m->substitution_count) { |
3031 | 0 | CharsString arg = |
3032 | 0 | arguments[m->substitutions[2 * subst + 1] - 1]; |
3033 | 0 | for (int j = 0; j < arg.length; j++) |
3034 | 0 | tmpFile.line[tmpFile.linelen++] = arg.chars[j]; |
3035 | 0 | subst++; |
3036 | 0 | next = subst < m->substitution_count |
3037 | 0 | ? m->substitutions[2 * subst] |
3038 | 0 | : m->definition_length; |
3039 | 0 | } else { |
3040 | 0 | if (!compileRule( |
3041 | 0 | &tmpFile, table, displayTable, inScopeMacros)) { |
3042 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3043 | 0 | "result of macro expansion was: %s", |
3044 | 0 | _lou_showString( |
3045 | 0 | tmpFile.line, tmpFile.linelen, 0)); |
3046 | 0 | return 0; |
3047 | 0 | } |
3048 | 0 | break; |
3049 | 0 | } |
3050 | 0 | } |
3051 | 0 | return 1; |
3052 | 0 | } |
3053 | 0 | macros = macros->tail; |
3054 | 0 | } |
3055 | 12 | } |
3056 | 12 | if (opcode == CTO_UpLow) { |
3057 | 0 | compileError(file, "The uplow opcode is deprecated."); |
3058 | 0 | return 0; |
3059 | 0 | } |
3060 | 12 | compileError(file, "opcode %s not defined.", |
3061 | 12 | _lou_showString(token.chars, token.length, 0)); |
3062 | 12 | return 0; |
3063 | 12 | } |
3064 | | |
3065 | | /* now only opcodes follow that don't modify the display table */ |
3066 | 1.00k | default: |
3067 | 1.00k | if (!table) return 1; |
3068 | 1.00k | switch (opcode) { |
3069 | 3 | case CTO_Locale: |
3070 | 3 | compileWarning(file, |
3071 | 3 | "The locale opcode is not implemented. Use the locale meta data " |
3072 | 3 | "instead."); |
3073 | 3 | return 1; |
3074 | 28 | case CTO_Undefined: { |
3075 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3076 | 28 | TranslationTableOffset ruleOffset = (*table)->undefined; |
3077 | 28 | if (!compileBrailleIndicator(file, "undefined character opcode", |
3078 | 28 | CTO_Undefined, &ruleOffset, noback, nofor, table)) |
3079 | 0 | return 0; |
3080 | 28 | (*table)->undefined = ruleOffset; |
3081 | 28 | return 1; |
3082 | 28 | } |
3083 | 8 | case CTO_Match: { |
3084 | 8 | int ok = 0; |
3085 | 8 | widechar *patterns = NULL; |
3086 | 8 | TranslationTableRule *rule; |
3087 | 8 | TranslationTableOffset ruleOffset; |
3088 | 8 | CharsString ptn_before, ptn_after; |
3089 | 8 | TranslationTableOffset patternsOffset; |
3090 | 8 | int len, mrk; |
3091 | 8 | size_t patternsByteSize = sizeof(*patterns) * 27720; |
3092 | 8 | patterns = (widechar *)malloc(patternsByteSize); |
3093 | 8 | if (!patterns) _lou_outOfMemory(); |
3094 | 8 | memset(patterns, 0xffff, patternsByteSize); |
3095 | 8 | noback = 1; |
3096 | 8 | getCharacters(file, &ptn_before); |
3097 | 8 | getRuleCharsText(file, &ruleChars); |
3098 | 8 | getCharacters(file, &ptn_after); |
3099 | 8 | getRuleDotsPattern(file, &ruleDots); |
3100 | 8 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, &ruleOffset, |
3101 | 8 | &rule, noback, nofor, table)) |
3102 | 0 | goto CTO_Match_cleanup; |
3103 | 8 | if (ptn_before.chars[0] == '-' && ptn_before.length == 1) |
3104 | 0 | len = _lou_pattern_compile( |
3105 | 0 | &ptn_before.chars[0], 0, &patterns[1], 13841, *table, file); |
3106 | 8 | else |
3107 | 8 | len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length, |
3108 | 8 | &patterns[1], 13841, *table, file); |
3109 | 8 | if (!len) goto CTO_Match_cleanup; |
3110 | 8 | mrk = patterns[0] = len + 1; |
3111 | 8 | _lou_pattern_reverse(&patterns[1]); |
3112 | 8 | if (ptn_after.chars[0] == '-' && ptn_after.length == 1) |
3113 | 0 | len = _lou_pattern_compile( |
3114 | 0 | &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table, file); |
3115 | 8 | else |
3116 | 8 | len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length, |
3117 | 8 | &patterns[mrk], 13841, *table, file); |
3118 | 8 | if (!len) goto CTO_Match_cleanup; |
3119 | 7 | len += mrk; |
3120 | 7 | if (!allocateSpaceInTranslationTable( |
3121 | 7 | file, &patternsOffset, len * sizeof(widechar), table)) |
3122 | 0 | goto CTO_Match_cleanup; |
3123 | | // allocateSpaceInTranslationTable may have moved table, so make sure rule is |
3124 | | // still valid |
3125 | 7 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
3126 | 7 | memcpy(&(*table)->ruleArea[patternsOffset], patterns, len * sizeof(widechar)); |
3127 | 7 | rule->patterns = patternsOffset; |
3128 | 7 | ok = 1; |
3129 | 8 | CTO_Match_cleanup: |
3130 | 8 | free(patterns); |
3131 | 8 | return ok; |
3132 | 7 | } |
3133 | | |
3134 | 8 | case CTO_BackMatch: { |
3135 | 8 | int ok = 0; |
3136 | 8 | widechar *patterns = NULL; |
3137 | 8 | TranslationTableRule *rule; |
3138 | 8 | TranslationTableOffset ruleOffset; |
3139 | 8 | CharsString ptn_before, ptn_after; |
3140 | 8 | TranslationTableOffset patternOffset; |
3141 | 8 | int len, mrk; |
3142 | 8 | size_t patternsByteSize = sizeof(*patterns) * 27720; |
3143 | 8 | patterns = (widechar *)malloc(patternsByteSize); |
3144 | 8 | if (!patterns) _lou_outOfMemory(); |
3145 | 8 | memset(patterns, 0xffff, patternsByteSize); |
3146 | 8 | nofor = 1; |
3147 | 8 | getCharacters(file, &ptn_before); |
3148 | 8 | getRuleCharsText(file, &ruleChars); |
3149 | 8 | getCharacters(file, &ptn_after); |
3150 | 8 | getRuleDotsPattern(file, &ruleDots); |
3151 | 8 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, &rule, |
3152 | 8 | noback, nofor, table)) |
3153 | 0 | goto CTO_BackMatch_cleanup; |
3154 | 8 | if (ptn_before.chars[0] == '-' && ptn_before.length == 1) |
3155 | 0 | len = _lou_pattern_compile( |
3156 | 0 | &ptn_before.chars[0], 0, &patterns[1], 13841, *table, file); |
3157 | 8 | else |
3158 | 8 | len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length, |
3159 | 8 | &patterns[1], 13841, *table, file); |
3160 | 8 | if (!len) goto CTO_BackMatch_cleanup; |
3161 | 8 | mrk = patterns[0] = len + 1; |
3162 | 8 | _lou_pattern_reverse(&patterns[1]); |
3163 | 8 | if (ptn_after.chars[0] == '-' && ptn_after.length == 1) |
3164 | 0 | len = _lou_pattern_compile( |
3165 | 0 | &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table, file); |
3166 | 8 | else |
3167 | 8 | len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length, |
3168 | 8 | &patterns[mrk], 13841, *table, file); |
3169 | 8 | if (!len) goto CTO_BackMatch_cleanup; |
3170 | 8 | len += mrk; |
3171 | 8 | if (!allocateSpaceInTranslationTable( |
3172 | 8 | file, &patternOffset, len * sizeof(widechar), table)) |
3173 | 0 | goto CTO_BackMatch_cleanup; |
3174 | | // allocateSpaceInTranslationTable may have moved table, so make sure rule is |
3175 | | // still valid |
3176 | 8 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
3177 | 8 | memcpy(&(*table)->ruleArea[patternOffset], patterns, len * sizeof(widechar)); |
3178 | 8 | rule->patterns = patternOffset; |
3179 | 8 | ok = 1; |
3180 | 8 | CTO_BackMatch_cleanup: |
3181 | 8 | free(patterns); |
3182 | 8 | return ok; |
3183 | 8 | } |
3184 | | |
3185 | 10 | case CTO_CapsLetter: |
3186 | 49 | case CTO_BegCapsWord: |
3187 | 59 | case CTO_EndCapsWord: |
3188 | 67 | case CTO_BegCaps: |
3189 | 72 | case CTO_EndCaps: |
3190 | 86 | case CTO_BegCapsPhrase: |
3191 | 94 | case CTO_EndCapsPhrase: |
3192 | 112 | case CTO_LenCapsPhrase: |
3193 | | /* these 8 general purpose opcodes are compiled further down to more specific |
3194 | | * internal opcodes: |
3195 | | * - modeletter |
3196 | | * - begmodeword |
3197 | | * - endmodeword |
3198 | | * - begmode |
3199 | | * - endmode |
3200 | | * - begmodephrase |
3201 | | * - endmodephrase |
3202 | | * - lenmodephrase |
3203 | | */ |
3204 | 112 | case CTO_ModeLetter: |
3205 | 118 | case CTO_BegModeWord: |
3206 | 118 | case CTO_EndModeWord: |
3207 | 119 | case CTO_BegMode: |
3208 | 119 | case CTO_EndMode: |
3209 | 119 | case CTO_BegModePhrase: |
3210 | 119 | case CTO_EndModePhrase: |
3211 | 119 | case CTO_LenModePhrase: { |
3212 | 119 | TranslationTableCharacterAttributes mode; |
3213 | 119 | int i; |
3214 | 119 | switch (opcode) { |
3215 | 10 | case CTO_CapsLetter: |
3216 | 49 | case CTO_BegCapsWord: |
3217 | 59 | case CTO_EndCapsWord: |
3218 | 67 | case CTO_BegCaps: |
3219 | 72 | case CTO_EndCaps: |
3220 | 86 | case CTO_BegCapsPhrase: |
3221 | 94 | case CTO_EndCapsPhrase: |
3222 | 112 | case CTO_LenCapsPhrase: |
3223 | 112 | mode = CTC_UpperCase; |
3224 | 112 | i = 0; |
3225 | 112 | opcode += (CTO_ModeLetter - CTO_CapsLetter); |
3226 | 112 | break; |
3227 | 7 | default: |
3228 | 7 | if (!getToken(file, &token, "attribute name")) return 0; |
3229 | 7 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
3230 | 0 | return 0; |
3231 | 0 | } |
3232 | 7 | const CharacterClass *characterClass = findCharacterClass(&token, *table); |
3233 | 7 | if (!characterClass) { |
3234 | 7 | characterClass = |
3235 | 7 | addCharacterClass(file, token.chars, token.length, *table, 1); |
3236 | 7 | if (!characterClass) return 0; |
3237 | 7 | } |
3238 | 7 | mode = characterClass->attribute; |
3239 | 7 | if (!(mode == CTC_UpperCase || mode == CTC_Digit) && mode >= CTC_Space && |
3240 | 7 | mode <= CTC_LitDigit) { |
3241 | 0 | compileError(file, |
3242 | 0 | "mode must be \"uppercase\", \"digit\", or a custom " |
3243 | 0 | "attribute name."); |
3244 | 0 | return 0; |
3245 | 0 | } |
3246 | | /* check if this mode is already defined and if the number of modes does |
3247 | | * not exceed the maximal number */ |
3248 | 7 | if (mode == CTC_UpperCase) |
3249 | 0 | i = 0; |
3250 | 7 | else { |
3251 | 7 | for (i = 1; i < MAX_MODES && (*table)->modes[i].value; i++) { |
3252 | 0 | if ((*table)->modes[i].mode == mode) { |
3253 | 0 | break; |
3254 | 0 | } |
3255 | 0 | } |
3256 | 7 | if (i == MAX_MODES) { |
3257 | 0 | compileError(file, "Max number of modes (%i) reached", MAX_MODES); |
3258 | 0 | return 0; |
3259 | 0 | } |
3260 | 7 | } |
3261 | 119 | } |
3262 | 119 | if (!(*table)->modes[i].value) |
3263 | 74 | (*table)->modes[i] = (EmphasisClass){ plain_text, mode, |
3264 | 74 | 0x1 << (MAX_EMPH_CLASSES + i), MAX_EMPH_CLASSES + i }; |
3265 | 119 | switch (opcode) { |
3266 | 14 | case CTO_BegModePhrase: { |
3267 | | // not passing pointer because compileBrailleIndicator may reallocate |
3268 | | // table |
3269 | 14 | TranslationTableOffset ruleOffset = |
3270 | 14 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begPhraseOffset]; |
3271 | 14 | if (!compileBrailleIndicator(file, "first word capital sign", |
3272 | | // when mode is not caps (i != 0), provide enough information |
3273 | | // for back-translator to be able to recognize and ignore the |
3274 | | // indicator (but it won't be able to determine the mode) |
3275 | 14 | i == 0 ? CTO_BegCapsPhrase : CTO_BegModePhrase, &ruleOffset, |
3276 | 14 | noback, nofor, table)) |
3277 | 0 | return 0; |
3278 | 14 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begPhraseOffset] = ruleOffset; |
3279 | 14 | return 1; |
3280 | 14 | } |
3281 | 8 | case CTO_EndModePhrase: { |
3282 | 8 | TranslationTableOffset ruleOffset; |
3283 | 8 | switch (compileBeforeAfter(file)) { |
3284 | 2 | case 1: // before |
3285 | 2 | if ((*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseAfterOffset]) { |
3286 | 0 | compileError( |
3287 | 0 | file, "Capital sign after last word already defined."); |
3288 | 0 | return 0; |
3289 | 0 | } |
3290 | | // not passing pointer because compileBrailleIndicator may reallocate |
3291 | | // table |
3292 | 2 | ruleOffset = (*table)->emphRules[MAX_EMPH_CLASSES + i] |
3293 | 2 | [endPhraseBeforeOffset]; |
3294 | 2 | if (!compileBrailleIndicator(file, "capital sign before last word", |
3295 | 2 | i == 0 ? CTO_EndCapsPhraseBefore : CTO_EndModePhrase, |
3296 | 2 | &ruleOffset, noback, nofor, table)) |
3297 | 0 | return 0; |
3298 | 2 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseBeforeOffset] = |
3299 | 2 | ruleOffset; |
3300 | 2 | return 1; |
3301 | 6 | case 2: // after |
3302 | 6 | if ((*table)->emphRules[MAX_EMPH_CLASSES + i] |
3303 | 6 | [endPhraseBeforeOffset]) { |
3304 | 0 | compileError( |
3305 | 0 | file, "Capital sign before last word already defined."); |
3306 | 0 | return 0; |
3307 | 0 | } |
3308 | | // not passing pointer because compileBrailleIndicator may reallocate |
3309 | | // table |
3310 | 6 | ruleOffset = (*table)->emphRules[MAX_EMPH_CLASSES + i] |
3311 | 6 | [endPhraseAfterOffset]; |
3312 | 6 | if (!compileBrailleIndicator(file, "capital sign after last word", |
3313 | 6 | i == 0 ? CTO_EndCapsPhraseAfter : CTO_EndModePhrase, |
3314 | 6 | &ruleOffset, noback, nofor, table)) |
3315 | 0 | return 0; |
3316 | 6 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseAfterOffset] = |
3317 | 6 | ruleOffset; |
3318 | 6 | return 1; |
3319 | 0 | default: // error |
3320 | 0 | compileError(file, "Invalid lastword indicator location."); |
3321 | 0 | return 0; |
3322 | 8 | } |
3323 | 0 | return 0; |
3324 | 8 | } |
3325 | 9 | case CTO_BegMode: { |
3326 | | // not passing pointer because compileBrailleIndicator may reallocate |
3327 | | // table |
3328 | 9 | TranslationTableOffset ruleOffset = |
3329 | 9 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begOffset]; |
3330 | 9 | if (!compileBrailleIndicator(file, "first letter capital sign", |
3331 | 9 | i == 0 ? CTO_BegCaps : CTO_BegMode, &ruleOffset, noback, |
3332 | 9 | nofor, table)) |
3333 | 0 | return 0; |
3334 | 9 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begOffset] = ruleOffset; |
3335 | 9 | return 1; |
3336 | 9 | } |
3337 | 5 | case CTO_EndMode: { |
3338 | | // not passing pointer because compileBrailleIndicator may reallocate |
3339 | | // table |
3340 | 5 | TranslationTableOffset ruleOffset = |
3341 | 5 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endOffset]; |
3342 | 5 | if (!compileBrailleIndicator(file, "last letter capital sign", |
3343 | 5 | i == 0 ? CTO_EndCaps : CTO_EndMode, &ruleOffset, noback, |
3344 | 5 | nofor, table)) |
3345 | 0 | return 0; |
3346 | 5 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endOffset] = ruleOffset; |
3347 | 5 | return 1; |
3348 | 5 | } |
3349 | 10 | case CTO_ModeLetter: { |
3350 | | // not passing pointer because compileBrailleIndicator may reallocate |
3351 | | // table |
3352 | 10 | TranslationTableOffset ruleOffset = |
3353 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][letterOffset]; |
3354 | 10 | if (!compileBrailleIndicator(file, "single letter capital sign", |
3355 | 10 | i == 0 ? CTO_CapsLetter : CTO_ModeLetter, &ruleOffset, noback, |
3356 | 10 | nofor, table)) |
3357 | 0 | return 0; |
3358 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][letterOffset] = ruleOffset; |
3359 | 10 | return 1; |
3360 | 10 | } |
3361 | 45 | case CTO_BegModeWord: { |
3362 | | // not passing pointer because compileBrailleIndicator may reallocate |
3363 | | // table |
3364 | 45 | TranslationTableOffset ruleOffset = |
3365 | 45 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begWordOffset]; |
3366 | 45 | if (!compileBrailleIndicator(file, "capital word", |
3367 | 45 | i == 0 ? CTO_BegCapsWord : CTO_BegModeWord, &ruleOffset, |
3368 | 45 | noback, nofor, table)) |
3369 | 0 | return 0; |
3370 | 45 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begWordOffset] = ruleOffset; |
3371 | 45 | return 1; |
3372 | 45 | } |
3373 | 10 | case CTO_EndModeWord: { |
3374 | | // not passing pointer because compileBrailleIndicator may reallocate |
3375 | | // table |
3376 | 10 | TranslationTableOffset ruleOffset = |
3377 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endWordOffset]; |
3378 | 10 | if (!compileBrailleIndicator(file, "capital word stop", |
3379 | 10 | i == 0 ? CTO_EndCapsWord : CTO_EndModeWord, &ruleOffset, |
3380 | 10 | noback, nofor, table)) |
3381 | 0 | return 0; |
3382 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endWordOffset] = ruleOffset; |
3383 | 10 | return 1; |
3384 | 10 | } |
3385 | 18 | case CTO_LenModePhrase: |
3386 | 18 | return (*table)->emphRules[MAX_EMPH_CLASSES + i][lenPhraseOffset] = |
3387 | 18 | compileNumber(file); |
3388 | 0 | default: |
3389 | 0 | break; |
3390 | 119 | } |
3391 | 0 | break; |
3392 | 119 | } |
3393 | | |
3394 | | /* these 8 general purpose emphasis opcodes are compiled further down to more |
3395 | | * specific internal opcodes: |
3396 | | * - emphletter |
3397 | | * - begemphword |
3398 | | * - endemphword |
3399 | | * - begemph |
3400 | | * - endemph |
3401 | | * - begemphphrase |
3402 | | * - endemphphrase |
3403 | | * - lenemphphrase |
3404 | | */ |
3405 | 37 | case CTO_EmphClass: |
3406 | 37 | if (!getToken(file, &emphClass, "emphasis class")) { |
3407 | 0 | compileError(file, "emphclass must be followed by a valid class name."); |
3408 | 0 | return 0; |
3409 | 0 | } |
3410 | 37 | int k, i; |
3411 | 37 | char *s = malloc(sizeof(char) * (emphClass.length + 1)); |
3412 | 404 | for (k = 0; k < emphClass.length; k++) s[k] = (char)emphClass.chars[k]; |
3413 | 37 | s[k++] = '\0'; |
3414 | 98 | for (i = 0; i < MAX_EMPH_CLASSES && (*table)->emphClassNames[i]; i++) |
3415 | 68 | if (strcmp(s, (*table)->emphClassNames[i]) == 0) { |
3416 | 7 | _lou_logMessage(LOU_LOG_DEBUG, "Duplicate emphasis class: %s", s); |
3417 | 7 | free(s); |
3418 | 7 | return 1; |
3419 | 7 | } |
3420 | 30 | if (i == MAX_EMPH_CLASSES) { |
3421 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3422 | 0 | "Max number of emphasis classes (%i) reached", MAX_EMPH_CLASSES); |
3423 | 0 | errorCount++; |
3424 | 0 | free(s); |
3425 | 0 | return 0; |
3426 | 0 | } |
3427 | 30 | switch (i) { |
3428 | | /* For backwards compatibility (i.e. because programs will assume |
3429 | | * the first 3 typeform bits are `italic', `underline' and `bold') |
3430 | | * we require that the first 3 emphclass definitions are (in that |
3431 | | * order): |
3432 | | * |
3433 | | * emphclass italic |
3434 | | * emphclass underline |
3435 | | * emphclass bold |
3436 | | * |
3437 | | * While it would be possible to use the emphclass opcode only for |
3438 | | * defining _additional_ classes (not allowing for them to be called |
3439 | | * italic, underline or bold), thereby reducing the amount of |
3440 | | * boilerplate, we deliberately choose not to do that in order to |
3441 | | * not give italic, underline and bold any special status. The |
3442 | | * hope is that eventually all programs will use liblouis for |
3443 | | * emphasis the recommended way (i.e. by looking up the supported |
3444 | | * typeforms in the documentation or API) so that we can drop this |
3445 | | * restriction. |
3446 | | */ |
3447 | 12 | case 0: |
3448 | 12 | if (strcmp(s, "italic") != 0) { |
3449 | 1 | _lou_logMessage(LOU_LOG_ERROR, |
3450 | 1 | "First emphasis class must be \"italic\" but got " |
3451 | 1 | "%s", |
3452 | 1 | s); |
3453 | 1 | errorCount++; |
3454 | 1 | free(s); |
3455 | 1 | return 0; |
3456 | 1 | } |
3457 | 11 | break; |
3458 | 11 | case 1: |
3459 | 7 | if (strcmp(s, "underline") != 0) { |
3460 | 3 | _lou_logMessage(LOU_LOG_ERROR, |
3461 | 3 | "Second emphasis class must be \"underline\" but " |
3462 | 3 | "got " |
3463 | 3 | "%s", |
3464 | 3 | s); |
3465 | 3 | errorCount++; |
3466 | 3 | free(s); |
3467 | 3 | return 0; |
3468 | 3 | } |
3469 | 4 | break; |
3470 | 4 | case 2: |
3471 | 3 | if (strcmp(s, "bold") != 0) { |
3472 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3473 | 0 | "Third emphasis class must be \"bold\" but got " |
3474 | 0 | "%s", |
3475 | 0 | s); |
3476 | 0 | errorCount++; |
3477 | 0 | free(s); |
3478 | 0 | return 0; |
3479 | 0 | } |
3480 | 3 | break; |
3481 | 30 | } |
3482 | 26 | (*table)->emphClassNames[i] = s; |
3483 | 26 | (*table)->emphClasses[i] = (EmphasisClass){ emph_1 |
3484 | 26 | << i, /* relies on the order of typeforms emph_1..emph_10 */ |
3485 | 26 | 0, 0x1 << i, i }; |
3486 | 26 | return 1; |
3487 | 0 | case CTO_EmphLetter: |
3488 | 5 | case CTO_BegEmphWord: |
3489 | 5 | case CTO_EndEmphWord: |
3490 | 6 | case CTO_BegEmph: |
3491 | 6 | case CTO_EndEmph: |
3492 | 6 | case CTO_BegEmphPhrase: |
3493 | 6 | case CTO_EndEmphPhrase: |
3494 | 6 | case CTO_LenEmphPhrase: |
3495 | 6 | case CTO_EmphModeChars: |
3496 | 6 | case CTO_NoEmphChars: { |
3497 | 6 | if (!getToken(file, &token, "emphasis class")) return 0; |
3498 | 6 | if (!parseChars(file, &emphClass, &token)) return 0; |
3499 | 6 | char *s = malloc(sizeof(char) * (emphClass.length + 1)); |
3500 | 6 | int k, i; |
3501 | 42 | for (k = 0; k < emphClass.length; k++) s[k] = (char)emphClass.chars[k]; |
3502 | 6 | s[k++] = '\0'; |
3503 | 6 | for (i = 0; i < MAX_EMPH_CLASSES && (*table)->emphClassNames[i]; i++) |
3504 | 6 | if (strcmp(s, (*table)->emphClassNames[i]) == 0) break; |
3505 | 6 | if (i == MAX_EMPH_CLASSES || !(*table)->emphClassNames[i]) { |
3506 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Emphasis class %s not declared", s); |
3507 | 0 | errorCount++; |
3508 | 0 | free(s); |
3509 | 0 | return 0; |
3510 | 0 | } |
3511 | 6 | int ok = 0; |
3512 | 6 | switch (opcode) { |
3513 | 0 | case CTO_EmphLetter: { |
3514 | | // not passing pointer because compileBrailleIndicator may reallocate |
3515 | | // table |
3516 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][letterOffset]; |
3517 | | // provide enough information for back-translator to be able to recognize |
3518 | | // and ignore the indicator (but it won't be able to determine the |
3519 | | // emphasis class) |
3520 | 0 | if (!compileBrailleIndicator(file, "single letter", CTO_EmphLetter, |
3521 | 0 | &ruleOffset, noback, nofor, table)) |
3522 | 0 | break; |
3523 | 0 | (*table)->emphRules[i][letterOffset] = ruleOffset; |
3524 | 0 | ok = 1; |
3525 | 0 | break; |
3526 | 0 | } |
3527 | 5 | case CTO_BegEmphWord: { |
3528 | | // not passing pointer because compileBrailleIndicator may reallocate |
3529 | | // table |
3530 | 5 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][begWordOffset]; |
3531 | 5 | if (!compileBrailleIndicator(file, "word", CTO_BegEmphWord, &ruleOffset, |
3532 | 5 | noback, nofor, table)) |
3533 | 0 | break; |
3534 | 5 | (*table)->emphRules[i][begWordOffset] = ruleOffset; |
3535 | 5 | ok = 1; |
3536 | 5 | break; |
3537 | 5 | } |
3538 | 0 | case CTO_EndEmphWord: { |
3539 | | // not passing pointer because compileBrailleIndicator may reallocate |
3540 | | // table |
3541 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][endWordOffset]; |
3542 | 0 | if (!compileBrailleIndicator(file, "word stop", CTO_EndEmphWord, |
3543 | 0 | &ruleOffset, noback, nofor, table)) |
3544 | 0 | break; |
3545 | 0 | (*table)->emphRules[i][endWordOffset] = ruleOffset; |
3546 | 0 | ok = 1; |
3547 | 0 | break; |
3548 | 0 | } |
3549 | 1 | case CTO_BegEmph: { |
3550 | | /* fail if both begemph and any of begemphphrase or begemphword are |
3551 | | * defined */ |
3552 | 1 | if ((*table)->emphRules[i][begWordOffset] || |
3553 | 1 | (*table)->emphRules[i][begPhraseOffset]) { |
3554 | 0 | compileError(file, |
3555 | 0 | "Cannot define emphasis for both no context and word or " |
3556 | 0 | "phrase context, i.e. cannot have both begemph and " |
3557 | 0 | "begemphword or begemphphrase."); |
3558 | 0 | break; |
3559 | 0 | } |
3560 | | // not passing pointer because compileBrailleIndicator may reallocate |
3561 | | // table |
3562 | 1 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][begOffset]; |
3563 | 1 | if (!compileBrailleIndicator(file, "first letter", CTO_BegEmph, |
3564 | 1 | &ruleOffset, noback, nofor, table)) |
3565 | 0 | break; |
3566 | 1 | (*table)->emphRules[i][begOffset] = ruleOffset; |
3567 | 1 | ok = 1; |
3568 | 1 | break; |
3569 | 1 | } |
3570 | 0 | case CTO_EndEmph: { |
3571 | 0 | if ((*table)->emphRules[i][endWordOffset] || |
3572 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset] || |
3573 | 0 | (*table)->emphRules[i][endPhraseAfterOffset]) { |
3574 | 0 | compileError(file, |
3575 | 0 | "Cannot define emphasis for both no context and word or " |
3576 | 0 | "phrase context, i.e. cannot have both endemph and " |
3577 | 0 | "endemphword or endemphphrase."); |
3578 | 0 | break; |
3579 | 0 | } |
3580 | | // not passing pointer because compileBrailleIndicator may reallocate |
3581 | | // table |
3582 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][endOffset]; |
3583 | 0 | if (!compileBrailleIndicator(file, "last letter", CTO_EndEmph, |
3584 | 0 | &ruleOffset, noback, nofor, table)) |
3585 | 0 | break; |
3586 | 0 | (*table)->emphRules[i][endOffset] = ruleOffset; |
3587 | 0 | ok = 1; |
3588 | 0 | break; |
3589 | 0 | } |
3590 | 0 | case CTO_BegEmphPhrase: { |
3591 | | // not passing pointer because compileBrailleIndicator may reallocate |
3592 | | // table |
3593 | 0 | TranslationTableOffset ruleOffset = |
3594 | 0 | (*table)->emphRules[i][begPhraseOffset]; |
3595 | 0 | if (!compileBrailleIndicator(file, "first word", CTO_BegEmphPhrase, |
3596 | 0 | &ruleOffset, noback, nofor, table)) |
3597 | 0 | break; |
3598 | 0 | (*table)->emphRules[i][begPhraseOffset] = ruleOffset; |
3599 | 0 | ok = 1; |
3600 | 0 | break; |
3601 | 0 | } |
3602 | 0 | case CTO_EndEmphPhrase: |
3603 | 0 | switch (compileBeforeAfter(file)) { |
3604 | 0 | case 1: { // before |
3605 | 0 | if ((*table)->emphRules[i][endPhraseAfterOffset]) { |
3606 | 0 | compileError(file, "last word after already defined."); |
3607 | 0 | break; |
3608 | 0 | } |
3609 | | // not passing pointer because compileBrailleIndicator may reallocate |
3610 | | // table |
3611 | 0 | TranslationTableOffset ruleOffset = |
3612 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset]; |
3613 | 0 | if (!compileBrailleIndicator(file, "last word before", |
3614 | 0 | CTO_EndEmphPhrase, &ruleOffset, noback, nofor, table)) |
3615 | 0 | break; |
3616 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset] = ruleOffset; |
3617 | 0 | ok = 1; |
3618 | 0 | break; |
3619 | 0 | } |
3620 | 0 | case 2: { // after |
3621 | 0 | if ((*table)->emphRules[i][endPhraseBeforeOffset]) { |
3622 | 0 | compileError(file, "last word before already defined."); |
3623 | 0 | break; |
3624 | 0 | } |
3625 | | // not passing pointer because compileBrailleIndicator may reallocate |
3626 | | // table |
3627 | 0 | TranslationTableOffset ruleOffset = |
3628 | 0 | (*table)->emphRules[i][endPhraseAfterOffset]; |
3629 | 0 | if (!compileBrailleIndicator(file, "last word after", |
3630 | 0 | CTO_EndEmphPhrase, &ruleOffset, noback, nofor, table)) |
3631 | 0 | break; |
3632 | 0 | (*table)->emphRules[i][endPhraseAfterOffset] = ruleOffset; |
3633 | 0 | ok = 1; |
3634 | 0 | break; |
3635 | 0 | } |
3636 | 0 | default: // error |
3637 | 0 | compileError(file, "Invalid lastword indicator location."); |
3638 | 0 | break; |
3639 | 0 | } |
3640 | 0 | break; |
3641 | 0 | case CTO_LenEmphPhrase: |
3642 | 0 | if (((*table)->emphRules[i][lenPhraseOffset] = compileNumber(file))) |
3643 | 0 | ok = 1; |
3644 | 0 | break; |
3645 | 0 | case CTO_EmphModeChars: { |
3646 | 0 | if (!getRuleCharsText(file, &ruleChars)) break; |
3647 | 0 | widechar *emphmodechars = (*table)->emphModeChars[i]; |
3648 | 0 | int len; |
3649 | 0 | for (len = 0; len < EMPHMODECHARSSIZE && emphmodechars[len]; len++) |
3650 | 0 | ; |
3651 | 0 | if (len + ruleChars.length > EMPHMODECHARSSIZE) { |
3652 | 0 | compileError(file, "More than %d characters", EMPHMODECHARSSIZE); |
3653 | 0 | break; |
3654 | 0 | } |
3655 | 0 | ok = 1; |
3656 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3657 | 0 | if (!getChar(ruleChars.chars[k], *table, NULL)) { |
3658 | 0 | compileError(file, "Emphasis mode character undefined"); |
3659 | 0 | ok = 0; |
3660 | 0 | break; |
3661 | 0 | } |
3662 | 0 | emphmodechars[len++] = ruleChars.chars[k]; |
3663 | 0 | } |
3664 | 0 | break; |
3665 | 0 | } |
3666 | 0 | case CTO_NoEmphChars: { |
3667 | 0 | if (!getRuleCharsText(file, &ruleChars)) break; |
3668 | 0 | widechar *noemphchars = (*table)->noEmphChars[i]; |
3669 | 0 | int len; |
3670 | 0 | for (len = 0; len < NOEMPHCHARSSIZE && noemphchars[len]; len++) |
3671 | 0 | ; |
3672 | 0 | if (len + ruleChars.length > NOEMPHCHARSSIZE) { |
3673 | 0 | compileError(file, "More than %d characters", NOEMPHCHARSSIZE); |
3674 | 0 | break; |
3675 | 0 | } |
3676 | 0 | ok = 1; |
3677 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3678 | 0 | if (!getChar(ruleChars.chars[k], *table, NULL)) { |
3679 | 0 | compileError(file, "Character undefined"); |
3680 | 0 | ok = 0; |
3681 | 0 | break; |
3682 | 0 | } |
3683 | 0 | noemphchars[len++] = ruleChars.chars[k]; |
3684 | 0 | } |
3685 | 0 | break; |
3686 | 0 | } |
3687 | 0 | default: |
3688 | 0 | break; |
3689 | 6 | } |
3690 | 6 | free(s); |
3691 | 6 | return ok; |
3692 | 6 | } |
3693 | 4 | case CTO_LetterSign: { |
3694 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3695 | 4 | TranslationTableOffset ruleOffset = (*table)->letterSign; |
3696 | 4 | if (!compileBrailleIndicator(file, "letter sign", CTO_LetterSign, &ruleOffset, |
3697 | 4 | noback, nofor, table)) |
3698 | 0 | return 0; |
3699 | 4 | (*table)->letterSign = ruleOffset; |
3700 | 4 | return 1; |
3701 | 4 | } |
3702 | 23 | case CTO_NoLetsignBefore: |
3703 | 23 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3704 | 23 | if (((*table)->noLetsignBeforeCount + ruleChars.length) > LETSIGNBEFORESIZE) { |
3705 | 0 | compileError(file, "More than %d characters", LETSIGNBEFORESIZE); |
3706 | 0 | return 0; |
3707 | 0 | } |
3708 | 218 | for (int k = 0; k < ruleChars.length; k++) |
3709 | 195 | (*table)->noLetsignBefore[(*table)->noLetsignBeforeCount++] = |
3710 | 195 | ruleChars.chars[k]; |
3711 | 23 | return 1; |
3712 | 2 | case CTO_NoLetsign: |
3713 | 2 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3714 | 2 | if (((*table)->noLetsignCount + ruleChars.length) > LETSIGNSIZE) { |
3715 | 0 | compileError(file, "More than %d characters", LETSIGNSIZE); |
3716 | 0 | return 0; |
3717 | 0 | } |
3718 | 24 | for (int k = 0; k < ruleChars.length; k++) |
3719 | 22 | (*table)->noLetsign[(*table)->noLetsignCount++] = ruleChars.chars[k]; |
3720 | 2 | return 1; |
3721 | 11 | case CTO_NoLetsignAfter: |
3722 | 11 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3723 | 11 | if (((*table)->noLetsignAfterCount + ruleChars.length) > LETSIGNAFTERSIZE) { |
3724 | 0 | compileError(file, "More than %d characters", LETSIGNAFTERSIZE); |
3725 | 0 | return 0; |
3726 | 0 | } |
3727 | 68 | for (int k = 0; k < ruleChars.length; k++) |
3728 | 57 | (*table)->noLetsignAfter[(*table)->noLetsignAfterCount++] = |
3729 | 57 | ruleChars.chars[k]; |
3730 | 11 | return 1; |
3731 | 14 | case CTO_NumberSign: { |
3732 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3733 | 14 | TranslationTableOffset ruleOffset = (*table)->numberSign; |
3734 | 14 | if (!compileBrailleIndicator(file, "number sign", CTO_NumberSign, &ruleOffset, |
3735 | 14 | noback, nofor, table)) |
3736 | 0 | return 0; |
3737 | 14 | (*table)->numberSign = ruleOffset; |
3738 | 14 | return 1; |
3739 | 14 | } |
3740 | 4 | case CTO_NoNumberSign: { |
3741 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3742 | 4 | TranslationTableOffset ruleOffset = (*table)->noNumberSign; |
3743 | 4 | if (!compileBrailleIndicator(file, "no number sign", CTO_NoNumberSign, |
3744 | 4 | &ruleOffset, noback, nofor, table)) |
3745 | 0 | return 0; |
3746 | 4 | (*table)->noNumberSign = ruleOffset; |
3747 | 4 | return 1; |
3748 | 4 | } |
3749 | | |
3750 | 5 | case CTO_NumericModeChars: |
3751 | 5 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3752 | 542 | for (int k = 0; k < ruleChars.length; k++) { |
3753 | 537 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3754 | 537 | if (!c) { |
3755 | 0 | compileError(file, "Numeric mode character undefined: %s", |
3756 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
3757 | 0 | return 0; |
3758 | 0 | } |
3759 | 537 | c->attributes |= CTC_NumericMode; |
3760 | 537 | (*table)->usesNumericMode = 1; |
3761 | 537 | } |
3762 | 5 | return 1; |
3763 | | |
3764 | 1 | case CTO_MidEndNumericModeChars: |
3765 | 1 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3766 | 2 | for (int k = 0; k < ruleChars.length; k++) { |
3767 | 1 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3768 | 1 | if (!c) { |
3769 | 0 | compileError(file, "Midendnumeric mode character undefined"); |
3770 | 0 | return 0; |
3771 | 0 | } |
3772 | 1 | c->attributes |= CTC_MidEndNumericMode; |
3773 | 1 | (*table)->usesNumericMode = 1; |
3774 | 1 | } |
3775 | 1 | return 1; |
3776 | | |
3777 | 3 | case CTO_NumericNoContractChars: |
3778 | 3 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3779 | 30 | for (int k = 0; k < ruleChars.length; k++) { |
3780 | 27 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3781 | 27 | if (!c) { |
3782 | 0 | compileError(file, "Numeric no contraction character undefined"); |
3783 | 0 | return 0; |
3784 | 0 | } |
3785 | 27 | c->attributes |= CTC_NumericNoContract; |
3786 | 27 | (*table)->usesNumericMode = 1; |
3787 | 27 | } |
3788 | 3 | return 1; |
3789 | | |
3790 | 0 | case CTO_NoContractSign: { |
3791 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3792 | 0 | TranslationTableOffset ruleOffset = (*table)->noContractSign; |
3793 | 0 | if (!compileBrailleIndicator(file, "no contractions sign", CTO_NoContractSign, |
3794 | 0 | &ruleOffset, noback, nofor, table)) |
3795 | 0 | return 0; |
3796 | 0 | (*table)->noContractSign = ruleOffset; |
3797 | 0 | return 1; |
3798 | 0 | } |
3799 | 3 | case CTO_SeqDelimiter: |
3800 | 3 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3801 | 1.21k | for (int k = 0; k < ruleChars.length; k++) { |
3802 | 1.20k | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3803 | 1.20k | if (!c) { |
3804 | 0 | compileError(file, "Sequence delimiter character undefined"); |
3805 | 0 | return 0; |
3806 | 0 | } |
3807 | 1.20k | c->attributes |= CTC_SeqDelimiter; |
3808 | 1.20k | (*table)->usesSequences = 1; |
3809 | 1.20k | } |
3810 | 3 | return 1; |
3811 | | |
3812 | 1 | case CTO_SeqBeforeChars: |
3813 | 1 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3814 | 640 | for (int k = 0; k < ruleChars.length; k++) { |
3815 | 639 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3816 | 639 | if (!c) { |
3817 | 0 | compileError(file, "Sequence before character undefined"); |
3818 | 0 | return 0; |
3819 | 0 | } |
3820 | 639 | c->attributes |= CTC_SeqBefore; |
3821 | 639 | } |
3822 | 1 | return 1; |
3823 | | |
3824 | 0 | case CTO_SeqAfterChars: |
3825 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3826 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3827 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3828 | 0 | if (!c) { |
3829 | 0 | compileError(file, "Sequence after character undefined"); |
3830 | 0 | return 0; |
3831 | 0 | } |
3832 | 0 | c->attributes |= CTC_SeqAfter; |
3833 | 0 | } |
3834 | 0 | return 1; |
3835 | | |
3836 | 0 | case CTO_SeqAfterPattern: |
3837 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3838 | 0 | if (((*table)->seqPatternsCount + ruleChars.length + 1) > SEQPATTERNSIZE) { |
3839 | 0 | compileError(file, "More than %d characters", SEQPATTERNSIZE); |
3840 | 0 | return 0; |
3841 | 0 | } |
3842 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3843 | 0 | (*table)->seqPatterns[(*table)->seqPatternsCount++] = ruleChars.chars[k]; |
3844 | 0 | (*table)->seqPatterns[(*table)->seqPatternsCount++] = 0; |
3845 | 0 | return 1; |
3846 | | |
3847 | 0 | case CTO_SeqAfterExpression: |
3848 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3849 | 0 | if ((ruleChars.length + 1) > SEQPATTERNSIZE) { |
3850 | 0 | compileError(file, "More than %d characters", SEQPATTERNSIZE); |
3851 | 0 | return 0; |
3852 | 0 | } |
3853 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3854 | 0 | (*table)->seqAfterExpression[k] = ruleChars.chars[k]; |
3855 | 0 | (*table)->seqAfterExpression[ruleChars.length] = 0; |
3856 | 0 | (*table)->seqAfterExpressionLength = ruleChars.length; |
3857 | 0 | return 1; |
3858 | | |
3859 | 0 | case CTO_CapsModeChars: |
3860 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3861 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3862 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3863 | 0 | if (!c) { |
3864 | 0 | compileError(file, "Capital mode character undefined"); |
3865 | 0 | return 0; |
3866 | 0 | } |
3867 | 0 | c->attributes |= CTC_CapsMode; |
3868 | 0 | (*table)->hasCapsModeChars = 1; |
3869 | 0 | } |
3870 | 0 | return 1; |
3871 | | |
3872 | 1 | case CTO_BegComp: { |
3873 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3874 | 1 | TranslationTableOffset ruleOffset = (*table)->begComp; |
3875 | 1 | if (!compileBrailleIndicator(file, "begin computer braille", CTO_BegComp, |
3876 | 1 | &ruleOffset, noback, nofor, table)) |
3877 | 0 | return 0; |
3878 | 1 | (*table)->begComp = ruleOffset; |
3879 | 1 | return 1; |
3880 | 1 | } |
3881 | 0 | case CTO_EndComp: { |
3882 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3883 | 0 | TranslationTableOffset ruleOffset = (*table)->endComp; |
3884 | 0 | if (!compileBrailleIndicator(file, "end computer braslle", CTO_EndComp, |
3885 | 0 | &ruleOffset, noback, nofor, table)) |
3886 | 0 | return 0; |
3887 | 0 | (*table)->endComp = ruleOffset; |
3888 | 0 | return 1; |
3889 | 0 | } |
3890 | 9 | case CTO_NoCross: |
3891 | 9 | if (nocross) { |
3892 | 0 | compileError( |
3893 | 0 | file, "%s already specified.", _lou_findOpcodeName(CTO_NoCross)); |
3894 | 0 | return 0; |
3895 | 0 | } |
3896 | 9 | nocross = 1; |
3897 | 9 | goto doOpcode; |
3898 | 2 | case CTO_Syllable: |
3899 | 2 | (*table)->syllables = 1; |
3900 | 3 | case CTO_Always: |
3901 | 9 | case CTO_LargeSign: |
3902 | 14 | case CTO_WholeWord: |
3903 | 19 | case CTO_PartWord: |
3904 | 23 | case CTO_JoinNum: |
3905 | 56 | case CTO_JoinableWord: |
3906 | 56 | case CTO_LowWord: |
3907 | 56 | case CTO_SuffixableWord: |
3908 | 57 | case CTO_PrefixableWord: |
3909 | 61 | case CTO_BegWord: |
3910 | 63 | case CTO_BegMidWord: |
3911 | 71 | case CTO_MidWord: |
3912 | 71 | case CTO_MidEndWord: |
3913 | 73 | case CTO_EndWord: |
3914 | 97 | case CTO_PrePunc: |
3915 | 100 | case CTO_PostPunc: |
3916 | 101 | case CTO_BegNum: |
3917 | 102 | case CTO_MidNum: |
3918 | 103 | case CTO_EndNum: |
3919 | 106 | case CTO_Repeated: |
3920 | 111 | case CTO_RepWord: |
3921 | 111 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3922 | 111 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
3923 | 110 | if (ruleDots.length == 0) |
3924 | | // check that all characters in a rule with `=` as second operand are |
3925 | | // defined (or based on another character) |
3926 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3927 | 0 | TranslationTableCharacter *c = |
3928 | 0 | getChar(ruleChars.chars[k], *table, NULL); |
3929 | 0 | if (!(c && (c->definitionRule || c->basechar))) { |
3930 | 0 | compileError(file, "Character %s is not defined", |
3931 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
3932 | 0 | return 0; |
3933 | 0 | } |
3934 | 0 | } |
3935 | 110 | TranslationTableRule *r; |
3936 | 110 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, &r, |
3937 | 110 | noback, nofor, table)) |
3938 | 0 | return 0; |
3939 | 110 | if (nocross) r->nocross = 1; |
3940 | 110 | return 1; |
3941 | | // if (opcode == CTO_MidNum) |
3942 | | // { |
3943 | | // TranslationTableCharacter *c = getChar(ruleChars.chars[0]); |
3944 | | // if(c) |
3945 | | // c->attributes |= CTC_NumericMode; |
3946 | | // } |
3947 | 34 | case CTO_RepEndWord: |
3948 | 34 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3949 | 34 | CharsString dots; |
3950 | 34 | if (!getToken(file, &dots, "dots,dots operand")) return 0; |
3951 | 34 | int len = dots.length; |
3952 | 91 | for (int k = 0; k < len - 1; k++) { |
3953 | 91 | if (dots.chars[k] == ',') { |
3954 | 34 | dots.length = k; |
3955 | 34 | if (!parseDots(file, &ruleDots, &dots)) return 0; |
3956 | 34 | ruleDots.chars[ruleDots.length++] = ','; |
3957 | 34 | k++; |
3958 | 34 | if (k == len - 1 && dots.chars[k] == '=') { |
3959 | | // check that all characters are defined (or based on another |
3960 | | // character) |
3961 | 0 | for (int l = 0; l < ruleChars.length; l++) { |
3962 | 0 | TranslationTableCharacter *c = |
3963 | 0 | getChar(ruleChars.chars[l], *table, NULL); |
3964 | 0 | if (!(c && (c->definitionRule || c->basechar))) { |
3965 | 0 | compileError(file, "Character %s is not defined", |
3966 | 0 | _lou_showString(&ruleChars.chars[l], 1, 0)); |
3967 | 0 | return 0; |
3968 | 0 | } |
3969 | 0 | } |
3970 | 34 | } else { |
3971 | 34 | CharsString x, y; |
3972 | 34 | x.length = 0; |
3973 | 554 | while (k < len) x.chars[x.length++] = dots.chars[k++]; |
3974 | 34 | if (parseDots(file, &y, &x)) |
3975 | 234 | for (int l = 0; l < y.length; l++) |
3976 | 200 | ruleDots.chars[ruleDots.length++] = y.chars[l]; |
3977 | 34 | } |
3978 | 34 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, |
3979 | 34 | NULL, NULL, noback, nofor, table); |
3980 | 34 | } |
3981 | 91 | } |
3982 | 0 | return 0; |
3983 | 1 | case CTO_CompDots: |
3984 | 1 | case CTO_Comp6: { |
3985 | 1 | TranslationTableOffset ruleOffset; |
3986 | 1 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3987 | 1 | if (ruleChars.length != 1) { |
3988 | 0 | compileError(file, "first operand must be 1 character"); |
3989 | 0 | return 0; |
3990 | 0 | } |
3991 | 1 | if (nofor || noback) { |
3992 | 0 | compileWarning(file, "nofor and noback not allowed on comp6 rules"); |
3993 | 0 | } |
3994 | 1 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
3995 | 1 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, &ruleOffset, |
3996 | 1 | NULL, noback, nofor, table)) |
3997 | 0 | return 0; |
3998 | 1 | return 1; |
3999 | 1 | } |
4000 | 0 | case CTO_ExactDots: |
4001 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4002 | 0 | if (ruleChars.chars[0] != '@') { |
4003 | 0 | compileError(file, "The operand must begin with an at sign (@)"); |
4004 | 0 | return 0; |
4005 | 0 | } |
4006 | 0 | for (int k = 1; k < ruleChars.length; k++) |
4007 | 0 | scratchPad.chars[k - 1] = ruleChars.chars[k]; |
4008 | 0 | scratchPad.length = ruleChars.length - 1; |
4009 | 0 | if (!parseDots(file, &ruleDots, &scratchPad)) return 0; |
4010 | 0 | return addRule(file, opcode, &ruleChars, &ruleDots, before, after, NULL, NULL, |
4011 | 0 | noback, nofor, table); |
4012 | 11 | case CTO_CapsNoCont: { |
4013 | 11 | TranslationTableOffset ruleOffset; |
4014 | 11 | ruleChars.length = 1; |
4015 | 11 | ruleChars.chars[0] = 'a'; |
4016 | 11 | if (!addRule(file, opcode, &ruleChars, NULL, after, before, &ruleOffset, NULL, |
4017 | 11 | noback, nofor, table)) |
4018 | 0 | return 0; |
4019 | 11 | (*table)->capsNoCont = ruleOffset; |
4020 | 11 | return 1; |
4021 | 11 | } |
4022 | 99 | case CTO_Replace: |
4023 | 99 | if (getRuleCharsText(file, &ruleChars)) { |
4024 | 99 | if (atEndOfLine(file)) |
4025 | 10 | ruleDots.length = ruleDots.chars[0] = 0; |
4026 | 89 | else { |
4027 | 89 | getRuleDotsText(file, &ruleDots); |
4028 | 89 | if (ruleDots.chars[0] == '#') |
4029 | 0 | ruleDots.length = ruleDots.chars[0] = 0; |
4030 | 89 | else if (ruleDots.chars[0] == '\\' && ruleDots.chars[1] == '#') |
4031 | 0 | memmove(&ruleDots.chars[0], &ruleDots.chars[1], |
4032 | 0 | ruleDots.length-- * CHARSIZE); |
4033 | 89 | } |
4034 | 99 | } |
4035 | 3.73k | for (int k = 0; k < ruleChars.length; k++) |
4036 | 3.63k | putChar(file, ruleChars.chars[k], table, NULL, (*table)->ruleCounter); |
4037 | 7.09k | for (int k = 0; k < ruleDots.length; k++) |
4038 | 6.99k | putChar(file, ruleDots.chars[k], table, NULL, (*table)->ruleCounter); |
4039 | 99 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, NULL, |
4040 | 99 | noback, nofor, table); |
4041 | 131 | case CTO_Correct: |
4042 | 131 | (*table)->corrections = 1; |
4043 | 131 | goto doPass; |
4044 | 28 | case CTO_Pass2: |
4045 | 28 | if ((*table)->numPasses < 2) (*table)->numPasses = 2; |
4046 | 28 | goto doPass; |
4047 | 38 | case CTO_Pass3: |
4048 | 38 | if ((*table)->numPasses < 3) (*table)->numPasses = 3; |
4049 | 38 | goto doPass; |
4050 | 39 | case CTO_Pass4: |
4051 | 39 | if ((*table)->numPasses < 4) (*table)->numPasses = 4; |
4052 | 236 | doPass: |
4053 | 284 | case CTO_Context: |
4054 | 284 | if (!(nofor || noback)) { |
4055 | 0 | compileError(file, "%s or %s must be specified.", |
4056 | 0 | _lou_findOpcodeName(CTO_NoFor), _lou_findOpcodeName(CTO_NoBack)); |
4057 | 0 | return 0; |
4058 | 0 | } |
4059 | 284 | return compilePassOpcode(file, opcode, noback, nofor, table); |
4060 | 1 | case CTO_Contraction: |
4061 | 1 | case CTO_NoCont: |
4062 | 3 | case CTO_CompBrl: |
4063 | 8 | case CTO_Literal: |
4064 | 8 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4065 | | // check that all characters in a compbrl, contraction, |
4066 | | // nocont or literal rule are defined (or based on another |
4067 | | // character) |
4068 | 16 | for (int k = 0; k < ruleChars.length; k++) { |
4069 | 8 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
4070 | 8 | if (!(c && (c->definitionRule || c->basechar))) { |
4071 | 0 | compileError(file, "Character %s is not defined", |
4072 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
4073 | 0 | return 0; |
4074 | 0 | } |
4075 | 8 | } |
4076 | 8 | return addRule(file, opcode, &ruleChars, NULL, after, before, NULL, NULL, |
4077 | 8 | noback, nofor, table); |
4078 | 15 | case CTO_MultInd: { |
4079 | 15 | ruleChars.length = 0; |
4080 | 15 | if (!getToken(file, &token, "multiple braille indicators") || |
4081 | 15 | !parseDots(file, &cells, &token)) |
4082 | 0 | return 0; |
4083 | 22 | while (getToken(file, &token, "multind opcodes")) { |
4084 | 19 | opcode = getOpcode(file, &token); |
4085 | 19 | if (opcode == CTO_None) { |
4086 | 1 | compileError(file, "opcode %s not defined.", |
4087 | 1 | _lou_showString(token.chars, token.length, 0)); |
4088 | 1 | return 0; |
4089 | 1 | } |
4090 | 18 | if (!(opcode >= CTO_CapsLetter && opcode < CTO_MultInd)) { |
4091 | 0 | compileError(file, "Not a braille indicator opcode."); |
4092 | 0 | return 0; |
4093 | 0 | } |
4094 | 18 | ruleChars.chars[ruleChars.length++] = (widechar)opcode; |
4095 | 18 | if (atEndOfLine(file)) break; |
4096 | 18 | } |
4097 | 14 | return addRule(file, CTO_MultInd, &ruleChars, &cells, after, before, NULL, |
4098 | 14 | NULL, noback, nofor, table); |
4099 | 15 | } |
4100 | | |
4101 | 83 | case CTO_Class: |
4102 | 83 | compileWarning(file, "class is deprecated, use attribute instead"); |
4103 | 88 | case CTO_Attribute: { |
4104 | 88 | if (nofor || noback) { |
4105 | 5 | compileWarning( |
4106 | 5 | file, "nofor and noback not allowed before class/attribute"); |
4107 | 5 | } |
4108 | 88 | if ((opcode == CTO_Class && (*table)->usesAttributeOrClass == 1) || |
4109 | 88 | (opcode == CTO_Attribute && (*table)->usesAttributeOrClass == 2)) { |
4110 | 0 | compileError(file, |
4111 | 0 | "attribute and class rules must not be both present in a table"); |
4112 | 0 | return 0; |
4113 | 0 | } |
4114 | 88 | if (opcode == CTO_Class) |
4115 | 83 | (*table)->usesAttributeOrClass = 2; |
4116 | 5 | else |
4117 | 5 | (*table)->usesAttributeOrClass = 1; |
4118 | 88 | if (!getToken(file, &token, "attribute name")) { |
4119 | 0 | compileError(file, "Expected %s", "attribute name"); |
4120 | 0 | return 0; |
4121 | 0 | } |
4122 | 88 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
4123 | 0 | return 0; |
4124 | 0 | } |
4125 | | |
4126 | 88 | TranslationTableCharacterAttributes attribute = 0; |
4127 | 88 | { |
4128 | 88 | int attrNumber = -1; |
4129 | 88 | switch (token.chars[0]) { |
4130 | 0 | case '0': |
4131 | 3 | case '1': |
4132 | 3 | case '2': |
4133 | 3 | case '3': |
4134 | 3 | case '4': |
4135 | 3 | case '5': |
4136 | 3 | case '6': |
4137 | 3 | case '7': |
4138 | 3 | case '8': |
4139 | 3 | case '9': |
4140 | 3 | attrNumber = token.chars[0] - '0'; |
4141 | 3 | break; |
4142 | 88 | } |
4143 | 88 | if (attrNumber >= 0) { |
4144 | 3 | if (opcode == CTO_Class) { |
4145 | 0 | compileError(file, |
4146 | 0 | "Invalid class name: may not contain digits, use " |
4147 | 0 | "attribute instead of class"); |
4148 | 0 | return 0; |
4149 | 0 | } |
4150 | 3 | if (token.length > 1 || attrNumber > 7) { |
4151 | 0 | compileError(file, |
4152 | 0 | "Invalid attribute name: must be a digit between 0 and 7 " |
4153 | 0 | "or a word containing only letters"); |
4154 | 0 | return 0; |
4155 | 0 | } |
4156 | 3 | if (!(*table)->numberedAttributes[attrNumber]) |
4157 | | // attribute not used before yet: assign it a value |
4158 | 3 | (*table)->numberedAttributes[attrNumber] = |
4159 | 3 | getNextNumberedAttribute(*table); |
4160 | 3 | attribute = (*table)->numberedAttributes[attrNumber]; |
4161 | 85 | } else { |
4162 | 85 | const CharacterClass *namedAttr = findCharacterClass(&token, *table); |
4163 | 85 | if (!namedAttr) { |
4164 | | // no class with that name: create one |
4165 | 18 | namedAttr = addCharacterClass( |
4166 | 18 | file, &token.chars[0], token.length, *table, 1); |
4167 | 18 | if (!namedAttr) return 0; |
4168 | 18 | } |
4169 | | // there is a class with that name or a new class was successfully |
4170 | | // created |
4171 | 85 | attribute = namedAttr->attribute; |
4172 | 85 | if (attribute == CTC_UpperCase || attribute == CTC_LowerCase) |
4173 | 52 | attribute |= CTC_Letter; |
4174 | 85 | } |
4175 | 88 | } |
4176 | 88 | CharsString characters; |
4177 | 88 | if (!getCharacters(file, &characters)) return 0; |
4178 | 4.74k | for (int i = 0; i < characters.length; i++) { |
4179 | | // get the character from the table, or if it is not defined yet, |
4180 | | // define it |
4181 | 4.65k | TranslationTableCharacter *character = putChar( |
4182 | 4.65k | file, characters.chars[i], table, NULL, (*table)->ruleCounter); |
4183 | | // set the attribute |
4184 | 4.65k | character->attributes |= attribute; |
4185 | | // also set the attribute on the associated dots (if any) |
4186 | 4.65k | if (character->basechar) |
4187 | 387 | character = (TranslationTableCharacter *)&(*table) |
4188 | 387 | ->ruleArea[character->basechar]; |
4189 | 4.65k | if (character->definitionRule) { |
4190 | 1 | TranslationTableRule *defRule = |
4191 | 1 | (TranslationTableRule *)&(*table) |
4192 | 1 | ->ruleArea[character->definitionRule]; |
4193 | 1 | if (defRule->dotslen == 1) { |
4194 | 1 | TranslationTableCharacter *dots = |
4195 | 1 | getDots(defRule->charsdots[defRule->charslen], *table); |
4196 | 1 | if (dots) dots->attributes |= attribute; |
4197 | 1 | } |
4198 | 1 | } |
4199 | 4.65k | } |
4200 | 88 | (*table)->ruleCounter++; |
4201 | 88 | return 1; |
4202 | 88 | } |
4203 | | |
4204 | 0 | { |
4205 | 0 | TranslationTableCharacterAttributes *attributes; |
4206 | 0 | const CharacterClass *class; |
4207 | 0 | case CTO_After: |
4208 | 0 | attributes = &after; |
4209 | 0 | goto doBeforeAfter; |
4210 | 0 | case CTO_Before: |
4211 | 0 | attributes = &before; |
4212 | 0 | doBeforeAfter: |
4213 | 0 | if (!(*table)->characterClasses) { |
4214 | 0 | if (!allocateCharacterClasses(*table)) return 0; |
4215 | 0 | } |
4216 | 0 | if (!getToken(file, &token, "attribute name")) return 0; |
4217 | 0 | if (!(class = findCharacterClass(&token, *table))) { |
4218 | 0 | compileError(file, "attribute not defined"); |
4219 | 0 | return 0; |
4220 | 0 | } |
4221 | 0 | *attributes |= class->attribute; |
4222 | 0 | goto doOpcode; |
4223 | 0 | } |
4224 | 30 | case CTO_Base: |
4225 | 30 | if (nofor || noback) { |
4226 | 0 | compileWarning(file, "nofor and noback not allowed before base"); |
4227 | 0 | } |
4228 | 30 | if (!getToken(file, &token, "attribute name")) { |
4229 | 0 | compileError( |
4230 | 0 | file, "base opcode must be followed by a valid attribute name."); |
4231 | 0 | return 0; |
4232 | 0 | } |
4233 | 30 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
4234 | 0 | return 0; |
4235 | 0 | } |
4236 | 30 | const CharacterClass *mode = findCharacterClass(&token, *table); |
4237 | 30 | if (!mode) { |
4238 | 25 | mode = addCharacterClass(file, token.chars, token.length, *table, 1); |
4239 | 25 | if (!mode) return 0; |
4240 | 25 | } |
4241 | 30 | if (!(mode->attribute == CTC_UpperCase || mode->attribute == CTC_Digit) && |
4242 | 30 | mode->attribute >= CTC_Space && mode->attribute <= CTC_LitDigit) { |
4243 | 0 | compileError(file, |
4244 | 0 | "base opcode must be followed by \"uppercase\", \"digit\", or a " |
4245 | 0 | "custom attribute name."); |
4246 | 0 | return 0; |
4247 | 0 | } |
4248 | 30 | if (!getRuleCharsText(file, &token)) return 0; |
4249 | 30 | if (token.length != 1) { |
4250 | 0 | compileError(file, |
4251 | 0 | "Exactly one character followed by one base character is " |
4252 | 0 | "required."); |
4253 | 0 | return 0; |
4254 | 0 | } |
4255 | 30 | TranslationTableOffset characterOffset; |
4256 | 30 | TranslationTableCharacter *character = putChar( |
4257 | 30 | file, token.chars[0], table, &characterOffset, (*table)->ruleCounter); |
4258 | 30 | if (!getRuleCharsText(file, &token)) return 0; |
4259 | 30 | if (token.length != 1) { |
4260 | 0 | compileError(file, "Exactly one base character is required."); |
4261 | 0 | return 0; |
4262 | 0 | } |
4263 | 30 | TranslationTableOffset basechar; |
4264 | 30 | putChar(file, token.chars[0], table, &basechar, (*table)->ruleCounter); |
4265 | | // putChar may have moved table, so make sure character is still valid |
4266 | 30 | character = (TranslationTableCharacter *)&(*table)->ruleArea[characterOffset]; |
4267 | 30 | if (character->basechar) { |
4268 | 1 | if (character->basechar == basechar && |
4269 | 1 | character->mode == mode->attribute) { |
4270 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "%s:%d: Duplicate base rule.", |
4271 | 0 | file->fileName, file->lineNumber); |
4272 | 1 | } else { |
4273 | 1 | _lou_logMessage(LOU_LOG_DEBUG, |
4274 | 1 | "%s:%d: A different base rule already exists for this " |
4275 | 1 | "character (%s). The existing rule will take precedence " |
4276 | 1 | "over the new one.", |
4277 | 1 | file->fileName, file->lineNumber, |
4278 | 1 | printSource(file->sourceFile, character->sourceFile, |
4279 | 1 | character->sourceLine)); |
4280 | 1 | } |
4281 | 29 | } else { |
4282 | 29 | character->basechar = basechar; |
4283 | 29 | character->mode = mode->attribute; |
4284 | 29 | character->sourceFile = file->sourceFile; |
4285 | 29 | character->sourceLine = file->lineNumber; |
4286 | 29 | character->ruleIndex = (*table)->ruleCounter; |
4287 | | /* some other processing is done at the end of the compilation, in |
4288 | | * finalizeTable() */ |
4289 | 29 | } |
4290 | 30 | (*table)->ruleCounter++; |
4291 | 30 | return 1; |
4292 | 1 | case CTO_EmpMatchBefore: |
4293 | 1 | before |= CTC_EmpMatch; |
4294 | 1 | goto doOpcode; |
4295 | 1 | case CTO_EmpMatchAfter: |
4296 | 1 | after |= CTC_EmpMatch; |
4297 | 1 | goto doOpcode; |
4298 | | |
4299 | 5 | case CTO_SwapCc: |
4300 | 5 | case CTO_SwapCd: |
4301 | 13 | case CTO_SwapDd: |
4302 | 13 | return compileSwap(file, opcode, noback, nofor, table); |
4303 | 1 | case CTO_Hyphen: |
4304 | 20 | case CTO_DecPoint: |
4305 | | // case CTO_Apostrophe: |
4306 | | // case CTO_Initial: |
4307 | 20 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4308 | 20 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
4309 | 20 | if (ruleChars.length != 1 || ruleDots.length < 1) { |
4310 | 0 | compileError(file, |
4311 | 0 | "One Unicode character and at least one cell are " |
4312 | 0 | "required."); |
4313 | 0 | return 0; |
4314 | 0 | } |
4315 | 20 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, NULL, |
4316 | 20 | noback, nofor, table); |
4317 | | // if (opcode == CTO_DecPoint) |
4318 | | // { |
4319 | | // TranslationTableCharacter *c = |
4320 | | // getChar(ruleChars.chars[0]); |
4321 | | // if(c) |
4322 | | // c->attributes |= CTC_NumericMode; |
4323 | | // } |
4324 | 0 | default: |
4325 | 0 | compileError(file, "unimplemented opcode."); |
4326 | 0 | return 0; |
4327 | 1.00k | } |
4328 | 1.71k | } |
4329 | 0 | return 0; |
4330 | 1.71k | } |
4331 | | |
4332 | | int EXPORT_CALL |
4333 | 0 | lou_readCharFromFile(const char *fileName, int *mode) { |
4334 | | /* Read a character from a file, whether big-endian, little-endian or |
4335 | | * ASCII8 */ |
4336 | 0 | int ch; |
4337 | 0 | static FileInfo file; |
4338 | 0 | if (fileName == NULL) return 0; |
4339 | 0 | if (*mode == 1) { |
4340 | 0 | *mode = 0; |
4341 | 0 | file.fileName = fileName; |
4342 | 0 | file.encoding = noEncoding; |
4343 | 0 | file.status = 0; |
4344 | 0 | file.lineNumber = 0; |
4345 | 0 | if (!(file.in = fopen(file.fileName, "r"))) { |
4346 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open file '%s'", file.fileName); |
4347 | 0 | *mode = 1; |
4348 | 0 | return EOF; |
4349 | 0 | } |
4350 | 0 | } |
4351 | 0 | if (file.in == NULL) { |
4352 | 0 | *mode = 1; |
4353 | 0 | return EOF; |
4354 | 0 | } |
4355 | 0 | ch = getAChar(&file); |
4356 | 0 | if (ch == EOF) { |
4357 | 0 | fclose(file.in); |
4358 | 0 | file.in = NULL; |
4359 | 0 | *mode = 1; |
4360 | 0 | } |
4361 | 0 | return ch; |
4362 | 0 | } |
4363 | | |
4364 | | static TranslationTableCharacter * |
4365 | | finalizeCharacter(TranslationTableHeader *table, TranslationTableOffset characterOffset, |
4366 | 2.64k | int detect_loop) { |
4367 | 2.64k | TranslationTableCharacter *character = |
4368 | 2.64k | (TranslationTableCharacter *)&table->ruleArea[characterOffset]; |
4369 | 2.64k | if (character->basechar) { |
4370 | 29 | TranslationTableOffset basecharOffset = 0; |
4371 | 29 | TranslationTableCharacter *basechar = character; |
4372 | 29 | TranslationTableCharacterAttributes mode = 0; |
4373 | 59 | while (basechar->basechar) { |
4374 | 30 | if (basechar->basechar == characterOffset || detect_loop++ > MAX_MODES) { |
4375 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
4376 | 0 | "%s: error: Character can not be (indirectly) based on " |
4377 | 0 | "itself.", |
4378 | 0 | printSource(NULL, character->sourceFile, character->sourceLine)); |
4379 | 0 | errorCount++; |
4380 | 0 | return NULL; |
4381 | 0 | } |
4382 | | // inherit basechar mode |
4383 | 30 | mode |= basechar->mode; |
4384 | | // compute basechar recursively |
4385 | 30 | basecharOffset = basechar->basechar; |
4386 | 30 | basechar = finalizeCharacter(table, basecharOffset, detect_loop); |
4387 | 30 | if (!basechar) return NULL; |
4388 | 30 | if (character->mode & (basechar->attributes | basechar->mode)) { |
4389 | 0 | char *attributeName = NULL; |
4390 | 0 | const CharacterClass *class = table->characterClasses; |
4391 | 0 | while (class) { |
4392 | 0 | if (class->attribute == character->mode) { |
4393 | 0 | attributeName = |
4394 | 0 | strdup(_lou_showString(class->name, class->length, 0)); |
4395 | 0 | break; |
4396 | 0 | } |
4397 | 0 | class = class->next; |
4398 | 0 | } |
4399 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
4400 | 0 | "%s: error: Base character %s can not have the %s " |
4401 | 0 | "attribute.", |
4402 | 0 | printSource(NULL, character->sourceFile, character->sourceLine), |
4403 | 0 | _lou_showString(&basechar->value, 1, 0), |
4404 | 0 | attributeName != NULL ? attributeName : "?"); |
4405 | 0 | errorCount++; |
4406 | 0 | free(attributeName); |
4407 | 0 | return NULL; |
4408 | 0 | } |
4409 | 30 | } |
4410 | | // unset character definition rule or base rule (whichever was declared |
4411 | | // last) if the dot patterns are not compatible, meaning if the real parts |
4412 | | // (1-8) of the dot patterns do not match |
4413 | 29 | TranslationTableRule *basecharDefRule = |
4414 | 29 | (TranslationTableRule *)&table->ruleArea[basechar->definitionRule]; |
4415 | 29 | if (character->definitionRule) { |
4416 | 0 | TranslationTableRule *defRule = |
4417 | 0 | (TranslationTableRule *)&table->ruleArea[character->definitionRule]; |
4418 | 0 | if (defRule->dotslen != basecharDefRule->dotslen || |
4419 | 0 | memcmp(&defRule->charsdots[defRule->charslen], |
4420 | 0 | &basecharDefRule->charsdots[basecharDefRule->charslen], |
4421 | 0 | defRule->dotslen * CHARSIZE)) { |
4422 | 0 | char *defOpcodeName = strdup(_lou_findOpcodeName(defRule->opcode)); |
4423 | 0 | if (defRule->index < character->ruleIndex) { |
4424 | | // character definition rule was defined before base rule; ignore base |
4425 | | // rule |
4426 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
4427 | 0 | "%s:%d: Character already defined (%s). The existing %s rule " |
4428 | 0 | "will take precedence over the new base rule.", |
4429 | 0 | character->sourceFile, character->sourceLine, |
4430 | 0 | printSource(character->sourceFile, defRule->sourceFile, |
4431 | 0 | defRule->sourceLine), |
4432 | 0 | defOpcodeName); |
4433 | 0 | free(defOpcodeName); |
4434 | 0 | character->basechar = 0; |
4435 | 0 | character->mode = 0; |
4436 | 0 | character->sourceFile = defRule->sourceFile; |
4437 | 0 | character->sourceLine = defRule->sourceLine; |
4438 | 0 | character->ruleIndex = defRule->index; |
4439 | 0 | character->finalized = 1; |
4440 | 0 | return character; |
4441 | 0 | } else { |
4442 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
4443 | 0 | "%s:%d: A base rule already exists for this character (%s). " |
4444 | 0 | "The " |
4445 | 0 | "existing base rule will take precedence over the new %s " |
4446 | 0 | "rule.", |
4447 | 0 | defRule->sourceFile, defRule->sourceLine, |
4448 | 0 | printSource(defRule->sourceFile, character->sourceFile, |
4449 | 0 | character->sourceLine), |
4450 | 0 | defOpcodeName); |
4451 | 0 | free(defOpcodeName); |
4452 | 0 | character->definitionRule = 0; |
4453 | 0 | } |
4454 | 0 | } |
4455 | 0 | } |
4456 | 29 | character->mode = mode; |
4457 | 29 | character->basechar = basecharOffset; |
4458 | | // add mode to attributes |
4459 | 29 | character->attributes |= character->mode; |
4460 | 29 | if (character->attributes & (CTC_UpperCase | CTC_LowerCase)) |
4461 | 14 | character->attributes |= CTC_Letter; |
4462 | | // also set the new attributes on the associated dots of the base |
4463 | | // character |
4464 | 29 | if (basecharDefRule->dotslen == 1) { |
4465 | 0 | TranslationTableCharacter *dots = |
4466 | 0 | getDots(basecharDefRule->charsdots[basecharDefRule->charslen], table); |
4467 | 0 | if (dots) { |
4468 | 0 | dots->attributes |= character->mode; |
4469 | 0 | if (dots->attributes & (CTC_UpperCase | CTC_LowerCase)) |
4470 | 0 | dots->attributes |= CTC_Letter; |
4471 | 0 | } |
4472 | 0 | } |
4473 | | // store all characters that are based on a base character in list |
4474 | 29 | if (basechar->linked) character->linked = basechar->linked; |
4475 | 29 | basechar->linked = characterOffset; |
4476 | 29 | } |
4477 | 2.64k | character->finalized = 1; |
4478 | 2.64k | return character; |
4479 | 2.64k | } |
4480 | | |
4481 | | static int |
4482 | 463 | finalizeTable(TranslationTableHeader *table) { |
4483 | 463 | if (table->finalized) return 1; |
4484 | | // normalize basechar and mode of all characters |
4485 | 266k | for (int i = 0; i < HASHNUM; i++) { |
4486 | 266k | TranslationTableOffset characterOffset = table->characters[i]; |
4487 | 268k | while (characterOffset) { |
4488 | 2.61k | TranslationTableCharacter *character = |
4489 | 2.61k | finalizeCharacter(table, characterOffset, 0); |
4490 | 2.61k | if (!character) return 0; |
4491 | 2.61k | characterOffset = character->next; |
4492 | 2.61k | } |
4493 | 266k | } |
4494 | | // add noletsign rules from single-letter word and largesign rules |
4495 | 266k | for (int i = 0; i < HASHNUM; i++) { |
4496 | 266k | TranslationTableOffset characterOffset = table->characters[i]; |
4497 | 268k | while (characterOffset) { |
4498 | 2.61k | TranslationTableCharacter *character = |
4499 | 2.61k | (TranslationTableCharacter *)&table->ruleArea[characterOffset]; |
4500 | 2.61k | if (character->attributes & CTC_Letter) { |
4501 | 720 | TranslationTableOffset *otherRule = &character->otherRules; |
4502 | 773 | while (*otherRule) { |
4503 | 53 | TranslationTableRule *rule = |
4504 | 53 | (TranslationTableRule *)&table->ruleArea[*otherRule]; |
4505 | 53 | if (rule->opcode == CTO_WholeWord || rule->opcode == CTO_LargeSign) |
4506 | 0 | if (table->noLetsignCount < LETSIGNSIZE) |
4507 | 0 | table->noLetsign[table->noLetsignCount++] = |
4508 | 0 | rule->charsdots[0]; |
4509 | 53 | otherRule = &rule->charsnext; |
4510 | 53 | } |
4511 | 720 | } |
4512 | 2.61k | characterOffset = character->next; |
4513 | 2.61k | } |
4514 | 266k | } |
4515 | | // Rearrange rules in `forRules' so that when iterating over candidate rules in |
4516 | | // for_selectRule(), both case-sensitive and case-insensitive rules are contained |
4517 | | // within the same ordered list. We do the rearrangement by iterating over all |
4518 | | // case-sensitive rules and if needed move them to another bucket. This may slow down |
4519 | | // the compilation of tables with a lot of context rules, but the good news is that |
4520 | | // translation speed is not affected. |
4521 | 266k | for (unsigned long int i = 0; i < HASHNUM; i++) { |
4522 | 266k | TranslationTableOffset *p = &table->forRules[i]; |
4523 | 266k | while (*p) { |
4524 | 160 | TranslationTableRule *rule = (TranslationTableRule *)&table->ruleArea[*p]; |
4525 | | // For now only move the rules that we know are case-sensitive, namely |
4526 | | // `context' rules. (Note that there may be other case-sensitive rules that |
4527 | | // we're currently not aware of.) We don't move case insensitive rules because |
4528 | | // the user can/should define them using all lowercases. |
4529 | 160 | if (rule->opcode == CTO_Context) { |
4530 | 1 | unsigned long int hash = _lou_stringHash(&rule->charsdots[0], 1, table); |
4531 | | // no need to do anything if the first two characters are not uppercase |
4532 | | // letters |
4533 | 1 | if (hash != i) { |
4534 | | // compute new position |
4535 | 0 | TranslationTableOffset *insert_at = &table->forRules[hash]; |
4536 | 0 | while (*insert_at) { |
4537 | 0 | TranslationTableRule *r = |
4538 | 0 | (TranslationTableRule *)&table->ruleArea[*insert_at]; |
4539 | 0 | if (rule->charslen > r->charslen) |
4540 | 0 | break; |
4541 | 0 | else if (rule->charslen == r->charslen && r->opcode == CTO_Always) |
4542 | 0 | break; |
4543 | 0 | insert_at = &r->charsnext; |
4544 | 0 | } |
4545 | | // remove rule from current list and insert it at the correct position |
4546 | | // in the new list |
4547 | 0 | TranslationTableOffset next = rule->charsnext; |
4548 | 0 | rule->charsnext = *insert_at; |
4549 | 0 | *insert_at = *p; |
4550 | 0 | *p = next; |
4551 | 0 | continue; |
4552 | 0 | } |
4553 | 1 | } |
4554 | 160 | p = &rule->charsnext; |
4555 | 160 | } |
4556 | 266k | } |
4557 | 237 | table->finalized = 1; |
4558 | 237 | return 1; |
4559 | 237 | } |
4560 | | |
4561 | | static int |
4562 | | compileString(const char *inString, TranslationTableHeader **table, |
4563 | 274 | DisplayTableHeader **displayTable) { |
4564 | | /* This function can be used to make changes to tables on the fly. */ |
4565 | 274 | int k; |
4566 | 274 | FileInfo file; |
4567 | 274 | if (inString == NULL) return 0; |
4568 | 274 | memset(&file, 0, sizeof(file)); |
4569 | 274 | file.fileName = inString; |
4570 | 274 | file.encoding = noEncoding; |
4571 | 274 | file.lineNumber = 1; |
4572 | 274 | file.status = 0; |
4573 | 274 | file.linepos = 0; |
4574 | 12.0k | for (k = 0; k < MAXSTRING - 1 && inString[k]; k++) file.line[k] = inString[k]; |
4575 | 274 | file.line[k] = 0; |
4576 | 274 | file.linelen = k; |
4577 | 274 | if (table && *table && (*table)->finalized) { |
4578 | 0 | compileError(&file, "Table is finalized"); |
4579 | 0 | return 0; |
4580 | 0 | } |
4581 | 274 | return compileRule(&file, table, displayTable, NULL); |
4582 | 274 | } |
4583 | | |
4584 | | static int |
4585 | 238 | setDefaults(TranslationTableHeader *table) { |
4586 | 952 | for (int i = 0; i < 3; i++) |
4587 | 714 | if (!table->emphRules[i][lenPhraseOffset]) |
4588 | 714 | table->emphRules[i][lenPhraseOffset] = 4; |
4589 | 238 | if (table->numPasses == 0) table->numPasses = 1; |
4590 | 238 | return 1; |
4591 | 238 | } |
4592 | | |
/* =============== *
 * TABLE RESOLVING *
 * =============== *
 *
 * A table resolver is a function that resolves a `tableList` path against a
 * `base` path, and returns the resolved table(s) as a list of absolute file
 * paths.
 *
 * The function must have the following signature:
 *
 *     char ** (const char * tableList, const char * base)
 *
 * In general, `tableList` is a path in the broad sense. The default
 * implementation accepts only *file* paths, but another implementation could
 * for instance handle URIs. `base`, however, is always a file path.
 *
 * The idea is to give other programs that use liblouis the ability to define
 * their own table resolver (in C, Java, Python, etc.) when the default
 * resolver does not meet their needs (see also lou_registerTableResolver).
 *
 */
4614 | | |
4615 | | /** |
4616 | | * Resolve a single (sub)table. |
4617 | | * |
4618 | | * Tries to resolve `table` against `base` if base is an absolute path. If |
4619 | | * that fails, searches `searchPath`. |
4620 | | * |
4621 | | */ |
4622 | | static char * |
4623 | 274 | resolveSubtable(const char *table, const char *base, const char *searchPath) { |
4624 | 274 | char *tableFile; |
4625 | 274 | static struct stat info; |
4626 | | |
4627 | 550 | #define MAX_TABLEFILE_SIZE (MAXSTRING * sizeof(char) * 2) |
4628 | 274 | if (table == NULL || table[0] == '\0') return NULL; |
4629 | 274 | tableFile = (char *)malloc(MAX_TABLEFILE_SIZE); |
4630 | | |
4631 | | // |
4632 | | // First try to resolve against base |
4633 | | // |
4634 | 274 | if (base) { |
4635 | 1 | int k; |
4636 | 1 | if (strlen(base) >= MAX_TABLEFILE_SIZE) goto failure; |
4637 | 1 | strcpy(tableFile, base); |
4638 | 1 | k = (int)strlen(tableFile); |
4639 | 15 | while (k >= 0 && tableFile[k] != '/' && tableFile[k] != '\\') k--; |
4640 | 1 | tableFile[++k] = '\0'; |
4641 | 1 | if (strlen(tableFile) + strlen(table) >= MAX_TABLEFILE_SIZE) goto failure; |
4642 | 1 | strcat(tableFile, table); |
4643 | 1 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4644 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4645 | 0 | return tableFile; |
4646 | 0 | } |
4647 | 1 | } |
4648 | | |
4649 | | // |
4650 | | // It could be an absolute path, or a path relative to the current working |
4651 | | // directory |
4652 | | // |
4653 | 274 | if (strlen(table) >= MAX_TABLEFILE_SIZE) goto failure; |
4654 | 274 | strcpy(tableFile, table); |
4655 | 274 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4656 | 274 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4657 | 274 | return tableFile; |
4658 | 274 | } |
4659 | | |
4660 | | // |
4661 | | // Then search `LOUIS_TABLEPATH`, `dataPath` and `programPath` |
4662 | | // |
4663 | 0 | if (searchPath[0] != '\0') { |
4664 | 0 | char *dir; |
4665 | 0 | int last; |
4666 | 0 | char *cp; |
4667 | 0 | char *searchPath_copy = strdup(searchPath); |
4668 | 0 | for (dir = searchPath_copy;; dir = cp + 1) { |
4669 | 0 | for (cp = dir; *cp != '\0' && *cp != ','; cp++) |
4670 | 0 | ; |
4671 | 0 | last = (*cp == '\0'); |
4672 | 0 | *cp = '\0'; |
4673 | 0 | if (dir == cp) dir = "."; |
4674 | 0 | if (strlen(dir) + strlen(table) + 1 >= MAX_TABLEFILE_SIZE) { |
4675 | 0 | free(searchPath_copy); |
4676 | 0 | goto failure; |
4677 | 0 | } |
4678 | 0 | sprintf(tableFile, "%s%c%s", dir, DIR_SEP, table); |
4679 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4680 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4681 | 0 | free(searchPath_copy); |
4682 | 0 | return tableFile; |
4683 | 0 | } |
4684 | 0 | if (last) break; |
4685 | 0 | if (strlen(dir) + strlen("liblouis") + strlen("tables") + strlen(table) + 3 >= |
4686 | 0 | MAX_TABLEFILE_SIZE) { |
4687 | 0 | free(searchPath_copy); |
4688 | 0 | goto failure; |
4689 | 0 | } |
4690 | 0 | sprintf(tableFile, "%s%c%s%c%s%c%s", dir, DIR_SEP, "liblouis", DIR_SEP, |
4691 | 0 | "tables", DIR_SEP, table); |
4692 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4693 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4694 | 0 | free(searchPath_copy); |
4695 | 0 | return tableFile; |
4696 | 0 | } |
4697 | 0 | if (last) break; |
4698 | 0 | } |
4699 | 0 | free(searchPath_copy); |
4700 | 0 | } |
4701 | 0 | failure: |
4702 | 0 | free(tableFile); |
4703 | 0 | return NULL; |
4704 | 0 | } |
4705 | | |
4706 | | char *EXPORT_CALL |
4707 | 274 | _lou_getTablePath(void) { |
4708 | 274 | char searchPath[MAXSTRING]; |
4709 | 274 | char *path; |
4710 | 274 | char *cp; |
4711 | 274 | int envset = 0; |
4712 | 274 | cp = searchPath; |
4713 | 274 | path = getenv("LOUIS_TABLEPATH"); |
4714 | 274 | if (path != NULL && path[0] != '\0') { |
4715 | 0 | envset = 1; |
4716 | 0 | cp += sprintf(cp, ",%s", path); |
4717 | 0 | } |
4718 | 274 | path = dataPathPtr; |
4719 | 274 | if (path != NULL && path[0] != '\0') |
4720 | 0 | cp += sprintf(cp, ",%s%c%s%c%s", path, DIR_SEP, "liblouis", DIR_SEP, "tables"); |
4721 | 274 | if (!envset) { |
4722 | | #ifdef _WIN32 |
4723 | | path = lou_getProgramPath(); |
4724 | | if (path != NULL) { |
4725 | | if (path[0] != '\0') |
4726 | | // assuming the following directory structure: |
4727 | | // . |
4728 | | // ├── bin |
4729 | | // │ ├── liblouis.dll |
4730 | | // ├── include |
4731 | | // ├── lib |
4732 | | // └── share |
4733 | | // ├── doc |
4734 | | // ├── info |
4735 | | // └── liblouis |
4736 | | // └── tables |
4737 | | cp += sprintf(cp, ",%s%s", path, "\\..\\share\\liblouis\\tables"); |
4738 | | free(path); |
4739 | | } |
4740 | | #else |
4741 | 274 | cp += sprintf(cp, ",%s", TABLESDIR); |
4742 | 274 | #endif |
4743 | 274 | } |
4744 | 274 | if (searchPath[0] != '\0') |
4745 | 274 | return strdup(&searchPath[1]); |
4746 | 0 | else |
4747 | 0 | return strdup("."); |
4748 | 274 | } |
4749 | | |
4750 | | /** |
4751 | | * The default table resolver |
4752 | | * |
4753 | | * Tries to resolve tableList against base. The search path is set to |
4754 | | * `LOUIS_TABLEPATH`, `dataPath` and `programPath` (in that order). |
4755 | | * |
4756 | | * @param table A file path, may be absolute or relative. May be a list of |
4757 | | * tables separated by commas. In that case, the first table |
4758 | | * is used as the base for the other subtables. |
4759 | | * @param base A file path or directory path, or NULL. |
4760 | | * @return The file paths of the resolved subtables, or NULL if the table |
4761 | | * could not be resolved. |
4762 | | * |
4763 | | */ |
4764 | | char **EXPORT_CALL |
4765 | 274 | _lou_defaultTableResolver(const char *tableList, const char *base) { |
4766 | 274 | char *searchPath; |
4767 | 274 | char **tableFiles; |
4768 | 274 | char *subTable; |
4769 | 274 | char *tableList_copy; |
4770 | 274 | char *cp; |
4771 | 274 | int last; |
4772 | 274 | int k; |
4773 | | |
4774 | | /* Set up search path */ |
4775 | 274 | searchPath = _lou_getTablePath(); |
4776 | | |
4777 | | /* Count number of subtables in table list */ |
4778 | 274 | k = 0; |
4779 | 6.06k | for (cp = (char *)tableList; *cp != '\0'; cp++) |
4780 | 5.79k | if (*cp == ',') k++; |
4781 | 274 | tableFiles = (char **)calloc(k + 2, sizeof(char *)); |
4782 | 274 | if (!tableFiles) _lou_outOfMemory(); |
4783 | | |
4784 | | /* Resolve subtables */ |
4785 | 274 | k = 0; |
4786 | 274 | tableList_copy = strdup(tableList); |
4787 | 274 | for (subTable = tableList_copy;; subTable = cp + 1) { |
4788 | 6.06k | for (cp = subTable; *cp != '\0' && *cp != ','; cp++) |
4789 | 5.79k | ; |
4790 | 274 | last = (*cp == '\0'); |
4791 | 274 | *cp = '\0'; |
4792 | 274 | if (!(tableFiles[k++] = resolveSubtable(subTable, base, searchPath))) { |
4793 | 0 | char *path; |
4794 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", subTable); |
4795 | 0 | path = getenv("LOUIS_TABLEPATH"); |
4796 | 0 | if (path != NULL && path[0] != '\0') |
4797 | 0 | _lou_logMessage(LOU_LOG_ERROR, "LOUIS_TABLEPATH=%s", path); |
4798 | 0 | free(searchPath); |
4799 | 0 | free(tableList_copy); |
4800 | 0 | free_tablefiles(tableFiles); |
4801 | 0 | return NULL; |
4802 | 0 | } |
4803 | 274 | if (k == 1) base = subTable; |
4804 | 274 | if (last) break; |
4805 | 274 | } |
4806 | 274 | free(searchPath); |
4807 | 274 | free(tableList_copy); |
4808 | 274 | tableFiles[k] = NULL; |
4809 | 274 | return tableFiles; |
4810 | 274 | } |
4811 | | |
4812 | | static char **(EXPORT_CALL *tableResolver)( |
4813 | | const char *tableList, const char *base) = &_lou_defaultTableResolver; |
4814 | | |
/**
 * Make a deep copy of a NULL-terminated array of strings.
 *
 * @param array The array to copy, or NULL.
 * @return A newly allocated NULL-terminated array holding independent copies
 *         of all strings; the caller frees every string and the array itself.
 *         NULL if `array` is NULL or if memory could not be allocated (nothing
 *         is leaked in that case).
 */
static char **
copyStringArray(char **array) {
    size_t count = 0;
    size_t i;
    char **copy;
    if (!array) return NULL;
    while (array[count]) count++;
    /* calloc leaves every slot NULL, which also provides the terminator */
    copy = calloc(count + 1, sizeof(*copy));
    if (!copy) return NULL;
    for (i = 0; i < count; i++) {
        size_t size = strlen(array[i]) + 1;
        copy[i] = malloc(size);
        if (!copy[i]) {
            /* undo the partial copy rather than return a truncated list */
            while (i > 0) free(copy[--i]);
            free(copy);
            return NULL;
        }
        memcpy(copy[i], array[i], size);
    }
    return copy;
}
4830 | | |
4831 | | char **EXPORT_CALL |
4832 | 274 | _lou_resolveTable(const char *tableList, const char *base) { |
4833 | 274 | char **tableFiles = (*tableResolver)(tableList, base); |
4834 | 274 | char **result = copyStringArray(tableFiles); |
4835 | 274 | if (tableResolver == &_lou_defaultTableResolver) free_tablefiles(tableFiles); |
4836 | 274 | return result; |
4837 | 274 | } |
4838 | | |
4839 | | /** |
4840 | | * Register a new table resolver. Overrides the default resolver. |
4841 | | * |
4842 | | * @param resolver The new resolver as a function pointer. |
4843 | | * |
4844 | | */ |
4845 | | void EXPORT_CALL |
4846 | | lou_registerTableResolver( |
4847 | 0 | char **(EXPORT_CALL *resolver)(const char *tableList, const char *base)) { |
4848 | 0 | tableResolver = resolver; |
4849 | 0 | } |
4850 | | |
4851 | | static int fileCount = 0; |
4852 | | |
4853 | | /** |
4854 | | * Compile a single file |
4855 | | * |
4856 | | */ |
4857 | | static int |
4858 | | compileFile(const char *fileName, TranslationTableHeader **table, |
4859 | 274 | DisplayTableHeader **displayTable) { |
4860 | 274 | FileInfo file; |
4861 | 274 | fileCount++; |
4862 | 274 | file.fileName = fileName; |
4863 | 274 | if (table) { |
4864 | 274 | int i; |
4865 | 275 | for (i = 0; (*table)->sourceFiles[i]; i++) |
4866 | 1 | ; |
4867 | 274 | if (i >= MAX_SOURCE_FILES) { |
4868 | 0 | _lou_logMessage(LOU_LOG_WARN, "Max number of source files (%i) reached", |
4869 | 0 | MAX_SOURCE_FILES); |
4870 | 0 | file.sourceFile = NULL; |
4871 | 274 | } else { |
4872 | 274 | file.sourceFile = (*table)->sourceFiles[i] = strdup(fileName); |
4873 | 274 | } |
4874 | 274 | } |
4875 | 274 | file.encoding = noEncoding; |
4876 | 274 | file.status = 0; |
4877 | 274 | file.lineNumber = 0; |
4878 | 274 | if ((file.in = fopen(file.fileName, "rb"))) { |
4879 | | // the scope of a macro is the current file (after the macro definition) |
4880 | 274 | const MacroList *inscopeMacros = NULL; |
4881 | 1.92k | while (_lou_getALine(&file)) |
4882 | 1.67k | if (!compileRule(&file, table, displayTable, &inscopeMacros)) { |
4883 | 26 | if (!errorCount) compileError(&file, "Rule could not be compiled"); |
4884 | 26 | break; |
4885 | 26 | } |
4886 | 274 | fclose(file.in); |
4887 | 274 | free_macro_list(inscopeMacros); |
4888 | 274 | } else { |
4889 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", file.fileName); |
4890 | 0 | errorCount++; |
4891 | 0 | } |
4892 | 274 | return !errorCount; |
4893 | 274 | } |
4894 | | |
4895 | | static void |
4896 | 273 | freeTranslationTable(TranslationTableHeader *t) { |
4897 | 299 | for (int i = 0; i < MAX_EMPH_CLASSES && t->emphClassNames[i]; i++) |
4898 | 26 | free(t->emphClassNames[i]); |
4899 | 547 | for (int i = 0; t->sourceFiles[i]; i++) free(t->sourceFiles[i]); |
4900 | 273 | if (t->characterClasses) deallocateCharacterClasses(t); |
4901 | 273 | if (t->ruleNames) deallocateRuleNames(t); |
4902 | 273 | free(t); |
4903 | 273 | } |
4904 | | |
4905 | | static void |
4906 | 273 | freeDisplayTable(DisplayTableHeader *t) { |
4907 | 273 | free(t); |
4908 | 273 | } |
4909 | | |
/**
 * Free a NULL-terminated array of strings: every string up to the first NULL
 * entry, then the array itself. A NULL array is ignored.
 */
static void
free_tablefiles(char **tables) {
    size_t i;
    if (tables == NULL) return;
    for (i = 0; tables[i] != NULL; i++) free(tables[i]);
    free(tables);
}
4920 | | |
4921 | | /** |
4922 | | * Implement include opcode |
4923 | | * |
4924 | | */ |
4925 | | static int |
4926 | | includeFile(const FileInfo *file, CharsString *includedFile, |
4927 | 1 | TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
4928 | 1 | int k; |
4929 | 1 | char includeThis[MAXSTRING]; |
4930 | 1 | char **tableFiles; |
4931 | 1 | int rv; |
4932 | 19 | for (k = 0; k < includedFile->length; k++) |
4933 | 18 | includeThis[k] = (char)includedFile->chars[k]; |
4934 | 1 | if (k >= MAXSTRING) { |
4935 | 0 | compileError(file, "Include statement too long: 'include %s'", includeThis); |
4936 | 0 | return 0; |
4937 | 0 | } |
4938 | 1 | includeThis[k] = 0; |
4939 | 1 | tableFiles = _lou_resolveTable(includeThis, file->fileName); |
4940 | 1 | if (tableFiles == NULL) { |
4941 | 0 | errorCount++; |
4942 | 0 | return 0; |
4943 | 0 | } |
4944 | 1 | if (tableFiles[1] != NULL) { |
4945 | 0 | free_tablefiles(tableFiles); |
4946 | 0 | compileError(file, "Table list not supported in include statement: 'include %s'", |
4947 | 0 | includeThis); |
4948 | 0 | return 0; |
4949 | 0 | } |
4950 | 1 | rv = compileFile(*tableFiles, table, displayTable); |
4951 | 1 | free_tablefiles(tableFiles); |
4952 | 1 | if (!rv) |
4953 | 0 | _lou_logMessage(LOU_LOG_ERROR, "%s:%d: Error in included file", file->fileName, |
4954 | 0 | file->lineNumber); |
4955 | 1 | return rv; |
4956 | 1 | } |
4957 | | |
4958 | | /** |
4959 | | * Compile source tables into a table in memory |
4960 | | * |
4961 | | */ |
4962 | | static int |
4963 | | compileTable(const char *tableList, const char *displayTableList, |
4964 | 273 | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) { |
4965 | 273 | char **tableFiles; |
4966 | 273 | char **subTable; |
4967 | 273 | if (translationTable && !tableList) return 0; |
4968 | 273 | if (displayTable && !displayTableList) return 0; |
4969 | 273 | if (!translationTable && !displayTable) return 0; |
4970 | 273 | if (translationTable) *translationTable = NULL; |
4971 | 273 | if (displayTable) *displayTable = NULL; |
4972 | 273 | errorCount = warningCount = fileCount = 0; |
4973 | 273 | if (!opcodeLengths[0]) { |
4974 | 273 | TranslationTableOpcode opcode; |
4975 | 32.2k | for (opcode = 0; opcode < CTO_None; opcode++) |
4976 | 31.9k | opcodeLengths[opcode] = (short)strlen(opcodeNames[opcode]); |
4977 | 273 | } |
4978 | 273 | if (translationTable) allocateTranslationTable(NULL, translationTable); |
4979 | 273 | if (displayTable) allocateDisplayTable(NULL, displayTable); |
4980 | | |
4981 | 273 | if (translationTable) { |
4982 | 273 | (*translationTable)->emphClassNames[0] = NULL; |
4983 | 273 | (*translationTable)->characterClasses = NULL; |
4984 | 273 | (*translationTable)->ruleNames = NULL; |
4985 | 273 | } |
4986 | | |
4987 | | /* Compile things that are necessary for the proper operation of |
4988 | | * liblouis or liblouisxml or liblouisutdml */ |
4989 | | /* TODO: These definitions seem to be necessary for proper functioning of |
4990 | | liblouisutdml. Find a way to satisfy those requirements without hard coding |
4991 | | some characters in every table notably behind the user's back */ |
4992 | 273 | compileString("space \\xffff 123456789abcdef LOU_ENDSEGMENT", translationTable, |
4993 | 273 | displayTable); |
4994 | | |
4995 | 273 | if (displayTable && translationTable && strcmp(tableList, displayTableList) == 0) { |
4996 | | /* Compile the display and translation tables in one go */ |
4997 | | |
4998 | | /* Compile all subtables in the list */ |
4999 | 273 | if (!(tableFiles = _lou_resolveTable(tableList, NULL))) { |
5000 | 0 | errorCount++; |
5001 | 0 | goto cleanup; |
5002 | 0 | } |
5003 | 511 | for (subTable = tableFiles; *subTable; subTable++) |
5004 | 273 | if (!compileFile(*subTable, translationTable, displayTable)) goto cleanup; |
5005 | 273 | } else { |
5006 | | /* Compile the display and translation tables separately */ |
5007 | |
|
5008 | 0 | if (displayTable) { |
5009 | 0 | if (!(tableFiles = _lou_resolveTable(displayTableList, NULL))) { |
5010 | 0 | errorCount++; |
5011 | 0 | goto cleanup; |
5012 | 0 | } |
5013 | 0 | for (subTable = tableFiles; *subTable; subTable++) |
5014 | 0 | if (!compileFile(*subTable, NULL, displayTable)) goto cleanup; |
5015 | 0 | free_tablefiles(tableFiles); |
5016 | 0 | tableFiles = NULL; |
5017 | 0 | } |
5018 | 0 | if (translationTable) { |
5019 | 0 | if (!(tableFiles = _lou_resolveTable(tableList, NULL))) { |
5020 | 0 | errorCount++; |
5021 | 0 | goto cleanup; |
5022 | 0 | } |
5023 | 0 | for (subTable = tableFiles; *subTable; subTable++) |
5024 | 0 | if (!compileFile(*subTable, translationTable, NULL)) goto cleanup; |
5025 | 0 | } |
5026 | 0 | } |
5027 | | |
5028 | | /* Clean up after compiling files */ |
5029 | 273 | cleanup: |
5030 | 273 | free_tablefiles(tableFiles); |
5031 | 273 | if (warningCount) |
5032 | 213 | _lou_logMessage(LOU_LOG_WARN, "%s: %d warnings issued", tableList, warningCount); |
5033 | 273 | if (!errorCount) { |
5034 | 238 | if (translationTable) setDefaults(*translationTable); |
5035 | 238 | return 1; |
5036 | 238 | } else { |
5037 | 35 | _lou_logMessage(LOU_LOG_ERROR, "%d errors found.", errorCount); |
5038 | 35 | if (translationTable) { |
5039 | 35 | if (*translationTable) freeTranslationTable(*translationTable); |
5040 | 35 | *translationTable = NULL; |
5041 | 35 | } |
5042 | 35 | if (displayTable) { |
5043 | 35 | if (*displayTable) freeDisplayTable(*displayTable); |
5044 | 35 | *displayTable = NULL; |
5045 | 35 | } |
5046 | 35 | return 0; |
5047 | 35 | } |
5048 | 273 | } |
5049 | | |
5050 | | /* Return the emphasis classes declared in tableList. */ |
5051 | | char const **EXPORT_CALL |
5052 | 0 | lou_getEmphClasses(const char *tableList) { |
5053 | 0 | const char *names[MAX_EMPH_CLASSES + 1]; |
5054 | 0 | unsigned int count = 0; |
5055 | 0 | const TranslationTableHeader *table = _lou_getTranslationTable(tableList); |
5056 | 0 | if (!table) return NULL; |
5057 | | |
5058 | 0 | while (count < MAX_EMPH_CLASSES) { |
5059 | 0 | char const *name = table->emphClassNames[count]; |
5060 | 0 | if (!name) break; |
5061 | 0 | names[count++] = name; |
5062 | 0 | } |
5063 | 0 | names[count++] = NULL; |
5064 | |
|
5065 | 0 | { |
5066 | 0 | unsigned int size = count * sizeof(names[0]); |
5067 | 0 | char const **result = malloc(size); |
5068 | 0 | if (!result) return NULL; |
5069 | | /* The void* cast is necessary to stop MSVC from warning about |
5070 | | * different 'const' qualifiers (C4090). */ |
5071 | 0 | memcpy((void *)result, names, size); |
5072 | 0 | return result; |
5073 | 0 | } |
5074 | 0 | } |
5075 | | |
5076 | | void |
5077 | | getTable(const char *tableList, const char *displayTableList, |
5078 | | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable); |
5079 | | |
5080 | | void EXPORT_CALL |
5081 | | _lou_getTable(const char *tableList, const char *displayTableList, |
5082 | | const TranslationTableHeader **translationTable, |
5083 | 498 | const DisplayTableHeader **displayTable) { |
5084 | 498 | TranslationTableHeader *newTable = NULL; |
5085 | 498 | DisplayTableHeader *newDisplayTable = NULL; |
5086 | 498 | getTable(tableList, displayTableList, &newTable, &newDisplayTable); |
5087 | 498 | if (newTable) |
5088 | 463 | if (!finalizeTable(newTable)) newTable = NULL; |
5089 | 498 | *translationTable = newTable; |
5090 | 498 | *displayTable = newDisplayTable; |
5091 | 498 | } |
5092 | | |
5093 | | /* Checks and loads tableList. */ |
5094 | | const void *EXPORT_CALL |
5095 | 272 | lou_getTable(const char *tableList) { |
5096 | 272 | const TranslationTableHeader *table = NULL; |
5097 | 272 | const DisplayTableHeader *displayTable = NULL; |
5098 | 272 | _lou_getTable(tableList, tableList, &table, &displayTable); |
5099 | 272 | if (!table || !displayTable) return NULL; |
5100 | 237 | return table; |
5101 | 272 | } |
5102 | | |
5103 | | const TranslationTableHeader *EXPORT_CALL |
5104 | 0 | _lou_getTranslationTable(const char *tableList) { |
5105 | 0 | TranslationTableHeader *table = NULL; |
5106 | 0 | getTable(tableList, NULL, &table, NULL); |
5107 | 0 | if (table) |
5108 | 0 | if (!finalizeTable(table)) table = NULL; |
5109 | 0 | return table; |
5110 | 0 | } |
5111 | | |
5112 | | const DisplayTableHeader *EXPORT_CALL |
5113 | 0 | _lou_getDisplayTable(const char *tableList) { |
5114 | 0 | DisplayTableHeader *table = NULL; |
5115 | 0 | getTable(NULL, tableList, NULL, &table); |
5116 | 0 | return table; |
5117 | 0 | } |
5118 | | |
5119 | | void |
5120 | | getTable(const char *translationTableList, const char *displayTableList, |
5121 | 499 | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) { |
5122 | | /* Keep track of which tables have already been compiled */ |
5123 | 499 | int translationTableListLen, displayTableListLen = 0; |
5124 | 499 | if (translationTableList == NULL || *translationTableList == 0) |
5125 | 0 | translationTable = NULL; |
5126 | 499 | if (displayTableList == NULL || *displayTableList == 0) displayTable = NULL; |
5127 | | /* See if translation table has already been compiled */ |
5128 | 499 | if (translationTable) { |
5129 | 499 | translationTableListLen = (int)strlen(translationTableList); |
5130 | 499 | *translationTable = NULL; |
5131 | 499 | TranslationTableChainEntry *currentEntry = translationTableChain; |
5132 | 499 | TranslationTableChainEntry *prevEntry = NULL; |
5133 | 499 | while (currentEntry != NULL) { |
5134 | 226 | if (translationTableListLen == currentEntry->tableListLength && |
5135 | 226 | (memcmp(¤tEntry->tableList[0], translationTableList, |
5136 | 226 | translationTableListLen)) == 0) { |
5137 | | /* Move the table to the top of the table chain. */ |
5138 | 226 | if (prevEntry != NULL) { |
5139 | 0 | prevEntry->next = currentEntry->next; |
5140 | 0 | currentEntry->next = translationTableChain; |
5141 | 0 | translationTableChain = currentEntry; |
5142 | 0 | } |
5143 | 226 | *translationTable = currentEntry->table; |
5144 | 226 | break; |
5145 | 226 | } |
5146 | 0 | prevEntry = currentEntry; |
5147 | 0 | currentEntry = currentEntry->next; |
5148 | 0 | } |
5149 | 499 | } |
5150 | | /* See if display table has already been compiled */ |
5151 | 499 | if (displayTable) { |
5152 | 499 | displayTableListLen = (int)strlen(displayTableList); |
5153 | 499 | *displayTable = NULL; |
5154 | 499 | DisplayTableChainEntry *currentEntry = displayTableChain; |
5155 | 499 | DisplayTableChainEntry *prevEntry = NULL; |
5156 | 499 | while (currentEntry != NULL) { |
5157 | 226 | if (displayTableListLen == currentEntry->tableListLength && |
5158 | 226 | (memcmp(¤tEntry->tableList[0], displayTableList, |
5159 | 226 | displayTableListLen)) == 0) { |
5160 | | /* Move the table to the top of the table chain. */ |
5161 | 226 | if (prevEntry != NULL) { |
5162 | 0 | prevEntry->next = currentEntry->next; |
5163 | 0 | currentEntry->next = displayTableChain; |
5164 | 0 | displayTableChain = currentEntry; |
5165 | 0 | } |
5166 | 226 | *displayTable = currentEntry->table; |
5167 | 226 | break; |
5168 | 226 | } |
5169 | 0 | prevEntry = currentEntry; |
5170 | 0 | currentEntry = currentEntry->next; |
5171 | 0 | } |
5172 | 499 | } |
5173 | 499 | if ((translationTable && *translationTable == NULL) || |
5174 | 499 | (displayTable && *displayTable == NULL)) { |
5175 | 273 | TranslationTableHeader *newTranslationTable = NULL; |
5176 | 273 | DisplayTableHeader *newDisplayTable = NULL; |
5177 | 273 | if (compileTable(translationTableList, displayTableList, |
5178 | 273 | (translationTable && *translationTable == NULL) ? &newTranslationTable |
5179 | 273 | : NULL, |
5180 | 273 | (displayTable && *displayTable == NULL) ? &newDisplayTable : NULL)) { |
5181 | | /* Add a new entry to the top of the table chain. */ |
5182 | 238 | if (newTranslationTable != NULL) { |
5183 | 238 | int entrySize = |
5184 | 238 | sizeof(TranslationTableChainEntry) + translationTableListLen; |
5185 | 238 | TranslationTableChainEntry *newEntry = malloc(entrySize); |
5186 | 238 | if (!newEntry) _lou_outOfMemory(); |
5187 | 238 | newEntry->next = translationTableChain; |
5188 | 238 | newEntry->table = newTranslationTable; |
5189 | 238 | newEntry->tableListLength = translationTableListLen; |
5190 | 238 | memcpy(&newEntry->tableList[0], translationTableList, |
5191 | 238 | translationTableListLen); |
5192 | 238 | translationTableChain = newEntry; |
5193 | 238 | *translationTable = newTranslationTable; |
5194 | 238 | } |
5195 | 238 | if (newDisplayTable != NULL) { |
5196 | 238 | int entrySize = sizeof(DisplayTableChainEntry) + displayTableListLen; |
5197 | 238 | DisplayTableChainEntry *newEntry = malloc(entrySize); |
5198 | 238 | if (!newEntry) _lou_outOfMemory(); |
5199 | 238 | newEntry->next = displayTableChain; |
5200 | 238 | newEntry->table = newDisplayTable; |
5201 | 238 | newEntry->tableListLength = displayTableListLen; |
5202 | 238 | memcpy(&newEntry->tableList[0], displayTableList, displayTableListLen); |
5203 | 238 | displayTableChain = newEntry; |
5204 | 238 | *displayTable = newDisplayTable; |
5205 | 238 | } |
5206 | 238 | } else { |
5207 | 35 | _lou_logMessage( |
5208 | 35 | LOU_LOG_ERROR, "%s could not be compiled", translationTableList); |
5209 | 35 | return; |
5210 | 35 | } |
5211 | 273 | } |
5212 | 499 | } |
5213 | | |
5214 | | int EXPORT_CALL |
5215 | 272 | lou_checkTable(const char *tableList) { |
5216 | 272 | if (lou_getTable(tableList)) return 1; |
5217 | 35 | return 0; |
5218 | 272 | } |
5219 | | |
5220 | | formtype EXPORT_CALL |
5221 | 0 | lou_getTypeformForEmphClass(const char *tableList, const char *emphClass) { |
5222 | 0 | const TranslationTableHeader *table = _lou_getTranslationTable(tableList); |
5223 | 0 | if (!table) return 0; |
5224 | 0 | for (int i = 0; i < MAX_EMPH_CLASSES && table->emphClassNames[i]; i++) |
5225 | 0 | if (strcmp(emphClass, table->emphClassNames[i]) == 0) return italic << i; |
5226 | 0 | return 0; |
5227 | 0 | } |
5228 | | |
5229 | | static unsigned char *destSpacing = NULL; |
5230 | | static int sizeDestSpacing = 0; |
5231 | | static formtype *typebuf = NULL; |
5232 | | static unsigned int *wordBuffer = NULL; |
5233 | | static EmphasisInfo *emphasisBuffer = NULL; |
5234 | | static int sizeTypebuf = 0; |
5235 | | static widechar *passbuf[MAXPASSBUF] = { NULL }; |
5236 | | static int sizePassbuf[MAXPASSBUF] = { 0 }; |
5237 | | static int *posMapping1 = NULL; |
5238 | | static int sizePosMapping1 = 0; |
5239 | | static int *posMapping2 = NULL; |
5240 | | static int sizePosMapping2 = 0; |
5241 | | static int *posMapping3 = NULL; |
5242 | | static int sizePosMapping3 = 0; |
5243 | | void *EXPORT_CALL |
5244 | 1.52k | _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax) { |
5245 | 1.52k | if (srcmax < 1024) srcmax = 1024; |
5246 | 1.52k | if (destmax < 1024) destmax = 1024; |
5247 | 1.52k | switch (buffer) { |
5248 | 141 | case alloc_typebuf: |
5249 | 141 | if (destmax > sizeTypebuf) { |
5250 | 141 | if (typebuf != NULL) free(typebuf); |
5251 | | // TODO: should this be srcmax? |
5252 | 141 | typebuf = malloc((destmax + 4) * sizeof(formtype)); |
5253 | 141 | if (!typebuf) _lou_outOfMemory(); |
5254 | 141 | sizeTypebuf = destmax; |
5255 | 141 | } |
5256 | 141 | return typebuf; |
5257 | | |
5258 | 141 | case alloc_wordBuffer: |
5259 | | |
5260 | 141 | if (wordBuffer != NULL) free(wordBuffer); |
5261 | 141 | wordBuffer = calloc(srcmax + 4, sizeof(unsigned int)); |
5262 | 141 | if (wordBuffer == NULL) _lou_outOfMemory(); |
5263 | 141 | return wordBuffer; |
5264 | | |
5265 | 141 | case alloc_emphasisBuffer: |
5266 | | |
5267 | 141 | if (emphasisBuffer != NULL) free(emphasisBuffer); |
5268 | 141 | emphasisBuffer = calloc(srcmax + 4, sizeof(EmphasisInfo)); |
5269 | 141 | if (emphasisBuffer == NULL) _lou_outOfMemory(); |
5270 | 141 | return emphasisBuffer; |
5271 | | |
5272 | 0 | case alloc_destSpacing: |
5273 | 0 | if (destmax > sizeDestSpacing) { |
5274 | 0 | if (destSpacing != NULL) free(destSpacing); |
5275 | 0 | destSpacing = malloc(destmax + 4); |
5276 | 0 | if (!destSpacing) _lou_outOfMemory(); |
5277 | 0 | sizeDestSpacing = destmax; |
5278 | 0 | } |
5279 | 0 | return destSpacing; |
5280 | 543 | case alloc_passbuf: |
5281 | 543 | if (index < 0 || index >= MAXPASSBUF) { |
5282 | 0 | _lou_logMessage(LOU_LOG_FATAL, "Index out of bounds: %d\n", index); |
5283 | 0 | exit(3); |
5284 | 0 | } |
5285 | 543 | if (destmax > sizePassbuf[index]) { |
5286 | 468 | if (passbuf[index] != NULL) free(passbuf[index]); |
5287 | 468 | passbuf[index] = malloc((destmax + 4) * CHARSIZE); |
5288 | 468 | if (!passbuf[index]) _lou_outOfMemory(); |
5289 | 468 | sizePassbuf[index] = destmax; |
5290 | 468 | } |
5291 | 543 | return passbuf[index]; |
5292 | 226 | case alloc_posMapping1: { |
5293 | 226 | int mapSize; |
5294 | 226 | if (srcmax >= destmax) |
5295 | 47 | mapSize = srcmax; |
5296 | 179 | else |
5297 | 179 | mapSize = destmax; |
5298 | 226 | if (mapSize > sizePosMapping1) { |
5299 | 226 | if (posMapping1 != NULL) free(posMapping1); |
5300 | 226 | posMapping1 = malloc((mapSize + 4) * sizeof(int)); |
5301 | 226 | if (!posMapping1) _lou_outOfMemory(); |
5302 | 226 | sizePosMapping1 = mapSize; |
5303 | 226 | } |
5304 | 226 | } |
5305 | 226 | return posMapping1; |
5306 | 164 | case alloc_posMapping2: { |
5307 | 164 | int mapSize; |
5308 | 164 | if (srcmax >= destmax) |
5309 | 30 | mapSize = srcmax; |
5310 | 134 | else |
5311 | 134 | mapSize = destmax; |
5312 | 164 | if (mapSize > sizePosMapping2) { |
5313 | 164 | if (posMapping2 != NULL) free(posMapping2); |
5314 | 164 | posMapping2 = malloc((mapSize + 4) * sizeof(int)); |
5315 | 164 | if (!posMapping2) _lou_outOfMemory(); |
5316 | 164 | sizePosMapping2 = mapSize; |
5317 | 164 | } |
5318 | 164 | } |
5319 | 164 | return posMapping2; |
5320 | 164 | case alloc_posMapping3: { |
5321 | 164 | int mapSize; |
5322 | 164 | if (srcmax >= destmax) |
5323 | 30 | mapSize = srcmax; |
5324 | 134 | else |
5325 | 134 | mapSize = destmax; |
5326 | 164 | if (mapSize > sizePosMapping3) { |
5327 | 164 | if (posMapping3 != NULL) free(posMapping3); |
5328 | 164 | posMapping3 = malloc((mapSize + 4) * sizeof(int)); |
5329 | 164 | if (!posMapping3) _lou_outOfMemory(); |
5330 | 164 | sizePosMapping3 = mapSize; |
5331 | 164 | } |
5332 | 164 | } |
5333 | 164 | return posMapping3; |
5334 | 0 | default: |
5335 | 0 | return NULL; |
5336 | 1.52k | } |
5337 | 1.52k | } |
5338 | | |
5339 | | void EXPORT_CALL |
5340 | 273 | lou_free(void) { |
5341 | 273 | lou_logEnd(); |
5342 | 273 | if (translationTableChain != NULL) { |
5343 | 238 | TranslationTableChainEntry *currentEntry = translationTableChain; |
5344 | 238 | TranslationTableChainEntry *previousEntry; |
5345 | 476 | while (currentEntry) { |
5346 | 238 | freeTranslationTable(currentEntry->table); |
5347 | 238 | previousEntry = currentEntry; |
5348 | 238 | currentEntry = currentEntry->next; |
5349 | 238 | free(previousEntry); |
5350 | 238 | } |
5351 | 238 | translationTableChain = NULL; |
5352 | 238 | } |
5353 | 273 | if (displayTableChain != NULL) { |
5354 | 238 | DisplayTableChainEntry *currentEntry = displayTableChain; |
5355 | 238 | DisplayTableChainEntry *previousEntry; |
5356 | 476 | while (currentEntry) { |
5357 | 238 | freeDisplayTable(currentEntry->table); |
5358 | 238 | previousEntry = currentEntry; |
5359 | 238 | currentEntry = currentEntry->next; |
5360 | 238 | free(previousEntry); |
5361 | 238 | } |
5362 | 238 | displayTableChain = NULL; |
5363 | 238 | } |
5364 | 273 | if (typebuf != NULL) free(typebuf); |
5365 | 273 | typebuf = NULL; |
5366 | 273 | if (wordBuffer != NULL) free(wordBuffer); |
5367 | 273 | wordBuffer = NULL; |
5368 | 273 | if (emphasisBuffer != NULL) free(emphasisBuffer); |
5369 | 273 | emphasisBuffer = NULL; |
5370 | 273 | sizeTypebuf = 0; |
5371 | 273 | if (destSpacing != NULL) free(destSpacing); |
5372 | 273 | destSpacing = NULL; |
5373 | 273 | sizeDestSpacing = 0; |
5374 | 273 | { |
5375 | 273 | int k; |
5376 | 1.09k | for (k = 0; k < MAXPASSBUF; k++) { |
5377 | 819 | if (passbuf[k] != NULL) free(passbuf[k]); |
5378 | 819 | passbuf[k] = NULL; |
5379 | 819 | sizePassbuf[k] = 0; |
5380 | 819 | } |
5381 | 273 | } |
5382 | 273 | if (posMapping1 != NULL) free(posMapping1); |
5383 | 273 | posMapping1 = NULL; |
5384 | 273 | sizePosMapping1 = 0; |
5385 | 273 | if (posMapping2 != NULL) free(posMapping2); |
5386 | 273 | posMapping2 = NULL; |
5387 | 273 | sizePosMapping2 = 0; |
5388 | 273 | if (posMapping3 != NULL) free(posMapping3); |
5389 | 273 | posMapping3 = NULL; |
5390 | 273 | sizePosMapping3 = 0; |
5391 | 273 | opcodeLengths[0] = 0; |
5392 | 273 | } |
5393 | | |
5394 | | const char *EXPORT_CALL |
5395 | 0 | lou_version(void) { |
5396 | 0 | static const char *version = PACKAGE_VERSION; |
5397 | 0 | return version; |
5398 | 0 | } |
5399 | | |
5400 | | int EXPORT_CALL |
5401 | 0 | lou_charSize(void) { |
5402 | 0 | return CHARSIZE; |
5403 | 0 | } |
5404 | | |
5405 | | int EXPORT_CALL |
5406 | 1 | lou_compileString(const char *tableList, const char *inString) { |
5407 | 1 | TranslationTableHeader *table; |
5408 | 1 | DisplayTableHeader *displayTable; |
5409 | 1 | getTable(tableList, tableList, &table, &displayTable); |
5410 | 1 | if (!table) return 0; |
5411 | 1 | if (!compileString(inString, &table, &displayTable)) return 0; |
5412 | 1 | return 1; |
5413 | 1 | } |
5414 | | |
5415 | | int EXPORT_CALL |
5416 | 0 | _lou_compileTranslationRule(const char *tableList, const char *inString) { |
5417 | 0 | TranslationTableHeader *table; |
5418 | 0 | getTable(tableList, NULL, &table, NULL); |
5419 | 0 | return compileString(inString, &table, NULL); |
5420 | 0 | } |
5421 | | |
5422 | | int EXPORT_CALL |
5423 | 0 | _lou_compileDisplayRule(const char *tableList, const char *inString) { |
5424 | 0 | DisplayTableHeader *table; |
5425 | 0 | getTable(NULL, tableList, NULL, &table); |
5426 | 0 | return compileString(inString, NULL, &table); |
5427 | 0 | } |
5428 | | |
/**
 * This procedure provides a target for calls that serve as breakpoints
 * for gdb.
 */
5433 | | // char *EXPORT_CALL |
5434 | | // lou_getTablePaths (void) |
5435 | | // { |
5436 | | // static char paths[MAXSTRING]; |
5437 | | // static char scratchBuf[MAXSTRING]; |
5438 | | // char *pathList; |
5439 | | // strcpy (paths, tablePath); |
5440 | | // strcat (paths, ","); |
5441 | | // pathList = getenv ("LOUIS_TABLEPATH"); |
5442 | | // if (pathList) |
5443 | | // { |
5444 | | // strcat (paths, pathList); |
5445 | | // strcat (paths, ","); |
5446 | | // } |
5447 | | // pathList = getcwd (scratchBuf, MAXSTRING); |
5448 | | // if (pathList) |
5449 | | // { |
5450 | | // strcat (paths, pathList); |
5451 | | // strcat (paths, ","); |
5452 | | // } |
5453 | | // pathList = lou_getDataPath (); |
5454 | | // if (pathList) |
5455 | | // { |
5456 | | // strcat (paths, pathList); |
5457 | | // strcat (paths, ","); |
5458 | | // } |
5459 | | // #ifdef _WIN32 |
5460 | | // strcpy (paths, lou_getProgramPath ()); |
5461 | | // strcat (paths, "\\share\\liblouss\\tables\\"); |
5462 | | // #else |
5463 | | // strcpy (paths, TABLESDIR); |
5464 | | // #endif |
5465 | | // return paths; |
5466 | | // } |