/src/liblouis/liblouis/compileTranslationTable.c
Line | Count | Source |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The |
4 | | BRLTTY Team |
5 | | |
6 | | Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com |
7 | | Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com |
8 | | Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
9 | | Copyright (C) 2016 Davy Kager, Dedicon |
10 | | |
11 | | This file is part of liblouis. |
12 | | |
13 | | liblouis is free software: you can redistribute it and/or modify it |
14 | | under the terms of the GNU Lesser General Public License as published |
15 | | by the Free Software Foundation, either version 2.1 of the License, or |
16 | | (at your option) any later version. |
17 | | |
18 | | liblouis is distributed in the hope that it will be useful, but |
19 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | Lesser General Public License for more details. |
22 | | |
23 | | You should have received a copy of the GNU Lesser General Public |
24 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | /** |
28 | | * @file |
29 | | * @brief Read and compile translation tables |
30 | | */ |
31 | | |
32 | | #include "config.h" |
33 | | |
34 | | #include <stddef.h> |
35 | | #include <stdlib.h> |
36 | | #include <stdio.h> |
37 | | #include <stdarg.h> |
38 | | #include <string.h> |
39 | | #include <ctype.h> |
40 | | #include <sys/stat.h> |
41 | | |
42 | | #include "internal.h" |
43 | | |
44 | 8.61k | #define QUOTESUB 28 /* Stand-in for double quotes in strings */ |
45 | | |
/* needed to make debugging easier */
#ifdef DEBUG
wchar_t wchar;
#endif
50 | | |
/* The following variables and functions make it possible to specify the
 * path on which all tables for liblouis and all files for liblouisutdml,
 * in their proper directories, will be found.
 */

/* Currently configured data path, or NULL when none is set. Written by
 * lou_setDataPath and returned by lou_getDataPath. */
static char *dataPathPtr;
57 | | |
58 | | char *EXPORT_CALL |
59 | 0 | lou_setDataPath(const char *path) { |
60 | 0 | _lou_logMessage(LOU_LOG_WARN, "warning: lou_setDataPath is deprecated."); |
61 | 0 | static char dataPath[MAXSTRING]; |
62 | 0 | dataPathPtr = NULL; |
63 | 0 | if (path == NULL || strlen(path) >= MAXSTRING) return NULL; |
64 | 0 | strcpy(dataPath, path); |
65 | 0 | dataPathPtr = dataPath; |
66 | 0 | return dataPathPtr; |
67 | 0 | } |
68 | | |
69 | | char *EXPORT_CALL |
70 | 0 | lou_getDataPath(void) { |
71 | 0 | _lou_logMessage(LOU_LOG_WARN, "warning: lou_getDataPath is deprecated."); |
72 | 0 | return dataPathPtr; |
73 | 0 | } |
74 | | |
75 | | /* End of dataPath code. */ |
76 | | |
77 | | static int |
78 | 30.3k | eqasc2uni(const unsigned char *a, const widechar *b, const int len) { |
79 | 30.3k | int k; |
80 | 64.5k | for (k = 0; k < len; k++) |
81 | 59.9k | if ((widechar)a[k] != b[k]) return 0; |
82 | 4.54k | return 1; |
83 | 30.3k | } |
84 | | |
/* A counted widechar string; getToken fills `chars` and sets `length` to the
 * number of characters stored. */
typedef struct CharsString {
    widechar length;
    widechar chars[MAXSTRING];
} CharsString;

/* Incremented by compileError for every error reported. */
static int errorCount;
/* Incremented by compileWarning for every warning reported. */
static int warningCount;

/* Node of the singly linked list of translation tables. The list is walked
 * by allocateSpaceInTranslationTable to repoint `table` after a realloc.
 * NOTE(review): `tableList` presumably holds `tableListLength` bytes allocated
 * past the end of the struct -- confirm against the code that creates entries. */
typedef struct TranslationTableChainEntry {
    struct TranslationTableChainEntry *next;
    TranslationTableHeader *table;
    int tableListLength;
    char tableList[1];
} TranslationTableChainEntry;

/* Head of the translation table list; NULL when the list is empty. */
static TranslationTableChainEntry *translationTableChain = NULL;

/* Node of the singly linked list of display tables. The list is walked by
 * allocateSpaceInDisplayTable to repoint `table` after a realloc.
 * NOTE(review): `tableList` presumably holds `tableListLength` bytes allocated
 * past the end of the struct -- confirm against the code that creates entries. */
typedef struct DisplayTableChainEntry {
    struct DisplayTableChainEntry *next;
    DisplayTableHeader *table;
    int tableListLength;
    char tableList[1];
} DisplayTableChainEntry;

/* Head of the display table list; NULL when the list is empty. */
static DisplayTableChainEntry *displayTableChain = NULL;
110 | | |
/* predefined character classes (NULL-terminated list) */
static const char *characterClassNames[] = {
    "space",
    "letter",
    "digit",
    "punctuation",
    "uppercase",
    "lowercase",
    "math",
    "sign",
    "litdigit",
    NULL,
};

// names that may not be used for custom attributes (NULL-terminated list)
static const char *reservedAttributeNames[] = {
    "numericnocontchars",
    "numericnocontchar",
    "numericnocont",
    "midendnumericmodechars",
    "midendnumericmodechar",
    "midendnumericmode",
    "numericmodechars",
    "numericmodechar",
    "numericmode",
    "capsmodechars",
    "capsmodechar",
    "capsmode",
    "emphmodechars",
    "emphmodechar",
    "emphmode",
    "noemphchars",
    "noemphchar",
    "noemph",
    "seqdelimiter",
    "seqbeforechars",
    "seqbeforechar",
    "seqbefore",
    "seqafterchars",
    "seqafterchar",
    "seqafter",
    "noletsign",
    "noletsignbefore",
    "noletsignafter",
    NULL,
};
157 | | |
/* Textual names of the table opcodes; the array has CTO_None entries.
 * NOTE(review): presumably indexed by opcode value, so the order here must
 * match the opcode enum declared in internal.h -- confirm before reordering
 * or inserting entries. */
static const char *opcodeNames[CTO_None] = {
    "include",
    "locale",
    "undefined",
    "capsletter",
    "begcapsword",
    "endcapsword",
    "begcaps",
    "endcaps",
    "begcapsphrase",
    "endcapsphrase",
    "lencapsphrase",
    "modeletter",
    "begmodeword",
    "endmodeword",
    "begmode",
    "endmode",
    "begmodephrase",
    "endmodephrase",
    "lenmodephrase",
    "letsign",
    "noletsignbefore",
    "noletsign",
    "noletsignafter",
    "numsign",
    "nonumsign",
    "numericmodechars",
    "midendnumericmodechars",
    "numericnocontchars",
    "seqdelimiter",
    "seqbeforechars",
    "seqafterchars",
    "seqafterpattern",
    "seqafterexpression",
    "emphclass",
    "emphletter",
    "begemphword",
    "endemphword",
    "begemph",
    "endemph",
    "begemphphrase",
    "endemphphrase",
    "lenemphphrase",
    "capsmodechars",
    "emphmodechars",
    "noemphchars",
    "begcomp",
    "endcomp",
    "nocontractsign",
    "multind",
    "compdots",
    "comp6",
    "class",
    "after",
    "before",
    "noback",
    "nofor",
    "empmatchbefore",
    "empmatchafter",
    "swapcc",
    "swapcd",
    "swapdd",
    "space",
    "digit",
    "punctuation",
    "math",
    "sign",
    "letter",
    "uppercase",
    "lowercase",
    "grouping",
    "uplow",
    "litdigit",
    "display",
    "replace",
    "context",
    "correct",
    "pass2",
    "pass3",
    "pass4",
    "repeated",
    "repword",
    "rependword",
    "capsnocont",
    "always",
    "exactdots",
    "nocross",
    "syllable",
    "nocont",
    "compbrl",
    "literal",
    "largesign",
    "word",
    "partword",
    "joinnum",
    "joinword",
    "lowword",
    "contraction",
    "sufword",
    "prfword",
    "begword",
    "begmidword",
    "midword",
    "midendword",
    "endword",
    "prepunc",
    "postpunc",
    "begnum",
    "midnum",
    "endnum",
    "decpoint",
    "hyphen",
    // "apostrophe",
    // "initial",
    "nobreak",
    "match",
    "backmatch",
    "attribute",
    "base",
    "macro",
};

/* One slot per opcode, all zero initially.
 * NOTE(review): presumably a lazily filled cache of the lengths of the
 * opcodeNames entries -- confirm against the code that reads it. */
static short opcodeLengths[CTO_None] = { 0 };
281 | | |
/* Forward declaration: getAChar and getToken report errors through
 * compileError, which is defined after them. */
static void
compileError(const FileInfo *file, const char *format, ...);
284 | | |
/* Return the next character of `file`, or EOF at end of input or when the
 * encoding cannot be determined.
 *
 * The first two bytes of the stream are stored in file->checkencoding and
 * used to choose the encoding; file->status counts how far that start-up
 * sequence has progressed. */
static int
getAChar(FileInfo *file) {
    /* Read a big endian, little endian or ASCII 8 file and convert it to
     * 16- or 32-bit unsigned integers */
    int ch1 = 0, ch2 = 0;
    widechar character;
    /* In an ASCII file the two sniffed bytes are real data: the first one was
     * returned by the call that detected the encoding, the second one is
     * handed out here on the following call. */
    if (file->encoding == ascii8)
        if (file->status == 2) {
            file->status++;
            return file->checkencoding[1];
        }
    while ((ch1 = fgetc(file->in)) != EOF) {
        /* remember the first two bytes for encoding detection */
        if (file->status < 2) file->checkencoding[file->status] = ch1;
        file->status++;
        if (file->status == 2) {
            /* FE FF selects big-endian, FF FE selects little-endian; both
             * marker bytes are consumed and not returned to the caller. */
            if (file->checkencoding[0] == 0xfe && file->checkencoding[1] == 0xff)
                file->encoding = bigEndian;
            else if (file->checkencoding[0] == 0xff && file->checkencoding[1] == 0xfe)
                file->encoding = littleEndian;
            else if (file->checkencoding[0] < 128 && file->checkencoding[1] < 128) {
                file->encoding = ascii8;
                return file->checkencoding[0];
            } else {
                compileError(file,
                        "encoding is neither big-endian, little-endian nor ASCII 8.");
                ch1 = EOF;
                break;
                ;
            }
            continue;
        }
        switch (file->encoding) {
        case noEncoding:
            /* encoding not decided yet (only one byte read so far): keep reading */
            break;
        case ascii8:
            return ch1;
            break;
        case bigEndian:
            /* high byte first; a missing second byte ends the input */
            ch2 = fgetc(file->in);
            if (ch2 == EOF) break;
            character = (widechar)(ch1 << 8) | ch2;
            return (int)character;
            break;
        case littleEndian:
            /* low byte first; a missing second byte ends the input */
            ch2 = fgetc(file->in);
            if (ch2 == EOF) break;
            character = (widechar)(ch2 << 8) | ch1;
            return (int)character;
            break;
        }
        if (ch1 == EOF || ch2 == EOF) break;
    }
    return EOF;
}
339 | | |
340 | | int EXPORT_CALL |
341 | 5.76k | _lou_getALine(FileInfo *file) { |
342 | | /* Read a line of widechar's from an input file */ |
343 | 5.76k | int ch; |
344 | 5.76k | file->linelen = 0; |
345 | 347k | while ((ch = getAChar(file)) != EOF) { |
346 | 346k | if (ch == 13) continue; |
347 | 345k | if (ch == 10 || file->linelen >= MAXSTRING - 1) break; |
348 | 341k | file->line[file->linelen++] = (widechar)ch; |
349 | 341k | } |
350 | 5.76k | file->line[file->linelen] = 0; |
351 | 5.76k | file->linepos = 0; |
352 | 5.76k | if (ch == EOF && !file->linelen) return 0; |
353 | 5.11k | file->lineNumber++; |
354 | 5.11k | return 1; |
355 | 5.76k | } |
356 | | |
357 | | static inline int |
358 | 180k | atEndOfLine(const FileInfo *file) { |
359 | 180k | return file->linepos >= file->linelen; |
360 | 180k | } |
361 | | |
362 | | static inline int |
363 | 174k | atTokenDelimiter(const FileInfo *file) { |
364 | 174k | return file->line[file->linepos] <= 32; |
365 | 174k | } |
366 | | |
367 | | static int |
368 | 12.1k | getToken(FileInfo *file, CharsString *result, const char *description) { |
369 | | /* Find the next string of contiguous non-whitespace characters. If this |
370 | | * is the last token on the line, return 2 instead of 1. */ |
371 | 23.2k | while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
372 | 12.1k | result->length = 0; |
373 | 115k | while (!atEndOfLine(file) && !atTokenDelimiter(file)) { |
374 | 103k | int maxlen = MAXSTRING; |
375 | 103k | if (result->length >= maxlen) { |
376 | 0 | compileError(file, "more than %d characters (bytes)", maxlen); |
377 | 0 | return 0; |
378 | 0 | } else |
379 | 103k | result->chars[result->length++] = file->line[file->linepos++]; |
380 | 103k | } |
381 | 12.1k | if (!result->length) { |
382 | | /* Not enough tokens */ |
383 | 2.05k | if (description) compileError(file, "%s not specified.", description); |
384 | 2.05k | return 0; |
385 | 2.05k | } |
386 | 10.1k | result->chars[result->length] = 0; |
387 | 41.0k | while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
388 | 10.1k | return 1; |
389 | 12.1k | } |
390 | | |
391 | | static void |
392 | 4.53k | compileError(const FileInfo *file, const char *format, ...) { |
393 | 4.53k | #ifndef __SYMBIAN32__ |
394 | 4.53k | char buffer[MAXSTRING]; |
395 | 4.53k | va_list arguments; |
396 | 4.53k | va_start(arguments, format); |
397 | 4.53k | vsnprintf(buffer, sizeof(buffer), format, arguments); |
398 | 4.53k | va_end(arguments); |
399 | 4.53k | if (file) |
400 | 773 | _lou_logMessage(LOU_LOG_ERROR, "%s:%d: error: %s", file->fileName, |
401 | 773 | file->lineNumber, buffer); |
402 | 3.75k | else |
403 | 3.75k | _lou_logMessage(LOU_LOG_ERROR, "error: %s", buffer); |
404 | 4.53k | errorCount++; |
405 | 4.53k | #endif |
406 | 4.53k | } |
407 | | |
408 | | static void |
409 | 311k | compileWarning(const FileInfo *file, const char *format, ...) { |
410 | 311k | #ifndef __SYMBIAN32__ |
411 | 311k | char buffer[MAXSTRING]; |
412 | 311k | va_list arguments; |
413 | 311k | va_start(arguments, format); |
414 | 311k | vsnprintf(buffer, sizeof(buffer), format, arguments); |
415 | 311k | va_end(arguments); |
416 | 311k | if (file) |
417 | 79.6k | _lou_logMessage(LOU_LOG_WARN, "%s:%d: warning: %s", file->fileName, |
418 | 79.6k | file->lineNumber, buffer); |
419 | 231k | else |
420 | 231k | _lou_logMessage(LOU_LOG_WARN, "warning: %s", buffer); |
421 | 311k | warningCount++; |
422 | 311k | #endif |
423 | 311k | } |
424 | | |
425 | | static int |
426 | | allocateSpaceInTranslationTable(const FileInfo *file, TranslationTableOffset *offset, |
427 | 18.1k | int size, TranslationTableHeader **table) { |
428 | | /* allocate memory for table and expand previously allocated memory if necessary */ |
429 | 18.1k | int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
430 | 18.1k | TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
431 | 18.1k | TranslationTableOffset tableSize = (*table)->tableSize; |
432 | 18.1k | if (newTableSize > tableSize) { |
433 | 25 | TranslationTableHeader *newTable; |
434 | 25 | newTableSize += (newTableSize / OFFSETSIZE); |
435 | 25 | newTable = realloc(*table, newTableSize); |
436 | 25 | if (!newTable) { |
437 | 0 | compileError(file, "Not enough memory for translation table."); |
438 | 0 | _lou_outOfMemory(); |
439 | 0 | } |
440 | 25 | memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
441 | | /* update references to the old table */ |
442 | 25 | { |
443 | 25 | TranslationTableChainEntry *entry; |
444 | 25 | for (entry = translationTableChain; entry != NULL; entry = entry->next) |
445 | 0 | if (entry->table == *table) |
446 | 0 | entry->table = (TranslationTableHeader *)newTable; |
447 | 25 | } |
448 | 25 | newTable->tableSize = newTableSize; |
449 | 25 | *table = newTable; |
450 | 25 | } |
451 | 18.1k | if (offset != NULL) { |
452 | 18.1k | *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
453 | 18.1k | (*table)->bytesUsed += spaceNeeded; |
454 | 18.1k | } |
455 | 18.1k | return 1; |
456 | 18.1k | } |
457 | | |
458 | | static int |
459 | | allocateSpaceInDisplayTable(const FileInfo *file, TranslationTableOffset *offset, |
460 | 1.64k | int size, DisplayTableHeader **table) { |
461 | | /* allocate memory for table and expand previously allocated memory if necessary */ |
462 | 1.64k | int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
463 | 1.64k | TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
464 | 1.64k | TranslationTableOffset tableSize = (*table)->tableSize; |
465 | 1.64k | if (newTableSize > tableSize) { |
466 | 0 | DisplayTableHeader *newTable; |
467 | 0 | newTableSize += (newTableSize / OFFSETSIZE); |
468 | 0 | newTable = realloc(*table, newTableSize); |
469 | 0 | if (!newTable) { |
470 | 0 | compileError(file, "Not enough memory for display table."); |
471 | 0 | _lou_outOfMemory(); |
472 | 0 | } |
473 | 0 | memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
474 | | /* update references to the old table */ |
475 | 0 | { |
476 | 0 | DisplayTableChainEntry *entry; |
477 | 0 | for (entry = displayTableChain; entry != NULL; entry = entry->next) |
478 | 0 | if (entry->table == *table) entry->table = (DisplayTableHeader *)newTable; |
479 | 0 | } |
480 | 0 | newTable->tableSize = newTableSize; |
481 | 0 | *table = newTable; |
482 | 0 | } |
483 | 1.64k | if (offset != NULL) { |
484 | 1.64k | *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
485 | 1.64k | (*table)->bytesUsed += spaceNeeded; |
486 | 1.64k | } |
487 | 1.64k | return 1; |
488 | 1.64k | } |
489 | | |
490 | | static int |
491 | 697 | allocateTranslationTable(const FileInfo *file, TranslationTableHeader **table) { |
492 | | /* Allocate memory for the table and a guess on the number of rules */ |
493 | 697 | const TranslationTableOffset startSize = 2 * sizeof(**table); |
494 | 697 | if (*table) return 1; |
495 | 697 | TranslationTableOffset bytesUsed = |
496 | 697 | sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
497 | 697 | if (!(*table = malloc(startSize))) { |
498 | 0 | compileError(file, "Not enough memory"); |
499 | 0 | if (*table != NULL) free(*table); |
500 | 0 | *table = NULL; |
501 | 0 | _lou_outOfMemory(); |
502 | 0 | } |
503 | 697 | memset(*table, 0, startSize); |
504 | 697 | (*table)->tableSize = startSize; |
505 | 697 | (*table)->bytesUsed = bytesUsed; |
506 | 697 | return 1; |
507 | 697 | } |
508 | | |
509 | | static int |
510 | 697 | allocateDisplayTable(const FileInfo *file, DisplayTableHeader **table) { |
511 | | /* Allocate memory for the table and a guess on the number of rules */ |
512 | 697 | const TranslationTableOffset startSize = 2 * sizeof(**table); |
513 | 697 | if (*table) return 1; |
514 | 697 | TranslationTableOffset bytesUsed = |
515 | 697 | sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
516 | 697 | if (!(*table = malloc(startSize))) { |
517 | 0 | compileError(file, "Not enough memory"); |
518 | 0 | if (*table != NULL) free(*table); |
519 | 0 | *table = NULL; |
520 | 0 | _lou_outOfMemory(); |
521 | 0 | } |
522 | 697 | memset(*table, 0, startSize); |
523 | 697 | (*table)->tableSize = startSize; |
524 | 697 | (*table)->bytesUsed = bytesUsed; |
525 | 697 | return 1; |
526 | 697 | } |
527 | | |
528 | | /* Look up a character or dot pattern. Although the algorithms are almost identical, |
529 | | * different tables are needed for characters and dots because of the possibility of |
530 | | * conflicts. */ |
531 | | |
532 | | static TranslationTableCharacter * |
533 | | getChar(widechar c, TranslationTableHeader *table, |
534 | 34.2k | TranslationTableOffset *characterOffset) { |
535 | 34.2k | const TranslationTableOffset bucket = table->characters[_lou_charHash(c)]; |
536 | 34.2k | TranslationTableOffset offset = bucket; |
537 | 34.4k | while (offset) { |
538 | 27.5k | TranslationTableCharacter *character = |
539 | 27.5k | (TranslationTableCharacter *)&table->ruleArea[offset]; |
540 | 27.5k | if (character->value == c) { |
541 | 27.3k | if (characterOffset) *characterOffset = offset; |
542 | 27.3k | return character; |
543 | 27.3k | } |
544 | 208 | offset = character->next; |
545 | 208 | } |
546 | 6.85k | return NULL; |
547 | 34.2k | } |
548 | | |
549 | | static TranslationTableCharacter * |
550 | 3.33k | getDots(widechar d, TranslationTableHeader *table) { |
551 | 3.33k | const TranslationTableOffset bucket = table->dots[_lou_charHash(d)]; |
552 | 3.33k | TranslationTableOffset offset = bucket; |
553 | 3.34k | while (offset) { |
554 | 1.24k | TranslationTableCharacter *character = |
555 | 1.24k | (TranslationTableCharacter *)&table->ruleArea[offset]; |
556 | 1.24k | if (character->value == d) return character; |
557 | 9 | offset = character->next; |
558 | 9 | } |
559 | 2.10k | return NULL; |
560 | 3.33k | } |
561 | | |
562 | | static TranslationTableCharacter * |
563 | | putChar(const FileInfo *file, widechar c, TranslationTableHeader **table, |
564 | 33.6k | TranslationTableOffset *characterOffset, int ruleIndex) { |
565 | | /* See if a character is in the appropriate table. If not, insert it. In either case, |
566 | | * return a pointer to it. */ |
567 | 33.6k | TranslationTableCharacter *character; |
568 | 33.6k | TranslationTableOffset offset; |
569 | 33.6k | if ((character = getChar(c, *table, characterOffset))) return character; |
570 | 6.85k | if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
571 | 0 | return NULL; |
572 | 6.85k | character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
573 | 6.85k | memset(character, 0, sizeof(*character)); |
574 | 6.85k | character->sourceFile = file->sourceFile; |
575 | 6.85k | character->sourceLine = file->lineNumber; |
576 | 6.85k | character->ruleIndex = ruleIndex; |
577 | 6.85k | character->value = c; |
578 | 6.85k | const unsigned long int charHash = _lou_charHash(c); |
579 | 6.85k | const TranslationTableOffset bucket = (*table)->characters[charHash]; |
580 | 6.85k | if (!bucket) |
581 | 6.80k | (*table)->characters[charHash] = offset; |
582 | 49 | else { |
583 | 49 | TranslationTableCharacter *oldchar = |
584 | 49 | (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
585 | 52 | while (oldchar->next) |
586 | 3 | oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
587 | 49 | oldchar->next = offset; |
588 | 49 | } |
589 | 6.85k | if (characterOffset) *characterOffset = offset; |
590 | 6.85k | return character; |
591 | 6.85k | } |
592 | | |
593 | | static TranslationTableCharacter * |
594 | 2.43k | putDots(const FileInfo *file, widechar d, TranslationTableHeader **table, int ruleIndex) { |
595 | | /* See if a dot pattern is in the appropriate table. If not, insert it. In either |
596 | | * case, return a pointer to it. */ |
597 | 2.43k | TranslationTableCharacter *character; |
598 | 2.43k | TranslationTableOffset offset; |
599 | 2.43k | if ((character = getDots(d, *table))) return character; |
600 | 1.31k | if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
601 | 0 | return NULL; |
602 | 1.31k | character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
603 | 1.31k | memset(character, 0, sizeof(*character)); |
604 | 1.31k | character->sourceFile = file->sourceFile; |
605 | 1.31k | character->sourceLine = file->lineNumber; |
606 | 1.31k | character->ruleIndex = ruleIndex; |
607 | 1.31k | character->value = d; |
608 | 1.31k | const unsigned long int charHash = _lou_charHash(d); |
609 | 1.31k | const TranslationTableOffset bucket = (*table)->dots[charHash]; |
610 | 1.31k | if (!bucket) |
611 | 1.31k | (*table)->dots[charHash] = offset; |
612 | 3 | else { |
613 | 3 | TranslationTableCharacter *oldchar = |
614 | 3 | (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
615 | 3 | while (oldchar->next) |
616 | 0 | oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
617 | 3 | oldchar->next = offset; |
618 | 3 | } |
619 | 1.31k | return character; |
620 | 1.31k | } |
621 | | |
622 | | /* Look up a character-dots mapping in a display table. */ |
623 | | |
624 | | static CharDotsMapping * |
625 | 136k | getDotsForChar(widechar c, const DisplayTableHeader *table) { |
626 | 136k | if (table == NULL) return NULL; |
627 | 136k | CharDotsMapping *cdPtr; |
628 | 136k | const TranslationTableOffset bucket = table->charToDots[_lou_charHash(c)]; |
629 | 136k | TranslationTableOffset offset = bucket; |
630 | 137k | while (offset) { |
631 | 56.9k | cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
632 | 56.9k | if (cdPtr->lookFor == c) return cdPtr; |
633 | 179 | offset = cdPtr->next; |
634 | 179 | } |
635 | 80.1k | return NULL; |
636 | 136k | } |
637 | | |
638 | | static CharDotsMapping * |
639 | 29.2k | getCharForDots(widechar d, const DisplayTableHeader *table) { |
640 | 29.2k | if (table == NULL) return NULL; |
641 | 29.2k | CharDotsMapping *cdPtr; |
642 | 29.2k | const TranslationTableOffset bucket = table->dotsToChar[_lou_charHash(d)]; |
643 | 29.2k | TranslationTableOffset offset = bucket; |
644 | 29.2k | while (offset) { |
645 | 28.0k | cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
646 | 28.0k | if (cdPtr->lookFor == d) return cdPtr; |
647 | 1 | offset = cdPtr->next; |
648 | 1 | } |
649 | 1.12k | return NULL; |
650 | 29.2k | } |
651 | | |
652 | | widechar EXPORT_CALL |
653 | 136k | _lou_getDotsForChar(widechar c, const DisplayTableHeader *table) { |
654 | 136k | CharDotsMapping *cdPtr = getDotsForChar(c, table); |
655 | 136k | if (cdPtr) return cdPtr->found; |
656 | 79.3k | return LOU_DOTS; |
657 | 136k | } |
658 | | |
659 | | widechar EXPORT_CALL |
660 | 28.3k | _lou_getCharForDots(widechar d, const DisplayTableHeader *table) { |
661 | 28.3k | CharDotsMapping *cdPtr = getCharForDots(d, table); |
662 | 28.3k | if (cdPtr) return cdPtr->found; |
663 | 299 | return '\0'; |
664 | 28.3k | } |
665 | | |
666 | | static int |
667 | | putCharDotsMapping( |
668 | 837 | const FileInfo *file, widechar c, widechar d, DisplayTableHeader **table) { |
669 | 837 | if (!getDotsForChar(c, *table)) { |
670 | 823 | CharDotsMapping *cdPtr; |
671 | 823 | TranslationTableOffset offset; |
672 | 823 | if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
673 | 823 | cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
674 | 823 | cdPtr->next = 0; |
675 | 823 | cdPtr->lookFor = c; |
676 | 823 | cdPtr->found = d; |
677 | 823 | const unsigned long int charHash = _lou_charHash(c); |
678 | 823 | const TranslationTableOffset bucket = (*table)->charToDots[charHash]; |
679 | 823 | if (!bucket) |
680 | 812 | (*table)->charToDots[charHash] = offset; |
681 | 11 | else { |
682 | 11 | CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
683 | 11 | while (oldcdPtr->next) |
684 | 0 | oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
685 | 11 | oldcdPtr->next = offset; |
686 | 11 | } |
687 | 823 | } |
688 | 837 | if (!getCharForDots(d, *table)) { |
689 | 825 | CharDotsMapping *cdPtr; |
690 | 825 | TranslationTableOffset offset; |
691 | 825 | if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
692 | 825 | cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
693 | 825 | cdPtr->next = 0; |
694 | 825 | cdPtr->lookFor = d; |
695 | 825 | cdPtr->found = c; |
696 | 825 | const unsigned long int charHash = _lou_charHash(d); |
697 | 825 | const TranslationTableOffset bucket = (*table)->dotsToChar[charHash]; |
698 | 825 | if (!bucket) |
699 | 824 | (*table)->dotsToChar[charHash] = offset; |
700 | 1 | else { |
701 | 1 | CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
702 | 1 | while (oldcdPtr->next) |
703 | 0 | oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
704 | 1 | oldcdPtr->next = offset; |
705 | 1 | } |
706 | 825 | } |
707 | 837 | return 1; |
708 | 837 | } |
709 | | |
/* Name of a rule part for use in messages: "action" when `actionPart` is
 * non-zero, "test" otherwise. */
static inline const char *
getPartName(int actionPart) {
    if (actionPart != 0) return "action";
    return "test";
}
714 | | |
/* Scan the test part of a compiled pass rule for the first literal string or
 * dot sequence that the rule has to match.
 *
 * instructions: compiled instruction stream, scanned from index 0
 * end:          scanning stops when the instruction index reaches this value
 * characters:   receives a pointer into `instructions` at the literal
 *               characters found, or NULL when none were determined
 * length:       receives the number of characters at *characters, or 0
 *
 * Returns 1 on success (whether or not characters were found) and 0 after
 * reporting an error for an unknown instruction. */
static int
passFindCharacters(const FileInfo *file, widechar *instructions, int end,
        widechar **characters, int *length) {
    int IC = 0;       /* index of the instruction being examined */
    int lookback = 0; /* characters still to be skipped because of look-behind */

    *characters = NULL;
    *length = 0;

    while (IC < end) {
        widechar instruction = instructions[IC];

        switch (instruction) {
        case pass_string:
        case pass_dots: {
            /* operand layout: opcode, count, then `count` characters */
            int count = instructions[IC + 1];
            IC += 2;
            if (count > lookback) {
                /* part of this literal lies past the look-behind: report it */
                *characters = &instructions[IC + lookback];
                *length = count - lookback;
                return 1;
            } else {
                /* literal lies entirely within the look-behind: skip it */
                lookback -= count;
            }
            IC += count;
            continue;
        }

        case pass_attributes:
            /* this instruction occupies 7 slots; the last two are compared.
             * NOTE(review): presumably minimum and maximum repeat counts, so
             * only a fixed-width match inside the look-behind can be skipped
             * -- confirm against the pass compiler. */
            IC += 7;
            if (instructions[IC - 2] == instructions[IC - 1] &&
                    instructions[IC - 1] <= lookback) {
                lookback -= instructions[IC - 1];
                continue;
            }
            goto NO_CHARACTERS;

        case pass_swap:
            IC += 2;
            /* fall through */

        case pass_groupstart:
        case pass_groupend:
        case pass_groupreplace:
            IC += 3;

        /* Shared exit: no literal characters could be determined. Reached by
         * falling through from the cases above and by goto from elsewhere in
         * this function. */
        NO_CHARACTERS : { return 1; }

        case pass_eq:
        case pass_lt:
        case pass_gt:
        case pass_lteq:
        case pass_gteq:
            /* comparisons consume no input: skip opcode plus two operands */
            IC += 3;
            continue;

        case pass_lookback:
            lookback += instructions[IC + 1];
            IC += 2;
            continue;

        case pass_not:
        case pass_startReplace:
        case pass_endReplace:
        case pass_first:
        case pass_last:
        case pass_copy:
        case pass_omit:
        case pass_plus:
        case pass_hyphen:
            /* single-slot instructions that do not affect the search */
            IC += 1;
            continue;

        case pass_endTest:
            goto NO_CHARACTERS;

        default:
            compileError(file, "unhandled test suboperand: \\x%02x", instruction);
            return 0;
        }
    }
    /* ran off the end of the instructions without finding a literal */
    goto NO_CHARACTERS;
}
798 | | |
799 | | static const char * |
800 | 66 | printSource(const char *currentFile, const char *sourceFile, int sourceLine) { |
801 | 66 | static char scratchBuf[MAXSTRING]; |
802 | 66 | if (sourceFile) { |
803 | 66 | if (currentFile && strcmp(currentFile, sourceFile) == 0) |
804 | 66 | snprintf(scratchBuf, MAXSTRING, "line %d", sourceLine); |
805 | 0 | else |
806 | 0 | snprintf(scratchBuf, MAXSTRING, "%s:%d", sourceFile, sourceLine); |
807 | 66 | } else |
808 | 0 | snprintf(scratchBuf, MAXSTRING, "source unknown"); |
809 | 66 | return scratchBuf; |
810 | 66 | } |
811 | | |
812 | | /* The following functions are called by addRule to handle various cases. */ |
813 | | |
/* Link a forward rule whose characters operand is exactly one character into
 * the rule chain of that character (or, for pass2..pass4 opcodes, of that dot
 * pattern).
 *
 * file       - source position, used for log messages and passed to
 *              putChar/putDots
 * ruleOffset - offset of the rule inside (*table)->ruleArea
 * rule       - pointer to the same rule; re-derived from ruleOffset after every
 *              call that may move the table
 * table      - in/out: the table header pointer
 */
static void
addForwardRuleWithSingleChar(const FileInfo *file, TranslationTableOffset ruleOffset,
        TranslationTableRule *rule, TranslationTableHeader **table) {
    /* direction = 0, rule->charslen = 1 */
    TranslationTableCharacter *character;
    // get the character from the table, or if the character is not defined yet, define it
    // (without adding attributes)
    if (rule->opcode >= CTO_Pass2 && rule->opcode <= CTO_Pass4) {
        // for these opcodes the single operand is looked up as a dot pattern
        character = putDots(file, rule->charsdots[0], table, rule->index);
        // putDots may have moved table, so make sure rule is still valid
        rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
    } else if (rule->opcode == CTO_CompDots || rule->opcode == CTO_Comp6) {
        character = putChar(file, rule->charsdots[0], table, NULL, rule->index);
        // putChar may have moved table, so make sure rule is still valid
        rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
        // these rules are only recorded in compRule; they are not added to the
        // otherRules chain below
        character->compRule = ruleOffset;
        return;
    } else {
        character = putChar(file, rule->charsdots[0], table, NULL, rule->index);
        // putChar may have moved table, so make sure rule is still valid
        rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
        // if the new rule is a character definition rule, set the main definition rule of
        // this character to it, but don't override existing character definitions rules
        // or base rules
        // adding the attributes to the character has already been done elsewhere
        if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) {
            if (character->definitionRule) {
                TranslationTableRule *prevRule =
                        (TranslationTableRule *)&(*table)
                                ->ruleArea[character->definitionRule];
                // _lou_findOpcodeName may return a pointer to its static scratch
                // buffer, so each name is copied before the next call
                // NOTE(review): the strdup results are not checked for NULL before
                // being passed to the "%s" conversions below
                char *prevOpcodeName = strdup(_lou_findOpcodeName(prevRule->opcode));
                char *newOpcodeName = strdup(_lou_findOpcodeName(rule->opcode));
                _lou_logMessage(LOU_LOG_DEBUG,
                        "%s:%d: Character already defined (%s). The existing %s rule "
                        "will take precedence over the new %s rule.",
                        file->fileName, file->lineNumber,
                        printSource(file->sourceFile, prevRule->sourceFile,
                                prevRule->sourceLine),
                        prevOpcodeName, newOpcodeName);
                free(prevOpcodeName);
                free(newOpcodeName);
            } else {
                character->definitionRule = ruleOffset;
            }
        }
    }
    // add the new rule to the list of rules associated with this character
    // if the new rule is a character definition rule, it is inserted at the end of the
    // list, otherwise it is inserted before the first character definition rule
    // in other words, rules are considered in the order in which they are defined in the
    // table
    TranslationTableOffset *otherRule = &character->otherRules;
    while (*otherRule) {
        TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule];
        // stop in front of a rule that has no characters operand
        if (r->charslen == 0) break;
        // stop in front of the first definition rule unless the new rule is one too
        if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow)
            if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break;
        otherRule = &r->charsnext;
    }
    // splice the new rule in at the position found
    rule->charsnext = *otherRule;
    *otherRule = ruleOffset;
}
876 | | |
877 | | static void |
878 | | addForwardRuleWithMultipleChars(TranslationTableOffset ruleOffset, |
879 | 618 | TranslationTableRule *rule, TranslationTableHeader *table) { |
880 | | /* direction = 0 rule->charslen > 1 */ |
881 | 618 | TranslationTableOffset *forRule = |
882 | 618 | &table->forRules[_lou_stringHash(&rule->charsdots[0], 0, NULL)]; |
883 | 3.12k | while (*forRule) { |
884 | 2.52k | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forRule]; |
885 | 2.52k | if (rule->charslen > r->charslen) break; |
886 | 2.50k | if (rule->charslen == r->charslen) |
887 | 2.45k | if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
888 | 2.50k | forRule = &r->charsnext; |
889 | 2.50k | } |
890 | 618 | rule->charsnext = *forRule; |
891 | 618 | *forRule = ruleOffset; |
892 | 618 | } |
893 | | |
894 | | static void |
895 | | addBackwardRuleWithSingleCell(const FileInfo *file, widechar cell, |
896 | | TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
897 | 1.61k | TranslationTableHeader **table) { |
898 | | /* direction = 1, rule->dotslen = 1 */ |
899 | 1.61k | TranslationTableCharacter *dots; |
900 | 1.61k | if (rule->opcode == CTO_SwapCc || rule->opcode == CTO_Repeated) |
901 | 14 | return; /* too ambiguous */ |
902 | | // get the cell from the table, or if the cell is not defined yet, define it (without |
903 | | // adding attributes) |
904 | 1.59k | dots = putDots(file, cell, table, rule->index); |
905 | | // putDots may have moved table, so make sure rule is still valid |
906 | 1.59k | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
907 | 1.59k | if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) |
908 | 776 | dots->definitionRule = ruleOffset; |
909 | 1.59k | TranslationTableOffset *otherRule = &dots->otherRules; |
910 | 3.18k | while (*otherRule) { |
911 | 1.68k | TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule]; |
912 | 1.68k | if (rule->charslen > r->charslen || r->dotslen == 0) break; |
913 | 1.62k | if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow) |
914 | 33 | if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break; |
915 | 1.59k | otherRule = &r->dotsnext; |
916 | 1.59k | } |
917 | 1.59k | rule->dotsnext = *otherRule; |
918 | 1.59k | *otherRule = ruleOffset; |
919 | 1.59k | } |
920 | | |
921 | | static void |
922 | | addBackwardRuleWithMultipleCells(widechar *cells, int dotslen, |
923 | | TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
924 | 333 | TranslationTableHeader *table) { |
925 | | /* direction = 1, dotslen > 1 */ |
926 | 333 | TranslationTableOffset *backRule = &table->backRules[_lou_stringHash(cells, 0, NULL)]; |
927 | 333 | if (rule->opcode == CTO_SwapCc) return; |
928 | 333 | int ruleLength = dotslen + rule->charslen; |
929 | 650 | while (*backRule) { |
930 | 328 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backRule]; |
931 | 328 | int rLength = r->dotslen + r->charslen; |
932 | 328 | if (ruleLength > rLength) break; |
933 | 317 | if (rLength == ruleLength) |
934 | 296 | if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
935 | 317 | backRule = &r->dotsnext; |
936 | 317 | } |
937 | 333 | rule->dotsnext = *backRule; |
938 | 333 | *backRule = ruleOffset; |
939 | 333 | } |
940 | | |
941 | | static int |
942 | | addForwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
943 | 372 | TranslationTableHeader *table) { |
944 | 372 | TranslationTableOffset *forPassRule; |
945 | 372 | switch (rule->opcode) { |
946 | 250 | case CTO_Correct: |
947 | 250 | forPassRule = &table->forPassRules[0]; |
948 | 250 | break; |
949 | 60 | case CTO_Context: |
950 | 60 | forPassRule = &table->forPassRules[1]; |
951 | 60 | break; |
952 | 31 | case CTO_Pass2: |
953 | 31 | forPassRule = &table->forPassRules[2]; |
954 | 31 | break; |
955 | 16 | case CTO_Pass3: |
956 | 16 | forPassRule = &table->forPassRules[3]; |
957 | 16 | break; |
958 | 15 | case CTO_Pass4: |
959 | 15 | forPassRule = &table->forPassRules[4]; |
960 | 15 | break; |
961 | 0 | default: |
962 | 0 | return 0; |
963 | 372 | } |
964 | 415 | while (*forPassRule) { |
965 | 45 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forPassRule]; |
966 | 45 | if (rule->charslen > r->charslen) break; |
967 | 43 | forPassRule = &r->charsnext; |
968 | 43 | } |
969 | 372 | rule->charsnext = *forPassRule; |
970 | 372 | *forPassRule = ruleOffset; |
971 | 372 | return 1; |
972 | 372 | } |
973 | | |
974 | | static int |
975 | | addBackwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
976 | 335 | TranslationTableHeader *table) { |
977 | 335 | TranslationTableOffset *backPassRule; |
978 | 335 | switch (rule->opcode) { |
979 | 112 | case CTO_Correct: |
980 | 112 | backPassRule = &table->backPassRules[0]; |
981 | 112 | break; |
982 | 35 | case CTO_Context: |
983 | 35 | backPassRule = &table->backPassRules[1]; |
984 | 35 | break; |
985 | 60 | case CTO_Pass2: |
986 | 60 | backPassRule = &table->backPassRules[2]; |
987 | 60 | break; |
988 | 66 | case CTO_Pass3: |
989 | 66 | backPassRule = &table->backPassRules[3]; |
990 | 66 | break; |
991 | 62 | case CTO_Pass4: |
992 | 62 | backPassRule = &table->backPassRules[4]; |
993 | 62 | break; |
994 | 0 | default: |
995 | 0 | return 0; |
996 | 335 | } |
997 | 386 | while (*backPassRule) { |
998 | 61 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backPassRule]; |
999 | 61 | if (rule->charslen > r->charslen) break; |
1000 | 51 | backPassRule = &r->dotsnext; |
1001 | 51 | } |
1002 | 335 | rule->dotsnext = *backPassRule; |
1003 | 335 | *backPassRule = ruleOffset; |
1004 | 335 | return 1; |
1005 | 335 | } |
1006 | | |
/* Allocate a new rule in the table, fill it in and link it into the lookup
 * chains.
 *
 * file       - source position; sourceFile and lineNumber are stored in the rule
 * opcode     - opcode of the new rule
 * ruleChars  - characters operand, may be NULL (charslen becomes 0)
 * ruleDots   - dots operand, may be NULL (dotslen becomes 0)
 * after      - stored in the rule's `after` field
 * before     - stored in the rule's `before` field
 * ruleOffset - if not NULL, receives the offset of the new rule in ruleArea
 * rule       - if not NULL, receives a pointer to the new rule; it is updated
 *              again after every step that may move the table
 * noback     - when non-zero the rule is not linked for backward translation
 * nofor      - when non-zero the rule is not linked for forward translation
 * table      - in/out: the table header pointer
 *
 * Returns 1 on success, 0 if space could not be allocated or a pass rule could
 * not be linked.
 */
static int
addRule(const FileInfo *file, TranslationTableOpcode opcode, CharsString *ruleChars,
        CharsString *ruleDots, TranslationTableCharacterAttributes after,
        TranslationTableCharacterAttributes before, TranslationTableOffset *ruleOffset,
        TranslationTableRule **rule, int noback, int nofor,
        TranslationTableHeader **table) {
    /* Add a rule to the table, using the hash function to find the start of
     * chains and chaining both the chars and dots strings */
    TranslationTableOffset offset;
    /* size of the fixed part of the rule, plus room for both operands which are
     * stored back to back in charsdots */
    int ruleSize = sizeof(TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE);
    if (ruleChars) ruleSize += CHARSIZE * ruleChars->length;
    if (ruleDots) ruleSize += CHARSIZE * ruleDots->length;
    if (!allocateSpaceInTranslationTable(file, &offset, ruleSize, table)) return 0;
    TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[offset];
    if (rule) *rule = r;
    if (ruleOffset) *ruleOffset = offset;
    r->sourceFile = file->sourceFile;
    r->sourceLine = file->lineNumber;
    r->index = (*table)->ruleCounter++;
    r->opcode = opcode;
    r->after = after;
    r->before = before;
    r->nocross = 0;
    /* characters operand occupies charsdots[0 .. charslen-1] */
    if (ruleChars)
        memcpy(&r->charsdots[0], &ruleChars->chars[0],
                CHARSIZE * (r->charslen = ruleChars->length));
    else
        r->charslen = 0;
    /* dots operand follows directly at charsdots[charslen ..] */
    if (ruleDots)
        memcpy(&r->charsdots[r->charslen], &ruleDots->chars[0],
                CHARSIZE * (r->dotslen = ruleDots->length));
    else
        r->dotslen = 0;

    /* link new rule into table. */
    /* swap rules are stored but never linked into a chain */
    if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd) return 1;
    /* pass rules go into the per-pass chains, except a context rule that has a
     * characters operand, which is linked like an ordinary rule below */
    if (opcode >= CTO_Context && opcode <= CTO_Pass4)
        if (!(opcode == CTO_Context && r->charslen > 0)) {
            if (!nofor)
                if (!addForwardPassRule(offset, r, *table)) return 0;
            if (!noback)
                if (!addBackwardPassRule(offset, r, *table)) return 0;
            return 1;
        }
    if (!nofor) {
        if (r->charslen == 1) {
            addForwardRuleWithSingleChar(file, offset, r, table);
            // addForwardRuleWithSingleChar may have moved table, so make sure rule is
            // still valid
            r = (TranslationTableRule *)&(*table)->ruleArea[offset];
            if (rule) *rule = r;
        } else if (r->charslen > 1)
            addForwardRuleWithMultipleChars(offset, r, *table);
    }
    if (!noback) {
        widechar *cells;
        int dotslen;

        /* for a context rule the characters operand takes the role of the cells */
        if (r->opcode == CTO_Context) {
            cells = &r->charsdots[0];
            dotslen = r->charslen;
        } else {
            cells = &r->charsdots[r->charslen];
            dotslen = r->dotslen;
        }
        if (dotslen == 1) {
            addBackwardRuleWithSingleCell(file, *cells, offset, r, table);
            // addBackwardRuleWithSingleCell may have moved table, so make sure rule is
            // still valid
            r = (TranslationTableRule *)&(*table)->ruleArea[offset];
            if (rule) *rule = r;
        } else if (dotslen > 1)
            addBackwardRuleWithMultipleCells(cells, dotslen, offset, r, *table);
    }
    return 1;
}
1083 | | |
1084 | | static const CharacterClass * |
1085 | 419 | findCharacterClass(const CharsString *name, const TranslationTableHeader *table) { |
1086 | | /* Find a character class, whether predefined or user-defined */ |
1087 | 419 | const CharacterClass *class = table->characterClasses; |
1088 | 2.65k | while (class) { |
1089 | 2.48k | if ((name->length == class->length) && |
1090 | 654 | (memcmp(&name->chars[0], class->name, CHARSIZE * name->length) == 0)) |
1091 | 252 | return class; |
1092 | 2.23k | class = class->next; |
1093 | 2.23k | } |
1094 | 167 | return NULL; |
1095 | 419 | } |
1096 | | |
1097 | | static TranslationTableCharacterAttributes |
1098 | 1 | getNextNumberedAttribute(TranslationTableHeader *table) { |
1099 | | /* Get the next attribute value for numbered attributes, or 0 if there is no more |
1100 | | * space in the table. */ |
1101 | 1 | TranslationTableCharacterAttributes next = table->nextNumberedCharacterClassAttribute; |
1102 | 1 | if (next > CTC_UserDefined8) return 0; |
1103 | 1 | table->nextNumberedCharacterClassAttribute <<= 1; |
1104 | 1 | return next; |
1105 | 1 | } |
1106 | | |
1107 | | static TranslationTableCharacterAttributes |
1108 | 1.75k | getNextAttribute(TranslationTableHeader *table) { |
1109 | | /* Get the next attribute value, or 0 if there is no more space in the table. */ |
1110 | 1.75k | TranslationTableCharacterAttributes next = table->nextCharacterClassAttribute; |
1111 | 1.75k | if (next) { |
1112 | 1.75k | if (next == CTC_LitDigit) |
1113 | 181 | table->nextCharacterClassAttribute = CTC_UserDefined9; |
1114 | 1.57k | else |
1115 | 1.57k | table->nextCharacterClassAttribute <<= 1; |
1116 | 1.75k | return next; |
1117 | 1.75k | } else |
1118 | 0 | return getNextNumberedAttribute(table); |
1119 | 1.75k | } |
1120 | | |
1121 | | static CharacterClass * |
1122 | | addCharacterClass(const FileInfo *file, const widechar *name, int length, |
1123 | 1.75k | TranslationTableHeader *table, int validate) { |
1124 | | /* Define a character class, Whether predefined or user-defined */ |
1125 | 1.75k | if (validate) { |
1126 | 1.51k | for (int i = 0; i < length; i++) { |
1127 | 1.38k | if (!((name[i] >= 'a' && name[i] <= 'z') || |
1128 | 382 | (name[i] >= 'A' && name[i] <= 'Z'))) { |
1129 | 88 | compileError(file, |
1130 | 88 | "Invalid attribute name: must be a digit between " |
1131 | 88 | "0 and 7 or a word containing only letters"); |
1132 | 88 | } |
1133 | 1.38k | } |
1134 | | // check that name is not reserved |
1135 | 127 | int k = 0; |
1136 | 3.68k | while (reservedAttributeNames[k]) { |
1137 | 3.55k | if (strlen(reservedAttributeNames[k]) == length) { |
1138 | 121 | int i; |
1139 | 149 | for (i = 0; i < length; i++) |
1140 | 149 | if (reservedAttributeNames[k][i] != name[i]) break; |
1141 | 121 | if (i == length) { |
1142 | 0 | compileError(file, "Attribute name is reserved: %s", |
1143 | 0 | reservedAttributeNames[k]); |
1144 | 0 | return NULL; |
1145 | 0 | } |
1146 | 121 | } |
1147 | 3.55k | k++; |
1148 | 3.55k | } |
1149 | 127 | } |
1150 | 1.75k | CharacterClass **classes = &table->characterClasses; |
1151 | 1.75k | TranslationTableCharacterAttributes attribute = getNextAttribute(table); |
1152 | 1.75k | CharacterClass *class; |
1153 | 1.75k | if (attribute) { |
1154 | 1.75k | if (!(class = malloc(sizeof(*class) + CHARSIZE * (length - 1)))) |
1155 | 0 | _lou_outOfMemory(); |
1156 | 1.75k | else { |
1157 | 1.75k | memset(class, 0, sizeof(*class)); |
1158 | 1.75k | memcpy(class->name, name, CHARSIZE * (class->length = length)); |
1159 | 1.75k | class->attribute = attribute; |
1160 | 1.75k | class->next = *classes; |
1161 | 1.75k | *classes = class; |
1162 | 1.75k | return class; |
1163 | 1.75k | } |
1164 | 1.75k | } |
1165 | 0 | compileError(file, "character class table overflow."); |
1166 | 0 | return NULL; |
1167 | 1.75k | } |
1168 | | |
1169 | | static void |
1170 | 181 | deallocateCharacterClasses(TranslationTableHeader *table) { |
1171 | 181 | CharacterClass **classes = &table->characterClasses; |
1172 | 1.93k | while (*classes) { |
1173 | 1.75k | CharacterClass *class = *classes; |
1174 | 1.75k | *classes = (*classes)->next; |
1175 | 1.75k | if (class) free(class); |
1176 | 1.75k | } |
1177 | 181 | } |
1178 | | |
1179 | | static int |
1180 | 181 | allocateCharacterClasses(TranslationTableHeader *table) { |
1181 | | /* Allocate memory for predefined character classes */ |
1182 | 181 | int k = 0; |
1183 | 181 | table->characterClasses = NULL; |
1184 | 181 | table->nextCharacterClassAttribute = 1; // CTC_Space |
1185 | 181 | table->nextNumberedCharacterClassAttribute = CTC_UserDefined1; |
1186 | 1.81k | while (characterClassNames[k]) { |
1187 | 1.62k | widechar wname[MAXSTRING]; |
1188 | 1.62k | int length = (int)strlen(characterClassNames[k]); |
1189 | 1.62k | int kk; |
1190 | 12.6k | for (kk = 0; kk < length; kk++) wname[kk] = (widechar)characterClassNames[k][kk]; |
1191 | 1.62k | if (!addCharacterClass(NULL, wname, length, table, 0)) { |
1192 | 0 | deallocateCharacterClasses(table); |
1193 | 0 | return 0; |
1194 | 0 | } |
1195 | 1.62k | k++; |
1196 | 1.62k | } |
1197 | 181 | return 1; |
1198 | 181 | } |
1199 | | |
1200 | | static TranslationTableOpcode |
1201 | 4.54k | getOpcode(const FileInfo *file, const CharsString *token) { |
1202 | 4.54k | static TranslationTableOpcode lastOpcode = 0; |
1203 | 4.54k | TranslationTableOpcode opcode = lastOpcode; |
1204 | | |
1205 | 235k | do { |
1206 | 235k | if (token->length == opcodeLengths[opcode]) |
1207 | 26.8k | if (eqasc2uni((unsigned char *)opcodeNames[opcode], &token->chars[0], |
1208 | 26.8k | token->length)) { |
1209 | 4.51k | lastOpcode = opcode; |
1210 | 4.51k | return opcode; |
1211 | 4.51k | } |
1212 | 230k | opcode++; |
1213 | 230k | if (opcode >= CTO_None) opcode = 0; |
1214 | 230k | } while (opcode != lastOpcode); |
1215 | 25 | return CTO_None; |
1216 | 4.54k | } |
1217 | | |
1218 | | TranslationTableOpcode EXPORT_CALL |
1219 | 0 | _lou_findOpcodeNumber(const char *toFind) { |
1220 | | /* Used by tools such as lou_debug */ |
1221 | 0 | static TranslationTableOpcode lastOpcode = 0; |
1222 | 0 | TranslationTableOpcode opcode = lastOpcode; |
1223 | 0 | int length = (int)strlen(toFind); |
1224 | 0 | do { |
1225 | 0 | if (length == opcodeLengths[opcode] && |
1226 | 0 | strcasecmp(toFind, opcodeNames[opcode]) == 0) { |
1227 | 0 | lastOpcode = opcode; |
1228 | 0 | return opcode; |
1229 | 0 | } |
1230 | 0 | opcode++; |
1231 | 0 | if (opcode >= CTO_None) opcode = 0; |
1232 | 0 | } while (opcode != lastOpcode); |
1233 | 0 | return CTO_None; |
1234 | 0 | } |
1235 | | |
1236 | | const char *EXPORT_CALL |
1237 | 62 | _lou_findOpcodeName(TranslationTableOpcode opcode) { |
1238 | 62 | static char scratchBuf[MAXSTRING]; |
1239 | | /* Used by tools such as lou_debug */ |
1240 | 62 | if (opcode < 0 || opcode >= CTO_None) { |
1241 | 0 | sprintf(scratchBuf, "%u", opcode); |
1242 | 0 | return scratchBuf; |
1243 | 0 | } |
1244 | 62 | return opcodeNames[opcode]; |
1245 | 62 | } |
1246 | | |
1247 | | static widechar |
1248 | 4.98k | hexValue(const FileInfo *file, const widechar *digits, int length) { |
1249 | 4.98k | int k; |
1250 | 4.98k | unsigned int binaryValue = 0; |
1251 | 17.9k | for (k = 0; k < length; k++) { |
1252 | 16.7k | unsigned int hexDigit = 0; |
1253 | 16.7k | if (digits[k] >= '0' && digits[k] <= '9') |
1254 | 7.10k | hexDigit = digits[k] - '0'; |
1255 | 9.59k | else if (digits[k] >= 'a' && digits[k] <= 'f') |
1256 | 4.58k | hexDigit = digits[k] - 'a' + 10; |
1257 | 5.00k | else if (digits[k] >= 'A' && digits[k] <= 'F') |
1258 | 1.25k | hexDigit = digits[k] - 'A' + 10; |
1259 | 3.75k | else { |
1260 | 3.75k | compileError(file, "invalid %d-digit hexadecimal number", length); |
1261 | 3.75k | return (widechar)0xffffffff; |
1262 | 3.75k | } |
1263 | 12.9k | binaryValue |= hexDigit << (4 * (length - 1 - k)); |
1264 | 12.9k | } |
1265 | 1.22k | return (widechar)binaryValue; |
1266 | 4.98k | } |
1267 | | |
/* Number of entries in first0Bit; parseCharsInternal starts its lead-byte
 * search at index MAXBYTES - 1. */
#define MAXBYTES 7
/* first0Bit[n] has its n + 1 most significant bits (of 8) set.
 * parseCharsInternal picks the largest n with leadByte >= first0Bit[n] as the
 * number of continuation bytes to read, and uses 0xFF - first0Bit[n] as the
 * mask for the payload bits of the lead byte. */
static const unsigned int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC,
    0XFE };
1271 | | |
/* Tell whether `ch`, found right after a backslash, must be left alone
 * because it is an escape of the match pattern syntax: inside a match pattern
 * the characters '(' , ')' and ']' qualify, outside of one nothing does. */
static bool
isMatchPatternEscape(unsigned int ch, bool inMatchPattern) {
    if (!inMatchPattern) return false;
    switch (ch) {
    case '(':
    case ')':
    case ']':
        return true;
    default:
        return false;
    }
}
1276 | | |
/* Decode a raw token into wide characters.
 *
 * Each token element is treated as one byte (it is masked with 0xff). Bytes
 * below 128 are copied, after resolving backslash escape sequences; bytes of
 * 128 and above are decoded as the lead byte of a UTF-8 sequence, with a
 * fallback for invalid continuation bytes.
 *
 * file           - source position for warnings and errors
 * result         - receives the decoded characters and their count
 * token          - the raw token
 * inMatchPattern - when true, "\(", "\)" and "\]" are not treated as escape
 *                  sequences here; the backslash is copied as is
 *
 * Returns 1 on success. Returns 0 after reporting an error, in which case
 * result->length has been set to a shorter length (see the individual exits).
 */
static int
parseCharsInternal(const FileInfo *file, CharsString *result, CharsString *token,
        bool inMatchPattern) {
    int in = 0;          /* read position in token */
    int out = 0;         /* write position in result */
    int lastOutSize = 0; /* value of `out` before the most recent non-ASCII byte */
    while (in < token->length) {
        unsigned int ch = token->chars[in++] & 0xff;
        if (ch < 128) {
            /* NOTE(review): when the backslash is the last character of the
             * token, token->chars[in] below reads the element at index
             * token->length — confirm that callers always terminate the token */
            if (ch == '\\' &&
                    !isMatchPatternEscape(
                            token->chars[in], inMatchPattern)) { /* escape sequence */
                switch (ch = token->chars[in]) {
                case '\\':
                    break;
                case 'e':
                    ch = 0x1b;
                    break;
                case 'f':
                    ch = 12;
                    break;
                case 'n':
                    ch = 10;
                    break;
                case 'r':
                    ch = 13;
                    break;
                case 's':
                    ch = ' ';
                    break;
                case 't':
                    ch = 9;
                    break;
                case 'v':
                    ch = 11;
                    break;
                case 'w':
                    ch = LOU_ENDSEGMENT;
                    break;
                case 34: /* double quote */
                    ch = QUOTESUB;
                    break;
                case 'X':
                    compileWarning(file, "\\Xhhhh (with a capital 'X') is deprecated.");
                    /* falls through to 'x' */
                case 'x':
                    /* if fewer than four characters follow, ch keeps the value
                     * 'x' (or 'X') and is emitted as that letter */
                    if (token->length - in > 4) {
                        ch = hexValue(file, &token->chars[in + 1], 4);
                        in += 4;
                    }
                    break;
                case 'Y':
                    compileWarning(file, "\\Yhhhhh (with a capital 'Y') is deprecated.");
                    /* falls through to 'y' */
                case 'y':
                    if (CHARSIZE == 2) {
                    not32:
                        /* shared by 'y' and 'z': the error is reported but
                         * parsing continues and the escape letter is emitted */
                        compileError(file,
                                "liblouis has not been compiled for 32-bit Unicode");
                        break;
                    }
                    if (token->length - in > 5) {
                        ch = hexValue(file, &token->chars[in + 1], 5);
                        in += 5;
                    }
                    break;
                case 'Z':
                    compileWarning(
                            file, "\\Zhhhhhhhh (with a capital 'Z') is deprecated.");
                    /* falls through to 'z' */
                case 'z':
                    if (CHARSIZE == 2) goto not32;
                    if (token->length - in > 8) {
                        ch = hexValue(file, &token->chars[in + 1], 8);
                        in += 8;
                    }
                    break;
                default:
                    compileError(file, "invalid escape sequence '\\%c'", ch);
                    /* NOTE(review): lastOutSize is only updated when a
                     * non-ASCII byte is seen, so this may discard more output
                     * than just the failing escape */
                    result->length = lastOutSize;
                    return 0;
                }
                /* skip the character that selected the escape */
                in++;
            }
            if (out >= MAXSTRING - 1) {
                compileError(file, "Token too long");
                result->length = MAXSTRING - 1;
                return 0;
            }
            result->chars[out++] = (widechar)ch;
            continue;
        }
        /* non-ASCII byte: decode as UTF-8 */
        lastOutSize = out;
        int lastIn = in; /* index of the element right after the lead byte */
        int numBytes = 0;
        /* number of continuation bytes announced by the lead byte; stays 0 for
         * a lead byte below 0xC0 */
        for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--)
            if (ch >= first0Bit[numBytes]) break;
        /* payload bits of the lead byte */
        /* NOTE(review): for a byte in 0x80..0xBF numBytes is 0 and the mask is
         * 0x7F, so the byte is emitted with its top bit cleared */
        unsigned int utf32 = ch & (0XFF - first0Bit[numBytes]);
        for (int k = 0; k < numBytes; k++) {
            if (in >= MAXSTRING - 1 || in >= token->length) break;
            if (out >= MAXSTRING - 1) {
                compileError(file, "Token too long");
                result->length = lastOutSize;
                return 0;
            }
            /* a continuation byte must be >= 128 with bit 6 clear */
            if (token->chars[in] < 128 || (token->chars[in] & 0x0040)) {
                compileWarning(file, "invalid UTF-8. Assuming Latin-1.");
                /* NOTE(review): this emits the element following the lead byte
                 * (index lastIn), not the lead byte itself, and `continue`
                 * resumes this inner loop, so the fallback can run up to
                 * numBytes times and the partial utf32 value is still emitted
                 * after the loop — confirm this is intended */
                result->chars[out++] = token->chars[lastIn];
                in = lastIn + 1;
                continue;
            }
            utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f);
        }
        if (out >= MAXSTRING - 1) {
            compileError(file, "Token too long");
            result->length = lastOutSize;
            return 0;
        }
        /* code points above 0xffff do not fit a two-byte widechar */
        if (CHARSIZE == 2 && utf32 > 0xffff) {
            compileError(file, "liblouis has not been compiled for 32-bit Unicode");
            result->length = lastOutSize;
            return 0;
        }
        result->chars[out++] = (widechar)utf32;
    }
    result->length = out;
    return 1;
}
1402 | | |
/* Decode a token with the ordinary escape rules (not inside a match pattern).
 * Returns the result of parseCharsInternal: 1 on success, 0 on error. */
static int
parseChars(const FileInfo *file, CharsString *result, CharsString *token) {
    return parseCharsInternal(file, result, token, false);
}
1407 | | |
/* Decode a token that is a match pattern: like parseChars, but "\(", "\)" and
 * "\]" are left untouched by the escape handling.
 * Returns the result of parseCharsInternal: 1 on success, 0 on error. */
static int
parseMatchPatternChars(const FileInfo *file, CharsString *result, CharsString *token) {
    return parseCharsInternal(file, result, token, true);
}
1412 | | |
1413 | | int EXPORT_CALL |
1414 | 603 | _lou_extParseChars(const char *inString, widechar *outString) { |
1415 | | /* Parse external character strings */ |
1416 | 603 | CharsString wideIn; |
1417 | 603 | CharsString result; |
1418 | 603 | int k; |
1419 | 258k | for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
1420 | 603 | wideIn.chars[k] = 0; |
1421 | 603 | wideIn.length = k; |
1422 | 603 | if (!parseChars(NULL, &result, &wideIn)) return 0; |
1423 | 403k | for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
1424 | 591 | return result.length; |
1425 | 603 | } |
1426 | | |
/* Parse a dots operand such as "12-0-3456" into braille cells.
 *
 * Cells are separated by '-'. Within a cell each of the characters '1'..'9',
 * 'a'..'f' / 'A'..'F' adds one dot; a lone '0' denotes a blank cell.
 *
 * file  - source position for error messages
 * cells - receives one element per cell (dot bits ORed with LOU_DOTS) and the
 *         cell count; its length is only set on success
 * token - the text to parse
 *
 * Returns 1 on success, 0 after reporting an error.
 */
static int
parseDots(const FileInfo *file, CharsString *cells, const CharsString *token) {
    /* get dot patterns */
    widechar cell = 0; /* assembly place for dots */
    int cellCount = 0;
    int index;
    int start = 0; /* index of the first character of the current cell */

    for (index = 0; index < token->length; index++) {
        /* true when the current cell already has at least one character */
        int started = index != start;
        widechar character = token->chars[index];
        switch (character) { /* or dots to make up Braille cell */
            {
                int dot;
            case '1':
                dot = LOU_DOT_1;
                goto haveDot;
            case '2':
                dot = LOU_DOT_2;
                goto haveDot;
            case '3':
                dot = LOU_DOT_3;
                goto haveDot;
            case '4':
                dot = LOU_DOT_4;
                goto haveDot;
            case '5':
                dot = LOU_DOT_5;
                goto haveDot;
            case '6':
                dot = LOU_DOT_6;
                goto haveDot;
            case '7':
                dot = LOU_DOT_7;
                goto haveDot;
            case '8':
                dot = LOU_DOT_8;
                goto haveDot;
            case '9':
                dot = LOU_DOT_9;
                goto haveDot;
            case 'a':
            case 'A':
                dot = LOU_DOT_10;
                goto haveDot;
            case 'b':
            case 'B':
                dot = LOU_DOT_11;
                goto haveDot;
            case 'c':
            case 'C':
                dot = LOU_DOT_12;
                goto haveDot;
            case 'd':
            case 'D':
                dot = LOU_DOT_13;
                goto haveDot;
            case 'e':
            case 'E':
                dot = LOU_DOT_14;
                goto haveDot;
            case 'f':
            case 'F':
                dot = LOU_DOT_15;
            haveDot:
                /* the cell has earlier characters but no dots yet, i.e. it
                 * began with '0': a dot may not follow a blank */
                if (started && !cell) goto invalid;
                if (cell & dot) {
                    compileError(file, "dot specified more than once.");
                    return 0;
                }
                cell |= dot;
                break;
            }
        case '0': /* blank */
            /* '0' is only valid as the first character of a cell */
            if (started) goto invalid;
            break;
        case '-': /* got all dots for this cell */
            if (!started) {
                compileError(file, "missing cell specification.");
                return 0;
            }
            cells->chars[cellCount++] = cell | LOU_DOTS;
            cell = 0;
            start = index + 1;
            break;
        default:
        invalid:
            compileError(
                    file, "invalid dot number %s.", _lou_showString(&character, 1, 0));
            return 0;
        }
    }
    /* empty token, or token ending in '-' */
    if (index == start) {
        compileError(file, "missing cell specification.");
        return 0;
    }
    cells->chars[cellCount++] = cell | LOU_DOTS; /* last cell */
    cells->length = cellCount;
    return 1;
}
1527 | | |
1528 | | int EXPORT_CALL |
1529 | 0 | _lou_extParseDots(const char *inString, widechar *outString) { |
1530 | | /* Parse external dot patterns */ |
1531 | 0 | CharsString wideIn; |
1532 | 0 | CharsString result; |
1533 | 0 | int k; |
1534 | 0 | for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
1535 | 0 | wideIn.chars[k] = 0; |
1536 | 0 | wideIn.length = k; |
1537 | 0 | parseDots(NULL, &result, &wideIn); |
1538 | 0 | if (errorCount) { |
1539 | 0 | errorCount = 0; |
1540 | 0 | return 0; |
1541 | 0 | } |
1542 | 0 | for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
1543 | 0 | outString[k] = 0; |
1544 | 0 | return result.length; |
1545 | 0 | } |
1546 | | |
1547 | | static int |
1548 | 237 | getCharacters(FileInfo *file, CharsString *characters) { |
1549 | | /* Get ruleChars string */ |
1550 | 237 | CharsString token; |
1551 | 237 | if (!getToken(file, &token, "characters")) return 0; |
1552 | 235 | return parseChars(file, characters, &token); |
1553 | 237 | } |
1554 | | |
1555 | | static int |
1556 | 272 | getMatchPatternCharacters(FileInfo *file, CharsString *characters) { |
1557 | | /* Get match pattern string */ |
1558 | 272 | CharsString token; |
1559 | 272 | if (!getToken(file, &token, "characters")) return 0; |
1560 | 131 | return parseMatchPatternChars(file, characters, &token); |
1561 | 272 | } |
1562 | | |
1563 | | static int |
1564 | 2.11k | getRuleCharsText(FileInfo *file, CharsString *ruleChars) { |
1565 | 2.11k | CharsString token; |
1566 | 2.11k | if (!getToken(file, &token, "Characters operand")) return 0; |
1567 | 2.04k | return parseChars(file, ruleChars, &token); |
1568 | 2.11k | } |
1569 | | |
1570 | | static int |
1571 | 251 | getRuleDotsText(FileInfo *file, CharsString *ruleDots) { |
1572 | 251 | CharsString token; |
1573 | 251 | if (!getToken(file, &token, "characters")) return 0; |
1574 | 251 | return parseChars(file, ruleDots, &token); |
1575 | 251 | } |
1576 | | |
1577 | | static int |
1578 | 1.33k | getRuleDotsPattern(FileInfo *file, CharsString *ruleDots) { |
1579 | | /* Interpret the dets operand */ |
1580 | 1.33k | CharsString token; |
1581 | 1.33k | if (!getToken(file, &token, "Dots operand")) return 0; |
1582 | 1.26k | if (token.length == 1 && token.chars[0] == '=') { |
1583 | 1 | ruleDots->length = 0; |
1584 | 1 | return 1; |
1585 | 1 | } else |
1586 | 1.25k | return parseDots(file, ruleDots, &token); |
1587 | 1.26k | } |
1588 | | |
1589 | | static int |
1590 | | includeFile(const FileInfo *file, CharsString *includedFile, |
1591 | | TranslationTableHeader **table, DisplayTableHeader **displayTable); |
1592 | | |
1593 | | static TranslationTableOffset |
1594 | 432 | findRuleName(const CharsString *name, const TranslationTableHeader *table) { |
1595 | 432 | const RuleName *ruleName = table->ruleNames; |
1596 | 961 | while (ruleName) { |
1597 | 613 | if ((name->length == ruleName->length) && |
1598 | 168 | (memcmp(&name->chars[0], ruleName->name, CHARSIZE * name->length) == 0)) |
1599 | 84 | return ruleName->ruleOffset; |
1600 | 529 | ruleName = ruleName->next; |
1601 | 529 | } |
1602 | 348 | return 0; |
1603 | 432 | } |
1604 | | |
1605 | | static int |
1606 | | addRuleName(const FileInfo *file, CharsString *name, TranslationTableOffset ruleOffset, |
1607 | 63 | TranslationTableHeader *table) { |
1608 | 63 | int k; |
1609 | 63 | RuleName *ruleName; |
1610 | 63 | if (!(ruleName = malloc(sizeof(*ruleName) + CHARSIZE * (name->length - 1)))) { |
1611 | 0 | compileError(file, "not enough memory"); |
1612 | 0 | _lou_outOfMemory(); |
1613 | 0 | } |
1614 | 63 | memset(ruleName, 0, sizeof(*ruleName)); |
1615 | | // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
1616 | 387 | for (k = 0; k < name->length; k++) { |
1617 | 326 | widechar c = name->chars[k]; |
1618 | 326 | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) |
1619 | 324 | ruleName->name[k] = c; |
1620 | 2 | else { |
1621 | 2 | compileError(file, "a name may contain only letters"); |
1622 | 2 | free(ruleName); |
1623 | 2 | return 0; |
1624 | 2 | } |
1625 | 326 | } |
1626 | 61 | ruleName->length = name->length; |
1627 | 61 | ruleName->ruleOffset = ruleOffset; |
1628 | 61 | ruleName->next = table->ruleNames; |
1629 | 61 | table->ruleNames = ruleName; |
1630 | 61 | return 1; |
1631 | 63 | } |
1632 | | |
1633 | | static void |
1634 | 52 | deallocateRuleNames(TranslationTableHeader *table) { |
1635 | 52 | RuleName **ruleName = &table->ruleNames; |
1636 | 113 | while (*ruleName) { |
1637 | 61 | RuleName *rn = *ruleName; |
1638 | 61 | *ruleName = rn->next; |
1639 | 61 | free(rn); |
1640 | 61 | } |
1641 | 52 | } |
1642 | | |
1643 | | static int |
1644 | 23 | compileSwapDots(const FileInfo *file, CharsString *source, CharsString *dest) { |
1645 | 23 | int k = 0; |
1646 | 23 | int kk = 0; |
1647 | 23 | CharsString dotsSource; |
1648 | 23 | CharsString dotsDest; |
1649 | 23 | dest->length = 0; |
1650 | 23 | dotsSource.length = 0; |
1651 | 569 | while (k <= source->length) { |
1652 | 546 | if (source->chars[k] != ',' && k != source->length) |
1653 | 494 | dotsSource.chars[dotsSource.length++] = source->chars[k]; |
1654 | 52 | else { |
1655 | 52 | if (!parseDots(file, &dotsDest, &dotsSource)) return 0; |
1656 | 52 | dest->chars[dest->length++] = dotsDest.length + 1; |
1657 | 262 | for (kk = 0; kk < dotsDest.length; kk++) |
1658 | 210 | dest->chars[dest->length++] = dotsDest.chars[kk]; |
1659 | 52 | dotsSource.length = 0; |
1660 | 52 | } |
1661 | 546 | k++; |
1662 | 546 | } |
1663 | 23 | return 1; |
1664 | 23 | } |
1665 | | |
1666 | | static int |
1667 | | compileSwap(FileInfo *file, TranslationTableOpcode opcode, int noback, int nofor, |
1668 | 35 | TranslationTableHeader **table) { |
1669 | 35 | CharsString ruleChars; |
1670 | 35 | CharsString ruleDots; |
1671 | 35 | CharsString name; |
1672 | 35 | CharsString matches; |
1673 | 35 | CharsString replacements; |
1674 | 35 | TranslationTableOffset ruleOffset; |
1675 | 35 | if (!getToken(file, &name, "name operand")) return 0; |
1676 | 35 | if (!getToken(file, &matches, "matches operand")) return 0; |
1677 | 35 | if (!getToken(file, &replacements, "replacements operand")) return 0; |
1678 | 35 | if (opcode == CTO_SwapCc || opcode == CTO_SwapCd) { |
1679 | 25 | if (!parseChars(file, &ruleChars, &matches)) return 0; |
1680 | 25 | } else { |
1681 | 10 | if (!compileSwapDots(file, &matches, &ruleChars)) return 0; |
1682 | 10 | } |
1683 | 35 | if (opcode == CTO_SwapCc) { |
1684 | 22 | if (!parseChars(file, &ruleDots, &replacements)) return 0; |
1685 | 22 | } else { |
1686 | 13 | if (!compileSwapDots(file, &replacements, &ruleDots)) return 0; |
1687 | 13 | } |
1688 | 35 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, NULL, noback, |
1689 | 35 | nofor, table)) |
1690 | 0 | return 0; |
1691 | 35 | if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
1692 | 34 | return 1; |
1693 | 35 | } |
1694 | | |
1695 | | static int |
1696 | 35 | getNumber(widechar *string, widechar *number) { |
1697 | | /* Convert a string of wide character digits to an integer */ |
1698 | 35 | int k = 0; |
1699 | 35 | *number = 0; |
1700 | 97 | while (string[k] >= '0' && string[k] <= '9') |
1701 | 62 | *number = 10 * *number + (string[k++] - '0'); |
1702 | 35 | return k; |
1703 | 35 | } |
1704 | | |
1705 | | /* Start of multipass compiler */ |
1706 | | |
1707 | | static int |
1708 | | passGetAttributes(CharsString *passLine, int *passLinepos, |
1709 | 292 | TranslationTableCharacterAttributes *attributes, const FileInfo *file) { |
1710 | 292 | int more = 1; |
1711 | 292 | *attributes = 0; |
1712 | 1.48k | while (more) { |
1713 | 1.19k | switch (passLine->chars[*passLinepos]) { |
1714 | 196 | case pass_any: |
1715 | 196 | *attributes = 0xffffffff; |
1716 | 196 | break; |
1717 | 65 | case pass_digit: |
1718 | 65 | *attributes |= CTC_Digit; |
1719 | 65 | break; |
1720 | 35 | case pass_litDigit: |
1721 | 35 | *attributes |= CTC_LitDigit; |
1722 | 35 | break; |
1723 | 6 | case pass_letter: |
1724 | 6 | *attributes |= CTC_Letter; |
1725 | 6 | break; |
1726 | 21 | case pass_math: |
1727 | 21 | *attributes |= CTC_Math; |
1728 | 21 | break; |
1729 | 170 | case pass_punctuation: |
1730 | 170 | *attributes |= CTC_Punctuation; |
1731 | 170 | break; |
1732 | 14 | case pass_sign: |
1733 | 14 | *attributes |= CTC_Sign; |
1734 | 14 | break; |
1735 | 273 | case pass_space: |
1736 | 273 | *attributes |= CTC_Space; |
1737 | 273 | break; |
1738 | 0 | case pass_uppercase: |
1739 | 0 | *attributes |= CTC_UpperCase; |
1740 | 0 | break; |
1741 | 0 | case pass_lowercase: |
1742 | 0 | *attributes |= CTC_LowerCase; |
1743 | 0 | break; |
1744 | 71 | case pass_class1: |
1745 | 71 | *attributes |= CTC_UserDefined9; |
1746 | 71 | break; |
1747 | 11 | case pass_class2: |
1748 | 11 | *attributes |= CTC_UserDefined10; |
1749 | 11 | break; |
1750 | 38 | case pass_class3: |
1751 | 38 | *attributes |= CTC_UserDefined11; |
1752 | 38 | break; |
1753 | 0 | case pass_class4: |
1754 | 0 | *attributes |= CTC_UserDefined12; |
1755 | 0 | break; |
1756 | 292 | default: |
1757 | 292 | more = 0; |
1758 | 292 | break; |
1759 | 1.19k | } |
1760 | 1.19k | if (more) (*passLinepos)++; |
1761 | 1.19k | } |
1762 | 292 | if (!*attributes) { |
1763 | 0 | compileError(file, "missing attribute"); |
1764 | 0 | (*passLinepos)--; |
1765 | 0 | return 0; |
1766 | 0 | } |
1767 | 292 | return 1; |
1768 | 292 | } |
1769 | | |
1770 | | static int |
1771 | | passGetDots(CharsString *passLine, int *passLinepos, CharsString *dots, |
1772 | 193 | const FileInfo *file) { |
1773 | 193 | CharsString collectDots; |
1774 | 193 | collectDots.length = 0; |
1775 | 645 | while (*passLinepos < passLine->length && |
1776 | 645 | (passLine->chars[*passLinepos] == '-' || |
1777 | 595 | (passLine->chars[*passLinepos] >= '0' && |
1778 | 423 | passLine->chars[*passLinepos] <= '9') || |
1779 | 258 | ((passLine->chars[*passLinepos] | 32) >= 'a' && |
1780 | 76 | (passLine->chars[*passLinepos] | 32) <= 'f'))) |
1781 | 452 | collectDots.chars[collectDots.length++] = passLine->chars[(*passLinepos)++]; |
1782 | 193 | if (!parseDots(file, dots, &collectDots)) return 0; |
1783 | 150 | return 1; |
1784 | 193 | } |
1785 | | |
1786 | | static int |
1787 | | passGetString(CharsString *passLine, int *passLinepos, CharsString *string, |
1788 | 331 | const FileInfo *file) { |
1789 | 331 | string->length = 0; |
1790 | 8.94k | while (1) { |
1791 | 8.94k | if ((*passLinepos >= passLine->length) || !passLine->chars[*passLinepos]) { |
1792 | 7 | compileError(file, "unterminated string"); |
1793 | 7 | return 0; |
1794 | 7 | } |
1795 | 8.94k | if (passLine->chars[*passLinepos] == 34) break; |
1796 | 8.61k | if (passLine->chars[*passLinepos] == QUOTESUB) |
1797 | 12 | string->chars[string->length++] = 34; |
1798 | 8.60k | else |
1799 | 8.60k | string->chars[string->length++] = passLine->chars[*passLinepos]; |
1800 | 8.61k | (*passLinepos)++; |
1801 | 8.61k | } |
1802 | 324 | string->chars[string->length] = 0; |
1803 | 324 | (*passLinepos)++; |
1804 | 324 | return 1; |
1805 | 331 | } |
1806 | | |
1807 | | static int |
1808 | 1.69k | passGetNumber(CharsString *passLine, int *passLinepos, widechar *number) { |
1809 | | /* Convert a string of wide character digits to an integer */ |
1810 | 1.69k | *number = 0; |
1811 | 3.50k | while ((*passLinepos < passLine->length) && (passLine->chars[*passLinepos] >= '0') && |
1812 | 2.87k | (passLine->chars[*passLinepos] <= '9')) |
1813 | 1.81k | *number = 10 * (*number) + (passLine->chars[(*passLinepos)++] - '0'); |
1814 | 1.69k | return 1; |
1815 | 1.69k | } |
1816 | | |
1817 | | static int |
1818 | | passGetVariableNumber( |
1819 | 260 | const FileInfo *file, CharsString *passLine, int *passLinepos, widechar *number) { |
1820 | 260 | if (!passGetNumber(passLine, passLinepos, number)) { |
1821 | 0 | compileError(file, "missing variable number"); |
1822 | 0 | return 0; |
1823 | 0 | } |
1824 | 260 | if ((*number >= 0) && (*number < NUMVAR)) return 1; |
1825 | 4 | compileError(file, "variable number out of range"); |
1826 | 4 | return 0; |
1827 | 260 | } |
1828 | | |
1829 | | static int |
1830 | 432 | passGetName(CharsString *passLine, int *passLinepos, CharsString *name) { |
1831 | 432 | name->length = 0; |
1832 | | // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
1833 | 1.77k | do { |
1834 | 1.77k | widechar c = passLine->chars[*passLinepos]; |
1835 | 1.77k | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
1836 | 1.34k | name->chars[name->length++] = c; |
1837 | 1.34k | (*passLinepos)++; |
1838 | 1.34k | } else { |
1839 | 429 | break; |
1840 | 429 | } |
1841 | 1.77k | } while (*passLinepos < passLine->length); |
1842 | 432 | return 1; |
1843 | 432 | } |
1844 | | |
1845 | | static inline int |
1846 | 525 | wantsString(TranslationTableOpcode opcode, int actionPart, int nofor) { |
1847 | 525 | if (opcode == CTO_Correct) return 1; |
1848 | 223 | if (opcode != CTO_Context) return 0; |
1849 | 35 | return !nofor == !actionPart; |
1850 | 223 | } |
1851 | | |
1852 | | static int |
1853 | | verifyStringOrDots(const FileInfo *file, TranslationTableOpcode opcode, int isString, |
1854 | 525 | int actionPart, int nofor) { |
1855 | 525 | if (!wantsString(opcode, actionPart, nofor) == !isString) return 1; |
1856 | | |
1857 | 1 | compileError(file, "%s are not allowed in the %s part of a %s translation %s rule.", |
1858 | 1 | isString ? "strings" : "dots", getPartName(actionPart), |
1859 | 1 | nofor ? "backward" : "forward", _lou_findOpcodeName(opcode)); |
1860 | | |
1861 | 1 | return 0; |
1862 | 525 | } |
1863 | | |
1864 | | static int |
1865 | | appendInstructionChar( |
1866 | 28.6k | const FileInfo *file, widechar *passInstructions, int *passIC, widechar ch) { |
1867 | 28.6k | if (*passIC >= MAXSTRING) { |
1868 | 1 | compileError(file, "multipass operand too long"); |
1869 | 1 | return 0; |
1870 | 1 | } |
1871 | 28.6k | passInstructions[(*passIC)++] = ch; |
1872 | 28.6k | return 1; |
1873 | 28.6k | } |
1874 | | |
1875 | | static int |
1876 | | compilePassOpcode(const FileInfo *file, TranslationTableOpcode opcode, int noback, |
1877 | 750 | int nofor, TranslationTableHeader **table) { |
1878 | 750 | static CharsString passRuleChars; |
1879 | 750 | static CharsString passRuleDots; |
1880 | | /* Compile the operands of a pass opcode */ |
1881 | 750 | widechar passSubOp; |
1882 | 750 | const CharacterClass *class; |
1883 | 750 | TranslationTableRule *rule = NULL; |
1884 | 750 | int k; |
1885 | 750 | int kk = 0; |
1886 | 750 | int endTest = 0; |
1887 | 750 | widechar *passInstructions = passRuleDots.chars; |
1888 | 750 | int passIC = 0; /* Instruction counter */ |
1889 | 750 | passRuleChars.length = 0; |
1890 | 750 | CharsString passHoldString; |
1891 | 750 | widechar passHoldNumber; |
1892 | 750 | CharsString passLine; |
1893 | 750 | int passLinepos = 0; |
1894 | 750 | TranslationTableCharacterAttributes passAttributes; |
1895 | 750 | int replacing = 0; |
1896 | 750 | passHoldString.length = 0; |
1897 | 79.5k | for (k = file->linepos; k < file->linelen; k++) |
1898 | 78.7k | passHoldString.chars[passHoldString.length++] = file->line[k]; |
1899 | 8.21k | #define SEPCHAR 0x0001 |
1900 | 7.98k | for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32; k++) |
1901 | 7.23k | ; |
1902 | 750 | if (k < passHoldString.length) |
1903 | 750 | passHoldString.chars[k] = SEPCHAR; |
1904 | 0 | else { |
1905 | 0 | compileError(file, "Invalid multipass operands"); |
1906 | 0 | return 0; |
1907 | 0 | } |
1908 | 750 | parseChars(file, &passLine, &passHoldString); |
1909 | | /* Compile test part */ |
1910 | 7.47k | for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++) |
1911 | 6.72k | ; |
1912 | 750 | endTest = k; |
1913 | 750 | passLine.chars[endTest] = pass_endTest; |
1914 | 750 | passLinepos = 0; |
1915 | 4.20k | while (passLinepos <= endTest) { |
1916 | 3.46k | switch ((passSubOp = passLine.chars[passLinepos])) { |
1917 | 990 | case pass_lookback: |
1918 | 990 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_lookback)) |
1919 | 0 | return 0; |
1920 | 990 | passLinepos++; |
1921 | 990 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
1922 | 990 | if (passHoldNumber == 0) passHoldNumber = 1; |
1923 | 990 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
1924 | 0 | return 0; |
1925 | 990 | break; |
1926 | 990 | case pass_not: |
1927 | 505 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_not)) |
1928 | 0 | return 0; |
1929 | 505 | passLinepos++; |
1930 | 505 | break; |
1931 | 36 | case pass_first: |
1932 | 36 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_first)) |
1933 | 0 | return 0; |
1934 | 36 | passLinepos++; |
1935 | 36 | break; |
1936 | 49 | case pass_last: |
1937 | 49 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_last)) |
1938 | 0 | return 0; |
1939 | 49 | passLinepos++; |
1940 | 49 | break; |
1941 | 85 | case pass_search: |
1942 | 85 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_search)) |
1943 | 0 | return 0; |
1944 | 85 | passLinepos++; |
1945 | 85 | break; |
1946 | 43 | case pass_string: |
1947 | 43 | if (!verifyStringOrDots(file, opcode, 1, 0, nofor)) { |
1948 | 0 | return 0; |
1949 | 0 | } |
1950 | 43 | passLinepos++; |
1951 | 43 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
1952 | 0 | return 0; |
1953 | 43 | passGetString(&passLine, &passLinepos, &passHoldString, file); |
1954 | 43 | if (passHoldString.length == 0) { |
1955 | 0 | compileError(file, "empty string in test part"); |
1956 | 0 | return 0; |
1957 | 0 | } |
1958 | 43 | goto testDoCharsDots; |
1959 | 72 | case pass_dots: |
1960 | 72 | if (!verifyStringOrDots(file, opcode, 0, 0, nofor)) { |
1961 | 1 | return 0; |
1962 | 1 | } |
1963 | 71 | passLinepos++; |
1964 | 71 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
1965 | 0 | return 0; |
1966 | 71 | passGetDots(&passLine, &passLinepos, &passHoldString, file); |
1967 | 71 | if (passHoldString.length == 0) { |
1968 | 0 | compileError(file, "expected dot pattern after @ operand in test part"); |
1969 | 0 | return 0; |
1970 | 0 | } |
1971 | 114 | testDoCharsDots: |
1972 | 114 | if (passIC >= MAXSTRING) { |
1973 | 0 | compileError( |
1974 | 0 | file, "@ operand in test part of multipass operand too long"); |
1975 | 0 | return 0; |
1976 | 0 | } |
1977 | 114 | if (!appendInstructionChar( |
1978 | 114 | file, passInstructions, &passIC, passHoldString.length)) |
1979 | 0 | return 0; |
1980 | 9.72k | for (kk = 0; kk < passHoldString.length; kk++) { |
1981 | 9.61k | if (passIC >= MAXSTRING) { |
1982 | 1 | compileError( |
1983 | 1 | file, "@ operand in test part of multipass operand too long"); |
1984 | 1 | return 0; |
1985 | 1 | } |
1986 | 9.61k | if (!appendInstructionChar( |
1987 | 9.61k | file, passInstructions, &passIC, passHoldString.chars[kk])) |
1988 | 0 | return 0; |
1989 | 9.61k | } |
1990 | 113 | break; |
1991 | 164 | case pass_startReplace: |
1992 | 164 | if (replacing) { |
1993 | 0 | compileError(file, "nested replacement statements"); |
1994 | 0 | return 0; |
1995 | 0 | } |
1996 | 164 | if (!appendInstructionChar( |
1997 | 164 | file, passInstructions, &passIC, pass_startReplace)) |
1998 | 0 | return 0; |
1999 | 164 | replacing = 1; |
2000 | 164 | passLinepos++; |
2001 | 164 | break; |
2002 | 150 | case pass_endReplace: |
2003 | 150 | if (!replacing) { |
2004 | 0 | compileError(file, "unexpected end of replacement"); |
2005 | 0 | return 0; |
2006 | 0 | } |
2007 | 150 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_endReplace)) |
2008 | 0 | return 0; |
2009 | 150 | replacing = 0; |
2010 | 150 | passLinepos++; |
2011 | 150 | break; |
2012 | 152 | case pass_variable: |
2013 | 152 | passLinepos++; |
2014 | 152 | if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
2015 | 2 | return 0; |
2016 | 150 | switch (passLine.chars[passLinepos]) { |
2017 | 23 | case pass_eq: |
2018 | 23 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
2019 | 0 | return 0; |
2020 | 23 | goto doComp; |
2021 | 63 | case pass_lt: |
2022 | 63 | if (passLine.chars[passLinepos + 1] == pass_eq) { |
2023 | 31 | passLinepos++; |
2024 | 31 | if (!appendInstructionChar( |
2025 | 31 | file, passInstructions, &passIC, pass_lteq)) |
2026 | 0 | return 0; |
2027 | 32 | } else if (!appendInstructionChar( |
2028 | 32 | file, passInstructions, &passIC, pass_lt)) |
2029 | 0 | return 0; |
2030 | 63 | goto doComp; |
2031 | 63 | case pass_gt: |
2032 | 63 | if (passLine.chars[passLinepos + 1] == pass_eq) { |
2033 | 36 | passLinepos++; |
2034 | 36 | if (!appendInstructionChar( |
2035 | 36 | file, passInstructions, &passIC, pass_gteq)) |
2036 | 1 | return 0; |
2037 | 36 | } else if (!appendInstructionChar( |
2038 | 27 | file, passInstructions, &passIC, pass_gt)) |
2039 | 0 | return 0; |
2040 | 148 | doComp: |
2041 | 148 | if (!appendInstructionChar( |
2042 | 148 | file, passInstructions, &passIC, passHoldNumber)) |
2043 | 0 | return 0; |
2044 | 148 | passLinepos++; |
2045 | 148 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2046 | 148 | if (!appendInstructionChar( |
2047 | 148 | file, passInstructions, &passIC, passHoldNumber)) |
2048 | 0 | return 0; |
2049 | 148 | break; |
2050 | 148 | default: |
2051 | 1 | compileError(file, "incorrect comparison operator"); |
2052 | 1 | return 0; |
2053 | 150 | } |
2054 | 148 | break; |
2055 | 292 | case pass_attributes: |
2056 | 292 | passLinepos++; |
2057 | 292 | if (!passGetAttributes(&passLine, &passLinepos, &passAttributes, file)) |
2058 | 0 | return 0; |
2059 | 292 | insertAttributes: |
2060 | 292 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_attributes)) |
2061 | 0 | return 0; |
2062 | 292 | if (!appendInstructionChar( |
2063 | 292 | file, passInstructions, &passIC, (passAttributes >> 48) & 0xffff)) |
2064 | 0 | return 0; |
2065 | 292 | if (!appendInstructionChar( |
2066 | 292 | file, passInstructions, &passIC, (passAttributes >> 32) & 0xffff)) |
2067 | 0 | return 0; |
2068 | 292 | if (!appendInstructionChar( |
2069 | 292 | file, passInstructions, &passIC, (passAttributes >> 16) & 0xffff)) |
2070 | 0 | return 0; |
2071 | 292 | if (!appendInstructionChar( |
2072 | 292 | file, passInstructions, &passIC, passAttributes & 0xffff)) |
2073 | 0 | return 0; |
2074 | 332 | getRange: |
2075 | 332 | if (passLine.chars[passLinepos] == pass_until) { |
2076 | 121 | passLinepos++; |
2077 | 121 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2078 | 121 | if (!appendInstructionChar(file, passInstructions, &passIC, 0xffff)) |
2079 | 0 | return 0; |
2080 | 121 | break; |
2081 | 121 | } |
2082 | 211 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2083 | 211 | if (passHoldNumber == 0) { |
2084 | 112 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2085 | 112 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2086 | 112 | break; |
2087 | 112 | } |
2088 | 99 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
2089 | 0 | return 0; |
2090 | 99 | if (passLine.chars[passLinepos] != pass_hyphen) { |
2091 | 85 | if (!appendInstructionChar( |
2092 | 85 | file, passInstructions, &passIC, passHoldNumber)) |
2093 | 0 | return 0; |
2094 | 85 | break; |
2095 | 85 | } |
2096 | 14 | passLinepos++; |
2097 | 14 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2098 | 14 | if (passHoldNumber == 0) { |
2099 | 0 | compileError(file, "invalid range"); |
2100 | 0 | return 0; |
2101 | 0 | } |
2102 | 14 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
2103 | 0 | return 0; |
2104 | 14 | break; |
2105 | 72 | case pass_groupstart: |
2106 | 103 | case pass_groupend: { |
2107 | 103 | passLinepos++; |
2108 | 103 | passGetName(&passLine, &passLinepos, &passHoldString); |
2109 | 103 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2110 | 103 | if (ruleOffset) |
2111 | 41 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2112 | 103 | if (rule && rule->opcode == CTO_Grouping) { |
2113 | 102 | if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
2114 | 0 | return 0; |
2115 | 102 | if (!appendInstructionChar( |
2116 | 102 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2117 | 0 | return 0; |
2118 | 102 | if (!appendInstructionChar( |
2119 | 102 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2120 | 0 | return 0; |
2121 | 102 | break; |
2122 | 102 | } else { |
2123 | 1 | compileError(file, "%s is not a grouping name", |
2124 | 1 | _lou_showString( |
2125 | 1 | &passHoldString.chars[0], passHoldString.length, 0)); |
2126 | 1 | return 0; |
2127 | 1 | } |
2128 | 0 | break; |
2129 | 103 | } |
2130 | 40 | case pass_swap: { |
2131 | 40 | passLinepos++; |
2132 | 40 | passGetName(&passLine, &passLinepos, &passHoldString); |
2133 | 40 | if ((class = findCharacterClass(&passHoldString, *table))) { |
2134 | 0 | passAttributes = class->attribute; |
2135 | 0 | goto insertAttributes; |
2136 | 0 | } |
2137 | 40 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2138 | 40 | if (ruleOffset) |
2139 | 19 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2140 | 40 | if (rule && |
2141 | 40 | (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
2142 | 40 | rule->opcode == CTO_SwapDd)) { |
2143 | 40 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
2144 | 0 | return 0; |
2145 | 40 | if (!appendInstructionChar( |
2146 | 40 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2147 | 0 | return 0; |
2148 | 40 | if (!appendInstructionChar( |
2149 | 40 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2150 | 0 | return 0; |
2151 | 40 | goto getRange; |
2152 | 40 | } |
2153 | 0 | compileError(file, "%s is neither a class name nor a swap name.", |
2154 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2155 | 0 | return 0; |
2156 | 40 | } |
2157 | 780 | case pass_endTest: |
2158 | 780 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_endTest)) |
2159 | 0 | return 0; |
2160 | 780 | if (replacing) { |
2161 | 0 | compileError(file, "expected end of replacement"); |
2162 | 0 | return 0; |
2163 | 0 | } |
2164 | 780 | passLinepos++; |
2165 | 780 | break; |
2166 | 3 | default: |
2167 | 3 | compileError(file, "incorrect operator '%c ' in test part", |
2168 | 3 | passLine.chars[passLinepos]); |
2169 | 3 | return 0; |
2170 | 3.46k | } |
2171 | | |
2172 | 3.46k | } /* Compile action part */ |
2173 | | |
2174 | | /* Compile action part */ |
2175 | 4.54k | while (passLinepos < passLine.length && passLine.chars[passLinepos] <= 32) |
2176 | 3.80k | passLinepos++; |
2177 | 2.36k | while (passLinepos < passLine.length && passLine.chars[passLinepos] > 32) { |
2178 | 1.63k | if (passIC >= MAXSTRING) { |
2179 | 0 | compileError(file, "Action part in multipass operand too long"); |
2180 | 0 | return 0; |
2181 | 0 | } |
2182 | 1.63k | switch ((passSubOp = passLine.chars[passLinepos])) { |
2183 | 288 | case pass_string: |
2184 | 288 | if (!verifyStringOrDots(file, opcode, 1, 1, nofor)) { |
2185 | 0 | return 0; |
2186 | 0 | } |
2187 | 288 | passLinepos++; |
2188 | 288 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
2189 | 0 | return 0; |
2190 | 288 | passGetString(&passLine, &passLinepos, &passHoldString, file); |
2191 | 288 | goto actionDoCharsDots; |
2192 | 122 | case pass_dots: |
2193 | 122 | if (!verifyStringOrDots(file, opcode, 0, 1, nofor)) { |
2194 | 0 | return 0; |
2195 | 0 | } |
2196 | 122 | passLinepos++; |
2197 | 122 | passGetDots(&passLine, &passLinepos, &passHoldString, file); |
2198 | 122 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
2199 | 0 | return 0; |
2200 | 122 | if (passHoldString.length == 0) { |
2201 | 0 | compileError(file, "expected dot pattern after @ operand in action part"); |
2202 | 0 | return 0; |
2203 | 0 | } |
2204 | 410 | actionDoCharsDots: |
2205 | 410 | if (passIC >= MAXSTRING) { |
2206 | 0 | compileError( |
2207 | 0 | file, "@ operand in action part of multipass operand too long"); |
2208 | 0 | return 0; |
2209 | 0 | } |
2210 | 410 | if (!appendInstructionChar( |
2211 | 410 | file, passInstructions, &passIC, passHoldString.length)) |
2212 | 0 | return 0; |
2213 | 9.69k | for (kk = 0; kk < passHoldString.length; kk++) { |
2214 | 9.28k | if (passIC >= MAXSTRING) { |
2215 | 0 | compileError(file, |
2216 | 0 | "@ operand in action part of multipass operand too long"); |
2217 | 0 | return 0; |
2218 | 0 | } |
2219 | 9.28k | if (!appendInstructionChar( |
2220 | 9.28k | file, passInstructions, &passIC, passHoldString.chars[kk])) |
2221 | 0 | return 0; |
2222 | 9.28k | } |
2223 | 410 | break; |
2224 | 410 | case pass_variable: |
2225 | 108 | passLinepos++; |
2226 | 108 | if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
2227 | 2 | return 0; |
2228 | 106 | switch (passLine.chars[passLinepos]) { |
2229 | 69 | case pass_eq: |
2230 | 69 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
2231 | 0 | return 0; |
2232 | 69 | if (!appendInstructionChar( |
2233 | 69 | file, passInstructions, &passIC, passHoldNumber)) |
2234 | 0 | return 0; |
2235 | 69 | passLinepos++; |
2236 | 69 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2237 | 69 | if (!appendInstructionChar( |
2238 | 69 | file, passInstructions, &passIC, passHoldNumber)) |
2239 | 0 | return 0; |
2240 | 69 | break; |
2241 | 69 | case pass_plus: |
2242 | 37 | case pass_hyphen: |
2243 | 37 | if (!appendInstructionChar(file, passInstructions, &passIC, |
2244 | 37 | passLine.chars[passLinepos++])) |
2245 | 0 | return 0; |
2246 | 37 | if (!appendInstructionChar( |
2247 | 37 | file, passInstructions, &passIC, passHoldNumber)) |
2248 | 0 | return 0; |
2249 | 37 | break; |
2250 | 37 | default: |
2251 | 0 | compileError(file, "incorrect variable operator in action part"); |
2252 | 0 | return 0; |
2253 | 106 | } |
2254 | 106 | break; |
2255 | 615 | case pass_copy: |
2256 | 615 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_copy)) |
2257 | 0 | return 0; |
2258 | 615 | passLinepos++; |
2259 | 615 | break; |
2260 | 205 | case pass_omit: |
2261 | 205 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_omit)) |
2262 | 0 | return 0; |
2263 | 205 | passLinepos++; |
2264 | 205 | break; |
2265 | 62 | case pass_groupreplace: |
2266 | 184 | case pass_groupstart: |
2267 | 195 | case pass_groupend: { |
2268 | 195 | passLinepos++; |
2269 | 195 | passGetName(&passLine, &passLinepos, &passHoldString); |
2270 | 195 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2271 | 195 | if (ruleOffset) |
2272 | 7 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2273 | 195 | if (rule && rule->opcode == CTO_Grouping) { |
2274 | 195 | if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
2275 | 0 | return 0; |
2276 | 195 | if (!appendInstructionChar( |
2277 | 195 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2278 | 0 | return 0; |
2279 | 195 | if (!appendInstructionChar( |
2280 | 195 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2281 | 0 | return 0; |
2282 | 195 | break; |
2283 | 195 | } |
2284 | 0 | compileError(file, "%s is not a grouping name", |
2285 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2286 | 0 | return 0; |
2287 | 195 | } |
2288 | 94 | case pass_swap: { |
2289 | 94 | passLinepos++; |
2290 | 94 | passGetName(&passLine, &passLinepos, &passHoldString); |
2291 | 94 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2292 | 94 | if (ruleOffset) |
2293 | 17 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2294 | 94 | if (rule && |
2295 | 94 | (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
2296 | 94 | rule->opcode == CTO_SwapDd)) { |
2297 | 94 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
2298 | 0 | return 0; |
2299 | 94 | if (!appendInstructionChar( |
2300 | 94 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2301 | 0 | return 0; |
2302 | 94 | if (!appendInstructionChar( |
2303 | 94 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2304 | 0 | return 0; |
2305 | 94 | break; |
2306 | 94 | } |
2307 | 0 | compileError(file, "%s is not a swap name.", |
2308 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2309 | 0 | return 0; |
2310 | 0 | break; |
2311 | 94 | } |
2312 | 7 | default: |
2313 | 7 | compileError(file, "incorrect operator in action part"); |
2314 | 7 | return 0; |
2315 | 1.63k | } |
2316 | 1.63k | } |
2317 | | |
2318 | | /* Analyze and add rule */ |
2319 | 731 | passRuleDots.length = passIC; |
2320 | | |
2321 | 731 | { |
2322 | 731 | widechar *characters; |
2323 | 731 | int length; |
2324 | 731 | int found = passFindCharacters( |
2325 | 731 | file, passInstructions, passRuleDots.length, &characters, &length); |
2326 | | |
2327 | 731 | if (!found) return 0; |
2328 | | |
2329 | 731 | if (characters) { |
2330 | 2.44k | for (k = 0; k < length; k += 1) passRuleChars.chars[k] = characters[k]; |
2331 | 68 | passRuleChars.length = k; |
2332 | 68 | } |
2333 | 731 | } |
2334 | | |
2335 | 731 | if (!addRule(file, opcode, &passRuleChars, &passRuleDots, 0, 0, NULL, NULL, noback, |
2336 | 731 | nofor, table)) |
2337 | 0 | return 0; |
2338 | 731 | return 1; |
2339 | 731 | } |
2340 | | |
2341 | | /* End of multipass compiler */ |
2342 | | |
2343 | | static int |
2344 | | compileBrailleIndicator(FileInfo *file, const char *ermsg, TranslationTableOpcode opcode, |
2345 | | TranslationTableOffset *ruleOffset, int noback, int nofor, |
2346 | 365 | TranslationTableHeader **table) { |
2347 | 365 | CharsString token; |
2348 | 365 | CharsString cells; |
2349 | 365 | if (!getToken(file, &token, ermsg)) return 0; |
2350 | 364 | if (!parseDots(file, &cells, &token)) return 0; |
2351 | 363 | return addRule( |
2352 | 363 | file, opcode, NULL, &cells, 0, 0, ruleOffset, NULL, noback, nofor, table); |
2353 | 364 | } |
2354 | | |
2355 | | static int |
2356 | 35 | compileNumber(FileInfo *file) { |
2357 | 35 | CharsString token; |
2358 | 35 | widechar number; |
2359 | 35 | if (!getToken(file, &token, "number")) return 0; |
2360 | 35 | getNumber(&token.chars[0], &number); |
2361 | 35 | if (!(number > 0)) { |
2362 | 0 | compileError(file, "a nonzero positive number is required"); |
2363 | 0 | return 0; |
2364 | 0 | } |
2365 | 35 | return number; |
2366 | 35 | } |
2367 | | |
2368 | | static int |
2369 | | compileGrouping(FileInfo *file, int noback, int nofor, TranslationTableHeader **table, |
2370 | 28 | DisplayTableHeader **displayTable) { |
2371 | 28 | int k; |
2372 | 28 | CharsString name; |
2373 | 28 | CharsString groupChars; |
2374 | 28 | CharsString groupDots; |
2375 | 28 | CharsString dotsParsed; |
2376 | 28 | if (!getToken(file, &name, "name operand")) return 0; |
2377 | 28 | if (!getRuleCharsText(file, &groupChars)) return 0; |
2378 | 28 | if (!getToken(file, &groupDots, "dots operand")) return 0; |
2379 | 76 | for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++) |
2380 | 48 | ; |
2381 | 28 | if (k == groupDots.length) { |
2382 | 0 | compileError(file, "Dots operand must consist of two cells separated by a comma"); |
2383 | 0 | return 0; |
2384 | 0 | } |
2385 | 28 | groupDots.chars[k] = '-'; |
2386 | 28 | if (!parseDots(file, &dotsParsed, &groupDots)) return 0; |
2387 | 28 | if (groupChars.length != 2 || dotsParsed.length != 2) { |
2388 | 0 | compileError(file, |
2389 | 0 | "two Unicode characters and two cells separated by a comma are needed."); |
2390 | 0 | return 0; |
2391 | 0 | } |
2392 | 28 | if (table) { |
2393 | 28 | TranslationTableOffset ruleOffset; |
2394 | 28 | TranslationTableCharacter *charsDotsPtr; |
2395 | 28 | charsDotsPtr = |
2396 | 28 | putChar(file, groupChars.chars[0], table, NULL, (*table)->ruleCounter); |
2397 | 28 | charsDotsPtr->attributes |= CTC_Math; |
2398 | 28 | charsDotsPtr = |
2399 | 28 | putChar(file, groupChars.chars[1], table, NULL, (*table)->ruleCounter); |
2400 | 28 | charsDotsPtr->attributes |= CTC_Math; |
2401 | 28 | charsDotsPtr = putDots(file, dotsParsed.chars[0], table, (*table)->ruleCounter); |
2402 | 28 | charsDotsPtr->attributes |= CTC_Math; |
2403 | 28 | charsDotsPtr = putDots(file, dotsParsed.chars[1], table, (*table)->ruleCounter); |
2404 | 28 | charsDotsPtr->attributes |= CTC_Math; |
2405 | 28 | if (!addRule(file, CTO_Grouping, &groupChars, &dotsParsed, 0, 0, &ruleOffset, |
2406 | 28 | NULL, noback, nofor, table)) |
2407 | 0 | return 0; |
2408 | 28 | if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
2409 | 28 | } |
2410 | 27 | if (displayTable) { |
2411 | 27 | putCharDotsMapping(file, groupChars.chars[0], dotsParsed.chars[0], displayTable); |
2412 | 27 | putCharDotsMapping(file, groupChars.chars[1], dotsParsed.chars[1], displayTable); |
2413 | 27 | } |
2414 | 27 | if (table) { |
2415 | 27 | widechar endChar; |
2416 | 27 | widechar endDots; |
2417 | 27 | endChar = groupChars.chars[1]; |
2418 | 27 | endDots = dotsParsed.chars[1]; |
2419 | 27 | groupChars.length = dotsParsed.length = 1; |
2420 | 27 | if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
2421 | 27 | nofor, table)) |
2422 | 0 | return 0; |
2423 | 27 | groupChars.chars[0] = endChar; |
2424 | 27 | dotsParsed.chars[0] = endDots; |
2425 | 27 | if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
2426 | 27 | nofor, table)) |
2427 | 0 | return 0; |
2428 | 27 | } |
2429 | 27 | return 1; |
2430 | 27 | } |
2431 | | |
2432 | | /* Functions for compiling hyphenation tables */ |
2433 | | |
/* Working copy of the hyphenation automaton while a hyphenation table is
 * being compiled. */
typedef struct HyphenDict { /* hyphenation dictionary: finite state machine */
	/* number of states currently in use in `states` */
	int numStates;
	/* heap array of states; hyphenGetNewState doubles it whenever numStates
	 * reaches a power of two */
	HyphenationState *states;
} HyphenDict;
2438 | | |
/* "No state" marker: initial fallbackState of every state and the value
 * hyphenHashLookup returns for a key that is not in the table */
#define DEFAULTSTATE 0xffff
/* number of bucket chains in a HyphenHashTab */
#define HYPHENHASHSIZE 8191
2441 | | |
/* One key/value pair of the hyphenation hash table, chained per bucket. */
typedef struct HyphenHashEntry {
	/* next entry in the same bucket, NULL at the end of the chain */
	struct HyphenHashEntry *next;
	/* heap copy of the key made by hyphenHashInsert, released by
	 * hyphenHashFree */
	CharsString *key;
	/* value stored for the key; callers in this file store a state number */
	int val;
} HyphenHashEntry;
2447 | | |
/* Hash table from strings to ints: HYPHENHASHSIZE bucket heads, each the
 * start of a singly linked chain of entries (NULL when the bucket is empty). */
typedef struct HyphenHashTab {
	HyphenHashEntry *entries[HYPHENHASHSIZE];
} HyphenHashTab;
2451 | | |
2452 | | /* a hash function from ASU - adapted from Gtk+ */ |
2453 | | static unsigned int |
2454 | 1.96M | hyphenStringHash(const CharsString *s) { |
2455 | 1.96M | int k; |
2456 | 1.96M | unsigned int h = 0, g; |
2457 | 567M | for (k = 0; k < s->length; k++) { |
2458 | 565M | h = (h << 4) + s->chars[k]; |
2459 | 565M | if ((g = h & 0xf0000000)) { |
2460 | 505M | h = h ^ (g >> 24); |
2461 | 505M | h = h ^ g; |
2462 | 505M | } |
2463 | 565M | } |
2464 | 1.96M | return h; |
2465 | 1.96M | } |
2466 | | |
2467 | | static HyphenHashTab * |
2468 | 21 | hyphenHashNew(void) { |
2469 | 21 | HyphenHashTab *hashTab; |
2470 | 21 | if (!(hashTab = malloc(sizeof(HyphenHashTab)))) _lou_outOfMemory(); |
2471 | 21 | memset(hashTab, 0, sizeof(HyphenHashTab)); |
2472 | 21 | return hashTab; |
2473 | 21 | } |
2474 | | |
2475 | | static void |
2476 | 21 | hyphenHashFree(HyphenHashTab *hashTab) { |
2477 | 21 | int i; |
2478 | 21 | HyphenHashEntry *e, *next; |
2479 | 172k | for (i = 0; i < HYPHENHASHSIZE; i++) |
2480 | 178k | for (e = hashTab->entries[i]; e; e = next) { |
2481 | 6.79k | next = e->next; |
2482 | 6.79k | free(e->key); |
2483 | 6.79k | free(e); |
2484 | 6.79k | } |
2485 | 21 | free(hashTab); |
2486 | 21 | } |
2487 | | |
2488 | | /* assumes that key is not already present! */ |
2489 | | static void |
2490 | 6.79k | hyphenHashInsert(HyphenHashTab *hashTab, const CharsString *key, int val) { |
2491 | 6.79k | int i, j; |
2492 | 6.79k | HyphenHashEntry *e; |
2493 | 6.79k | i = hyphenStringHash(key) % HYPHENHASHSIZE; |
2494 | 6.79k | if (!(e = malloc(sizeof(HyphenHashEntry)))) _lou_outOfMemory(); |
2495 | 6.79k | e->next = hashTab->entries[i]; |
2496 | 6.79k | e->key = malloc((key->length + 1) * CHARSIZE); |
2497 | 6.79k | if (!e->key) _lou_outOfMemory(); |
2498 | 6.79k | e->key->length = key->length; |
2499 | 2.00M | for (j = 0; j < key->length; j++) e->key->chars[j] = key->chars[j]; |
2500 | 6.79k | e->val = val; |
2501 | 6.79k | hashTab->entries[i] = e; |
2502 | 6.79k | } |
2503 | | |
2504 | | /* return val if found, otherwise DEFAULTSTATE */ |
2505 | | static int |
2506 | 1.96M | hyphenHashLookup(HyphenHashTab *hashTab, const CharsString *key) { |
2507 | 1.96M | int i, j; |
2508 | 1.96M | HyphenHashEntry *e; |
2509 | 1.96M | if (key->length == 0) return 0; |
2510 | 1.95M | i = hyphenStringHash(key) % HYPHENHASHSIZE; |
2511 | 2.18M | for (e = hashTab->entries[i]; e; e = e->next) { |
2512 | 235k | if (key->length != e->key->length) continue; |
2513 | 48.5k | for (j = 0; j < key->length; j++) |
2514 | 45.4k | if (key->chars[j] != e->key->chars[j]) break; |
2515 | 3.30k | if (j == key->length) return e->val; |
2516 | 3.30k | } |
2517 | 1.95M | return DEFAULTSTATE; |
2518 | 1.95M | } |
2519 | | |
2520 | | static int |
2521 | 6.79k | hyphenGetNewState(HyphenDict *dict, HyphenHashTab *hashTab, const CharsString *string) { |
2522 | 6.79k | hyphenHashInsert(hashTab, string, dict->numStates); |
2523 | | /* predicate is true if dict->numStates is a power of two */ |
2524 | 6.79k | if (!(dict->numStates & (dict->numStates - 1))) |
2525 | 161 | dict->states = |
2526 | 161 | realloc(dict->states, (dict->numStates << 1) * sizeof(HyphenationState)); |
2527 | 6.79k | if (!dict->states) _lou_outOfMemory(); |
2528 | 6.79k | dict->states[dict->numStates].hyphenPattern = 0; |
2529 | 6.79k | dict->states[dict->numStates].fallbackState = DEFAULTSTATE; |
2530 | 6.79k | dict->states[dict->numStates].numTrans = 0; |
2531 | 6.79k | dict->states[dict->numStates].trans.pointer = NULL; |
2532 | 6.79k | return dict->numStates++; |
2533 | 6.79k | } |
2534 | | |
2535 | | /* add a transition from state1 to state2 through ch - assumes that the |
2536 | | * transition does not already exist */ |
2537 | | static void |
2538 | 6.79k | hyphenAddTrans(HyphenDict *dict, int state1, int state2, widechar ch) { |
2539 | 6.79k | int numTrans; |
2540 | 6.79k | numTrans = dict->states[state1].numTrans; |
2541 | 6.79k | if (numTrans == 0) |
2542 | 6.70k | dict->states[state1].trans.pointer = malloc(sizeof(HyphenationTrans)); |
2543 | 87 | else if (!(numTrans & (numTrans - 1))) |
2544 | 70 | dict->states[state1].trans.pointer = realloc(dict->states[state1].trans.pointer, |
2545 | 70 | (numTrans << 1) * sizeof(HyphenationTrans)); |
2546 | 6.79k | dict->states[state1].trans.pointer[numTrans].ch = ch; |
2547 | 6.79k | dict->states[state1].trans.pointer[numTrans].newState = state2; |
2548 | 6.79k | dict->states[state1].numTrans++; |
2549 | 6.79k | } |
2550 | | |
2551 | | static int |
2552 | | compileHyphenation( |
2553 | 21 | FileInfo *file, CharsString *encoding, TranslationTableHeader **table) { |
2554 | 21 | CharsString hyph; |
2555 | 21 | HyphenationTrans *holdPointer; |
2556 | 21 | HyphenHashTab *hashTab; |
2557 | 21 | CharsString word; |
2558 | 21 | char pattern[MAXSTRING + 1]; |
2559 | 21 | unsigned int stateNum = 0, lastState = 0; |
2560 | 21 | int i, j, k = encoding->length; |
2561 | 21 | widechar ch; |
2562 | 21 | int found; |
2563 | 21 | HyphenHashEntry *e; |
2564 | 21 | HyphenDict dict; |
2565 | 21 | TranslationTableOffset holdOffset; |
2566 | | /* Set aside enough space for hyphenation states and transitions in |
2567 | | * translation table. Must be done before anything else */ |
2568 | 21 | allocateSpaceInTranslationTable(file, NULL, 250000, table); |
2569 | 21 | hashTab = hyphenHashNew(); |
2570 | 21 | dict.numStates = 1; |
2571 | 21 | dict.states = malloc(sizeof(HyphenationState)); |
2572 | 21 | if (!dict.states) _lou_outOfMemory(); |
2573 | 21 | dict.states[0].hyphenPattern = 0; |
2574 | 21 | dict.states[0].fallbackState = DEFAULTSTATE; |
2575 | 21 | dict.states[0].numTrans = 0; |
2576 | 21 | dict.states[0].trans.pointer = NULL; |
2577 | 251 | do { |
2578 | 251 | if (encoding->chars[0] == 'I') { |
2579 | 36 | if (!getToken(file, &hyph, NULL)) continue; |
2580 | 215 | } else { |
2581 | | /* UTF-8 */ |
2582 | 215 | if (!getToken(file, &word, NULL)) continue; |
2583 | 206 | parseChars(file, &hyph, &word); |
2584 | 206 | } |
2585 | 238 | if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] == '%' || |
2586 | 211 | hyph.chars[0] == '<') |
2587 | 28 | continue; /* comment */ |
2588 | 210 | j = 0; |
2589 | 210 | pattern[j] = '0'; |
2590 | 7.94k | for (i = 0; i < hyph.length; i++) { |
2591 | 7.73k | if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9') |
2592 | 53 | pattern[j] = (char)hyph.chars[i]; |
2593 | 7.68k | else { |
2594 | 7.68k | word.chars[j] = hyph.chars[i]; |
2595 | 7.68k | pattern[++j] = '0'; |
2596 | 7.68k | } |
2597 | 7.73k | } |
2598 | 210 | word.chars[j] = 0; |
2599 | 210 | word.length = j; |
2600 | 210 | pattern[j + 1] = 0; |
2601 | 4.83k | for (i = 0; pattern[i] == '0'; i++) |
2602 | 4.62k | ; |
2603 | 210 | found = hyphenHashLookup(hashTab, &word); |
2604 | 210 | if (found != DEFAULTSTATE) |
2605 | 98 | stateNum = found; |
2606 | 112 | else |
2607 | 112 | stateNum = hyphenGetNewState(&dict, hashTab, &word); |
2608 | 210 | k = j + 2 - i; |
2609 | 210 | if (k > 0) { |
2610 | 210 | allocateSpaceInTranslationTable( |
2611 | 210 | file, &dict.states[stateNum].hyphenPattern, k, table); |
2612 | 210 | memcpy(&(*table)->ruleArea[dict.states[stateNum].hyphenPattern], &pattern[i], |
2613 | 210 | k); |
2614 | 210 | } |
2615 | | /* now, put in the prefix transitions */ |
2616 | 7.00k | while (found == DEFAULTSTATE) { |
2617 | 6.79k | lastState = stateNum; |
2618 | 6.79k | ch = word.chars[word.length-- - 1]; |
2619 | 6.79k | found = hyphenHashLookup(hashTab, &word); |
2620 | 6.79k | if (found != DEFAULTSTATE) |
2621 | 112 | stateNum = found; |
2622 | 6.68k | else |
2623 | 6.68k | stateNum = hyphenGetNewState(&dict, hashTab, &word); |
2624 | 6.79k | hyphenAddTrans(&dict, stateNum, lastState, ch); |
2625 | 6.79k | } |
2626 | 251 | } while (_lou_getALine(file)); |
2627 | | /* put in the fallback states */ |
2628 | 172k | for (i = 0; i < HYPHENHASHSIZE; i++) { |
2629 | 178k | for (e = hashTab->entries[i]; e; e = e->next) { |
2630 | 1.95M | for (j = 1; j <= e->key->length; j++) { |
2631 | 1.95M | word.length = 0; |
2632 | 563M | for (k = j; k < e->key->length; k++) |
2633 | 561M | word.chars[word.length++] = e->key->chars[k]; |
2634 | 1.95M | stateNum = hyphenHashLookup(hashTab, &word); |
2635 | 1.95M | if (stateNum != DEFAULTSTATE) break; |
2636 | 1.95M | } |
2637 | 6.79k | if (e->val) dict.states[e->val].fallbackState = stateNum; |
2638 | 6.79k | } |
2639 | 172k | } |
2640 | 21 | hyphenHashFree(hashTab); |
2641 | | /* Transfer hyphenation information to table */ |
2642 | 6.83k | for (i = 0; i < dict.numStates; i++) { |
2643 | 6.81k | if (dict.states[i].numTrans == 0) |
2644 | 108 | dict.states[i].trans.offset = 0; |
2645 | 6.70k | else { |
2646 | 6.70k | holdPointer = dict.states[i].trans.pointer; |
2647 | 6.70k | allocateSpaceInTranslationTable(file, &dict.states[i].trans.offset, |
2648 | 6.70k | dict.states[i].numTrans * sizeof(HyphenationTrans), table); |
2649 | 6.70k | memcpy(&(*table)->ruleArea[dict.states[i].trans.offset], holdPointer, |
2650 | 6.70k | dict.states[i].numTrans * sizeof(HyphenationTrans)); |
2651 | 6.70k | free(holdPointer); |
2652 | 6.70k | } |
2653 | 6.81k | } |
2654 | 21 | allocateSpaceInTranslationTable( |
2655 | 21 | file, &holdOffset, dict.numStates * sizeof(HyphenationState), table); |
2656 | 21 | (*table)->hyphenStatesArray = holdOffset; |
2657 | | /* Prevents segmentation fault if table is reallocated */ |
2658 | 21 | memcpy(&(*table)->ruleArea[(*table)->hyphenStatesArray], &dict.states[0], |
2659 | 21 | dict.numStates * sizeof(HyphenationState)); |
2660 | 21 | free(dict.states); |
2661 | 21 | return 1; |
2662 | 21 | } |
2663 | | |
2664 | | static int |
2665 | | compileCharDef(FileInfo *file, TranslationTableOpcode opcode, |
2666 | | TranslationTableCharacterAttributes attributes, int noback, int nofor, |
2667 | 808 | TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
2668 | 808 | CharsString ruleChars; |
2669 | 808 | CharsString ruleDots; |
2670 | 808 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
2671 | 808 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
2672 | 808 | if (ruleChars.length != 1) { |
2673 | 0 | compileError(file, "Exactly one character is required."); |
2674 | 0 | return 0; |
2675 | 0 | } |
2676 | 808 | if (ruleDots.length < 1) { |
2677 | 0 | compileError(file, "At least one cell is required."); |
2678 | 0 | return 0; |
2679 | 0 | } |
2680 | 808 | if (table) { |
2681 | 808 | TranslationTableCharacter *character; |
2682 | 808 | TranslationTableCharacter *cell = NULL; |
2683 | 808 | int k; |
2684 | 808 | if (attributes & (CTC_UpperCase | CTC_LowerCase)) attributes |= CTC_Letter; |
2685 | 808 | character = putChar(file, ruleChars.chars[0], table, NULL, (*table)->ruleCounter); |
2686 | 808 | character->attributes |= attributes; |
2687 | 1.66k | for (k = ruleDots.length - 1; k >= 0; k -= 1) { |
2688 | 857 | cell = getDots(ruleDots.chars[k], *table); |
2689 | 857 | if (!cell) |
2690 | 785 | cell = putDots(file, ruleDots.chars[k], table, (*table)->ruleCounter); |
2691 | 857 | } |
2692 | 808 | if (ruleDots.length == 1) cell->attributes |= attributes; |
2693 | 808 | } |
2694 | 808 | if (displayTable && ruleDots.length == 1) |
2695 | 783 | putCharDotsMapping(file, ruleChars.chars[0], ruleDots.chars[0], displayTable); |
2696 | 808 | if (table) |
2697 | 808 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, NULL, NULL, noback, nofor, |
2698 | 808 | table)) |
2699 | 0 | return 0; |
2700 | 808 | return 1; |
2701 | 808 | } |
2702 | | |
2703 | | static int |
2704 | 8 | compileBeforeAfter(FileInfo *file) { |
2705 | | /* 1=before, 2=after, 0=error */ |
2706 | 8 | CharsString token; |
2707 | 8 | CharsString tmp; |
2708 | 8 | if (!getToken(file, &token, "last word before or after")) return 0; |
2709 | 8 | if (!parseChars(file, &tmp, &token)) return 0; |
2710 | 8 | if (eqasc2uni((unsigned char *)"before", tmp.chars, 6)) |
2711 | 3 | return 1; |
2712 | 5 | else if (eqasc2uni((unsigned char *)"after", tmp.chars, 5)) |
2713 | 4 | return 2; |
2714 | 1 | return 0; |
2715 | 8 | } |
2716 | | |
/**
 * Macro
 *
 * A named template: a fixed text plus a list of places where arguments are
 * inserted. All three pointers refer to heap copies owned by the macro
 * (made in create_macro, released in free_macro).
 */
typedef struct {
	const char *name;
	const widechar *definition;  // fixed part
	int definition_length;       // number of characters in definition
	const int *substitutions;    // variable part: position and argument index of each
								 // variable substitution
								 // (2 * substitution_count ints: position in
								 // definition first, argument index second)
	int substitution_count;
	int argument_count;  // number of expected arguments
} Macro;
2729 | | |
/**
 * List of in-scope macros
 *
 * Singly linked list; a NULL pointer is the empty list. Lists are built with
 * cons_macro and released with free_macro_list.
 */
typedef struct MacroList {
	const Macro *head;              // macro held by this node
	const struct MacroList *tail;  // remaining macros, or NULL
} MacroList;
2737 | | |
2738 | | /** |
2739 | | * Create new macro. |
2740 | | */ |
2741 | | static const Macro * |
2742 | | create_macro(const char *name, const widechar *definition, int definition_length, |
2743 | 0 | const int *substitutions, int substitution_count, int argument_count) { |
2744 | 0 | Macro *m = malloc(sizeof(Macro)); |
2745 | 0 | m->name = strdup(name); |
2746 | 0 | widechar *definition_copy = malloc(definition_length * sizeof(widechar)); |
2747 | 0 | memcpy(definition_copy, definition, definition_length * sizeof(widechar)); |
2748 | 0 | m->definition = definition_copy; |
2749 | 0 | m->definition_length = definition_length; |
2750 | 0 | int *substitutions_copy = malloc(2 * substitution_count * sizeof(int)); |
2751 | 0 | memcpy(substitutions_copy, substitutions, 2 * substitution_count * sizeof(int)); |
2752 | 0 | m->substitutions = substitutions_copy; |
2753 | 0 | m->substitution_count = substitution_count; |
2754 | 0 | m->argument_count = argument_count; |
2755 | 0 | return m; |
2756 | 0 | } |
2757 | | |
2758 | | /** |
2759 | | * Create new macro list from "head" macro and "tail" list. |
2760 | | */ |
2761 | | static const MacroList * |
2762 | 0 | cons_macro(const Macro *head, const MacroList *tail) { |
2763 | 0 | MacroList *list = malloc(sizeof(MacroList)); |
2764 | 0 | list->head = head; |
2765 | 0 | list->tail = tail; |
2766 | 0 | return list; |
2767 | 0 | } |
2768 | | |
2769 | | /** |
2770 | | * Free macro returned by create_macro. |
2771 | | */ |
2772 | | static void |
2773 | 0 | free_macro(const Macro *macro) { |
2774 | 0 | if (macro) { |
2775 | 0 | free((char *)macro->name); |
2776 | 0 | free((char *)macro->definition); |
2777 | 0 | free((int *)macro->substitutions); |
2778 | 0 | free((Macro *)macro); |
2779 | 0 | } |
2780 | 0 | } |
2781 | | |
2782 | | /** |
2783 | | * Free macro list returned by cons_macro. |
2784 | | */ |
2785 | | static void |
2786 | 699 | free_macro_list(const MacroList *list) { |
2787 | 699 | if (list) { |
2788 | 0 | free_macro((Macro *)list->head); |
2789 | 0 | free_macro_list((MacroList *)list->tail); |
2790 | 0 | free((MacroList *)list); |
2791 | 0 | } |
2792 | 699 | } |
2793 | | |
2794 | | /** |
2795 | | * Compile macro |
2796 | | */ |
2797 | | static int |
2798 | 0 | compileMacro(FileInfo *file, const Macro **macro) { |
2799 | 0 |
|
2800 | 0 | // parse name |
2801 | 0 | CharsString token; |
2802 | 0 | if (!getToken(file, &token, "macro name")) return 0; |
2803 | 0 | switch (getOpcode(file, &token)) { |
2804 | 0 | case CTO_UpLow: // deprecated so "uplow" may be used as macro name |
2805 | 0 | case CTO_None: |
2806 | 0 | break; |
2807 | 0 | default: |
2808 | 0 | compileError(file, "Invalid macro name: already taken by an opcode"); |
2809 | 0 | return 0; |
2810 | 0 | } |
2811 | 0 | for (int i = 0; i < token.length; i++) { |
2812 | 0 | if (!((token.chars[i] >= 'a' && token.chars[i] <= 'z') || |
2813 | 0 | (token.chars[i] >= 'A' && token.chars[i] <= 'Z') || |
2814 | 0 | (token.chars[i] >= '0' && token.chars[i] <= '9'))) { |
2815 | 0 | compileError(file, |
2816 | 0 | "Invalid macro name: must be a word containing only letters and " |
2817 | 0 | "digits"); |
2818 | 0 | return 0; |
2819 | 0 | } |
2820 | 0 | } |
2821 | 0 | static char name[MAXSTRING + 1]; |
2822 | 0 | int name_length; |
2823 | 0 | for (name_length = 0; name_length < token.length; |
2824 | 0 | name_length++) // we know token can not be longer than MAXSTRING |
2825 | 0 | name[name_length] = (char)token.chars[name_length]; |
2826 | 0 | name[name_length] = '\0'; |
2827 | 0 |
|
2828 | 0 | // parse body |
2829 | 0 | static widechar definition[MAXSTRING]; |
2830 | 0 | static int substitutions[2 * MAX_MACRO_VAR]; |
2831 | 0 | int definition_length = 0; |
2832 | 0 | int substitution_count = 0; |
2833 | 0 | int argument_count = 0; |
2834 | 0 | int dollar = 0; |
2835 | 0 |
|
2836 | 0 | // ignore rest of line after name and read lines until "eom" is encountered |
2837 | 0 | while (_lou_getALine(file)) { |
2838 | 0 | if (file->linelen >= 3 && file->line[0] == 'e' && file->line[1] == 'o' && |
2839 | 0 | file->line[2] == 'm') { |
2840 | 0 | *macro = create_macro(name, definition, definition_length, substitutions, |
2841 | 0 | substitution_count, argument_count); |
2842 | 0 | return 1; |
2843 | 0 | } |
2844 | 0 | while (!atEndOfLine(file)) { |
2845 | 0 | widechar c = file->line[file->linepos++]; |
2846 | 0 | if (dollar) { |
2847 | 0 | dollar = 0; |
2848 | 0 | if (c >= '0' && c <= '9') { |
2849 | 0 | if (substitution_count >= MAX_MACRO_VAR) { |
2850 | 0 | compileError(file, |
2851 | 0 | "Macro can not have more than %d variable substitutions", |
2852 | 0 | MAXSTRING); |
2853 | 0 | return 0; |
2854 | 0 | } |
2855 | 0 | substitutions[2 * substitution_count] = definition_length; |
2856 | 0 | int arg = c - '0'; |
2857 | 0 | substitutions[2 * substitution_count + 1] = arg; |
2858 | 0 | if (arg > argument_count) argument_count = arg; |
2859 | 0 | substitution_count++; |
2860 | 0 | continue; |
2861 | 0 | } |
2862 | 0 | } else if (c == '$') { |
2863 | 0 | dollar = 1; |
2864 | 0 | continue; |
2865 | 0 | } |
2866 | 0 | if (definition_length >= MAXSTRING) { |
2867 | 0 | compileError(file, "Macro exceeds %d characters", MAXSTRING); |
2868 | 0 | return 0; |
2869 | 0 | } else |
2870 | 0 | definition[definition_length++] = c; |
2871 | 0 | } |
2872 | 0 | dollar = 0; |
2873 | 0 | if (definition_length >= MAXSTRING) { |
2874 | 0 | compileError(file, "Macro exceeds %d characters", MAXSTRING); |
2875 | 0 | return 0; |
2876 | 0 | } |
2877 | 0 | definition[definition_length++] = '\n'; |
2878 | 0 | } |
2879 | 0 | compileError(file, "macro must be terminated with 'eom'"); |
2880 | 0 | return 0; |
2881 | 0 | } |
2882 | | |
2883 | | static int |
2884 | | compileRule(FileInfo *file, TranslationTableHeader **table, |
2885 | 5.58k | DisplayTableHeader **displayTable, const MacroList **inScopeMacros) { |
2886 | 5.58k | CharsString token; |
2887 | 5.58k | TranslationTableOpcode opcode; |
2888 | 5.58k | CharsString ruleChars; |
2889 | 5.58k | CharsString ruleDots; |
2890 | 5.58k | CharsString cells; |
2891 | 5.58k | CharsString scratchPad; |
2892 | 5.58k | CharsString emphClass; |
2893 | 5.58k | TranslationTableCharacterAttributes after = 0; |
2894 | 5.58k | TranslationTableCharacterAttributes before = 0; |
2895 | 5.58k | int noback, nofor, nocross; |
2896 | 5.58k | noback = nofor = nocross = 0; |
2897 | 6.55k | doOpcode: |
2898 | 6.55k | if (!getToken(file, &token, NULL)) return 1; /* blank line */ |
2899 | 4.80k | if (token.chars[0] == '#' || token.chars[0] == '<') return 1; /* comment */ |
2900 | 4.54k | if (file->lineNumber == 1 && |
2901 | 1.70k | (eqasc2uni((unsigned char *)"ISO", token.chars, 3) || |
2902 | 1.69k | eqasc2uni((unsigned char *)"UTF-8", token.chars, 5))) { |
2903 | 21 | if (table) |
2904 | 21 | compileHyphenation(file, &token, table); |
2905 | 0 | else |
2906 | | /* ignore the whole file */ |
2907 | 0 | while (_lou_getALine(file)) |
2908 | 0 | ; |
2909 | 21 | return 1; |
2910 | 21 | } |
2911 | 4.51k | opcode = getOpcode(file, &token); |
2912 | 4.51k | switch (opcode) { |
2913 | 0 | case CTO_Macro: { |
2914 | 0 | const Macro *macro; |
2915 | | #ifdef ENABLE_MACROS |
2916 | | if (!inScopeMacros) { |
2917 | | compileError(file, "Defining macros only allowed in table files."); |
2918 | | return 0; |
2919 | | } |
2920 | | if (compileMacro(file, ¯o)) { |
2921 | | *inScopeMacros = cons_macro(macro, *inScopeMacros); |
2922 | | return 1; |
2923 | | } |
2924 | | return 0; |
2925 | | #else |
2926 | 0 | compileError(file, "Macro feature is disabled."); |
2927 | 0 | return 0; |
2928 | 0 | #endif |
2929 | 0 | } |
2930 | 2 | case CTO_IncludeFile: { |
2931 | 2 | CharsString includedFile; |
2932 | 2 | if (!getToken(file, &token, "include file name")) return 0; |
2933 | 2 | if (!parseChars(file, &includedFile, &token)) return 0; |
2934 | 2 | return includeFile(file, &includedFile, table, displayTable); |
2935 | 2 | } |
2936 | 488 | case CTO_NoBack: |
2937 | 488 | if (nofor) { |
2938 | 0 | compileError(file, "%s already specified.", _lou_findOpcodeName(CTO_NoFor)); |
2939 | 0 | return 0; |
2940 | 0 | } |
2941 | 488 | noback = 1; |
2942 | 488 | goto doOpcode; |
2943 | 411 | case CTO_NoFor: |
2944 | 411 | if (noback) { |
2945 | 0 | compileError(file, "%s already specified.", _lou_findOpcodeName(CTO_NoBack)); |
2946 | 0 | return 0; |
2947 | 0 | } |
2948 | 411 | nofor = 1; |
2949 | 411 | goto doOpcode; |
2950 | 697 | case CTO_Space: |
2951 | 697 | return compileCharDef( |
2952 | 697 | file, opcode, CTC_Space, noback, nofor, table, displayTable); |
2953 | 17 | case CTO_Digit: |
2954 | 17 | return compileCharDef( |
2955 | 17 | file, opcode, CTC_Digit, noback, nofor, table, displayTable); |
2956 | 15 | case CTO_LitDigit: |
2957 | 15 | return compileCharDef( |
2958 | 15 | file, opcode, CTC_LitDigit, noback, nofor, table, displayTable); |
2959 | 5 | case CTO_Punctuation: |
2960 | 5 | return compileCharDef( |
2961 | 5 | file, opcode, CTC_Punctuation, noback, nofor, table, displayTable); |
2962 | 0 | case CTO_Math: |
2963 | 0 | return compileCharDef(file, opcode, CTC_Math, noback, nofor, table, displayTable); |
2964 | 9 | case CTO_Sign: |
2965 | 9 | return compileCharDef(file, opcode, CTC_Sign, noback, nofor, table, displayTable); |
2966 | 16 | case CTO_Letter: |
2967 | 16 | return compileCharDef( |
2968 | 16 | file, opcode, CTC_Letter, noback, nofor, table, displayTable); |
2969 | 23 | case CTO_UpperCase: |
2970 | 23 | return compileCharDef( |
2971 | 23 | file, opcode, CTC_UpperCase, noback, nofor, table, displayTable); |
2972 | 26 | case CTO_LowerCase: |
2973 | 26 | return compileCharDef( |
2974 | 26 | file, opcode, CTC_LowerCase, noback, nofor, table, displayTable); |
2975 | 28 | case CTO_Grouping: |
2976 | 28 | return compileGrouping(file, noback, nofor, table, displayTable); |
2977 | 0 | case CTO_Display: |
2978 | 0 | if (!displayTable) return 1; // ignore |
2979 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
2980 | 0 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
2981 | 0 | if (ruleChars.length != 1 || ruleDots.length != 1) { |
2982 | 0 | compileError(file, "Exactly one character and one cell are required."); |
2983 | 0 | return 0; |
2984 | 0 | } |
2985 | 0 | return putCharDotsMapping( |
2986 | 0 | file, ruleChars.chars[0], ruleDots.chars[0], displayTable); |
2987 | 0 | case CTO_UpLow: |
2988 | 25 | case CTO_None: { |
2989 | | // check if token is a macro name |
2990 | 25 | if (inScopeMacros) { |
2991 | 25 | const MacroList *macros = *inScopeMacros; |
2992 | 25 | while (macros) { |
2993 | 0 | const Macro *m = macros->head; |
2994 | 0 | if (token.length == strlen(m->name) && |
2995 | 0 | eqasc2uni((unsigned char *)m->name, token.chars, token.length)) { |
2996 | 0 | if (!inScopeMacros) { |
2997 | 0 | compileError(file, "Calling macros only allowed in table files."); |
2998 | 0 | return 0; |
2999 | 0 | } |
3000 | 0 | FileInfo tmpFile; |
3001 | 0 | memset(&tmpFile, 0, sizeof(tmpFile)); |
3002 | 0 | tmpFile.fileName = file->fileName; |
3003 | 0 | tmpFile.sourceFile = file->sourceFile; |
3004 | 0 | tmpFile.lineNumber = file->lineNumber; |
3005 | 0 | tmpFile.encoding = noEncoding; |
3006 | 0 | tmpFile.status = 0; |
3007 | 0 | tmpFile.linepos = 0; |
3008 | 0 | tmpFile.linelen = 0; |
3009 | 0 | int argument_count = 0; |
3010 | 0 | CharsString *arguments = |
3011 | 0 | malloc(m->argument_count * sizeof(CharsString)); |
3012 | 0 | while (argument_count < m->argument_count) { |
3013 | 0 | if (getToken(file, &token, "macro argument")) |
3014 | 0 | arguments[argument_count++] = token; |
3015 | 0 | else |
3016 | 0 | break; |
3017 | 0 | } |
3018 | 0 | if (argument_count < m->argument_count) { |
3019 | 0 | compileError(file, "Expected %d arguments", m->argument_count); |
3020 | 0 | return 0; |
3021 | 0 | } |
3022 | 0 | int i = 0; |
3023 | 0 | int subst = 0; |
3024 | 0 | int next = subst < m->substitution_count ? m->substitutions[2 * subst] |
3025 | 0 | : m->definition_length; |
3026 | 0 | for (;;) { |
3027 | 0 | while (i < next) { |
3028 | 0 | widechar c = m->definition[i++]; |
3029 | 0 | if (c == '\n') { |
3030 | 0 | if (!compileRule(&tmpFile, table, displayTable, |
3031 | 0 | inScopeMacros)) { |
3032 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3033 | 0 | "result of macro expansion was: %s", |
3034 | 0 | _lou_showString( |
3035 | 0 | tmpFile.line, tmpFile.linelen, 0)); |
3036 | 0 | return 0; |
3037 | 0 | } |
3038 | 0 | tmpFile.linepos = 0; |
3039 | 0 | tmpFile.linelen = 0; |
3040 | 0 | } else if (tmpFile.linelen >= MAXSTRING) { |
3041 | 0 | compileError(file, |
3042 | 0 | "Line exceeds %d characters (post macro " |
3043 | 0 | "expansion)", |
3044 | 0 | MAXSTRING); |
3045 | 0 | return 0; |
3046 | 0 | } else |
3047 | 0 | tmpFile.line[tmpFile.linelen++] = c; |
3048 | 0 | } |
3049 | 0 | if (subst < m->substitution_count) { |
3050 | 0 | CharsString arg = |
3051 | 0 | arguments[m->substitutions[2 * subst + 1] - 1]; |
3052 | 0 | for (int j = 0; j < arg.length; j++) |
3053 | 0 | tmpFile.line[tmpFile.linelen++] = arg.chars[j]; |
3054 | 0 | subst++; |
3055 | 0 | next = subst < m->substitution_count |
3056 | 0 | ? m->substitutions[2 * subst] |
3057 | 0 | : m->definition_length; |
3058 | 0 | } else { |
3059 | 0 | if (!compileRule( |
3060 | 0 | &tmpFile, table, displayTable, inScopeMacros)) { |
3061 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3062 | 0 | "result of macro expansion was: %s", |
3063 | 0 | _lou_showString( |
3064 | 0 | tmpFile.line, tmpFile.linelen, 0)); |
3065 | 0 | return 0; |
3066 | 0 | } |
3067 | 0 | break; |
3068 | 0 | } |
3069 | 0 | } |
3070 | 0 | return 1; |
3071 | 0 | } |
3072 | 0 | macros = macros->tail; |
3073 | 0 | } |
3074 | 25 | } |
3075 | 25 | if (opcode == CTO_UpLow) { |
3076 | 0 | compileError(file, "The uplow opcode is deprecated."); |
3077 | 0 | return 0; |
3078 | 0 | } |
3079 | 25 | compileError(file, "opcode %s not defined.", |
3080 | 25 | _lou_showString(token.chars, token.length, 0)); |
3081 | 25 | return 0; |
3082 | 25 | } |
3083 | | |
3084 | | /* now only opcodes follow that don't modify the display table */ |
3085 | 2.75k | default: |
3086 | 2.75k | if (!table) return 1; |
3087 | 2.75k | switch (opcode) { |
3088 | 10 | case CTO_Locale: |
3089 | 10 | compileWarning(file, |
3090 | 10 | "The locale opcode is not implemented. Use the locale meta data " |
3091 | 10 | "instead."); |
3092 | 10 | return 1; |
3093 | 64 | case CTO_Undefined: { |
3094 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3095 | 64 | TranslationTableOffset ruleOffset = (*table)->undefined; |
3096 | 64 | if (!compileBrailleIndicator(file, "undefined character opcode", |
3097 | 64 | CTO_Undefined, &ruleOffset, noback, nofor, table)) |
3098 | 1 | return 0; |
3099 | 63 | (*table)->undefined = ruleOffset; |
3100 | 63 | return 1; |
3101 | 64 | } |
3102 | 85 | case CTO_Match: { |
3103 | 85 | int ok = 0; |
3104 | 85 | widechar *patterns = NULL; |
3105 | 85 | TranslationTableRule *rule; |
3106 | 85 | TranslationTableOffset ruleOffset; |
3107 | 85 | CharsString ptn_before, ptn_after; |
3108 | 85 | TranslationTableOffset patternsOffset; |
3109 | 85 | int len, mrk; |
3110 | 85 | size_t patternsByteSize = sizeof(*patterns) * 27720; |
3111 | 85 | patterns = (widechar *)malloc(patternsByteSize); |
3112 | 85 | if (!patterns) _lou_outOfMemory(); |
3113 | 85 | memset(patterns, 0xffff, patternsByteSize); |
3114 | 85 | noback = 1; |
3115 | 85 | getMatchPatternCharacters(file, &ptn_before); |
3116 | 85 | getRuleCharsText(file, &ruleChars); |
3117 | 85 | getMatchPatternCharacters(file, &ptn_after); |
3118 | 85 | getRuleDotsPattern(file, &ruleDots); |
3119 | 85 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, &ruleOffset, |
3120 | 85 | &rule, noback, nofor, table)) |
3121 | 0 | goto CTO_Match_cleanup; |
3122 | 85 | if (ptn_before.chars[0] == '-' && ptn_before.length == 1) |
3123 | 1 | len = _lou_pattern_compile( |
3124 | 1 | &ptn_before.chars[0], 0, &patterns[1], 13841, *table, file); |
3125 | 84 | else |
3126 | 84 | len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length, |
3127 | 84 | &patterns[1], 13841, *table, file); |
3128 | 85 | if (!len) goto CTO_Match_cleanup; |
3129 | 83 | mrk = patterns[0] = len + 1; |
3130 | 83 | _lou_pattern_reverse(&patterns[1]); |
3131 | 83 | if (ptn_after.chars[0] == '-' && ptn_after.length == 1) |
3132 | 0 | len = _lou_pattern_compile( |
3133 | 0 | &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table, file); |
3134 | 83 | else |
3135 | 83 | len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length, |
3136 | 83 | &patterns[mrk], 13841, *table, file); |
3137 | 83 | if (!len) goto CTO_Match_cleanup; |
3138 | 82 | len += mrk; |
3139 | 82 | if (!allocateSpaceInTranslationTable( |
3140 | 82 | file, &patternsOffset, len * sizeof(widechar), table)) |
3141 | 0 | goto CTO_Match_cleanup; |
3142 | | // allocateSpaceInTranslationTable may have moved table, so make sure rule is |
3143 | | // still valid |
3144 | 82 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
3145 | 82 | memcpy(&(*table)->ruleArea[patternsOffset], patterns, len * sizeof(widechar)); |
3146 | 82 | rule->patterns = patternsOffset; |
3147 | 82 | ok = 1; |
3148 | 85 | CTO_Match_cleanup: |
3149 | 85 | free(patterns); |
3150 | 85 | return ok; |
3151 | 82 | } |
3152 | | |
3153 | 51 | case CTO_BackMatch: { |
3154 | 51 | int ok = 0; |
3155 | 51 | widechar *patterns = NULL; |
3156 | 51 | TranslationTableRule *rule; |
3157 | 51 | TranslationTableOffset ruleOffset; |
3158 | 51 | CharsString ptn_before, ptn_after; |
3159 | 51 | TranslationTableOffset patternOffset; |
3160 | 51 | int len, mrk; |
3161 | 51 | size_t patternsByteSize = sizeof(*patterns) * 27720; |
3162 | 51 | patterns = (widechar *)malloc(patternsByteSize); |
3163 | 51 | if (!patterns) _lou_outOfMemory(); |
3164 | 51 | memset(patterns, 0xffff, patternsByteSize); |
3165 | 51 | nofor = 1; |
3166 | 51 | getMatchPatternCharacters(file, &ptn_before); |
3167 | 51 | getRuleCharsText(file, &ruleChars); |
3168 | 51 | getMatchPatternCharacters(file, &ptn_after); |
3169 | 51 | getRuleDotsPattern(file, &ruleDots); |
3170 | 51 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, &rule, |
3171 | 51 | noback, nofor, table)) |
3172 | 0 | goto CTO_BackMatch_cleanup; |
3173 | 51 | if (ptn_before.chars[0] == '-' && ptn_before.length == 1) |
3174 | 0 | len = _lou_pattern_compile( |
3175 | 0 | &ptn_before.chars[0], 0, &patterns[1], 13841, *table, file); |
3176 | 51 | else |
3177 | 51 | len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length, |
3178 | 51 | &patterns[1], 13841, *table, file); |
3179 | 51 | if (!len) goto CTO_BackMatch_cleanup; |
3180 | 48 | mrk = patterns[0] = len + 1; |
3181 | 48 | _lou_pattern_reverse(&patterns[1]); |
3182 | 48 | if (ptn_after.chars[0] == '-' && ptn_after.length == 1) |
3183 | 0 | len = _lou_pattern_compile( |
3184 | 0 | &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table, file); |
3185 | 48 | else |
3186 | 48 | len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length, |
3187 | 48 | &patterns[mrk], 13841, *table, file); |
3188 | 48 | if (!len) goto CTO_BackMatch_cleanup; |
3189 | 46 | len += mrk; |
3190 | 46 | if (!allocateSpaceInTranslationTable( |
3191 | 46 | file, &patternOffset, len * sizeof(widechar), table)) |
3192 | 0 | goto CTO_BackMatch_cleanup; |
3193 | | // allocateSpaceInTranslationTable may have moved table, so make sure rule is |
3194 | | // still valid |
3195 | 46 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
3196 | 46 | memcpy(&(*table)->ruleArea[patternOffset], patterns, len * sizeof(widechar)); |
3197 | 46 | rule->patterns = patternOffset; |
3198 | 46 | ok = 1; |
3199 | 51 | CTO_BackMatch_cleanup: |
3200 | 51 | free(patterns); |
3201 | 51 | return ok; |
3202 | 46 | } |
3203 | | |
3204 | 50 | case CTO_CapsLetter: |
3205 | 141 | case CTO_BegCapsWord: |
3206 | 154 | case CTO_EndCapsWord: |
3207 | 164 | case CTO_BegCaps: |
3208 | 166 | case CTO_EndCaps: |
3209 | 176 | case CTO_BegCapsPhrase: |
3210 | 183 | case CTO_EndCapsPhrase: |
3211 | 212 | case CTO_LenCapsPhrase: |
3212 | | /* these 8 general purpose opcodes are compiled further down to more specific |
3213 | | * internal opcodes: |
3214 | | * - modeletter |
3215 | | * - begmodeword |
3216 | | * - endmodeword |
3217 | | * - begmode |
3218 | | * - endmode |
3219 | | * - begmodephrase |
3220 | | * - endmodephrase |
3221 | | * - lenmodephrase |
3222 | | */ |
3223 | 214 | case CTO_ModeLetter: |
3224 | 219 | case CTO_BegModeWord: |
3225 | 228 | case CTO_EndModeWord: |
3226 | 229 | case CTO_BegMode: |
3227 | 229 | case CTO_EndMode: |
3228 | 229 | case CTO_BegModePhrase: |
3229 | 230 | case CTO_EndModePhrase: |
3230 | 230 | case CTO_LenModePhrase: { |
3231 | 230 | TranslationTableCharacterAttributes mode; |
3232 | 230 | int i; |
3233 | 230 | switch (opcode) { |
3234 | 50 | case CTO_CapsLetter: |
3235 | 141 | case CTO_BegCapsWord: |
3236 | 154 | case CTO_EndCapsWord: |
3237 | 164 | case CTO_BegCaps: |
3238 | 166 | case CTO_EndCaps: |
3239 | 176 | case CTO_BegCapsPhrase: |
3240 | 183 | case CTO_EndCapsPhrase: |
3241 | 212 | case CTO_LenCapsPhrase: |
3242 | 212 | mode = CTC_UpperCase; |
3243 | 212 | i = 0; |
3244 | 212 | opcode += (CTO_ModeLetter - CTO_CapsLetter); |
3245 | 212 | break; |
3246 | 18 | default: |
3247 | 18 | if (!getToken(file, &token, "attribute name")) return 0; |
3248 | 18 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
3249 | 0 | return 0; |
3250 | 0 | } |
3251 | 18 | const CharacterClass *characterClass = findCharacterClass(&token, *table); |
3252 | 18 | if (!characterClass) { |
3253 | 1 | characterClass = |
3254 | 1 | addCharacterClass(file, token.chars, token.length, *table, 1); |
3255 | 1 | if (!characterClass) return 0; |
3256 | 1 | } |
3257 | 18 | mode = characterClass->attribute; |
3258 | 18 | if (!(mode == CTC_UpperCase || mode == CTC_Digit) && mode >= CTC_Space && |
3259 | 3 | mode <= CTC_LitDigit) { |
3260 | 0 | compileError(file, |
3261 | 0 | "mode must be \"uppercase\", \"digit\", or a custom " |
3262 | 0 | "attribute name."); |
3263 | 0 | return 0; |
3264 | 0 | } |
3265 | | /* check if this mode is already defined and if the number of modes does |
3266 | | * not exceed the maximal number */ |
3267 | 18 | if (mode == CTC_UpperCase) |
3268 | 10 | i = 0; |
3269 | 8 | else { |
3270 | 8 | for (i = 1; i < MAX_MODES && (*table)->modes[i].value; i++) { |
3271 | 0 | if ((*table)->modes[i].mode == mode) { |
3272 | 0 | break; |
3273 | 0 | } |
3274 | 0 | } |
3275 | 8 | if (i == MAX_MODES) { |
3276 | 0 | compileError(file, "Max number of modes (%i) reached", MAX_MODES); |
3277 | 0 | return 0; |
3278 | 0 | } |
3279 | 8 | } |
3280 | 230 | } |
3281 | 230 | if (!(*table)->modes[i].value) |
3282 | 139 | (*table)->modes[i] = (EmphasisClass){ plain_text, mode, |
3283 | 139 | 0x1 << (MAX_EMPH_CLASSES + i), MAX_EMPH_CLASSES + i }; |
3284 | 230 | switch (opcode) { |
3285 | 10 | case CTO_BegModePhrase: { |
3286 | | // not passing pointer because compileBrailleIndicator may reallocate |
3287 | | // table |
3288 | 10 | TranslationTableOffset ruleOffset = |
3289 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begPhraseOffset]; |
3290 | 10 | if (!compileBrailleIndicator(file, "first word capital sign", |
3291 | | // when mode is not caps (i != 0), provide enough information |
3292 | | // for back-translator to be able to recognize and ignore the |
3293 | | // indicator (but it won't be able to determine the mode) |
3294 | 10 | i == 0 ? CTO_BegCapsPhrase : CTO_BegModePhrase, &ruleOffset, |
3295 | 10 | noback, nofor, table)) |
3296 | 0 | return 0; |
3297 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begPhraseOffset] = ruleOffset; |
3298 | 10 | return 1; |
3299 | 10 | } |
3300 | 8 | case CTO_EndModePhrase: { |
3301 | 8 | TranslationTableOffset ruleOffset; |
3302 | 8 | switch (compileBeforeAfter(file)) { |
3303 | 3 | case 1: // before |
3304 | 3 | if ((*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseAfterOffset]) { |
3305 | 0 | compileError( |
3306 | 0 | file, "Capital sign after last word already defined."); |
3307 | 0 | return 0; |
3308 | 0 | } |
3309 | | // not passing pointer because compileBrailleIndicator may reallocate |
3310 | | // table |
3311 | 3 | ruleOffset = (*table)->emphRules[MAX_EMPH_CLASSES + i] |
3312 | 3 | [endPhraseBeforeOffset]; |
3313 | 3 | if (!compileBrailleIndicator(file, "capital sign before last word", |
3314 | 3 | i == 0 ? CTO_EndCapsPhraseBefore : CTO_EndModePhrase, |
3315 | 3 | &ruleOffset, noback, nofor, table)) |
3316 | 0 | return 0; |
3317 | 3 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseBeforeOffset] = |
3318 | 3 | ruleOffset; |
3319 | 3 | return 1; |
3320 | 4 | case 2: // after |
3321 | 4 | if ((*table)->emphRules[MAX_EMPH_CLASSES + i] |
3322 | 4 | [endPhraseBeforeOffset]) { |
3323 | 0 | compileError( |
3324 | 0 | file, "Capital sign before last word already defined."); |
3325 | 0 | return 0; |
3326 | 0 | } |
3327 | | // not passing pointer because compileBrailleIndicator may reallocate |
3328 | | // table |
3329 | 4 | ruleOffset = (*table)->emphRules[MAX_EMPH_CLASSES + i] |
3330 | 4 | [endPhraseAfterOffset]; |
3331 | 4 | if (!compileBrailleIndicator(file, "capital sign after last word", |
3332 | 4 | i == 0 ? CTO_EndCapsPhraseAfter : CTO_EndModePhrase, |
3333 | 4 | &ruleOffset, noback, nofor, table)) |
3334 | 0 | return 0; |
3335 | 4 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseAfterOffset] = |
3336 | 4 | ruleOffset; |
3337 | 4 | return 1; |
3338 | 1 | default: // error |
3339 | 1 | compileError(file, "Invalid lastword indicator location."); |
3340 | 1 | return 0; |
3341 | 8 | } |
3342 | 0 | return 0; |
3343 | 8 | } |
3344 | 11 | case CTO_BegMode: { |
3345 | | // not passing pointer because compileBrailleIndicator may reallocate |
3346 | | // table |
3347 | 11 | TranslationTableOffset ruleOffset = |
3348 | 11 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begOffset]; |
3349 | 11 | if (!compileBrailleIndicator(file, "first letter capital sign", |
3350 | 11 | i == 0 ? CTO_BegCaps : CTO_BegMode, &ruleOffset, noback, |
3351 | 11 | nofor, table)) |
3352 | 0 | return 0; |
3353 | 11 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begOffset] = ruleOffset; |
3354 | 11 | return 1; |
3355 | 11 | } |
3356 | 2 | case CTO_EndMode: { |
3357 | | // not passing pointer because compileBrailleIndicator may reallocate |
3358 | | // table |
3359 | 2 | TranslationTableOffset ruleOffset = |
3360 | 2 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endOffset]; |
3361 | 2 | if (!compileBrailleIndicator(file, "last letter capital sign", |
3362 | 2 | i == 0 ? CTO_EndCaps : CTO_EndMode, &ruleOffset, noback, |
3363 | 2 | nofor, table)) |
3364 | 0 | return 0; |
3365 | 2 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endOffset] = ruleOffset; |
3366 | 2 | return 1; |
3367 | 2 | } |
3368 | 52 | case CTO_ModeLetter: { |
3369 | | // not passing pointer because compileBrailleIndicator may reallocate |
3370 | | // table |
3371 | 52 | TranslationTableOffset ruleOffset = |
3372 | 52 | (*table)->emphRules[MAX_EMPH_CLASSES + i][letterOffset]; |
3373 | 52 | if (!compileBrailleIndicator(file, "single letter capital sign", |
3374 | 52 | i == 0 ? CTO_CapsLetter : CTO_ModeLetter, &ruleOffset, noback, |
3375 | 52 | nofor, table)) |
3376 | 0 | return 0; |
3377 | 52 | (*table)->emphRules[MAX_EMPH_CLASSES + i][letterOffset] = ruleOffset; |
3378 | 52 | return 1; |
3379 | 52 | } |
3380 | 96 | case CTO_BegModeWord: { |
3381 | | // not passing pointer because compileBrailleIndicator may reallocate |
3382 | | // table |
3383 | 96 | TranslationTableOffset ruleOffset = |
3384 | 96 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begWordOffset]; |
3385 | 96 | if (!compileBrailleIndicator(file, "capital word", |
3386 | 96 | i == 0 ? CTO_BegCapsWord : CTO_BegModeWord, &ruleOffset, |
3387 | 96 | noback, nofor, table)) |
3388 | 0 | return 0; |
3389 | 96 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begWordOffset] = ruleOffset; |
3390 | 96 | return 1; |
3391 | 96 | } |
3392 | 22 | case CTO_EndModeWord: { |
3393 | | // not passing pointer because compileBrailleIndicator may reallocate |
3394 | | // table |
3395 | 22 | TranslationTableOffset ruleOffset = |
3396 | 22 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endWordOffset]; |
3397 | 22 | if (!compileBrailleIndicator(file, "capital word stop", |
3398 | 22 | i == 0 ? CTO_EndCapsWord : CTO_EndModeWord, &ruleOffset, |
3399 | 22 | noback, nofor, table)) |
3400 | 0 | return 0; |
3401 | 22 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endWordOffset] = ruleOffset; |
3402 | 22 | return 1; |
3403 | 22 | } |
3404 | 29 | case CTO_LenModePhrase: |
3405 | 29 | return (*table)->emphRules[MAX_EMPH_CLASSES + i][lenPhraseOffset] = |
3406 | 29 | compileNumber(file); |
3407 | 0 | default: |
3408 | 0 | break; |
3409 | 230 | } |
3410 | 0 | break; |
3411 | 230 | } |
3412 | | |
3413 | | /* these 8 general purpose emphasis opcodes are compiled further down to more |
3414 | | * specific internal opcodes: |
3415 | | * - emphletter |
3416 | | * - begemphword |
3417 | | * - endemphword |
3418 | | * - begemph |
3419 | | * - endemph |
3420 | | * - begemphphrase |
3421 | | * - endemphphrase |
3422 | | * - lenemphphrase |
3423 | | */ |
3424 | 65 | case CTO_EmphClass: |
3425 | 65 | if (!getToken(file, &emphClass, "emphasis class")) { |
3426 | 0 | compileError(file, "emphclass must be followed by a valid class name."); |
3427 | 0 | return 0; |
3428 | 0 | } |
3429 | 65 | int k, i; |
3430 | 65 | char *s = malloc(sizeof(char) * (emphClass.length + 1)); |
3431 | 1.01k | for (k = 0; k < emphClass.length; k++) s[k] = (char)emphClass.chars[k]; |
3432 | 65 | s[k++] = '\0'; |
3433 | 189 | for (i = 0; i < MAX_EMPH_CLASSES && (*table)->emphClassNames[i]; i++) |
3434 | 133 | if (strcmp(s, (*table)->emphClassNames[i]) == 0) { |
3435 | 9 | _lou_logMessage(LOU_LOG_DEBUG, "Duplicate emphasis class: %s", s); |
3436 | 9 | free(s); |
3437 | 9 | return 1; |
3438 | 9 | } |
3439 | 56 | if (i == MAX_EMPH_CLASSES) { |
3440 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3441 | 0 | "Max number of emphasis classes (%i) reached", MAX_EMPH_CLASSES); |
3442 | 0 | errorCount++; |
3443 | 0 | free(s); |
3444 | 0 | return 0; |
3445 | 0 | } |
3446 | 56 | switch (i) { |
3447 | | /* For backwards compatibility (i.e. because programs will assume |
3448 | | * the first 3 typeform bits are `italic', `underline' and `bold') |
3449 | | * we require that the first 3 emphclass definitions are (in that |
3450 | | * order): |
3451 | | * |
3452 | | * emphclass italic |
3453 | | * emphclass underline |
3454 | | * emphclass bold |
3455 | | * |
3456 | | * While it would be possible to use the emphclass opcode only for |
3457 | | * defining _additional_ classes (not allowing for them to be called |
3458 | | * italic, underline or bold), thereby reducing the amount of |
3459 | | * boilerplate, we deliberately choose not to do that in order to |
3460 | | * not give italic, underline and bold any special status. The |
3461 | | * hope is that eventually all programs will use liblouis for |
3462 | | * emphasis the recommended way (i.e. by looking up the supported |
3463 | | * typeforms in the documentation or API) so that we can drop this |
3464 | | * restriction. |
3465 | | */ |
3466 | 24 | case 0: |
3467 | 24 | if (strcmp(s, "italic") != 0) { |
3468 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3469 | 0 | "First emphasis class must be \"italic\" but got " |
3470 | 0 | "%s", |
3471 | 0 | s); |
3472 | 0 | errorCount++; |
3473 | 0 | free(s); |
3474 | 0 | return 0; |
3475 | 0 | } |
3476 | 24 | break; |
3477 | 24 | case 1: |
3478 | 8 | if (strcmp(s, "underline") != 0) { |
3479 | 1 | _lou_logMessage(LOU_LOG_ERROR, |
3480 | 1 | "Second emphasis class must be \"underline\" but " |
3481 | 1 | "got " |
3482 | 1 | "%s", |
3483 | 1 | s); |
3484 | 1 | errorCount++; |
3485 | 1 | free(s); |
3486 | 1 | return 0; |
3487 | 1 | } |
3488 | 7 | break; |
3489 | 7 | case 2: |
3490 | 6 | if (strcmp(s, "bold") != 0) { |
3491 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3492 | 0 | "Third emphasis class must be \"bold\" but got " |
3493 | 0 | "%s", |
3494 | 0 | s); |
3495 | 0 | errorCount++; |
3496 | 0 | free(s); |
3497 | 0 | return 0; |
3498 | 0 | } |
3499 | 6 | break; |
3500 | 56 | } |
3501 | 55 | (*table)->emphClassNames[i] = s; |
3502 | 55 | (*table)->emphClasses[i] = (EmphasisClass){ emph_1 |
3503 | 55 | << i, /* relies on the order of typeforms emph_1..emph_10 */ |
3504 | 55 | 0, 0x1 << i, i }; |
3505 | 55 | return 1; |
3506 | 0 | case CTO_EmphLetter: |
3507 | 5 | case CTO_BegEmphWord: |
3508 | 6 | case CTO_EndEmphWord: |
3509 | 6 | case CTO_BegEmph: |
3510 | 15 | case CTO_EndEmph: |
3511 | 15 | case CTO_BegEmphPhrase: |
3512 | 15 | case CTO_EndEmphPhrase: |
3513 | 21 | case CTO_LenEmphPhrase: |
3514 | 25 | case CTO_EmphModeChars: |
3515 | 28 | case CTO_NoEmphChars: { |
3516 | 28 | if (!getToken(file, &token, "emphasis class")) return 0; |
3517 | 28 | if (!parseChars(file, &emphClass, &token)) return 0; |
3518 | 28 | char *s = malloc(sizeof(char) * (emphClass.length + 1)); |
3519 | 28 | int k, i; |
3520 | 336 | for (k = 0; k < emphClass.length; k++) s[k] = (char)emphClass.chars[k]; |
3521 | 28 | s[k++] = '\0'; |
3522 | 45 | for (i = 0; i < MAX_EMPH_CLASSES && (*table)->emphClassNames[i]; i++) |
3523 | 41 | if (strcmp(s, (*table)->emphClassNames[i]) == 0) break; |
3524 | 28 | if (i == MAX_EMPH_CLASSES || !(*table)->emphClassNames[i]) { |
3525 | 4 | _lou_logMessage(LOU_LOG_ERROR, "Emphasis class %s not declared", s); |
3526 | 4 | errorCount++; |
3527 | 4 | free(s); |
3528 | 4 | return 0; |
3529 | 4 | } |
3530 | 24 | int ok = 0; |
3531 | 24 | switch (opcode) { |
3532 | 0 | case CTO_EmphLetter: { |
3533 | | // not passing pointer because compileBrailleIndicator may reallocate |
3534 | | // table |
3535 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][letterOffset]; |
3536 | | // provide enough information for back-translator to be able to recognize |
3537 | | // and ignore the indicator (but it won't be able to determine the |
3538 | | // emphasis class) |
3539 | 0 | if (!compileBrailleIndicator(file, "single letter", CTO_EmphLetter, |
3540 | 0 | &ruleOffset, noback, nofor, table)) |
3541 | 0 | break; |
3542 | 0 | (*table)->emphRules[i][letterOffset] = ruleOffset; |
3543 | 0 | ok = 1; |
3544 | 0 | break; |
3545 | 0 | } |
3546 | 4 | case CTO_BegEmphWord: { |
3547 | | // not passing pointer because compileBrailleIndicator may reallocate |
3548 | | // table |
3549 | 4 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][begWordOffset]; |
3550 | 4 | if (!compileBrailleIndicator(file, "word", CTO_BegEmphWord, &ruleOffset, |
3551 | 4 | noback, nofor, table)) |
3552 | 0 | break; |
3553 | 4 | (*table)->emphRules[i][begWordOffset] = ruleOffset; |
3554 | 4 | ok = 1; |
3555 | 4 | break; |
3556 | 4 | } |
3557 | 0 | case CTO_EndEmphWord: { |
3558 | | // not passing pointer because compileBrailleIndicator may reallocate |
3559 | | // table |
3560 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][endWordOffset]; |
3561 | 0 | if (!compileBrailleIndicator(file, "word stop", CTO_EndEmphWord, |
3562 | 0 | &ruleOffset, noback, nofor, table)) |
3563 | 0 | break; |
3564 | 0 | (*table)->emphRules[i][endWordOffset] = ruleOffset; |
3565 | 0 | ok = 1; |
3566 | 0 | break; |
3567 | 0 | } |
3568 | 0 | case CTO_BegEmph: { |
3569 | | /* fail if both begemph and any of begemphphrase or begemphword are |
3570 | | * defined */ |
3571 | 0 | if ((*table)->emphRules[i][begWordOffset] || |
3572 | 0 | (*table)->emphRules[i][begPhraseOffset]) { |
3573 | 0 | compileError(file, |
3574 | 0 | "Cannot define emphasis for both no context and word or " |
3575 | 0 | "phrase context, i.e. cannot have both begemph and " |
3576 | 0 | "begemphword or begemphphrase."); |
3577 | 0 | break; |
3578 | 0 | } |
3579 | | // not passing pointer because compileBrailleIndicator may reallocate |
3580 | | // table |
3581 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][begOffset]; |
3582 | 0 | if (!compileBrailleIndicator(file, "first letter", CTO_BegEmph, |
3583 | 0 | &ruleOffset, noback, nofor, table)) |
3584 | 0 | break; |
3585 | 0 | (*table)->emphRules[i][begOffset] = ruleOffset; |
3586 | 0 | ok = 1; |
3587 | 0 | break; |
3588 | 0 | } |
3589 | 8 | case CTO_EndEmph: { |
3590 | 8 | if ((*table)->emphRules[i][endWordOffset] || |
3591 | 8 | (*table)->emphRules[i][endPhraseBeforeOffset] || |
3592 | 8 | (*table)->emphRules[i][endPhraseAfterOffset]) { |
3593 | 0 | compileError(file, |
3594 | 0 | "Cannot define emphasis for both no context and word or " |
3595 | 0 | "phrase context, i.e. cannot have both endemph and " |
3596 | 0 | "endemphword or endemphphrase."); |
3597 | 0 | break; |
3598 | 0 | } |
3599 | | // not passing pointer because compileBrailleIndicator may reallocate |
3600 | | // table |
3601 | 8 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][endOffset]; |
3602 | 8 | if (!compileBrailleIndicator(file, "last letter", CTO_EndEmph, |
3603 | 8 | &ruleOffset, noback, nofor, table)) |
3604 | 0 | break; |
3605 | 8 | (*table)->emphRules[i][endOffset] = ruleOffset; |
3606 | 8 | ok = 1; |
3607 | 8 | break; |
3608 | 8 | } |
3609 | 0 | case CTO_BegEmphPhrase: { |
3610 | | // not passing pointer because compileBrailleIndicator may reallocate |
3611 | | // table |
3612 | 0 | TranslationTableOffset ruleOffset = |
3613 | 0 | (*table)->emphRules[i][begPhraseOffset]; |
3614 | 0 | if (!compileBrailleIndicator(file, "first word", CTO_BegEmphPhrase, |
3615 | 0 | &ruleOffset, noback, nofor, table)) |
3616 | 0 | break; |
3617 | 0 | (*table)->emphRules[i][begPhraseOffset] = ruleOffset; |
3618 | 0 | ok = 1; |
3619 | 0 | break; |
3620 | 0 | } |
3621 | 0 | case CTO_EndEmphPhrase: |
3622 | 0 | switch (compileBeforeAfter(file)) { |
3623 | 0 | case 1: { // before |
3624 | 0 | if ((*table)->emphRules[i][endPhraseAfterOffset]) { |
3625 | 0 | compileError(file, "last word after already defined."); |
3626 | 0 | break; |
3627 | 0 | } |
3628 | | // not passing pointer because compileBrailleIndicator may reallocate |
3629 | | // table |
3630 | 0 | TranslationTableOffset ruleOffset = |
3631 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset]; |
3632 | 0 | if (!compileBrailleIndicator(file, "last word before", |
3633 | 0 | CTO_EndEmphPhrase, &ruleOffset, noback, nofor, table)) |
3634 | 0 | break; |
3635 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset] = ruleOffset; |
3636 | 0 | ok = 1; |
3637 | 0 | break; |
3638 | 0 | } |
3639 | 0 | case 2: { // after |
3640 | 0 | if ((*table)->emphRules[i][endPhraseBeforeOffset]) { |
3641 | 0 | compileError(file, "last word before already defined."); |
3642 | 0 | break; |
3643 | 0 | } |
3644 | | // not passing pointer because compileBrailleIndicator may reallocate |
3645 | | // table |
3646 | 0 | TranslationTableOffset ruleOffset = |
3647 | 0 | (*table)->emphRules[i][endPhraseAfterOffset]; |
3648 | 0 | if (!compileBrailleIndicator(file, "last word after", |
3649 | 0 | CTO_EndEmphPhrase, &ruleOffset, noback, nofor, table)) |
3650 | 0 | break; |
3651 | 0 | (*table)->emphRules[i][endPhraseAfterOffset] = ruleOffset; |
3652 | 0 | ok = 1; |
3653 | 0 | break; |
3654 | 0 | } |
3655 | 0 | default: // error |
3656 | 0 | compileError(file, "Invalid lastword indicator location."); |
3657 | 0 | break; |
3658 | 0 | } |
3659 | 0 | break; |
3660 | 6 | case CTO_LenEmphPhrase: |
3661 | 6 | if (((*table)->emphRules[i][lenPhraseOffset] = compileNumber(file))) |
3662 | 6 | ok = 1; |
3663 | 6 | break; |
3664 | 3 | case CTO_EmphModeChars: { |
3665 | 3 | if (!getRuleCharsText(file, &ruleChars)) break; |
3666 | 3 | widechar *emphmodechars = (*table)->emphModeChars[i]; |
3667 | 3 | int len; |
3668 | 247 | for (len = 0; len < EMPHMODECHARSSIZE && emphmodechars[len]; len++) |
3669 | 244 | ; |
3670 | 3 | if (len + ruleChars.length > EMPHMODECHARSSIZE) { |
3671 | 0 | compileError(file, "More than %d characters", EMPHMODECHARSSIZE); |
3672 | 0 | break; |
3673 | 0 | } |
3674 | 3 | ok = 1; |
3675 | 150 | for (int k = 0; k < ruleChars.length; k++) { |
3676 | 147 | if (!getChar(ruleChars.chars[k], *table, NULL)) { |
3677 | 0 | compileError(file, "Emphasis mode character undefined"); |
3678 | 0 | ok = 0; |
3679 | 0 | break; |
3680 | 0 | } |
3681 | 147 | emphmodechars[len++] = ruleChars.chars[k]; |
3682 | 147 | } |
3683 | 3 | break; |
3684 | 3 | } |
3685 | 3 | case CTO_NoEmphChars: { |
3686 | 3 | if (!getRuleCharsText(file, &ruleChars)) break; |
3687 | 3 | widechar *noemphchars = (*table)->noEmphChars[i]; |
3688 | 3 | int len; |
3689 | 4 | for (len = 0; len < NOEMPHCHARSSIZE && noemphchars[len]; len++) |
3690 | 1 | ; |
3691 | 3 | if (len + ruleChars.length > NOEMPHCHARSSIZE) { |
3692 | 0 | compileError(file, "More than %d characters", NOEMPHCHARSSIZE); |
3693 | 0 | break; |
3694 | 0 | } |
3695 | 3 | ok = 1; |
3696 | 14 | for (int k = 0; k < ruleChars.length; k++) { |
3697 | 12 | if (!getChar(ruleChars.chars[k], *table, NULL)) { |
3698 | 1 | compileError(file, "Character undefined"); |
3699 | 1 | ok = 0; |
3700 | 1 | break; |
3701 | 1 | } |
3702 | 11 | noemphchars[len++] = ruleChars.chars[k]; |
3703 | 11 | } |
3704 | 3 | break; |
3705 | 3 | } |
3706 | 0 | default: |
3707 | 0 | break; |
3708 | 24 | } |
3709 | 24 | free(s); |
3710 | 24 | return ok; |
3711 | 24 | } |
3712 | 36 | case CTO_LetterSign: { |
3713 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3714 | 36 | TranslationTableOffset ruleOffset = (*table)->letterSign; |
3715 | 36 | if (!compileBrailleIndicator(file, "letter sign", CTO_LetterSign, &ruleOffset, |
3716 | 36 | noback, nofor, table)) |
3717 | 0 | return 0; |
3718 | 36 | (*table)->letterSign = ruleOffset; |
3719 | 36 | return 1; |
3720 | 36 | } |
3721 | 14 | case CTO_NoLetsignBefore: |
3722 | 14 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3723 | 14 | if (((*table)->noLetsignBeforeCount + ruleChars.length) > LETSIGNBEFORESIZE) { |
3724 | 2 | compileError(file, "More than %d characters", LETSIGNBEFORESIZE); |
3725 | 2 | return 0; |
3726 | 2 | } |
3727 | 48 | for (int k = 0; k < ruleChars.length; k++) |
3728 | 36 | (*table)->noLetsignBefore[(*table)->noLetsignBeforeCount++] = |
3729 | 36 | ruleChars.chars[k]; |
3730 | 12 | return 1; |
3731 | 39 | case CTO_NoLetsign: |
3732 | 39 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3733 | 39 | if (((*table)->noLetsignCount + ruleChars.length) > LETSIGNSIZE) { |
3734 | 0 | compileError(file, "More than %d characters", LETSIGNSIZE); |
3735 | 0 | return 0; |
3736 | 0 | } |
3737 | 490 | for (int k = 0; k < ruleChars.length; k++) |
3738 | 451 | (*table)->noLetsign[(*table)->noLetsignCount++] = ruleChars.chars[k]; |
3739 | 39 | return 1; |
3740 | 18 | case CTO_NoLetsignAfter: |
3741 | 18 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3742 | 18 | if (((*table)->noLetsignAfterCount + ruleChars.length) > LETSIGNAFTERSIZE) { |
3743 | 0 | compileError(file, "More than %d characters", LETSIGNAFTERSIZE); |
3744 | 0 | return 0; |
3745 | 0 | } |
3746 | 110 | for (int k = 0; k < ruleChars.length; k++) |
3747 | 92 | (*table)->noLetsignAfter[(*table)->noLetsignAfterCount++] = |
3748 | 92 | ruleChars.chars[k]; |
3749 | 18 | return 1; |
3750 | 12 | case CTO_NumberSign: { |
3751 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3752 | 12 | TranslationTableOffset ruleOffset = (*table)->numberSign; |
3753 | 12 | if (!compileBrailleIndicator(file, "number sign", CTO_NumberSign, &ruleOffset, |
3754 | 12 | noback, nofor, table)) |
3755 | 0 | return 0; |
3756 | 12 | (*table)->numberSign = ruleOffset; |
3757 | 12 | return 1; |
3758 | 12 | } |
3759 | 30 | case CTO_NoNumberSign: { |
3760 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3761 | 30 | TranslationTableOffset ruleOffset = (*table)->noNumberSign; |
3762 | 30 | if (!compileBrailleIndicator(file, "no number sign", CTO_NoNumberSign, |
3763 | 30 | &ruleOffset, noback, nofor, table)) |
3764 | 1 | return 0; |
3765 | 29 | (*table)->noNumberSign = ruleOffset; |
3766 | 29 | return 1; |
3767 | 30 | } |
3768 | | |
3769 | 3 | case CTO_NumericModeChars: |
3770 | 3 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3771 | 153 | for (int k = 0; k < ruleChars.length; k++) { |
3772 | 150 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3773 | 150 | if (!c) { |
3774 | 0 | compileError(file, "Numeric mode character undefined: %s", |
3775 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
3776 | 0 | return 0; |
3777 | 0 | } |
3778 | 150 | c->attributes |= CTC_NumericMode; |
3779 | 150 | (*table)->usesNumericMode = 1; |
3780 | 150 | } |
3781 | 3 | return 1; |
3782 | | |
3783 | 0 | case CTO_MidEndNumericModeChars: |
3784 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3785 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3786 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3787 | 0 | if (!c) { |
3788 | 0 | compileError(file, "Midendnumeric mode character undefined"); |
3789 | 0 | return 0; |
3790 | 0 | } |
3791 | 0 | c->attributes |= CTC_MidEndNumericMode; |
3792 | 0 | (*table)->usesNumericMode = 1; |
3793 | 0 | } |
3794 | 0 | return 1; |
3795 | | |
3796 | 10 | case CTO_NumericNoContractChars: |
3797 | 10 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3798 | 163 | for (int k = 0; k < ruleChars.length; k++) { |
3799 | 155 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3800 | 155 | if (!c) { |
3801 | 2 | compileError(file, "Numeric no contraction character undefined"); |
3802 | 2 | return 0; |
3803 | 2 | } |
3804 | 153 | c->attributes |= CTC_NumericNoContract; |
3805 | 153 | (*table)->usesNumericMode = 1; |
3806 | 153 | } |
3807 | 8 | return 1; |
3808 | | |
3809 | 5 | case CTO_NoContractSign: { |
3810 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3811 | 5 | TranslationTableOffset ruleOffset = (*table)->noContractSign; |
3812 | 5 | if (!compileBrailleIndicator(file, "no contractions sign", CTO_NoContractSign, |
3813 | 5 | &ruleOffset, noback, nofor, table)) |
3814 | 0 | return 0; |
3815 | 5 | (*table)->noContractSign = ruleOffset; |
3816 | 5 | return 1; |
3817 | 5 | } |
3818 | 6 | case CTO_SeqDelimiter: |
3819 | 6 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3820 | 13 | for (int k = 0; k < ruleChars.length; k++) { |
3821 | 7 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3822 | 7 | if (!c) { |
3823 | 0 | compileError(file, "Sequence delimiter character undefined"); |
3824 | 0 | return 0; |
3825 | 0 | } |
3826 | 7 | c->attributes |= CTC_SeqDelimiter; |
3827 | 7 | (*table)->usesSequences = 1; |
3828 | 7 | } |
3829 | 6 | return 1; |
3830 | | |
3831 | 0 | case CTO_SeqBeforeChars: |
3832 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3833 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3834 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3835 | 0 | if (!c) { |
3836 | 0 | compileError(file, "Sequence before character undefined"); |
3837 | 0 | return 0; |
3838 | 0 | } |
3839 | 0 | c->attributes |= CTC_SeqBefore; |
3840 | 0 | } |
3841 | 0 | return 1; |
3842 | | |
3843 | 3 | case CTO_SeqAfterChars: |
3844 | 3 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3845 | 6 | for (int k = 0; k < ruleChars.length; k++) { |
3846 | 3 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3847 | 3 | if (!c) { |
3848 | 0 | compileError(file, "Sequence after character undefined"); |
3849 | 0 | return 0; |
3850 | 0 | } |
3851 | 3 | c->attributes |= CTC_SeqAfter; |
3852 | 3 | } |
3853 | 3 | return 1; |
3854 | | |
3855 | 29 | case CTO_SeqAfterPattern: |
3856 | 29 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3857 | 29 | if (((*table)->seqPatternsCount + ruleChars.length + 1) > SEQPATTERNSIZE) { |
3858 | 0 | compileError(file, "More than %d characters", SEQPATTERNSIZE); |
3859 | 0 | return 0; |
3860 | 0 | } |
3861 | 478 | for (int k = 0; k < ruleChars.length; k++) |
3862 | 449 | (*table)->seqPatterns[(*table)->seqPatternsCount++] = ruleChars.chars[k]; |
3863 | 29 | (*table)->seqPatterns[(*table)->seqPatternsCount++] = 0; |
3864 | 29 | return 1; |
3865 | | |
3866 | 18 | case CTO_SeqAfterExpression: |
3867 | 18 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3868 | 18 | if ((ruleChars.length + 1) > SEQPATTERNSIZE) { |
3869 | 0 | compileError(file, "More than %d characters", SEQPATTERNSIZE); |
3870 | 0 | return 0; |
3871 | 0 | } |
3872 | 107 | for (int k = 0; k < ruleChars.length; k++) |
3873 | 89 | (*table)->seqAfterExpression[k] = ruleChars.chars[k]; |
3874 | 18 | (*table)->seqAfterExpression[ruleChars.length] = 0; |
3875 | 18 | (*table)->seqAfterExpressionLength = ruleChars.length; |
3876 | 18 | return 1; |
3877 | | |
3878 | 0 | case CTO_CapsModeChars: |
3879 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3880 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3881 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3882 | 0 | if (!c) { |
3883 | 0 | compileError(file, "Capital mode character undefined"); |
3884 | 0 | return 0; |
3885 | 0 | } |
3886 | 0 | c->attributes |= CTC_CapsMode; |
3887 | 0 | (*table)->hasCapsModeChars = 1; |
3888 | 0 | } |
3889 | 0 | return 1; |
3890 | | |
3891 | 3 | case CTO_BegComp: { |
3892 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3893 | 3 | TranslationTableOffset ruleOffset = (*table)->begComp; |
3894 | 3 | if (!compileBrailleIndicator(file, "begin computer braille", CTO_BegComp, |
3895 | 3 | &ruleOffset, noback, nofor, table)) |
3896 | 0 | return 0; |
3897 | 3 | (*table)->begComp = ruleOffset; |
3898 | 3 | return 1; |
3899 | 3 | } |
3900 | 3 | case CTO_EndComp: { |
3901 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3902 | 3 | TranslationTableOffset ruleOffset = (*table)->endComp; |
3903 | 3 | if (!compileBrailleIndicator(file, "end computer braslle", CTO_EndComp, |
3904 | 3 | &ruleOffset, noback, nofor, table)) |
3905 | 0 | return 0; |
3906 | 3 | (*table)->endComp = ruleOffset; |
3907 | 3 | return 1; |
3908 | 3 | } |
3909 | 63 | case CTO_NoCross: |
3910 | 63 | if (nocross) { |
3911 | 0 | compileError( |
3912 | 0 | file, "%s already specified.", _lou_findOpcodeName(CTO_NoCross)); |
3913 | 0 | return 0; |
3914 | 0 | } |
3915 | 63 | nocross = 1; |
3916 | 63 | goto doOpcode; |
3917 | 32 | case CTO_Syllable: |
3918 | 32 | (*table)->syllables = 1; |
3919 | 38 | case CTO_Always: |
3920 | 57 | case CTO_LargeSign: |
3921 | 97 | case CTO_WholeWord: |
3922 | 101 | case CTO_PartWord: |
3923 | 120 | case CTO_JoinNum: |
3924 | 183 | case CTO_JoinableWord: |
3925 | 188 | case CTO_LowWord: |
3926 | 188 | case CTO_SuffixableWord: |
3927 | 193 | case CTO_PrefixableWord: |
3928 | 221 | case CTO_BegWord: |
3929 | 230 | case CTO_BegMidWord: |
3930 | 233 | case CTO_MidWord: |
3931 | 237 | case CTO_MidEndWord: |
3932 | 258 | case CTO_EndWord: |
3933 | 286 | case CTO_PrePunc: |
3934 | 302 | case CTO_PostPunc: |
3935 | 308 | case CTO_BegNum: |
3936 | 311 | case CTO_MidNum: |
3937 | 322 | case CTO_EndNum: |
3938 | 336 | case CTO_Repeated: |
3939 | 350 | case CTO_RepWord: |
3940 | 350 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3941 | 350 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
3942 | 350 | if (ruleDots.length == 0) |
3943 | | // check that all characters in a rule with `=` as second operand are |
3944 | | // defined (or based on another character) |
3945 | 48 | for (int k = 0; k < ruleChars.length; k++) { |
3946 | 48 | TranslationTableCharacter *c = |
3947 | 48 | getChar(ruleChars.chars[k], *table, NULL); |
3948 | 48 | if (!(c && (c->definitionRule || c->basechar))) { |
3949 | 1 | compileError(file, "Character %s is not defined", |
3950 | 1 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
3951 | 1 | return 0; |
3952 | 1 | } |
3953 | 48 | } |
3954 | 349 | TranslationTableRule *r; |
3955 | 349 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, &r, |
3956 | 349 | noback, nofor, table)) |
3957 | 0 | return 0; |
3958 | 349 | if (nocross) r->nocross = 1; |
3959 | 349 | return 1; |
3960 | | // if (opcode == CTO_MidNum) |
3961 | | // { |
3962 | | // TranslationTableCharacter *c = getChar(ruleChars.chars[0]); |
3963 | | // if(c) |
3964 | | // c->attributes |= CTC_NumericMode; |
3965 | | // } |
3966 | 54 | case CTO_RepEndWord: |
3967 | 54 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3968 | 54 | CharsString dots; |
3969 | 54 | if (!getToken(file, &dots, "dots,dots operand")) return 0; |
3970 | 54 | int len = dots.length; |
3971 | 166 | for (int k = 0; k < len - 1; k++) { |
3972 | 166 | if (dots.chars[k] == ',') { |
3973 | 54 | dots.length = k; |
3974 | 54 | if (!parseDots(file, &ruleDots, &dots)) return 0; |
3975 | 54 | ruleDots.chars[ruleDots.length++] = ','; |
3976 | 54 | k++; |
3977 | 54 | if (k == len - 1 && dots.chars[k] == '=') { |
3978 | | // check that all characters are defined (or based on another |
3979 | | // character) |
3980 | 6 | for (int l = 0; l < ruleChars.length; l++) { |
3981 | 3 | TranslationTableCharacter *c = |
3982 | 3 | getChar(ruleChars.chars[l], *table, NULL); |
3983 | 3 | if (!(c && (c->definitionRule || c->basechar))) { |
3984 | 0 | compileError(file, "Character %s is not defined", |
3985 | 0 | _lou_showString(&ruleChars.chars[l], 1, 0)); |
3986 | 0 | return 0; |
3987 | 0 | } |
3988 | 3 | } |
3989 | 51 | } else { |
3990 | 51 | CharsString x, y; |
3991 | 51 | x.length = 0; |
3992 | 258 | while (k < len) x.chars[x.length++] = dots.chars[k++]; |
3993 | 51 | if (parseDots(file, &y, &x)) |
3994 | 77 | for (int l = 0; l < y.length; l++) |
3995 | 51 | ruleDots.chars[ruleDots.length++] = y.chars[l]; |
3996 | 51 | } |
3997 | 54 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, |
3998 | 54 | NULL, NULL, noback, nofor, table); |
3999 | 54 | } |
4000 | 166 | } |
4001 | 0 | return 0; |
4002 | 32 | case CTO_CompDots: |
4003 | 33 | case CTO_Comp6: { |
4004 | 33 | TranslationTableOffset ruleOffset; |
4005 | 33 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4006 | 32 | if (ruleChars.length != 1) { |
4007 | 0 | compileError(file, "first operand must be 1 character"); |
4008 | 0 | return 0; |
4009 | 0 | } |
4010 | 32 | if (nofor || noback) { |
4011 | 0 | compileWarning(file, "nofor and noback not allowed on comp6 rules"); |
4012 | 0 | } |
4013 | 32 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
4014 | 32 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, &ruleOffset, |
4015 | 32 | NULL, noback, nofor, table)) |
4016 | 0 | return 0; |
4017 | 32 | return 1; |
4018 | 32 | } |
4019 | 3 | case CTO_ExactDots: |
4020 | 3 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4021 | 3 | if (ruleChars.chars[0] != '@') { |
4022 | 0 | compileError(file, "The operand must begin with an at sign (@)"); |
4023 | 0 | return 0; |
4024 | 0 | } |
4025 | 6 | for (int k = 1; k < ruleChars.length; k++) |
4026 | 3 | scratchPad.chars[k - 1] = ruleChars.chars[k]; |
4027 | 3 | scratchPad.length = ruleChars.length - 1; |
4028 | 3 | if (!parseDots(file, &ruleDots, &scratchPad)) return 0; |
4029 | 3 | return addRule(file, opcode, &ruleChars, &ruleDots, before, after, NULL, NULL, |
4030 | 3 | noback, nofor, table); |
4031 | 8 | case CTO_CapsNoCont: { |
4032 | 8 | TranslationTableOffset ruleOffset; |
4033 | 8 | ruleChars.length = 1; |
4034 | 8 | ruleChars.chars[0] = 'a'; |
4035 | 8 | if (!addRule(file, opcode, &ruleChars, NULL, after, before, &ruleOffset, NULL, |
4036 | 8 | noback, nofor, table)) |
4037 | 0 | return 0; |
4038 | 8 | (*table)->capsNoCont = ruleOffset; |
4039 | 8 | return 1; |
4040 | 8 | } |
4041 | 289 | case CTO_Replace: |
4042 | 289 | if (getRuleCharsText(file, &ruleChars)) { |
4043 | 289 | if (atEndOfLine(file)) |
4044 | 38 | ruleDots.length = ruleDots.chars[0] = 0; |
4045 | 251 | else { |
4046 | 251 | getRuleDotsText(file, &ruleDots); |
4047 | 251 | if (ruleDots.chars[0] == '#') |
4048 | 2 | ruleDots.length = ruleDots.chars[0] = 0; |
4049 | 249 | else if (ruleDots.chars[0] == '\\' && ruleDots.chars[1] == '#') |
4050 | 0 | memmove(&ruleDots.chars[0], &ruleDots.chars[1], |
4051 | 0 | ruleDots.length-- * CHARSIZE); |
4052 | 251 | } |
4053 | 289 | } |
4054 | 11.8k | for (int k = 0; k < ruleChars.length; k++) |
4055 | 11.5k | putChar(file, ruleChars.chars[k], table, NULL, (*table)->ruleCounter); |
4056 | 12.0k | for (int k = 0; k < ruleDots.length; k++) |
4057 | 11.7k | putChar(file, ruleDots.chars[k], table, NULL, (*table)->ruleCounter); |
4058 | 289 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, NULL, |
4059 | 289 | noback, nofor, table); |
4060 | 365 | case CTO_Correct: |
4061 | 365 | (*table)->corrections = 1; |
4062 | 365 | goto doPass; |
4063 | 95 | case CTO_Pass2: |
4064 | 95 | if ((*table)->numPasses < 2) (*table)->numPasses = 2; |
4065 | 95 | goto doPass; |
4066 | 84 | case CTO_Pass3: |
4067 | 84 | if ((*table)->numPasses < 3) (*table)->numPasses = 3; |
4068 | 84 | goto doPass; |
4069 | 83 | case CTO_Pass4: |
4070 | 83 | if ((*table)->numPasses < 4) (*table)->numPasses = 4; |
4071 | 627 | doPass: |
4072 | 750 | case CTO_Context: |
4073 | 750 | if (!(nofor || noback)) { |
4074 | 0 | compileError(file, "%s or %s must be specified.", |
4075 | 0 | _lou_findOpcodeName(CTO_NoFor), _lou_findOpcodeName(CTO_NoBack)); |
4076 | 0 | return 0; |
4077 | 0 | } |
4078 | 750 | return compilePassOpcode(file, opcode, noback, nofor, table); |
4079 | 4 | case CTO_Contraction: |
4080 | 6 | case CTO_NoCont: |
4081 | 12 | case CTO_CompBrl: |
4082 | 12 | case CTO_Literal: |
4083 | 12 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4084 | | // check that all characters in a compbrl, contraction, |
4085 | | // nocont or literal rule are defined (or based on another |
4086 | | // character) |
4087 | 54 | for (int k = 0; k < ruleChars.length; k++) { |
4088 | 42 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
4089 | 42 | if (!(c && (c->definitionRule || c->basechar))) { |
4090 | 0 | compileError(file, "Character %s is not defined", |
4091 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
4092 | 0 | return 0; |
4093 | 0 | } |
4094 | 42 | } |
4095 | 12 | return addRule(file, opcode, &ruleChars, NULL, after, before, NULL, NULL, |
4096 | 12 | noback, nofor, table); |
4097 | 19 | case CTO_MultInd: { |
4098 | 19 | ruleChars.length = 0; |
4099 | 19 | if (!getToken(file, &token, "multiple braille indicators") || |
4100 | 19 | !parseDots(file, &cells, &token)) |
4101 | 1 | return 0; |
4102 | 22 | while (getToken(file, &token, "multind opcodes")) { |
4103 | 22 | opcode = getOpcode(file, &token); |
4104 | 22 | if (opcode == CTO_None) { |
4105 | 0 | compileError(file, "opcode %s not defined.", |
4106 | 0 | _lou_showString(token.chars, token.length, 0)); |
4107 | 0 | return 0; |
4108 | 0 | } |
4109 | 22 | if (!(opcode >= CTO_CapsLetter && opcode < CTO_MultInd)) { |
4110 | 0 | compileError(file, "Not a braille indicator opcode."); |
4111 | 0 | return 0; |
4112 | 0 | } |
4113 | 22 | ruleChars.chars[ruleChars.length++] = (widechar)opcode; |
4114 | 22 | if (atEndOfLine(file)) break; |
4115 | 22 | } |
4116 | 18 | return addRule(file, CTO_MultInd, &ruleChars, &cells, after, before, NULL, |
4117 | 18 | NULL, noback, nofor, table); |
4118 | 18 | } |
4119 | | |
4120 | 230 | case CTO_Class: |
4121 | 230 | compileWarning(file, "class is deprecated, use attribute instead"); |
4122 | 237 | case CTO_Attribute: { |
4123 | 237 | if (nofor || noback) { |
4124 | 7 | compileWarning( |
4125 | 7 | file, "nofor and noback not allowed before class/attribute"); |
4126 | 7 | } |
4127 | 237 | if ((opcode == CTO_Class && (*table)->usesAttributeOrClass == 1) || |
4128 | 237 | (opcode == CTO_Attribute && (*table)->usesAttributeOrClass == 2)) { |
4129 | 0 | compileError(file, |
4130 | 0 | "attribute and class rules must not be both present in a table"); |
4131 | 0 | return 0; |
4132 | 0 | } |
4133 | 237 | if (opcode == CTO_Class) |
4134 | 230 | (*table)->usesAttributeOrClass = 2; |
4135 | 7 | else |
4136 | 7 | (*table)->usesAttributeOrClass = 1; |
4137 | 237 | if (!getToken(file, &token, "attribute name")) { |
4138 | 0 | compileError(file, "Expected %s", "attribute name"); |
4139 | 0 | return 0; |
4140 | 0 | } |
4141 | 237 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
4142 | 0 | return 0; |
4143 | 0 | } |
4144 | | |
4145 | 237 | TranslationTableCharacterAttributes attribute = 0; |
4146 | 237 | { |
4147 | 237 | int attrNumber = -1; |
4148 | 237 | switch (token.chars[0]) { |
4149 | 0 | case '0': |
4150 | 0 | case '1': |
4151 | 0 | case '2': |
4152 | 0 | case '3': |
4153 | 0 | case '4': |
4154 | 0 | case '5': |
4155 | 1 | case '6': |
4156 | 1 | case '7': |
4157 | 1 | case '8': |
4158 | 1 | case '9': |
4159 | 1 | attrNumber = token.chars[0] - '0'; |
4160 | 1 | break; |
4161 | 237 | } |
4162 | 237 | if (attrNumber >= 0) { |
4163 | 1 | if (opcode == CTO_Class) { |
4164 | 0 | compileError(file, |
4165 | 0 | "Invalid class name: may not contain digits, use " |
4166 | 0 | "attribute instead of class"); |
4167 | 0 | return 0; |
4168 | 0 | } |
4169 | 1 | if (token.length > 1 || attrNumber > 7) { |
4170 | 0 | compileError(file, |
4171 | 0 | "Invalid attribute name: must be a digit between 0 and 7 " |
4172 | 0 | "or a word containing only letters"); |
4173 | 0 | return 0; |
4174 | 0 | } |
4175 | 1 | if (!(*table)->numberedAttributes[attrNumber]) |
4176 | | // attribute not used before yet: assign it a value |
4177 | 1 | (*table)->numberedAttributes[attrNumber] = |
4178 | 1 | getNextNumberedAttribute(*table); |
4179 | 1 | attribute = (*table)->numberedAttributes[attrNumber]; |
4180 | 236 | } else { |
4181 | 236 | const CharacterClass *namedAttr = findCharacterClass(&token, *table); |
4182 | 236 | if (!namedAttr) { |
4183 | | // no class with that name: create one |
4184 | 53 | namedAttr = addCharacterClass( |
4185 | 53 | file, &token.chars[0], token.length, *table, 1); |
4186 | 53 | if (!namedAttr) return 0; |
4187 | 53 | } |
4188 | | // there is a class with that name or a new class was successfully |
4189 | | // created |
4190 | 236 | attribute = namedAttr->attribute; |
4191 | 236 | if (attribute == CTC_UpperCase || attribute == CTC_LowerCase) |
4192 | 131 | attribute |= CTC_Letter; |
4193 | 236 | } |
4194 | 237 | } |
4195 | 237 | CharsString characters; |
4196 | 237 | if (!getCharacters(file, &characters)) return 0; |
4197 | 8.38k | for (int i = 0; i < characters.length; i++) { |
4198 | | // get the character from the table, or if it is not defined yet, |
4199 | | // define it |
4200 | 8.14k | TranslationTableCharacter *character = putChar( |
4201 | 8.14k | file, characters.chars[i], table, NULL, (*table)->ruleCounter); |
4202 | | // set the attribute |
4203 | 8.14k | character->attributes |= attribute; |
4204 | | // also set the attribute on the associated dots (if any) |
4205 | 8.14k | if (character->basechar) |
4206 | 88 | character = (TranslationTableCharacter *)&(*table) |
4207 | 88 | ->ruleArea[character->basechar]; |
4208 | 8.14k | if (character->definitionRule) { |
4209 | 33 | TranslationTableRule *defRule = |
4210 | 33 | (TranslationTableRule *)&(*table) |
4211 | 33 | ->ruleArea[character->definitionRule]; |
4212 | 33 | if (defRule->dotslen == 1) { |
4213 | 33 | TranslationTableCharacter *dots = |
4214 | 33 | getDots(defRule->charsdots[defRule->charslen], *table); |
4215 | 33 | if (dots) dots->attributes |= attribute; |
4216 | 33 | } |
4217 | 33 | } |
4218 | 8.14k | } |
4219 | 235 | (*table)->ruleCounter++; |
4220 | 235 | return 1; |
4221 | 237 | } |
4222 | | |
4223 | 0 | { |
4224 | 0 | TranslationTableCharacterAttributes *attributes; |
4225 | 0 | const CharacterClass *class; |
4226 | 2 | case CTO_After: |
4227 | 2 | attributes = &after; |
4228 | 2 | goto doBeforeAfter; |
4229 | 2 | case CTO_Before: |
4230 | 2 | attributes = &before; |
4231 | 4 | doBeforeAfter: |
4232 | 4 | if (!(*table)->characterClasses) { |
4233 | 3 | if (!allocateCharacterClasses(*table)) return 0; |
4234 | 3 | } |
4235 | 4 | if (!getToken(file, &token, "attribute name")) return 0; |
4236 | 4 | if (!(class = findCharacterClass(&token, *table))) { |
4237 | 0 | compileError(file, "attribute not defined"); |
4238 | 0 | return 0; |
4239 | 0 | } |
4240 | 4 | *attributes |= class->attribute; |
4241 | 4 | goto doOpcode; |
4242 | 4 | } |
4243 | 121 | case CTO_Base: |
4244 | 121 | if (nofor || noback) { |
4245 | 0 | compileWarning(file, "nofor and noback not allowed before base"); |
4246 | 0 | } |
4247 | 121 | if (!getToken(file, &token, "attribute name")) { |
4248 | 0 | compileError( |
4249 | 0 | file, "base opcode must be followed by a valid attribute name."); |
4250 | 0 | return 0; |
4251 | 0 | } |
4252 | 121 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
4253 | 0 | return 0; |
4254 | 0 | } |
4255 | 121 | const CharacterClass *mode = findCharacterClass(&token, *table); |
4256 | 121 | if (!mode) { |
4257 | 73 | mode = addCharacterClass(file, token.chars, token.length, *table, 1); |
4258 | 73 | if (!mode) return 0; |
4259 | 73 | } |
4260 | 121 | if (!(mode->attribute == CTC_UpperCase || mode->attribute == CTC_Digit) && |
4261 | 106 | mode->attribute >= CTC_Space && mode->attribute <= CTC_LitDigit) { |
4262 | 0 | compileError(file, |
4263 | 0 | "base opcode must be followed by \"uppercase\", \"digit\", or a " |
4264 | 0 | "custom attribute name."); |
4265 | 0 | return 0; |
4266 | 0 | } |
4267 | 121 | if (!getRuleCharsText(file, &token)) return 0; |
4268 | 121 | if (token.length != 1) { |
4269 | 0 | compileError(file, |
4270 | 0 | "Exactly one character followed by one base character is " |
4271 | 0 | "required."); |
4272 | 0 | return 0; |
4273 | 0 | } |
4274 | 121 | TranslationTableOffset characterOffset; |
4275 | 121 | TranslationTableCharacter *character = putChar( |
4276 | 121 | file, token.chars[0], table, &characterOffset, (*table)->ruleCounter); |
4277 | 121 | if (!getRuleCharsText(file, &token)) return 0; |
4278 | 120 | if (token.length != 1) { |
4279 | 0 | compileError(file, "Exactly one base character is required."); |
4280 | 0 | return 0; |
4281 | 0 | } |
4282 | 120 | TranslationTableOffset basechar; |
4283 | 120 | putChar(file, token.chars[0], table, &basechar, (*table)->ruleCounter); |
4284 | | // putChar may have moved table, so make sure character is still valid |
4285 | 120 | character = (TranslationTableCharacter *)&(*table)->ruleArea[characterOffset]; |
4286 | 120 | if (character->basechar) { |
4287 | 37 | if (character->basechar == basechar && |
4288 | 37 | character->mode == mode->attribute) { |
4289 | 2 | _lou_logMessage(LOU_LOG_DEBUG, "%s:%d: Duplicate base rule.", |
4290 | 2 | file->fileName, file->lineNumber); |
4291 | 35 | } else { |
4292 | 35 | _lou_logMessage(LOU_LOG_DEBUG, |
4293 | 35 | "%s:%d: A different base rule already exists for this " |
4294 | 35 | "character (%s). The existing rule will take precedence " |
4295 | 35 | "over the new one.", |
4296 | 35 | file->fileName, file->lineNumber, |
4297 | 35 | printSource(file->sourceFile, character->sourceFile, |
4298 | 35 | character->sourceLine)); |
4299 | 35 | } |
4300 | 83 | } else { |
4301 | 83 | character->basechar = basechar; |
4302 | 83 | character->mode = mode->attribute; |
4303 | 83 | character->sourceFile = file->sourceFile; |
4304 | 83 | character->sourceLine = file->lineNumber; |
4305 | 83 | character->ruleIndex = (*table)->ruleCounter; |
4306 | | /* some other processing is done at the end of the compilation, in |
4307 | | * finalizeTable() */ |
4308 | 83 | } |
4309 | 120 | (*table)->ruleCounter++; |
4310 | 120 | return 1; |
4311 | 2 | case CTO_EmpMatchBefore: |
4312 | 2 | before |= CTC_EmpMatch; |
4313 | 2 | goto doOpcode; |
4314 | 2 | case CTO_EmpMatchAfter: |
4315 | 2 | after |= CTC_EmpMatch; |
4316 | 2 | goto doOpcode; |
4317 | | |
4318 | 22 | case CTO_SwapCc: |
4319 | 25 | case CTO_SwapCd: |
4320 | 35 | case CTO_SwapDd: |
4321 | 35 | return compileSwap(file, opcode, noback, nofor, table); |
4322 | 10 | case CTO_Hyphen: |
4323 | 13 | case CTO_DecPoint: |
4324 | | // case CTO_Apostrophe: |
4325 | | // case CTO_Initial: |
4326 | 13 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4327 | 13 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
4328 | 13 | if (ruleChars.length != 1 || ruleDots.length < 1) { |
4329 | 0 | compileError(file, |
4330 | 0 | "One Unicode character and at least one cell are " |
4331 | 0 | "required."); |
4332 | 0 | return 0; |
4333 | 0 | } |
4334 | 13 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, NULL, |
4335 | 13 | noback, nofor, table); |
4336 | | // if (opcode == CTO_DecPoint) |
4337 | | // { |
4338 | | // TranslationTableCharacter *c = |
4339 | | // getChar(ruleChars.chars[0]); |
4340 | | // if(c) |
4341 | | // c->attributes |= CTC_NumericMode; |
4342 | | // } |
4343 | 0 | default: |
4344 | 0 | compileError(file, "unimplemented opcode."); |
4345 | 0 | return 0; |
4346 | 2.75k | } |
4347 | 4.51k | } |
4348 | 0 | return 0; |
4349 | 4.51k | } |
4350 | | |
4351 | | int EXPORT_CALL |
4352 | 0 | lou_readCharFromFile(const char *fileName, int *mode) { |
4353 | | /* Read a character from a file, whether big-endian, little-endian or |
4354 | | * ASCII8 */ |
4355 | 0 | int ch; |
4356 | 0 | static FileInfo file; |
4357 | 0 | if (fileName == NULL) return 0; |
4358 | 0 | if (*mode == 1) { |
4359 | 0 | *mode = 0; |
4360 | 0 | file.fileName = fileName; |
4361 | 0 | file.encoding = noEncoding; |
4362 | 0 | file.status = 0; |
4363 | 0 | file.lineNumber = 0; |
4364 | 0 | if (!(file.in = fopen(file.fileName, "r"))) { |
4365 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open file '%s'", file.fileName); |
4366 | 0 | *mode = 1; |
4367 | 0 | return EOF; |
4368 | 0 | } |
4369 | 0 | } |
4370 | 0 | if (file.in == NULL) { |
4371 | 0 | *mode = 1; |
4372 | 0 | return EOF; |
4373 | 0 | } |
4374 | 0 | ch = getAChar(&file); |
4375 | 0 | if (ch == EOF) { |
4376 | 0 | fclose(file.in); |
4377 | 0 | file.in = NULL; |
4378 | 0 | *mode = 1; |
4379 | 0 | } |
4380 | 0 | return ch; |
4381 | 0 | } |
4382 | | |
4383 | | static TranslationTableCharacter * |
4384 | | finalizeCharacter(TranslationTableHeader *table, TranslationTableOffset characterOffset, |
4385 | 6.43k | int detect_loop) { |
4386 | 6.43k | TranslationTableCharacter *character = |
4387 | 6.43k | (TranslationTableCharacter *)&table->ruleArea[characterOffset]; |
4388 | 6.43k | if (character->basechar) { |
4389 | 83 | TranslationTableOffset basecharOffset = 0; |
4390 | 83 | TranslationTableCharacter *basechar = character; |
4391 | 83 | TranslationTableCharacterAttributes mode = 0; |
4392 | 170 | while (basechar->basechar) { |
4393 | 87 | if (basechar->basechar == characterOffset || detect_loop++ > MAX_MODES) { |
4394 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
4395 | 0 | "%s: error: Character can not be (indirectly) based on " |
4396 | 0 | "itself.", |
4397 | 0 | printSource(NULL, character->sourceFile, character->sourceLine)); |
4398 | 0 | errorCount++; |
4399 | 0 | return NULL; |
4400 | 0 | } |
4401 | | // inherit basechar mode |
4402 | 87 | mode |= basechar->mode; |
4403 | | // compute basechar recursively |
4404 | 87 | basecharOffset = basechar->basechar; |
4405 | 87 | basechar = finalizeCharacter(table, basecharOffset, detect_loop); |
4406 | 87 | if (!basechar) return NULL; |
4407 | 87 | if (character->mode & (basechar->attributes | basechar->mode)) { |
4408 | 0 | char *attributeName = NULL; |
4409 | 0 | const CharacterClass *class = table->characterClasses; |
4410 | 0 | while (class) { |
4411 | 0 | if (class->attribute == character->mode) { |
4412 | 0 | attributeName = |
4413 | 0 | strdup(_lou_showString(class->name, class->length, 0)); |
4414 | 0 | break; |
4415 | 0 | } |
4416 | 0 | class = class->next; |
4417 | 0 | } |
4418 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
4419 | 0 | "%s: error: Base character %s can not have the %s " |
4420 | 0 | "attribute.", |
4421 | 0 | printSource(NULL, character->sourceFile, character->sourceLine), |
4422 | 0 | _lou_showString(&basechar->value, 1, 0), |
4423 | 0 | attributeName != NULL ? attributeName : "?"); |
4424 | 0 | errorCount++; |
4425 | 0 | free(attributeName); |
4426 | 0 | return NULL; |
4427 | 0 | } |
4428 | 87 | } |
4429 | | // unset character definition rule or base rule (whichever was declared |
4430 | | // last) if the dot patterns are not compatible, meaning if the real parts |
4431 | | // (1-8) of the dot patterns do not match |
4432 | 83 | TranslationTableRule *basecharDefRule = |
4433 | 83 | (TranslationTableRule *)&table->ruleArea[basechar->definitionRule]; |
4434 | 83 | if (character->definitionRule) { |
4435 | 1 | TranslationTableRule *defRule = |
4436 | 1 | (TranslationTableRule *)&table->ruleArea[character->definitionRule]; |
4437 | 1 | if (defRule->dotslen != basecharDefRule->dotslen || |
4438 | 1 | memcmp(&defRule->charsdots[defRule->charslen], |
4439 | 1 | &basecharDefRule->charsdots[basecharDefRule->charslen], |
4440 | 1 | defRule->dotslen * CHARSIZE)) { |
4441 | 1 | char *defOpcodeName = strdup(_lou_findOpcodeName(defRule->opcode)); |
4442 | 1 | if (defRule->index < character->ruleIndex) { |
4443 | | // character definition rule was defined before base rule; ignore base |
4444 | | // rule |
4445 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
4446 | 0 | "%s:%d: Character already defined (%s). The existing %s rule " |
4447 | 0 | "will take precedence over the new base rule.", |
4448 | 0 | character->sourceFile, character->sourceLine, |
4449 | 0 | printSource(character->sourceFile, defRule->sourceFile, |
4450 | 0 | defRule->sourceLine), |
4451 | 0 | defOpcodeName); |
4452 | 0 | free(defOpcodeName); |
4453 | 0 | character->basechar = 0; |
4454 | 0 | character->mode = 0; |
4455 | 0 | character->sourceFile = defRule->sourceFile; |
4456 | 0 | character->sourceLine = defRule->sourceLine; |
4457 | 0 | character->ruleIndex = defRule->index; |
4458 | 0 | character->finalized = 1; |
4459 | 0 | return character; |
4460 | 1 | } else { |
4461 | 1 | _lou_logMessage(LOU_LOG_DEBUG, |
4462 | 1 | "%s:%d: A base rule already exists for this character (%s). " |
4463 | 1 | "The " |
4464 | 1 | "existing base rule will take precedence over the new %s " |
4465 | 1 | "rule.", |
4466 | 1 | defRule->sourceFile, defRule->sourceLine, |
4467 | 1 | printSource(defRule->sourceFile, character->sourceFile, |
4468 | 1 | character->sourceLine), |
4469 | 1 | defOpcodeName); |
4470 | 1 | free(defOpcodeName); |
4471 | 1 | character->definitionRule = 0; |
4472 | 1 | } |
4473 | 1 | } |
4474 | 1 | } |
4475 | 83 | character->mode = mode; |
4476 | 83 | character->basechar = basecharOffset; |
4477 | | // add mode to attributes |
4478 | 83 | character->attributes |= character->mode; |
4479 | 83 | if (character->attributes & (CTC_UpperCase | CTC_LowerCase)) |
4480 | 50 | character->attributes |= CTC_Letter; |
4481 | | // also set the new attributes on the associated dots of the base |
4482 | | // character |
4483 | 83 | if (basecharDefRule->dotslen == 1) { |
4484 | 6 | TranslationTableCharacter *dots = |
4485 | 6 | getDots(basecharDefRule->charsdots[basecharDefRule->charslen], table); |
4486 | 6 | if (dots) { |
4487 | 6 | dots->attributes |= character->mode; |
4488 | 6 | if (dots->attributes & (CTC_UpperCase | CTC_LowerCase)) |
4489 | 4 | dots->attributes |= CTC_Letter; |
4490 | 6 | } |
4491 | 6 | } |
4492 | | // store all characters that are based on a base character in list |
4493 | 83 | if (basechar->linked) character->linked = basechar->linked; |
4494 | 83 | basechar->linked = characterOffset; |
4495 | 83 | } |
4496 | 6.43k | character->finalized = 1; |
4497 | 6.43k | return character; |
4498 | 6.43k | } |
4499 | | |
4500 | | static int |
4501 | 1.18k | finalizeTable(TranslationTableHeader *table) { |
4502 | 1.18k | if (table->finalized) return 1; |
4503 | | // normalize basechar and mode of all characters |
4504 | 677k | for (int i = 0; i < HASHNUM; i++) { |
4505 | 677k | TranslationTableOffset characterOffset = table->characters[i]; |
4506 | 683k | while (characterOffset) { |
4507 | 6.34k | TranslationTableCharacter *character = |
4508 | 6.34k | finalizeCharacter(table, characterOffset, 0); |
4509 | 6.34k | if (!character) return 0; |
4510 | 6.34k | characterOffset = character->next; |
4511 | 6.34k | } |
4512 | 677k | } |
4513 | | // add noletsign rules from single-letter word and largesign rules |
4514 | 677k | for (int i = 0; i < HASHNUM; i++) { |
4515 | 677k | TranslationTableOffset characterOffset = table->characters[i]; |
4516 | 683k | while (characterOffset) { |
4517 | 6.34k | TranslationTableCharacter *character = |
4518 | 6.34k | (TranslationTableCharacter *)&table->ruleArea[characterOffset]; |
4519 | 6.34k | if (character->attributes & CTC_Letter) { |
4520 | 2.01k | TranslationTableOffset *otherRule = &character->otherRules; |
4521 | 2.12k | while (*otherRule) { |
4522 | 110 | TranslationTableRule *rule = |
4523 | 110 | (TranslationTableRule *)&table->ruleArea[*otherRule]; |
4524 | 110 | if (rule->opcode == CTO_WholeWord || rule->opcode == CTO_LargeSign) |
4525 | 1 | if (table->noLetsignCount < LETSIGNSIZE) |
4526 | 1 | table->noLetsign[table->noLetsignCount++] = |
4527 | 1 | rule->charsdots[0]; |
4528 | 110 | otherRule = &rule->charsnext; |
4529 | 110 | } |
4530 | 2.01k | } |
4531 | 6.34k | characterOffset = character->next; |
4532 | 6.34k | } |
4533 | 677k | } |
4534 | | // Rearrange rules in `forRules' so that when iterating over candidate rules in |
4535 | | // for_selectRule(), both case-sensitive and case-insensitive rules are contained |
4536 | | // within the same ordered list. We do the rearrangement by iterating over all |
4537 | | // case-sensitive rules and if needed move them to another bucket. This may slow down |
4538 | | // the compilation of tables with a lot of context rules, but the good news is that |
4539 | | // translation speed is not affected. |
4540 | 677k | for (unsigned long int i = 0; i < HASHNUM; i++) { |
4541 | 677k | TranslationTableOffset *p = &table->forRules[i]; |
4542 | 677k | while (*p) { |
4543 | 480 | TranslationTableRule *rule = (TranslationTableRule *)&table->ruleArea[*p]; |
4544 | | // For now only move the rules that we know are case-sensitive, namely |
4545 | | // `context' rules. (Note that there may be other case-sensitive rules that |
4546 | | // we're currently not aware of.) We don't move case insensitive rules because |
4547 | | // the user can/should define them using all lowercases. |
4548 | 480 | if (rule->opcode == CTO_Context) { |
4549 | 43 | unsigned long int hash = _lou_stringHash(&rule->charsdots[0], 1, table); |
4550 | | // no need to do anything if the first two characters are not uppercase |
4551 | | // letters |
4552 | 43 | if (hash != i) { |
4553 | | // compute new position |
4554 | 21 | TranslationTableOffset *insert_at = &table->forRules[hash]; |
4555 | 80 | while (*insert_at) { |
4556 | 65 | TranslationTableRule *r = |
4557 | 65 | (TranslationTableRule *)&table->ruleArea[*insert_at]; |
4558 | 65 | if (rule->charslen > r->charslen) |
4559 | 6 | break; |
4560 | 59 | else if (rule->charslen == r->charslen && r->opcode == CTO_Always) |
4561 | 0 | break; |
4562 | 59 | insert_at = &r->charsnext; |
4563 | 59 | } |
4564 | | // remove rule from current list and insert it at the correct position |
4565 | | // in the new list |
4566 | 21 | TranslationTableOffset next = rule->charsnext; |
4567 | 21 | rule->charsnext = *insert_at; |
4568 | 21 | *insert_at = *p; |
4569 | 21 | *p = next; |
4570 | 21 | continue; |
4571 | 21 | } |
4572 | 43 | } |
4573 | 459 | p = &rule->charsnext; |
4574 | 459 | } |
4575 | 677k | } |
4576 | 603 | table->finalized = 1; |
4577 | 603 | return 1; |
4578 | 603 | } |
4579 | | |
4580 | | static int |
4581 | | compileString(const char *inString, TranslationTableHeader **table, |
4582 | 699 | DisplayTableHeader **displayTable) { |
4583 | | /* This function can be used to make changes to tables on the fly. */ |
4584 | 699 | int k; |
4585 | 699 | FileInfo file; |
4586 | 699 | if (inString == NULL) return 0; |
4587 | 699 | memset(&file, 0, sizeof(file)); |
4588 | 699 | file.fileName = inString; |
4589 | 699 | file.encoding = noEncoding; |
4590 | 699 | file.lineNumber = 1; |
4591 | 699 | file.status = 0; |
4592 | 699 | file.linepos = 0; |
4593 | 30.7k | for (k = 0; k < MAXSTRING - 1 && inString[k]; k++) file.line[k] = inString[k]; |
4594 | 699 | file.line[k] = 0; |
4595 | 699 | file.linelen = k; |
4596 | 699 | if (table && *table && (*table)->finalized) { |
4597 | 0 | compileError(&file, "Table is finalized"); |
4598 | 0 | return 0; |
4599 | 0 | } |
4600 | 699 | return compileRule(&file, table, displayTable, NULL); |
4601 | 699 | } |
4602 | | |
4603 | | static int |
4604 | 605 | setDefaults(TranslationTableHeader *table) { |
4605 | 2.42k | for (int i = 0; i < 3; i++) |
4606 | 1.81k | if (!table->emphRules[i][lenPhraseOffset]) |
4607 | 1.81k | table->emphRules[i][lenPhraseOffset] = 4; |
4608 | 605 | if (table->numPasses == 0) table->numPasses = 1; |
4609 | 605 | return 1; |
4610 | 605 | } |
4611 | | |
4612 | | /* =============== * |
4613 | | * TABLE RESOLVING * |
4614 | | * =============== * |
4615 | | * |
4616 | | * A table resolver is a function that resolves a `tableList` path against a |
4617 | | * `base` path, and returns the resolved table(s) as a list of absolute file |
4618 | | * paths. |
4619 | | * |
4620 | | * The function must have the following signature: |
4621 | | * |
4622 | | * char ** (const char * tableList, const char * base) |
4623 | | * |
 * In general, `tableList` is a path in the broad sense. The default
 * implementation accepts only *file* paths, but another implementation could,
 * for instance, handle URIs. `base`, however, is always a file path.
 *
 * The idea is to give other programs that use liblouis the ability to define
 * their own table resolver (in C, Java, Python, etc.) when the default
 * resolver does not meet their needs (see also lou_registerTableResolver).
4631 | | * |
4632 | | */ |
4633 | | |
4634 | | /** |
4635 | | * Resolve a single (sub)table. |
4636 | | * |
4637 | | * Tries to resolve `table` against `base` if base is an absolute path. If |
4638 | | * that fails, searches `searchPath`. |
4639 | | * |
4640 | | */ |
4641 | | static char * |
4642 | 699 | resolveSubtable(const char *table, const char *base, const char *searchPath) { |
4643 | 699 | char *tableFile; |
4644 | 699 | static struct stat info; |
4645 | | |
4646 | 1.40k | #define MAX_TABLEFILE_SIZE (MAXSTRING * sizeof(char) * 2) |
4647 | 699 | if (table == NULL || table[0] == '\0') return NULL; |
4648 | 699 | tableFile = (char *)malloc(MAX_TABLEFILE_SIZE); |
4649 | | |
4650 | | // |
4651 | | // First try to resolve against base |
4652 | | // |
4653 | 699 | if (base) { |
4654 | 2 | int k; |
4655 | 2 | if (strlen(base) >= MAX_TABLEFILE_SIZE) goto failure; |
4656 | 2 | strcpy(tableFile, base); |
4657 | 2 | k = (int)strlen(tableFile); |
4658 | 30 | while (k >= 0 && tableFile[k] != '/' && tableFile[k] != '\\') k--; |
4659 | 2 | tableFile[++k] = '\0'; |
4660 | 2 | if (strlen(tableFile) + strlen(table) >= MAX_TABLEFILE_SIZE) goto failure; |
4661 | 2 | strcat(tableFile, table); |
4662 | 2 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4663 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4664 | 0 | return tableFile; |
4665 | 0 | } |
4666 | 2 | } |
4667 | | |
4668 | | // |
4669 | | // It could be an absolute path, or a path relative to the current working |
4670 | | // directory |
4671 | | // |
4672 | 699 | if (strlen(table) >= MAX_TABLEFILE_SIZE) goto failure; |
4673 | 699 | strcpy(tableFile, table); |
4674 | 699 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4675 | 699 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4676 | 699 | return tableFile; |
4677 | 699 | } |
4678 | | |
4679 | | // |
4680 | | // Then search `LOUIS_TABLEPATH`, `dataPath` and `programPath` |
4681 | | // |
4682 | 0 | if (searchPath[0] != '\0') { |
4683 | 0 | char *dir; |
4684 | 0 | int last; |
4685 | 0 | char *cp; |
4686 | 0 | char *searchPath_copy = strdup(searchPath); |
4687 | 0 | for (dir = searchPath_copy;; dir = cp + 1) { |
4688 | 0 | for (cp = dir; *cp != '\0' && *cp != ','; cp++) |
4689 | 0 | ; |
4690 | 0 | last = (*cp == '\0'); |
4691 | 0 | *cp = '\0'; |
4692 | 0 | if (dir == cp) dir = "."; |
4693 | 0 | if (strlen(dir) + strlen(table) + 1 >= MAX_TABLEFILE_SIZE) { |
4694 | 0 | free(searchPath_copy); |
4695 | 0 | goto failure; |
4696 | 0 | } |
4697 | 0 | sprintf(tableFile, "%s%c%s", dir, DIR_SEP, table); |
4698 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4699 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4700 | 0 | free(searchPath_copy); |
4701 | 0 | return tableFile; |
4702 | 0 | } |
4703 | 0 | if (last) break; |
4704 | 0 | if (strlen(dir) + strlen("liblouis") + strlen("tables") + strlen(table) + 3 >= |
4705 | 0 | MAX_TABLEFILE_SIZE) { |
4706 | 0 | free(searchPath_copy); |
4707 | 0 | goto failure; |
4708 | 0 | } |
4709 | 0 | sprintf(tableFile, "%s%c%s%c%s%c%s", dir, DIR_SEP, "liblouis", DIR_SEP, |
4710 | 0 | "tables", DIR_SEP, table); |
4711 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4712 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4713 | 0 | free(searchPath_copy); |
4714 | 0 | return tableFile; |
4715 | 0 | } |
4716 | 0 | if (last) break; |
4717 | 0 | } |
4718 | 0 | free(searchPath_copy); |
4719 | 0 | } |
4720 | 0 | failure: |
4721 | 0 | free(tableFile); |
4722 | 0 | return NULL; |
4723 | 0 | } |
4724 | | |
4725 | | char *EXPORT_CALL |
4726 | 699 | _lou_getTablePath(void) { |
4727 | 699 | char searchPath[MAXSTRING]; |
4728 | 699 | char *path; |
4729 | 699 | char *cp; |
4730 | 699 | int envset = 0; |
4731 | 699 | cp = searchPath; |
4732 | 699 | path = getenv("LOUIS_TABLEPATH"); |
4733 | 699 | if (path != NULL && path[0] != '\0') { |
4734 | 0 | envset = 1; |
4735 | 0 | cp += sprintf(cp, ",%s", path); |
4736 | 0 | } |
4737 | 699 | path = dataPathPtr; |
4738 | 699 | if (path != NULL && path[0] != '\0') |
4739 | 0 | cp += sprintf(cp, ",%s%c%s%c%s", path, DIR_SEP, "liblouis", DIR_SEP, "tables"); |
4740 | 699 | if (!envset) { |
4741 | | #ifdef _WIN32 |
4742 | | path = lou_getProgramPath(); |
4743 | | if (path != NULL) { |
4744 | | if (path[0] != '\0') |
4745 | | // assuming the following directory structure: |
4746 | | // . |
4747 | | // ├── bin |
4748 | | // │ ├── liblouis.dll |
4749 | | // ├── include |
4750 | | // ├── lib |
4751 | | // └── share |
4752 | | // ├── doc |
4753 | | // ├── info |
4754 | | // └── liblouis |
4755 | | // └── tables |
4756 | | cp += sprintf(cp, ",%s%s", path, "\\..\\share\\liblouis\\tables"); |
4757 | | free(path); |
4758 | | } |
4759 | | #else |
4760 | 699 | cp += sprintf(cp, ",%s", TABLESDIR); |
4761 | 699 | #endif |
4762 | 699 | } |
4763 | 699 | if (searchPath[0] != '\0') |
4764 | 699 | return strdup(&searchPath[1]); |
4765 | 0 | else |
4766 | 0 | return strdup("."); |
4767 | 699 | } |
4768 | | |
4769 | | /** |
4770 | | * The default table resolver |
4771 | | * |
4772 | | * Tries to resolve tableList against base. The search path is set to |
4773 | | * `LOUIS_TABLEPATH`, `dataPath` and `programPath` (in that order). |
4774 | | * |
4775 | | * @param table A file path, may be absolute or relative. May be a list of |
4776 | | * tables separated by commas. In that case, the first table |
4777 | | * is used as the base for the other subtables. |
4778 | | * @param base A file path or directory path, or NULL. |
4779 | | * @return The file paths of the resolved subtables, or NULL if the table |
4780 | | * could not be resolved. |
4781 | | * |
4782 | | */ |
4783 | | char **EXPORT_CALL |
4784 | 699 | _lou_defaultTableResolver(const char *tableList, const char *base) { |
4785 | 699 | char *searchPath; |
4786 | 699 | char **tableFiles; |
4787 | 699 | char *subTable; |
4788 | 699 | char *tableList_copy; |
4789 | 699 | char *cp; |
4790 | 699 | int last; |
4791 | 699 | int k; |
4792 | | |
4793 | | /* Set up search path */ |
4794 | 699 | searchPath = _lou_getTablePath(); |
4795 | | |
4796 | | /* Count number of subtables in table list */ |
4797 | 699 | k = 0; |
4798 | 15.8k | for (cp = (char *)tableList; *cp != '\0'; cp++) |
4799 | 15.1k | if (*cp == ',') k++; |
4800 | 699 | tableFiles = (char **)calloc(k + 2, sizeof(char *)); |
4801 | 699 | if (!tableFiles) _lou_outOfMemory(); |
4802 | | |
4803 | | /* Resolve subtables */ |
4804 | 699 | k = 0; |
4805 | 699 | tableList_copy = strdup(tableList); |
4806 | 699 | for (subTable = tableList_copy;; subTable = cp + 1) { |
4807 | 15.8k | for (cp = subTable; *cp != '\0' && *cp != ','; cp++) |
4808 | 15.1k | ; |
4809 | 699 | last = (*cp == '\0'); |
4810 | 699 | *cp = '\0'; |
4811 | 699 | if (!(tableFiles[k++] = resolveSubtable(subTable, base, searchPath))) { |
4812 | 0 | char *path; |
4813 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", subTable); |
4814 | 0 | path = getenv("LOUIS_TABLEPATH"); |
4815 | 0 | if (path != NULL && path[0] != '\0') |
4816 | 0 | _lou_logMessage(LOU_LOG_ERROR, "LOUIS_TABLEPATH=%s", path); |
4817 | 0 | free(searchPath); |
4818 | 0 | free(tableList_copy); |
4819 | 0 | lou_freeTableFiles(tableFiles); |
4820 | 0 | return NULL; |
4821 | 0 | } |
4822 | 699 | if (k == 1) base = subTable; |
4823 | 699 | if (last) break; |
4824 | 699 | } |
4825 | 699 | free(searchPath); |
4826 | 699 | free(tableList_copy); |
4827 | 699 | tableFiles[k] = NULL; |
4828 | 699 | return tableFiles; |
4829 | 699 | } |
4830 | | |
4831 | | static char **(EXPORT_CALL *tableResolver)( |
4832 | | const char *tableList, const char *base) = &_lou_defaultTableResolver; |
4833 | | |
/**
 * Make a deep copy of a NULL-terminated array of strings.
 *
 * @param array NULL-terminated array of C strings, or NULL.
 * @return A newly allocated NULL-terminated array whose elements are newly
 *         allocated copies of the input strings (release each element and
 *         then the array with free), or NULL if array is NULL or memory
 *         could not be allocated. Nothing is leaked on failure.
 */
static char **
copyStringArray(char **array) {
	size_t count = 0;
	char **copy;

	if (!array) return NULL;
	while (array[count]) count++;
	copy = malloc((count + 1) * sizeof(*copy));
	if (!copy) return NULL;
	for (size_t i = 0; i < count; i++) {
		/* explicit length + memcpy so a failed element can be detected and
		 * the partially built copy unwound */
		size_t size = strlen(array[i]) + 1;
		copy[i] = malloc(size);
		if (!copy[i]) {
			while (i > 0) free(copy[--i]);
			free(copy);
			return NULL;
		}
		memcpy(copy[i], array[i], size);
	}
	copy[count] = NULL;
	return copy;
}
4849 | | |
4850 | | char **EXPORT_CALL |
4851 | 699 | _lou_resolveTable(const char *tableList, const char *base) { |
4852 | 699 | char **tableFiles = (*tableResolver)(tableList, base); |
4853 | 699 | char **result = copyStringArray(tableFiles); |
4854 | 699 | if (tableResolver == &_lou_defaultTableResolver) lou_freeTableFiles(tableFiles); |
4855 | 699 | return result; |
4856 | 699 | } |
4857 | | |
4858 | | /** |
4859 | | * Register a new table resolver. Overrides the default resolver. |
4860 | | * |
4861 | | * @param resolver The new resolver as a function pointer. |
4862 | | * |
4863 | | */ |
4864 | | void EXPORT_CALL |
4865 | | lou_registerTableResolver( |
4866 | 0 | char **(EXPORT_CALL *resolver)(const char *tableList, const char *base)) { |
4867 | 0 | tableResolver = resolver; |
4868 | 0 | } |
4869 | | |
4870 | | static int fileCount = 0; |
4871 | | |
4872 | | /** |
4873 | | * Compile a single file |
4874 | | * |
4875 | | */ |
4876 | | static int |
4877 | | compileFile(const char *fileName, TranslationTableHeader **table, |
4878 | 699 | DisplayTableHeader **displayTable) { |
4879 | 699 | FileInfo file; |
4880 | 699 | fileCount++; |
4881 | 699 | file.fileName = fileName; |
4882 | 699 | if (table) { |
4883 | 699 | int i; |
4884 | 701 | for (i = 0; (*table)->sourceFiles[i]; i++) |
4885 | 2 | ; |
4886 | 699 | if (i >= MAX_SOURCE_FILES) { |
4887 | 0 | _lou_logMessage(LOU_LOG_WARN, "Max number of source files (%i) reached", |
4888 | 0 | MAX_SOURCE_FILES); |
4889 | 0 | file.sourceFile = NULL; |
4890 | 699 | } else { |
4891 | 699 | file.sourceFile = (*table)->sourceFiles[i] = strdup(fileName); |
4892 | 699 | } |
4893 | 699 | } |
4894 | 699 | file.encoding = noEncoding; |
4895 | 699 | file.status = 0; |
4896 | 699 | file.lineNumber = 0; |
4897 | 699 | if ((file.in = fopen(file.fileName, "rb"))) { |
4898 | | // the scope of a macro is the current file (after the macro definition) |
4899 | 699 | const MacroList *inscopeMacros = NULL; |
4900 | 5.50k | while (_lou_getALine(&file)) |
4901 | 4.88k | if (!compileRule(&file, table, displayTable, &inscopeMacros)) { |
4902 | 73 | if (!errorCount) compileError(&file, "Rule could not be compiled"); |
4903 | 73 | break; |
4904 | 73 | } |
4905 | 699 | fclose(file.in); |
4906 | 699 | free_macro_list(inscopeMacros); |
4907 | 699 | } else { |
4908 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", file.fileName); |
4909 | 0 | errorCount++; |
4910 | 0 | } |
4911 | 699 | return !errorCount; |
4912 | 699 | } |
4913 | | |
4914 | | static void |
4915 | 697 | freeTranslationTable(TranslationTableHeader *t) { |
4916 | 752 | for (int i = 0; i < MAX_EMPH_CLASSES && t->emphClassNames[i]; i++) |
4917 | 697 | free(t->emphClassNames[i]); |
4918 | 1.39k | for (int i = 0; t->sourceFiles[i]; i++) free(t->sourceFiles[i]); |
4919 | 697 | if (t->characterClasses) deallocateCharacterClasses(t); |
4920 | 697 | if (t->ruleNames) deallocateRuleNames(t); |
4921 | 697 | free(t); |
4922 | 697 | } |
4923 | | |
4924 | | static void |
4925 | 697 | freeDisplayTable(DisplayTableHeader *t) { |
4926 | 697 | free(t); |
4927 | 697 | } |
4928 | | |
4929 | | /** |
4930 | | * Free a char** array |
4931 | | */ |
4932 | | void EXPORT_CALL |
4933 | 1.39k | lou_freeTableFiles(char **tables) { |
4934 | 1.39k | if (!tables) return; |
4935 | 2.79k | for (char **table = tables; *table; table++) free(*table); |
4936 | 1.39k | free(tables); |
4937 | 1.39k | } |
4938 | | |
4939 | | /** |
4940 | | * Implement include opcode |
4941 | | * |
4942 | | */ |
4943 | | static int |
4944 | | includeFile(const FileInfo *file, CharsString *includedFile, |
4945 | 2 | TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
4946 | 2 | int k; |
4947 | 2 | char includeThis[MAXSTRING]; |
4948 | 2 | char **tableFiles; |
4949 | 2 | int rv; |
4950 | 38 | for (k = 0; k < includedFile->length; k++) |
4951 | 36 | includeThis[k] = (char)includedFile->chars[k]; |
4952 | 2 | if (k >= MAXSTRING) { |
4953 | 0 | compileError(file, "Include statement too long: 'include %s'", includeThis); |
4954 | 0 | return 0; |
4955 | 0 | } |
4956 | 2 | includeThis[k] = 0; |
4957 | 2 | tableFiles = _lou_resolveTable(includeThis, file->fileName); |
4958 | 2 | if (tableFiles == NULL) { |
4959 | 0 | errorCount++; |
4960 | 0 | return 0; |
4961 | 0 | } |
4962 | 2 | if (tableFiles[1] != NULL) { |
4963 | 0 | lou_freeTableFiles(tableFiles); |
4964 | 0 | compileError(file, "Table list not supported in include statement: 'include %s'", |
4965 | 0 | includeThis); |
4966 | 0 | return 0; |
4967 | 0 | } |
4968 | 2 | rv = compileFile(*tableFiles, table, displayTable); |
4969 | 2 | lou_freeTableFiles(tableFiles); |
4970 | 2 | if (!rv) |
4971 | 0 | _lou_logMessage(LOU_LOG_ERROR, "%s:%d: Error in included file", file->fileName, |
4972 | 0 | file->lineNumber); |
4973 | 2 | return rv; |
4974 | 2 | } |
4975 | | |
4976 | | /** |
4977 | | * Compile source tables into a table in memory |
4978 | | * |
4979 | | */ |
4980 | | static int |
4981 | | compileTable(const char *tableList, const char *displayTableList, |
4982 | 697 | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) { |
4983 | 697 | char **tableFiles; |
4984 | 697 | char **subTable; |
4985 | 697 | if (translationTable && !tableList) return 0; |
4986 | 697 | if (displayTable && !displayTableList) return 0; |
4987 | 697 | if (!translationTable && !displayTable) return 0; |
4988 | 697 | if (translationTable) *translationTable = NULL; |
4989 | 697 | if (displayTable) *displayTable = NULL; |
4990 | 697 | errorCount = warningCount = fileCount = 0; |
4991 | 697 | if (!opcodeLengths[0]) { |
4992 | 697 | TranslationTableOpcode opcode; |
4993 | 82.2k | for (opcode = 0; opcode < CTO_None; opcode++) |
4994 | 81.5k | opcodeLengths[opcode] = (short)strlen(opcodeNames[opcode]); |
4995 | 697 | } |
4996 | 697 | if (translationTable) allocateTranslationTable(NULL, translationTable); |
4997 | 697 | if (displayTable) allocateDisplayTable(NULL, displayTable); |
4998 | | |
4999 | 697 | if (translationTable) { |
5000 | 697 | (*translationTable)->emphClassNames[0] = NULL; |
5001 | 697 | (*translationTable)->characterClasses = NULL; |
5002 | 697 | (*translationTable)->ruleNames = NULL; |
5003 | 697 | } |
5004 | | |
5005 | | /* Compile things that are necessary for the proper operation of |
5006 | | * liblouis or liblouisxml or liblouisutdml */ |
5007 | | /* TODO: These definitions seem to be necessary for proper functioning of |
5008 | | liblouisutdml. Find a way to satisfy those requirements without hard coding |
5009 | | some characters in every table notably behind the user's back */ |
5010 | 697 | compileString("space \\xffff 123456789abcdef LOU_ENDSEGMENT", translationTable, |
5011 | 697 | displayTable); |
5012 | | |
5013 | 697 | if (displayTable && translationTable && strcmp(tableList, displayTableList) == 0) { |
5014 | | /* Compile the display and translation tables in one go */ |
5015 | | |
5016 | | /* Compile all subtables in the list */ |
5017 | 697 | if (!(tableFiles = _lou_resolveTable(tableList, NULL))) { |
5018 | 0 | errorCount++; |
5019 | 0 | goto cleanup; |
5020 | 0 | } |
5021 | 1.30k | for (subTable = tableFiles; *subTable; subTable++) |
5022 | 697 | if (!compileFile(*subTable, translationTable, displayTable)) goto cleanup; |
5023 | 697 | } else { |
5024 | | /* Compile the display and translation tables separately */ |
5025 | |
|
5026 | 0 | if (displayTable) { |
5027 | 0 | if (!(tableFiles = _lou_resolveTable(displayTableList, NULL))) { |
5028 | 0 | errorCount++; |
5029 | 0 | goto cleanup; |
5030 | 0 | } |
5031 | 0 | for (subTable = tableFiles; *subTable; subTable++) |
5032 | 0 | if (!compileFile(*subTable, NULL, displayTable)) goto cleanup; |
5033 | 0 | lou_freeTableFiles(tableFiles); |
5034 | 0 | tableFiles = NULL; |
5035 | 0 | } |
5036 | 0 | if (translationTable) { |
5037 | 0 | if (!(tableFiles = _lou_resolveTable(tableList, NULL))) { |
5038 | 0 | errorCount++; |
5039 | 0 | goto cleanup; |
5040 | 0 | } |
5041 | 0 | for (subTable = tableFiles; *subTable; subTable++) |
5042 | 0 | if (!compileFile(*subTable, translationTable, NULL)) goto cleanup; |
5043 | 0 | } |
5044 | 0 | } |
5045 | | |
5046 | | /* Clean up after compiling files */ |
5047 | 697 | cleanup: |
5048 | 697 | lou_freeTableFiles(tableFiles); |
5049 | 697 | if (warningCount) |
5050 | 565 | _lou_logMessage(LOU_LOG_WARN, "%s: %d warnings issued", tableList, warningCount); |
5051 | 697 | if (!errorCount) { |
5052 | 605 | if (translationTable) setDefaults(*translationTable); |
5053 | 605 | return 1; |
5054 | 605 | } else { |
5055 | 92 | _lou_logMessage(LOU_LOG_ERROR, "%d errors found.", errorCount); |
5056 | 92 | if (translationTable) { |
5057 | 92 | if (*translationTable) freeTranslationTable(*translationTable); |
5058 | 92 | *translationTable = NULL; |
5059 | 92 | } |
5060 | 92 | if (displayTable) { |
5061 | 92 | if (*displayTable) freeDisplayTable(*displayTable); |
5062 | 92 | *displayTable = NULL; |
5063 | 92 | } |
5064 | 92 | return 0; |
5065 | 92 | } |
5066 | 697 | } |
5067 | | |
5068 | | /* Return the emphasis classes declared in tableList. */ |
5069 | | char const **EXPORT_CALL |
5070 | 0 | lou_getEmphClasses(const char *tableList) { |
5071 | 0 | const char *names[MAX_EMPH_CLASSES + 1]; |
5072 | 0 | unsigned int count = 0; |
5073 | 0 | const TranslationTableHeader *table = _lou_getTranslationTable(tableList); |
5074 | 0 | if (!table) return NULL; |
5075 | | |
5076 | 0 | while (count < MAX_EMPH_CLASSES) { |
5077 | 0 | char const *name = table->emphClassNames[count]; |
5078 | 0 | if (!name) break; |
5079 | 0 | names[count++] = name; |
5080 | 0 | } |
5081 | 0 | names[count++] = NULL; |
5082 | |
|
5083 | 0 | { |
5084 | 0 | unsigned int size = count * sizeof(names[0]); |
5085 | 0 | char const **result = malloc(size); |
5086 | 0 | if (!result) return NULL; |
5087 | | /* The void* cast is necessary to stop MSVC from warning about |
5088 | | * different 'const' qualifiers (C4090). */ |
5089 | 0 | memcpy((void *)result, names, size); |
5090 | 0 | return result; |
5091 | 0 | } |
5092 | 0 | } |
5093 | | |
5094 | | void EXPORT_CALL |
5095 | 0 | lou_freeEmphClasses(char const **classes) { |
5096 | 0 | free(classes); |
5097 | 0 | } |
5098 | | |
5099 | | void |
5100 | | getTable(const char *tableList, const char *displayTableList, |
5101 | | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable); |
5102 | | |
5103 | | void EXPORT_CALL |
5104 | | _lou_getTable(const char *tableList, const char *displayTableList, |
5105 | | const TranslationTableHeader **translationTable, |
5106 | 1.28k | const DisplayTableHeader **displayTable) { |
5107 | 1.28k | TranslationTableHeader *newTable = NULL; |
5108 | 1.28k | DisplayTableHeader *newDisplayTable = NULL; |
5109 | 1.28k | getTable(tableList, displayTableList, &newTable, &newDisplayTable); |
5110 | 1.28k | if (newTable) |
5111 | 1.18k | if (!finalizeTable(newTable)) newTable = NULL; |
5112 | 1.28k | *translationTable = newTable; |
5113 | 1.28k | *displayTable = newDisplayTable; |
5114 | 1.28k | } |
5115 | | |
5116 | | /* Checks and loads tableList. */ |
5117 | | const void *EXPORT_CALL |
5118 | 695 | lou_getTable(const char *tableList) { |
5119 | 695 | const TranslationTableHeader *table = NULL; |
5120 | 695 | const DisplayTableHeader *displayTable = NULL; |
5121 | 695 | _lou_getTable(tableList, tableList, &table, &displayTable); |
5122 | 695 | if (!table || !displayTable) return NULL; |
5123 | 603 | return table; |
5124 | 695 | } |
5125 | | |
5126 | | const TranslationTableHeader *EXPORT_CALL |
5127 | 0 | _lou_getTranslationTable(const char *tableList) { |
5128 | 0 | TranslationTableHeader *table = NULL; |
5129 | 0 | getTable(tableList, NULL, &table, NULL); |
5130 | 0 | if (table) |
5131 | 0 | if (!finalizeTable(table)) table = NULL; |
5132 | 0 | return table; |
5133 | 0 | } |
5134 | | |
5135 | | const DisplayTableHeader *EXPORT_CALL |
5136 | 0 | _lou_getDisplayTable(const char *tableList) { |
5137 | 0 | DisplayTableHeader *table = NULL; |
5138 | 0 | getTable(NULL, tableList, NULL, &table); |
5139 | 0 | return table; |
5140 | 0 | } |
5141 | | |
5142 | | void |
5143 | | getTable(const char *translationTableList, const char *displayTableList, |
5144 | 1.28k | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) { |
5145 | | /* Keep track of which tables have already been compiled */ |
5146 | 1.28k | int translationTableListLen, displayTableListLen = 0; |
5147 | 1.28k | if (translationTableList == NULL || *translationTableList == 0) |
5148 | 0 | translationTable = NULL; |
5149 | 1.28k | if (displayTableList == NULL || *displayTableList == 0) displayTable = NULL; |
5150 | | /* See if translation table has already been compiled */ |
5151 | 1.28k | if (translationTable) { |
5152 | 1.28k | translationTableListLen = (int)strlen(translationTableList); |
5153 | 1.28k | *translationTable = NULL; |
5154 | 1.28k | TranslationTableChainEntry *currentEntry = translationTableChain; |
5155 | 1.28k | TranslationTableChainEntry *prevEntry = NULL; |
5156 | 1.28k | while (currentEntry != NULL) { |
5157 | 586 | if (translationTableListLen == currentEntry->tableListLength && |
5158 | 586 | (memcmp(¤tEntry->tableList[0], translationTableList, |
5159 | 586 | translationTableListLen)) == 0) { |
5160 | | /* Move the table to the top of the table chain. */ |
5161 | 586 | if (prevEntry != NULL) { |
5162 | 0 | prevEntry->next = currentEntry->next; |
5163 | 0 | currentEntry->next = translationTableChain; |
5164 | 0 | translationTableChain = currentEntry; |
5165 | 0 | } |
5166 | 586 | *translationTable = currentEntry->table; |
5167 | 586 | break; |
5168 | 586 | } |
5169 | 0 | prevEntry = currentEntry; |
5170 | 0 | currentEntry = currentEntry->next; |
5171 | 0 | } |
5172 | 1.28k | } |
5173 | | /* See if display table has already been compiled */ |
5174 | 1.28k | if (displayTable) { |
5175 | 1.28k | displayTableListLen = (int)strlen(displayTableList); |
5176 | 1.28k | *displayTable = NULL; |
5177 | 1.28k | DisplayTableChainEntry *currentEntry = displayTableChain; |
5178 | 1.28k | DisplayTableChainEntry *prevEntry = NULL; |
5179 | 1.28k | while (currentEntry != NULL) { |
5180 | 586 | if (displayTableListLen == currentEntry->tableListLength && |
5181 | 586 | (memcmp(¤tEntry->tableList[0], displayTableList, |
5182 | 586 | displayTableListLen)) == 0) { |
5183 | | /* Move the table to the top of the table chain. */ |
5184 | 586 | if (prevEntry != NULL) { |
5185 | 0 | prevEntry->next = currentEntry->next; |
5186 | 0 | currentEntry->next = displayTableChain; |
5187 | 0 | displayTableChain = currentEntry; |
5188 | 0 | } |
5189 | 586 | *displayTable = currentEntry->table; |
5190 | 586 | break; |
5191 | 586 | } |
5192 | 0 | prevEntry = currentEntry; |
5193 | 0 | currentEntry = currentEntry->next; |
5194 | 0 | } |
5195 | 1.28k | } |
5196 | 1.28k | if ((translationTable && *translationTable == NULL) || |
5197 | 697 | (displayTable && *displayTable == NULL)) { |
5198 | 697 | TranslationTableHeader *newTranslationTable = NULL; |
5199 | 697 | DisplayTableHeader *newDisplayTable = NULL; |
5200 | 697 | if (compileTable(translationTableList, displayTableList, |
5201 | 697 | (translationTable && *translationTable == NULL) ? &newTranslationTable |
5202 | 697 | : NULL, |
5203 | 697 | (displayTable && *displayTable == NULL) ? &newDisplayTable : NULL)) { |
5204 | | /* Add a new entry to the top of the table chain. */ |
5205 | 605 | if (newTranslationTable != NULL) { |
5206 | 605 | int entrySize = |
5207 | 605 | sizeof(TranslationTableChainEntry) + translationTableListLen; |
5208 | 605 | TranslationTableChainEntry *newEntry = malloc(entrySize); |
5209 | 605 | if (!newEntry) _lou_outOfMemory(); |
5210 | 605 | newEntry->next = translationTableChain; |
5211 | 605 | newEntry->table = newTranslationTable; |
5212 | 605 | newEntry->tableListLength = translationTableListLen; |
5213 | 605 | memcpy(&newEntry->tableList[0], translationTableList, |
5214 | 605 | translationTableListLen); |
5215 | 605 | translationTableChain = newEntry; |
5216 | 605 | *translationTable = newTranslationTable; |
5217 | 605 | } |
5218 | 605 | if (newDisplayTable != NULL) { |
5219 | 605 | int entrySize = sizeof(DisplayTableChainEntry) + displayTableListLen; |
5220 | 605 | DisplayTableChainEntry *newEntry = malloc(entrySize); |
5221 | 605 | if (!newEntry) _lou_outOfMemory(); |
5222 | 605 | newEntry->next = displayTableChain; |
5223 | 605 | newEntry->table = newDisplayTable; |
5224 | 605 | newEntry->tableListLength = displayTableListLen; |
5225 | 605 | memcpy(&newEntry->tableList[0], displayTableList, displayTableListLen); |
5226 | 605 | displayTableChain = newEntry; |
5227 | 605 | *displayTable = newDisplayTable; |
5228 | 605 | } |
5229 | 605 | } else { |
5230 | 92 | _lou_logMessage( |
5231 | 92 | LOU_LOG_ERROR, "%s could not be compiled", translationTableList); |
5232 | 92 | return; |
5233 | 92 | } |
5234 | 697 | } |
5235 | 1.28k | } |
5236 | | |
5237 | | int EXPORT_CALL |
5238 | 695 | lou_checkTable(const char *tableList) { |
5239 | 695 | if (lou_getTable(tableList)) return 1; |
5240 | 92 | return 0; |
5241 | 695 | } |
5242 | | |
5243 | | formtype EXPORT_CALL |
5244 | 0 | lou_getTypeformForEmphClass(const char *tableList, const char *emphClass) { |
5245 | 0 | const TranslationTableHeader *table = _lou_getTranslationTable(tableList); |
5246 | 0 | if (!table) return 0; |
5247 | 0 | for (int i = 0; i < MAX_EMPH_CLASSES && table->emphClassNames[i]; i++) |
5248 | 0 | if (strcmp(emphClass, table->emphClassNames[i]) == 0) return italic << i; |
5249 | 0 | return 0; |
5250 | 0 | } |
5251 | | |
5252 | | static unsigned char *destSpacing = NULL; |
5253 | | static int sizeDestSpacing = 0; |
5254 | | static formtype *typebuf = NULL; |
5255 | | static unsigned int *wordBuffer = NULL; |
5256 | | static EmphasisInfo *emphasisBuffer = NULL; |
5257 | | static int sizeTypebuf = 0; |
5258 | | static widechar *passbuf[MAXPASSBUF] = { NULL }; |
5259 | | static int sizePassbuf[MAXPASSBUF] = { 0 }; |
5260 | | static int *posMapping1 = NULL; |
5261 | | static int sizePosMapping1 = 0; |
5262 | | static int *posMapping2 = NULL; |
5263 | | static int sizePosMapping2 = 0; |
5264 | | static int *posMapping3 = NULL; |
5265 | | static int sizePosMapping3 = 0; |
5266 | | void *EXPORT_CALL |
5267 | 3.89k | _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax) { |
5268 | 3.89k | if (srcmax < 1024) srcmax = 1024; |
5269 | 3.89k | if (destmax < 1024) destmax = 1024; |
5270 | 3.89k | switch (buffer) { |
5271 | 307 | case alloc_typebuf: |
5272 | 307 | if (destmax > sizeTypebuf) { |
5273 | 307 | if (typebuf != NULL) free(typebuf); |
5274 | | // TODO: should this be srcmax? |
5275 | 307 | typebuf = malloc((destmax + 4) * sizeof(formtype)); |
5276 | 307 | if (!typebuf) _lou_outOfMemory(); |
5277 | 307 | sizeTypebuf = destmax; |
5278 | 307 | } |
5279 | 307 | return typebuf; |
5280 | | |
5281 | 307 | case alloc_wordBuffer: |
5282 | | |
5283 | 307 | if (wordBuffer != NULL) free(wordBuffer); |
5284 | 307 | wordBuffer = calloc(srcmax + 4, sizeof(unsigned int)); |
5285 | 307 | if (wordBuffer == NULL) _lou_outOfMemory(); |
5286 | 307 | return wordBuffer; |
5287 | | |
5288 | 307 | case alloc_emphasisBuffer: |
5289 | | |
5290 | 307 | if (emphasisBuffer != NULL) free(emphasisBuffer); |
5291 | 307 | emphasisBuffer = calloc(srcmax + 4, sizeof(EmphasisInfo)); |
5292 | 307 | if (emphasisBuffer == NULL) _lou_outOfMemory(); |
5293 | 307 | return emphasisBuffer; |
5294 | | |
5295 | 0 | case alloc_destSpacing: |
5296 | 0 | if (destmax > sizeDestSpacing) { |
5297 | 0 | if (destSpacing != NULL) free(destSpacing); |
5298 | 0 | destSpacing = malloc(destmax + 4); |
5299 | 0 | if (!destSpacing) _lou_outOfMemory(); |
5300 | 0 | sizeDestSpacing = destmax; |
5301 | 0 | } |
5302 | 0 | return destSpacing; |
5303 | 1.52k | case alloc_passbuf: |
5304 | 1.52k | if (index < 0 || index >= MAXPASSBUF) { |
5305 | 0 | _lou_logMessage(LOU_LOG_FATAL, "Index out of bounds: %d\n", index); |
5306 | 0 | exit(3); |
5307 | 0 | } |
5308 | 1.52k | if (destmax > sizePassbuf[index]) { |
5309 | 1.25k | if (passbuf[index] != NULL) free(passbuf[index]); |
5310 | 1.25k | passbuf[index] = malloc((destmax + 4) * CHARSIZE); |
5311 | 1.25k | if (!passbuf[index]) _lou_outOfMemory(); |
5312 | 1.25k | sizePassbuf[index] = destmax; |
5313 | 1.25k | } |
5314 | 1.52k | return passbuf[index]; |
5315 | 586 | case alloc_posMapping1: { |
5316 | 586 | int mapSize; |
5317 | 586 | if (srcmax >= destmax) |
5318 | 117 | mapSize = srcmax; |
5319 | 469 | else |
5320 | 469 | mapSize = destmax; |
5321 | 586 | if (mapSize > sizePosMapping1) { |
5322 | 586 | if (posMapping1 != NULL) free(posMapping1); |
5323 | 586 | posMapping1 = malloc((mapSize + 4) * sizeof(int)); |
5324 | 586 | if (!posMapping1) _lou_outOfMemory(); |
5325 | 586 | sizePosMapping1 = mapSize; |
5326 | 586 | } |
5327 | 586 | } |
5328 | 586 | return posMapping1; |
5329 | 429 | case alloc_posMapping2: { |
5330 | 429 | int mapSize; |
5331 | 429 | if (srcmax >= destmax) |
5332 | 63 | mapSize = srcmax; |
5333 | 366 | else |
5334 | 366 | mapSize = destmax; |
5335 | 429 | if (mapSize > sizePosMapping2) { |
5336 | 429 | if (posMapping2 != NULL) free(posMapping2); |
5337 | 429 | posMapping2 = malloc((mapSize + 4) * sizeof(int)); |
5338 | 429 | if (!posMapping2) _lou_outOfMemory(); |
5339 | 429 | sizePosMapping2 = mapSize; |
5340 | 429 | } |
5341 | 429 | } |
5342 | 429 | return posMapping2; |
5343 | 429 | case alloc_posMapping3: { |
5344 | 429 | int mapSize; |
5345 | 429 | if (srcmax >= destmax) |
5346 | 63 | mapSize = srcmax; |
5347 | 366 | else |
5348 | 366 | mapSize = destmax; |
5349 | 429 | if (mapSize > sizePosMapping3) { |
5350 | 429 | if (posMapping3 != NULL) free(posMapping3); |
5351 | 429 | posMapping3 = malloc((mapSize + 4) * sizeof(int)); |
5352 | 429 | if (!posMapping3) _lou_outOfMemory(); |
5353 | 429 | sizePosMapping3 = mapSize; |
5354 | 429 | } |
5355 | 429 | } |
5356 | 429 | return posMapping3; |
5357 | 0 | default: |
5358 | 0 | return NULL; |
5359 | 3.89k | } |
5360 | 3.89k | } |
5361 | | /* lou_free: calls lou_logEnd(), frees every entry of translationTableChain and displayTableChain, then frees the file-level scratch buffers (typebuf, wordBuffer, emphasisBuffer, destSpacing, passbuf[], posMapping1-3) and zeroes the size trackers that are reset here. Takes no arguments and returns nothing. */ |
5362 | | void EXPORT_CALL |
5363 | 697 | lou_free(void) { |
5364 | 697 | lou_logEnd(); |
5365 | 697 | if (translationTableChain != NULL) { |
5366 | 605 | TranslationTableChainEntry *currentEntry = translationTableChain; |
5367 | 605 | TranslationTableChainEntry *previousEntry; |
5368 | 1.21k | while (currentEntry) { |
5369 | 605 | freeTranslationTable(currentEntry->table); |
5370 | 605 | previousEntry = currentEntry; |
5371 | 605 | currentEntry = currentEntry->next; /* read the successor before this node is freed */ |
5372 | 605 | free(previousEntry); |
5373 | 605 | } |
5374 | 605 | translationTableChain = NULL; /* leave no dangling head pointer */ |
5375 | 605 | } |
5376 | 697 | if (displayTableChain != NULL) { /* same walk as above, for the display table chain */ |
5377 | 605 | DisplayTableChainEntry *currentEntry = displayTableChain; |
5378 | 605 | DisplayTableChainEntry *previousEntry; |
5379 | 1.21k | while (currentEntry) { |
5380 | 605 | freeDisplayTable(currentEntry->table); |
5381 | 605 | previousEntry = currentEntry; |
5382 | 605 | currentEntry = currentEntry->next; |
5383 | 605 | free(previousEntry); |
5384 | 605 | } |
5385 | 605 | displayTableChain = NULL; |
5386 | 605 | } |
5387 | 697 | if (typebuf != NULL) free(typebuf); /* the NULL guards below are redundant (free(NULL) is a no-op) but harmless */ |
5388 | 697 | typebuf = NULL; |
5389 | 697 | if (wordBuffer != NULL) free(wordBuffer); |
5390 | 697 | wordBuffer = NULL; |
5391 | 697 | if (emphasisBuffer != NULL) free(emphasisBuffer); |
5392 | 697 | emphasisBuffer = NULL; |
5393 | 697 | sizeTypebuf = 0; /* NOTE(review): no separate size variable is reset for wordBuffer or emphasisBuffer here; presumably they are sized together with typebuf - confirm against the allocator */ |
5394 | 697 | if (destSpacing != NULL) free(destSpacing); |
5395 | 697 | destSpacing = NULL; |
5396 | 697 | sizeDestSpacing = 0; |
5397 | 697 | { |
5398 | 697 | int k; |
5399 | 2.78k | for (k = 0; k < MAXPASSBUF; k++) { /* release every pass buffer slot and forget its size */ |
5400 | 2.09k | if (passbuf[k] != NULL) free(passbuf[k]); |
5401 | 2.09k | passbuf[k] = NULL; |
5402 | 2.09k | sizePassbuf[k] = 0; |
5403 | 2.09k | } |
5404 | 697 | } |
5405 | 697 | if (posMapping1 != NULL) free(posMapping1); |
5406 | 697 | posMapping1 = NULL; |
5407 | 697 | sizePosMapping1 = 0; |
5408 | 697 | if (posMapping2 != NULL) free(posMapping2); |
5409 | 697 | posMapping2 = NULL; |
5410 | 697 | sizePosMapping2 = 0; |
5411 | 697 | if (posMapping3 != NULL) free(posMapping3); |
5412 | 697 | posMapping3 = NULL; |
5413 | 697 | sizePosMapping3 = 0; |
5414 | 697 | opcodeLengths[0] = 0; /* NOTE(review): presumably marks the opcode length table as not yet computed so it is rebuilt on next use - confirm */ |
5415 | 697 | _lou_freeTableIndex(); |
5416 | 697 | } |
5417 | | /* lou_version: returns the PACKAGE_VERSION value, held in a function-local static pointer; takes no arguments. */ |
5418 | | const char *EXPORT_CALL |
5419 | 0 | lou_version(void) { |
5420 | 0 | static const char *version = PACKAGE_VERSION; |
5421 | 0 | return version; |
5422 | 0 | } |
5423 | | /* lou_charSize: returns CHARSIZE, the per-character multiplier this file uses when sizing passbuf allocations. */ |
5424 | | int EXPORT_CALL |
5425 | 0 | lou_charSize(void) { |
5426 | 0 | return CHARSIZE; |
5427 | 0 | } |
5428 | | /* lou_compileString: resolves tableList through getTable() as both the translation and the display table list, then passes inString to compileString() with both tables. Returns 1 on success, 0 when no translation table was obtained or compileString() reports failure. */ |
5429 | | int EXPORT_CALL |
5430 | 2 | lou_compileString(const char *tableList, const char *inString) { |
5431 | 2 | TranslationTableHeader *table; |
5432 | 2 | DisplayTableHeader *displayTable; |
5433 | 2 | getTable(tableList, tableList, &table, &displayTable); /* same list is used for both table kinds */ |
5434 | 2 | if (!table) return 0; /* relies on getTable() leaving table NULL on failure */ |
5435 | 2 | if (!compileString(inString, &table, &displayTable)) return 0; |
5436 | 2 | return 1; |
5437 | 2 | } |
5438 | | /* _lou_compileTranslationRule: requests only the translation table for tableList (the display arguments to getTable() are NULL) and returns the result of compileString() on inString. NOTE(review): unlike lou_compileString, table is not checked for NULL before being handed on - confirm compileString() tolerates a failed lookup. */ |
5439 | | int EXPORT_CALL |
5440 | 0 | _lou_compileTranslationRule(const char *tableList, const char *inString) { |
5441 | 0 | TranslationTableHeader *table; |
5442 | 0 | getTable(tableList, NULL, &table, NULL); |
5443 | 0 | return compileString(inString, &table, NULL); |
5444 | 0 | } |
5445 | | /* _lou_compileDisplayRule: requests only the display table for tableList (the translation arguments to getTable() are NULL) and returns the result of compileString() on inString. NOTE(review): table is not checked for NULL after getTable() - confirm compileString() tolerates a failed lookup. */ |
5446 | | int EXPORT_CALL |
5447 | 0 | _lou_compileDisplayRule(const char *tableList, const char *inString) { |
5448 | 0 | DisplayTableHeader *table; |
5449 | 0 | getTable(NULL, tableList, NULL, &table); |
5450 | | return compileString(inString, NULL, &table); |
5451 | 0 | } |
5452 | | |
5453 | | /** |
5454 | | * This procedure provides a target for calls that serve as breakpoints |
5455 | | * for gdb. |
5456 | | */ |
5457 | | // char *EXPORT_CALL |
5458 | | // lou_getTablePaths (void) |
5459 | | // { |
5460 | | // static char paths[MAXSTRING]; |
5461 | | // static char scratchBuf[MAXSTRING]; |
5462 | | // char *pathList; |
5463 | | // strcpy (paths, tablePath); |
5464 | | // strcat (paths, ","); |
5465 | | // pathList = getenv ("LOUIS_TABLEPATH"); |
5466 | | // if (pathList) |
5467 | | // { |
5468 | | // strcat (paths, pathList); |
5469 | | // strcat (paths, ","); |
5470 | | // } |
5471 | | // pathList = getcwd (scratchBuf, MAXSTRING); |
5472 | | // if (pathList) |
5473 | | // { |
5474 | | // strcat (paths, pathList); |
5475 | | // strcat (paths, ","); |
5476 | | // } |
5477 | | // pathList = lou_getDataPath (); |
5478 | | // if (pathList) |
5479 | | // { |
5480 | | // strcat (paths, pathList); |
5481 | | // strcat (paths, ","); |
5482 | | // } |
5483 | | // #ifdef _WIN32 |
5484 | | // strcpy (paths, lou_getProgramPath ()); |
5485 | | // strcat (paths, "\\share\\liblouss\\tables\\"); |
5486 | | // #else |
5487 | | // strcpy (paths, TABLESDIR); |
5488 | | // #endif |
5489 | | // return paths; |
5490 | | // } |