/src/liblouis/liblouis/compileTranslationTable.c
Line | Count | Source |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The |
4 | | BRLTTY Team |
5 | | |
6 | | Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com |
7 | | Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com |
8 | | Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
9 | | Copyright (C) 2016 Davy Kager, Dedicon |
10 | | |
11 | | This file is part of liblouis. |
12 | | |
13 | | liblouis is free software: you can redistribute it and/or modify it |
14 | | under the terms of the GNU Lesser General Public License as published |
15 | | by the Free Software Foundation, either version 2.1 of the License, or |
16 | | (at your option) any later version. |
17 | | |
18 | | liblouis is distributed in the hope that it will be useful, but |
19 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | Lesser General Public License for more details. |
22 | | |
23 | | You should have received a copy of the GNU Lesser General Public |
24 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | /** |
28 | | * @file |
29 | | * @brief Read and compile translation tables |
30 | | */ |
31 | | |
32 | | #include <config.h> |
33 | | |
34 | | #include <stddef.h> |
35 | | #include <stdlib.h> |
36 | | #include <stdio.h> |
37 | | #include <stdarg.h> |
38 | | #include <string.h> |
39 | | #include <ctype.h> |
40 | | #include <sys/stat.h> |
41 | | |
42 | | #include "internal.h" |
43 | | |
44 | 875 | #define QUOTESUB 28 /* Stand-in for double quotes in strings */ |
45 | | |
46 | | /* needed to make debugging easier */ |
47 | | #ifdef DEBUG |
48 | | wchar_t wchar; |
49 | | #endif |
50 | | |
51 | | /* The following variables and functions make it possible to specify the |
52 | | * path on which all tables for liblouis and all files for liblouisutdml, |
53 | | * in their proper directories, will be found. |
54 | | */ |
55 | | |
56 | | static char *dataPathPtr; |
57 | | |
58 | | char *EXPORT_CALL |
59 | 0 | lou_setDataPath(const char *path) { |
60 | 0 | _lou_logMessage(LOU_LOG_WARN, "warning: lou_setDataPath is deprecated."); |
61 | 0 | static char dataPath[MAXSTRING]; |
62 | 0 | dataPathPtr = NULL; |
63 | 0 | if (path == NULL || strlen(path) >= MAXSTRING) return NULL; |
64 | 0 | strcpy(dataPath, path); |
65 | 0 | dataPathPtr = dataPath; |
66 | 0 | return dataPathPtr; |
67 | 0 | } |
68 | | |
69 | | char *EXPORT_CALL |
70 | 0 | lou_getDataPath(void) { |
71 | 0 | _lou_logMessage(LOU_LOG_WARN, "warning: lou_getDataPath is deprecated."); |
72 | 0 | return dataPathPtr; |
73 | 0 | } |
74 | | |
75 | | /* End of dataPath code. */ |
76 | | |
77 | | static int |
78 | 3.81k | eqasc2uni(const unsigned char *a, const widechar *b, const int len) { |
79 | 3.81k | int k; |
80 | 9.00k | for (k = 0; k < len; k++) |
81 | 8.25k | if ((widechar)a[k] != b[k]) return 0; |
82 | 744 | return 1; |
83 | 3.81k | } |
84 | | |
/* Fixed-capacity widechar buffer with an explicit length, used e.g. to hold
 * a token read from a table line (see getToken). */
typedef struct CharsString {
	widechar length;           /* number of entries of `chars` currently in use */
	widechar chars[MAXSTRING]; /* the characters themselves */
} CharsString;
89 | | |
90 | | static int errorCount; |
91 | | static int warningCount; |
92 | | |
/* Node of the singly linked list headed by translationTableChain. Each node
 * refers to one translation table; the `table` pointer is rewritten by
 * allocateSpaceInTranslationTable when realloc moves the table. */
typedef struct TranslationTableChainEntry {
	struct TranslationTableChainEntry *next; /* next node, NULL at the end */
	TranslationTableHeader *table;           /* the table this node refers to */
	int tableListLength;
	/* NOTE(review): presumably the node is over-allocated so that tableList
	 * holds tableListLength characters - confirm at the allocation site. */
	char tableList[1];
} TranslationTableChainEntry;
99 | | |
100 | | static TranslationTableChainEntry *translationTableChain = NULL; |
101 | | |
/* Node of the singly linked list headed by displayTableChain. Each node
 * refers to one display table; the `table` pointer is rewritten by
 * allocateSpaceInDisplayTable when realloc moves the table. */
typedef struct DisplayTableChainEntry {
	struct DisplayTableChainEntry *next; /* next node, NULL at the end */
	DisplayTableHeader *table;           /* the table this node refers to */
	int tableListLength;
	/* NOTE(review): presumably the node is over-allocated so that tableList
	 * holds tableListLength characters - confirm at the allocation site. */
	char tableList[1];
} DisplayTableChainEntry;
108 | | |
109 | | static DisplayTableChainEntry *displayTableChain = NULL; |
110 | | |
111 | | /* predefined character classes */ |
/* NULL-terminated list of the built-in character class names. */
static const char *characterClassNames[] = {
	"space",
	"letter",
	"digit",
	"punctuation",
	"uppercase",
	"lowercase",
	"math",
	"sign",
	"litdigit",
	NULL, /* end-of-list sentinel */
};
124 | | |
125 | | // names that may not be used for custom attributes |
/* NULL-terminated list of names that are rejected as custom attribute
 * names. */
static const char *reservedAttributeNames[] = {
	"numericnocontchars",
	"numericnocontchar",
	"numericnocont",
	"midendnumericmodechars",
	"midendnumericmodechar",
	"midendnumericmode",
	"numericmodechars",
	"numericmodechar",
	"numericmode",
	"capsmodechars",
	"capsmodechar",
	"capsmode",
	"emphmodechars",
	"emphmodechar",
	"emphmode",
	"noemphchars",
	"noemphchar",
	"noemph",
	"seqdelimiter",
	"seqbeforechars",
	"seqbeforechar",
	"seqbefore",
	"seqafterchars",
	"seqafterchar",
	"seqafter",
	"noletsign",
	"noletsignbefore",
	"noletsignafter",
	NULL, /* end-of-list sentinel */
};
157 | | |
/* Textual names of the table opcodes; the array has CTO_None slots.
 * NOTE(review): presumably indexed by the opcode enum value, in which case
 * the order here must match the enum declaration exactly - confirm against
 * internal.h before adding, removing or reordering entries. */
static const char *opcodeNames[CTO_None] = {
	"include",
	"locale",
	"undefined",
	"capsletter",
	"begcapsword",
	"endcapsword",
	"begcaps",
	"endcaps",
	"begcapsphrase",
	"endcapsphrase",
	"lencapsphrase",
	"modeletter",
	"begmodeword",
	"endmodeword",
	"begmode",
	"endmode",
	"begmodephrase",
	"endmodephrase",
	"lenmodephrase",
	"letsign",
	"noletsignbefore",
	"noletsign",
	"noletsignafter",
	"numsign",
	"nonumsign",
	"numericmodechars",
	"midendnumericmodechars",
	"numericnocontchars",
	"seqdelimiter",
	"seqbeforechars",
	"seqafterchars",
	"seqafterpattern",
	"seqafterexpression",
	"emphclass",
	"emphletter",
	"begemphword",
	"endemphword",
	"begemph",
	"endemph",
	"begemphphrase",
	"endemphphrase",
	"lenemphphrase",
	"capsmodechars",
	"emphmodechars",
	"noemphchars",
	"begcomp",
	"endcomp",
	"nocontractsign",
	"multind",
	"compdots",
	"comp6",
	"class",
	"after",
	"before",
	"noback",
	"nofor",
	"empmatchbefore",
	"empmatchafter",
	"swapcc",
	"swapcd",
	"swapdd",
	"space",
	"digit",
	"punctuation",
	"math",
	"sign",
	"letter",
	"uppercase",
	"lowercase",
	"grouping",
	"uplow",
	"litdigit",
	"display",
	"replace",
	"context",
	"correct",
	"pass2",
	"pass3",
	"pass4",
	"repeated",
	"repword",
	"rependword",
	"capsnocont",
	"always",
	"exactdots",
	"nocross",
	"syllable",
	"nocont",
	"compbrl",
	"literal",
	"largesign",
	"word",
	"partword",
	"joinnum",
	"joinword",
	"lowword",
	"contraction",
	"sufword",
	"prfword",
	"begword",
	"begmidword",
	"midword",
	"midendword",
	"endword",
	"prepunc",
	"postpunc",
	"begnum",
	"midnum",
	"endnum",
	"decpoint",
	"hyphen",
	// "apostrophe",
	// "initial",
	"nobreak",
	"match",
	"backmatch",
	"attribute",
	"base",
	"macro",
};
279 | | |
280 | | static short opcodeLengths[CTO_None] = { 0 }; |
281 | | |
282 | | static void |
283 | | compileError(const FileInfo *file, const char *format, ...); |
284 | | |
/* Return the next character of `file`, or EOF at end of input or when the
 * encoding cannot be determined.
 *
 * file->status counts the bytes read so far. The first two bytes are kept in
 * file->checkencoding and, once both are available, are used to select
 * file->encoding:
 *   0xfe 0xff     -> bigEndian    (the two bytes are consumed, not returned)
 *   0xff 0xfe     -> littleEndian (the two bytes are consumed, not returned)
 *   both < 128    -> ascii8       (the two bytes are returned by this call
 *                                  and the next one)
 *   anything else -> compile error, EOF
 */
static int
getAChar(FileInfo *file) {
	/* Read a big endian, little endian or ASCII 8 file and convert it to
	 * 16- or 32-bit unsigned integers */
	int ch1 = 0, ch2 = 0;
	widechar character;
	/* ASCII was detected by the previous call, which returned the first
	 * buffered byte; hand out the second buffered byte now. */
	if (file->encoding == ascii8)
		if (file->status == 2) {
			file->status++;
			return file->checkencoding[1];
		}
	while ((ch1 = fgetc(file->in)) != EOF) {
		/* remember the first two bytes for encoding detection */
		if (file->status < 2) file->checkencoding[file->status] = ch1;
		file->status++;
		if (file->status == 2) {
			/* both detection bytes are in: decide on the encoding */
			if (file->checkencoding[0] == 0xfe && file->checkencoding[1] == 0xff)
				file->encoding = bigEndian;
			else if (file->checkencoding[0] == 0xff && file->checkencoding[1] == 0xfe)
				file->encoding = littleEndian;
			else if (file->checkencoding[0] < 128 && file->checkencoding[1] < 128) {
				file->encoding = ascii8;
				return file->checkencoding[0];
			} else {
				compileError(file,
						"encoding is neither big-endian, little-endian nor ASCII 8.");
				ch1 = EOF;
				break;
				;
			}
			/* a byte order mark was recognised; go on with the payload */
			continue;
		}
		switch (file->encoding) {
		case noEncoding:
			/* only one byte read so far: keep reading */
			break;
		case ascii8:
			return ch1;
			break;
		case bigEndian:
			/* high byte first; a missing second byte ends the input */
			ch2 = fgetc(file->in);
			if (ch2 == EOF) break;
			character = (widechar)(ch1 << 8) | ch2;
			return (int)character;
			break;
		case littleEndian:
			/* low byte first; a missing second byte ends the input */
			ch2 = fgetc(file->in);
			if (ch2 == EOF) break;
			character = (widechar)(ch2 << 8) | ch1;
			return (int)character;
			break;
		}
		if (ch1 == EOF || ch2 == EOF) break;
	}
	return EOF;
}
339 | | |
340 | | int EXPORT_CALL |
341 | 802 | _lou_getALine(FileInfo *file) { |
342 | | /* Read a line of widechar's from an input file */ |
343 | 802 | int ch; |
344 | 802 | file->linelen = 0; |
345 | 40.6k | while ((ch = getAChar(file)) != EOF) { |
346 | 40.5k | if (ch == 13) continue; |
347 | 39.9k | if (ch == 10 || file->linelen >= MAXSTRING - 1) break; |
348 | 39.3k | file->line[file->linelen++] = (widechar)ch; |
349 | 39.3k | } |
350 | 802 | file->line[file->linelen] = 0; |
351 | 802 | file->linepos = 0; |
352 | 802 | if (ch == EOF && !file->linelen) return 0; |
353 | 728 | file->lineNumber++; |
354 | 728 | return 1; |
355 | 802 | } |
356 | | |
357 | | static inline int |
358 | 22.6k | atEndOfLine(const FileInfo *file) { |
359 | 22.6k | return file->linepos >= file->linelen; |
360 | 22.6k | } |
361 | | |
362 | | static inline int |
363 | 20.6k | atTokenDelimiter(const FileInfo *file) { |
364 | 20.6k | return file->line[file->linepos] <= 32; |
365 | 20.6k | } |
366 | | |
367 | | static int |
368 | 2.21k | getToken(FileInfo *file, CharsString *result, const char *description) { |
369 | | /* Find the next string of contiguous non-whitespace characters. If this |
370 | | * is the last token on the line, return 2 instead of 1. */ |
371 | 3.89k | while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
372 | 2.21k | result->length = 0; |
373 | 13.4k | while (!atEndOfLine(file) && !atTokenDelimiter(file)) { |
374 | 11.2k | int maxlen = MAXSTRING; |
375 | 11.2k | if (result->length >= maxlen) { |
376 | 0 | compileError(file, "more than %d characters (bytes)", maxlen); |
377 | 0 | return 0; |
378 | 0 | } else |
379 | 11.2k | result->chars[result->length++] = file->line[file->linepos++]; |
380 | 11.2k | } |
381 | 2.21k | if (!result->length) { |
382 | | /* Not enough tokens */ |
383 | 758 | if (description) compileError(file, "%s not specified.", description); |
384 | 758 | return 0; |
385 | 758 | } |
386 | 1.46k | result->chars[result->length] = 0; |
387 | 5.28k | while (!atEndOfLine(file) && atTokenDelimiter(file)) file->linepos++; |
388 | 1.46k | return 1; |
389 | 2.21k | } |
390 | | |
391 | | static void |
392 | 2.35k | compileError(const FileInfo *file, const char *format, ...) { |
393 | 2.35k | #ifndef __SYMBIAN32__ |
394 | 2.35k | char buffer[MAXSTRING]; |
395 | 2.35k | va_list arguments; |
396 | 2.35k | va_start(arguments, format); |
397 | 2.35k | vsnprintf(buffer, sizeof(buffer), format, arguments); |
398 | 2.35k | va_end(arguments); |
399 | 2.35k | if (file) |
400 | 749 | _lou_logMessage(LOU_LOG_ERROR, "%s:%d: error: %s", file->fileName, |
401 | 749 | file->lineNumber, buffer); |
402 | 1.60k | else |
403 | 1.60k | _lou_logMessage(LOU_LOG_ERROR, "error: %s", buffer); |
404 | 2.35k | errorCount++; |
405 | 2.35k | #endif |
406 | 2.35k | } |
407 | | |
408 | | static void |
409 | 34.6k | compileWarning(const FileInfo *file, const char *format, ...) { |
410 | 34.6k | #ifndef __SYMBIAN32__ |
411 | 34.6k | char buffer[MAXSTRING]; |
412 | 34.6k | va_list arguments; |
413 | 34.6k | va_start(arguments, format); |
414 | 34.6k | vsnprintf(buffer, sizeof(buffer), format, arguments); |
415 | 34.6k | va_end(arguments); |
416 | 34.6k | if (file) |
417 | 4.31k | _lou_logMessage(LOU_LOG_WARN, "%s:%d: warning: %s", file->fileName, |
418 | 4.31k | file->lineNumber, buffer); |
419 | 30.2k | else |
420 | 30.2k | _lou_logMessage(LOU_LOG_WARN, "warning: %s", buffer); |
421 | 34.6k | warningCount++; |
422 | 34.6k | #endif |
423 | 34.6k | } |
424 | | |
425 | | static int |
426 | | allocateSpaceInTranslationTable(const FileInfo *file, TranslationTableOffset *offset, |
427 | 1.79k | int size, TranslationTableHeader **table) { |
428 | | /* allocate memory for table and expand previously allocated memory if necessary */ |
429 | 1.79k | int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
430 | 1.79k | TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
431 | 1.79k | TranslationTableOffset tableSize = (*table)->tableSize; |
432 | 1.79k | if (newTableSize > tableSize) { |
433 | 5 | TranslationTableHeader *newTable; |
434 | 5 | newTableSize += (newTableSize / OFFSETSIZE); |
435 | 5 | newTable = realloc(*table, newTableSize); |
436 | 5 | if (!newTable) { |
437 | 0 | compileError(file, "Not enough memory for translation table."); |
438 | 0 | _lou_outOfMemory(); |
439 | 0 | } |
440 | 5 | memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
441 | | /* update references to the old table */ |
442 | 5 | { |
443 | 5 | TranslationTableChainEntry *entry; |
444 | 5 | for (entry = translationTableChain; entry != NULL; entry = entry->next) |
445 | 0 | if (entry->table == *table) |
446 | 0 | entry->table = (TranslationTableHeader *)newTable; |
447 | 5 | } |
448 | 5 | newTable->tableSize = newTableSize; |
449 | 5 | *table = newTable; |
450 | 5 | } |
451 | 1.79k | if (offset != NULL) { |
452 | 1.79k | *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
453 | 1.79k | (*table)->bytesUsed += spaceNeeded; |
454 | 1.79k | } |
455 | 1.79k | return 1; |
456 | 1.79k | } |
457 | | |
458 | | static int |
459 | | allocateSpaceInDisplayTable(const FileInfo *file, TranslationTableOffset *offset, |
460 | 188 | int size, DisplayTableHeader **table) { |
461 | | /* allocate memory for table and expand previously allocated memory if necessary */ |
462 | 188 | int spaceNeeded = ((size + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE; |
463 | 188 | TranslationTableOffset newTableSize = (*table)->bytesUsed + spaceNeeded; |
464 | 188 | TranslationTableOffset tableSize = (*table)->tableSize; |
465 | 188 | if (newTableSize > tableSize) { |
466 | 0 | DisplayTableHeader *newTable; |
467 | 0 | newTableSize += (newTableSize / OFFSETSIZE); |
468 | 0 | newTable = realloc(*table, newTableSize); |
469 | 0 | if (!newTable) { |
470 | 0 | compileError(file, "Not enough memory for display table."); |
471 | 0 | _lou_outOfMemory(); |
472 | 0 | } |
473 | 0 | memset(((unsigned char *)newTable) + tableSize, 0, newTableSize - tableSize); |
474 | | /* update references to the old table */ |
475 | 0 | { |
476 | 0 | DisplayTableChainEntry *entry; |
477 | 0 | for (entry = displayTableChain; entry != NULL; entry = entry->next) |
478 | 0 | if (entry->table == *table) entry->table = (DisplayTableHeader *)newTable; |
479 | 0 | } |
480 | 0 | newTable->tableSize = newTableSize; |
481 | 0 | *table = newTable; |
482 | 0 | } |
483 | 188 | if (offset != NULL) { |
484 | 188 | *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE; |
485 | 188 | (*table)->bytesUsed += spaceNeeded; |
486 | 188 | } |
487 | 188 | return 1; |
488 | 188 | } |
489 | | |
490 | | static int |
491 | 84 | allocateTranslationTable(const FileInfo *file, TranslationTableHeader **table) { |
492 | | /* Allocate memory for the table and a guess on the number of rules */ |
493 | 84 | const TranslationTableOffset startSize = 2 * sizeof(**table); |
494 | 84 | if (*table) return 1; |
495 | 84 | TranslationTableOffset bytesUsed = |
496 | 84 | sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
497 | 84 | if (!(*table = malloc(startSize))) { |
498 | 0 | compileError(file, "Not enough memory"); |
499 | 0 | if (*table != NULL) free(*table); |
500 | 0 | *table = NULL; |
501 | 0 | _lou_outOfMemory(); |
502 | 0 | } |
503 | 84 | memset(*table, 0, startSize); |
504 | 84 | (*table)->tableSize = startSize; |
505 | 84 | (*table)->bytesUsed = bytesUsed; |
506 | 84 | return 1; |
507 | 84 | } |
508 | | |
509 | | static int |
510 | 84 | allocateDisplayTable(const FileInfo *file, DisplayTableHeader **table) { |
511 | | /* Allocate memory for the table and a guess on the number of rules */ |
512 | 84 | const TranslationTableOffset startSize = 2 * sizeof(**table); |
513 | 84 | if (*table) return 1; |
514 | 84 | TranslationTableOffset bytesUsed = |
515 | 84 | sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */ |
516 | 84 | if (!(*table = malloc(startSize))) { |
517 | 0 | compileError(file, "Not enough memory"); |
518 | 0 | if (*table != NULL) free(*table); |
519 | 0 | *table = NULL; |
520 | 0 | _lou_outOfMemory(); |
521 | 0 | } |
522 | 84 | memset(*table, 0, startSize); |
523 | 84 | (*table)->tableSize = startSize; |
524 | 84 | (*table)->bytesUsed = bytesUsed; |
525 | 84 | return 1; |
526 | 84 | } |
527 | | |
528 | | /* Look up a character or dot pattern. Although the algorithms are almost identical, |
529 | | * different tables are needed for characters and dots because of the possibility of |
530 | | * conflicts. */ |
531 | | |
532 | | static TranslationTableCharacter * |
533 | | getChar(widechar c, TranslationTableHeader *table, |
534 | 2.62k | TranslationTableOffset *characterOffset) { |
535 | 2.62k | const TranslationTableOffset bucket = table->characters[_lou_charHash(c)]; |
536 | 2.62k | TranslationTableOffset offset = bucket; |
537 | 2.71k | while (offset) { |
538 | 1.97k | TranslationTableCharacter *character = |
539 | 1.97k | (TranslationTableCharacter *)&table->ruleArea[offset]; |
540 | 1.97k | if (character->value == c) { |
541 | 1.88k | if (characterOffset) *characterOffset = offset; |
542 | 1.88k | return character; |
543 | 1.88k | } |
544 | 92 | offset = character->next; |
545 | 92 | } |
546 | 742 | return NULL; |
547 | 2.62k | } |
548 | | |
549 | | static TranslationTableCharacter * |
550 | 414 | getDots(widechar d, TranslationTableHeader *table) { |
551 | 414 | const TranslationTableOffset bucket = table->dots[_lou_charHash(d)]; |
552 | 414 | TranslationTableOffset offset = bucket; |
553 | 417 | while (offset) { |
554 | 164 | TranslationTableCharacter *character = |
555 | 164 | (TranslationTableCharacter *)&table->ruleArea[offset]; |
556 | 164 | if (character->value == d) return character; |
557 | 3 | offset = character->next; |
558 | 3 | } |
559 | 253 | return NULL; |
560 | 414 | } |
561 | | |
562 | | static TranslationTableCharacter * |
563 | | putChar(const FileInfo *file, widechar c, TranslationTableHeader **table, |
564 | 2.25k | TranslationTableOffset *characterOffset, int ruleIndex) { |
565 | | /* See if a character is in the appropriate table. If not, insert it. In either case, |
566 | | * return a pointer to it. */ |
567 | 2.25k | TranslationTableCharacter *character; |
568 | 2.25k | TranslationTableOffset offset; |
569 | 2.25k | if ((character = getChar(c, *table, characterOffset))) return character; |
570 | 742 | if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
571 | 0 | return NULL; |
572 | 742 | character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
573 | 742 | memset(character, 0, sizeof(*character)); |
574 | 742 | character->sourceFile = file->sourceFile; |
575 | 742 | character->sourceLine = file->lineNumber; |
576 | 742 | character->ruleIndex = ruleIndex; |
577 | 742 | character->value = c; |
578 | 742 | const unsigned long int charHash = _lou_charHash(c); |
579 | 742 | const TranslationTableOffset bucket = (*table)->characters[charHash]; |
580 | 742 | if (!bucket) |
581 | 734 | (*table)->characters[charHash] = offset; |
582 | 8 | else { |
583 | 8 | TranslationTableCharacter *oldchar = |
584 | 8 | (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
585 | 8 | while (oldchar->next) |
586 | 0 | oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
587 | 8 | oldchar->next = offset; |
588 | 8 | } |
589 | 742 | if (characterOffset) *characterOffset = offset; |
590 | 742 | return character; |
591 | 742 | } |
592 | | |
593 | | static TranslationTableCharacter * |
594 | 309 | putDots(const FileInfo *file, widechar d, TranslationTableHeader **table, int ruleIndex) { |
595 | | /* See if a dot pattern is in the appropriate table. If not, insert it. In either |
596 | | * case, return a pointer to it. */ |
597 | 309 | TranslationTableCharacter *character; |
598 | 309 | TranslationTableOffset offset; |
599 | 309 | if ((character = getDots(d, *table))) return character; |
600 | 164 | if (!allocateSpaceInTranslationTable(file, &offset, sizeof(*character), table)) |
601 | 0 | return NULL; |
602 | 164 | character = (TranslationTableCharacter *)&(*table)->ruleArea[offset]; |
603 | 164 | memset(character, 0, sizeof(*character)); |
604 | 164 | character->sourceFile = file->sourceFile; |
605 | 164 | character->sourceLine = file->lineNumber; |
606 | 164 | character->ruleIndex = ruleIndex; |
607 | 164 | character->value = d; |
608 | 164 | const unsigned long int charHash = _lou_charHash(d); |
609 | 164 | const TranslationTableOffset bucket = (*table)->dots[charHash]; |
610 | 164 | if (!bucket) |
611 | 163 | (*table)->dots[charHash] = offset; |
612 | 1 | else { |
613 | 1 | TranslationTableCharacter *oldchar = |
614 | 1 | (TranslationTableCharacter *)&(*table)->ruleArea[bucket]; |
615 | 1 | while (oldchar->next) |
616 | 0 | oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next]; |
617 | 1 | oldchar->next = offset; |
618 | 1 | } |
619 | 164 | return character; |
620 | 164 | } |
621 | | |
622 | | /* Look up a character-dots mapping in a display table. */ |
623 | | |
624 | | static CharDotsMapping * |
625 | 108 | getDotsForChar(widechar c, const DisplayTableHeader *table) { |
626 | 108 | if (table == NULL) return NULL; |
627 | 108 | CharDotsMapping *cdPtr; |
628 | 108 | const TranslationTableOffset bucket = table->charToDots[_lou_charHash(c)]; |
629 | 108 | TranslationTableOffset offset = bucket; |
630 | 108 | while (offset) { |
631 | 14 | cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
632 | 14 | if (cdPtr->lookFor == c) return cdPtr; |
633 | 0 | offset = cdPtr->next; |
634 | 0 | } |
635 | 94 | return NULL; |
636 | 108 | } |
637 | | |
638 | | static CharDotsMapping * |
639 | 175 | getCharForDots(widechar d, const DisplayTableHeader *table) { |
640 | 175 | if (table == NULL) return NULL; |
641 | 175 | CharDotsMapping *cdPtr; |
642 | 175 | const TranslationTableOffset bucket = table->dotsToChar[_lou_charHash(d)]; |
643 | 175 | TranslationTableOffset offset = bucket; |
644 | 175 | while (offset) { |
645 | 22 | cdPtr = (CharDotsMapping *)&table->ruleArea[offset]; |
646 | 22 | if (cdPtr->lookFor == d) return cdPtr; |
647 | 0 | offset = cdPtr->next; |
648 | 0 | } |
649 | 153 | return NULL; |
650 | 175 | } |
651 | | |
652 | | widechar EXPORT_CALL |
653 | 0 | _lou_getDotsForChar(widechar c, const DisplayTableHeader *table) { |
654 | 0 | CharDotsMapping *cdPtr = getDotsForChar(c, table); |
655 | 0 | if (cdPtr) return cdPtr->found; |
656 | 0 | return LOU_DOTS; |
657 | 0 | } |
658 | | |
659 | | widechar EXPORT_CALL |
660 | 67 | _lou_getCharForDots(widechar d, const DisplayTableHeader *table) { |
661 | 67 | CharDotsMapping *cdPtr = getCharForDots(d, table); |
662 | 67 | if (cdPtr) return cdPtr->found; |
663 | 59 | return '\0'; |
664 | 67 | } |
665 | | |
666 | | static int |
667 | | putCharDotsMapping( |
668 | 108 | const FileInfo *file, widechar c, widechar d, DisplayTableHeader **table) { |
669 | 108 | if (!getDotsForChar(c, *table)) { |
670 | 94 | CharDotsMapping *cdPtr; |
671 | 94 | TranslationTableOffset offset; |
672 | 94 | if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
673 | 94 | cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
674 | 94 | cdPtr->next = 0; |
675 | 94 | cdPtr->lookFor = c; |
676 | 94 | cdPtr->found = d; |
677 | 94 | const unsigned long int charHash = _lou_charHash(c); |
678 | 94 | const TranslationTableOffset bucket = (*table)->charToDots[charHash]; |
679 | 94 | if (!bucket) |
680 | 94 | (*table)->charToDots[charHash] = offset; |
681 | 0 | else { |
682 | 0 | CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
683 | 0 | while (oldcdPtr->next) |
684 | 0 | oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
685 | 0 | oldcdPtr->next = offset; |
686 | 0 | } |
687 | 94 | } |
688 | 108 | if (!getCharForDots(d, *table)) { |
689 | 94 | CharDotsMapping *cdPtr; |
690 | 94 | TranslationTableOffset offset; |
691 | 94 | if (!allocateSpaceInDisplayTable(file, &offset, sizeof(*cdPtr), table)) return 0; |
692 | 94 | cdPtr = (CharDotsMapping *)&(*table)->ruleArea[offset]; |
693 | 94 | cdPtr->next = 0; |
694 | 94 | cdPtr->lookFor = d; |
695 | 94 | cdPtr->found = c; |
696 | 94 | const unsigned long int charHash = _lou_charHash(d); |
697 | 94 | const TranslationTableOffset bucket = (*table)->dotsToChar[charHash]; |
698 | 94 | if (!bucket) |
699 | 94 | (*table)->dotsToChar[charHash] = offset; |
700 | 0 | else { |
701 | 0 | CharDotsMapping *oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[bucket]; |
702 | 0 | while (oldcdPtr->next) |
703 | 0 | oldcdPtr = (CharDotsMapping *)&(*table)->ruleArea[oldcdPtr->next]; |
704 | 0 | oldcdPtr->next = offset; |
705 | 0 | } |
706 | 94 | } |
707 | 108 | return 1; |
708 | 108 | } |
709 | | |
/* Name of a rule part for use in messages: "action" when `actionPart` is
 * non-zero, "test" otherwise. */
static inline const char *
getPartName(int actionPart) {
	if (actionPart) return "action";
	return "test";
}
714 | | |
/* Scan the test instructions of a multipass rule for the first literal
 * characters (string or dots operand) that are not covered by a preceding
 * lookback.
 *
 * instructions  the instruction words, scanned from index 0
 * end           number of instruction words to consider
 * characters    out: points into `instructions` at the literal found, or
 *               NULL when there is none
 * length        out: number of literal characters found, or 0
 *
 * Returns 1 on success (whether or not a literal was found) and 0 after
 * reporting an unknown instruction. */
static int
passFindCharacters(const FileInfo *file, widechar *instructions, int end,
		widechar **characters, int *length) {
	int IC = 0;
	/* number of characters a pass_lookback has stepped back that still have
	 * to be consumed by the operands that follow */
	int lookback = 0;

	*characters = NULL;
	*length = 0;

	while (IC < end) {
		widechar instruction = instructions[IC];

		switch (instruction) {
		case pass_string:
		case pass_dots: {
			/* operand layout: opcode, count, then `count` characters */
			int count = instructions[IC + 1];
			IC += 2;
			if (count > lookback) {
				/* the part beyond the lookback is the literal wanted */
				*characters = &instructions[IC + lookback];
				*length = count - lookback;
				return 1;
			} else {
				lookback -= count;
			}
			IC += count;
			continue;
		}

		case pass_attributes:
			/* seven words in total; the last two are compared below.
			 * NOTE(review): presumably the minimum and maximum repeat
			 * counts - confirm against the instruction encoder. */
			IC += 7;
			if (instructions[IC - 2] == instructions[IC - 1] &&
					instructions[IC - 1] <= lookback) {
				/* fixed-length match that lies entirely within the lookback */
				lookback -= instructions[IC - 1];
				continue;
			}
			goto NO_CHARACTERS;

		case pass_swap:
			IC += 2;
			/* fall through */

		case pass_groupstart:
		case pass_groupend:
		case pass_groupreplace:
			IC += 3;

		/* reached by fall-through from the cases above and by goto */
		NO_CHARACTERS : { return 1; }

		case pass_eq:
		case pass_lt:
		case pass_gt:
		case pass_lteq:
		case pass_gteq:
			/* three-word instructions that match no characters */
			IC += 3;
			continue;

		case pass_lookback:
			lookback += instructions[IC + 1];
			IC += 2;
			continue;

		case pass_not:
		case pass_startReplace:
		case pass_endReplace:
		case pass_first:
		case pass_last:
		case pass_copy:
		case pass_omit:
		case pass_plus:
		case pass_hyphen:
			/* single-word instructions */
			IC += 1;
			continue;

		case pass_endTest:
			goto NO_CHARACTERS;

		default:
			compileError(file, "unhandled test suboperand: \\x%02x", instruction);
			return 0;
		}
	}
	/* ran off the end without finding a literal */
	goto NO_CHARACTERS;
}
798 | | |
799 | | static const char * |
800 | 14 | printSource(const char *currentFile, const char *sourceFile, int sourceLine) { |
801 | 14 | static char scratchBuf[MAXSTRING]; |
802 | 14 | if (sourceFile) { |
803 | 14 | if (currentFile && strcmp(currentFile, sourceFile) == 0) |
804 | 14 | snprintf(scratchBuf, MAXSTRING, "line %d", sourceLine); |
805 | 0 | else |
806 | 0 | snprintf(scratchBuf, MAXSTRING, "%s:%d", sourceFile, sourceLine); |
807 | 14 | } else |
808 | 0 | snprintf(scratchBuf, MAXSTRING, "source unknown"); |
809 | 14 | return scratchBuf; |
810 | 14 | } |
811 | | |
812 | | /* The following functions are called by addRule to handle various cases. */ |
813 | | |
/* Attach a forward rule whose match is a single character (or, for
 * pass2..pass4 rules, a single dot pattern) to that character's entry.
 *
 * ruleOffset  offset of the rule in the rule area of *table
 * rule        pointer to that rule; re-derived from ruleOffset internally
 *             whenever the table may have moved
 * table       may be reallocated (and therefore moved) by this call
 *
 * compdots/comp6 rules are only stored in the character's compRule slot.
 * All other rules are linked into the character's otherRules list. */
static void
addForwardRuleWithSingleChar(const FileInfo *file, TranslationTableOffset ruleOffset,
		TranslationTableRule *rule, TranslationTableHeader **table) {
	/* direction = 0, rule->charslen = 1 */
	TranslationTableCharacter *character;
	// get the character from the table, or if the character is not defined yet, define it
	// (without adding attributes)
	if (rule->opcode >= CTO_Pass2 && rule->opcode <= CTO_Pass4) {
		character = putDots(file, rule->charsdots[0], table, rule->index);
		// putDots may have moved table, so make sure rule is still valid
		rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
	} else if (rule->opcode == CTO_CompDots || rule->opcode == CTO_Comp6) {
		character = putChar(file, rule->charsdots[0], table, NULL, rule->index);
		// putChar may have moved table, so make sure rule is still valid
		rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
		character->compRule = ruleOffset;
		return;
	} else {
		character = putChar(file, rule->charsdots[0], table, NULL, rule->index);
		// putChar may have moved table, so make sure rule is still valid
		rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
		// if the new rule is a character definition rule, set the main definition rule of
		// this character to it, but don't override existing character definitions rules
		// or base rules
		// adding the attributes to the character has already been done elsewhere
		if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) {
			if (character->definitionRule) {
				TranslationTableRule *prevRule =
						(TranslationTableRule *)&(*table)
								->ruleArea[character->definitionRule];
				// NOTE(review): the names are copied, presumably because
				// _lou_findOpcodeName reuses one buffer between calls - confirm.
				// The strdup results are not checked for NULL before being
				// passed to a %s conversion.
				char *prevOpcodeName = strdup(_lou_findOpcodeName(prevRule->opcode));
				char *newOpcodeName = strdup(_lou_findOpcodeName(rule->opcode));
				_lou_logMessage(LOU_LOG_DEBUG,
						"%s:%d: Character already defined (%s). The existing %s rule "
						"will take precedence over the new %s rule.",
						file->fileName, file->lineNumber,
						printSource(file->sourceFile, prevRule->sourceFile,
								prevRule->sourceLine),
						prevOpcodeName, newOpcodeName);
				free(prevOpcodeName);
				free(newOpcodeName);
			} else {
				character->definitionRule = ruleOffset;
			}
		}
	}
	// add the new rule to the list of rules associated with this character
	// if the new rule is a character definition rule, it is inserted at the end of the
	// list, otherwise it is inserted before the first character definition rule
	// in other words, rules are considered in the order in which they are defined in the
	// table
	TranslationTableOffset *otherRule = &character->otherRules;
	while (*otherRule) {
		TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule];
		// stop in front of a rule that matches no characters
		if (r->charslen == 0) break;
		// stop in front of the first definition rule unless the new rule is a
		// definition rule itself
		if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow)
			if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break;
		otherRule = &r->charsnext;
	}
	// splice the new rule in at the position found
	rule->charsnext = *otherRule;
	*otherRule = ruleOffset;
}
876 | | |
877 | | static void |
878 | | addForwardRuleWithMultipleChars(TranslationTableOffset ruleOffset, |
879 | 203 | TranslationTableRule *rule, TranslationTableHeader *table) { |
880 | | /* direction = 0 rule->charslen > 1 */ |
881 | 203 | TranslationTableOffset *forRule = |
882 | 203 | &table->forRules[_lou_stringHash(&rule->charsdots[0], 0, NULL)]; |
883 | 4.50k | while (*forRule) { |
884 | 4.30k | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forRule]; |
885 | 4.30k | if (rule->charslen > r->charslen) break; |
886 | 4.29k | if (rule->charslen == r->charslen) |
887 | 4.29k | if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
888 | 4.29k | forRule = &r->charsnext; |
889 | 4.29k | } |
890 | 203 | rule->charsnext = *forRule; |
891 | 203 | *forRule = ruleOffset; |
892 | 203 | } |
893 | | |
894 | | static void |
895 | | addBackwardRuleWithSingleCell(const FileInfo *file, widechar cell, |
896 | | TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
897 | 217 | TranslationTableHeader **table) { |
898 | | /* direction = 1, rule->dotslen = 1 */ |
899 | 217 | TranslationTableCharacter *dots; |
900 | 217 | if (rule->opcode == CTO_SwapCc || rule->opcode == CTO_Repeated) |
901 | 1 | return; /* too ambiguous */ |
902 | | // get the cell from the table, or if the cell is not defined yet, define it (without |
903 | | // adding attributes) |
904 | 216 | dots = putDots(file, cell, table, rule->index); |
905 | | // putDots may have moved table, so make sure rule is still valid |
906 | 216 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
907 | 216 | if (rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow) |
908 | 104 | dots->definitionRule = ruleOffset; |
909 | 216 | TranslationTableOffset *otherRule = &dots->otherRules; |
910 | 395 | while (*otherRule) { |
911 | 181 | TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[*otherRule]; |
912 | 181 | if (rule->charslen > r->charslen || r->dotslen == 0) break; |
913 | 179 | if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow) |
914 | 105 | if (!(rule->opcode >= CTO_Space && rule->opcode < CTO_UpLow)) break; |
915 | 179 | otherRule = &r->dotsnext; |
916 | 179 | } |
917 | 216 | rule->dotsnext = *otherRule; |
918 | 216 | *otherRule = ruleOffset; |
919 | 216 | } |
920 | | |
921 | | static void |
922 | | addBackwardRuleWithMultipleCells(widechar *cells, int dotslen, |
923 | | TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
924 | 34 | TranslationTableHeader *table) { |
925 | | /* direction = 1, dotslen > 1 */ |
926 | 34 | TranslationTableOffset *backRule = &table->backRules[_lou_stringHash(cells, 0, NULL)]; |
927 | 34 | if (rule->opcode == CTO_SwapCc) return; |
928 | 34 | int ruleLength = dotslen + rule->charslen; |
929 | 99 | while (*backRule) { |
930 | 65 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backRule]; |
931 | 65 | int rLength = r->dotslen + r->charslen; |
932 | 65 | if (ruleLength > rLength) break; |
933 | 65 | if (rLength == ruleLength) |
934 | 65 | if ((r->opcode == CTO_Always) && (rule->opcode != CTO_Always)) break; |
935 | 65 | backRule = &r->dotsnext; |
936 | 65 | } |
937 | 34 | rule->dotsnext = *backRule; |
938 | 34 | *backRule = ruleOffset; |
939 | 34 | } |
940 | | |
941 | | static int |
942 | | addForwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
943 | 65 | TranslationTableHeader *table) { |
944 | 65 | TranslationTableOffset *forPassRule; |
945 | 65 | switch (rule->opcode) { |
946 | 40 | case CTO_Correct: |
947 | 40 | forPassRule = &table->forPassRules[0]; |
948 | 40 | break; |
949 | 10 | case CTO_Context: |
950 | 10 | forPassRule = &table->forPassRules[1]; |
951 | 10 | break; |
952 | 5 | case CTO_Pass2: |
953 | 5 | forPassRule = &table->forPassRules[2]; |
954 | 5 | break; |
955 | 5 | case CTO_Pass3: |
956 | 5 | forPassRule = &table->forPassRules[3]; |
957 | 5 | break; |
958 | 5 | case CTO_Pass4: |
959 | 5 | forPassRule = &table->forPassRules[4]; |
960 | 5 | break; |
961 | 0 | default: |
962 | 0 | return 0; |
963 | 65 | } |
964 | 70 | while (*forPassRule) { |
965 | 5 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*forPassRule]; |
966 | 5 | if (rule->charslen > r->charslen) break; |
967 | 5 | forPassRule = &r->charsnext; |
968 | 5 | } |
969 | 65 | rule->charsnext = *forPassRule; |
970 | 65 | *forPassRule = ruleOffset; |
971 | 65 | return 1; |
972 | 65 | } |
973 | | |
974 | | static int |
975 | | addBackwardPassRule(TranslationTableOffset ruleOffset, TranslationTableRule *rule, |
976 | 0 | TranslationTableHeader *table) { |
977 | 0 | TranslationTableOffset *backPassRule; |
978 | 0 | switch (rule->opcode) { |
979 | 0 | case CTO_Correct: |
980 | 0 | backPassRule = &table->backPassRules[0]; |
981 | 0 | break; |
982 | 0 | case CTO_Context: |
983 | 0 | backPassRule = &table->backPassRules[1]; |
984 | 0 | break; |
985 | 0 | case CTO_Pass2: |
986 | 0 | backPassRule = &table->backPassRules[2]; |
987 | 0 | break; |
988 | 0 | case CTO_Pass3: |
989 | 0 | backPassRule = &table->backPassRules[3]; |
990 | 0 | break; |
991 | 0 | case CTO_Pass4: |
992 | 0 | backPassRule = &table->backPassRules[4]; |
993 | 0 | break; |
994 | 0 | default: |
995 | 0 | return 0; |
996 | 0 | } |
997 | 0 | while (*backPassRule) { |
998 | 0 | TranslationTableRule *r = (TranslationTableRule *)&table->ruleArea[*backPassRule]; |
999 | 0 | if (rule->charslen > r->charslen) break; |
1000 | 0 | backPassRule = &r->dotsnext; |
1001 | 0 | } |
1002 | 0 | rule->dotsnext = *backPassRule; |
1003 | 0 | *backPassRule = ruleOffset; |
1004 | 0 | return 1; |
1005 | 0 | } |
1006 | | |
1007 | | static int |
1008 | | addRule(const FileInfo *file, TranslationTableOpcode opcode, CharsString *ruleChars, |
1009 | | CharsString *ruleDots, TranslationTableCharacterAttributes after, |
1010 | | TranslationTableCharacterAttributes before, TranslationTableOffset *ruleOffset, |
1011 | | TranslationTableRule **rule, int noback, int nofor, |
1012 | 488 | TranslationTableHeader **table) { |
1013 | | /* Add a rule to the table, using the hash function to find the start of |
1014 | | * chains and chaining both the chars and dots strings */ |
1015 | 488 | TranslationTableOffset offset; |
1016 | 488 | int ruleSize = sizeof(TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE); |
1017 | 488 | if (ruleChars) ruleSize += CHARSIZE * ruleChars->length; |
1018 | 488 | if (ruleDots) ruleSize += CHARSIZE * ruleDots->length; |
1019 | 488 | if (!allocateSpaceInTranslationTable(file, &offset, ruleSize, table)) return 0; |
1020 | 488 | TranslationTableRule *r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
1021 | 488 | if (rule) *rule = r; |
1022 | 488 | if (ruleOffset) *ruleOffset = offset; |
1023 | 488 | r->sourceFile = file->sourceFile; |
1024 | 488 | r->sourceLine = file->lineNumber; |
1025 | 488 | r->index = (*table)->ruleCounter++; |
1026 | 488 | r->opcode = opcode; |
1027 | 488 | r->after = after; |
1028 | 488 | r->before = before; |
1029 | 488 | r->nocross = 0; |
1030 | 488 | if (ruleChars) |
1031 | 419 | memcpy(&r->charsdots[0], &ruleChars->chars[0], |
1032 | 419 | CHARSIZE * (r->charslen = ruleChars->length)); |
1033 | 69 | else |
1034 | 69 | r->charslen = 0; |
1035 | 488 | if (ruleDots) |
1036 | 482 | memcpy(&r->charsdots[r->charslen], &ruleDots->chars[0], |
1037 | 482 | CHARSIZE * (r->dotslen = ruleDots->length)); |
1038 | 6 | else |
1039 | 6 | r->dotslen = 0; |
1040 | | |
1041 | | /* link new rule into table. */ |
1042 | 488 | if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd) return 1; |
1043 | 485 | if (opcode >= CTO_Context && opcode <= CTO_Pass4) |
1044 | 65 | if (!(opcode == CTO_Context && r->charslen > 0)) { |
1045 | 65 | if (!nofor) |
1046 | 65 | if (!addForwardPassRule(offset, r, *table)) return 0; |
1047 | 65 | if (!noback) |
1048 | 0 | if (!addBackwardPassRule(offset, r, *table)) return 0; |
1049 | 65 | return 1; |
1050 | 65 | } |
1051 | 420 | if (!nofor) { |
1052 | 420 | if (r->charslen == 1) { |
1053 | 148 | addForwardRuleWithSingleChar(file, offset, r, table); |
1054 | | // addForwardRuleWithSingleChar may have moved table, so make sure rule is |
1055 | | // still valid |
1056 | 148 | r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
1057 | 148 | if (rule) *rule = r; |
1058 | 272 | } else if (r->charslen > 1) |
1059 | 203 | addForwardRuleWithMultipleChars(offset, r, *table); |
1060 | 420 | } |
1061 | 420 | if (!noback) { |
1062 | 262 | widechar *cells; |
1063 | 262 | int dotslen; |
1064 | | |
1065 | 262 | if (r->opcode == CTO_Context) { |
1066 | 0 | cells = &r->charsdots[0]; |
1067 | 0 | dotslen = r->charslen; |
1068 | 262 | } else { |
1069 | 262 | cells = &r->charsdots[r->charslen]; |
1070 | 262 | dotslen = r->dotslen; |
1071 | 262 | } |
1072 | 262 | if (dotslen == 1) { |
1073 | 217 | addBackwardRuleWithSingleCell(file, *cells, offset, r, table); |
1074 | | // addBackwardRuleWithSingleCell may have moved table, so make sure rule is |
1075 | | // still valid |
1076 | 217 | r = (TranslationTableRule *)&(*table)->ruleArea[offset]; |
1077 | 217 | if (rule) *rule = r; |
1078 | 217 | } else if (dotslen > 1) |
1079 | 34 | addBackwardRuleWithMultipleCells(cells, dotslen, offset, r, *table); |
1080 | 262 | } |
1081 | 420 | return 1; |
1082 | 485 | } |
1083 | | |
1084 | | static const CharacterClass * |
1085 | 59 | findCharacterClass(const CharsString *name, const TranslationTableHeader *table) { |
1086 | | /* Find a character class, whether predefined or user-defined */ |
1087 | 59 | const CharacterClass *class = table->characterClasses; |
1088 | 416 | while (class) { |
1089 | 394 | if ((name->length == class->length) && |
1090 | 81 | (memcmp(&name->chars[0], class->name, CHARSIZE * name->length) == 0)) |
1091 | 37 | return class; |
1092 | 357 | class = class->next; |
1093 | 357 | } |
1094 | 22 | return NULL; |
1095 | 59 | } |
1096 | | |
1097 | | static TranslationTableCharacterAttributes |
1098 | 0 | getNextNumberedAttribute(TranslationTableHeader *table) { |
1099 | | /* Get the next attribute value for numbered attributes, or 0 if there is no more |
1100 | | * space in the table. */ |
1101 | 0 | TranslationTableCharacterAttributes next = table->nextNumberedCharacterClassAttribute; |
1102 | 0 | if (next > CTC_UserDefined8) return 0; |
1103 | 0 | table->nextNumberedCharacterClassAttribute <<= 1; |
1104 | 0 | return next; |
1105 | 0 | } |
1106 | | |
1107 | | static TranslationTableCharacterAttributes |
1108 | 327 | getNextAttribute(TranslationTableHeader *table) { |
1109 | | /* Get the next attribute value, or 0 if there is no more space in the table. */ |
1110 | 327 | TranslationTableCharacterAttributes next = table->nextCharacterClassAttribute; |
1111 | 327 | if (next) { |
1112 | 327 | if (next == CTC_LitDigit) |
1113 | 34 | table->nextCharacterClassAttribute = CTC_UserDefined9; |
1114 | 293 | else |
1115 | 293 | table->nextCharacterClassAttribute <<= 1; |
1116 | 327 | return next; |
1117 | 327 | } else |
1118 | 0 | return getNextNumberedAttribute(table); |
1119 | 327 | } |
1120 | | |
1121 | | static CharacterClass * |
1122 | | addCharacterClass(const FileInfo *file, const widechar *name, int length, |
1123 | 327 | TranslationTableHeader *table, int validate) { |
1124 | | /* Define a character class, Whether predefined or user-defined */ |
1125 | 327 | if (validate) { |
1126 | 358 | for (int i = 0; i < length; i++) { |
1127 | 337 | if (!((name[i] >= 'a' && name[i] <= 'z') || |
1128 | 173 | (name[i] >= 'A' && name[i] <= 'Z'))) { |
1129 | 143 | compileError(file, |
1130 | 143 | "Invalid attribute name: must be a digit between " |
1131 | 143 | "0 and 7 or a word containing only letters"); |
1132 | 143 | } |
1133 | 337 | } |
1134 | | // check that name is not reserved |
1135 | 21 | int k = 0; |
1136 | 609 | while (reservedAttributeNames[k]) { |
1137 | 588 | if (strlen(reservedAttributeNames[k]) == length) { |
1138 | 26 | int i; |
1139 | 28 | for (i = 0; i < length; i++) |
1140 | 28 | if (reservedAttributeNames[k][i] != name[i]) break; |
1141 | 26 | if (i == length) { |
1142 | 0 | compileError(file, "Attribute name is reserved: %s", |
1143 | 0 | reservedAttributeNames[k]); |
1144 | 0 | return NULL; |
1145 | 0 | } |
1146 | 26 | } |
1147 | 588 | k++; |
1148 | 588 | } |
1149 | 21 | } |
1150 | 327 | CharacterClass **classes = &table->characterClasses; |
1151 | 327 | TranslationTableCharacterAttributes attribute = getNextAttribute(table); |
1152 | 327 | CharacterClass *class; |
1153 | 327 | if (attribute) { |
1154 | 327 | if (!(class = malloc(sizeof(*class) + CHARSIZE * (length - 1)))) |
1155 | 0 | _lou_outOfMemory(); |
1156 | 327 | else { |
1157 | 327 | memset(class, 0, sizeof(*class)); |
1158 | 327 | memcpy(class->name, name, CHARSIZE * (class->length = length)); |
1159 | 327 | class->attribute = attribute; |
1160 | 327 | class->next = *classes; |
1161 | 327 | *classes = class; |
1162 | 327 | return class; |
1163 | 327 | } |
1164 | 327 | } |
1165 | 0 | compileError(file, "character class table overflow."); |
1166 | 0 | return NULL; |
1167 | 327 | } |
1168 | | |
1169 | | static void |
1170 | 34 | deallocateCharacterClasses(TranslationTableHeader *table) { |
1171 | 34 | CharacterClass **classes = &table->characterClasses; |
1172 | 361 | while (*classes) { |
1173 | 327 | CharacterClass *class = *classes; |
1174 | 327 | *classes = (*classes)->next; |
1175 | 327 | if (class) free(class); |
1176 | 327 | } |
1177 | 34 | } |
1178 | | |
1179 | | static int |
1180 | 34 | allocateCharacterClasses(TranslationTableHeader *table) { |
1181 | | /* Allocate memory for predefined character classes */ |
1182 | 34 | int k = 0; |
1183 | 34 | table->characterClasses = NULL; |
1184 | 34 | table->nextCharacterClassAttribute = 1; // CTC_Space |
1185 | 34 | table->nextNumberedCharacterClassAttribute = CTC_UserDefined1; |
1186 | 340 | while (characterClassNames[k]) { |
1187 | 306 | widechar wname[MAXSTRING]; |
1188 | 306 | int length = (int)strlen(characterClassNames[k]); |
1189 | 306 | int kk; |
1190 | 2.38k | for (kk = 0; kk < length; kk++) wname[kk] = (widechar)characterClassNames[k][kk]; |
1191 | 306 | if (!addCharacterClass(NULL, wname, length, table, 0)) { |
1192 | 0 | deallocateCharacterClasses(table); |
1193 | 0 | return 0; |
1194 | 0 | } |
1195 | 306 | k++; |
1196 | 306 | } |
1197 | 34 | return 1; |
1198 | 34 | } |
1199 | | |
1200 | | static TranslationTableOpcode |
1201 | 739 | getOpcode(const FileInfo *file, const CharsString *token) { |
1202 | 739 | static TranslationTableOpcode lastOpcode = 0; |
1203 | 739 | TranslationTableOpcode opcode = lastOpcode; |
1204 | | |
1205 | 26.0k | do { |
1206 | 26.0k | if (token->length == opcodeLengths[opcode]) |
1207 | 3.20k | if (eqasc2uni((unsigned char *)opcodeNames[opcode], &token->chars[0], |
1208 | 3.20k | token->length)) { |
1209 | 729 | lastOpcode = opcode; |
1210 | 729 | return opcode; |
1211 | 729 | } |
1212 | 25.3k | opcode++; |
1213 | 25.3k | if (opcode >= CTO_None) opcode = 0; |
1214 | 25.3k | } while (opcode != lastOpcode); |
1215 | 10 | return CTO_None; |
1216 | 739 | } |
1217 | | |
1218 | | TranslationTableOpcode EXPORT_CALL |
1219 | 0 | _lou_findOpcodeNumber(const char *toFind) { |
1220 | | /* Used by tools such as lou_debug */ |
1221 | 0 | static TranslationTableOpcode lastOpcode = 0; |
1222 | 0 | TranslationTableOpcode opcode = lastOpcode; |
1223 | 0 | int length = (int)strlen(toFind); |
1224 | 0 | do { |
1225 | 0 | if (length == opcodeLengths[opcode] && |
1226 | 0 | strcasecmp(toFind, opcodeNames[opcode]) == 0) { |
1227 | 0 | lastOpcode = opcode; |
1228 | 0 | return opcode; |
1229 | 0 | } |
1230 | 0 | opcode++; |
1231 | 0 | if (opcode >= CTO_None) opcode = 0; |
1232 | 0 | } while (opcode != lastOpcode); |
1233 | 0 | return CTO_None; |
1234 | 0 | } |
1235 | | |
1236 | | const char *EXPORT_CALL |
1237 | 28 | _lou_findOpcodeName(TranslationTableOpcode opcode) { |
1238 | 28 | static char scratchBuf[MAXSTRING]; |
1239 | | /* Used by tools such as lou_debug */ |
1240 | 28 | if (opcode < 0 || opcode >= CTO_None) { |
1241 | 0 | sprintf(scratchBuf, "%u", opcode); |
1242 | 0 | return scratchBuf; |
1243 | 0 | } |
1244 | 28 | return opcodeNames[opcode]; |
1245 | 28 | } |
1246 | | |
1247 | | static widechar |
1248 | 892 | hexValue(const FileInfo *file, const widechar *digits, int length) { |
1249 | 892 | int k; |
1250 | 892 | unsigned int binaryValue = 0; |
1251 | 3.51k | for (k = 0; k < length; k++) { |
1252 | 3.38k | unsigned int hexDigit = 0; |
1253 | 3.38k | if (digits[k] >= '0' && digits[k] <= '9') |
1254 | 2.28k | hexDigit = digits[k] - '0'; |
1255 | 1.10k | else if (digits[k] >= 'a' && digits[k] <= 'f') |
1256 | 336 | hexDigit = digits[k] - 'a' + 10; |
1257 | 764 | else if (digits[k] >= 'A' && digits[k] <= 'F') |
1258 | 2 | hexDigit = digits[k] - 'A' + 10; |
1259 | 762 | else { |
1260 | 762 | compileError(file, "invalid %d-digit hexadecimal number", length); |
1261 | 762 | return (widechar)0xffffffff; |
1262 | 762 | } |
1263 | 2.62k | binaryValue |= hexDigit << (4 * (length - 1 - k)); |
1264 | 2.62k | } |
1265 | 130 | return (widechar)binaryValue; |
1266 | 892 | } |
1267 | | |
/* Number of entries in first0Bit. */
#define MAXBYTES 7
/* first0Bit[i] is the byte value whose top i + 1 bits are set and whose
 * remaining bits are clear. */
static const unsigned int first0Bit[MAXBYTES] = {
	0x80,
	0xC0,
	0xE0,
	0xF0,
	0xF8,
	0xFC,
	0xFE,
};
1271 | | |
1272 | | static int |
1273 | 460 | parseChars(const FileInfo *file, CharsString *result, CharsString *token) { |
1274 | 460 | int in = 0; |
1275 | 460 | int out = 0; |
1276 | 460 | int lastOutSize = 0; |
1277 | 460 | int lastIn; |
1278 | 460 | unsigned int ch = 0; |
1279 | 460 | int numBytes = 0; |
1280 | 460 | unsigned int utf32 = 0; |
1281 | 460 | int k; |
1282 | 67.6k | while (in < token->length) { |
1283 | 67.1k | ch = token->chars[in++] & 0xff; |
1284 | 67.1k | if (ch < 128) { |
1285 | 57.4k | if (ch == '\\') { /* escape sequence */ |
1286 | 3.27k | switch (ch = token->chars[in]) { |
1287 | 170 | case '\\': |
1288 | 170 | break; |
1289 | 0 | case 'e': |
1290 | 0 | ch = 0x1b; |
1291 | 0 | break; |
1292 | 7 | case 'f': |
1293 | 7 | ch = 12; |
1294 | 7 | break; |
1295 | 1 | case 'n': |
1296 | 1 | ch = 10; |
1297 | 1 | break; |
1298 | 224 | case 'r': |
1299 | 224 | ch = 13; |
1300 | 224 | break; |
1301 | 51 | case 's': |
1302 | 51 | ch = ' '; |
1303 | 51 | break; |
1304 | 0 | case 't': |
1305 | 0 | ch = 9; |
1306 | 0 | break; |
1307 | 340 | case 'v': |
1308 | 340 | ch = 11; |
1309 | 340 | break; |
1310 | 748 | case 'w': |
1311 | 748 | ch = LOU_ENDSEGMENT; |
1312 | 748 | break; |
1313 | 0 | case 34: |
1314 | 0 | ch = QUOTESUB; |
1315 | 0 | break; |
1316 | 0 | case 'X': |
1317 | 0 | compileWarning(file, "\\Xhhhh (with a capital 'X') is deprecated."); |
1318 | 892 | case 'x': |
1319 | 892 | if (token->length - in > 4) { |
1320 | 892 | ch = hexValue(file, &token->chars[in + 1], 4); |
1321 | 892 | in += 4; |
1322 | 892 | } |
1323 | 892 | break; |
1324 | 51 | case 'Y': |
1325 | 51 | compileWarning(file, "\\Yhhhhh (with a capital 'Y') is deprecated."); |
1326 | 588 | case 'y': |
1327 | 588 | if (CHARSIZE == 2) { |
1328 | 835 | not32: |
1329 | 835 | compileError(file, |
1330 | 835 | "liblouis has not been compiled for 32-bit Unicode"); |
1331 | 835 | break; |
1332 | 588 | } |
1333 | 0 | if (token->length - in > 5) { |
1334 | 0 | ch = hexValue(file, &token->chars[in + 1], 5); |
1335 | 0 | in += 5; |
1336 | 0 | } |
1337 | 0 | break; |
1338 | 2 | case 'Z': |
1339 | 2 | compileWarning( |
1340 | 2 | file, "\\Zhhhhhhhh (with a capital 'Z') is deprecated."); |
1341 | 247 | case 'z': |
1342 | 247 | if (CHARSIZE == 2) goto not32; |
1343 | 0 | if (token->length - in > 8) { |
1344 | 0 | ch = hexValue(file, &token->chars[in + 1], 8); |
1345 | 0 | in += 8; |
1346 | 0 | } |
1347 | 0 | break; |
1348 | 3 | default: |
1349 | 3 | compileError(file, "invalid escape sequence '\\%c'", ch); |
1350 | 3 | result->length = lastOutSize; |
1351 | 3 | return 0; |
1352 | 3.27k | } |
1353 | 3.26k | in++; |
1354 | 3.26k | } |
1355 | 57.4k | if (out >= MAXSTRING - 1) { |
1356 | 1 | compileError(file, "Token too long"); |
1357 | 1 | result->length = MAXSTRING - 1; |
1358 | 1 | return 0; |
1359 | 1 | } |
1360 | 57.3k | result->chars[out++] = (widechar)ch; |
1361 | 57.3k | continue; |
1362 | 57.4k | } |
1363 | 9.78k | lastOutSize = out; |
1364 | 9.78k | lastIn = in; |
1365 | 33.2k | for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--) |
1366 | 30.2k | if (ch >= first0Bit[numBytes]) break; |
1367 | 9.78k | utf32 = ch & (0XFF - first0Bit[numBytes]); |
1368 | 44.5k | for (k = 0; k < numBytes; k++) { |
1369 | 34.8k | if (in >= MAXSTRING - 1 || in >= token->length) break; |
1370 | 34.7k | if (out >= MAXSTRING - 1) { |
1371 | 2 | compileError(file, "Token too long"); |
1372 | 2 | result->length = lastOutSize; |
1373 | 2 | return 0; |
1374 | 2 | } |
1375 | 34.7k | if (token->chars[in] < 128 || (token->chars[in] & 0x0040)) { |
1376 | 34.5k | compileWarning(file, "invalid UTF-8. Assuming Latin-1."); |
1377 | 34.5k | result->chars[out++] = token->chars[lastIn]; |
1378 | 34.5k | in = lastIn + 1; |
1379 | 34.5k | continue; |
1380 | 34.5k | } |
1381 | 270 | utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f); |
1382 | 270 | } |
1383 | 9.78k | if (out >= MAXSTRING - 1) { |
1384 | 1 | compileError(file, "Token too long"); |
1385 | 1 | result->length = lastOutSize; |
1386 | 1 | return 0; |
1387 | 1 | } |
1388 | 9.78k | if (CHARSIZE == 2 && utf32 > 0xffff) { |
1389 | 2 | compileError(file, "liblouis has not been compiled for 32-bit Unicode"); |
1390 | 2 | result->length = lastOutSize; |
1391 | 2 | return 0; |
1392 | 2 | } |
1393 | 9.78k | result->chars[out++] = (widechar)utf32; |
1394 | 9.78k | } |
1395 | 451 | result->length = out; |
1396 | 451 | return 1; |
1397 | 460 | } |
1398 | | |
1399 | | int EXPORT_CALL |
1400 | 69 | _lou_extParseChars(const char *inString, widechar *outString) { |
1401 | | /* Parse external character strings */ |
1402 | 69 | CharsString wideIn; |
1403 | 69 | CharsString result; |
1404 | 69 | int k; |
1405 | 74.1k | for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
1406 | 69 | wideIn.chars[k] = 0; |
1407 | 69 | wideIn.length = k; |
1408 | 69 | if (!parseChars(NULL, &result, &wideIn)) return 0; |
1409 | 77.7k | for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
1410 | 60 | return result.length; |
1411 | 69 | } |
1412 | | |
1413 | | static int |
1414 | 244 | parseDots(const FileInfo *file, CharsString *cells, const CharsString *token) { |
1415 | | /* get dot patterns */ |
1416 | 244 | widechar cell = 0; /* assembly place for dots */ |
1417 | 244 | int cellCount = 0; |
1418 | 244 | int index; |
1419 | 244 | int start = 0; |
1420 | | |
1421 | 1.77k | for (index = 0; index < token->length; index++) { |
1422 | 1.53k | int started = index != start; |
1423 | 1.53k | widechar character = token->chars[index]; |
1424 | 1.53k | switch (character) { /* or dots to make up Braille cell */ |
1425 | 0 | { |
1426 | 0 | int dot; |
1427 | 104 | case '1': |
1428 | 104 | dot = LOU_DOT_1; |
1429 | 104 | goto haveDot; |
1430 | 89 | case '2': |
1431 | 89 | dot = LOU_DOT_2; |
1432 | 89 | goto haveDot; |
1433 | 87 | case '3': |
1434 | 87 | dot = LOU_DOT_3; |
1435 | 87 | goto haveDot; |
1436 | 94 | case '4': |
1437 | 94 | dot = LOU_DOT_4; |
1438 | 94 | goto haveDot; |
1439 | 85 | case '5': |
1440 | 85 | dot = LOU_DOT_5; |
1441 | 85 | goto haveDot; |
1442 | 85 | case '6': |
1443 | 85 | dot = LOU_DOT_6; |
1444 | 85 | goto haveDot; |
1445 | 85 | case '7': |
1446 | 85 | dot = LOU_DOT_7; |
1447 | 85 | goto haveDot; |
1448 | 94 | case '8': |
1449 | 94 | dot = LOU_DOT_8; |
1450 | 94 | goto haveDot; |
1451 | 93 | case '9': |
1452 | 93 | dot = LOU_DOT_9; |
1453 | 93 | goto haveDot; |
1454 | 97 | case 'a': |
1455 | 99 | case 'A': |
1456 | 99 | dot = LOU_DOT_10; |
1457 | 99 | goto haveDot; |
1458 | 86 | case 'b': |
1459 | 86 | case 'B': |
1460 | 86 | dot = LOU_DOT_11; |
1461 | 86 | goto haveDot; |
1462 | 104 | case 'c': |
1463 | 105 | case 'C': |
1464 | 105 | dot = LOU_DOT_12; |
1465 | 105 | goto haveDot; |
1466 | 114 | case 'd': |
1467 | 116 | case 'D': |
1468 | 116 | dot = LOU_DOT_13; |
1469 | 116 | goto haveDot; |
1470 | 146 | case 'e': |
1471 | 152 | case 'E': |
1472 | 152 | dot = LOU_DOT_14; |
1473 | 152 | goto haveDot; |
1474 | 86 | case 'f': |
1475 | 97 | case 'F': |
1476 | 97 | dot = LOU_DOT_15; |
1477 | 1.47k | haveDot: |
1478 | 1.47k | if (started && !cell) goto invalid; |
1479 | 1.47k | if (cell & dot) { |
1480 | 0 | compileError(file, "dot specified more than once."); |
1481 | 0 | return 0; |
1482 | 0 | } |
1483 | 1.47k | cell |= dot; |
1484 | 1.47k | break; |
1485 | 1.47k | } |
1486 | 28 | case '0': /* blank */ |
1487 | 28 | if (started) goto invalid; |
1488 | 28 | break; |
1489 | 36 | case '-': /* got all dots for this cell */ |
1490 | 36 | if (!started) { |
1491 | 2 | compileError(file, "missing cell specification."); |
1492 | 2 | return 0; |
1493 | 2 | } |
1494 | 34 | cells->chars[cellCount++] = cell | LOU_DOTS; |
1495 | 34 | cell = 0; |
1496 | 34 | start = index + 1; |
1497 | 34 | break; |
1498 | 0 | default: |
1499 | 0 | invalid: |
1500 | 0 | compileError( |
1501 | 0 | file, "invalid dot number %s.", _lou_showString(&character, 1, 0)); |
1502 | 0 | return 0; |
1503 | 1.53k | } |
1504 | 1.53k | } |
1505 | 242 | if (index == start) { |
1506 | 0 | compileError(file, "missing cell specification."); |
1507 | 0 | return 0; |
1508 | 0 | } |
1509 | 242 | cells->chars[cellCount++] = cell | LOU_DOTS; /* last cell */ |
1510 | 242 | cells->length = cellCount; |
1511 | 242 | return 1; |
1512 | 242 | } |
1513 | | |
1514 | | int EXPORT_CALL |
1515 | 0 | _lou_extParseDots(const char *inString, widechar *outString) { |
1516 | | /* Parse external dot patterns */ |
1517 | 0 | CharsString wideIn; |
1518 | 0 | CharsString result; |
1519 | 0 | int k; |
1520 | 0 | for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k]; |
1521 | 0 | wideIn.chars[k] = 0; |
1522 | 0 | wideIn.length = k; |
1523 | 0 | parseDots(NULL, &result, &wideIn); |
1524 | 0 | if (errorCount) { |
1525 | 0 | errorCount = 0; |
1526 | 0 | return 0; |
1527 | 0 | } |
1528 | 0 | for (k = 0; k < result.length; k++) outString[k] = result.chars[k]; |
1529 | 0 | outString[k] = 0; |
1530 | 0 | return result.length; |
1531 | 0 | } |
1532 | | |
1533 | | static int |
1534 | 332 | getCharacters(FileInfo *file, CharsString *characters) { |
1535 | | /* Get ruleChars string */ |
1536 | 332 | CharsString token; |
1537 | 332 | if (!getToken(file, &token, "characters")) return 0; |
1538 | 37 | return parseChars(file, characters, &token); |
1539 | 332 | } |
1540 | | |
1541 | | static int |
1542 | 398 | getRuleCharsText(FileInfo *file, CharsString *ruleChars) { |
1543 | 398 | CharsString token; |
1544 | 398 | if (!getToken(file, &token, "Characters operand")) return 0; |
1545 | 250 | return parseChars(file, ruleChars, &token); |
1546 | 398 | } |
1547 | | |
1548 | | static int |
1549 | 18 | getRuleDotsText(FileInfo *file, CharsString *ruleDots) { |
1550 | 18 | CharsString token; |
1551 | 18 | if (!getToken(file, &token, "characters")) return 0; |
1552 | 18 | return parseChars(file, ruleDots, &token); |
1553 | 18 | } |
1554 | | |
1555 | | static int |
1556 | 314 | getRuleDotsPattern(FileInfo *file, CharsString *ruleDots) { |
1557 | | /* Interpret the dets operand */ |
1558 | 314 | CharsString token; |
1559 | 314 | if (!getToken(file, &token, "Dots operand")) return 0; |
1560 | 166 | if (token.length == 1 && token.chars[0] == '=') { |
1561 | 1 | ruleDots->length = 0; |
1562 | 1 | return 1; |
1563 | 1 | } else |
1564 | 165 | return parseDots(file, ruleDots, &token); |
1565 | 166 | } |
1566 | | |
1567 | | static int |
1568 | | includeFile(const FileInfo *file, CharsString *includedFile, |
1569 | | TranslationTableHeader **table, DisplayTableHeader **displayTable); |
1570 | | |
1571 | | static TranslationTableOffset |
1572 | 10 | findRuleName(const CharsString *name, const TranslationTableHeader *table) { |
1573 | 10 | const RuleName *ruleName = table->ruleNames; |
1574 | 15 | while (ruleName) { |
1575 | 10 | if ((name->length == ruleName->length) && |
1576 | 5 | (memcmp(&name->chars[0], ruleName->name, CHARSIZE * name->length) == 0)) |
1577 | 5 | return ruleName->ruleOffset; |
1578 | 5 | ruleName = ruleName->next; |
1579 | 5 | } |
1580 | 5 | return 0; |
1581 | 10 | } |
1582 | | |
1583 | | static int |
1584 | | addRuleName(const FileInfo *file, CharsString *name, TranslationTableOffset ruleOffset, |
1585 | 5 | TranslationTableHeader *table) { |
1586 | 5 | int k; |
1587 | 5 | RuleName *ruleName; |
1588 | 5 | if (!(ruleName = malloc(sizeof(*ruleName) + CHARSIZE * (name->length - 1)))) { |
1589 | 0 | compileError(file, "not enough memory"); |
1590 | 0 | _lou_outOfMemory(); |
1591 | 0 | } |
1592 | 5 | memset(ruleName, 0, sizeof(*ruleName)); |
1593 | | // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
1594 | 41 | for (k = 0; k < name->length; k++) { |
1595 | 36 | widechar c = name->chars[k]; |
1596 | 36 | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) |
1597 | 36 | ruleName->name[k] = c; |
1598 | 0 | else { |
1599 | 0 | compileError(file, "a name may contain only letters"); |
1600 | 0 | free(ruleName); |
1601 | 0 | return 0; |
1602 | 0 | } |
1603 | 36 | } |
1604 | 5 | ruleName->length = name->length; |
1605 | 5 | ruleName->ruleOffset = ruleOffset; |
1606 | 5 | ruleName->next = table->ruleNames; |
1607 | 5 | table->ruleNames = ruleName; |
1608 | 5 | return 1; |
1609 | 5 | } |
1610 | | |
1611 | | static void |
1612 | 5 | deallocateRuleNames(TranslationTableHeader *table) { |
1613 | 5 | RuleName **ruleName = &table->ruleNames; |
1614 | 10 | while (*ruleName) { |
1615 | 5 | RuleName *rn = *ruleName; |
1616 | 5 | *ruleName = rn->next; |
1617 | 5 | free(rn); |
1618 | 5 | } |
1619 | 5 | } |
1620 | | |
1621 | | static int |
1622 | 2 | compileSwapDots(const FileInfo *file, CharsString *source, CharsString *dest) { |
1623 | 2 | int k = 0; |
1624 | 2 | int kk = 0; |
1625 | 2 | CharsString dotsSource; |
1626 | 2 | CharsString dotsDest; |
1627 | 2 | dest->length = 0; |
1628 | 2 | dotsSource.length = 0; |
1629 | 11 | while (k <= source->length) { |
1630 | 9 | if (source->chars[k] != ',' && k != source->length) |
1631 | 7 | dotsSource.chars[dotsSource.length++] = source->chars[k]; |
1632 | 2 | else { |
1633 | 2 | if (!parseDots(file, &dotsDest, &dotsSource)) return 0; |
1634 | 2 | dest->chars[dest->length++] = dotsDest.length + 1; |
1635 | 6 | for (kk = 0; kk < dotsDest.length; kk++) |
1636 | 4 | dest->chars[dest->length++] = dotsDest.chars[kk]; |
1637 | 2 | dotsSource.length = 0; |
1638 | 2 | } |
1639 | 9 | k++; |
1640 | 9 | } |
1641 | 2 | return 1; |
1642 | 2 | } |
1643 | | |
1644 | | static int |
1645 | | compileSwap(FileInfo *file, TranslationTableOpcode opcode, int noback, int nofor, |
1646 | 3 | TranslationTableHeader **table) { |
1647 | 3 | CharsString ruleChars; |
1648 | 3 | CharsString ruleDots; |
1649 | 3 | CharsString name; |
1650 | 3 | CharsString matches; |
1651 | 3 | CharsString replacements; |
1652 | 3 | TranslationTableOffset ruleOffset; |
1653 | 3 | if (!getToken(file, &name, "name operand")) return 0; |
1654 | 3 | if (!getToken(file, &matches, "matches operand")) return 0; |
1655 | 3 | if (!getToken(file, &replacements, "replacements operand")) return 0; |
1656 | 3 | if (opcode == CTO_SwapCc || opcode == CTO_SwapCd) { |
1657 | 2 | if (!parseChars(file, &ruleChars, &matches)) return 0; |
1658 | 2 | } else { |
1659 | 1 | if (!compileSwapDots(file, &matches, &ruleChars)) return 0; |
1660 | 1 | } |
1661 | 3 | if (opcode == CTO_SwapCc) { |
1662 | 2 | if (!parseChars(file, &ruleDots, &replacements)) return 0; |
1663 | 2 | } else { |
1664 | 1 | if (!compileSwapDots(file, &replacements, &ruleDots)) return 0; |
1665 | 1 | } |
1666 | 3 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, NULL, noback, |
1667 | 3 | nofor, table)) |
1668 | 0 | return 0; |
1669 | 3 | if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
1670 | 3 | return 1; |
1671 | 3 | } |
1672 | | |
1673 | | static int |
1674 | 3 | getNumber(widechar *string, widechar *number) { |
1675 | | /* Convert a string of wide character digits to an integer */ |
1676 | 3 | int k = 0; |
1677 | 3 | *number = 0; |
1678 | 8 | while (string[k] >= '0' && string[k] <= '9') |
1679 | 5 | *number = 10 * *number + (string[k++] - '0'); |
1680 | 3 | return k; |
1681 | 3 | } |
1682 | | |
1683 | | /* Start of multipass compiler */ |
1684 | | |
1685 | | static int |
1686 | | passGetAttributes(CharsString *passLine, int *passLinepos, |
1687 | 29 | TranslationTableCharacterAttributes *attributes, const FileInfo *file) { |
1688 | 29 | int more = 1; |
1689 | 29 | *attributes = 0; |
1690 | 161 | while (more) { |
1691 | 132 | switch (passLine->chars[*passLinepos]) { |
1692 | 28 | case pass_any: |
1693 | 28 | *attributes = 0xffffffff; |
1694 | 28 | break; |
1695 | 5 | case pass_digit: |
1696 | 5 | *attributes |= CTC_Digit; |
1697 | 5 | break; |
1698 | 6 | case pass_litDigit: |
1699 | 6 | *attributes |= CTC_LitDigit; |
1700 | 6 | break; |
1701 | 1 | case pass_letter: |
1702 | 1 | *attributes |= CTC_Letter; |
1703 | 1 | break; |
1704 | 0 | case pass_math: |
1705 | 0 | *attributes |= CTC_Math; |
1706 | 0 | break; |
1707 | 18 | case pass_punctuation: |
1708 | 18 | *attributes |= CTC_Punctuation; |
1709 | 18 | break; |
1710 | 2 | case pass_sign: |
1711 | 2 | *attributes |= CTC_Sign; |
1712 | 2 | break; |
1713 | 31 | case pass_space: |
1714 | 31 | *attributes |= CTC_Space; |
1715 | 31 | break; |
1716 | 0 | case pass_uppercase: |
1717 | 0 | *attributes |= CTC_UpperCase; |
1718 | 0 | break; |
1719 | 0 | case pass_lowercase: |
1720 | 0 | *attributes |= CTC_LowerCase; |
1721 | 0 | break; |
1722 | 6 | case pass_class1: |
1723 | 6 | *attributes |= CTC_UserDefined9; |
1724 | 6 | break; |
1725 | 3 | case pass_class2: |
1726 | 3 | *attributes |= CTC_UserDefined10; |
1727 | 3 | break; |
1728 | 3 | case pass_class3: |
1729 | 3 | *attributes |= CTC_UserDefined11; |
1730 | 3 | break; |
1731 | 0 | case pass_class4: |
1732 | 0 | *attributes |= CTC_UserDefined12; |
1733 | 0 | break; |
1734 | 29 | default: |
1735 | 29 | more = 0; |
1736 | 29 | break; |
1737 | 132 | } |
1738 | 132 | if (more) (*passLinepos)++; |
1739 | 132 | } |
1740 | 29 | if (!*attributes) { |
1741 | 0 | compileError(file, "missing attribute"); |
1742 | 0 | (*passLinepos)--; |
1743 | 0 | return 0; |
1744 | 0 | } |
1745 | 29 | return 1; |
1746 | 29 | } |
1747 | | |
1748 | | static int |
1749 | | passGetDots(CharsString *passLine, int *passLinepos, CharsString *dots, |
1750 | 1 | const FileInfo *file) { |
1751 | 1 | CharsString collectDots; |
1752 | 1 | collectDots.length = 0; |
1753 | 2 | while (*passLinepos < passLine->length && |
1754 | 2 | (passLine->chars[*passLinepos] == '-' || |
1755 | 2 | (passLine->chars[*passLinepos] >= '0' && |
1756 | 1 | passLine->chars[*passLinepos] <= '9') || |
1757 | 1 | ((passLine->chars[*passLinepos] | 32) >= 'a' && |
1758 | 0 | (passLine->chars[*passLinepos] | 32) <= 'f'))) |
1759 | 1 | collectDots.chars[collectDots.length++] = passLine->chars[(*passLinepos)++]; |
1760 | 1 | if (!parseDots(file, dots, &collectDots)) return 0; |
1761 | 1 | return 1; |
1762 | 1 | } |
1763 | | |
1764 | | static int |
1765 | | passGetString(CharsString *passLine, int *passLinepos, CharsString *string, |
1766 | 37 | const FileInfo *file) { |
1767 | 37 | string->length = 0; |
1768 | 912 | while (1) { |
1769 | 912 | if ((*passLinepos >= passLine->length) || !passLine->chars[*passLinepos]) { |
1770 | 0 | compileError(file, "unterminated string"); |
1771 | 0 | return 0; |
1772 | 0 | } |
1773 | 912 | if (passLine->chars[*passLinepos] == 34) break; |
1774 | 875 | if (passLine->chars[*passLinepos] == QUOTESUB) |
1775 | 0 | string->chars[string->length++] = 34; |
1776 | 875 | else |
1777 | 875 | string->chars[string->length++] = passLine->chars[*passLinepos]; |
1778 | 875 | (*passLinepos)++; |
1779 | 875 | } |
1780 | 37 | string->chars[string->length] = 0; |
1781 | 37 | (*passLinepos)++; |
1782 | 37 | return 1; |
1783 | 37 | } |
1784 | | |
1785 | | static int |
1786 | 34 | passGetNumber(CharsString *passLine, int *passLinepos, widechar *number) { |
1787 | | /* Convert a string of wide character digits to an integer */ |
1788 | 34 | *number = 0; |
1789 | 71 | while ((*passLinepos < passLine->length) && (passLine->chars[*passLinepos] >= '0') && |
1790 | 48 | (passLine->chars[*passLinepos] <= '9')) |
1791 | 37 | *number = 10 * (*number) + (passLine->chars[(*passLinepos)++] - '0'); |
1792 | 34 | return 1; |
1793 | 34 | } |
1794 | | |
1795 | | static int |
1796 | | passGetVariableNumber( |
1797 | 0 | const FileInfo *file, CharsString *passLine, int *passLinepos, widechar *number) { |
1798 | 0 | if (!passGetNumber(passLine, passLinepos, number)) { |
1799 | 0 | compileError(file, "missing variable number"); |
1800 | 0 | return 0; |
1801 | 0 | } |
1802 | 0 | if ((*number >= 0) && (*number < NUMVAR)) return 1; |
1803 | 0 | compileError(file, "variable number out of range"); |
1804 | 0 | return 0; |
1805 | 0 | } |
1806 | | |
1807 | | static int |
1808 | 10 | passGetName(CharsString *passLine, int *passLinepos, CharsString *name) { |
1809 | 10 | name->length = 0; |
1810 | | // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z' |
1811 | 41 | do { |
1812 | 41 | widechar c = passLine->chars[*passLinepos]; |
1813 | 41 | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
1814 | 31 | name->chars[name->length++] = c; |
1815 | 31 | (*passLinepos)++; |
1816 | 31 | } else { |
1817 | 10 | break; |
1818 | 10 | } |
1819 | 41 | } while (*passLinepos < passLine->length); |
1820 | 10 | return 1; |
1821 | 10 | } |
1822 | | |
1823 | | static inline int |
1824 | 38 | wantsString(TranslationTableOpcode opcode, int actionPart, int nofor) { |
1825 | 38 | if (opcode == CTO_Correct) return 1; |
1826 | 1 | if (opcode != CTO_Context) return 0; |
1827 | 1 | return !nofor == !actionPart; |
1828 | 1 | } |
1829 | | |
1830 | | static int |
1831 | | verifyStringOrDots(const FileInfo *file, TranslationTableOpcode opcode, int isString, |
1832 | 38 | int actionPart, int nofor) { |
1833 | 38 | if (!wantsString(opcode, actionPart, nofor) == !isString) return 1; |
1834 | | |
1835 | 0 | compileError(file, "%s are not allowed in the %s part of a %s translation %s rule.", |
1836 | 0 | isString ? "strings" : "dots", getPartName(actionPart), |
1837 | 0 | nofor ? "backward" : "forward", _lou_findOpcodeName(opcode)); |
1838 | |
|
1839 | 0 | return 0; |
1840 | 38 | } |
1841 | | |
1842 | | static int |
1843 | | appendInstructionChar( |
1844 | 1.41k | const FileInfo *file, widechar *passInstructions, int *passIC, widechar ch) { |
1845 | 1.41k | if (*passIC >= MAXSTRING) { |
1846 | 0 | compileError(file, "multipass operand too long"); |
1847 | 0 | return 0; |
1848 | 0 | } |
1849 | 1.41k | passInstructions[(*passIC)++] = ch; |
1850 | 1.41k | return 1; |
1851 | 1.41k | } |
1852 | | |
1853 | | static int |
1854 | | compilePassOpcode(const FileInfo *file, TranslationTableOpcode opcode, int noback, |
1855 | 65 | int nofor, TranslationTableHeader **table) { |
1856 | 65 | static CharsString passRuleChars; |
1857 | 65 | static CharsString passRuleDots; |
1858 | | /* Compile the operands of a pass opcode */ |
1859 | 65 | widechar passSubOp; |
1860 | 65 | const CharacterClass *class; |
1861 | 65 | TranslationTableRule *rule = NULL; |
1862 | 65 | int k; |
1863 | 65 | int kk = 0; |
1864 | 65 | int endTest = 0; |
1865 | 65 | widechar *passInstructions = passRuleDots.chars; |
1866 | 65 | int passIC = 0; /* Instruction counter */ |
1867 | 65 | passRuleChars.length = 0; |
1868 | 65 | CharsString passHoldString; |
1869 | 65 | widechar passHoldNumber; |
1870 | 65 | CharsString passLine; |
1871 | 65 | int passLinepos = 0; |
1872 | 65 | TranslationTableCharacterAttributes passAttributes; |
1873 | 65 | int replacing = 0; |
1874 | 65 | passHoldString.length = 0; |
1875 | 5.51k | for (k = file->linepos; k < file->linelen; k++) |
1876 | 5.44k | passHoldString.chars[passHoldString.length++] = file->line[k]; |
1877 | 394 | #define SEPCHAR 0x0001 |
1878 | 363 | for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32; k++) |
1879 | 298 | ; |
1880 | 65 | if (k < passHoldString.length) |
1881 | 65 | passHoldString.chars[k] = SEPCHAR; |
1882 | 0 | else { |
1883 | 0 | compileError(file, "Invalid multipass operands"); |
1884 | 0 | return 0; |
1885 | 0 | } |
1886 | 65 | parseChars(file, &passLine, &passHoldString); |
1887 | | /* Compile test part */ |
1888 | 329 | for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++) |
1889 | 264 | ; |
1890 | 65 | endTest = k; |
1891 | 65 | passLine.chars[endTest] = pass_endTest; |
1892 | 65 | passLinepos = 0; |
1893 | 223 | while (passLinepos <= endTest) { |
1894 | 158 | switch ((passSubOp = passLine.chars[passLinepos])) { |
1895 | 20 | case pass_lookback: |
1896 | 20 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_lookback)) |
1897 | 0 | return 0; |
1898 | 20 | passLinepos++; |
1899 | 20 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
1900 | 20 | if (passHoldNumber == 0) passHoldNumber = 1; |
1901 | 20 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
1902 | 0 | return 0; |
1903 | 20 | break; |
1904 | 20 | case pass_not: |
1905 | 13 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_not)) |
1906 | 0 | return 0; |
1907 | 13 | passLinepos++; |
1908 | 13 | break; |
1909 | 5 | case pass_first: |
1910 | 5 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_first)) |
1911 | 0 | return 0; |
1912 | 5 | passLinepos++; |
1913 | 5 | break; |
1914 | 1 | case pass_last: |
1915 | 1 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_last)) |
1916 | 0 | return 0; |
1917 | 1 | passLinepos++; |
1918 | 1 | break; |
1919 | 5 | case pass_search: |
1920 | 5 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_search)) |
1921 | 0 | return 0; |
1922 | 5 | passLinepos++; |
1923 | 5 | break; |
1924 | 0 | case pass_string: |
1925 | 0 | if (!verifyStringOrDots(file, opcode, 1, 0, nofor)) { |
1926 | 0 | return 0; |
1927 | 0 | } |
1928 | 0 | passLinepos++; |
1929 | 0 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
1930 | 0 | return 0; |
1931 | 0 | passGetString(&passLine, &passLinepos, &passHoldString, file); |
1932 | 0 | if (passHoldString.length == 0) { |
1933 | 0 | compileError(file, "empty string in test part"); |
1934 | 0 | return 0; |
1935 | 0 | } |
1936 | 0 | goto testDoCharsDots; |
1937 | 0 | case pass_dots: |
1938 | 0 | if (!verifyStringOrDots(file, opcode, 0, 0, nofor)) { |
1939 | 0 | return 0; |
1940 | 0 | } |
1941 | 0 | passLinepos++; |
1942 | 0 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
1943 | 0 | return 0; |
1944 | 0 | passGetDots(&passLine, &passLinepos, &passHoldString, file); |
1945 | 0 | if (passHoldString.length == 0) { |
1946 | 0 | compileError(file, "expected dot pattern after @ operand in test part"); |
1947 | 0 | return 0; |
1948 | 0 | } |
1949 | 0 | testDoCharsDots: |
1950 | 0 | if (passIC >= MAXSTRING) { |
1951 | 0 | compileError( |
1952 | 0 | file, "@ operand in test part of multipass operand too long"); |
1953 | 0 | return 0; |
1954 | 0 | } |
1955 | 0 | if (!appendInstructionChar( |
1956 | 0 | file, passInstructions, &passIC, passHoldString.length)) |
1957 | 0 | return 0; |
1958 | 0 | for (kk = 0; kk < passHoldString.length; kk++) { |
1959 | 0 | if (passIC >= MAXSTRING) { |
1960 | 0 | compileError( |
1961 | 0 | file, "@ operand in test part of multipass operand too long"); |
1962 | 0 | return 0; |
1963 | 0 | } |
1964 | 0 | if (!appendInstructionChar( |
1965 | 0 | file, passInstructions, &passIC, passHoldString.chars[kk])) |
1966 | 0 | return 0; |
1967 | 0 | } |
1968 | 0 | break; |
1969 | 8 | case pass_startReplace: |
1970 | 8 | if (replacing) { |
1971 | 0 | compileError(file, "nested replacement statements"); |
1972 | 0 | return 0; |
1973 | 0 | } |
1974 | 8 | if (!appendInstructionChar( |
1975 | 8 | file, passInstructions, &passIC, pass_startReplace)) |
1976 | 0 | return 0; |
1977 | 8 | replacing = 1; |
1978 | 8 | passLinepos++; |
1979 | 8 | break; |
1980 | 8 | case pass_endReplace: |
1981 | 8 | if (!replacing) { |
1982 | 0 | compileError(file, "unexpected end of replacement"); |
1983 | 0 | return 0; |
1984 | 0 | } |
1985 | 8 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_endReplace)) |
1986 | 0 | return 0; |
1987 | 8 | replacing = 0; |
1988 | 8 | passLinepos++; |
1989 | 8 | break; |
1990 | 0 | case pass_variable: |
1991 | 0 | passLinepos++; |
1992 | 0 | if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
1993 | 0 | return 0; |
1994 | 0 | switch (passLine.chars[passLinepos]) { |
1995 | 0 | case pass_eq: |
1996 | 0 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
1997 | 0 | return 0; |
1998 | 0 | goto doComp; |
1999 | 0 | case pass_lt: |
2000 | 0 | if (passLine.chars[passLinepos + 1] == pass_eq) { |
2001 | 0 | passLinepos++; |
2002 | 0 | if (!appendInstructionChar( |
2003 | 0 | file, passInstructions, &passIC, pass_lteq)) |
2004 | 0 | return 0; |
2005 | 0 | } else if (!appendInstructionChar( |
2006 | 0 | file, passInstructions, &passIC, pass_lt)) |
2007 | 0 | return 0; |
2008 | 0 | goto doComp; |
2009 | 0 | case pass_gt: |
2010 | 0 | if (passLine.chars[passLinepos + 1] == pass_eq) { |
2011 | 0 | passLinepos++; |
2012 | 0 | if (!appendInstructionChar( |
2013 | 0 | file, passInstructions, &passIC, pass_gteq)) |
2014 | 0 | return 0; |
2015 | 0 | } else if (!appendInstructionChar( |
2016 | 0 | file, passInstructions, &passIC, pass_gt)) |
2017 | 0 | return 0; |
2018 | 0 | doComp: |
2019 | 0 | if (!appendInstructionChar( |
2020 | 0 | file, passInstructions, &passIC, passHoldNumber)) |
2021 | 0 | return 0; |
2022 | 0 | passLinepos++; |
2023 | 0 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2024 | 0 | if (!appendInstructionChar( |
2025 | 0 | file, passInstructions, &passIC, passHoldNumber)) |
2026 | 0 | return 0; |
2027 | 0 | break; |
2028 | 0 | default: |
2029 | 0 | compileError(file, "incorrect comparison operator"); |
2030 | 0 | return 0; |
2031 | 0 | } |
2032 | 0 | break; |
2033 | 29 | case pass_attributes: |
2034 | 29 | passLinepos++; |
2035 | 29 | if (!passGetAttributes(&passLine, &passLinepos, &passAttributes, file)) |
2036 | 0 | return 0; |
2037 | 29 | insertAttributes: |
2038 | 29 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_attributes)) |
2039 | 0 | return 0; |
2040 | 29 | if (!appendInstructionChar( |
2041 | 29 | file, passInstructions, &passIC, (passAttributes >> 48) & 0xffff)) |
2042 | 0 | return 0; |
2043 | 29 | if (!appendInstructionChar( |
2044 | 29 | file, passInstructions, &passIC, (passAttributes >> 32) & 0xffff)) |
2045 | 0 | return 0; |
2046 | 29 | if (!appendInstructionChar( |
2047 | 29 | file, passInstructions, &passIC, (passAttributes >> 16) & 0xffff)) |
2048 | 0 | return 0; |
2049 | 29 | if (!appendInstructionChar( |
2050 | 29 | file, passInstructions, &passIC, passAttributes & 0xffff)) |
2051 | 0 | return 0; |
2052 | 30 | getRange: |
2053 | 30 | if (passLine.chars[passLinepos] == pass_until) { |
2054 | 17 | passLinepos++; |
2055 | 17 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2056 | 17 | if (!appendInstructionChar(file, passInstructions, &passIC, 0xffff)) |
2057 | 0 | return 0; |
2058 | 17 | break; |
2059 | 17 | } |
2060 | 13 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2061 | 13 | if (passHoldNumber == 0) { |
2062 | 5 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2063 | 5 | if (!appendInstructionChar(file, passInstructions, &passIC, 1)) return 0; |
2064 | 5 | break; |
2065 | 5 | } |
2066 | 8 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
2067 | 0 | return 0; |
2068 | 8 | if (passLine.chars[passLinepos] != pass_hyphen) { |
2069 | 7 | if (!appendInstructionChar( |
2070 | 7 | file, passInstructions, &passIC, passHoldNumber)) |
2071 | 0 | return 0; |
2072 | 7 | break; |
2073 | 7 | } |
2074 | 1 | passLinepos++; |
2075 | 1 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2076 | 1 | if (passHoldNumber == 0) { |
2077 | 0 | compileError(file, "invalid range"); |
2078 | 0 | return 0; |
2079 | 0 | } |
2080 | 1 | if (!appendInstructionChar(file, passInstructions, &passIC, passHoldNumber)) |
2081 | 0 | return 0; |
2082 | 1 | break; |
2083 | 1 | case pass_groupstart: |
2084 | 3 | case pass_groupend: { |
2085 | 3 | passLinepos++; |
2086 | 3 | passGetName(&passLine, &passLinepos, &passHoldString); |
2087 | 3 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2088 | 3 | if (ruleOffset) |
2089 | 3 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2090 | 3 | if (rule && rule->opcode == CTO_Grouping) { |
2091 | 3 | if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
2092 | 0 | return 0; |
2093 | 3 | if (!appendInstructionChar( |
2094 | 3 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2095 | 0 | return 0; |
2096 | 3 | if (!appendInstructionChar( |
2097 | 3 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2098 | 0 | return 0; |
2099 | 3 | break; |
2100 | 3 | } else { |
2101 | 0 | compileError(file, "%s is not a grouping name", |
2102 | 0 | _lou_showString( |
2103 | 0 | &passHoldString.chars[0], passHoldString.length, 0)); |
2104 | 0 | return 0; |
2105 | 0 | } |
2106 | 0 | break; |
2107 | 3 | } |
2108 | 1 | case pass_swap: { |
2109 | 1 | passLinepos++; |
2110 | 1 | passGetName(&passLine, &passLinepos, &passHoldString); |
2111 | 1 | if ((class = findCharacterClass(&passHoldString, *table))) { |
2112 | 0 | passAttributes = class->attribute; |
2113 | 0 | goto insertAttributes; |
2114 | 0 | } |
2115 | 1 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2116 | 1 | if (ruleOffset) |
2117 | 1 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2118 | 1 | if (rule && |
2119 | 1 | (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
2120 | 1 | rule->opcode == CTO_SwapDd)) { |
2121 | 1 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
2122 | 0 | return 0; |
2123 | 1 | if (!appendInstructionChar( |
2124 | 1 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2125 | 0 | return 0; |
2126 | 1 | if (!appendInstructionChar( |
2127 | 1 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2128 | 0 | return 0; |
2129 | 1 | goto getRange; |
2130 | 1 | } |
2131 | 0 | compileError(file, "%s is neither a class name nor a swap name.", |
2132 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2133 | 0 | return 0; |
2134 | 1 | } |
2135 | 65 | case pass_endTest: |
2136 | 65 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_endTest)) |
2137 | 0 | return 0; |
2138 | 65 | if (replacing) { |
2139 | 0 | compileError(file, "expected end of replacement"); |
2140 | 0 | return 0; |
2141 | 0 | } |
2142 | 65 | passLinepos++; |
2143 | 65 | break; |
2144 | 0 | default: |
2145 | 0 | compileError(file, "incorrect operator '%c ' in test part", |
2146 | 0 | passLine.chars[passLinepos]); |
2147 | 0 | return 0; |
2148 | 158 | } |
2149 | | |
2150 | 158 | } /* Compile action part */ |
2151 | | |
2152 | | /* Compile action part */ |
2153 | 452 | while (passLinepos < passLine.length && passLine.chars[passLinepos] <= 32) |
2154 | 387 | passLinepos++; |
2155 | 193 | while (passLinepos < passLine.length && passLine.chars[passLinepos] > 32) { |
2156 | 128 | if (passIC >= MAXSTRING) { |
2157 | 0 | compileError(file, "Action part in multipass operand too long"); |
2158 | 0 | return 0; |
2159 | 0 | } |
2160 | 128 | switch ((passSubOp = passLine.chars[passLinepos])) { |
2161 | 37 | case pass_string: |
2162 | 37 | if (!verifyStringOrDots(file, opcode, 1, 1, nofor)) { |
2163 | 0 | return 0; |
2164 | 0 | } |
2165 | 37 | passLinepos++; |
2166 | 37 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_string)) |
2167 | 0 | return 0; |
2168 | 37 | passGetString(&passLine, &passLinepos, &passHoldString, file); |
2169 | 37 | goto actionDoCharsDots; |
2170 | 1 | case pass_dots: |
2171 | 1 | if (!verifyStringOrDots(file, opcode, 0, 1, nofor)) { |
2172 | 0 | return 0; |
2173 | 0 | } |
2174 | 1 | passLinepos++; |
2175 | 1 | passGetDots(&passLine, &passLinepos, &passHoldString, file); |
2176 | 1 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_dots)) |
2177 | 0 | return 0; |
2178 | 1 | if (passHoldString.length == 0) { |
2179 | 0 | compileError(file, "expected dot pattern after @ operand in action part"); |
2180 | 0 | return 0; |
2181 | 0 | } |
2182 | 38 | actionDoCharsDots: |
2183 | 38 | if (passIC >= MAXSTRING) { |
2184 | 0 | compileError( |
2185 | 0 | file, "@ operand in action part of multipass operand too long"); |
2186 | 0 | return 0; |
2187 | 0 | } |
2188 | 38 | if (!appendInstructionChar( |
2189 | 38 | file, passInstructions, &passIC, passHoldString.length)) |
2190 | 0 | return 0; |
2191 | 914 | for (kk = 0; kk < passHoldString.length; kk++) { |
2192 | 876 | if (passIC >= MAXSTRING) { |
2193 | 0 | compileError(file, |
2194 | 0 | "@ operand in action part of multipass operand too long"); |
2195 | 0 | return 0; |
2196 | 0 | } |
2197 | 876 | if (!appendInstructionChar( |
2198 | 876 | file, passInstructions, &passIC, passHoldString.chars[kk])) |
2199 | 0 | return 0; |
2200 | 876 | } |
2201 | 38 | break; |
2202 | 38 | case pass_variable: |
2203 | 0 | passLinepos++; |
2204 | 0 | if (!passGetVariableNumber(file, &passLine, &passLinepos, &passHoldNumber)) |
2205 | 0 | return 0; |
2206 | 0 | switch (passLine.chars[passLinepos]) { |
2207 | 0 | case pass_eq: |
2208 | 0 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_eq)) |
2209 | 0 | return 0; |
2210 | 0 | if (!appendInstructionChar( |
2211 | 0 | file, passInstructions, &passIC, passHoldNumber)) |
2212 | 0 | return 0; |
2213 | 0 | passLinepos++; |
2214 | 0 | passGetNumber(&passLine, &passLinepos, &passHoldNumber); |
2215 | 0 | if (!appendInstructionChar( |
2216 | 0 | file, passInstructions, &passIC, passHoldNumber)) |
2217 | 0 | return 0; |
2218 | 0 | break; |
2219 | 0 | case pass_plus: |
2220 | 0 | case pass_hyphen: |
2221 | 0 | if (!appendInstructionChar(file, passInstructions, &passIC, |
2222 | 0 | passLine.chars[passLinepos++])) |
2223 | 0 | return 0; |
2224 | 0 | if (!appendInstructionChar( |
2225 | 0 | file, passInstructions, &passIC, passHoldNumber)) |
2226 | 0 | return 0; |
2227 | 0 | break; |
2228 | 0 | default: |
2229 | 0 | compileError(file, "incorrect variable operator in action part"); |
2230 | 0 | return 0; |
2231 | 0 | } |
2232 | 0 | break; |
2233 | 71 | case pass_copy: |
2234 | 71 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_copy)) |
2235 | 0 | return 0; |
2236 | 71 | passLinepos++; |
2237 | 71 | break; |
2238 | 13 | case pass_omit: |
2239 | 13 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_omit)) |
2240 | 0 | return 0; |
2241 | 13 | passLinepos++; |
2242 | 13 | break; |
2243 | 2 | case pass_groupreplace: |
2244 | 2 | case pass_groupstart: |
2245 | 2 | case pass_groupend: { |
2246 | 2 | passLinepos++; |
2247 | 2 | passGetName(&passLine, &passLinepos, &passHoldString); |
2248 | 2 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2249 | 2 | if (ruleOffset) |
2250 | 0 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2251 | 2 | if (rule && rule->opcode == CTO_Grouping) { |
2252 | 2 | if (!appendInstructionChar(file, passInstructions, &passIC, passSubOp)) |
2253 | 0 | return 0; |
2254 | 2 | if (!appendInstructionChar( |
2255 | 2 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2256 | 0 | return 0; |
2257 | 2 | if (!appendInstructionChar( |
2258 | 2 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2259 | 0 | return 0; |
2260 | 2 | break; |
2261 | 2 | } |
2262 | 0 | compileError(file, "%s is not a grouping name", |
2263 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2264 | 0 | return 0; |
2265 | 2 | } |
2266 | 4 | case pass_swap: { |
2267 | 4 | passLinepos++; |
2268 | 4 | passGetName(&passLine, &passLinepos, &passHoldString); |
2269 | 4 | TranslationTableOffset ruleOffset = findRuleName(&passHoldString, *table); |
2270 | 4 | if (ruleOffset) |
2271 | 1 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
2272 | 4 | if (rule && |
2273 | 4 | (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd || |
2274 | 4 | rule->opcode == CTO_SwapDd)) { |
2275 | 4 | if (!appendInstructionChar(file, passInstructions, &passIC, pass_swap)) |
2276 | 0 | return 0; |
2277 | 4 | if (!appendInstructionChar( |
2278 | 4 | file, passInstructions, &passIC, ruleOffset >> 16)) |
2279 | 0 | return 0; |
2280 | 4 | if (!appendInstructionChar( |
2281 | 4 | file, passInstructions, &passIC, ruleOffset & 0xffff)) |
2282 | 0 | return 0; |
2283 | 4 | break; |
2284 | 4 | } |
2285 | 0 | compileError(file, "%s is not a swap name.", |
2286 | 0 | _lou_showString(&passHoldString.chars[0], passHoldString.length, 0)); |
2287 | 0 | return 0; |
2288 | 0 | break; |
2289 | 4 | } |
2290 | 0 | default: |
2291 | 0 | compileError(file, "incorrect operator in action part"); |
2292 | 0 | return 0; |
2293 | 128 | } |
2294 | 128 | } |
2295 | | |
2296 | | /* Analyze and add rule */ |
2297 | 65 | passRuleDots.length = passIC; |
2298 | | |
2299 | 65 | { |
2300 | 65 | widechar *characters; |
2301 | 65 | int length; |
2302 | 65 | int found = passFindCharacters( |
2303 | 65 | file, passInstructions, passRuleDots.length, &characters, &length); |
2304 | | |
2305 | 65 | if (!found) return 0; |
2306 | | |
2307 | 65 | if (characters) { |
2308 | 0 | for (k = 0; k < length; k += 1) passRuleChars.chars[k] = characters[k]; |
2309 | 0 | passRuleChars.length = k; |
2310 | 0 | } |
2311 | 65 | } |
2312 | | |
2313 | 65 | if (!addRule(file, opcode, &passRuleChars, &passRuleDots, 0, 0, NULL, NULL, noback, |
2314 | 65 | nofor, table)) |
2315 | 0 | return 0; |
2316 | 65 | return 1; |
2317 | 65 | } |
2318 | | |
2319 | | /* End of multipass compiler */ |
2320 | | |
2321 | | static int |
2322 | | compileBrailleIndicator(FileInfo *file, const char *ermsg, TranslationTableOpcode opcode, |
2323 | | TranslationTableOffset *ruleOffset, int noback, int nofor, |
2324 | 70 | TranslationTableHeader **table) { |
2325 | 70 | CharsString token; |
2326 | 70 | CharsString cells; |
2327 | 70 | if (!getToken(file, &token, ermsg)) return 0; |
2328 | 69 | if (!parseDots(file, &cells, &token)) return 0; |
2329 | 69 | return addRule( |
2330 | 69 | file, opcode, NULL, &cells, 0, 0, ruleOffset, NULL, noback, nofor, table); |
2331 | 69 | } |
2332 | | |
2333 | | static int |
2334 | 3 | compileNumber(FileInfo *file) { |
2335 | 3 | CharsString token; |
2336 | 3 | widechar number; |
2337 | 3 | if (!getToken(file, &token, "number")) return 0; |
2338 | 3 | getNumber(&token.chars[0], &number); |
2339 | 3 | if (!(number > 0)) { |
2340 | 0 | compileError(file, "a nonzero positive number is required"); |
2341 | 0 | return 0; |
2342 | 0 | } |
2343 | 3 | return number; |
2344 | 3 | } |
2345 | | |
2346 | | static int |
2347 | | compileGrouping(FileInfo *file, int noback, int nofor, TranslationTableHeader **table, |
2348 | 2 | DisplayTableHeader **displayTable) { |
2349 | 2 | int k; |
2350 | 2 | CharsString name; |
2351 | 2 | CharsString groupChars; |
2352 | 2 | CharsString groupDots; |
2353 | 2 | CharsString dotsParsed; |
2354 | 2 | if (!getToken(file, &name, "name operand")) return 0; |
2355 | 2 | if (!getRuleCharsText(file, &groupChars)) return 0; |
2356 | 2 | if (!getToken(file, &groupDots, "dots operand")) return 0; |
2357 | 6 | for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++) |
2358 | 4 | ; |
2359 | 2 | if (k == groupDots.length) { |
2360 | 0 | compileError(file, "Dots operand must consist of two cells separated by a comma"); |
2361 | 0 | return 0; |
2362 | 0 | } |
2363 | 2 | groupDots.chars[k] = '-'; |
2364 | 2 | if (!parseDots(file, &dotsParsed, &groupDots)) return 0; |
2365 | 2 | if (groupChars.length != 2 || dotsParsed.length != 2) { |
2366 | 0 | compileError(file, |
2367 | 0 | "two Unicode characters and two cells separated by a comma are needed."); |
2368 | 0 | return 0; |
2369 | 0 | } |
2370 | 2 | if (table) { |
2371 | 2 | TranslationTableOffset ruleOffset; |
2372 | 2 | TranslationTableCharacter *charsDotsPtr; |
2373 | 2 | charsDotsPtr = |
2374 | 2 | putChar(file, groupChars.chars[0], table, NULL, (*table)->ruleCounter); |
2375 | 2 | charsDotsPtr->attributes |= CTC_Math; |
2376 | 2 | charsDotsPtr = |
2377 | 2 | putChar(file, groupChars.chars[1], table, NULL, (*table)->ruleCounter); |
2378 | 2 | charsDotsPtr->attributes |= CTC_Math; |
2379 | 2 | charsDotsPtr = putDots(file, dotsParsed.chars[0], table, (*table)->ruleCounter); |
2380 | 2 | charsDotsPtr->attributes |= CTC_Math; |
2381 | 2 | charsDotsPtr = putDots(file, dotsParsed.chars[1], table, (*table)->ruleCounter); |
2382 | 2 | charsDotsPtr->attributes |= CTC_Math; |
2383 | 2 | if (!addRule(file, CTO_Grouping, &groupChars, &dotsParsed, 0, 0, &ruleOffset, |
2384 | 2 | NULL, noback, nofor, table)) |
2385 | 0 | return 0; |
2386 | 2 | if (!addRuleName(file, &name, ruleOffset, *table)) return 0; |
2387 | 2 | } |
2388 | 2 | if (displayTable) { |
2389 | 2 | putCharDotsMapping(file, groupChars.chars[0], dotsParsed.chars[0], displayTable); |
2390 | 2 | putCharDotsMapping(file, groupChars.chars[1], dotsParsed.chars[1], displayTable); |
2391 | 2 | } |
2392 | 2 | if (table) { |
2393 | 2 | widechar endChar; |
2394 | 2 | widechar endDots; |
2395 | 2 | endChar = groupChars.chars[1]; |
2396 | 2 | endDots = dotsParsed.chars[1]; |
2397 | 2 | groupChars.length = dotsParsed.length = 1; |
2398 | 2 | if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
2399 | 2 | nofor, table)) |
2400 | 0 | return 0; |
2401 | 2 | groupChars.chars[0] = endChar; |
2402 | 2 | dotsParsed.chars[0] = endDots; |
2403 | 2 | if (!addRule(file, CTO_Math, &groupChars, &dotsParsed, 0, 0, NULL, NULL, noback, |
2404 | 2 | nofor, table)) |
2405 | 0 | return 0; |
2406 | 2 | } |
2407 | 2 | return 1; |
2408 | 2 | } |
2409 | | |
2410 | | /* Functions for compiling hyphenation tables */ |
2411 | | |
2412 | | typedef struct HyphenDict { /* hyphenation dictionary: finite state machine */ |
2413 | | int numStates; |
2414 | | HyphenationState *states; |
2415 | | } HyphenDict; |
2416 | | |
2417 | 53.5k | #define DEFAULTSTATE 0xffff |
2418 | 75.9k | #define HYPHENHASHSIZE 8191 |
2419 | | |
2420 | | typedef struct HyphenHashEntry { |
2421 | | struct HyphenHashEntry *next; |
2422 | | CharsString *key; |
2423 | | int val; |
2424 | | } HyphenHashEntry; |
2425 | | |
2426 | | typedef struct HyphenHashTab { |
2427 | | HyphenHashEntry *entries[HYPHENHASHSIZE]; |
2428 | | } HyphenHashTab; |
2429 | | |
2430 | | /* a hash function from ASU - adapted from Gtk+ */ |
2431 | | static unsigned int |
2432 | 26.7k | hyphenStringHash(const CharsString *s) { |
2433 | 26.7k | int k; |
2434 | 26.7k | unsigned int h = 0, g; |
2435 | 2.13M | for (k = 0; k < s->length; k++) { |
2436 | 2.10M | h = (h << 4) + s->chars[k]; |
2437 | 2.10M | if ((g = h & 0xf0000000)) { |
2438 | 1.80M | h = h ^ (g >> 24); |
2439 | 1.80M | h = h ^ g; |
2440 | 1.80M | } |
2441 | 2.10M | } |
2442 | 26.7k | return h; |
2443 | 26.7k | } |
2444 | | |
2445 | | static HyphenHashTab * |
2446 | 3 | hyphenHashNew(void) { |
2447 | 3 | HyphenHashTab *hashTab; |
2448 | 3 | if (!(hashTab = malloc(sizeof(HyphenHashTab)))) _lou_outOfMemory(); |
2449 | 3 | memset(hashTab, 0, sizeof(HyphenHashTab)); |
2450 | 3 | return hashTab; |
2451 | 3 | } |
2452 | | |
2453 | | static void |
2454 | 3 | hyphenHashFree(HyphenHashTab *hashTab) { |
2455 | 3 | int i; |
2456 | 3 | HyphenHashEntry *e, *next; |
2457 | 24.5k | for (i = 0; i < HYPHENHASHSIZE; i++) |
2458 | 24.8k | for (e = hashTab->entries[i]; e; e = next) { |
2459 | 243 | next = e->next; |
2460 | 243 | free(e->key); |
2461 | 243 | free(e); |
2462 | 243 | } |
2463 | 3 | free(hashTab); |
2464 | 3 | } |
2465 | | |
2466 | | /* assumes that key is not already present! */ |
2467 | | static void |
2468 | 243 | hyphenHashInsert(HyphenHashTab *hashTab, const CharsString *key, int val) { |
2469 | 243 | int i, j; |
2470 | 243 | HyphenHashEntry *e; |
2471 | 243 | i = hyphenStringHash(key) % HYPHENHASHSIZE; |
2472 | 243 | if (!(e = malloc(sizeof(HyphenHashEntry)))) _lou_outOfMemory(); |
2473 | 243 | e->next = hashTab->entries[i]; |
2474 | 243 | e->key = malloc((key->length + 1) * CHARSIZE); |
2475 | 243 | if (!e->key) _lou_outOfMemory(); |
2476 | 243 | e->key->length = key->length; |
2477 | 27.0k | for (j = 0; j < key->length; j++) e->key->chars[j] = key->chars[j]; |
2478 | 243 | e->val = val; |
2479 | 243 | hashTab->entries[i] = e; |
2480 | 243 | } |
2481 | | |
2482 | | /* return val if found, otherwise DEFAULTSTATE */ |
2483 | | static int |
2484 | 26.6k | hyphenHashLookup(HyphenHashTab *hashTab, const CharsString *key) { |
2485 | 26.6k | int i, j; |
2486 | 26.6k | HyphenHashEntry *e; |
2487 | 26.6k | if (key->length == 0) return 0; |
2488 | 26.5k | i = hyphenStringHash(key) % HYPHENHASHSIZE; |
2489 | 27.2k | for (e = hashTab->entries[i]; e; e = e->next) { |
2490 | 858 | if (key->length != e->key->length) continue; |
2491 | 670 | for (j = 0; j < key->length; j++) |
2492 | 544 | if (key->chars[j] != e->key->chars[j]) break; |
2493 | 132 | if (j == key->length) return e->val; |
2494 | 132 | } |
2495 | 26.3k | return DEFAULTSTATE; |
2496 | 26.5k | } |
2497 | | |
2498 | | static int |
2499 | 243 | hyphenGetNewState(HyphenDict *dict, HyphenHashTab *hashTab, const CharsString *string) { |
2500 | 243 | hyphenHashInsert(hashTab, string, dict->numStates); |
2501 | | /* predicate is true if dict->numStates is a power of two */ |
2502 | 243 | if (!(dict->numStates & (dict->numStates - 1))) |
2503 | 12 | dict->states = |
2504 | 12 | realloc(dict->states, (dict->numStates << 1) * sizeof(HyphenationState)); |
2505 | 243 | if (!dict->states) _lou_outOfMemory(); |
2506 | 243 | dict->states[dict->numStates].hyphenPattern = 0; |
2507 | 243 | dict->states[dict->numStates].fallbackState = DEFAULTSTATE; |
2508 | 243 | dict->states[dict->numStates].numTrans = 0; |
2509 | 243 | dict->states[dict->numStates].trans.pointer = NULL; |
2510 | 243 | return dict->numStates++; |
2511 | 243 | } |
2512 | | |
2513 | | /* add a transition from state1 to state2 through ch - assumes that the |
2514 | | * transition does not already exist */ |
2515 | | static void |
2516 | 243 | hyphenAddTrans(HyphenDict *dict, int state1, int state2, widechar ch) { |
2517 | 243 | int numTrans; |
2518 | 243 | numTrans = dict->states[state1].numTrans; |
2519 | 243 | if (numTrans == 0) |
2520 | 241 | dict->states[state1].trans.pointer = malloc(sizeof(HyphenationTrans)); |
2521 | 2 | else if (!(numTrans & (numTrans - 1))) |
2522 | 2 | dict->states[state1].trans.pointer = realloc(dict->states[state1].trans.pointer, |
2523 | 2 | (numTrans << 1) * sizeof(HyphenationTrans)); |
2524 | 243 | dict->states[state1].trans.pointer[numTrans].ch = ch; |
2525 | 243 | dict->states[state1].trans.pointer[numTrans].newState = state2; |
2526 | 243 | dict->states[state1].numTrans++; |
2527 | 243 | } |
2528 | | |
2529 | | static int |
2530 | | compileHyphenation( |
2531 | 3 | FileInfo *file, CharsString *encoding, TranslationTableHeader **table) { |
2532 | 3 | CharsString hyph; |
2533 | 3 | HyphenationTrans *holdPointer; |
2534 | 3 | HyphenHashTab *hashTab; |
2535 | 3 | CharsString word; |
2536 | 3 | char pattern[MAXSTRING + 1]; |
2537 | 3 | unsigned int stateNum = 0, lastState = 0; |
2538 | 3 | int i, j, k = encoding->length; |
2539 | 3 | widechar ch; |
2540 | 3 | int found; |
2541 | 3 | HyphenHashEntry *e; |
2542 | 3 | HyphenDict dict; |
2543 | 3 | TranslationTableOffset holdOffset; |
2544 | | /* Set aside enough space for hyphenation states and transitions in |
2545 | | * translation table. Must be done before anything else */ |
2546 | 3 | allocateSpaceInTranslationTable(file, NULL, 250000, table); |
2547 | 3 | hashTab = hyphenHashNew(); |
2548 | 3 | dict.numStates = 1; |
2549 | 3 | dict.states = malloc(sizeof(HyphenationState)); |
2550 | 3 | if (!dict.states) _lou_outOfMemory(); |
2551 | 3 | dict.states[0].hyphenPattern = 0; |
2552 | 3 | dict.states[0].fallbackState = DEFAULTSTATE; |
2553 | 3 | dict.states[0].numTrans = 0; |
2554 | 3 | dict.states[0].trans.pointer = NULL; |
2555 | 8 | do { |
2556 | 8 | if (encoding->chars[0] == 'I') { |
2557 | 8 | if (!getToken(file, &hyph, NULL)) continue; |
2558 | 8 | } else { |
2559 | | /* UTF-8 */ |
2560 | 0 | if (!getToken(file, &word, NULL)) continue; |
2561 | 0 | parseChars(file, &hyph, &word); |
2562 | 0 | } |
2563 | 6 | if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] == '%' || |
2564 | 6 | hyph.chars[0] == '<') |
2565 | 0 | continue; /* comment */ |
2566 | 6 | j = 0; |
2567 | 6 | pattern[j] = '0'; |
2568 | 382 | for (i = 0; i < hyph.length; i++) { |
2569 | 376 | if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9') |
2570 | 36 | pattern[j] = (char)hyph.chars[i]; |
2571 | 340 | else { |
2572 | 340 | word.chars[j] = hyph.chars[i]; |
2573 | 340 | pattern[++j] = '0'; |
2574 | 340 | } |
2575 | 376 | } |
2576 | 6 | word.chars[j] = 0; |
2577 | 6 | word.length = j; |
2578 | 6 | pattern[j + 1] = 0; |
2579 | 97 | for (i = 0; pattern[i] == '0'; i++) |
2580 | 91 | ; |
2581 | 6 | found = hyphenHashLookup(hashTab, &word); |
2582 | 6 | if (found != DEFAULTSTATE) |
2583 | 0 | stateNum = found; |
2584 | 6 | else |
2585 | 6 | stateNum = hyphenGetNewState(&dict, hashTab, &word); |
2586 | 6 | k = j + 2 - i; |
2587 | 6 | if (k > 0) { |
2588 | 6 | allocateSpaceInTranslationTable( |
2589 | 6 | file, &dict.states[stateNum].hyphenPattern, k, table); |
2590 | 6 | memcpy(&(*table)->ruleArea[dict.states[stateNum].hyphenPattern], &pattern[i], |
2591 | 6 | k); |
2592 | 6 | } |
2593 | | /* now, put in the prefix transitions */ |
2594 | 249 | while (found == DEFAULTSTATE) { |
2595 | 243 | lastState = stateNum; |
2596 | 243 | ch = word.chars[word.length-- - 1]; |
2597 | 243 | found = hyphenHashLookup(hashTab, &word); |
2598 | 243 | if (found != DEFAULTSTATE) |
2599 | 6 | stateNum = found; |
2600 | 237 | else |
2601 | 237 | stateNum = hyphenGetNewState(&dict, hashTab, &word); |
2602 | 243 | hyphenAddTrans(&dict, stateNum, lastState, ch); |
2603 | 243 | } |
2604 | 8 | } while (_lou_getALine(file)); |
2605 | | /* put in the fallback states */ |
2606 | 24.5k | for (i = 0; i < HYPHENHASHSIZE; i++) { |
2607 | 24.8k | for (e = hashTab->entries[i]; e; e = e->next) { |
2608 | 26.3k | for (j = 1; j <= e->key->length; j++) { |
2609 | 26.3k | word.length = 0; |
2610 | 2.07M | for (k = j; k < e->key->length; k++) |
2611 | 2.05M | word.chars[word.length++] = e->key->chars[k]; |
2612 | 26.3k | stateNum = hyphenHashLookup(hashTab, &word); |
2613 | 26.3k | if (stateNum != DEFAULTSTATE) break; |
2614 | 26.3k | } |
2615 | 243 | if (e->val) dict.states[e->val].fallbackState = stateNum; |
2616 | 243 | } |
2617 | 24.5k | } |
2618 | 3 | hyphenHashFree(hashTab); |
2619 | | /* Transfer hyphenation information to table */ |
2620 | 249 | for (i = 0; i < dict.numStates; i++) { |
2621 | 246 | if (dict.states[i].numTrans == 0) |
2622 | 5 | dict.states[i].trans.offset = 0; |
2623 | 241 | else { |
2624 | 241 | holdPointer = dict.states[i].trans.pointer; |
2625 | 241 | allocateSpaceInTranslationTable(file, &dict.states[i].trans.offset, |
2626 | 241 | dict.states[i].numTrans * sizeof(HyphenationTrans), table); |
2627 | 241 | memcpy(&(*table)->ruleArea[dict.states[i].trans.offset], holdPointer, |
2628 | 241 | dict.states[i].numTrans * sizeof(HyphenationTrans)); |
2629 | 241 | free(holdPointer); |
2630 | 241 | } |
2631 | 246 | } |
2632 | 3 | allocateSpaceInTranslationTable( |
2633 | 3 | file, &holdOffset, dict.numStates * sizeof(HyphenationState), table); |
2634 | 3 | (*table)->hyphenStatesArray = holdOffset; |
2635 | | /* Prevents segmentation fault if table is reallocated */ |
2636 | 3 | memcpy(&(*table)->ruleArea[(*table)->hyphenStatesArray], &dict.states[0], |
2637 | 3 | dict.numStates * sizeof(HyphenationState)); |
2638 | 3 | free(dict.states); |
2639 | 3 | return 1; |
2640 | 3 | } |
2641 | | |
2642 | | static int |
2643 | | compileCharDef(FileInfo *file, TranslationTableOpcode opcode, |
2644 | | TranslationTableCharacterAttributes attributes, int noback, int nofor, |
2645 | 104 | TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
2646 | 104 | CharsString ruleChars; |
2647 | 104 | CharsString ruleDots; |
2648 | 104 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
2649 | 104 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
2650 | 104 | if (ruleChars.length != 1) { |
2651 | 0 | compileError(file, "Exactly one character is required."); |
2652 | 0 | return 0; |
2653 | 0 | } |
2654 | 104 | if (ruleDots.length < 1) { |
2655 | 0 | compileError(file, "At least one cell is required."); |
2656 | 0 | return 0; |
2657 | 0 | } |
2658 | 104 | if (table) { |
2659 | 104 | TranslationTableCharacter *character; |
2660 | 104 | TranslationTableCharacter *cell = NULL; |
2661 | 104 | int k; |
2662 | 104 | if (attributes & (CTC_UpperCase | CTC_LowerCase)) attributes |= CTC_Letter; |
2663 | 104 | character = putChar(file, ruleChars.chars[0], table, NULL, (*table)->ruleCounter); |
2664 | 104 | character->attributes |= attributes; |
2665 | 208 | for (k = ruleDots.length - 1; k >= 0; k -= 1) { |
2666 | 104 | cell = getDots(ruleDots.chars[k], *table); |
2667 | 104 | if (!cell) |
2668 | 89 | cell = putDots(file, ruleDots.chars[k], table, (*table)->ruleCounter); |
2669 | 104 | } |
2670 | 104 | if (ruleDots.length == 1) cell->attributes |= attributes; |
2671 | 104 | } |
2672 | 104 | if (displayTable && ruleDots.length == 1) |
2673 | 104 | putCharDotsMapping(file, ruleChars.chars[0], ruleDots.chars[0], displayTable); |
2674 | 104 | if (table) |
2675 | 104 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, NULL, NULL, noback, nofor, |
2676 | 104 | table)) |
2677 | 0 | return 0; |
2678 | 104 | return 1; |
2679 | 104 | } |
2680 | | |
2681 | | static int |
2682 | 12 | compileBeforeAfter(FileInfo *file) { |
2683 | | /* 1=before, 2=after, 0=error */ |
2684 | 12 | CharsString token; |
2685 | 12 | CharsString tmp; |
2686 | 12 | if (!getToken(file, &token, "last word before or after")) return 0; |
2687 | 12 | if (!parseChars(file, &tmp, &token)) return 0; |
2688 | 12 | if (eqasc2uni((unsigned char *)"before", tmp.chars, 6)) |
2689 | 1 | return 1; |
2690 | 11 | else if (eqasc2uni((unsigned char *)"after", tmp.chars, 5)) |
2691 | 11 | return 2; |
2692 | 0 | return 0; |
2693 | 12 | } |
2694 | | |
2695 | | /** |
2696 | | * Macro |
2697 | | */ |
2698 | | typedef struct { |
2699 | | const char *name; |
2700 | | const widechar *definition; // fixed part |
2701 | | int definition_length; |
2702 | | const int *substitutions; // variable part: position and argument index of each |
2703 | | // variable substitution |
2704 | | int substitution_count; |
2705 | | int argument_count; // number of expected arguments |
2706 | | } Macro; |
2707 | | |
2708 | | /** |
2709 | | * List of in-scope macros |
2710 | | */ |
2711 | | typedef struct MacroList { |
2712 | | const Macro *head; |
2713 | | const struct MacroList *tail; |
2714 | | } MacroList; |
2715 | | |
2716 | | /** |
2717 | | * Create new macro. |
2718 | | */ |
2719 | | static const Macro * |
2720 | | create_macro(const char *name, const widechar *definition, int definition_length, |
2721 | 0 | const int *substitutions, int substitution_count, int argument_count) { |
2722 | 0 | Macro *m = malloc(sizeof(Macro)); |
2723 | 0 | m->name = strdup(name); |
2724 | 0 | widechar *definition_copy = malloc(definition_length * sizeof(widechar)); |
2725 | 0 | memcpy(definition_copy, definition, definition_length * sizeof(widechar)); |
2726 | 0 | m->definition = definition_copy; |
2727 | 0 | m->definition_length = definition_length; |
2728 | 0 | int *substitutions_copy = malloc(2 * substitution_count * sizeof(int)); |
2729 | 0 | memcpy(substitutions_copy, substitutions, 2 * substitution_count * sizeof(int)); |
2730 | 0 | m->substitutions = substitutions_copy; |
2731 | 0 | m->substitution_count = substitution_count; |
2732 | 0 | m->argument_count = argument_count; |
2733 | 0 | return m; |
2734 | 0 | } |
2735 | | |
2736 | | /** |
2737 | | * Create new macro list from "head" macro and "tail" list. |
2738 | | */ |
2739 | | static const MacroList * |
2740 | 0 | cons_macro(const Macro *head, const MacroList *tail) { |
2741 | 0 | MacroList *list = malloc(sizeof(MacroList)); |
2742 | 0 | list->head = head; |
2743 | 0 | list->tail = tail; |
2744 | 0 | return list; |
2745 | 0 | } |
2746 | | |
2747 | | /** |
2748 | | * Free macro returned by create_macro. |
2749 | | */ |
2750 | | static void |
2751 | 0 | free_macro(const Macro *macro) { |
2752 | 0 | if (macro) { |
2753 | 0 | free((char *)macro->name); |
2754 | 0 | free((char *)macro->definition); |
2755 | 0 | free((int *)macro->substitutions); |
2756 | 0 | free((Macro *)macro); |
2757 | 0 | } |
2758 | 0 | } |
2759 | | |
2760 | | /** |
2761 | | * Free macro list returned by cons_macro. |
2762 | | */ |
2763 | | static void |
2764 | 84 | free_macro_list(const MacroList *list) { |
2765 | 84 | if (list) { |
2766 | 0 | free_macro((Macro *)list->head); |
2767 | 0 | free_macro_list((MacroList *)list->tail); |
2768 | 0 | free((MacroList *)list); |
2769 | 0 | } |
2770 | 84 | } |
2771 | | |
2772 | | /** |
2773 | | * Compile macro |
2774 | | */ |
2775 | | static int |
2776 | 0 | compileMacro(FileInfo *file, const Macro **macro) { |
2777 | 0 |
|
2778 | 0 | // parse name |
2779 | 0 | CharsString token; |
2780 | 0 | if (!getToken(file, &token, "macro name")) return 0; |
2781 | 0 | switch (getOpcode(file, &token)) { |
2782 | 0 | case CTO_UpLow: // deprecated so "uplow" may be used as macro name |
2783 | 0 | case CTO_None: |
2784 | 0 | break; |
2785 | 0 | default: |
2786 | 0 | compileError(file, "Invalid macro name: already taken by an opcode"); |
2787 | 0 | return 0; |
2788 | 0 | } |
2789 | 0 | for (int i = 0; i < token.length; i++) { |
2790 | 0 | if (!((token.chars[i] >= 'a' && token.chars[i] <= 'z') || |
2791 | 0 | (token.chars[i] >= 'A' && token.chars[i] <= 'Z') || |
2792 | 0 | (token.chars[i] >= '0' && token.chars[i] <= '9'))) { |
2793 | 0 | compileError(file, |
2794 | 0 | "Invalid macro name: must be a word containing only letters and " |
2795 | 0 | "digits"); |
2796 | 0 | return 0; |
2797 | 0 | } |
2798 | 0 | } |
2799 | 0 | static char name[MAXSTRING + 1]; |
2800 | 0 | int name_length; |
2801 | 0 | for (name_length = 0; name_length < token.length; |
2802 | 0 | name_length++) // we know token can not be longer than MAXSTRING |
2803 | 0 | name[name_length] = (char)token.chars[name_length]; |
2804 | 0 | name[name_length] = '\0'; |
2805 | 0 |
|
2806 | 0 | // parse body |
2807 | 0 | static widechar definition[MAXSTRING]; |
2808 | 0 | static int substitutions[2 * MAX_MACRO_VAR]; |
2809 | 0 | int definition_length = 0; |
2810 | 0 | int substitution_count = 0; |
2811 | 0 | int argument_count = 0; |
2812 | 0 | int dollar = 0; |
2813 | 0 |
|
2814 | 0 | // ignore rest of line after name and read lines until "eom" is encountered |
2815 | 0 | while (_lou_getALine(file)) { |
2816 | 0 | if (file->linelen >= 3 && file->line[0] == 'e' && file->line[1] == 'o' && |
2817 | 0 | file->line[2] == 'm') { |
2818 | 0 | *macro = create_macro(name, definition, definition_length, substitutions, |
2819 | 0 | substitution_count, argument_count); |
2820 | 0 | return 1; |
2821 | 0 | } |
2822 | 0 | while (!atEndOfLine(file)) { |
2823 | 0 | widechar c = file->line[file->linepos++]; |
2824 | 0 | if (dollar) { |
2825 | 0 | dollar = 0; |
2826 | 0 | if (c >= '0' && c <= '9') { |
2827 | 0 | if (substitution_count >= MAX_MACRO_VAR) { |
2828 | 0 | compileError(file, |
2829 | 0 | "Macro can not have more than %d variable substitutions", |
2830 | 0 | MAXSTRING); |
2831 | 0 | return 0; |
2832 | 0 | } |
2833 | 0 | substitutions[2 * substitution_count] = definition_length; |
2834 | 0 | int arg = c - '0'; |
2835 | 0 | substitutions[2 * substitution_count + 1] = arg; |
2836 | 0 | if (arg > argument_count) argument_count = arg; |
2837 | 0 | substitution_count++; |
2838 | 0 | continue; |
2839 | 0 | } |
2840 | 0 | } else if (c == '$') { |
2841 | 0 | dollar = 1; |
2842 | 0 | continue; |
2843 | 0 | } |
2844 | 0 | if (definition_length >= MAXSTRING) { |
2845 | 0 | compileError(file, "Macro exceeds %d characters", MAXSTRING); |
2846 | 0 | return 0; |
2847 | 0 | } else |
2848 | 0 | definition[definition_length++] = c; |
2849 | 0 | } |
2850 | 0 | dollar = 0; |
2851 | 0 | if (definition_length >= MAXSTRING) { |
2852 | 0 | compileError(file, "Macro exceeds %d characters", MAXSTRING); |
2853 | 0 | return 0; |
2854 | 0 | } |
2855 | 0 | definition[definition_length++] = '\n'; |
2856 | 0 | } |
2857 | 0 | compileError(file, "macro must be terminated with 'eom'"); |
2858 | 0 | return 0; |
2859 | 0 | } |
2860 | | |
2861 | | static int |
2862 | | compileRule(FileInfo *file, TranslationTableHeader **table, |
2863 | 807 | DisplayTableHeader **displayTable, const MacroList **inScopeMacros) { |
2864 | 807 | CharsString token; |
2865 | 807 | TranslationTableOpcode opcode; |
2866 | 807 | CharsString ruleChars; |
2867 | 807 | CharsString ruleDots; |
2868 | 807 | CharsString cells; |
2869 | 807 | CharsString scratchPad; |
2870 | 807 | CharsString emphClass; |
2871 | 807 | TranslationTableCharacterAttributes after = 0; |
2872 | 807 | TranslationTableCharacterAttributes before = 0; |
2873 | 807 | int noback, nofor, nocross; |
2874 | 807 | noback = nofor = nocross = 0; |
2875 | 975 | doOpcode: |
2876 | 975 | if (!getToken(file, &token, NULL)) return 1; /* blank line */ |
2877 | 811 | if (token.chars[0] == '#' || token.chars[0] == '<') return 1; /* comment */ |
2878 | 742 | if (file->lineNumber == 1 && |
2879 | 291 | (eqasc2uni((unsigned char *)"ISO", token.chars, 3) || |
2880 | 288 | eqasc2uni((unsigned char *)"UTF-8", token.chars, 5))) { |
2881 | 3 | if (table) |
2882 | 3 | compileHyphenation(file, &token, table); |
2883 | 0 | else |
2884 | | /* ignore the whole file */ |
2885 | 0 | while (_lou_getALine(file)) |
2886 | 0 | ; |
2887 | 3 | return 1; |
2888 | 3 | } |
2889 | 739 | opcode = getOpcode(file, &token); |
2890 | 739 | switch (opcode) { |
2891 | 0 | case CTO_Macro: { |
2892 | 0 | const Macro *macro; |
2893 | | #ifdef ENABLE_MACROS |
2894 | | if (!inScopeMacros) { |
2895 | | compileError(file, "Defining macros only allowed in table files."); |
2896 | | return 0; |
2897 | | } |
2898 | | if (compileMacro(file, ¯o)) { |
2899 | | *inScopeMacros = cons_macro(macro, *inScopeMacros); |
2900 | | return 1; |
2901 | | } |
2902 | | return 0; |
2903 | | #else |
2904 | 0 | compileError(file, "Macro feature is disabled."); |
2905 | 0 | return 0; |
2906 | 0 | #endif |
2907 | 0 | } |
2908 | 0 | case CTO_IncludeFile: { |
2909 | 0 | CharsString includedFile; |
2910 | 0 | if (!getToken(file, &token, "include file name")) return 0; |
2911 | 0 | if (!parseChars(file, &includedFile, &token)) return 0; |
2912 | 0 | return includeFile(file, &includedFile, table, displayTable); |
2913 | 0 | } |
2914 | 155 | case CTO_NoBack: |
2915 | 155 | if (nofor) { |
2916 | 0 | compileError(file, "%s already specified.", _lou_findOpcodeName(CTO_NoFor)); |
2917 | 0 | return 0; |
2918 | 0 | } |
2919 | 155 | noback = 1; |
2920 | 155 | goto doOpcode; |
2921 | 0 | case CTO_NoFor: |
2922 | 0 | if (noback) { |
2923 | 0 | compileError(file, "%s already specified.", _lou_findOpcodeName(CTO_NoBack)); |
2924 | 0 | return 0; |
2925 | 0 | } |
2926 | 0 | nofor = 1; |
2927 | 0 | goto doOpcode; |
2928 | 84 | case CTO_Space: |
2929 | 84 | return compileCharDef( |
2930 | 84 | file, opcode, CTC_Space, noback, nofor, table, displayTable); |
2931 | 0 | case CTO_Digit: |
2932 | 0 | return compileCharDef( |
2933 | 0 | file, opcode, CTC_Digit, noback, nofor, table, displayTable); |
2934 | 0 | case CTO_LitDigit: |
2935 | 0 | return compileCharDef( |
2936 | 0 | file, opcode, CTC_LitDigit, noback, nofor, table, displayTable); |
2937 | 0 | case CTO_Punctuation: |
2938 | 0 | return compileCharDef( |
2939 | 0 | file, opcode, CTC_Punctuation, noback, nofor, table, displayTable); |
2940 | 0 | case CTO_Math: |
2941 | 0 | return compileCharDef(file, opcode, CTC_Math, noback, nofor, table, displayTable); |
2942 | 1 | case CTO_Sign: |
2943 | 1 | return compileCharDef(file, opcode, CTC_Sign, noback, nofor, table, displayTable); |
2944 | 1 | case CTO_Letter: |
2945 | 1 | return compileCharDef( |
2946 | 1 | file, opcode, CTC_Letter, noback, nofor, table, displayTable); |
2947 | 18 | case CTO_UpperCase: |
2948 | 18 | return compileCharDef( |
2949 | 18 | file, opcode, CTC_UpperCase, noback, nofor, table, displayTable); |
2950 | 0 | case CTO_LowerCase: |
2951 | 0 | return compileCharDef( |
2952 | 0 | file, opcode, CTC_LowerCase, noback, nofor, table, displayTable); |
2953 | 2 | case CTO_Grouping: |
2954 | 2 | return compileGrouping(file, noback, nofor, table, displayTable); |
2955 | 0 | case CTO_Display: |
2956 | 0 | if (!displayTable) return 1; // ignore |
2957 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
2958 | 0 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
2959 | 0 | if (ruleChars.length != 1 || ruleDots.length != 1) { |
2960 | 0 | compileError(file, "Exactly one character and one cell are required."); |
2961 | 0 | return 0; |
2962 | 0 | } |
2963 | 0 | return putCharDotsMapping( |
2964 | 0 | file, ruleChars.chars[0], ruleDots.chars[0], displayTable); |
2965 | 0 | case CTO_UpLow: |
2966 | 10 | case CTO_None: { |
2967 | | // check if token is a macro name |
2968 | 10 | if (inScopeMacros) { |
2969 | 10 | const MacroList *macros = *inScopeMacros; |
2970 | 10 | while (macros) { |
2971 | 0 | const Macro *m = macros->head; |
2972 | 0 | if (token.length == strlen(m->name) && |
2973 | 0 | eqasc2uni((unsigned char *)m->name, token.chars, token.length)) { |
2974 | 0 | if (!inScopeMacros) { |
2975 | 0 | compileError(file, "Calling macros only allowed in table files."); |
2976 | 0 | return 0; |
2977 | 0 | } |
2978 | 0 | FileInfo tmpFile; |
2979 | 0 | memset(&tmpFile, 0, sizeof(tmpFile)); |
2980 | 0 | tmpFile.fileName = file->fileName; |
2981 | 0 | tmpFile.sourceFile = file->sourceFile; |
2982 | 0 | tmpFile.lineNumber = file->lineNumber; |
2983 | 0 | tmpFile.encoding = noEncoding; |
2984 | 0 | tmpFile.status = 0; |
2985 | 0 | tmpFile.linepos = 0; |
2986 | 0 | tmpFile.linelen = 0; |
2987 | 0 | int argument_count = 0; |
2988 | 0 | CharsString *arguments = |
2989 | 0 | malloc(m->argument_count * sizeof(CharsString)); |
2990 | 0 | while (argument_count < m->argument_count) { |
2991 | 0 | if (getToken(file, &token, "macro argument")) |
2992 | 0 | arguments[argument_count++] = token; |
2993 | 0 | else |
2994 | 0 | break; |
2995 | 0 | } |
2996 | 0 | if (argument_count < m->argument_count) { |
2997 | 0 | compileError(file, "Expected %d arguments", m->argument_count); |
2998 | 0 | return 0; |
2999 | 0 | } |
3000 | 0 | int i = 0; |
3001 | 0 | int subst = 0; |
3002 | 0 | int next = subst < m->substitution_count ? m->substitutions[2 * subst] |
3003 | 0 | : m->definition_length; |
3004 | 0 | for (;;) { |
3005 | 0 | while (i < next) { |
3006 | 0 | widechar c = m->definition[i++]; |
3007 | 0 | if (c == '\n') { |
3008 | 0 | if (!compileRule(&tmpFile, table, displayTable, |
3009 | 0 | inScopeMacros)) { |
3010 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3011 | 0 | "result of macro expansion was: %s", |
3012 | 0 | _lou_showString( |
3013 | 0 | tmpFile.line, tmpFile.linelen, 0)); |
3014 | 0 | return 0; |
3015 | 0 | } |
3016 | 0 | tmpFile.linepos = 0; |
3017 | 0 | tmpFile.linelen = 0; |
3018 | 0 | } else if (tmpFile.linelen >= MAXSTRING) { |
3019 | 0 | compileError(file, |
3020 | 0 | "Line exceeds %d characters (post macro " |
3021 | 0 | "expansion)", |
3022 | 0 | MAXSTRING); |
3023 | 0 | return 0; |
3024 | 0 | } else |
3025 | 0 | tmpFile.line[tmpFile.linelen++] = c; |
3026 | 0 | } |
3027 | 0 | if (subst < m->substitution_count) { |
3028 | 0 | CharsString arg = |
3029 | 0 | arguments[m->substitutions[2 * subst + 1] - 1]; |
3030 | 0 | for (int j = 0; j < arg.length; j++) |
3031 | 0 | tmpFile.line[tmpFile.linelen++] = arg.chars[j]; |
3032 | 0 | subst++; |
3033 | 0 | next = subst < m->substitution_count |
3034 | 0 | ? m->substitutions[2 * subst] |
3035 | 0 | : m->definition_length; |
3036 | 0 | } else { |
3037 | 0 | if (!compileRule( |
3038 | 0 | &tmpFile, table, displayTable, inScopeMacros)) { |
3039 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3040 | 0 | "result of macro expansion was: %s", |
3041 | 0 | _lou_showString( |
3042 | 0 | tmpFile.line, tmpFile.linelen, 0)); |
3043 | 0 | return 0; |
3044 | 0 | } |
3045 | 0 | break; |
3046 | 0 | } |
3047 | 0 | } |
3048 | 0 | return 1; |
3049 | 0 | } |
3050 | 0 | macros = macros->tail; |
3051 | 0 | } |
3052 | 10 | } |
3053 | 10 | if (opcode == CTO_UpLow) { |
3054 | 0 | compileError(file, "The uplow opcode is deprecated."); |
3055 | 0 | return 0; |
3056 | 0 | } |
3057 | 10 | compileError(file, "opcode %s not defined.", |
3058 | 10 | _lou_showString(token.chars, token.length, 0)); |
3059 | 10 | return 0; |
3060 | 10 | } |
3061 | | |
3062 | | /* now only opcodes follow that don't modify the display table */ |
3063 | 468 | default: |
3064 | 468 | if (!table) return 1; |
3065 | 468 | switch (opcode) { |
3066 | 0 | case CTO_Locale: |
3067 | 0 | compileWarning(file, |
3068 | 0 | "The locale opcode is not implemented. Use the locale meta data " |
3069 | 0 | "instead."); |
3070 | 0 | return 1; |
3071 | 9 | case CTO_Undefined: { |
3072 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3073 | 9 | TranslationTableOffset ruleOffset = (*table)->undefined; |
3074 | 9 | if (!compileBrailleIndicator(file, "undefined character opcode", |
3075 | 9 | CTO_Undefined, &ruleOffset, noback, nofor, table)) |
3076 | 0 | return 0; |
3077 | 9 | (*table)->undefined = ruleOffset; |
3078 | 9 | return 1; |
3079 | 9 | } |
3080 | 150 | case CTO_Match: { |
3081 | 150 | int ok = 0; |
3082 | 150 | widechar *patterns = NULL; |
3083 | 150 | TranslationTableRule *rule; |
3084 | 150 | TranslationTableOffset ruleOffset; |
3085 | 150 | CharsString ptn_before, ptn_after; |
3086 | 150 | TranslationTableOffset patternsOffset; |
3087 | 150 | int len, mrk; |
3088 | 150 | size_t patternsByteSize = sizeof(*patterns) * 27720; |
3089 | 150 | patterns = (widechar *)malloc(patternsByteSize); |
3090 | 150 | if (!patterns) _lou_outOfMemory(); |
3091 | 150 | memset(patterns, 0xffff, patternsByteSize); |
3092 | 150 | noback = 1; |
3093 | 150 | getCharacters(file, &ptn_before); |
3094 | 150 | getRuleCharsText(file, &ruleChars); |
3095 | 150 | getCharacters(file, &ptn_after); |
3096 | 150 | getRuleDotsPattern(file, &ruleDots); |
3097 | 150 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, &ruleOffset, |
3098 | 150 | &rule, noback, nofor, table)) |
3099 | 0 | goto CTO_Match_cleanup; |
3100 | 150 | if (ptn_before.chars[0] == '-' && ptn_before.length == 1) |
3101 | 0 | len = _lou_pattern_compile( |
3102 | 0 | &ptn_before.chars[0], 0, &patterns[1], 13841, *table, file); |
3103 | 150 | else |
3104 | 150 | len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length, |
3105 | 150 | &patterns[1], 13841, *table, file); |
3106 | 150 | if (!len) goto CTO_Match_cleanup; |
3107 | 150 | mrk = patterns[0] = len + 1; |
3108 | 150 | _lou_pattern_reverse(&patterns[1]); |
3109 | 150 | if (ptn_after.chars[0] == '-' && ptn_after.length == 1) |
3110 | 66 | len = _lou_pattern_compile( |
3111 | 66 | &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table, file); |
3112 | 84 | else |
3113 | 84 | len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length, |
3114 | 84 | &patterns[mrk], 13841, *table, file); |
3115 | 150 | if (!len) goto CTO_Match_cleanup; |
3116 | 150 | len += mrk; |
3117 | 150 | if (!allocateSpaceInTranslationTable( |
3118 | 150 | file, &patternsOffset, len * sizeof(widechar), table)) |
3119 | 0 | goto CTO_Match_cleanup; |
3120 | | // allocateSpaceInTranslationTable may have moved table, so make sure rule is |
3121 | | // still valid |
3122 | 150 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
3123 | 150 | memcpy(&(*table)->ruleArea[patternsOffset], patterns, len * sizeof(widechar)); |
3124 | 150 | rule->patterns = patternsOffset; |
3125 | 150 | ok = 1; |
3126 | 150 | CTO_Match_cleanup: |
3127 | 150 | free(patterns); |
3128 | 150 | return ok; |
3129 | 150 | } |
3130 | | |
3131 | 0 | case CTO_BackMatch: { |
3132 | 0 | int ok = 0; |
3133 | 0 | widechar *patterns = NULL; |
3134 | 0 | TranslationTableRule *rule; |
3135 | 0 | TranslationTableOffset ruleOffset; |
3136 | 0 | CharsString ptn_before, ptn_after; |
3137 | 0 | TranslationTableOffset patternOffset; |
3138 | 0 | int len, mrk; |
3139 | 0 | size_t patternsByteSize = sizeof(*patterns) * 27720; |
3140 | 0 | patterns = (widechar *)malloc(patternsByteSize); |
3141 | 0 | if (!patterns) _lou_outOfMemory(); |
3142 | 0 | memset(patterns, 0xffff, patternsByteSize); |
3143 | 0 | nofor = 1; |
3144 | 0 | getCharacters(file, &ptn_before); |
3145 | 0 | getRuleCharsText(file, &ruleChars); |
3146 | 0 | getCharacters(file, &ptn_after); |
3147 | 0 | getRuleDotsPattern(file, &ruleDots); |
3148 | 0 | if (!addRule(file, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, &rule, |
3149 | 0 | noback, nofor, table)) |
3150 | 0 | goto CTO_BackMatch_cleanup; |
3151 | 0 | if (ptn_before.chars[0] == '-' && ptn_before.length == 1) |
3152 | 0 | len = _lou_pattern_compile( |
3153 | 0 | &ptn_before.chars[0], 0, &patterns[1], 13841, *table, file); |
3154 | 0 | else |
3155 | 0 | len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length, |
3156 | 0 | &patterns[1], 13841, *table, file); |
3157 | 0 | if (!len) goto CTO_BackMatch_cleanup; |
3158 | 0 | mrk = patterns[0] = len + 1; |
3159 | 0 | _lou_pattern_reverse(&patterns[1]); |
3160 | 0 | if (ptn_after.chars[0] == '-' && ptn_after.length == 1) |
3161 | 0 | len = _lou_pattern_compile( |
3162 | 0 | &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table, file); |
3163 | 0 | else |
3164 | 0 | len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length, |
3165 | 0 | &patterns[mrk], 13841, *table, file); |
3166 | 0 | if (!len) goto CTO_BackMatch_cleanup; |
3167 | 0 | len += mrk; |
3168 | 0 | if (!allocateSpaceInTranslationTable( |
3169 | 0 | file, &patternOffset, len * sizeof(widechar), table)) |
3170 | 0 | goto CTO_BackMatch_cleanup; |
3171 | | // allocateSpaceInTranslationTable may have moved table, so make sure rule is |
3172 | | // still valid |
3173 | 0 | rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset]; |
3174 | 0 | memcpy(&(*table)->ruleArea[patternOffset], patterns, len * sizeof(widechar)); |
3175 | 0 | rule->patterns = patternOffset; |
3176 | 0 | ok = 1; |
3177 | 0 | CTO_BackMatch_cleanup: |
3178 | 0 | free(patterns); |
3179 | 0 | return ok; |
3180 | 0 | } |
3181 | | |
3182 | 1 | case CTO_CapsLetter: |
3183 | 11 | case CTO_BegCapsWord: |
3184 | 20 | case CTO_EndCapsWord: |
3185 | 21 | case CTO_BegCaps: |
3186 | 28 | case CTO_EndCaps: |
3187 | 28 | case CTO_BegCapsPhrase: |
3188 | 40 | case CTO_EndCapsPhrase: |
3189 | 43 | case CTO_LenCapsPhrase: |
3190 | | /* these 8 general purpose opcodes are compiled further down to more specific |
3191 | | * internal opcodes: |
3192 | | * - modeletter |
3193 | | * - begmodeword |
3194 | | * - endmodeword |
3195 | | * - begmode |
3196 | | * - endmode |
3197 | | * - begmodephrase |
3198 | | * - endmodephrase |
3199 | | * - lenmodephrase |
3200 | | */ |
3201 | 43 | case CTO_ModeLetter: |
3202 | 44 | case CTO_BegModeWord: |
3203 | 44 | case CTO_EndModeWord: |
3204 | 44 | case CTO_BegMode: |
3205 | 44 | case CTO_EndMode: |
3206 | 44 | case CTO_BegModePhrase: |
3207 | 44 | case CTO_EndModePhrase: |
3208 | 44 | case CTO_LenModePhrase: { |
3209 | 44 | TranslationTableCharacterAttributes mode; |
3210 | 44 | int i; |
3211 | 44 | switch (opcode) { |
3212 | 1 | case CTO_CapsLetter: |
3213 | 11 | case CTO_BegCapsWord: |
3214 | 20 | case CTO_EndCapsWord: |
3215 | 21 | case CTO_BegCaps: |
3216 | 28 | case CTO_EndCaps: |
3217 | 28 | case CTO_BegCapsPhrase: |
3218 | 40 | case CTO_EndCapsPhrase: |
3219 | 43 | case CTO_LenCapsPhrase: |
3220 | 43 | mode = CTC_UpperCase; |
3221 | 43 | i = 0; |
3222 | 43 | opcode += (CTO_ModeLetter - CTO_CapsLetter); |
3223 | 43 | break; |
3224 | 1 | default: |
3225 | 1 | if (!getToken(file, &token, "attribute name")) return 0; |
3226 | 1 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
3227 | 0 | return 0; |
3228 | 0 | } |
3229 | 1 | const CharacterClass *characterClass = findCharacterClass(&token, *table); |
3230 | 1 | if (!characterClass) { |
3231 | 1 | characterClass = |
3232 | 1 | addCharacterClass(file, token.chars, token.length, *table, 1); |
3233 | 1 | if (!characterClass) return 0; |
3234 | 1 | } |
3235 | 1 | mode = characterClass->attribute; |
3236 | 1 | if (!(mode == CTC_UpperCase || mode == CTC_Digit) && mode >= CTC_Space && |
3237 | 1 | mode <= CTC_LitDigit) { |
3238 | 0 | compileError(file, |
3239 | 0 | "mode must be \"uppercase\", \"digit\", or a custom " |
3240 | 0 | "attribute name."); |
3241 | 0 | return 0; |
3242 | 0 | } |
3243 | | /* check if this mode is already defined and if the number of modes does |
3244 | | * not exceed the maximal number */ |
3245 | 1 | if (mode == CTC_UpperCase) |
3246 | 0 | i = 0; |
3247 | 1 | else { |
3248 | 1 | for (i = 1; i < MAX_MODES && (*table)->modes[i].value; i++) { |
3249 | 0 | if ((*table)->modes[i].mode == mode) { |
3250 | 0 | break; |
3251 | 0 | } |
3252 | 0 | } |
3253 | 1 | if (i == MAX_MODES) { |
3254 | 0 | compileError(file, "Max number of modes (%i) reached", MAX_MODES); |
3255 | 0 | return 0; |
3256 | 0 | } |
3257 | 1 | } |
3258 | 44 | } |
3259 | 44 | if (!(*table)->modes[i].value) |
3260 | 20 | (*table)->modes[i] = (EmphasisClass){ plain_text, mode, |
3261 | 20 | 0x1 << (MAX_EMPH_CLASSES + i), MAX_EMPH_CLASSES + i }; |
3262 | 44 | switch (opcode) { |
3263 | 0 | case CTO_BegModePhrase: { |
3264 | | // not passing pointer because compileBrailleIndicator may reallocate |
3265 | | // table |
3266 | 0 | TranslationTableOffset ruleOffset = |
3267 | 0 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begPhraseOffset]; |
3268 | 0 | if (!compileBrailleIndicator(file, "first word capital sign", |
3269 | | // when mode is not caps (i != 0), provide enough information |
3270 | | // for back-translator to be able to recognize and ignore the |
3271 | | // indicator (but it won't be able to determine the mode) |
3272 | 0 | i == 0 ? CTO_BegCapsPhrase : CTO_BegModePhrase, &ruleOffset, |
3273 | 0 | noback, nofor, table)) |
3274 | 0 | return 0; |
3275 | 0 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begPhraseOffset] = ruleOffset; |
3276 | 0 | return 1; |
3277 | 0 | } |
3278 | 12 | case CTO_EndModePhrase: { |
3279 | 12 | TranslationTableOffset ruleOffset; |
3280 | 12 | switch (compileBeforeAfter(file)) { |
3281 | 1 | case 1: // before |
3282 | 1 | if ((*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseAfterOffset]) { |
3283 | 0 | compileError( |
3284 | 0 | file, "Capital sign after last word already defined."); |
3285 | 0 | return 0; |
3286 | 0 | } |
3287 | | // not passing pointer because compileBrailleIndicator may reallocate |
3288 | | // table |
3289 | 1 | ruleOffset = (*table)->emphRules[MAX_EMPH_CLASSES + i] |
3290 | 1 | [endPhraseBeforeOffset]; |
3291 | 1 | if (!compileBrailleIndicator(file, "capital sign before last word", |
3292 | 1 | i == 0 ? CTO_EndCapsPhraseBefore : CTO_EndModePhrase, |
3293 | 1 | &ruleOffset, noback, nofor, table)) |
3294 | 0 | return 0; |
3295 | 1 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseBeforeOffset] = |
3296 | 1 | ruleOffset; |
3297 | 1 | return 1; |
3298 | 11 | case 2: // after |
3299 | 11 | if ((*table)->emphRules[MAX_EMPH_CLASSES + i] |
3300 | 11 | [endPhraseBeforeOffset]) { |
3301 | 0 | compileError( |
3302 | 0 | file, "Capital sign before last word already defined."); |
3303 | 0 | return 0; |
3304 | 0 | } |
3305 | | // not passing pointer because compileBrailleIndicator may reallocate |
3306 | | // table |
3307 | 11 | ruleOffset = (*table)->emphRules[MAX_EMPH_CLASSES + i] |
3308 | 11 | [endPhraseAfterOffset]; |
3309 | 11 | if (!compileBrailleIndicator(file, "capital sign after last word", |
3310 | 11 | i == 0 ? CTO_EndCapsPhraseAfter : CTO_EndModePhrase, |
3311 | 11 | &ruleOffset, noback, nofor, table)) |
3312 | 1 | return 0; |
3313 | 10 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endPhraseAfterOffset] = |
3314 | 10 | ruleOffset; |
3315 | 10 | return 1; |
3316 | 0 | default: // error |
3317 | 0 | compileError(file, "Invalid lastword indicator location."); |
3318 | 0 | return 0; |
3319 | 12 | } |
3320 | 0 | return 0; |
3321 | 12 | } |
3322 | 1 | case CTO_BegMode: { |
3323 | | // not passing pointer because compileBrailleIndicator may reallocate |
3324 | | // table |
3325 | 1 | TranslationTableOffset ruleOffset = |
3326 | 1 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begOffset]; |
3327 | 1 | if (!compileBrailleIndicator(file, "first letter capital sign", |
3328 | 1 | i == 0 ? CTO_BegCaps : CTO_BegMode, &ruleOffset, noback, |
3329 | 1 | nofor, table)) |
3330 | 0 | return 0; |
3331 | 1 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begOffset] = ruleOffset; |
3332 | 1 | return 1; |
3333 | 1 | } |
3334 | 7 | case CTO_EndMode: { |
3335 | | // not passing pointer because compileBrailleIndicator may reallocate |
3336 | | // table |
3337 | 7 | TranslationTableOffset ruleOffset = |
3338 | 7 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endOffset]; |
3339 | 7 | if (!compileBrailleIndicator(file, "last letter capital sign", |
3340 | 7 | i == 0 ? CTO_EndCaps : CTO_EndMode, &ruleOffset, noback, |
3341 | 7 | nofor, table)) |
3342 | 0 | return 0; |
3343 | 7 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endOffset] = ruleOffset; |
3344 | 7 | return 1; |
3345 | 7 | } |
3346 | 1 | case CTO_ModeLetter: { |
3347 | | // not passing pointer because compileBrailleIndicator may reallocate |
3348 | | // table |
3349 | 1 | TranslationTableOffset ruleOffset = |
3350 | 1 | (*table)->emphRules[MAX_EMPH_CLASSES + i][letterOffset]; |
3351 | 1 | if (!compileBrailleIndicator(file, "single letter capital sign", |
3352 | 1 | i == 0 ? CTO_CapsLetter : CTO_ModeLetter, &ruleOffset, noback, |
3353 | 1 | nofor, table)) |
3354 | 0 | return 0; |
3355 | 1 | (*table)->emphRules[MAX_EMPH_CLASSES + i][letterOffset] = ruleOffset; |
3356 | 1 | return 1; |
3357 | 1 | } |
3358 | 11 | case CTO_BegModeWord: { |
3359 | | // not passing pointer because compileBrailleIndicator may reallocate |
3360 | | // table |
3361 | 11 | TranslationTableOffset ruleOffset = |
3362 | 11 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begWordOffset]; |
3363 | 11 | if (!compileBrailleIndicator(file, "capital word", |
3364 | 11 | i == 0 ? CTO_BegCapsWord : CTO_BegModeWord, &ruleOffset, |
3365 | 11 | noback, nofor, table)) |
3366 | 0 | return 0; |
3367 | 11 | (*table)->emphRules[MAX_EMPH_CLASSES + i][begWordOffset] = ruleOffset; |
3368 | 11 | return 1; |
3369 | 11 | } |
3370 | 9 | case CTO_EndModeWord: { |
3371 | | // not passing pointer because compileBrailleIndicator may reallocate |
3372 | | // table |
3373 | 9 | TranslationTableOffset ruleOffset = |
3374 | 9 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endWordOffset]; |
3375 | 9 | if (!compileBrailleIndicator(file, "capital word stop", |
3376 | 9 | i == 0 ? CTO_EndCapsWord : CTO_EndModeWord, &ruleOffset, |
3377 | 9 | noback, nofor, table)) |
3378 | 0 | return 0; |
3379 | 9 | (*table)->emphRules[MAX_EMPH_CLASSES + i][endWordOffset] = ruleOffset; |
3380 | 9 | return 1; |
3381 | 9 | } |
3382 | 3 | case CTO_LenModePhrase: |
3383 | 3 | return (*table)->emphRules[MAX_EMPH_CLASSES + i][lenPhraseOffset] = |
3384 | 3 | compileNumber(file); |
3385 | 0 | default: |
3386 | 0 | break; |
3387 | 44 | } |
3388 | 0 | break; |
3389 | 44 | } |
3390 | | |
3391 | | /* these 8 general purpose emphasis opcodes are compiled further down to more |
3392 | | * specific internal opcodes: |
3393 | | * - emphletter |
3394 | | * - begemphword |
3395 | | * - endemphword |
3396 | | * - begemph |
3397 | | * - endemph |
3398 | | * - begemphphrase |
3399 | | * - endemphphrase |
3400 | | * - lenemphphrase |
3401 | | */ |
3402 | 9 | case CTO_EmphClass: |
3403 | 9 | if (!getToken(file, &emphClass, "emphasis class")) { |
3404 | 0 | compileError(file, "emphclass must be followed by a valid class name."); |
3405 | 0 | return 0; |
3406 | 0 | } |
3407 | 9 | int k, i; |
3408 | 9 | char *s = malloc(sizeof(char) * (emphClass.length + 1)); |
3409 | 68 | for (k = 0; k < emphClass.length; k++) s[k] = (char)emphClass.chars[k]; |
3410 | 9 | s[k++] = '\0'; |
3411 | 10 | for (i = 0; i < MAX_EMPH_CLASSES && (*table)->emphClassNames[i]; i++) |
3412 | 1 | if (strcmp(s, (*table)->emphClassNames[i]) == 0) { |
3413 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "Duplicate emphasis class: %s", s); |
3414 | 0 | free(s); |
3415 | 0 | return 1; |
3416 | 0 | } |
3417 | 9 | if (i == MAX_EMPH_CLASSES) { |
3418 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3419 | 0 | "Max number of emphasis classes (%i) reached", MAX_EMPH_CLASSES); |
3420 | 0 | errorCount++; |
3421 | 0 | free(s); |
3422 | 0 | return 0; |
3423 | 0 | } |
3424 | 9 | switch (i) { |
3425 | | /* For backwards compatibility (i.e. because programs will assume |
3426 | | * the first 3 typeform bits are `italic', `underline' and `bold') |
3427 | | * we require that the first 3 emphclass definitions are (in that |
3428 | | * order): |
3429 | | * |
3430 | | * emphclass italic |
3431 | | * emphclass underline |
3432 | | * emphclass bold |
3433 | | * |
3434 | | * While it would be possible to use the emphclass opcode only for |
3435 | | * defining _additional_ classes (not allowing for them to be called |
3436 | | * italic, underline or bold), thereby reducing the amount of |
3437 | | * boilerplate, we deliberately choose not to do that in order to |
3438 | | * not give italic, underline and bold any special status. The |
3439 | | * hope is that eventually all programs will use liblouis for |
3440 | | * emphasis the recommended way (i.e. by looking up the supported |
3441 | | * typeforms in the documentation or API) so that we can drop this |
3442 | | * restriction. |
3443 | | */ |
3444 | 8 | case 0: |
3445 | 8 | if (strcmp(s, "italic") != 0) { |
3446 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3447 | 0 | "First emphasis class must be \"italic\" but got " |
3448 | 0 | "%s", |
3449 | 0 | s); |
3450 | 0 | errorCount++; |
3451 | 0 | free(s); |
3452 | 0 | return 0; |
3453 | 0 | } |
3454 | 8 | break; |
3455 | 8 | case 1: |
3456 | 1 | if (strcmp(s, "underline") != 0) { |
3457 | 1 | _lou_logMessage(LOU_LOG_ERROR, |
3458 | 1 | "Second emphasis class must be \"underline\" but " |
3459 | 1 | "got " |
3460 | 1 | "%s", |
3461 | 1 | s); |
3462 | 1 | errorCount++; |
3463 | 1 | free(s); |
3464 | 1 | return 0; |
3465 | 1 | } |
3466 | 0 | break; |
3467 | 0 | case 2: |
3468 | 0 | if (strcmp(s, "bold") != 0) { |
3469 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
3470 | 0 | "Third emphasis class must be \"bold\" but got " |
3471 | 0 | "%s", |
3472 | 0 | s); |
3473 | 0 | errorCount++; |
3474 | 0 | free(s); |
3475 | 0 | return 0; |
3476 | 0 | } |
3477 | 0 | break; |
3478 | 9 | } |
3479 | 8 | (*table)->emphClassNames[i] = s; |
3480 | 8 | (*table)->emphClasses[i] = (EmphasisClass){ emph_1 |
3481 | 8 | << i, /* relies on the order of typeforms emph_1..emph_10 */ |
3482 | 8 | 0, 0x1 << i, i }; |
3483 | 8 | return 1; |
3484 | 0 | case CTO_EmphLetter: |
3485 | 5 | case CTO_BegEmphWord: |
3486 | 5 | case CTO_EndEmphWord: |
3487 | 5 | case CTO_BegEmph: |
3488 | 5 | case CTO_EndEmph: |
3489 | 5 | case CTO_BegEmphPhrase: |
3490 | 5 | case CTO_EndEmphPhrase: |
3491 | 5 | case CTO_LenEmphPhrase: |
3492 | 5 | case CTO_EmphModeChars: |
3493 | 5 | case CTO_NoEmphChars: { |
3494 | 5 | if (!getToken(file, &token, "emphasis class")) return 0; |
3495 | 5 | if (!parseChars(file, &emphClass, &token)) return 0; |
3496 | 5 | char *s = malloc(sizeof(char) * (emphClass.length + 1)); |
3497 | 5 | int k, i; |
3498 | 35 | for (k = 0; k < emphClass.length; k++) s[k] = (char)emphClass.chars[k]; |
3499 | 5 | s[k++] = '\0'; |
3500 | 5 | for (i = 0; i < MAX_EMPH_CLASSES && (*table)->emphClassNames[i]; i++) |
3501 | 5 | if (strcmp(s, (*table)->emphClassNames[i]) == 0) break; |
3502 | 5 | if (i == MAX_EMPH_CLASSES || !(*table)->emphClassNames[i]) { |
3503 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Emphasis class %s not declared", s); |
3504 | 0 | errorCount++; |
3505 | 0 | free(s); |
3506 | 0 | return 0; |
3507 | 0 | } |
3508 | 5 | int ok = 0; |
3509 | 5 | switch (opcode) { |
3510 | 0 | case CTO_EmphLetter: { |
3511 | | // not passing pointer because compileBrailleIndicator may reallocate |
3512 | | // table |
3513 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][letterOffset]; |
3514 | | // provide enough information for back-translator to be able to recognize |
3515 | | // and ignore the indicator (but it won't be able to determine the |
3516 | | // emphasis class) |
3517 | 0 | if (!compileBrailleIndicator(file, "single letter", CTO_EmphLetter, |
3518 | 0 | &ruleOffset, noback, nofor, table)) |
3519 | 0 | break; |
3520 | 0 | (*table)->emphRules[i][letterOffset] = ruleOffset; |
3521 | 0 | ok = 1; |
3522 | 0 | break; |
3523 | 0 | } |
3524 | 5 | case CTO_BegEmphWord: { |
3525 | | // not passing pointer because compileBrailleIndicator may reallocate |
3526 | | // table |
3527 | 5 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][begWordOffset]; |
3528 | 5 | if (!compileBrailleIndicator(file, "word", CTO_BegEmphWord, &ruleOffset, |
3529 | 5 | noback, nofor, table)) |
3530 | 0 | break; |
3531 | 5 | (*table)->emphRules[i][begWordOffset] = ruleOffset; |
3532 | 5 | ok = 1; |
3533 | 5 | break; |
3534 | 5 | } |
3535 | 0 | case CTO_EndEmphWord: { |
3536 | | // not passing pointer because compileBrailleIndicator may reallocate |
3537 | | // table |
3538 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][endWordOffset]; |
3539 | 0 | if (!compileBrailleIndicator(file, "word stop", CTO_EndEmphWord, |
3540 | 0 | &ruleOffset, noback, nofor, table)) |
3541 | 0 | break; |
3542 | 0 | (*table)->emphRules[i][endWordOffset] = ruleOffset; |
3543 | 0 | ok = 1; |
3544 | 0 | break; |
3545 | 0 | } |
3546 | 0 | case CTO_BegEmph: { |
3547 | | /* fail if both begemph and any of begemphphrase or begemphword are |
3548 | | * defined */ |
3549 | 0 | if ((*table)->emphRules[i][begWordOffset] || |
3550 | 0 | (*table)->emphRules[i][begPhraseOffset]) { |
3551 | 0 | compileError(file, |
3552 | 0 | "Cannot define emphasis for both no context and word or " |
3553 | 0 | "phrase context, i.e. cannot have both begemph and " |
3554 | 0 | "begemphword or begemphphrase."); |
3555 | 0 | break; |
3556 | 0 | } |
3557 | | // not passing pointer because compileBrailleIndicator may reallocate |
3558 | | // table |
3559 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][begOffset]; |
3560 | 0 | if (!compileBrailleIndicator(file, "first letter", CTO_BegEmph, |
3561 | 0 | &ruleOffset, noback, nofor, table)) |
3562 | 0 | break; |
3563 | 0 | (*table)->emphRules[i][begOffset] = ruleOffset; |
3564 | 0 | ok = 1; |
3565 | 0 | break; |
3566 | 0 | } |
3567 | 0 | case CTO_EndEmph: { |
3568 | 0 | if ((*table)->emphRules[i][endWordOffset] || |
3569 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset] || |
3570 | 0 | (*table)->emphRules[i][endPhraseAfterOffset]) { |
3571 | 0 | compileError(file, |
3572 | 0 | "Cannot define emphasis for both no context and word or " |
3573 | 0 | "phrase context, i.e. cannot have both endemph and " |
3574 | 0 | "endemphword or endemphphrase."); |
3575 | 0 | break; |
3576 | 0 | } |
3577 | | // not passing pointer because compileBrailleIndicator may reallocate |
3578 | | // table |
3579 | 0 | TranslationTableOffset ruleOffset = (*table)->emphRules[i][endOffset]; |
3580 | 0 | if (!compileBrailleIndicator(file, "last letter", CTO_EndEmph, |
3581 | 0 | &ruleOffset, noback, nofor, table)) |
3582 | 0 | break; |
3583 | 0 | (*table)->emphRules[i][endOffset] = ruleOffset; |
3584 | 0 | ok = 1; |
3585 | 0 | break; |
3586 | 0 | } |
3587 | 0 | case CTO_BegEmphPhrase: { |
3588 | | // not passing pointer because compileBrailleIndicator may reallocate |
3589 | | // table |
3590 | 0 | TranslationTableOffset ruleOffset = |
3591 | 0 | (*table)->emphRules[i][begPhraseOffset]; |
3592 | 0 | if (!compileBrailleIndicator(file, "first word", CTO_BegEmphPhrase, |
3593 | 0 | &ruleOffset, noback, nofor, table)) |
3594 | 0 | break; |
3595 | 0 | (*table)->emphRules[i][begPhraseOffset] = ruleOffset; |
3596 | 0 | ok = 1; |
3597 | 0 | break; |
3598 | 0 | } |
3599 | 0 | case CTO_EndEmphPhrase: |
3600 | 0 | switch (compileBeforeAfter(file)) { |
3601 | 0 | case 1: { // before |
3602 | 0 | if ((*table)->emphRules[i][endPhraseAfterOffset]) { |
3603 | 0 | compileError(file, "last word after already defined."); |
3604 | 0 | break; |
3605 | 0 | } |
3606 | | // not passing pointer because compileBrailleIndicator may reallocate |
3607 | | // table |
3608 | 0 | TranslationTableOffset ruleOffset = |
3609 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset]; |
3610 | 0 | if (!compileBrailleIndicator(file, "last word before", |
3611 | 0 | CTO_EndEmphPhrase, &ruleOffset, noback, nofor, table)) |
3612 | 0 | break; |
3613 | 0 | (*table)->emphRules[i][endPhraseBeforeOffset] = ruleOffset; |
3614 | 0 | ok = 1; |
3615 | 0 | break; |
3616 | 0 | } |
3617 | 0 | case 2: { // after |
3618 | 0 | if ((*table)->emphRules[i][endPhraseBeforeOffset]) { |
3619 | 0 | compileError(file, "last word before already defined."); |
3620 | 0 | break; |
3621 | 0 | } |
3622 | | // not passing pointer because compileBrailleIndicator may reallocate |
3623 | | // table |
3624 | 0 | TranslationTableOffset ruleOffset = |
3625 | 0 | (*table)->emphRules[i][endPhraseAfterOffset]; |
3626 | 0 | if (!compileBrailleIndicator(file, "last word after", |
3627 | 0 | CTO_EndEmphPhrase, &ruleOffset, noback, nofor, table)) |
3628 | 0 | break; |
3629 | 0 | (*table)->emphRules[i][endPhraseAfterOffset] = ruleOffset; |
3630 | 0 | ok = 1; |
3631 | 0 | break; |
3632 | 0 | } |
3633 | 0 | default: // error |
3634 | 0 | compileError(file, "Invalid lastword indicator location."); |
3635 | 0 | break; |
3636 | 0 | } |
3637 | 0 | break; |
3638 | 0 | case CTO_LenEmphPhrase: |
3639 | 0 | if (((*table)->emphRules[i][lenPhraseOffset] = compileNumber(file))) |
3640 | 0 | ok = 1; |
3641 | 0 | break; |
3642 | 0 | case CTO_EmphModeChars: { |
3643 | 0 | if (!getRuleCharsText(file, &ruleChars)) break; |
3644 | 0 | widechar *emphmodechars = (*table)->emphModeChars[i]; |
3645 | 0 | int len; |
3646 | 0 | for (len = 0; len < EMPHMODECHARSSIZE && emphmodechars[len]; len++) |
3647 | 0 | ; |
3648 | 0 | if (len + ruleChars.length > EMPHMODECHARSSIZE) { |
3649 | 0 | compileError(file, "More than %d characters", EMPHMODECHARSSIZE); |
3650 | 0 | break; |
3651 | 0 | } |
3652 | 0 | ok = 1; |
3653 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3654 | 0 | if (!getChar(ruleChars.chars[k], *table, NULL)) { |
3655 | 0 | compileError(file, "Emphasis mode character undefined"); |
3656 | 0 | ok = 0; |
3657 | 0 | break; |
3658 | 0 | } |
3659 | 0 | emphmodechars[len++] = ruleChars.chars[k]; |
3660 | 0 | } |
3661 | 0 | break; |
3662 | 0 | } |
3663 | 0 | case CTO_NoEmphChars: { |
3664 | 0 | if (!getRuleCharsText(file, &ruleChars)) break; |
3665 | 0 | widechar *noemphchars = (*table)->noEmphChars[i]; |
3666 | 0 | int len; |
3667 | 0 | for (len = 0; len < NOEMPHCHARSSIZE && noemphchars[len]; len++) |
3668 | 0 | ; |
3669 | 0 | if (len + ruleChars.length > NOEMPHCHARSSIZE) { |
3670 | 0 | compileError(file, "More than %d characters", NOEMPHCHARSSIZE); |
3671 | 0 | break; |
3672 | 0 | } |
3673 | 0 | ok = 1; |
3674 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3675 | 0 | if (!getChar(ruleChars.chars[k], *table, NULL)) { |
3676 | 0 | compileError(file, "Character undefined"); |
3677 | 0 | ok = 0; |
3678 | 0 | break; |
3679 | 0 | } |
3680 | 0 | noemphchars[len++] = ruleChars.chars[k]; |
3681 | 0 | } |
3682 | 0 | break; |
3683 | 0 | } |
3684 | 0 | default: |
3685 | 0 | break; |
3686 | 5 | } |
3687 | 5 | free(s); |
3688 | 5 | return ok; |
3689 | 5 | } |
3690 | 9 | case CTO_LetterSign: { |
3691 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3692 | 9 | TranslationTableOffset ruleOffset = (*table)->letterSign; |
3693 | 9 | if (!compileBrailleIndicator(file, "letter sign", CTO_LetterSign, &ruleOffset, |
3694 | 9 | noback, nofor, table)) |
3695 | 0 | return 0; |
3696 | 9 | (*table)->letterSign = ruleOffset; |
3697 | 9 | return 1; |
3698 | 9 | } |
3699 | 0 | case CTO_NoLetsignBefore: |
3700 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3701 | 0 | if (((*table)->noLetsignBeforeCount + ruleChars.length) > LETSIGNBEFORESIZE) { |
3702 | 0 | compileError(file, "More than %d characters", LETSIGNBEFORESIZE); |
3703 | 0 | return 0; |
3704 | 0 | } |
3705 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3706 | 0 | (*table)->noLetsignBefore[(*table)->noLetsignBeforeCount++] = |
3707 | 0 | ruleChars.chars[k]; |
3708 | 0 | return 1; |
3709 | 0 | case CTO_NoLetsign: |
3710 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3711 | 0 | if (((*table)->noLetsignCount + ruleChars.length) > LETSIGNSIZE) { |
3712 | 0 | compileError(file, "More than %d characters", LETSIGNSIZE); |
3713 | 0 | return 0; |
3714 | 0 | } |
3715 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3716 | 0 | (*table)->noLetsign[(*table)->noLetsignCount++] = ruleChars.chars[k]; |
3717 | 0 | return 1; |
3718 | 0 | case CTO_NoLetsignAfter: |
3719 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3720 | 0 | if (((*table)->noLetsignAfterCount + ruleChars.length) > LETSIGNAFTERSIZE) { |
3721 | 0 | compileError(file, "More than %d characters", LETSIGNAFTERSIZE); |
3722 | 0 | return 0; |
3723 | 0 | } |
3724 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3725 | 0 | (*table)->noLetsignAfter[(*table)->noLetsignAfterCount++] = |
3726 | 0 | ruleChars.chars[k]; |
3727 | 0 | return 1; |
3728 | 6 | case CTO_NumberSign: { |
3729 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3730 | 6 | TranslationTableOffset ruleOffset = (*table)->numberSign; |
3731 | 6 | if (!compileBrailleIndicator(file, "number sign", CTO_NumberSign, &ruleOffset, |
3732 | 6 | noback, nofor, table)) |
3733 | 0 | return 0; |
3734 | 6 | (*table)->numberSign = ruleOffset; |
3735 | 6 | return 1; |
3736 | 6 | } |
3737 | 0 | case CTO_NoNumberSign: { |
3738 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3739 | 0 | TranslationTableOffset ruleOffset = (*table)->noNumberSign; |
3740 | 0 | if (!compileBrailleIndicator(file, "no number sign", CTO_NoNumberSign, |
3741 | 0 | &ruleOffset, noback, nofor, table)) |
3742 | 0 | return 0; |
3743 | 0 | (*table)->noNumberSign = ruleOffset; |
3744 | 0 | return 1; |
3745 | 0 | } |
3746 | | |
3747 | 6 | case CTO_NumericModeChars: |
3748 | 6 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3749 | 374 | for (int k = 0; k < ruleChars.length; k++) { |
3750 | 368 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3751 | 368 | if (!c) { |
3752 | 0 | compileError(file, "Numeric mode character undefined: %s", |
3753 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
3754 | 0 | return 0; |
3755 | 0 | } |
3756 | 368 | c->attributes |= CTC_NumericMode; |
3757 | 368 | (*table)->usesNumericMode = 1; |
3758 | 368 | } |
3759 | 6 | return 1; |
3760 | | |
3761 | 0 | case CTO_MidEndNumericModeChars: |
3762 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3763 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3764 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3765 | 0 | if (!c) { |
3766 | 0 | compileError(file, "Midendnumeric mode character undefined"); |
3767 | 0 | return 0; |
3768 | 0 | } |
3769 | 0 | c->attributes |= CTC_MidEndNumericMode; |
3770 | 0 | (*table)->usesNumericMode = 1; |
3771 | 0 | } |
3772 | 0 | return 1; |
3773 | | |
3774 | 0 | case CTO_NumericNoContractChars: |
3775 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3776 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3777 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3778 | 0 | if (!c) { |
3779 | 0 | compileError(file, "Numeric no contraction character undefined"); |
3780 | 0 | return 0; |
3781 | 0 | } |
3782 | 0 | c->attributes |= CTC_NumericNoContract; |
3783 | 0 | (*table)->usesNumericMode = 1; |
3784 | 0 | } |
3785 | 0 | return 1; |
3786 | | |
3787 | 0 | case CTO_NoContractSign: { |
3788 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3789 | 0 | TranslationTableOffset ruleOffset = (*table)->noContractSign; |
3790 | 0 | if (!compileBrailleIndicator(file, "no contractions sign", CTO_NoContractSign, |
3791 | 0 | &ruleOffset, noback, nofor, table)) |
3792 | 0 | return 0; |
3793 | 0 | (*table)->noContractSign = ruleOffset; |
3794 | 0 | return 1; |
3795 | 0 | } |
3796 | 0 | case CTO_SeqDelimiter: |
3797 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3798 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3799 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3800 | 0 | if (!c) { |
3801 | 0 | compileError(file, "Sequence delimiter character undefined"); |
3802 | 0 | return 0; |
3803 | 0 | } |
3804 | 0 | c->attributes |= CTC_SeqDelimiter; |
3805 | 0 | (*table)->usesSequences = 1; |
3806 | 0 | } |
3807 | 0 | return 1; |
3808 | | |
3809 | 0 | case CTO_SeqBeforeChars: |
3810 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3811 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3812 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3813 | 0 | if (!c) { |
3814 | 0 | compileError(file, "Sequence before character undefined"); |
3815 | 0 | return 0; |
3816 | 0 | } |
3817 | 0 | c->attributes |= CTC_SeqBefore; |
3818 | 0 | } |
3819 | 0 | return 1; |
3820 | | |
3821 | 0 | case CTO_SeqAfterChars: |
3822 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3823 | 0 | for (int k = 0; k < ruleChars.length; k++) { |
3824 | 0 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3825 | 0 | if (!c) { |
3826 | 0 | compileError(file, "Sequence after character undefined"); |
3827 | 0 | return 0; |
3828 | 0 | } |
3829 | 0 | c->attributes |= CTC_SeqAfter; |
3830 | 0 | } |
3831 | 0 | return 1; |
3832 | | |
3833 | 0 | case CTO_SeqAfterPattern: |
3834 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3835 | 0 | if (((*table)->seqPatternsCount + ruleChars.length + 1) > SEQPATTERNSIZE) { |
3836 | 0 | compileError(file, "More than %d characters", SEQPATTERNSIZE); |
3837 | 0 | return 0; |
3838 | 0 | } |
3839 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3840 | 0 | (*table)->seqPatterns[(*table)->seqPatternsCount++] = ruleChars.chars[k]; |
3841 | 0 | (*table)->seqPatterns[(*table)->seqPatternsCount++] = 0; |
3842 | 0 | return 1; |
3843 | | |
3844 | 0 | case CTO_SeqAfterExpression: |
3845 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3846 | 0 | if ((ruleChars.length + 1) > SEQPATTERNSIZE) { |
3847 | 0 | compileError(file, "More than %d characters", SEQPATTERNSIZE); |
3848 | 0 | return 0; |
3849 | 0 | } |
3850 | 0 | for (int k = 0; k < ruleChars.length; k++) |
3851 | 0 | (*table)->seqAfterExpression[k] = ruleChars.chars[k]; |
3852 | 0 | (*table)->seqAfterExpression[ruleChars.length] = 0; |
3853 | 0 | (*table)->seqAfterExpressionLength = ruleChars.length; |
3854 | 0 | return 1; |
3855 | | |
3856 | 1 | case CTO_CapsModeChars: |
3857 | 1 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3858 | 2 | for (int k = 0; k < ruleChars.length; k++) { |
3859 | 1 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
3860 | 1 | if (!c) { |
3861 | 0 | compileError(file, "Capital mode character undefined"); |
3862 | 0 | return 0; |
3863 | 0 | } |
3864 | 1 | c->attributes |= CTC_CapsMode; |
3865 | 1 | (*table)->hasCapsModeChars = 1; |
3866 | 1 | } |
3867 | 1 | return 1; |
3868 | | |
3869 | 0 | case CTO_BegComp: { |
3870 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3871 | 0 | TranslationTableOffset ruleOffset = (*table)->begComp; |
3872 | 0 | if (!compileBrailleIndicator(file, "begin computer braille", CTO_BegComp, |
3873 | 0 | &ruleOffset, noback, nofor, table)) |
3874 | 0 | return 0; |
3875 | 0 | (*table)->begComp = ruleOffset; |
3876 | 0 | return 1; |
3877 | 0 | } |
3878 | 0 | case CTO_EndComp: { |
3879 | | // not passing pointer because compileBrailleIndicator may reallocate table |
3880 | 0 | TranslationTableOffset ruleOffset = (*table)->endComp; |
3881 | 0 | if (!compileBrailleIndicator(file, "end computer braslle", CTO_EndComp, |
3882 | 0 | &ruleOffset, noback, nofor, table)) |
3883 | 0 | return 0; |
3884 | 0 | (*table)->endComp = ruleOffset; |
3885 | 0 | return 1; |
3886 | 0 | } |
3887 | 12 | case CTO_NoCross: |
3888 | 12 | if (nocross) { |
3889 | 0 | compileError( |
3890 | 0 | file, "%s already specified.", _lou_findOpcodeName(CTO_NoCross)); |
3891 | 0 | return 0; |
3892 | 0 | } |
3893 | 12 | nocross = 1; |
3894 | 12 | goto doOpcode; |
3895 | 4 | case CTO_Syllable: |
3896 | 4 | (*table)->syllables = 1; |
3897 | 4 | case CTO_Always: |
3898 | 6 | case CTO_LargeSign: |
3899 | 9 | case CTO_WholeWord: |
3900 | 9 | case CTO_PartWord: |
3901 | 10 | case CTO_JoinNum: |
3902 | 35 | case CTO_JoinableWord: |
3903 | 35 | case CTO_LowWord: |
3904 | 35 | case CTO_SuffixableWord: |
3905 | 35 | case CTO_PrefixableWord: |
3906 | 35 | case CTO_BegWord: |
3907 | 35 | case CTO_BegMidWord: |
3908 | 35 | case CTO_MidWord: |
3909 | 35 | case CTO_MidEndWord: |
3910 | 35 | case CTO_EndWord: |
3911 | 35 | case CTO_PrePunc: |
3912 | 38 | case CTO_PostPunc: |
3913 | 39 | case CTO_BegNum: |
3914 | 39 | case CTO_MidNum: |
3915 | 41 | case CTO_EndNum: |
3916 | 42 | case CTO_Repeated: |
3917 | 44 | case CTO_RepWord: |
3918 | 44 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3919 | 44 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
3920 | 44 | if (ruleDots.length == 0) |
3921 | | // check that all characters in a rule with `=` as second operand are |
3922 | | // defined (or based on another character) |
3923 | 2 | for (int k = 0; k < ruleChars.length; k++) { |
3924 | 1 | TranslationTableCharacter *c = |
3925 | 1 | getChar(ruleChars.chars[k], *table, NULL); |
3926 | 1 | if (!(c && (c->definitionRule || c->basechar))) { |
3927 | 0 | compileError(file, "Character %s is not defined", |
3928 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
3929 | 0 | return 0; |
3930 | 0 | } |
3931 | 1 | } |
3932 | 44 | TranslationTableRule *r; |
3933 | 44 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, &r, |
3934 | 44 | noback, nofor, table)) |
3935 | 0 | return 0; |
3936 | 44 | if (nocross) r->nocross = 1; |
3937 | 44 | return 1; |
3938 | | // if (opcode == CTO_MidNum) |
3939 | | // { |
3940 | | // TranslationTableCharacter *c = getChar(ruleChars.chars[0]); |
3941 | | // if(c) |
3942 | | // c->attributes |= CTC_NumericMode; |
3943 | | // } |
3944 | 3 | case CTO_RepEndWord: |
3945 | 3 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3946 | 3 | CharsString dots; |
3947 | 3 | if (!getToken(file, &dots, "dots,dots operand")) return 0; |
3948 | 3 | int len = dots.length; |
3949 | 12 | for (int k = 0; k < len - 1; k++) { |
3950 | 12 | if (dots.chars[k] == ',') { |
3951 | 3 | dots.length = k; |
3952 | 3 | if (!parseDots(file, &ruleDots, &dots)) return 0; |
3953 | 3 | ruleDots.chars[ruleDots.length++] = ','; |
3954 | 3 | k++; |
3955 | 3 | if (k == len - 1 && dots.chars[k] == '=') { |
3956 | | // check that all characters are defined (or based on another |
3957 | | // character) |
3958 | 2 | for (int l = 0; l < ruleChars.length; l++) { |
3959 | 1 | TranslationTableCharacter *c = |
3960 | 1 | getChar(ruleChars.chars[l], *table, NULL); |
3961 | 1 | if (!(c && (c->definitionRule || c->basechar))) { |
3962 | 0 | compileError(file, "Character %s is not defined", |
3963 | 0 | _lou_showString(&ruleChars.chars[l], 1, 0)); |
3964 | 0 | return 0; |
3965 | 0 | } |
3966 | 1 | } |
3967 | 2 | } else { |
3968 | 2 | CharsString x, y; |
3969 | 2 | x.length = 0; |
3970 | 8 | while (k < len) x.chars[x.length++] = dots.chars[k++]; |
3971 | 2 | if (parseDots(file, &y, &x)) |
3972 | 6 | for (int l = 0; l < y.length; l++) |
3973 | 4 | ruleDots.chars[ruleDots.length++] = y.chars[l]; |
3974 | 2 | } |
3975 | 3 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, |
3976 | 3 | NULL, NULL, noback, nofor, table); |
3977 | 3 | } |
3978 | 12 | } |
3979 | 0 | return 0; |
3980 | 10 | case CTO_CompDots: |
3981 | 10 | case CTO_Comp6: { |
3982 | 10 | TranslationTableOffset ruleOffset; |
3983 | 10 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3984 | 10 | if (ruleChars.length != 1) { |
3985 | 0 | compileError(file, "first operand must be 1 character"); |
3986 | 0 | return 0; |
3987 | 0 | } |
3988 | 10 | if (nofor || noback) { |
3989 | 0 | compileWarning(file, "nofor and noback not allowed on comp6 rules"); |
3990 | 0 | } |
3991 | 10 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
3992 | 10 | if (!addRule(file, opcode, &ruleChars, &ruleDots, after, before, &ruleOffset, |
3993 | 10 | NULL, noback, nofor, table)) |
3994 | 0 | return 0; |
3995 | 10 | return 1; |
3996 | 10 | } |
3997 | 0 | case CTO_ExactDots: |
3998 | 0 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
3999 | 0 | if (ruleChars.chars[0] != '@') { |
4000 | 0 | compileError(file, "The operand must begin with an at sign (@)"); |
4001 | 0 | return 0; |
4002 | 0 | } |
4003 | 0 | for (int k = 1; k < ruleChars.length; k++) |
4004 | 0 | scratchPad.chars[k - 1] = ruleChars.chars[k]; |
4005 | 0 | scratchPad.length = ruleChars.length - 1; |
4006 | 0 | if (!parseDots(file, &ruleDots, &scratchPad)) return 0; |
4007 | 0 | return addRule(file, opcode, &ruleChars, &ruleDots, before, after, NULL, NULL, |
4008 | 0 | noback, nofor, table); |
4009 | 5 | case CTO_CapsNoCont: { |
4010 | 5 | TranslationTableOffset ruleOffset; |
4011 | 5 | ruleChars.length = 1; |
4012 | 5 | ruleChars.chars[0] = 'a'; |
4013 | 5 | if (!addRule(file, opcode, &ruleChars, NULL, after, before, &ruleOffset, NULL, |
4014 | 5 | noback, nofor, table)) |
4015 | 0 | return 0; |
4016 | 5 | (*table)->capsNoCont = ruleOffset; |
4017 | 5 | return 1; |
4018 | 5 | } |
4019 | 22 | case CTO_Replace: |
4020 | 22 | if (getRuleCharsText(file, &ruleChars)) { |
4021 | 22 | if (atEndOfLine(file)) |
4022 | 4 | ruleDots.length = ruleDots.chars[0] = 0; |
4023 | 18 | else { |
4024 | 18 | getRuleDotsText(file, &ruleDots); |
4025 | 18 | if (ruleDots.chars[0] == '#') |
4026 | 0 | ruleDots.length = ruleDots.chars[0] = 0; |
4027 | 18 | else if (ruleDots.chars[0] == '\\' && ruleDots.chars[1] == '#') |
4028 | 0 | memmove(&ruleDots.chars[0], &ruleDots.chars[1], |
4029 | 0 | ruleDots.length-- * CHARSIZE); |
4030 | 18 | } |
4031 | 22 | } |
4032 | 528 | for (int k = 0; k < ruleChars.length; k++) |
4033 | 506 | putChar(file, ruleChars.chars[k], table, NULL, (*table)->ruleCounter); |
4034 | 121 | for (int k = 0; k < ruleDots.length; k++) |
4035 | 99 | putChar(file, ruleDots.chars[k], table, NULL, (*table)->ruleCounter); |
4036 | 22 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, NULL, |
4037 | 22 | noback, nofor, table); |
4038 | 40 | case CTO_Correct: |
4039 | 40 | (*table)->corrections = 1; |
4040 | 40 | goto doPass; |
4041 | 5 | case CTO_Pass2: |
4042 | 5 | if ((*table)->numPasses < 2) (*table)->numPasses = 2; |
4043 | 5 | goto doPass; |
4044 | 5 | case CTO_Pass3: |
4045 | 5 | if ((*table)->numPasses < 3) (*table)->numPasses = 3; |
4046 | 5 | goto doPass; |
4047 | 5 | case CTO_Pass4: |
4048 | 5 | if ((*table)->numPasses < 4) (*table)->numPasses = 4; |
4049 | 55 | doPass: |
4050 | 65 | case CTO_Context: |
4051 | 65 | if (!(nofor || noback)) { |
4052 | 0 | compileError(file, "%s or %s must be specified.", |
4053 | 0 | _lou_findOpcodeName(CTO_NoFor), _lou_findOpcodeName(CTO_NoBack)); |
4054 | 0 | return 0; |
4055 | 0 | } |
4056 | 65 | return compilePassOpcode(file, opcode, noback, nofor, table); |
4057 | 0 | case CTO_Contraction: |
4058 | 0 | case CTO_NoCont: |
4059 | 0 | case CTO_CompBrl: |
4060 | 1 | case CTO_Literal: |
4061 | 1 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4062 | | // check that all characters in a compbrl, contraction, |
4063 | | // nocont or literal rule are defined (or based on another |
4064 | | // character) |
4065 | 2 | for (int k = 0; k < ruleChars.length; k++) { |
4066 | 1 | TranslationTableCharacter *c = getChar(ruleChars.chars[k], *table, NULL); |
4067 | 1 | if (!(c && (c->definitionRule || c->basechar))) { |
4068 | 0 | compileError(file, "Character %s is not defined", |
4069 | 0 | _lou_showString(&ruleChars.chars[k], 1, 0)); |
4070 | 0 | return 0; |
4071 | 0 | } |
4072 | 1 | } |
4073 | 1 | return addRule(file, opcode, &ruleChars, NULL, after, before, NULL, NULL, |
4074 | 1 | noback, nofor, table); |
4075 | 0 | case CTO_MultInd: { |
4076 | 0 | ruleChars.length = 0; |
4077 | 0 | if (!getToken(file, &token, "multiple braille indicators") || |
4078 | 0 | !parseDots(file, &cells, &token)) |
4079 | 0 | return 0; |
4080 | 0 | while (getToken(file, &token, "multind opcodes")) { |
4081 | 0 | opcode = getOpcode(file, &token); |
4082 | 0 | if (opcode == CTO_None) { |
4083 | 0 | compileError(file, "opcode %s not defined.", |
4084 | 0 | _lou_showString(token.chars, token.length, 0)); |
4085 | 0 | return 0; |
4086 | 0 | } |
4087 | 0 | if (!(opcode >= CTO_CapsLetter && opcode < CTO_MultInd)) { |
4088 | 0 | compileError(file, "Not a braille indicator opcode."); |
4089 | 0 | return 0; |
4090 | 0 | } |
4091 | 0 | ruleChars.chars[ruleChars.length++] = (widechar)opcode; |
4092 | 0 | if (atEndOfLine(file)) break; |
4093 | 0 | } |
4094 | 0 | return addRule(file, CTO_MultInd, &ruleChars, &cells, after, before, NULL, |
4095 | 0 | NULL, noback, nofor, table); |
4096 | 0 | } |
4097 | | |
4098 | 32 | case CTO_Class: |
4099 | 32 | compileWarning(file, "class is deprecated, use attribute instead"); |
4100 | 32 | case CTO_Attribute: { |
4101 | 32 | if (nofor || noback) { |
4102 | 0 | compileWarning( |
4103 | 0 | file, "nofor and noback not allowed before class/attribute"); |
4104 | 0 | } |
4105 | 32 | if ((opcode == CTO_Class && (*table)->usesAttributeOrClass == 1) || |
4106 | 32 | (opcode == CTO_Attribute && (*table)->usesAttributeOrClass == 2)) { |
4107 | 0 | compileError(file, |
4108 | 0 | "attribute and class rules must not be both present in a table"); |
4109 | 0 | return 0; |
4110 | 0 | } |
4111 | 32 | if (opcode == CTO_Class) |
4112 | 32 | (*table)->usesAttributeOrClass = 2; |
4113 | 0 | else |
4114 | 0 | (*table)->usesAttributeOrClass = 1; |
4115 | 32 | if (!getToken(file, &token, "attribute name")) { |
4116 | 0 | compileError(file, "Expected %s", "attribute name"); |
4117 | 0 | return 0; |
4118 | 0 | } |
4119 | 32 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
4120 | 0 | return 0; |
4121 | 0 | } |
4122 | | |
4123 | 32 | TranslationTableCharacterAttributes attribute = 0; |
4124 | 32 | { |
4125 | 32 | int attrNumber = -1; |
4126 | 32 | switch (token.chars[0]) { |
4127 | 0 | case '0': |
4128 | 0 | case '1': |
4129 | 0 | case '2': |
4130 | 0 | case '3': |
4131 | 0 | case '4': |
4132 | 0 | case '5': |
4133 | 0 | case '6': |
4134 | 0 | case '7': |
4135 | 0 | case '8': |
4136 | 0 | case '9': |
4137 | 0 | attrNumber = token.chars[0] - '0'; |
4138 | 0 | break; |
4139 | 32 | } |
4140 | 32 | if (attrNumber >= 0) { |
4141 | 0 | if (opcode == CTO_Class) { |
4142 | 0 | compileError(file, |
4143 | 0 | "Invalid class name: may not contain digits, use " |
4144 | 0 | "attribute instead of class"); |
4145 | 0 | return 0; |
4146 | 0 | } |
4147 | 0 | if (token.length > 1 || attrNumber > 7) { |
4148 | 0 | compileError(file, |
4149 | 0 | "Invalid attribute name: must be a digit between 0 and 7 " |
4150 | 0 | "or a word containing only letters"); |
4151 | 0 | return 0; |
4152 | 0 | } |
4153 | 0 | if (!(*table)->numberedAttributes[attrNumber]) |
4154 | | // attribute not used before yet: assign it a value |
4155 | 0 | (*table)->numberedAttributes[attrNumber] = |
4156 | 0 | getNextNumberedAttribute(*table); |
4157 | 0 | attribute = (*table)->numberedAttributes[attrNumber]; |
4158 | 32 | } else { |
4159 | 32 | const CharacterClass *namedAttr = findCharacterClass(&token, *table); |
4160 | 32 | if (!namedAttr) { |
4161 | | // no class with that name: create one |
4162 | 6 | namedAttr = addCharacterClass( |
4163 | 6 | file, &token.chars[0], token.length, *table, 1); |
4164 | 6 | if (!namedAttr) return 0; |
4165 | 6 | } |
4166 | | // there is a class with that name or a new class was successfully |
4167 | | // created |
4168 | 32 | attribute = namedAttr->attribute; |
4169 | 32 | if (attribute == CTC_UpperCase || attribute == CTC_LowerCase) |
4170 | 15 | attribute |= CTC_Letter; |
4171 | 32 | } |
4172 | 32 | } |
4173 | 32 | CharsString characters; |
4174 | 32 | if (!getCharacters(file, &characters)) return 0; |
4175 | 1.37k | for (int i = 0; i < characters.length; i++) { |
4176 | | // get the character from the table, or if it is not defined yet, |
4177 | | // define it |
4178 | 1.34k | TranslationTableCharacter *character = putChar( |
4179 | 1.34k | file, characters.chars[i], table, NULL, (*table)->ruleCounter); |
4180 | | // set the attribute |
4181 | 1.34k | character->attributes |= attribute; |
4182 | | // also set the attribute on the associated dots (if any) |
4183 | 1.34k | if (character->basechar) |
4184 | 7 | character = (TranslationTableCharacter *)&(*table) |
4185 | 7 | ->ruleArea[character->basechar]; |
4186 | 1.34k | if (character->definitionRule) { |
4187 | 1 | TranslationTableRule *defRule = |
4188 | 1 | (TranslationTableRule *)&(*table) |
4189 | 1 | ->ruleArea[character->definitionRule]; |
4190 | 1 | if (defRule->dotslen == 1) { |
4191 | 1 | TranslationTableCharacter *dots = |
4192 | 1 | getDots(defRule->charsdots[defRule->charslen], *table); |
4193 | 1 | if (dots) dots->attributes |= attribute; |
4194 | 1 | } |
4195 | 1 | } |
4196 | 1.34k | } |
4197 | 32 | (*table)->ruleCounter++; |
4198 | 32 | return 1; |
4199 | 32 | } |
4200 | | |
4201 | 0 | { |
4202 | 0 | TranslationTableCharacterAttributes *attributes; |
4203 | 0 | const CharacterClass *class; |
4204 | 0 | case CTO_After: |
4205 | 0 | attributes = &after; |
4206 | 0 | goto doBeforeAfter; |
4207 | 0 | case CTO_Before: |
4208 | 0 | attributes = &before; |
4209 | 0 | doBeforeAfter: |
4210 | 0 | if (!(*table)->characterClasses) { |
4211 | 0 | if (!allocateCharacterClasses(*table)) return 0; |
4212 | 0 | } |
4213 | 0 | if (!getToken(file, &token, "attribute name")) return 0; |
4214 | 0 | if (!(class = findCharacterClass(&token, *table))) { |
4215 | 0 | compileError(file, "attribute not defined"); |
4216 | 0 | return 0; |
4217 | 0 | } |
4218 | 0 | *attributes |= class->attribute; |
4219 | 0 | goto doOpcode; |
4220 | 0 | } |
4221 | 25 | case CTO_Base: |
4222 | 25 | if (nofor || noback) { |
4223 | 0 | compileWarning(file, "nofor and noback not allowed before base"); |
4224 | 0 | } |
4225 | 25 | if (!getToken(file, &token, "attribute name")) { |
4226 | 0 | compileError( |
4227 | 0 | file, "base opcode must be followed by a valid attribute name."); |
4228 | 0 | return 0; |
4229 | 0 | } |
4230 | 25 | if (!(*table)->characterClasses && !allocateCharacterClasses(*table)) { |
4231 | 0 | return 0; |
4232 | 0 | } |
4233 | 25 | const CharacterClass *mode = findCharacterClass(&token, *table); |
4234 | 25 | if (!mode) { |
4235 | 14 | mode = addCharacterClass(file, token.chars, token.length, *table, 1); |
4236 | 14 | if (!mode) return 0; |
4237 | 14 | } |
4238 | 25 | if (!(mode->attribute == CTC_UpperCase || mode->attribute == CTC_Digit) && |
4239 | 16 | mode->attribute >= CTC_Space && mode->attribute <= CTC_LitDigit) { |
4240 | 0 | compileError(file, |
4241 | 0 | "base opcode must be followed by \"uppercase\", \"digit\", or a " |
4242 | 0 | "custom attribute name."); |
4243 | 0 | return 0; |
4244 | 0 | } |
4245 | 25 | if (!getRuleCharsText(file, &token)) return 0; |
4246 | 25 | if (token.length != 1) { |
4247 | 1 | compileError(file, |
4248 | 1 | "Exactly one character followed by one base character is " |
4249 | 1 | "required."); |
4250 | 1 | return 0; |
4251 | 1 | } |
4252 | 24 | TranslationTableOffset characterOffset; |
4253 | 24 | TranslationTableCharacter *character = putChar( |
4254 | 24 | file, token.chars[0], table, &characterOffset, (*table)->ruleCounter); |
4255 | 24 | if (!getRuleCharsText(file, &token)) return 0; |
4256 | 24 | if (token.length != 1) { |
4257 | 0 | compileError(file, "Exactly one base character is required."); |
4258 | 0 | return 0; |
4259 | 0 | } |
4260 | 24 | TranslationTableOffset basechar; |
4261 | 24 | putChar(file, token.chars[0], table, &basechar, (*table)->ruleCounter); |
4262 | | // putChar may have moved table, so make sure character is still valid |
4263 | 24 | character = (TranslationTableCharacter *)&(*table)->ruleArea[characterOffset]; |
4264 | 24 | if (character->basechar) { |
4265 | 3 | if (character->basechar == basechar && |
4266 | 3 | character->mode == mode->attribute) { |
4267 | 3 | _lou_logMessage(LOU_LOG_DEBUG, "%s:%d: Duplicate base rule.", |
4268 | 3 | file->fileName, file->lineNumber); |
4269 | 3 | } else { |
4270 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
4271 | 0 | "%s:%d: A different base rule already exists for this " |
4272 | 0 | "character (%s). The existing rule will take precedence " |
4273 | 0 | "over the new one.", |
4274 | 0 | file->fileName, file->lineNumber, |
4275 | 0 | printSource(file->sourceFile, character->sourceFile, |
4276 | 0 | character->sourceLine)); |
4277 | 0 | } |
4278 | 21 | } else { |
4279 | 21 | character->basechar = basechar; |
4280 | 21 | character->mode = mode->attribute; |
4281 | 21 | character->sourceFile = file->sourceFile; |
4282 | 21 | character->sourceLine = file->lineNumber; |
4283 | 21 | character->ruleIndex = (*table)->ruleCounter; |
4284 | | /* some other processing is done at the end of the compilation, in |
4285 | | * finalizeTable() */ |
4286 | 21 | } |
4287 | 24 | (*table)->ruleCounter++; |
4288 | 24 | return 1; |
4289 | 1 | case CTO_EmpMatchBefore: |
4290 | 1 | before |= CTC_EmpMatch; |
4291 | 1 | goto doOpcode; |
4292 | 0 | case CTO_EmpMatchAfter: |
4293 | 0 | after |= CTC_EmpMatch; |
4294 | 0 | goto doOpcode; |
4295 | | |
4296 | 2 | case CTO_SwapCc: |
4297 | 2 | case CTO_SwapCd: |
4298 | 3 | case CTO_SwapDd: |
4299 | 3 | return compileSwap(file, opcode, noback, nofor, table); |
4300 | 0 | case CTO_Hyphen: |
4301 | 6 | case CTO_DecPoint: |
4302 | | // case CTO_Apostrophe: |
4303 | | // case CTO_Initial: |
4304 | 6 | if (!getRuleCharsText(file, &ruleChars)) return 0; |
4305 | 6 | if (!getRuleDotsPattern(file, &ruleDots)) return 0; |
4306 | 6 | if (ruleChars.length != 1 || ruleDots.length < 1) { |
4307 | 0 | compileError(file, |
4308 | 0 | "One Unicode character and at least one cell are " |
4309 | 0 | "required."); |
4310 | 0 | return 0; |
4311 | 0 | } |
4312 | 6 | return addRule(file, opcode, &ruleChars, &ruleDots, after, before, NULL, NULL, |
4313 | 6 | noback, nofor, table); |
4314 | | // if (opcode == CTO_DecPoint) |
4315 | | // { |
4316 | | // TranslationTableCharacter *c = |
4317 | | // getChar(ruleChars.chars[0]); |
4318 | | // if(c) |
4319 | | // c->attributes |= CTC_NumericMode; |
4320 | | // } |
4321 | 0 | default: |
4322 | 0 | compileError(file, "unimplemented opcode."); |
4323 | 0 | return 0; |
4324 | 468 | } |
4325 | 739 | } |
4326 | 0 | return 0; |
4327 | 739 | } |
4328 | | |
4329 | | int EXPORT_CALL |
4330 | 0 | lou_readCharFromFile(const char *fileName, int *mode) { |
4331 | | /* Read a character from a file, whether big-endian, little-endian or |
4332 | | * ASCII8 */ |
4333 | 0 | int ch; |
4334 | 0 | static FileInfo file; |
4335 | 0 | if (fileName == NULL) return 0; |
4336 | 0 | if (*mode == 1) { |
4337 | 0 | *mode = 0; |
4338 | 0 | file.fileName = fileName; |
4339 | 0 | file.encoding = noEncoding; |
4340 | 0 | file.status = 0; |
4341 | 0 | file.lineNumber = 0; |
4342 | 0 | if (!(file.in = fopen(file.fileName, "r"))) { |
4343 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open file '%s'", file.fileName); |
4344 | 0 | *mode = 1; |
4345 | 0 | return EOF; |
4346 | 0 | } |
4347 | 0 | } |
4348 | 0 | if (file.in == NULL) { |
4349 | 0 | *mode = 1; |
4350 | 0 | return EOF; |
4351 | 0 | } |
4352 | 0 | ch = getAChar(&file); |
4353 | 0 | if (ch == EOF) { |
4354 | 0 | fclose(file.in); |
4355 | 0 | file.in = NULL; |
4356 | 0 | *mode = 1; |
4357 | 0 | } |
4358 | 0 | return ch; |
4359 | 0 | } |
4360 | | |
4361 | | static TranslationTableCharacter * |
4362 | | finalizeCharacter(TranslationTableHeader *table, TranslationTableOffset characterOffset, |
4363 | 756 | int detect_loop) { |
4364 | 756 | TranslationTableCharacter *character = |
4365 | 756 | (TranslationTableCharacter *)&table->ruleArea[characterOffset]; |
4366 | 756 | if (character->basechar) { |
4367 | 25 | TranslationTableOffset basecharOffset = 0; |
4368 | 25 | TranslationTableCharacter *basechar = character; |
4369 | 25 | TranslationTableCharacterAttributes mode = 0; |
4370 | 54 | while (basechar->basechar) { |
4371 | 29 | if (basechar->basechar == characterOffset || detect_loop++ > MAX_MODES) { |
4372 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
4373 | 0 | "%s: error: Character can not be (indirectly) based on " |
4374 | 0 | "itself.", |
4375 | 0 | printSource(NULL, character->sourceFile, character->sourceLine)); |
4376 | 0 | errorCount++; |
4377 | 0 | return NULL; |
4378 | 0 | } |
4379 | | // inherit basechar mode |
4380 | 29 | mode |= basechar->mode; |
4381 | | // compute basechar recursively |
4382 | 29 | basecharOffset = basechar->basechar; |
4383 | 29 | basechar = finalizeCharacter(table, basecharOffset, detect_loop); |
4384 | 29 | if (!basechar) return NULL; |
4385 | 29 | if (character->mode & (basechar->attributes | basechar->mode)) { |
4386 | 0 | char *attributeName = NULL; |
4387 | 0 | const CharacterClass *class = table->characterClasses; |
4388 | 0 | while (class) { |
4389 | 0 | if (class->attribute == character->mode) { |
4390 | 0 | attributeName = |
4391 | 0 | strdup(_lou_showString(class->name, class->length, 0)); |
4392 | 0 | break; |
4393 | 0 | } |
4394 | 0 | class = class->next; |
4395 | 0 | } |
4396 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
4397 | 0 | "%s: error: Base character %s can not have the %s " |
4398 | 0 | "attribute.", |
4399 | 0 | printSource(NULL, character->sourceFile, character->sourceLine), |
4400 | 0 | _lou_showString(&basechar->value, 1, 0), |
4401 | 0 | attributeName != NULL ? attributeName : "?"); |
4402 | 0 | errorCount++; |
4403 | 0 | free(attributeName); |
4404 | 0 | return NULL; |
4405 | 0 | } |
4406 | 29 | } |
4407 | | // unset character definition rule or base rule (whichever was declared |
4408 | | // last) if the dot patterns are not compatible, meaning if the real parts |
4409 | | // (1-8) of the dot patterns do not match |
4410 | 25 | TranslationTableRule *basecharDefRule = |
4411 | 25 | (TranslationTableRule *)&table->ruleArea[basechar->definitionRule]; |
4412 | 25 | if (character->definitionRule) { |
4413 | 0 | TranslationTableRule *defRule = |
4414 | 0 | (TranslationTableRule *)&table->ruleArea[character->definitionRule]; |
4415 | 0 | if (defRule->dotslen != basecharDefRule->dotslen || |
4416 | 0 | memcmp(&defRule->charsdots[defRule->charslen], |
4417 | 0 | &basecharDefRule->charsdots[basecharDefRule->charslen], |
4418 | 0 | defRule->dotslen * CHARSIZE)) { |
4419 | 0 | char *defOpcodeName = strdup(_lou_findOpcodeName(defRule->opcode)); |
4420 | 0 | if (defRule->index < character->ruleIndex) { |
4421 | | // character definition rule was defined before base rule; ignore base |
4422 | | // rule |
4423 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
4424 | 0 | "%s:%d: Character already defined (%s). The existing %s rule " |
4425 | 0 | "will take precedence over the new base rule.", |
4426 | 0 | character->sourceFile, character->sourceLine, |
4427 | 0 | printSource(character->sourceFile, defRule->sourceFile, |
4428 | 0 | defRule->sourceLine), |
4429 | 0 | defOpcodeName); |
4430 | 0 | free(defOpcodeName); |
4431 | 0 | character->basechar = 0; |
4432 | 0 | character->mode = 0; |
4433 | 0 | character->sourceFile = defRule->sourceFile; |
4434 | 0 | character->sourceLine = defRule->sourceLine; |
4435 | 0 | character->ruleIndex = defRule->index; |
4436 | 0 | character->finalized = 1; |
4437 | 0 | return character; |
4438 | 0 | } else { |
4439 | 0 | _lou_logMessage(LOU_LOG_DEBUG, |
4440 | 0 | "%s:%d: A base rule already exists for this character (%s). " |
4441 | 0 | "The " |
4442 | 0 | "existing base rule will take precedence over the new %s " |
4443 | 0 | "rule.", |
4444 | 0 | defRule->sourceFile, defRule->sourceLine, |
4445 | 0 | printSource(defRule->sourceFile, character->sourceFile, |
4446 | 0 | character->sourceLine), |
4447 | 0 | defOpcodeName); |
4448 | 0 | free(defOpcodeName); |
4449 | 0 | character->definitionRule = 0; |
4450 | 0 | } |
4451 | 0 | } |
4452 | 0 | } |
4453 | 25 | character->mode = mode; |
4454 | 25 | character->basechar = basecharOffset; |
4455 | | // add mode to attributes |
4456 | 25 | character->attributes |= character->mode; |
4457 | 25 | if (character->attributes & (CTC_UpperCase | CTC_LowerCase)) |
4458 | 15 | character->attributes |= CTC_Letter; |
4459 | | // also set the new attributes on the associated dots of the base |
4460 | | // character |
4461 | 25 | if (basecharDefRule->dotslen == 1) { |
4462 | 0 | TranslationTableCharacter *dots = |
4463 | 0 | getDots(basecharDefRule->charsdots[basecharDefRule->charslen], table); |
4464 | 0 | if (dots) { |
4465 | 0 | dots->attributes |= character->mode; |
4466 | 0 | if (dots->attributes & (CTC_UpperCase | CTC_LowerCase)) |
4467 | 0 | dots->attributes |= CTC_Letter; |
4468 | 0 | } |
4469 | 0 | } |
4470 | | // store all characters that are based on a base character in list |
4471 | 25 | if (basechar->linked) character->linked = basechar->linked; |
4472 | 25 | basechar->linked = characterOffset; |
4473 | 25 | } |
4474 | 756 | character->finalized = 1; |
4475 | 756 | return character; |
4476 | 756 | } |
4477 | | |
4478 | | static int |
4479 | 128 | finalizeTable(TranslationTableHeader *table) { |
4480 | 128 | if (table->finalized) return 1; |
4481 | | // normalize basechar and mode of all characters |
4482 | 77.5k | for (int i = 0; i < HASHNUM; i++) { |
4483 | 77.4k | TranslationTableOffset characterOffset = table->characters[i]; |
4484 | 78.2k | while (characterOffset) { |
4485 | 727 | TranslationTableCharacter *character = |
4486 | 727 | finalizeCharacter(table, characterOffset, 0); |
4487 | 727 | if (!character) return 0; |
4488 | 727 | characterOffset = character->next; |
4489 | 727 | } |
4490 | 77.4k | } |
4491 | | // add noletsign rules from single-letter word and largesign rules |
4492 | 77.5k | for (int i = 0; i < HASHNUM; i++) { |
4493 | 77.4k | TranslationTableOffset characterOffset = table->characters[i]; |
4494 | 78.2k | while (characterOffset) { |
4495 | 727 | TranslationTableCharacter *character = |
4496 | 727 | (TranslationTableCharacter *)&table->ruleArea[characterOffset]; |
4497 | 727 | if (character->attributes & CTC_Letter) { |
4498 | 184 | TranslationTableOffset *otherRule = &character->otherRules; |
4499 | 212 | while (*otherRule) { |
4500 | 28 | TranslationTableRule *rule = |
4501 | 28 | (TranslationTableRule *)&table->ruleArea[*otherRule]; |
4502 | 28 | if (rule->opcode == CTO_WholeWord || rule->opcode == CTO_LargeSign) |
4503 | 1 | if (table->noLetsignCount < LETSIGNSIZE) |
4504 | 1 | table->noLetsign[table->noLetsignCount++] = |
4505 | 1 | rule->charsdots[0]; |
4506 | 28 | otherRule = &rule->charsnext; |
4507 | 28 | } |
4508 | 184 | } |
4509 | 727 | characterOffset = character->next; |
4510 | 727 | } |
4511 | 77.4k | } |
4512 | | // Rearrange rules in `forRules' so that when iterating over candidate rules in |
4513 | | // for_selectRule(), both case-sensitive and case-insensitive rules are contained |
4514 | | // within the same ordered list. We do the rearrangement by iterating over all |
4515 | | // case-sensitive rules and if needed move them to another bucket. This may slow down |
4516 | | // the compilation of tables with a lot of context rules, but the good news is that |
4517 | | // translation speed is not affected. |
4518 | 77.5k | for (unsigned long int i = 0; i < HASHNUM; i++) { |
4519 | 77.4k | TranslationTableOffset *p = &table->forRules[i]; |
4520 | 77.5k | while (*p) { |
4521 | 53 | TranslationTableRule *rule = (TranslationTableRule *)&table->ruleArea[*p]; |
4522 | | // For now only move the rules that we know are case-sensitive, namely |
4523 | | // `context' rules. (Note that there may be other case-sensitive rules that |
4524 | | // we're currently not aware of.) We don't move case insensitive rules because |
4525 | | // the user can/should define them using all lowercases. |
4526 | 53 | if (rule->opcode == CTO_Context) { |
4527 | 0 | unsigned long int hash = _lou_stringHash(&rule->charsdots[0], 1, table); |
4528 | | // no need to do anything if the first two characters are not uppercase |
4529 | | // letters |
4530 | 0 | if (hash != i) { |
4531 | | // compute new position |
4532 | 0 | TranslationTableOffset *insert_at = &table->forRules[hash]; |
4533 | 0 | while (*insert_at) { |
4534 | 0 | TranslationTableRule *r = |
4535 | 0 | (TranslationTableRule *)&table->ruleArea[*insert_at]; |
4536 | 0 | if (rule->charslen > r->charslen) |
4537 | 0 | break; |
4538 | 0 | else if (rule->charslen == r->charslen && r->opcode == CTO_Always) |
4539 | 0 | break; |
4540 | 0 | insert_at = &r->charsnext; |
4541 | 0 | } |
4542 | | // remove rule from current list and insert it at the correct position |
4543 | | // in the new list |
4544 | 0 | TranslationTableOffset next = rule->charsnext; |
4545 | 0 | rule->charsnext = *insert_at; |
4546 | 0 | *insert_at = *p; |
4547 | 0 | *p = next; |
4548 | 0 | continue; |
4549 | 0 | } |
4550 | 0 | } |
4551 | 53 | p = &rule->charsnext; |
4552 | 53 | } |
4553 | 77.4k | } |
4554 | 69 | table->finalized = 1; |
4555 | 69 | return 1; |
4556 | 69 | } |
4557 | | |
4558 | | static int |
4559 | | compileString(const char *inString, TranslationTableHeader **table, |
4560 | 84 | DisplayTableHeader **displayTable) { |
4561 | | /* This function can be used to make changes to tables on the fly. */ |
4562 | 84 | int k; |
4563 | 84 | FileInfo file; |
4564 | 84 | if (inString == NULL) return 0; |
4565 | 84 | memset(&file, 0, sizeof(file)); |
4566 | 84 | file.fileName = inString; |
4567 | 84 | file.encoding = noEncoding; |
4568 | 84 | file.lineNumber = 1; |
4569 | 84 | file.status = 0; |
4570 | 84 | file.linepos = 0; |
4571 | 3.69k | for (k = 0; k < MAXSTRING - 1 && inString[k]; k++) file.line[k] = inString[k]; |
4572 | 84 | file.line[k] = 0; |
4573 | 84 | file.linelen = k; |
4574 | 84 | if (table && *table && (*table)->finalized) { |
4575 | 0 | compileError(&file, "Table is finalized"); |
4576 | 0 | return 0; |
4577 | 0 | } |
4578 | 84 | return compileRule(&file, table, displayTable, NULL); |
4579 | 84 | } |
4580 | | |
4581 | | static int |
4582 | 69 | setDefaults(TranslationTableHeader *table) { |
4583 | 276 | for (int i = 0; i < 3; i++) |
4584 | 207 | if (!table->emphRules[i][lenPhraseOffset]) |
4585 | 207 | table->emphRules[i][lenPhraseOffset] = 4; |
4586 | 69 | if (table->numPasses == 0) table->numPasses = 1; |
4587 | 69 | return 1; |
4588 | 69 | } |
4589 | | |
4590 | | /* =============== * |
4591 | | * TABLE RESOLVING * |
4592 | | * =============== * |
4593 | | * |
4594 | | * A table resolver is a function that resolves a `tableList` path against a |
4595 | | * `base` path, and returns the resolved table(s) as a list of absolute file |
4596 | | * paths. |
4597 | | * |
4598 | | * The function must have the following signature: |
4599 | | * |
4600 | | * char ** (const char * tableList, const char * base) |
4601 | | * |
4602 | | * In general, `tableList` is a path in the broad sense. The default |
4603 | | * implementation accepts only *file* paths, but another implementation could, |
4604 | | * for instance, handle URIs. `base` is always a file path, however. |
4605 | | * |
4606 | | * The idea is to give other programs that use liblouis the ability to define |
4607 | | * their own table resolver (in C, Java, Python, etc.) when the default |
4608 | | * resolver does not meet their needs. (see also lou_registerTableResolver) |
4609 | | * |
4610 | | */ |
4611 | | |
4612 | | /** |
4613 | | * Resolve a single (sub)table. |
4614 | | * |
4615 | | * Tries to resolve `table` against `base` if base is an absolute path. If |
4616 | | * that fails, searches `searchPath`. |
4617 | | * |
4618 | | */ |
4619 | | static char * |
4620 | 84 | resolveSubtable(const char *table, const char *base, const char *searchPath) { |
4621 | 84 | char *tableFile; |
4622 | 84 | static struct stat info; |
4623 | | |
4624 | 168 | #define MAX_TABLEFILE_SIZE (MAXSTRING * sizeof(char) * 2) |
4625 | 84 | if (table == NULL || table[0] == '\0') return NULL; |
4626 | 84 | tableFile = (char *)malloc(MAX_TABLEFILE_SIZE); |
4627 | | |
4628 | | // |
4629 | | // First try to resolve against base |
4630 | | // |
4631 | 84 | if (base) { |
4632 | 0 | int k; |
4633 | 0 | if (strlen(base) >= MAX_TABLEFILE_SIZE) goto failure; |
4634 | 0 | strcpy(tableFile, base); |
4635 | 0 | k = (int)strlen(tableFile); |
4636 | 0 | while (k >= 0 && tableFile[k] != '/' && tableFile[k] != '\\') k--; |
4637 | 0 | tableFile[++k] = '\0'; |
4638 | 0 | if (strlen(tableFile) + strlen(table) >= MAX_TABLEFILE_SIZE) goto failure; |
4639 | 0 | strcat(tableFile, table); |
4640 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4641 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4642 | 0 | return tableFile; |
4643 | 0 | } |
4644 | 0 | } |
4645 | | |
4646 | | // |
4647 | | // It could be an absolute path, or a path relative to the current working |
4648 | | // directory |
4649 | | // |
4650 | 84 | if (strlen(table) >= MAX_TABLEFILE_SIZE) goto failure; |
4651 | 84 | strcpy(tableFile, table); |
4652 | 84 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4653 | 84 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4654 | 84 | return tableFile; |
4655 | 84 | } |
4656 | | |
4657 | | // |
4658 | | // Then search `LOUIS_TABLEPATH`, `dataPath` and `programPath` |
4659 | | // |
4660 | 0 | if (searchPath[0] != '\0') { |
4661 | 0 | char *dir; |
4662 | 0 | int last; |
4663 | 0 | char *cp; |
4664 | 0 | char *searchPath_copy = strdup(searchPath); |
4665 | 0 | for (dir = searchPath_copy;; dir = cp + 1) { |
4666 | 0 | for (cp = dir; *cp != '\0' && *cp != ','; cp++) |
4667 | 0 | ; |
4668 | 0 | last = (*cp == '\0'); |
4669 | 0 | *cp = '\0'; |
4670 | 0 | if (dir == cp) dir = "."; |
4671 | 0 | if (strlen(dir) + strlen(table) + 1 >= MAX_TABLEFILE_SIZE) { |
4672 | 0 | free(searchPath_copy); |
4673 | 0 | goto failure; |
4674 | 0 | } |
4675 | 0 | sprintf(tableFile, "%s%c%s", dir, DIR_SEP, table); |
4676 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4677 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4678 | 0 | free(searchPath_copy); |
4679 | 0 | return tableFile; |
4680 | 0 | } |
4681 | 0 | if (last) break; |
4682 | 0 | if (strlen(dir) + strlen("liblouis") + strlen("tables") + strlen(table) + 3 >= |
4683 | 0 | MAX_TABLEFILE_SIZE) { |
4684 | 0 | free(searchPath_copy); |
4685 | 0 | goto failure; |
4686 | 0 | } |
4687 | 0 | sprintf(tableFile, "%s%c%s%c%s%c%s", dir, DIR_SEP, "liblouis", DIR_SEP, |
4688 | 0 | "tables", DIR_SEP, table); |
4689 | 0 | if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) { |
4690 | 0 | _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile); |
4691 | 0 | free(searchPath_copy); |
4692 | 0 | return tableFile; |
4693 | 0 | } |
4694 | 0 | if (last) break; |
4695 | 0 | } |
4696 | 0 | free(searchPath_copy); |
4697 | 0 | } |
4698 | 0 | failure: |
4699 | 0 | free(tableFile); |
4700 | 0 | return NULL; |
4701 | 0 | } |
4702 | | |
4703 | | char *EXPORT_CALL |
4704 | 84 | _lou_getTablePath(void) { |
4705 | 84 | char searchPath[MAXSTRING]; |
4706 | 84 | char *path; |
4707 | 84 | char *cp; |
4708 | 84 | int envset = 0; |
4709 | 84 | cp = searchPath; |
4710 | 84 | path = getenv("LOUIS_TABLEPATH"); |
4711 | 84 | if (path != NULL && path[0] != '\0') { |
4712 | 0 | envset = 1; |
4713 | 0 | cp += sprintf(cp, ",%s", path); |
4714 | 0 | } |
4715 | 84 | path = dataPathPtr; |
4716 | 84 | if (path != NULL && path[0] != '\0') |
4717 | 0 | cp += sprintf(cp, ",%s%c%s%c%s", path, DIR_SEP, "liblouis", DIR_SEP, "tables"); |
4718 | 84 | if (!envset) { |
4719 | | #ifdef _WIN32 |
4720 | | path = lou_getProgramPath(); |
4721 | | if (path != NULL) { |
4722 | | if (path[0] != '\0') |
4723 | | // assuming the following directory structure: |
4724 | | // . |
4725 | | // ├── bin |
4726 | | // │ ├── liblouis.dll |
4727 | | // ├── include |
4728 | | // ├── lib |
4729 | | // └── share |
4730 | | // ├── doc |
4731 | | // ├── info |
4732 | | // └── liblouis |
4733 | | // └── tables |
4734 | | cp += sprintf(cp, ",%s%s", path, "\\..\\share\\liblouis\\tables"); |
4735 | | free(path); |
4736 | | } |
4737 | | #else |
4738 | 84 | cp += sprintf(cp, ",%s", TABLESDIR); |
4739 | 84 | #endif |
4740 | 84 | } |
4741 | 84 | if (searchPath[0] != '\0') |
4742 | 84 | return strdup(&searchPath[1]); |
4743 | 0 | else |
4744 | 0 | return strdup("."); |
4745 | 84 | } |
4746 | | |
4747 | | /** |
4748 | | * The default table resolver |
4749 | | * |
4750 | | * Tries to resolve tableList against base. The search path is set to |
4751 | | * `LOUIS_TABLEPATH`, `dataPath` and `programPath` (in that order). |
4752 | | * |
4753 | | * @param table A file path, may be absolute or relative. May be a list of |
4754 | | * tables separated by commas. In that case, the first table |
4755 | | * is used as the base for the other subtables. |
4756 | | * @param base A file path or directory path, or NULL. |
4757 | | * @return The file paths of the resolved subtables, or NULL if the table |
4758 | | * could not be resolved. |
4759 | | * |
4760 | | */ |
4761 | | char **EXPORT_CALL |
4762 | 84 | _lou_defaultTableResolver(const char *tableList, const char *base) { |
4763 | 84 | char *searchPath; |
4764 | 84 | char **tableFiles; |
4765 | 84 | char *subTable; |
4766 | 84 | char *tableList_copy; |
4767 | 84 | char *cp; |
4768 | 84 | int last; |
4769 | 84 | int k; |
4770 | | |
4771 | | /* Set up search path */ |
4772 | 84 | searchPath = _lou_getTablePath(); |
4773 | | |
4774 | | /* Count number of subtables in table list */ |
4775 | 84 | k = 0; |
4776 | 1.83k | for (cp = (char *)tableList; *cp != '\0'; cp++) |
4777 | 1.75k | if (*cp == ',') k++; |
4778 | 84 | tableFiles = (char **)calloc(k + 2, sizeof(char *)); |
4779 | 84 | if (!tableFiles) _lou_outOfMemory(); |
4780 | | |
4781 | | /* Resolve subtables */ |
4782 | 84 | k = 0; |
4783 | 84 | tableList_copy = strdup(tableList); |
4784 | 84 | for (subTable = tableList_copy;; subTable = cp + 1) { |
4785 | 1.83k | for (cp = subTable; *cp != '\0' && *cp != ','; cp++) |
4786 | 1.75k | ; |
4787 | 84 | last = (*cp == '\0'); |
4788 | 84 | *cp = '\0'; |
4789 | 84 | if (!(tableFiles[k++] = resolveSubtable(subTable, base, searchPath))) { |
4790 | 0 | char *path; |
4791 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", subTable); |
4792 | 0 | path = getenv("LOUIS_TABLEPATH"); |
4793 | 0 | if (path != NULL && path[0] != '\0') |
4794 | 0 | _lou_logMessage(LOU_LOG_ERROR, "LOUIS_TABLEPATH=%s", path); |
4795 | 0 | free(searchPath); |
4796 | 0 | free(tableList_copy); |
4797 | 0 | lou_freeTableFiles(tableFiles); |
4798 | 0 | return NULL; |
4799 | 0 | } |
4800 | 84 | if (k == 1) base = subTable; |
4801 | 84 | if (last) break; |
4802 | 84 | } |
4803 | 84 | free(searchPath); |
4804 | 84 | free(tableList_copy); |
4805 | 84 | tableFiles[k] = NULL; |
4806 | 84 | return tableFiles; |
4807 | 84 | } |
4808 | | |
4809 | | static char **(EXPORT_CALL *tableResolver)( |
4810 | | const char *tableList, const char *base) = &_lou_defaultTableResolver; |
4811 | | |
/**
 * Make a deep copy of a NULL-terminated array of strings.
 *
 * @param array The array to copy, or NULL.
 * @return A newly allocated NULL-terminated array holding newly allocated
 *         copies of the strings, or NULL if `array` is NULL or memory could
 *         not be allocated (nothing is leaked in that case).
 */
static char **
copyStringArray(char **array) {
    size_t count = 0;
    size_t i;
    char **copy;

    if (!array) return NULL;
    while (array[count]) count++;
    /* calloc leaves the terminating slot (and all others) NULL */
    copy = calloc(count + 1, sizeof(char *));
    if (!copy) return NULL;
    for (i = 0; i < count; i++) {
        size_t size = strlen(array[i]) + 1;
        copy[i] = malloc(size);
        if (!copy[i]) {
            /* roll back what was duplicated so far */
            while (i > 0) free(copy[--i]);
            free(copy);
            return NULL;
        }
        memcpy(copy[i], array[i], size);
    }
    return copy;
}
4827 | | |
4828 | | char **EXPORT_CALL |
4829 | 84 | _lou_resolveTable(const char *tableList, const char *base) { |
4830 | 84 | char **tableFiles = (*tableResolver)(tableList, base); |
4831 | 84 | char **result = copyStringArray(tableFiles); |
4832 | 84 | if (tableResolver == &_lou_defaultTableResolver) lou_freeTableFiles(tableFiles); |
4833 | 84 | return result; |
4834 | 84 | } |
4835 | | |
4836 | | /** |
4837 | | * Register a new table resolver. Overrides the default resolver. |
4838 | | * |
4839 | | * @param resolver The new resolver as a function pointer. |
4840 | | * |
4841 | | */ |
4842 | | void EXPORT_CALL |
4843 | | lou_registerTableResolver( |
4844 | 0 | char **(EXPORT_CALL *resolver)(const char *tableList, const char *base)) { |
4845 | 0 | tableResolver = resolver; |
4846 | 0 | } |
4847 | | |
4848 | | static int fileCount = 0; |
4849 | | |
4850 | | /** |
4851 | | * Compile a single file |
4852 | | * |
4853 | | */ |
4854 | | static int |
4855 | | compileFile(const char *fileName, TranslationTableHeader **table, |
4856 | 84 | DisplayTableHeader **displayTable) { |
4857 | 84 | FileInfo file; |
4858 | 84 | fileCount++; |
4859 | 84 | file.fileName = fileName; |
4860 | 84 | if (table) { |
4861 | 84 | int i; |
4862 | 84 | for (i = 0; (*table)->sourceFiles[i]; i++) |
4863 | 0 | ; |
4864 | 84 | if (i >= MAX_SOURCE_FILES) { |
4865 | 0 | _lou_logMessage(LOU_LOG_WARN, "Max number of source files (%i) reached", |
4866 | 0 | MAX_SOURCE_FILES); |
4867 | 0 | file.sourceFile = NULL; |
4868 | 84 | } else { |
4869 | 84 | file.sourceFile = (*table)->sourceFiles[i] = strdup(fileName); |
4870 | 84 | } |
4871 | 84 | } |
4872 | 84 | file.encoding = noEncoding; |
4873 | 84 | file.status = 0; |
4874 | 84 | file.lineNumber = 0; |
4875 | 84 | if ((file.in = fopen(file.fileName, "rb"))) { |
4876 | | // the scope of a macro is the current file (after the macro definition) |
4877 | 84 | const MacroList *inscopeMacros = NULL; |
4878 | 794 | while (_lou_getALine(&file)) |
4879 | 723 | if (!compileRule(&file, table, displayTable, &inscopeMacros)) { |
4880 | 13 | if (!errorCount) compileError(&file, "Rule could not be compiled"); |
4881 | 13 | break; |
4882 | 13 | } |
4883 | 84 | fclose(file.in); |
4884 | 84 | free_macro_list(inscopeMacros); |
4885 | 84 | } else { |
4886 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", file.fileName); |
4887 | 0 | errorCount++; |
4888 | 0 | } |
4889 | 84 | return !errorCount; |
4890 | 84 | } |
4891 | | |
4892 | | static void |
4893 | 84 | freeTranslationTable(TranslationTableHeader *t) { |
4894 | 92 | for (int i = 0; i < MAX_EMPH_CLASSES && t->emphClassNames[i]; i++) |
4895 | 84 | free(t->emphClassNames[i]); |
4896 | 168 | for (int i = 0; t->sourceFiles[i]; i++) free(t->sourceFiles[i]); |
4897 | 84 | if (t->characterClasses) deallocateCharacterClasses(t); |
4898 | 84 | if (t->ruleNames) deallocateRuleNames(t); |
4899 | 84 | free(t); |
4900 | 84 | } |
4901 | | |
4902 | | static void |
4903 | 84 | freeDisplayTable(DisplayTableHeader *t) { |
4904 | 84 | free(t); |
4905 | 84 | } |
4906 | | |
4907 | | /** |
4908 | | * Free a char** array |
4909 | | */ |
4910 | | void EXPORT_CALL |
4911 | 168 | lou_freeTableFiles(char **tables) { |
4912 | 168 | if (!tables) return; |
4913 | 336 | for (char **table = tables; *table; table++) free(*table); |
4914 | 168 | free(tables); |
4915 | 168 | } |
4916 | | |
4917 | | /** |
4918 | | * Implement include opcode |
4919 | | * |
4920 | | */ |
4921 | | static int |
4922 | | includeFile(const FileInfo *file, CharsString *includedFile, |
4923 | 0 | TranslationTableHeader **table, DisplayTableHeader **displayTable) { |
4924 | 0 | int k; |
4925 | 0 | char includeThis[MAXSTRING]; |
4926 | 0 | char **tableFiles; |
4927 | 0 | int rv; |
4928 | 0 | for (k = 0; k < includedFile->length; k++) |
4929 | 0 | includeThis[k] = (char)includedFile->chars[k]; |
4930 | 0 | if (k >= MAXSTRING) { |
4931 | 0 | compileError(file, "Include statement too long: 'include %s'", includeThis); |
4932 | 0 | return 0; |
4933 | 0 | } |
4934 | 0 | includeThis[k] = 0; |
4935 | 0 | tableFiles = _lou_resolveTable(includeThis, file->fileName); |
4936 | 0 | if (tableFiles == NULL) { |
4937 | 0 | errorCount++; |
4938 | 0 | return 0; |
4939 | 0 | } |
4940 | 0 | if (tableFiles[1] != NULL) { |
4941 | 0 | lou_freeTableFiles(tableFiles); |
4942 | 0 | compileError(file, "Table list not supported in include statement: 'include %s'", |
4943 | 0 | includeThis); |
4944 | 0 | return 0; |
4945 | 0 | } |
4946 | 0 | rv = compileFile(*tableFiles, table, displayTable); |
4947 | 0 | lou_freeTableFiles(tableFiles); |
4948 | 0 | if (!rv) |
4949 | 0 | _lou_logMessage(LOU_LOG_ERROR, "%s:%d: Error in included file", file->fileName, |
4950 | 0 | file->lineNumber); |
4951 | 0 | return rv; |
4952 | 0 | } |
4953 | | |
4954 | | /** |
4955 | | * Compile source tables into a table in memory |
4956 | | * |
4957 | | */ |
4958 | | static int |
4959 | | compileTable(const char *tableList, const char *displayTableList, |
4960 | 84 | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) { |
4961 | 84 | char **tableFiles; |
4962 | 84 | char **subTable; |
4963 | 84 | if (translationTable && !tableList) return 0; |
4964 | 84 | if (displayTable && !displayTableList) return 0; |
4965 | 84 | if (!translationTable && !displayTable) return 0; |
4966 | 84 | if (translationTable) *translationTable = NULL; |
4967 | 84 | if (displayTable) *displayTable = NULL; |
4968 | 84 | errorCount = warningCount = fileCount = 0; |
4969 | 84 | if (!opcodeLengths[0]) { |
4970 | 84 | TranslationTableOpcode opcode; |
4971 | 9.91k | for (opcode = 0; opcode < CTO_None; opcode++) |
4972 | 9.82k | opcodeLengths[opcode] = (short)strlen(opcodeNames[opcode]); |
4973 | 84 | } |
4974 | 84 | if (translationTable) allocateTranslationTable(NULL, translationTable); |
4975 | 84 | if (displayTable) allocateDisplayTable(NULL, displayTable); |
4976 | | |
4977 | 84 | if (translationTable) { |
4978 | 84 | (*translationTable)->emphClassNames[0] = NULL; |
4979 | 84 | (*translationTable)->characterClasses = NULL; |
4980 | 84 | (*translationTable)->ruleNames = NULL; |
4981 | 84 | } |
4982 | | |
4983 | | /* Compile things that are necessary for the proper operation of |
4984 | | * liblouis or liblouisxml or liblouisutdml */ |
4985 | | /* TODO: These definitions seem to be necessary for proper functioning of |
4986 | | liblouisutdml. Find a way to satisfy those requirements without hard coding |
4987 | | some characters in every table notably behind the user's back */ |
4988 | 84 | compileString("space \\xffff 123456789abcdef LOU_ENDSEGMENT", translationTable, |
4989 | 84 | displayTable); |
4990 | | |
4991 | 84 | if (displayTable && translationTable && strcmp(tableList, displayTableList) == 0) { |
4992 | | /* Compile the display and translation tables in one go */ |
4993 | | |
4994 | | /* Compile all subtables in the list */ |
4995 | 84 | if (!(tableFiles = _lou_resolveTable(tableList, NULL))) { |
4996 | 0 | errorCount++; |
4997 | 0 | goto cleanup; |
4998 | 0 | } |
4999 | 153 | for (subTable = tableFiles; *subTable; subTable++) |
5000 | 84 | if (!compileFile(*subTable, translationTable, displayTable)) goto cleanup; |
5001 | 84 | } else { |
5002 | | /* Compile the display and translation tables separately */ |
5003 | |
|
5004 | 0 | if (displayTable) { |
5005 | 0 | if (!(tableFiles = _lou_resolveTable(displayTableList, NULL))) { |
5006 | 0 | errorCount++; |
5007 | 0 | goto cleanup; |
5008 | 0 | } |
5009 | 0 | for (subTable = tableFiles; *subTable; subTable++) |
5010 | 0 | if (!compileFile(*subTable, NULL, displayTable)) goto cleanup; |
5011 | 0 | lou_freeTableFiles(tableFiles); |
5012 | 0 | tableFiles = NULL; |
5013 | 0 | } |
5014 | 0 | if (translationTable) { |
5015 | 0 | if (!(tableFiles = _lou_resolveTable(tableList, NULL))) { |
5016 | 0 | errorCount++; |
5017 | 0 | goto cleanup; |
5018 | 0 | } |
5019 | 0 | for (subTable = tableFiles; *subTable; subTable++) |
5020 | 0 | if (!compileFile(*subTable, translationTable, NULL)) goto cleanup; |
5021 | 0 | } |
5022 | 0 | } |
5023 | | |
5024 | | /* Clean up after compiling files */ |
5025 | 84 | cleanup: |
5026 | 84 | lou_freeTableFiles(tableFiles); |
5027 | 84 | if (warningCount) |
5028 | 53 | _lou_logMessage(LOU_LOG_WARN, "%s: %d warnings issued", tableList, warningCount); |
5029 | 84 | if (!errorCount) { |
5030 | 69 | if (translationTable) setDefaults(*translationTable); |
5031 | 69 | return 1; |
5032 | 69 | } else { |
5033 | 15 | _lou_logMessage(LOU_LOG_ERROR, "%d errors found.", errorCount); |
5034 | 15 | if (translationTable) { |
5035 | 15 | if (*translationTable) freeTranslationTable(*translationTable); |
5036 | 15 | *translationTable = NULL; |
5037 | 15 | } |
5038 | 15 | if (displayTable) { |
5039 | 15 | if (*displayTable) freeDisplayTable(*displayTable); |
5040 | 15 | *displayTable = NULL; |
5041 | 15 | } |
5042 | 15 | return 0; |
5043 | 15 | } |
5044 | 84 | } |
5045 | | |
5046 | | /* Return the emphasis classes declared in tableList. */ |
5047 | | char const **EXPORT_CALL |
5048 | 0 | lou_getEmphClasses(const char *tableList) { |
5049 | 0 | const char *names[MAX_EMPH_CLASSES + 1]; |
5050 | 0 | unsigned int count = 0; |
5051 | 0 | const TranslationTableHeader *table = _lou_getTranslationTable(tableList); |
5052 | 0 | if (!table) return NULL; |
5053 | | |
5054 | 0 | while (count < MAX_EMPH_CLASSES) { |
5055 | 0 | char const *name = table->emphClassNames[count]; |
5056 | 0 | if (!name) break; |
5057 | 0 | names[count++] = name; |
5058 | 0 | } |
5059 | 0 | names[count++] = NULL; |
5060 | |
|
5061 | 0 | { |
5062 | 0 | unsigned int size = count * sizeof(names[0]); |
5063 | 0 | char const **result = malloc(size); |
5064 | 0 | if (!result) return NULL; |
5065 | | /* The void* cast is necessary to stop MSVC from warning about |
5066 | | * different 'const' qualifiers (C4090). */ |
5067 | 0 | memcpy((void *)result, names, size); |
5068 | 0 | return result; |
5069 | 0 | } |
5070 | 0 | } |
5071 | | |
5072 | | void EXPORT_CALL |
5073 | 0 | lou_freeEmphClasses(char const **classes) { |
5074 | 0 | free(classes); |
5075 | 0 | } |
5076 | | |
5077 | | void |
5078 | | getTable(const char *tableList, const char *displayTableList, |
5079 | | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable); |
5080 | | |
5081 | | void EXPORT_CALL |
5082 | | _lou_getTable(const char *tableList, const char *displayTableList, |
5083 | | const TranslationTableHeader **translationTable, |
5084 | 143 | const DisplayTableHeader **displayTable) { |
5085 | 143 | TranslationTableHeader *newTable = NULL; |
5086 | 143 | DisplayTableHeader *newDisplayTable = NULL; |
5087 | 143 | getTable(tableList, displayTableList, &newTable, &newDisplayTable); |
5088 | 143 | if (newTable) |
5089 | 128 | if (!finalizeTable(newTable)) newTable = NULL; |
5090 | 143 | *translationTable = newTable; |
5091 | 143 | *displayTable = newDisplayTable; |
5092 | 143 | } |
5093 | | |
5094 | | /* Checks and loads tableList. */ |
5095 | | const void *EXPORT_CALL |
5096 | 84 | lou_getTable(const char *tableList) { |
5097 | 84 | const TranslationTableHeader *table = NULL; |
5098 | 84 | const DisplayTableHeader *displayTable = NULL; |
5099 | 84 | _lou_getTable(tableList, tableList, &table, &displayTable); |
5100 | 84 | if (!table || !displayTable) return NULL; |
5101 | 69 | return table; |
5102 | 84 | } |
5103 | | |
5104 | | const TranslationTableHeader *EXPORT_CALL |
5105 | 0 | _lou_getTranslationTable(const char *tableList) { |
5106 | 0 | TranslationTableHeader *table = NULL; |
5107 | 0 | getTable(tableList, NULL, &table, NULL); |
5108 | 0 | if (table) |
5109 | 0 | if (!finalizeTable(table)) table = NULL; |
5110 | 0 | return table; |
5111 | 0 | } |
5112 | | |
5113 | | const DisplayTableHeader *EXPORT_CALL |
5114 | 0 | _lou_getDisplayTable(const char *tableList) { |
5115 | 0 | DisplayTableHeader *table = NULL; |
5116 | 0 | getTable(NULL, tableList, NULL, &table); |
5117 | 0 | return table; |
5118 | 0 | } |
5119 | | |
5120 | | void |
5121 | | getTable(const char *translationTableList, const char *displayTableList, |
5122 | 143 | TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) { |
5123 | | /* Keep track of which tables have already been compiled */ |
5124 | 143 | int translationTableListLen, displayTableListLen = 0; |
5125 | 143 | if (translationTableList == NULL || *translationTableList == 0) |
5126 | 0 | translationTable = NULL; |
5127 | 143 | if (displayTableList == NULL || *displayTableList == 0) displayTable = NULL; |
5128 | | /* See if translation table has already been compiled */ |
5129 | 143 | if (translationTable) { |
5130 | 143 | translationTableListLen = (int)strlen(translationTableList); |
5131 | 143 | *translationTable = NULL; |
5132 | 143 | TranslationTableChainEntry *currentEntry = translationTableChain; |
5133 | 143 | TranslationTableChainEntry *prevEntry = NULL; |
5134 | 143 | while (currentEntry != NULL) { |
5135 | 59 | if (translationTableListLen == currentEntry->tableListLength && |
5136 | 59 | (memcmp(¤tEntry->tableList[0], translationTableList, |
5137 | 59 | translationTableListLen)) == 0) { |
5138 | | /* Move the table to the top of the table chain. */ |
5139 | 59 | if (prevEntry != NULL) { |
5140 | 0 | prevEntry->next = currentEntry->next; |
5141 | 0 | currentEntry->next = translationTableChain; |
5142 | 0 | translationTableChain = currentEntry; |
5143 | 0 | } |
5144 | 59 | *translationTable = currentEntry->table; |
5145 | 59 | break; |
5146 | 59 | } |
5147 | 0 | prevEntry = currentEntry; |
5148 | 0 | currentEntry = currentEntry->next; |
5149 | 0 | } |
5150 | 143 | } |
5151 | | /* See if display table has already been compiled */ |
5152 | 143 | if (displayTable) { |
5153 | 143 | displayTableListLen = (int)strlen(displayTableList); |
5154 | 143 | *displayTable = NULL; |
5155 | 143 | DisplayTableChainEntry *currentEntry = displayTableChain; |
5156 | 143 | DisplayTableChainEntry *prevEntry = NULL; |
5157 | 143 | while (currentEntry != NULL) { |
5158 | 59 | if (displayTableListLen == currentEntry->tableListLength && |
5159 | 59 | (memcmp(¤tEntry->tableList[0], displayTableList, |
5160 | 59 | displayTableListLen)) == 0) { |
5161 | | /* Move the table to the top of the table chain. */ |
5162 | 59 | if (prevEntry != NULL) { |
5163 | 0 | prevEntry->next = currentEntry->next; |
5164 | 0 | currentEntry->next = displayTableChain; |
5165 | 0 | displayTableChain = currentEntry; |
5166 | 0 | } |
5167 | 59 | *displayTable = currentEntry->table; |
5168 | 59 | break; |
5169 | 59 | } |
5170 | 0 | prevEntry = currentEntry; |
5171 | 0 | currentEntry = currentEntry->next; |
5172 | 0 | } |
5173 | 143 | } |
5174 | 143 | if ((translationTable && *translationTable == NULL) || |
5175 | 84 | (displayTable && *displayTable == NULL)) { |
5176 | 84 | TranslationTableHeader *newTranslationTable = NULL; |
5177 | 84 | DisplayTableHeader *newDisplayTable = NULL; |
5178 | 84 | if (compileTable(translationTableList, displayTableList, |
5179 | 84 | (translationTable && *translationTable == NULL) ? &newTranslationTable |
5180 | 84 | : NULL, |
5181 | 84 | (displayTable && *displayTable == NULL) ? &newDisplayTable : NULL)) { |
5182 | | /* Add a new entry to the top of the table chain. */ |
5183 | 69 | if (newTranslationTable != NULL) { |
5184 | 69 | int entrySize = |
5185 | 69 | sizeof(TranslationTableChainEntry) + translationTableListLen; |
5186 | 69 | TranslationTableChainEntry *newEntry = malloc(entrySize); |
5187 | 69 | if (!newEntry) _lou_outOfMemory(); |
5188 | 69 | newEntry->next = translationTableChain; |
5189 | 69 | newEntry->table = newTranslationTable; |
5190 | 69 | newEntry->tableListLength = translationTableListLen; |
5191 | 69 | memcpy(&newEntry->tableList[0], translationTableList, |
5192 | 69 | translationTableListLen); |
5193 | 69 | translationTableChain = newEntry; |
5194 | 69 | *translationTable = newTranslationTable; |
5195 | 69 | } |
5196 | 69 | if (newDisplayTable != NULL) { |
5197 | 69 | int entrySize = sizeof(DisplayTableChainEntry) + displayTableListLen; |
5198 | 69 | DisplayTableChainEntry *newEntry = malloc(entrySize); |
5199 | 69 | if (!newEntry) _lou_outOfMemory(); |
5200 | 69 | newEntry->next = displayTableChain; |
5201 | 69 | newEntry->table = newDisplayTable; |
5202 | 69 | newEntry->tableListLength = displayTableListLen; |
5203 | 69 | memcpy(&newEntry->tableList[0], displayTableList, displayTableListLen); |
5204 | 69 | displayTableChain = newEntry; |
5205 | 69 | *displayTable = newDisplayTable; |
5206 | 69 | } |
5207 | 69 | } else { |
5208 | 15 | _lou_logMessage( |
5209 | 15 | LOU_LOG_ERROR, "%s could not be compiled", translationTableList); |
5210 | 15 | return; |
5211 | 15 | } |
5212 | 84 | } |
5213 | 143 | } |
5214 | | |
5215 | | int EXPORT_CALL |
5216 | 84 | lou_checkTable(const char *tableList) { |
5217 | 84 | if (lou_getTable(tableList)) return 1; |
5218 | 15 | return 0; |
5219 | 84 | } |
5220 | | |
5221 | | formtype EXPORT_CALL |
5222 | 0 | lou_getTypeformForEmphClass(const char *tableList, const char *emphClass) { |
5223 | 0 | const TranslationTableHeader *table = _lou_getTranslationTable(tableList); |
5224 | 0 | if (!table) return 0; |
5225 | 0 | for (int i = 0; i < MAX_EMPH_CLASSES && table->emphClassNames[i]; i++) |
5226 | 0 | if (strcmp(emphClass, table->emphClassNames[i]) == 0) return italic << i; |
5227 | 0 | return 0; |
5228 | 0 | } |
5229 | | |
5230 | | static unsigned char *destSpacing = NULL; |
5231 | | static int sizeDestSpacing = 0; |
5232 | | static formtype *typebuf = NULL; |
5233 | | static unsigned int *wordBuffer = NULL; |
5234 | | static EmphasisInfo *emphasisBuffer = NULL; |
5235 | | static int sizeTypebuf = 0; |
5236 | | static widechar *passbuf[MAXPASSBUF] = { NULL }; |
5237 | | static int sizePassbuf[MAXPASSBUF] = { 0 }; |
5238 | | static int *posMapping1 = NULL; |
5239 | | static int sizePosMapping1 = 0; |
5240 | | static int *posMapping2 = NULL; |
5241 | | static int sizePosMapping2 = 0; |
5242 | | static int *posMapping3 = NULL; |
5243 | | static int sizePosMapping3 = 0; |
5244 | | void *EXPORT_CALL |
5245 | 434 | _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax) { |
5246 | 434 | if (srcmax < 1024) srcmax = 1024; |
5247 | 434 | if (destmax < 1024) destmax = 1024; |
5248 | 434 | switch (buffer) { |
5249 | 59 | case alloc_typebuf: |
5250 | 59 | if (destmax > sizeTypebuf) { |
5251 | 59 | if (typebuf != NULL) free(typebuf); |
5252 | | // TODO: should this be srcmax? |
5253 | 59 | typebuf = malloc((destmax + 4) * sizeof(formtype)); |
5254 | 59 | if (!typebuf) _lou_outOfMemory(); |
5255 | 59 | sizeTypebuf = destmax; |
5256 | 59 | } |
5257 | 59 | return typebuf; |
5258 | | |
5259 | 59 | case alloc_wordBuffer: |
5260 | | |
5261 | 59 | if (wordBuffer != NULL) free(wordBuffer); |
5262 | 59 | wordBuffer = calloc(srcmax + 4, sizeof(unsigned int)); |
5263 | 59 | if (wordBuffer == NULL) _lou_outOfMemory(); |
5264 | 59 | return wordBuffer; |
5265 | | |
5266 | 59 | case alloc_emphasisBuffer: |
5267 | | |
5268 | 59 | if (emphasisBuffer != NULL) free(emphasisBuffer); |
5269 | 59 | emphasisBuffer = calloc(srcmax + 4, sizeof(EmphasisInfo)); |
5270 | 59 | if (emphasisBuffer == NULL) _lou_outOfMemory(); |
5271 | 59 | return emphasisBuffer; |
5272 | | |
5273 | 0 | case alloc_destSpacing: |
5274 | 0 | if (destmax > sizeDestSpacing) { |
5275 | 0 | if (destSpacing != NULL) free(destSpacing); |
5276 | 0 | destSpacing = malloc(destmax + 4); |
5277 | 0 | if (!destSpacing) _lou_outOfMemory(); |
5278 | 0 | sizeDestSpacing = destmax; |
5279 | 0 | } |
5280 | 0 | return destSpacing; |
5281 | 112 | case alloc_passbuf: |
5282 | 112 | if (index < 0 || index >= MAXPASSBUF) { |
5283 | 0 | _lou_logMessage(LOU_LOG_FATAL, "Index out of bounds: %d\n", index); |
5284 | 0 | exit(3); |
5285 | 0 | } |
5286 | 112 | if (destmax > sizePassbuf[index]) { |
5287 | 103 | if (passbuf[index] != NULL) free(passbuf[index]); |
5288 | 103 | passbuf[index] = malloc((destmax + 4) * CHARSIZE); |
5289 | 103 | if (!passbuf[index]) _lou_outOfMemory(); |
5290 | 103 | sizePassbuf[index] = destmax; |
5291 | 103 | } |
5292 | 112 | return passbuf[index]; |
5293 | 59 | case alloc_posMapping1: { |
5294 | 59 | int mapSize; |
5295 | 59 | if (srcmax >= destmax) |
5296 | 7 | mapSize = srcmax; |
5297 | 52 | else |
5298 | 52 | mapSize = destmax; |
5299 | 59 | if (mapSize > sizePosMapping1) { |
5300 | 59 | if (posMapping1 != NULL) free(posMapping1); |
5301 | 59 | posMapping1 = malloc((mapSize + 4) * sizeof(int)); |
5302 | 59 | if (!posMapping1) _lou_outOfMemory(); |
5303 | 59 | sizePosMapping1 = mapSize; |
5304 | 59 | } |
5305 | 59 | } |
5306 | 59 | return posMapping1; |
5307 | 43 | case alloc_posMapping2: { |
5308 | 43 | int mapSize; |
5309 | 43 | if (srcmax >= destmax) |
5310 | 3 | mapSize = srcmax; |
5311 | 40 | else |
5312 | 40 | mapSize = destmax; |
5313 | 43 | if (mapSize > sizePosMapping2) { |
5314 | 43 | if (posMapping2 != NULL) free(posMapping2); |
5315 | 43 | posMapping2 = malloc((mapSize + 4) * sizeof(int)); |
5316 | 43 | if (!posMapping2) _lou_outOfMemory(); |
5317 | 43 | sizePosMapping2 = mapSize; |
5318 | 43 | } |
5319 | 43 | } |
5320 | 43 | return posMapping2; |
5321 | 43 | case alloc_posMapping3: { |
5322 | 43 | int mapSize; |
5323 | 43 | if (srcmax >= destmax) |
5324 | 3 | mapSize = srcmax; |
5325 | 40 | else |
5326 | 40 | mapSize = destmax; |
5327 | 43 | if (mapSize > sizePosMapping3) { |
5328 | 43 | if (posMapping3 != NULL) free(posMapping3); |
5329 | 43 | posMapping3 = malloc((mapSize + 4) * sizeof(int)); |
5330 | 43 | if (!posMapping3) _lou_outOfMemory(); |
5331 | 43 | sizePosMapping3 = mapSize; |
5332 | 43 | } |
5333 | 43 | } |
5334 | 43 | return posMapping3; |
5335 | 0 | default: |
5336 | 0 | return NULL; |
5337 | 434 | } |
5338 | 434 | } |
5339 | | |
5340 | | void EXPORT_CALL |
5341 | 84 | lou_free(void) { |
5342 | 84 | lou_logEnd(); |
5343 | 84 | if (translationTableChain != NULL) { |
5344 | 69 | TranslationTableChainEntry *currentEntry = translationTableChain; |
5345 | 69 | TranslationTableChainEntry *previousEntry; |
5346 | 138 | while (currentEntry) { |
5347 | 69 | freeTranslationTable(currentEntry->table); |
5348 | 69 | previousEntry = currentEntry; |
5349 | 69 | currentEntry = currentEntry->next; |
5350 | 69 | free(previousEntry); |
5351 | 69 | } |
5352 | 69 | translationTableChain = NULL; |
5353 | 69 | } |
5354 | 84 | if (displayTableChain != NULL) { |
5355 | 69 | DisplayTableChainEntry *currentEntry = displayTableChain; |
5356 | 69 | DisplayTableChainEntry *previousEntry; |
5357 | 138 | while (currentEntry) { |
5358 | 69 | freeDisplayTable(currentEntry->table); |
5359 | 69 | previousEntry = currentEntry; |
5360 | 69 | currentEntry = currentEntry->next; |
5361 | 69 | free(previousEntry); |
5362 | 69 | } |
5363 | 69 | displayTableChain = NULL; |
5364 | 69 | } |
5365 | 84 | if (typebuf != NULL) free(typebuf); |
5366 | 84 | typebuf = NULL; |
5367 | 84 | if (wordBuffer != NULL) free(wordBuffer); |
5368 | 84 | wordBuffer = NULL; |
5369 | 84 | if (emphasisBuffer != NULL) free(emphasisBuffer); |
5370 | 84 | emphasisBuffer = NULL; |
5371 | 84 | sizeTypebuf = 0; |
5372 | 84 | if (destSpacing != NULL) free(destSpacing); |
5373 | 84 | destSpacing = NULL; |
5374 | 84 | sizeDestSpacing = 0; |
5375 | 84 | { |
5376 | 84 | int k; |
5377 | 336 | for (k = 0; k < MAXPASSBUF; k++) { |
5378 | 252 | if (passbuf[k] != NULL) free(passbuf[k]); |
5379 | 252 | passbuf[k] = NULL; |
5380 | 252 | sizePassbuf[k] = 0; |
5381 | 252 | } |
5382 | 84 | } |
5383 | 84 | if (posMapping1 != NULL) free(posMapping1); |
5384 | 84 | posMapping1 = NULL; |
5385 | 84 | sizePosMapping1 = 0; |
5386 | 84 | if (posMapping2 != NULL) free(posMapping2); |
5387 | 84 | posMapping2 = NULL; |
5388 | 84 | sizePosMapping2 = 0; |
5389 | 84 | if (posMapping3 != NULL) free(posMapping3); |
5390 | 84 | posMapping3 = NULL; |
5391 | 84 | sizePosMapping3 = 0; |
5392 | 84 | opcodeLengths[0] = 0; |
5393 | 84 | _lou_freeTableIndex(); |
5394 | 84 | } |
5395 | | |
5396 | | const char *EXPORT_CALL |
5397 | 0 | lou_version(void) { |
5398 | 0 | static const char *version = PACKAGE_VERSION; |
5399 | 0 | return version; |
5400 | 0 | } |
5401 | | |
5402 | | int EXPORT_CALL |
5403 | 0 | lou_charSize(void) { |
5404 | 0 | return CHARSIZE; |
5405 | 0 | } |
5406 | | |
5407 | | int EXPORT_CALL |
5408 | 0 | lou_compileString(const char *tableList, const char *inString) { |
5409 | 0 | TranslationTableHeader *table; |
5410 | 0 | DisplayTableHeader *displayTable; |
5411 | 0 | getTable(tableList, tableList, &table, &displayTable); |
5412 | 0 | if (!table) return 0; |
5413 | 0 | if (!compileString(inString, &table, &displayTable)) return 0; |
5414 | 0 | return 1; |
5415 | 0 | } |
5416 | | |
5417 | | int EXPORT_CALL |
5418 | 0 | _lou_compileTranslationRule(const char *tableList, const char *inString) { |
5419 | 0 | TranslationTableHeader *table; |
5420 | 0 | getTable(tableList, NULL, &table, NULL); |
5421 | 0 | return compileString(inString, &table, NULL); |
5422 | 0 | } |
5423 | | |
5424 | | int EXPORT_CALL |
5425 | 0 | _lou_compileDisplayRule(const char *tableList, const char *inString) { |
5426 | 0 | DisplayTableHeader *table; |
5427 | 0 | getTable(NULL, tableList, NULL, &table); |
5428 | | return compileString(inString, NULL, &table); |
5429 | 0 | } |
5430 | | |
5431 | | /** |
 5432 |       |  * This procedure provides a target for calls that serve as breakpoints |
5433 | | * for gdb. |
5434 | | */ |
5435 | | // char *EXPORT_CALL |
5436 | | // lou_getTablePaths (void) |
5437 | | // { |
5438 | | // static char paths[MAXSTRING]; |
5439 | | // static char scratchBuf[MAXSTRING]; |
5440 | | // char *pathList; |
5441 | | // strcpy (paths, tablePath); |
5442 | | // strcat (paths, ","); |
5443 | | // pathList = getenv ("LOUIS_TABLEPATH"); |
5444 | | // if (pathList) |
5445 | | // { |
5446 | | // strcat (paths, pathList); |
5447 | | // strcat (paths, ","); |
5448 | | // } |
5449 | | // pathList = getcwd (scratchBuf, MAXSTRING); |
5450 | | // if (pathList) |
5451 | | // { |
5452 | | // strcat (paths, pathList); |
5453 | | // strcat (paths, ","); |
5454 | | // } |
5455 | | // pathList = lou_getDataPath (); |
5456 | | // if (pathList) |
5457 | | // { |
5458 | | // strcat (paths, pathList); |
5459 | | // strcat (paths, ","); |
5460 | | // } |
5461 | | // #ifdef _WIN32 |
5462 | | // strcpy (paths, lou_getProgramPath ()); |
5463 | | // strcat (paths, "\\share\\liblouss\\tables\\"); |
5464 | | // #else |
5465 | | // strcpy (paths, TABLESDIR); |
5466 | | // #endif |
5467 | | // return paths; |
5468 | | // } |