/src/liblouis/liblouis/lou_translateString.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The |
4 | | BRLTTY Team |
5 | | |
6 | | Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com |
7 | | Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com |
8 | | Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
9 | | Copyright (C) 2016 Davy Kager, Dedicon |
10 | | |
11 | | This file is part of liblouis. |
12 | | |
13 | | liblouis is free software: you can redistribute it and/or modify it |
14 | | under the terms of the GNU Lesser General Public License as published |
15 | | by the Free Software Foundation, either version 2.1 of the License, or |
16 | | (at your option) any later version. |
17 | | |
18 | | liblouis is distributed in the hope that it will be useful, but |
19 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | | Lesser General Public License for more details. |
22 | | |
23 | | You should have received a copy of the GNU Lesser General Public |
24 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | /** |
28 | | * @file |
29 | | * @brief Translate to braille |
30 | | */ |
31 | | |
32 | | #include <config.h> |
33 | | |
34 | | #include <stdio.h> |
35 | | #include <stdlib.h> |
36 | | #include <string.h> |
37 | | |
38 | | #include "internal.h" |
39 | | |
40 | | /* additional bits in typebuf */ |
41 | 0 | #define SYLLABLE_MARKER_1 0x2000 /* bit 13. NOTE(review): this bit is also inside EMPHASIS (0x3fff) below -- confirm the overlap is intended */ |
42 | 0 | #define SYLLABLE_MARKER_2 0x4000 /* bit 14 */ |
43 | 0 | #define CAPSEMPH 0x8000 /* bit 15 */ |
44 | | |
45 | 0 | #define EMPHASIS 0x3fff // all typeform bits that can be used |
46 | | |
47 | | /* bits for wordBuffer */ |
48 | 0 | #define WORD_CHAR 0x00000001 /* bit 0 */ |
49 | 0 | #define WORD_RESET 0x00000002 /* bit 1 */ |
50 | 0 | #define WORD_WHOLE 0x00000004 /* bit 2 */ |
51 | | |
52 | | typedef struct { /* Fixed-size pool of widechar buffers handed out by getStringBuffer() and returned via releaseStringBuffer(). */ |
53 | | int size; /* number of slots in buffers[] and inUse[] */ |
54 | | widechar **buffers; /* buffers[i] is (re)assigned from alloc(i, length) each time slot i is handed out */ |
55 | | int *inUse; /* inUse[i] is set to 1 while slot i is handed out and reset to 0 on release */ |
56 | | widechar *(*alloc)(int index, int length); /* called with the slot index and the requested length */ |
57 | | void (*free)(widechar *); /* optional; when non-NULL it is called on the slot's buffer at release time */ |
58 | | } StringBufferPool; |
59 | | |
60 | | static widechar * |
61 | 0 | allocStringBuffer(int index, int length) { |
62 | 0 | return _lou_allocMem(alloc_passbuf, index, 0, length); |
63 | 0 | } |
64 | | |
65 | | static const StringBufferPool *stringBufferPool = NULL; |
66 | | |
67 | | static void |
68 | 0 | initStringBufferPool() { |
69 | 0 | static widechar *stringBuffers[MAXPASSBUF] = { NULL }; |
70 | 0 | static int stringBuffersInUse[MAXPASSBUF] = { 0 }; |
71 | 0 | StringBufferPool *pool = malloc(sizeof(StringBufferPool)); |
72 | 0 | pool->size = MAXPASSBUF; |
73 | 0 | pool->buffers = stringBuffers; |
74 | 0 | pool->inUse = stringBuffersInUse; |
75 | 0 | pool->alloc = &allocStringBuffer; |
76 | 0 | pool->free = NULL; |
77 | 0 | stringBufferPool = pool; |
78 | 0 | } |
79 | | |
80 | | static int |
81 | 0 | getStringBuffer(int length) { |
82 | 0 | int i; |
83 | 0 | for (i = 0; i < stringBufferPool->size; i++) { |
84 | 0 | if (!stringBufferPool->inUse[i]) { |
85 | 0 | stringBufferPool->buffers[i] = stringBufferPool->alloc(i, length); |
86 | 0 | stringBufferPool->inUse[i] = 1; |
87 | 0 | return i; |
88 | 0 | } |
89 | 0 | } |
90 | 0 | _lou_outOfMemory(); |
91 | 0 | return -1; |
92 | 0 | } |
93 | | |
94 | | static int |
95 | 0 | releaseStringBuffer(int idx) { |
96 | 0 | if (idx >= 0 && idx < stringBufferPool->size) { |
97 | 0 | int inUse = stringBufferPool->inUse[idx]; |
98 | 0 | if (inUse && stringBufferPool->free) |
99 | 0 | stringBufferPool->free(stringBufferPool->buffers[idx]); |
100 | 0 | stringBufferPool->inUse[idx] = 0; |
101 | 0 | return inUse; |
102 | 0 | } |
103 | 0 | return 0; |
104 | 0 | } |
105 | | |
106 | | typedef struct { /* Read-only view of the text being processed. */ |
107 | | int bufferIndex; /* string buffer pool slot backing chars when the view was built from getStringBuffer(); compared against the original input's index to decide whether to release it */ |
108 | | const widechar *chars; /* the characters; indexed from 0 to length - 1 */ |
109 | | int length; /* number of valid entries in chars */ |
110 | | } InString; |
111 | | |
112 | | typedef struct { /* Writable output text. */ |
113 | | int bufferIndex; /* presumably the string buffer pool slot backing chars, mirroring InString -- TODO confirm */ |
114 | | widechar *chars; /* destination characters */ |
115 | | int maxlength; /* capacity: writers check length against this before appending */ |
116 | | int length; /* number of characters written so far */ |
117 | | } OutString; |
118 | | |
119 | | typedef struct { /* Positions reported by passDoTest() through its `match` parameter and consumed by passDoAction(). */ |
120 | | int startMatch; /* presumably the input position where the matched span begins -- TODO confirm */ |
121 | | int startReplace; /* presumably the input position where the replaced span begins -- TODO confirm */ |
122 | | int endReplace; /* presumably the input position where the replaced span ends -- TODO confirm */ |
123 | | int endMatch; /* presumably the input position where the matched span ends -- TODO confirm */ |
124 | | } PassRuleMatch; |
125 | | |
126 | | static int /* forward declaration: putCharacter() is not used before this point but is needed by code above its definition */ |
127 | | putCharacter(widechar c, const TranslationTableHeader *table, int pos, |
128 | | const InString *input, OutString *output, int *posMapping, int *cursorPosition, |
129 | | int *cursorStatus, int mode); |
130 | | static int /* forward declaration: passDoTest() is called by findForPassRule() and makeCorrections() before its definition */ |
131 | | passDoTest(const TranslationTableHeader *table, int pos, const InString *input, |
132 | | int transOpcode, const TranslationTableRule *transRule, int *passCharDots, |
133 | | const widechar **passInstructions, int *passIC, PassRuleMatch *match, |
134 | | TranslationTableRule **groupingRule, widechar *groupingOp); |
135 | | static int /* forward declaration: passDoAction() is called by makeCorrections() before its definition; it may replace *input and advances *pos */ |
136 | | passDoAction(const TranslationTableHeader *table, const InString **input, |
137 | | OutString *output, int *posMapping, int transOpcode, |
138 | | const TranslationTableRule **transRule, int passCharDots, |
139 | | const widechar *passInstructions, int passIC, int *pos, PassRuleMatch match, |
140 | | int *cursorPosition, int *cursorStatus, TranslationTableRule *groupingRule, |
141 | | widechar groupingOp, int mode); |
142 | | |
143 | | static const TranslationTableRule **appliedRules; /* optional log of applied rules; entries are appended only while it is non-NULL and appliedRulesCount < maxAppliedRules */ |
144 | | static int maxAppliedRules; /* capacity of appliedRules */ |
145 | | static int appliedRulesCount; /* number of entries currently stored in appliedRules */ |
146 | | |
147 | | static TranslationTableCharacter * |
148 | 0 | getChar(widechar c, const TranslationTableHeader *table) { |
149 | 0 | static TranslationTableCharacter notFound = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, |
150 | 0 | 0, 0 }; |
151 | 0 | const TranslationTableOffset bucket = table->characters[_lou_charHash(c)]; |
152 | 0 | TranslationTableOffset offset = bucket; |
153 | 0 | while (offset) { |
154 | 0 | TranslationTableCharacter *character = |
155 | 0 | (TranslationTableCharacter *)&table->ruleArea[offset]; |
156 | 0 | if (character->value == c) return character; |
157 | 0 | offset = character->next; |
158 | 0 | } |
159 | 0 | notFound.value = c; |
160 | 0 | return ¬Found; |
161 | 0 | } |
162 | | |
163 | | static TranslationTableCharacter * |
164 | 0 | getDots(widechar c, const TranslationTableHeader *table) { |
165 | 0 | static TranslationTableCharacter notFound = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, |
166 | 0 | LOU_DOTS, 0, 0 }; |
167 | 0 | const TranslationTableOffset bucket = table->dots[_lou_charHash(c)]; |
168 | 0 | TranslationTableOffset offset = bucket; |
169 | 0 | while (offset) { |
170 | 0 | TranslationTableCharacter *character = |
171 | 0 | (TranslationTableCharacter *)&table->ruleArea[offset]; |
172 | 0 | if (character->value == c) return character; |
173 | 0 | offset = character->next; |
174 | 0 | } |
175 | 0 | notFound.value = c; |
176 | 0 | return ¬Found; |
177 | 0 | } |
178 | | |
179 | | static int |
180 | | checkCharAttr(const widechar c, const TranslationTableCharacterAttributes a, |
181 | 0 | const TranslationTableHeader *table) { |
182 | 0 | return (((getChar(c, table))->attributes & a) ? 1 : 0); |
183 | 0 | } |
184 | | |
185 | | static int |
186 | | checkDotsAttr(const widechar c, const TranslationTableCharacterAttributes a, |
187 | 0 | const TranslationTableHeader *table) { |
188 | 0 | return (((getDots(c, table))->attributes & a) ? 1 : 0); |
189 | 0 | } |
190 | | |
191 | | static int |
192 | | checkCharAttr_safe(const InString *input, int pos, |
193 | | const TranslationTableCharacterAttributes a, |
194 | 0 | const TranslationTableHeader *table) { |
195 | 0 | return ((pos < input->length) ? checkCharAttr(input->chars[pos], a, table) : 0); |
196 | 0 | } |
197 | | |
198 | | static int |
199 | | findForPassRule(const TranslationTableHeader *table, int pos, int currentPass, |
200 | | const InString *input, int *transOpcode, const TranslationTableRule **transRule, |
201 | | int *transCharslen, int *passCharDots, widechar const **passInstructions, |
202 | | int *passIC, PassRuleMatch *match, TranslationTableRule **groupingRule, |
203 | 0 | widechar *groupingOp) { |
204 | 0 | int save_transCharslen = *transCharslen; |
205 | 0 | const TranslationTableRule *save_transRule = *transRule; |
206 | 0 | TranslationTableOpcode save_transOpcode = *transOpcode; |
207 | 0 | TranslationTableOffset ruleOffset; |
208 | 0 | ruleOffset = table->forPassRules[currentPass]; |
209 | 0 | *transCharslen = 0; |
210 | 0 | while (ruleOffset) { |
211 | 0 | *transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
212 | 0 | *transOpcode = (*transRule)->opcode; |
213 | 0 | if (passDoTest(table, pos, input, *transOpcode, *transRule, passCharDots, |
214 | 0 | passInstructions, passIC, match, groupingRule, groupingOp)) |
215 | 0 | return 1; |
216 | 0 | ruleOffset = (*transRule)->charsnext; |
217 | 0 | } |
218 | 0 | *transCharslen = save_transCharslen; |
219 | 0 | *transRule = save_transRule; |
220 | 0 | *transOpcode = save_transOpcode; |
221 | 0 | return 0; |
222 | 0 | } |
223 | | |
224 | | static widechar |
225 | | toLowercase( |
226 | 0 | const TranslationTableHeader *table, const TranslationTableCharacter *character) { |
227 | 0 | if (character->mode & CTC_UpperCase) { |
228 | 0 | const TranslationTableCharacter *c = character; |
229 | 0 | if (c->basechar) c = (TranslationTableCharacter *)&table->ruleArea[c->basechar]; |
230 | 0 | while (1) { |
231 | 0 | if ((c->mode & (character->mode & ~CTC_UpperCase)) == |
232 | 0 | (character->mode & ~CTC_UpperCase)) |
233 | 0 | return c->value; |
234 | 0 | if (!c->linked) break; |
235 | 0 | c = (TranslationTableCharacter *)&table->ruleArea[c->linked]; |
236 | 0 | } |
237 | 0 | } |
238 | 0 | return character->value; |
239 | 0 | } |
240 | | |
241 | | static int |
242 | | compareChars(const widechar *address1, const widechar *address2, int count, |
243 | 0 | const TranslationTableHeader *table) { |
244 | 0 | int k; |
245 | 0 | if (!count) return 0; |
246 | 0 | for (k = 0; k < count; k++) |
247 | 0 | if (toLowercase(table, getChar(address1[k], table)) != |
248 | 0 | toLowercase(table, getChar(address2[k], table))) |
249 | 0 | return 0; |
250 | 0 | return 1; |
251 | 0 | } |
252 | | |
253 | | static int |
254 | | makeCorrections(const TranslationTableHeader *table, const InString *input, |
255 | | OutString *output, int *posMapping, formtype *typebuf, int *realInlen, |
256 | 0 | int *cursorPosition, int *cursorStatus, int mode) { |
257 | 0 | int pos; |
258 | 0 | int transOpcode; |
259 | 0 | const TranslationTableRule *transRule; |
260 | 0 | int transCharslen; |
261 | 0 | int passCharDots; |
262 | 0 | const widechar *passInstructions; |
263 | 0 | int passIC; /* Instruction counter */ |
264 | 0 | PassRuleMatch patternMatch; |
265 | 0 | TranslationTableRule *groupingRule; |
266 | 0 | widechar groupingOp; |
267 | 0 | const InString *origInput = input; |
268 | 0 | if (!table->corrections) return 1; |
269 | 0 | pos = 0; |
270 | 0 | output->length = 0; |
271 | 0 | int posIncremented = 1; |
272 | 0 | _lou_resetPassVariables(); |
273 | 0 | while (pos < input->length) { |
274 | 0 | int length = input->length - pos; |
275 | 0 | int tryThis = 0; |
276 | | // check posIncremented to avoid endless loop |
277 | 0 | if (!(posIncremented && |
278 | 0 | findForPassRule(table, pos, 0, input, &transOpcode, &transRule, |
279 | 0 | &transCharslen, &passCharDots, &passInstructions, &passIC, |
280 | 0 | &patternMatch, &groupingRule, &groupingOp))) |
281 | 0 | while (tryThis < 3) { |
282 | 0 | TranslationTableOffset ruleOffset = 0; |
283 | 0 | switch (tryThis) { |
284 | 0 | case 0: |
285 | 0 | if (!(length >= 2)) break; |
286 | 0 | ruleOffset = table->forRules[_lou_stringHash( |
287 | 0 | &input->chars[pos], 1, table)]; |
288 | 0 | break; |
289 | 0 | case 1: |
290 | 0 | if (!(length >= 1)) break; |
291 | 0 | length = 1; |
292 | 0 | ruleOffset = getChar(input->chars[pos], table)->otherRules; |
293 | 0 | break; |
294 | 0 | case 2: /* No rule found */ |
295 | 0 | transOpcode = CTO_Always; |
296 | 0 | ruleOffset = 0; |
297 | 0 | break; |
298 | 0 | } |
299 | 0 | while (ruleOffset) { |
300 | 0 | transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
301 | 0 | transOpcode = transRule->opcode; |
302 | 0 | transCharslen = transRule->charslen; |
303 | 0 | if (tryThis == 1 || |
304 | 0 | (transCharslen <= length && |
305 | 0 | compareChars(&transRule->charsdots[0], |
306 | 0 | &input->chars[pos], transCharslen, table))) { |
307 | 0 | if (posIncremented && transOpcode == CTO_Correct && |
308 | 0 | passDoTest(table, pos, input, transOpcode, transRule, |
309 | 0 | &passCharDots, &passInstructions, &passIC, |
310 | 0 | &patternMatch, &groupingRule, &groupingOp)) { |
311 | 0 | tryThis = 4; |
312 | 0 | break; |
313 | 0 | } |
314 | 0 | } |
315 | 0 | ruleOffset = transRule->charsnext; |
316 | 0 | } |
317 | 0 | tryThis++; |
318 | 0 | } |
319 | 0 | posIncremented = 1; |
320 | |
|
321 | 0 | switch (transOpcode) { |
322 | 0 | case CTO_Always: |
323 | 0 | if (output->length >= output->maxlength) goto failure; |
324 | 0 | posMapping[output->length] = pos; |
325 | 0 | output->chars[output->length++] = input->chars[pos++]; |
326 | 0 | break; |
327 | 0 | case CTO_Correct: { |
328 | 0 | const InString *inputBefore = input; |
329 | 0 | int posBefore = pos; |
330 | 0 | if (appliedRules != NULL && appliedRulesCount < maxAppliedRules) |
331 | 0 | appliedRules[appliedRulesCount++] = transRule; |
332 | 0 | if (!passDoAction(table, &input, output, posMapping, transOpcode, &transRule, |
333 | 0 | passCharDots, passInstructions, passIC, &pos, patternMatch, |
334 | 0 | cursorPosition, cursorStatus, groupingRule, groupingOp, mode)) |
335 | 0 | goto failure; |
336 | 0 | if (input->bufferIndex != inputBefore->bufferIndex && |
337 | 0 | inputBefore->bufferIndex != origInput->bufferIndex) |
338 | 0 | releaseStringBuffer(inputBefore->bufferIndex); |
339 | 0 | if (pos == posBefore) posIncremented = 0; |
340 | 0 | break; |
341 | 0 | } |
342 | 0 | default: |
343 | 0 | break; |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | 0 | { // We have to transform typebuf accordingly |
348 | 0 | int k; |
349 | 0 | formtype *typebuf_temp; |
350 | 0 | if ((typebuf_temp = malloc(output->length * sizeof(formtype))) == NULL) |
351 | 0 | _lou_outOfMemory(); |
352 | 0 | for (k = 0; k < output->length; k++) |
353 | | // posMapping will never be < 0 but in theory it could |
354 | 0 | if (posMapping[k] < 0) |
355 | 0 | typebuf_temp[k] = typebuf[0]; // prepend to next |
356 | 0 | else if (posMapping[k] >= input->length) |
357 | 0 | typebuf_temp[k] = typebuf[input->length - 1]; // append to previous |
358 | 0 | else |
359 | 0 | typebuf_temp[k] = typebuf[posMapping[k]]; |
360 | 0 | memcpy(typebuf, typebuf_temp, output->length * sizeof(formtype)); |
361 | 0 | free(typebuf_temp); |
362 | 0 | } |
363 | |
|
364 | 0 | failure: |
365 | 0 | *realInlen = pos; |
366 | 0 | if (input->bufferIndex != origInput->bufferIndex) |
367 | 0 | releaseStringBuffer(input->bufferIndex); |
368 | 0 | return 1; |
369 | 0 | } |
370 | | |
371 | | static int |
372 | | matchCurrentInput( |
373 | 0 | const InString *input, int pos, const widechar *passInstructions, int passIC) { |
374 | 0 | int k; |
375 | 0 | int kk = pos; |
376 | 0 | for (k = passIC + 2; |
377 | 0 | ((k < passIC + 2 + passInstructions[passIC + 1]) && (kk < input->length)); |
378 | 0 | k++) |
379 | 0 | if (input->chars[kk] == LOU_ENDSEGMENT || |
380 | 0 | passInstructions[k] != input->chars[kk++]) |
381 | 0 | return 0; |
382 | 0 | return 1; |
383 | 0 | } |
384 | | |
385 | | static int |
386 | | swapTest(int swapIC, int *pos, const TranslationTableHeader *table, const InString *input, |
387 | 0 | const widechar *passInstructions) { |
388 | 0 | int p = *pos; |
389 | 0 | TranslationTableOffset swapRuleOffset; |
390 | 0 | TranslationTableRule *swapRule; |
391 | 0 | swapRuleOffset = (passInstructions[swapIC + 1] << 16) | passInstructions[swapIC + 2]; |
392 | 0 | swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset]; |
393 | 0 | while (p - *pos < passInstructions[swapIC + 3]) { |
394 | 0 | int test; |
395 | 0 | if (p >= input->length) return 0; |
396 | 0 | if (swapRule->opcode == CTO_SwapDd) { |
397 | 0 | for (test = 1; test < swapRule->charslen; test += 2) { |
398 | 0 | if (input->chars[p] == swapRule->charsdots[test]) break; |
399 | 0 | } |
400 | 0 | } else { |
401 | 0 | for (test = 0; test < swapRule->charslen; test++) { |
402 | 0 | if (input->chars[p] == swapRule->charsdots[test]) break; |
403 | 0 | } |
404 | 0 | } |
405 | 0 | if (test >= swapRule->charslen) return 0; |
406 | 0 | p++; |
407 | 0 | } |
408 | 0 | if (passInstructions[swapIC + 3] == passInstructions[swapIC + 4]) { |
409 | 0 | *pos = p; |
410 | 0 | return 1; |
411 | 0 | } |
412 | 0 | while (p - *pos < passInstructions[swapIC + 4]) { |
413 | 0 | int test; |
414 | 0 | if (p >= input->length) { |
415 | 0 | *pos = p; |
416 | 0 | return 1; |
417 | 0 | } |
418 | 0 | if (swapRule->opcode == CTO_SwapDd) { |
419 | 0 | for (test = 1; test < swapRule->charslen; test += 2) { |
420 | 0 | if (input->chars[p] == swapRule->charsdots[test]) break; |
421 | 0 | } |
422 | 0 | } else { |
423 | 0 | for (test = 0; test < swapRule->charslen; test++) { |
424 | 0 | if (input->chars[p] == swapRule->charsdots[test]) break; |
425 | 0 | } |
426 | 0 | } |
427 | 0 | if (test >= swapRule->charslen) { |
428 | 0 | *pos = p; |
429 | 0 | return 1; |
430 | 0 | } |
431 | 0 | p++; |
432 | 0 | } |
433 | 0 | *pos = p; |
434 | 0 | return 1; |
435 | 0 | } |
436 | | |
437 | | static int |
438 | | swapReplace(int start, int end, const TranslationTableHeader *table, |
439 | | const InString *input, OutString *output, int *posMapping, |
440 | 0 | const widechar *passInstructions, int passIC) { |
441 | 0 | TranslationTableOffset swapRuleOffset; |
442 | 0 | TranslationTableRule *swapRule; |
443 | 0 | widechar *replacements; |
444 | 0 | int p; |
445 | 0 | swapRuleOffset = (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2]; |
446 | 0 | swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset]; |
447 | 0 | replacements = &swapRule->charsdots[swapRule->charslen]; |
448 | 0 | for (p = start; p < end; p++) { |
449 | 0 | int rep; |
450 | 0 | int test; |
451 | 0 | int k; |
452 | 0 | if (swapRule->opcode == CTO_SwapDd) { |
453 | | // A sequence of dot patterns is encoded as the length of the first dot |
454 | | // pattern (single widechar) followed by the contents of the first dot pattern |
455 | | // (one widechar per cell) followed by the length of the second dot pattern, |
456 | | // etc. See the function `compileSwapDots'. Because the third operand of a |
457 | | // swapdd rule can only contain single-cell dot patterns, the elements at |
458 | | // index 0, 2, ... are "1" and the elements at index 1, 3, ... are the dot |
459 | | // patterns. |
460 | 0 | for (test = 0; test * 2 + 1 < swapRule->charslen; test++) |
461 | 0 | if (input->chars[p] == swapRule->charsdots[test * 2 + 1]) break; |
462 | 0 | if (test * 2 == swapRule->charslen) continue; |
463 | 0 | } else { |
464 | 0 | for (test = 0; test < swapRule->charslen; test++) |
465 | 0 | if (input->chars[p] == swapRule->charsdots[test]) break; |
466 | 0 | if (test == swapRule->charslen) continue; |
467 | 0 | } |
468 | 0 | k = 0; |
469 | 0 | for (rep = 0; rep < test; rep++) |
470 | 0 | if (swapRule->opcode == CTO_SwapCc) |
471 | 0 | k++; |
472 | 0 | else |
473 | 0 | k += replacements[k]; |
474 | 0 | if (swapRule->opcode == CTO_SwapCc) { |
475 | 0 | if ((output->length + 1) > output->maxlength) return 0; |
476 | 0 | posMapping[output->length] = p; |
477 | 0 | output->chars[output->length++] = replacements[k]; |
478 | 0 | } else { |
479 | 0 | int l = replacements[k] - 1; |
480 | 0 | int d = output->length + l; |
481 | 0 | if (d > output->maxlength) return 0; |
482 | 0 | while (--d >= output->length) posMapping[d] = p; |
483 | | // if length is negative fail |
484 | 0 | int length = l * sizeof(*output->chars); |
485 | 0 | if (length < 0) return 0; |
486 | 0 | memcpy(&output->chars[output->length], &replacements[k + 1], length); |
487 | 0 | output->length += l; |
488 | 0 | } |
489 | 0 | } |
490 | 0 | return 1; |
491 | 0 | } |
492 | | |
493 | | static int |
494 | | replaceGrouping(const TranslationTableHeader *table, const InString **input, |
495 | | OutString *output, int transOpcode, int passCharDots, |
496 | | const widechar *passInstructions, int passIC, int startReplace, |
497 | 0 | TranslationTableRule *groupingRule, widechar groupingOp) { |
498 | 0 | widechar startCharDots = groupingRule->charsdots[2 * passCharDots]; |
499 | 0 | widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1]; |
500 | 0 | int p; |
501 | 0 | int level = 0; |
502 | 0 | TranslationTableOffset replaceOffset = |
503 | 0 | passInstructions[passIC + 1] << 16 | (passInstructions[passIC + 2] & 0xff); |
504 | 0 | TranslationTableRule *replaceRule = |
505 | 0 | (TranslationTableRule *)&table->ruleArea[replaceOffset]; |
506 | 0 | widechar replaceStart = replaceRule->charsdots[2 * passCharDots]; |
507 | 0 | widechar replaceEnd = replaceRule->charsdots[2 * passCharDots + 1]; |
508 | 0 | if (groupingOp == pass_groupstart) { |
509 | 0 | for (p = startReplace + 1; p < (*input)->length; p++) { |
510 | 0 | if ((*input)->chars[p] == startCharDots) level--; |
511 | 0 | if ((*input)->chars[p] == endCharDots) level++; |
512 | 0 | if (level == 1) break; |
513 | 0 | } |
514 | 0 | if (p == (*input)->length) |
515 | 0 | return 0; |
516 | 0 | else { |
517 | | // Create a new string instead of modifying it. This is slightly less |
518 | | // efficient, but makes the code more readable. Grouping is not a much used |
519 | | // feature anyway. |
520 | 0 | int idx = getStringBuffer((*input)->length); |
521 | 0 | widechar *chars = stringBufferPool->buffers[idx]; |
522 | 0 | memcpy(chars, (*input)->chars, (*input)->length * sizeof(widechar)); |
523 | 0 | chars[startReplace] = replaceStart; |
524 | 0 | chars[p] = replaceEnd; |
525 | 0 | static InString stringStore; |
526 | 0 | stringStore = (InString){ |
527 | 0 | .chars = chars, .length = (*input)->length, .bufferIndex = idx |
528 | 0 | }; |
529 | 0 | *input = &stringStore; |
530 | 0 | } |
531 | 0 | } else { |
532 | 0 | if (transOpcode == CTO_Context) { |
533 | 0 | startCharDots = groupingRule->charsdots[2]; |
534 | 0 | endCharDots = groupingRule->charsdots[3]; |
535 | 0 | replaceStart = replaceRule->charsdots[2]; |
536 | 0 | replaceEnd = replaceRule->charsdots[3]; |
537 | 0 | } |
538 | 0 | output->chars[output->length] = replaceEnd; |
539 | 0 | for (p = output->length - 1; p >= 0; p--) { |
540 | 0 | if (output->chars[p] == endCharDots) level--; |
541 | 0 | if (output->chars[p] == startCharDots) level++; |
542 | 0 | if (level == 1) break; |
543 | 0 | } |
544 | 0 | if (p < 0) return 0; |
545 | 0 | output->chars[p] = replaceStart; |
546 | 0 | output->length++; |
547 | 0 | } |
548 | 0 | return 1; |
549 | 0 | } |
550 | | |
551 | | static int |
552 | | removeGrouping(const InString **input, OutString *output, int passCharDots, |
553 | 0 | int startReplace, TranslationTableRule *groupingRule, widechar groupingOp) { |
554 | 0 | widechar startCharDots = groupingRule->charsdots[2 * passCharDots]; |
555 | 0 | widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1]; |
556 | 0 | int p; |
557 | 0 | int level = 0; |
558 | 0 | if (groupingOp == pass_groupstart) { |
559 | 0 | for (p = startReplace + 1; p < (*input)->length; p++) { |
560 | 0 | if ((*input)->chars[p] == startCharDots) level--; |
561 | 0 | if ((*input)->chars[p] == endCharDots) level++; |
562 | 0 | if (level == 1) break; |
563 | 0 | } |
564 | 0 | if (p == (*input)->length) |
565 | 0 | return 0; |
566 | 0 | else { |
567 | | // Create a new string instead of modifying it. This is slightly less |
568 | | // efficient, but makes the code more readable. Grouping is not a much used |
569 | | // feature anyway. |
570 | 0 | int idx = getStringBuffer((*input)->length); |
571 | 0 | widechar *chars = stringBufferPool->buffers[idx]; |
572 | 0 | int len = 0; |
573 | 0 | int k; |
574 | 0 | for (k = 0; k < (*input)->length; k++) { |
575 | 0 | if (k == p) continue; |
576 | 0 | chars[len++] = (*input)->chars[k]; |
577 | 0 | } |
578 | 0 | static InString stringStore; |
579 | 0 | stringStore = (InString){ .chars = chars, .length = len, .bufferIndex = idx }; |
580 | 0 | *input = &stringStore; |
581 | 0 | } |
582 | 0 | } else { |
583 | 0 | for (p = output->length - 1; p >= 0; p--) { |
584 | 0 | if (output->chars[p] == endCharDots) level--; |
585 | 0 | if (output->chars[p] == startCharDots) level++; |
586 | 0 | if (level == 1) break; |
587 | 0 | } |
588 | 0 | if (p < 0) return 0; |
589 | 0 | p++; |
590 | 0 | for (; p < output->length; p++) output->chars[p - 1] = output->chars[p]; |
591 | 0 | output->length--; |
592 | 0 | } |
593 | 0 | return 1; |
594 | 0 | } |
595 | | |
596 | | static int |
597 | | doPassSearch(const TranslationTableHeader *table, const InString *input, |
598 | | const TranslationTableRule *transRule, int passCharDots, int pos, |
599 | | const widechar *passInstructions, int passIC, int *searchIC, int *searchPos, |
600 | 0 | TranslationTableRule *groupingRule, widechar groupingOp) { |
601 | 0 | int level = 0; |
602 | 0 | int k, kk; |
603 | 0 | int notOperator = 0; // whether next operand should be reversed |
604 | 0 | TranslationTableOffset ruleOffset; |
605 | 0 | TranslationTableRule *rule; |
606 | 0 | TranslationTableCharacterAttributes attributes; |
607 | 0 | while (pos < input->length) { |
608 | 0 | *searchIC = passIC + 1; |
609 | 0 | *searchPos = pos; |
610 | 0 | while (*searchIC < transRule->dotslen) { |
611 | 0 | int itsTrue = 1; // whether we have a match or not |
612 | 0 | if (*searchPos >= input->length) return 0; |
613 | 0 | switch (passInstructions[*searchIC]) { |
614 | 0 | case pass_lookback: |
615 | 0 | *searchPos -= passInstructions[*searchIC + 1]; |
616 | 0 | if (*searchPos < 0) { |
617 | 0 | *searchPos = 0; |
618 | 0 | itsTrue = 0; |
619 | 0 | } |
620 | 0 | *searchIC += 2; |
621 | 0 | break; |
622 | 0 | case pass_not: |
623 | 0 | notOperator = !notOperator; |
624 | 0 | (*searchIC)++; |
625 | 0 | continue; |
626 | 0 | case pass_string: |
627 | 0 | case pass_dots: |
628 | 0 | kk = *searchPos; |
629 | 0 | for (k = *searchIC + 2; |
630 | 0 | k < *searchIC + 2 + passInstructions[*searchIC + 1]; k++) |
631 | 0 | if (input->chars[kk] == LOU_ENDSEGMENT || |
632 | 0 | passInstructions[k] != input->chars[kk++]) { |
633 | 0 | itsTrue = 0; |
634 | 0 | break; |
635 | 0 | } |
636 | 0 | *searchPos += passInstructions[*searchIC + 1]; |
637 | 0 | *searchIC += passInstructions[*searchIC + 1] + 2; |
638 | 0 | break; |
639 | 0 | case pass_startReplace: |
640 | 0 | (*searchIC)++; |
641 | 0 | break; |
642 | 0 | case pass_endReplace: |
643 | 0 | (*searchIC)++; |
644 | 0 | break; |
645 | 0 | case pass_attributes: |
646 | 0 | attributes = passInstructions[*searchIC + 1]; |
647 | 0 | attributes <<= 16; |
648 | 0 | attributes |= passInstructions[*searchIC + 2]; |
649 | 0 | attributes <<= 16; |
650 | 0 | attributes |= passInstructions[*searchIC + 3]; |
651 | 0 | attributes <<= 16; |
652 | 0 | attributes |= passInstructions[*searchIC + 4]; |
653 | 0 | for (k = 0; k < passInstructions[*searchIC + 5]; k++) { |
654 | 0 | if (input->chars[*searchPos] == LOU_ENDSEGMENT) |
655 | 0 | itsTrue = 0; |
656 | 0 | else { |
657 | 0 | itsTrue = (passCharDots ? getDots(input->chars[(*searchPos)++], |
658 | 0 | table) |
659 | 0 | : getChar(input->chars[(*searchPos)++], |
660 | 0 | table)) |
661 | 0 | ->attributes & |
662 | 0 | attributes; |
663 | 0 | if (notOperator) itsTrue = !itsTrue; |
664 | 0 | } |
665 | 0 | if (!itsTrue) break; |
666 | 0 | } |
667 | 0 | if (itsTrue) { |
668 | 0 | for (k = passInstructions[*searchIC + 5]; |
669 | 0 | k < passInstructions[*searchIC + 6]; k++) { |
670 | 0 | if (*searchPos >= input->length) return 0; |
671 | 0 | if (input->chars[*searchPos] == LOU_ENDSEGMENT) { |
672 | 0 | itsTrue = 0; |
673 | 0 | break; |
674 | 0 | } |
675 | 0 | if (!((passCharDots ? getDots(input->chars[*searchPos], table) |
676 | 0 | : getChar(input->chars[*searchPos], table)) |
677 | 0 | ->attributes & |
678 | 0 | attributes)) { |
679 | 0 | if (!notOperator) break; |
680 | 0 | } else if (notOperator) |
681 | 0 | break; |
682 | 0 | (*searchPos)++; |
683 | 0 | } |
684 | 0 | } |
685 | 0 | notOperator = 0; |
686 | 0 | *searchIC += 7; |
687 | 0 | break; |
688 | 0 | case pass_groupstart: |
689 | 0 | case pass_groupend: |
690 | 0 | ruleOffset = (passInstructions[*searchIC + 1] << 16) | |
691 | 0 | passInstructions[*searchIC + 2]; |
692 | 0 | rule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
693 | 0 | if (passInstructions[*searchIC] == pass_groupstart) |
694 | 0 | itsTrue = (input->chars[*searchPos] == |
695 | 0 | rule->charsdots[2 * passCharDots]) |
696 | 0 | ? 1 |
697 | 0 | : 0; |
698 | 0 | else |
699 | 0 | itsTrue = (input->chars[*searchPos] == |
700 | 0 | rule->charsdots[2 * passCharDots + 1]) |
701 | 0 | ? 1 |
702 | 0 | : 0; |
703 | 0 | if (groupingRule != NULL && groupingOp == pass_groupstart && |
704 | 0 | rule == groupingRule) { |
705 | 0 | if (input->chars[*searchPos] == rule->charsdots[2 * passCharDots]) |
706 | 0 | level--; |
707 | 0 | else if (input->chars[*searchPos] == |
708 | 0 | rule->charsdots[2 * passCharDots + 1]) |
709 | 0 | level++; |
710 | 0 | } |
711 | 0 | (*searchPos)++; |
712 | 0 | *searchIC += 3; |
713 | 0 | break; |
714 | 0 | case pass_swap: |
715 | 0 | itsTrue = swapTest(*searchIC, searchPos, table, input, passInstructions); |
716 | 0 | *searchIC += 5; |
717 | 0 | break; |
718 | 0 | case pass_endTest: |
719 | 0 | if (itsTrue) { |
720 | 0 | if ((groupingRule && level == 1) || !groupingRule) return 1; |
721 | 0 | } |
722 | 0 | *searchIC = transRule->dotslen; |
723 | 0 | break; |
724 | 0 | default: |
725 | 0 | if (_lou_handlePassVariableTest(passInstructions, searchIC, &itsTrue)) |
726 | 0 | break; |
727 | 0 | break; |
728 | 0 | } |
729 | 0 | if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) break; |
730 | 0 | notOperator = 0; |
731 | 0 | } |
732 | 0 | pos++; |
733 | 0 | } |
734 | 0 | return 0; |
735 | 0 | } |
736 | | |
737 | | static int |
738 | | passDoTest(const TranslationTableHeader *table, int pos, const InString *input, |
739 | | int transOpcode, const TranslationTableRule *transRule, int *passCharDots, |
740 | | widechar const **passInstructions, int *passIC, PassRuleMatch *match, |
741 | 0 | TranslationTableRule **groupingRule, widechar *groupingOp) { |
742 | 0 | int searchIC, searchPos; |
743 | 0 | int k; |
744 | 0 | int notOperator = 0; // whether next operand should be reversed |
745 | 0 | TranslationTableOffset ruleOffset = 0; |
746 | 0 | TranslationTableRule *rule = NULL; |
747 | 0 | TranslationTableCharacterAttributes attributes = 0; |
748 | 0 | int startMatch = pos; |
749 | 0 | int endMatch = pos; |
750 | 0 | int startReplace = -1; |
751 | 0 | int endReplace = -1; |
752 | 0 | *groupingRule = NULL; |
753 | 0 | *passInstructions = &transRule->charsdots[transRule->charslen]; |
754 | 0 | *passIC = 0; |
755 | 0 | if (transOpcode == CTO_Context || transOpcode == CTO_Correct) |
756 | 0 | *passCharDots = 0; |
757 | 0 | else |
758 | 0 | *passCharDots = 1; |
759 | 0 | while (*passIC < transRule->dotslen) { |
760 | 0 | int itsTrue = 1; // whether we have a match or not |
761 | | // check if `pos` is within the input string, |
762 | | // maybe a unsigned type would be better to omit negative values |
763 | 0 | if (pos > input->length || pos < 0) return 0; |
764 | 0 | switch ((*passInstructions)[*passIC]) { |
765 | 0 | case pass_first: |
766 | 0 | if (pos != 0) itsTrue = 0; |
767 | 0 | (*passIC)++; |
768 | 0 | break; |
769 | 0 | case pass_last: |
770 | 0 | if (pos != input->length) itsTrue = 0; |
771 | 0 | (*passIC)++; |
772 | 0 | break; |
773 | 0 | case pass_lookback: |
774 | 0 | pos -= (*passInstructions)[*passIC + 1]; |
775 | 0 | if (pos < 0) { |
776 | 0 | searchPos = 0; |
777 | 0 | itsTrue = 0; |
778 | 0 | } |
779 | 0 | *passIC += 2; |
780 | 0 | break; |
781 | 0 | case pass_not: |
782 | 0 | notOperator = !notOperator; |
783 | 0 | (*passIC)++; |
784 | 0 | continue; |
785 | 0 | case pass_string: |
786 | 0 | case pass_dots: |
787 | 0 | itsTrue = matchCurrentInput(input, pos, *passInstructions, *passIC); |
788 | 0 | pos += (*passInstructions)[*passIC + 1]; |
789 | 0 | *passIC += (*passInstructions)[*passIC + 1] + 2; |
790 | 0 | break; |
791 | 0 | case pass_startReplace: |
792 | 0 | startReplace = pos; |
793 | 0 | (*passIC)++; |
794 | 0 | break; |
795 | 0 | case pass_endReplace: |
796 | 0 | endReplace = pos; |
797 | 0 | (*passIC)++; |
798 | 0 | break; |
799 | 0 | case pass_attributes: |
800 | 0 | attributes = (*passInstructions)[*passIC + 1]; |
801 | 0 | attributes <<= 16; |
802 | 0 | attributes |= (*passInstructions)[*passIC + 2]; |
803 | 0 | attributes <<= 16; |
804 | 0 | attributes |= (*passInstructions)[*passIC + 3]; |
805 | 0 | attributes <<= 16; |
806 | 0 | attributes |= (*passInstructions)[*passIC + 4]; |
807 | 0 | for (k = 0; k < (*passInstructions)[*passIC + 5]; k++) { |
808 | 0 | if (pos >= input->length) { |
809 | 0 | itsTrue = 0; |
810 | 0 | break; |
811 | 0 | } |
812 | 0 | if (input->chars[pos] == LOU_ENDSEGMENT) { |
813 | 0 | itsTrue = 0; |
814 | 0 | break; |
815 | 0 | } |
816 | 0 | if (!((*passCharDots ? getDots(input->chars[pos], table) |
817 | 0 | : getChar(input->chars[pos], table)) |
818 | 0 | ->attributes & |
819 | 0 | attributes)) { |
820 | 0 | if (!notOperator) { |
821 | 0 | itsTrue = 0; |
822 | 0 | break; |
823 | 0 | } |
824 | 0 | } else if (notOperator) { |
825 | 0 | itsTrue = 0; |
826 | 0 | break; |
827 | 0 | } |
828 | 0 | pos++; |
829 | 0 | } |
830 | 0 | if (itsTrue) { |
831 | 0 | for (k = (*passInstructions)[*passIC + 5]; |
832 | 0 | k < (*passInstructions)[*passIC + 6] && pos < input->length; |
833 | 0 | k++) { |
834 | 0 | if (input->chars[pos] == LOU_ENDSEGMENT) { |
835 | 0 | itsTrue = 0; |
836 | 0 | break; |
837 | 0 | } |
838 | 0 | if (!((*passCharDots ? getDots(input->chars[pos], table) |
839 | 0 | : getChar(input->chars[pos], table)) |
840 | 0 | ->attributes & |
841 | 0 | attributes)) { |
842 | 0 | if (!notOperator) break; |
843 | 0 | } else if (notOperator) |
844 | 0 | break; |
845 | 0 | pos++; |
846 | 0 | } |
847 | 0 | } |
848 | 0 | notOperator = 0; |
849 | 0 | *passIC += 7; |
850 | 0 | break; |
851 | 0 | case pass_groupstart: |
852 | 0 | case pass_groupend: |
853 | 0 | ruleOffset = ((*passInstructions)[*passIC + 1] << 16) | |
854 | 0 | (*passInstructions)[*passIC + 2]; |
855 | 0 | rule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
856 | 0 | if (*passIC == 0 || |
857 | 0 | (*passIC > 0 && |
858 | 0 | (*passInstructions)[*passIC - 1] == pass_startReplace)) { |
859 | 0 | *groupingRule = rule; |
860 | 0 | *groupingOp = (*passInstructions)[*passIC]; |
861 | 0 | } |
862 | 0 | if ((*passInstructions)[*passIC] == pass_groupstart) |
863 | 0 | itsTrue = |
864 | 0 | (input->chars[pos] == rule->charsdots[2 * *passCharDots]) ? 1 : 0; |
865 | 0 | else |
866 | 0 | itsTrue = (input->chars[pos] == rule->charsdots[2 * *passCharDots + 1]) |
867 | 0 | ? 1 |
868 | 0 | : 0; |
869 | 0 | pos++; |
870 | 0 | *passIC += 3; |
871 | 0 | break; |
872 | 0 | case pass_swap: |
873 | 0 | itsTrue = swapTest(*passIC, &pos, table, input, *passInstructions); |
874 | 0 | *passIC += 5; |
875 | 0 | break; |
876 | 0 | case pass_search: |
877 | 0 | itsTrue = doPassSearch(table, input, transRule, *passCharDots, pos, |
878 | 0 | *passInstructions, *passIC, &searchIC, &searchPos, *groupingRule, |
879 | 0 | *groupingOp); |
880 | 0 | if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) return 0; |
881 | 0 | *passIC = searchIC; |
882 | 0 | pos = searchPos; |
883 | 0 | case pass_endTest: |
884 | 0 | (*passIC)++; |
885 | 0 | endMatch = pos; |
886 | 0 | if (startReplace == -1) { |
887 | 0 | startReplace = startMatch; |
888 | 0 | endReplace = endMatch; |
889 | 0 | } |
890 | | // Check whetehr endReplace != -1 while startReplace! = -1 |
891 | 0 | if (startReplace < startMatch || endReplace == -1) |
892 | 0 | return 0; |
893 | 0 | else { |
894 | 0 | *match = (PassRuleMatch){ .startMatch = startMatch, |
895 | 0 | .startReplace = startReplace, |
896 | 0 | .endReplace = endReplace, |
897 | 0 | .endMatch = endMatch }; |
898 | 0 | return 1; |
899 | 0 | } |
900 | 0 | break; |
901 | 0 | default: |
902 | 0 | if (_lou_handlePassVariableTest(*passInstructions, passIC, &itsTrue)) break; |
903 | 0 | return 0; |
904 | 0 | } |
905 | 0 | if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) return 0; |
906 | 0 | notOperator = 0; |
907 | 0 | } |
908 | 0 | return 0; |
909 | 0 | } |
910 | | |
911 | | static int |
912 | | copyCharacters(int from, int to, const TranslationTableHeader *table, |
913 | | const InString *input, OutString *output, int *posMapping, int transOpcode, |
914 | 0 | int *cursorPosition, int *cursorStatus, int mode) { |
915 | 0 | if (transOpcode == CTO_Context) { |
916 | 0 | while (from < to) { |
917 | 0 | if (!putCharacter(input->chars[from], table, from, input, output, posMapping, |
918 | 0 | cursorPosition, cursorStatus, mode)) |
919 | 0 | return 0; |
920 | 0 | from++; |
921 | 0 | } |
922 | 0 | } else { |
923 | 0 | if (to > from) { |
924 | 0 | if ((output->length + to - from) > output->maxlength) return 0; |
925 | 0 | while (to > from) { |
926 | 0 | posMapping[output->length] = from; |
927 | 0 | output->chars[output->length] = input->chars[from]; |
928 | 0 | output->length++; |
929 | 0 | from++; |
930 | 0 | } |
931 | 0 | } |
932 | 0 | } |
933 | | |
934 | 0 | return 1; |
935 | 0 | } |
936 | | |
937 | | static int |
938 | | passDoAction(const TranslationTableHeader *table, const InString **input, |
939 | | OutString *output, int *posMapping, int transOpcode, |
940 | | const TranslationTableRule **transRule, int passCharDots, |
941 | | const widechar *passInstructions, int passIC, int *pos, PassRuleMatch match, |
942 | | int *cursorPosition, int *cursorStatus, TranslationTableRule *groupingRule, |
943 | 0 | widechar groupingOp, int mode) { |
944 | 0 | int k; |
945 | 0 | TranslationTableOffset ruleOffset = 0; |
946 | 0 | TranslationTableRule *rule = NULL; |
947 | 0 | int destStartMatch = output->length; |
948 | 0 | int destStartReplace; |
949 | 0 | int newPos = match.endReplace; |
950 | |
|
951 | 0 | if (!copyCharacters(match.startMatch, match.startReplace, table, *input, output, |
952 | 0 | posMapping, transOpcode, cursorPosition, cursorStatus, mode)) |
953 | 0 | return 0; |
954 | 0 | destStartReplace = output->length; |
955 | |
|
956 | 0 | while (passIC < (*transRule)->dotslen) switch (passInstructions[passIC]) { |
957 | 0 | case pass_string: |
958 | 0 | case pass_dots: |
959 | 0 | if ((output->length + passInstructions[passIC + 1]) > output->maxlength) |
960 | 0 | return 0; |
961 | 0 | for (k = 0; k < passInstructions[passIC + 1]; ++k) |
962 | 0 | posMapping[output->length + k] = match.startReplace; |
963 | 0 | memcpy(&output->chars[output->length], &passInstructions[passIC + 2], |
964 | 0 | passInstructions[passIC + 1] * CHARSIZE); |
965 | 0 | output->length += passInstructions[passIC + 1]; |
966 | 0 | passIC += passInstructions[passIC + 1] + 2; |
967 | 0 | break; |
968 | 0 | case pass_groupstart: |
969 | 0 | ruleOffset = |
970 | 0 | (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2]; |
971 | 0 | rule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
972 | 0 | posMapping[output->length] = match.startMatch; |
973 | 0 | output->chars[output->length++] = rule->charsdots[2 * passCharDots]; |
974 | 0 | passIC += 3; |
975 | 0 | break; |
976 | 0 | case pass_groupend: |
977 | 0 | ruleOffset = |
978 | 0 | (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2]; |
979 | 0 | rule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
980 | 0 | posMapping[output->length] = match.startMatch; |
981 | 0 | output->chars[output->length++] = rule->charsdots[2 * passCharDots + 1]; |
982 | 0 | passIC += 3; |
983 | 0 | break; |
984 | 0 | case pass_swap: |
985 | 0 | if (!swapReplace(match.startReplace, match.endReplace, table, *input, output, |
986 | 0 | posMapping, passInstructions, passIC)) |
987 | 0 | return 0; |
988 | 0 | passIC += 3; |
989 | 0 | break; |
990 | 0 | case pass_groupreplace: |
991 | 0 | if (!groupingRule || |
992 | 0 | !replaceGrouping(table, input, output, transOpcode, passCharDots, |
993 | 0 | passInstructions, passIC, match.startReplace, groupingRule, |
994 | 0 | groupingOp)) |
995 | 0 | return 0; |
996 | 0 | passIC += 3; |
997 | 0 | break; |
998 | 0 | case pass_omit: |
999 | 0 | if (groupingRule) |
1000 | 0 | removeGrouping(input, output, passCharDots, match.startReplace, |
1001 | 0 | groupingRule, groupingOp); |
1002 | 0 | passIC++; |
1003 | 0 | break; |
1004 | 0 | case pass_copy: { |
1005 | 0 | int count = destStartReplace - destStartMatch; |
1006 | 0 | if (count > 0) { |
1007 | 0 | if (destStartReplace + count > output->maxlength) return 0; |
1008 | 0 | memmove(&output->chars[destStartMatch], &output->chars[destStartReplace], |
1009 | 0 | count * sizeof(*output->chars)); |
1010 | 0 | output->length -= count; |
1011 | 0 | destStartReplace = destStartMatch; |
1012 | 0 | } |
1013 | 0 | } |
1014 | | |
1015 | 0 | if (!copyCharacters(match.startReplace, match.endReplace, table, *input, |
1016 | 0 | output, posMapping, transOpcode, cursorPosition, cursorStatus, |
1017 | 0 | mode)) |
1018 | 0 | return 0; |
1019 | 0 | newPos = match.endMatch; |
1020 | 0 | passIC++; |
1021 | 0 | break; |
1022 | 0 | default: |
1023 | 0 | if (_lou_handlePassVariableAction(passInstructions, &passIC)) break; |
1024 | 0 | return 0; |
1025 | 0 | } |
1026 | 0 | *pos = newPos; |
1027 | 0 | return 1; |
1028 | 0 | } |
1029 | | |
1030 | | static void |
1031 | | passSelectRule(const TranslationTableHeader *table, int pos, int currentPass, |
1032 | | const InString *input, int *transOpcode, const TranslationTableRule **transRule, |
1033 | | int *transCharslen, int *passCharDots, widechar const **passInstructions, |
1034 | | int *passIC, PassRuleMatch *match, TranslationTableRule **groupingRule, |
1035 | 0 | widechar *groupingOp) { |
1036 | 0 | if (!findForPassRule(table, pos, currentPass, input, transOpcode, transRule, |
1037 | 0 | transCharslen, passCharDots, passInstructions, passIC, match, |
1038 | 0 | groupingRule, groupingOp)) { |
1039 | 0 | *transOpcode = CTO_Always; |
1040 | 0 | } |
1041 | 0 | } |
1042 | | |
1043 | | static int |
1044 | | translatePass(const TranslationTableHeader *table, int currentPass, const InString *input, |
1045 | | OutString *output, int *posMapping, int *realInlen, int *cursorPosition, |
1046 | 0 | int *cursorStatus, int mode) { |
1047 | 0 | int pos; |
1048 | 0 | int transOpcode; |
1049 | 0 | const TranslationTableRule *transRule; |
1050 | 0 | int transCharslen; |
1051 | 0 | int passCharDots; |
1052 | 0 | const widechar *passInstructions; |
1053 | 0 | int passIC; /* Instruction counter */ |
1054 | 0 | PassRuleMatch patternMatch; |
1055 | 0 | TranslationTableRule *groupingRule; |
1056 | 0 | widechar groupingOp; |
1057 | 0 | const InString *origInput = input; |
1058 | 0 | pos = output->length = 0; |
1059 | 0 | int posIncremented = 1; |
1060 | 0 | _lou_resetPassVariables(); |
1061 | 0 | while (pos < input->length) { /* the main multipass translation loop */ |
1062 | | // check posIncremented to avoid endless loop |
1063 | 0 | if (!posIncremented) |
1064 | 0 | transOpcode = CTO_Always; |
1065 | 0 | else |
1066 | 0 | passSelectRule(table, pos, currentPass, input, &transOpcode, &transRule, |
1067 | 0 | &transCharslen, &passCharDots, &passInstructions, &passIC, |
1068 | 0 | &patternMatch, &groupingRule, &groupingOp); |
1069 | 0 | posIncremented = 1; |
1070 | 0 | switch (transOpcode) { |
1071 | 0 | case CTO_Context: |
1072 | 0 | case CTO_Pass2: |
1073 | 0 | case CTO_Pass3: |
1074 | 0 | case CTO_Pass4: { |
1075 | 0 | const InString *inputBefore = input; |
1076 | 0 | int posBefore = pos; |
1077 | 0 | if (appliedRules != NULL && appliedRulesCount < maxAppliedRules) |
1078 | 0 | appliedRules[appliedRulesCount++] = transRule; |
1079 | 0 | if (!passDoAction(table, &input, output, posMapping, transOpcode, &transRule, |
1080 | 0 | passCharDots, passInstructions, passIC, &pos, patternMatch, |
1081 | 0 | cursorPosition, cursorStatus, groupingRule, groupingOp, mode)) |
1082 | 0 | goto failure; |
1083 | 0 | if (input->bufferIndex != inputBefore->bufferIndex && |
1084 | 0 | inputBefore->bufferIndex != origInput->bufferIndex) |
1085 | 0 | releaseStringBuffer(inputBefore->bufferIndex); |
1086 | 0 | if (pos == posBefore) posIncremented = 0; |
1087 | 0 | break; |
1088 | 0 | } |
1089 | 0 | case CTO_Always: |
1090 | 0 | if ((output->length + 1) > output->maxlength) goto failure; |
1091 | 0 | posMapping[output->length] = pos; |
1092 | 0 | output->chars[output->length++] = input->chars[pos++]; |
1093 | 0 | break; |
1094 | 0 | default: |
1095 | 0 | goto failure; |
1096 | 0 | } |
1097 | 0 | } |
1098 | 0 | failure: |
1099 | 0 | if (pos < input->length) { |
1100 | 0 | while (checkDotsAttr(input->chars[pos], CTC_Space, table)) |
1101 | 0 | if (++pos == input->length) break; |
1102 | 0 | } |
1103 | 0 | *realInlen = pos; |
1104 | 0 | if (input->bufferIndex != origInput->bufferIndex) |
1105 | 0 | releaseStringBuffer(input->bufferIndex); |
1106 | 0 | return 1; |
1107 | 0 | } |
1108 | | |
1109 | 0 | #define MIN(a, b) (((a) < (b)) ? (a) : (b)) |
1110 | | |
1111 | | static int |
1112 | | translateString(const TranslationTableHeader *table, int mode, int currentPass, |
1113 | | const InString *input, OutString *output, int *posMapping, formtype *typebuf, |
1114 | | unsigned char *srcSpacing, unsigned char *destSpacing, unsigned int *wordBuffer, |
1115 | | EmphasisInfo *emphasisBuffer, int haveEmphasis, int *realInlen, |
1116 | | int *cursorPosition, int *cursorStatus, int compbrlStart, int compbrlEnd); |
1117 | | |
1118 | | int EXPORT_CALL |
1119 | | lou_translateString(const char *tableList, const widechar *inbufx, int *inlen, |
1120 | 0 | widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int mode) { |
1121 | 0 | return lou_translate(tableList, inbufx, inlen, outbuf, outlen, typeform, spacing, |
1122 | 0 | NULL, NULL, NULL, mode); |
1123 | 0 | } |
1124 | | |
1125 | | int EXPORT_CALL |
1126 | | lou_translate(const char *tableList, const widechar *inbufx, int *inlen, widechar *outbuf, |
1127 | | int *outlen, formtype *typeform, char *spacing, int *outputPos, int *inputPos, |
1128 | 0 | int *cursorPos, int mode) { |
1129 | 0 | return _lou_translate(tableList, tableList, inbufx, inlen, outbuf, outlen, typeform, |
1130 | 0 | spacing, outputPos, inputPos, cursorPos, mode, NULL, NULL); |
1131 | 0 | } |
1132 | | |
1133 | | int EXPORT_CALL |
1134 | | _lou_translate(const char *tableList, const char *displayTableList, |
1135 | | const widechar *inbufx, int *inlen, widechar *outbuf, int *outlen, |
1136 | | formtype *typeform, char *spacing, int *outputPos, int *inputPos, int *cursorPos, |
1137 | 0 | int mode, const TranslationTableRule **rules, int *rulesLen) { |
1138 | | // int i; |
1139 | | // for(i = 0; i < *inlen; i++) |
1140 | | // { |
1141 | | // outbuf[i] = inbufx[i]; |
1142 | | // if(inputPos) |
1143 | | // inputPos[i] = i; |
1144 | | // if(outputPos) |
1145 | | // outputPos[i] = i; |
1146 | | // } |
1147 | | // *inlen = i; |
1148 | | // *outlen = i; |
1149 | | // return 1; |
1150 | 0 | const TranslationTableHeader *table; |
1151 | 0 | const DisplayTableHeader *displayTable; |
1152 | 0 | InString input; |
1153 | 0 | OutString output; |
1154 | | // posMapping contains position mapping info between the initial input and the output |
1155 | | // of the current pass. It is 1 longer than the output. The values are monotonically |
1156 | | // increasing and can range between -1 and the (consumed) input length. At the end the |
1157 | | // position info is passed to the user as an inputPos and outputPos array. inputPos |
1158 | | // has the length of the final output and has values ranging from 0 to inlen-1. |
1159 | | // outputPos has the length of the (consumed) initial input and has values ranging |
1160 | | // from 0 to outlen-1. |
1161 | 0 | int *posMapping; |
1162 | 0 | int *posMapping1; |
1163 | 0 | int *posMapping2; |
1164 | 0 | int *posMapping3; |
1165 | 0 | formtype *typebuf; |
1166 | 0 | unsigned char *srcSpacing; |
1167 | 0 | unsigned char *destSpacing; |
1168 | 0 | unsigned int *wordBuffer; |
1169 | 0 | EmphasisInfo *emphasisBuffer; |
1170 | 0 | int cursorPosition; |
1171 | 0 | int cursorStatus; |
1172 | 0 | int haveEmphasis; |
1173 | 0 | int compbrlStart = -1; |
1174 | 0 | int compbrlEnd = -1; |
1175 | 0 | int k; |
1176 | 0 | int goodTrans = 1; |
1177 | 0 | if (tableList == NULL || inbufx == NULL || inlen == NULL || outbuf == NULL || |
1178 | 0 | outlen == NULL) |
1179 | 0 | return 0; |
1180 | 0 | _lou_logMessage(LOU_LOG_ALL, "Performing translation: tableList=%s, inlen=%d", |
1181 | 0 | tableList, *inlen); |
1182 | 0 | _lou_logWidecharBuf(LOU_LOG_ALL, "Inbuf=", inbufx, *inlen); |
1183 | |
|
1184 | 0 | if (!_lou_isValidMode(mode)) |
1185 | 0 | _lou_logMessage(LOU_LOG_ERROR, "Invalid mode parameter: %d", mode); |
1186 | |
|
1187 | 0 | if (displayTableList == NULL) displayTableList = tableList; |
1188 | 0 | _lou_getTable(tableList, displayTableList, &table, &displayTable); |
1189 | 0 | if (table == NULL || *inlen < 0 || *outlen < 0) return 0; |
1190 | 0 | k = 0; |
1191 | 0 | while (k < *inlen && inbufx[k]) k++; |
1192 | 0 | input = (InString){ .chars = inbufx, .length = k, .bufferIndex = -1 }; |
1193 | 0 | haveEmphasis = 0; |
1194 | 0 | if (!(typebuf = _lou_allocMem(alloc_typebuf, 0, input.length, *outlen))) return 0; |
1195 | 0 | if (typeform != NULL) { |
1196 | 0 | for (k = 0; k < input.length; k++) { |
1197 | 0 | typebuf[k] = typeform[k]; |
1198 | 0 | if (typebuf[k] & EMPHASIS) haveEmphasis = 1; |
1199 | 0 | } |
1200 | 0 | } else |
1201 | 0 | memset(typebuf, 0, input.length * sizeof(formtype)); |
1202 | |
|
1203 | 0 | if (!(spacing == NULL || *spacing == 'X')) |
1204 | 0 | srcSpacing = (unsigned char *)spacing; |
1205 | 0 | else |
1206 | 0 | srcSpacing = NULL; |
1207 | 0 | if (outputPos != NULL) |
1208 | 0 | for (k = 0; k < input.length; k++) outputPos[k] = -1; |
1209 | 0 | if (cursorPos != NULL && *cursorPos >= 0) { |
1210 | 0 | cursorStatus = 0; |
1211 | 0 | cursorPosition = *cursorPos; |
1212 | 0 | if ((mode & (compbrlAtCursor | compbrlLeftCursor))) { |
1213 | 0 | compbrlStart = cursorPosition; |
1214 | 0 | if (checkCharAttr(input.chars[compbrlStart], CTC_Space, table)) |
1215 | | /* It would have been simpler to just set compbrlStart and compbrlEnd to |
1216 | | * -1 (i.e. disable compbrlAtCursor/compbrlLeftCursor mode) if the cursor |
1217 | | * is set on a space. But maybe there are cases where a space in computer |
1218 | | * braille does not map to a blank cell, and the user expects to see the |
1219 | | * computer braille representation when the space is under the cursor, so |
1220 | | * we better leave it as it is. |
1221 | | */ |
1222 | 0 | compbrlEnd = compbrlStart + 1; |
1223 | 0 | else { |
1224 | 0 | while (compbrlStart >= 0 && |
1225 | 0 | !checkCharAttr(input.chars[compbrlStart], CTC_Space, table)) |
1226 | 0 | compbrlStart--; |
1227 | 0 | compbrlStart++; |
1228 | 0 | compbrlEnd = cursorPosition; |
1229 | 0 | if (!(mode & compbrlLeftCursor)) |
1230 | 0 | while (compbrlEnd < input.length && |
1231 | 0 | !checkCharAttr(input.chars[compbrlEnd], CTC_Space, table)) |
1232 | 0 | compbrlEnd++; |
1233 | 0 | } |
1234 | 0 | } |
1235 | 0 | } else { |
1236 | 0 | cursorPosition = -1; |
1237 | 0 | cursorStatus = 1; /* so it won't check cursor position */ |
1238 | 0 | } |
1239 | 0 | if (!(posMapping1 = _lou_allocMem(alloc_posMapping1, 0, input.length, *outlen))) |
1240 | 0 | return 0; |
1241 | 0 | if (table->numPasses > 1 || table->corrections) { |
1242 | 0 | if (!(posMapping2 = _lou_allocMem(alloc_posMapping2, 0, input.length, *outlen))) |
1243 | 0 | return 0; |
1244 | 0 | if (!(posMapping3 = _lou_allocMem(alloc_posMapping3, 0, input.length, *outlen))) |
1245 | 0 | return 0; |
1246 | 0 | } |
1247 | 0 | if (srcSpacing != NULL) { |
1248 | 0 | if (!(destSpacing = _lou_allocMem(alloc_destSpacing, 0, input.length, *outlen))) |
1249 | 0 | goodTrans = 0; |
1250 | 0 | else |
1251 | 0 | memset(destSpacing, '*', *outlen); |
1252 | 0 | } else |
1253 | 0 | destSpacing = NULL; |
1254 | 0 | appliedRulesCount = 0; |
1255 | 0 | if (rules != NULL && rulesLen != NULL) { |
1256 | 0 | appliedRules = rules; |
1257 | 0 | maxAppliedRules = *rulesLen; |
1258 | 0 | } else { |
1259 | 0 | appliedRules = NULL; |
1260 | 0 | maxAppliedRules = 0; |
1261 | 0 | } |
1262 | 0 | { |
1263 | 0 | int idx; |
1264 | 0 | if (!stringBufferPool) initStringBufferPool(); |
1265 | 0 | for (idx = 0; idx < stringBufferPool->size; idx++) releaseStringBuffer(idx); |
1266 | 0 | idx = getStringBuffer(*outlen); |
1267 | 0 | output = (OutString){ .chars = stringBufferPool->buffers[idx], |
1268 | 0 | .maxlength = *outlen, |
1269 | 0 | .length = 0, |
1270 | 0 | .bufferIndex = idx }; |
1271 | 0 | } |
1272 | 0 | posMapping = posMapping1; |
1273 | |
|
1274 | 0 | int currentPass = table->corrections ? 0 : 1; |
1275 | 0 | int *passPosMapping = posMapping; |
1276 | 0 | while (1) { |
1277 | 0 | int realInlen; |
1278 | 0 | switch (currentPass) { |
1279 | 0 | case 0: |
1280 | 0 | goodTrans = makeCorrections(table, &input, &output, passPosMapping, typebuf, |
1281 | 0 | &realInlen, &cursorPosition, &cursorStatus, mode); |
1282 | 0 | break; |
1283 | 0 | case 1: { |
1284 | 0 | if (!(wordBuffer = _lou_allocMem(alloc_wordBuffer, 0, input.length, *outlen))) |
1285 | 0 | return 0; |
1286 | 0 | if (!(emphasisBuffer = _lou_allocMem( |
1287 | 0 | alloc_emphasisBuffer, 0, input.length, *outlen))) |
1288 | 0 | return 0; |
1289 | 0 | goodTrans = translateString(table, mode, currentPass, &input, &output, |
1290 | 0 | passPosMapping, typebuf, srcSpacing, destSpacing, wordBuffer, |
1291 | 0 | emphasisBuffer, haveEmphasis, &realInlen, &cursorPosition, |
1292 | 0 | &cursorStatus, compbrlStart, compbrlEnd); |
1293 | 0 | break; |
1294 | 0 | } |
1295 | 0 | default: |
1296 | 0 | goodTrans = translatePass(table, currentPass, &input, &output, passPosMapping, |
1297 | 0 | &realInlen, &cursorPosition, &cursorStatus, mode); |
1298 | 0 | break; |
1299 | 0 | } |
1300 | 0 | passPosMapping[output.length] = realInlen; |
1301 | 0 | if (passPosMapping == posMapping) { |
1302 | 0 | passPosMapping = posMapping2; |
1303 | 0 | } else { |
1304 | 0 | int *prevPosMapping = posMapping3; |
1305 | 0 | memcpy((int *)prevPosMapping, posMapping, (*outlen + 1) * sizeof(int)); |
1306 | 0 | for (k = 0; k <= output.length; k++) |
1307 | 0 | if (passPosMapping[k] < 0) |
1308 | 0 | posMapping[k] = prevPosMapping[0]; |
1309 | 0 | else |
1310 | 0 | posMapping[k] = prevPosMapping[passPosMapping[k]]; |
1311 | 0 | } |
1312 | 0 | currentPass++; |
1313 | 0 | if (currentPass <= table->numPasses && goodTrans) { |
1314 | 0 | int idx; |
1315 | 0 | releaseStringBuffer(input.bufferIndex); |
1316 | 0 | input = (InString){ .chars = output.chars, |
1317 | 0 | .length = output.length, |
1318 | 0 | .bufferIndex = output.bufferIndex }; |
1319 | 0 | idx = getStringBuffer(*outlen); |
1320 | 0 | output = (OutString){ .chars = stringBufferPool->buffers[idx], |
1321 | 0 | .maxlength = *outlen, |
1322 | 0 | .length = 0, |
1323 | 0 | .bufferIndex = idx }; |
1324 | 0 | continue; |
1325 | 0 | } |
1326 | 0 | break; |
1327 | 0 | } |
1328 | 0 | if (goodTrans) { |
1329 | 0 | for (k = 0; k < output.length; k++) { |
1330 | 0 | if (typeform != NULL) { |
1331 | 0 | if ((output.chars[k] & (LOU_DOT_7 | LOU_DOT_8))) |
1332 | 0 | typeform[k] = '8'; |
1333 | 0 | else |
1334 | 0 | typeform[k] = '0'; |
1335 | 0 | } |
1336 | 0 | if ((mode & dotsIO)) { |
1337 | 0 | if ((mode & ucBrl)) |
1338 | 0 | outbuf[k] = ((output.chars[k] & 0xff) | LOU_ROW_BRAILLE); |
1339 | 0 | else |
1340 | 0 | outbuf[k] = output.chars[k]; |
1341 | 0 | } else { |
1342 | 0 | outbuf[k] = _lou_getCharForDots(output.chars[k], displayTable); |
1343 | 0 | if (!outbuf[k]) { |
1344 | | // assume that if NUL character is returned, it's because the display |
1345 | | // table has no mapping for the dot pattern (not because it maps to |
1346 | | // NUL) |
1347 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
1348 | 0 | "%s: no mapping for dot pattern %s in display table", |
1349 | 0 | displayTableList, _lou_showDots(&output.chars[k], 1)); |
1350 | 0 | return 0; |
1351 | 0 | } |
1352 | 0 | } |
1353 | 0 | } |
1354 | 0 | *inlen = posMapping[output.length]; |
1355 | 0 | *outlen = output.length; |
1356 | | // Compute inputPos and outputPos from posMapping. The value at the last index of |
1357 | | // posMapping is currectly not used. |
1358 | 0 | if (inputPos != NULL) { |
1359 | 0 | for (k = 0; k < *outlen; k++) |
1360 | 0 | if (posMapping[k] < 0) |
1361 | 0 | inputPos[k] = 0; |
1362 | 0 | else if (posMapping[k] > *inlen - 1) |
1363 | 0 | inputPos[k] = *inlen - 1; |
1364 | 0 | else |
1365 | 0 | inputPos[k] = posMapping[k]; |
1366 | 0 | } |
1367 | 0 | if (outputPos != NULL) { |
1368 | 0 | int inpos = -1; |
1369 | 0 | int outpos = -1; |
1370 | 0 | for (k = 0; k < *outlen; k++) |
1371 | 0 | if (posMapping[k] > inpos) { |
1372 | 0 | while (inpos < posMapping[k]) { |
1373 | 0 | if (inpos >= 0 && inpos < *inlen) |
1374 | 0 | outputPos[inpos] = outpos < 0 ? 0 : outpos; |
1375 | 0 | inpos++; |
1376 | 0 | } |
1377 | 0 | outpos = k; |
1378 | 0 | } |
1379 | 0 | if (inpos < 0) inpos = 0; |
1380 | 0 | while (inpos < *inlen) outputPos[inpos++] = outpos; |
1381 | 0 | } |
1382 | 0 | } |
1383 | 0 | if (destSpacing != NULL) { |
1384 | 0 | memcpy(srcSpacing, destSpacing, input.length); |
1385 | 0 | srcSpacing[input.length] = 0; |
1386 | 0 | } |
1387 | 0 | if (cursorPos != NULL && *cursorPos != -1) { |
1388 | 0 | if (outputPos != NULL) |
1389 | 0 | *cursorPos = outputPos[*cursorPos]; |
1390 | 0 | else |
1391 | 0 | *cursorPos = cursorPosition; |
1392 | 0 | } |
1393 | 0 | if (rulesLen != NULL) *rulesLen = appliedRulesCount; |
1394 | 0 | _lou_logMessage(LOU_LOG_ALL, "Translation complete: outlen=%d", *outlen); |
1395 | 0 | _lou_logWidecharBuf(LOU_LOG_ALL, "Outbuf=", (const widechar *)outbuf, *outlen); |
1396 | |
|
1397 | 0 | return goodTrans; |
1398 | 0 | } |
1399 | | |
1400 | | int EXPORT_CALL |
1401 | | lou_translatePrehyphenated(const char *tableList, const widechar *inbufx, int *inlen, |
1402 | | widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int *outputPos, |
1403 | | int *inputPos, int *cursorPos, char *inputHyphens, char *outputHyphens, |
1404 | 0 | int mode) { |
1405 | 0 | int rv = 1; |
1406 | 0 | int *alloc_inputPos = NULL; |
1407 | 0 | if (inputHyphens != NULL) { |
1408 | 0 | if (outputHyphens == NULL) return 0; |
1409 | 0 | if (inputPos == NULL) { |
1410 | 0 | if ((alloc_inputPos = malloc(*outlen * sizeof(int))) == NULL) |
1411 | 0 | _lou_outOfMemory(); |
1412 | 0 | inputPos = alloc_inputPos; |
1413 | 0 | } |
1414 | 0 | } |
1415 | 0 | if (lou_translate(tableList, inbufx, inlen, outbuf, outlen, typeform, spacing, |
1416 | 0 | outputPos, inputPos, cursorPos, mode)) { |
1417 | 0 | if (inputHyphens != NULL) { |
1418 | 0 | int inpos = 0; |
1419 | 0 | int outpos; |
1420 | 0 | for (outpos = 0; outpos < *outlen; outpos++) { |
1421 | 0 | int new_inpos = inputPos[outpos]; |
1422 | 0 | if (new_inpos < inpos) { |
1423 | 0 | rv = 0; |
1424 | 0 | break; |
1425 | 0 | } |
1426 | 0 | if (new_inpos > inpos) |
1427 | 0 | outputHyphens[outpos] = inputHyphens[new_inpos]; |
1428 | 0 | else |
1429 | 0 | outputHyphens[outpos] = '0'; |
1430 | 0 | inpos = new_inpos; |
1431 | 0 | } |
1432 | 0 | } |
1433 | 0 | } |
1434 | 0 | if (alloc_inputPos != NULL) free(alloc_inputPos); |
1435 | 0 | return rv; |
1436 | 0 | } |
1437 | | |
1438 | | static int |
1439 | | hyphenateWord(const widechar *word, int wordSize, char *hyphens, |
1440 | 0 | const TranslationTableHeader *table) { |
1441 | 0 | widechar *prepWord; |
1442 | 0 | int i, k, limit; |
1443 | 0 | int stateNum; |
1444 | 0 | widechar ch; |
1445 | 0 | HyphenationState *statesArray = |
1446 | 0 | (HyphenationState *)&table->ruleArea[table->hyphenStatesArray]; |
1447 | 0 | HyphenationState *currentState; |
1448 | 0 | HyphenationTrans *transitionsArray; |
1449 | 0 | char *hyphenPattern; |
1450 | 0 | int patternOffset; |
1451 | 0 | if (!table->hyphenStatesArray || (wordSize + 3) > MAXSTRING) return 0; |
1452 | 0 | prepWord = (widechar *)calloc(wordSize + 3, sizeof(widechar)); |
1453 | | /* prepWord is of the format ".hello." |
1454 | | * hyphens is the length of the word "hello" "00000" */ |
1455 | 0 | prepWord[0] = '.'; |
1456 | 0 | for (i = 0; i < wordSize; i++) { |
1457 | 0 | prepWord[i + 1] = toLowercase(table, getChar(word[i], table)); |
1458 | 0 | hyphens[i] = '0'; |
1459 | 0 | } |
1460 | 0 | prepWord[wordSize + 1] = '.'; |
1461 | | |
1462 | | /* now, run the finite state machine */ |
1463 | 0 | stateNum = 0; |
1464 | | |
1465 | | // we need to walk all of ".hello." |
1466 | 0 | for (i = 0; i < wordSize + 2; i++) { |
1467 | 0 | ch = prepWord[i]; |
1468 | 0 | while (1) { |
1469 | 0 | if (stateNum == 0xffff) { |
1470 | 0 | stateNum = 0; |
1471 | 0 | goto nextLetter; |
1472 | 0 | } |
1473 | 0 | currentState = &statesArray[stateNum]; |
1474 | 0 | if (currentState->trans.offset) { |
1475 | 0 | transitionsArray = |
1476 | 0 | (HyphenationTrans *)&table->ruleArea[currentState->trans.offset]; |
1477 | 0 | for (k = 0; k < currentState->numTrans; k++) { |
1478 | 0 | if (transitionsArray[k].ch == ch) { |
1479 | 0 | stateNum = transitionsArray[k].newState; |
1480 | 0 | goto stateFound; |
1481 | 0 | } |
1482 | 0 | } |
1483 | 0 | } |
1484 | 0 | stateNum = currentState->fallbackState; |
1485 | 0 | } |
1486 | 0 | stateFound: |
1487 | 0 | currentState = &statesArray[stateNum]; |
1488 | 0 | if (currentState->hyphenPattern) { |
1489 | 0 | hyphenPattern = (char *)&table->ruleArea[currentState->hyphenPattern]; |
1490 | 0 | patternOffset = i + 1 - (int)strlen(hyphenPattern); |
1491 | | |
1492 | | /* Need to ensure that we don't overrun hyphens, |
1493 | | * in some cases hyphenPattern is longer than the remaining letters, |
1494 | | * and if we write out all of it we would have overshot our buffer. */ |
1495 | 0 | limit = MIN((int)strlen(hyphenPattern), wordSize - patternOffset); |
1496 | 0 | for (k = 0; k < limit; k++) { |
1497 | 0 | if (hyphens[patternOffset + k] < hyphenPattern[k]) |
1498 | 0 | hyphens[patternOffset + k] = hyphenPattern[k]; |
1499 | 0 | } |
1500 | 0 | } |
1501 | 0 | nextLetter:; |
1502 | 0 | } |
1503 | 0 | hyphens[wordSize] = 0; |
1504 | 0 | free(prepWord); |
1505 | 0 | return 1; |
1506 | 0 | } |
1507 | | |
1508 | | static int |
1509 | | doCompTrans(int start, int end, const TranslationTableHeader *table, int *pos, |
1510 | | const InString *input, OutString *output, int *posMapping, |
1511 | | EmphasisInfo *emphasisBuffer, const TranslationTableRule **transRule, |
1512 | | int *cursorPosition, int *cursorStatus, int mode); |
1513 | | |
1514 | | // The `shift' argument should be used with care because it can mess up the positions |
1515 | | // array which is supposed to be monotonically increasing. It is set to -1 in order to |
1516 | | // append certain indicators (endemphword, endemph, endemphphrase after, endcapsword, |
1517 | | // endcaps, endcapsphrase after) to the preceding character. |
1518 | | static int |
1519 | | for_updatePositions(const widechar *outChars, int inLength, int outLength, int shift, |
1520 | | int pos, const InString *input, OutString *output, int *posMapping, |
1521 | 0 | int *cursorPosition, int *cursorStatus) { |
1522 | 0 | int k; |
1523 | 0 | if ((output->length + outLength) > output->maxlength || |
1524 | 0 | (pos + inLength) > input->length) |
1525 | 0 | return 0; |
1526 | 0 | memcpy(&output->chars[output->length], outChars, outLength * CHARSIZE); |
1527 | 0 | if (!*cursorStatus) { |
1528 | 0 | if (*cursorPosition >= pos && *cursorPosition < (pos + inLength)) { |
1529 | 0 | *cursorPosition = output->length; |
1530 | 0 | *cursorStatus = 1; |
1531 | 0 | } else if (input->chars[*cursorPosition] == 0 && |
1532 | 0 | *cursorPosition == (pos + inLength)) { |
1533 | 0 | *cursorPosition = output->length + outLength / 2 + 1; |
1534 | 0 | *cursorStatus = 1; |
1535 | 0 | } |
1536 | 0 | } else if (*cursorStatus == 2 && *cursorPosition == pos) |
1537 | 0 | *cursorPosition = output->length; |
1538 | 0 | for (k = 0; k < outLength; k++) posMapping[output->length + k] = pos + shift; |
1539 | 0 | output->length += outLength; |
1540 | 0 | return 1; |
1541 | 0 | } |
1542 | | |
1543 | | static int |
1544 | | syllableBreak(const TranslationTableHeader *table, int pos, const InString *input, |
1545 | 0 | int transCharslen) { |
1546 | 0 | int wordStart = 0; |
1547 | 0 | int wordEnd = 0; |
1548 | 0 | int wordSize = 0; |
1549 | 0 | int k = 0; |
1550 | 0 | char *hyphens = NULL; |
1551 | 0 | for (wordStart = pos; wordStart >= 0; wordStart--) |
1552 | 0 | if (!((getChar(input->chars[wordStart], table))->attributes & CTC_Letter)) { |
1553 | 0 | wordStart++; |
1554 | 0 | break; |
1555 | 0 | } |
1556 | 0 | if (wordStart < 0) wordStart = 0; |
1557 | 0 | for (wordEnd = pos; wordEnd < input->length; wordEnd++) |
1558 | 0 | if (!((getChar(input->chars[wordEnd], table))->attributes & CTC_Letter)) { |
1559 | 0 | wordEnd--; |
1560 | 0 | break; |
1561 | 0 | } |
1562 | 0 | if (wordEnd == input->length) wordEnd--; |
1563 | | /* At this stage wordStart is the 0 based index of the first letter in the word, |
1564 | | * wordEnd is the 0 based index of the last letter in the word. |
1565 | | * example: "hello" wordstart=0, wordEnd=4. */ |
1566 | 0 | wordSize = wordEnd - wordStart + 1; |
1567 | 0 | hyphens = (char *)calloc(wordSize + 1, sizeof(char)); |
1568 | 0 | if (!hyphenateWord(&input->chars[wordStart], wordSize, hyphens, table)) { |
1569 | 0 | free(hyphens); |
1570 | 0 | return 0; |
1571 | 0 | } |
1572 | 0 | for (k = pos - wordStart + 1; k < (pos - wordStart + transCharslen); k++) |
1573 | 0 | if (hyphens[k] & 1) { |
1574 | 0 | free(hyphens); |
1575 | 0 | return 1; |
1576 | 0 | } |
1577 | 0 | free(hyphens); |
1578 | 0 | return 0; |
1579 | 0 | } |
1580 | | |
1581 | | static void |
1582 | | setBefore(const TranslationTableHeader *table, int pos, const InString *input, |
1583 | 0 | TranslationTableCharacterAttributes *beforeAttributes) { |
1584 | 0 | widechar before; |
1585 | 0 | if (pos >= 2 && input->chars[pos - 1] == LOU_ENDSEGMENT) |
1586 | 0 | before = input->chars[pos - 2]; |
1587 | 0 | else |
1588 | 0 | before = (pos == 0) ? ' ' : input->chars[pos - 1]; |
1589 | 0 | *beforeAttributes = (getChar(before, table))->attributes; |
1590 | 0 | } |
1591 | | |
1592 | | static void |
1593 | | setAfter(int length, const TranslationTableHeader *table, int pos, const InString *input, |
1594 | 0 | TranslationTableCharacterAttributes *afterAttributes) { |
1595 | 0 | widechar after; |
1596 | 0 | if ((pos + length + 2) < input->length && input->chars[pos + 1] == LOU_ENDSEGMENT) |
1597 | 0 | after = input->chars[pos + 2]; |
1598 | 0 | else |
1599 | 0 | after = (pos + length < input->length) ? input->chars[pos + length] : ' '; |
1600 | 0 | *afterAttributes = (getChar(after, table))->attributes; |
1601 | 0 | } |
1602 | | |
1603 | | static int |
1604 | | brailleIndicatorDefined(TranslationTableOffset offset, |
1605 | 0 | const TranslationTableHeader *table, const TranslationTableRule **indicRule) { |
1606 | 0 | if (!offset) return 0; |
1607 | 0 | *indicRule = (TranslationTableRule *)&table->ruleArea[offset]; |
1608 | 0 | return 1; |
1609 | 0 | } |
1610 | | |
1611 | | /** |
1612 | | * Return 1 if both `indicator1` and `indicator2` are defined and use the same dot |
1613 | | * pattern. Otherwise return 0. |
1614 | | */ |
1615 | | static int |
1616 | | isIndicatorEqual(TranslationTableOffset indicator1, TranslationTableOffset indicator2, |
1617 | 0 | const TranslationTableHeader *table) { |
1618 | 0 | const TranslationTableRule *indicatorRule1; |
1619 | 0 | const TranslationTableRule *indicatorRule2; |
1620 | |
|
1621 | 0 | if (brailleIndicatorDefined(indicator1, table, &indicatorRule1) && |
1622 | 0 | brailleIndicatorDefined(indicator2, table, &indicatorRule2) && |
1623 | 0 | indicatorRule1->dotslen == indicatorRule2->dotslen && |
1624 | 0 | !memcmp(&indicatorRule1->charsdots[0], &indicatorRule2->charsdots[0], |
1625 | 0 | indicatorRule1->dotslen * CHARSIZE)) { |
1626 | 0 | return 1; |
1627 | 0 | } else { |
1628 | 0 | return 0; |
1629 | 0 | } |
1630 | 0 | } |
1631 | | |
1632 | | static int |
1633 | 0 | capsletterDefined(const TranslationTableHeader *table) { |
1634 | 0 | return table->emphRules[MAX_EMPH_CLASSES][letterOffset]; |
1635 | 0 | } |
1636 | | |
1637 | | static int |
1638 | | validMatch(const TranslationTableHeader *table, int pos, const InString *input, |
1639 | 0 | formtype *typebuf, const TranslationTableRule *transRule, int transCharslen) { |
1640 | | /* Analyze the typeform parameter and also check for capitalization */ |
1641 | 0 | TranslationTableCharacter *inputChar; |
1642 | 0 | TranslationTableCharacter *ruleChar; |
1643 | 0 | TranslationTableCharacterAttributes prevAttr = 0; |
1644 | 0 | int k; |
1645 | 0 | int kk = 0; |
1646 | 0 | if (!transCharslen) return 0; |
1647 | 0 | for (k = pos; k < pos + transCharslen; k++) { |
1648 | 0 | if (input->chars[k] == LOU_ENDSEGMENT) { |
1649 | 0 | if (k == pos && transCharslen == 1) |
1650 | 0 | return 1; |
1651 | 0 | else |
1652 | 0 | return 0; |
1653 | 0 | } |
1654 | 0 | inputChar = getChar(input->chars[k], table); |
1655 | 0 | if (k == pos) prevAttr = inputChar->attributes; |
1656 | 0 | ruleChar = getChar(transRule->charsdots[kk++], table); |
1657 | 0 | if (toLowercase(table, inputChar) != toLowercase(table, ruleChar)) return 0; |
1658 | 0 | if (typebuf != NULL && (typebuf[pos] & CAPSEMPH) == 0 && |
1659 | 0 | (typebuf[k] | typebuf[pos]) != typebuf[pos]) |
1660 | 0 | return 0; |
1661 | 0 | if (inputChar->attributes != CTC_Letter) { |
1662 | 0 | if (k != (pos + 1) && (prevAttr & CTC_Letter) && |
1663 | 0 | (inputChar->attributes & CTC_Letter) && |
1664 | 0 | ((inputChar->attributes & |
1665 | 0 | (CTC_LowerCase | CTC_UpperCase | CTC_Letter)) != |
1666 | 0 | (prevAttr & (CTC_LowerCase | CTC_UpperCase | CTC_Letter)))) |
1667 | 0 | return 0; |
1668 | 0 | } |
1669 | 0 | prevAttr = inputChar->attributes; |
1670 | 0 | } |
1671 | 0 | return 1; |
1672 | 0 | } |
1673 | | |
1674 | | static int |
1675 | | insertNumberSign(const TranslationTableHeader *table, int pos, const InString *input, |
1676 | | OutString *output, int *posMapping, int prevTransOpcode, int *cursorPosition, |
1677 | 0 | int *cursorStatus, TranslationTableCharacterAttributes beforeAttributes) { |
1678 | 0 | const TranslationTableRule *numberSign; |
1679 | 0 | if (brailleIndicatorDefined(table->numberSign, table, &numberSign) && |
1680 | 0 | checkCharAttr_safe(input, pos, CTC_Digit, table) && |
1681 | 0 | (prevTransOpcode == CTO_ExactDots || |
1682 | 0 | (!(beforeAttributes & CTC_Digit) && prevTransOpcode != CTO_MidNum))) { |
1683 | 0 | if (!for_updatePositions(&numberSign->charsdots[0], 0, numberSign->dotslen, 0, |
1684 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus)) |
1685 | 0 | return 0; |
1686 | 0 | } |
1687 | 0 | return 1; |
1688 | 0 | } |
1689 | | |
1690 | | static int |
1691 | 0 | isNoLetsign(widechar c, const TranslationTableHeader *table) { |
1692 | 0 | for (int k = 0; k < table->noLetsignCount; k++) |
1693 | 0 | if (c == table->noLetsign[k]) return 1; |
1694 | 0 | return 0; |
1695 | 0 | } |
1696 | | |
1697 | | static int |
1698 | 0 | isNoLetsignBefore(widechar c, const TranslationTableHeader *table) { |
1699 | 0 | for (int k = 0; k < table->noLetsignBeforeCount; k++) |
1700 | 0 | if (c == table->noLetsignBefore[k]) return 1; |
1701 | 0 | return 0; |
1702 | 0 | } |
1703 | | |
1704 | | static int |
1705 | 0 | isNoLetsignAfter(widechar c, const TranslationTableHeader *table) { |
1706 | 0 | for (int k = 0; k < table->noLetsignAfterCount; k++) |
1707 | 0 | if (c == table->noLetsignAfter[k]) return 1; |
1708 | 0 | return 0; |
1709 | 0 | } |
1710 | | |
1711 | | static int |
1712 | | insertLetterSign(const TranslationTableHeader *table, int pos, const InString *input, |
1713 | | OutString *output, int *posMapping, int transOpcode, int *cursorPosition, |
1714 | 0 | int *cursorStatus, TranslationTableCharacterAttributes beforeAttributes) { |
1715 | 0 | const TranslationTableRule *letterSign; |
1716 | 0 | if (brailleIndicatorDefined(table->letterSign, table, &letterSign)) { |
1717 | 0 | if (transOpcode == CTO_Contraction) { |
1718 | 0 | if (!for_updatePositions(&letterSign->charsdots[0], 0, letterSign->dotslen, 0, |
1719 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus)) |
1720 | 0 | return 0; |
1721 | 0 | } else if ((checkCharAttr_safe(input, pos, CTC_Letter, table) && |
1722 | 0 | !(beforeAttributes & CTC_Letter)) && |
1723 | 0 | (!checkCharAttr_safe(input, pos + 1, CTC_Letter, table) || |
1724 | 0 | (beforeAttributes & CTC_Digit))) { |
1725 | 0 | if (pos > 0 && isNoLetsignBefore(input->chars[pos - 1], table)) return 1; |
1726 | 0 | if (isNoLetsign(input->chars[pos], table)) return 1; |
1727 | 0 | if (pos + 1 < input->length && isNoLetsignAfter(input->chars[pos + 1], table)) |
1728 | 0 | return 1; |
1729 | 0 | if (!for_updatePositions(&letterSign->charsdots[0], 0, letterSign->dotslen, 0, |
1730 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus)) |
1731 | 0 | return 0; |
1732 | 0 | } |
1733 | 0 | } |
1734 | 0 | return 1; |
1735 | 0 | } |
1736 | | |
1737 | | static int |
1738 | | onlyLettersBehind(const TranslationTableHeader *table, int pos, const InString *input, |
1739 | 0 | TranslationTableCharacterAttributes beforeAttributes) { |
1740 | | /* Actually, spaces, then letters */ |
1741 | 0 | int k; |
1742 | 0 | if (!(beforeAttributes & CTC_Space)) return 0; |
1743 | 0 | for (k = pos - 2; k >= 0; k--) { |
1744 | 0 | TranslationTableCharacterAttributes attr = |
1745 | 0 | (getChar(input->chars[k], table))->attributes; |
1746 | 0 | if ((attr & CTC_Space)) continue; |
1747 | 0 | if ((attr & CTC_Letter)) |
1748 | 0 | return 1; |
1749 | 0 | else |
1750 | 0 | return 0; |
1751 | 0 | } |
1752 | 0 | return 1; |
1753 | 0 | } |
1754 | | |
1755 | | static int |
1756 | | onlyLettersAhead(const TranslationTableHeader *table, int pos, const InString *input, |
1757 | 0 | int transCharslen, TranslationTableCharacterAttributes afterAttributes) { |
1758 | | /* Actullly, spaces, then letters */ |
1759 | 0 | int k; |
1760 | 0 | if (!(afterAttributes & CTC_Space)) return 0; |
1761 | 0 | for (k = pos + transCharslen + 1; k < input->length; k++) { |
1762 | 0 | TranslationTableCharacterAttributes attr = |
1763 | 0 | (getChar(input->chars[k], table))->attributes; |
1764 | 0 | if ((attr & CTC_Space)) continue; |
1765 | 0 | if ((attr & (CTC_Letter | CTC_LitDigit))) |
1766 | 0 | return 1; |
1767 | 0 | else |
1768 | 0 | return 0; |
1769 | 0 | } |
1770 | 0 | return 0; |
1771 | 0 | } |
1772 | | |
1773 | | static int |
1774 | | noCompbrlAhead(const TranslationTableHeader *table, int pos, int mode, |
1775 | 0 | const InString *input, int transOpcode, int transCharslen, int cursorPosition) { |
1776 | 0 | int start = pos + transCharslen; |
1777 | 0 | int end; |
1778 | 0 | int p; |
1779 | 0 | if (start >= input->length) return 1; |
1780 | 0 | while (start < input->length && checkCharAttr(input->chars[start], CTC_Space, table)) |
1781 | 0 | start++; |
1782 | 0 | if (start == input->length || |
1783 | 0 | (transOpcode == CTO_JoinableWord && |
1784 | 0 | (!checkCharAttr(input->chars[start], CTC_Letter | CTC_Digit, table) || |
1785 | 0 | !checkCharAttr(input->chars[start - 1], CTC_Space, table)))) |
1786 | 0 | return 1; |
1787 | 0 | end = start; |
1788 | 0 | while (end < input->length && !checkCharAttr(input->chars[end], CTC_Space, table)) |
1789 | 0 | end++; |
1790 | 0 | if ((mode & (compbrlAtCursor | compbrlLeftCursor)) && cursorPosition >= start && |
1791 | 0 | cursorPosition < end) |
1792 | 0 | return 0; |
1793 | | /* Look ahead for rules with CTO_CompBrl */ |
1794 | 0 | for (p = start; p < end; p++) { |
1795 | 0 | int length = input->length - p; |
1796 | 0 | int tryThis; |
1797 | 0 | int k; |
1798 | 0 | for (tryThis = 0; tryThis < 2; tryThis++) { |
1799 | 0 | TranslationTableOffset ruleOffset = 0; |
1800 | 0 | TranslationTableRule *testRule; |
1801 | 0 | switch (tryThis) { |
1802 | 0 | case 0: |
1803 | 0 | if (!(length >= 2)) break; |
1804 | 0 | ruleOffset = table->forRules[_lou_stringHash(&input->chars[p], 1, table)]; |
1805 | 0 | break; |
1806 | 0 | case 1: |
1807 | 0 | if (!(length >= 1)) break; |
1808 | 0 | length = 1; |
1809 | 0 | ruleOffset = getChar(input->chars[p], table)->otherRules; |
1810 | 0 | break; |
1811 | 0 | } |
1812 | 0 | while (ruleOffset) { |
1813 | 0 | const TranslationTableCharacter *character1; |
1814 | 0 | const TranslationTableCharacter *character2; |
1815 | 0 | testRule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
1816 | 0 | for (k = 0; k < testRule->charslen && k < length; k++) { |
1817 | 0 | character1 = getChar(testRule->charsdots[k], table); |
1818 | 0 | character2 = getChar(input->chars[p + k], table); |
1819 | 0 | if (toLowercase(table, character1) != toLowercase(table, character2)) |
1820 | 0 | break; |
1821 | 0 | } |
1822 | 0 | if (tryThis == 1 || k == testRule->charslen) { |
1823 | 0 | if (testRule->opcode == CTO_CompBrl || |
1824 | 0 | testRule->opcode == CTO_Literal) |
1825 | 0 | return 0; |
1826 | 0 | } |
1827 | 0 | ruleOffset = testRule->charsnext; |
1828 | 0 | } |
1829 | 0 | } |
1830 | 0 | } |
1831 | 0 | return 1; |
1832 | 0 | } |
1833 | | |
1834 | | static int |
1835 | 0 | checkEmphasisChange(int pos, int len, const EmphasisInfo *emphasisBuffer) { |
1836 | 0 | int i; |
1837 | 0 | for (i = pos + 1; i < pos + len; i++) |
1838 | 0 | if (emphasisBuffer[i].begin || emphasisBuffer[i].end || emphasisBuffer[i].word || |
1839 | 0 | emphasisBuffer[i].symbol) |
1840 | 0 | return 1; |
1841 | 0 | return 0; |
1842 | 0 | } |
1843 | | |
1844 | | static int |
1845 | | isRepeatedWord(const TranslationTableHeader *table, int pos, const InString *input, |
1846 | | const EmphasisInfo *emphasisBuffer, int outputLength, const int *posMapping, |
1847 | 0 | int transCharslen, int *repwordLength) { |
1848 | | /* transCharslen is the length of the character sequence that separates the repeated |
1849 | | * parts */ |
1850 | 0 | int len; |
1851 | | /* maximum length that the repeated part can have is determined by how many letters |
1852 | | * there are before and after the separator */ |
1853 | 0 | for (len = 1; pos - len >= 0 && pos + transCharslen + len - 1 < input->length && |
1854 | 0 | checkCharAttr(input->chars[pos - len], CTC_Letter, table) && |
1855 | 0 | checkCharAttr(input->chars[pos + transCharslen + len - 1], CTC_Letter, table); |
1856 | 0 | len++) |
1857 | 0 | ; |
1858 | 0 | len--; |
1859 | | /* now actually compare the parts, starting with the maximal length and making them |
1860 | | * shorter if they don't match */ |
1861 | 0 | while (len > 0) { |
1862 | 0 | int start = pos - len; |
1863 | 0 | if (compareChars(&input->chars[start], &input->chars[pos + transCharslen], len, |
1864 | 0 | table)) { |
1865 | | /* part must not start within a contraction */ |
1866 | 0 | for (int k = outputLength - 1; k >= 0; k--) |
1867 | 0 | if (posMapping[k] == start) |
1868 | 0 | break; |
1869 | 0 | else if (posMapping[k] < start) |
1870 | 0 | return 0; |
1871 | | /* capitalisation and emphasis may not change except at the beginning of the |
1872 | | * parts */ |
1873 | 0 | if (checkEmphasisChange(start, len + transCharslen, emphasisBuffer) || |
1874 | 0 | checkEmphasisChange(pos + transCharslen, len, emphasisBuffer)) |
1875 | 0 | return 0; |
1876 | 0 | *repwordLength = len; |
1877 | 0 | return 1; |
1878 | 0 | } |
1879 | 0 | len--; |
1880 | 0 | } |
1881 | 0 | return 0; |
1882 | 0 | } |
1883 | | |
/**
 * Check whether the match of `transRule` at `pos` sits inside a "sequence".
 *
 * Backwards from pos - 1, characters with CTC_SeqBefore are skipped; the first
 * other character must have CTC_Space or CTC_SeqDelimiter. Forwards from the
 * end of the match, the table's seqPatterns are tried first (if any), then
 * characters with CTC_SeqAfter are skipped and the first other character must
 * again have CTC_Space or CTC_SeqDelimiter. Running off either end of the
 * input counts as success.
 *
 * @return 1 if both sides satisfy the conditions above, 0 otherwise.
 */
static int
inSequence(const TranslationTableHeader *table, int pos, const InString *input,
        const TranslationTableRule *transRule) {
    int i, j, s, match;
    // TODO: all caps words
    // const TranslationTableCharacter *c = NULL;

    /* check before sequence */
    for (i = pos - 1; i >= 0; i--) {
        if (checkCharAttr(input->chars[i], CTC_SeqBefore, table)) continue;
        if (!(checkCharAttr(input->chars[i], CTC_Space | CTC_SeqDelimiter, table)))
            return 0;
        break;
    }

    /* check after sequence */
    for (i = pos + transRule->charslen; i < input->length; i++) {
        /* check sequence after patterns */
        if (table->seqPatternsCount) {
            /* seqPatterns is walked as a flat array in which a zero entry ends
             * a pattern. `match` is the scanner state: 0 = at the start of a
             * pattern, 1 = the current pattern has matched so far, -1 = the
             * current pattern failed and the rest of it is being skipped.
             * `s` only moves forward; `j` is rewound to `i` whenever a new
             * pattern is about to be tried. */
            match = 0;
            /* NOTE(review): j may reach input->length, so input->chars[j] can
             * be read at that index -- confirm the buffer is readable there. */
            for (j = i, s = 0; j <= input->length && s < table->seqPatternsCount;
                    j++, s++) {
                /* matching */
                if (match == 1) {
                    if (table->seqPatterns[s]) {
                        if (input->chars[j] == table->seqPatterns[s])
                            match = 1;
                        else {
                            match = -1;
                            /* becomes i again after the loop's j++ */
                            j = i - 1;
                        }
                    }

                    /* found match */
                    else {
                        /* pattern at end of input */
                        if (j >= input->length) return 1;

                        /* continue the outer scan right after the pattern */
                        i = j;
                        break;
                    }
                }

                /* looking for match */
                else if (match == 0) {
                    if (table->seqPatterns[s]) {
                        if (input->chars[j] == table->seqPatterns[s])
                            match = 1;
                        else {
                            match = -1;
                            j = i - 1;
                        }
                    }
                }

                /* next pattern: skip to the zero entry that ends the failed one */
                else if (match == -1) {
                    if (!table->seqPatterns[s]) {
                        match = 0;
                        j = i - 1;
                    }
                }
            }
        }

        if (checkCharAttr(input->chars[i], CTC_SeqAfter, table)) continue;
        if (!(checkCharAttr(input->chars[i], CTC_Space | CTC_SeqDelimiter, table)))
            return 0;
        break;
    }

    return 1;
}
1957 | | |
/**
 * Select the forward translation rule to apply at input position `pos`.
 *
 * Candidates are tried in three rounds:
 *   0. the rule chain found via table->forRules[_lou_stringHash(...)], only
 *      when at least two characters are available; each candidate must fit
 *      into the available length and pass validMatch();
 *   1. the otherRules chain of the character at `pos`, with the available
 *      length forced to 1 (validMatch() is not consulted in this round);
 *   2. a static pseudo rule with opcode CTO_None, charslen 1 and dotslen 0
 *      that carries the input character itself.
 * Within a chain the first candidate whose before/after attribute conditions,
 * nocross condition and opcode-specific context test all succeed is selected.
 *
 * Results are returned through `*transRule`, `*transOpcode` and
 * `*transCharslen`; `*curCharDef` is always set to the definition of the
 * character at `pos`. Some opcodes are rewritten on selection (CTO_Syllable
 * and, in some contexts, CTO_LargeSign become CTO_Always; CTO_DecPoint becomes
 * CTO_MidNum after a digit). `*repwordLength` is written by isRepeatedWord()
 * for CTO_RepWord / CTO_RepEndWord, and the pass* / patternMatch / grouping*
 * arguments are handed to passDoTest() for CTO_Context.
 *
 * `output` is passed by value; only its length and maxlength are read.
 * The available length is cut at `compbrlStart` when `pos` lies before it.
 *
 * NOTE(review): the pseudo rule is a function-level static that is modified on
 * every fallback, so the returned pointer is only valid until the next call.
 */
static void
for_selectRule(const TranslationTableHeader *table, int pos, OutString output,
        const int *posMapping, int mode, const InString *input, formtype *typebuf,
        EmphasisInfo *emphasisBuffer, int *transOpcode, int prevTransOpcode,
        const TranslationTableRule **transRule, int *transCharslen, int *passCharDots,
        widechar const **passInstructions, int *passIC, PassRuleMatch *patternMatch,
        int posIncremented, int cursorPosition, int *repwordLength, int dontContract,
        int compbrlStart, int compbrlEnd,
        TranslationTableCharacterAttributes beforeAttributes,
        TranslationTableCharacter **curCharDef, TranslationTableRule **groupingRule,
        widechar *groupingOp) {
    /* check for valid Translations. Return value is in transRule. */
    static TranslationTableRule pseudoRule = { 0 };
    /* number of input characters a rule may consume */
    int length = ((pos < compbrlStart) ? compbrlStart : input->length) - pos;
    int tryThis;
    int k;
    TranslationTableOffset ruleOffset = 0;
    *curCharDef = getChar(input->chars[pos], table);
    for (tryThis = 0; tryThis < 3; tryThis++) {
        switch (tryThis) {
        case 0: /* rules reached through the hash table */
            if (!(length >= 2)) break;
            ruleOffset = table->forRules[_lou_stringHash(&input->chars[pos], 1, table)];
            break;
        case 1: /* rules attached to the character itself */
            if (!(length >= 1)) break;
            length = 1;
            ruleOffset = (*curCharDef)->otherRules;
            break;
        case 2: /* No rule found */
            *transRule = &pseudoRule;
            *transOpcode = pseudoRule.opcode = CTO_None;
            *transCharslen = pseudoRule.charslen = 1;
            pseudoRule.charsdots[0] = input->chars[pos];
            pseudoRule.dotslen = 0;
            return;
        }
        /* walk the chain of candidate rules (linked through charsnext) */
        while (ruleOffset) {
            *transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
            *transOpcode = (*transRule)->opcode;
            *transCharslen = (*transRule)->charslen;
            if (tryThis == 1 ||
                    ((*transCharslen <= length) &&
                            validMatch(table, pos, input, typebuf, *transRule,
                                    *transCharslen))) {
                TranslationTableCharacterAttributes afterAttributes;
                /* check before emphasis match */
                /* NOTE(review): the `break` below leaves the while loop, so the
                 * remaining rules of this chain are not tried -- confirm that
                 * this is intended rather than skipping just this rule. */
                if ((*transRule)->before & CTC_EmpMatch) {
                    if (emphasisBuffer[pos].begin || emphasisBuffer[pos].end ||
                            emphasisBuffer[pos].word || emphasisBuffer[pos].symbol)
                        break;
                }

                /* check after emphasis match */
                /* (same remark: this `break` also ends the chain walk) */
                if ((*transRule)->after & CTC_EmpMatch) {
                    if (emphasisBuffer[pos + *transCharslen].begin ||
                            emphasisBuffer[pos + *transCharslen].end ||
                            emphasisBuffer[pos + *transCharslen].word ||
                            emphasisBuffer[pos + *transCharslen].symbol)
                        break;
                }

                /* check this rule */
                setAfter(*transCharslen, table, pos, input, &afterAttributes);
                /* The rule's `after` mask is tested against the attributes of
                 * the preceding character and its `before` mask against those
                 * of the following character; CTC_EmpMatch is ignored here. */
                if ((!((*transRule)->after & ~CTC_EmpMatch) ||
                            (beforeAttributes & (*transRule)->after)) &&
                        (!((*transRule)->before & ~CTC_EmpMatch) ||
                                (afterAttributes & (*transRule)->before)))
                    /* check nocross */
                    if (!((*transRule)->nocross &&
                                syllableBreak(table, pos, input, *transCharslen))) {
                        /* In this switch `return` selects the rule, `break`
                         * rejects it and moves on to the next one in the
                         * chain. */
                        switch (*transOpcode) { /* check validity of this Translation */
                        case CTO_Space:
                        case CTO_Letter:
                        case CTO_UpperCase:
                        case CTO_LowerCase:
                        case CTO_Digit:
                        case CTO_LitDigit:
                        case CTO_Punctuation:
                        case CTO_Math:
                        case CTO_Sign:
                        case CTO_Hyphen:
                        case CTO_Replace:
                        case CTO_CompBrl:
                        case CTO_Literal:
                            return;
                        case CTO_Repeated:
                            if (dontContract || (mode & noContractions)) break;
                            if ((mode & (compbrlAtCursor | compbrlLeftCursor)) &&
                                    pos >= compbrlStart && pos <= compbrlEnd)
                                break;
                            return;
                        case CTO_RepWord:
                        case CTO_RepEndWord:
                            if (dontContract || (mode & noContractions)) break;
                            if (isRepeatedWord(table, pos, input, emphasisBuffer,
                                        output.length, posMapping, *transCharslen,
                                        repwordLength)) {
                                /* "a letter precedes the repeated part" must
                                 * hold exactly for CTO_RepEndWord */
                                if ((pos > *repwordLength &&
                                            checkCharAttr(input->chars[pos -
                                                                  *repwordLength - 1],
                                                    CTC_Letter, table)) ==
                                        (*transOpcode == CTO_RepEndWord)) {
                                    return;
                                }
                            }
                            break;
                        case CTO_NoCont:
                            if (dontContract || (mode & noContractions)) break;
                            return;
                        case CTO_Syllable:
                            *transOpcode = CTO_Always;
                            /* fall through */
                        case CTO_Always:
                            if (checkEmphasisChange(pos, *transCharslen, emphasisBuffer))
                                break;
                            if (dontContract || (mode & noContractions)) break;
                            return;
                        case CTO_ExactDots:
                            return;
                        case CTO_Context:
                            // check posIncremented to avoid endless loop
                            if (!posIncremented ||
                                    !passDoTest(table, pos, input, *transOpcode,
                                            *transRule, passCharDots, passInstructions,
                                            passIC, patternMatch, groupingRule,
                                            groupingOp))
                                break;
                            return;
                        case CTO_LargeSign:
                            if (dontContract || (mode & noContractions)) break;
                            /* the rule is always selected; in the contexts
                             * tested below it is downgraded to CTO_Always */
                            if (!((beforeAttributes & (CTC_Space | CTC_Punctuation)) ||
                                        onlyLettersBehind(
                                                table, pos, input, beforeAttributes)) ||
                                    !((afterAttributes & CTC_Space) ||
                                            prevTransOpcode == CTO_LargeSign) ||
                                    (afterAttributes & CTC_Letter) ||
                                    !noCompbrlAhead(table, pos, mode, input, *transOpcode,
                                            *transCharslen, cursorPosition))
                                *transOpcode = CTO_Always;
                            return;
                        case CTO_WholeWord:
                            if (dontContract || (mode & noContractions)) break;
                            if (checkEmphasisChange(pos, *transCharslen, emphasisBuffer))
                                break;
                            /* fall through */
                        case CTO_Contraction:
                            if (table->usesSequences) {
                                if (inSequence(table, pos, input, *transRule)) return;
                            } else {
                                if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                                        (afterAttributes & (CTC_Space | CTC_Punctuation)))
                                    return;
                            }
                            break;
                        case CTO_PartWord:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes & CTC_Letter) ||
                                    (afterAttributes & CTC_Letter))
                                return;
                            break;
                        case CTO_JoinNum:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                                    (afterAttributes & CTC_Space) &&
                                    (output.length + (*transRule)->dotslen <
                                            output.maxlength)) {
                                /* the first non-space character after the
                                 * match must be a digit */
                                int p = pos + *transCharslen + 1;
                                while (p < input->length) {
                                    if (!checkCharAttr(
                                                input->chars[p], CTC_Space, table)) {
                                        if (checkCharAttr(
                                                    input->chars[p], CTC_Digit, table))
                                            return;
                                        break;
                                    }
                                    p++;
                                }
                            }
                            break;
                        case CTO_LowWord:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes & CTC_Space) &&
                                    (afterAttributes & CTC_Space) &&
                                    (prevTransOpcode != CTO_JoinableWord))
                                return;
                            break;
                        case CTO_JoinableWord:
                            if (dontContract || (mode & noContractions)) break;
                            if (beforeAttributes & (CTC_Space | CTC_Punctuation) &&
                                    onlyLettersAhead(table, pos, input, *transCharslen,
                                            afterAttributes) &&
                                    noCompbrlAhead(table, pos, mode, input, *transOpcode,
                                            *transCharslen, cursorPosition))
                                return;
                            break;
                        case CTO_SuffixableWord:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                                    (afterAttributes &
                                            (CTC_Space | CTC_Letter | CTC_Punctuation)))
                                return;
                            break;
                        case CTO_PrefixableWord:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes &
                                        (CTC_Space | CTC_Letter | CTC_Punctuation)) &&
                                    (afterAttributes & (CTC_Space | CTC_Punctuation)))
                                return;
                            break;
                        case CTO_BegWord:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                                    (afterAttributes & CTC_Letter))
                                return;
                            break;
                        case CTO_BegMidWord:
                            if (dontContract || (mode & noContractions)) break;
                            if ((beforeAttributes &
                                        (CTC_Letter | CTC_Space | CTC_Punctuation)) &&
                                    (afterAttributes & CTC_Letter))
                                return;
                            break;
                        case CTO_MidWord:
                            if (dontContract || (mode & noContractions)) break;
                            if (beforeAttributes & CTC_Letter &&
                                    afterAttributes & CTC_Letter)
                                return;
                            break;
                        case CTO_MidEndWord:
                            if (dontContract || (mode & noContractions)) break;
                            if (beforeAttributes & CTC_Letter &&
                                    afterAttributes &
                                            (CTC_Letter | CTC_Space | CTC_Punctuation))
                                return;
                            break;
                        case CTO_EndWord:
                            if (dontContract || (mode & noContractions)) break;
                            if (beforeAttributes & CTC_Letter &&
                                    afterAttributes & (CTC_Space | CTC_Punctuation))
                                return;
                            break;
                        case CTO_BegNum:
                            if (beforeAttributes & (CTC_Space | CTC_Punctuation) &&
                                    afterAttributes & CTC_Digit)
                                return;
                            break;
                        case CTO_MidNum:
                            if (prevTransOpcode != CTO_ExactDots &&
                                    beforeAttributes & CTC_Digit &&
                                    afterAttributes & CTC_Digit)
                                return;
                            break;
                        case CTO_EndNum:
                            if (beforeAttributes & CTC_Digit &&
                                    prevTransOpcode != CTO_ExactDots)
                                return;
                            break;
                        case CTO_DecPoint:
                            if (!(afterAttributes & CTC_Digit)) break;
                            if (beforeAttributes & CTC_Digit) *transOpcode = CTO_MidNum;
                            return;
                        case CTO_PrePunc:
                            if (!checkCharAttr(
                                        input->chars[pos], CTC_Punctuation, table) ||
                                    (pos > 0 &&
                                            checkCharAttr(input->chars[pos - 1],
                                                    CTC_Letter, table)))
                                break;
                            /* selected only if a letter or digit follows
                             * before the next space */
                            for (k = pos + *transCharslen; k < input->length; k++) {
                                if (checkCharAttr(input->chars[k],
                                            (CTC_Letter | CTC_Digit), table))
                                    return;
                                if (checkCharAttr(input->chars[k], CTC_Space, table))
                                    break;
                            }
                            break;
                        case CTO_PostPunc:
                            if (!checkCharAttr(
                                        input->chars[pos], CTC_Punctuation, table) ||
                                    (pos < (input->length - 1) &&
                                            checkCharAttr(input->chars[pos + 1],
                                                    CTC_Letter, table)))
                                break;
                            /* selected only if a letter or digit precedes
                             * after the previous space */
                            for (k = pos; k >= 0; k--) {
                                if (checkCharAttr(input->chars[k],
                                            (CTC_Letter | CTC_Digit), table))
                                    return;
                                if (checkCharAttr(input->chars[k], CTC_Space, table))
                                    break;
                            }
                            break;

                        case CTO_Match: {
                            widechar *patterns, *pattern;

                            if (dontContract || (mode & noContractions)) break;
                            if (checkEmphasisChange(pos, *transCharslen, emphasisBuffer))
                                break;

                            /* patterns[0] is used as the index of the "after"
                             * pattern; the "before" pattern starts at index 1 */
                            patterns =
                                    (widechar *)&table->ruleArea[(*transRule)->patterns];

                            /* check before pattern */
                            pattern = &patterns[1];
                            if (!_lou_pattern_check(
                                        input->chars, pos - 1, -1, -1, pattern, table))
                                break;

                            /* check after pattern */
                            pattern = &patterns[patterns[0]];
                            if (!_lou_pattern_check(input->chars,
                                        pos + (*transRule)->charslen, input->length, 1,
                                        pattern, table))
                                break;

                            return;
                        }

                        default:
                            break;
                        }
                    }
            }
            /* Done with checking this rule */
            ruleOffset = (*transRule)->charsnext;
        }
    }
}
2285 | | |
2286 | | static int |
2287 | | undefinedCharacter(widechar c, const TranslationTableHeader *table, int pos, |
2288 | | const InString *input, OutString *output, int *posMapping, int *cursorPosition, |
2289 | 0 | int *cursorStatus, int mode) { |
2290 | | /* Display an undefined character in the output buffer */ |
2291 | 0 | if (table->undefined) { |
2292 | 0 | TranslationTableRule *rule = |
2293 | 0 | (TranslationTableRule *)&table->ruleArea[table->undefined]; |
2294 | |
|
2295 | 0 | return for_updatePositions(&rule->charsdots[rule->charslen], rule->charslen, |
2296 | 0 | rule->dotslen, 0, pos, input, output, posMapping, cursorPosition, |
2297 | 0 | cursorStatus); |
2298 | 0 | } |
2299 | | |
2300 | 0 | const char *text = (mode & noUndefined) ? "" : _lou_showString(&c, 1, 1); |
2301 | 0 | size_t length = strlen(text); |
2302 | 0 | widechar dots[length == 0 ? 1 : length]; |
2303 | |
|
2304 | 0 | for (unsigned int k = 0; k < length; k += 1) { |
2305 | 0 | dots[k] = 0; |
2306 | 0 | TranslationTableOffset offset = getChar(text[k], table)->otherRules; |
2307 | 0 | while (offset) { |
2308 | 0 | const TranslationTableRule *r = |
2309 | 0 | (TranslationTableRule *)&table->ruleArea[offset]; |
2310 | 0 | if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow && r->dotslen == 1) { |
2311 | 0 | dots[k] = r->charsdots[1]; |
2312 | 0 | break; |
2313 | 0 | } |
2314 | 0 | offset = r->charsnext; |
2315 | 0 | } |
2316 | 0 | if (!dots[k]) dots[k] = _lou_charToFallbackDots(text[k]); |
2317 | 0 | } |
2318 | |
|
2319 | 0 | return for_updatePositions(dots, 1, length, 0, pos, input, output, posMapping, |
2320 | 0 | cursorPosition, cursorStatus); |
2321 | 0 | } |
2322 | | |
2323 | | static int |
2324 | | putCharacter(widechar character, const TranslationTableHeader *table, int pos, |
2325 | | const InString *input, OutString *output, int *posMapping, int *cursorPosition, |
2326 | 0 | int *cursorStatus, int mode) { |
2327 | | /* Insert the dots equivalent of a character into the output buffer */ |
2328 | 0 | TranslationTableCharacter *chardef = getChar(character, table); |
2329 | 0 | if (!chardef->definitionRule && chardef->basechar) |
2330 | 0 | chardef = (TranslationTableCharacter *)&table->ruleArea[chardef->basechar]; |
2331 | 0 | if (chardef->definitionRule) { |
2332 | 0 | const TranslationTableRule *rule = |
2333 | 0 | (TranslationTableRule *)&table->ruleArea[chardef->definitionRule]; |
2334 | 0 | return for_updatePositions(&rule->charsdots[1], 1, rule->dotslen, 0, pos, input, |
2335 | 0 | output, posMapping, cursorPosition, cursorStatus); |
2336 | 0 | } |
2337 | 0 | return undefinedCharacter(character, table, pos, input, output, posMapping, |
2338 | 0 | cursorPosition, cursorStatus, mode); |
2339 | 0 | } |
2340 | | |
2341 | | static int |
2342 | | putCharacters(const widechar *characters, int count, const TranslationTableHeader *table, |
2343 | | int pos, const InString *input, OutString *output, int *posMapping, |
2344 | 0 | int *cursorPosition, int *cursorStatus, int mode) { |
2345 | | /* Insert the dot equivalents of a series of characters in the output |
2346 | | * buffer */ |
2347 | 0 | int k; |
2348 | 0 | for (k = 0; k < count; k++) |
2349 | 0 | if (!putCharacter(characters[k], table, pos, input, output, posMapping, |
2350 | 0 | cursorPosition, cursorStatus, mode)) |
2351 | 0 | return 0; |
2352 | 0 | return 1; |
2353 | 0 | } |
2354 | | |
/**
 * Snapshot of the translation state taken at the start of the current word.
 * It is what the translator rewinds to when back-tracking, and it is also
 * consulted by the nocont and compbrl handling.
 */
typedef struct {
	/** position in the input where the current word begins */
	int inPos;
	/** position in the output where the current word begins */
	int outPos;
	/** input position of the next character for which emphasis marks still
	 * have to be inserted */
	int emphasisInPos;
} LastWord;
2363 | | |
2364 | | static int |
2365 | | doCompbrl(const TranslationTableHeader *table, int *pos, const InString *input, |
2366 | | OutString *output, int *posMapping, EmphasisInfo *emphasisBuffer, |
2367 | | const TranslationTableRule **transRule, int *cursorPosition, int *cursorStatus, |
2368 | 0 | const LastWord *lastWord, int *insertEmphasesFrom, int mode) { |
2369 | | /* Handle strings containing substrings defined by the compbrl opcode */ |
2370 | 0 | int stringStart, stringEnd; |
2371 | 0 | if (checkCharAttr(input->chars[*pos], CTC_Space, table)) return 1; |
2372 | 0 | stringStart = lastWord->outPos ? lastWord->inPos : 0; |
2373 | 0 | stringEnd = *pos; |
2374 | 0 | while (stringEnd < input->length && |
2375 | 0 | !checkCharAttr(input->chars[stringEnd], CTC_Space, table)) |
2376 | 0 | stringEnd++; |
2377 | 0 | *pos = stringStart; |
2378 | 0 | output->length = lastWord->outPos; |
2379 | 0 | *insertEmphasesFrom = lastWord->emphasisInPos; |
2380 | 0 | return doCompTrans(stringStart, stringEnd, table, pos, input, output, posMapping, |
2381 | 0 | emphasisBuffer, transRule, cursorPosition, cursorStatus, mode); |
2382 | 0 | } |
2383 | | |
2384 | | static int |
2385 | | doCompTrans(int start, int end, const TranslationTableHeader *table, int *pos, |
2386 | | const InString *input, OutString *output, int *posMapping, |
2387 | | EmphasisInfo *emphasisBuffer, const TranslationTableRule **transRule, |
2388 | 0 | int *cursorPosition, int *cursorStatus, int mode) { |
2389 | 0 | const TranslationTableRule *indicRule; |
2390 | 0 | int k; |
2391 | 0 | int haveEndsegment = 0; |
2392 | 0 | if (*cursorStatus != 2 && brailleIndicatorDefined(table->begComp, table, &indicRule)) |
2393 | 0 | if (!for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, *pos, |
2394 | 0 | input, output, posMapping, cursorPosition, cursorStatus)) |
2395 | 0 | return 0; |
2396 | 0 | for (k = start; k < end; k++) { |
2397 | 0 | TranslationTableOffset compdots = 0; |
2398 | | /* HACK: computer braille is one-to-one so it |
2399 | | * can't have any emphasis indicators. |
2400 | | * A better solution is to treat computer braille as its own mode. */ |
2401 | 0 | emphasisBuffer[k] = (EmphasisInfo){ 0 }; |
2402 | 0 | if (input->chars[k] == LOU_ENDSEGMENT) { |
2403 | 0 | haveEndsegment = 1; |
2404 | 0 | continue; |
2405 | 0 | } |
2406 | 0 | *pos = k; |
2407 | 0 | compdots = getChar(input->chars[k], table)->compRule; |
2408 | 0 | if (compdots != 0) { |
2409 | 0 | *transRule = (TranslationTableRule *)&table->ruleArea[compdots]; |
2410 | 0 | if (!for_updatePositions(&(*transRule)->charsdots[(*transRule)->charslen], |
2411 | 0 | (*transRule)->charslen, (*transRule)->dotslen, 0, *pos, input, |
2412 | 0 | output, posMapping, cursorPosition, cursorStatus)) |
2413 | 0 | return 0; |
2414 | 0 | } else if (!putCharacter(input->chars[k], table, *pos, input, output, posMapping, |
2415 | 0 | cursorPosition, cursorStatus, mode)) |
2416 | 0 | return 0; |
2417 | 0 | } |
2418 | 0 | if (*cursorStatus != 2 && brailleIndicatorDefined(table->endComp, table, &indicRule)) |
2419 | 0 | if (!for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, *pos, |
2420 | 0 | input, output, posMapping, cursorPosition, cursorStatus)) |
2421 | 0 | return 0; |
2422 | 0 | *pos = end; |
2423 | 0 | if (haveEndsegment) { |
2424 | 0 | widechar endSegment = LOU_ENDSEGMENT; |
2425 | 0 | if (!for_updatePositions(&endSegment, 0, 1, 0, *pos, input, output, posMapping, |
2426 | 0 | cursorPosition, cursorStatus)) |
2427 | 0 | return 0; |
2428 | 0 | } |
2429 | 0 | return 1; |
2430 | 0 | } |
2431 | | |
2432 | | static int |
2433 | | doNocont(const TranslationTableHeader *table, int *pos, OutString *output, int mode, |
2434 | | const InString *input, const LastWord *lastWord, int *dontContract, |
2435 | 0 | int *insertEmphasesFrom) { |
2436 | | /* Handle strings containing substrings defined by the nocont opcode */ |
2437 | 0 | if (checkCharAttr(input->chars[*pos], CTC_Space, table) || *dontContract || |
2438 | 0 | (mode & noContractions)) |
2439 | 0 | return 1; |
2440 | 0 | if (lastWord->outPos) { |
2441 | 0 | *pos = lastWord->inPos; |
2442 | 0 | output->length = lastWord->outPos; |
2443 | 0 | } else { |
2444 | 0 | *pos = 0; |
2445 | 0 | output->length = 0; |
2446 | 0 | } |
2447 | 0 | *insertEmphasesFrom = lastWord->emphasisInPos; |
2448 | 0 | *dontContract = 1; |
2449 | 0 | return 1; |
2450 | 0 | } |
2451 | | |
2452 | | static int |
2453 | | markSyllables( |
2454 | 0 | const TranslationTableHeader *table, const InString *input, formtype *typebuf) { |
2455 | 0 | int pos; |
2456 | 0 | int k; |
2457 | 0 | int currentMark = 0; |
2458 | 0 | int const syllable_marks[] = { SYLLABLE_MARKER_1, SYLLABLE_MARKER_2 }; |
2459 | 0 | int syllable_mark_selector = 0; |
2460 | 0 | const TranslationTableRule *transRule; |
2461 | 0 | int transOpcode; |
2462 | 0 | int transCharslen; |
2463 | |
|
2464 | 0 | if (typebuf == NULL || !table->syllables) return 1; |
2465 | 0 | pos = 0; |
2466 | 0 | while (pos < input->length) { /* the main multipass translation loop */ |
2467 | 0 | int length = input->length - pos; |
2468 | 0 | int tryThis = 0; |
2469 | 0 | while (tryThis < 3) { |
2470 | 0 | TranslationTableOffset ruleOffset = 0; |
2471 | 0 | switch (tryThis) { |
2472 | 0 | case 0: |
2473 | 0 | if (!(length >= 2)) break; |
2474 | | // memory overflow when pos == input->length - 1 |
2475 | 0 | ruleOffset = |
2476 | 0 | table->forRules[_lou_stringHash(&input->chars[pos], 1, table)]; |
2477 | 0 | break; |
2478 | 0 | case 1: |
2479 | 0 | if (!(length >= 1)) break; |
2480 | 0 | length = 1; |
2481 | 0 | ruleOffset = getChar(input->chars[pos], table)->otherRules; |
2482 | 0 | break; |
2483 | 0 | case 2: /* No rule found */ |
2484 | 0 | transOpcode = CTO_Always; |
2485 | 0 | ruleOffset = 0; |
2486 | 0 | break; |
2487 | 0 | } |
2488 | 0 | while (ruleOffset) { |
2489 | 0 | transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset]; |
2490 | 0 | transOpcode = transRule->opcode; |
2491 | 0 | transCharslen = transRule->charslen; |
2492 | 0 | if (tryThis == 1 || |
2493 | 0 | (transCharslen <= length && |
2494 | 0 | compareChars(&transRule->charsdots[0], &input->chars[pos], |
2495 | 0 | transCharslen, table))) { |
2496 | 0 | if (transOpcode == CTO_Syllable) { |
2497 | 0 | tryThis = 4; |
2498 | 0 | break; |
2499 | 0 | } |
2500 | 0 | } |
2501 | 0 | ruleOffset = transRule->charsnext; |
2502 | 0 | } |
2503 | 0 | tryThis++; |
2504 | 0 | } |
2505 | 0 | switch (transOpcode) { |
2506 | 0 | case CTO_Always: |
2507 | 0 | if (pos >= input->length) return 0; |
2508 | 0 | typebuf[pos++] |= currentMark; |
2509 | 0 | break; |
2510 | 0 | case CTO_Syllable: |
2511 | | /* cycle between SYLLABLE_MARKER_1 and SYLLABLE_MARKER_2 so |
2512 | | * we can distinguinsh two consequtive syllables */ |
2513 | 0 | currentMark = syllable_marks[syllable_mark_selector]; |
2514 | 0 | syllable_mark_selector = (syllable_mark_selector + 1) % 2; |
2515 | |
|
2516 | 0 | if ((pos + transCharslen) > input->length) return 0; |
2517 | 0 | for (k = 0; k < transCharslen; k++) typebuf[pos++] |= currentMark; |
2518 | 0 | break; |
2519 | 0 | default: |
2520 | 0 | break; |
2521 | 0 | } |
2522 | 0 | } |
2523 | 0 | return 1; |
2524 | 0 | } |
2525 | | |
2526 | | static int |
2527 | | resetsEmphMode( |
2528 | 0 | widechar c, const TranslationTableHeader *table, const EmphasisClass *emphClass) { |
2529 | | /* Whether a character cancels word emphasis mode or not. */ |
2530 | 0 | if (emphClass->mode) { |
2531 | 0 | const TranslationTableCharacter *chardef = getChar(c, table); |
2532 | | /* the base character of a character belonging to a mode can never cancel the mode |
2533 | | */ |
2534 | 0 | if (chardef->attributes & emphClass->mode) |
2535 | 0 | return 0; |
2536 | 0 | else { |
2537 | 0 | const TranslationTableCharacter *ch = chardef; |
2538 | 0 | if (ch->basechar) |
2539 | 0 | ch = (TranslationTableCharacter *)&table->ruleArea[ch->basechar]; |
2540 | 0 | while (ch->linked) { |
2541 | 0 | ch = (TranslationTableCharacter *)&table->ruleArea[ch->linked]; |
2542 | 0 | if ((ch->mode & chardef->mode) == chardef->mode && |
2543 | 0 | ch->attributes & emphClass->mode) { |
2544 | 0 | return 0; |
2545 | 0 | } |
2546 | 0 | } |
2547 | 0 | } |
2548 | 0 | if (emphClass->mode == CTC_UpperCase) { |
2549 | | /* characters that are not letter and not capsmodechars cancel capsword mode |
2550 | | */ |
2551 | 0 | return !checkCharAttr(c, CTC_Letter | CTC_CapsMode, table); |
2552 | 0 | } else if (emphClass->mode == CTC_Digit) { |
2553 | | /* characters that are not digit or litdigit or numericmodechars cancel |
2554 | | * numeric mode */ |
2555 | 0 | return !checkCharAttr(c, |
2556 | 0 | CTC_Digit | CTC_LitDigit | CTC_NumericMode | CTC_MidEndNumericMode, |
2557 | 0 | table); |
2558 | 0 | } else { |
2559 | | /* characters that are not letter cancel other word modes */ |
2560 | 0 | return !checkCharAttr(c, CTC_Letter, table); |
2561 | 0 | } |
2562 | 0 | } else { |
2563 | 0 | if (checkCharAttr(c, CTC_Letter, table)) /* a letter never cancels emphasis */ |
2564 | 0 | return 0; |
2565 | 0 | const widechar *emphmodechars = table->emphModeChars[emphClass->rule]; |
2566 | | /* by default (if emphmodechars is not declared) only space cancels emphasis */ |
2567 | 0 | if (!emphmodechars[0]) return checkCharAttr(c, CTC_Space, table); |
2568 | 0 | for (int k = 0; emphmodechars[k]; k++) |
2569 | 0 | if (c == emphmodechars[k]) return 0; |
2570 | 0 | return 1; |
2571 | 0 | } |
2572 | 0 | } |
2573 | | |
2574 | | static int |
2575 | | isEmphasizable( |
2576 | 0 | widechar c, const TranslationTableHeader *table, const EmphasisClass *emphClass) { |
2577 | | /* Whether emphasis is indicated on a character or not. */ |
2578 | 0 | if (emphClass->mode) { |
2579 | | /* a character is emphasizable if it belongs to the mode or if it has the same |
2580 | | * base as a character that belongs to the mode */ |
2581 | 0 | const TranslationTableCharacter *chardef = getChar(c, table); |
2582 | 0 | if (chardef->basechar) |
2583 | 0 | chardef = (TranslationTableCharacter *)&table->ruleArea[chardef->basechar]; |
2584 | 0 | if (chardef->attributes & emphClass->mode) return 1; |
2585 | 0 | while (chardef->linked) { |
2586 | 0 | chardef = (TranslationTableCharacter *)&table->ruleArea[chardef->linked]; |
2587 | 0 | if (chardef->attributes & emphClass->mode) return 1; |
2588 | 0 | } |
2589 | 0 | return 0; |
2590 | 0 | } else { |
2591 | 0 | const widechar *noemphchars = table->noEmphChars[emphClass->rule]; |
2592 | | /* if noemphchars is not declared emphasis is indicated on all characters except |
2593 | | * spaces */ |
2594 | 0 | if (!noemphchars[0]) return !checkCharAttr(c, CTC_Space, table); |
2595 | 0 | for (int k = 0; noemphchars[k]; k++) |
2596 | 0 | if (c == noemphchars[k]) return 0; |
2597 | 0 | return 1; |
2598 | 0 | } |
2599 | 0 | } |
2600 | | |
2601 | | static int |
2602 | | isEmphasized(widechar c, const TranslationTableHeader *table, |
2603 | 0 | const EmphasisClass *emphClass, formtype typeform) { |
2604 | | /* Whether a character is emphasized or not. */ |
2605 | 0 | if (!isEmphasizable(c, table, emphClass)) return 0; |
2606 | 0 | if (emphClass->mode) |
2607 | 0 | return checkCharAttr(c, emphClass->mode, table); |
2608 | 0 | else |
2609 | 0 | return typeform & emphClass->typeform; |
2610 | 0 | } |
2611 | | |
2612 | | static int |
2613 | | isEmphSpace( |
2614 | 0 | widechar c, const TranslationTableHeader *table, const EmphasisClass *emphClass) { |
2615 | | /* For determining word boundaries. */ |
2616 | | /* Note that this is not the only function that is used for this purpose. In |
2617 | | * resolveEmphasisWords the beginning and end of words are further refined based on |
2618 | | * the isEmphasizable function. */ |
2619 | 0 | const int word_enabled = table->emphRules[emphClass->rule][begWordOffset]; |
2620 | 0 | if (emphClass->mode == CTC_UpperCase) { |
2621 | | /* The old behavior was that words are determined by spaces. However for some |
2622 | | * tables it is a requirement that words are determined based on letters and |
2623 | | * capsmodechars. While the latter probably makes most sense, we don't want to |
2624 | | * break the old behavior because there is no easy way to achieve it using |
2625 | | * table rules. A good middle ground is to let the behavior depend on the |
2626 | | * presence of a capsmodechars rule. */ |
2627 | 0 | if (!(word_enabled && table->hasCapsModeChars)) |
2628 | 0 | return checkCharAttr(c, CTC_Space, table); |
2629 | 0 | } |
2630 | 0 | return !isEmphasizable(c, table, emphClass) && |
2631 | 0 | (!word_enabled || resetsEmphMode(c, table, emphClass)); |
2632 | 0 | } |
2633 | | |
2634 | | static void |
2635 | | resolveEmphasisBeginEnd(EmphasisInfo *buffer, const EmphasisClass *class, |
2636 | | const TranslationTableHeader *table, const InString *input, |
2637 | 0 | const formtype *typebuf, const unsigned int *wordBuffer) { |
2638 | | /* mark emphasized (capitalized) sections, i.e. sections that */ |
2639 | | /* - start with an emphasized (uppercase) character, */ |
2640 | | /* - extend as long as no unemphasized (lowercase) character is encountered, and */ |
2641 | | /* - do not end with a word that contains no emphasized (uppercase) characters */ |
2642 | | /* in addition, if phrase rules are present, sections are split up as needed so that |
2643 | | * they do not end in the middle of a word */ |
2644 | |
|
2645 | 0 | int last_space = -1; // position of the last encountered space |
2646 | 0 | int emph_start = -1; // position of the first emphasized (uppercase) character after |
2647 | | // which no unemphasized (lowercase) character was encountered |
2648 | 0 | int last_word = -1; // position of the first space following the last encountered |
2649 | | // character if that character was emphasized (uppercase) |
2650 | 0 | int emph = 0; // whether or not the last encountered character was emphasized |
2651 | | // (uppercase) and happened in the current word |
2652 | 0 | int phrase_enabled = table->emphRules[class->rule][begPhraseOffset]; |
2653 | |
|
2654 | 0 | for (int i = 0; i < input->length; i++) { |
2655 | 0 | int isSpace = !(wordBuffer[i] & WORD_CHAR); |
2656 | 0 | if (isSpace) { |
2657 | | /* character is a space */ |
2658 | 0 | last_space = i; |
2659 | 0 | if (emph) { |
2660 | 0 | last_word = i; |
2661 | 0 | emph = 0; |
2662 | 0 | } |
2663 | 0 | } |
2664 | | /* if character is an emphasized (uppercase) character, emphasis mode begins or |
2665 | | * continues */ |
2666 | 0 | if (!isSpace && isEmphasized(input->chars[i], table, class, typebuf[i])) { |
2667 | 0 | if (emph_start < 0) emph_start = i; |
2668 | 0 | emph = 1; |
2669 | 0 | } else { |
2670 | | /* else if emphasis mode has begun, it should continue if there are no |
2671 | | * unemphasized (lowercase) characters before the next emphasized (uppercase) |
2672 | | * character */ |
2673 | | /* characters that cancel emphasis mode are handled later in |
2674 | | * resolveEmphasisResets (note that letters that are neither uppercase nor |
2675 | | * lowercase do not cancel caps mode) */ |
2676 | 0 | if (!isSpace && isEmphasizable(input->chars[i], table, class)) { |
2677 | 0 | if (emph_start >= 0) { |
2678 | 0 | buffer[emph_start].begin |= class->value; |
2679 | 0 | if (emph) { |
2680 | | /* a passage can not end on a word without emphasized (uppercase) |
2681 | | * characters, so if emphasis did not start inside the current |
2682 | | * word, end it after the last word that contained an emphasized |
2683 | | * (uppercase) character, and start over from the beginning of the |
2684 | | * current word */ |
2685 | 0 | if (phrase_enabled && emph_start < last_space) { |
2686 | 0 | buffer[last_word].end |= class->value; |
2687 | 0 | emph_start = -1; |
2688 | 0 | last_word = -1; |
2689 | 0 | emph = 0; |
2690 | 0 | i = last_space; |
2691 | 0 | continue; |
2692 | 0 | } else |
2693 | | /* don't split into two sections if no phrase rules are |
2694 | | * present or emphasis started inside the current word */ |
2695 | 0 | buffer[i].end |= class->value; |
2696 | 0 | } else |
2697 | | /* current word had no emphasis yet */ |
2698 | 0 | buffer[last_word].end |= class->value; |
2699 | 0 | emph_start = -1; |
2700 | 0 | last_word = -1; |
2701 | 0 | emph = 0; |
2702 | 0 | } |
2703 | 0 | } |
2704 | 0 | } |
2705 | 0 | } |
2706 | | |
2707 | | /* clean up input->length */ |
2708 | 0 | if (emph_start >= 0) { |
2709 | 0 | buffer[emph_start].begin |= class->value; |
2710 | 0 | if (emph) |
2711 | 0 | buffer[input->length].end |= class->value; |
2712 | 0 | else |
2713 | 0 | buffer[last_word].end |= class->value; |
2714 | 0 | } |
2715 | 0 | } |
2716 | | |
2717 | | static void |
2718 | | resolveEmphasisWords(EmphasisInfo *buffer, const EmphasisClass *class, |
2719 | | const TranslationTableHeader *table, const InString *input, |
2720 | 0 | unsigned int *wordBuffer) { |
2721 | 0 | int in_word = 0, in_emp = 0; |
2722 | 0 | int word_start = -1; // start position of the current emphasized word section |
2723 | 0 | int char_cnt = 0; // number of emphasizable characters within the current emphasized |
2724 | | // word section |
2725 | 0 | int last_char = -1; // position of the last emphasizable character |
2726 | 0 | const TranslationTableOffset *emphRule = table->emphRules[class->rule]; |
2727 | 0 | int letter_defined = emphRule[letterOffset]; |
2728 | 0 | int endphraseafter_defined = emphRule[begPhraseOffset] && |
2729 | 0 | (emphRule[endPhraseAfterOffset] || emphRule[endOffset]); |
2730 | |
|
2731 | 0 | for (int i = 0; i < input->length; i++) { |
2732 | | |
2733 | | /* check if at beginning of emphasis */ |
2734 | 0 | if (!in_emp) |
2735 | 0 | if (buffer[i].begin & class->value) { |
2736 | 0 | in_emp = 1; |
2737 | 0 | buffer[i].begin &= ~class->value; |
2738 | | |
2739 | | /* emphasis started inside word (and is therefore not a whole word) */ |
2740 | 0 | if (in_word) word_start = i; |
2741 | | |
2742 | | /* emphasis started on space */ |
2743 | 0 | if (!(wordBuffer[i] & WORD_CHAR)) word_start = -1; |
2744 | 0 | } |
2745 | | |
2746 | | /* check if at end of emphasis */ |
2747 | 0 | if (in_emp) |
2748 | 0 | if (buffer[i].end & class->value) { |
2749 | 0 | in_emp = 0; |
2750 | 0 | buffer[i].end &= ~class->value; |
2751 | 0 | if (in_word && word_start >= 0) { |
2752 | | /* if word is one symbol, turn it into a symbol (unless emphletter is |
2753 | | * not defined) */ |
2754 | 0 | if (letter_defined && char_cnt == 1) |
2755 | 0 | buffer[word_start].symbol |= class->value; |
2756 | 0 | else { |
2757 | | /* else mark the word start point and, if emphasis ended inside a |
2758 | | * word, also mark the end point */ |
2759 | 0 | buffer[word_start].word |= class->value; |
2760 | 0 | if (wordBuffer[i] & WORD_CHAR) { |
2761 | 0 | buffer[i].end |= class->value; |
2762 | 0 | buffer[i].word |= class->value; |
2763 | 0 | } |
2764 | 0 | } |
2765 | 0 | } |
2766 | 0 | } |
2767 | | |
2768 | | /* check if at beginning of word (first character that is not a space) */ |
2769 | 0 | if (!in_word) |
2770 | 0 | if (wordBuffer[i] & WORD_CHAR) { |
2771 | | /* check if word started on a character that is not emphasizable */ |
2772 | 0 | if (isEmphasizable(input->chars[i], table, class)) { |
2773 | 0 | in_word = 1; |
2774 | 0 | if (in_emp) word_start = i; |
2775 | | /* remove WORD_CHAR marks at the end of the previous word */ |
2776 | 0 | for (int j = last_char + 1; j < i; j++) wordBuffer[j] &= ~WORD_CHAR; |
2777 | | /* also delete possible word end point */ |
2778 | 0 | if (last_char >= 0 && !(buffer[last_char].symbol & class->value)) { |
2779 | 0 | if ((buffer[last_char].word & class->value) && |
2780 | 0 | !(buffer[last_char].end & class->value)) |
2781 | 0 | buffer[last_char].symbol |= class->value; |
2782 | 0 | for (int j = last_char; j < i - 1; j++) |
2783 | 0 | if (buffer[j + 1].end & class->value) { |
2784 | 0 | buffer[j + 1].end &= ~class->value; |
2785 | 0 | buffer[j + 1].word &= ~class->value; |
2786 | 0 | break; |
2787 | 0 | } |
2788 | 0 | } |
2789 | 0 | } |
2790 | 0 | } |
2791 | | |
2792 | | /* check if at end of word (last character that is not a space) */ |
2793 | 0 | if (in_word) |
2794 | 0 | if (!(wordBuffer[i] & WORD_CHAR)) { |
2795 | | /* made it through whole word */ |
2796 | 0 | if (in_emp && word_start >= 0) { |
2797 | | /* if word is one symbol, turn it into a symbol (unless emphletter is |
2798 | | * not defined) */ |
2799 | 0 | if (letter_defined && char_cnt == 1) |
2800 | 0 | buffer[word_start].symbol |= class->value; |
2801 | 0 | else |
2802 | | /* else mark it as a word */ |
2803 | 0 | buffer[word_start].word |= class->value; |
2804 | 0 | } |
2805 | 0 | in_word = 0; |
2806 | 0 | word_start = -1; |
2807 | 0 | } |
2808 | | |
2809 | | /* count characters within the current emphasized word (section) that are |
2810 | | * emphasizable */ |
2811 | 0 | if (i == word_start) { |
2812 | 0 | last_char = i; |
2813 | 0 | char_cnt = 1; |
2814 | 0 | } else if (in_word && |
2815 | 0 | (endphraseafter_defined /* hack to achieve old behavior of endemphphrase |
2816 | | * after: if the last word of the passage ends |
2817 | | * with unemphasizable characters, the indicator |
2818 | | * is inserted after them */ |
2819 | 0 | || isEmphasizable(input->chars[i], table, class))) { |
2820 | 0 | last_char = i; |
2821 | 0 | if (in_emp) char_cnt++; |
2822 | 0 | } |
2823 | 0 | } |
2824 | | |
2825 | | /* clean up end */ |
2826 | 0 | if (in_emp) { |
2827 | 0 | buffer[input->length].end &= ~class->value; |
2828 | |
|
2829 | 0 | if (in_word) |
2830 | 0 | if (word_start >= 0) { |
2831 | | /* if word is one symbol, turn it into a symbol (unless emphletter is not |
2832 | | * defined) */ |
2833 | 0 | if (letter_defined && char_cnt == 1) |
2834 | 0 | buffer[word_start].symbol |= class->value; |
2835 | 0 | else |
2836 | | /* else mark it as a word */ |
2837 | 0 | buffer[word_start].word |= class->value; |
2838 | 0 | } |
2839 | 0 | } |
2840 | | |
2841 | | /* remove WORD_CHAR marks at the end of the previous word */ |
2842 | 0 | for (int j = last_char + 1; j < input->length; j++) wordBuffer[j] &= ~WORD_CHAR; |
2843 | | /* also delete possible word end point */ |
2844 | 0 | if (last_char >= 0 && !(buffer[last_char].symbol & class->value)) { |
2845 | 0 | if ((buffer[last_char].word & class->value) && |
2846 | 0 | !(buffer[last_char].end & class->value)) |
2847 | 0 | buffer[last_char].symbol |= class->value; |
2848 | 0 | for (int j = last_char; j < input->length - 1; j++) |
2849 | 0 | if (buffer[j + 1].end & class->value) { |
2850 | 0 | buffer[j + 1].end &= ~class->value; |
2851 | 0 | buffer[j + 1].word &= ~class->value; |
2852 | 0 | break; |
2853 | 0 | } |
2854 | 0 | } |
2855 | | |
2856 | | /* mark whole words */ |
2857 | 0 | word_start = -1; |
2858 | 0 | for (int i = 0; i < input->length; i++) { |
2859 | 0 | if (buffer[i].symbol & class->value) { |
2860 | 0 | if ((i == 0 || !(wordBuffer[i - 1] & WORD_CHAR)) && |
2861 | 0 | (i + 1 == input->length || !(wordBuffer[i + 1] & WORD_CHAR))) |
2862 | 0 | wordBuffer[i] |= WORD_WHOLE; |
2863 | 0 | } else if (buffer[i].word & class->value) { |
2864 | 0 | if (buffer[i].end & class->value) { |
2865 | 0 | if (word_start >= 0 && wordBuffer[i] & WORD_CHAR) |
2866 | 0 | wordBuffer[word_start] &= ~WORD_WHOLE; |
2867 | 0 | word_start = -1; |
2868 | 0 | } else { |
2869 | 0 | if (i == 0 || !(wordBuffer[i - 1] & WORD_CHAR)) |
2870 | 0 | wordBuffer[i] |= WORD_WHOLE; |
2871 | 0 | word_start = i; |
2872 | 0 | } |
2873 | 0 | } |
2874 | 0 | } |
2875 | 0 | } |
2876 | | |
2877 | | static void |
2878 | | convertToPassage(const int pass_start, const int pass_end, const int word_start, |
2879 | | EmphasisInfo *buffer, const EmphasisClass *class, |
2880 | 0 | const TranslationTableHeader *table, unsigned int *wordBuffer) { |
2881 | 0 | int i; |
2882 | 0 | const TranslationTableOffset *emphRule = table->emphRules[class->rule]; |
2883 | 0 | const TranslationTableRule *indicRule; |
2884 | |
|
2885 | 0 | for (i = pass_start; i <= pass_end; i++) { |
2886 | 0 | buffer[i].symbol &= ~class->value; |
2887 | 0 | buffer[i].word &= ~class->value; |
2888 | 0 | wordBuffer[i] &= ~WORD_WHOLE; |
2889 | 0 | } |
2890 | |
|
2891 | 0 | buffer[pass_start].begin |= class->value; |
2892 | 0 | if (brailleIndicatorDefined(emphRule[endOffset], table, &indicRule) || |
2893 | 0 | brailleIndicatorDefined(emphRule[endPhraseAfterOffset], table, &indicRule)) |
2894 | 0 | buffer[pass_end].end |= class->value; |
2895 | 0 | else if (brailleIndicatorDefined( |
2896 | 0 | emphRule[endPhraseBeforeOffset], table, &indicRule)) { |
2897 | | /* if the phrase end indicator is the same as the word indicator, mark it as a |
2898 | | * word so that the resolveEmphasisResets code applies */ |
2899 | 0 | const TranslationTableRule *begwordRule; |
2900 | 0 | if (brailleIndicatorDefined(emphRule[begWordOffset], table, &begwordRule) && |
2901 | 0 | indicRule->dotslen == begwordRule->dotslen && |
2902 | 0 | !memcmp(&indicRule->charsdots[0], &begwordRule->charsdots[0], |
2903 | 0 | begwordRule->dotslen * CHARSIZE)) { |
2904 | 0 | buffer[word_start].word |= class->value; |
2905 | | /* a passage has only whole emphasized words */ |
2906 | 0 | wordBuffer[word_start] |= WORD_WHOLE; |
2907 | 0 | } else { |
2908 | 0 | buffer[word_start].end |= class->value; |
2909 | 0 | } |
2910 | 0 | } |
2911 | 0 | } |
2912 | | |
2913 | | static void |
2914 | | resolveEmphasisPassages(EmphasisInfo *buffer, const EmphasisClass *class, |
2915 | | const TranslationTableHeader *table, const InString *input, |
2916 | 0 | unsigned int *wordBuffer) { |
2917 | 0 | const TranslationTableOffset *emphRule = table->emphRules[class->rule]; |
2918 | 0 | int in_word = 0, last_word_start = -1, last_word_end = -1; |
2919 | 0 | int in_emph_word = 0, last_emph_symbol = -1; |
2920 | 0 | int in_pass = 0, last_pass_word_start = -1, last_pass_word_end = -1, pass_start = -1; |
2921 | 0 | unsigned int pass_word_cnt = 0; |
2922 | 0 | int endphraseafter_defined = emphRule[endPhraseAfterOffset] || emphRule[endOffset]; |
2923 | |
|
2924 | 0 | for (int i = 0; i < input->length; i++) { |
2925 | | |
2926 | | /* check if at beginning of word (words are determined by isEmphSpace() and |
2927 | | * further refined at the beginning and end of words based on isEmphasizable()) */ |
2928 | 0 | if (!in_word && wordBuffer[i] & WORD_CHAR) { |
2929 | 0 | in_word = 1; |
2930 | 0 | last_word_start = i; |
2931 | 0 | } else { /* check if at end of word */ |
2932 | 0 | if (in_word && !(wordBuffer[i] & WORD_CHAR)) { |
2933 | 0 | in_word = 0; |
2934 | 0 | last_word_end = i; |
2935 | 0 | } |
2936 | 0 | } |
2937 | | |
2938 | | /* check for symbol or word indicator */ |
2939 | 0 | if (!in_emph_word && |
2940 | 0 | (buffer[i].symbol & class->value || |
2941 | 0 | (buffer[i].word & class->value && |
2942 | 0 | !(buffer[i].end & class->value)))) { |
2943 | 0 | if (buffer[i].symbol & class->value) { |
2944 | 0 | last_emph_symbol = i; |
2945 | 0 | } else { |
2946 | 0 | in_emph_word = 1; |
2947 | 0 | } |
2948 | 0 | if (in_pass) { |
2949 | | /* only whole capitalized words (words without lowercase letters) can be |
2950 | | * part of a passage (note that this also includes words without letters |
2951 | | * if the next word with letters is a whole word) */ |
2952 | 0 | if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) { |
2953 | 0 | last_pass_word_start = i; |
2954 | 0 | pass_word_cnt++; |
2955 | 0 | } else |
2956 | 0 | goto end_passage; |
2957 | 0 | } |
2958 | 0 | } else { /* check for word end indicator or word end */ |
2959 | 0 | if ((in_emph_word && |
2960 | 0 | (buffer[i].word & class->value && |
2961 | 0 | buffer[i].end & class->value)) || |
2962 | 0 | last_word_end == i) { |
2963 | 0 | in_emph_word = 0; |
2964 | 0 | if (in_pass) { |
2965 | | /* only whole capitalized words can be part of a passage */ |
2966 | 0 | last_pass_word_end = i; |
2967 | 0 | } |
2968 | 0 | } |
2969 | 0 | } |
2970 | | |
2971 | | /* check if possibly at beginning of passage */ |
2972 | 0 | if (!in_pass && (in_emph_word || last_emph_symbol == i)) { |
2973 | | /* only whole capitalized words can be part of a passage */ |
2974 | 0 | if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) { |
2975 | 0 | in_pass = 1; |
2976 | 0 | pass_start = i; |
2977 | 0 | last_pass_word_start = i; |
2978 | 0 | last_pass_word_end = -1; |
2979 | 0 | pass_word_cnt = 1; |
2980 | 0 | } |
2981 | 0 | } else { /* check if at end of passage */ |
2982 | 0 | if (in_pass) { |
2983 | 0 | if (in_word && !(in_emph_word || last_emph_symbol == i)) { |
2984 | 0 | end_passage: |
2985 | 0 | in_pass = 0; |
2986 | 0 | if (last_pass_word_end < last_pass_word_start) { |
2987 | 0 | last_pass_word_end = i; |
2988 | 0 | } |
2989 | | /* it is a passage only if the number of words is greater than or |
2990 | | * equal to the minimum length (lencapsphrase / lenemphphrase) */ |
2991 | | /* if the phrase closing indicator is placed before the last word and |
2992 | | * it was not a whole word, the minimum phrase length is increased */ |
2993 | 0 | if (!endphraseafter_defined && last_pass_word_end != last_word_end) { |
2994 | 0 | pass_word_cnt--; |
2995 | 0 | } |
2996 | 0 | if (pass_word_cnt >= emphRule[lenPhraseOffset]) |
2997 | 0 | convertToPassage(pass_start, last_pass_word_end, |
2998 | 0 | last_pass_word_start, buffer, class, table, wordBuffer); |
2999 | 0 | } else if (i == input->length - 1) { |
3000 | 0 | if (pass_word_cnt >= emphRule[lenPhraseOffset]) { |
3001 | 0 | if (last_pass_word_end < last_pass_word_start) { |
3002 | 0 | last_pass_word_end = input->length; |
3003 | 0 | } |
3004 | 0 | convertToPassage(pass_start, last_pass_word_end, |
3005 | 0 | last_pass_word_start, buffer, class, table, wordBuffer); |
3006 | 0 | } |
3007 | 0 | } |
3008 | 0 | } |
3009 | 0 | } |
3010 | 0 | } |
3011 | 0 | } |
3012 | | |
3013 | | static void |
3014 | | resolveEmphasisSingleSymbols( |
3015 | 0 | EmphasisInfo *buffer, const EmphasisClass *class, const InString *input) { |
3016 | 0 | int i; |
3017 | |
|
3018 | 0 | for (i = 0; i < input->length; i++) { |
3019 | 0 | if (buffer[i].begin & class->value) |
3020 | 0 | if (buffer[i + 1].end & class->value) { |
3021 | 0 | buffer[i].begin &= ~class->value; |
3022 | 0 | buffer[i + 1].end &= ~class->value; |
3023 | 0 | buffer[i].symbol |= class->value; |
3024 | 0 | } |
3025 | 0 | } |
3026 | 0 | } |
3027 | | |
3028 | | static void |
3029 | | resolveEmphasisAllSymbols(EmphasisInfo *buffer, const EmphasisClass *class, |
3030 | | const TranslationTableHeader *table, formtype *typebuf, const InString *input, |
3031 | 0 | unsigned int *wordBuffer) { |
3032 | | |
3033 | | /* Mark every emphasized character individually with symbol if begemphword is not |
3034 | | * defined (assumes resolveEmphasisWords has not been run) */ |
3035 | | /* Mark every emphasized character individually with symbol if endemphword is not |
3036 | | * defined |
3037 | | * and emphasis ends within a word (assumes resolveEmphasisWords has been run) */ |
3038 | | /* Note that it is possible that emphletter is also not defined, in which case the |
3039 | | * emphasis will not be marked at all. */ |
3040 | |
|
3041 | 0 | const TranslationTableOffset *emphRule = table->emphRules[class->rule]; |
3042 | 0 | const int begword_enabled = emphRule[begWordOffset]; |
3043 | 0 | const int endword_enabled = emphRule[endWordOffset]; |
3044 | |
|
3045 | 0 | if (!begword_enabled) { |
3046 | 0 | int in_emph = 0; |
3047 | 0 | for (int i = 0; i < input->length; i++) { |
3048 | 0 | if (in_emph) { |
3049 | 0 | if (buffer[i].end & class->value) { |
3050 | 0 | in_emph = 0; |
3051 | 0 | buffer[i].end &= ~class->value; |
3052 | 0 | } |
3053 | 0 | } else { |
3054 | 0 | if (buffer[i].begin & class->value) { |
3055 | 0 | in_emph = 1; |
3056 | 0 | buffer[i].begin &= ~class->value; |
3057 | 0 | } |
3058 | 0 | } |
3059 | 0 | if (in_emph) { |
3060 | 0 | buffer[i].symbol |= class->value; |
3061 | 0 | } |
3062 | 0 | } |
3063 | 0 | } else if (!endword_enabled) { |
3064 | 0 | int in_pass = 0, in_word = 0, word_start = -1; |
3065 | 0 | for (int i = 0; i < input->length; i++) { |
3066 | 0 | if (in_pass) |
3067 | 0 | if (buffer[i].end & class->value || buffer[i].word & class->value) |
3068 | 0 | in_pass = 0; |
3069 | 0 | if (!in_pass) { |
3070 | 0 | if (buffer[i].begin & class->value) |
3071 | 0 | in_pass = 1; |
3072 | 0 | else { |
3073 | 0 | if (!in_word) |
3074 | 0 | if (buffer[i].word & class->value) { |
3075 | 0 | in_word = 1; |
3076 | 0 | word_start = i; |
3077 | 0 | } |
3078 | 0 | if (in_word) { |
3079 | 0 | if (buffer[i].word & class->value && |
3080 | 0 | buffer[i].end & class->value) { |
3081 | 0 | in_word = 0; |
3082 | 0 | if (begword_enabled && !endword_enabled) { |
3083 | 0 | buffer[i].end &= ~class->value; |
3084 | 0 | buffer[i].word &= ~class->value; |
3085 | 0 | buffer[word_start].word &= ~class->value; |
3086 | 0 | for (int j = word_start; j < i; j++) |
3087 | 0 | buffer[j].symbol |= class->value; |
3088 | 0 | } |
3089 | 0 | } else if (!(wordBuffer[i] & WORD_CHAR)) { |
3090 | 0 | in_word = 0; |
3091 | 0 | } |
3092 | 0 | } |
3093 | 0 | } |
3094 | 0 | } |
3095 | 0 | } |
3096 | 0 | } |
3097 | 0 | } |
3098 | | |
/**
 * Adjust the word markings of one emphasis class around characters that
 * reset the emphasis inside a word.
 *
 * A character counts as a reset when its wordBuffer entry has WORD_RESET set
 * or when resetsEmphMode() returns true for it. The function walks the input
 * once, skipping positions that are inside a passage (between a begin bit and
 * the position that ends it), and for every emphasized word:
 *  - re-marks the word start after a reset,
 *  - turns words consisting of a single counted letter into symbols when
 *    emphletter is defined (`letter_defined`),
 *  - removes end/word bits that have become superfluous.
 *
 * @param buffer      emphasis markings, modified in place
 * @param class       emphasis class being resolved
 * @param table       translation table (emphasis rules, reset characters)
 * @param input       input string
 * @param wordBuffer  per-character word flags; WORD_WHOLE may be cleared or
 *                    moved to the next character
 *
 * NOTE(review): `buffer[i]` is accessed with `i == input->length` in the
 * clean-up after the loop, and `buffer[i + 1]` / `wordBuffer[i + 1]` may be
 * accessed with `i == input->length - 1` inside the loop; this assumes both
 * arrays have at least `input->length + 1` entries -- TODO confirm at the
 * allocation site.
 */
static void
resolveEmphasisResets(EmphasisInfo *buffer, const EmphasisClass *class,
        const TranslationTableHeader *table, const InString *input,
        unsigned int *wordBuffer) {
    /* in_word / in_pass: currently inside an emphasized word / passage
     * word_start:        position carrying the word bit of the current word
     * word_reset:        a reset character was seen and the word has not been
     *                    restarted yet
     * letter_cnt:        characters counted since word_start
     * pass_end:          position of the word bit that ended the last passage,
     *                    or -1 */
    int in_word = 0, in_pass = 0, word_start = -1, word_reset = 0, letter_cnt = 0,
        pass_end = -1;
    int i;
    int letter_defined = table->emphRules[class->rule][letterOffset];

    for (i = 0; i < input->length; i++) {
        if (in_pass) {
            if (buffer[i].end & class->value)
                in_pass = 0;
            else if (buffer[i].word & class->value) {
                /* the passage is ended with a "endphrase before" indicator and this
                 * indicator is the same as the "begword" indicator (see convertToPassage)
                 */
                in_pass = 0;
                /* remember this position so that if there is a reset later in this word,
                 * we can remove this indicator */
                pass_end = i;
            }
        }
        /* not an else: a passage that ended just above is processed further
         * at the same position */
        if (!in_pass) {
            if (buffer[i].begin & class->value) {
                in_pass = 1;
            } else {
                if (!in_word) {
                    if (buffer[i].word & class->value) {
                        /* deal with case when reset was at beginning of word */
                        if (wordBuffer[i] & WORD_RESET ||
                                resetsEmphMode(input->chars[i], table, class)) {
                            if (!letter_defined)
                                /* if emphletter is not defined, use the word indicator */
                                ;
                            else if (pass_end == i)
                                /* also use the word indicator if the reset marks the end
                                 * of a passage */
                                ;
                            else {
                                /* use the symbol indicator symbol for the current
                                 * character */
                                buffer[i].symbol |= class->value;
                                /* move the word indicator to the next character or remove
                                 * it altogether if the next character is a space */
                                if (wordBuffer[i + 1] & WORD_CHAR) {
                                    buffer[i + 1].word |= class->value;
                                    if (wordBuffer[i] & WORD_WHOLE)
                                        wordBuffer[i + 1] |= WORD_WHOLE;
                                    /* NOTE(review): this branch is only reached when
                                     * pass_end != i (see the else-if above), so the
                                     * condition below can never be true here */
                                    if (pass_end == i) pass_end++;
                                }
                                buffer[i].word &= ~class->value;
                                wordBuffer[i] &= ~WORD_WHOLE;
                                continue;
                            }
                        }

                        /* start tracking a new word at this position */
                        in_word = 1;
                        word_start = i;
                        letter_cnt = 0;
                        word_reset = 0;
                    }

                    /* it is possible for a character to have been marked as a symbol when
                     * it should not be one */
                    else if (buffer[i].symbol & class->value) {
                        if (wordBuffer[i] & WORD_RESET ||
                                resetsEmphMode(input->chars[i], table, class))
                            buffer[i].symbol &= ~class->value;
                    }
                }

                if (in_word) {

                    /* at end of word */
                    if (!(wordBuffer[i] & WORD_CHAR) ||
                            (buffer[i].word & class->value &&
                                    buffer[i].end & class->value)) {
                        in_word = 0;

                        /* check if symbol */
                        if (letter_defined && letter_cnt == 1 && word_start != pass_end) {
                            buffer[word_start].symbol |= class->value;
                            buffer[word_start].word &= ~class->value;
                            wordBuffer[word_start] &= ~WORD_WHOLE;
                            buffer[i].end &= ~class->value;
                            buffer[i].word &= ~class->value;
                        }

                        /* if word ended on a reset or last char was a reset, get rid of
                         * end bits */
                        if (word_reset || wordBuffer[i] & WORD_RESET ||
                                resetsEmphMode(input->chars[i], table, class)) {
                            buffer[i].end &= ~class->value;
                            buffer[i].word &= ~class->value;
                        }

                        /* if word ended when it began, get rid of all bits */
                        if (i == word_start) {
                            wordBuffer[word_start] &= ~WORD_WHOLE;
                            buffer[i].end &= ~class->value;
                            buffer[i].word &= ~class->value;
                        }
                    } else {
                        /* hit reset */
                        if (wordBuffer[i] & WORD_RESET ||
                                resetsEmphMode(input->chars[i], table, class)) {

                            /* check if symbol is not already resetting */
                            if (letter_defined && letter_cnt == 1 &&
                                    word_start != pass_end) {
                                buffer[word_start].symbol |= class->value;
                                buffer[word_start].word &= ~class->value;
                                wordBuffer[word_start] &= ~WORD_WHOLE;
                            }

                            /* if reset is a letter or emphmodechar, make it the new
                             * word_start */
                            if (!resetsEmphMode(input->chars[i], table, class)) {
                                if (word_start == pass_end)
                                    /* move the word marker that ends the passage to the
                                     * current position */
                                    buffer[pass_end].word &= ~class->value;
                                /* note: only the statement above is guarded by the
                                 * if; pass_end is cleared unconditionally */
                                pass_end = -1;
                                word_reset = 0;
                                word_start = i;
                                letter_cnt = 1;
                                buffer[i].word |= class->value;
                            } else
                                word_reset = 1;

                            continue;
                        }

                        /* first non-reset character after a reset: restart the word
                         * here */
                        if (word_reset) {
                            if (word_start == pass_end)
                                /* move the word marker that ends the passage to the
                                 * current position */
                                buffer[pass_end].word &= ~class->value;
                            /* note: only the statement above is guarded by the if;
                             * pass_end is cleared unconditionally */
                            pass_end = -1;
                            word_reset = 0;
                            word_start = i;
                            letter_cnt = 0;
                            buffer[i].word |= class->value;
                        }

                        letter_cnt++;
                    }
                }
            }
        }
    }

    /* clean up end */
    /* here i == input->length, so buffer[i] is the entry following the last
     * input character */
    if (in_word) {
        /* check if symbol */
        if (letter_defined && letter_cnt == 1 && word_start != pass_end) {
            buffer[word_start].symbol |= class->value;
            buffer[word_start].word &= ~class->value;
            wordBuffer[word_start] &= ~WORD_WHOLE;
            buffer[i].end &= ~class->value;
            buffer[i].word &= ~class->value;
        }

        if (word_reset) {
            buffer[i].end &= ~class->value;
            buffer[i].word &= ~class->value;
        }
    }
}
3269 | | |
3270 | | static void |
3271 | | markEmphases(const TranslationTableHeader *table, const InString *input, |
3272 | 0 | formtype *typebuf, unsigned int *wordBuffer, EmphasisInfo *emphasisBuffer) { |
3273 | | |
3274 | | /* handle capsnocont */ |
3275 | 0 | if (table->capsNoCont) { |
3276 | 0 | int caps_cnt = 0; // number of consecutive characters ending with the current |
3277 | | // that are uppercase letters |
3278 | 0 | for (int i = 0; i < input->length; i++) { |
3279 | 0 | if (checkCharAttr(input->chars[i], CTC_UpperCase, table)) { |
3280 | | /* mark two or more consecutive caps with nocont */ |
3281 | 0 | caps_cnt++; |
3282 | 0 | if (caps_cnt >= 2) { |
3283 | 0 | typebuf[i] |= no_contract; |
3284 | | /* also mark the previous one */ |
3285 | 0 | if (caps_cnt == 2) typebuf[i - 1] |= no_contract; |
3286 | 0 | } |
3287 | 0 | } else { |
3288 | 0 | caps_cnt = 0; |
3289 | 0 | } |
3290 | 0 | } |
3291 | 0 | } |
3292 | |
|
3293 | 0 | for (int j = 0; j < MAX_EMPH_CLASSES + MAX_MODES; j++) { |
3294 | 0 | const EmphasisClass *emphClass = j < MAX_EMPH_CLASSES |
3295 | 0 | ? &table->emphClasses[j] |
3296 | 0 | : &table->modes[j - MAX_EMPH_CLASSES]; |
3297 | 0 | if (!emphClass->value) continue; |
3298 | 0 | const TranslationTableOffset *emphRule = table->emphRules[emphClass->rule]; |
3299 | | |
3300 | | /* clear out previous word markings and mark non-space characters in word buffer |
3301 | | */ |
3302 | 0 | for (int i = 0; i < input->length; i++) { |
3303 | 0 | if (isEmphSpace(input->chars[i], table, emphClass)) |
3304 | 0 | wordBuffer[i] &= ~WORD_CHAR; |
3305 | 0 | else |
3306 | 0 | wordBuffer[i] |= WORD_CHAR; |
3307 | 0 | wordBuffer[i] &= ~WORD_WHOLE; |
3308 | 0 | } |
3309 | | |
3310 | | /* mark beginning and end points */ |
3311 | 0 | resolveEmphasisBeginEnd( |
3312 | 0 | emphasisBuffer, emphClass, table, input, typebuf, wordBuffer); |
3313 | |
|
3314 | 0 | if (emphRule[begWordOffset]) { |
3315 | | /* mark word beginning and end points, whole words, and symbols (single |
3316 | | * characters) */ |
3317 | 0 | resolveEmphasisWords(emphasisBuffer, emphClass, table, input, wordBuffer); |
3318 | 0 | if (emphRule[lenPhraseOffset]) |
3319 | | /* remove markings of words that form a passage, and mark the begin and |
3320 | | * end of these passages instead */ |
3321 | 0 | resolveEmphasisPassages( |
3322 | 0 | emphasisBuffer, emphClass, table, input, wordBuffer); |
3323 | | /* mark where emphasis in a word needs to be retriggered after it was reset */ |
3324 | 0 | resolveEmphasisResets(emphasisBuffer, emphClass, table, input, wordBuffer); |
3325 | 0 | if (!emphRule[endWordOffset]) |
3326 | | /* if endword is not defined and emphasis ends within a word, mark every |
3327 | | * emphasised character individually as symbol */ |
3328 | 0 | resolveEmphasisAllSymbols( |
3329 | 0 | emphasisBuffer, emphClass, table, typebuf, input, wordBuffer); |
3330 | 0 | } else if (emphRule[letterOffset]) { |
3331 | 0 | if (emphRule[begOffset]) |
3332 | 0 | resolveEmphasisSingleSymbols(emphasisBuffer, emphClass, input); |
3333 | 0 | else |
3334 | 0 | resolveEmphasisAllSymbols( |
3335 | 0 | emphasisBuffer, emphClass, table, typebuf, input, wordBuffer); |
3336 | 0 | } |
3337 | 0 | if (emphClass->mode) { |
3338 | | /* only mark if actually a capital letter (don't mark spaces or punctuation). |
3339 | | */ |
3340 | 0 | for (int i = 0; i < input->length; i++) { |
3341 | 0 | if (emphasisBuffer[i].symbol & emphClass->value) { |
3342 | 0 | if (emphClass->mode == CTC_UpperCase) { |
3343 | 0 | if (!(typebuf[i] & CAPSEMPH)) |
3344 | 0 | emphasisBuffer[i].symbol &= ~emphClass->value; |
3345 | 0 | } else { |
3346 | 0 | if (!checkCharAttr(input->chars[i], emphClass->mode, table)) |
3347 | 0 | emphasisBuffer[i].symbol &= ~emphClass->value; |
3348 | 0 | } |
3349 | 0 | } |
3350 | 0 | } |
3351 | 0 | } |
3352 | 0 | } |
3353 | 0 | } |
3354 | | |
3355 | | static void |
3356 | | insertEmphasisSymbol(const EmphasisInfo *buffer, const int at, const EmphasisClass *class, |
3357 | | const TranslationTableHeader *table, int pos, const InString *input, |
3358 | 0 | OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus) { |
3359 | 0 | if (buffer[at].symbol & class->value) { |
3360 | 0 | const TranslationTableRule *indicRule; |
3361 | 0 | if (brailleIndicatorDefined( |
3362 | 0 | table->emphRules[class->rule][letterOffset], table, &indicRule)) |
3363 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos, |
3364 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3365 | 0 | } |
3366 | 0 | } |
3367 | | |
3368 | | static void |
3369 | | insertEmphasisBegin(const EmphasisInfo *buffer, const int at, const EmphasisClass *class, |
3370 | | const TranslationTableHeader *table, int pos, const InString *input, |
3371 | 0 | OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus) { |
3372 | 0 | const TranslationTableOffset *emphRule = table->emphRules[class->rule]; |
3373 | 0 | const TranslationTableRule *indicRule; |
3374 | 0 | if (buffer[at].begin & class->value) { |
3375 | 0 | if (brailleIndicatorDefined(emphRule[begPhraseOffset], table, &indicRule)) |
3376 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos, |
3377 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3378 | 0 | else if (brailleIndicatorDefined(emphRule[begOffset], table, &indicRule)) |
3379 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos, |
3380 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3381 | 0 | } |
3382 | |
|
3383 | 0 | if (buffer[at].word & class->value |
3384 | | // && !(buffer[at].begin & class->value) |
3385 | 0 | && !(buffer[at].end & class->value)) { |
3386 | 0 | if (brailleIndicatorDefined(emphRule[begWordOffset], table, &indicRule)) |
3387 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos, |
3388 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3389 | 0 | } |
3390 | 0 | } |
3391 | | |
3392 | | static void |
3393 | | insertEmphasisEnd(const EmphasisInfo *buffer, const int at, const EmphasisClass *class, |
3394 | | const TranslationTableHeader *table, int pos, const InString *input, |
3395 | 0 | OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus) { |
3396 | 0 | const TranslationTableOffset *emphRule = table->emphRules[class->rule]; |
3397 | 0 | if (buffer[at].end & class->value) { |
3398 | 0 | const TranslationTableRule *indicRule; |
3399 | 0 | if (buffer[at].word & class->value) { |
3400 | 0 | if (brailleIndicatorDefined(emphRule[endWordOffset], table, &indicRule)) |
3401 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, -1, |
3402 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus); |
3403 | 0 | } else { |
3404 | 0 | if (brailleIndicatorDefined(emphRule[endOffset], table, &indicRule)) |
3405 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, -1, |
3406 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus); |
3407 | 0 | else if (brailleIndicatorDefined( |
3408 | 0 | emphRule[endPhraseAfterOffset], table, &indicRule)) |
3409 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, -1, |
3410 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus); |
3411 | 0 | else if (brailleIndicatorDefined( |
3412 | 0 | emphRule[endPhraseBeforeOffset], table, &indicRule)) |
3413 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, |
3414 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus); |
3415 | 0 | } |
3416 | 0 | } |
3417 | 0 | } |
3418 | | |
3419 | | static int |
3420 | 0 | endCount(const EmphasisInfo *buffer, const int at, const EmphasisClass *class) { |
3421 | 0 | int i, cnt = 1; |
3422 | 0 | if (!(buffer[at].end & class->value)) return 0; |
3423 | 0 | for (i = at - 1; i >= 0; i--) |
3424 | 0 | if (buffer[i].begin & class->value || buffer[i].word & class->value) |
3425 | 0 | break; |
3426 | 0 | else |
3427 | 0 | cnt++; |
3428 | 0 | return cnt; |
3429 | 0 | } |
3430 | | |
3431 | | static int |
3432 | | beginCount(const EmphasisInfo *buffer, const int at, const EmphasisClass *class, |
3433 | 0 | const TranslationTableHeader *table, const InString *input) { |
3434 | 0 | if (buffer[at].begin & class->value) { |
3435 | 0 | int i, cnt = 1; |
3436 | 0 | for (i = at + 1; i < input->length; i++) |
3437 | 0 | if (buffer[i].end & class->value) |
3438 | 0 | break; |
3439 | 0 | else |
3440 | 0 | cnt++; |
3441 | 0 | return cnt; |
3442 | 0 | } else if (buffer[at].word & class->value) { |
3443 | 0 | int i, cnt = 1; |
3444 | 0 | for (i = at + 1; i < input->length; i++) |
3445 | 0 | if (buffer[i].end & class->value) |
3446 | 0 | break; |
3447 | 0 | else if (checkCharAttr(input->chars[i], CTC_SeqDelimiter, table)) |
3448 | 0 | break; |
3449 | 0 | else if (isEmphSpace(input->chars[i], table, class)) |
3450 | 0 | break; |
3451 | 0 | else |
3452 | 0 | cnt++; |
3453 | 0 | return cnt; |
3454 | 0 | } |
3455 | 0 | return 0; |
3456 | 0 | } |
3457 | | |
3458 | | static void |
3459 | | insertEmphasesAt(int begin, int end, int caps, int other, const int at, |
3460 | | const TranslationTableHeader *table, int pos, const InString *input, |
3461 | | OutString *output, int *posMapping, const EmphasisInfo *emphasisBuffer, |
3462 | 0 | int *cursorPosition, int *cursorStatus) { |
3463 | | |
3464 | | /* The order of inserting the end symbols must be the reverse |
3465 | | * of the insertions of the begin symbols so that they will |
3466 | | * nest properly when multiple emphases start and end at |
3467 | | * the same place */ |
3468 | | // TODO: ordering with partial word |
3469 | |
|
3470 | 0 | if (end && caps) |
3471 | 0 | for (int i = 0; i < MAX_MODES; i++) { |
3472 | 0 | const EmphasisClass *emphClass = &table->modes[i]; |
3473 | 0 | if (!emphClass->value) continue; |
3474 | 0 | if ((emphasisBuffer[at].begin | emphasisBuffer[at].end | |
3475 | 0 | emphasisBuffer[at].word | emphasisBuffer[at].symbol) & |
3476 | 0 | emphClass->value) |
3477 | 0 | insertEmphasisEnd(emphasisBuffer, at, emphClass, table, pos, input, |
3478 | 0 | output, posMapping, cursorPosition, cursorStatus); |
3479 | 0 | } |
3480 | |
|
3481 | 0 | if (end && other) { |
3482 | 0 | int type_counts[MAX_EMPH_CLASSES]; |
3483 | | |
3484 | | /* end bits */ |
3485 | 0 | for (int i = 0; i < MAX_EMPH_CLASSES; i++) { |
3486 | 0 | const EmphasisClass *emphClass = &table->emphClasses[i]; |
3487 | 0 | if (!emphClass->value) |
3488 | 0 | type_counts[i] = 0; |
3489 | 0 | else |
3490 | 0 | type_counts[i] = endCount(emphasisBuffer, at, emphClass); |
3491 | 0 | } |
3492 | |
|
3493 | 0 | while (1) { |
3494 | 0 | int min = -1; |
3495 | 0 | for (int i = 0; i < MAX_EMPH_CLASSES; i++) |
3496 | 0 | if (type_counts[i] > 0) |
3497 | 0 | if (min < 0 || type_counts[i] < type_counts[min]) min = i; |
3498 | 0 | if (min < 0) break; |
3499 | 0 | type_counts[min] = 0; |
3500 | 0 | insertEmphasisEnd(emphasisBuffer, at, &table->emphClasses[min], table, pos, |
3501 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3502 | 0 | } |
3503 | 0 | } |
3504 | |
|
3505 | 0 | if (begin && other) { |
3506 | 0 | int type_counts[MAX_EMPH_CLASSES]; |
3507 | | |
3508 | | /* begin and word bits */ |
3509 | 0 | for (int i = 0; i < MAX_EMPH_CLASSES; i++) { |
3510 | 0 | const EmphasisClass *emphClass = &table->emphClasses[i]; |
3511 | 0 | if (!emphClass->value) |
3512 | 0 | type_counts[i] = 0; |
3513 | 0 | else |
3514 | 0 | type_counts[i] = beginCount(emphasisBuffer, at, emphClass, table, input); |
3515 | 0 | } |
3516 | |
|
3517 | 0 | while (1) { |
3518 | 0 | int max = MAX_EMPH_CLASSES - 1; |
3519 | 0 | for (int i = MAX_EMPH_CLASSES - 1; i >= 0; i--) |
3520 | 0 | if (type_counts[max] < type_counts[i]) max = i; |
3521 | 0 | if (!type_counts[max]) break; |
3522 | 0 | type_counts[max] = 0; |
3523 | 0 | insertEmphasisBegin(emphasisBuffer, at, &table->emphClasses[max], table, pos, |
3524 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3525 | 0 | } |
3526 | | |
3527 | | /* symbol bits */ |
3528 | 0 | for (int i = MAX_EMPH_CLASSES - 1; i >= 0; i--) |
3529 | 0 | if ((emphasisBuffer[at].begin | emphasisBuffer[at].end | |
3530 | 0 | emphasisBuffer[at].word | emphasisBuffer[at].symbol) & |
3531 | 0 | table->emphClasses[i].value) |
3532 | 0 | insertEmphasisSymbol(emphasisBuffer, at, &table->emphClasses[i], table, |
3533 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus); |
3534 | 0 | } |
3535 | |
|
3536 | 0 | if (begin && caps) { |
3537 | | |
3538 | | /* insert capitalization last so it will be closest to word */ |
3539 | | /* other mode indicators are inserted so that those who are defined first are |
3540 | | * closest to word */ |
3541 | 0 | for (int i = MAX_MODES - 1; i >= 0; i--) { |
3542 | 0 | const EmphasisClass *emphClass = &table->modes[i]; |
3543 | 0 | if (!emphClass->value) continue; |
3544 | 0 | if ((emphasisBuffer[at].begin | emphasisBuffer[at].end | |
3545 | 0 | emphasisBuffer[at].word | emphasisBuffer[at].symbol) & |
3546 | 0 | emphClass->value) { |
3547 | 0 | insertEmphasisBegin(emphasisBuffer, at, emphClass, table, pos, input, |
3548 | 0 | output, posMapping, cursorPosition, cursorStatus); |
3549 | 0 | insertEmphasisSymbol(emphasisBuffer, at, emphClass, table, pos, input, |
3550 | 0 | output, posMapping, cursorPosition, cursorStatus); |
3551 | 0 | } |
3552 | 0 | } |
3553 | 0 | } |
3554 | 0 | } |
3555 | | |
3556 | | static void |
3557 | | checkNumericMode(const TranslationTableHeader *table, int pos, const InString *input, |
3558 | | OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus, |
3559 | 0 | int *dontContract, int *numericMode) { |
3560 | | /* check if numeric mode is active and insert number sign and nocontract sign when |
3561 | | * needed */ |
3562 | |
|
3563 | 0 | int i; |
3564 | 0 | const TranslationTableRule *indicRule; |
3565 | 0 | if (!brailleIndicatorDefined(table->numberSign, table, &indicRule)) return; |
3566 | | |
3567 | | /* not in numeric mode */ |
3568 | 0 | if (!*numericMode) { |
3569 | 0 | if (checkCharAttr(input->chars[pos], CTC_Digit | CTC_LitDigit, table)) { |
3570 | 0 | *numericMode = 1; |
3571 | | /* if the nocontractsign is defined and it is the same as the nonumsign then |
3572 | | disable contraction */ |
3573 | 0 | if (isIndicatorEqual(table->noContractSign, table->noNumberSign, table)) |
3574 | 0 | *dontContract = 1; |
3575 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos, |
3576 | 0 | input, output, posMapping, cursorPosition, cursorStatus); |
3577 | 0 | } else if (checkCharAttr(input->chars[pos], CTC_NumericMode, table)) { |
3578 | 0 | for (i = pos + 1; i < input->length; i++) { |
3579 | 0 | if (checkCharAttr(input->chars[i], CTC_Digit | CTC_LitDigit, table)) { |
3580 | 0 | *numericMode = 1; |
3581 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, |
3582 | 0 | 0, pos, input, output, posMapping, cursorPosition, |
3583 | 0 | cursorStatus); |
3584 | 0 | break; |
3585 | 0 | } else if (!checkCharAttr(input->chars[i], CTC_NumericMode, table)) |
3586 | 0 | break; |
3587 | 0 | } |
3588 | 0 | } |
3589 | 0 | } |
3590 | | |
3591 | | /* in numeric mode */ |
3592 | 0 | else { |
3593 | 0 | if (!checkCharAttr(input->chars[pos], |
3594 | 0 | CTC_Digit | CTC_LitDigit | CTC_NumericMode | CTC_MidEndNumericMode, |
3595 | 0 | table)) { |
3596 | 0 | *numericMode = 0; |
3597 | 0 | if (brailleIndicatorDefined(table->noNumberSign, table, &indicRule)) |
3598 | 0 | if (checkCharAttr(input->chars[pos], CTC_NumericNoContract, table)) |
3599 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, |
3600 | 0 | 0, pos, input, output, posMapping, cursorPosition, |
3601 | 0 | cursorStatus); |
3602 | 0 | } |
3603 | 0 | } |
3604 | 0 | } |
3605 | | |
3606 | | static int |
3607 | | translateString(const TranslationTableHeader *table, int mode, int currentPass, |
3608 | | const InString *input, OutString *output, int *posMapping, formtype *typebuf, |
3609 | | unsigned char *srcSpacing, unsigned char *destSpacing, unsigned int *wordBuffer, |
3610 | | EmphasisInfo *emphasisBuffer, int haveEmphasis, int *realInlen, |
3611 | 0 | int *cursorPosition, int *cursorStatus, int compbrlStart, int compbrlEnd) { |
3612 | 0 | int pos; |
3613 | 0 | int transOpcode; |
3614 | 0 | int prevTransOpcode; |
3615 | 0 | const TranslationTableRule *transRule; |
3616 | 0 | int transCharslen; |
3617 | 0 | int passCharDots; |
3618 | 0 | const widechar *passInstructions; |
3619 | 0 | int passIC; /* Instruction counter */ |
3620 | 0 | PassRuleMatch patternMatch; |
3621 | 0 | TranslationTableRule *groupingRule; |
3622 | 0 | widechar groupingOp; |
3623 | 0 | int numericMode; |
3624 | 0 | int dontContract; |
3625 | 0 | LastWord lastWord; |
3626 | 0 | int insertEmphasesFrom; |
3627 | 0 | TranslationTableCharacter *curCharDef; |
3628 | 0 | int repwordStart; |
3629 | 0 | int repwordLength; |
3630 | 0 | const InString *origInput = input; |
3631 | 0 | int warnedForNoTranslate = 0; |
3632 | | /* Main translation routine */ |
3633 | 0 | int k; |
3634 | 0 | translation_direction = 1; |
3635 | 0 | markSyllables(table, input, typebuf); |
3636 | 0 | numericMode = 0; |
3637 | 0 | lastWord = (LastWord){ 0, 0, 0 }; |
3638 | 0 | dontContract = 0; |
3639 | 0 | prevTransOpcode = CTO_None; |
3640 | 0 | pos = output->length = 0; |
3641 | 0 | int posIncremented = 1; |
3642 | 0 | insertEmphasesFrom = 0; |
3643 | 0 | _lou_resetPassVariables(); |
3644 | 0 | if (typebuf && capsletterDefined(table)) |
3645 | 0 | for (k = 0; k < input->length; k++) |
3646 | 0 | if (checkCharAttr(input->chars[k], CTC_UpperCase, table)) |
3647 | 0 | typebuf[k] |= CAPSEMPH; |
3648 | |
|
3649 | 0 | markEmphases(table, input, typebuf, wordBuffer, emphasisBuffer); |
3650 | |
|
3651 | 0 | while (pos <= input->length) { /* the main translation loop */ |
3652 | 0 | if (pos > 0 && checkCharAttr(input->chars[pos - 1], CTC_Space, table) && |
3653 | 0 | (transOpcode != CTO_JoinableWord)) |
3654 | 0 | lastWord = (LastWord){ pos, output->length, insertEmphasesFrom }; |
3655 | 0 | if (pos == input->length) break; |
3656 | 0 | if (pos >= compbrlStart && pos < compbrlEnd) { |
3657 | 0 | int cs = 2; // cursor status for this call |
3658 | 0 | if (!doCompTrans(pos, compbrlEnd, table, &pos, input, output, posMapping, |
3659 | 0 | emphasisBuffer, &transRule, cursorPosition, &cs, mode)) |
3660 | 0 | goto failure; |
3661 | 0 | continue; |
3662 | 0 | } |
3663 | 0 | TranslationTableCharacterAttributes beforeAttributes; |
3664 | 0 | setBefore(table, pos, input, &beforeAttributes); |
3665 | 0 | if (pos >= input->length) break; |
3666 | | |
3667 | 0 | if (!dontContract) dontContract = typebuf[pos] & no_contract; |
3668 | 0 | if (typebuf[pos] & no_translate) { |
3669 | 0 | if (!warnedForNoTranslate) { |
3670 | 0 | _lou_logMessage(LOU_LOG_WARN, |
3671 | 0 | "warning: Typeform no_translate is deprecated for input."); |
3672 | 0 | warnedForNoTranslate = 1; |
3673 | 0 | } |
3674 | 0 | if (input->chars[pos] < 32 || input->chars[pos] > 126) goto failure; |
3675 | 0 | widechar d = LOU_DOTS; |
3676 | 0 | TranslationTableOffset offset = getChar(input->chars[pos], table)->otherRules; |
3677 | 0 | while (offset) { |
3678 | 0 | const TranslationTableRule *r = |
3679 | 0 | (TranslationTableRule *)&table->ruleArea[offset]; |
3680 | 0 | if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow && r->dotslen == 1) { |
3681 | 0 | d = r->charsdots[1]; |
3682 | 0 | break; |
3683 | 0 | } |
3684 | 0 | offset = r->charsnext; |
3685 | 0 | } |
3686 | 0 | if (!for_updatePositions(&d, 1, 1, 0, pos, input, output, posMapping, |
3687 | 0 | cursorPosition, cursorStatus)) |
3688 | 0 | goto failure; |
3689 | 0 | pos++; |
3690 | 0 | posIncremented = 1; |
3691 | 0 | insertEmphasesFrom = pos; |
3692 | 0 | continue; |
3693 | 0 | } |
3694 | 0 | repwordLength = 0; |
3695 | 0 | for_selectRule(table, pos, *output, posMapping, mode, input, typebuf, |
3696 | 0 | emphasisBuffer, &transOpcode, prevTransOpcode, &transRule, &transCharslen, |
3697 | 0 | &passCharDots, &passInstructions, &passIC, &patternMatch, posIncremented, |
3698 | 0 | *cursorPosition, &repwordLength, dontContract, compbrlStart, compbrlEnd, |
3699 | 0 | beforeAttributes, &curCharDef, &groupingRule, &groupingOp); |
3700 | |
|
3701 | 0 | switch (transOpcode) /* Rules that pre-empt context and swap */ |
3702 | 0 | { |
3703 | 0 | case CTO_CompBrl: |
3704 | 0 | case CTO_Literal: |
3705 | 0 | if (!doCompbrl(table, &pos, input, output, posMapping, emphasisBuffer, |
3706 | 0 | &transRule, cursorPosition, cursorStatus, &lastWord, |
3707 | 0 | &insertEmphasesFrom, mode)) |
3708 | 0 | goto failure; |
3709 | 0 | continue; |
3710 | 0 | default: |
3711 | 0 | break; |
3712 | 0 | } |
3713 | | |
3714 | | /* Skip repword separator to make caps/emph indicators appear before repword |
3715 | | * indicator */ |
3716 | 0 | if (repwordLength) pos += transCharslen; |
3717 | |
|
3718 | 0 | for (int at = insertEmphasesFrom; at <= pos; at++) { |
3719 | | /* insert caps end indicator */ |
3720 | 0 | insertEmphasesAt(0, 1, 1, 0, at, table, pos, input, output, posMapping, |
3721 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
3722 | 0 | if (haveEmphasis) { |
3723 | | /* insert emphasis end indicator */ |
3724 | 0 | insertEmphasesAt(0, 1, 0, 1, at, table, pos, input, output, posMapping, |
3725 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
3726 | | /* insert emphasis start indicator */ |
3727 | 0 | insertEmphasesAt(1, 0, 0, 1, at, table, pos, input, output, posMapping, |
3728 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
3729 | 0 | } |
3730 | 0 | if (at < pos) |
3731 | 0 | insertEmphasesAt(1, 0, 1, 0, at, table, pos, input, output, posMapping, |
3732 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
3733 | 0 | } |
3734 | 0 | insertEmphasesFrom = pos + 1; |
3735 | | /* insert grade 1 mode indicator (nocontractsign) before contraction */ |
3736 | 0 | if (transOpcode == CTO_Contraction) { |
3737 | 0 | const TranslationTableRule *indicRule; |
3738 | 0 | if (brailleIndicatorDefined(table->noContractSign, table, &indicRule)) |
3739 | 0 | for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, |
3740 | 0 | pos, input, output, posMapping, cursorPosition, cursorStatus); |
3741 | 0 | } |
3742 | | /* insert letter sign */ |
3743 | 0 | if (!insertLetterSign(table, pos, input, output, posMapping, transOpcode, |
3744 | 0 | cursorPosition, cursorStatus, beforeAttributes)) |
3745 | 0 | goto failure; |
3746 | | /* insert caps start indicator */ |
3747 | 0 | insertEmphasesAt(1, 0, 1, 0, pos, table, pos, input, output, posMapping, |
3748 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
3749 | | /* insert number sign (not if numericmodechars, midnumericmodechars or |
3750 | | * numericnocontchars has been defined) */ |
3751 | 0 | if (!table->usesNumericMode) |
3752 | 0 | if (!insertNumberSign(table, pos, input, output, posMapping, prevTransOpcode, |
3753 | 0 | cursorPosition, cursorStatus, beforeAttributes)) |
3754 | 0 | goto failure; |
3755 | | /* insert number sign and number cancel sign (nocontractsign) (only if |
3756 | | * numericmodechars, midnumericmodechars or numericnocontchars has been defined) |
3757 | | */ |
3758 | 0 | if (table->usesNumericMode) |
3759 | 0 | checkNumericMode(table, pos, input, output, posMapping, cursorPosition, |
3760 | 0 | cursorStatus, &dontContract, &numericMode); |
3761 | |
|
3762 | 0 | if (transOpcode == CTO_Context || |
3763 | 0 | (posIncremented && |
3764 | 0 | findForPassRule(table, pos, currentPass, input, &transOpcode, |
3765 | 0 | &transRule, &transCharslen, &passCharDots, |
3766 | 0 | &passInstructions, &passIC, &patternMatch, &groupingRule, |
3767 | 0 | &groupingOp))) { |
3768 | 0 | posIncremented = 1; |
3769 | 0 | switch (transOpcode) { |
3770 | 0 | case CTO_Context: { |
3771 | 0 | const InString *inputBefore = input; |
3772 | 0 | int posBefore = pos; |
3773 | 0 | if (appliedRules != NULL && appliedRulesCount < maxAppliedRules) |
3774 | 0 | appliedRules[appliedRulesCount++] = transRule; |
3775 | 0 | if (!passDoAction(table, &input, output, posMapping, transOpcode, |
3776 | 0 | &transRule, passCharDots, passInstructions, passIC, &pos, |
3777 | 0 | patternMatch, cursorPosition, cursorStatus, groupingRule, |
3778 | 0 | groupingOp, mode)) |
3779 | 0 | goto failure; |
3780 | 0 | if (input->bufferIndex != inputBefore->bufferIndex && |
3781 | 0 | inputBefore->bufferIndex != origInput->bufferIndex) |
3782 | 0 | releaseStringBuffer(inputBefore->bufferIndex); |
3783 | 0 | if (pos == posBefore) posIncremented = 0; |
3784 | 0 | continue; |
3785 | 0 | } |
3786 | 0 | default: |
3787 | 0 | break; |
3788 | 0 | } |
3789 | 0 | } else { |
3790 | 0 | if (appliedRules != NULL && appliedRulesCount < maxAppliedRules) |
3791 | 0 | appliedRules[appliedRulesCount++] = transRule; |
3792 | 0 | posIncremented = 1; |
3793 | 0 | } |
3794 | | |
3795 | | /* Processing before replacement */ |
3796 | | |
3797 | | /* check if leaving no contraction (grade 1) mode */ |
3798 | 0 | if (checkCharAttr(input->chars[pos], CTC_SeqDelimiter | CTC_Space, table)) |
3799 | 0 | dontContract = 0; |
3800 | |
|
3801 | 0 | switch (transOpcode) { |
3802 | 0 | case CTO_EndNum: |
3803 | 0 | if (table->letterSign && checkCharAttr(input->chars[pos], CTC_Letter, table)) |
3804 | 0 | output->length--; |
3805 | 0 | break; |
3806 | 0 | case CTO_Repeated: |
3807 | 0 | case CTO_Space: |
3808 | 0 | dontContract = 0; |
3809 | 0 | break; |
3810 | 0 | case CTO_LargeSign: |
3811 | 0 | if (prevTransOpcode == CTO_LargeSign) { |
3812 | 0 | int hasEndSegment = 0; |
3813 | 0 | while (output->length > 0 && |
3814 | 0 | checkDotsAttr( |
3815 | 0 | output->chars[output->length - 1], CTC_Space, table)) { |
3816 | 0 | if (output->chars[output->length - 1] == LOU_ENDSEGMENT) { |
3817 | 0 | hasEndSegment = 1; |
3818 | 0 | } |
3819 | 0 | output->length--; |
3820 | 0 | } |
3821 | 0 | if (hasEndSegment != 0) { |
3822 | 0 | output->chars[output->length] = 0xffff; |
3823 | 0 | output->length++; |
3824 | 0 | } |
3825 | 0 | } |
3826 | 0 | break; |
3827 | 0 | case CTO_DecPoint: |
3828 | 0 | if (!table->usesNumericMode && table->numberSign) { |
3829 | 0 | TranslationTableRule *numRule = |
3830 | 0 | (TranslationTableRule *)&table->ruleArea[table->numberSign]; |
3831 | 0 | if (!for_updatePositions(&numRule->charsdots[numRule->charslen], |
3832 | 0 | numRule->charslen, numRule->dotslen, 0, pos, input, output, |
3833 | 0 | posMapping, cursorPosition, cursorStatus)) |
3834 | 0 | goto failure; |
3835 | 0 | } |
3836 | 0 | transOpcode = CTO_MidNum; |
3837 | 0 | break; |
3838 | 0 | case CTO_NoCont: |
3839 | 0 | if (!dontContract) |
3840 | 0 | doNocont(table, &pos, output, mode, input, &lastWord, &dontContract, |
3841 | 0 | &insertEmphasesFrom); |
3842 | 0 | continue; |
3843 | 0 | case CTO_RepWord: |
3844 | 0 | case CTO_RepEndWord: |
3845 | 0 | repwordStart = pos - transCharslen - repwordLength; |
3846 | 0 | break; |
3847 | 0 | default: |
3848 | 0 | break; |
3849 | 0 | } /* end of action */ |
3850 | | |
3851 | | /* replacement processing */ |
3852 | 0 | switch (transOpcode) { |
3853 | 0 | case CTO_Replace: |
3854 | 0 | pos += transCharslen; |
3855 | 0 | if (!putCharacters(&transRule->charsdots[transCharslen], transRule->dotslen, |
3856 | 0 | table, pos, input, output, posMapping, cursorPosition, |
3857 | 0 | cursorStatus, mode)) |
3858 | 0 | goto failure; |
3859 | 0 | break; |
3860 | 0 | case CTO_None: |
3861 | | /* no definition or translation rules found for this character, but it may be |
3862 | | * based on another character */ |
3863 | 0 | if (!putCharacter(input->chars[pos], table, pos, input, output, posMapping, |
3864 | 0 | cursorPosition, cursorStatus, mode)) |
3865 | 0 | goto failure; |
3866 | 0 | pos++; |
3867 | 0 | break; |
3868 | 0 | default: { |
3869 | 0 | const widechar *dots = &transRule->charsdots[transCharslen]; |
3870 | 0 | int dotslen = transRule->dotslen; |
3871 | 0 | if (transOpcode == CTO_RepEndWord) { |
3872 | 0 | int k; |
3873 | 0 | for (k = 1; dots[k] != ','; k++) |
3874 | 0 | ; |
3875 | 0 | k++; |
3876 | 0 | dots = &dots[k]; |
3877 | 0 | dotslen -= k; |
3878 | 0 | } |
3879 | 0 | if (dotslen) { |
3880 | 0 | if (repwordLength) { |
3881 | | /* repword separator is already skipped */ |
3882 | 0 | if (!for_updatePositions(dots, 0, dotslen, 0, pos, input, output, |
3883 | 0 | posMapping, cursorPosition, cursorStatus)) |
3884 | 0 | goto failure; |
3885 | 0 | } else { |
3886 | 0 | if (!for_updatePositions(dots, transCharslen, dotslen, 0, pos, input, |
3887 | 0 | output, posMapping, cursorPosition, cursorStatus)) |
3888 | 0 | goto failure; |
3889 | 0 | pos += transCharslen; |
3890 | 0 | } |
3891 | 0 | } else { |
3892 | 0 | for (k = 0; k < transCharslen; k++) { |
3893 | 0 | if (!putCharacter(input->chars[pos], table, pos, input, output, |
3894 | 0 | posMapping, cursorPosition, cursorStatus, mode)) |
3895 | 0 | goto failure; |
3896 | 0 | if (++pos >= input->length) break; |
3897 | 0 | } |
3898 | 0 | } |
3899 | 0 | break; |
3900 | 0 | } |
3901 | 0 | } |
3902 | | |
3903 | | /* processing after replacement */ |
3904 | 0 | switch (transOpcode) { |
3905 | 0 | case CTO_Repeated: { |
3906 | | /* Skip repeated characters. */ |
3907 | 0 | int srclim = input->length - transCharslen; |
3908 | 0 | if (mode & (compbrlAtCursor | compbrlLeftCursor) && compbrlStart < srclim) |
3909 | | /* Don't skip characters from compbrlStart onwards. */ |
3910 | 0 | srclim = compbrlStart - 1; |
3911 | 0 | while ((pos <= srclim) && |
3912 | 0 | compareChars(&transRule->charsdots[0], &input->chars[pos], |
3913 | 0 | transCharslen, table)) { |
3914 | 0 | if (!*cursorStatus && pos <= *cursorPosition && |
3915 | 0 | *cursorPosition < pos + transCharslen) { |
3916 | 0 | *cursorStatus = 1; |
3917 | 0 | *cursorPosition = output->length - 1; |
3918 | 0 | } |
3919 | 0 | pos += transCharslen; |
3920 | 0 | } |
3921 | 0 | break; |
3922 | 0 | } |
3923 | 0 | case CTO_RepEndWord: { |
3924 | | /* Go back and insert dots at repwordStart and update posMapping accordingly |
3925 | | */ |
3926 | 0 | const widechar *dots = &transRule->charsdots[transCharslen]; |
3927 | 0 | int dotslen; |
3928 | 0 | for (dotslen = 1; dots[dotslen] != ','; dotslen++) |
3929 | 0 | ; |
3930 | 0 | if ((output->length + dotslen) > output->maxlength) goto failure; |
3931 | 0 | int k; |
3932 | 0 | for (k = output->length - 1; k >= 0; k--) |
3933 | 0 | if (posMapping[k] >= repwordStart) { |
3934 | 0 | output->chars[k + dotslen] = output->chars[k]; |
3935 | 0 | posMapping[k + dotslen] = posMapping[k]; |
3936 | 0 | } else |
3937 | 0 | break; |
3938 | 0 | k++; |
3939 | 0 | memcpy(&output->chars[k], dots, dotslen * sizeof(*output->chars)); |
3940 | 0 | for (int l = 0; l < dotslen; l++) posMapping[k + l] = posMapping[k]; |
3941 | 0 | output->length += dotslen; |
3942 | 0 | if (*cursorStatus && *cursorPosition >= k) *cursorPosition += dotslen; |
3943 | 0 | } |
3944 | 0 | case CTO_RepWord: { |
3945 | | /* Skip repeated characters. */ |
3946 | 0 | int srclim = input->length; |
3947 | 0 | if (mode & (compbrlAtCursor | compbrlLeftCursor) && compbrlStart < srclim) |
3948 | | /* Don't skip characters from compbrlStart onwards. */ |
3949 | 0 | srclim = compbrlStart; |
3950 | | /* Skip first and subsequent repetitions */ |
3951 | | /* Loop body is executed at least once. */ |
3952 | 0 | int firstRep = 1; |
3953 | 0 | while (pos + repwordLength <= srclim && |
3954 | 0 | compareChars(&input->chars[repwordStart], &input->chars[pos], |
3955 | 0 | repwordLength, table)) { |
3956 | | /* Check that capitalisation and emphasis do not change within or in |
3957 | | * between subsequent repetitions. It is allowed to change right before |
3958 | | * the first repetition because that can be indicated. That it does not |
3959 | | * change within the first repetition is already checked in |
3960 | | * isRepeatedWord. */ |
3961 | 0 | if (!firstRep && |
3962 | 0 | checkEmphasisChange(pos - 1, repwordLength, emphasisBuffer)) |
3963 | 0 | break; |
3964 | 0 | if (!*cursorStatus && *cursorPosition >= pos - transCharslen && |
3965 | 0 | *cursorPosition < pos + repwordLength) { |
3966 | 0 | *cursorStatus = 1; |
3967 | 0 | *cursorPosition = output->length - 1; |
3968 | 0 | } |
3969 | 0 | pos += repwordLength; |
3970 | 0 | if (pos + transCharslen <= srclim && |
3971 | 0 | !memcmp(transRule->charsdots, &input->chars[pos], |
3972 | 0 | transCharslen * sizeof(*transRule->charsdots))) |
3973 | 0 | pos += transCharslen; |
3974 | 0 | else { |
3975 | 0 | pos += transCharslen; |
3976 | 0 | break; |
3977 | 0 | } |
3978 | 0 | firstRep = 0; |
3979 | 0 | } |
3980 | 0 | pos -= transCharslen; |
3981 | 0 | break; |
3982 | 0 | } |
3983 | 0 | case CTO_JoinNum: |
3984 | 0 | case CTO_JoinableWord: |
3985 | 0 | while (pos < input->length && |
3986 | 0 | checkCharAttr(input->chars[pos], CTC_Space, table) && |
3987 | 0 | input->chars[pos] != LOU_ENDSEGMENT) |
3988 | 0 | pos++; |
3989 | 0 | break; |
3990 | 0 | default: |
3991 | 0 | break; |
3992 | 0 | } |
3993 | 0 | if (srcSpacing != NULL && srcSpacing[pos] >= '0' && srcSpacing[pos] <= '9') |
3994 | 0 | destSpacing[output->length] = srcSpacing[pos]; |
3995 | 0 | if ((transOpcode >= CTO_Always && transOpcode <= CTO_None) || |
3996 | 0 | (transOpcode >= CTO_Digit && transOpcode <= CTO_LitDigit)) |
3997 | 0 | prevTransOpcode = transOpcode; |
3998 | 0 | } |
3999 | | |
4000 | 0 | for (int at = insertEmphasesFrom; at <= pos; at++) { |
4001 | | /* insert caps end indicator */ |
4002 | 0 | insertEmphasesAt(0, 1, 1, 0, at, table, pos, input, output, posMapping, |
4003 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
4004 | 0 | if (haveEmphasis) { |
4005 | | /* insert emphasis end indicator */ |
4006 | 0 | insertEmphasesAt(0, 1, 0, 1, at, table, pos, input, output, posMapping, |
4007 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
4008 | | /* insert emphasis start indicator */ |
4009 | 0 | insertEmphasesAt(1, 0, 0, 1, at, table, pos, input, output, posMapping, |
4010 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
4011 | 0 | } |
4012 | | /* insert caps start indicator */ |
4013 | 0 | insertEmphasesAt(1, 0, 1, 0, at, table, pos, input, output, posMapping, |
4014 | 0 | emphasisBuffer, cursorPosition, cursorStatus); |
4015 | 0 | } |
4016 | |
|
4017 | 0 | failure: |
4018 | 0 | if (lastWord.outPos != 0 && pos < input->length && |
4019 | 0 | !checkCharAttr(input->chars[pos], CTC_Space, table)) { |
4020 | 0 | pos = lastWord.inPos; |
4021 | 0 | output->length = lastWord.outPos; |
4022 | 0 | } |
4023 | 0 | if (pos < input->length) { |
4024 | 0 | while (checkCharAttr(input->chars[pos], CTC_Space, table)) |
4025 | 0 | if (++pos == input->length) break; |
4026 | 0 | } |
4027 | 0 | *realInlen = pos; |
4028 | 0 | if (input->bufferIndex != origInput->bufferIndex) |
4029 | 0 | releaseStringBuffer(input->bufferIndex); |
4030 | 0 | return 1; |
4031 | 0 | } /* first pass translation completed */ |
4032 | | |
4033 | | static int |
4034 | 0 | isHyphen(const TranslationTableHeader *table, widechar c) { |
4035 | 0 | TranslationTableRule *rule; |
4036 | 0 | TranslationTableOffset offset = getChar(c, table)->otherRules; |
4037 | 0 | while (offset) { |
4038 | 0 | rule = (TranslationTableRule *)&table->ruleArea[offset]; |
4039 | 0 | if (rule->opcode == CTO_Hyphen) return 1; |
4040 | 0 | offset = rule->dotsnext; |
4041 | 0 | } |
4042 | 0 | return 0; |
4043 | 0 | } |
4044 | | |
4045 | | /** |
4046 | | * Hyphenate an input string which can either be text (mode = 0) or braille (mode = 1). If |
4047 | | * the input is braille, back-translation will be performed with `tableList'. The input |
4048 | | * string can contain any character (even space), but only break points within words |
4049 | | * (between letters) are considered. If the string can not be broken before the character |
4050 | | * at index k, the value of `hyphens[k]' is '0'. If it can be broken by inserting a hyphen |
4051 | | * at the break point, the value is '1'. If it can be broken without adding a hyphen, the |
4052 | | * value is '2'. |
4053 | | */ |
4054 | | int EXPORT_CALL |
4055 | | lou_hyphenate(const char *tableList, const widechar *inbuf, int inlen, char *hyphens, |
4056 | 0 | int mode) { |
4057 | 0 | #define HYPHSTRING 100 |
4058 | 0 | const TranslationTableHeader *table; |
4059 | 0 | widechar textBuffer[HYPHSTRING]; |
4060 | 0 | char *textHyphens; |
4061 | 0 | int *inputPos; |
4062 | 0 | int k; |
4063 | 0 | int textLen; |
4064 | 0 | int wordStart; |
4065 | 0 | table = lou_getTable(tableList); |
4066 | 0 | if (table == NULL || inbuf == NULL || hyphens == NULL || |
4067 | 0 | table->hyphenStatesArray == 0 || inlen >= HYPHSTRING) |
4068 | 0 | return 0; |
4069 | 0 | if (mode != 0) { |
4070 | 0 | int brailleLen = inlen; |
4071 | 0 | textLen = HYPHSTRING; |
4072 | 0 | inputPos = malloc(textLen * sizeof(int)); |
4073 | 0 | if (!lou_backTranslate(tableList, inbuf, &brailleLen, textBuffer, &textLen, NULL, |
4074 | 0 | NULL, NULL, inputPos, NULL, 0)) { |
4075 | 0 | free(inputPos); |
4076 | 0 | return 0; |
4077 | 0 | } |
4078 | 0 | textHyphens = malloc((textLen + 1) * sizeof(char)); |
4079 | 0 | } else { |
4080 | 0 | memcpy(textBuffer, inbuf, CHARSIZE * inlen); |
4081 | 0 | textLen = inlen; |
4082 | 0 | textHyphens = hyphens; |
4083 | 0 | } |
4084 | | |
4085 | | // initialize hyphens array |
4086 | 0 | for (k = 0; k < textLen; k++) textHyphens[k] = '0'; |
4087 | 0 | textHyphens[k] = 0; |
4088 | | |
4089 | | // for every word part |
4090 | 0 | for (wordStart = 0;;) { |
4091 | 0 | int wordEnd; |
4092 | | // find start of word |
4093 | 0 | for (; wordStart < textLen; wordStart++) |
4094 | 0 | if ((getChar(textBuffer[wordStart], table))->attributes & CTC_Letter) break; |
4095 | 0 | if (wordStart == textLen) break; |
4096 | | // find end of word |
4097 | 0 | for (wordEnd = wordStart + 1; wordEnd < textLen; wordEnd++) |
4098 | 0 | if (!((getChar(textBuffer[wordEnd], table))->attributes & CTC_Letter)) break; |
4099 | | // hyphenate |
4100 | 0 | if (!hyphenateWord(&textBuffer[wordStart], wordEnd - wordStart, |
4101 | 0 | &textHyphens[wordStart], table)) |
4102 | 0 | return 0; |
4103 | | // normalize to '0', '1' or '2' |
4104 | 0 | if (wordStart >= 2 && isHyphen(table, textBuffer[wordStart - 1]) && |
4105 | 0 | ((getChar(textBuffer[wordStart - 2], table))->attributes & CTC_Letter)) |
4106 | 0 | textHyphens[wordStart] = '2'; |
4107 | 0 | else |
4108 | 0 | textHyphens[wordStart] = '0'; |
4109 | 0 | for (k = wordStart + 1; k < wordEnd; k++) |
4110 | 0 | if (textHyphens[k] & 1) |
4111 | 0 | textHyphens[k] = '1'; |
4112 | 0 | else |
4113 | 0 | textHyphens[k] = '0'; |
4114 | 0 | if (wordEnd == textLen) break; |
4115 | 0 | textHyphens[wordEnd] = '0'; // because hyphenateWord sets it to 0 |
4116 | 0 | wordStart = wordEnd + 1; |
4117 | 0 | } |
4118 | | |
4119 | | // map hyphen positions if the input was braille |
4120 | 0 | if (mode != 0) { |
4121 | 0 | for (k = 0; k < inlen; k++) hyphens[k] = '0'; |
4122 | 0 | hyphens[k] = 0; |
4123 | 0 | int prevPos = -1; |
4124 | 0 | for (k = 0; k < textLen; k++) { |
4125 | 0 | int braillePos = inputPos[k]; |
4126 | 0 | if (braillePos > inlen || braillePos < 0) break; |
4127 | 0 | if (braillePos > prevPos) { |
4128 | 0 | hyphens[braillePos] = textHyphens[k]; |
4129 | 0 | prevPos = braillePos; |
4130 | 0 | } |
4131 | 0 | } |
4132 | 0 | free(textHyphens); |
4133 | 0 | free(inputPos); |
4134 | 0 | } |
4135 | 0 | return 1; |
4136 | 0 | } |
4137 | | |
4138 | | int EXPORT_CALL |
4139 | | lou_dotsToChar( |
4140 | 0 | const char *tableList, widechar *inbuf, widechar *outbuf, int length, int mode) { |
4141 | 0 | const DisplayTableHeader *table; |
4142 | 0 | int k; |
4143 | 0 | widechar dots; |
4144 | 0 | if (tableList == NULL || inbuf == NULL || outbuf == NULL) return 0; |
4145 | | |
4146 | 0 | table = _lou_getDisplayTable(tableList); |
4147 | 0 | if (table == NULL || length <= 0) return 0; |
4148 | 0 | for (k = 0; k < length; k++) { |
4149 | 0 | dots = inbuf[k]; |
4150 | 0 | if (!(dots & LOU_DOTS) && |
4151 | 0 | (dots & 0xff00) == LOU_ROW_BRAILLE) /* Unicode braille */ |
4152 | 0 | dots = (dots & 0x00ff) | LOU_DOTS; |
4153 | 0 | outbuf[k] = _lou_getCharForDots(dots, table); |
4154 | | // assume that if NUL character is returned, it's because the display table has no |
4155 | | // mapping for the dot pattern (not because it maps to NUL) |
4156 | 0 | if (outbuf[k] == '\0') outbuf[k] = ' '; |
4157 | 0 | } |
4158 | 0 | return 1; |
4159 | 0 | } |
4160 | | |
4161 | | int EXPORT_CALL |
4162 | | lou_charToDots(const char *tableList, const widechar *inbuf, widechar *outbuf, int length, |
4163 | 0 | int mode) { |
4164 | 0 | const DisplayTableHeader *table; |
4165 | 0 | int k; |
4166 | 0 | if (tableList == NULL || inbuf == NULL || outbuf == NULL) return 0; |
4167 | | |
4168 | 0 | table = _lou_getDisplayTable(tableList); |
4169 | 0 | if (table == NULL || length <= 0) return 0; |
4170 | 0 | for (k = 0; k < length; k++) |
4171 | 0 | if ((mode & ucBrl)) |
4172 | 0 | outbuf[k] = ((_lou_getDotsForChar(inbuf[k], table) & 0xff) | LOU_ROW_BRAILLE); |
4173 | 0 | else |
4174 | 0 | outbuf[k] = _lou_getDotsForChar(inbuf[k], table); |
4175 | 0 | return 1; |
4176 | 0 | } |