Coverage Report

Created: 2025-07-01 06:26

/src/liblouis/liblouis/lou_translateString.c
Line
Count
Source (jump to first uncovered line)
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4
   BRLTTY Team
5
6
   Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7
   Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
9
   Copyright (C) 2016 Davy Kager, Dedicon
10
11
   This file is part of liblouis.
12
13
   liblouis is free software: you can redistribute it and/or modify it
14
   under the terms of the GNU Lesser General Public License as published
15
   by the Free Software Foundation, either version 2.1 of the License, or
16
   (at your option) any later version.
17
18
   liblouis is distributed in the hope that it will be useful, but
19
   WITHOUT ANY WARRANTY; without even the implied warranty of
20
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
   Lesser General Public License for more details.
22
23
   You should have received a copy of the GNU Lesser General Public
24
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
25
*/
26
27
/**
28
 * @file
29
 * @brief Translate to braille
30
 */
31
32
#include <config.h>
33
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <string.h>
37
38
#include "internal.h"
39
40
/* Additional flag bits kept in typebuf entries. */
#define SYLLABLE_MARKER_1 0x2000
#define SYLLABLE_MARKER_2 0x4000
#define CAPSEMPH 0x8000

/* Mask covering all typeform bits that can be used. */
#define EMPHASIS 0x3fff

/* Flag bits kept in wordBuffer entries. */
#define WORD_CHAR 0x01
#define WORD_RESET 0x02
#define WORD_WHOLE 0x04
51
52
typedef struct {
53
  int size;
54
  widechar **buffers;
55
  int *inUse;
56
  widechar *(*alloc)(int index, int length);
57
  void (*free)(widechar *);
58
} StringBufferPool;
59
60
static widechar *
61
0
allocStringBuffer(int index, int length) {
62
0
  return _lou_allocMem(alloc_passbuf, index, 0, length);
63
0
}
64
65
static const StringBufferPool *stringBufferPool = NULL;
66
67
static void
68
0
initStringBufferPool() {
69
0
  static widechar *stringBuffers[MAXPASSBUF] = { NULL };
70
0
  static int stringBuffersInUse[MAXPASSBUF] = { 0 };
71
0
  StringBufferPool *pool = malloc(sizeof(StringBufferPool));
72
0
  pool->size = MAXPASSBUF;
73
0
  pool->buffers = stringBuffers;
74
0
  pool->inUse = stringBuffersInUse;
75
0
  pool->alloc = &allocStringBuffer;
76
0
  pool->free = NULL;
77
0
  stringBufferPool = pool;
78
0
}
79
80
static int
81
0
getStringBuffer(int length) {
82
0
  int i;
83
0
  for (i = 0; i < stringBufferPool->size; i++) {
84
0
    if (!stringBufferPool->inUse[i]) {
85
0
      stringBufferPool->buffers[i] = stringBufferPool->alloc(i, length);
86
0
      stringBufferPool->inUse[i] = 1;
87
0
      return i;
88
0
    }
89
0
  }
90
0
  _lou_outOfMemory();
91
0
  return -1;
92
0
}
93
94
static int
95
0
releaseStringBuffer(int idx) {
96
0
  if (idx >= 0 && idx < stringBufferPool->size) {
97
0
    int inUse = stringBufferPool->inUse[idx];
98
0
    if (inUse && stringBufferPool->free)
99
0
      stringBufferPool->free(stringBufferPool->buffers[idx]);
100
0
    stringBufferPool->inUse[idx] = 0;
101
0
    return inUse;
102
0
  }
103
0
  return 0;
104
0
}
105
106
typedef struct {
107
  int bufferIndex;
108
  const widechar *chars;
109
  int length;
110
} InString;
111
112
typedef struct {
113
  int bufferIndex;
114
  widechar *chars;
115
  int maxlength;
116
  int length;
117
} OutString;
118
119
/* Positions recorded for a matched pass rule: the bounds of the match and the
 * bounds of the part to be replaced. */
typedef struct {
  int startMatch;
  int startReplace;
  int endReplace;
  int endMatch;
} PassRuleMatch;
125
126
static int
127
putCharacter(widechar c, const TranslationTableHeader *table, int pos,
128
    const InString *input, OutString *output, int *posMapping, int *cursorPosition,
129
    int *cursorStatus, int mode);
130
static int
131
passDoTest(const TranslationTableHeader *table, int pos, const InString *input,
132
    int transOpcode, const TranslationTableRule *transRule, int *passCharDots,
133
    const widechar **passInstructions, int *passIC, PassRuleMatch *match,
134
    TranslationTableRule **groupingRule, widechar *groupingOp);
135
static int
136
passDoAction(const TranslationTableHeader *table, const InString **input,
137
    OutString *output, int *posMapping, int transOpcode,
138
    const TranslationTableRule **transRule, int passCharDots,
139
    const widechar *passInstructions, int passIC, int *pos, PassRuleMatch match,
140
    int *cursorPosition, int *cursorStatus, TranslationTableRule *groupingRule,
141
    widechar groupingOp, int mode);
142
143
/* Optional record of the rules that were applied: when `appliedRules` is not
 * NULL, rules are appended at index `appliedRulesCount` as long as that count
 * stays below `maxAppliedRules`. */
static const TranslationTableRule **appliedRules;
static int maxAppliedRules;
static int appliedRulesCount;
146
147
static TranslationTableCharacter *
148
0
getChar(widechar c, const TranslationTableHeader *table) {
149
0
  static TranslationTableCharacter notFound = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32,
150
0
    0, 0 };
151
0
  const TranslationTableOffset bucket = table->characters[_lou_charHash(c)];
152
0
  TranslationTableOffset offset = bucket;
153
0
  while (offset) {
154
0
    TranslationTableCharacter *character =
155
0
        (TranslationTableCharacter *)&table->ruleArea[offset];
156
0
    if (character->value == c) return character;
157
0
    offset = character->next;
158
0
  }
159
0
  notFound.value = c;
160
0
  return &notFound;
161
0
}
162
163
static TranslationTableCharacter *
164
0
getDots(widechar c, const TranslationTableHeader *table) {
165
0
  static TranslationTableCharacter notFound = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
166
0
    LOU_DOTS, 0, 0 };
167
0
  const TranslationTableOffset bucket = table->dots[_lou_charHash(c)];
168
0
  TranslationTableOffset offset = bucket;
169
0
  while (offset) {
170
0
    TranslationTableCharacter *character =
171
0
        (TranslationTableCharacter *)&table->ruleArea[offset];
172
0
    if (character->value == c) return character;
173
0
    offset = character->next;
174
0
  }
175
0
  notFound.value = c;
176
0
  return &notFound;
177
0
}
178
179
static int
180
checkCharAttr(const widechar c, const TranslationTableCharacterAttributes a,
181
0
    const TranslationTableHeader *table) {
182
0
  return (((getChar(c, table))->attributes & a) ? 1 : 0);
183
0
}
184
185
static int
186
checkDotsAttr(const widechar c, const TranslationTableCharacterAttributes a,
187
0
    const TranslationTableHeader *table) {
188
0
  return (((getDots(c, table))->attributes & a) ? 1 : 0);
189
0
}
190
191
static int
192
checkCharAttr_safe(const InString *input, int pos,
193
    const TranslationTableCharacterAttributes a,
194
0
    const TranslationTableHeader *table) {
195
0
  return ((pos < input->length) ? checkCharAttr(input->chars[pos], a, table) : 0);
196
0
}
197
198
static int
199
findForPassRule(const TranslationTableHeader *table, int pos, int currentPass,
200
    const InString *input, int *transOpcode, const TranslationTableRule **transRule,
201
    int *transCharslen, int *passCharDots, widechar const **passInstructions,
202
    int *passIC, PassRuleMatch *match, TranslationTableRule **groupingRule,
203
0
    widechar *groupingOp) {
204
0
  int save_transCharslen = *transCharslen;
205
0
  const TranslationTableRule *save_transRule = *transRule;
206
0
  TranslationTableOpcode save_transOpcode = *transOpcode;
207
0
  TranslationTableOffset ruleOffset;
208
0
  ruleOffset = table->forPassRules[currentPass];
209
0
  *transCharslen = 0;
210
0
  while (ruleOffset) {
211
0
    *transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
212
0
    *transOpcode = (*transRule)->opcode;
213
0
    if (passDoTest(table, pos, input, *transOpcode, *transRule, passCharDots,
214
0
          passInstructions, passIC, match, groupingRule, groupingOp))
215
0
      return 1;
216
0
    ruleOffset = (*transRule)->charsnext;
217
0
  }
218
0
  *transCharslen = save_transCharslen;
219
0
  *transRule = save_transRule;
220
0
  *transOpcode = save_transOpcode;
221
0
  return 0;
222
0
}
223
224
static widechar
225
toLowercase(
226
0
    const TranslationTableHeader *table, const TranslationTableCharacter *character) {
227
0
  if (character->mode & CTC_UpperCase) {
228
0
    const TranslationTableCharacter *c = character;
229
0
    if (c->basechar) c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
230
0
    while (1) {
231
0
      if ((c->mode & (character->mode & ~CTC_UpperCase)) ==
232
0
          (character->mode & ~CTC_UpperCase))
233
0
        return c->value;
234
0
      if (!c->linked) break;
235
0
      c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
236
0
    }
237
0
  }
238
0
  return character->value;
239
0
}
240
241
static int
242
compareChars(const widechar *address1, const widechar *address2, int count,
243
0
    const TranslationTableHeader *table) {
244
0
  int k;
245
0
  if (!count) return 0;
246
0
  for (k = 0; k < count; k++)
247
0
    if (toLowercase(table, getChar(address1[k], table)) !=
248
0
        toLowercase(table, getChar(address2[k], table)))
249
0
      return 0;
250
0
  return 1;
251
0
}
252
253
static int
254
makeCorrections(const TranslationTableHeader *table, const InString *input,
255
    OutString *output, int *posMapping, formtype *typebuf, int *realInlen,
256
0
    int *cursorPosition, int *cursorStatus, int mode) {
257
0
  int pos;
258
0
  int transOpcode;
259
0
  const TranslationTableRule *transRule;
260
0
  int transCharslen;
261
0
  int passCharDots;
262
0
  const widechar *passInstructions;
263
0
  int passIC; /* Instruction counter */
264
0
  PassRuleMatch patternMatch;
265
0
  TranslationTableRule *groupingRule;
266
0
  widechar groupingOp;
267
0
  const InString *origInput = input;
268
0
  if (!table->corrections) return 1;
269
0
  pos = 0;
270
0
  output->length = 0;
271
0
  int posIncremented = 1;
272
0
  _lou_resetPassVariables();
273
0
  while (pos < input->length) {
274
0
    int length = input->length - pos;
275
0
    int tryThis = 0;
276
    // check posIncremented to avoid endless loop
277
0
    if (!(posIncremented &&
278
0
          findForPassRule(table, pos, 0, input, &transOpcode, &transRule,
279
0
              &transCharslen, &passCharDots, &passInstructions, &passIC,
280
0
              &patternMatch, &groupingRule, &groupingOp)))
281
0
      while (tryThis < 3) {
282
0
        TranslationTableOffset ruleOffset = 0;
283
0
        switch (tryThis) {
284
0
        case 0:
285
0
          if (!(length >= 2)) break;
286
0
          ruleOffset = table->forRules[_lou_stringHash(
287
0
              &input->chars[pos], 1, table)];
288
0
          break;
289
0
        case 1:
290
0
          if (!(length >= 1)) break;
291
0
          length = 1;
292
0
          ruleOffset = getChar(input->chars[pos], table)->otherRules;
293
0
          break;
294
0
        case 2: /* No rule found */
295
0
          transOpcode = CTO_Always;
296
0
          ruleOffset = 0;
297
0
          break;
298
0
        }
299
0
        while (ruleOffset) {
300
0
          transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
301
0
          transOpcode = transRule->opcode;
302
0
          transCharslen = transRule->charslen;
303
0
          if (tryThis == 1 ||
304
0
              (transCharslen <= length &&
305
0
                  compareChars(&transRule->charsdots[0],
306
0
                      &input->chars[pos], transCharslen, table))) {
307
0
            if (posIncremented && transOpcode == CTO_Correct &&
308
0
                passDoTest(table, pos, input, transOpcode, transRule,
309
0
                    &passCharDots, &passInstructions, &passIC,
310
0
                    &patternMatch, &groupingRule, &groupingOp)) {
311
0
              tryThis = 4;
312
0
              break;
313
0
            }
314
0
          }
315
0
          ruleOffset = transRule->charsnext;
316
0
        }
317
0
        tryThis++;
318
0
      }
319
0
    posIncremented = 1;
320
321
0
    switch (transOpcode) {
322
0
    case CTO_Always:
323
0
      if (output->length >= output->maxlength) goto failure;
324
0
      posMapping[output->length] = pos;
325
0
      output->chars[output->length++] = input->chars[pos++];
326
0
      break;
327
0
    case CTO_Correct: {
328
0
      const InString *inputBefore = input;
329
0
      int posBefore = pos;
330
0
      if (appliedRules != NULL && appliedRulesCount < maxAppliedRules)
331
0
        appliedRules[appliedRulesCount++] = transRule;
332
0
      if (!passDoAction(table, &input, output, posMapping, transOpcode, &transRule,
333
0
            passCharDots, passInstructions, passIC, &pos, patternMatch,
334
0
            cursorPosition, cursorStatus, groupingRule, groupingOp, mode))
335
0
        goto failure;
336
0
      if (input->bufferIndex != inputBefore->bufferIndex &&
337
0
          inputBefore->bufferIndex != origInput->bufferIndex)
338
0
        releaseStringBuffer(inputBefore->bufferIndex);
339
0
      if (pos == posBefore) posIncremented = 0;
340
0
      break;
341
0
    }
342
0
    default:
343
0
      break;
344
0
    }
345
0
  }
346
347
0
  {  // We have to transform typebuf accordingly
348
0
    int k;
349
0
    formtype *typebuf_temp;
350
0
    if ((typebuf_temp = malloc(output->length * sizeof(formtype))) == NULL)
351
0
      _lou_outOfMemory();
352
0
    for (k = 0; k < output->length; k++)
353
      // posMapping will never be < 0 but in theory it could
354
0
      if (posMapping[k] < 0)
355
0
        typebuf_temp[k] = typebuf[0];  // prepend to next
356
0
      else if (posMapping[k] >= input->length)
357
0
        typebuf_temp[k] = typebuf[input->length - 1];  // append to previous
358
0
      else
359
0
        typebuf_temp[k] = typebuf[posMapping[k]];
360
0
    memcpy(typebuf, typebuf_temp, output->length * sizeof(formtype));
361
0
    free(typebuf_temp);
362
0
  }
363
364
0
failure:
365
0
  *realInlen = pos;
366
0
  if (input->bufferIndex != origInput->bufferIndex)
367
0
    releaseStringBuffer(input->bufferIndex);
368
0
  return 1;
369
0
}
370
371
static int
372
matchCurrentInput(
373
0
    const InString *input, int pos, const widechar *passInstructions, int passIC) {
374
0
  int k;
375
0
  int kk = pos;
376
0
  for (k = passIC + 2;
377
0
      ((k < passIC + 2 + passInstructions[passIC + 1]) && (kk < input->length));
378
0
      k++)
379
0
    if (input->chars[kk] == LOU_ENDSEGMENT ||
380
0
        passInstructions[k] != input->chars[kk++])
381
0
      return 0;
382
0
  return 1;
383
0
}
384
385
static int
386
swapTest(int swapIC, int *pos, const TranslationTableHeader *table, const InString *input,
387
0
    const widechar *passInstructions) {
388
0
  int p = *pos;
389
0
  TranslationTableOffset swapRuleOffset;
390
0
  TranslationTableRule *swapRule;
391
0
  swapRuleOffset = (passInstructions[swapIC + 1] << 16) | passInstructions[swapIC + 2];
392
0
  swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset];
393
0
  while (p - *pos < passInstructions[swapIC + 3]) {
394
0
    int test;
395
0
    if (p >= input->length) return 0;
396
0
    if (swapRule->opcode == CTO_SwapDd) {
397
0
      for (test = 1; test < swapRule->charslen; test += 2) {
398
0
        if (input->chars[p] == swapRule->charsdots[test]) break;
399
0
      }
400
0
    } else {
401
0
      for (test = 0; test < swapRule->charslen; test++) {
402
0
        if (input->chars[p] == swapRule->charsdots[test]) break;
403
0
      }
404
0
    }
405
0
    if (test >= swapRule->charslen) return 0;
406
0
    p++;
407
0
  }
408
0
  if (passInstructions[swapIC + 3] == passInstructions[swapIC + 4]) {
409
0
    *pos = p;
410
0
    return 1;
411
0
  }
412
0
  while (p - *pos < passInstructions[swapIC + 4]) {
413
0
    int test;
414
0
    if (p >= input->length) {
415
0
      *pos = p;
416
0
      return 1;
417
0
    }
418
0
    if (swapRule->opcode == CTO_SwapDd) {
419
0
      for (test = 1; test < swapRule->charslen; test += 2) {
420
0
        if (input->chars[p] == swapRule->charsdots[test]) break;
421
0
      }
422
0
    } else {
423
0
      for (test = 0; test < swapRule->charslen; test++) {
424
0
        if (input->chars[p] == swapRule->charsdots[test]) break;
425
0
      }
426
0
    }
427
0
    if (test >= swapRule->charslen) {
428
0
      *pos = p;
429
0
      return 1;
430
0
    }
431
0
    p++;
432
0
  }
433
0
  *pos = p;
434
0
  return 1;
435
0
}
436
437
static int
438
swapReplace(int start, int end, const TranslationTableHeader *table,
439
    const InString *input, OutString *output, int *posMapping,
440
0
    const widechar *passInstructions, int passIC) {
441
0
  TranslationTableOffset swapRuleOffset;
442
0
  TranslationTableRule *swapRule;
443
0
  widechar *replacements;
444
0
  int p;
445
0
  swapRuleOffset = (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
446
0
  swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset];
447
0
  replacements = &swapRule->charsdots[swapRule->charslen];
448
0
  for (p = start; p < end; p++) {
449
0
    int rep;
450
0
    int test;
451
0
    int k;
452
0
    if (swapRule->opcode == CTO_SwapDd) {
453
      // A sequence of dot patterns is encoded as the length of the first dot
454
      // pattern (single widechar) followed by the contents of the first dot pattern
455
      // (one widechar per cell) followed by the length of the second dot pattern,
456
      // etc. See the function `compileSwapDots'. Because the third operand of a
457
      // swapdd rule can only contain single-cell dot patterns, the elements at
458
      // index 0, 2, ... are "1" and the elements at index 1, 3, ... are the dot
459
      // patterns.
460
0
      for (test = 0; test * 2 + 1 < swapRule->charslen; test++)
461
0
        if (input->chars[p] == swapRule->charsdots[test * 2 + 1]) break;
462
0
      if (test * 2 == swapRule->charslen) continue;
463
0
    } else {
464
0
      for (test = 0; test < swapRule->charslen; test++)
465
0
        if (input->chars[p] == swapRule->charsdots[test]) break;
466
0
      if (test == swapRule->charslen) continue;
467
0
    }
468
0
    k = 0;
469
0
    for (rep = 0; rep < test; rep++)
470
0
      if (swapRule->opcode == CTO_SwapCc)
471
0
        k++;
472
0
      else
473
0
        k += replacements[k];
474
0
    if (swapRule->opcode == CTO_SwapCc) {
475
0
      if ((output->length + 1) > output->maxlength) return 0;
476
0
      posMapping[output->length] = p;
477
0
      output->chars[output->length++] = replacements[k];
478
0
    } else {
479
0
      int l = replacements[k] - 1;
480
0
      int d = output->length + l;
481
0
      if (d > output->maxlength) return 0;
482
0
      while (--d >= output->length) posMapping[d] = p;
483
      // if length is negative fail
484
0
      int length = l * sizeof(*output->chars);
485
0
      if (length < 0) return 0;
486
0
      memcpy(&output->chars[output->length], &replacements[k + 1], length);
487
0
      output->length += l;
488
0
    }
489
0
  }
490
0
  return 1;
491
0
}
492
493
static int
494
replaceGrouping(const TranslationTableHeader *table, const InString **input,
495
    OutString *output, int transOpcode, int passCharDots,
496
    const widechar *passInstructions, int passIC, int startReplace,
497
0
    TranslationTableRule *groupingRule, widechar groupingOp) {
498
0
  widechar startCharDots = groupingRule->charsdots[2 * passCharDots];
499
0
  widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1];
500
0
  int p;
501
0
  int level = 0;
502
0
  TranslationTableOffset replaceOffset =
503
0
      passInstructions[passIC + 1] << 16 | (passInstructions[passIC + 2] & 0xff);
504
0
  TranslationTableRule *replaceRule =
505
0
      (TranslationTableRule *)&table->ruleArea[replaceOffset];
506
0
  widechar replaceStart = replaceRule->charsdots[2 * passCharDots];
507
0
  widechar replaceEnd = replaceRule->charsdots[2 * passCharDots + 1];
508
0
  if (groupingOp == pass_groupstart) {
509
0
    for (p = startReplace + 1; p < (*input)->length; p++) {
510
0
      if ((*input)->chars[p] == startCharDots) level--;
511
0
      if ((*input)->chars[p] == endCharDots) level++;
512
0
      if (level == 1) break;
513
0
    }
514
0
    if (p == (*input)->length)
515
0
      return 0;
516
0
    else {
517
      // Create a new string instead of modifying it. This is slightly less
518
      // efficient, but makes the code more readable. Grouping is not a much used
519
      // feature anyway.
520
0
      int idx = getStringBuffer((*input)->length);
521
0
      widechar *chars = stringBufferPool->buffers[idx];
522
0
      memcpy(chars, (*input)->chars, (*input)->length * sizeof(widechar));
523
0
      chars[startReplace] = replaceStart;
524
0
      chars[p] = replaceEnd;
525
0
      static InString stringStore;
526
0
      stringStore = (InString){
527
0
        .chars = chars, .length = (*input)->length, .bufferIndex = idx
528
0
      };
529
0
      *input = &stringStore;
530
0
    }
531
0
  } else {
532
0
    if (transOpcode == CTO_Context) {
533
0
      startCharDots = groupingRule->charsdots[2];
534
0
      endCharDots = groupingRule->charsdots[3];
535
0
      replaceStart = replaceRule->charsdots[2];
536
0
      replaceEnd = replaceRule->charsdots[3];
537
0
    }
538
0
    output->chars[output->length] = replaceEnd;
539
0
    for (p = output->length - 1; p >= 0; p--) {
540
0
      if (output->chars[p] == endCharDots) level--;
541
0
      if (output->chars[p] == startCharDots) level++;
542
0
      if (level == 1) break;
543
0
    }
544
0
    if (p < 0) return 0;
545
0
    output->chars[p] = replaceStart;
546
0
    output->length++;
547
0
  }
548
0
  return 1;
549
0
}
550
551
static int
552
removeGrouping(const InString **input, OutString *output, int passCharDots,
553
0
    int startReplace, TranslationTableRule *groupingRule, widechar groupingOp) {
554
0
  widechar startCharDots = groupingRule->charsdots[2 * passCharDots];
555
0
  widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1];
556
0
  int p;
557
0
  int level = 0;
558
0
  if (groupingOp == pass_groupstart) {
559
0
    for (p = startReplace + 1; p < (*input)->length; p++) {
560
0
      if ((*input)->chars[p] == startCharDots) level--;
561
0
      if ((*input)->chars[p] == endCharDots) level++;
562
0
      if (level == 1) break;
563
0
    }
564
0
    if (p == (*input)->length)
565
0
      return 0;
566
0
    else {
567
      // Create a new string instead of modifying it. This is slightly less
568
      // efficient, but makes the code more readable. Grouping is not a much used
569
      // feature anyway.
570
0
      int idx = getStringBuffer((*input)->length);
571
0
      widechar *chars = stringBufferPool->buffers[idx];
572
0
      int len = 0;
573
0
      int k;
574
0
      for (k = 0; k < (*input)->length; k++) {
575
0
        if (k == p) continue;
576
0
        chars[len++] = (*input)->chars[k];
577
0
      }
578
0
      static InString stringStore;
579
0
      stringStore = (InString){ .chars = chars, .length = len, .bufferIndex = idx };
580
0
      *input = &stringStore;
581
0
    }
582
0
  } else {
583
0
    for (p = output->length - 1; p >= 0; p--) {
584
0
      if (output->chars[p] == endCharDots) level--;
585
0
      if (output->chars[p] == startCharDots) level++;
586
0
      if (level == 1) break;
587
0
    }
588
0
    if (p < 0) return 0;
589
0
    p++;
590
0
    for (; p < output->length; p++) output->chars[p - 1] = output->chars[p];
591
0
    output->length--;
592
0
  }
593
0
  return 1;
594
0
}
595
596
static int
597
doPassSearch(const TranslationTableHeader *table, const InString *input,
598
    const TranslationTableRule *transRule, int passCharDots, int pos,
599
    const widechar *passInstructions, int passIC, int *searchIC, int *searchPos,
600
0
    TranslationTableRule *groupingRule, widechar groupingOp) {
601
0
  int level = 0;
602
0
  int k, kk;
603
0
  int notOperator = 0;  // whether next operand should be reversed
604
0
  TranslationTableOffset ruleOffset;
605
0
  TranslationTableRule *rule;
606
0
  TranslationTableCharacterAttributes attributes;
607
0
  while (pos < input->length) {
608
0
    *searchIC = passIC + 1;
609
0
    *searchPos = pos;
610
0
    while (*searchIC < transRule->dotslen) {
611
0
      int itsTrue = 1;  // whether we have a match or not
612
0
      if (*searchPos >= input->length) return 0;
613
0
      switch (passInstructions[*searchIC]) {
614
0
      case pass_lookback:
615
0
        *searchPos -= passInstructions[*searchIC + 1];
616
0
        if (*searchPos < 0) {
617
0
          *searchPos = 0;
618
0
          itsTrue = 0;
619
0
        }
620
0
        *searchIC += 2;
621
0
        break;
622
0
      case pass_not:
623
0
        notOperator = !notOperator;
624
0
        (*searchIC)++;
625
0
        continue;
626
0
      case pass_string:
627
0
      case pass_dots:
628
0
        kk = *searchPos;
629
0
        for (k = *searchIC + 2;
630
0
            k < *searchIC + 2 + passInstructions[*searchIC + 1]; k++)
631
0
          if (input->chars[kk] == LOU_ENDSEGMENT ||
632
0
              passInstructions[k] != input->chars[kk++]) {
633
0
            itsTrue = 0;
634
0
            break;
635
0
          }
636
0
        *searchPos += passInstructions[*searchIC + 1];
637
0
        *searchIC += passInstructions[*searchIC + 1] + 2;
638
0
        break;
639
0
      case pass_startReplace:
640
0
        (*searchIC)++;
641
0
        break;
642
0
      case pass_endReplace:
643
0
        (*searchIC)++;
644
0
        break;
645
0
      case pass_attributes:
646
0
        attributes = passInstructions[*searchIC + 1];
647
0
        attributes <<= 16;
648
0
        attributes |= passInstructions[*searchIC + 2];
649
0
        attributes <<= 16;
650
0
        attributes |= passInstructions[*searchIC + 3];
651
0
        attributes <<= 16;
652
0
        attributes |= passInstructions[*searchIC + 4];
653
0
        for (k = 0; k < passInstructions[*searchIC + 5]; k++) {
654
0
          if (input->chars[*searchPos] == LOU_ENDSEGMENT)
655
0
            itsTrue = 0;
656
0
          else {
657
0
            itsTrue = (passCharDots ? getDots(input->chars[(*searchPos)++],
658
0
                              table)
659
0
                        : getChar(input->chars[(*searchPos)++],
660
0
                              table))
661
0
                      ->attributes &
662
0
                attributes;
663
0
            if (notOperator) itsTrue = !itsTrue;
664
0
          }
665
0
          if (!itsTrue) break;
666
0
        }
667
0
        if (itsTrue) {
668
0
          for (k = passInstructions[*searchIC + 5];
669
0
              k < passInstructions[*searchIC + 6]; k++) {
670
0
            if (*searchPos >= input->length) return 0;
671
0
            if (input->chars[*searchPos] == LOU_ENDSEGMENT) {
672
0
              itsTrue = 0;
673
0
              break;
674
0
            }
675
0
            if (!((passCharDots ? getDots(input->chars[*searchPos], table)
676
0
                      : getChar(input->chars[*searchPos], table))
677
0
                      ->attributes &
678
0
                  attributes)) {
679
0
              if (!notOperator) break;
680
0
            } else if (notOperator)
681
0
              break;
682
0
            (*searchPos)++;
683
0
          }
684
0
        }
685
0
        notOperator = 0;
686
0
        *searchIC += 7;
687
0
        break;
688
0
      case pass_groupstart:
689
0
      case pass_groupend:
690
0
        ruleOffset = (passInstructions[*searchIC + 1] << 16) |
691
0
            passInstructions[*searchIC + 2];
692
0
        rule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
693
0
        if (passInstructions[*searchIC] == pass_groupstart)
694
0
          itsTrue = (input->chars[*searchPos] ==
695
0
                    rule->charsdots[2 * passCharDots])
696
0
              ? 1
697
0
              : 0;
698
0
        else
699
0
          itsTrue = (input->chars[*searchPos] ==
700
0
                    rule->charsdots[2 * passCharDots + 1])
701
0
              ? 1
702
0
              : 0;
703
0
        if (groupingRule != NULL && groupingOp == pass_groupstart &&
704
0
            rule == groupingRule) {
705
0
          if (input->chars[*searchPos] == rule->charsdots[2 * passCharDots])
706
0
            level--;
707
0
          else if (input->chars[*searchPos] ==
708
0
              rule->charsdots[2 * passCharDots + 1])
709
0
            level++;
710
0
        }
711
0
        (*searchPos)++;
712
0
        *searchIC += 3;
713
0
        break;
714
0
      case pass_swap:
715
0
        itsTrue = swapTest(*searchIC, searchPos, table, input, passInstructions);
716
0
        *searchIC += 5;
717
0
        break;
718
0
      case pass_endTest:
719
0
        if (itsTrue) {
720
0
          if ((groupingRule && level == 1) || !groupingRule) return 1;
721
0
        }
722
0
        *searchIC = transRule->dotslen;
723
0
        break;
724
0
      default:
725
0
        if (_lou_handlePassVariableTest(passInstructions, searchIC, &itsTrue))
726
0
          break;
727
0
        break;
728
0
      }
729
0
      if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) break;
730
0
      notOperator = 0;
731
0
    }
732
0
    pos++;
733
0
  }
734
0
  return 0;
735
0
}
736
737
static int
738
passDoTest(const TranslationTableHeader *table, int pos, const InString *input,
739
    int transOpcode, const TranslationTableRule *transRule, int *passCharDots,
740
    widechar const **passInstructions, int *passIC, PassRuleMatch *match,
741
0
    TranslationTableRule **groupingRule, widechar *groupingOp) {
742
0
  int searchIC, searchPos;
743
0
  int k;
744
0
  int notOperator = 0;  // whether next operand should be reversed
745
0
  TranslationTableOffset ruleOffset = 0;
746
0
  TranslationTableRule *rule = NULL;
747
0
  TranslationTableCharacterAttributes attributes = 0;
748
0
  int startMatch = pos;
749
0
  int endMatch = pos;
750
0
  int startReplace = -1;
751
0
  int endReplace = -1;
752
0
  *groupingRule = NULL;
753
0
  *passInstructions = &transRule->charsdots[transRule->charslen];
754
0
  *passIC = 0;
755
0
  if (transOpcode == CTO_Context || transOpcode == CTO_Correct)
756
0
    *passCharDots = 0;
757
0
  else
758
0
    *passCharDots = 1;
759
0
  while (*passIC < transRule->dotslen) {
760
0
    int itsTrue = 1;  // whether we have a match or not
761
    // check if `pos` is within the input string,
762
    // maybe a unsigned type would be better to omit negative values
763
0
    if (pos > input->length || pos < 0) return 0;
764
0
    switch ((*passInstructions)[*passIC]) {
765
0
    case pass_first:
766
0
      if (pos != 0) itsTrue = 0;
767
0
      (*passIC)++;
768
0
      break;
769
0
    case pass_last:
770
0
      if (pos != input->length) itsTrue = 0;
771
0
      (*passIC)++;
772
0
      break;
773
0
    case pass_lookback:
774
0
      pos -= (*passInstructions)[*passIC + 1];
775
0
      if (pos < 0) {
776
0
        searchPos = 0;
777
0
        itsTrue = 0;
778
0
      }
779
0
      *passIC += 2;
780
0
      break;
781
0
    case pass_not:
782
0
      notOperator = !notOperator;
783
0
      (*passIC)++;
784
0
      continue;
785
0
    case pass_string:
786
0
    case pass_dots:
787
0
      itsTrue = matchCurrentInput(input, pos, *passInstructions, *passIC);
788
0
      pos += (*passInstructions)[*passIC + 1];
789
0
      *passIC += (*passInstructions)[*passIC + 1] + 2;
790
0
      break;
791
0
    case pass_startReplace:
792
0
      startReplace = pos;
793
0
      (*passIC)++;
794
0
      break;
795
0
    case pass_endReplace:
796
0
      endReplace = pos;
797
0
      (*passIC)++;
798
0
      break;
799
0
    case pass_attributes:
800
0
      attributes = (*passInstructions)[*passIC + 1];
801
0
      attributes <<= 16;
802
0
      attributes |= (*passInstructions)[*passIC + 2];
803
0
      attributes <<= 16;
804
0
      attributes |= (*passInstructions)[*passIC + 3];
805
0
      attributes <<= 16;
806
0
      attributes |= (*passInstructions)[*passIC + 4];
807
0
      for (k = 0; k < (*passInstructions)[*passIC + 5]; k++) {
808
0
        if (pos >= input->length) {
809
0
          itsTrue = 0;
810
0
          break;
811
0
        }
812
0
        if (input->chars[pos] == LOU_ENDSEGMENT) {
813
0
          itsTrue = 0;
814
0
          break;
815
0
        }
816
0
        if (!((*passCharDots ? getDots(input->chars[pos], table)
817
0
                   : getChar(input->chars[pos], table))
818
0
                  ->attributes &
819
0
              attributes)) {
820
0
          if (!notOperator) {
821
0
            itsTrue = 0;
822
0
            break;
823
0
          }
824
0
        } else if (notOperator) {
825
0
          itsTrue = 0;
826
0
          break;
827
0
        }
828
0
        pos++;
829
0
      }
830
0
      if (itsTrue) {
831
0
        for (k = (*passInstructions)[*passIC + 5];
832
0
            k < (*passInstructions)[*passIC + 6] && pos < input->length;
833
0
            k++) {
834
0
          if (input->chars[pos] == LOU_ENDSEGMENT) {
835
0
            itsTrue = 0;
836
0
            break;
837
0
          }
838
0
          if (!((*passCharDots ? getDots(input->chars[pos], table)
839
0
                     : getChar(input->chars[pos], table))
840
0
                    ->attributes &
841
0
                attributes)) {
842
0
            if (!notOperator) break;
843
0
          } else if (notOperator)
844
0
            break;
845
0
          pos++;
846
0
        }
847
0
      }
848
0
      notOperator = 0;
849
0
      *passIC += 7;
850
0
      break;
851
0
    case pass_groupstart:
852
0
    case pass_groupend:
853
0
      ruleOffset = ((*passInstructions)[*passIC + 1] << 16) |
854
0
          (*passInstructions)[*passIC + 2];
855
0
      rule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
856
0
      if (*passIC == 0 ||
857
0
          (*passIC > 0 &&
858
0
              (*passInstructions)[*passIC - 1] == pass_startReplace)) {
859
0
        *groupingRule = rule;
860
0
        *groupingOp = (*passInstructions)[*passIC];
861
0
      }
862
0
      if ((*passInstructions)[*passIC] == pass_groupstart)
863
0
        itsTrue =
864
0
            (input->chars[pos] == rule->charsdots[2 * *passCharDots]) ? 1 : 0;
865
0
      else
866
0
        itsTrue = (input->chars[pos] == rule->charsdots[2 * *passCharDots + 1])
867
0
            ? 1
868
0
            : 0;
869
0
      pos++;
870
0
      *passIC += 3;
871
0
      break;
872
0
    case pass_swap:
873
0
      itsTrue = swapTest(*passIC, &pos, table, input, *passInstructions);
874
0
      *passIC += 5;
875
0
      break;
876
0
    case pass_search:
877
0
      itsTrue = doPassSearch(table, input, transRule, *passCharDots, pos,
878
0
          *passInstructions, *passIC, &searchIC, &searchPos, *groupingRule,
879
0
          *groupingOp);
880
0
      if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) return 0;
881
0
      *passIC = searchIC;
882
0
      pos = searchPos;
883
0
    case pass_endTest:
884
0
      (*passIC)++;
885
0
      endMatch = pos;
886
0
      if (startReplace == -1) {
887
0
        startReplace = startMatch;
888
0
        endReplace = endMatch;
889
0
      }
890
      // Check whetehr endReplace != -1 while startReplace! = -1
891
0
      if (startReplace < startMatch || endReplace == -1)
892
0
        return 0;
893
0
      else {
894
0
        *match = (PassRuleMatch){ .startMatch = startMatch,
895
0
          .startReplace = startReplace,
896
0
          .endReplace = endReplace,
897
0
          .endMatch = endMatch };
898
0
        return 1;
899
0
      }
900
0
      break;
901
0
    default:
902
0
      if (_lou_handlePassVariableTest(*passInstructions, passIC, &itsTrue)) break;
903
0
      return 0;
904
0
    }
905
0
    if ((!notOperator && !itsTrue) || (notOperator && itsTrue)) return 0;
906
0
    notOperator = 0;
907
0
  }
908
0
  return 0;
909
0
}
910
911
static int
912
copyCharacters(int from, int to, const TranslationTableHeader *table,
913
    const InString *input, OutString *output, int *posMapping, int transOpcode,
914
0
    int *cursorPosition, int *cursorStatus, int mode) {
915
0
  if (transOpcode == CTO_Context) {
916
0
    while (from < to) {
917
0
      if (!putCharacter(input->chars[from], table, from, input, output, posMapping,
918
0
            cursorPosition, cursorStatus, mode))
919
0
        return 0;
920
0
      from++;
921
0
    }
922
0
  } else {
923
0
    if (to > from) {
924
0
      if ((output->length + to - from) > output->maxlength) return 0;
925
0
      while (to > from) {
926
0
        posMapping[output->length] = from;
927
0
        output->chars[output->length] = input->chars[from];
928
0
        output->length++;
929
0
        from++;
930
0
      }
931
0
    }
932
0
  }
933
934
0
  return 1;
935
0
}
936
937
static int
938
passDoAction(const TranslationTableHeader *table, const InString **input,
939
    OutString *output, int *posMapping, int transOpcode,
940
    const TranslationTableRule **transRule, int passCharDots,
941
    const widechar *passInstructions, int passIC, int *pos, PassRuleMatch match,
942
    int *cursorPosition, int *cursorStatus, TranslationTableRule *groupingRule,
943
0
    widechar groupingOp, int mode) {
944
0
  int k;
945
0
  TranslationTableOffset ruleOffset = 0;
946
0
  TranslationTableRule *rule = NULL;
947
0
  int destStartMatch = output->length;
948
0
  int destStartReplace;
949
0
  int newPos = match.endReplace;
950
951
0
  if (!copyCharacters(match.startMatch, match.startReplace, table, *input, output,
952
0
        posMapping, transOpcode, cursorPosition, cursorStatus, mode))
953
0
    return 0;
954
0
  destStartReplace = output->length;
955
956
0
  while (passIC < (*transRule)->dotslen) switch (passInstructions[passIC]) {
957
0
    case pass_string:
958
0
    case pass_dots:
959
0
      if ((output->length + passInstructions[passIC + 1]) > output->maxlength)
960
0
        return 0;
961
0
      for (k = 0; k < passInstructions[passIC + 1]; ++k)
962
0
        posMapping[output->length + k] = match.startReplace;
963
0
      memcpy(&output->chars[output->length], &passInstructions[passIC + 2],
964
0
          passInstructions[passIC + 1] * CHARSIZE);
965
0
      output->length += passInstructions[passIC + 1];
966
0
      passIC += passInstructions[passIC + 1] + 2;
967
0
      break;
968
0
    case pass_groupstart:
969
0
      ruleOffset =
970
0
          (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
971
0
      rule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
972
0
      posMapping[output->length] = match.startMatch;
973
0
      output->chars[output->length++] = rule->charsdots[2 * passCharDots];
974
0
      passIC += 3;
975
0
      break;
976
0
    case pass_groupend:
977
0
      ruleOffset =
978
0
          (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
979
0
      rule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
980
0
      posMapping[output->length] = match.startMatch;
981
0
      output->chars[output->length++] = rule->charsdots[2 * passCharDots + 1];
982
0
      passIC += 3;
983
0
      break;
984
0
    case pass_swap:
985
0
      if (!swapReplace(match.startReplace, match.endReplace, table, *input, output,
986
0
            posMapping, passInstructions, passIC))
987
0
        return 0;
988
0
      passIC += 3;
989
0
      break;
990
0
    case pass_groupreplace:
991
0
      if (!groupingRule ||
992
0
          !replaceGrouping(table, input, output, transOpcode, passCharDots,
993
0
              passInstructions, passIC, match.startReplace, groupingRule,
994
0
              groupingOp))
995
0
        return 0;
996
0
      passIC += 3;
997
0
      break;
998
0
    case pass_omit:
999
0
      if (groupingRule)
1000
0
        removeGrouping(input, output, passCharDots, match.startReplace,
1001
0
            groupingRule, groupingOp);
1002
0
      passIC++;
1003
0
      break;
1004
0
    case pass_copy: {
1005
0
      int count = destStartReplace - destStartMatch;
1006
0
      if (count > 0) {
1007
0
        if (destStartReplace + count > output->maxlength) return 0;
1008
0
        memmove(&output->chars[destStartMatch], &output->chars[destStartReplace],
1009
0
            count * sizeof(*output->chars));
1010
0
        output->length -= count;
1011
0
        destStartReplace = destStartMatch;
1012
0
      }
1013
0
    }
1014
1015
0
      if (!copyCharacters(match.startReplace, match.endReplace, table, *input,
1016
0
            output, posMapping, transOpcode, cursorPosition, cursorStatus,
1017
0
            mode))
1018
0
        return 0;
1019
0
      newPos = match.endMatch;
1020
0
      passIC++;
1021
0
      break;
1022
0
    default:
1023
0
      if (_lou_handlePassVariableAction(passInstructions, &passIC)) break;
1024
0
      return 0;
1025
0
    }
1026
0
  *pos = newPos;
1027
0
  return 1;
1028
0
}
1029
1030
static void
1031
passSelectRule(const TranslationTableHeader *table, int pos, int currentPass,
1032
    const InString *input, int *transOpcode, const TranslationTableRule **transRule,
1033
    int *transCharslen, int *passCharDots, widechar const **passInstructions,
1034
    int *passIC, PassRuleMatch *match, TranslationTableRule **groupingRule,
1035
0
    widechar *groupingOp) {
1036
0
  if (!findForPassRule(table, pos, currentPass, input, transOpcode, transRule,
1037
0
        transCharslen, passCharDots, passInstructions, passIC, match,
1038
0
        groupingRule, groupingOp)) {
1039
0
    *transOpcode = CTO_Always;
1040
0
  }
1041
0
}
1042
1043
static int
1044
translatePass(const TranslationTableHeader *table, int currentPass, const InString *input,
1045
    OutString *output, int *posMapping, int *realInlen, int *cursorPosition,
1046
0
    int *cursorStatus, int mode) {
1047
0
  int pos;
1048
0
  int transOpcode;
1049
0
  const TranslationTableRule *transRule;
1050
0
  int transCharslen;
1051
0
  int passCharDots;
1052
0
  const widechar *passInstructions;
1053
0
  int passIC; /* Instruction counter */
1054
0
  PassRuleMatch patternMatch;
1055
0
  TranslationTableRule *groupingRule;
1056
0
  widechar groupingOp;
1057
0
  const InString *origInput = input;
1058
0
  pos = output->length = 0;
1059
0
  int posIncremented = 1;
1060
0
  _lou_resetPassVariables();
1061
0
  while (pos < input->length) { /* the main multipass translation loop */
1062
    // check posIncremented to avoid endless loop
1063
0
    if (!posIncremented)
1064
0
      transOpcode = CTO_Always;
1065
0
    else
1066
0
      passSelectRule(table, pos, currentPass, input, &transOpcode, &transRule,
1067
0
          &transCharslen, &passCharDots, &passInstructions, &passIC,
1068
0
          &patternMatch, &groupingRule, &groupingOp);
1069
0
    posIncremented = 1;
1070
0
    switch (transOpcode) {
1071
0
    case CTO_Context:
1072
0
    case CTO_Pass2:
1073
0
    case CTO_Pass3:
1074
0
    case CTO_Pass4: {
1075
0
      const InString *inputBefore = input;
1076
0
      int posBefore = pos;
1077
0
      if (appliedRules != NULL && appliedRulesCount < maxAppliedRules)
1078
0
        appliedRules[appliedRulesCount++] = transRule;
1079
0
      if (!passDoAction(table, &input, output, posMapping, transOpcode, &transRule,
1080
0
            passCharDots, passInstructions, passIC, &pos, patternMatch,
1081
0
            cursorPosition, cursorStatus, groupingRule, groupingOp, mode))
1082
0
        goto failure;
1083
0
      if (input->bufferIndex != inputBefore->bufferIndex &&
1084
0
          inputBefore->bufferIndex != origInput->bufferIndex)
1085
0
        releaseStringBuffer(inputBefore->bufferIndex);
1086
0
      if (pos == posBefore) posIncremented = 0;
1087
0
      break;
1088
0
    }
1089
0
    case CTO_Always:
1090
0
      if ((output->length + 1) > output->maxlength) goto failure;
1091
0
      posMapping[output->length] = pos;
1092
0
      output->chars[output->length++] = input->chars[pos++];
1093
0
      break;
1094
0
    default:
1095
0
      goto failure;
1096
0
    }
1097
0
  }
1098
0
failure:
1099
0
  if (pos < input->length) {
1100
0
    while (checkDotsAttr(input->chars[pos], CTC_Space, table))
1101
0
      if (++pos == input->length) break;
1102
0
  }
1103
0
  *realInlen = pos;
1104
0
  if (input->bufferIndex != origInput->bufferIndex)
1105
0
    releaseStringBuffer(input->bufferIndex);
1106
0
  return 1;
1107
0
}
1108
1109
0
/* Smaller of two values. Function-like macro: the argument that is selected
 * is evaluated twice, so do not pass expressions with side effects. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
1110
1111
static int
1112
translateString(const TranslationTableHeader *table, int mode, int currentPass,
1113
    const InString *input, OutString *output, int *posMapping, formtype *typebuf,
1114
    unsigned char *srcSpacing, unsigned char *destSpacing, unsigned int *wordBuffer,
1115
    EmphasisInfo *emphasisBuffer, int haveEmphasis, int *realInlen,
1116
    int *cursorPosition, int *cursorStatus, int compbrlStart, int compbrlEnd);
1117
1118
int EXPORT_CALL
1119
lou_translateString(const char *tableList, const widechar *inbufx, int *inlen,
1120
0
    widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int mode) {
1121
0
  return lou_translate(tableList, inbufx, inlen, outbuf, outlen, typeform, spacing,
1122
0
      NULL, NULL, NULL, mode);
1123
0
}
1124
1125
int EXPORT_CALL
1126
lou_translate(const char *tableList, const widechar *inbufx, int *inlen, widechar *outbuf,
1127
    int *outlen, formtype *typeform, char *spacing, int *outputPos, int *inputPos,
1128
0
    int *cursorPos, int mode) {
1129
0
  return _lou_translate(tableList, tableList, inbufx, inlen, outbuf, outlen, typeform,
1130
0
      spacing, outputPos, inputPos, cursorPos, mode, NULL, NULL);
1131
0
}
1132
1133
int EXPORT_CALL
1134
_lou_translate(const char *tableList, const char *displayTableList,
1135
    const widechar *inbufx, int *inlen, widechar *outbuf, int *outlen,
1136
    formtype *typeform, char *spacing, int *outputPos, int *inputPos, int *cursorPos,
1137
0
    int mode, const TranslationTableRule **rules, int *rulesLen) {
1138
  // int i;
1139
  // for(i = 0; i < *inlen; i++)
1140
  // {
1141
  //  outbuf[i] = inbufx[i];
1142
  //  if(inputPos)
1143
  //    inputPos[i] = i;
1144
  //  if(outputPos)
1145
  //    outputPos[i] = i;
1146
  // }
1147
  // *inlen = i;
1148
  // *outlen = i;
1149
  // return 1;
1150
0
  const TranslationTableHeader *table;
1151
0
  const DisplayTableHeader *displayTable;
1152
0
  InString input;
1153
0
  OutString output;
1154
  // posMapping contains position mapping info between the initial input and the output
1155
  // of the current pass. It is 1 longer than the output. The values are monotonically
1156
  // increasing and can range between -1 and the (consumed) input length. At the end the
1157
  // position info is passed to the user as an inputPos and outputPos array. inputPos
1158
  // has the length of the final output and has values ranging from 0 to inlen-1.
1159
  // outputPos has the length of the (consumed) initial input and has values ranging
1160
  // from 0 to outlen-1.
1161
0
  int *posMapping;
1162
0
  int *posMapping1;
1163
0
  int *posMapping2;
1164
0
  int *posMapping3;
1165
0
  formtype *typebuf;
1166
0
  unsigned char *srcSpacing;
1167
0
  unsigned char *destSpacing;
1168
0
  unsigned int *wordBuffer;
1169
0
  EmphasisInfo *emphasisBuffer;
1170
0
  int cursorPosition;
1171
0
  int cursorStatus;
1172
0
  int haveEmphasis;
1173
0
  int compbrlStart = -1;
1174
0
  int compbrlEnd = -1;
1175
0
  int k;
1176
0
  int goodTrans = 1;
1177
0
  if (tableList == NULL || inbufx == NULL || inlen == NULL || outbuf == NULL ||
1178
0
      outlen == NULL)
1179
0
    return 0;
1180
0
  _lou_logMessage(LOU_LOG_ALL, "Performing translation: tableList=%s, inlen=%d",
1181
0
      tableList, *inlen);
1182
0
  _lou_logWidecharBuf(LOU_LOG_ALL, "Inbuf=", inbufx, *inlen);
1183
1184
0
  if (!_lou_isValidMode(mode))
1185
0
    _lou_logMessage(LOU_LOG_ERROR, "Invalid mode parameter: %d", mode);
1186
1187
0
  if (displayTableList == NULL) displayTableList = tableList;
1188
0
  _lou_getTable(tableList, displayTableList, &table, &displayTable);
1189
0
  if (table == NULL || *inlen < 0 || *outlen < 0) return 0;
1190
0
  k = 0;
1191
0
  while (k < *inlen && inbufx[k]) k++;
1192
0
  input = (InString){ .chars = inbufx, .length = k, .bufferIndex = -1 };
1193
0
  haveEmphasis = 0;
1194
0
  if (!(typebuf = _lou_allocMem(alloc_typebuf, 0, input.length, *outlen))) return 0;
1195
0
  if (typeform != NULL) {
1196
0
    for (k = 0; k < input.length; k++) {
1197
0
      typebuf[k] = typeform[k];
1198
0
      if (typebuf[k] & EMPHASIS) haveEmphasis = 1;
1199
0
    }
1200
0
  } else
1201
0
    memset(typebuf, 0, input.length * sizeof(formtype));
1202
1203
0
  if (!(spacing == NULL || *spacing == 'X'))
1204
0
    srcSpacing = (unsigned char *)spacing;
1205
0
  else
1206
0
    srcSpacing = NULL;
1207
0
  if (outputPos != NULL)
1208
0
    for (k = 0; k < input.length; k++) outputPos[k] = -1;
1209
0
  if (cursorPos != NULL && *cursorPos >= 0) {
1210
0
    cursorStatus = 0;
1211
0
    cursorPosition = *cursorPos;
1212
0
    if ((mode & (compbrlAtCursor | compbrlLeftCursor))) {
1213
0
      compbrlStart = cursorPosition;
1214
0
      if (checkCharAttr(input.chars[compbrlStart], CTC_Space, table))
1215
        /* It would have been simpler to just set compbrlStart and compbrlEnd to
1216
         * -1 (i.e. disable compbrlAtCursor/compbrlLeftCursor mode) if the cursor
1217
         * is set on a space. But maybe there are cases where a space in computer
1218
         * braille does not map to a blank cell, and the user expects to see the
1219
         * computer braille representation when the space is under the cursor, so
1220
         * we better leave it as it is.
1221
         */
1222
0
        compbrlEnd = compbrlStart + 1;
1223
0
      else {
1224
0
        while (compbrlStart >= 0 &&
1225
0
            !checkCharAttr(input.chars[compbrlStart], CTC_Space, table))
1226
0
          compbrlStart--;
1227
0
        compbrlStart++;
1228
0
        compbrlEnd = cursorPosition;
1229
0
        if (!(mode & compbrlLeftCursor))
1230
0
          while (compbrlEnd < input.length &&
1231
0
              !checkCharAttr(input.chars[compbrlEnd], CTC_Space, table))
1232
0
            compbrlEnd++;
1233
0
      }
1234
0
    }
1235
0
  } else {
1236
0
    cursorPosition = -1;
1237
0
    cursorStatus = 1; /* so it won't check cursor position */
1238
0
  }
1239
0
  if (!(posMapping1 = _lou_allocMem(alloc_posMapping1, 0, input.length, *outlen)))
1240
0
    return 0;
1241
0
  if (table->numPasses > 1 || table->corrections) {
1242
0
    if (!(posMapping2 = _lou_allocMem(alloc_posMapping2, 0, input.length, *outlen)))
1243
0
      return 0;
1244
0
    if (!(posMapping3 = _lou_allocMem(alloc_posMapping3, 0, input.length, *outlen)))
1245
0
      return 0;
1246
0
  }
1247
0
  if (srcSpacing != NULL) {
1248
0
    if (!(destSpacing = _lou_allocMem(alloc_destSpacing, 0, input.length, *outlen)))
1249
0
      goodTrans = 0;
1250
0
    else
1251
0
      memset(destSpacing, '*', *outlen);
1252
0
  } else
1253
0
    destSpacing = NULL;
1254
0
  appliedRulesCount = 0;
1255
0
  if (rules != NULL && rulesLen != NULL) {
1256
0
    appliedRules = rules;
1257
0
    maxAppliedRules = *rulesLen;
1258
0
  } else {
1259
0
    appliedRules = NULL;
1260
0
    maxAppliedRules = 0;
1261
0
  }
1262
0
  {
1263
0
    int idx;
1264
0
    if (!stringBufferPool) initStringBufferPool();
1265
0
    for (idx = 0; idx < stringBufferPool->size; idx++) releaseStringBuffer(idx);
1266
0
    idx = getStringBuffer(*outlen);
1267
0
    output = (OutString){ .chars = stringBufferPool->buffers[idx],
1268
0
      .maxlength = *outlen,
1269
0
      .length = 0,
1270
0
      .bufferIndex = idx };
1271
0
  }
1272
0
  posMapping = posMapping1;
1273
1274
0
  int currentPass = table->corrections ? 0 : 1;
1275
0
  int *passPosMapping = posMapping;
1276
0
  while (1) {
1277
0
    int realInlen;
1278
0
    switch (currentPass) {
1279
0
    case 0:
1280
0
      goodTrans = makeCorrections(table, &input, &output, passPosMapping, typebuf,
1281
0
          &realInlen, &cursorPosition, &cursorStatus, mode);
1282
0
      break;
1283
0
    case 1: {
1284
0
      if (!(wordBuffer = _lou_allocMem(alloc_wordBuffer, 0, input.length, *outlen)))
1285
0
        return 0;
1286
0
      if (!(emphasisBuffer = _lou_allocMem(
1287
0
              alloc_emphasisBuffer, 0, input.length, *outlen)))
1288
0
        return 0;
1289
0
      goodTrans = translateString(table, mode, currentPass, &input, &output,
1290
0
          passPosMapping, typebuf, srcSpacing, destSpacing, wordBuffer,
1291
0
          emphasisBuffer, haveEmphasis, &realInlen, &cursorPosition,
1292
0
          &cursorStatus, compbrlStart, compbrlEnd);
1293
0
      break;
1294
0
    }
1295
0
    default:
1296
0
      goodTrans = translatePass(table, currentPass, &input, &output, passPosMapping,
1297
0
          &realInlen, &cursorPosition, &cursorStatus, mode);
1298
0
      break;
1299
0
    }
1300
0
    passPosMapping[output.length] = realInlen;
1301
0
    if (passPosMapping == posMapping) {
1302
0
      passPosMapping = posMapping2;
1303
0
    } else {
1304
0
      int *prevPosMapping = posMapping3;
1305
0
      memcpy((int *)prevPosMapping, posMapping, (*outlen + 1) * sizeof(int));
1306
0
      for (k = 0; k <= output.length; k++)
1307
0
        if (passPosMapping[k] < 0)
1308
0
          posMapping[k] = prevPosMapping[0];
1309
0
        else
1310
0
          posMapping[k] = prevPosMapping[passPosMapping[k]];
1311
0
    }
1312
0
    currentPass++;
1313
0
    if (currentPass <= table->numPasses && goodTrans) {
1314
0
      int idx;
1315
0
      releaseStringBuffer(input.bufferIndex);
1316
0
      input = (InString){ .chars = output.chars,
1317
0
        .length = output.length,
1318
0
        .bufferIndex = output.bufferIndex };
1319
0
      idx = getStringBuffer(*outlen);
1320
0
      output = (OutString){ .chars = stringBufferPool->buffers[idx],
1321
0
        .maxlength = *outlen,
1322
0
        .length = 0,
1323
0
        .bufferIndex = idx };
1324
0
      continue;
1325
0
    }
1326
0
    break;
1327
0
  }
1328
0
  if (goodTrans) {
1329
0
    for (k = 0; k < output.length; k++) {
1330
0
      if (typeform != NULL) {
1331
0
        if ((output.chars[k] & (LOU_DOT_7 | LOU_DOT_8)))
1332
0
          typeform[k] = '8';
1333
0
        else
1334
0
          typeform[k] = '0';
1335
0
      }
1336
0
      if ((mode & dotsIO)) {
1337
0
        if ((mode & ucBrl))
1338
0
          outbuf[k] = ((output.chars[k] & 0xff) | LOU_ROW_BRAILLE);
1339
0
        else
1340
0
          outbuf[k] = output.chars[k];
1341
0
      } else {
1342
0
        outbuf[k] = _lou_getCharForDots(output.chars[k], displayTable);
1343
0
        if (!outbuf[k]) {
1344
          // assume that if NUL character is returned, it's because the display
1345
          // table has no mapping for the dot pattern (not because it maps to
1346
          // NUL)
1347
0
          _lou_logMessage(LOU_LOG_ERROR,
1348
0
              "%s: no mapping for dot pattern %s in display table",
1349
0
              displayTableList, _lou_showDots(&output.chars[k], 1));
1350
0
          return 0;
1351
0
        }
1352
0
      }
1353
0
    }
1354
0
    *inlen = posMapping[output.length];
1355
0
    *outlen = output.length;
1356
    // Compute inputPos and outputPos from posMapping. The value at the last index of
1357
    // posMapping is currectly not used.
1358
0
    if (inputPos != NULL) {
1359
0
      for (k = 0; k < *outlen; k++)
1360
0
        if (posMapping[k] < 0)
1361
0
          inputPos[k] = 0;
1362
0
        else if (posMapping[k] > *inlen - 1)
1363
0
          inputPos[k] = *inlen - 1;
1364
0
        else
1365
0
          inputPos[k] = posMapping[k];
1366
0
    }
1367
0
    if (outputPos != NULL) {
1368
0
      int inpos = -1;
1369
0
      int outpos = -1;
1370
0
      for (k = 0; k < *outlen; k++)
1371
0
        if (posMapping[k] > inpos) {
1372
0
          while (inpos < posMapping[k]) {
1373
0
            if (inpos >= 0 && inpos < *inlen)
1374
0
              outputPos[inpos] = outpos < 0 ? 0 : outpos;
1375
0
            inpos++;
1376
0
          }
1377
0
          outpos = k;
1378
0
        }
1379
0
      if (inpos < 0) inpos = 0;
1380
0
      while (inpos < *inlen) outputPos[inpos++] = outpos;
1381
0
    }
1382
0
  }
1383
0
  if (destSpacing != NULL) {
1384
0
    memcpy(srcSpacing, destSpacing, input.length);
1385
0
    srcSpacing[input.length] = 0;
1386
0
  }
1387
0
  if (cursorPos != NULL && *cursorPos != -1) {
1388
0
    if (outputPos != NULL)
1389
0
      *cursorPos = outputPos[*cursorPos];
1390
0
    else
1391
0
      *cursorPos = cursorPosition;
1392
0
  }
1393
0
  if (rulesLen != NULL) *rulesLen = appliedRulesCount;
1394
0
  _lou_logMessage(LOU_LOG_ALL, "Translation complete: outlen=%d", *outlen);
1395
0
  _lou_logWidecharBuf(LOU_LOG_ALL, "Outbuf=", (const widechar *)outbuf, *outlen);
1396
1397
0
  return goodTrans;
1398
0
}
1399
1400
int EXPORT_CALL
1401
lou_translatePrehyphenated(const char *tableList, const widechar *inbufx, int *inlen,
1402
    widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int *outputPos,
1403
    int *inputPos, int *cursorPos, char *inputHyphens, char *outputHyphens,
1404
0
    int mode) {
1405
0
  int rv = 1;
1406
0
  int *alloc_inputPos = NULL;
1407
0
  if (inputHyphens != NULL) {
1408
0
    if (outputHyphens == NULL) return 0;
1409
0
    if (inputPos == NULL) {
1410
0
      if ((alloc_inputPos = malloc(*outlen * sizeof(int))) == NULL)
1411
0
        _lou_outOfMemory();
1412
0
      inputPos = alloc_inputPos;
1413
0
    }
1414
0
  }
1415
0
  if (lou_translate(tableList, inbufx, inlen, outbuf, outlen, typeform, spacing,
1416
0
        outputPos, inputPos, cursorPos, mode)) {
1417
0
    if (inputHyphens != NULL) {
1418
0
      int inpos = 0;
1419
0
      int outpos;
1420
0
      for (outpos = 0; outpos < *outlen; outpos++) {
1421
0
        int new_inpos = inputPos[outpos];
1422
0
        if (new_inpos < inpos) {
1423
0
          rv = 0;
1424
0
          break;
1425
0
        }
1426
0
        if (new_inpos > inpos)
1427
0
          outputHyphens[outpos] = inputHyphens[new_inpos];
1428
0
        else
1429
0
          outputHyphens[outpos] = '0';
1430
0
        inpos = new_inpos;
1431
0
      }
1432
0
    }
1433
0
  }
1434
0
  if (alloc_inputPos != NULL) free(alloc_inputPos);
1435
0
  return rv;
1436
0
}
1437
1438
static int
1439
hyphenateWord(const widechar *word, int wordSize, char *hyphens,
1440
0
    const TranslationTableHeader *table) {
1441
0
  widechar *prepWord;
1442
0
  int i, k, limit;
1443
0
  int stateNum;
1444
0
  widechar ch;
1445
0
  HyphenationState *statesArray =
1446
0
      (HyphenationState *)&table->ruleArea[table->hyphenStatesArray];
1447
0
  HyphenationState *currentState;
1448
0
  HyphenationTrans *transitionsArray;
1449
0
  char *hyphenPattern;
1450
0
  int patternOffset;
1451
0
  if (!table->hyphenStatesArray || (wordSize + 3) > MAXSTRING) return 0;
1452
0
  prepWord = (widechar *)calloc(wordSize + 3, sizeof(widechar));
1453
  /* prepWord is of the format ".hello."
1454
   * hyphens is the length of the word "hello" "00000" */
1455
0
  prepWord[0] = '.';
1456
0
  for (i = 0; i < wordSize; i++) {
1457
0
    prepWord[i + 1] = toLowercase(table, getChar(word[i], table));
1458
0
    hyphens[i] = '0';
1459
0
  }
1460
0
  prepWord[wordSize + 1] = '.';
1461
1462
  /* now, run the finite state machine */
1463
0
  stateNum = 0;
1464
1465
  // we need to walk all of ".hello."
1466
0
  for (i = 0; i < wordSize + 2; i++) {
1467
0
    ch = prepWord[i];
1468
0
    while (1) {
1469
0
      if (stateNum == 0xffff) {
1470
0
        stateNum = 0;
1471
0
        goto nextLetter;
1472
0
      }
1473
0
      currentState = &statesArray[stateNum];
1474
0
      if (currentState->trans.offset) {
1475
0
        transitionsArray =
1476
0
            (HyphenationTrans *)&table->ruleArea[currentState->trans.offset];
1477
0
        for (k = 0; k < currentState->numTrans; k++) {
1478
0
          if (transitionsArray[k].ch == ch) {
1479
0
            stateNum = transitionsArray[k].newState;
1480
0
            goto stateFound;
1481
0
          }
1482
0
        }
1483
0
      }
1484
0
      stateNum = currentState->fallbackState;
1485
0
    }
1486
0
  stateFound:
1487
0
    currentState = &statesArray[stateNum];
1488
0
    if (currentState->hyphenPattern) {
1489
0
      hyphenPattern = (char *)&table->ruleArea[currentState->hyphenPattern];
1490
0
      patternOffset = i + 1 - (int)strlen(hyphenPattern);
1491
1492
      /* Need to ensure that we don't overrun hyphens,
1493
       * in some cases hyphenPattern is longer than the remaining letters,
1494
       * and if we write out all of it we would have overshot our buffer. */
1495
0
      limit = MIN((int)strlen(hyphenPattern), wordSize - patternOffset);
1496
0
      for (k = 0; k < limit; k++) {
1497
0
        if (hyphens[patternOffset + k] < hyphenPattern[k])
1498
0
          hyphens[patternOffset + k] = hyphenPattern[k];
1499
0
      }
1500
0
    }
1501
0
  nextLetter:;
1502
0
  }
1503
0
  hyphens[wordSize] = 0;
1504
0
  free(prepWord);
1505
0
  return 1;
1506
0
}
1507
1508
static int
1509
doCompTrans(int start, int end, const TranslationTableHeader *table, int *pos,
1510
    const InString *input, OutString *output, int *posMapping,
1511
    EmphasisInfo *emphasisBuffer, const TranslationTableRule **transRule,
1512
    int *cursorPosition, int *cursorStatus, int mode);
1513
1514
// The `shift' argument should be used with care because it can mess up the positions
1515
// array which is supposed to be monotonically increasing. It is set to -1 in order to
1516
//  append certain indicators (endemphword, endemph, endemphphrase after, endcapsword,
1517
// endcaps, endcapsphrase after) to the preceding character.
1518
static int
1519
for_updatePositions(const widechar *outChars, int inLength, int outLength, int shift,
1520
    int pos, const InString *input, OutString *output, int *posMapping,
1521
0
    int *cursorPosition, int *cursorStatus) {
1522
0
  int k;
1523
0
  if ((output->length + outLength) > output->maxlength ||
1524
0
      (pos + inLength) > input->length)
1525
0
    return 0;
1526
0
  memcpy(&output->chars[output->length], outChars, outLength * CHARSIZE);
1527
0
  if (!*cursorStatus) {
1528
0
    if (*cursorPosition >= pos && *cursorPosition < (pos + inLength)) {
1529
0
      *cursorPosition = output->length;
1530
0
      *cursorStatus = 1;
1531
0
    } else if (input->chars[*cursorPosition] == 0 &&
1532
0
        *cursorPosition == (pos + inLength)) {
1533
0
      *cursorPosition = output->length + outLength / 2 + 1;
1534
0
      *cursorStatus = 1;
1535
0
    }
1536
0
  } else if (*cursorStatus == 2 && *cursorPosition == pos)
1537
0
    *cursorPosition = output->length;
1538
0
  for (k = 0; k < outLength; k++) posMapping[output->length + k] = pos + shift;
1539
0
  output->length += outLength;
1540
0
  return 1;
1541
0
}
1542
1543
static int
1544
syllableBreak(const TranslationTableHeader *table, int pos, const InString *input,
1545
0
    int transCharslen) {
1546
0
  int wordStart = 0;
1547
0
  int wordEnd = 0;
1548
0
  int wordSize = 0;
1549
0
  int k = 0;
1550
0
  char *hyphens = NULL;
1551
0
  for (wordStart = pos; wordStart >= 0; wordStart--)
1552
0
    if (!((getChar(input->chars[wordStart], table))->attributes & CTC_Letter)) {
1553
0
      wordStart++;
1554
0
      break;
1555
0
    }
1556
0
  if (wordStart < 0) wordStart = 0;
1557
0
  for (wordEnd = pos; wordEnd < input->length; wordEnd++)
1558
0
    if (!((getChar(input->chars[wordEnd], table))->attributes & CTC_Letter)) {
1559
0
      wordEnd--;
1560
0
      break;
1561
0
    }
1562
0
  if (wordEnd == input->length) wordEnd--;
1563
  /* At this stage wordStart is the 0 based index of the first letter in the word,
1564
   * wordEnd is the 0 based index of the last letter in the word.
1565
   * example: "hello" wordstart=0, wordEnd=4. */
1566
0
  wordSize = wordEnd - wordStart + 1;
1567
0
  hyphens = (char *)calloc(wordSize + 1, sizeof(char));
1568
0
  if (!hyphenateWord(&input->chars[wordStart], wordSize, hyphens, table)) {
1569
0
    free(hyphens);
1570
0
    return 0;
1571
0
  }
1572
0
  for (k = pos - wordStart + 1; k < (pos - wordStart + transCharslen); k++)
1573
0
    if (hyphens[k] & 1) {
1574
0
      free(hyphens);
1575
0
      return 1;
1576
0
    }
1577
0
  free(hyphens);
1578
0
  return 0;
1579
0
}
1580
1581
static void
1582
setBefore(const TranslationTableHeader *table, int pos, const InString *input,
1583
0
    TranslationTableCharacterAttributes *beforeAttributes) {
1584
0
  widechar before;
1585
0
  if (pos >= 2 && input->chars[pos - 1] == LOU_ENDSEGMENT)
1586
0
    before = input->chars[pos - 2];
1587
0
  else
1588
0
    before = (pos == 0) ? ' ' : input->chars[pos - 1];
1589
0
  *beforeAttributes = (getChar(before, table))->attributes;
1590
0
}
1591
1592
static void
1593
setAfter(int length, const TranslationTableHeader *table, int pos, const InString *input,
1594
0
    TranslationTableCharacterAttributes *afterAttributes) {
1595
0
  widechar after;
1596
0
  if ((pos + length + 2) < input->length && input->chars[pos + 1] == LOU_ENDSEGMENT)
1597
0
    after = input->chars[pos + 2];
1598
0
  else
1599
0
    after = (pos + length < input->length) ? input->chars[pos + length] : ' ';
1600
0
  *afterAttributes = (getChar(after, table))->attributes;
1601
0
}
1602
1603
static int
1604
brailleIndicatorDefined(TranslationTableOffset offset,
1605
0
    const TranslationTableHeader *table, const TranslationTableRule **indicRule) {
1606
0
  if (!offset) return 0;
1607
0
  *indicRule = (TranslationTableRule *)&table->ruleArea[offset];
1608
0
  return 1;
1609
0
}
1610
1611
/**
1612
 * Return 1 if both `indicator1` and `indicator2` are defined and use the same dot
1613
 * pattern. Otherwise return 0.
1614
 */
1615
static int
1616
isIndicatorEqual(TranslationTableOffset indicator1, TranslationTableOffset indicator2,
1617
0
    const TranslationTableHeader *table) {
1618
0
  const TranslationTableRule *indicatorRule1;
1619
0
  const TranslationTableRule *indicatorRule2;
1620
1621
0
  if (brailleIndicatorDefined(indicator1, table, &indicatorRule1) &&
1622
0
      brailleIndicatorDefined(indicator2, table, &indicatorRule2) &&
1623
0
      indicatorRule1->dotslen == indicatorRule2->dotslen &&
1624
0
      !memcmp(&indicatorRule1->charsdots[0], &indicatorRule2->charsdots[0],
1625
0
          indicatorRule1->dotslen * CHARSIZE)) {
1626
0
    return 1;
1627
0
  } else {
1628
0
    return 0;
1629
0
  }
1630
0
}
1631
1632
static int
1633
0
capsletterDefined(const TranslationTableHeader *table) {
1634
0
  return table->emphRules[MAX_EMPH_CLASSES][letterOffset];
1635
0
}
1636
1637
static int
1638
validMatch(const TranslationTableHeader *table, int pos, const InString *input,
1639
0
    formtype *typebuf, const TranslationTableRule *transRule, int transCharslen) {
1640
  /* Analyze the typeform parameter and also check for capitalization */
1641
0
  TranslationTableCharacter *inputChar;
1642
0
  TranslationTableCharacter *ruleChar;
1643
0
  TranslationTableCharacterAttributes prevAttr = 0;
1644
0
  int k;
1645
0
  int kk = 0;
1646
0
  if (!transCharslen) return 0;
1647
0
  for (k = pos; k < pos + transCharslen; k++) {
1648
0
    if (input->chars[k] == LOU_ENDSEGMENT) {
1649
0
      if (k == pos && transCharslen == 1)
1650
0
        return 1;
1651
0
      else
1652
0
        return 0;
1653
0
    }
1654
0
    inputChar = getChar(input->chars[k], table);
1655
0
    if (k == pos) prevAttr = inputChar->attributes;
1656
0
    ruleChar = getChar(transRule->charsdots[kk++], table);
1657
0
    if (toLowercase(table, inputChar) != toLowercase(table, ruleChar)) return 0;
1658
0
    if (typebuf != NULL && (typebuf[pos] & CAPSEMPH) == 0 &&
1659
0
        (typebuf[k] | typebuf[pos]) != typebuf[pos])
1660
0
      return 0;
1661
0
    if (inputChar->attributes != CTC_Letter) {
1662
0
      if (k != (pos + 1) && (prevAttr & CTC_Letter) &&
1663
0
          (inputChar->attributes & CTC_Letter) &&
1664
0
          ((inputChar->attributes &
1665
0
               (CTC_LowerCase | CTC_UpperCase | CTC_Letter)) !=
1666
0
              (prevAttr & (CTC_LowerCase | CTC_UpperCase | CTC_Letter))))
1667
0
        return 0;
1668
0
    }
1669
0
    prevAttr = inputChar->attributes;
1670
0
  }
1671
0
  return 1;
1672
0
}
1673
1674
static int
1675
insertNumberSign(const TranslationTableHeader *table, int pos, const InString *input,
1676
    OutString *output, int *posMapping, int prevTransOpcode, int *cursorPosition,
1677
0
    int *cursorStatus, TranslationTableCharacterAttributes beforeAttributes) {
1678
0
  const TranslationTableRule *numberSign;
1679
0
  if (brailleIndicatorDefined(table->numberSign, table, &numberSign) &&
1680
0
      checkCharAttr_safe(input, pos, CTC_Digit, table) &&
1681
0
      (prevTransOpcode == CTO_ExactDots ||
1682
0
          (!(beforeAttributes & CTC_Digit) && prevTransOpcode != CTO_MidNum))) {
1683
0
    if (!for_updatePositions(&numberSign->charsdots[0], 0, numberSign->dotslen, 0,
1684
0
          pos, input, output, posMapping, cursorPosition, cursorStatus))
1685
0
      return 0;
1686
0
  }
1687
0
  return 1;
1688
0
}
1689
1690
static int
1691
0
isNoLetsign(widechar c, const TranslationTableHeader *table) {
1692
0
  for (int k = 0; k < table->noLetsignCount; k++)
1693
0
    if (c == table->noLetsign[k]) return 1;
1694
0
  return 0;
1695
0
}
1696
1697
static int
1698
0
isNoLetsignBefore(widechar c, const TranslationTableHeader *table) {
1699
0
  for (int k = 0; k < table->noLetsignBeforeCount; k++)
1700
0
    if (c == table->noLetsignBefore[k]) return 1;
1701
0
  return 0;
1702
0
}
1703
1704
static int
1705
0
isNoLetsignAfter(widechar c, const TranslationTableHeader *table) {
1706
0
  for (int k = 0; k < table->noLetsignAfterCount; k++)
1707
0
    if (c == table->noLetsignAfter[k]) return 1;
1708
0
  return 0;
1709
0
}
1710
1711
static int
1712
insertLetterSign(const TranslationTableHeader *table, int pos, const InString *input,
1713
    OutString *output, int *posMapping, int transOpcode, int *cursorPosition,
1714
0
    int *cursorStatus, TranslationTableCharacterAttributes beforeAttributes) {
1715
0
  const TranslationTableRule *letterSign;
1716
0
  if (brailleIndicatorDefined(table->letterSign, table, &letterSign)) {
1717
0
    if (transOpcode == CTO_Contraction) {
1718
0
      if (!for_updatePositions(&letterSign->charsdots[0], 0, letterSign->dotslen, 0,
1719
0
            pos, input, output, posMapping, cursorPosition, cursorStatus))
1720
0
        return 0;
1721
0
    } else if ((checkCharAttr_safe(input, pos, CTC_Letter, table) &&
1722
0
               !(beforeAttributes & CTC_Letter)) &&
1723
0
        (!checkCharAttr_safe(input, pos + 1, CTC_Letter, table) ||
1724
0
            (beforeAttributes & CTC_Digit))) {
1725
0
      if (pos > 0 && isNoLetsignBefore(input->chars[pos - 1], table)) return 1;
1726
0
      if (isNoLetsign(input->chars[pos], table)) return 1;
1727
0
      if (pos + 1 < input->length && isNoLetsignAfter(input->chars[pos + 1], table))
1728
0
        return 1;
1729
0
      if (!for_updatePositions(&letterSign->charsdots[0], 0, letterSign->dotslen, 0,
1730
0
            pos, input, output, posMapping, cursorPosition, cursorStatus))
1731
0
        return 0;
1732
0
    }
1733
0
  }
1734
0
  return 1;
1735
0
}
1736
1737
static int
1738
onlyLettersBehind(const TranslationTableHeader *table, int pos, const InString *input,
1739
0
    TranslationTableCharacterAttributes beforeAttributes) {
1740
  /* Actually, spaces, then letters */
1741
0
  int k;
1742
0
  if (!(beforeAttributes & CTC_Space)) return 0;
1743
0
  for (k = pos - 2; k >= 0; k--) {
1744
0
    TranslationTableCharacterAttributes attr =
1745
0
        (getChar(input->chars[k], table))->attributes;
1746
0
    if ((attr & CTC_Space)) continue;
1747
0
    if ((attr & CTC_Letter))
1748
0
      return 1;
1749
0
    else
1750
0
      return 0;
1751
0
  }
1752
0
  return 1;
1753
0
}
1754
1755
static int
1756
onlyLettersAhead(const TranslationTableHeader *table, int pos, const InString *input,
1757
0
    int transCharslen, TranslationTableCharacterAttributes afterAttributes) {
1758
  /* Actullly, spaces, then letters */
1759
0
  int k;
1760
0
  if (!(afterAttributes & CTC_Space)) return 0;
1761
0
  for (k = pos + transCharslen + 1; k < input->length; k++) {
1762
0
    TranslationTableCharacterAttributes attr =
1763
0
        (getChar(input->chars[k], table))->attributes;
1764
0
    if ((attr & CTC_Space)) continue;
1765
0
    if ((attr & (CTC_Letter | CTC_LitDigit)))
1766
0
      return 1;
1767
0
    else
1768
0
      return 0;
1769
0
  }
1770
0
  return 0;
1771
0
}
1772
1773
static int
1774
noCompbrlAhead(const TranslationTableHeader *table, int pos, int mode,
1775
0
    const InString *input, int transOpcode, int transCharslen, int cursorPosition) {
1776
0
  int start = pos + transCharslen;
1777
0
  int end;
1778
0
  int p;
1779
0
  if (start >= input->length) return 1;
1780
0
  while (start < input->length && checkCharAttr(input->chars[start], CTC_Space, table))
1781
0
    start++;
1782
0
  if (start == input->length ||
1783
0
      (transOpcode == CTO_JoinableWord &&
1784
0
          (!checkCharAttr(input->chars[start], CTC_Letter | CTC_Digit, table) ||
1785
0
              !checkCharAttr(input->chars[start - 1], CTC_Space, table))))
1786
0
    return 1;
1787
0
  end = start;
1788
0
  while (end < input->length && !checkCharAttr(input->chars[end], CTC_Space, table))
1789
0
    end++;
1790
0
  if ((mode & (compbrlAtCursor | compbrlLeftCursor)) && cursorPosition >= start &&
1791
0
      cursorPosition < end)
1792
0
    return 0;
1793
  /* Look ahead for rules with CTO_CompBrl */
1794
0
  for (p = start; p < end; p++) {
1795
0
    int length = input->length - p;
1796
0
    int tryThis;
1797
0
    int k;
1798
0
    for (tryThis = 0; tryThis < 2; tryThis++) {
1799
0
      TranslationTableOffset ruleOffset = 0;
1800
0
      TranslationTableRule *testRule;
1801
0
      switch (tryThis) {
1802
0
      case 0:
1803
0
        if (!(length >= 2)) break;
1804
0
        ruleOffset = table->forRules[_lou_stringHash(&input->chars[p], 1, table)];
1805
0
        break;
1806
0
      case 1:
1807
0
        if (!(length >= 1)) break;
1808
0
        length = 1;
1809
0
        ruleOffset = getChar(input->chars[p], table)->otherRules;
1810
0
        break;
1811
0
      }
1812
0
      while (ruleOffset) {
1813
0
        const TranslationTableCharacter *character1;
1814
0
        const TranslationTableCharacter *character2;
1815
0
        testRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
1816
0
        for (k = 0; k < testRule->charslen && k < length; k++) {
1817
0
          character1 = getChar(testRule->charsdots[k], table);
1818
0
          character2 = getChar(input->chars[p + k], table);
1819
0
          if (toLowercase(table, character1) != toLowercase(table, character2))
1820
0
            break;
1821
0
        }
1822
0
        if (tryThis == 1 || k == testRule->charslen) {
1823
0
          if (testRule->opcode == CTO_CompBrl ||
1824
0
              testRule->opcode == CTO_Literal)
1825
0
            return 0;
1826
0
        }
1827
0
        ruleOffset = testRule->charsnext;
1828
0
      }
1829
0
    }
1830
0
  }
1831
0
  return 1;
1832
0
}
1833
1834
static int
1835
0
checkEmphasisChange(int pos, int len, const EmphasisInfo *emphasisBuffer) {
1836
0
  int i;
1837
0
  for (i = pos + 1; i < pos + len; i++)
1838
0
    if (emphasisBuffer[i].begin || emphasisBuffer[i].end || emphasisBuffer[i].word ||
1839
0
        emphasisBuffer[i].symbol)
1840
0
      return 1;
1841
0
  return 0;
1842
0
}
1843
1844
static int
1845
isRepeatedWord(const TranslationTableHeader *table, int pos, const InString *input,
1846
    const EmphasisInfo *emphasisBuffer, int outputLength, const int *posMapping,
1847
0
    int transCharslen, int *repwordLength) {
1848
  /* transCharslen is the length of the character sequence that separates the repeated
1849
   * parts */
1850
0
  int len;
1851
  /* maximum length that the repeated part can have is determined by how many letters
1852
   * there are before and after the separator */
1853
0
  for (len = 1; pos - len >= 0 && pos + transCharslen + len - 1 < input->length &&
1854
0
      checkCharAttr(input->chars[pos - len], CTC_Letter, table) &&
1855
0
      checkCharAttr(input->chars[pos + transCharslen + len - 1], CTC_Letter, table);
1856
0
      len++)
1857
0
    ;
1858
0
  len--;
1859
  /* now actually compare the parts, starting with the maximal length and making them
1860
   * shorter if they don't match */
1861
0
  while (len > 0) {
1862
0
    int start = pos - len;
1863
0
    if (compareChars(&input->chars[start], &input->chars[pos + transCharslen], len,
1864
0
          table)) {
1865
      /* part must not start within a contraction */
1866
0
      for (int k = outputLength - 1; k >= 0; k--)
1867
0
        if (posMapping[k] == start)
1868
0
          break;
1869
0
        else if (posMapping[k] < start)
1870
0
          return 0;
1871
      /* capitalisation and emphasis may not change except at the beginning of the
1872
       * parts */
1873
0
      if (checkEmphasisChange(start, len + transCharslen, emphasisBuffer) ||
1874
0
          checkEmphasisChange(pos + transCharslen, len, emphasisBuffer))
1875
0
        return 0;
1876
0
      *repwordLength = len;
1877
0
      return 1;
1878
0
    }
1879
0
    len--;
1880
0
  }
1881
0
  return 0;
1882
0
}
1883
1884
/**
 * Check that the characters matched by `transRule` at `pos` sit inside a valid
 * "sequence": everything between the match and the nearest space or sequence
 * delimiter, on both sides, must be allowed there.
 *
 * Backwards from pos - 1, CTC_SeqBefore characters are skipped; the first
 * other character must be a space or CTC_SeqDelimiter, else 0 is returned.
 * Forwards from the end of the match, the table's seqPatterns (if any) are
 * tried first at each position; then CTC_SeqAfter characters are skipped and
 * the first other character must be a space or CTC_SeqDelimiter, else 0 is
 * returned. Reaching either end of the input counts as success.
 *
 * @return 1 if the match is in sequence, 0 otherwise.
 */
static int
inSequence(const TranslationTableHeader *table, int pos, const InString *input,
    const TranslationTableRule *transRule) {
  int i, j, s, match;
  // TODO: all caps words
  // const TranslationTableCharacter *c = NULL;

  /* check before sequence */
  for (i = pos - 1; i >= 0; i--) {
    if (checkCharAttr(input->chars[i], CTC_SeqBefore, table)) continue;
    if (!(checkCharAttr(input->chars[i], CTC_Space | CTC_SeqDelimiter, table)))
      return 0;
    break;
  }

  /* check after sequence */
  for (i = pos + transRule->charslen; i < input->length; i++) {
    /* check sequence after patterns */
    /* seqPatterns is walked as a list of patterns, each terminated by a 0
     * entry. `match` is the state of the current pattern: 0 = not started,
     * 1 = matching so far, -1 = failed (skip to the next terminator). */
    if (table->seqPatternsCount) {
      match = 0;
      /* NOTE(review): j may equal input->length here, so input->chars[j] can be
       * read one past the last character - presumably the buffer is terminated;
       * confirm against the callers. */
      for (j = i, s = 0; j <= input->length && s < table->seqPatternsCount;
          j++, s++) {
        /* matching */
        if (match == 1) {
          if (table->seqPatterns[s]) {
            if (input->chars[j] == table->seqPatterns[s])
              match = 1;
            else {
              match = -1;
              /* rewind: the loop's j++ brings j back to i for the next pattern */
              j = i - 1;
            }
          }

          /* found match */
          else {
            /* pattern at end of input */
            if (j >= input->length) return 1;

            /* continue the outer scan right after the matched pattern */
            i = j;
            break;
          }
        }

        /* looking for match */
        else if (match == 0) {
          if (table->seqPatterns[s]) {
            if (input->chars[j] == table->seqPatterns[s])
              match = 1;
            else {
              match = -1;
              j = i - 1;
            }
          }
        }

        /* next pattern */
        else if (match == -1) {
          /* a 0 entry ends the failed pattern; restart matching at i */
          if (!table->seqPatterns[s]) {
            match = 0;
            j = i - 1;
          }
        }
      }
    }

    if (checkCharAttr(input->chars[i], CTC_SeqAfter, table)) continue;
    if (!(checkCharAttr(input->chars[i], CTC_Space | CTC_SeqDelimiter, table)))
      return 0;
    break;
  }

  return 1;
}
1957
1958
/**
 * Select the forward-translation rule to apply at input position `pos`.
 *
 * Candidate rules are tried in three rounds (`tryThis`):
 *   0 - the chain from table->forRules hashed on the characters at `pos`
 *       (only when at least two characters are available);
 *   1 - the chain from the `otherRules` field of the character's own definition;
 *   2 - no rule applied: a static pseudo rule (CTO_None, one character, no
 *       dots) is filled in for the character at `pos`.
 * Within a chain the first rule that matches the input and whose opcode
 * conditions hold is selected, and the function returns immediately.
 *
 * Outputs: `*transRule`, `*transOpcode` and `*transCharslen` describe the
 * selected rule (the opcode may be rewritten, e.g. CTO_Syllable and a
 * rejected CTO_LargeSign become CTO_Always, CTO_DecPoint between digits
 * becomes CTO_MidNum); `*curCharDef` is the definition of the character at
 * `pos`; `*repwordLength` is set by isRepeatedWord() for the repword opcodes;
 * the pass* / patternMatch / grouping* arguments are handed to passDoTest()
 * for CTO_Context rules.
 *
 * `length` limits how long a rule may be: up to `compbrlStart` when `pos`
 * lies before it, otherwise up to the end of the input.
 *
 * Because the "no rule" result lives in a function-static object, the
 * returned pseudo rule is overwritten by the next call that finds no rule.
 */
static void
for_selectRule(const TranslationTableHeader *table, int pos, OutString output,
    const int *posMapping, int mode, const InString *input, formtype *typebuf,
    EmphasisInfo *emphasisBuffer, int *transOpcode, int prevTransOpcode,
    const TranslationTableRule **transRule, int *transCharslen, int *passCharDots,
    widechar const **passInstructions, int *passIC, PassRuleMatch *patternMatch,
    int posIncremented, int cursorPosition, int *repwordLength, int dontContract,
    int compbrlStart, int compbrlEnd,
    TranslationTableCharacterAttributes beforeAttributes,
    TranslationTableCharacter **curCharDef, TranslationTableRule **groupingRule,
    widechar *groupingOp) {
  /* check for valid Translations. Return value is in transRule. */
  static TranslationTableRule pseudoRule = { 0 };
  int length = ((pos < compbrlStart) ? compbrlStart : input->length) - pos;
  int tryThis;
  int k;
  TranslationTableOffset ruleOffset = 0;
  *curCharDef = getChar(input->chars[pos], table);
  for (tryThis = 0; tryThis < 3; tryThis++) {
    switch (tryThis) {
    case 0:
      if (!(length >= 2)) break;
      ruleOffset = table->forRules[_lou_stringHash(&input->chars[pos], 1, table)];
      break;
    case 1:
      if (!(length >= 1)) break;
      length = 1;
      ruleOffset = (*curCharDef)->otherRules;
      break;
    case 2: /* No rule found */
      *transRule = &pseudoRule;
      *transOpcode = pseudoRule.opcode = CTO_None;
      *transCharslen = pseudoRule.charslen = 1;
      pseudoRule.charsdots[0] = input->chars[pos];
      pseudoRule.dotslen = 0;
      return;
    }
    while (ruleOffset) {
      *transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
      *transOpcode = (*transRule)->opcode;
      *transCharslen = (*transRule)->charslen;
      /* single-character rules (round 1) are taken without calling validMatch */
      if (tryThis == 1 ||
          ((*transCharslen <= length) &&
              validMatch(table, pos, input, typebuf, *transRule,
                  *transCharslen))) {
        TranslationTableCharacterAttributes afterAttributes;
        /* check before emphasis match */
        /* note: `break` here leaves the while loop, so the remaining rules of
         * this chain are not considered in this round */
        if ((*transRule)->before & CTC_EmpMatch) {
          if (emphasisBuffer[pos].begin || emphasisBuffer[pos].end ||
              emphasisBuffer[pos].word || emphasisBuffer[pos].symbol)
            break;
        }

        /* check after emphasis match */
        if ((*transRule)->after & CTC_EmpMatch) {
          if (emphasisBuffer[pos + *transCharslen].begin ||
              emphasisBuffer[pos + *transCharslen].end ||
              emphasisBuffer[pos + *transCharslen].word ||
              emphasisBuffer[pos + *transCharslen].symbol)
            break;
        }

        /* check this rule */
        setAfter(*transCharslen, table, pos, input, &afterAttributes);
        /* the rule's `after` mask is tested against the character before the
         * match, and its `before` mask against the character after it; a mask
         * that is empty apart from CTC_EmpMatch imposes no condition */
        if ((!((*transRule)->after & ~CTC_EmpMatch) ||
              (beforeAttributes & (*transRule)->after)) &&
            (!((*transRule)->before & ~CTC_EmpMatch) ||
                (afterAttributes & (*transRule)->before)))
          /* check nocross */
          if (!((*transRule)->nocross &&
                syllableBreak(table, pos, input, *transCharslen))) {
            /* inside this switch, `return` accepts the rule and `break`
             * rejects it and moves on to the next rule of the chain */
            switch (*transOpcode) { /* check validity of this Translation */
            case CTO_Space:
            case CTO_Letter:
            case CTO_UpperCase:
            case CTO_LowerCase:
            case CTO_Digit:
            case CTO_LitDigit:
            case CTO_Punctuation:
            case CTO_Math:
            case CTO_Sign:
            case CTO_Hyphen:
            case CTO_Replace:
            case CTO_CompBrl:
            case CTO_Literal:
              return;
            case CTO_Repeated:
              if (dontContract || (mode & noContractions)) break;
              if ((mode & (compbrlAtCursor | compbrlLeftCursor)) &&
                  pos >= compbrlStart && pos <= compbrlEnd)
                break;
              return;
            case CTO_RepWord:
            case CTO_RepEndWord:
              if (dontContract || (mode & noContractions)) break;
              if (isRepeatedWord(table, pos, input, emphasisBuffer,
                    output.length, posMapping, *transCharslen,
                    repwordLength)) {
                /* CTO_RepEndWord requires a letter directly before the
                 * repeated part, CTO_RepWord requires that there is none */
                if ((pos > *repwordLength &&
                      checkCharAttr(input->chars[pos -
                                  *repwordLength - 1],
                          CTC_Letter, table)) ==
                    (*transOpcode == CTO_RepEndWord)) {
                  return;
                }
              }
              break;
            case CTO_NoCont:
              if (dontContract || (mode & noContractions)) break;
              return;
            case CTO_Syllable:
              *transOpcode = CTO_Always;
              /* fall through: a syllable rule is then handled as CTO_Always */
            case CTO_Always:
              if (checkEmphasisChange(pos, *transCharslen, emphasisBuffer))
                break;
              if (dontContract || (mode & noContractions)) break;
              return;
            case CTO_ExactDots:
              return;
            case CTO_Context:
              // check posIncremented to avoid endless loop
              if (!posIncremented ||
                  !passDoTest(table, pos, input, *transOpcode,
                      *transRule, passCharDots, passInstructions,
                      passIC, patternMatch, groupingRule,
                      groupingOp))
                break;
              return;
            case CTO_LargeSign:
              if (dontContract || (mode & noContractions)) break;
              /* the rule is accepted either way; when the large-sign
               * conditions fail it is demoted to CTO_Always */
              if (!((beforeAttributes & (CTC_Space | CTC_Punctuation)) ||
                    onlyLettersBehind(
                        table, pos, input, beforeAttributes)) ||
                  !((afterAttributes & CTC_Space) ||
                      prevTransOpcode == CTO_LargeSign) ||
                  (afterAttributes & CTC_Letter) ||
                  !noCompbrlAhead(table, pos, mode, input, *transOpcode,
                      *transCharslen, cursorPosition))
                *transOpcode = CTO_Always;
              return;
            case CTO_WholeWord:
              if (dontContract || (mode & noContractions)) break;
              if (checkEmphasisChange(pos, *transCharslen, emphasisBuffer))
                break;
              /* fall through: the word-boundary test is shared with
               * CTO_Contraction */
            case CTO_Contraction:
              if (table->usesSequences) {
                if (inSequence(table, pos, input, *transRule)) return;
              } else {
                if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                    (afterAttributes & (CTC_Space | CTC_Punctuation)))
                  return;
              }
              break;
            case CTO_PartWord:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes & CTC_Letter) ||
                  (afterAttributes & CTC_Letter))
                return;
              break;
            case CTO_JoinNum:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                  (afterAttributes & CTC_Space) &&
                  (output.length + (*transRule)->dotslen <
                      output.maxlength)) {
                /* accept only if the next non-space character is a digit */
                int p = pos + *transCharslen + 1;
                while (p < input->length) {
                  if (!checkCharAttr(
                        input->chars[p], CTC_Space, table)) {
                    if (checkCharAttr(
                          input->chars[p], CTC_Digit, table))
                      return;
                    break;
                  }
                  p++;
                }
              }
              break;
            case CTO_LowWord:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes & CTC_Space) &&
                  (afterAttributes & CTC_Space) &&
                  (prevTransOpcode != CTO_JoinableWord))
                return;
              break;
            case CTO_JoinableWord:
              if (dontContract || (mode & noContractions)) break;
              if (beforeAttributes & (CTC_Space | CTC_Punctuation) &&
                  onlyLettersAhead(table, pos, input, *transCharslen,
                      afterAttributes) &&
                  noCompbrlAhead(table, pos, mode, input, *transOpcode,
                      *transCharslen, cursorPosition))
                return;
              break;
            case CTO_SuffixableWord:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                  (afterAttributes &
                      (CTC_Space | CTC_Letter | CTC_Punctuation)))
                return;
              break;
            case CTO_PrefixableWord:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes &
                    (CTC_Space | CTC_Letter | CTC_Punctuation)) &&
                  (afterAttributes & (CTC_Space | CTC_Punctuation)))
                return;
              break;
            case CTO_BegWord:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
                  (afterAttributes & CTC_Letter))
                return;
              break;
            case CTO_BegMidWord:
              if (dontContract || (mode & noContractions)) break;
              if ((beforeAttributes &
                    (CTC_Letter | CTC_Space | CTC_Punctuation)) &&
                  (afterAttributes & CTC_Letter))
                return;
              break;
            case CTO_MidWord:
              if (dontContract || (mode & noContractions)) break;
              if (beforeAttributes & CTC_Letter &&
                  afterAttributes & CTC_Letter)
                return;
              break;
            case CTO_MidEndWord:
              if (dontContract || (mode & noContractions)) break;
              if (beforeAttributes & CTC_Letter &&
                  afterAttributes &
                      (CTC_Letter | CTC_Space | CTC_Punctuation))
                return;
              break;
            case CTO_EndWord:
              if (dontContract || (mode & noContractions)) break;
              if (beforeAttributes & CTC_Letter &&
                  afterAttributes & (CTC_Space | CTC_Punctuation))
                return;
              break;
            case CTO_BegNum:
              if (beforeAttributes & (CTC_Space | CTC_Punctuation) &&
                  afterAttributes & CTC_Digit)
                return;
              break;
            case CTO_MidNum:
              if (prevTransOpcode != CTO_ExactDots &&
                  beforeAttributes & CTC_Digit &&
                  afterAttributes & CTC_Digit)
                return;
              break;
            case CTO_EndNum:
              if (beforeAttributes & CTC_Digit &&
                  prevTransOpcode != CTO_ExactDots)
                return;
              break;
            case CTO_DecPoint:
              if (!(afterAttributes & CTC_Digit)) break;
              if (beforeAttributes & CTC_Digit) *transOpcode = CTO_MidNum;
              return;
            case CTO_PrePunc:
              if (!checkCharAttr(
                    input->chars[pos], CTC_Punctuation, table) ||
                  (pos > 0 &&
                      checkCharAttr(input->chars[pos - 1],
                          CTC_Letter, table)))
                break;
              /* accept if a letter or digit follows before the next space */
              for (k = pos + *transCharslen; k < input->length; k++) {
                if (checkCharAttr(input->chars[k],
                      (CTC_Letter | CTC_Digit), table))
                  return;
                if (checkCharAttr(input->chars[k], CTC_Space, table))
                  break;
              }
              break;
            case CTO_PostPunc:
              if (!checkCharAttr(
                    input->chars[pos], CTC_Punctuation, table) ||
                  (pos < (input->length - 1) &&
                      checkCharAttr(input->chars[pos + 1],
                          CTC_Letter, table)))
                break;
              /* accept if a letter or digit precedes after the previous space */
              for (k = pos; k >= 0; k--) {
                if (checkCharAttr(input->chars[k],
                      (CTC_Letter | CTC_Digit), table))
                  return;
                if (checkCharAttr(input->chars[k], CTC_Space, table))
                  break;
              }
              break;

            case CTO_Match: {
              widechar *patterns, *pattern;

              if (dontContract || (mode & noContractions)) break;
              if (checkEmphasisChange(pos, *transCharslen, emphasisBuffer))
                break;

              patterns =
                  (widechar *)&table->ruleArea[(*transRule)->patterns];

              /* check before pattern */
              /* the before pattern starts at patterns[1]; patterns[0] holds
               * the index at which the after pattern starts */
              pattern = &patterns[1];
              if (!_lou_pattern_check(
                    input->chars, pos - 1, -1, -1, pattern, table))
                break;

              /* check after pattern */
              pattern = &patterns[patterns[0]];
              if (!_lou_pattern_check(input->chars,
                    pos + (*transRule)->charslen, input->length, 1,
                    pattern, table))
                break;

              return;
            }

            default:
              break;
            }
          }
      }
      /* Done with checking this rule */
      ruleOffset = (*transRule)->charsnext;
    }
  }
}
2285
2286
static int
2287
undefinedCharacter(widechar c, const TranslationTableHeader *table, int pos,
2288
    const InString *input, OutString *output, int *posMapping, int *cursorPosition,
2289
0
    int *cursorStatus, int mode) {
2290
  /* Display an undefined character in the output buffer */
2291
0
  if (table->undefined) {
2292
0
    TranslationTableRule *rule =
2293
0
        (TranslationTableRule *)&table->ruleArea[table->undefined];
2294
2295
0
    return for_updatePositions(&rule->charsdots[rule->charslen], rule->charslen,
2296
0
        rule->dotslen, 0, pos, input, output, posMapping, cursorPosition,
2297
0
        cursorStatus);
2298
0
  }
2299
2300
0
  const char *text = (mode & noUndefined) ? "" : _lou_showString(&c, 1, 1);
2301
0
  size_t length = strlen(text);
2302
0
  widechar dots[length == 0 ? 1 : length];
2303
2304
0
  for (unsigned int k = 0; k < length; k += 1) {
2305
0
    dots[k] = 0;
2306
0
    TranslationTableOffset offset = getChar(text[k], table)->otherRules;
2307
0
    while (offset) {
2308
0
      const TranslationTableRule *r =
2309
0
          (TranslationTableRule *)&table->ruleArea[offset];
2310
0
      if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow && r->dotslen == 1) {
2311
0
        dots[k] = r->charsdots[1];
2312
0
        break;
2313
0
      }
2314
0
      offset = r->charsnext;
2315
0
    }
2316
0
    if (!dots[k]) dots[k] = _lou_charToFallbackDots(text[k]);
2317
0
  }
2318
2319
0
  return for_updatePositions(dots, 1, length, 0, pos, input, output, posMapping,
2320
0
      cursorPosition, cursorStatus);
2321
0
}
2322
2323
static int
2324
putCharacter(widechar character, const TranslationTableHeader *table, int pos,
2325
    const InString *input, OutString *output, int *posMapping, int *cursorPosition,
2326
0
    int *cursorStatus, int mode) {
2327
  /* Insert the dots equivalent of a character into the output buffer */
2328
0
  TranslationTableCharacter *chardef = getChar(character, table);
2329
0
  if (!chardef->definitionRule && chardef->basechar)
2330
0
    chardef = (TranslationTableCharacter *)&table->ruleArea[chardef->basechar];
2331
0
  if (chardef->definitionRule) {
2332
0
    const TranslationTableRule *rule =
2333
0
        (TranslationTableRule *)&table->ruleArea[chardef->definitionRule];
2334
0
    return for_updatePositions(&rule->charsdots[1], 1, rule->dotslen, 0, pos, input,
2335
0
        output, posMapping, cursorPosition, cursorStatus);
2336
0
  }
2337
0
  return undefinedCharacter(character, table, pos, input, output, posMapping,
2338
0
      cursorPosition, cursorStatus, mode);
2339
0
}
2340
2341
static int
2342
putCharacters(const widechar *characters, int count, const TranslationTableHeader *table,
2343
    int pos, const InString *input, OutString *output, int *posMapping,
2344
0
    int *cursorPosition, int *cursorStatus, int mode) {
2345
  /* Insert the dot equivalents of a series of characters in the output
2346
   * buffer */
2347
0
  int k;
2348
0
  for (k = 0; k < count; k++)
2349
0
    if (!putCharacter(characters[k], table, pos, input, output, posMapping,
2350
0
          cursorPosition, cursorStatus, mode))
2351
0
      return 0;
2352
0
  return 1;
2353
0
}
2354
2355
/* Snapshot of the translation state at the start of the current word. It is
 * used for back-tracking and by the handling of the nocont and compbrl rules. */
typedef struct {
  /* position in the input where the current word begins */
  int inPos;
  /* position in the output where the current word begins */
  int outPos;
  /* position of the next input character for which emphasis marks are to be
   * inserted */
  int emphasisInPos;
} LastWord;
2363
2364
static int
2365
doCompbrl(const TranslationTableHeader *table, int *pos, const InString *input,
2366
    OutString *output, int *posMapping, EmphasisInfo *emphasisBuffer,
2367
    const TranslationTableRule **transRule, int *cursorPosition, int *cursorStatus,
2368
0
    const LastWord *lastWord, int *insertEmphasesFrom, int mode) {
2369
  /* Handle strings containing substrings defined by the compbrl opcode */
2370
0
  int stringStart, stringEnd;
2371
0
  if (checkCharAttr(input->chars[*pos], CTC_Space, table)) return 1;
2372
0
  stringStart = lastWord->outPos ? lastWord->inPos : 0;
2373
0
  stringEnd = *pos;
2374
0
  while (stringEnd < input->length &&
2375
0
      !checkCharAttr(input->chars[stringEnd], CTC_Space, table))
2376
0
    stringEnd++;
2377
0
  *pos = stringStart;
2378
0
  output->length = lastWord->outPos;
2379
0
  *insertEmphasesFrom = lastWord->emphasisInPos;
2380
0
  return doCompTrans(stringStart, stringEnd, table, pos, input, output, posMapping,
2381
0
      emphasisBuffer, transRule, cursorPosition, cursorStatus, mode);
2382
0
}
2383
2384
static int
2385
doCompTrans(int start, int end, const TranslationTableHeader *table, int *pos,
2386
    const InString *input, OutString *output, int *posMapping,
2387
    EmphasisInfo *emphasisBuffer, const TranslationTableRule **transRule,
2388
0
    int *cursorPosition, int *cursorStatus, int mode) {
2389
0
  const TranslationTableRule *indicRule;
2390
0
  int k;
2391
0
  int haveEndsegment = 0;
2392
0
  if (*cursorStatus != 2 && brailleIndicatorDefined(table->begComp, table, &indicRule))
2393
0
    if (!for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, *pos,
2394
0
          input, output, posMapping, cursorPosition, cursorStatus))
2395
0
      return 0;
2396
0
  for (k = start; k < end; k++) {
2397
0
    TranslationTableOffset compdots = 0;
2398
    /* HACK: computer braille is one-to-one so it
2399
     * can't have any emphasis indicators.
2400
     * A better solution is to treat computer braille as its own mode. */
2401
0
    emphasisBuffer[k] = (EmphasisInfo){ 0 };
2402
0
    if (input->chars[k] == LOU_ENDSEGMENT) {
2403
0
      haveEndsegment = 1;
2404
0
      continue;
2405
0
    }
2406
0
    *pos = k;
2407
0
    compdots = getChar(input->chars[k], table)->compRule;
2408
0
    if (compdots != 0) {
2409
0
      *transRule = (TranslationTableRule *)&table->ruleArea[compdots];
2410
0
      if (!for_updatePositions(&(*transRule)->charsdots[(*transRule)->charslen],
2411
0
            (*transRule)->charslen, (*transRule)->dotslen, 0, *pos, input,
2412
0
            output, posMapping, cursorPosition, cursorStatus))
2413
0
        return 0;
2414
0
    } else if (!putCharacter(input->chars[k], table, *pos, input, output, posMapping,
2415
0
               cursorPosition, cursorStatus, mode))
2416
0
      return 0;
2417
0
  }
2418
0
  if (*cursorStatus != 2 && brailleIndicatorDefined(table->endComp, table, &indicRule))
2419
0
    if (!for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, *pos,
2420
0
          input, output, posMapping, cursorPosition, cursorStatus))
2421
0
      return 0;
2422
0
  *pos = end;
2423
0
  if (haveEndsegment) {
2424
0
    widechar endSegment = LOU_ENDSEGMENT;
2425
0
    if (!for_updatePositions(&endSegment, 0, 1, 0, *pos, input, output, posMapping,
2426
0
          cursorPosition, cursorStatus))
2427
0
      return 0;
2428
0
  }
2429
0
  return 1;
2430
0
}
2431
2432
static int
2433
doNocont(const TranslationTableHeader *table, int *pos, OutString *output, int mode,
2434
    const InString *input, const LastWord *lastWord, int *dontContract,
2435
0
    int *insertEmphasesFrom) {
2436
  /* Handle strings containing substrings defined by the nocont opcode */
2437
0
  if (checkCharAttr(input->chars[*pos], CTC_Space, table) || *dontContract ||
2438
0
      (mode & noContractions))
2439
0
    return 1;
2440
0
  if (lastWord->outPos) {
2441
0
    *pos = lastWord->inPos;
2442
0
    output->length = lastWord->outPos;
2443
0
  } else {
2444
0
    *pos = 0;
2445
0
    output->length = 0;
2446
0
  }
2447
0
  *insertEmphasesFrom = lastWord->emphasisInPos;
2448
0
  *dontContract = 1;
2449
0
  return 1;
2450
0
}
2451
2452
static int
2453
markSyllables(
2454
0
    const TranslationTableHeader *table, const InString *input, formtype *typebuf) {
2455
0
  int pos;
2456
0
  int k;
2457
0
  int currentMark = 0;
2458
0
  int const syllable_marks[] = { SYLLABLE_MARKER_1, SYLLABLE_MARKER_2 };
2459
0
  int syllable_mark_selector = 0;
2460
0
  const TranslationTableRule *transRule;
2461
0
  int transOpcode;
2462
0
  int transCharslen;
2463
2464
0
  if (typebuf == NULL || !table->syllables) return 1;
2465
0
  pos = 0;
2466
0
  while (pos < input->length) { /* the main multipass translation loop */
2467
0
    int length = input->length - pos;
2468
0
    int tryThis = 0;
2469
0
    while (tryThis < 3) {
2470
0
      TranslationTableOffset ruleOffset = 0;
2471
0
      switch (tryThis) {
2472
0
      case 0:
2473
0
        if (!(length >= 2)) break;
2474
        // memory overflow when pos == input->length - 1
2475
0
        ruleOffset =
2476
0
            table->forRules[_lou_stringHash(&input->chars[pos], 1, table)];
2477
0
        break;
2478
0
      case 1:
2479
0
        if (!(length >= 1)) break;
2480
0
        length = 1;
2481
0
        ruleOffset = getChar(input->chars[pos], table)->otherRules;
2482
0
        break;
2483
0
      case 2: /* No rule found */
2484
0
        transOpcode = CTO_Always;
2485
0
        ruleOffset = 0;
2486
0
        break;
2487
0
      }
2488
0
      while (ruleOffset) {
2489
0
        transRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
2490
0
        transOpcode = transRule->opcode;
2491
0
        transCharslen = transRule->charslen;
2492
0
        if (tryThis == 1 ||
2493
0
            (transCharslen <= length &&
2494
0
                compareChars(&transRule->charsdots[0], &input->chars[pos],
2495
0
                    transCharslen, table))) {
2496
0
          if (transOpcode == CTO_Syllable) {
2497
0
            tryThis = 4;
2498
0
            break;
2499
0
          }
2500
0
        }
2501
0
        ruleOffset = transRule->charsnext;
2502
0
      }
2503
0
      tryThis++;
2504
0
    }
2505
0
    switch (transOpcode) {
2506
0
    case CTO_Always:
2507
0
      if (pos >= input->length) return 0;
2508
0
      typebuf[pos++] |= currentMark;
2509
0
      break;
2510
0
    case CTO_Syllable:
2511
      /* cycle between SYLLABLE_MARKER_1 and SYLLABLE_MARKER_2 so
2512
       * we can distinguinsh two consequtive syllables */
2513
0
      currentMark = syllable_marks[syllable_mark_selector];
2514
0
      syllable_mark_selector = (syllable_mark_selector + 1) % 2;
2515
2516
0
      if ((pos + transCharslen) > input->length) return 0;
2517
0
      for (k = 0; k < transCharslen; k++) typebuf[pos++] |= currentMark;
2518
0
      break;
2519
0
    default:
2520
0
      break;
2521
0
    }
2522
0
  }
2523
0
  return 1;
2524
0
}
2525
2526
static int
2527
resetsEmphMode(
2528
0
    widechar c, const TranslationTableHeader *table, const EmphasisClass *emphClass) {
2529
  /* Whether a character cancels word emphasis mode or not. */
2530
0
  if (emphClass->mode) {
2531
0
    const TranslationTableCharacter *chardef = getChar(c, table);
2532
    /* the base character of a character belonging to a mode can never cancel the mode
2533
     */
2534
0
    if (chardef->attributes & emphClass->mode)
2535
0
      return 0;
2536
0
    else {
2537
0
      const TranslationTableCharacter *ch = chardef;
2538
0
      if (ch->basechar)
2539
0
        ch = (TranslationTableCharacter *)&table->ruleArea[ch->basechar];
2540
0
      while (ch->linked) {
2541
0
        ch = (TranslationTableCharacter *)&table->ruleArea[ch->linked];
2542
0
        if ((ch->mode & chardef->mode) == chardef->mode &&
2543
0
            ch->attributes & emphClass->mode) {
2544
0
          return 0;
2545
0
        }
2546
0
      }
2547
0
    }
2548
0
    if (emphClass->mode == CTC_UpperCase) {
2549
      /* characters that are not letter and not capsmodechars cancel capsword mode
2550
       */
2551
0
      return !checkCharAttr(c, CTC_Letter | CTC_CapsMode, table);
2552
0
    } else if (emphClass->mode == CTC_Digit) {
2553
      /* characters that are not digit or litdigit or numericmodechars cancel
2554
       * numeric mode */
2555
0
      return !checkCharAttr(c,
2556
0
          CTC_Digit | CTC_LitDigit | CTC_NumericMode | CTC_MidEndNumericMode,
2557
0
          table);
2558
0
    } else {
2559
      /* characters that are not letter cancel other word modes */
2560
0
      return !checkCharAttr(c, CTC_Letter, table);
2561
0
    }
2562
0
  } else {
2563
0
    if (checkCharAttr(c, CTC_Letter, table)) /* a letter never cancels emphasis */
2564
0
      return 0;
2565
0
    const widechar *emphmodechars = table->emphModeChars[emphClass->rule];
2566
    /* by default (if emphmodechars is not declared) only space cancels emphasis */
2567
0
    if (!emphmodechars[0]) return checkCharAttr(c, CTC_Space, table);
2568
0
    for (int k = 0; emphmodechars[k]; k++)
2569
0
      if (c == emphmodechars[k]) return 0;
2570
0
    return 1;
2571
0
  }
2572
0
}
2573
2574
static int
2575
isEmphasizable(
2576
0
    widechar c, const TranslationTableHeader *table, const EmphasisClass *emphClass) {
2577
  /* Whether emphasis is indicated on a character or not. */
2578
0
  if (emphClass->mode) {
2579
    /* a character is emphasizable if it belongs to the mode or if it has the same
2580
     * base as a character that belongs to the mode */
2581
0
    const TranslationTableCharacter *chardef = getChar(c, table);
2582
0
    if (chardef->basechar)
2583
0
      chardef = (TranslationTableCharacter *)&table->ruleArea[chardef->basechar];
2584
0
    if (chardef->attributes & emphClass->mode) return 1;
2585
0
    while (chardef->linked) {
2586
0
      chardef = (TranslationTableCharacter *)&table->ruleArea[chardef->linked];
2587
0
      if (chardef->attributes & emphClass->mode) return 1;
2588
0
    }
2589
0
    return 0;
2590
0
  } else {
2591
0
    const widechar *noemphchars = table->noEmphChars[emphClass->rule];
2592
    /* if noemphchars is not declared emphasis is indicated on all characters except
2593
     * spaces */
2594
0
    if (!noemphchars[0]) return !checkCharAttr(c, CTC_Space, table);
2595
0
    for (int k = 0; noemphchars[k]; k++)
2596
0
      if (c == noemphchars[k]) return 0;
2597
0
    return 1;
2598
0
  }
2599
0
}
2600
2601
static int
2602
isEmphasized(widechar c, const TranslationTableHeader *table,
2603
0
    const EmphasisClass *emphClass, formtype typeform) {
2604
  /* Whether a character is emphasized or not. */
2605
0
  if (!isEmphasizable(c, table, emphClass)) return 0;
2606
0
  if (emphClass->mode)
2607
0
    return checkCharAttr(c, emphClass->mode, table);
2608
0
  else
2609
0
    return typeform & emphClass->typeform;
2610
0
}
2611
2612
static int
2613
isEmphSpace(
2614
0
    widechar c, const TranslationTableHeader *table, const EmphasisClass *emphClass) {
2615
  /* For determining word boundaries. */
2616
  /* Note that this is not the only function that is used for this purpose. In
2617
   * resolveEmphasisWords the beginning and end of words are further refined based on
2618
   * the isEmphasizable function. */
2619
0
  const int word_enabled = table->emphRules[emphClass->rule][begWordOffset];
2620
0
  if (emphClass->mode == CTC_UpperCase) {
2621
    /* The old behavior was that words are determined by spaces. However for some
2622
     * tables it is a requirement that words are determined based on letters and
2623
     * capsmodechars. While the latter probably makes most sense, we don't want to
2624
     * break the old behavior because there is no easy way to achieve it using
2625
     * table rules. A good middle ground is to let the behavior depend on the
2626
     * presence of a capsmodechars rule. */
2627
0
    if (!(word_enabled && table->hasCapsModeChars))
2628
0
      return checkCharAttr(c, CTC_Space, table);
2629
0
  }
2630
0
  return !isEmphasizable(c, table, emphClass) &&
2631
0
      (!word_enabled || resetsEmphMode(c, table, emphClass));
2632
0
}
2633
2634
static void
2635
resolveEmphasisBeginEnd(EmphasisInfo *buffer, const EmphasisClass *class,
2636
    const TranslationTableHeader *table, const InString *input,
2637
0
    const formtype *typebuf, const unsigned int *wordBuffer) {
2638
  /* mark emphasized (capitalized) sections, i.e. sections that */
2639
  /* - start with an emphasized (uppercase) character, */
2640
  /* - extend as long as no unemphasized (lowercase) character is encountered, and */
2641
  /* - do not end with a word that contains no emphasized (uppercase) characters */
2642
  /* in addition, if phrase rules are present, sections are split up as needed so that
2643
   * they do not end in the middle of a word */
2644
2645
0
  int last_space = -1;  // position of the last encountered space
2646
0
  int emph_start = -1;  // position of the first emphasized (uppercase) character after
2647
              // which no unemphasized (lowercase) character was encountered
2648
0
  int last_word = -1;   // position of the first space following the last encountered
2649
              // character if that character was emphasized (uppercase)
2650
0
  int emph = 0;     // whether or not the last encountered character was emphasized
2651
              // (uppercase) and happened in the current word
2652
0
  int phrase_enabled = table->emphRules[class->rule][begPhraseOffset];
2653
2654
0
  for (int i = 0; i < input->length; i++) {
2655
0
    int isSpace = !(wordBuffer[i] & WORD_CHAR);
2656
0
    if (isSpace) {
2657
      /* character is a space */
2658
0
      last_space = i;
2659
0
      if (emph) {
2660
0
        last_word = i;
2661
0
        emph = 0;
2662
0
      }
2663
0
    }
2664
    /* if character is an emphasized (uppercase) character, emphasis mode begins or
2665
     * continues */
2666
0
    if (!isSpace && isEmphasized(input->chars[i], table, class, typebuf[i])) {
2667
0
      if (emph_start < 0) emph_start = i;
2668
0
      emph = 1;
2669
0
    } else {
2670
      /* else if emphasis mode has begun, it should continue if there are no
2671
       * unemphasized (lowercase) characters before the next emphasized (uppercase)
2672
       * character */
2673
      /* characters that cancel emphasis mode are handled later in
2674
       * resolveEmphasisResets (note that letters that are neither uppercase nor
2675
       * lowercase do not cancel caps mode) */
2676
0
      if (!isSpace && isEmphasizable(input->chars[i], table, class)) {
2677
0
        if (emph_start >= 0) {
2678
0
          buffer[emph_start].begin |= class->value;
2679
0
          if (emph) {
2680
            /* a passage can not end on a word without emphasized (uppercase)
2681
             * characters, so if emphasis did not start inside the current
2682
             * word, end it after the last word that contained an emphasized
2683
             * (uppercase) character, and start over from the beginning of the
2684
             * current word */
2685
0
            if (phrase_enabled && emph_start < last_space) {
2686
0
              buffer[last_word].end |= class->value;
2687
0
              emph_start = -1;
2688
0
              last_word = -1;
2689
0
              emph = 0;
2690
0
              i = last_space;
2691
0
              continue;
2692
0
            } else
2693
              /* don't split into two sections if no phrase rules are
2694
               * present or emphasis started inside the current word */
2695
0
              buffer[i].end |= class->value;
2696
0
          } else
2697
            /* current word had no emphasis yet */
2698
0
            buffer[last_word].end |= class->value;
2699
0
          emph_start = -1;
2700
0
          last_word = -1;
2701
0
          emph = 0;
2702
0
        }
2703
0
      }
2704
0
    }
2705
0
  }
2706
2707
  /* clean up input->length */
2708
0
  if (emph_start >= 0) {
2709
0
    buffer[emph_start].begin |= class->value;
2710
0
    if (emph)
2711
0
      buffer[input->length].end |= class->value;
2712
0
    else
2713
0
      buffer[last_word].end |= class->value;
2714
0
  }
2715
0
}
2716
2717
static void
2718
resolveEmphasisWords(EmphasisInfo *buffer, const EmphasisClass *class,
2719
    const TranslationTableHeader *table, const InString *input,
2720
0
    unsigned int *wordBuffer) {
2721
0
  int in_word = 0, in_emp = 0;
2722
0
  int word_start = -1;  // start position of the current emphasized word section
2723
0
  int char_cnt = 0;  // number of emphasizable characters within the current emphasized
2724
             // word section
2725
0
  int last_char = -1;  // position of the last emphasizable character
2726
0
  const TranslationTableOffset *emphRule = table->emphRules[class->rule];
2727
0
  int letter_defined = emphRule[letterOffset];
2728
0
  int endphraseafter_defined = emphRule[begPhraseOffset] &&
2729
0
      (emphRule[endPhraseAfterOffset] || emphRule[endOffset]);
2730
2731
0
  for (int i = 0; i < input->length; i++) {
2732
2733
    /* check if at beginning of emphasis */
2734
0
    if (!in_emp)
2735
0
      if (buffer[i].begin & class->value) {
2736
0
        in_emp = 1;
2737
0
        buffer[i].begin &= ~class->value;
2738
2739
        /* emphasis started inside word (and is therefore not a whole word) */
2740
0
        if (in_word) word_start = i;
2741
2742
        /* emphasis started on space */
2743
0
        if (!(wordBuffer[i] & WORD_CHAR)) word_start = -1;
2744
0
      }
2745
2746
    /* check if at end of emphasis */
2747
0
    if (in_emp)
2748
0
      if (buffer[i].end & class->value) {
2749
0
        in_emp = 0;
2750
0
        buffer[i].end &= ~class->value;
2751
0
        if (in_word && word_start >= 0) {
2752
          /* if word is one symbol, turn it into a symbol (unless emphletter is
2753
           * not defined) */
2754
0
          if (letter_defined && char_cnt == 1)
2755
0
            buffer[word_start].symbol |= class->value;
2756
0
          else {
2757
            /* else mark the word start point and, if emphasis ended inside a
2758
             * word, also mark the end point */
2759
0
            buffer[word_start].word |= class->value;
2760
0
            if (wordBuffer[i] & WORD_CHAR) {
2761
0
              buffer[i].end |= class->value;
2762
0
              buffer[i].word |= class->value;
2763
0
            }
2764
0
          }
2765
0
        }
2766
0
      }
2767
2768
    /* check if at beginning of word (first character that is not a space) */
2769
0
    if (!in_word)
2770
0
      if (wordBuffer[i] & WORD_CHAR) {
2771
        /* check if word started on a character that is not emphasizable */
2772
0
        if (isEmphasizable(input->chars[i], table, class)) {
2773
0
          in_word = 1;
2774
0
          if (in_emp) word_start = i;
2775
          /* remove WORD_CHAR marks at the end of the previous word */
2776
0
          for (int j = last_char + 1; j < i; j++) wordBuffer[j] &= ~WORD_CHAR;
2777
          /* also delete possible word end point */
2778
0
          if (last_char >= 0 && !(buffer[last_char].symbol & class->value)) {
2779
0
            if ((buffer[last_char].word & class->value) &&
2780
0
                !(buffer[last_char].end & class->value))
2781
0
              buffer[last_char].symbol |= class->value;
2782
0
            for (int j = last_char; j < i - 1; j++)
2783
0
              if (buffer[j + 1].end & class->value) {
2784
0
                buffer[j + 1].end &= ~class->value;
2785
0
                buffer[j + 1].word &= ~class->value;
2786
0
                break;
2787
0
              }
2788
0
          }
2789
0
        }
2790
0
      }
2791
2792
    /* check if at end of word (last character that is not a space) */
2793
0
    if (in_word)
2794
0
      if (!(wordBuffer[i] & WORD_CHAR)) {
2795
        /* made it through whole word */
2796
0
        if (in_emp && word_start >= 0) {
2797
          /* if word is one symbol, turn it into a symbol (unless emphletter is
2798
           * not defined) */
2799
0
          if (letter_defined && char_cnt == 1)
2800
0
            buffer[word_start].symbol |= class->value;
2801
0
          else
2802
            /* else mark it as a word */
2803
0
            buffer[word_start].word |= class->value;
2804
0
        }
2805
0
        in_word = 0;
2806
0
        word_start = -1;
2807
0
      }
2808
2809
    /* count characters within the current emphasized word (section) that are
2810
     * emphasizable */
2811
0
    if (i == word_start) {
2812
0
      last_char = i;
2813
0
      char_cnt = 1;
2814
0
    } else if (in_word &&
2815
0
        (endphraseafter_defined /* hack to achieve old behavior of endemphphrase
2816
                     * after: if the last word of the passage ends
2817
                     * with unemphasizable characters, the indicator
2818
                     * is inserted after them  */
2819
0
            || isEmphasizable(input->chars[i], table, class))) {
2820
0
      last_char = i;
2821
0
      if (in_emp) char_cnt++;
2822
0
    }
2823
0
  }
2824
2825
  /* clean up end */
2826
0
  if (in_emp) {
2827
0
    buffer[input->length].end &= ~class->value;
2828
2829
0
    if (in_word)
2830
0
      if (word_start >= 0) {
2831
        /* if word is one symbol, turn it into a symbol (unless emphletter is not
2832
         * defined) */
2833
0
        if (letter_defined && char_cnt == 1)
2834
0
          buffer[word_start].symbol |= class->value;
2835
0
        else
2836
          /* else mark it as a word */
2837
0
          buffer[word_start].word |= class->value;
2838
0
      }
2839
0
  }
2840
2841
  /* remove WORD_CHAR marks at the end of the previous word */
2842
0
  for (int j = last_char + 1; j < input->length; j++) wordBuffer[j] &= ~WORD_CHAR;
2843
  /* also delete possible word end point */
2844
0
  if (last_char >= 0 && !(buffer[last_char].symbol & class->value)) {
2845
0
    if ((buffer[last_char].word & class->value) &&
2846
0
        !(buffer[last_char].end & class->value))
2847
0
      buffer[last_char].symbol |= class->value;
2848
0
    for (int j = last_char; j < input->length - 1; j++)
2849
0
      if (buffer[j + 1].end & class->value) {
2850
0
        buffer[j + 1].end &= ~class->value;
2851
0
        buffer[j + 1].word &= ~class->value;
2852
0
        break;
2853
0
      }
2854
0
  }
2855
2856
  /* mark whole words */
2857
0
  word_start = -1;
2858
0
  for (int i = 0; i < input->length; i++) {
2859
0
    if (buffer[i].symbol & class->value) {
2860
0
      if ((i == 0 || !(wordBuffer[i - 1] & WORD_CHAR)) &&
2861
0
          (i + 1 == input->length || !(wordBuffer[i + 1] & WORD_CHAR)))
2862
0
        wordBuffer[i] |= WORD_WHOLE;
2863
0
    } else if (buffer[i].word & class->value) {
2864
0
      if (buffer[i].end & class->value) {
2865
0
        if (word_start >= 0 && wordBuffer[i] & WORD_CHAR)
2866
0
          wordBuffer[word_start] &= ~WORD_WHOLE;
2867
0
        word_start = -1;
2868
0
      } else {
2869
0
        if (i == 0 || !(wordBuffer[i - 1] & WORD_CHAR))
2870
0
          wordBuffer[i] |= WORD_WHOLE;
2871
0
        word_start = i;
2872
0
      }
2873
0
    }
2874
0
  }
2875
0
}
2876
2877
static void
2878
convertToPassage(const int pass_start, const int pass_end, const int word_start,
2879
    EmphasisInfo *buffer, const EmphasisClass *class,
2880
0
    const TranslationTableHeader *table, unsigned int *wordBuffer) {
2881
0
  int i;
2882
0
  const TranslationTableOffset *emphRule = table->emphRules[class->rule];
2883
0
  const TranslationTableRule *indicRule;
2884
2885
0
  for (i = pass_start; i <= pass_end; i++) {
2886
0
    buffer[i].symbol &= ~class->value;
2887
0
    buffer[i].word &= ~class->value;
2888
0
    wordBuffer[i] &= ~WORD_WHOLE;
2889
0
  }
2890
2891
0
  buffer[pass_start].begin |= class->value;
2892
0
  if (brailleIndicatorDefined(emphRule[endOffset], table, &indicRule) ||
2893
0
      brailleIndicatorDefined(emphRule[endPhraseAfterOffset], table, &indicRule))
2894
0
    buffer[pass_end].end |= class->value;
2895
0
  else if (brailleIndicatorDefined(
2896
0
           emphRule[endPhraseBeforeOffset], table, &indicRule)) {
2897
    /* if the phrase end indicator is the same as the word indicator, mark it as a
2898
     * word so that the resolveEmphasisResets code applies */
2899
0
    const TranslationTableRule *begwordRule;
2900
0
    if (brailleIndicatorDefined(emphRule[begWordOffset], table, &begwordRule) &&
2901
0
        indicRule->dotslen == begwordRule->dotslen &&
2902
0
        !memcmp(&indicRule->charsdots[0], &begwordRule->charsdots[0],
2903
0
            begwordRule->dotslen * CHARSIZE)) {
2904
0
      buffer[word_start].word |= class->value;
2905
      /* a passage has only whole emphasized words */
2906
0
      wordBuffer[word_start] |= WORD_WHOLE;
2907
0
    } else {
2908
0
      buffer[word_start].end |= class->value;
2909
0
    }
2910
0
  }
2911
0
}
2912
2913
static void
2914
resolveEmphasisPassages(EmphasisInfo *buffer, const EmphasisClass *class,
2915
    const TranslationTableHeader *table, const InString *input,
2916
0
    unsigned int *wordBuffer) {
2917
0
  const TranslationTableOffset *emphRule = table->emphRules[class->rule];
2918
0
  int in_word = 0, last_word_start = -1, last_word_end = -1;
2919
0
  int in_emph_word = 0, last_emph_symbol = -1;
2920
0
  int in_pass = 0, last_pass_word_start = -1, last_pass_word_end = -1, pass_start = -1;
2921
0
  unsigned int pass_word_cnt = 0;
2922
0
  int endphraseafter_defined = emphRule[endPhraseAfterOffset] || emphRule[endOffset];
2923
2924
0
  for (int i = 0; i < input->length; i++) {
2925
2926
    /* check if at beginning of word (words are determined by isEmphSpace() and
2927
     * further refined at the beginning and end of words based on isEmphasizable()) */
2928
0
    if (!in_word && wordBuffer[i] & WORD_CHAR) {
2929
0
      in_word = 1;
2930
0
      last_word_start = i;
2931
0
    } else { /* check if at end of word */
2932
0
      if (in_word && !(wordBuffer[i] & WORD_CHAR)) {
2933
0
        in_word = 0;
2934
0
        last_word_end = i;
2935
0
      }
2936
0
    }
2937
2938
    /* check for symbol or word indicator */
2939
0
    if (!in_emph_word &&
2940
0
        (buffer[i].symbol & class->value ||
2941
0
            (buffer[i].word & class->value &&
2942
0
                !(buffer[i].end & class->value)))) {
2943
0
      if (buffer[i].symbol & class->value) {
2944
0
        last_emph_symbol = i;
2945
0
      } else {
2946
0
        in_emph_word = 1;
2947
0
      }
2948
0
      if (in_pass) {
2949
        /* only whole capitalized words (words without lowercase letters) can be
2950
         * part of a passage (note that this also includes words without letters
2951
         * if the next word with letters is a whole word) */
2952
0
        if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) {
2953
0
          last_pass_word_start = i;
2954
0
          pass_word_cnt++;
2955
0
        } else
2956
0
          goto end_passage;
2957
0
      }
2958
0
    } else { /* check for word end indicator or word end */
2959
0
      if ((in_emph_word &&
2960
0
            (buffer[i].word & class->value &&
2961
0
                buffer[i].end & class->value)) ||
2962
0
          last_word_end == i) {
2963
0
        in_emph_word = 0;
2964
0
        if (in_pass) {
2965
          /* only whole capitalized words can be part of a passage */
2966
0
          last_pass_word_end = i;
2967
0
        }
2968
0
      }
2969
0
    }
2970
2971
    /* check if possibly at beginning of passage */
2972
0
    if (!in_pass && (in_emph_word || last_emph_symbol == i)) {
2973
      /* only whole capitalized words can be part of a passage */
2974
0
      if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) {
2975
0
        in_pass = 1;
2976
0
        pass_start = i;
2977
0
        last_pass_word_start = i;
2978
0
        last_pass_word_end = -1;
2979
0
        pass_word_cnt = 1;
2980
0
      }
2981
0
    } else { /* check if at end of passage */
2982
0
      if (in_pass) {
2983
0
        if (in_word && !(in_emph_word || last_emph_symbol == i)) {
2984
0
        end_passage:
2985
0
          in_pass = 0;
2986
0
          if (last_pass_word_end < last_pass_word_start) {
2987
0
            last_pass_word_end = i;
2988
0
          }
2989
          /* it is a passage only if the number of words is greater than or
2990
           * equal to the minimum length (lencapsphrase / lenemphphrase) */
2991
          /* if the phrase closing indicator is placed before the last word and
2992
           * it was not a whole word, the minimum phrase length is increased */
2993
0
          if (!endphraseafter_defined && last_pass_word_end != last_word_end) {
2994
0
            pass_word_cnt--;
2995
0
          }
2996
0
          if (pass_word_cnt >= emphRule[lenPhraseOffset])
2997
0
            convertToPassage(pass_start, last_pass_word_end,
2998
0
                last_pass_word_start, buffer, class, table, wordBuffer);
2999
0
        } else if (i == input->length - 1) {
3000
0
          if (pass_word_cnt >= emphRule[lenPhraseOffset]) {
3001
0
            if (last_pass_word_end < last_pass_word_start) {
3002
0
              last_pass_word_end = input->length;
3003
0
            }
3004
0
            convertToPassage(pass_start, last_pass_word_end,
3005
0
                last_pass_word_start, buffer, class, table, wordBuffer);
3006
0
          }
3007
0
        }
3008
0
      }
3009
0
    }
3010
0
  }
3011
0
}
3012
3013
static void
3014
resolveEmphasisSingleSymbols(
3015
0
    EmphasisInfo *buffer, const EmphasisClass *class, const InString *input) {
3016
0
  int i;
3017
3018
0
  for (i = 0; i < input->length; i++) {
3019
0
    if (buffer[i].begin & class->value)
3020
0
      if (buffer[i + 1].end & class->value) {
3021
0
        buffer[i].begin &= ~class->value;
3022
0
        buffer[i + 1].end &= ~class->value;
3023
0
        buffer[i].symbol |= class->value;
3024
0
      }
3025
0
  }
3026
0
}
3027
3028
static void
3029
resolveEmphasisAllSymbols(EmphasisInfo *buffer, const EmphasisClass *class,
3030
    const TranslationTableHeader *table, formtype *typebuf, const InString *input,
3031
0
    unsigned int *wordBuffer) {
3032
3033
  /* Mark every emphasized character individually with symbol if begemphword is not
3034
   * defined (assumes resolveEmphasisWords has not been run) */
3035
  /* Mark every emphasized character individually with symbol if endemphword is not
3036
   * defined
3037
   * and emphasis ends within a word (assumes resolveEmphasisWords has been run) */
3038
  /* Note that it is possible that emphletter is also not defined, in which case the
3039
   * emphasis will not be marked at all. */
3040
3041
0
  const TranslationTableOffset *emphRule = table->emphRules[class->rule];
3042
0
  const int begword_enabled = emphRule[begWordOffset];
3043
0
  const int endword_enabled = emphRule[endWordOffset];
3044
3045
0
  if (!begword_enabled) {
3046
0
    int in_emph = 0;
3047
0
    for (int i = 0; i < input->length; i++) {
3048
0
      if (in_emph) {
3049
0
        if (buffer[i].end & class->value) {
3050
0
          in_emph = 0;
3051
0
          buffer[i].end &= ~class->value;
3052
0
        }
3053
0
      } else {
3054
0
        if (buffer[i].begin & class->value) {
3055
0
          in_emph = 1;
3056
0
          buffer[i].begin &= ~class->value;
3057
0
        }
3058
0
      }
3059
0
      if (in_emph) {
3060
0
        buffer[i].symbol |= class->value;
3061
0
      }
3062
0
    }
3063
0
  } else if (!endword_enabled) {
3064
0
    int in_pass = 0, in_word = 0, word_start = -1;
3065
0
    for (int i = 0; i < input->length; i++) {
3066
0
      if (in_pass)
3067
0
        if (buffer[i].end & class->value || buffer[i].word & class->value)
3068
0
          in_pass = 0;
3069
0
      if (!in_pass) {
3070
0
        if (buffer[i].begin & class->value)
3071
0
          in_pass = 1;
3072
0
        else {
3073
0
          if (!in_word)
3074
0
            if (buffer[i].word & class->value) {
3075
0
              in_word = 1;
3076
0
              word_start = i;
3077
0
            }
3078
0
          if (in_word) {
3079
0
            if (buffer[i].word & class->value &&
3080
0
                buffer[i].end & class->value) {
3081
0
              in_word = 0;
3082
0
              if (begword_enabled && !endword_enabled) {
3083
0
                buffer[i].end &= ~class->value;
3084
0
                buffer[i].word &= ~class->value;
3085
0
                buffer[word_start].word &= ~class->value;
3086
0
                for (int j = word_start; j < i; j++)
3087
0
                  buffer[j].symbol |= class->value;
3088
0
              }
3089
0
            } else if (!(wordBuffer[i] & WORD_CHAR)) {
3090
0
              in_word = 0;
3091
0
            }
3092
0
          }
3093
0
        }
3094
0
      }
3095
0
    }
3096
0
  }
3097
0
}
3098
3099
static void
3100
resolveEmphasisResets(EmphasisInfo *buffer, const EmphasisClass *class,
3101
    const TranslationTableHeader *table, const InString *input,
3102
0
    unsigned int *wordBuffer) {
3103
0
  int in_word = 0, in_pass = 0, word_start = -1, word_reset = 0, letter_cnt = 0,
3104
0
    pass_end = -1;
3105
0
  int i;
3106
0
  int letter_defined = table->emphRules[class->rule][letterOffset];
3107
3108
0
  for (i = 0; i < input->length; i++) {
3109
0
    if (in_pass) {
3110
0
      if (buffer[i].end & class->value)
3111
0
        in_pass = 0;
3112
0
      else if (buffer[i].word & class->value) {
3113
        /* the passage is ended with a "endphrase before" indicator and this
3114
         * indicator is the same as the "begword" indicator (see convertToPassage)
3115
         */
3116
0
        in_pass = 0;
3117
        /* remember this position so that if there is a reset later in this word,
3118
         * we can remove this indicator */
3119
0
        pass_end = i;
3120
0
      }
3121
0
    }
3122
0
    if (!in_pass) {
3123
0
      if (buffer[i].begin & class->value) {
3124
0
        in_pass = 1;
3125
0
      } else {
3126
0
        if (!in_word) {
3127
0
          if (buffer[i].word & class->value) {
3128
            /* deal with case when reset was at beginning of word */
3129
0
            if (wordBuffer[i] & WORD_RESET ||
3130
0
                resetsEmphMode(input->chars[i], table, class)) {
3131
0
              if (!letter_defined)
3132
                /* if emphletter is not defined, use the word indicator */
3133
0
                ;
3134
0
              else if (pass_end == i)
3135
                /* also use the word indicator if the reset marks the end
3136
                 * of a passage */
3137
0
                ;
3138
0
              else {
3139
                /* use the symbol indicator symbol for the current
3140
                 * character */
3141
0
                buffer[i].symbol |= class->value;
3142
                /* move the word indicator to the next character or remove
3143
                 * it altogether if the next character is a space */
3144
0
                if (wordBuffer[i + 1] & WORD_CHAR) {
3145
0
                  buffer[i + 1].word |= class->value;
3146
0
                  if (wordBuffer[i] & WORD_WHOLE)
3147
0
                    wordBuffer[i + 1] |= WORD_WHOLE;
3148
0
                  if (pass_end == i) pass_end++;
3149
0
                }
3150
0
                buffer[i].word &= ~class->value;
3151
0
                wordBuffer[i] &= ~WORD_WHOLE;
3152
0
                continue;
3153
0
              }
3154
0
            }
3155
3156
0
            in_word = 1;
3157
0
            word_start = i;
3158
0
            letter_cnt = 0;
3159
0
            word_reset = 0;
3160
0
          }
3161
3162
          /* it is possible for a character to have been marked as a symbol when
3163
           * it should not be one */
3164
0
          else if (buffer[i].symbol & class->value) {
3165
0
            if (wordBuffer[i] & WORD_RESET ||
3166
0
                resetsEmphMode(input->chars[i], table, class))
3167
0
              buffer[i].symbol &= ~class->value;
3168
0
          }
3169
0
        }
3170
3171
0
        if (in_word) {
3172
3173
          /* at end of word */
3174
0
          if (!(wordBuffer[i] & WORD_CHAR) ||
3175
0
              (buffer[i].word & class->value &&
3176
0
                  buffer[i].end & class->value)) {
3177
0
            in_word = 0;
3178
3179
            /* check if symbol */
3180
0
            if (letter_defined && letter_cnt == 1 && word_start != pass_end) {
3181
0
              buffer[word_start].symbol |= class->value;
3182
0
              buffer[word_start].word &= ~class->value;
3183
0
              wordBuffer[word_start] &= ~WORD_WHOLE;
3184
0
              buffer[i].end &= ~class->value;
3185
0
              buffer[i].word &= ~class->value;
3186
0
            }
3187
3188
            /* if word ended on a reset or last char was a reset, get rid of
3189
             * end bits */
3190
0
            if (word_reset || wordBuffer[i] & WORD_RESET ||
3191
0
                resetsEmphMode(input->chars[i], table, class)) {
3192
0
              buffer[i].end &= ~class->value;
3193
0
              buffer[i].word &= ~class->value;
3194
0
            }
3195
3196
            /* if word ended when it began, get rid of all bits */
3197
0
            if (i == word_start) {
3198
0
              wordBuffer[word_start] &= ~WORD_WHOLE;
3199
0
              buffer[i].end &= ~class->value;
3200
0
              buffer[i].word &= ~class->value;
3201
0
            }
3202
0
          } else {
3203
            /* hit reset */
3204
0
            if (wordBuffer[i] & WORD_RESET ||
3205
0
                resetsEmphMode(input->chars[i], table, class)) {
3206
3207
              /* check if symbol is not already resetting */
3208
0
              if (letter_defined && letter_cnt == 1 &&
3209
0
                  word_start != pass_end) {
3210
0
                buffer[word_start].symbol |= class->value;
3211
0
                buffer[word_start].word &= ~class->value;
3212
0
                wordBuffer[word_start] &= ~WORD_WHOLE;
3213
0
              }
3214
3215
              /* if reset is a letter or emphmodechar, make it the new
3216
               * word_start */
3217
0
              if (!resetsEmphMode(input->chars[i], table, class)) {
3218
0
                if (word_start == pass_end)
3219
                  /* move the word marker that ends the passage to the
3220
                   * current position */
3221
0
                  buffer[pass_end].word &= ~class->value;
3222
0
                pass_end = -1;
3223
0
                word_reset = 0;
3224
0
                word_start = i;
3225
0
                letter_cnt = 1;
3226
0
                buffer[i].word |= class->value;
3227
0
              } else
3228
0
                word_reset = 1;
3229
3230
0
              continue;
3231
0
            }
3232
3233
0
            if (word_reset) {
3234
0
              if (word_start == pass_end)
3235
                /* move the word marker that ends the passage to the
3236
                 * current position */
3237
0
                buffer[pass_end].word &= ~class->value;
3238
0
              pass_end = -1;
3239
0
              word_reset = 0;
3240
0
              word_start = i;
3241
0
              letter_cnt = 0;
3242
0
              buffer[i].word |= class->value;
3243
0
            }
3244
3245
0
            letter_cnt++;
3246
0
          }
3247
0
        }
3248
0
      }
3249
0
    }
3250
0
  }
3251
3252
  /* clean up end */
3253
0
  if (in_word) {
3254
    /* check if symbol */
3255
0
    if (letter_defined && letter_cnt == 1 && word_start != pass_end) {
3256
0
      buffer[word_start].symbol |= class->value;
3257
0
      buffer[word_start].word &= ~class->value;
3258
0
      wordBuffer[word_start] &= ~WORD_WHOLE;
3259
0
      buffer[i].end &= ~class->value;
3260
0
      buffer[i].word &= ~class->value;
3261
0
    }
3262
3263
0
    if (word_reset) {
3264
0
      buffer[i].end &= ~class->value;
3265
0
      buffer[i].word &= ~class->value;
3266
0
    }
3267
0
  }
3268
0
}
3269
3270
static void
3271
markEmphases(const TranslationTableHeader *table, const InString *input,
3272
0
    formtype *typebuf, unsigned int *wordBuffer, EmphasisInfo *emphasisBuffer) {
3273
3274
  /* handle capsnocont */
3275
0
  if (table->capsNoCont) {
3276
0
    int caps_cnt = 0;  // number of consecutive characters ending with the current
3277
               // that are uppercase letters
3278
0
    for (int i = 0; i < input->length; i++) {
3279
0
      if (checkCharAttr(input->chars[i], CTC_UpperCase, table)) {
3280
        /* mark two or more consecutive caps with nocont */
3281
0
        caps_cnt++;
3282
0
        if (caps_cnt >= 2) {
3283
0
          typebuf[i] |= no_contract;
3284
          /* also mark the previous one */
3285
0
          if (caps_cnt == 2) typebuf[i - 1] |= no_contract;
3286
0
        }
3287
0
      } else {
3288
0
        caps_cnt = 0;
3289
0
      }
3290
0
    }
3291
0
  }
3292
3293
0
  for (int j = 0; j < MAX_EMPH_CLASSES + MAX_MODES; j++) {
3294
0
    const EmphasisClass *emphClass = j < MAX_EMPH_CLASSES
3295
0
        ? &table->emphClasses[j]
3296
0
        : &table->modes[j - MAX_EMPH_CLASSES];
3297
0
    if (!emphClass->value) continue;
3298
0
    const TranslationTableOffset *emphRule = table->emphRules[emphClass->rule];
3299
3300
    /* clear out previous word markings and mark non-space characters in word buffer
3301
     */
3302
0
    for (int i = 0; i < input->length; i++) {
3303
0
      if (isEmphSpace(input->chars[i], table, emphClass))
3304
0
        wordBuffer[i] &= ~WORD_CHAR;
3305
0
      else
3306
0
        wordBuffer[i] |= WORD_CHAR;
3307
0
      wordBuffer[i] &= ~WORD_WHOLE;
3308
0
    }
3309
3310
    /* mark beginning and end points */
3311
0
    resolveEmphasisBeginEnd(
3312
0
        emphasisBuffer, emphClass, table, input, typebuf, wordBuffer);
3313
3314
0
    if (emphRule[begWordOffset]) {
3315
      /* mark word beginning and end points, whole words, and symbols (single
3316
       * characters) */
3317
0
      resolveEmphasisWords(emphasisBuffer, emphClass, table, input, wordBuffer);
3318
0
      if (emphRule[lenPhraseOffset])
3319
        /* remove markings of words that form a passage, and mark the begin and
3320
         * end of these passages instead */
3321
0
        resolveEmphasisPassages(
3322
0
            emphasisBuffer, emphClass, table, input, wordBuffer);
3323
      /* mark where emphasis in a word needs to be retriggered after it was reset */
3324
0
      resolveEmphasisResets(emphasisBuffer, emphClass, table, input, wordBuffer);
3325
0
      if (!emphRule[endWordOffset])
3326
        /* if endword is not defined and emphasis ends within a word, mark every
3327
         * emphasised character individually as symbol */
3328
0
        resolveEmphasisAllSymbols(
3329
0
            emphasisBuffer, emphClass, table, typebuf, input, wordBuffer);
3330
0
    } else if (emphRule[letterOffset]) {
3331
0
      if (emphRule[begOffset])
3332
0
        resolveEmphasisSingleSymbols(emphasisBuffer, emphClass, input);
3333
0
      else
3334
0
        resolveEmphasisAllSymbols(
3335
0
            emphasisBuffer, emphClass, table, typebuf, input, wordBuffer);
3336
0
    }
3337
0
    if (emphClass->mode) {
3338
      /* only mark if actually a capital letter (don't mark spaces or punctuation).
3339
       */
3340
0
      for (int i = 0; i < input->length; i++) {
3341
0
        if (emphasisBuffer[i].symbol & emphClass->value) {
3342
0
          if (emphClass->mode == CTC_UpperCase) {
3343
0
            if (!(typebuf[i] & CAPSEMPH))
3344
0
              emphasisBuffer[i].symbol &= ~emphClass->value;
3345
0
          } else {
3346
0
            if (!checkCharAttr(input->chars[i], emphClass->mode, table))
3347
0
              emphasisBuffer[i].symbol &= ~emphClass->value;
3348
0
          }
3349
0
        }
3350
0
      }
3351
0
    }
3352
0
  }
3353
0
}
3354
3355
static void
3356
insertEmphasisSymbol(const EmphasisInfo *buffer, const int at, const EmphasisClass *class,
3357
    const TranslationTableHeader *table, int pos, const InString *input,
3358
0
    OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus) {
3359
0
  if (buffer[at].symbol & class->value) {
3360
0
    const TranslationTableRule *indicRule;
3361
0
    if (brailleIndicatorDefined(
3362
0
          table->emphRules[class->rule][letterOffset], table, &indicRule))
3363
0
      for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos,
3364
0
          input, output, posMapping, cursorPosition, cursorStatus);
3365
0
  }
3366
0
}
3367
3368
static void
3369
insertEmphasisBegin(const EmphasisInfo *buffer, const int at, const EmphasisClass *class,
3370
    const TranslationTableHeader *table, int pos, const InString *input,
3371
0
    OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus) {
3372
0
  const TranslationTableOffset *emphRule = table->emphRules[class->rule];
3373
0
  const TranslationTableRule *indicRule;
3374
0
  if (buffer[at].begin & class->value) {
3375
0
    if (brailleIndicatorDefined(emphRule[begPhraseOffset], table, &indicRule))
3376
0
      for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos,
3377
0
          input, output, posMapping, cursorPosition, cursorStatus);
3378
0
    else if (brailleIndicatorDefined(emphRule[begOffset], table, &indicRule))
3379
0
      for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos,
3380
0
          input, output, posMapping, cursorPosition, cursorStatus);
3381
0
  }
3382
3383
0
  if (buffer[at].word & class->value
3384
      // && !(buffer[at].begin & class->value)
3385
0
      && !(buffer[at].end & class->value)) {
3386
0
    if (brailleIndicatorDefined(emphRule[begWordOffset], table, &indicRule))
3387
0
      for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos,
3388
0
          input, output, posMapping, cursorPosition, cursorStatus);
3389
0
  }
3390
0
}
3391
3392
static void
3393
insertEmphasisEnd(const EmphasisInfo *buffer, const int at, const EmphasisClass *class,
3394
    const TranslationTableHeader *table, int pos, const InString *input,
3395
0
    OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus) {
3396
0
  const TranslationTableOffset *emphRule = table->emphRules[class->rule];
3397
0
  if (buffer[at].end & class->value) {
3398
0
    const TranslationTableRule *indicRule;
3399
0
    if (buffer[at].word & class->value) {
3400
0
      if (brailleIndicatorDefined(emphRule[endWordOffset], table, &indicRule))
3401
0
        for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, -1,
3402
0
            pos, input, output, posMapping, cursorPosition, cursorStatus);
3403
0
    } else {
3404
0
      if (brailleIndicatorDefined(emphRule[endOffset], table, &indicRule))
3405
0
        for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, -1,
3406
0
            pos, input, output, posMapping, cursorPosition, cursorStatus);
3407
0
      else if (brailleIndicatorDefined(
3408
0
               emphRule[endPhraseAfterOffset], table, &indicRule))
3409
0
        for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, -1,
3410
0
            pos, input, output, posMapping, cursorPosition, cursorStatus);
3411
0
      else if (brailleIndicatorDefined(
3412
0
               emphRule[endPhraseBeforeOffset], table, &indicRule))
3413
0
        for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0,
3414
0
            pos, input, output, posMapping, cursorPosition, cursorStatus);
3415
0
    }
3416
0
  }
3417
0
}
3418
3419
static int
3420
0
endCount(const EmphasisInfo *buffer, const int at, const EmphasisClass *class) {
3421
0
  int i, cnt = 1;
3422
0
  if (!(buffer[at].end & class->value)) return 0;
3423
0
  for (i = at - 1; i >= 0; i--)
3424
0
    if (buffer[i].begin & class->value || buffer[i].word & class->value)
3425
0
      break;
3426
0
    else
3427
0
      cnt++;
3428
0
  return cnt;
3429
0
}
3430
3431
static int
3432
beginCount(const EmphasisInfo *buffer, const int at, const EmphasisClass *class,
3433
0
    const TranslationTableHeader *table, const InString *input) {
3434
0
  if (buffer[at].begin & class->value) {
3435
0
    int i, cnt = 1;
3436
0
    for (i = at + 1; i < input->length; i++)
3437
0
      if (buffer[i].end & class->value)
3438
0
        break;
3439
0
      else
3440
0
        cnt++;
3441
0
    return cnt;
3442
0
  } else if (buffer[at].word & class->value) {
3443
0
    int i, cnt = 1;
3444
0
    for (i = at + 1; i < input->length; i++)
3445
0
      if (buffer[i].end & class->value)
3446
0
        break;
3447
0
      else if (checkCharAttr(input->chars[i], CTC_SeqDelimiter, table))
3448
0
        break;
3449
0
      else if (isEmphSpace(input->chars[i], table, class))
3450
0
        break;
3451
0
      else
3452
0
        cnt++;
3453
0
    return cnt;
3454
0
  }
3455
0
  return 0;
3456
0
}
3457
3458
static void
3459
insertEmphasesAt(int begin, int end, int caps, int other, const int at,
3460
    const TranslationTableHeader *table, int pos, const InString *input,
3461
    OutString *output, int *posMapping, const EmphasisInfo *emphasisBuffer,
3462
0
    int *cursorPosition, int *cursorStatus) {
3463
3464
  /* The order of inserting the end symbols must be the reverse
3465
   * of the insertions of the begin symbols so that they will
3466
   * nest properly when multiple emphases start and end at
3467
   * the same place */
3468
  // TODO: ordering with partial word
3469
3470
0
  if (end && caps)
3471
0
    for (int i = 0; i < MAX_MODES; i++) {
3472
0
      const EmphasisClass *emphClass = &table->modes[i];
3473
0
      if (!emphClass->value) continue;
3474
0
      if ((emphasisBuffer[at].begin | emphasisBuffer[at].end |
3475
0
            emphasisBuffer[at].word | emphasisBuffer[at].symbol) &
3476
0
          emphClass->value)
3477
0
        insertEmphasisEnd(emphasisBuffer, at, emphClass, table, pos, input,
3478
0
            output, posMapping, cursorPosition, cursorStatus);
3479
0
    }
3480
3481
0
  if (end && other) {
3482
0
    int type_counts[MAX_EMPH_CLASSES];
3483
3484
    /* end bits */
3485
0
    for (int i = 0; i < MAX_EMPH_CLASSES; i++) {
3486
0
      const EmphasisClass *emphClass = &table->emphClasses[i];
3487
0
      if (!emphClass->value)
3488
0
        type_counts[i] = 0;
3489
0
      else
3490
0
        type_counts[i] = endCount(emphasisBuffer, at, emphClass);
3491
0
    }
3492
3493
0
    while (1) {
3494
0
      int min = -1;
3495
0
      for (int i = 0; i < MAX_EMPH_CLASSES; i++)
3496
0
        if (type_counts[i] > 0)
3497
0
          if (min < 0 || type_counts[i] < type_counts[min]) min = i;
3498
0
      if (min < 0) break;
3499
0
      type_counts[min] = 0;
3500
0
      insertEmphasisEnd(emphasisBuffer, at, &table->emphClasses[min], table, pos,
3501
0
          input, output, posMapping, cursorPosition, cursorStatus);
3502
0
    }
3503
0
  }
3504
3505
0
  if (begin && other) {
3506
0
    int type_counts[MAX_EMPH_CLASSES];
3507
3508
    /* begin and word bits */
3509
0
    for (int i = 0; i < MAX_EMPH_CLASSES; i++) {
3510
0
      const EmphasisClass *emphClass = &table->emphClasses[i];
3511
0
      if (!emphClass->value)
3512
0
        type_counts[i] = 0;
3513
0
      else
3514
0
        type_counts[i] = beginCount(emphasisBuffer, at, emphClass, table, input);
3515
0
    }
3516
3517
0
    while (1) {
3518
0
      int max = MAX_EMPH_CLASSES - 1;
3519
0
      for (int i = MAX_EMPH_CLASSES - 1; i >= 0; i--)
3520
0
        if (type_counts[max] < type_counts[i]) max = i;
3521
0
      if (!type_counts[max]) break;
3522
0
      type_counts[max] = 0;
3523
0
      insertEmphasisBegin(emphasisBuffer, at, &table->emphClasses[max], table, pos,
3524
0
          input, output, posMapping, cursorPosition, cursorStatus);
3525
0
    }
3526
3527
    /* symbol bits */
3528
0
    for (int i = MAX_EMPH_CLASSES - 1; i >= 0; i--)
3529
0
      if ((emphasisBuffer[at].begin | emphasisBuffer[at].end |
3530
0
            emphasisBuffer[at].word | emphasisBuffer[at].symbol) &
3531
0
          table->emphClasses[i].value)
3532
0
        insertEmphasisSymbol(emphasisBuffer, at, &table->emphClasses[i], table,
3533
0
            pos, input, output, posMapping, cursorPosition, cursorStatus);
3534
0
  }
3535
3536
0
  if (begin && caps) {
3537
3538
    /* insert capitalization last so it will be closest to word */
3539
    /* other mode indicators are inserted so that those who are defined first are
3540
     * closest to word */
3541
0
    for (int i = MAX_MODES - 1; i >= 0; i--) {
3542
0
      const EmphasisClass *emphClass = &table->modes[i];
3543
0
      if (!emphClass->value) continue;
3544
0
      if ((emphasisBuffer[at].begin | emphasisBuffer[at].end |
3545
0
            emphasisBuffer[at].word | emphasisBuffer[at].symbol) &
3546
0
          emphClass->value) {
3547
0
        insertEmphasisBegin(emphasisBuffer, at, emphClass, table, pos, input,
3548
0
            output, posMapping, cursorPosition, cursorStatus);
3549
0
        insertEmphasisSymbol(emphasisBuffer, at, emphClass, table, pos, input,
3550
0
            output, posMapping, cursorPosition, cursorStatus);
3551
0
      }
3552
0
    }
3553
0
  }
3554
0
}
3555
3556
static void
3557
checkNumericMode(const TranslationTableHeader *table, int pos, const InString *input,
3558
    OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
3559
0
    int *dontContract, int *numericMode) {
3560
  /* check if numeric mode is active and insert number sign and nocontract sign when
3561
   * needed */
3562
3563
0
  int i;
3564
0
  const TranslationTableRule *indicRule;
3565
0
  if (!brailleIndicatorDefined(table->numberSign, table, &indicRule)) return;
3566
3567
  /* not in numeric mode */
3568
0
  if (!*numericMode) {
3569
0
    if (checkCharAttr(input->chars[pos], CTC_Digit | CTC_LitDigit, table)) {
3570
0
      *numericMode = 1;
3571
      /* if the nocontractsign is defined and it is the same as the nonumsign then
3572
         disable contraction */
3573
0
      if (isIndicatorEqual(table->noContractSign, table->noNumberSign, table))
3574
0
        *dontContract = 1;
3575
0
      for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0, pos,
3576
0
          input, output, posMapping, cursorPosition, cursorStatus);
3577
0
    } else if (checkCharAttr(input->chars[pos], CTC_NumericMode, table)) {
3578
0
      for (i = pos + 1; i < input->length; i++) {
3579
0
        if (checkCharAttr(input->chars[i], CTC_Digit | CTC_LitDigit, table)) {
3580
0
          *numericMode = 1;
3581
0
          for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen,
3582
0
              0, pos, input, output, posMapping, cursorPosition,
3583
0
              cursorStatus);
3584
0
          break;
3585
0
        } else if (!checkCharAttr(input->chars[i], CTC_NumericMode, table))
3586
0
          break;
3587
0
      }
3588
0
    }
3589
0
  }
3590
3591
  /* in numeric mode */
3592
0
  else {
3593
0
    if (!checkCharAttr(input->chars[pos],
3594
0
          CTC_Digit | CTC_LitDigit | CTC_NumericMode | CTC_MidEndNumericMode,
3595
0
          table)) {
3596
0
      *numericMode = 0;
3597
0
      if (brailleIndicatorDefined(table->noNumberSign, table, &indicRule))
3598
0
        if (checkCharAttr(input->chars[pos], CTC_NumericNoContract, table))
3599
0
          for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen,
3600
0
              0, pos, input, output, posMapping, cursorPosition,
3601
0
              cursorStatus);
3602
0
    }
3603
0
  }
3604
0
}
3605
3606
static int
3607
translateString(const TranslationTableHeader *table, int mode, int currentPass,
3608
    const InString *input, OutString *output, int *posMapping, formtype *typebuf,
3609
    unsigned char *srcSpacing, unsigned char *destSpacing, unsigned int *wordBuffer,
3610
    EmphasisInfo *emphasisBuffer, int haveEmphasis, int *realInlen,
3611
0
    int *cursorPosition, int *cursorStatus, int compbrlStart, int compbrlEnd) {
3612
0
  int pos;
3613
0
  int transOpcode;
3614
0
  int prevTransOpcode;
3615
0
  const TranslationTableRule *transRule;
3616
0
  int transCharslen;
3617
0
  int passCharDots;
3618
0
  const widechar *passInstructions;
3619
0
  int passIC; /* Instruction counter */
3620
0
  PassRuleMatch patternMatch;
3621
0
  TranslationTableRule *groupingRule;
3622
0
  widechar groupingOp;
3623
0
  int numericMode;
3624
0
  int dontContract;
3625
0
  LastWord lastWord;
3626
0
  int insertEmphasesFrom;
3627
0
  TranslationTableCharacter *curCharDef;
3628
0
  int repwordStart;
3629
0
  int repwordLength;
3630
0
  const InString *origInput = input;
3631
0
  int warnedForNoTranslate = 0;
3632
  /* Main translation routine */
3633
0
  int k;
3634
0
  translation_direction = 1;
3635
0
  markSyllables(table, input, typebuf);
3636
0
  numericMode = 0;
3637
0
  lastWord = (LastWord){ 0, 0, 0 };
3638
0
  dontContract = 0;
3639
0
  prevTransOpcode = CTO_None;
3640
0
  pos = output->length = 0;
3641
0
  int posIncremented = 1;
3642
0
  insertEmphasesFrom = 0;
3643
0
  _lou_resetPassVariables();
3644
0
  if (typebuf && capsletterDefined(table))
3645
0
    for (k = 0; k < input->length; k++)
3646
0
      if (checkCharAttr(input->chars[k], CTC_UpperCase, table))
3647
0
        typebuf[k] |= CAPSEMPH;
3648
3649
0
  markEmphases(table, input, typebuf, wordBuffer, emphasisBuffer);
3650
3651
0
  while (pos <= input->length) { /* the main translation loop */
3652
0
    if (pos > 0 && checkCharAttr(input->chars[pos - 1], CTC_Space, table) &&
3653
0
        (transOpcode != CTO_JoinableWord))
3654
0
      lastWord = (LastWord){ pos, output->length, insertEmphasesFrom };
3655
0
    if (pos == input->length) break;
3656
0
    if (pos >= compbrlStart && pos < compbrlEnd) {
3657
0
      int cs = 2;  // cursor status for this call
3658
0
      if (!doCompTrans(pos, compbrlEnd, table, &pos, input, output, posMapping,
3659
0
            emphasisBuffer, &transRule, cursorPosition, &cs, mode))
3660
0
        goto failure;
3661
0
      continue;
3662
0
    }
3663
0
    TranslationTableCharacterAttributes beforeAttributes;
3664
0
    setBefore(table, pos, input, &beforeAttributes);
3665
0
    if (pos >= input->length) break;
3666
3667
0
    if (!dontContract) dontContract = typebuf[pos] & no_contract;
3668
0
    if (typebuf[pos] & no_translate) {
3669
0
      if (!warnedForNoTranslate) {
3670
0
        _lou_logMessage(LOU_LOG_WARN,
3671
0
            "warning: Typeform no_translate is deprecated for input.");
3672
0
        warnedForNoTranslate = 1;
3673
0
      }
3674
0
      if (input->chars[pos] < 32 || input->chars[pos] > 126) goto failure;
3675
0
      widechar d = LOU_DOTS;
3676
0
      TranslationTableOffset offset = getChar(input->chars[pos], table)->otherRules;
3677
0
      while (offset) {
3678
0
        const TranslationTableRule *r =
3679
0
            (TranslationTableRule *)&table->ruleArea[offset];
3680
0
        if (r->opcode >= CTO_Space && r->opcode < CTO_UpLow && r->dotslen == 1) {
3681
0
          d = r->charsdots[1];
3682
0
          break;
3683
0
        }
3684
0
        offset = r->charsnext;
3685
0
      }
3686
0
      if (!for_updatePositions(&d, 1, 1, 0, pos, input, output, posMapping,
3687
0
            cursorPosition, cursorStatus))
3688
0
        goto failure;
3689
0
      pos++;
3690
0
      posIncremented = 1;
3691
0
      insertEmphasesFrom = pos;
3692
0
      continue;
3693
0
    }
3694
0
    repwordLength = 0;
3695
0
    for_selectRule(table, pos, *output, posMapping, mode, input, typebuf,
3696
0
        emphasisBuffer, &transOpcode, prevTransOpcode, &transRule, &transCharslen,
3697
0
        &passCharDots, &passInstructions, &passIC, &patternMatch, posIncremented,
3698
0
        *cursorPosition, &repwordLength, dontContract, compbrlStart, compbrlEnd,
3699
0
        beforeAttributes, &curCharDef, &groupingRule, &groupingOp);
3700
3701
0
    switch (transOpcode) /* Rules that pre-empt context and swap */
3702
0
    {
3703
0
    case CTO_CompBrl:
3704
0
    case CTO_Literal:
3705
0
      if (!doCompbrl(table, &pos, input, output, posMapping, emphasisBuffer,
3706
0
            &transRule, cursorPosition, cursorStatus, &lastWord,
3707
0
            &insertEmphasesFrom, mode))
3708
0
        goto failure;
3709
0
      continue;
3710
0
    default:
3711
0
      break;
3712
0
    }
3713
3714
    /* Skip repword separator to make caps/emph indicators appear before repword
3715
     * indicator */
3716
0
    if (repwordLength) pos += transCharslen;
3717
3718
0
    for (int at = insertEmphasesFrom; at <= pos; at++) {
3719
      /* insert caps end indicator */
3720
0
      insertEmphasesAt(0, 1, 1, 0, at, table, pos, input, output, posMapping,
3721
0
          emphasisBuffer, cursorPosition, cursorStatus);
3722
0
      if (haveEmphasis) {
3723
        /* insert emphasis end indicator */
3724
0
        insertEmphasesAt(0, 1, 0, 1, at, table, pos, input, output, posMapping,
3725
0
            emphasisBuffer, cursorPosition, cursorStatus);
3726
        /* insert emphasis start indicator */
3727
0
        insertEmphasesAt(1, 0, 0, 1, at, table, pos, input, output, posMapping,
3728
0
            emphasisBuffer, cursorPosition, cursorStatus);
3729
0
      }
3730
0
      if (at < pos)
3731
0
        insertEmphasesAt(1, 0, 1, 0, at, table, pos, input, output, posMapping,
3732
0
            emphasisBuffer, cursorPosition, cursorStatus);
3733
0
    }
3734
0
    insertEmphasesFrom = pos + 1;
3735
    /* insert grade 1 mode indicator (nocontractsign) before contraction */
3736
0
    if (transOpcode == CTO_Contraction) {
3737
0
      const TranslationTableRule *indicRule;
3738
0
      if (brailleIndicatorDefined(table->noContractSign, table, &indicRule))
3739
0
        for_updatePositions(&indicRule->charsdots[0], 0, indicRule->dotslen, 0,
3740
0
            pos, input, output, posMapping, cursorPosition, cursorStatus);
3741
0
    }
3742
    /* insert letter sign */
3743
0
    if (!insertLetterSign(table, pos, input, output, posMapping, transOpcode,
3744
0
          cursorPosition, cursorStatus, beforeAttributes))
3745
0
      goto failure;
3746
    /* insert caps start indicator */
3747
0
    insertEmphasesAt(1, 0, 1, 0, pos, table, pos, input, output, posMapping,
3748
0
        emphasisBuffer, cursorPosition, cursorStatus);
3749
    /* insert number sign (not if numericmodechars, midnumericmodechars or
3750
     * numericnocontchars has been defined) */
3751
0
    if (!table->usesNumericMode)
3752
0
      if (!insertNumberSign(table, pos, input, output, posMapping, prevTransOpcode,
3753
0
            cursorPosition, cursorStatus, beforeAttributes))
3754
0
        goto failure;
3755
    /* insert number sign and number cancel sign (nocontractsign) (only if
3756
     * numericmodechars, midnumericmodechars or numericnocontchars has been defined)
3757
     */
3758
0
    if (table->usesNumericMode)
3759
0
      checkNumericMode(table, pos, input, output, posMapping, cursorPosition,
3760
0
          cursorStatus, &dontContract, &numericMode);
3761
3762
0
    if (transOpcode == CTO_Context ||
3763
0
        (posIncremented &&
3764
0
            findForPassRule(table, pos, currentPass, input, &transOpcode,
3765
0
                &transRule, &transCharslen, &passCharDots,
3766
0
                &passInstructions, &passIC, &patternMatch, &groupingRule,
3767
0
                &groupingOp))) {
3768
0
      posIncremented = 1;
3769
0
      switch (transOpcode) {
3770
0
      case CTO_Context: {
3771
0
        const InString *inputBefore = input;
3772
0
        int posBefore = pos;
3773
0
        if (appliedRules != NULL && appliedRulesCount < maxAppliedRules)
3774
0
          appliedRules[appliedRulesCount++] = transRule;
3775
0
        if (!passDoAction(table, &input, output, posMapping, transOpcode,
3776
0
              &transRule, passCharDots, passInstructions, passIC, &pos,
3777
0
              patternMatch, cursorPosition, cursorStatus, groupingRule,
3778
0
              groupingOp, mode))
3779
0
          goto failure;
3780
0
        if (input->bufferIndex != inputBefore->bufferIndex &&
3781
0
            inputBefore->bufferIndex != origInput->bufferIndex)
3782
0
          releaseStringBuffer(inputBefore->bufferIndex);
3783
0
        if (pos == posBefore) posIncremented = 0;
3784
0
        continue;
3785
0
      }
3786
0
      default:
3787
0
        break;
3788
0
      }
3789
0
    } else {
3790
0
      if (appliedRules != NULL && appliedRulesCount < maxAppliedRules)
3791
0
        appliedRules[appliedRulesCount++] = transRule;
3792
0
      posIncremented = 1;
3793
0
    }
3794
3795
    /* Processing before replacement */
3796
3797
    /* check if leaving no contraction (grade 1) mode */
3798
0
    if (checkCharAttr(input->chars[pos], CTC_SeqDelimiter | CTC_Space, table))
3799
0
      dontContract = 0;
3800
3801
0
    switch (transOpcode) {
3802
0
    case CTO_EndNum:
3803
0
      if (table->letterSign && checkCharAttr(input->chars[pos], CTC_Letter, table))
3804
0
        output->length--;
3805
0
      break;
3806
0
    case CTO_Repeated:
3807
0
    case CTO_Space:
3808
0
      dontContract = 0;
3809
0
      break;
3810
0
    case CTO_LargeSign:
3811
0
      if (prevTransOpcode == CTO_LargeSign) {
3812
0
        int hasEndSegment = 0;
3813
0
        while (output->length > 0 &&
3814
0
            checkDotsAttr(
3815
0
                output->chars[output->length - 1], CTC_Space, table)) {
3816
0
          if (output->chars[output->length - 1] == LOU_ENDSEGMENT) {
3817
0
            hasEndSegment = 1;
3818
0
          }
3819
0
          output->length--;
3820
0
        }
3821
0
        if (hasEndSegment != 0) {
3822
0
          output->chars[output->length] = 0xffff;
3823
0
          output->length++;
3824
0
        }
3825
0
      }
3826
0
      break;
3827
0
    case CTO_DecPoint:
3828
0
      if (!table->usesNumericMode && table->numberSign) {
3829
0
        TranslationTableRule *numRule =
3830
0
            (TranslationTableRule *)&table->ruleArea[table->numberSign];
3831
0
        if (!for_updatePositions(&numRule->charsdots[numRule->charslen],
3832
0
              numRule->charslen, numRule->dotslen, 0, pos, input, output,
3833
0
              posMapping, cursorPosition, cursorStatus))
3834
0
          goto failure;
3835
0
      }
3836
0
      transOpcode = CTO_MidNum;
3837
0
      break;
3838
0
    case CTO_NoCont:
3839
0
      if (!dontContract)
3840
0
        doNocont(table, &pos, output, mode, input, &lastWord, &dontContract,
3841
0
            &insertEmphasesFrom);
3842
0
      continue;
3843
0
    case CTO_RepWord:
3844
0
    case CTO_RepEndWord:
3845
0
      repwordStart = pos - transCharslen - repwordLength;
3846
0
      break;
3847
0
    default:
3848
0
      break;
3849
0
    } /* end of action */
3850
3851
    /* replacement processing */
3852
0
    switch (transOpcode) {
3853
0
    case CTO_Replace:
3854
0
      pos += transCharslen;
3855
0
      if (!putCharacters(&transRule->charsdots[transCharslen], transRule->dotslen,
3856
0
            table, pos, input, output, posMapping, cursorPosition,
3857
0
            cursorStatus, mode))
3858
0
        goto failure;
3859
0
      break;
3860
0
    case CTO_None:
3861
      /* no definition or translation rules found for this character, but it may be
3862
       * based on another character */
3863
0
      if (!putCharacter(input->chars[pos], table, pos, input, output, posMapping,
3864
0
            cursorPosition, cursorStatus, mode))
3865
0
        goto failure;
3866
0
      pos++;
3867
0
      break;
3868
0
    default: {
3869
0
      const widechar *dots = &transRule->charsdots[transCharslen];
3870
0
      int dotslen = transRule->dotslen;
3871
0
      if (transOpcode == CTO_RepEndWord) {
3872
0
        int k;
3873
0
        for (k = 1; dots[k] != ','; k++)
3874
0
          ;
3875
0
        k++;
3876
0
        dots = &dots[k];
3877
0
        dotslen -= k;
3878
0
      }
3879
0
      if (dotslen) {
3880
0
        if (repwordLength) {
3881
          /* repword sepatator is already skipped */
3882
0
          if (!for_updatePositions(dots, 0, dotslen, 0, pos, input, output,
3883
0
                posMapping, cursorPosition, cursorStatus))
3884
0
            goto failure;
3885
0
        } else {
3886
0
          if (!for_updatePositions(dots, transCharslen, dotslen, 0, pos, input,
3887
0
                output, posMapping, cursorPosition, cursorStatus))
3888
0
            goto failure;
3889
0
          pos += transCharslen;
3890
0
        }
3891
0
      } else {
3892
0
        for (k = 0; k < transCharslen; k++) {
3893
0
          if (!putCharacter(input->chars[pos], table, pos, input, output,
3894
0
                posMapping, cursorPosition, cursorStatus, mode))
3895
0
            goto failure;
3896
0
          if (++pos >= input->length) break;
3897
0
        }
3898
0
      }
3899
0
      break;
3900
0
    }
3901
0
    }
3902
3903
    /* processing after replacement */
3904
0
    switch (transOpcode) {
3905
0
    case CTO_Repeated: {
3906
      /* Skip repeated characters. */
3907
0
      int srclim = input->length - transCharslen;
3908
0
      if (mode & (compbrlAtCursor | compbrlLeftCursor) && compbrlStart < srclim)
3909
        /* Don't skip characters from compbrlStart onwards. */
3910
0
        srclim = compbrlStart - 1;
3911
0
      while ((pos <= srclim) &&
3912
0
          compareChars(&transRule->charsdots[0], &input->chars[pos],
3913
0
              transCharslen, table)) {
3914
0
        if (!*cursorStatus && pos <= *cursorPosition &&
3915
0
            *cursorPosition < pos + transCharslen) {
3916
0
          *cursorStatus = 1;
3917
0
          *cursorPosition = output->length - 1;
3918
0
        }
3919
0
        pos += transCharslen;
3920
0
      }
3921
0
      break;
3922
0
    }
3923
0
    case CTO_RepEndWord: {
3924
      /* Go back and insert dots at repwordStart and update posMapping accordingly
3925
       */
3926
0
      const widechar *dots = &transRule->charsdots[transCharslen];
3927
0
      int dotslen;
3928
0
      for (dotslen = 1; dots[dotslen] != ','; dotslen++)
3929
0
        ;
3930
0
      if ((output->length + dotslen) > output->maxlength) goto failure;
3931
0
      int k;
3932
0
      for (k = output->length - 1; k >= 0; k--)
3933
0
        if (posMapping[k] >= repwordStart) {
3934
0
          output->chars[k + dotslen] = output->chars[k];
3935
0
          posMapping[k + dotslen] = posMapping[k];
3936
0
        } else
3937
0
          break;
3938
0
      k++;
3939
0
      memcpy(&output->chars[k], dots, dotslen * sizeof(*output->chars));
3940
0
      for (int l = 0; l < dotslen; l++) posMapping[k + l] = posMapping[k];
3941
0
      output->length += dotslen;
3942
0
      if (*cursorStatus && *cursorPosition >= k) *cursorPosition += dotslen;
3943
0
    }
3944
0
    case CTO_RepWord: {
3945
      /* Skip repeated characters. */
3946
0
      int srclim = input->length;
3947
0
      if (mode & (compbrlAtCursor | compbrlLeftCursor) && compbrlStart < srclim)
3948
        /* Don't skip characters from compbrlStart onwards. */
3949
0
        srclim = compbrlStart;
3950
      /* Skip first and subsequent repetitions */
3951
      /* Loop body is be executed at least once. */
3952
0
      int firstRep = 1;
3953
0
      while (pos + repwordLength <= srclim &&
3954
0
          compareChars(&input->chars[repwordStart], &input->chars[pos],
3955
0
              repwordLength, table)) {
3956
        /* Check that capitalisation and emphasis do not change within or in
3957
         * between subsequent repetitions. It is allowed to change right before
3958
         * the first repetition because that can be indicated. That it does not
3959
         * change within the first repetition is already checked in
3960
         * isRepeatedWord. */
3961
0
        if (!firstRep &&
3962
0
            checkEmphasisChange(pos - 1, repwordLength, emphasisBuffer))
3963
0
          break;
3964
0
        if (!*cursorStatus && *cursorPosition >= pos - transCharslen &&
3965
0
            *cursorPosition < pos + repwordLength) {
3966
0
          *cursorStatus = 1;
3967
0
          *cursorPosition = output->length - 1;
3968
0
        }
3969
0
        pos += repwordLength;
3970
0
        if (pos + transCharslen <= srclim &&
3971
0
            !memcmp(transRule->charsdots, &input->chars[pos],
3972
0
                transCharslen * sizeof(*transRule->charsdots)))
3973
0
          pos += transCharslen;
3974
0
        else {
3975
0
          pos += transCharslen;
3976
0
          break;
3977
0
        }
3978
0
        firstRep = 0;
3979
0
      }
3980
0
      pos -= transCharslen;
3981
0
      break;
3982
0
    }
3983
0
    case CTO_JoinNum:
3984
0
    case CTO_JoinableWord:
3985
0
      while (pos < input->length &&
3986
0
          checkCharAttr(input->chars[pos], CTC_Space, table) &&
3987
0
          input->chars[pos] != LOU_ENDSEGMENT)
3988
0
        pos++;
3989
0
      break;
3990
0
    default:
3991
0
      break;
3992
0
    }
3993
0
    if (srcSpacing != NULL && srcSpacing[pos] >= '0' && srcSpacing[pos] <= '9')
3994
0
      destSpacing[output->length] = srcSpacing[pos];
3995
0
    if ((transOpcode >= CTO_Always && transOpcode <= CTO_None) ||
3996
0
        (transOpcode >= CTO_Digit && transOpcode <= CTO_LitDigit))
3997
0
      prevTransOpcode = transOpcode;
3998
0
  }
3999
4000
0
  for (int at = insertEmphasesFrom; at <= pos; at++) {
4001
    /* insert caps end indicator */
4002
0
    insertEmphasesAt(0, 1, 1, 0, at, table, pos, input, output, posMapping,
4003
0
        emphasisBuffer, cursorPosition, cursorStatus);
4004
0
    if (haveEmphasis) {
4005
      /* insert emphasis end indicator */
4006
0
      insertEmphasesAt(0, 1, 0, 1, at, table, pos, input, output, posMapping,
4007
0
          emphasisBuffer, cursorPosition, cursorStatus);
4008
      /* insert emphasis start indicator */
4009
0
      insertEmphasesAt(1, 0, 0, 1, at, table, pos, input, output, posMapping,
4010
0
          emphasisBuffer, cursorPosition, cursorStatus);
4011
0
    }
4012
    /* insert caps start indicator */
4013
0
    insertEmphasesAt(1, 0, 1, 0, at, table, pos, input, output, posMapping,
4014
0
        emphasisBuffer, cursorPosition, cursorStatus);
4015
0
  }
4016
4017
0
failure:
4018
0
  if (lastWord.outPos != 0 && pos < input->length &&
4019
0
      !checkCharAttr(input->chars[pos], CTC_Space, table)) {
4020
0
    pos = lastWord.inPos;
4021
0
    output->length = lastWord.outPos;
4022
0
  }
4023
0
  if (pos < input->length) {
4024
0
    while (checkCharAttr(input->chars[pos], CTC_Space, table))
4025
0
      if (++pos == input->length) break;
4026
0
  }
4027
0
  *realInlen = pos;
4028
0
  if (input->bufferIndex != origInput->bufferIndex)
4029
0
    releaseStringBuffer(input->bufferIndex);
4030
0
  return 1;
4031
0
} /* first pass translation completed */
4032
4033
static int
4034
0
isHyphen(const TranslationTableHeader *table, widechar c) {
4035
0
  TranslationTableRule *rule;
4036
0
  TranslationTableOffset offset = getChar(c, table)->otherRules;
4037
0
  while (offset) {
4038
0
    rule = (TranslationTableRule *)&table->ruleArea[offset];
4039
0
    if (rule->opcode == CTO_Hyphen) return 1;
4040
0
    offset = rule->dotsnext;
4041
0
  }
4042
0
  return 0;
4043
0
}
4044
4045
/**
4046
 * Hyphenate an input string which can either be text (mode = 0) or braille (mode = 1). If
4047
 * the input is braille, back-translation will be performed with `tableList'. The input
4048
 * string can contain any character (even space), but only break points within words
4049
 * (between letters) are considered. If the string can not be broken before the character
4050
 * at index k, the value of `hyphens[k]' is '0'. If it can be broken by inserting a hyphen
4051
 * at the break point, the value is '1'. If it can be broken without adding a hyphen, the
4052
 * value is '2'.
4053
 */
4054
int EXPORT_CALL
4055
lou_hyphenate(const char *tableList, const widechar *inbuf, int inlen, char *hyphens,
4056
0
    int mode) {
4057
0
#define HYPHSTRING 100
4058
0
  const TranslationTableHeader *table;
4059
0
  widechar textBuffer[HYPHSTRING];
4060
0
  char *textHyphens;
4061
0
  int *inputPos;
4062
0
  int k;
4063
0
  int textLen;
4064
0
  int wordStart;
4065
0
  table = lou_getTable(tableList);
4066
0
  if (table == NULL || inbuf == NULL || hyphens == NULL ||
4067
0
      table->hyphenStatesArray == 0 || inlen >= HYPHSTRING)
4068
0
    return 0;
4069
0
  if (mode != 0) {
4070
0
    int brailleLen = inlen;
4071
0
    textLen = HYPHSTRING;
4072
0
    inputPos = malloc(textLen * sizeof(int));
4073
0
    if (!lou_backTranslate(tableList, inbuf, &brailleLen, textBuffer, &textLen, NULL,
4074
0
          NULL, NULL, inputPos, NULL, 0)) {
4075
0
      free(inputPos);
4076
0
      return 0;
4077
0
    }
4078
0
    textHyphens = malloc((textLen + 1) * sizeof(char));
4079
0
  } else {
4080
0
    memcpy(textBuffer, inbuf, CHARSIZE * inlen);
4081
0
    textLen = inlen;
4082
0
    textHyphens = hyphens;
4083
0
  }
4084
4085
  // initialize hyphens array
4086
0
  for (k = 0; k < textLen; k++) textHyphens[k] = '0';
4087
0
  textHyphens[k] = 0;
4088
4089
  // for every word part
4090
0
  for (wordStart = 0;;) {
4091
0
    int wordEnd;
4092
    // find start of word
4093
0
    for (; wordStart < textLen; wordStart++)
4094
0
      if ((getChar(textBuffer[wordStart], table))->attributes & CTC_Letter) break;
4095
0
    if (wordStart == textLen) break;
4096
    // find end of word
4097
0
    for (wordEnd = wordStart + 1; wordEnd < textLen; wordEnd++)
4098
0
      if (!((getChar(textBuffer[wordEnd], table))->attributes & CTC_Letter)) break;
4099
    // hyphenate
4100
0
    if (!hyphenateWord(&textBuffer[wordStart], wordEnd - wordStart,
4101
0
          &textHyphens[wordStart], table))
4102
0
      return 0;
4103
    // normalize to '0', '1' or '2'
4104
0
    if (wordStart >= 2 && isHyphen(table, textBuffer[wordStart - 1]) &&
4105
0
        ((getChar(textBuffer[wordStart - 2], table))->attributes & CTC_Letter))
4106
0
      textHyphens[wordStart] = '2';
4107
0
    else
4108
0
      textHyphens[wordStart] = '0';
4109
0
    for (k = wordStart + 1; k < wordEnd; k++)
4110
0
      if (textHyphens[k] & 1)
4111
0
        textHyphens[k] = '1';
4112
0
      else
4113
0
        textHyphens[k] = '0';
4114
0
    if (wordEnd == textLen) break;
4115
0
    textHyphens[wordEnd] = '0';  // because hyphenateWord sets it to 0
4116
0
    wordStart = wordEnd + 1;
4117
0
  }
4118
4119
  // map hyphen positions if the input was braille
4120
0
  if (mode != 0) {
4121
0
    for (k = 0; k < inlen; k++) hyphens[k] = '0';
4122
0
    hyphens[k] = 0;
4123
0
    int prevPos = -1;
4124
0
    for (k = 0; k < textLen; k++) {
4125
0
      int braillePos = inputPos[k];
4126
0
      if (braillePos > inlen || braillePos < 0) break;
4127
0
      if (braillePos > prevPos) {
4128
0
        hyphens[braillePos] = textHyphens[k];
4129
0
        prevPos = braillePos;
4130
0
      }
4131
0
    }
4132
0
    free(textHyphens);
4133
0
    free(inputPos);
4134
0
  }
4135
0
  return 1;
4136
0
}
4137
4138
int EXPORT_CALL
4139
lou_dotsToChar(
4140
0
    const char *tableList, widechar *inbuf, widechar *outbuf, int length, int mode) {
4141
0
  const DisplayTableHeader *table;
4142
0
  int k;
4143
0
  widechar dots;
4144
0
  if (tableList == NULL || inbuf == NULL || outbuf == NULL) return 0;
4145
4146
0
  table = _lou_getDisplayTable(tableList);
4147
0
  if (table == NULL || length <= 0) return 0;
4148
0
  for (k = 0; k < length; k++) {
4149
0
    dots = inbuf[k];
4150
0
    if (!(dots & LOU_DOTS) &&
4151
0
        (dots & 0xff00) == LOU_ROW_BRAILLE) /* Unicode braille */
4152
0
      dots = (dots & 0x00ff) | LOU_DOTS;
4153
0
    outbuf[k] = _lou_getCharForDots(dots, table);
4154
    // assume that if NUL character is returned, it's because the display table has no
4155
    // mapping for the dot pattern (not because it maps to NUL)
4156
0
    if (outbuf[k] == '\0') outbuf[k] = ' ';
4157
0
  }
4158
0
  return 1;
4159
0
}
4160
4161
int EXPORT_CALL
4162
lou_charToDots(const char *tableList, const widechar *inbuf, widechar *outbuf, int length,
4163
0
    int mode) {
4164
0
  const DisplayTableHeader *table;
4165
0
  int k;
4166
0
  if (tableList == NULL || inbuf == NULL || outbuf == NULL) return 0;
4167
4168
0
  table = _lou_getDisplayTable(tableList);
4169
0
  if (table == NULL || length <= 0) return 0;
4170
0
  for (k = 0; k < length; k++)
4171
0
    if ((mode & ucBrl))
4172
0
      outbuf[k] = ((_lou_getDotsForChar(inbuf[k], table) & 0xff) | LOU_ROW_BRAILLE);
4173
0
    else
4174
0
      outbuf[k] = _lou_getDotsForChar(inbuf[k], table);
4175
0
  return 1;
4176
0
}