Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/hyphenation/hyphen/hyphen.c
Line
Count
Source (jump to first uncovered line)
1
/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
2
 * licenses follows.
3
 */
4
5
/* LibHnj - a library for high quality hyphenation and justification
6
 * Copyright (C) 1998 Raph Levien, 
7
 *       (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 
8
 *           (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
9
 *           (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo)
10
 *
11
 * This library is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Library General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2 of the License, or (at your option) any later version.
15
 *
16
 * This library is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Library General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Library General Public
22
 * License along with this library; if not, write to the 
23
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
24
 * Boston, MA  02111-1307  USA.
25
*/
26
27
/*
28
 * The contents of this file are subject to the Mozilla Public License
29
 * Version 1.0 (the "MPL"); you may not use this file except in
30
 * compliance with the MPL.  You may obtain a copy of the MPL at
31
 * http://www.mozilla.org/MPL/
32
 *
33
 * Software distributed under the MPL is distributed on an "AS IS" basis,
34
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
35
 * for the specific language governing rights and limitations under the
36
 * MPL.
37
 *
38
 */
39
#include <stdlib.h> /* for NULL, malloc */
40
#include <stdio.h>  /* for fprintf */
41
#include <string.h> /* for strdup */
42
#include <limits.h> /* for INT_MAX */
43
44
#ifdef UNX
45
#include <unistd.h> /* for exit */
46
#endif
47
48
#define noVERBOSE
49
50
/* calculate hyphenmin values with long ligature length (2 or 3 characters
51
 * instead of 1 or 2) for comparison with hyphenation without ligatures */
52
#define noLONG_LIGATURE
53
54
#ifdef LONG_LIGATURE
55
#define LIG_xx  1
56
#define LIG_xxx 2
57
#else
58
0
#define LIG_xx  0
59
0
#define LIG_xxx 1
60
#endif
61
62
#include "hnjalloc.h"
63
#include "hyphen.h"
64
65
/* Return a newly allocated copy of the NUL-terminated string s.
   The storage comes from hnj_malloc(). */
static char *
hnj_strdup (const char *s)
{
  /* strlen() yields a size_t; keeping the length in that type avoids the
     narrowing to int that the copy size previously went through. */
  size_t len = strlen (s);
  char *newstr = (char *) hnj_malloc (len + 1);

  memcpy (newstr, s, len);
  newstr[len] = '\0';
  return newstr;
}
77
78
/* Remove a trailing line terminator from s, in place.
 *
 * The cross-platform endings "\n", "\r" and "\r\n" are handled: the final
 * '\r' or '\n' is cut, and a '\r' directly in front of it is cut as well.
 * A string that does not end in '\r' or '\n' is left untouched, even if
 * its second-to-last character happens to be '\r'. */
void hnj_strchomp(char * s)
{
  size_t k = strlen(s);

  /* nothing to do unless the string really ends in a line-end character */
  if (k == 0 || (s[k - 1] != '\r' && s[k - 1] != '\n'))
    return;

  s[--k] = '\0';
  if (k > 0 && s[k - 1] == '\r')
    s[k - 1] = '\0';
}
85
86
/* A small chained hash table. It simply maps strings to state numbers. */

/* Number of buckets in the table (a cheap, but effective, hack). */
#define HASH_SIZE 31627

typedef struct _HashEntry HashEntry;
typedef struct _HashTab HashTab;

/* One key/value pair; entries that share a bucket are linked via next. */
struct _HashEntry {
  HashEntry *next;
  char *key;
  int val;
};

/* The table itself: a fixed-size array of bucket heads. */
struct _HashTab {
  HashEntry *entries[HASH_SIZE];
};
104
105
/* A char* hash function from ASU - adapted from Gtk+.
   Each character is folded into a running value; whenever the top four
   bits become non-zero they are mixed back into the low bits and cleared.
   The result is not reduced modulo the table size - callers do that. */
static unsigned int
hnj_string_hash (const char *s)
{
  unsigned int hash = 0;

  while (*s != '\0') {
    unsigned int top;

    hash = (hash << 4) + *s++;
    top = hash & 0xf0000000;
    if (top != 0)
      hash ^= (top >> 24) ^ top;
  }
  return hash;
}
120
121
static HashTab *
122
hnj_hash_new (void)
123
0
{
124
0
  HashTab *hashtab;
125
0
  int i;
126
0
127
0
  hashtab = (HashTab *) hnj_malloc (sizeof(HashTab));
128
0
  for (i = 0; i < HASH_SIZE; i++)
129
0
    hashtab->entries[i] = NULL;
130
0
131
0
  return hashtab;
132
0
}
133
134
static void
135
hnj_hash_free (HashTab *hashtab)
136
0
{
137
0
  int i;
138
0
  HashEntry *e, *next;
139
0
140
0
  for (i = 0; i < HASH_SIZE; i++)
141
0
    for (e = hashtab->entries[i]; e; e = next)
142
0
      {
143
0
  next = e->next;
144
0
  hnj_free (e->key);
145
0
  hnj_free (e);
146
0
      }
147
0
148
0
  hnj_free (hashtab);
149
0
}
150
151
/* assumes that key is not already present! */
152
static void
153
hnj_hash_insert (HashTab *hashtab, const char *key, int val)
154
0
{
155
0
  int i;
156
0
  HashEntry *e;
157
0
158
0
  i = hnj_string_hash (key) % HASH_SIZE;
159
0
  e = (HashEntry *) hnj_malloc (sizeof(HashEntry));
160
0
  e->next = hashtab->entries[i];
161
0
  e->key = hnj_strdup (key);
162
0
  e->val = val;
163
0
  hashtab->entries[i] = e;
164
0
}
165
166
/* return val if found, otherwise -1 */
167
static int
168
hnj_hash_lookup (HashTab *hashtab, const char *key)
169
0
{
170
0
  int i;
171
0
  HashEntry *e;
172
0
  i = hnj_string_hash (key) % HASH_SIZE;
173
0
  for (e = hashtab->entries[i]; e; e = e->next)
174
0
    if (!strcmp (key, e->key))
175
0
      return e->val;
176
0
  return -1;
177
0
}
178
179
/* Get the state number, allocating a new state if necessary. */
180
static int
181
hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string)
182
0
{
183
0
  int state_num;
184
0
185
0
  state_num = hnj_hash_lookup (hashtab, string);
186
0
187
0
  if (state_num >= 0)
188
0
    return state_num;
189
0
190
0
  hnj_hash_insert (hashtab, string, dict->num_states);
191
0
  /* predicate is true if dict->num_states is a power of two */
192
0
  if (!(dict->num_states & (dict->num_states - 1)))
193
0
    {
194
0
      dict->states = (HyphenState *) hnj_realloc (dict->states,
195
0
          (dict->num_states << 1) *
196
0
          sizeof(HyphenState));
197
0
    }
198
0
  dict->states[dict->num_states].match = NULL;
199
0
  dict->states[dict->num_states].repl = NULL;
200
0
  dict->states[dict->num_states].fallback_state = -1;
201
0
  dict->states[dict->num_states].num_trans = 0;
202
0
  dict->states[dict->num_states].trans = NULL;
203
0
  return dict->num_states++;
204
0
}
205
206
/* add a transition from state1 to state2 through ch - assumes that the
207
   transition does not already exist */
208
static void
209
hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
210
0
{
211
0
  int num_trans;
212
0
213
0
  num_trans = dict->states[state1].num_trans;
214
0
  if (num_trans == 0)
215
0
    {
216
0
      dict->states[state1].trans = (HyphenTrans *) hnj_malloc (sizeof(HyphenTrans));
217
0
    }
218
0
  else if (!(num_trans & (num_trans - 1)))
219
0
    {
220
0
      dict->states[state1].trans = (HyphenTrans *) hnj_realloc (dict->states[state1].trans,
221
0
            (num_trans << 1) *
222
0
            sizeof(HyphenTrans));
223
0
    }
224
0
  dict->states[state1].trans[num_trans].ch = ch;
225
0
  dict->states[state1].trans[num_trans].new_state = state2;
226
0
  dict->states[state1].num_trans++;
227
0
}
228
229
#ifdef VERBOSE
/* Hash tables of the dictionary levels, kept for the debug output.
   Two slots are required: hnj_hyphen_load_file() stores one table per
   level (indices 0 and 1) and get_state_str() is called with level 0 and
   level 1. A one-element array made every use of index 1 an
   out-of-bounds access. */
HashTab *global[2];

/* Debug helper: return the key string whose state number is state in the
   hash table of the given level, or NULL if no entry has that number.
   The returned pointer refers to the table's own key, not to a copy. */
static char *
get_state_str (int state, int level)
{
  int i;
  HashEntry *e;

  for (i = 0; i < HASH_SIZE; i++)
    for (e = global[level]->entries[i]; e; e = e->next)
      if (e->val == state)
        return e->key;
  return NULL;
}
#endif
245
246
/* Parse one dictionary line and merge it into dict.
 *
 * buf is either a directive (LEFTHYPHENMIN, RIGHTHYPHENMIN,
 * COMPOUNDLEFTHYPHENMIN, COMPOUNDRIGHTHYPHENMIN, NOHYPHEN) or a pattern,
 * optionally followed by "/replacement,index,cut". A line that contains
 * '/' is split in place, i.e. buf is modified. hashtab maps the pattern
 * strings seen so far to state numbers of dict.
 *
 * For a pattern, the state of the pattern's letters receives the digit
 * string (and replacement data), and states plus transitions are created
 * for every prefix that is not known yet. */
void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
  int i, j;
  char word[MAX_CHARS];
  /* pattern holds one digit more than word holds letters, plus the
     terminator, hence the extra byte */
  char pattern[MAX_CHARS + 1];
  char * repl;
  signed char replindex;
  signed char replcut;
  int state_num = 0;
  int last_state;
  char ch;
  int found;

  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
    dict->lhmin = atoi(buf + 13);
    return;
  } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
    dict->rhmin = atoi(buf + 14);
    return;
  } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
    dict->clhmin = atoi(buf + 21);
    return;
  } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
    dict->crhmin = atoi(buf + 22);
    return;
  } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
    char * space = buf + 8;
    while (*space == ' ' || *space == '\t') space++;
    /* a directive without any argument text leaves the dictionary as is */
    if (*space != '\0') {
      size_t k;
      /* a repeated NOHYPHEN line replaces the earlier list */
      if (dict->nohyphen) hnj_free(dict->nohyphen);
      dict->nohyphen = hnj_strdup(space);
      dict->nohyphenl = 0;
      /* drop the line terminator only, never a character of the list */
      hnj_strchomp(dict->nohyphen);
      /* split the comma separated list into consecutive NUL-terminated
         strings; nohyphenl counts the separators. The first character is
         never treated as a separator. */
      for (k = strlen(dict->nohyphen); k > 1; ) {
        k--;
        if (dict->nohyphen[k] == ',') {
          dict->nohyphenl++;
          dict->nohyphen[k] = '\0';
        }
      }
    }
    return;
  }

  j = 0;
  pattern[j] = '0';
  repl = strchr(buf, '/');
  replindex = 0;
  replcut = 0;
  if (repl) {
    /* "pattern/replacement,index,cut": cut buf at the separators */
    char * index = strchr(repl + 1, ',');
    *repl = '\0';
    if (index) {
      char * index2 = strchr(index + 1, ',');
      *index = '\0';
      if (index2) {
        *index2 = '\0';
        replindex = (signed char) atoi(index + 1) - 1;
        replcut = (signed char) atoi(index2 + 1);
      }
    } else {
      hnj_strchomp(repl + 1);
      replindex = 0;
      replcut = (signed char) strlen(buf);
    }
    repl = hnj_strdup(repl + 1);
  }

  /* separate the letters (word) from the digits between them (pattern);
     a missing digit counts as '0'. The bound on j keeps both buffers
     intact whatever the length of buf is. */
  for (i = 0; (unsigned char)buf[i] > (unsigned char)' ' && j < MAX_CHARS - 1; i++)
    {
      if (buf[i] >= '0' && buf[i] <= '9')
        pattern[j] = buf[i];
      else
        {
          word[j] = buf[i];
          pattern[++j] = '0';
        }
    }
  word[j] = '\0';
  pattern[j + 1] = '\0';

  i = 0;
  if (!repl) {
    /* Optimize away leading zeroes */
    for (; pattern[i] == '0'; i++);
  } else {
    if (*word == '.') i++;
    /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
    if (dict->utf8) {
      int pu = -1;        /* unicode character position */
      int ps = -1;        /* unicode start position (original replindex) */
      size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */
      for (; pc < (strlen(word) + 1); pc++) {
        /* beginning of an UTF-8 character (not '10' start bits) */
        if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
        if ((ps < 0) && (replindex == pu)) {
          ps = replindex;
          replindex = (signed char) pc;
        }
        if ((ps >= 0) && ((pu - ps) == replcut)) {
          replcut = (signed char) (pc - replindex);
          break;
        }
      }
      if (*word == '.') replindex--;
    }
  }

#ifdef VERBOSE
  /* repl is NULL for ordinary patterns; never hand NULL to %s */
  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j,
          repl ? repl : "(null)");
#endif
  found = hnj_hash_lookup (hashtab, word);
  state_num = hnj_get_state (dict, hashtab, word);
  /* the same letters may occur on more than one line (every blank line
     maps to the empty word); release what an earlier line stored */
  if (dict->states[state_num].match)
    hnj_free (dict->states[state_num].match);
  if (dict->states[state_num].repl)
    hnj_free (dict->states[state_num].repl);
  dict->states[state_num].match = hnj_strdup (pattern + i);
  dict->states[state_num].repl = repl;
  dict->states[state_num].replindex = replindex;
  if (!replcut) {
    dict->states[state_num].replcut = (signed char) strlen(word);
  } else {
    dict->states[state_num].replcut = replcut;
  }

  /* now, put in the prefix transitions: shorten the word one character at
     a time until a prefix is reached that was known already */
  for (; found < 0 && j > 0; --j)
    {
      last_state = state_num;
      ch = word[j - 1];
      word[j - 1] = '\0';
      found = hnj_hash_lookup (hashtab, word);
      state_num = hnj_get_state (dict, hashtab, word);
      hnj_add_trans (dict, state_num, last_state, ch);
    }
}
374
375
/* Open the file named fn for reading and build a dictionary from it.
   Returns NULL when the file cannot be opened, otherwise the result of
   hnj_hyphen_load_file(). The file is closed again before returning. */
HyphenDict *
hnj_hyphen_load (const char *fn)
{
  HyphenDict *loaded = NULL;
  FILE *stream = fopen (fn, "r");

  if (stream != NULL)
    {
      loaded = hnj_hyphen_load_file (stream);
      fclose (stream);
    }
  return loaded;
}
389
390
HyphenDict *
391
hnj_hyphen_load_file (FILE *f)
392
0
{
393
0
  HyphenDict *dict[2];
394
0
  HashTab *hashtab;
395
0
  char buf[MAX_CHARS];
396
0
  int nextlevel = 0;
397
0
  int i, j, k;
398
0
  HashEntry *e;
399
0
  int state_num = 0;
400
0
/* loading one or two dictionaries (separated by NEXTLEVEL keyword) */
401
0
for (k = 0; k < 2; k++) { 
402
0
  hashtab = hnj_hash_new ();
403
#ifdef VERBOSE
404
  global[k] = hashtab;
405
#endif
406
  hnj_hash_insert (hashtab, "", 0);
407
0
  dict[k] = (HyphenDict *) hnj_malloc (sizeof(HyphenDict));
408
0
  dict[k]->num_states = 1;
409
0
  dict[k]->states = (HyphenState *) hnj_malloc (sizeof(HyphenState));
410
0
  dict[k]->states[0].match = NULL;
411
0
  dict[k]->states[0].repl = NULL;
412
0
  dict[k]->states[0].fallback_state = -1;
413
0
  dict[k]->states[0].num_trans = 0;
414
0
  dict[k]->states[0].trans = NULL;
415
0
  dict[k]->nextlevel = NULL;
416
0
  dict[k]->lhmin = 0;
417
0
  dict[k]->rhmin = 0;
418
0
  dict[k]->clhmin = 0;
419
0
  dict[k]->crhmin = 0;
420
0
  dict[k]->nohyphen = NULL;
421
0
  dict[k]->nohyphenl = 0;
422
0
423
0
  /* read in character set info */
424
0
  if (k == 0) {
425
0
    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
426
0
    if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
427
0
      for (i=0;i<MAX_NAME;i++)
428
0
        if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
429
0
          dict[k]->cset[i] = 0;
430
0
    } else {
431
0
      dict[k]->cset[0] = 0;
432
0
    }
433
0
    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
434
0
  } else {
435
0
    strncpy(dict[k]->cset, dict[0]->cset, sizeof(dict[k]->cset)-1);
436
0
    dict[k]->cset[sizeof(dict[k]->cset)-1] = '\0';
437
0
    dict[k]->utf8 = dict[0]->utf8;
438
0
  }
439
0
440
0
  if (k == 0 || nextlevel) {
441
0
    while (fgets(buf, sizeof(buf), f) != NULL) {
442
0
      
443
0
      /* discard lines that don't fit in buffer */
444
0
      if (!feof(f) && strchr(buf, '\n') == NULL) {
445
0
        int c;
446
0
        while ((c = fgetc(f)) != '\n' && c != EOF);
447
0
        /* issue warning if not a comment */
448
0
        if (buf[0] != '%') {
449
0
          fprintf(stderr, "Warning: skipping too long pattern (more than %lu chars)\n", sizeof(buf));
450
0
        }
451
0
        continue;
452
0
      }
453
0
      
454
0
      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
455
0
        nextlevel = 1;
456
0
        break;
457
0
      } else if (buf[0] != '%') {
458
0
        hnj_hyphen_load_line(buf, dict[k], hashtab);
459
0
      }
460
0
    }
461
0
  } else if (k == 1) {
462
0
    /* default first level: hyphen and ASCII apostrophe */
463
0
    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN ',-\n", dict[k], hashtab);
464
0
    else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99,-\n", dict[k], hashtab);
465
0
    strncpy(buf, "1-1\n", MAX_CHARS-1); /* buf rewritten by hnj_hyphen_load here */
466
0
    buf[MAX_CHARS-1] = '\0';
467
0
    hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
468
0
    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
469
0
    if (dict[0]->utf8) {
470
0
      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
471
0
      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
472
0
    }
473
0
  }
474
0
475
0
  /* Could do unioning of matches here (instead of the preprocessor script).
476
0
     If we did, the pseudocode would look something like this:
477
0
478
0
     foreach state in the hash table
479
0
        foreach i = [1..length(state) - 1]
480
0
           state to check is substr (state, i)
481
0
           look it up
482
0
           if found, and if there is a match, union the match in.
483
0
484
0
     It's also possible to avoid the quadratic blowup by doing the
485
0
     search in order of increasing state string sizes - then you
486
0
     can break the loop after finding the first match.
487
0
488
0
     This step should be optional in any case - if there is a
489
0
     preprocessed rule table, it's always faster to use that.
490
0
491
0
*/
492
0
493
0
  /* put in the fallback states */
494
0
  for (i = 0; i < HASH_SIZE; i++)
495
0
    for (e = hashtab->entries[i]; e; e = e->next)
496
0
      {
497
0
  if (*(e->key)) for (j = 1; 1; j++)
498
0
    {          
499
0
      state_num = hnj_hash_lookup (hashtab, e->key + j);
500
0
      if (state_num >= 0)
501
0
        break;
502
0
    }
503
0
        /* KBH: FIXME state 0 fallback_state should always be -1? */
504
0
  if (e->val)
505
0
    dict[k]->states[e->val].fallback_state = state_num;
506
0
      }
507
#ifdef VERBOSE
508
  for (i = 0; i < HASH_SIZE; i++)
509
    for (e = hashtab->entries[i]; e; e = e->next)
510
      {
511
  printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val,
512
    dict[k]->states[e->val].fallback_state);
513
  for (j = 0; j < dict[k]->states[e->val].num_trans; j++)
514
    printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch,
515
      dict[k]->states[e->val].trans[j].new_state);
516
      }
517
#endif
518
519
0
#ifndef VERBOSE
520
0
  hnj_hash_free (hashtab);
521
0
#endif
522
0
  state_num = 0;
523
0
}
524
0
  if (nextlevel) dict[0]->nextlevel = dict[1];
525
0
  else {
526
0
    dict[1] -> nextlevel = dict[0];
527
0
    dict[1]->lhmin = dict[0]->lhmin;
528
0
    dict[1]->rhmin = dict[0]->rhmin;
529
0
    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
530
0
    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
531
#ifdef VERBOSE
532
    HashTab *r = global[0];
533
    global[0] = global[1];
534
    global[1] = r;
535
#endif
536
    return dict[1];
537
0
  }
538
0
  return dict[0];
539
0
}
540
541
/* Release dict: the match, replacement and transition data of every
   state, the next-level dictionary (recursively), the NOHYPHEN list, the
   state array and finally the dictionary structure itself. */
void hnj_hyphen_free (HyphenDict *dict)
{
  int n;

  for (n = 0; n < dict->num_states; n++)
    {
      HyphenState *st = &dict->states[n];

      if (st->match != NULL)
        hnj_free (st->match);
      if (st->repl != NULL)
        hnj_free (st->repl);
      if (st->trans != NULL)
        hnj_free (st->trans);
    }

  if (dict->nextlevel != NULL)
    hnj_hyphen_free (dict->nextlevel);

  if (dict->nohyphen != NULL)
    hnj_free (dict->nohyphen);

  hnj_free (dict->states);

  hnj_free (dict);
}
564
565
#define MAX_WORD 256
566
567
/* Compute hyphenation values for the word_size bytes of word.
 *
 * The word is wrapped in '.' markers (digits are replaced by '.' too) and
 * run through the pattern state machine of dict. For every matching
 * pattern without a replacement rule, hyphens receives the maximum of the
 * pattern digits seen at each position. hyphens is written up to index
 * word_size + 4 while working and ends up as a NUL-terminated string of
 * word_size characters. Always returns 0. */
int hnj_hyphen_hyphenate (HyphenDict *dict,
         const char *word, int word_size,
         char *hyphens)
{
  char *padded;
  int pos, len, k;
  int state = 0;

  padded = (char*) hnj_malloc (word_size + 3);

  /* build ".word." with every digit turned into '.' */
  len = 0;
  padded[len++] = '.';
  for (pos = 0; pos < word_size; pos++) {
    const char c = word[pos];

    padded[len++] = (c >= '0' && c <= '9') ? '.' : c;
  }
  padded[len++] = '.';
  padded[len] = '\0';

  for (pos = 0; pos < word_size + 5; pos++)
    hyphens[pos] = '0';

#ifdef VERBOSE
  printf ("prep_word = %s\n", padded);
#endif

  /* now, run the finite state machine */
  for (pos = 0; pos < len; pos++)
    {
      const char ch = padded[pos];
      char *match;
      int stepped = 0;

      /* follow fallback links until a transition on ch exists or the
         chain ends (state -1) */
      while (state != -1 && !stepped)
        {
          HyphenState *hstate;
#ifdef VERBOSE
          char *state_str;
          state_str = get_state_str (state, 0);

          for (k = 0; k < pos - strlen (state_str); k++)
            putchar (' ');
          printf ("%s", state_str);
#endif

          hstate = &dict->states[state];
          for (k = 0; k < hstate->num_trans; k++)
            if (hstate->trans[k].ch == ch)
              {
                state = hstate->trans[k].new_state;
                stepped = 1;
                break;
              }
          if (!stepped)
            {
              state = hstate->fallback_state;
#ifdef VERBOSE
              printf (" falling back, fallback_state %d\n", state);
#endif
            }
        }

      /* KBH: we need this to make sure we keep looking in a word */
      /* for patterns even if the current character is not known in state 0 */
      /* since patterns for hyphenation may occur anywhere in the word */
      if (!stepped)
        {
          state = 0;
          continue;
        }

#ifdef VERBOSE
      printf ("found state %d\n",state);
#endif
      /* Additional optimization is possible here - especially,
         elimination of trailing zeroes from the match. Leading zeroes
         have already been optimized. */
      match = dict->states[state].match;
      /* replacing rules not handled by hyphen_hyphenate() */
      if (match && !dict->states[state].repl)
        {
          /* the match is aligned with the end of the text read so far */
          int offset = pos + 1 - strlen (match);
#ifdef VERBOSE
          for (k = 0; k < offset; k++)
            putchar (' ');
          printf ("%s\n", match);
#endif
          /* This is a linear search because I tried a binary search and
             found it to be just a teeny bit slower. */
          for (k = 0; match[k]; k++)
            if (hyphens[offset + k] < match[k])
              hyphens[offset + k] = match[k];
        }
    }
#ifdef VERBOSE
  for (pos = 0; pos < len; pos++)
    putchar (hyphens[pos]);
  putchar ('\n');
#endif

  /* shift the values by one position, then reset the first value and
     everything after the shifted part */
  for (pos = 0; pos < len - 4; pos++)
    hyphens[pos] = hyphens[pos + 1];
  hyphens[0] = '0';
  for (; pos < word_size; pos++)
    hyphens[pos] = '0';
  hyphens[word_size] = '\0';

  hnj_free (padded);

  return 0;
}
690
691
/* Unicode ligature length */
692
0
int hnj_ligature(unsigned char c) {
693
0
    switch (c) {
694
0
        case 0x80:      /* ff */
695
0
        case 0x81:      /* fi */
696
0
        case 0x82: return LIG_xx; /* fl */
697
0
        case 0x83:      /* ffi */
698
0
        case 0x84: return LIG_xxx; /* ffl */
699
0
        case 0x85:      /* long st */
700
0
        case 0x86: return LIG_xx; /* st */
701
0
    }
702
0
    return 0;
703
0
}
704
705
/* Character length of the first n bytes of the input word. Counting also
   stops at a NUL byte. With utf8 set, continuation bytes (10xxxxxx) do not
   count as characters and ligatures add their hnj_ligature() length. */
int hnj_hyphen_strnlen(const char * word, int n, int utf8)
{
    int chars = 0;
    int at = 0;

    while (at < n && word[at] != '\0') {
      chars++;
      /* Unicode ligature support */
      if (utf8 && (unsigned char) word[at] == 0xEF &&
          (unsigned char) word[at + 1] == 0xAC)
        chars += hnj_ligature(word[at + 2]);
      /* move on to the first byte of the next character */
      at++;
      if (utf8)
        while ((word[at] & 0xc0) == 0x80)
          at++;
    }
    return chars;
}
720
721
/* Clear hyphenation points that lie too close to the start of word.
 *
 * Walks word from the left until lhmin characters are counted (leading
 * digits are not counted, ligatures count with their hnj_ligature()
 * length). At each byte position visited, hyphens[] is reset to '0',
 * unless a replacement rule is stored for the position: such a rule is
 * freed and cleared only if it contains '=' and the text it leaves before
 * the hyphen is shorter than lhmin. word_size is not used. Always
 * returns 0. */
int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
  char *** rep, int ** pos, int ** cut, int lhmin)
{
    int chars = 1;
    int j;

    /* Unicode ligature support */
    if (utf8 && (unsigned char) word[0] == 0xEF && (unsigned char) word[1] == 0xAC)
      chars += hnj_ligature(word[2]);

    /* ignore numbers */
    for (j = 0; word[j] >= '0' && word[j] <= '9'; j++)
      chars--;

    j = 0;
    while (chars < lhmin && word[j] != '\0') {
      /* one pass of the inner loop per byte of the current character */
      do {
        if (*rep && *pos && *cut && (*rep)[j]) {
          /* check length of the non-standard part */
          char * entry = (*rep)[j];
          char * eq = strchr(entry, '=');

          if (eq && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) +
                     hnj_hyphen_strnlen(entry, eq - entry, utf8)) < lhmin) {
            free(entry);
            (*rep)[j] = NULL;
            hyphens[j] = '0';
          }
        } else {
          hyphens[j] = '0';
        }
        j++;

        /* Unicode ligature support */
        if (utf8 && (unsigned char) word[j] == 0xEF &&
            (unsigned char) word[j + 1] == 0xAC)
          chars += hnj_ligature(word[j + 2]);
      } while (utf8 && (word[j] & 0xc0) == 0x80);
      chars++;
    }
    return 0;
}
756
757
/* Clear hyphenation points that lie too close to the end of word.
 *
 * Walks word backwards from byte word_size - 1 down to byte 1 until rhmin
 * characters are counted (trailing digits are not counted; with utf8 set,
 * continuation bytes are not counted). At each byte position visited,
 * hyphens[] is reset to '0', unless a replacement rule is stored for the
 * position: such a rule is freed and cleared only if it contains '=' and
 * the text it leaves after the hyphen is shorter than rhmin. Always
 * returns 0. */
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
  char *** rep, int ** pos, int ** cut, int rhmin)
{
    int chars = 0;
    int j = word_size - 1;

    /* ignore numbers */
    while (j > 0 && word[j] >= '0' && word[j] <= '9') {
      chars--;
      j--;
    }

    j = word_size - 1;
    while (chars < rhmin && j > 0) {
      if (*rep && *pos && *cut && (*rep)[j]) {
        /* check length of the non-standard part */
        char * entry = (*rep)[j];
        char * eq = strchr(entry, '=');

        if (eq && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) +
                   hnj_hyphen_strnlen(eq + 1, strlen(eq + 1), utf8)) < rhmin) {
          free(entry);
          (*rep)[j] = NULL;
          hyphens[j] = '0';
        }
      } else {
        hyphens[j] = '0';
      }
      /* every byte counts, except UTF-8 continuation bytes (10xxxxxx) */
      if (!utf8 || (word[j] & 0xc0) != 0x80)
        chars++;
      j--;
    }
    return 0;
}
783
784
/* recursive function for compound level hyphenation */
785
int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
786
    char * hyphens, char *** rep, int ** pos, int ** cut,
787
    int clhmin, int crhmin, int lend, int rend)
788
0
{
789
0
  char *prep_word;
790
0
  int i, j, k;
791
0
  int state;
792
0
  char ch;
793
0
  HyphenState *hstate;
794
0
  char *match;
795
0
  char *repl;
796
0
  signed char replindex;
797
0
  signed char replcut;
798
0
  int offset;
799
0
  int * matchlen;
800
0
  int * matchindex;
801
0
  char ** matchrepl;  
802
0
  int isrepl = 0;
803
0
  int nHyphCount;
804
0
805
0
  size_t prep_word_size = word_size + 3;
806
0
  prep_word = (char*) hnj_malloc (prep_word_size);
807
0
  matchlen = (int*) hnj_malloc ((word_size + 3) * sizeof(int));
808
0
  matchindex = (int*) hnj_malloc ((word_size + 3) * sizeof(int));
809
0
  matchrepl = (char**) hnj_malloc ((word_size + 3) * sizeof(char *));
810
0
811
0
  j = 0;
812
0
  prep_word[j++] = '.';
813
0
  
814
0
  for (i = 0; i < word_size; i++) {
815
0
    if (word[i] <= '9' && word[i] >= '0') {
816
0
      prep_word[j++] = '.';
817
0
    } else {
818
0
      prep_word[j++] = word[i];
819
0
    }
820
0
  }
821
0
822
0
823
0
824
0
  prep_word[j++] = '.';
825
0
  prep_word[j] = '\0';
826
0
827
0
  for (i = 0; i < j; i++)
828
0
    hyphens[i] = '0';    
829
0
830
#ifdef VERBOSE
831
  printf ("prep_word = %s\n", prep_word);
832
#endif
833
834
0
  /* now, run the finite state machine */
835
0
  state = 0;
836
0
  for (i = 0; i < j; i++)
837
0
    {
838
0
      ch = prep_word[i];
839
0
      for (;;)
840
0
  {
841
0
842
0
    if (state == -1) {
843
0
            /* return 1; */
844
0
      /*  KBH: FIXME shouldn't this be as follows? */
845
0
            state = 0;
846
0
            goto try_next_letter;
847
0
          }          
848
0
849
#ifdef VERBOSE
850
    char *state_str;
851
    state_str = get_state_str (state, 1);
852
853
    for (k = 0; k < i - strlen (state_str); k++)
854
      putchar (' ');
855
    printf ("%s", state_str);
856
#endif
857
858
0
    hstate = &dict->states[state];
859
0
    for (k = 0; k < hstate->num_trans; k++)
860
0
      if (hstate->trans[k].ch == ch)
861
0
        {
862
0
    state = hstate->trans[k].new_state;
863
0
    goto found_state;
864
0
        }
865
0
    state = hstate->fallback_state;
866
#ifdef VERBOSE
867
    printf (" falling back, fallback_state %d\n", state);
868
#endif
869
  }
870
0
    found_state:
871
#ifdef VERBOSE
872
      printf ("found state %d\n",state);
873
#endif
874
      /* Additional optimization is possible here - especially,
875
0
   elimination of trailing zeroes from the match. Leading zeroes
876
0
   have already been optimized. */
877
0
      match = dict->states[state].match;
878
0
      repl = dict->states[state].repl;
879
0
      replindex = dict->states[state].replindex;
880
0
      replcut = dict->states[state].replcut;
881
0
      /* replacing rules not handled by hyphen_hyphenate() */
882
0
      if (match)
883
0
  {
884
0
    offset = i + 1 - strlen (match);
885
#ifdef VERBOSE
886
    for (k = 0; k < offset; k++)
887
      putchar (' ');
888
    printf ("%s (%s)\n", match, repl);
889
#endif
890
0
          if (repl) {
891
0
            if (!isrepl) for(; isrepl < word_size; isrepl++) {
892
0
                matchrepl[isrepl] = NULL;
893
0
                matchindex[isrepl] = -1;
894
0
            }
895
0
            matchlen[offset + replindex] = replcut;
896
0
          }
897
0
    /* This is a linear search because I tried a binary search and
898
0
       found it to be just a teeny bit slower. */
899
0
    for (k = 0; match[k]; k++) {
900
0
      if ((hyphens[offset + k] < match[k])) {
901
0
        hyphens[offset + k] = match[k];
902
0
              if (match[k]&1) {
903
0
                matchrepl[offset + k] = repl;
904
0
                if (repl && (k >= replindex) && (k <= replindex + replcut)) {
905
0
                    matchindex[offset + replindex] = offset + k;
906
0
                }
907
0
              }
908
0
            }
909
0
          }
910
0
          
911
0
  }
912
0
913
0
      /* KBH: we need this to make sure we keep looking in a word */
914
0
      /* for patterns even if the current character is not known in state 0 */
915
0
      /* since patterns for hyphenation may occur anywhere in the word */
916
0
      try_next_letter: ;
917
0
918
0
    }
919
#ifdef VERBOSE
920
  for (i = 0; i < j; i++)
921
    putchar (hyphens[i]);
922
  putchar ('\n');
923
#endif
924
925
0
  for (i = 0; i < j - 3; i++)
926
#if 0
927
    if (hyphens[i + 1] & 1)
928
      hyphens[i] = '-';
929
#else
930
0
    hyphens[i] = hyphens[i + 1];
931
0
#endif
932
0
  for (; i < word_size; i++)
933
0
    hyphens[i] = '0';
934
0
  hyphens[word_size] = '\0';
935
0
936
0
       /* now create a new char string showing hyphenation positions */
937
0
       /* count the hyphens and allocate space for the new hyphenated string */
938
0
       nHyphCount = 0;
939
0
       for (i = 0; i < word_size; i++)
940
0
          if (hyphens[i]&1)
941
0
             nHyphCount++;
942
0
       j = 0;
943
0
       for (i = 0; i < word_size; i++) {
944
0
           if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { 
945
0
                if (rep && pos && cut) {
946
0
                    if (!*rep)
947
0
                        *rep = (char **) calloc(word_size, sizeof(char *));
948
0
                    if (!*pos)
949
0
                        *pos = (int *) calloc(word_size, sizeof(int));
950
0
                    if (!*cut) {
951
0
                        *cut = (int *) calloc(word_size, sizeof(int));
952
0
                    }
953
0
                    (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]);
954
0
                    (*pos)[matchindex[i] - 1] = matchindex[i] - i;
955
0
                    (*cut)[matchindex[i] - 1] = matchlen[i];
956
0
                }
957
0
                j += strlen(matchrepl[matchindex[i]]);
958
0
                i += matchlen[i] - 1;
959
0
          }
960
0
       }
961
0
962
0
  hnj_free (matchrepl);
963
0
  hnj_free (matchlen);
964
0
  hnj_free (matchindex);
965
0
966
0
  /* recursive hyphenation of the first (compound) level segments */
967
0
  if (dict->nextlevel) {
968
0
     char ** rep2;
969
0
     int * pos2;
970
0
     int * cut2;
971
0
     char * hyphens2;
972
0
     int begin = 0;
973
0
974
0
     rep2 = (char**) hnj_malloc (word_size * sizeof(char *));
975
0
     pos2 = (int*) hnj_malloc (word_size * sizeof(int));
976
0
     cut2 = (int*) hnj_malloc (word_size * sizeof(int));
977
0
     hyphens2 = (char*) hnj_malloc (word_size + 3);
978
0
     for (i = 0; i < word_size; i++) rep2[i] = NULL;
979
0
     for (i = 0; i < word_size; i++) if 
980
0
        (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
981
0
        if (i - begin > 0) {
982
0
            int hyph = 0;
983
0
            prep_word[i + 2] = '\0';
984
0
            /* non-standard hyphenation at compound boundary (Schiffahrt) */
985
0
            if (rep && *rep && *pos && *cut && (*rep)[i]) {
986
0
                char * l = strchr((*rep)[i], '=');
987
0
                size_t offset = 2 + i - (*pos)[i];
988
0
                strncpy(prep_word + offset, (*rep)[i], prep_word_size - offset - 1);
989
0
                prep_word[prep_word_size - 1] = '\0';
990
0
                if (l) {
991
0
                    hyph = (l - (*rep)[i]) - (*pos)[i];
992
0
                    prep_word[2 + i + hyph] = '\0';
993
0
                }
994
0
            }
995
0
            hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph,
996
0
                hyphens2, &rep2, &pos2, &cut2, clhmin,
997
0
                crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend));
998
0
            for (j = 0; j < i - begin; j++) {
999
0
                hyphens[begin + j] = hyphens2[j];
1000
0
                if (rep2[j] && rep && pos && cut) {
1001
0
                    if (!*rep && !*pos && !*cut) {
1002
0
                        int k;
1003
0
                        *rep = (char **) malloc(sizeof(char *) * word_size);
1004
0
                        *pos = (int *) malloc(sizeof(int) * word_size);
1005
0
                        *cut = (int *) malloc(sizeof(int) * word_size);
1006
0
                        for (k = 0; k < word_size; k++) {
1007
0
                            (*rep)[k] = NULL;
1008
0
                            (*pos)[k] = 0;
1009
0
                            (*cut)[k] = 0;
1010
0
                        }
1011
0
                    }
1012
0
                    (*rep)[begin + j] = rep2[j];
1013
0
                    (*pos)[begin + j] = pos2[j];
1014
0
                    (*cut)[begin + j] = cut2[j];
1015
0
                }
1016
0
            }
1017
0
            prep_word[i + 2] = word[i + 1];
1018
0
            if (*rep && *pos && *cut && (*rep)[i]) {
1019
0
                size_t offset = 1;
1020
0
                strncpy(prep_word + offset, word, prep_word_size - offset - 1);
1021
0
                prep_word[prep_word_size - 1] = '\0';
1022
0
            }
1023
0
        }
1024
0
        begin = i + 1;
1025
0
        for (j = 0; j < word_size; j++) rep2[j] = NULL;
1026
0
     }
1027
0
     
1028
0
     /* non-compound */
1029
0
     if (begin == 0) {
1030
0
        hnj_hyphen_hyph_(dict->nextlevel, word, word_size,
1031
0
            hyphens, rep, pos, cut, clhmin, crhmin, lend, rend);
1032
0
        if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
1033
0
            rep, pos, cut, clhmin);
1034
0
        if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
1035
0
            rep, pos, cut, crhmin);
1036
0
     }
1037
0
     
1038
0
     free(rep2);
1039
0
     free(cut2);
1040
0
     free(pos2);
1041
0
     free(hyphens2);
1042
0
  }
1043
0
1044
0
  hnj_free (prep_word);
1045
0
  return 0;
1046
0
}
1047
1048
/* UTF-8 normalization of hyphen and non-standard positions.
 *
 * On entry hyphens[] (and, when present, the (*rep), (*pos), (*cut) arrays)
 * are indexed by byte offset into word.  On return they are indexed by
 * UTF-8 character: entry j holds the value that was stored at the last
 * byte of character j, (*pos)[j] and (*cut)[j] are re-expressed as
 * character counts instead of byte counts, and hyphens is NUL-terminated
 * after the last character.
 *
 * rep, pos and cut are optional: if any of them, or any of the arrays they
 * point to, is NULL, only hyphens is normalized.
 *
 * Returns 0 on success, or 1 (after printing a message to stderr) when the
 * first byte of word is a UTF-8 continuation byte.
 */
int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
  char *** rep, int ** pos, int ** cut)
{
  int i, j, k;
  int nonstandard;

  if ((((unsigned char) word[0]) >> 6) == 2) {
    fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word);
    return 1;
  }

  /* the arrays themselves never change inside the loop, so test them once */
  nonstandard = (rep && pos && cut && *rep && *pos && *cut);

  /* calculate UTF-8 character positions */
  for (i = 0, j = -1; i < word_size; i++) {
    /* beginning of an UTF-8 character (not '10' start bits) */
    if ((((unsigned char) word[i]) >> 6) != 2) j++;
    hyphens[j] = hyphens[i];
    if (nonstandard) {
        /* read both byte counts before slot j (possibly == i) is rewritten */
        int back = (*pos)[i];
        int len = (*cut)[i];
        int end;
        int count;

        /* never scan backwards past the first byte of the word */
        if (back > i + 1) back = i + 1;

        /* characters starting within the `back` bytes ending at byte i */
        count = 0;
        for (k = 0; k < back; k++) {
            if ((((unsigned char) word[i - k]) >> 6) != 2) count++;
        }
        (*pos)[j] = count;

        /* characters starting within the cut range, clamped to the word */
        k = i - back + 1;
        end = (len > word_size - k) ? word_size : k + len;
        count = 0;
        for (; k < end; k++) {
            if ((((unsigned char) word[k]) >> 6) != 2) count++;
        }
        (*cut)[j] = count;

        (*rep)[j] = (*rep)[i];
        if (j < i) {
            /* the entry moved to slot j: clear the stale byte-indexed slot */
            (*rep)[i] = NULL;
            (*pos)[i] = 0;
            (*cut)[i] = 0;
        }
    }
  }
  hyphens[j + 1] = '\0';
#ifdef VERBOSE
  printf ("nums: %s\n", hyphens);
#endif
  return 0;
}
1089
1090
/* get the word with all possible hyphenations (output: hyphword)
 *
 * Copies word into hyphword, inserting '=' after every byte i whose
 * hyphens[i] has its low bit set.  Where a non-standard replacement
 * (*rep)[i] exists, the last (*pos)[i] bytes already emitted are
 * overwritten with the replacement string and the input index is advanced
 * by (*cut)[i] - (*pos)[i] instead of inserting '='.
 *
 * At most 2 * word_size - 1 bytes plus the terminating NUL are written, so
 * the hyphword buffer must hold at least 2 * word_size bytes.  For a
 * word_size that is not positive, or is larger than INT_MAX / 2, hyphword
 * is set to the empty string.
 *
 * rep, pos and cut are optional: if any of them, or any of the arrays they
 * point to, is NULL, only standard hyphenation points are emitted.
 */
void hnj_hyphen_hyphword(const char * word, int word_size, const char * hyphens,
    char * hyphword, char *** rep, int ** pos, int ** cut)
{
  int hyphword_size;
  int nonstandard;
  int i;
  int j = 0;

  if (word_size <= 0 || word_size > INT_MAX / 2) {
    hyphword[0] = '\0';
    return;
  }

  /* hyphword buffer size must be at least 2 * l */
  hyphword_size = 2 * word_size - 1;

  /* check the outer pointers too: callers may pass NULL for rep/pos/cut */
  nonstandard = (rep && pos && cut && *rep && *pos && *cut);

  for (i = 0; i < word_size && j < hyphword_size; i++) {
    hyphword[j++] = word[i];
    if (hyphens[i]&1 && j < hyphword_size) {
      if (nonstandard && (*rep)[i] && j >= (*pos)[i]) {
        /* non-standard: rewind over the replaced bytes, emit replacement */
        const char *s = (*rep)[i];
        j -= (*pos)[i];
        while (*s && j < hyphword_size) {
          hyphword[j++] = *s++;
        }
        /* skip the input bytes that the replacement stands in for */
        i += (*cut)[i] - (*pos)[i];
      } else {
        /* standard */
        hyphword[j++] = '=';
      }
    }
  }
  hyphword[j] = '\0';
}
1129
1130
1131
/* main api function with default hyphenmin parameters */
1132
int hnj_hyphen_hyphenate2 (HyphenDict *dict,
1133
         const char *word, int word_size, char * hyphens,
1134
         char *hyphword, char *** rep, int ** pos, int ** cut)
1135
0
{
1136
0
  hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
1137
0
    dict->clhmin, dict->crhmin, 1, 1);
1138
0
  hnj_hyphen_lhmin(dict->utf8, word, word_size,
1139
0
    hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2));
1140
0
  hnj_hyphen_rhmin(dict->utf8, word, word_size,
1141
0
    hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2));
1142
0
1143
0
  /* nohyphen */
1144
0
  if (dict->nohyphen) {
1145
0
    char * nh = dict->nohyphen;
1146
0
    int nhi;
1147
0
    for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
1148
0
        char * nhy = (char *) strstr(word, nh);
1149
0
        while (nhy) {
1150
0
            hyphens[nhy - word + strlen(nh) - 1] = '0';
1151
0
            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = '0';
1152
0
            nhy = (char *) strstr(nhy + 1, nh);
1153
0
        }
1154
0
        nh = nh + strlen(nh) + 1;
1155
0
    }
1156
0
  }
1157
0
1158
0
  if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
1159
0
  if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
1160
#ifdef VERBOSE
1161
  printf ("nums: %s\n", hyphens);
1162
#endif
1163
0
  return 0;
1164
0
}
1165
1166
/* previous main api function with hyphenmin parameters */
1167
int hnj_hyphen_hyphenate3 (HyphenDict *dict,
1168
  const char *word, int word_size, char * hyphens,
1169
  char *hyphword, char *** rep, int ** pos, int ** cut,
1170
  int lhmin, int rhmin, int clhmin, int crhmin)
1171
0
{
1172
0
  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
1173
0
  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
1174
0
  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
1175
0
  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
1176
0
  hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
1177
0
    clhmin, crhmin, 1, 1);
1178
0
  hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
1179
0
    rep, pos, cut, (lhmin > 0 ? lhmin : 2));
1180
0
  hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
1181
0
    rep, pos, cut, (rhmin > 0 ? rhmin : 2));
1182
0
  if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
1183
0
1184
0
  /* nohyphen */
1185
0
  if (dict->nohyphen) {
1186
0
    char * nh = dict->nohyphen;
1187
0
    int nhi;
1188
0
    for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
1189
0
        char * nhy = (char *) strstr(word, nh);
1190
0
        while (nhy) {
1191
0
            hyphens[nhy - word + strlen(nh) - 1] = 0;
1192
0
            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = 0;
1193
0
            nhy = (char *) strstr(nhy + 1, nh);
1194
0
        }
1195
0
        nh = nh + strlen(nh) + 1;
1196
0
    }
1197
0
  }
1198
0
1199
0
  if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
1200
0
  return 0;
1201
0
}