Coverage Report

Created: 2025-07-07 10:01

/work/workdir/UnpackedTarball/hyphen/hyphen.c
Line
Count
Source (jump to first uncovered line)
1
/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
2
 * licenses follows.
3
 */
4
5
/* LibHnj - a library for high quality hyphenation and justification
6
 * Copyright (C) 1998 Raph Levien, 
7
 *       (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 
8
 *           (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
9
 *           (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo)
10
 *
11
 * This library is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Library General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2 of the License, or (at your option) any later version.
15
 *
16
 * This library is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Library General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Library General Public
22
 * License along with this library; if not, write to the 
23
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
24
 * Boston, MA  02111-1307  USA.
25
*/
26
27
/*
28
 * The contents of this file are subject to the Mozilla Public License
29
 * Version 1.0 (the "MPL"); you may not use this file except in
30
 * compliance with the MPL.  You may obtain a copy of the MPL at
31
 * http://www.mozilla.org/MPL/
32
 *
33
 * Software distributed under the MPL is distributed on an "AS IS" basis,
34
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
35
 * for the specific language governing rights and limitations under the
36
 * MPL.
37
 *
38
 */
39
#include <stdlib.h> /* for NULL, malloc */
40
#include <stdio.h>  /* for fprintf */
41
#include <string.h> /* for strdup */
42
43
#ifdef UNX
44
#include <unistd.h> /* for exit */
45
#endif
46
47
#ifdef _WIN32
48
#include <windows.h>
49
#include <wchar.h>
50
#endif
51
52
#define noVERBOSE
53
54
/* calculate hyphenmin values with long ligature length (2 or 3 characters
55
 * instead of 1 or 2) for comparison with hyphenation without ligatures */
56
#define noLONG_LIGATURE
57
58
#ifdef LONG_LIGATURE
59
#define LIG_xx  1
60
#define LIG_xxx 2
61
#else
62
0
#define LIG_xx  0
63
0
#define LIG_xxx 1
64
#endif
65
66
#include "hnjalloc.h"
67
#include "hyphen.h"
68
69
/* Duplicate the NUL-terminated string s into a buffer obtained from
 * hnj_malloc() and return the copy.  Elsewhere in this file such copies
 * are released with hnj_free(). */
static char *
hnj_strdup (const char *s)
{
  int len = strlen (s);
  char *copy = (char *) hnj_malloc (len + 1);

  /* s[len] is the terminator, so copying len + 1 bytes terminates the copy */
  memcpy (copy, s, len + 1);
  return copy;
}
81
82
/* Remove a trailing line terminator from s in place.
 *
 * Handles "\n", "\r" and "\r\n" (and the other two-character combinations
 * of those characters at the very end of the string).  A '\r' that is
 * followed by an ordinary character is left untouched: only characters that
 * actually form the line end are stripped. */
void hnj_strchomp(char * s)
{
  size_t len = strlen(s);

  if (len == 0)
    return;
  if (s[len - 1] != '\r' && s[len - 1] != '\n')
    return;                     /* no line terminator at the end */

  s[len - 1] = '\0';
  /* second half of a two-character terminator such as "\r\n" */
  if (len > 1 && s[len - 2] == '\r')
    s[len - 2] = '\0';
}
89
90
/* a little bit of a hash table implementation. This simply maps strings
91
   to state numbers */
92
93
/* Number of bucket heads in the table ("a cheap, but effective, hack"). */
#define HASH_SIZE 31627

typedef struct _HashEntry HashEntry;
typedef struct _HashTab HashTab;

/* One key/value pair; entries that share a bucket are chained via next. */
struct _HashEntry {
  HashEntry *next;
  char *key;
  int val;
};

/* Fixed-size array of bucket heads, indexed by hash value modulo HASH_SIZE. */
struct _HashTab {
  HashEntry *entries[HASH_SIZE];
};
108
109
/* String hash (from ASU, adapted from Gtk+).
 *
 * Each character is folded in by shifting the running value left four bits
 * and adding the character; whenever the top nibble becomes non-zero it is
 * XOR-ed back into bits 4..7 and then cleared.  The result is NOT reduced
 * modulo the table size - callers do that themselves. */
static unsigned int
hnj_string_hash (const char *s)
{
  unsigned int hash = 0;

  while (*s != '\0') {
    unsigned int top;

    hash = (hash << 4) + *s++;
    top = hash & 0xf0000000;
    if (top != 0) {
      hash ^= top >> 24;
      hash ^= top;
    }
  }
  return hash;
}
124
125
static HashTab *
126
hnj_hash_new (void)
127
0
{
128
0
  HashTab *hashtab;
129
0
  int i;
130
131
0
  hashtab = (HashTab *) hnj_malloc (sizeof(HashTab));
132
0
  for (i = 0; i < HASH_SIZE; i++)
133
0
    hashtab->entries[i] = NULL;
134
135
0
  return hashtab;
136
0
}
137
138
static void
139
hnj_hash_free (HashTab *hashtab)
140
0
{
141
0
  int i;
142
0
  HashEntry *e, *next;
143
144
0
  for (i = 0; i < HASH_SIZE; i++)
145
0
    for (e = hashtab->entries[i]; e; e = next)
146
0
      {
147
0
  next = e->next;
148
0
  hnj_free (e->key);
149
0
  hnj_free (e);
150
0
      }
151
152
0
  hnj_free (hashtab);
153
0
}
154
155
/* assumes that key is not already present! */
156
static void
157
hnj_hash_insert (HashTab *hashtab, const char *key, int val)
158
0
{
159
0
  int i;
160
0
  HashEntry *e;
161
162
0
  i = hnj_string_hash (key) % HASH_SIZE;
163
0
  e = (HashEntry *) hnj_malloc (sizeof(HashEntry));
164
0
  e->next = hashtab->entries[i];
165
0
  e->key = hnj_strdup (key);
166
0
  e->val = val;
167
0
  hashtab->entries[i] = e;
168
0
}
169
170
/* return val if found, otherwise -1 */
171
static int
172
hnj_hash_lookup (HashTab *hashtab, const char *key)
173
0
{
174
0
  int i;
175
0
  HashEntry *e;
176
0
  i = hnj_string_hash (key) % HASH_SIZE;
177
0
  for (e = hashtab->entries[i]; e; e = e->next)
178
0
    if (!strcmp (key, e->key))
179
0
      return e->val;
180
0
  return -1;
181
0
}
182
183
/* Get the state number, allocating a new state if necessary. */
184
static int
185
hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string)
186
0
{
187
0
  int state_num;
188
189
0
  state_num = hnj_hash_lookup (hashtab, string);
190
191
0
  if (state_num >= 0)
192
0
    return state_num;
193
194
0
  hnj_hash_insert (hashtab, string, dict->num_states);
195
  /* predicate is true if dict->num_states is a power of two */
196
0
  if (!(dict->num_states & (dict->num_states - 1)))
197
0
    {
198
0
      dict->states = (HyphenState *) hnj_realloc (dict->states,
199
0
          (dict->num_states << 1) *
200
0
          sizeof(HyphenState));
201
0
    }
202
0
  dict->states[dict->num_states].match = NULL;
203
0
  dict->states[dict->num_states].repl = NULL;
204
0
  dict->states[dict->num_states].fallback_state = -1;
205
0
  dict->states[dict->num_states].num_trans = 0;
206
0
  dict->states[dict->num_states].trans = NULL;
207
0
  return dict->num_states++;
208
0
}
209
210
/* add a transition from state1 to state2 through ch - assumes that the
211
   transition does not already exist */
212
static void
213
hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
214
0
{
215
0
  int num_trans;
216
217
0
  num_trans = dict->states[state1].num_trans;
218
0
  if (num_trans == 0)
219
0
    {
220
0
      dict->states[state1].trans = (HyphenTrans *) hnj_malloc (sizeof(HyphenTrans));
221
0
    }
222
0
  else if (!(num_trans & (num_trans - 1)))
223
0
    {
224
0
      dict->states[state1].trans = (HyphenTrans *) hnj_realloc (dict->states[state1].trans,
225
0
            (num_trans << 1) *
226
0
            sizeof(HyphenTrans));
227
0
    }
228
0
  dict->states[state1].trans[num_trans].ch = ch;
229
0
  dict->states[state1].trans[num_trans].new_state = state2;
230
0
  dict->states[state1].num_trans++;
231
0
}
232
233
#ifdef VERBOSE
/* Debug-only record of the hash table of each dictionary level.  The loader
 * stores a table at index 0 and at index 1 (and may swap the two), so the
 * array needs two slots. */
HashTab *global[2];

/* Debug helper: return the key string of the entry whose value is `state`
 * in the table recorded for `level`, or NULL when there is no such entry.
 * This is a linear scan over every bucket. */
static char *
get_state_str (int state, int level)
{
  int i;
  HashEntry *e;

  for (i = 0; i < HASH_SIZE; i++)
    for (e = global[level]->entries[i]; e; e = e->next)
      if (e->val == state)
        return e->key;
  return NULL;
}
#endif
249
250
0
/* Parse one dictionary line and merge it into dict.
 *
 * buf     - NUL-terminated line.  It is modified in place when it contains
 *           a '/' replacement part (the separators are overwritten by NULs).
 * dict    - dictionary being built; dict->nohyphen must be NULL or point to
 *           a list created by an earlier NOHYPHEN line.
 * hashtab - maps state strings to state numbers for dict.
 *
 * Keyword lines (LEFTHYPHENMIN, RIGHTHYPHENMIN, COMPOUNDLEFTHYPHENMIN,
 * COMPOUNDRIGHTHYPHENMIN, NOHYPHEN) set the corresponding dict field.
 * Any other line is treated as a pattern, optionally followed by
 * "/replacement,index,cut": digits become the match string, every other
 * character becomes part of the state string, and transitions are added
 * for each prefix of the state string that did not exist before.
 */
void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
  int i, j;
  char word[MAX_CHARS];
  char pattern[MAX_CHARS];
  char * repl;
  signed char replindex;
  signed char replcut;
  int state_num = 0;
  int last_state;
  char ch;
  int found;

  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
    dict->lhmin = atoi(buf + 13);
    return;
  }
  if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
    dict->rhmin = atoi(buf + 14);
    return;
  }
  if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
    dict->clhmin = atoi(buf + 21);
    return;
  }
  if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
    dict->crhmin = atoi(buf + 22);
    return;
  }
  if (strncmp(buf, "NOHYPHEN", 8) == 0) {
    char * space = buf + 8;
    size_t len, k;

    while (*space == ' ' || *space == '\t') space++;
    /* Nothing after the keyword: there is no list to store.  Duplicating an
     * empty string here would make the terminator removal below write in
     * front of the allocation. */
    if (*space == '\0') return;

    /* A repeated NOHYPHEN line replaces the earlier list; drop the old
     * buffer and restart the separator count so it matches the new list. */
    if (dict->nohyphen) hnj_free(dict->nohyphen);
    dict->nohyphen = hnj_strdup(space);
    dict->nohyphenl = 0;

    len = strlen(dict->nohyphen);       /* >= 1, checked above */
    dict->nohyphen[len - 1] = 0;        /* drop the last character (line end) */
    /* Turn the comma separators into NULs and count them.  Index 0 is never
     * examined, so a list may start with a literal comma. */
    for (k = len - 1; k > 1; k--) {
      if (dict->nohyphen[k - 1] == ',') {
        dict->nohyphenl++;
        dict->nohyphen[k - 1] = 0;
      }
    }
    return;
  }

  j = 0;
  pattern[j] = '0';
  repl = strchr(buf, '/');
  replindex = 0;
  replcut = 0;
  if (repl) {
    char * index = strchr(repl + 1, ',');
    *repl = '\0';
    if (index) {
      char * index2 = strchr(index + 1, ',');
      *index = '\0';
      if (index2) {
        *index2 = '\0';
        replindex = (signed char) atoi(index + 1) - 1;
        replcut = (signed char) atoi(index2 + 1);
      }
    } else {
      hnj_strchomp(repl + 1);
      replindex = 0;
      replcut = (signed char) strlen(buf);
    }
    repl = hnj_strdup(repl + 1);
  }

  /* Split the token into the state string (word) and the digit string
   * (pattern).  The j bound keeps word[j] and pattern[j + 1] inside their
   * MAX_CHARS-sized arrays even for a maximal-length token. */
  for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)) && j < MAX_CHARS - 2; i++) {
    if (buf[i] >= '0' && buf[i] <= '9') {
      pattern[j] = buf[i];
    } else {
      word[j] = buf[i];
      pattern[++j] = '0';
    }
  }
  word[j] = '\0';
  pattern[j + 1] = '\0';

  i = 0;
  if (!repl) {
    /* Optimize away leading zeroes */
    for (; pattern[i] == '0'; i++);
  } else {
    if (*word == '.') i++;
    /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
    if (dict->utf8) {
      int pu = -1;        /* unicode character position */
      int ps = -1;        /* unicode start position (original replindex) */
      int pc = (*word == '.') ? 1 : 0; /* 8-bit character position */
      size_t wlen = strlen(word);

      /* the terminating NUL is visited too (pc == wlen) */
      for (; (size_t) pc <= wlen; pc++) {
        /* beginning of an UTF-8 character (not '10' start bits) */
        if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
        if ((ps < 0) && (replindex == pu)) {
          ps = replindex;
          replindex = (signed char) pc;
        }
        if ((ps >= 0) && ((pu - ps) == replcut)) {
          replcut = (signed char) (pc - replindex);
          break;
        }
      }
      if (*word == '.') replindex--;
    }
  }

#ifdef VERBOSE
  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
#endif
  found = hnj_hash_lookup (hashtab, word);
  state_num = hnj_get_state (dict, hashtab, word);
  dict->states[state_num].match = hnj_strdup (pattern + i);
  dict->states[state_num].repl = repl;
  dict->states[state_num].replindex = replindex;
  if (!replcut) {
    dict->states[state_num].replcut = (signed char) strlen(word);
  } else {
    dict->states[state_num].replcut = replcut;
  }

  /* now, put in the prefix transitions; stop at the first prefix that was
   * already known before this line was read */
  for (; found < 0 && j > 0; --j) {
    last_state = state_num;
    ch = word[j - 1];
    word[j - 1] = '\0';
    found = hnj_hash_lookup (hashtab, word);
    state_num = hnj_get_state (dict, hashtab, word);
    hnj_add_trans (dict, state_num, last_state, ch);
  }
}
378
379
0
/* Open path with the given fopen() mode.
 *
 * On Windows, a path that starts with the long-path prefix "\\?\" is
 * converted from UTF-8 to UTF-16, normalised with _wfullpath() and opened
 * with _wfopen(); mode "r" selects text mode and every other mode is
 * treated as "rb".  All other paths, and all paths on other platforms, go
 * straight to fopen().
 *
 * Returns the stream, or NULL when the file cannot be opened or (Windows
 * long-path branch) the conversion or an allocation fails. */
FILE * hnj_fopen(const char * path, const char * mode) {
#ifdef _WIN32
#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
    if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
        FILE * f = NULL;
        /* length in wide characters, including the terminating NUL */
        int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
        if (len > 0) {
            wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t));
            wchar_t *buff2 = (wchar_t *) malloc(len * sizeof(wchar_t));
            /* only touch the buffers when both allocations succeeded */
            if (buff != NULL && buff2 != NULL &&
                MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len) > 0 &&
                _wfullpath( buff2, buff, len ) != NULL) {
              f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
            }
            free(buff);
            free(buff2);
        }
        return f;
    }
#endif
    return fopen(path, mode);
}
398
399
/* Load a hyphenation dictionary from the file named fn.
 * Returns NULL when the file cannot be opened; otherwise returns whatever
 * hnj_hyphen_load_file() produces for the opened stream.  The stream is
 * closed before returning. */
HyphenDict *
hnj_hyphen_load (const char *fn)
{
  HyphenDict *loaded = NULL;
  FILE *in = hnj_fopen (fn, "r");

  if (in != NULL)
    {
      loaded = hnj_hyphen_load_file (in);
      fclose (in);
    }
  return loaded;
}
413
414
HyphenDict *
415
hnj_hyphen_load_file (FILE *f)
416
0
{
417
0
  HyphenDict *dict[2];
418
0
  HashTab *hashtab;
419
0
  char buf[MAX_CHARS];
420
0
  int nextlevel = 0;
421
0
  int i, j, k;
422
0
  HashEntry *e;
423
0
  int state_num = 0;
424
// loading one or two dictionaries (separated by NEXTLEVEL keyword)
425
0
for (k = 0; k < 2; k++) { 
426
0
  hashtab = hnj_hash_new ();
427
#ifdef VERBOSE
428
  global[k] = hashtab;
429
#endif
430
0
  hnj_hash_insert (hashtab, "", 0);
431
0
  dict[k] = (HyphenDict *) hnj_malloc (sizeof(HyphenDict));
432
0
  dict[k]->num_states = 1;
433
0
  dict[k]->states = (HyphenState *) hnj_malloc (sizeof(HyphenState));
434
0
  dict[k]->states[0].match = NULL;
435
0
  dict[k]->states[0].repl = NULL;
436
0
  dict[k]->states[0].fallback_state = -1;
437
0
  dict[k]->states[0].num_trans = 0;
438
0
  dict[k]->states[0].trans = NULL;
439
0
  dict[k]->nextlevel = NULL;
440
0
  dict[k]->lhmin = 0;
441
0
  dict[k]->rhmin = 0;
442
0
  dict[k]->clhmin = 0;
443
0
  dict[k]->crhmin = 0;
444
0
  dict[k]->nohyphen = NULL;
445
0
  dict[k]->nohyphenl = 0;
446
447
  /* read in character set info */
448
0
  if (k == 0) {
449
0
    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
450
0
    if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
451
0
      for (i=0;i<MAX_NAME;i++)
452
0
        if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
453
0
          dict[k]->cset[i] = 0;
454
0
    } else {
455
0
      dict[k]->cset[0] = 0;
456
0
    }
457
0
    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
458
0
  } else {
459
0
    strncpy(dict[k]->cset, dict[0]->cset, sizeof(dict[k]->cset)-1);
460
0
    dict[k]->cset[sizeof(dict[k]->cset)-1] = '\0';
461
0
    dict[k]->utf8 = dict[0]->utf8;
462
0
  }
463
464
0
  if (k == 0 || nextlevel) {
465
0
    while (fgets (buf, sizeof(buf), f) != NULL) {
466
0
      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
467
0
  nextlevel = 1;
468
0
  break;
469
0
      } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
470
0
    }
471
0
  } else if (k == 1) {
472
    /* default first level: hyphen and ASCII apostrophe */
473
0
    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN ',-\n", dict[k], hashtab);
474
0
    else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99,-\n", dict[k], hashtab);
475
0
    strncpy(buf, "1-1\n", MAX_CHARS-1); // buf rewritten by hnj_hyphen_load here
476
0
    buf[MAX_CHARS-1] = '\0';
477
0
    hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
478
0
    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
479
0
    if (dict[0]->utf8) {
480
0
      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
481
0
      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
482
0
    }
483
0
  }
484
485
  /* Could do unioning of matches here (instead of the preprocessor script).
486
     If we did, the pseudocode would look something like this:
487
488
     foreach state in the hash table
489
        foreach i = [1..length(state) - 1]
490
           state to check is substr (state, i)
491
           look it up
492
           if found, and if there is a match, union the match in.
493
494
     It's also possible to avoid the quadratic blowup by doing the
495
     search in order of increasing state string sizes - then you
496
     can break the loop after finding the first match.
497
498
     This step should be optional in any case - if there is a
499
     preprocessed rule table, it's always faster to use that.
500
501
*/
502
503
  /* put in the fallback states */
504
0
  for (i = 0; i < HASH_SIZE; i++)
505
0
    for (e = hashtab->entries[i]; e; e = e->next)
506
0
      {
507
0
  if (*(e->key)) for (j = 1; 1; j++)
508
0
    {          
509
0
      state_num = hnj_hash_lookup (hashtab, e->key + j);
510
0
      if (state_num >= 0)
511
0
        break;
512
0
    }
513
        /* KBH: FIXME state 0 fallback_state should always be -1? */
514
0
  if (e->val)
515
0
    dict[k]->states[e->val].fallback_state = state_num;
516
0
      }
517
#ifdef VERBOSE
518
  for (i = 0; i < HASH_SIZE; i++)
519
    for (e = hashtab->entries[i]; e; e = e->next)
520
      {
521
  printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val,
522
    dict[k]->states[e->val].fallback_state);
523
  for (j = 0; j < dict[k]->states[e->val].num_trans; j++)
524
    printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch,
525
      dict[k]->states[e->val].trans[j].new_state);
526
      }
527
#endif
528
529
0
#ifndef VERBOSE
530
0
  hnj_hash_free (hashtab);
531
0
#endif
532
0
  state_num = 0;
533
0
}
534
0
  if (nextlevel) dict[0]->nextlevel = dict[1];
535
0
  else {
536
0
    dict[1] -> nextlevel = dict[0];
537
0
    dict[1]->lhmin = dict[0]->lhmin;
538
0
    dict[1]->rhmin = dict[0]->rhmin;
539
0
    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
540
0
    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
541
#ifdef VERBOSE
542
    HashTab *r = global[0];
543
    global[0] = global[1];
544
    global[1] = r;
545
#endif
546
0
    return dict[1];
547
0
  }
548
0
  return dict[0];
549
0
}
550
551
/* Release dict: the match, replacement and transition data of every state,
 * the chained next-level dictionary (recursively), the NOHYPHEN list, the
 * state array and finally the dictionary structure itself. */
void hnj_hyphen_free (HyphenDict *dict)
{
  int idx = 0;

  while (idx < dict->num_states)
    {
      HyphenState *st = &dict->states[idx];

      if (st->match != NULL)
        hnj_free (st->match);
      if (st->repl != NULL)
        hnj_free (st->repl);
      if (st->trans != NULL)
        hnj_free (st->trans);
      idx++;
    }

  if (dict->nextlevel != NULL)
    hnj_hyphen_free (dict->nextlevel);

  if (dict->nohyphen != NULL)
    hnj_free (dict->nohyphen);

  hnj_free (dict->states);

  hnj_free (dict);
}
574
575
#define MAX_WORD 256
576
577
/* Run the pattern automaton of dict over word and store the resulting
 * digit string in hyphens.
 *
 * word      - input bytes; word_size of them are read
 * hyphens   - output buffer; word_size + 5 bytes are written while working,
 *             and the result is a NUL-terminated string of word_size digits
 * Returns 0.
 *
 * The word is wrapped as ".word." with ASCII digits replaced by '.'.  For
 * each state reached, the state's match digits are merged into hyphens by
 * taking the larger digit at every position.  States that carry a
 * replacement (repl) are ignored here.  Finally the digits are shifted left
 * by one to undo the leading '.', and the first position and the last few
 * positions are forced to '0'. */
int hnj_hyphen_hyphenate (HyphenDict *dict,
         const char *word, int word_size,
         char *hyphens)
{
  char *padded;
  int plen;
  int pos, t;
  int state;
  char ch;
  HyphenState *node;
  char *match;
  int offset;
  int advanced;

  padded = (char*) hnj_malloc (word_size + 3);

  plen = 0;
  padded[plen++] = '.';
  for (pos = 0; pos < word_size; pos++) {
    if (word[pos] >= '0' && word[pos] <= '9')
      padded[plen++] = '.';
    else
      padded[plen++] = word[pos];
  }
  padded[plen++] = '.';
  padded[plen] = '\0';

  pos = 0;
  while (pos < word_size + 5)
    hyphens[pos++] = '0';

#ifdef VERBOSE
  printf ("prep_word = %s\n", padded);
#endif

  /* now, run the finite state machine */
  state = 0;
  for (pos = 0; pos < plen; pos++)
    {
      ch = padded[pos];
      advanced = 0;

      /* follow fallback links until a transition on ch exists or the
         chain ends (-1) */
      while (state != -1)
        {
#ifdef VERBOSE
          {
            char *state_str;
            state_str = get_state_str (state, 0);

            for (t = 0; t < pos - strlen (state_str); t++)
              putchar (' ');
            printf ("%s", state_str);
          }
#endif
          node = &dict->states[state];
          for (t = 0; t < node->num_trans; t++)
            {
              if (node->trans[t].ch == ch)
                {
                  state = node->trans[t].new_state;
                  advanced = 1;
                  break;
                }
            }
          if (advanced)
            break;
          state = node->fallback_state;
#ifdef VERBOSE
          printf (" falling back, fallback_state %d\n", state);
#endif
        }

      /* KBH: we need this to make sure we keep looking in a word
         for patterns even if the current character is not known in state 0
         since patterns for hyphenation may occur anywhere in the word */
      if (!advanced)
        {
          state = 0;
          continue;
        }

#ifdef VERBOSE
      printf ("found state %d\n",state);
#endif
      /* Additional optimization is possible here - especially,
         elimination of trailing zeroes from the match. Leading zeroes
         have already been optimized. */
      match = dict->states[state].match;
      /* replacing rules not handled by hyphen_hyphenate() */
      if (match && !dict->states[state].repl)
        {
          offset = pos + 1 - strlen (match);
#ifdef VERBOSE
          for (t = 0; t < offset; t++)
            putchar (' ');
          printf ("%s\n", match);
#endif
          /* This is a linear search because a binary search was tried and
             found to be just a teeny bit slower. */
          for (t = 0; match[t]; t++)
            {
              if (hyphens[offset + t] < match[t])
                hyphens[offset + t] = match[t];
            }
        }
    }
#ifdef VERBOSE
  for (pos = 0; pos < plen; pos++)
    putchar (hyphens[pos]);
  putchar ('\n');
#endif

  /* shift left by one to drop the slot of the leading '.' */
  pos = 0;
  while (pos < plen - 4)
    {
      hyphens[pos] = hyphens[pos + 1];
      pos++;
    }
  hyphens[0] = '0';
  while (pos < word_size)
    hyphens[pos++] = '0';
  hyphens[word_size] = '\0';

  hnj_free (padded);

  return 0;
}
700
701
/* Unicode ligature length */
702
0
int hnj_ligature(unsigned char c) {
703
0
    switch (c) {
704
0
        case 0x80:      /* ff */
705
0
        case 0x81:      /* fi */
706
0
        case 0x82: return LIG_xx; /* fl */
707
0
        case 0x83:      /* ffi */
708
0
        case 0x84: return LIG_xxx; /* ffl */
709
0
        case 0x85:      /* long st */
710
0
        case 0x86: return LIG_xx; /* st */
711
0
    }
712
0
    return 0;
713
0
}
714
715
/* Character length of the first n bytes of word.
 *
 * With utf8 == 0 every byte counts as one character.  With utf8 != 0 the
 * continuation bytes (10xxxxxx) are skipped, and a character that starts
 * with the bytes 0xEF 0xAC additionally counts hnj_ligature() of its third
 * byte.  Counting stops at the first NUL byte. */
int hnj_hyphen_strnlen(const char * word, int n, int utf8)
{
    int count = 0;
    int off = 0;

    while (off < n && word[off] != '\0') {
      count++;
      /* Unicode ligature support */
      if (utf8 && ((unsigned char) word[off] == 0xEF)
               && ((unsigned char) word[off + 1] == 0xAC))
        count += hnj_ligature(word[off + 2]);

      /* advance past the lead byte, then past any continuation bytes */
      off++;
      while (utf8 && (word[off] & 0xc0) == 0x80)
        off++;
    }
    return count;
}
730
731
/* Clear hyphenation marks that lie too close to the start of word.
 *
 * Positions are walked from the left until lhmin characters have been
 * counted.  At each visited position the mark in hyphens is set to '0',
 * unless a replacement rule is recorded there ((*rep)[j] non-NULL): such a
 * rule is kept, except when it contains '=' and the part before the break
 * is shorter than lhmin, in which case the rule string is freed and the
 * mark cleared.  Leading ASCII digits are not counted.  With utf8 set,
 * continuation bytes belong to the preceding character and ligature
 * characters count extra (hnj_ligature()).
 *
 * word_size is not used.  Always returns 0. */
int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
  char *** rep, int ** pos, int ** cut, int lhmin)
{
    int chars = 1;
    int idx;

    /* Unicode ligature support */
    if (utf8 && ((unsigned char) word[0] == 0xEF) && ((unsigned char) word[1] == 0xAC)) {
      chars += hnj_ligature(word[2]);
    }

    /* ignore numbers */
    for (idx = 0; word[idx] <= '9' && word[idx] >= '0'; idx++)
      chars--;

    idx = 0;
    while (chars < lhmin && word[idx] != '\0') {
      /* one pass of the inner loop per byte of the current character */
      do {
        /* check length of the non-standard part */
        if (*rep && *pos && *cut && (*rep)[idx]) {
          char * eq = strchr((*rep)[idx], '=');
          if (eq && (hnj_hyphen_strnlen(word, idx - (*pos)[idx] + 1, utf8) +
            hnj_hyphen_strnlen((*rep)[idx], eq - (*rep)[idx], utf8)) < lhmin) {
              free((*rep)[idx]);
              (*rep)[idx] = NULL;
              hyphens[idx] = '0';
          }
        } else {
          hyphens[idx] = '0';
        }
        idx++;

        /* Unicode ligature support */
        if (utf8 && ((unsigned char) word[idx] == 0xEF) && ((unsigned char) word[idx + 1] == 0xAC)) {
          chars += hnj_ligature(word[idx + 2]);
        }
      } while (utf8 && (word[idx] & 0xc0) == 0x80);
      chars++;
    }
    return 0;
}
766
767
/* Clear hyphenation marks that lie too close to the end of word.
 *
 * Positions are walked from word_size - 1 down to 1 until rhmin characters
 * have been counted.  At each visited position the mark in hyphens is set
 * to '0', unless a replacement rule is recorded there ((*rep)[j] non-NULL):
 * such a rule is kept, except when it contains '=' and the part after the
 * break is shorter than rhmin, in which case the rule string is freed and
 * the mark cleared.  Trailing ASCII digits are not counted.  With utf8 set,
 * continuation bytes (10xxxxxx) do not count as characters.
 *
 * Always returns 0. */
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
  char *** rep, int ** pos, int ** cut, int rhmin)
{
    int chars = 0;
    int idx;

    /* ignore numbers */
    for (idx = word_size - 1; idx > 0 && word[idx] <= '9' && word[idx] >= '0'; idx--)
      chars--;

    idx = word_size - 1;
    while (chars < rhmin && idx > 0) {
      /* check length of the non-standard part */
      if (*rep && *pos && *cut && (*rep)[idx]) {
        char * eq = strchr((*rep)[idx], '=');
        if (eq && (hnj_hyphen_strnlen(word + idx - (*pos)[idx] + (*cut)[idx] + 1, 100, utf8) +
          hnj_hyphen_strnlen(eq + 1, strlen(eq + 1), utf8)) < rhmin) {
            free((*rep)[idx]);
            (*rep)[idx] = NULL;
            hyphens[idx] = '0';
        }
      } else {
        hyphens[idx] = '0';
      }
      /* count this byte unless it is a UTF-8 continuation byte */
      if (!utf8 || (word[idx] & 0xc0) == 0xc0 || (word[idx] & 0x80) != 0x80)
        chars++;
      idx--;
    }
    return 0;
}
793
794
// recursive function for compound level hyphenation
795
int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
796
    char * hyphens, char *** rep, int ** pos, int ** cut,
797
    int clhmin, int crhmin, int lend, int rend)
798
0
{
799
0
  char *prep_word;
800
0
  int i, j, k;
801
0
  int state;
802
0
  char ch;
803
0
  HyphenState *hstate;
804
0
  char *match;
805
0
  char *repl;
806
0
  signed char replindex;
807
0
  signed char replcut;
808
0
  int offset;
809
0
  int * matchlen;
810
0
  int * matchindex;
811
0
  char ** matchrepl;  
812
0
  int isrepl = 0;
813
0
  int nHyphCount;
814
815
0
  size_t prep_word_size = word_size + 3;
816
0
  prep_word = (char*) hnj_malloc (prep_word_size);
817
0
  matchlen = (int*) hnj_malloc ((word_size + 3) * sizeof(int));
818
0
  matchindex = (int*) hnj_malloc ((word_size + 3) * sizeof(int));
819
0
  matchrepl = (char**) hnj_malloc ((word_size + 3) * sizeof(char *));
820
821
0
  j = 0;
822
0
  prep_word[j++] = '.';
823
  
824
0
  for (i = 0; i < word_size; i++) {
825
0
    if (word[i] <= '9' && word[i] >= '0') {
826
0
      prep_word[j++] = '.';
827
0
    } else {
828
0
      prep_word[j++] = word[i];
829
0
    }
830
0
  }
831
832
833
834
0
  prep_word[j++] = '.';
835
0
  prep_word[j] = '\0';
836
837
0
  for (i = 0; i < j; i++)
838
0
    hyphens[i] = '0';    
839
840
#ifdef VERBOSE
841
  printf ("prep_word = %s\n", prep_word);
842
#endif
843
844
  /* now, run the finite state machine */
845
0
  state = 0;
846
0
  for (i = 0; i < j; i++)
847
0
    {
848
0
      ch = prep_word[i];
849
0
      for (;;)
850
0
  {
851
852
0
    if (state == -1) {
853
            /* return 1; */
854
      /*  KBH: FIXME shouldn't this be as follows? */
855
0
            state = 0;
856
0
            goto try_next_letter;
857
0
          }          
858
859
#ifdef VERBOSE
860
    char *state_str;
861
    state_str = get_state_str (state, 1);
862
863
    for (k = 0; k < i - strlen (state_str); k++)
864
      putchar (' ');
865
    printf ("%s", state_str);
866
#endif
867
868
0
    hstate = &dict->states[state];
869
0
    for (k = 0; k < hstate->num_trans; k++)
870
0
      if (hstate->trans[k].ch == ch)
871
0
        {
872
0
    state = hstate->trans[k].new_state;
873
0
    goto found_state;
874
0
        }
875
0
    state = hstate->fallback_state;
876
#ifdef VERBOSE
877
    printf (" falling back, fallback_state %d\n", state);
878
#endif
879
0
  }
880
0
    found_state:
881
#ifdef VERBOSE
882
      printf ("found state %d\n",state);
883
#endif
884
      /* Additional optimization is possible here - especially,
885
   elimination of trailing zeroes from the match. Leading zeroes
886
   have already been optimized. */
887
0
      match = dict->states[state].match;
888
0
      repl = dict->states[state].repl;
889
0
      replindex = dict->states[state].replindex;
890
0
      replcut = dict->states[state].replcut;
891
      /* replacing rules not handled by hyphen_hyphenate() */
892
0
      if (match)
893
0
  {
894
0
    offset = i + 1 - strlen (match);
895
#ifdef VERBOSE
896
    for (k = 0; k < offset; k++)
897
      putchar (' ');
898
    printf ("%s (%s)\n", match, repl);
899
#endif
900
0
          if (repl) {
901
0
            if (!isrepl) for(; isrepl < word_size; isrepl++) {
902
0
                matchrepl[isrepl] = NULL;
903
0
                matchindex[isrepl] = -1;
904
0
            }
905
0
            matchlen[offset + replindex] = replcut;
906
0
          }
907
    /* This is a linear search because I tried a binary search and
908
       found it to be just a teeny bit slower. */
909
0
    for (k = 0; match[k]; k++) {
910
0
      if ((hyphens[offset + k] < match[k])) {
911
0
        hyphens[offset + k] = match[k];
912
0
              if (match[k]&1) {
913
0
                matchrepl[offset + k] = repl;
914
0
                if (repl && (k >= replindex) && (k <= replindex + replcut)) {
915
0
                    matchindex[offset + replindex] = offset + k;
916
0
                }
917
0
              }
918
0
            }
919
0
          }
920
          
921
0
  }
922
923
      /* KBH: we need this to make sure we keep looking in a word */
924
      /* for patterns even if the current character is not known in state 0 */
925
      /* since patterns for hyphenation may occur anywhere in the word */
926
0
      try_next_letter: ;
927
928
0
    }
929
#ifdef VERBOSE
930
  for (i = 0; i < j; i++)
931
    putchar (hyphens[i]);
932
  putchar ('\n');
933
#endif
934
935
0
  for (i = 0; i < j - 3; i++)
936
#if 0
937
    if (hyphens[i + 1] & 1)
938
      hyphens[i] = '-';
939
#else
940
0
    hyphens[i] = hyphens[i + 1];
941
0
#endif
942
0
  for (; i < word_size; i++)
943
0
    hyphens[i] = '0';
944
0
  hyphens[word_size] = '\0';
945
946
       /* now create a new char string showing hyphenation positions */
947
       /* count the hyphens and allocate space for the new hyphenated string */
948
0
       nHyphCount = 0;
949
0
       for (i = 0; i < word_size; i++)
950
0
          if (hyphens[i]&1)
951
0
             nHyphCount++;
952
0
       j = 0;
953
0
       for (i = 0; i < word_size; i++) {
954
0
           if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { 
955
0
                if (rep && pos && cut) {
956
0
                    if (!*rep)
957
0
                        *rep = (char **) calloc(word_size, sizeof(char *));
958
0
                    if (!*pos)
959
0
                        *pos = (int *) calloc(word_size, sizeof(int));
960
0
                    if (!*cut) {
961
0
                        *cut = (int *) calloc(word_size, sizeof(int));
962
0
                    }
963
0
                    (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]);
964
0
                    (*pos)[matchindex[i] - 1] = matchindex[i] - i;
965
0
                    (*cut)[matchindex[i] - 1] = matchlen[i];
966
0
                }
967
0
                j += strlen(matchrepl[matchindex[i]]);
968
0
                i += matchlen[i] - 1;
969
0
          }
970
0
       }
971
972
0
  hnj_free (matchrepl);
973
0
  hnj_free (matchlen);
974
0
  hnj_free (matchindex);
975
976
  // recursive hyphenation of the first (compound) level segments
977
0
  if (dict->nextlevel) {
978
0
     char ** rep2;
979
0
     int * pos2;
980
0
     int * cut2;
981
0
     char * hyphens2;
982
0
     int begin = 0;
983
984
0
     rep2 = (char**) hnj_malloc (word_size * sizeof(char *));
985
0
     pos2 = (int*) hnj_malloc (word_size * sizeof(int));
986
0
     cut2 = (int*) hnj_malloc (word_size * sizeof(int));
987
0
     hyphens2 = (char*) hnj_malloc (word_size + 3);
988
0
     for (i = 0; i < word_size; i++) rep2[i] = NULL;
989
0
     for (i = 0; i < word_size; i++) if 
990
0
        (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
991
0
        if (i - begin > 1) {
992
0
            int hyph = 0;
993
0
            prep_word[i + 2] = '\0';
994
            /* non-standard hyphenation at compound boundary (Schiffahrt) */
995
0
            if (rep && *rep && *pos && *cut && (*rep)[i]) {
996
0
                char * l = strchr((*rep)[i], '=');
997
0
                size_t offset = 2 + i - (*pos)[i];
998
0
                strncpy(prep_word + offset, (*rep)[i], prep_word_size - offset - 1);
999
0
                prep_word[prep_word_size - 1] = '\0';
1000
0
                if (l) {
1001
0
                    hyph = (l - (*rep)[i]) - (*pos)[i];
1002
0
                    prep_word[2 + i + hyph] = '\0';
1003
0
                }
1004
0
            }
1005
0
            hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph,
1006
0
                hyphens2, &rep2, &pos2, &cut2, clhmin,
1007
0
                crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend));
1008
0
            for (j = 0; j < i - begin - 1; j++) {
1009
0
                hyphens[begin + j] = hyphens2[j];
1010
0
                if (rep2[j] && rep && pos && cut) {
1011
0
                    if (!*rep && !*pos && !*cut) {
1012
0
                        int k;
1013
0
                        *rep = (char **) malloc(sizeof(char *) * word_size);
1014
0
                        *pos = (int *) malloc(sizeof(int) * word_size);
1015
0
                        *cut = (int *) malloc(sizeof(int) * word_size);
1016
0
                        for (k = 0; k < word_size; k++) {
1017
0
                            (*rep)[k] = NULL;
1018
0
                            (*pos)[k] = 0;
1019
0
                            (*cut)[k] = 0;
1020
0
                        }
1021
0
                    }
1022
0
                    (*rep)[begin + j] = rep2[j];
1023
0
                    (*pos)[begin + j] = pos2[j];
1024
0
                    (*cut)[begin + j] = cut2[j];
1025
0
                }
1026
0
            }
1027
0
            prep_word[i + 2] = word[i + 1];
1028
0
            if (*rep && *pos && *cut && (*rep)[i]) {
1029
0
                size_t offset = 1;
1030
0
                strncpy(prep_word + offset, word, prep_word_size - offset - 1);
1031
0
                prep_word[prep_word_size - 1] = '\0';
1032
0
            }
1033
0
        }
1034
0
        begin = i + 1;
1035
0
        for (j = 0; j < word_size; j++) rep2[j] = NULL;
1036
0
     }
1037
     
1038
     // non-compound
1039
0
     if (begin == 0) {
1040
0
        hnj_hyphen_hyph_(dict->nextlevel, word, word_size,
1041
0
            hyphens, rep, pos, cut, clhmin, crhmin, lend, rend);
1042
0
        if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
1043
0
            rep, pos, cut, clhmin);
1044
0
        if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
1045
0
            rep, pos, cut, crhmin);
1046
0
     }
1047
     
1048
0
     free(rep2);
1049
0
     free(cut2);
1050
0
     free(pos2);
1051
0
     free(hyphens2);
1052
0
  }
1053
1054
0
  hnj_free (prep_word);
1055
0
  return 0;
1056
0
}
1057
1058
/* UTF-8 normalization of hyphen and non-standard positions
 *
 * Rewrites, in place, the byte-indexed results so that they are indexed by
 * UTF-8 character: hyphens[] is compacted to one entry per character and,
 * when rep/pos/cut and the arrays they point to are all non-NULL, each
 * pos/cut value is recounted in characters and moved, together with its rep
 * entry, to the character index.  The value kept for a multi-byte character
 * is the one stored at its last byte.
 *
 * word:       input word (bytes)
 * word_size:  number of bytes of word to process
 * hyphens:    byte-indexed hyphenation vector; NUL-terminated after the
 *             last character on return
 * rep, pos, cut: optional non-standard hyphenation arrays (may be NULL, or
 *             point to NULL arrays)
 *
 * Returns 1 (after printing a message to stderr) when the first byte of
 * word is a UTF-8 continuation byte, 0 otherwise.
 */
int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
  char *** rep, int ** pos, int ** cut)
{
  int i, j, k;
  /* the pointers never change inside the loop, so test them only once */
  int has_repl = rep && pos && cut && *rep && *pos && *cut;

  if ((((unsigned char) word[0]) >> 6) == 2) {
    fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word);
    return 1;
  }

  /* calculate UTF-8 character positions */
  for (i = 0, j = -1; i < word_size; i++) {
    /* beginning of an UTF-8 character (not '10' start bits) */
    if ((((unsigned char) word[i]) >> 6) != 2) j++;
    hyphens[j] = hyphens[i];
    if (has_repl) {
        int back = (*pos)[i];
        /* byte range [first, last) covered by the cut; computed in a wide
           type so that extreme pos/cut values cannot overflow */
        long long first = (long long) i - back + 1;
        long long last = first + (*cut)[i];
        long long p;
        int count = 0;

        /* characters among the `back` bytes ending at byte i; never look
           before the start of the word */
        for (k = 0; k < back && k <= i; k++) {
            if ((((unsigned char) word[i - k]) >> 6) != 2) count++;
        }
        (*pos)[j] = count;

        /* characters inside the cut range, clamped to the word's bytes */
        if (first < 0) first = 0;
        if (last > word_size) last = word_size;
        count = 0;
        for (p = first; p < last; p++) {
            if ((((unsigned char) word[p]) >> 6) != 2) count++;
        }
        (*cut)[j] = count;

        (*rep)[j] = (*rep)[i];
        if (j < i) {
            /* the entry has moved to the character index: clear the old slot */
            (*rep)[i] = NULL;
            (*pos)[i] = 0;
            (*cut)[i] = 0;
        }
    }
  }
  hyphens[j + 1] = '\0';
#ifdef VERBOSE
  printf ("nums: %s\n", hyphens);
#endif
  return 0;
}
1099
1100
/* get the word with all possible hyphenations (output: hyphword)
 *
 * Copies word into hyphword, writing '=' after every byte whose hyphens[]
 * value is odd.  When rep/pos/cut are supplied and (*rep)[i] is set for such
 * a byte, the replacement string is written instead, starting (*pos)[i] - 1
 * bytes before that byte, and the input index is advanced by
 * (*cut)[i] - (*pos)[i] bytes.
 *
 * word, l:    input word and its byte length
 * hyphens:    hyphenation vector, one entry per byte of word
 * hyphword:   output buffer, NUL-terminated on return
 * rep, pos, cut: optional non-standard hyphenation data; the pointers
 *             themselves, or the arrays they point to, may be NULL
 *
 * NOTE(review): replacement copies are limited to the first l + 5 bytes of
 * hyphword, as in the original code; confirm this bound against the buffer
 * size callers really provide for hyphword.
 */
void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, 
    char * hyphword, char *** rep, int ** pos, int ** cut)
{
  int hyphenslen = l + 5;
  /* rep/pos/cut may be NULL pointers as well as pointers to NULL arrays */
  int has_repl = rep && pos && cut && *rep && *pos && *cut;
  int i, j;

  for (i = 0, j = 0; i < l; i++, j++) {
    hyphword[j] = word[i];
    if (!(hyphens[i]&1)) continue;

    if (has_repl && (*rep)[i]) {
      /* signed arithmetic: an offset at or beyond the bound must not wrap
         around into a huge strncpy() length */
      long start = (long) j - (*pos)[i] + 1;
      long room = (long) hyphenslen - 1 - start;
      if (start >= 0 && room > 0) {
        strncpy(hyphword + start, (*rep)[i], (size_t) room);
        hyphword[hyphenslen-1] = '\0';
        j += (int) strlen((*rep)[i]) - (*pos)[i];
        i += (*cut)[i] - (*pos)[i];
        continue;
      }
      /* no room for the replacement: fall back to a plain hyphen mark */
    }
    hyphword[++j] = '=';
  }
  hyphword[j] = '\0';
}
1121
1122
1123
/* main api function with default hyphenmin parameters */
1124
int hnj_hyphen_hyphenate2 (HyphenDict *dict,
1125
         const char *word, int word_size, char * hyphens,
1126
         char *hyphword, char *** rep, int ** pos, int ** cut)
1127
0
{
1128
0
  hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
1129
0
    dict->clhmin, dict->crhmin, 1, 1);
1130
0
  hnj_hyphen_lhmin(dict->utf8, word, word_size,
1131
0
    hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2));
1132
0
  hnj_hyphen_rhmin(dict->utf8, word, word_size,
1133
0
    hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2));
1134
1135
  /* nohyphen */
1136
0
  if (dict->nohyphen) {
1137
0
    char * nh = dict->nohyphen;
1138
0
    int nhi;
1139
0
    for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
1140
0
        char * nhy = (char *) strstr(word, nh);
1141
0
        while (nhy) {
1142
0
            hyphens[nhy - word + strlen(nh) - 1] = '0';
1143
0
            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = '0';
1144
0
            nhy = (char *) strstr(nhy + 1, nh);
1145
0
        }
1146
0
        nh = nh + strlen(nh) + 1;
1147
0
    }
1148
0
  }
1149
1150
0
  if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
1151
0
  if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
1152
#ifdef VERBOSE
1153
  printf ("nums: %s\n", hyphens);
1154
#endif
1155
0
  return 0;
1156
0
}
1157
1158
/* previous main api function with hyphenmin parameters */
1159
int hnj_hyphen_hyphenate3 (HyphenDict *dict,
1160
  const char *word, int word_size, char * hyphens,
1161
  char *hyphword, char *** rep, int ** pos, int ** cut,
1162
  int lhmin, int rhmin, int clhmin, int crhmin)
1163
0
{
1164
0
  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
1165
0
  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
1166
0
  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
1167
0
  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
1168
0
  hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
1169
0
    clhmin, crhmin, 1, 1);
1170
0
  hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
1171
0
    rep, pos, cut, (lhmin > 0 ? lhmin : 2));
1172
0
  hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
1173
0
    rep, pos, cut, (rhmin > 0 ? rhmin : 2));
1174
0
  if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
1175
1176
  /* nohyphen */
1177
0
  if (dict->nohyphen) {
1178
0
    char * nh = dict->nohyphen;
1179
0
    int nhi;
1180
0
    for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
1181
0
        char * nhy = (char *) strstr(word, nh);
1182
0
        while (nhy) {
1183
0
            hyphens[nhy - word + strlen(nh) - 1] = 0;
1184
0
            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = 0;
1185
0
            nhy = (char *) strstr(nhy + 1, nh);
1186
0
        }
1187
0
        nh = nh + strlen(nh) + 1;
1188
0
    }
1189
0
  }
1190
1191
0
  if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
1192
0
  return 0;
1193
0
}