Coverage Report

Created: 2025-07-07 10:01

/work/workdir/UnpackedTarball/libexttextcat/src/fingerprint.c
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/**
3
 * fingerprint.c -- Routines for creating an n-gram fingerprint of a
4
 * buffer.
5
 *
6
 * Copyright (c) 2003, WiseGuys Internet B.V.
7
 * All rights reserved.
8
 *
9
 * THE BSD LICENSE
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 *
15
 * - Redistributions of source code must retain the above copyright
16
 * notice, this list of conditions and the following disclaimer.
17
 *
18
 * - Redistributions in binary form must reproduce the above copyright
19
 * notice, this list of conditions and the following disclaimer in the
20
 * documentation and/or other materials provided with the
21
 * distribution.
22
 *
23
 * - Neither the name of the WiseGuys Internet B.V. nor the names of
24
 * its contributors may be used to endorse or promote products derived
25
 * from this software without specific prior written permission.
26
 *
27
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
 *
39
 * DESCRIPTION
40
 *
41
 * A fingerprint is a list of most common n-grams, ordered by
42
 * frequency. (Note that we can use other strings than n-grams, for
43
 * instance entire words.)
44
 *
45
 * HOW DOES IT WORK?
46
 *
47
 * - Buffer is sliced up into n-grams
48
 * - N-grams are inserted into a hash table that records their frequency
49
 * - The table entries are filtered through a N-sized heap to
50
 *   get the N most frequent n-grams.
51
 *
52
 * The reason why we go through the trouble of doing a partial
53
 * (heap)sort is that a full quicksort behaves horribly on the data:
54
 * most n-grams have a very low count, resulting in a data set in
55
 * nearly-sorted order. This causes quicksort to behave very badly.
56
 * Heapsort, on the other hand, behaves handsomely: worst case is
57
 * Mlog(N) for M n-grams filtered through a N-sized heap.
58
 *
59
 * REVISION HISTORY
60
 *
61
 * Mar 28, 2003 frank@wise-guys.nl -- created
62
 *
63
 * TODO:
64
 * - put table/heap datastructure in a separate file.
65
 */
66
67
#ifdef HAVE_CONFIG_H
68
#include "config.h"
69
#endif
70
#include <stdio.h>
71
#include <stdlib.h>
72
#include <string.h>
73
#include <ctype.h>
74
75
#include "common_impl.h"
76
#include "wg_mempool.h"
77
#include "constants.h"
78
79
#include "utf8misc.h"
80
#include "fingerprint.h"
81
82
0
#define TABLESIZE  (1<<TABLEPOW)
83
0
#define TABLEMASK  ((TABLESIZE)-1)
84
85
typedef struct
86
{
87
88
    sint2 rank;
89
    char str[MAXNGRAMSIZE + 1];
90
91
} ngram_t;
92
93
typedef struct fp_s
94
{
95
96
    const char *name;
97
    ngram_t *fprint;
98
    uint4 size;
99
    uint4 mindocsize;
100
    boole utfaware;
101
102
} fp_t;
103
104
typedef struct entry_s
105
{
106
    char str[MAXNGRAMSIZE + 1];
107
    unsigned int cnt;
108
    struct entry_s *next;
109
} entry_t;
110
111
typedef struct table_s
112
{
113
    void *pool;
114
    entry_t **table;
115
    entry_t *heap;
116
117
    struct table_s *next;
118
119
    uint4 heapsize;
120
    uint4 size;
121
} table_t;
122
123
/* 
124
 * fast and furious little hash function
125
 *
126
 * (Note that we could use some kind of rolling checksum, and update it
127
 * during n-gram construction)
128
 */
129
static uint4 simplehash(const char *p, int len)
130
0
{
131
0
    uint4 h = len * 13;
132
0
    while (*p)
133
0
    {
134
0
        h = (h << 5) - h + *p++;
135
0
    }
136
0
    return h;
137
0
}
138
139
/* increases frequency of ngram(p,len) */
140
static int increasefreq(table_t * t, char *p, int len,
141
                        int issameimpl(char *, char *, int))
142
0
{
143
0
    uint4 hash = simplehash(p, len) & TABLEMASK;
144
0
    entry_t *entry = t->table[hash];
145
146
0
    while (entry)
147
0
    {
148
0
        if (issameimpl(entry->str, p, len))
149
0
        {
150
            /*** Found it! ***/
151
0
            entry->cnt++;
152
0
            return 1;
153
0
        }
154
0
        else
155
0
        {
156
0
            entry = entry->next;
157
0
        }
158
0
    }
159
160
    /*** Not found, so create ***/
161
0
    entry = (entry_t *) (wgmempool_alloc(t->pool, sizeof(entry_t)));
162
0
    strncpy(entry->str, p, MAXNGRAMSIZE);
163
0
    entry->str[MAXNGRAMSIZE] = 0;
164
0
    entry->cnt = 1;
165
166
0
    entry->next = t->table[hash];
167
0
    t->table[hash] = entry;
168
169
0
    return 1;
170
0
}
171
172
0
#define GREATER(x,y) ((x).cnt > (y).cnt)
173
0
#define LESS(x,y)    ((x).cnt < (y).cnt)
174
175
static void siftup(table_t * t, unsigned int child)
176
0
{
177
0
    entry_t *heap = t->heap;
178
0
    unsigned int parent = (child - 1) >> 1;
179
0
    entry_t tmp;
180
181
0
    while (child > 0)
182
0
    {
183
0
        if (GREATER(heap[parent], heap[child]))
184
0
        {
185
0
            memcpy(&tmp, &heap[parent], sizeof(entry_t));
186
0
            memcpy(&heap[parent], &heap[child], sizeof(entry_t));
187
0
            memcpy(&heap[child], &tmp, sizeof(entry_t));
188
0
        }
189
0
        else
190
0
            return;
191
192
0
        child = parent;
193
0
        parent = (child - 1) >> 1;
194
0
    }
195
0
}
196
197
static void siftdown(table_t * t, unsigned int heapsize, uint4 parent)
198
0
{
199
0
    entry_t *heap = t->heap;
200
0
    unsigned int child = parent * 2 + 1;
201
0
    entry_t tmp;
202
203
0
    while (child < heapsize)
204
0
    {
205
0
        if (child + 1 < heapsize && GREATER(heap[child], heap[child + 1]))
206
0
        {
207
0
            child++;
208
0
        }
209
0
        if (GREATER(heap[parent], heap[child]))
210
0
        {
211
0
            memcpy(&tmp, &heap[parent], sizeof(entry_t));
212
0
            memcpy(&heap[parent], &heap[child], sizeof(entry_t));
213
0
            memcpy(&heap[child], &tmp, sizeof(entry_t));
214
0
        }
215
0
        else
216
0
            return;
217
0
        parent = child;
218
0
        child = (parent * 2) + 1;
219
0
    }
220
0
}
221
222
static int heapinsert(table_t * t, entry_t * item)
223
0
{
224
0
    entry_t *heap = t->heap;
225
226
    /*** Still room for an entry? ***/
227
0
    if (t->size < t->heapsize)
228
0
    {
229
0
        memcpy(&(heap[t->size]), item, sizeof(entry_t));
230
0
        siftup(t, t->size);
231
0
        t->size++;
232
0
        return 0;
233
0
    }
234
235
    /*** Worse than the worst performer? ***/
236
0
    if (LESS(*item, heap[0]))
237
0
    {
238
0
        return 0;
239
0
    }
240
241
    /*** Insert into heap and reheap ***/
242
0
    memcpy(&(heap[0]), item, sizeof(entry_t));
243
0
    siftdown(t, t->size, 0);
244
0
    return 0;
245
0
}
246
247
static int heapextract(table_t * t, entry_t * item)
248
0
{
249
0
    entry_t *p;
250
251
0
    if (t->size == 0)
252
0
        return 0;
253
254
0
    p = &(t->heap[0]);
255
256
0
    memcpy(item, p, sizeof(entry_t));
257
0
    if (t->size > 1)
258
0
        memcpy(&(t->heap[0]), &(t->heap[t->size - 1]), sizeof(entry_t));
259
260
0
    siftdown(t, t->size, 0);
261
0
    t->size--;
262
263
0
    return 1;
264
0
}
265
266
/*** Makes a heap of all table entries ***/
267
static int table2heap(table_t * t)
268
0
{
269
0
    int i;
270
271
    /*** Fill result heap ***/
272
0
    for (i = 0; i < TABLESIZE; i++)
273
0
    {
274
0
        entry_t *p = t->table[i];
275
0
        while (p)
276
0
        {
277
0
            heapinsert(t, p);
278
0
            p = p->next;
279
0
        }
280
0
    }
281
282
0
    return 1;
283
0
}
284
285
static table_t *inittable(uint4 maxngrams)
286
0
{
287
0
    table_t *result = (table_t *) calloc(1, sizeof(table_t));
288
0
    result->table = (entry_t **) calloc(1, sizeof(entry_t *) * TABLESIZE);
289
0
    result->pool = wgmempool_Init(10000, 10);
290
291
0
    result->heap = (entry_t *) malloc(sizeof(entry_t) * maxngrams);
292
0
    result->heapsize = maxngrams;
293
0
    result->size = 0;
294
295
0
    return result;
296
0
}
297
298
static void tabledone(table_t * t)
299
0
{
300
0
    if (!t)
301
0
        return;
302
303
0
    wgmempool_Done(t->pool);
304
0
    free(t->table);
305
0
    free(t->heap);
306
0
    free(t);
307
0
}
308
309
extern void *fp_Init(const char *name)
310
0
{
311
0
    fp_t *h = (fp_t *) calloc(1, sizeof(fp_t));
312
313
0
    h->utfaware = TC_TRUE;
314
0
    h->mindocsize = MINDOCSIZE;
315
316
0
    if (name)
317
0
        h->name = strdup(name);
318
319
0
    return (void *)h;
320
0
}
321
322
extern void fp_Done(void *handle)
323
0
{
324
0
    fp_t *h = (fp_t *) handle;
325
326
0
    if (h->name)
327
0
    {
328
0
        free((void *)h->name);
329
0
    }
330
0
    if (h->fprint)
331
0
    {
332
0
        free(h->fprint);
333
0
    }
334
335
0
    free(h);
336
0
}
337
338
extern const char *fp_Name(void *handle)
339
0
{
340
0
    fp_t *h = (fp_t *) handle;
341
0
    return h->name;
342
0
}
343
344
/**
345
 * Function that prepares buffer for n-grammification:
346
 * runs of invalid characters are collapsed to a single
347
 * underscore.
348
 *
349
 * Function is implemented as a finite state machine.
350
 */
351
static char *prepbuffer(const char *src, size_t bufsize, uint4 mindocsize)
352
0
{
353
0
    const char *p = src;
354
0
    char *dest = (char *)malloc(bufsize + 3);
355
0
    char *w = dest;
356
0
    char *wlimit = dest + bufsize + 1;
357
358
0
    if (INVALID(*p))
359
0
    {
360
0
        goto SPACE;
361
0
    }
362
0
    else if (*p == '\0')
363
0
    {
364
0
        goto END;
365
0
    }
366
367
0
    *w++ = '_';
368
0
    if (w == wlimit)
369
0
    {
370
0
        goto STOP;
371
0
    }
372
373
0
    goto WORD;
374
375
376
0
  SPACE:
377
    /*** Inside string of invalid characters ***/
378
0
    p++;
379
0
    if (INVALID(*p))
380
0
    {
381
0
        goto SPACE;
382
0
    }
383
0
    else if (*p == '\0')
384
0
    {
385
0
        goto END;
386
0
    }
387
388
0
    *w++ = '_';
389
0
    if (w == wlimit)
390
0
    {
391
0
        goto STOP;
392
0
    }
393
394
0
    goto WORD;
395
396
0
  WORD:
397
    /*** Inside string of valid characters ***/
398
0
    *w++ = *p++;
399
0
    if (w == wlimit)
400
0
    {
401
0
        goto END;
402
0
    }
403
0
    else if (INVALID(*p))
404
0
    {
405
0
        goto SPACE;
406
0
    }
407
0
    else if (*p == '\0')
408
0
    {
409
0
        goto STOP;
410
0
    }
411
0
    goto WORD;
412
413
0
  END:
414
0
    *w++ = '_';
415
416
0
  STOP:
417
0
    *w++ = '\0';
418
419
    /*** Docs that are too small for a fingerprint, are refused ***/
420
0
    if (w - dest < mindocsize)
421
0
    {
422
0
        free(dest);
423
0
        return NULL;
424
0
    }
425
426
0
    return dest;
427
0
}
428
429
/**
430
* this function extract all n-gram from past buffer and put them into the table "t"
431
* [modified] by Jocelyn Merand to accept utf-8 multi-character symbols to be used in OpenOffice
432
*/
433
static void utfcreatengramtable(table_t * t, const char *buf)
434
0
{
435
0
    char n[MAXNGRAMSIZE + 1];
436
0
    const char *p = buf;
437
0
    int i, decay;
438
439
    /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
440
0
    while (1)
441
0
    {
442
443
0
        const char *q = p;
444
0
        char *m = n;
445
446
        /*** First char may be an underscore ***/
447
0
        decay = utf8_charcopy(q, m);    /* [modified] previously *q++ = *m++ */
448
449
0
        q += decay;             /* [modified] */
450
0
        m += decay;             /* [modified] */
451
0
        *m = '\0';
452
453
0
        increasefreq(t, n, 1, utf8_issame);
454
455
456
0
        if (*q == '\0')
457
0
            return;
458
459
        /*** Let the compiler unroll this ***/
460
0
        for (i = 2; i <= MAXNGRAMSYMBOL; i++)
461
0
        {
462
0
            decay = utf8_charcopy(q, m);    /* [modified] like above */
463
0
            m += decay;
464
0
            *m = '\0';
465
466
0
            increasefreq(t, n, i, utf8_issame);
467
468
0
            if (*q == '_')
469
0
                break;
470
0
            q += decay;
471
0
            if (*q == '\0')
472
0
                return;
473
0
        }
474
475
0
        p = utf8_next_char(p);  /* [modified] */
476
0
    }
477
0
    return;
478
0
}
479
480
/* checks if n-gram lex is a prefix of key and of length len */
481
static int issame(char *lex, char *key, int len)
482
0
{
483
0
    int i;
484
0
    for (i = 0; i < len; i++)
485
0
    {
486
0
        if (key[i] != lex[i])
487
0
        {
488
0
            return 0;
489
0
        }
490
0
    }
491
0
    if (lex[i] != 0)
492
0
    {
493
0
        return 0;
494
0
    }
495
0
    return 1;
496
0
}
497
498
static void createngramtable(table_t * t, const char *buf)
499
0
{
500
0
    char n[MAXNGRAMSIZE + 1];
501
0
    const char *p = buf;
502
0
    int i;
503
504
    /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
505
0
    for (;; p++)
506
0
    {
507
508
0
        const char *q = p;
509
0
        char *m = n;
510
511
        /*** First char may be an underscore ***/
512
0
        *m++ = *q++;
513
0
        *m = '\0';
514
515
0
        increasefreq(t, n, 1, issame);
516
517
0
        if (*q == '\0')
518
0
        {
519
0
            return;
520
0
        }
521
522
        /*** Let the compiler unroll this ***/
523
0
        for (i = 2; i <= MAXNGRAMSIZE; i++)
524
0
        {
525
526
0
            *m++ = *q;
527
0
            *m = '\0';
528
529
0
            increasefreq(t, n, i, issame);
530
531
0
            if (*q == '_')
532
0
                break;
533
0
            q++;
534
0
            if (*q == '\0')
535
0
            {
536
0
                return;
537
0
            }
538
0
        }
539
0
    }
540
0
    return;
541
0
}
542
543
544
static int mystrcmp(const char *a, const char *b)
545
0
{
546
0
    while (*a && *a == *b)
547
0
    {
548
0
        a++;
549
0
        b++;
550
0
    }
551
0
    return (*a - *b);
552
0
}
553
554
555
static int ngramcmp_str(const void *a, const void *b)
556
0
{
557
0
    ngram_t *x = (ngram_t *) a;
558
0
    ngram_t *y = (ngram_t *) b;
559
560
0
    return mystrcmp(x->str, y->str);
561
0
}
562
563
static int ngramcmp_rank(const void *a, const void *b)
564
0
{
565
0
    ngram_t *x = (ngram_t *) a;
566
0
    ngram_t *y = (ngram_t *) b;
567
568
0
    return x->rank - y->rank;
569
0
}
570
571
extern int fp_SetProperty(void *handle, textcat_Property property, sint4 value)
572
0
{
573
0
    fp_t *h = (fp_t *) handle;
574
0
    switch (property)
575
0
    {
576
0
    case TCPROP_UTF8AWARE:
577
0
        if ((value == TC_TRUE) || (value == TC_FALSE))
578
0
        {
579
0
            h->utfaware = value;
580
0
            return 0;
581
0
        }
582
0
        return -2;
583
0
        break;
584
0
    case TCPROP_MINIMUM_DOCUMENT_SIZE:
585
0
        h->mindocsize = (uint4) value;
586
0
        return 0;
587
0
        break;
588
0
    default:
589
0
        break;
590
0
    }
591
0
    return -1;
592
0
}
593
594
/**
595
 * Create a fingerprint:
596
 * - record the frequency of each unique n-gram in a hash table
597
 * - take the most frequent n-grams
598
 * - sort them alphabetically, recording their relative rank
599
 */
600
extern int fp_Create(void *handle, const char *buffer, uint4 bufsize,
601
                     uint4 maxngrams)
602
0
{
603
0
    sint4 i = 0;
604
0
    table_t *t = NULL;
605
0
    char *tmp = NULL;
606
607
0
    fp_t *h = (fp_t *) handle;
608
609
0
    if (bufsize < h->mindocsize)
610
0
        return 0;
611
612
    /*** Throw out all invalid chars ***/
613
0
    tmp = prepbuffer(buffer, bufsize, h->mindocsize);
614
    /* printf("Cleaned buffer : %s\n",tmp); */
615
0
    if (tmp == NULL)
616
0
        return 0;
617
0
    t = inittable(maxngrams);
618
    /* printf("Table initialized\n"); */
619
620
    /*** Create a hash table containing n-gram counts ***/
621
0
    if (h->utfaware)
622
0
    {
623
0
        utfcreatengramtable(t, tmp);
624
0
    }
625
0
    else
626
0
    {
627
0
        createngramtable(t, tmp);
628
0
    }
629
    /* printf("Table created\n"); */
630
    /*** Take the top N n-grams and add them to the profile ***/
631
0
    table2heap(t);
632
0
    maxngrams = WGMIN(maxngrams, t->size);
633
634
0
    h->fprint = (ngram_t *) malloc(sizeof(ngram_t) * maxngrams);
635
0
    h->size = maxngrams;
636
637
    /*** Pull n-grams out of heap (backwards) ***/
638
0
    for (i = maxngrams - 1; i >= 0; i--)
639
0
    {
640
0
        entry_t tmp2;
641
642
0
        heapextract(t, &tmp2);
643
644
        /*** the string and its rank is all we need ***/
645
0
        strcpy(h->fprint[i].str, tmp2.str);
646
0
        h->fprint[i].rank = i;
647
0
    }
648
649
0
    tabledone(t);
650
0
    free(tmp);
651
652
    /*** Sort n-grams alphabetically, for easy comparison ***/
653
0
    qsort(h->fprint, h->size, sizeof(ngram_t), ngramcmp_str);
654
0
    return 1;
655
0
}
656
657
extern int fp_Read(void *handle, const char *fname, int maxngrams)
658
0
{
659
0
    fp_t *h = (fp_t *) handle;
660
0
    FILE *fp;
661
0
    char line[1024];
662
0
    int cnt = 0;
663
664
0
    fp = fopen(fname, "r");
665
0
    if (!fp)
666
0
    {
667
0
#ifdef VERBOSE
668
0
        fprintf(stderr, "Failed to open fingerprint file '%s'\n", fname);
669
0
#endif
670
0
        return 0;
671
0
    }
672
673
0
    h->fprint = (ngram_t *) malloc(maxngrams * sizeof(ngram_t));
674
675
0
    while (cnt < maxngrams && wg_getline(line, 1024, fp))
676
0
    {
677
678
0
        char *p;
679
680
0
        wg_trim(line, line);
681
682
0
        p = strpbrk(line, " \t");
683
0
        if (p)
684
0
        {
685
0
            *p = '\0';
686
0
        }
687
688
0
        if (strlen(line) > MAXNGRAMSIZE)
689
0
        {
690
0
            continue;
691
0
        }
692
693
0
        strcpy(h->fprint[cnt].str, line);
694
0
        h->fprint[cnt].rank = cnt;
695
696
0
        cnt++;
697
0
    }
698
699
0
    h->size = cnt;
700
701
    /*** Sort n-grams, for easy comparison later on ***/
702
0
    qsort(h->fprint, h->size, sizeof(ngram_t), ngramcmp_str);
703
704
0
    fclose(fp);
705
706
0
    return 1;
707
0
}
708
709
extern void fp_Print(void *handle, FILE * fp)
710
0
{
711
0
    uint4 i;
712
0
    fp_t *h = (fp_t *) handle;
713
0
    ngram_t *tmp = (ngram_t *) malloc(sizeof(ngram_t) * h->size);
714
715
    /*** Make a temporary and sort it on rank ***/
716
0
    memcpy(tmp, h->fprint, h->size * sizeof(ngram_t));
717
0
    qsort(tmp, h->size, sizeof(ngram_t), ngramcmp_rank);
718
719
0
    for (i = 0; i < h->size; i++)
720
0
    {
721
        /* fprintf( fp, "%s\t%i\n", tmp[i].str, tmp[i].rank ); */
722
0
        fprintf(fp, "%s\n", tmp[i].str);
723
0
    }
724
0
    free(tmp);
725
0
}
726
727
728
729
extern sint4 fp_Compare(void *cat, void *unknown, int cutoff)
730
0
{
731
0
    fp_t *c = (fp_t *) cat;
732
0
    fp_t *u = (fp_t *) unknown;
733
0
    uint4 i = 0;
734
0
    uint4 j = 0;
735
0
    sint4 sum = 0;
736
737
    /*** Compare the profiles in mergesort fashion ***/
738
0
    while (i < c->size && j < u->size)
739
0
    {
740
741
0
        int cmp = mystrcmp(c->fprint[i].str, u->fprint[j].str);
742
743
0
        if (cmp < 0)
744
0
        {
745
0
            i++;
746
0
        }
747
0
        else if (cmp == 0)
748
0
        {
749
0
            sum += abs(c->fprint[i].rank - u->fprint[j].rank);
750
0
            if (sum > cutoff)
751
0
                return MAXSCORE;
752
0
            i++;
753
0
            j++;
754
0
        }
755
0
        else
756
0
        {
757
0
            sum += MAXOUTOFPLACE;
758
0
            if (sum > cutoff)
759
0
                return MAXSCORE;
760
0
            j++;
761
0
        }
762
0
    }
763
764
    /*** Process tail of unknown, if any ***/
765
0
    while (j < u->size)
766
0
    {
767
0
        sum += MAXOUTOFPLACE;
768
0
        if (sum > cutoff)
769
0
            return MAXSCORE;
770
0
        j++;
771
0
    }
772
773
0
    return sum;
774
0
}
775
776
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */