Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/propsvec.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2002-2011, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  propsvec.c
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002feb22
16
*   created by: Markus W. Scherer
17
*
18
*   Store bits (Unicode character properties) in bit set vectors.
19
*/
20
21
#include <stdlib.h>
22
#include "unicode/utypes.h"
23
#include "cmemory.h"
24
#include "utrie.h"
25
#include "utrie2.h"
26
#include "uarrsort.h"
27
#include "propsvec.h"
28
#include "uassert.h"
29
30
struct UPropsVectors {
31
    uint32_t *v;
32
    int32_t columns;  /* number of columns, plus two for start & limit values */
33
    int32_t maxRows;
34
    int32_t rows;
35
    int32_t prevRow;  /* search optimization: remember last row seen */
36
    UBool isCompacted;
37
};
38
39
0
#define UPVEC_INITIAL_ROWS (1<<12)
40
0
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
41
0
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
42
43
U_CAPI UPropsVectors * U_EXPORT2
44
0
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
45
0
    UPropsVectors *pv;
46
0
    uint32_t *v, *row;
47
0
    uint32_t cp;
48
0
49
0
    if(U_FAILURE(*pErrorCode)) {
50
0
        return NULL;
51
0
    }
52
0
    if(columns<1) {
53
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
54
0
        return NULL;
55
0
    }
56
0
    columns+=2; /* count range start and limit columns */
57
0
58
0
    pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
59
0
    v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
60
0
    if(pv==NULL || v==NULL) {
61
0
        uprv_free(pv);
62
0
        uprv_free(v);
63
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
64
0
        return NULL;
65
0
    }
66
0
    uprv_memset(pv, 0, sizeof(UPropsVectors));
67
0
    pv->v=v;
68
0
    pv->columns=columns;
69
0
    pv->maxRows=UPVEC_INITIAL_ROWS;
70
0
    pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
71
0
72
0
    /* set the all-Unicode row and the special-value rows */
73
0
    row=pv->v;
74
0
    uprv_memset(row, 0, pv->rows*columns*4);
75
0
    row[0]=0;
76
0
    row[1]=0x110000;
77
0
    row+=columns;
78
0
    for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
79
0
        row[0]=cp;
80
0
        row[1]=cp+1;
81
0
        row+=columns;
82
0
    }
83
0
    return pv;
84
0
}
85
86
U_CAPI void U_EXPORT2
87
0
upvec_close(UPropsVectors *pv) {
88
0
    if(pv!=NULL) {
89
0
        uprv_free(pv->v);
90
0
        uprv_free(pv);
91
0
    }
92
0
}
93
94
static uint32_t *
95
0
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
96
0
    uint32_t *row;
97
0
    int32_t columns, i, start, limit, prevRow;
98
0
99
0
    columns=pv->columns;
100
0
    limit=pv->rows;
101
0
    prevRow=pv->prevRow;
102
0
103
0
    /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
104
0
    row=pv->v+prevRow*columns;
105
0
    if(rangeStart>=(UChar32)row[0]) {
106
0
        if(rangeStart<(UChar32)row[1]) {
107
0
            /* same row as last seen */
108
0
            return row;
109
0
        } else if(rangeStart<(UChar32)(row+=columns)[1]) {
110
0
            /* next row after the last one */
111
0
            pv->prevRow=prevRow+1;
112
0
            return row;
113
0
        } else if(rangeStart<(UChar32)(row+=columns)[1]) {
114
0
            /* second row after the last one */
115
0
            pv->prevRow=prevRow+2;
116
0
            return row;
117
0
        } else if((rangeStart-(UChar32)row[1])<10) {
118
0
            /* we are close, continue looping */
119
0
            prevRow+=2;
120
0
            do {
121
0
                ++prevRow;
122
0
                row+=columns;
123
0
            } while(rangeStart>=(UChar32)row[1]);
124
0
            pv->prevRow=prevRow;
125
0
            return row;
126
0
        }
127
0
    } else if(rangeStart<(UChar32)pv->v[1]) {
128
0
        /* the very first row */
129
0
        pv->prevRow=0;
130
0
        return pv->v;
131
0
    }
132
0
133
0
    /* do a binary search for the start of the range */
134
0
    start=0;
135
0
    while(start<limit-1) {
136
0
        i=(start+limit)/2;
137
0
        row=pv->v+i*columns;
138
0
        if(rangeStart<(UChar32)row[0]) {
139
0
            limit=i;
140
0
        } else if(rangeStart<(UChar32)row[1]) {
141
0
            pv->prevRow=i;
142
0
            return row;
143
0
        } else {
144
0
            start=i;
145
0
        }
146
0
    }
147
0
148
0
    /* must be found because all ranges together always cover all of Unicode */
149
0
    pv->prevRow=start;
150
0
    return pv->v+start*columns;
151
0
}
152
153
U_CAPI void U_EXPORT2
154
upvec_setValue(UPropsVectors *pv,
155
               UChar32 start, UChar32 end,
156
               int32_t column,
157
               uint32_t value, uint32_t mask,
158
0
               UErrorCode *pErrorCode) {
159
0
    uint32_t *firstRow, *lastRow;
160
0
    int32_t columns;
161
0
    UChar32 limit;
162
0
    UBool splitFirstRow, splitLastRow;
163
0
164
0
    /* argument checking */
165
0
    if(U_FAILURE(*pErrorCode)) {
166
0
        return;
167
0
    }
168
0
    if( pv==NULL ||
169
0
        start<0 || start>end || end>UPVEC_MAX_CP ||
170
0
        column<0 || column>=(pv->columns-2)
171
0
    ) {
172
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
173
0
        return;
174
0
    }
175
0
    if(pv->isCompacted) {
176
0
        *pErrorCode=U_NO_WRITE_PERMISSION;
177
0
        return;
178
0
    }
179
0
    limit=end+1;
180
0
181
0
    /* initialize */
182
0
    columns=pv->columns;
183
0
    column+=2; /* skip range start and limit columns */
184
0
    value&=mask;
185
0
186
0
    /* find the rows whose ranges overlap with the input range */
187
0
188
0
    /* find the first and last rows, always successful */
189
0
    firstRow=_findRow(pv, start);
190
0
    lastRow=_findRow(pv, end);
191
0
192
0
    /*
193
0
     * Rows need to be split if they partially overlap with the
194
0
     * input range (only possible for the first and last rows)
195
0
     * and if their value differs from the input value.
196
0
     */
197
0
    splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
198
0
    splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
199
0
200
0
    /* split first/last rows if necessary */
201
0
    if(splitFirstRow || splitLastRow) {
202
0
        int32_t count, rows;
203
0
204
0
        rows=pv->rows;
205
0
        if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
206
0
            uint32_t *newVectors;
207
0
            int32_t newMaxRows;
208
0
209
0
            if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
210
0
                newMaxRows=UPVEC_MEDIUM_ROWS;
211
0
            } else if(pv->maxRows<UPVEC_MAX_ROWS) {
212
0
                newMaxRows=UPVEC_MAX_ROWS;
213
0
            } else {
214
0
                /* Implementation bug, or UPVEC_MAX_ROWS too low. */
215
0
                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
216
0
                return;
217
0
            }
218
0
            newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
219
0
            if(newVectors==NULL) {
220
0
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
221
0
                return;
222
0
            }
223
0
            uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4);
224
0
            firstRow=newVectors+(firstRow-pv->v);
225
0
            lastRow=newVectors+(lastRow-pv->v);
226
0
            uprv_free(pv->v);
227
0
            pv->v=newVectors;
228
0
            pv->maxRows=newMaxRows;
229
0
        }
230
0
231
0
        /* count the number of row cells to move after the last row, and move them */
232
0
        count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
233
0
        if(count>0) {
234
0
            uprv_memmove(
235
0
                lastRow+(1+splitFirstRow+splitLastRow)*columns,
236
0
                lastRow+columns,
237
0
                count*4);
238
0
        }
239
0
        pv->rows=rows+splitFirstRow+splitLastRow;
240
0
241
0
        /* split the first row, and move the firstRow pointer to the second part */
242
0
        if(splitFirstRow) {
243
0
            /* copy all affected rows up one and move the lastRow pointer */
244
0
            count = (int32_t)((lastRow-firstRow)+columns);
245
0
            uprv_memmove(firstRow+columns, firstRow, (size_t)count*4);
246
0
            lastRow+=columns;
247
0
248
0
            /* split the range and move the firstRow pointer */
249
0
            firstRow[1]=firstRow[columns]=(uint32_t)start;
250
0
            firstRow+=columns;
251
0
        }
252
0
253
0
        /* split the last row */
254
0
        if(splitLastRow) {
255
0
            /* copy the last row data */
256
0
            uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4);
257
0
258
0
            /* split the range and move the firstRow pointer */
259
0
            lastRow[1]=lastRow[columns]=(uint32_t)limit;
260
0
        }
261
0
    }
262
0
263
0
    /* set the "row last seen" to the last row for the range */
264
0
    pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
265
0
266
0
    /* set the input value in all remaining rows */
267
0
    firstRow+=column;
268
0
    lastRow+=column;
269
0
    mask=~mask;
270
0
    for(;;) {
271
0
        *firstRow=(*firstRow&mask)|value;
272
0
        if(firstRow==lastRow) {
273
0
            break;
274
0
        }
275
0
        firstRow+=columns;
276
0
    }
277
0
}
278
279
U_CAPI uint32_t U_EXPORT2
280
0
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
281
0
    uint32_t *row;
282
0
    UPropsVectors *ncpv;
283
0
284
0
    if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) {
285
0
        return 0;
286
0
    }
287
0
    ncpv=(UPropsVectors *)pv;
288
0
    row=_findRow(ncpv, c);
289
0
    return row[2+column];
290
0
}
291
292
U_CAPI uint32_t * U_EXPORT2
293
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
294
0
             UChar32 *pRangeStart, UChar32 *pRangeEnd) {
295
0
    uint32_t *row;
296
0
    int32_t columns;
297
0
298
0
    if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
299
0
        return NULL;
300
0
    }
301
0
302
0
    columns=pv->columns;
303
0
    row=pv->v+rowIndex*columns;
304
0
    if(pRangeStart!=NULL) {
305
0
        *pRangeStart=(UChar32)row[0];
306
0
    }
307
0
    if(pRangeEnd!=NULL) {
308
0
        *pRangeEnd=(UChar32)row[1]-1;
309
0
    }
310
0
    return row+2;
311
0
}
312
313
static int32_t U_CALLCONV
314
0
upvec_compareRows(const void *context, const void *l, const void *r) {
315
0
    const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
316
0
    const UPropsVectors *pv=(const UPropsVectors *)context;
317
0
    int32_t i, count, columns;
318
0
319
0
    count=columns=pv->columns; /* includes start/limit columns */
320
0
321
0
    /* start comparing after start/limit but wrap around to them */
322
0
    i=2;
323
0
    do {
324
0
        if(left[i]!=right[i]) {
325
0
            return left[i]<right[i] ? -1 : 1;
326
0
        }
327
0
        if(++i==columns) {
328
0
            i=0;
329
0
        }
330
0
    } while(--count>0);
331
0
332
0
    return 0;
333
0
}
334
335
U_CAPI void U_EXPORT2
336
0
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
337
0
    uint32_t *row;
338
0
    int32_t i, columns, valueColumns, rows, count;
339
0
    UChar32 start, limit;
340
0
341
0
    /* argument checking */
342
0
    if(U_FAILURE(*pErrorCode)) {
343
0
        return;
344
0
    }
345
0
    if(handler==NULL) {
346
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
347
0
        return;
348
0
    }
349
0
    if(pv->isCompacted) {
350
0
        return;
351
0
    }
352
0
353
0
    /* Set the flag now: Sorting and compacting destroys the builder data structure. */
354
0
    pv->isCompacted=TRUE;
355
0
356
0
    rows=pv->rows;
357
0
    columns=pv->columns;
358
0
    U_ASSERT(columns>=3); /* upvec_open asserts this */
359
0
    valueColumns=columns-2; /* not counting start & limit */
360
0
361
0
    /* sort the properties vectors to find unique vector values */
362
0
    uprv_sortArray(pv->v, rows, columns*4,
363
0
                   upvec_compareRows, pv, FALSE, pErrorCode);
364
0
    if(U_FAILURE(*pErrorCode)) {
365
0
        return;
366
0
    }
367
0
368
0
    /*
369
0
     * Find and set the special values.
370
0
     * This has to do almost the same work as the compaction below,
371
0
     * to find the indexes where the special-value rows will move.
372
0
     */
373
0
    row=pv->v;
374
0
    count=-valueColumns;
375
0
    for(i=0; i<rows; ++i) {
376
0
        start=(UChar32)row[0];
377
0
378
0
        /* count a new values vector if it is different from the current one */
379
0
        if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
380
0
            count+=valueColumns;
381
0
        }
382
0
383
0
        if(start>=UPVEC_FIRST_SPECIAL_CP) {
384
0
            handler(context, start, start, count, row+2, valueColumns, pErrorCode);
385
0
            if(U_FAILURE(*pErrorCode)) {
386
0
                return;
387
0
            }
388
0
        }
389
0
390
0
        row+=columns;
391
0
    }
392
0
393
0
    /* count is at the beginning of the last vector, add valueColumns to include that last vector */
394
0
    count+=valueColumns;
395
0
396
0
    /* Call the handler once more to signal the start of delivering real values. */
397
0
    handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
398
0
            count, row-valueColumns, valueColumns, pErrorCode);
399
0
    if(U_FAILURE(*pErrorCode)) {
400
0
        return;
401
0
    }
402
0
403
0
    /*
404
0
     * Move vector contents up to a contiguous array with only unique
405
0
     * vector values, and call the handler function for each vector.
406
0
     *
407
0
     * This destroys the Properties Vector structure and replaces it
408
0
     * with an array of just vector values.
409
0
     */
410
0
    row=pv->v;
411
0
    count=-valueColumns;
412
0
    for(i=0; i<rows; ++i) {
413
0
        /* fetch these first before memmove() may overwrite them */
414
0
        start=(UChar32)row[0];
415
0
        limit=(UChar32)row[1];
416
0
417
0
        /* add a new values vector if it is different from the current one */
418
0
        if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
419
0
            count+=valueColumns;
420
0
            uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4);
421
0
        }
422
0
423
0
        if(start<UPVEC_FIRST_SPECIAL_CP) {
424
0
            handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
425
0
            if(U_FAILURE(*pErrorCode)) {
426
0
                return;
427
0
            }
428
0
        }
429
0
430
0
        row+=columns;
431
0
    }
432
0
433
0
    /* count is at the beginning of the last vector, add one to include that last vector */
434
0
    pv->rows=count/valueColumns+1;
435
0
}
436
437
U_CAPI const uint32_t * U_EXPORT2
438
0
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
439
0
    if(!pv->isCompacted) {
440
0
        return NULL;
441
0
    }
442
0
    if(pRows!=NULL) {
443
0
        *pRows=pv->rows;
444
0
    }
445
0
    if(pColumns!=NULL) {
446
0
        *pColumns=pv->columns-2;
447
0
    }
448
0
    return pv->v;
449
0
}
450
451
U_CAPI uint32_t * U_EXPORT2
452
upvec_cloneArray(const UPropsVectors *pv,
453
0
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
454
0
    uint32_t *clonedArray;
455
0
    int32_t byteLength;
456
0
457
0
    if(U_FAILURE(*pErrorCode)) {
458
0
        return NULL;
459
0
    }
460
0
    if(!pv->isCompacted) {
461
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
462
0
        return NULL;
463
0
    }
464
0
    byteLength=pv->rows*(pv->columns-2)*4;
465
0
    clonedArray=(uint32_t *)uprv_malloc(byteLength);
466
0
    if(clonedArray==NULL) {
467
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
468
0
        return NULL;
469
0
    }
470
0
    uprv_memcpy(clonedArray, pv->v, byteLength);
471
0
    if(pRows!=NULL) {
472
0
        *pRows=pv->rows;
473
0
    }
474
0
    if(pColumns!=NULL) {
475
0
        *pColumns=pv->columns-2;
476
0
    }
477
0
    return clonedArray;
478
0
}
479
480
U_CAPI UTrie2 * U_EXPORT2
481
0
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
482
0
    UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 };
483
0
    upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode);
484
0
    utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
485
0
    if(U_FAILURE(*pErrorCode)) {
486
0
        utrie2_close(toUTrie2.trie);
487
0
        toUTrie2.trie=NULL;
488
0
    }
489
0
    return toUTrie2.trie;
490
0
}
491
492
/*
493
 * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
494
 * some 16-bit field and builds and returns a UTrie2.
495
 */
496
497
U_CAPI void U_CALLCONV
498
upvec_compactToUTrie2Handler(void *context,
499
                             UChar32 start, UChar32 end,
500
                             int32_t rowIndex, uint32_t *row, int32_t columns,
501
0
                             UErrorCode *pErrorCode) {
502
0
    (void)row;
503
0
    (void)columns;
504
0
    UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context;
505
0
    if(start<UPVEC_FIRST_SPECIAL_CP) {
506
0
        utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
507
0
    } else {
508
0
        switch(start) {
509
0
        case UPVEC_INITIAL_VALUE_CP:
510
0
            toUTrie2->initialValue=rowIndex;
511
0
            break;
512
0
        case UPVEC_ERROR_VALUE_CP:
513
0
            toUTrie2->errorValue=rowIndex;
514
0
            break;
515
0
        case UPVEC_START_REAL_VALUES_CP:
516
0
            toUTrie2->maxValue=rowIndex;
517
0
            if(rowIndex>0xffff) {
518
0
                /* too many rows for a 16-bit trie */
519
0
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
520
0
            } else {
521
0
                toUTrie2->trie=utrie2_open(toUTrie2->initialValue,
522
0
                                           toUTrie2->errorValue, pErrorCode);
523
0
            }
524
0
            break;
525
0
        default:
526
0
            break;
527
0
        }
528
0
    }
529
0
}