Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/rbbirb.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
//
4
//  file:  rbbirb.cpp
5
//
6
//  Copyright (C) 2002-2011, International Business Machines Corporation and others.
7
//  All Rights Reserved.
8
//
9
//  This file contains the RBBIRuleBuilder class implementation.  This is the main class for
10
//    building (compiling) break rules into the tables required by the runtime
11
//    RBBI engine.
12
//
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_BREAK_ITERATION
17
18
#include "unicode/brkiter.h"
19
#include "unicode/rbbi.h"
20
#include "unicode/ubrk.h"
21
#include "unicode/unistr.h"
22
#include "unicode/uniset.h"
23
#include "unicode/uchar.h"
24
#include "unicode/uchriter.h"
25
#include "unicode/parsepos.h"
26
#include "unicode/parseerr.h"
27
28
#include "cmemory.h"
29
#include "cstring.h"
30
#include "rbbirb.h"
31
#include "rbbinode.h"
32
#include "rbbiscan.h"
33
#include "rbbisetb.h"
34
#include "rbbitblb.h"
35
#include "rbbidata.h"
36
#include "uassert.h"
37
38
39
U_NAMESPACE_BEGIN
40
41
42
//----------------------------------------------------------------------------------------
43
//
44
//  Constructor.
45
//
46
//----------------------------------------------------------------------------------------
47
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString   &rules,
48
                                       UParseError     *parseErr,
49
                                       UErrorCode      &status)
50
 : fRules(rules), fStrippedRules(rules)
51
0
{
52
0
    fStatus = &status; // status is checked below
53
0
    fParseError = parseErr;
54
0
    fDebugEnv   = NULL;
55
#ifdef RBBI_DEBUG
56
    fDebugEnv   = getenv("U_RBBIDEBUG");
57
#endif
58
59
0
60
0
    fForwardTree        = NULL;
61
0
    fReverseTree        = NULL;
62
0
    fSafeFwdTree        = NULL;
63
0
    fSafeRevTree        = NULL;
64
0
    fDefaultTree        = &fForwardTree;
65
0
    fForwardTable       = NULL;
66
0
    fRuleStatusVals     = NULL;
67
0
    fChainRules         = FALSE;
68
0
    fLBCMNoChain        = FALSE;
69
0
    fLookAheadHardBreak = FALSE;
70
0
    fUSetNodes          = NULL;
71
0
    fRuleStatusVals     = NULL;
72
0
    fScanner            = NULL;
73
0
    fSetBuilder         = NULL;
74
0
    if (parseErr) {
75
0
        uprv_memset(parseErr, 0, sizeof(UParseError));
76
0
    }
77
0
78
0
    if (U_FAILURE(status)) {
79
0
        return;
80
0
    }
81
0
82
0
    fUSetNodes          = new UVector(status); // bcos status gets overwritten here
83
0
    fRuleStatusVals     = new UVector(status);
84
0
    fScanner            = new RBBIRuleScanner(this);
85
0
    fSetBuilder         = new RBBISetBuilder(this);
86
0
    if (U_FAILURE(status)) {
87
0
        return;
88
0
    }
89
0
    if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
90
0
        status = U_MEMORY_ALLOCATION_ERROR;
91
0
    }
92
0
}
93
94
95
96
//----------------------------------------------------------------------------------------
97
//
98
//  Destructor
99
//
100
//----------------------------------------------------------------------------------------
101
0
RBBIRuleBuilder::~RBBIRuleBuilder() {
102
0
103
0
    int        i;
104
0
    for (i=0; ; i++) {
105
0
        RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
106
0
        if (n==NULL) {
107
0
            break;
108
0
        }
109
0
        delete n;
110
0
    }
111
0
112
0
    delete fUSetNodes;
113
0
    delete fSetBuilder;
114
0
    delete fForwardTable;
115
0
    delete fForwardTree;
116
0
    delete fReverseTree;
117
0
    delete fSafeFwdTree;
118
0
    delete fSafeRevTree;
119
0
    delete fScanner;
120
0
    delete fRuleStatusVals;
121
0
}
122
123
124
125
126
127
//----------------------------------------------------------------------------------------
128
//
129
//   flattenData() -  Collect up the compiled RBBI rule data and put it into
130
//                    the format for saving in ICU data files,
131
//                    which is also the format needed by the RBBI runtime engine.
132
//
133
//----------------------------------------------------------------------------------------
134
0
static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;}
135
136
0
// Lays out the compiled rule data in one uprv_malloc'ed block:
//     header | forward table | safe reverse table | trie | status table | rule source
// Each section starts on an 8-byte boundary.
//
// Returns the block, or NULL with *fStatus set when *fStatus already indicated failure,
// when the allocation fails, or when the rule source cannot be copied out.
// The block must eventually be released with uprv_free().
RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    int32_t    i;

    if (U_FAILURE(*fStatus)) {
        return NULL;
    }

    // Remove whitespace from the rules to make it smaller.
    // The rule parser has already removed comments.
    fStrippedRules = fScanner->stripRules(fStrippedRules);

    // Calculate the size of each section in the data.
    //   Sizes here are padded up to a multiple of 8 for better memory alignment.
    //   The rule source size includes room for a terminating NUL.
    //
    int32_t headerSize        = align8(sizeof(RBBIDataHeader));
    int32_t forwardTableSize  = align8(fForwardTable->getTableSize());
    int32_t reverseTableSize  = align8(fForwardTable->getSafeTableSize());
    int32_t trieSize          = align8(fSetBuilder->getTrieSize());
    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
    int32_t rulesSize         = align8((fStrippedRules.length()+1) * sizeof(UChar));

    int32_t         totalSize = headerSize
                                + forwardTableSize
                                + reverseTableSize
                                + statusTableSize + trieSize + rulesSize;

    RBBIDataHeader  *data     = (RBBIDataHeader *)uprv_malloc(totalSize);
    if (data == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // Zero everything so that padding bytes and reserved fields are deterministic.
    uprv_memset(data, 0, totalSize);


    data->fMagic            = 0xb1a0;
    data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
    data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
    data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
    data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
    data->fLength           = totalSize;
    data->fCatCount         = fSetBuilder->getNumCharCategories();

    // Section offsets are byte offsets from the start of the header; each section
    // begins where the previous (padded) one ends.
    data->fFTable        = headerSize;
    data->fFTableLen     = forwardTableSize;

    data->fRTable        = data->fFTable  + data->fFTableLen;
    data->fRTableLen     = reverseTableSize;

    data->fTrie          = data->fRTable + data->fRTableLen;
    data->fTrieLen       = fSetBuilder->getTrieSize();      // unpadded trie size
    data->fStatusTable   = data->fTrie    + trieSize;
    data->fStatusTableLen= statusTableSize;
    data->fRuleSource    = data->fStatusTable + statusTableSize;
    data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);  // bytes, no NUL

    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));

    fForwardTable->exportTable((uint8_t *)data + data->fFTable);
    fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
    fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);

    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
    for (i=0; i<fRuleStatusVals->size(); i++) {
        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
    }

    // The rule source section holds exactly rulesSize bytes, i.e. rulesSize/sizeof(UChar)
    // code units; never advertise more capacity than was actually reserved.
    fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource),
                           rulesSize / (int32_t)sizeof(UChar), *fStatus);
    if (U_FAILURE(*fStatus)) {
        // Do not hand back a buffer together with a failure status: release it here
        // so it cannot be dropped by a caller that only looks at the status.
        uprv_free(data);
        return NULL;
    }

    return data;
}
208
209
210
//----------------------------------------------------------------------------------------
211
//
212
//  createRuleBasedBreakIterator    construct from source rules that are passed in
213
//                                  in a UnicodeString
214
//
215
//----------------------------------------------------------------------------------------
216
BreakIterator *
217
RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString    &rules,
218
                                    UParseError      *parseError,
219
                                    UErrorCode       &status)
220
0
{
221
0
    //
222
0
    // Read the input rules, generate a parse tree, symbol table,
223
0
    // and list of all Unicode Sets referenced by the rules.
224
0
    //
225
0
    RBBIRuleBuilder  builder(rules, parseError, status);
226
0
    if (U_FAILURE(status)) { // status checked here bcos build below doesn't
227
0
        return NULL;
228
0
    }
229
0
230
0
    RBBIDataHeader *data = builder.build(status);
231
0
232
0
    if (U_FAILURE(status)) {
233
0
        return nullptr;
234
0
    }
235
0
236
0
    //
237
0
    //  Create a break iterator from the compiled rules.
238
0
    //     (Identical to creation from stored pre-compiled rules)
239
0
    //
240
0
    // status is checked after init in construction.
241
0
    RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
242
0
    if (U_FAILURE(status)) {
243
0
        delete This;
244
0
        This = NULL;
245
0
    } 
246
0
    else if(This == NULL) { // test for NULL
247
0
        status = U_MEMORY_ALLOCATION_ERROR;
248
0
    }
249
0
    return This;
250
0
}
251
252
0
// Runs the whole compilation pipeline: parse the rules, build the character
// categories, generate and optimize the state tables, build the trie, and flatten
// the result into the run-time memory image.
//
// Returns the flattened data, or nullptr when status indicates failure.
RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Stage 1: parse the rule source. Stop here if status shows a failure afterwards.
    fScanner->parse();
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Stage 2: UnicodeSet processing.
    //    Munge the Unicode Sets to create a set of character categories.
    //    The mapping tables (TRIE) from input code points to the character
    //    categories are generated later, once the tables have been optimized.
    fSetBuilder->buildRanges();

    // Stage 3: generate the DFA state transition table.
    fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
    if (!fForwardTable) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    fForwardTable->buildForwardTable();
    optimizeTables();
    fForwardTable->buildSafeReverseTable(status);

#ifdef RBBI_DEBUG
    if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) {
        fForwardTable->printStates();
        fForwardTable->printRuleStatusTable();
        fForwardTable->printReverseTable();
    }
#endif

    // Stage 4: build the code point -> category trie.
    fSetBuilder->buildTrie();

    // Stage 5: package up the compiled data into a memory image in the run-time
    // format. flattenData() returns NULL on error.
    RBBIDataHeader *flattened = flattenData();
    return U_FAILURE(status) ? nullptr : flattened;
}
304
305
0
void RBBIRuleBuilder::optimizeTables() {
306
0
307
0
    // Begin looking for duplicates with char class 3.
308
0
    // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
309
0
    // and should not have other categories merged into them.
310
0
    IntPair duplPair = {3, 0};
311
0
312
0
    while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
313
0
        fSetBuilder->mergeCategories(duplPair);
314
0
        fForwardTable->removeColumn(duplPair.second);
315
0
    }
316
0
    fForwardTable->removeDuplicateStates();
317
0
}
318
319
U_NAMESPACE_END
320
321
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */