Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/rbbistbl.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
//
4
//  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
5
//
6
/*
7
***************************************************************************
8
*   Copyright (C) 2002-2014 International Business Machines Corporation
9
*   and others. All rights reserved.
10
***************************************************************************
11
*/
12
13
#include "unicode/utypes.h"
14
15
#if !UCONFIG_NO_BREAK_ITERATION
16
17
#include "unicode/unistr.h"
18
#include "unicode/uniset.h"
19
#include "unicode/uchar.h"
20
#include "unicode/parsepos.h"
21
22
#include "cstr.h"
23
#include "rbbinode.h"
24
#include "rbbirb.h"
25
#include "umutex.h"
26
27
28
//
29
//  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
30
//                                  when the hash table is deleted.
31
//
32
U_CDECL_BEGIN
33
0
static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
    // The hash table hands back its stored value as an untyped pointer;
    // restore the entry type so that the entry's destructor runs.
    delete static_cast<icu::RBBISymbolTableEntry *>(p);
}
37
U_CDECL_END
38
39
40
41
U_NAMESPACE_BEGIN
42
43
//
//  RBBISymbolTable constructor   Records the rule source text and the owning scanner,
//                                then creates the hash table that maps variable names
//                                to symbol table entries.  Failure to create the table
//                                is reported through the status parameter.
//
RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
    :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
{
    fCachedSetLookup = NULL;

    // uhash_open checks status itself, so no incoming-status test is needed here.
    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);

    // Only a successfully opened table gets the deleter that disposes of
    // the entries stored as values.
    if (U_SUCCESS(status)) {
        uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
    }
}
56
57
58
59
//
//  RBBISymbolTable destructor    Closes the hash table.  The value deleter installed
//                                by the constructor is what disposes of the entries.
//
RBBISymbolTable::~RBBISymbolTable()
{
    uhash_close(fHashTable);
}
63
64
65
//
66
//  RBBISymbolTable::lookup       This function from the abstract symbol table interface
67
//                                looks up a variable name and returns a UnicodeString
68
//                                containing the substitution text.
69
//
70
//                                The variable name does NOT include the leading $.
71
//
72
const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
{
    // lookup() is const in the interface, but the cached set pointer must be updated.
    RBBISymbolTable *self = const_cast<RBBISymbolTable *>(this);

    const RBBISymbolTableEntry *entry =
        static_cast<const RBBISymbolTableEntry *>(uhash_get(fHashTable, &s));
    if (entry == NULL) {
        // Unknown variable name.  The cached set pointer is left untouched.
        return NULL;
    }

    // The entry's value is the variable reference node; its left child is the
    // root node of the expression assigned to the variable.
    RBBINode *rhs = entry->val->fLeftChild;

    if (rhs->fType != RBBINode::setRef) {
        // The variable refers to something other than just a set.
        // Return the original source string for the expression.
        self->fCachedSetLookup = NULL;
        return &rhs->fText;
    }

    // The $variable refers to a single UnicodeSet.
    //   Remember the set and return the ffffString, which will subsequently be
    //   interpreted as a stand-in character for the set by
    //   RBBISymbolTable::lookupMatcher()
    self->fCachedSetLookup = rhs->fLeftChild->fInputSet;
    return &ffffString;
}
105
106
107
108
//
109
//  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
110
//                                   interface maps a single stand-in character to a
111
//                                   pointer to a Unicode Set.   The Unicode Set code uses this
112
//                                   mechanism to get all references to the same $variable
113
//                                   name to refer to a single common Unicode Set instance.
114
//
115
//    This implementation cheats a little, and does not maintain a map of stand-in chars
116
//    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
117
//    constructor will always call this function right after calling lookup(),
118
//    and we just need to remember what set to return between these two calls.
119
const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
{
    // Only the 0xffff stand-in character maps to a set.
    if (ch != 0xffff) {
        return NULL;
    }

    // Hand out the set remembered by the preceding lookup() and clear the
    // cache so that the same set is not returned a second time.
    UnicodeSet *remembered = fCachedSetLookup;
    const_cast<RBBISymbolTable *>(this)->fCachedSetLookup = NULL;
    return remembered;
}
129
130
//
131
// RBBISymbolTable::parseReference   This function from the abstract symbol table interface
132
//                                   looks for a $variable name in the source text.
133
//                                   It does not look it up, only scans for it.
134
//                                   It is used by the UnicodeSet parser.
135
//
136
//                                   This implementation is lifted pretty much verbatim
137
//                                   from the rules based transliterator implementation.
138
//                                   I didn't see an obvious way of sharing it.
139
//
140
UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
                                                ParsePosition& pos, int32_t limit) const
{
    const int32_t begin = pos.getIndex();
    int32_t       end   = begin;

    // Advance 'end' over the longest run of identifier characters starting at
    // 'begin'.  The first character must additionally be an identifier start.
    for (; end < limit; ++end) {
        const UChar ch = text.charAt(end);
        if (end == begin && !u_isIDStart(ch)) {
            break;
        }
        if (!u_isIDPart(ch)) {
            break;
        }
    }

    UnicodeString name;
    if (end > begin) {
        // A name was found: move the parse position past it and copy it out.
        pos.setIndex(end);
        text.extractBetween(begin, end, name);
    }
    // With no valid name chars, 'name' is still empty, which indicates failure,
    // and the parse position has not been moved.
    return name;
}
160
161
162
163
//
164
// RBBISymbolTable::lookupNode      Given a key (a variable name), return the
165
//                                  corresponding RBBI Node.  If there is no entry
166
//                                  in the table for this name, return NULL.
167
//
168
0
RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
    const RBBISymbolTableEntry *entry =
        static_cast<const RBBISymbolTableEntry *>(uhash_get(fHashTable, &key));

    // No entry for this name means no node.
    return (entry != NULL) ? entry->val : NULL;
}
179
180
181
//
182
//    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
183
//                                  Indicate an error if the name already exists -
184
//                                    this will only occur in the case of duplicate
185
//                                    variable assignments.
186
//
187
0
void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
188
0
    RBBISymbolTableEntry *e;
189
    /* test for buffer overflows */
190
0
    if (U_FAILURE(err)) {
191
0
        return;
192
0
    }
193
0
    e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
194
0
    if (e != NULL) {
195
0
        err = U_BRK_VARIABLE_REDFINITION;
196
0
        return;
197
0
    }
198
199
0
    e = new RBBISymbolTableEntry;
200
0
    if (e == NULL) {
201
0
        err = U_MEMORY_ALLOCATION_ERROR;
202
0
        return;
203
0
    }
204
0
    e->key = key;
205
0
    e->val = val;
206
0
    uhash_put( fHashTable, &e->key, e, &err);
207
0
}
208
209
210
0
// Creates an empty entry: an empty key string and no variable reference node.
RBBISymbolTableEntry::RBBISymbolTableEntry()
    : UMemory(),
      key(),
      val(NULL)
{
}
211
212
0
RBBISymbolTableEntry::~RBBISymbolTableEntry() {
    // The "val" of a symbol table entry is a variable reference node.
    // The l. child of the val is the rhs expression from the assignment.
    // Unlike other node types, children of variable reference nodes are not
    //    automatically recursively deleted.  We do it manually here.
    //
    // The default constructor leaves val NULL, so an entry that was never
    // given a node has nothing to release and must not be dereferenced.
    if (val != NULL) {
        delete val->fLeftChild;
        val->fLeftChild = NULL;

        delete  val;
    }

    // Note: the key UnicodeString is destructed by virtue of being in the object by value.
}
224
225
226
//
227
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
228
//
229
#ifdef RBBI_DEBUG
230
void RBBISymbolTable::rbbiSymtablePrint() const {
231
    RBBIDebugPrintf("Variable Definitions Symbol Table\n"
232
           "Name                  Node         serial  String Val\n"
233
           "-------------------------------------------------------------------\n");
234
235
    int32_t pos = UHASH_FIRST;
236
    const UHashElement  *e   = NULL;
237
    for (;;) {
238
        e = uhash_nextElement(fHashTable,  &pos);
239
        if (e == NULL ) {
240
            break;
241
        }
242
        RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
243
244
        RBBIDebugPrintf("%-19s   %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum);
245
        RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)());
246
    }
247
248
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
249
    pos = -1;
250
    for (;;) {
251
        e = uhash_nextElement(fHashTable,  &pos);
252
        if (e == NULL ) {
253
            break;
254
        }
255
        RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
256
        RBBIDebugPrintf("%s\n", CStr(s->key)());
257
        RBBINode::printTree(s->val, TRUE);
258
        RBBINode::printTree(s->val->fLeftChild, FALSE);
259
        RBBIDebugPrintf("\n");
260
    }
261
}
262
#endif
263
264
265
266
267
268
U_NAMESPACE_END
269
270
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */