Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/rbbiscan.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
//
4
//  rbbiscan.h
5
//
6
//  Copyright (C) 2002-2016, International Business Machines Corporation and others.
7
//  All Rights Reserved.
8
//
9
//  This file contains declarations for class RBBIRuleScanner
10
//
11
12
13
#ifndef RBBISCAN_H
14
#define RBBISCAN_H
15
16
#include "unicode/utypes.h"
17
#include "unicode/uobject.h"
18
#include "unicode/rbbi.h"
19
#include "unicode/uniset.h"
20
#include "unicode/parseerr.h"
21
#include "uhash.h"
22
#include "uvector.h"
23
#include "unicode/symtable.h"// For UnicodeSet parsing; this is the interface that
24
                          //    looks up references to $variables within a set.
25
#include "rbbinode.h"
26
#include "rbbirpt.h"
27
28
U_NAMESPACE_BEGIN
29
30
class   RBBIRuleBuilder;
31
class   RBBISymbolTable;
32
33
34
//--------------------------------------------------------------------------------
35
//
36
//  class RBBIRuleScanner does the lowest level, character-at-a-time
37
//                        scanning of break iterator rules.  
38
//
39
//                        The output of the scanner is parse trees for
40
//                        the rule expressions and a list of all Unicode Sets
41
//                        encountered.
42
//
43
//--------------------------------------------------------------------------------
44
45
class RBBIRuleScanner : public UMemory {
46
public:
47
48
    enum {
49
        kStackSize = 100            // The size of the state stack for
50
    };                              //   rules parsing.  Corresponds roughly
51
                                    //   to the depth of parentheses nesting
52
                                    //   that is allowed in the rules.
53
54
    struct RBBIRuleChar {
55
        UChar32             fChar;
56
        UBool               fEscaped;
57
0
        RBBIRuleChar() : fChar(0), fEscaped(false) {}
58
    };
59
60
    RBBIRuleScanner(RBBIRuleBuilder  *rb);
61
62
63
    virtual    ~RBBIRuleScanner();
64
65
    void        nextChar(RBBIRuleChar &c);          // Get the next char from the input stream.
66
                                                    // The char is stored in c (void return); end of input is presumably flagged via c.
67
68
    UBool       push(const RBBIRuleChar &c);        // Push (unget) one character.
69
                                                    //   Only a single character may be pushed.
70
71
    void        parse();                            // Parse the rules, generating two parse
72
                                                    //   trees, one each for the forward and
73
                                                    //   reverse rules,
74
                                                    //   and a list of UnicodeSets encountered.
75
76
    int32_t     numRules();                         // Return the number of rules that have been seen.
77
78
    /**
79
     * Return a rules string without unnecessary
80
     * characters.
81
     */
82
    static UnicodeString stripRules(const UnicodeString &rules);
83
private:
84
85
    UBool       doParseActions(int32_t a);
86
    void        error(UErrorCode e);                   // error reporting convenience function.
87
    void        fixOpStack(RBBINode::OpPrecedence p);
88
                                                       //   NOTE(review): stray fragment "a character." - its declaration appears to be gone.
89
    void        findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
90
91
    UChar32     nextCharLL();
92
#ifdef RBBI_DEBUG
93
    void        printNodeStack(const char *title);
94
#endif
95
    RBBINode    *pushNewNode(RBBINode::NodeType  t);
96
    void        scanSet();
97
98
99
    RBBIRuleBuilder               *fRB;              // The rule builder that we are part of.
100
101
    int32_t                       fScanIndex;        // Index of current character being processed
102
                                                     //   in the rule input string.
103
    int32_t                       fNextIndex;        // Index of the next character, which
104
                                                     //   is the first character not yet scanned.
105
    UBool                         fQuoteMode;        // Scan is in a 'quoted region'
106
    int32_t                       fLineNum;          // Line number in input file.
107
    int32_t                       fCharNum;          // Char position within the line.
108
    UChar32                       fLastChar;         // Previous char, needed to count CR-LF
109
                                                     //   as a single line, not two.
110
111
    RBBIRuleChar                  fC;                // Current char for parse state machine
112
                                                     //   processing.
113
    UnicodeString                 fVarName;          // $variableName, valid when we've just
114
                                                     //   scanned one.
115
116
    RBBIRuleTableEl               **fStateTable;     // State Transition Table for RBBI Rule
117
                                                     //   parsing.  Indexed by p[state][char-class]
118
119
    uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
120
    int32_t                       fStackPtr;           //  and pops as specified in the state
121
                                                       //  transition rules.
122
123
    RBBINode                      *fNodeStack[kStackSize]; // Node stack, holds nodes created
124
                                                           //  during the parse of a rule
125
    int32_t                        fNodeStackPtr;
126
127
128
    UBool                          fReverseRule;     // True if the rule currently being scanned
129
                                                     //  is a reverse direction rule (if it
130
                                                     //  starts with a '!')
131
132
    UBool                          fLookAheadRule;   // True if the rule includes a '/'
133
                                                     //   somewhere within it.
134
135
    UBool                          fNoChainInRule;   // True if the current rule starts with a '^'.
136
137
    RBBISymbolTable               *fSymbolTable;     // symbol table, holds definitions of
138
                                                     //   $variable symbols.
139
140
    UHashtable                    *fSetTable;        // UnicodeSet hash table, holds indexes to
141
                                                     //   the sets created while parsing rules.
142
                                                     //   The key is the string used for creating
143
                                                     //   the set.
144
145
    UnicodeSet                     fRuleSets[10];    // Unicode Sets that are needed during
146
                                                     //  the scanning of RBBI rules.  The
147
                                                     //  indices for these are assigned by the
148
                                                     //  perl script that builds the state tables.
149
                                                     //  See rbbirpt.h.
150
151
    int32_t                        fRuleNum;         // Counts each rule as it is scanned.
152
153
    int32_t                        fOptionStart;     // Input index of start of a !!option
154
                                                     //   keyword, while being scanned.
155
156
    UnicodeSet *gRuleSet_rule_char;
157
    UnicodeSet *gRuleSet_white_space;
158
    UnicodeSet *gRuleSet_name_char;
159
    UnicodeSet *gRuleSet_name_start_char;
160
161
    RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
162
    RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
163
};
164
165
U_NAMESPACE_END
166
167
#endif