Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/rbbiscan.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
//
4
//  rbbiscan.h
5
//
6
//  Copyright (C) 2002-2016, International Business Machines Corporation and others.
7
//  All Rights Reserved.
8
//
9
//  This file contains declarations for class RBBIRuleScanner
10
//
11
12
13
#ifndef RBBISCAN_H
14
#define RBBISCAN_H
15
16
#include "unicode/utypes.h"
17
#include "unicode/uobject.h"
18
#include "unicode/rbbi.h"
19
#include "unicode/uniset.h"
20
#include "unicode/parseerr.h"
21
#include "uhash.h"
22
#include "uvector.h"
23
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
24
                          //    looks up references to $variables within a set.
25
#include "rbbinode.h"
26
#include "rbbirpt.h"
27
28
U_NAMESPACE_BEGIN
29
30
class   RBBIRuleBuilder;
31
class   RBBISymbolTable;
32
33
34
//--------------------------------------------------------------------------------
35
//
36
//  class RBBIRuleScanner does the lowest level, character-at-a-time
37
//                        scanning of break iterator rules.  
38
//
39
//                        The output of the scanner is parse trees for
40
//                        the rule expressions and a list of all Unicode Sets
41
//                        encountered.
42
//
43
//--------------------------------------------------------------------------------
44
45
class RBBIRuleScanner : public UMemory {
46
public:
47
48
    enum {
49
        kStackSize = 100            // The size of the state stack for
50
    };                              //   rules parsing.  Corresponds roughly
51
                                    //   to the depth of parentheses nesting
52
                                    //   that is allowed in the rules.  Also sizes fNodeStack.
53
54
    struct RBBIRuleChar {
55
        UChar32             fChar;      // The character, as a UChar32 value.
56
        UBool               fEscaped;   // NOTE(review): presumably marks a char taken from an escaped/quoted context - confirm in rbbiscan.cpp.
57
0
        RBBIRuleChar() : fChar(0), fEscaped(FALSE) {};   // Defaults: fChar 0, fEscaped FALSE.
58
    };
59
60
    RBBIRuleScanner(RBBIRuleBuilder  *rb);
61
62
63
    virtual    ~RBBIRuleScanner();
64
65
    void        nextChar(RBBIRuleChar &c);          // Get the next char from the input stream.
66
                                                    //   Delivered through c; there is no return value.
67
68
    UBool       push(const RBBIRuleChar &c);        // Push (unget) one character.
69
                                                    //   Only a single character may be pushed.
70
71
    void        parse();                            // Parse the rules, generating two parse
72
                                                    //   trees, one each for the forward and
73
                                                    //   reverse rules,
74
                                                    //   and a list of UnicodeSets encountered.
75
76
    /**
77
     * Return a rules string without unnecessary
78
     * characters.
79
     */
80
    static UnicodeString stripRules(const UnicodeString &rules);
81
private:
82
83
    UBool       doParseActions(int32_t a);
84
    void        error(UErrorCode e);                   // error reporting convenience function.
85
    void        fixOpStack(RBBINode::OpPrecedence p);
86
                                                       // NOTE(review): an orphaned fragment ("a character.") stood here; its declaration is gone.
87
    void        findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
88
89
    UChar32     nextCharLL();
90
#ifdef RBBI_DEBUG
91
    void        printNodeStack(const char *title);
92
#endif
93
    RBBINode    *pushNewNode(RBBINode::NodeType  t);
94
    void        scanSet();
95
96
97
    RBBIRuleBuilder               *fRB;              // The rule builder that we are part of.
98
99
    int32_t                       fScanIndex;        // Index of current character being processed
100
                                                     //   in the rule input string.
101
    int32_t                       fNextIndex;        // Index of the next character, which
102
                                                     //   is the first character not yet scanned.
103
    UBool                         fQuoteMode;        // Scan is in a 'quoted region'
104
    int32_t                       fLineNum;          // Line number in input file.
105
    int32_t                       fCharNum;          // Char position within the line.
106
    UChar32                       fLastChar;         // Previous char, needed to count CR-LF
107
                                                     //   as a single line, not two.
108
109
    RBBIRuleChar                  fC;                // Current char for parse state machine
110
                                                     //   processing.
111
    UnicodeString                 fVarName;          // $variableName, valid when we've just
112
                                                     //   scanned one.
113
114
    RBBIRuleTableEl               **fStateTable;     // State Transition Table for RBBI Rule
115
                                                     //   parsing.  index by p[state][char-class]
116
117
    uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
118
    int32_t                       fStackPtr;           //  and pops as specified in the state
119
                                                       //  transition rules.
120
121
    RBBINode                      *fNodeStack[kStackSize]; // Node stack, holds nodes created
122
                                                           //  during the parse of a rule
123
    int32_t                        fNodeStackPtr;          // NOTE(review): presumably the top-of-stack index for fNodeStack - confirm.
124
125
126
    UBool                          fReverseRule;     // True if the rule currently being scanned
127
                                                     //  is a reverse direction rule (if it
128
                                                     //  starts with a '!')
129
130
    UBool                          fLookAheadRule;   // True if the rule includes a '/'
131
                                                     //   somewhere within it.
132
133
    UBool                          fNoChainInRule;   // True if the current rule starts with a '^'.
134
135
    RBBISymbolTable               *fSymbolTable;     // symbol table, holds definitions of
136
                                                     //   $variable symbols.
137
138
    UHashtable                    *fSetTable;        // UnicodeSet hash table, holds indexes to
139
                                                     //   the sets created while parsing rules.
140
                                                     //   The key is the string used for creating
141
                                                     //   the set.
142
143
    UnicodeSet                     fRuleSets[10];    // Unicode Sets that are needed during
144
                                                     //  the scanning of RBBI rules.  The
145
                                                     //  indices for these are assigned by the
146
                                                     //  perl script that builds the state tables.
147
                                                     //  See rbbirpt.h.
148
149
    int32_t                        fRuleNum;         // Counts each rule as it is scanned.
150
151
    int32_t                        fOptionStart;     // Input index of start of a !!option
152
                                                     //   keyword, while being scanned.
153
154
    UnicodeSet *gRuleSet_rule_char;          // Per-instance members despite the 'g' prefix. NOTE(review): presumably point into fRuleSets - confirm.
155
    UnicodeSet *gRuleSet_white_space;
156
    UnicodeSet *gRuleSet_name_char;
157
    UnicodeSet *gRuleSet_name_start_char;
158
159
    RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
160
    RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
161
};
162
163
U_NAMESPACE_END
164
165
#endif