/src/mozilla-central/dom/xslt/xpath/txExprLexer.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | |
7 | | #ifndef MITREXSL_EXPRLEXER_H |
8 | | #define MITREXSL_EXPRLEXER_H |
9 | | |
10 | | #include "txCore.h" |
11 | | #include "nsString.h" |
12 | | |
13 | | /** |
14 | | * A Token class for the ExprLexer. |
15 | | * |
16 | | * This class was ported from XSL:P, an open source Java based |
17 | | * XSLT processor, written by yours truly. |
18 | | */ |
19 | | class Token |
20 | | { |
21 | | public: |
22 | | |
23 | | /** |
24 | | * Token types |
25 | | */ |
26 | | enum Type { |
27 | | //-- Trivial Tokens |
28 | | NULL_TOKEN = 1, |
29 | | LITERAL, |
30 | | NUMBER, |
31 | | CNAME, |
32 | | VAR_REFERENCE, |
33 | | PARENT_NODE, |
34 | | SELF_NODE, |
35 | | R_PAREN, |
36 | | R_BRACKET, // 9 |
37 | | /** |
38 | | * start of tokens for 3.7, bullet 1 |
39 | | * ExprLexer::nextIsOperatorToken bails if the tokens aren't |
40 | | * consecutive. |
41 | | */ |
42 | | COMMA, |
43 | | AT_SIGN, |
44 | | L_PAREN, |
45 | | L_BRACKET, |
46 | | AXIS_IDENTIFIER, |
47 | | |
48 | | // These tokens include their following left parenthesis |
49 | | FUNCTION_NAME_AND_PAREN, // 15 |
50 | | COMMENT_AND_PAREN, |
51 | | NODE_AND_PAREN, |
52 | | PROC_INST_AND_PAREN, |
53 | | TEXT_AND_PAREN, |
54 | | |
55 | | /** |
56 | | * operators |
57 | | */ |
58 | | //-- boolean ops |
59 | | AND_OP, // 20 |
60 | | OR_OP, |
61 | | |
62 | | //-- relational |
63 | | EQUAL_OP, // 22 |
64 | | NOT_EQUAL_OP, |
65 | | LESS_THAN_OP, |
66 | | GREATER_THAN_OP, |
67 | | LESS_OR_EQUAL_OP, |
68 | | GREATER_OR_EQUAL_OP, |
69 | | //-- additive operators |
70 | | ADDITION_OP, // 28 |
71 | | SUBTRACTION_OP, |
72 | | //-- multiplicative |
73 | | DIVIDE_OP, // 30 |
74 | | MULTIPLY_OP, |
75 | | MODULUS_OP, |
76 | | //-- path operators |
77 | | PARENT_OP, // 33 |
78 | | ANCESTOR_OP, |
79 | | UNION_OP, |
80 | | /** |
81 | | * end of tokens for 3.7, bullet 1 -/ |
82 | | */ |
83 | | //-- Special endtoken |
84 | | END // 36 |
85 | | }; |
86 | | |
87 | | |
88 | | /** |
89 | | * Constructors |
90 | | */ |
91 | | typedef nsAString::const_char_iterator iterator; |
92 | | |
93 | | Token(iterator aStart, iterator aEnd, Type aType) |
94 | | : mStart(aStart), |
95 | | mEnd(aEnd), |
96 | | mType(aType), |
97 | | mNext(nullptr) |
98 | 0 | { |
99 | 0 | } |
100 | | Token(iterator aChar, Type aType) |
101 | | : mStart(aChar), |
102 | | mEnd(aChar + 1), |
103 | | mType(aType), |
104 | | mNext(nullptr) |
105 | 0 | { |
106 | 0 | } |
107 | | |
108 | | const nsDependentSubstring Value() |
109 | 0 | { |
110 | 0 | return Substring(mStart, mEnd); |
111 | 0 | } |
112 | | |
113 | | iterator mStart, mEnd; |
114 | | Type mType; |
115 | | Token* mNext; |
116 | | }; |
117 | | |
118 | | /** |
119 | | * A class for splitting an "Expr" String into tokens and |
120 | | * performing basic Lexical Analysis. |
121 | | * |
122 | | * This class was ported from XSL:P, an open source Java based XSL processor |
123 | | */ |
124 | | |
125 | | class txExprLexer |
126 | | { |
127 | | public: |
128 | | |
129 | | txExprLexer(); |
130 | | ~txExprLexer(); |
131 | | |
132 | | /** |
133 | | * Parse the given string. |
134 | | * returns an error result if lexing failed. |
135 | | * The given string must outlive the use of the lexer, as the |
136 | | * generated Tokens point to Substrings of it. |
137 | | * mPosition points to the offending location in case of an error. |
138 | | */ |
139 | | nsresult parse(const nsAString& aPattern); |
140 | | |
141 | | typedef nsAString::const_char_iterator iterator; |
142 | | iterator mPosition; |
143 | | |
144 | | /** |
145 | | * Functions for iterating over the TokenList |
146 | | */ |
147 | | |
148 | | Token* nextToken(); |
149 | | Token* peek() |
150 | 0 | { |
151 | 0 | NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); |
152 | 0 | return mCurrentItem; |
153 | 0 | } |
154 | | Token* peekAhead() |
155 | 0 | { |
156 | 0 | NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); |
157 | 0 | // Don't peek past the end node |
158 | 0 | return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem; |
159 | 0 | } |
160 | | bool hasMoreTokens() |
161 | 0 | { |
162 | 0 | NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); |
163 | 0 | return (mCurrentItem && mCurrentItem->mType != Token::END); |
164 | 0 | } |
165 | | |
166 | | /** |
167 | | * Trivial Tokens |
168 | | */ |
169 | | //-- LF, changed to enum |
170 | | enum _TrivialTokens { |
171 | | D_QUOTE = '\"', |
172 | | S_QUOTE = '\'', |
173 | | L_PAREN = '(', |
174 | | R_PAREN = ')', |
175 | | L_BRACKET = '[', |
176 | | R_BRACKET = ']', |
177 | | L_ANGLE = '<', |
178 | | R_ANGLE = '>', |
179 | | COMMA = ',', |
180 | | PERIOD = '.', |
181 | | ASTERISK = '*', |
182 | | FORWARD_SLASH = '/', |
183 | | EQUAL = '=', |
184 | | BANG = '!', |
185 | | VERT_BAR = '|', |
186 | | AT_SIGN = '@', |
187 | | DOLLAR_SIGN = '$', |
188 | | PLUS = '+', |
189 | | HYPHEN = '-', |
190 | | COLON = ':', |
191 | | //-- whitespace tokens |
192 | | SPACE = ' ', |
193 | | TX_TAB = '\t', |
194 | | TX_CR = '\n', |
195 | | TX_LF = '\r' |
196 | | }; |
197 | | |
198 | | private: |
199 | | |
200 | | Token* mCurrentItem; |
201 | | Token* mFirstItem; |
202 | | Token* mLastItem; |
203 | | |
204 | | int mTokenCount; |
205 | | |
206 | | void addToken(Token* aToken); |
207 | | |
208 | | /** |
209 | | * Returns true if the following Token should be an operator. |
210 | | * This is a helper for the first bullet of [XPath 3.7] |
211 | | * Lexical Structure |
212 | | */ |
213 | | bool nextIsOperatorToken(Token* aToken); |
214 | | |
215 | | /** |
216 | | * Returns true if the given character represents a numeric letter (digit) |
217 | | * Implemented in ExprLexerChars.cpp |
218 | | */ |
219 | | static bool isXPathDigit(char16_t ch) |
220 | 0 | { |
221 | 0 | return (ch >= '0' && ch <= '9'); |
222 | 0 | } |
223 | | }; |
224 | | |
225 | | #endif |
226 | | |