/src/trafficserver/include/tscore/Tokenizer.h
Line | Count | Source |
1 | | /** @file |
2 | | |
3 | | A string tokenizer that splits a string on a set of delimiter characters |
4 | | |
5 | | @section license License |
6 | | |
7 | | Licensed to the Apache Software Foundation (ASF) under one |
8 | | or more contributor license agreements. See the NOTICE file |
9 | | distributed with this work for additional information |
10 | | regarding copyright ownership. The ASF licenses this file |
11 | | to you under the Apache License, Version 2.0 (the |
12 | | "License"); you may not use this file except in compliance |
13 | | with the License. You may obtain a copy of the License at |
14 | | |
15 | | http://www.apache.org/licenses/LICENSE-2.0 |
16 | | |
17 | | Unless required by applicable law or agreed to in writing, software |
18 | | distributed under the License is distributed on an "AS IS" BASIS, |
19 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
20 | | See the License for the specific language governing permissions and |
21 | | limitations under the License. |
22 | | */ |
23 | | |
24 | | /***************************************/ |
25 | | |
26 | | #pragma once |
27 | | |
28 | | /**************************************************************************** |
29 | | * |
30 | | * Tokenizer.h - A string tokenizer |
31 | | * |
32 | | * |
33 | | * |
34 | | ****************************************************************************/ |
35 | | |
36 | | /********************************************************** |
37 | | * class Tokenizer |
38 | | * |
39 | | * Tokenizes a string, and then allows array like access |
40 | | * |
41 | | * The delimiters are determined by the string passed to the |
42 | | * constructor. |
43 | | * |
44 | | * There are two memory options, plus flags that adjust how tokens are split. |
45 | | * SHARE_TOKS - this modifies the original string passed in |
46 | | * through Initialize() and shares its space. NULLs |
47 | | * are inserted into string after each token. Choosing |
48 | | * this option means the user is responsible for not |
49 | | * deallocating the string storage before deallocating |
50 | | * the tokenizer object |
51 | | * COPY_TOKS - this option copies the original string and |
52 | | * leaves the original unchanged. The deallocation of the |
53 | | * original string and the deallocation of the Tokenizer |
54 | | * object are now independent. |
55 | | * Note: If neither SHARE_TOKS nor COPY_TOKS is selected, COPY_TOKS |
56 | | * is the default |
57 | | * ALLOW_EMPTY_TOKS: If multiple delimiters appear next to each |
58 | | * other, each delimiter creates a token some of which |
59 | | * will be zero length. The default is to skip repeated |
60 | | * delimiters |
61 | | * |
62 | | * Tokenizer(const char* StrOfDelimit) - a string that contains |
63 | | * the delimiters for tokenizing. This string is copied. |
64 | | * |
65 | | * Initialize(char* str, unsigned options) - Submits a string |
66 | | * to be tokenized according to the memory options listed above |
67 | | * |
68 | | * ReUse() - (private) Allows the object to be reused for a new string |
69 | | * After ReUse() is called, Initialize() can be called safely |
70 | | * again |
71 | | * |
72 | | * operator[index] - returns a pointer to the token at position |
73 | | * index. If index > numTokens-1, NULL is returned. |
74 | | * Because of the way tokens are stored, this is an O(n) operation. |
75 | | * It is very fast though for the first 16 tokens and |
76 | | * is intended to be used on a small number of tokens |
77 | | * |
78 | | * iterFirst(tok_iter_state* state) - Returns the first |
79 | | * token and initializes state argument for subsequent |
80 | | * calls to iterNext. If no tokens exist, NULL is |
81 | | * returned |
82 | | * |
83 | | * iterNext(tok_iter_state* state) - Returns the token following the |
84 | | * one returned by the previous iterFirst/iterNext call on state. |
85 | | * Returns NULL if no more tokens exist. |
86 | | * |
87 | | * Note: To iterate through a list using operator[] takes O(n^2) time |
88 | | * Using iterFirst, iterNext the running time is O(n), so use |
89 | | * the iteration where possible |
90 | | * |
91 | | * count() - returns the number of tokens |
92 | | * |
93 | | * setMaxTokens() - sets the maximum number of tokens. Once maxTokens |
94 | | * is reached, delimiters are ignored and the |
95 | | * last token is the rest of the string. The argument is unsigned, |
96 | | * so pass UINT_MAX (what -1 converts to) for no limit on tokens |
97 | | * |
98 | | * getMaxTokens() - returns maxTokens. UINT_MAX means no limit |
99 | | * |
100 | | * Print() - Debugging method to print out the tokens |
101 | | * |
102 | | *******************************************************************/ |
103 | | |
104 | | #include "tscore/ink_apidefs.h" |
105 | | |
// Option bits for Tokenizer::Initialize(); each flag occupies its own bit.

// Tokenize a private copy of the input and leave the original untouched.
#define COPY_TOKS 0x1u
// Tokenize in place: the caller's string is modified and its storage shared.
#define SHARE_TOKS 0x2u
// Adjacent delimiters each produce a token, some of which are zero length.
#define ALLOW_EMPTY_TOKS 0x4u
// NOTE(review): not described in this header; presumably related to the
// quote tracking done by Tokenizer -- confirm against the implementation.
#define ALLOW_SPACES 0x8u
110 | | |
// Number of token pointers held by a single tok_node.
#define TOK_NODE_ELEMENTS 16

// One link in the tokenizer's token storage: a fixed-size batch of token
// pointers plus a link to another node.
struct tok_node {
  char *el[TOK_NODE_ELEMENTS]; // token pointers stored in this node
  struct tok_node *next;       // link to another node (presumably the next batch)
};
117 | | |
// Caller-owned cursor handed to Tokenizer::iterFirst() / iterNext().
struct tok_iter_state {
  struct tok_node *node; // storage node the cursor refers to
  int index;             // position associated with that node (presumably a slot in el[])
};
122 | | |
123 | | class Tokenizer |
124 | | { |
125 | | public: |
126 | | Tokenizer(const char *StrOfDelimiters); |
127 | | ~Tokenizer(); |
128 | | |
129 | | unsigned Initialize(char *str, unsigned options); |
130 | | unsigned Initialize(const char *str); // Automatically sets option to copy |
131 | | const char *operator[](unsigned index) const; |
132 | | |
133 | | void |
134 | | setMaxTokens(unsigned max) |
135 | 0 | { |
136 | 0 | maxTokens = max; |
137 | 0 | }; |
138 | | |
139 | | unsigned |
140 | | getMaxTokens() const |
141 | 0 | { |
142 | 0 | return maxTokens; |
143 | 0 | }; |
144 | | |
145 | | unsigned count() const; |
146 | | void Print() const; |
147 | | |
148 | | const char *iterFirst(tok_iter_state *state); |
149 | | const char *iterNext(tok_iter_state *state); |
150 | | |
151 | | // noncopyable |
152 | | Tokenizer &operator=(const Tokenizer &) = delete; |
153 | | Tokenizer(const Tokenizer &) = delete; |
154 | | |
155 | | private: |
156 | | int isDelimiter(char c); |
157 | | void addToken(char *startAddr, int length); |
158 | | void ReUse(); |
159 | | char *strOfDelimit; |
160 | | tok_node start_node; |
161 | | unsigned numValidTokens; |
162 | | unsigned maxTokens; |
163 | | int options; |
164 | | bool quoteFound; |
165 | | |
166 | | // State about where to add the next token |
167 | | tok_node *add_node; |
168 | | int add_index; |
169 | | }; |