/src/brpc/src/butil/string_splitter.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | // Date: Mon. Apr. 18 19:52:34 CST 2011 |
19 | | |
20 | | // Iteratively split a string by one or multiple separators. |
21 | | |
22 | | #ifndef BUTIL_STRING_SPLITTER_H |
23 | | #define BUTIL_STRING_SPLITTER_H |
24 | | |
25 | | #include <stdlib.h> |
26 | | #include <stdint.h> |
27 | | #include "butil/strings/string_piece.h" |
28 | | |
29 | | // It's common to encode data into strings separated by special characters |
30 | | // and decode them back, but functions such as `split_string' have to modify |
31 | | // the input string, which is bad. If we parse the string from scratch, the |
32 | | // code will be filled with pointer operations and hard to understand. |
33 | | // |
34 | | // What we want is: |
35 | | // - Scan the string once: just do simple things efficiently. |
36 | | // - Do not modify input string: Changing input is bad, it may bring hidden |
37 | | // bugs, concurrency issues and non-const propagations. |
38 | | // - Split the string in-place without additional buffer/array. |
39 | | // |
40 | | // StringSplitter does meet these requirements. |
41 | | // Usage: |
42 | | // const char* the_string_to_split = ...; |
43 | | // for (StringSplitter s(the_string_to_split, '\t'); s; ++s) { |
44 | | // printf("%.*s\n", (int)s.length(), s.field()); |
45 | | // } |
46 | | // |
47 | | // "s" behaves as an iterator and evaluates to true before ending. |
48 | | // "s.field()" and "s.length()" are address and length of current field |
49 | | // respectively. Notice that "s.field()" may not end with '\0' because |
50 | | // we don't modify input. You can copy the field to a dedicated buffer |
51 | | // or apply a function supporting length. |
52 | | |
53 | | namespace butil { |
54 | | |
55 | | enum EmptyFieldAction { |
56 | | SKIP_EMPTY_FIELD, |
57 | | ALLOW_EMPTY_FIELD |
58 | | }; |
59 | | |
60 | | // Split a string with a single separator character. |
61 | | class StringSplitter { |
62 | | public: |
63 | | // Split `input' with `separator'. If `action' is SKIP_EMPTY_FIELD, |
64 | | // zero-length fields will be skipped. |
65 | | inline StringSplitter(const char* input, char separator, |
66 | | EmptyFieldAction action = SKIP_EMPTY_FIELD); |
67 | | // If str_end is not NULL, the input may contain embedded '\0' characters |
68 | | // and the separator can be '\0'. |
69 | | inline StringSplitter(const char* str_begin, const char* str_end, |
70 | | char separator, |
71 | | EmptyFieldAction action = SKIP_EMPTY_FIELD); |
72 | | // The input may contain embedded '\0' characters and the separator can be '\0'. |
73 | | inline StringSplitter(const StringPiece& input, char separator, |
74 | | EmptyFieldAction action = SKIP_EMPTY_FIELD); |
75 | | |
76 | | // Move splitter forward to the next field. |
77 | | inline StringSplitter& operator++(); |
78 | | inline StringSplitter operator++(int); |
79 | | |
80 | | // True iff field() is valid. |
81 | | inline operator const void*() const; |
82 | | |
83 | | // Beginning address and length of the current field. *(field() + length()) |
84 | | // may not be '\0' because we don't modify `input'. |
85 | | inline const char* field() const; |
86 | | inline size_t length() const; |
87 | | inline StringPiece field_sp() const; |
88 | | |
89 | | // Convert the current field to a specific type and write the value into `pv'. |
90 | | // Returns 0 on success, -1 otherwise. |
91 | | // NOTE: If the separator is a digit, the casting functions always return -1. |
92 | | inline int to_int8(int8_t *pv) const; |
93 | | inline int to_uint8(uint8_t *pv) const; |
94 | | inline int to_int(int *pv) const; |
95 | | inline int to_uint(unsigned int *pv) const; |
96 | | inline int to_long(long *pv) const; |
97 | | inline int to_ulong(unsigned long *pv) const; |
98 | | inline int to_longlong(long long *pv) const; |
99 | | inline int to_ulonglong(unsigned long long *pv) const; |
100 | | inline int to_float(float *pv) const; |
101 | | inline int to_double(double *pv) const; |
102 | | |
103 | | private: |
104 | | inline bool not_end(const char* p) const; |
105 | | inline void init(); |
106 | | |
107 | | const char* _head; |
108 | | const char* _tail; |
109 | | const char* _str_tail; |
110 | | const char _sep; |
111 | | const EmptyFieldAction _empty_field_action; |
112 | | }; |
113 | | |
114 | | // Split a string with any one of several separator characters. |
115 | | class StringMultiSplitter { |
116 | | public: |
117 | | // Split `input' with any one character of `separators'. If `action' is |
118 | | // SKIP_EMPTY_FIELD, zero-length fields will be skipped. |
119 | | // NOTE: This utility stores the pointer to `separators' directly rather than |
120 | | // copying the content, because this utility is intended to be used |
121 | | // in an ad-hoc manner where the lifetime of `separators' is generally |
122 | | // longer than that of this utility. |
123 | | inline StringMultiSplitter(const char* input, const char* separators, |
124 | | EmptyFieldAction action = SKIP_EMPTY_FIELD); |
125 | | // If str_end is not NULL, the input may contain embedded '\0' characters. |
126 | | // NOTE: `separators' cannot contain an embedded '\0' character. |
127 | | inline StringMultiSplitter(const char* str_begin, const char* str_end, |
128 | | const char* separators, |
129 | | EmptyFieldAction action = SKIP_EMPTY_FIELD); |
130 | | |
131 | | // Move splitter forward to the next field. |
132 | | inline StringMultiSplitter& operator++(); |
133 | | inline StringMultiSplitter operator++(int); |
134 | | |
135 | | // True iff field() is valid. |
136 | | inline operator const void*() const; |
137 | | |
138 | | // Beginning address and length of the current field. *(field() + length()) |
139 | | // may not be '\0' because we don't modify `input'. |
140 | | inline const char* field() const; |
141 | | inline size_t length() const; |
142 | | inline StringPiece field_sp() const; |
143 | | |
144 | | // Convert the current field to a specific type and write the value into `pv'. |
145 | | // Returns 0 on success, -1 otherwise. |
146 | | // NOTE: If `separators' contains a digit, the casting functions always return -1. |
147 | | inline int to_int8(int8_t *pv) const; |
148 | | inline int to_uint8(uint8_t *pv) const; |
149 | | inline int to_int(int *pv) const; |
150 | | inline int to_uint(unsigned int *pv) const; |
151 | | inline int to_long(long *pv) const; |
152 | | inline int to_ulong(unsigned long *pv) const; |
153 | | inline int to_longlong(long long *pv) const; |
154 | | inline int to_ulonglong(unsigned long long *pv) const; |
155 | | inline int to_float(float *pv) const; |
156 | | inline int to_double(double *pv) const; |
157 | | |
158 | | private: |
159 | | inline bool is_sep(char c) const; |
160 | | inline bool not_end(const char* p) const; |
161 | | inline void init(); |
162 | | |
163 | | const char* _head; |
164 | | const char* _tail; |
165 | | const char* _str_tail; |
166 | | const char* const _seps; |
167 | | const EmptyFieldAction _empty_field_action; |
168 | | }; |
169 | | |
170 | | // Split a query string into key/value pairs according to the given delimiters. |
171 | | // This class can also handle some exceptional cases. |
172 | | // 1. consecutive pair_delimiter are omitted, for example, |
173 | | // suppose key_value_delimiter is '=' and pair_delimiter |
174 | | // is '&', then 'k1=v1&&&k2=v2' is normalized to 'k1=v1&k2=v2'. |
175 | | // 2. key or value can be empty or both can be empty. |
176 | | // 3. consecutive key_value_delimiter are not omitted, for example, |
177 | | // suppose input is 'k1===v2' and key_value_delimiter is '=', then |
178 | | // key() returns 'k1', value() returns '==v2'. |
179 | | class KeyValuePairsSplitter { |
180 | | public: |
181 | | inline KeyValuePairsSplitter(const char* str_begin, |
182 | | const char* str_end, |
183 | | char pair_delimiter, |
184 | | char key_value_delimiter) |
185 | 0 | : _sp(str_begin, str_end, pair_delimiter) |
186 | 0 | , _delim_pos(StringPiece::npos) |
187 | 0 | , _key_value_delim(key_value_delimiter) { |
188 | 0 | UpdateDelimiterPosition(); |
189 | 0 | } |
190 | | |
191 | | inline KeyValuePairsSplitter(const char* str_begin, |
192 | | char pair_delimiter, |
193 | | char key_value_delimiter) |
194 | 0 | : KeyValuePairsSplitter(str_begin, NULL, |
195 | 0 | pair_delimiter, key_value_delimiter) {} |
196 | | |
197 | | inline KeyValuePairsSplitter(const StringPiece &sp, |
198 | | char pair_delimiter, |
199 | | char key_value_delimiter) |
200 | 0 | : KeyValuePairsSplitter(sp.begin(), sp.end(), |
201 | 0 | pair_delimiter, key_value_delimiter) {} |
202 | | |
203 | 0 | inline StringPiece key() { |
204 | 0 | return key_and_value().substr(0, _delim_pos); |
205 | 0 | } |
206 | | |
207 | 0 | inline StringPiece value() { |
208 | 0 | return key_and_value().substr(_delim_pos + 1); |
209 | 0 | } |
210 | | |
211 | | // Get the current key/value pair as a single piece, |
212 | | // in the format of "key=value". |
213 | 0 | inline StringPiece key_and_value() { |
214 | 0 | return StringPiece(_sp.field(), _sp.length()); |
215 | 0 | } |
216 | | |
217 | | // Move splitter forward to the next pair. |
218 | 0 | inline KeyValuePairsSplitter& operator++() { |
219 | 0 | ++_sp; |
220 | 0 | UpdateDelimiterPosition(); |
221 | 0 | return *this; |
222 | 0 | } |
223 | | |
224 | 0 | inline KeyValuePairsSplitter operator++(int) { |
225 | 0 | KeyValuePairsSplitter tmp = *this; |
226 | 0 | operator++(); |
227 | 0 | return tmp; |
228 | 0 | } |
229 | | |
230 | 0 | inline operator const void*() const { return _sp; } |
231 | | |
232 | | private: |
233 | | inline void UpdateDelimiterPosition(); |
234 | | |
235 | | private: |
236 | | StringSplitter _sp; |
237 | | StringPiece::size_type _delim_pos; |
238 | | const char _key_value_delim; |
239 | | }; |
240 | | |
241 | | } // namespace butil |
242 | | |
243 | | #include "butil/string_splitter_inl.h" |
244 | | |
245 | | #endif // BUTIL_STRING_SPLITTER_H |