Coverage Report

Created: 2025-11-24 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/brpc/src/butil/string_splitter.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
// Date: Mon. Apr. 18 19:52:34 CST 2011
19
20
// Iteratively split a string by one or multiple separators.
21
22
#ifndef BUTIL_STRING_SPLITTER_H
23
#define BUTIL_STRING_SPLITTER_H
24
25
#include <stdlib.h>
26
#include <stdint.h>
27
#include "butil/strings/string_piece.h"
28
29
// It's common to encode data into strings separated by special characters
30
// and decode them back, but functions such as `split_string' have to modify
31
// the input string, which is bad. If we parse the string from scratch, the
32
// code will be filled with pointer operations and hard to understand.
33
//
34
// What we want is:
35
// - Scan the string once: just do simple things efficiently.
36
// - Do not modify input string: Changing input is bad, it may bring hidden
37
//   bugs, concurrency issues and non-const propagations.
38
// - Split the string in-place without additional buffer/array.
39
//
40
// StringSplitter meets these requirements.
41
// Usage:
42
//     const char* the_string_to_split = ...;
43
//     for (StringSplitter s(the_string_to_split, '\t'); s; ++s) {
44
//         printf("%.*s\n", (int)s.length(), s.field());
45
//     }
46
// 
47
// "s" behaves as an iterator and evaluates to true before ending.
48
// "s.field()" and "s.length()" are address and length of current field
49
// respectively. Notice that "s.field()" may not end with '\0' because
50
// we don't modify input. You can copy the field to a dedicated buffer
51
// or apply a function supporting length.
52
53
namespace butil {
54
55
enum EmptyFieldAction {
56
    SKIP_EMPTY_FIELD,
57
    ALLOW_EMPTY_FIELD
58
};
59
60
// Split a string with one character
61
class StringSplitter {
62
public:
63
    // Split `input' with `separator'. If `action' is SKIP_EMPTY_FIELD, zero-
64
    // length() field() will be skipped.
65
    inline StringSplitter(const char* input, char separator,
66
                          EmptyFieldAction action = SKIP_EMPTY_FIELD);
67
    // Allows containing embedded '\0' characters and separator can be '\0',
68
    // if str_end is not NULL.
69
    inline StringSplitter(const char* str_begin, const char* str_end,
70
                          char separator,
71
                          EmptyFieldAction action = SKIP_EMPTY_FIELD);
72
    // Allows containing embedded '\0' characters and separator can be '\0'.
73
    inline StringSplitter(const StringPiece& input, char separator,
74
                          EmptyFieldAction action = SKIP_EMPTY_FIELD);
75
76
    // Move splitter forward.
77
    inline StringSplitter& operator++();
78
    inline StringSplitter operator++(int);
79
80
    // True iff field() is valid.
81
    inline operator const void*() const;
82
83
    // Beginning address and length of the field. *(field() + length()) may
84
    // not be '\0' because we don't modify `input'.
85
    inline const char* field() const;
86
    inline size_t length() const;
87
    inline StringPiece field_sp() const;
88
89
    // Cast field to specific type, and write the value into `pv'.
90
    // Returns 0 on success, -1 otherwise.
91
    // NOTE: If separator is a digit, casting functions always return -1.
92
    inline int to_int8(int8_t *pv) const;
93
    inline int to_uint8(uint8_t *pv) const;
94
    inline int to_int(int *pv) const;
95
    inline int to_uint(unsigned int *pv) const;
96
    inline int to_long(long *pv) const;
97
    inline int to_ulong(unsigned long *pv) const;
98
    inline int to_longlong(long long *pv) const;
99
    inline int to_ulonglong(unsigned long long *pv) const;
100
    inline int to_float(float *pv) const;
101
    inline int to_double(double *pv) const;
102
    
103
private:
104
    inline bool not_end(const char* p) const;
105
    inline void init();
106
    
107
    const char* _head;
108
    const char* _tail;
109
    const char* _str_tail;
110
    const char _sep;
111
    const EmptyFieldAction _empty_field_action;
112
};
113
114
// Split a string with one of the separators
115
class StringMultiSplitter {
116
public:
117
    // Split `input' with one character of `separators'. If `action' is
118
    // SKIP_EMPTY_FIELD, zero-length() field() will be skipped.
119
    // NOTE: This utility stores the pointer to `separators' directly rather than
120
    //       copying the content because this utility is intended to be used
121
    //       in ad-hoc manner where lifetime of `separators' is generally
122
    //       longer than this utility.
123
    inline StringMultiSplitter(const char* input, const char* separators,
124
                               EmptyFieldAction action = SKIP_EMPTY_FIELD);
125
    // Allows containing embedded '\0' characters if str_end is not NULL.
126
    // NOTE: `separators` cannot contain embedded '\0' character.
127
    inline StringMultiSplitter(const char* str_begin, const char* str_end,
128
                               const char* separators,
129
                               EmptyFieldAction action = SKIP_EMPTY_FIELD);
130
131
    // Move splitter forward.
132
    inline StringMultiSplitter& operator++();
133
    inline StringMultiSplitter operator++(int);
134
135
    // True iff field() is valid.
136
    inline operator const void*() const;
137
138
    // Beginning address and length of the field. *(field() + length()) may
139
    // not be '\0' because we don't modify `input'.
140
    inline const char* field() const;
141
    inline size_t length() const;
142
    inline StringPiece field_sp() const;
143
144
    // Cast field to specific type, and write the value into `pv'.
145
    // Returns 0 on success, -1 otherwise.
146
    // NOTE: If separators contains a digit, casting functions always return -1.
147
    inline int to_int8(int8_t *pv) const;
148
    inline int to_uint8(uint8_t *pv) const;
149
    inline int to_int(int *pv) const;
150
    inline int to_uint(unsigned int *pv) const;
151
    inline int to_long(long *pv) const;
152
    inline int to_ulong(unsigned long *pv) const;
153
    inline int to_longlong(long long *pv) const;
154
    inline int to_ulonglong(unsigned long long *pv) const;
155
    inline int to_float(float *pv) const;
156
    inline int to_double(double *pv) const;
157
158
private:
159
    inline bool is_sep(char c) const;
160
    inline bool not_end(const char* p) const;
161
    inline void init();
162
    
163
    const char* _head;
164
    const char* _tail;
165
    const char* _str_tail;
166
    const char* const _seps;
167
    const EmptyFieldAction _empty_field_action;
168
};
169
170
// Split query in the format according to the given delimiters.
171
// This class can also handle some exceptional cases.
172
// 1. consecutive pair_delimiter are omitted, for example,
173
//    suppose key_value_delimiter is '=' and pair_delimiter
174
//    is '&', then 'k1=v1&&&k2=v2' is normalized to 'k1=v1&k2=v2'.
175
// 2. key or value can be empty or both can be empty.
176
// 3. consecutive key_value_delimiter are not omitted, for example,
177
//    suppose input is 'k1===v2' and key_value_delimiter is '=', then
178
//    key() returns 'k1', value() returns '==v2'.
179
class KeyValuePairsSplitter {
180
public:
181
    inline KeyValuePairsSplitter(const char* str_begin,
182
                                 const char* str_end,
183
                                 char pair_delimiter,
184
                                 char key_value_delimiter)
185
0
        : _sp(str_begin, str_end, pair_delimiter)
186
0
        , _delim_pos(StringPiece::npos)
187
0
        , _key_value_delim(key_value_delimiter) {
188
0
        UpdateDelimiterPosition();
189
0
    }
190
191
    inline KeyValuePairsSplitter(const char* str_begin,
192
                                 char pair_delimiter,
193
                                 char key_value_delimiter)
194
0
        : KeyValuePairsSplitter(str_begin, NULL,
195
0
                pair_delimiter, key_value_delimiter) {}
196
197
    inline KeyValuePairsSplitter(const StringPiece &sp,
198
                                 char pair_delimiter,
199
                                 char key_value_delimiter)
200
0
        : KeyValuePairsSplitter(sp.begin(), sp.end(),
201
0
                pair_delimiter, key_value_delimiter) {}
202
203
0
    inline StringPiece key() {
204
0
        return key_and_value().substr(0, _delim_pos);
205
0
    }
206
207
0
    inline StringPiece value() {
208
0
        return key_and_value().substr(_delim_pos + 1);
209
0
    }
210
211
    // Get the current value of key and value
212
    // in the format of "key=value"
213
0
    inline StringPiece key_and_value() {
214
0
        return StringPiece(_sp.field(), _sp.length());
215
0
    }
216
217
    // Move splitter forward.
218
0
    inline KeyValuePairsSplitter& operator++() {
219
0
        ++_sp;
220
0
        UpdateDelimiterPosition();
221
0
        return *this;
222
0
    }
223
224
0
    inline KeyValuePairsSplitter operator++(int) {
225
0
        KeyValuePairsSplitter tmp = *this;
226
0
        operator++();
227
0
        return tmp;
228
0
    }
229
230
0
    inline operator const void*() const { return _sp; }
231
232
private:
233
    inline void UpdateDelimiterPosition();
234
235
private:
236
    StringSplitter _sp;
237
    StringPiece::size_type _delim_pos;
238
    const char _key_value_delim;
239
};
240
241
}  // namespace butil
242
243
#include "butil/string_splitter_inl.h"
244
245
#endif  // BUTIL_STRING_SPLITTER_H