/src/sentencepiece/third_party/protobuf-lite/stringpiece.cc
Line | Count | Source |
1 | | // Protocol Buffers - Google's data interchange format |
2 | | // Copyright 2008 Google Inc. All rights reserved. |
3 | | // https://developers.google.com/protocol-buffers/ |
4 | | // |
5 | | // Redistribution and use in source and binary forms, with or without |
6 | | // modification, are permitted provided that the following conditions are |
7 | | // met: |
8 | | // |
9 | | // * Redistributions of source code must retain the above copyright |
10 | | // notice, this list of conditions and the following disclaimer. |
11 | | // * Redistributions in binary form must reproduce the above |
12 | | // copyright notice, this list of conditions and the following disclaimer |
13 | | // in the documentation and/or other materials provided with the |
14 | | // distribution. |
15 | | // * Neither the name of Google Inc. nor the names of its |
16 | | // contributors may be used to endorse or promote products derived from |
17 | | // this software without specific prior written permission. |
18 | | // |
19 | | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | | #include <google/protobuf/stubs/stringpiece.h> |
31 | | |
32 | | #include <string.h> |
33 | | #include <algorithm> |
34 | | #include <climits> |
35 | | #include <string> |
36 | | #include <ostream> |
37 | | |
38 | | #include <google/protobuf/stubs/logging.h> |
39 | | |
40 | | namespace google { |
41 | | namespace protobuf { |
42 | 0 | std::ostream& operator<<(std::ostream& o, StringPiece piece) { |
43 | 0 | o.write(piece.data(), piece.size()); |
44 | 0 | return o; |
45 | 0 | } |
46 | | |
47 | | // Out-of-line error path. |
48 | 0 | void StringPiece::LogFatalSizeTooBig(size_t size, const char* details) { |
49 | 0 | GOOGLE_LOG(FATAL) << "size too big: " << size << " details: " << details; |
50 | 0 | } |
51 | | |
52 | | StringPiece::StringPiece(StringPiece x, stringpiece_ssize_type pos) |
53 | 0 | : ptr_(x.ptr_ + pos), length_(x.length_ - pos) { |
54 | 0 | GOOGLE_DCHECK_LE(0, pos); |
55 | 0 | GOOGLE_DCHECK_LE(pos, x.length_); |
56 | 0 | } |
57 | | |
58 | | StringPiece::StringPiece(StringPiece x, |
59 | | stringpiece_ssize_type pos, |
60 | | stringpiece_ssize_type len) |
61 | 0 | : ptr_(x.ptr_ + pos), length_(std::min(len, x.length_ - pos)) { |
62 | 0 | GOOGLE_DCHECK_LE(0, pos); |
63 | 0 | GOOGLE_DCHECK_LE(pos, x.length_); |
64 | 0 | GOOGLE_DCHECK_GE(len, 0); |
65 | 0 | } |
66 | | |
67 | 0 | void StringPiece::CopyToString(std::string* target) const { |
68 | 0 | target->assign(ptr_, length_); |
69 | 0 | } |
70 | | |
71 | 0 | void StringPiece::AppendToString(std::string* target) const { |
72 | 0 | target->append(ptr_, length_); |
73 | 0 | } |
74 | | |
75 | 0 | bool StringPiece::Consume(StringPiece x) { |
76 | 0 | if (starts_with(x)) { |
77 | 0 | ptr_ += x.length_; |
78 | 0 | length_ -= x.length_; |
79 | 0 | return true; |
80 | 0 | } |
81 | 0 | return false; |
82 | 0 | } |
83 | | |
84 | 0 | bool StringPiece::ConsumeFromEnd(StringPiece x) { |
85 | 0 | if (ends_with(x)) { |
86 | 0 | length_ -= x.length_; |
87 | 0 | return true; |
88 | 0 | } |
89 | 0 | return false; |
90 | 0 | } |
91 | | |
92 | | stringpiece_ssize_type StringPiece::copy(char* buf, |
93 | | size_type n, |
94 | 0 | size_type pos) const { |
95 | 0 | stringpiece_ssize_type ret = std::min(length_ - pos, n); |
96 | 0 | memcpy(buf, ptr_ + pos, ret); |
97 | 0 | return ret; |
98 | 0 | } |
99 | | |
100 | 0 | bool StringPiece::contains(StringPiece s) const { |
101 | 0 | return find(s, 0) != npos; |
102 | 0 | } |
103 | | |
104 | 0 | stringpiece_ssize_type StringPiece::find(StringPiece s, size_type pos) const { |
105 | 0 | if (length_ <= 0 || pos > static_cast<size_type>(length_)) { |
106 | 0 | if (length_ == 0 && pos == 0 && s.length_ == 0) return 0; |
107 | 0 | return npos; |
108 | 0 | } |
109 | 0 | const char *result = std::search(ptr_ + pos, ptr_ + length_, |
110 | 0 | s.ptr_, s.ptr_ + s.length_); |
111 | 0 | return result == ptr_ + length_ ? npos : result - ptr_; |
112 | 0 | } |
113 | | |
114 | 0 | stringpiece_ssize_type StringPiece::find(char c, size_type pos) const { |
115 | 0 | if (length_ <= 0 || pos >= static_cast<size_type>(length_)) { |
116 | 0 | return npos; |
117 | 0 | } |
118 | 0 | const char* result = static_cast<const char*>( |
119 | 0 | memchr(ptr_ + pos, c, length_ - pos)); |
120 | 0 | return result != nullptr ? result - ptr_ : npos; |
121 | 0 | } |
122 | | |
123 | 0 | stringpiece_ssize_type StringPiece::rfind(StringPiece s, size_type pos) const { |
124 | 0 | if (length_ < s.length_) return npos; |
125 | 0 | const size_t ulen = length_; |
126 | 0 | if (s.length_ == 0) return std::min(ulen, pos); |
127 | | |
128 | 0 | const char* last = ptr_ + std::min(ulen - s.length_, pos) + s.length_; |
129 | 0 | const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); |
130 | 0 | return result != last ? result - ptr_ : npos; |
131 | 0 | } |
132 | | |
133 | | // Search range is [0..pos] inclusive. If pos == npos, search everything. |
134 | 0 | stringpiece_ssize_type StringPiece::rfind(char c, size_type pos) const { |
135 | | // Note: memrchr() is not available on Windows. |
136 | 0 | if (length_ <= 0) return npos; |
137 | 0 | for (stringpiece_ssize_type i = |
138 | 0 | std::min(pos, static_cast<size_type>(length_ - 1)); |
139 | 0 | i >= 0; --i) { |
140 | 0 | if (ptr_[i] == c) { |
141 | 0 | return i; |
142 | 0 | } |
143 | 0 | } |
144 | 0 | return npos; |
145 | 0 | } |
146 | | |
147 | | // For each character in characters_wanted, sets the index corresponding |
148 | | // to the ASCII code of that character to 1 in table. This is used by |
149 | | // the find_.*_of methods below to tell whether or not a character is in |
150 | | // the lookup table in constant time. |
151 | | // The argument `table' must be an array that is large enough to hold all |
152 | | // the possible values of an unsigned char. Thus it should be be declared |
153 | | // as follows: |
154 | | // bool table[UCHAR_MAX + 1] |
155 | | static inline void BuildLookupTable(StringPiece characters_wanted, |
156 | 0 | bool* table) { |
157 | 0 | const stringpiece_ssize_type length = characters_wanted.length(); |
158 | 0 | const char* const data = characters_wanted.data(); |
159 | 0 | for (stringpiece_ssize_type i = 0; i < length; ++i) { |
160 | 0 | table[static_cast<unsigned char>(data[i])] = true; |
161 | 0 | } |
162 | 0 | } |
163 | | |
164 | | stringpiece_ssize_type StringPiece::find_first_of(StringPiece s, |
165 | 0 | size_type pos) const { |
166 | 0 | if (length_ <= 0 || s.length_ <= 0) { |
167 | 0 | return npos; |
168 | 0 | } |
169 | | // Avoid the cost of BuildLookupTable() for a single-character search. |
170 | 0 | if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); |
171 | | |
172 | 0 | bool lookup[UCHAR_MAX + 1] = { false }; |
173 | 0 | BuildLookupTable(s, lookup); |
174 | 0 | for (stringpiece_ssize_type i = pos; i < length_; ++i) { |
175 | 0 | if (lookup[static_cast<unsigned char>(ptr_[i])]) { |
176 | 0 | return i; |
177 | 0 | } |
178 | 0 | } |
179 | 0 | return npos; |
180 | 0 | } |
181 | | |
182 | | stringpiece_ssize_type StringPiece::find_first_not_of(StringPiece s, |
183 | 0 | size_type pos) const { |
184 | 0 | if (length_ <= 0) return npos; |
185 | 0 | if (s.length_ <= 0) return 0; |
186 | | // Avoid the cost of BuildLookupTable() for a single-character search. |
187 | 0 | if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); |
188 | | |
189 | 0 | bool lookup[UCHAR_MAX + 1] = { false }; |
190 | 0 | BuildLookupTable(s, lookup); |
191 | 0 | for (stringpiece_ssize_type i = pos; i < length_; ++i) { |
192 | 0 | if (!lookup[static_cast<unsigned char>(ptr_[i])]) { |
193 | 0 | return i; |
194 | 0 | } |
195 | 0 | } |
196 | 0 | return npos; |
197 | 0 | } |
198 | | |
199 | | stringpiece_ssize_type StringPiece::find_first_not_of(char c, |
200 | 0 | size_type pos) const { |
201 | 0 | if (length_ <= 0) return npos; |
202 | | |
203 | 0 | for (; pos < static_cast<size_type>(length_); ++pos) { |
204 | 0 | if (ptr_[pos] != c) { |
205 | 0 | return pos; |
206 | 0 | } |
207 | 0 | } |
208 | 0 | return npos; |
209 | 0 | } |
210 | | |
211 | | stringpiece_ssize_type StringPiece::find_last_of(StringPiece s, |
212 | 0 | size_type pos) const { |
213 | 0 | if (length_ <= 0 || s.length_ <= 0) return npos; |
214 | | // Avoid the cost of BuildLookupTable() for a single-character search. |
215 | 0 | if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); |
216 | | |
217 | 0 | bool lookup[UCHAR_MAX + 1] = { false }; |
218 | 0 | BuildLookupTable(s, lookup); |
219 | 0 | for (stringpiece_ssize_type i = |
220 | 0 | std::min(pos, static_cast<size_type>(length_ - 1)); i >= 0; --i) { |
221 | 0 | if (lookup[static_cast<unsigned char>(ptr_[i])]) { |
222 | 0 | return i; |
223 | 0 | } |
224 | 0 | } |
225 | 0 | return npos; |
226 | 0 | } |
227 | | |
228 | | stringpiece_ssize_type StringPiece::find_last_not_of(StringPiece s, |
229 | 0 | size_type pos) const { |
230 | 0 | if (length_ <= 0) return npos; |
231 | | |
232 | 0 | stringpiece_ssize_type i = std::min(pos, static_cast<size_type>(length_ - 1)); |
233 | 0 | if (s.length_ <= 0) return i; |
234 | | |
235 | | // Avoid the cost of BuildLookupTable() for a single-character search. |
236 | 0 | if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); |
237 | | |
238 | 0 | bool lookup[UCHAR_MAX + 1] = { false }; |
239 | 0 | BuildLookupTable(s, lookup); |
240 | 0 | for (; i >= 0; --i) { |
241 | 0 | if (!lookup[static_cast<unsigned char>(ptr_[i])]) { |
242 | 0 | return i; |
243 | 0 | } |
244 | 0 | } |
245 | 0 | return npos; |
246 | 0 | } |
247 | | |
248 | | stringpiece_ssize_type StringPiece::find_last_not_of(char c, |
249 | 0 | size_type pos) const { |
250 | 0 | if (length_ <= 0) return npos; |
251 | | |
252 | 0 | for (stringpiece_ssize_type i = |
253 | 0 | std::min(pos, static_cast<size_type>(length_ - 1)); i >= 0; --i) { |
254 | 0 | if (ptr_[i] != c) { |
255 | 0 | return i; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | return npos; |
259 | 0 | } |
260 | | |
261 | 0 | StringPiece StringPiece::substr(size_type pos, size_type n) const { |
262 | 0 | if (pos > length_) pos = length_; |
263 | 0 | if (n > length_ - pos) n = length_ - pos; |
264 | 0 | return StringPiece(ptr_ + pos, n); |
265 | 0 | } |
266 | | |
267 | | const StringPiece::size_type StringPiece::npos = size_type(-1); |
268 | | |
269 | | } // namespace protobuf |
270 | | } // namespace google |