/src/sentencepiece/src/util.h
Line | Count | Source |
1 | | // Copyright 2016 Google Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #ifndef UTIL_H_ |
16 | | #define UTIL_H_ |
17 | | |
18 | | #include <stdio.h> |
19 | | #include <string.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <functional> |
23 | | #include <memory> |
24 | | #include <random> |
25 | | #include <sstream> |
26 | | #include <string> |
27 | | #include <thread> |
28 | | #include <utility> |
29 | | #include <vector> |
30 | | |
31 | | #include "common.h" |
32 | | #include "config.h" |
33 | | #include "sentencepiece_processor.h" |
34 | | #include "third_party/absl/strings/string_view.h" |
35 | | |
36 | | namespace sentencepiece { |
// Streams every element of `v` to `out`, each one preceded by a single space
// (so {1, 2} is written as " 1 2"). Returns `out` so calls can be chained.
template <typename T>
std::ostream &operator<<(std::ostream &out, const std::vector<T> &v) {
  // Bind by const reference: iterating by value copies each element, which is
  // wasteful for non-trivial T such as std::string.
  for (const auto &n : v) {
    out << " " << n;
  }
  return out;
}
44 | | |
45 | | uint32_t GetRandomGeneratorSeed(); |
46 | | |
47 | | // Sets data dir containing the global resources, e.g., pre-compiled |
48 | | // normalization data. |
49 | | void SetDataDir(absl::string_view data_dir); |
50 | | |
51 | | std::string GetDataDir(); |
52 | | |
53 | | // String utilities |
54 | | namespace string_util { |
55 | | |
56 | | template <typename Target> |
57 | | inline bool lexical_cast(absl::string_view arg, Target *result) { |
58 | | std::stringstream ss; |
59 | | return (ss << arg.data() && ss >> *result); |
60 | | } |
61 | | |
62 | | template <> |
63 | 0 | inline bool lexical_cast(absl::string_view arg, bool *result) { |
64 | 0 | const char *kTrue[] = {"1", "t", "true", "y", "yes"}; |
65 | 0 | const char *kFalse[] = {"0", "f", "false", "n", "no"}; |
66 | 0 | std::string lower_value = std::string(arg); |
67 | 0 | std::transform(lower_value.begin(), lower_value.end(), lower_value.begin(), |
68 | 0 | ::tolower); |
69 | 0 | for (size_t i = 0; i < 5; ++i) { |
70 | 0 | if (lower_value == kTrue[i]) { |
71 | 0 | *result = true; |
72 | 0 | return true; |
73 | 0 | } else if (lower_value == kFalse[i]) { |
74 | 0 | *result = false; |
75 | 0 | return true; |
76 | 0 | } |
77 | 0 | } |
78 | 0 |
|
79 | 0 | return false; |
80 | 0 | } |
81 | | |
82 | | template <> |
83 | 0 | inline bool lexical_cast(absl::string_view arg, std::string *result) { |
84 | 0 | *result = std::string(arg); |
85 | 0 | return true; |
86 | 0 | } |
87 | | |
88 | | template <typename T> |
89 | 0 | inline bool DecodePOD(absl::string_view str, T *result) { |
90 | 0 | if (sizeof(*result) != str.size()) { |
91 | 0 | return false; |
92 | 0 | } |
93 | 0 | memcpy(result, str.data(), sizeof(T)); |
94 | 0 | return true; |
95 | 0 | } |
96 | | |
// Returns a string of sizeof(T) bytes holding a byte-for-byte copy of the
// in-memory representation of `value`.
template <typename T>
inline std::string EncodePOD(const T &value) {
  std::string encoded(sizeof(T), '\0');
  memcpy(&encoded[0], &value, sizeof(T));
  return encoded;
}
104 | | |
// Formats `value` through an output stream configured for uppercase
// hexadecimal and returns the resulting text (no "0x" prefix is requested).
template <typename T>
inline std::string IntToHex(T value) {
  std::ostringstream hex_stream;
  hex_stream.setf(std::ios_base::hex, std::ios_base::basefield);
  hex_stream.setf(std::ios_base::uppercase);
  hex_stream << value;
  return hex_stream.str();
}
111 | | |
112 | | template <typename T> |
113 | | inline T HexToInt(absl::string_view value) { |
114 | | T n; |
115 | | std::istringstream is(value.data()); |
116 | | is >> std::hex >> n; |
117 | | return n; |
118 | | } |
119 | | |
// Writes the decimal representation of `val` into `s`, followed by a
// terminating '\0'. Negative values get a leading '-'.
// `s` must have room for every digit, the optional sign and the terminator.
// Returns the number of characters written, not counting the terminator.
template <typename T>
inline size_t Itoa(T val, char *s) {
  char *org = s;

  const bool negative = val < 0;
  if (negative) {
    *s++ = '-';
  }
  char *t = s;

  // Digits are produced least-significant first and reversed afterwards.
  // A negative value is never negated as a whole, because -val overflows for
  // the minimum value of a signed type. Instead each remainder, which lies
  // in [-9, 0] for negative input, is flipped individually.
  while (val) {
    const int digit = static_cast<int>(val % 10);
    *t++ = static_cast<char>('0' + (negative ? -digit : digit));
    val /= 10;
  }

  // Zero produces no digits in the loop above.
  if (s == t) {
    *t++ = '0';
  }

  *t = '\0';
  std::reverse(s, t);
  return static_cast<size_t>(t - org);
}
145 | | |
146 | | template <typename T> |
147 | | std::string SimpleItoa(T val) { |
148 | | char buf[32]; |
149 | | Itoa<T>(val, buf); |
150 | | return std::string(buf); |
151 | | } |
152 | | |
// Returns the byte length (1-4) of the UTF-8 character that starts at `src`,
// looked up from the high four bits of its first byte. Bytes whose high
// nibble is 0x0-0xB map to 1, 0xC-0xD to 2, 0xE to 3 and 0xF to 4.
inline size_t OneCharLen(const char *src) {
  static constexpr char kLengthByHighNibble[] =
      "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4";
  const int high_nibble = (*src & 0xFF) >> 4;
  return kLengthByHighNibble[high_nibble];
}
157 | | |
// Returns true when `x` is a UTF-8 trail (continuation) byte, i.e. its two
// top bits are 10 and the byte lies in [0x80, 0xBF].
inline bool IsTrailByte(char x) {
  const unsigned char byte = static_cast<unsigned char>(x);
  return (byte & 0xC0) == 0x80;
}
161 | | |
162 | 0 | inline bool IsValidCodepoint(char32 c) { |
163 | 0 | return (static_cast<uint32_t>(c) < 0xD800) || (c >= 0xE000 && c <= 0x10FFFF); |
164 | 0 | } |
165 | | |
166 | | bool IsStructurallyValid(absl::string_view str); |
167 | | |
168 | | using UnicodeText = std::vector<char32>; |
169 | | |
170 | | char32 DecodeUTF8(const char *begin, const char *end, size_t *mblen); |
171 | | |
172 | 0 | inline char32 DecodeUTF8(absl::string_view input, size_t *mblen) { |
173 | 0 | return DecodeUTF8(input.data(), input.data() + input.size(), mblen); |
174 | 0 | } |
175 | | |
176 | 0 | inline bool IsValidDecodeUTF8(absl::string_view input, size_t *mblen) { |
177 | 0 | const char32 c = DecodeUTF8(input, mblen); |
178 | 0 | return c != kUnicodeError || *mblen == 3; |
179 | 0 | } |
180 | | |
181 | | size_t EncodeUTF8(char32 c, char *output); |
182 | | |
183 | | // Return the length of the UTF-8 character in bytes. |
184 | 0 | inline size_t UTF8Length(char32 c) { |
185 | 0 | if (c <= 0x7F) { |
186 | 0 | return 1; |
187 | 0 | } |
188 | 0 | if (c <= 0x7FF) { |
189 | 0 | return 2; |
190 | 0 | } |
191 | 0 | // If `c` is out of range, we consider it as kUnicodeError, which is 3 bytes. |
192 | 0 | if (c <= 0xFFFF || c > 0x10FFFF) { |
193 | 0 | return 3; |
194 | 0 | } |
195 | 0 | return 4; |
196 | 0 | } |
197 | | |
198 | | std::string UnicodeCharToUTF8(const char32 c); |
199 | | |
200 | | UnicodeText UTF8ToUnicodeText(absl::string_view utf8); |
201 | | |
202 | | std::string UnicodeTextToUTF8(const UnicodeText &utext); |
203 | | |
204 | | } // namespace string_util |
205 | | |
206 | | // other map/ptr utilities |
207 | | namespace port { |
208 | | |
// Returns true when `collection.find(key)` locates an element.
template <class Collection, class Key>
bool ContainsKey(const Collection &collection, const Key &key) {
  const auto position = collection.find(key);
  const bool found = position != collection.end();
  return found;
}
213 | | |
// Returns a reference to the value mapped to `key` in `collection`.
// The key must be present: a missing key fails the CHECK below instead of
// dereferencing the end iterator, which would be undefined behavior.
template <class Collection>
const typename Collection::value_type::second_type &FindOrDie(
    const Collection &collection,
    const typename Collection::value_type::first_type &key) {
  const auto it = collection.find(key);
  // The key itself is not streamed into the message because the key type is
  // not required to support operator<<.
  CHECK(it != collection.end()) << "Map key not found";
  return it->second;
}
224 | | |
// Returns a reference to the value mapped to `key`, or a reference to `value`
// when `collection` has no such key. Because the fallback is returned by
// reference, `value` must outlive every use of the result.
template <class Collection>
const typename Collection::value_type::second_type &FindWithDefault(
    const Collection &collection,
    const typename Collection::value_type::first_type &key,
    const typename Collection::value_type::second_type &value) {
  const auto position = collection.find(key);
  const bool found = position != collection.end();
  return found ? position->second : value;
}
235 | | |
// Inserts `vt` into `*collection` and returns the `second` member of the
// pair returned by insert(), i.e. whether the insertion took place.
template <class Collection>
bool InsertIfNotPresent(Collection *const collection,
                        const typename Collection::value_type &vt) {
  const auto insertion = collection->insert(vt);
  return insertion.second;
}
241 | | |
// Key/value convenience overload: builds a value_type from `key` and `value`
// and forwards it to the value_type overload of InsertIfNotPresent.
// Returns what that overload returns.
template <class Collection>
bool InsertIfNotPresent(
    Collection *const collection,
    const typename Collection::value_type::first_type &key,
    const typename Collection::value_type::second_type &value) {
  const typename Collection::value_type entry(key, value);
  return InsertIfNotPresent(collection, entry);
}
250 | | |
// Inserts (key, data) into `*collection`; the insertion result is passed to
// CHECK with the message "duplicate key".
template <class Collection>
void InsertOrDie(Collection *const collection,
                 const typename Collection::value_type::first_type &key,
                 const typename Collection::value_type::second_type &data) {
  // The insertion stays inside the CHECK expression, exactly as before, so
  // it is evaluated under the same conditions as the check itself.
  CHECK(InsertIfNotPresent(collection,
                           typename Collection::value_type(key, data)))
      << "duplicate key";
}
257 | | |
258 | | // hash |
// 64-bit mixing step used for hashing. Updates `a`, `b` and `c` in place by
// running four rounds of the same subtract/xor-shift sequence; only the shift
// amounts differ between rounds.
inline void mix(uint64_t &a, uint64_t &b, uint64_t &c) {  // 64bit version
  // One round: each variable has the other two subtracted from it and is
  // then xor-ed with a shifted copy of the variable updated just before it.
  const auto round = [&a, &b, &c](int a_shift, int b_shift, int c_shift) {
    a -= b;
    a -= c;
    a ^= (c >> a_shift);
    b -= c;
    b -= a;
    b ^= (a << b_shift);
    c -= a;
    c -= b;
    c ^= (b >> c_shift);
  };
  round(43, 9, 8);
  round(38, 23, 5);
  round(35, 49, 11);
  round(12, 18, 22);
}
297 | | |
298 | 0 | inline uint64_t FingerprintCat(uint64_t x, uint64_t y) { |
299 | 0 | uint64_t b = 0xe08c1d668b756f82; // more of the golden ratio |
300 | 0 | mix(x, b, y); |
301 | 0 | return y; |
302 | 0 | } |
303 | | |
304 | | } // namespace port |
305 | | |
306 | | namespace random { |
307 | | |
308 | | std::mt19937 *GetRandomGenerator(); |
309 | | |
310 | | template <typename T> |
311 | | class ReservoirSampler { |
312 | | public: |
313 | | explicit ReservoirSampler(std::vector<T> *sampled, uint64_t size) |
314 | | : sampled_(sampled), size_(size), engine_(GetRandomGeneratorSeed()) {} |
315 | | explicit ReservoirSampler(std::vector<T> *sampled, uint64_t size, |
316 | | uint64_t seed) |
317 | | : sampled_(sampled), size_(size), engine_(seed) {} |
318 | | virtual ~ReservoirSampler() {} |
319 | | |
320 | | void Add(const T &item) { |
321 | | if (size_ == 0) return; |
322 | | |
323 | | ++total_; |
324 | | if (sampled_->size() < size_) { |
325 | | sampled_->push_back(item); |
326 | | } else { |
327 | | std::uniform_int_distribution<uint64_t> dist(0, total_ - 1); |
328 | | const uint64_t n = dist(engine_); |
329 | | if (n < sampled_->size()) (*sampled_)[n] = item; |
330 | | } |
331 | | } |
332 | | |
333 | | uint64_t total_size() const { return total_; } |
334 | | |
335 | | private: |
336 | | std::vector<T> *sampled_ = nullptr; |
337 | | uint64_t size_ = 0; |
338 | | uint64_t total_ = 0; |
339 | | std::mt19937 engine_; |
340 | | }; |
341 | | |
342 | | } // namespace random |
343 | | |
344 | | namespace util { |
345 | | |
346 | | #if defined(_FREEBSD) |
347 | | #include <sys/endian.h> |
348 | | #endif |
349 | | #if !defined(__APPLE__) && !defined(_WIN32) && !defined(_FREEBSD) && \ |
350 | | !defined(_AIX) |
351 | | #include <endian.h> |
352 | | #if BYTE_ORDER == __BIG_ENDIAN |
353 | | #define IS_BIG_ENDIAN |
354 | | #endif |
355 | | #endif |
356 | | |
357 | | #if defined(_AIX) && BYTE_ORDER == BIG_ENDIAN |
358 | | #define IS_BIG_ENDIAN |
359 | | #endif |
360 | | |
// Reports the byte order chosen at preprocessing time: true when
// IS_BIG_ENDIAN is defined, false otherwise.
constexpr bool is_bigendian() {
#if !defined(IS_BIG_ENDIAN)
  return false;
#else   // IS_BIG_ENDIAN
  return true;
#endif  // IS_BIG_ENDIAN
}
368 | | |
// Returns `x` with the order of its four bytes reversed
// (0x12345678 -> 0x78563412).
inline uint32_t Swap32(uint32_t x) {
  const uint32_t byte0 = (x & 0x000000FFu) << 24;
  const uint32_t byte1 = (x & 0x0000FF00u) << 8;
  const uint32_t byte2 = (x & 0x00FF0000u) >> 8;
  const uint32_t byte3 = (x & 0xFF000000u) >> 24;
  return byte0 | byte1 | byte2 | byte3;
}
376 | | |
377 | 0 | inline std::string JoinPath(absl::string_view path) { |
378 | 0 | return std::string(path.data(), path.size()); |
379 | 0 | } |
380 | | |
381 | | template <typename... T> |
382 | | inline std::string JoinPath(absl::string_view first, const T &...rest) { |
383 | | #ifdef OS_WIN |
384 | | return JoinPath(first) + "\\" + JoinPath(rest...); |
385 | | #else |
386 | | return JoinPath(first) + "/" + JoinPath(rest...); |
387 | | #endif |
388 | | } |
389 | | |
390 | | std::string StrError(int errnum); |
391 | | |
392 | | std::vector<std::string> StrSplitAsCSV(absl::string_view text); |
393 | | |
394 | | #ifdef OS_WIN |
395 | | std::wstring Utf8ToWide(const absl::string_view input); |
396 | | #endif |
397 | | |
398 | 0 | inline Status OkStatus() { return Status(); } |
399 | | |
// For a status-code name FUNC (e.g. NotFound) this defines two helpers:
//   util::Status FUNC##Error(absl::string_view str)
//       builds a Status with code StatusCode::k##FUNC and message `str`;
//   bool Is##FUNC(const util::Status &status)
//       returns true when `status` carries code StatusCode::k##FUNC.
// The message is handed over as std::string(str) rather than str.data():
// a string_view is not guaranteed to be NUL-terminated, so passing only the
// data pointer loses the view's length.
#define DECLARE_ERROR(FUNC)                                     \
  inline util::Status FUNC##Error(absl::string_view str) {      \
    return util::Status(StatusCode::k##FUNC, std::string(str)); \
  }                                                             \
  inline bool Is##FUNC(const util::Status &status) {            \
    return status.code() == StatusCode::k##FUNC;                \
  }
407 | | |
408 | | DECLARE_ERROR(Cancelled) |
409 | | DECLARE_ERROR(InvalidArgument) |
410 | | DECLARE_ERROR(NotFound) |
411 | | DECLARE_ERROR(AlreadyExists) |
412 | | DECLARE_ERROR(ResourceExhausted) |
413 | | DECLARE_ERROR(Unavailable) |
414 | | DECLARE_ERROR(FailedPrecondition) |
415 | | DECLARE_ERROR(OutOfRange) |
416 | | DECLARE_ERROR(Unimplemented) |
417 | | DECLARE_ERROR(Internal) |
418 | | DECLARE_ERROR(Aborted) |
419 | | DECLARE_ERROR(DeadlineExceeded) |
420 | | DECLARE_ERROR(DataLoss) |
421 | | DECLARE_ERROR(Unknown) |
422 | | DECLARE_ERROR(PermissionDenied) |
423 | | DECLARE_ERROR(Unauthenticated) |
424 | | |
425 | 0 | #define GTL_LOC (0) |
426 | | |
427 | | class StatusBuilder { |
428 | | public: |
429 | 113 | explicit StatusBuilder(StatusCode code) : code_(code) {} |
430 | 0 | explicit StatusBuilder(StatusCode code, int loc) : code_(code) {} |
431 | | |
432 | | template <typename T> |
433 | 791 | StatusBuilder &operator<<(const T &value) { |
434 | 791 | os_ << value; |
435 | 791 | return *this; |
436 | 791 | } Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [50]>(char const (&) [50]) sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [31]>(char const (&) [31]) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [2]>(char const (&) [2]) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <int>(int const&) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [4]>(char const (&) [4]) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [66]>(char const (&) [66]) sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [3]>(char const (&) [3]) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [7]>(char const (&) [7]) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [26]>(char const (&) [26]) Line | Count | Source | 433 | 113 | StatusBuilder &operator<<(const T &value) { | 434 | 113 | os_ << value; | 435 | 113 | return *this; | 436 | 113 | } |
Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [12]>(char const (&) [12]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [57]>(char const (&) [57]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [56]>(char const (&) [56]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [18]>(char const (&) [18]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [14]>(char const (&) [14]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [30]>(char const (&) [30]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [25]>(char const (&) [25]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [11]>(char const (&) [11]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [28]>(char const (&) [28]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [32]>(char const (&) [32]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [29]>(char const (&) [29]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [27]>(char const (&) [27]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [34]>(char const (&) [34]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [44]>(char const (&) [44]) Unexecuted 
instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [21]>(char const (&) [21]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [10]>(char const (&) [10]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [33]>(char const (&) [33]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [52]>(char const (&) [52]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [16]>(char const (&) [16]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [22]>(char const (&) [22]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [37]>(char const (&) [37]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [53]>(char const (&) [53]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [42]>(char const (&) [42]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [61]>(char const (&) [61]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [17]>(char const (&) [17]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [43]>(char const (&) [43]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [38]>(char const (&) [38]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [9]>(char const (&) [9]) Unexecuted 
instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [20]>(char const (&) [20]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [69]>(char const (&) [69]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [47]>(char const (&) [47]) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char const*>(char const* const&) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) Unexecuted instantiation: sentencepiece::util::StatusBuilder& sentencepiece::util::StatusBuilder::operator<< <char [51]>(char const (&) [51]) |
437 | | |
438 | 113 | operator Status() const { return Status(code_, os_.str()); } |
439 | | |
440 | | private: |
441 | | StatusCode code_; |
442 | | std::ostringstream os_; |
443 | | }; |
444 | | |
// CHECK_OR_RETURN(condition): when `condition` is false, returns from the
// enclosing function with a StatusBuilder of code kInternal whose message
// starts with "<file>(<line>) [<condition text>] ". When it is true, nothing
// happens.
// The expansion deliberately ends in an unterminated `return <builder> << ...`
// expression so the caller can append more text:
//   CHECK_OR_RETURN(x > 0) << "x must be positive";
// The `if (condition) {} else return ...` shape keeps that trailing `<<` part
// of the return statement and prevents the macro from capturing an `else`
// written after it.
445 | | #define CHECK_OR_RETURN(condition) \ |
446 | 226 | if (condition) { \ |
447 | 0 | } else /* NOLINT */ \ |
448 | 226 | return ::sentencepiece::util::StatusBuilder( \ |
449 | 113 | ::sentencepiece::util::StatusCode::kInternal) \ |
450 | 113 | << __FILE__ << "(" << __LINE__ << ") [" << #condition << "] " |
451 | | |
// Comparison shorthands: each one forwards the parenthesized comparison of
// `a` and `b` to CHECK_OR_RETURN.
452 | 0 | #define CHECK_EQ_OR_RETURN(a, b) CHECK_OR_RETURN((a) == (b)) |
453 | | #define CHECK_NE_OR_RETURN(a, b) CHECK_OR_RETURN((a) != (b)) |
454 | 0 | #define CHECK_GE_OR_RETURN(a, b) CHECK_OR_RETURN((a) >= (b)) |
455 | 0 | #define CHECK_LE_OR_RETURN(a, b) CHECK_OR_RETURN((a) <= (b)) |
456 | | #define CHECK_GT_OR_RETURN(a, b) CHECK_OR_RETURN((a) > (b)) |
457 | 0 | #define CHECK_LT_OR_RETURN(a, b) CHECK_OR_RETURN((a) < (b)) |
458 | | |
459 | | } // namespace util |
460 | | |
461 | | namespace port { |
462 | | template <typename T> |
463 | | void STLDeleteElements(std::vector<T *> *vec) { |
464 | | for (auto item : *vec) { |
465 | | delete item; |
466 | | } |
467 | | vec->clear(); |
468 | | } |
469 | | } // namespace port |
470 | | |
// Minimal task runner. Despite its name it does not keep a fixed set of
// workers: every Schedule() call starts a new std::thread right away, and
// the destructor waits for all of them to finish.
class ThreadPool {
 public:
  // The requested thread count is accepted but not used.
  ThreadPool(int32_t /*n*/) {}

  // Blocks until every scheduled task has finished.
  virtual ~ThreadPool() {
    for (auto &task : tasks_) {
      // join() on a non-joinable thread throws, which must never happen
      // inside a destructor.
      if (task.joinable()) {
        task.join();
      }
    }
  }

  // Starts `closure` on a new thread. The closure was already taken by
  // value, so it is moved into the thread instead of being copied again.
  void Schedule(std::function<void()> closure) {
    tasks_.emplace_back(std::move(closure));
  }

  // No-op: tasks start as soon as they are scheduled.
  void StartWorkers() {}

 private:
  std::vector<std::thread> tasks_;
};
486 | | |
487 | | namespace log_domain { |
488 | | |
489 | | double LogSum(const std::vector<double> &xs); |
490 | | |
491 | | } // namespace log_domain |
492 | | } // namespace sentencepiece |
493 | | #endif // UTIL_H_ |