/src/aspell/common/string.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of The New Aspell |
2 | | // Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license |
3 | | // version 2.0 or 2.1. You should have received a copy of the LGPL |
4 | | // license along with this library if you did not you can find |
5 | | // it at http://www.gnu.org/. |
6 | | |
7 | | #ifndef ASPELL_STRING__HPP |
8 | | #define ASPELL_STRING__HPP |
9 | | |
10 | | #include <string.h> |
11 | | #include <stdlib.h> |
12 | | |
13 | | #include <algorithm> |
14 | | |
15 | | #include "hash_fun.hpp" |
16 | | #include "parm_string.hpp" |
17 | | #include "mutable_string.hpp" |
18 | | #include "ostream.hpp" |
19 | | #include "istream.hpp" |
20 | | |
21 | | // |
22 | | // acommon::String is similar to std::string, but without many of the |
23 | | // extra non-stl like methods. The string is guaranteed to be stored |
24 | | // in a contiguous area of memory but is not guaranteed to be null |
25 | | // terminated. However, space is always allocated for the null |
26 | | // character. Thus, the c_str() method will never invalidate any |
27 | | // existing pointers. The string is also null terminated when accessed |
28 | | // via the str() and mstr() methods. In addition the method |
29 | | // ensure_null_end() will null terminate the string. Once null |
30 | | // terminated the string will remain as such until the length of the |
31 | | // string changes. |
32 | | // |
33 | | |
34 | | namespace acommon { |
35 | | |
36 | | template <typename Ret> class PosibErr; |
37 | | |
38 | | class String : public OStream |
39 | | { |
40 | | public: |
41 | | typedef const char * const_iterator; |
42 | | typedef char * iterator; |
43 | | typedef size_t size_type; |
44 | | |
45 | | private: |
46 | | // if begin_ != 0 then storage_end_ - begin_ > 1 |
47 | | char * begin_; |
48 | | char * end_; |
49 | | char * storage_end_; |
50 | | |
51 | | void assign_only_nonnull(const char * b, unsigned size) |
52 | 2.58M | { |
53 | 2.58M | begin_ = (char *)malloc(size + 1); |
54 | 2.58M | memmove(begin_, b, size); |
55 | 2.58M | end_ = begin_ + size; |
56 | 2.58M | storage_end_ = end_ + 1; |
57 | 2.58M | } |
58 | | void zero() |
59 | 339k | { |
60 | 339k | begin_ = 0; |
61 | 339k | end_ = 0; |
62 | 339k | storage_end_ = 0; |
63 | 339k | } |
64 | | void assign_only(const char * b) |
65 | 57.5k | { |
66 | 57.5k | if (b && *b) assign_only_nonnull(b, strlen(b)); |
67 | 2.35k | else zero(); |
68 | 57.5k | } |
69 | | void assign_only(const char * b, unsigned size) |
70 | 2.86M | { |
71 | 2.86M | if (b && size > 0) assign_only_nonnull(b, size); |
72 | 337k | else zero(); |
73 | 2.86M | } |
74 | | void reserve_i(size_t s = 0); |
75 | | public: |
76 | | void reserve(size_t s) |
77 | 136M | { |
78 | 136M | if (storage_end_ - begin_ >= (int)s + 1) return; |
79 | 628k | reserve_i(s); |
80 | 628k | } |
81 | | |
82 | 35.0k | char * begin() {return begin_;} |
83 | 32.9k | char * end() {return end_;} |
84 | | |
85 | 0 | const char * begin() const {return begin_;} |
86 | 0 | const char * end() const {return end_;} |
87 | | |
88 | 3.67k | char * pbegin() {return begin_;} |
89 | 3.67k | char * pend() {return end_;} |
90 | | |
91 | 0 | const char * pbegin() const {return begin_;} |
92 | 0 | const char * pend() const {return end_;} |
93 | | |
94 | 171M | size_t size() const {return end_ - begin_;} |
95 | 112k | bool empty() const {return begin_ == end_;} |
96 | 0 | size_t max_size() const {return INT_MAX;} |
97 | 0 | size_t capacity() const {return storage_end_ ? storage_end_ - begin_ - 1 : 0;} |
98 | | |
99 | 38.4M | void ensure_null_end() const { |
100 | 38.4M | if (!begin_) const_cast<String *>(this)->reserve_i(); |
101 | 38.4M | *end_ = '\0'; |
102 | 38.4M | } |
103 | | |
104 | 39.2M | const char * c_str() const { |
105 | 39.2M | if (begin_) {ensure_null_end(); return begin_;} |
106 | 3.51M | else return ""; |
107 | 39.2M | } |
108 | 314k | const char * str() const {return c_str();} |
109 | | char * mstr() |
110 | 2.75M | { |
111 | 2.75M | if (!begin_) reserve_i(); |
112 | 2.75M | ensure_null_end(); |
113 | 2.75M | return begin_; |
114 | 2.75M | } |
115 | | |
116 | 18.3M | char * data() {return begin_;} |
117 | 212k | const char * data() const {return begin_;} |
118 | | |
119 | 0 | char * data(int pos) {return begin_ + pos;} |
120 | 0 | char * data_end() {return end_;} |
121 | | |
122 | | template <typename U> |
123 | | U * datap() { |
124 | | return reinterpret_cast<U * >(begin_); |
125 | | } |
126 | | template <typename U> |
127 | | U * datap(int pos) { |
128 | | return reinterpret_cast<U * >(begin_ + pos); |
129 | | } |
130 | | |
131 | 5.54M | char & operator[] (size_t pos) {return begin_[pos];} |
132 | 2.62M | char operator[] (size_t pos) const {return begin_[pos];} |
133 | | |
134 | 10.4k | char & back() {return end_[-1];} |
135 | 0 | char back() const {return end_[-1];} |
136 | | |
137 | 5.12M | void clear() {end_ = begin_;} |
138 | | |
139 | | // |
140 | | // constructors |
141 | | // |
142 | | |
143 | 12.5M | String() : begin_(0), end_(0), storage_end_(0) {} |
144 | 57.5k | String(const char * s) {assign_only(s);} |
145 | 82.9k | String(const char * s, unsigned size) {assign_only(s, size);} |
146 | 1.58M | String(ParmStr s) {assign_only(s, s.size());} |
147 | 5.35k | String(MutableString s) {assign_only(s.str, s.size);} |
148 | 1.19M | String(const String & other) {assign_only(other.begin_, other.end_-other.begin_);} |
149 | | |
150 | | // |
151 | | // assign |
152 | | // |
153 | | |
154 | | void assign(const char * b, size_t size) |
155 | 513k | { |
156 | 513k | clear(); |
157 | 513k | if (size != 0) { |
158 | 452k | reserve(size); |
159 | 452k | memmove(begin_, b, size); |
160 | 452k | end_ = begin_ + size; |
161 | 452k | } |
162 | 513k | } |
163 | | void assign(const char * b) |
164 | 245k | { |
165 | 245k | if (b) assign(b, strlen(b)); |
166 | 245k | } |
167 | 243k | String & operator= (const char * s) { |
168 | 243k | assign(s); |
169 | 243k | return *this; |
170 | 243k | } |
171 | | inline String & operator= (const PosibErr<const char *> & s); |
172 | 124k | String & operator= (ParmStr s) { |
173 | 124k | assign(s, s.size()); |
174 | 124k | return *this; |
175 | 124k | } |
176 | 7.99k | String & operator= (MutableString s) { |
177 | 7.99k | assign(s.str, s.size); |
178 | 7.99k | return *this; |
179 | 7.99k | } |
180 | 53.2k | String & operator= (const String & s) { |
181 | 53.2k | assign(s.begin_, s.end_ - s.begin_); |
182 | 53.2k | return *this; |
183 | 53.2k | } |
184 | | /*inline*/ String & operator= (const PosibErr<String> & s); |
185 | | |
186 | | // |
187 | | // append |
188 | | // |
189 | | |
190 | | String & append(const void * str, unsigned int sz) |
191 | 3.41M | { |
192 | 3.41M | reserve(size() + sz); |
193 | 3.41M | if (sz > 0) memcpy(end_, str, sz); |
194 | 3.41M | end_ += sz; |
195 | 3.41M | return *this; |
196 | 3.41M | } |
197 | | String & append(const void * d, const void * e) |
198 | 7.35k | { |
199 | 7.35k | append(d, (const char *)e - (const char *)d); |
200 | 7.35k | return *this; |
201 | 7.35k | } |
202 | | String & append(String & str, unsigned int sz) |
203 | 0 | { |
204 | 0 | append(str.begin_, sz); |
205 | 0 | return *this; |
206 | 0 | } |
207 | | String & append(const char * str) |
208 | 39.2k | { |
209 | 39.2k | if (!end_) reserve_i(); |
210 | 410k | for (; *str && end_ != storage_end_ - 1; ++str, ++end_) |
211 | 371k | *end_ = *str; |
212 | 39.2k | if (end_ == storage_end_ - 1) append(str, strlen(str)); |
213 | 39.2k | return *this; |
214 | 39.2k | } |
215 | | String & append(char c) |
216 | 132M | { |
217 | 132M | reserve(size() + 1); |
218 | 132M | *end_ = c; |
219 | 132M | ++end_; |
220 | 132M | return *this; |
221 | 132M | } |
222 | | |
223 | 37.1k | String & operator+= (const char * s) { |
224 | 37.1k | append(s); |
225 | 37.1k | return *this; |
226 | 37.1k | } |
227 | 35.1M | String & operator+= (char c) { |
228 | 35.1M | append(c); |
229 | 35.1M | return *this; |
230 | 35.1M | } |
231 | 41.2k | String & operator+= (ParmStr s) { |
232 | 41.2k | if (s.have_size()) |
233 | 39.1k | append(s, s.size()); |
234 | 2.14k | else |
235 | 2.14k | append(s); |
236 | 41.2k | return *this; |
237 | 41.2k | } |
238 | 0 | String & operator+= (MutableString s) { |
239 | 0 | append(s.str, s.size); |
240 | 0 | return *this; |
241 | 0 | } |
242 | 43.4k | String & operator+= (const String & s) { |
243 | 43.4k | append(s.begin_, s.end_ - s.begin_); |
244 | 43.4k | return *this; |
245 | 43.4k | } |
246 | | |
247 | | // |
248 | | // |
249 | | // |
250 | | |
251 | 15.3M | ~String() {if (begin_) free(begin_);} |
252 | | |
253 | 718 | void swap(String & other) { |
254 | 718 | std::swap(begin_, other.begin_); |
255 | 718 | std::swap(end_, other.end_); |
256 | 718 | std::swap(storage_end_, other.storage_end_); |
257 | 718 | } |
258 | | |
259 | | // |
260 | | // |
261 | | // |
262 | | |
263 | | int vprintf(const char * format, va_list ap); |
264 | | |
265 | | // |
266 | | // |
267 | | // |
268 | | |
269 | 308k | void push_back(char c) {append(c);} |
270 | | |
271 | 1.18k | void pop_back(size_t p = 1) {end_ -= p;} |
272 | | |
273 | | char * insert(size_t p, char c) |
274 | 2.11k | { |
275 | 2.11k | reserve(size() + 1); |
276 | 2.11k | char * pos = begin_ + p; |
277 | 2.11k | size_t to_move = end_ - pos; |
278 | 2.11k | if (to_move) memmove(pos + 1, pos, to_move); |
279 | 2.11k | *pos = c; |
280 | 2.11k | ++end_; |
281 | 2.11k | return pos; |
282 | 2.11k | } |
283 | | char * insert(char * pos, char c) |
284 | 2.11k | { |
285 | 2.11k | return insert(pos - begin_, c); |
286 | 2.11k | } |
287 | | void insert(size_t p, const char * str, size_t sz) |
288 | 0 | { |
289 | 0 | reserve(size() + sz); |
290 | 0 | char * pos = begin_ + p; |
291 | 0 | size_t to_move = end_ - pos; |
292 | 0 | if (to_move) memmove(pos + sz, pos, to_move); |
293 | 0 | memcpy(pos, str, sz); |
294 | 0 | end_ += sz; |
295 | 0 | } |
296 | | void insert(char * pos, const char * f, const char * l) |
297 | 0 | { |
298 | 0 | insert(pos - begin_, f, l - f); |
299 | 0 | } |
300 | | |
301 | | char * erase(char * pos) |
302 | 0 | { |
303 | 0 | size_t to_move = end_ - pos - 1; |
304 | 0 | if (to_move) memmove(pos, pos + 1, to_move); |
305 | 0 | --end_; |
306 | 0 | return pos; |
307 | 0 | } |
308 | | char * erase(char * f, char * l) |
309 | 48.1k | { |
310 | 48.1k | if (l >= end_) { |
311 | 21.4k | end_ = f < end_ ? f : end_; |
312 | 26.7k | } else { |
313 | 26.7k | size_t sz = l - f; |
314 | 26.7k | memmove(f, f + sz, end_ - l); |
315 | 26.7k | end_ -= sz; |
316 | 26.7k | } |
317 | 48.1k | return f; |
318 | 48.1k | } |
319 | | void erase(size_t pos, size_t s) |
320 | 48.1k | { |
321 | 48.1k | erase(begin_ + pos, begin_ + pos + s); |
322 | 48.1k | } |
323 | | |
324 | | //FIXME: Make this more efficient by rewriting the implementation |
325 | | // to work with raw memory rather than using vector<char> |
326 | | template <typename Itr> |
327 | | void replace(iterator start, iterator stop, Itr rstart, Itr rstop) |
328 | 0 | { |
329 | 0 | iterator i = erase(start,stop); |
330 | 0 | insert(i, rstart, rstop); |
331 | 0 | } |
332 | | |
333 | | void replace(size_t pos, size_t n, const char * with, size_t s) |
334 | 0 | { |
335 | 0 | replace(begin_ + pos, begin_ + pos + n, with, with + s); |
336 | 0 | } |
337 | | void resize(size_t n) |
338 | 115k | { |
339 | 115k | reserve(n); |
340 | 115k | end_ = begin_ + n; |
341 | 115k | } |
342 | | void resize(size_t n, char c) |
343 | 0 | { |
344 | 0 | size_t old_size = size(); |
345 | 0 | reserve(n); |
346 | 0 | end_ = begin_ + n; |
347 | 0 | int diff = n - old_size; |
348 | 0 | if (diff > 0) memset(begin_ + old_size, c, diff); |
349 | 0 | } |
350 | 0 | int alloc(int s) { |
351 | 0 | int pos = size(); |
352 | 0 | resize(pos + s); |
353 | 0 | return pos; |
354 | 0 | } |
355 | | |
356 | | bool prefix(ParmStr str, size_t offset = 0) const |
357 | 0 | { |
358 | 0 | if (str.size() > size() - offset) return false; |
359 | 0 | return memcmp(begin_ + offset, str.str(), str.size()) == 0; |
360 | 0 | }; |
361 | | bool suffix(ParmStr str) const |
362 | 73.5k | { |
363 | 73.5k | if (str.size() > size()) return false; |
364 | 48.7k | return memcmp(end_ - str.size(), str.str(), str.size()) == 0; |
365 | 73.5k | } |
366 | | |
367 | | // FIXME: Eventually remove |
368 | | static const size_t npos = INT_MAX; |
369 | 32.1k | size_t find(char c, size_t pos = 0) const { |
370 | 32.1k | char * res = (char *)memchr(begin_ + pos, c, size() - pos); |
371 | 32.1k | if (res == 0) return npos; |
372 | 26.7k | else return res - begin_; |
373 | 32.1k | } |
374 | 2.09k | size_t rfind(char c) const { |
375 | 25.0k | for (int i = size() - 1; i >= 0; --i) { |
376 | 25.0k | if (begin_[i] == c) return i; |
377 | 25.0k | } |
378 | 3 | return npos; |
379 | 2.09k | } |
380 | | String substr(size_t pos = 0, size_t n = npos) const |
381 | 11.3k | { |
382 | 11.3k | if (n == npos) |
383 | 0 | return String(begin_ + pos, size() - pos); |
384 | 11.3k | else |
385 | 11.3k | return String(begin_ + pos, n); |
386 | 11.3k | } |
387 | | // END FIXME |
388 | | |
389 | | unsigned short & at16(unsigned int pos) |
390 | 0 | {return reinterpret_cast<unsigned short &>(operator[](pos));} |
391 | | unsigned int & at32(unsigned int pos) |
392 | 0 | {return reinterpret_cast<unsigned int &>(operator[](pos));} |
393 | | |
394 | 0 | void write (char c) {append(c);} |
395 | 0 | void write (ParmStr str) {operator+=(str);} |
396 | 627k | void write (const void * str, unsigned int sz) {append(str,sz);} |
397 | | |
398 | | |
399 | 0 | String & operator << (ParmStr str) { |
400 | 0 | append(str); |
401 | 0 | return *this; |
402 | 0 | } |
403 | | |
404 | 0 | String & operator << (char c) { |
405 | 0 | append(c); |
406 | 0 | return *this; |
407 | 0 | } |
408 | | }; |
409 | | |
410 | | inline String operator+ (ParmStr lhs, ParmStr rhs) |
411 | 9.25k | { |
412 | 9.25k | String tmp; |
413 | 9.25k | tmp.reserve(lhs.size() + rhs.size()); |
414 | 9.25k | tmp += lhs; |
415 | 9.25k | tmp += rhs; |
416 | 9.25k | return tmp; |
417 | 9.25k | } |
418 | | |
419 | | inline bool operator== (const String & x, const String & y) |
420 | 293k | { |
421 | 293k | if (x.size() != y.size()) return false; |
422 | 107k | if (x.size() == 0) return true; |
423 | 103k | return memcmp(x.data(), y.data(), x.size()) == 0; |
424 | 107k | } |
425 | | inline bool operator== (const String & x, const char * y) |
426 | 8.67M | { |
427 | 8.67M | return strcmp(x.c_str(), y) == 0; |
428 | 8.67M | } |
429 | | inline bool operator== (const char * x, const String & y) |
430 | 0 | { |
431 | 0 | return strcmp(x, y.c_str()) == 0; |
432 | 0 | } |
433 | | inline bool operator== (const String & x, ParmStr y) |
434 | 17.1k | { |
435 | 17.1k | if (y == 0) return x.size() == 0; |
436 | 17.1k | return strcmp(x.c_str(), y) == 0; |
437 | 17.1k | } |
438 | | inline bool operator== (ParmStr x, const String & y) |
439 | 2.13k | { |
440 | 2.13k | if (x == 0) return y.size() == 0; |
441 | 2.13k | return strcmp(x, y.c_str()) == 0; |
442 | 2.13k | } |
443 | | |
444 | | inline bool operator!= (const String & x, const String & y) |
445 | 12.6k | { |
446 | 12.6k | return !(x == y); |
447 | 12.6k | } |
448 | | inline bool operator!= (const String & x, const char * y) |
449 | 2.05k | { |
450 | 2.05k | return strcmp(x.c_str(), y) != 0; |
451 | 2.05k | } |
452 | | inline bool operator!= (const char * x, const String & y) |
453 | 0 | { |
454 | 0 | return strcmp(x, y.c_str()) != 0; |
455 | 0 | } |
456 | | inline bool operator!= (const String & x, ParmStr y) |
457 | 0 | { |
458 | 0 | return !(x == y); |
459 | 0 | } |
460 | | inline bool operator!= (ParmStr x, const String & y) |
461 | 0 | { |
462 | 0 | return !(x == y); |
463 | 0 | } |
464 | | |
465 | 16.5M | inline ParmString::ParmString(const String & s) : str_(s.c_str()), size_(s.size()) {} |
466 | | |
467 | | class StringIStream : public IStream { |
468 | | const char * in_str; |
469 | | char delem; |
470 | | public: |
471 | | StringIStream(ParmStr s, char d = ';') |
472 | 153 | : IStream(d), in_str(s) {} |
473 | | bool append_line(String & str, char c); |
474 | | bool read(void * data, unsigned int size); |
475 | | }; |
476 | | |
477 | | template <> struct hash<String> : public HashString<String> {}; |
478 | | |
479 | | inline bool IStream::getline(String & str, char c) |
480 | 0 | { |
481 | 0 | str.clear(); |
482 | 0 | return append_line(str,c); |
483 | 0 | } |
484 | | |
485 | | inline bool IStream::getline(String & str) |
486 | 1.47M | { |
487 | 1.47M | str.clear(); |
488 | 1.47M | return append_line(str,delem); |
489 | 1.47M | } |
490 | | |
491 | | } |
492 | | |
493 | | namespace std |
494 | | { |
495 | 0 | template<> inline void swap(acommon::String & x, acommon::String & y) {return x.swap(y);} |
496 | | } |
497 | | |
498 | | #endif |