Coverage Report

Created: 2023-03-04 07:00

/src/icu/icu4c/source/common/unistr.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
* Copyright (C) 1999-2016, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
******************************************************************************
8
*
9
* File unistr.cpp
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   09/25/98    stephen     Creation.
15
*   04/20/99    stephen     Overhauled per 4/16 code review.
16
*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17
*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
18
*                           Replaceable.
19
*   06/25/01    grhoten     Removed the dependency on iostream
20
******************************************************************************
21
*/
22
23
#include "unicode/utypes.h"
24
#include "unicode/appendable.h"
25
#include "unicode/putil.h"
26
#include "cstring.h"
27
#include "cmemory.h"
28
#include "unicode/ustring.h"
29
#include "unicode/unistr.h"
30
#include "unicode/utf.h"
31
#include "unicode/utf16.h"
32
#include "uelement.h"
33
#include "ustr_imp.h"
34
#include "umutex.h"
35
#include "uassert.h"
36
37
#if 0

#include <iostream>
using namespace std;

// DEBUGGING (compiled out)
// Both helpers write "name:|...|" to cout. Code units below 0x0020 or at/above
// 0x007E are written as "[0x<hex>]"; all others are written as a plain char.

// Dumps the contents of a UnicodeString.
void
print(const UnicodeString& s,
      const char *name)
{
  cout << name << ":|";
  for(int pos = 0; pos < s.length(); ++pos) {
    const char16_t unit = s[pos];
    const bool printable = unit >= 0x0020 && unit < 0x007E;
    if(printable) {
      cout << (char) s[pos];
    } else {
      cout << "[0x" << hex << s[pos] << "]";
    }
  }
  cout << '|' << endl;
}

// Dumps the first len code units of a raw UTF-16 array.
void
print(const char16_t *s,
      int32_t len,
      const char *name)
{
  cout << name << ":|";
  for(int pos = 0; pos < len; ++pos) {
    const char16_t unit = s[pos];
    const bool printable = unit >= 0x0020 && unit < 0x007E;
    if(printable) {
      cout << (char) s[pos];
    } else {
      cout << "[0x" << hex << s[pos] << "]";
    }
  }
  cout << '|' << endl;
}
// END DEBUGGING
#endif
77
78
// Local function definitions for now
79
80
// need to copy areas that may overlap
81
static
82
inline void
83
us_arrayCopy(const char16_t *src, int32_t srcStart,
84
         char16_t *dst, int32_t dstStart, int32_t count)
85
925M
{
86
925M
  if(count>0) {
87
899M
    uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88
899M
  }
89
925M
}
90
91
// u_unescapeAt() callback to get a char16_t from a UnicodeString
92
U_CDECL_BEGIN
93
static char16_t U_CALLCONV
94
137k
UnicodeString_charAt(int32_t offset, void *context) {
95
137k
    return ((icu::UnicodeString*) context)->charAt(offset);
96
137k
}
97
U_CDECL_END
98
99
U_NAMESPACE_BEGIN
100
101
/* The Replaceable virtual destructor can't be defined in the header
102
   due to how AIX works with multiple definitions of virtual functions.
103
*/
104
525M
Replaceable::~Replaceable() {}
105
106
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108
// Concatenation: returns a new string holding s1 followed by s2.
// The result is created empty with capacity for both operands plus one
// extra code unit, then filled with two appends.
UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    const int32_t capacity = s1.length()+s2.length()+1;
    UnicodeString sum(capacity, (UChar32)0, 0);
    sum.append(s1);
    sum.append(s2);
    return sum;
}
115
116
//========================================
117
// Reference Counting functions, put at top of file so that optimizing compilers
118
//                               have a chance to automatically inline.
119
//========================================
120
121
void
122
85.9M
UnicodeString::addRef() {
123
85.9M
  umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124
85.9M
}
125
126
int32_t
127
94.9M
UnicodeString::removeRef() {
128
94.9M
  return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129
94.9M
}
130
131
int32_t
132
1.58G
UnicodeString::refCount() const {
133
1.58G
  return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134
1.58G
}
135
136
void
137
713M
UnicodeString::releaseArray() {
138
713M
  if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
139
9.77M
    uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140
9.77M
  }
141
713M
}
142
143
144
145
//========================================
146
// Constructors
147
//========================================
148
149
// The default constructor is inline in unistr.h.
150
151
39.6k
// Constructs a string with at least the requested capacity that contains
// count copies of the code point c.
// If count<=0 or c is not a code point (above 0x10ffff), or if a supplementary
// c would need more than INT32_MAX code units, the string is only allocated
// and left without contents.
UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
  fUnion.fFields.fLengthAndFlags = 0;

  const bool isCodePoint = (uint32_t)c <= 0x10ffff;
  if(count <= 0 || !isCodePoint) {
    // just allocate and do not do anything else
    allocate(capacity);
    return;
  }

  if(c <= 0xffff) {
    // BMP code point: one code unit per repetition
    const int32_t length = count;
    if(allocate(capacity < length ? length : capacity)) {
      char16_t *p = getArrayStart();
      char16_t *const limit = p + length;
      const char16_t unit = (char16_t)c;
      while(p != limit) {
        *p++ = unit;
      }
      setLength(length);
    }
    return;
  }

  // supplementary code point, write surrogate pairs
  if(count > (INT32_MAX / 2)) {
    // We would get more than 2G UChars.
    allocate(capacity);
    return;
  }
  const int32_t length = count * 2;
  if(allocate(capacity < length ? length : capacity)) {
    char16_t *p = getArrayStart();
    char16_t *const limit = p + length;
    const char16_t lead = U16_LEAD(c);
    const char16_t trail = U16_TRAIL(c);
    while(p != limit) {
      *p++ = lead;
      *p++ = trail;
    }
    setLength(length);
  }
}
191
192
196k
// Constructs a one-code-unit string held in the internal stack buffer.
UnicodeString::UnicodeString(char16_t ch) {
  // short-string storage with a length of 1
  fUnion.fFields.fLengthAndFlags = kShortString | kLength1;
  fUnion.fStackFields.fBuffer[0] = ch;
}
196
197
18.9M
// Constructs a string from one code point, encoded into the stack buffer
// with U16_APPEND. If the macro reports an error the string stays empty.
UnicodeString::UnicodeString(UChar32 ch) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  int32_t written = 0;
  UBool failed = false;
  U16_APPEND(fUnion.fStackFields.fBuffer, written, US_STACKBUF_SIZE, ch, failed);
  // Testing the flag also keeps the compiler from complaining that it is unused.
  // On failure written==0, which is already the length of an empty short string.
  if(failed) {
    return;
  }
  setShortLength(written);
}
208
209
14.1k
// Constructs a string by appending text to an empty short string;
// the length argument of -1 is passed on to doAppend().
UnicodeString::UnicodeString(const char16_t *text) {
  fUnion.fFields.fLengthAndFlags = kShortString;  // start out empty
  doAppend(text, 0, -1);
}
213
214
// Constructs a string by appending textLength code units of text
// to an empty short string.
UnicodeString::UnicodeString(const char16_t *text,
                             int32_t textLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;  // start out empty
  doAppend(text, 0, textLength);
}
219
220
// Read-only aliasing constructor: the string points at the caller's text
// instead of copying it.
// - text==nullptr yields an empty string that aliases nothing.
// - The string becomes bogus if textLength < -1, if textLength == -1 without
//   isTerminated, or if isTerminated is set but text[textLength] is not NUL.
// - textLength == -1 (with isTerminated) means the length is taken from u_strlen().
UnicodeString::UnicodeString(UBool isTerminated,
                             ConstChar16Ptr textPtr,
                             int32_t textLength) {
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
  const char16_t *text = textPtr;
  if(text == nullptr) {
    // treat as an empty string, do not alias
    setToEmpty();
    return;
  }
  if(textLength < -1 ||
     (textLength == -1 && !isTerminated) ||
     (textLength >= 0 && isTerminated && text[textLength] != 0)
  ) {
    setToBogus();
    return;
  }
  if(textLength == -1) {
    // text is terminated, or else it would have failed the above test
    textLength = u_strlen(text);
  }
  // a terminated text contributes its NUL to the capacity
  const int32_t capacity = isTerminated ? textLength + 1 : textLength;
  setArray(const_cast<char16_t *>(text), textLength, capacity);
}
242
243
// Writable aliasing constructor: the string uses the caller's buffer.
// - buff==nullptr yields an empty string that aliases nothing.
// - The string becomes bogus if buffLength < -1, buffCapacity < 0,
//   or buffLength > buffCapacity.
// - buffLength == -1 means the length is the offset of the first NUL,
//   searching no further than buffCapacity.
UnicodeString::UnicodeString(char16_t *buff,
                             int32_t buffLength,
                             int32_t buffCapacity) {
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
  if(buff == nullptr) {
    // treat as an empty string, do not alias
    setToEmpty();
    return;
  }
  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    setToBogus();
    return;
  }
  if(buffLength == -1) {
    // like u_strlen(buff), but do not look beyond buffCapacity
    int32_t n = 0;
    while(n != buffCapacity && buff[n] != 0) {
      ++n;
    }
    buffLength = n;
  }
  setArray(buff, buffLength, buffCapacity);
}
264
265
12.5k
// Constructs a string from char data via u_charsToUChars().
// src==nullptr yields an empty string; length<0 means src is NUL-terminated.
// The string becomes bogus if the buffer cannot be prepared.
UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  if(src==nullptr) {
    // treat as an empty string
    return;
  }
  if(length<0) {
    length=(int32_t)uprv_strlen(src);
  }
  if(!cloneArrayIfNeeded(length, length, false)) {
    setToBogus();
    return;
  }
  u_charsToUChars(src, getArrayStart(), length);
  setLength(length);
}
281
282
#if U_CHARSET_IS_UTF8
283
284
0
// Constructs a string from NUL-terminated char data using setToUTF8()
// (this variant is compiled only when U_CHARSET_IS_UTF8).
// A null pointer yields an empty string.
UnicodeString::UnicodeString(const char *codepageData) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  if(codepageData == nullptr) {
    return;
  }
  setToUTF8(codepageData);
}
290
291
0
// Constructs a string from dataLength bytes of char data using setToUTF8()
// (this variant is compiled only when U_CHARSET_IS_UTF8).
// dataLength == -1 means the data is NUL-terminated.
// A null pointer, a zero length or a length below -1 yields an empty string.
UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  const bool hasData = codepageData != nullptr && dataLength != 0 && dataLength >= -1;
  if(hasData) {
    const int32_t byteCount =
        dataLength == -1 ? (int32_t)uprv_strlen(codepageData) : dataLength;
    setToUTF8(StringPiece(codepageData, byteCount));
  }
  // otherwise there is nothing to convert
}
302
303
// else see unistr_cnv.cpp
304
#endif
305
306
165M
// Copy constructor: starts as an empty short string and then takes over
// the contents of that via copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that) {
  fUnion.fFields.fLengthAndFlags = kShortString;  // valid state for copyFrom()
  copyFrom(that);
}
310
311
0
// Move constructor: takes over src's fields; copyFieldsFrom() is asked
// to set src to bogus where it hands over a buffer pointer.
UnicodeString::UnicodeString(UnicodeString &&src) noexcept {
  copyFieldsFrom(src, /* setSrcToBogus= */ true);
}
314
315
// Constructs a string from the part of that which begins at srcStart,
// by calling setTo() on an empty short string.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart) {
  fUnion.fFields.fLengthAndFlags = kShortString;  // start out empty
  setTo(that, srcStart);
}
320
321
// Constructs a string from srcLength code units of that beginning at srcStart,
// by calling setTo() on an empty short string.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;  // start out empty
  setTo(that, srcStart, srcLength);
}
327
328
// Replaceable base class clone() default implementation, does not clone
329
Replaceable *
330
0
Replaceable::clone() const {
331
0
  return nullptr;
332
0
}
333
334
// UnicodeString overrides clone() with a real implementation
335
UnicodeString *
336
0
UnicodeString::clone() const {
337
0
  LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
338
0
  return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
339
0
}
340
341
//========================================
342
// array allocation
343
//========================================
344
345
namespace {

// Fixed part of the extra room added when a string has to grow.
const int32_t kGrowSize = 128;

// Upper bound for a string capacity.
// One int32_t reference counter plus capacity UChars must fit into a
// 32-bit size_t (at least when on a 32-bit platform).
// One more UChar is added for the NUL terminator, to avoid reallocation in
// getTerminatedBuffer(), and the byte count is rounded up to a multiple of 16.
// Hence capacity can be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More complicated checks could allow up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to allocate for a string that needs newLength units:
// newLength plus a quarter of newLength plus kGrowSize, capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t headroom = (newLength >> 2) + kGrowSize;
  const int32_t available = kMaxCapacity - newLength;
  return headroom <= available ? newLength + headroom : kMaxCapacity;
}

}  // namespace
368
369
// Sets up storage for at least capacity code units; existing storage is not
// released here.
// - Up to US_STACKBUF_SIZE the string simply becomes an empty short string.
// - Otherwise a heap block is allocated which starts with an int32_t reference
//   counter (set to 1) followed by the UChars; fArray points behind the counter.
// - If capacity exceeds kMaxCapacity or the allocation fails, the string is
//   set to bogus and false is returned.
UBool
UnicodeString::allocate(int32_t capacity) {
  if(capacity <= US_STACKBUF_SIZE) {
    fUnion.fFields.fLengthAndFlags = kShortString;
    return true;
  }

  int32_t *block = nullptr;
  size_t numBytes = 0;
  if(capacity <= kMaxCapacity) {
    // Switch to size_t which is unsigned so that we can allocate up to 4GB.
    // Reference counter + UChars, including one extra UChar for the NUL.
    numBytes = sizeof(int32_t) + ((size_t)capacity + 1) * U_SIZEOF_UCHAR;
    // Round up to a multiple of 16.
    numBytes = (numBytes + 15) & ~15;
    block = (int32_t *) uprv_malloc(numBytes);
  }

  if(block == nullptr) {
    // too large, or out of memory
    fUnion.fFields.fLengthAndFlags = kIsBogus;
    fUnion.fFields.fArray = nullptr;
    fUnion.fFields.fCapacity = 0;
    return false;
  }

  // set initial refCount; the UChars start behind it
  *block = 1;
  fUnion.fFields.fArray = (char16_t *)(block + 1);
  // everything behind the counter, including the rounding slack, is capacity
  fUnion.fFields.fCapacity = (int32_t)((numBytes - sizeof(int32_t)) / U_SIZEOF_UCHAR);
  fUnion.fFields.fLengthAndFlags = kLongString;
  return true;
}
400
401
//========================================
402
// Destructor
403
//========================================
404
405
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
// Per-length counters, incremented by the destructor.
static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
// Counter for strings that do not have a short length.
static u_atomic_int32_t beyondCount(0);

// Prints one line per length 0..59 and then a single ">59" line which sums
// the remaining per-length counters and beyondCount.
U_CAPI void unistr_printLengths() {
  int32_t i = 0;
  while(i < 60) {
    printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
    ++i;
  }
  int32_t beyond = beyondCount;
  while(i < UPRV_LENGTHOF(finalLengthCounts)) {
    beyond += finalLengthCounts[i];
    ++i;
  }
  printf(">59, %9d\n", beyond);
}
#endif
421
422
// Destructor: releases this string's share of a reference-counted buffer.
UnicodeString::~UnicodeString()
{
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
  // Statistics for discussing a desirable stack buffer size:
  // count the contents length of strings at the end of their lifetime,
  // not the optional NUL terminator nor further capacity.
  // Open-buffer strings and strings which alias external storage are ignored.
  const int32_t untracked = kOpenGetBuffer|kReadonlyAlias|kWritableAlias;
  if((fUnion.fFields.fLengthAndFlags&untracked) == 0) {
    u_atomic_int32_t *counter =
        hasShortLength() ? finalLengthCounts + getShortLength() : &beyondCount;
    umtx_atomic_inc(counter);
  }
#endif

  releaseArray();
}
440
441
//========================================
442
// Factory methods
443
//========================================
444
445
8.24k
// Factory: returns a new string whose contents are set from utf8
// with setToUTF8().
UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
  UnicodeString converted;
  converted.setToUTF8(utf8);
  return converted;
}
450
451
0
// Factory: converts length code points of UTF-32 text to a new string using
// u_strFromUTF32WithSub() with U+FFFD as the substitution character.
// Returns a bogus string if no output buffer can be obtained or if the
// conversion fails with anything other than a buffer overflow; on overflow
// the conversion is retried with the capacity the converter asked for.
UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
  UnicodeString result;
  int32_t capacity;
  // Most UTF-32 strings will be BMP-only and result in a same-length
  // UTF-16 string. We overestimate the capacity just slightly,
  // just in case there are a few supplementary characters.
  if(length <= US_STACKBUF_SIZE) {
    capacity = US_STACKBUF_SIZE;
  } else {
    // Cap the estimate so that the addition cannot overflow int32_t
    // for very large lengths.
    const int32_t slack = (length >> 4) + 4;
    capacity = (slack <= kMaxCapacity - length) ? length + slack : kMaxCapacity;
  }
  for(;;) {
    char16_t *utf16 = result.getBuffer(capacity);
    if(utf16 == nullptr) {
      // No writable buffer is available. Stop here instead of retrying:
      // without a buffer the conversion could keep reporting an overflow.
      result.setToBogus();
      break;
    }
    // Start from 0 so that releaseBuffer() never sees an indeterminate value
    // when the converter fails before storing an output length.
    int32_t length16 = 0;
    UErrorCode errorCode = U_ZERO_ERROR;
    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
        utf32, length,
        0xfffd,  // Substitution character.
        nullptr,    // Don't care about number of substitutions.
        &errorCode);
    result.releaseBuffer(length16);
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
      capacity = length16 + 1;  // +1 for the terminating NUL.
      continue;
    }
    if(U_FAILURE(errorCode)) {
      result.setToBogus();
    }
    break;
  }
  return result;
}
482
483
//========================================
484
// Assignment
485
//========================================
486
487
// Copy assignment: delegates to copyFrom() with its default fastCopy argument.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src) {
  UnicodeString &self = copyFrom(src);
  return self;
}
491
492
// Like copy assignment, but calls copyFrom() with fastCopy=true so that
// a read-only alias is kept as a read-only alias instead of being copied.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src) {
  UnicodeString &self = copyFrom(src, /* fastCopy= */ true);
  return self;
}
496
497
// Shared implementation of copy construction/assignment and fastCopyFrom().
// Makes this string equal to src:
// - a bogus src makes this string bogus, an empty src makes it empty;
// - a short string's stack buffer contents are copied;
// - a reference-counted buffer is shared, with its counter incremented;
// - a read-only alias is kept as an alias only if fastCopy is set;
// - otherwise (and for writable aliases) the contents are copied into newly
//   allocated storage, and this string becomes bogus if that allocation fails.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // self-assignment is a no-op
  if(this == &src) {
    return *this;
  }

  // a bogus right side makes this string bogus too
  if(src.isBogus()) {
    setToBogus();
    return *this;
  }

  // drop the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // From here on: src has contents and is not an "open" src.getBuffer(minCapacity).
  const int16_t srcFlags = src.fUnion.fFields.fLengthAndFlags;
  fUnion.fFields.fLengthAndFlags = srcFlags;
  switch(srcFlags & kAllStorageFlags) {
  case kShortString:
    // src lives in its stack buffer: copy the used part into ours
    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
                getShortLength() * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src owns a share of a refCounted buffer: take another share of it.
    // src is const, hence the cast - only the counter changes, not the string.
    const_cast<UnicodeString &>(src).addRef();
    // copy all fields so that both strings point at the same buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    if(!hasShortLength()) {
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
    }
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // keep aliasing the same read-only text as src does
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      if(!hasShortLength()) {
        fUnion.fFields.fLength = src.fUnion.fFields.fLength;
      }
      break;
    }
    // without fastCopy, continue with case kWritableAlias
    // -> allocate a new buffer and copy the contents
    U_FALLTHROUGH;
  case kWritableAlias: {
    // do not alias the caller's buffer; make our own copy instead
    int32_t unitCount = src.length();
    if(allocate(unitCount)) {
      u_memcpy(getArrayStart(), src.getArrayStart(), unitCount);
      setLength(unitCount);
      break;
    }
    // not enough memory: continue below and become bogus
    U_FALLTHROUGH;
  }
  default:
    // Become bogus by writing the fields directly.
    // setToBogus() is not used here because fArray and flags are not consistent here.
    fUnion.fFields.fLengthAndFlags = kIsBogus;
    fUnion.fFields.fArray = nullptr;
    fUnion.fFields.fCapacity = 0;
    break;
  }

  return *this;
}
574
575
449k
// Move assignment: releases the current buffer, then takes over src's fields.
// As in the standard library there is no explicit check for self move
// assignment; it causes no crash nor leak but might make the object bogus.
UnicodeString &UnicodeString::operator=(UnicodeString &&src) noexcept {
  releaseArray();
  copyFieldsFrom(src, /* setSrcToBogus= */ true);
  return *this;
}
582
583
// Same as move assignment except without memory management.
584
449k
void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept {
585
449k
  int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
586
449k
  if(lengthAndFlags & kUsingStackBuffer) {
587
    // Short string using the stack buffer, copy the contents.
588
    // Check for self assignment to prevent "overlap in memcpy" warnings,
589
    // although it should be harmless to copy a buffer to itself exactly.
590
387k
    if(this != &src) {
591
387k
      uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
592
387k
                  getShortLength() * U_SIZEOF_UCHAR);
593
387k
    }
594
387k
  } else {
595
    // In all other cases, copy all fields.
596
61.2k
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
597
61.2k
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
598
61.2k
    if(!hasShortLength()) {
599
12.1k
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
600
12.1k
    }
601
61.2k
    if(setSrcToBogus) {
602
      // Set src to bogus without releasing any memory.
603
61.2k
      src.fUnion.fFields.fLengthAndFlags = kIsBogus;
604
61.2k
      src.fUnion.fFields.fArray = nullptr;
605
61.2k
      src.fUnion.fFields.fCapacity = 0;
606
61.2k
    }
607
61.2k
  }
608
449k
}
609
610
0
// Exchanges the contents of this string and other by moving raw fields
// through a temporary, without any reference counting.
void UnicodeString::swap(UnicodeString &other) noexcept {
  // An empty short string is known not to need releaseArray().
  UnicodeString scratch;
  // Three-way rotation; the sources are not reset in between.
  scratch.copyFieldsFrom(*this, false);
  this->copyFieldsFrom(other, false);
  other.copyFieldsFrom(scratch, false);
  // Make scratch an empty string again so that its destructor does not
  // release the memory that now belongs to other.
  scratch.fUnion.fFields.fLengthAndFlags = kShortString;
}
619
620
//========================================
621
// Miscellaneous operations
622
//========================================
623
624
0
// Returns a copy of this string in which each backslash escape sequence is
// replaced by the code point that unescapeAt() returns for it.
// Returns an empty string if any escape sequence is invalid, and a bogus
// string if the result could not be allocated.
UnicodeString UnicodeString::unescape() const {
    UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
    if (result.isBogus()) {
        return result;
    }
    const char16_t *array = getBuffer();
    const int32_t len = length();
    int32_t prev = 0;  // start of the text that has not been copied yet
    int32_t i = 0;
    while (i != len) {
        if (array[i++] != 0x5C /*'\\'*/) {
            continue;
        }
        // copy the literal text in front of the backslash
        result.append(array, prev, (i - 1) - prev);
        UChar32 c = unescapeAt(i); // advances i
        if (c < 0) {
            // invalid escape sequence: return empty string
            result.remove();
            return result;
        }
        result.append(c);
        prev = i;
    }
    // copy the literal text behind the last escape sequence
    result.append(array, prev, len - prev);
    return result;
}
650
651
122k
// Calls u_unescapeAt() on this string at offset, reading code units through
// the UnicodeString_charAt callback; offset is passed by address.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    void *context = (void*)this;
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), context);
}
654
655
//========================================
656
// Read-only implementation
657
//========================================
658
// Compares the first len code units of this string and text for equality.
// Requires: this & text not bogus and have same lengths.
UBool
UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
  // Byte-wise comparison works for equality regardless of endianness.
  const char16_t *mine = getArrayStart();
  const char16_t *theirs = text.getArrayStart();
  return uprv_memcmp(mine, theirs, len * U_SIZEOF_UCHAR) == 0;
}
664
665
int8_t
666
UnicodeString::doCompare( int32_t start,
667
              int32_t length,
668
              const char16_t *srcChars,
669
              int32_t srcStart,
670
              int32_t srcLength) const
671
114M
{
672
  // compare illegal string values
673
114M
  if(isBogus()) {
674
0
    return -1;
675
0
  }
676
  
677
  // pin indices to legal values
678
114M
  pinIndices(start, length);
679
680
114M
  if(srcChars == nullptr) {
681
    // treat const char16_t *srcChars==nullptr as an empty string
682
0
    return length == 0 ? 0 : 1;
683
0
  }
684
685
  // get the correct pointer
686
114M
  const char16_t *chars = getArrayStart();
687
688
114M
  chars += start;
689
114M
  srcChars += srcStart;
690
691
114M
  int32_t minLength;
692
114M
  int8_t lengthResult;
693
694
  // get the srcLength if necessary
695
114M
  if(srcLength < 0) {
696
15.0k
    srcLength = u_strlen(srcChars + srcStart);
697
15.0k
  }
698
699
  // are we comparing different lengths?
700
114M
  if(length != srcLength) {
701
52.6M
    if(length < srcLength) {
702
25.2M
      minLength = length;
703
25.2M
      lengthResult = -1;
704
27.3M
    } else {
705
27.3M
      minLength = srcLength;
706
27.3M
      lengthResult = 1;
707
27.3M
    }
708
62.2M
  } else {
709
62.2M
    minLength = length;
710
62.2M
    lengthResult = 0;
711
62.2M
  }
712
713
  /*
714
   * note that uprv_memcmp() returns an int but we return an int8_t;
715
   * we need to take care not to truncate the result -
716
   * one way to do this is to right-shift the value to
717
   * move the sign bit into the lower 8 bits and making sure that this
718
   * does not become 0 itself
719
   */
720
721
114M
  if(minLength > 0 && chars != srcChars) {
722
111M
    int32_t result;
723
724
#   if U_IS_BIG_ENDIAN 
725
      // big-endian: byte comparison works
726
      result = uprv_memcmp(chars, srcChars, minLength * sizeof(char16_t));
727
      if(result != 0) {
728
        return (int8_t)(result >> 15 | 1);
729
      }
730
#   else
731
      // little-endian: compare char16_t units
732
7.48G
      do {
733
7.48G
        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
734
7.48G
        if(result != 0) {
735
99.4M
          return (int8_t)(result >> 15 | 1);
736
99.4M
        }
737
7.48G
      } while(--minLength > 0);
738
111M
#   endif
739
111M
  }
740
15.3M
  return lengthResult;
741
114M
}
742
743
/* String compare in code point order - doCompare() compares in code unit order. */
744
int8_t
745
UnicodeString::doCompareCodePointOrder(int32_t start,
746
                                       int32_t length,
747
                                       const char16_t *srcChars,
748
                                       int32_t srcStart,
749
                                       int32_t srcLength) const
750
0
{
751
  // compare illegal string values
752
  // treat const char16_t *srcChars==nullptr as an empty string
753
0
  if(isBogus()) {
754
0
    return -1;
755
0
  }
756
757
  // pin indices to legal values
758
0
  pinIndices(start, length);
759
760
0
  if(srcChars == nullptr) {
761
0
    srcStart = srcLength = 0;
762
0
  }
763
764
0
  int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
765
  /* translate the 32-bit result into an 8-bit one */
766
0
  if(diff!=0) {
767
0
    return (int8_t)(diff >> 15 | 1);
768
0
  } else {
769
0
    return 0;
770
0
  }
771
0
}
772
773
int32_t
774
0
UnicodeString::getLength() const {
775
0
    return length();
776
0
}
777
778
char16_t
779
0
UnicodeString::getCharAt(int32_t offset) const {
780
0
  return charAt(offset);
781
0
}
782
783
// Non-inline accessor that forwards to char32At().
UChar32
UnicodeString::getChar32At(int32_t offset) const {
  const UChar32 c = char32At(offset);
  return c;
}
787
788
// Returns the code point read with U16_GET at offset,
// or kInvalidUChar if offset is outside [0, length()).
UChar32
UnicodeString::char32At(int32_t offset) const
{
  const int32_t len = length();
  // one unsigned comparison rejects both negative and too-large offsets
  if((uint32_t)offset >= (uint32_t)len) {
    return kInvalidUChar;
  }
  const char16_t *array = getArrayStart();
  UChar32 c;
  U16_GET(array, 0, offset, len, c);
  return c;
}
801
802
int32_t
803
0
UnicodeString::getChar32Start(int32_t offset) const {
804
0
  if((uint32_t)offset < (uint32_t)length()) {
805
0
    const char16_t *array = getArrayStart();
806
0
    U16_SET_CP_START(array, 0, offset);
807
0
    return offset;
808
0
  } else {
809
0
    return 0;
810
0
  }
811
0
}
812
813
int32_t
814
0
UnicodeString::getChar32Limit(int32_t offset) const {
815
0
  int32_t len = length();
816
0
  if((uint32_t)offset < (uint32_t)len) {
817
0
    const char16_t *array = getArrayStart();
818
0
    U16_SET_CP_LIMIT(array, 0, offset, len);
819
0
    return offset;
820
0
  } else {
821
0
    return len;
822
0
  }
823
0
}
824
825
int32_t
826
11.6M
UnicodeString::countChar32(int32_t start, int32_t length) const {
827
11.6M
  pinIndices(start, length);
828
  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for nullptr
829
11.6M
  return u_countChar32(getArrayStart()+start, length);
830
11.6M
}
831
832
// Forwards to u_strHasMoreChar32Than() for the range given by start and
// length, after pinning the range to this string.
UBool
UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
  pinIndices(start, length);
  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for nullptr
  const char16_t *text = getArrayStart()+start;
  return u_strHasMoreChar32Than(text, length, number);
}
838
839
int32_t
840
321M
UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
841
  // pin index
842
321M
  int32_t len = length();
843
321M
  if(index<0) {
844
0
    index=0;
845
321M
  } else if(index>len) {
846
0
    index=len;
847
0
  }
848
849
321M
  const char16_t *array = getArrayStart();
850
321M
  if(delta>0) {
851
309M
    U16_FWD_N(array, index, len, delta);
852
309M
  } else {
853
11.6M
    U16_BACK_N(array, 0, index, -delta);
854
11.6M
  }
855
856
321M
  return index;
857
321M
}
858
859
void
860
UnicodeString::doExtract(int32_t start,
861
             int32_t length,
862
             char16_t *dst,
863
             int32_t dstStart) const
864
0
{
865
  // pin indices to legal values
866
0
  pinIndices(start, length);
867
868
  // do not copy anything if we alias dst itself
869
0
  const char16_t *array = getArrayStart();
870
0
  if(array + start != dst + dstStart) {
871
0
    us_arrayCopy(array, start, dst, dstStart, length);
872
0
  }
873
0
}
874
875
int32_t
876
UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
877
5.26M
                       UErrorCode &errorCode) const {
878
5.26M
  int32_t len = length();
879
5.26M
  if(U_SUCCESS(errorCode)) {
880
5.26M
    if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
881
0
      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
882
5.26M
    } else {
883
5.26M
      const char16_t *array = getArrayStart();
884
5.26M
      if(len>0 && len<=destCapacity && array!=dest) {
885
5.26M
        u_memcpy(dest, array, len);
886
5.26M
      }
887
5.26M
      return u_terminateUChars(dest, destCapacity, len, &errorCode);
888
5.26M
    }
889
5.26M
  }
890
891
0
  return len;
892
5.26M
}
893
894
int32_t
895
UnicodeString::extract(int32_t start,
896
                       int32_t length,
897
                       char *target,
898
                       int32_t targetCapacity,
899
                       enum EInvariant) const
900
2.98k
{
901
  // if the arguments are illegal, then do nothing
902
2.98k
  if(targetCapacity < 0 || (targetCapacity > 0 && target == nullptr)) {
903
0
    return 0;
904
0
  }
905
906
  // pin the indices to legal values
907
2.98k
  pinIndices(start, length);
908
909
2.98k
  if(length <= targetCapacity) {
910
2.98k
    u_UCharsToChars(getArrayStart() + start, target, length);
911
2.98k
  }
912
2.98k
  UErrorCode status = U_ZERO_ERROR;
913
2.98k
  return u_terminateChars(target, targetCapacity, length, &status);
914
2.98k
}
915
916
// Returns a read-only alias of the range given by start and len
// (pinned to this string). The result is bogus if getBuffer() returns nullptr.
UnicodeString
UnicodeString::tempSubString(int32_t start, int32_t len) const {
  pinIndices(start, len);
  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
  const char16_t *array = getBuffer();
  if(array==nullptr) {
    // Any non-null pointer will do: a null pointer would make an empty string,
    // while a length of -2 makes the aliasing constructor produce a bogus one.
    array=fUnion.fStackFields.fBuffer;
    len=-2;
  }
  const char16_t *text = array + start;
  return UnicodeString(false, text, len);
}
926
927
int32_t
928
UnicodeString::toUTF8(int32_t start, int32_t len,
929
0
                      char *target, int32_t capacity) const {
930
0
  pinIndices(start, len);
931
0
  int32_t length8;
932
0
  UErrorCode errorCode = U_ZERO_ERROR;
933
0
  u_strToUTF8WithSub(target, capacity, &length8,
934
0
                     getBuffer() + start, len,
935
0
                     0xFFFD,  // Standard substitution character.
936
0
                     nullptr,    // Don't care about number of substitutions.
937
0
                     &errorCode);
938
0
  return length8;
939
0
}
940
941
#if U_CHARSET_IS_UTF8
942
943
int32_t
944
UnicodeString::extract(int32_t start, int32_t len,
945
0
                       char *target, uint32_t dstSize) const {
946
  // if the arguments are illegal, then do nothing
947
0
  if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
948
0
    return 0;
949
0
  }
950
0
  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
951
0
}
952
953
// else see unistr_cnv.cpp
954
#endif
955
956
void 
957
UnicodeString::extractBetween(int32_t start,
958
                  int32_t limit,
959
139k
                  UnicodeString& target) const {
960
139k
  pinIndex(start);
961
139k
  pinIndex(limit);
962
139k
  doExtract(start, limit - start, target);
963
139k
}
964
965
// When converting from UTF-16 to UTF-8, the result will have at most 3 times
966
// as many bytes as the source has UChars.
967
// The "worst cases" are writing systems like Indic, Thai and CJK with
968
// 3:1 bytes:UChars.
969
void
970
0
UnicodeString::toUTF8(ByteSink &sink) const {
971
0
  int32_t length16 = length();
972
0
  if(length16 != 0) {
973
0
    char stackBuffer[1024];
974
0
    int32_t capacity = (int32_t)sizeof(stackBuffer);
975
0
    UBool utf8IsOwned = false;
976
0
    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
977
0
                                      3*length16,
978
0
                                      stackBuffer, capacity,
979
0
                                      &capacity);
980
0
    int32_t length8 = 0;
981
0
    UErrorCode errorCode = U_ZERO_ERROR;
982
0
    u_strToUTF8WithSub(utf8, capacity, &length8,
983
0
                       getBuffer(), length16,
984
0
                       0xFFFD,  // Standard substitution character.
985
0
                       nullptr,    // Don't care about number of substitutions.
986
0
                       &errorCode);
987
0
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
988
0
      utf8 = (char *)uprv_malloc(length8);
989
0
      if(utf8 != nullptr) {
990
0
        utf8IsOwned = true;
991
0
        errorCode = U_ZERO_ERROR;
992
0
        u_strToUTF8WithSub(utf8, length8, &length8,
993
0
                           getBuffer(), length16,
994
0
                           0xFFFD,  // Standard substitution character.
995
0
                           nullptr,    // Don't care about number of substitutions.
996
0
                           &errorCode);
997
0
      } else {
998
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
999
0
      }
1000
0
    }
1001
0
    if(U_SUCCESS(errorCode)) {
1002
0
      sink.Append(utf8, length8);
1003
0
      sink.Flush();
1004
0
    }
1005
0
    if(utf8IsOwned) {
1006
0
      uprv_free(utf8);
1007
0
    }
1008
0
  }
1009
0
}
1010
1011
// Converts the whole string to UTF-32 into `utf32` (room for `capacity` code
// points), substituting U+FFFD where the converter needs a substitute.
// Returns the length reported by u_strToUTF32WithSub(), or 0 if errorCode
// already indicated a failure on entry (in which case nothing is converted).
int32_t
UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
  if(U_FAILURE(errorCode)) {
    return 0;
  }

  int32_t length32=0;
  // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
  u_strToUTF32WithSub(utf32, capacity, &length32,
      getBuffer(), length(),
      0xfffd,  // Substitution character.
      nullptr,    // Don't care about number of substitutions.
      &errorCode);
  return length32;
}
1024
1025
// Finds the first occurrence of srcChars[srcStart, srcStart+srcLength) inside
// the pinned range [start, start+length) of this string.
// A negative srcLength means that the pattern is NUL-terminated.
// Returns the index of the match relative to the start of this string,
// or -1 if there is no match, this string is bogus, or the pattern is
// missing or empty.
int32_t
UnicodeString::indexOf(const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t length) const
{
  if(isBogus()) {
    return -1;
  }
  if(srcChars == nullptr || srcStart < 0 || srcLength == 0) {
    return -1;
  }

  // UnicodeString does not find empty substrings:
  // a NUL-terminated pattern that starts with NUL is empty.
  if(srcLength < 0 && srcChars[srcStart] == 0) {
    return -1;
  }

  // Bring the search range within bounds.
  pinIndices(start, length);

  const char16_t *base = getArrayStart();
  const char16_t *hit = u_strFindFirst(base + start, length, srcChars + srcStart, srcLength);
  return hit != nullptr ? (int32_t)(hit - base) : -1;
}
1053
1054
// Finds the first occurrence of the code unit c in the pinned range
// [start, start+length). Returns its index relative to the start of this
// string, or -1 if it does not occur.
int32_t
UnicodeString::doIndexOf(char16_t c,
             int32_t start,
             int32_t length) const
{
  // Bring the search range within bounds.
  pinIndices(start, length);

  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memchr(base + start, c, length);
  return hit != nullptr ? (int32_t)(hit - base) : -1;
}
1071
1072
// Finds the first occurrence of the code point c in the pinned range
// [start, start+length). Returns its index relative to the start of this
// string, or -1 if it does not occur.
int32_t
UnicodeString::doIndexOf(UChar32 c,
                         int32_t start,
                         int32_t length) const {
  // Bring the search range within bounds.
  pinIndices(start, length);

  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memchr32(base + start, c, length);
  return hit != nullptr ? (int32_t)(hit - base) : -1;
}
1088
1089
// Finds the last occurrence of srcChars[srcStart, srcStart+srcLength) inside
// the pinned range [start, start+length) of this string.
// A negative srcLength means that the pattern is NUL-terminated.
// Returns the index of the match relative to the start of this string,
// or -1 if there is no match, this string is bogus, or the pattern is
// missing or empty.
int32_t
UnicodeString::lastIndexOf(const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t length) const
{
  if(isBogus()) {
    return -1;
  }
  if(srcChars == nullptr || srcStart < 0 || srcLength == 0) {
    return -1;
  }

  // UnicodeString does not find empty substrings:
  // a NUL-terminated pattern that starts with NUL is empty.
  if(srcLength < 0 && srcChars[srcStart] == 0) {
    return -1;
  }

  // Bring the search range within bounds.
  pinIndices(start, length);

  const char16_t *base = getArrayStart();
  const char16_t *hit = u_strFindLast(base + start, length, srcChars + srcStart, srcLength);
  return hit != nullptr ? (int32_t)(hit - base) : -1;
}
1117
1118
// Finds the last occurrence of the code unit c in the pinned range
// [start, start+length). Returns its index relative to the start of this
// string, or -1 if it does not occur or this string is bogus.
int32_t
UnicodeString::doLastIndexOf(char16_t c,
                 int32_t start,
                 int32_t length) const
{
  if(isBogus()) {
    return -1;
  }

  // Bring the search range within bounds.
  pinIndices(start, length);

  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memrchr(base + start, c, length);
  return hit != nullptr ? (int32_t)(hit - base) : -1;
}
1139
1140
// Finds the last occurrence of the code point c in the pinned range
// [start, start+length). Returns its index relative to the start of this
// string, or -1 if it does not occur.
int32_t
UnicodeString::doLastIndexOf(UChar32 c,
                             int32_t start,
                             int32_t length) const {
  // Bring the search range within bounds.
  pinIndices(start, length);

  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memrchr32(base + start, c, length);
  return hit != nullptr ? (int32_t)(hit - base) : -1;
}
1156
1157
//========================================
1158
// Write implementation
1159
//========================================
1160
1161
// Replaces every occurrence of oldText[oldStart, oldStart+oldLength) within
// the range [start, start+length) of this string by
// newText[newStart, newStart+newLength). All three ranges are pinned first.
// Does nothing if any of the three strings is bogus or if the pinned search
// pattern is empty. Returns *this.
UnicodeString&
UnicodeString::findAndReplace(int32_t start,
                  int32_t length,
                  const UnicodeString& oldText,
                  int32_t oldStart,
                  int32_t oldLength,
                  const UnicodeString& newText,
                  int32_t newStart,
                  int32_t newLength)
{
  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
    return *this;
  }

  pinIndices(start, length);
  oldText.pinIndices(oldStart, oldLength);
  newText.pinIndices(newStart, newLength);

  if(oldLength == 0) {
    return *this;
  }

  for(;;) {
    // Stop once the remaining range cannot hold another occurrence.
    if(length <= 0 || length < oldLength) {
      break;
    }

    const int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
    if(pos < 0) {
      // no more oldText's here: done
      break;
    }

    // Found oldText: substitute newText and continue searching right after it.
    replace(pos, oldLength, newText, newStart, newLength);
    // Everything up to and including the old match has been consumed.
    length -= pos + oldLength - start;
    start = pos + newLength;
  }

  return *this;
}
1198
1199
1200
void
1201
UnicodeString::setToBogus()
1202
4.02M
{
1203
4.02M
  releaseArray();
1204
1205
4.02M
  fUnion.fFields.fLengthAndFlags = kIsBogus;
1206
4.02M
  fUnion.fFields.fArray = 0;
1207
4.02M
  fUnion.fFields.fCapacity = 0;
1208
4.02M
}
1209
1210
// turn a bogus string into an empty one
1211
void
1212
15.3M
UnicodeString::unBogus() {
1213
15.3M
  if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1214
0
    setToEmpty();
1215
0
  }
1216
15.3M
}
1217
1218
const char16_t *
1219
2.42k
UnicodeString::getTerminatedBuffer() {
1220
2.42k
  if(!isWritable()) {
1221
0
    return nullptr;
1222
0
  }
1223
2.42k
  char16_t *array = getArrayStart();
1224
2.42k
  int32_t len = length();
1225
2.42k
  if(len < getCapacity()) {
1226
2.42k
    if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1227
      // If len<capacity on a read-only alias, then array[len] is
1228
      // either the original NUL (if constructed with (true, s, length))
1229
      // or one of the original string contents characters (if later truncated),
1230
      // therefore we can assume that array[len] is initialized memory.
1231
0
      if(array[len] == 0) {
1232
0
        return array;
1233
0
      }
1234
2.42k
    } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1235
      // kRefCounted: Do not write the NUL if the buffer is shared.
1236
      // That is mostly safe, except when the length of one copy was modified
1237
      // without copy-on-write, e.g., via truncate(newLength) or remove().
1238
      // Then the NUL would be written into the middle of another copy's string.
1239
1240
      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1241
      // Do not test if there is a NUL already because it might be uninitialized memory.
1242
      // (That would be safe, but tools like valgrind & Purify would complain.)
1243
2.42k
      array[len] = 0;
1244
2.42k
      return array;
1245
2.42k
    }
1246
2.42k
  }
1247
0
  if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1248
0
    array = getArrayStart();
1249
0
    array[len] = 0;
1250
0
    return array;
1251
0
  } else {
1252
0
    return nullptr;
1253
0
  }
1254
0
}
1255
1256
// setTo() analogous to the readonly-aliasing constructor with the same signature
1257
UnicodeString &
1258
UnicodeString::setTo(UBool isTerminated,
1259
                     ConstChar16Ptr textPtr,
1260
                     int32_t textLength)
1261
3.45M
{
1262
3.45M
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1263
    // do not modify a string that has an "open" getBuffer(minCapacity)
1264
0
    return *this;
1265
0
  }
1266
1267
3.45M
  const char16_t *text = textPtr;
1268
3.45M
  if(text == nullptr) {
1269
    // treat as an empty string, do not alias
1270
0
    releaseArray();
1271
0
    setToEmpty();
1272
0
    return *this;
1273
0
  }
1274
1275
3.45M
  if( textLength < -1 ||
1276
3.45M
      (textLength == -1 && !isTerminated) ||
1277
3.45M
      (textLength >= 0 && isTerminated && text[textLength] != 0)
1278
3.45M
  ) {
1279
0
    setToBogus();
1280
0
    return *this;
1281
0
  }
1282
1283
3.45M
  releaseArray();
1284
1285
3.45M
  if(textLength == -1) {
1286
    // text is terminated, or else it would have failed the above test
1287
0
    textLength = u_strlen(text);
1288
0
  }
1289
3.45M
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1290
3.45M
  setArray((char16_t *)text, textLength, isTerminated ? textLength + 1 : textLength);
1291
3.45M
  return *this;
1292
3.45M
}
1293
1294
// setTo() analogous to the writable-aliasing constructor with the same signature
1295
UnicodeString &
1296
UnicodeString::setTo(char16_t *buffer,
1297
                     int32_t buffLength,
1298
0
                     int32_t buffCapacity) {
1299
0
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1300
    // do not modify a string that has an "open" getBuffer(minCapacity)
1301
0
    return *this;
1302
0
  }
1303
1304
0
  if(buffer == nullptr) {
1305
    // treat as an empty string, do not alias
1306
0
    releaseArray();
1307
0
    setToEmpty();
1308
0
    return *this;
1309
0
  }
1310
1311
0
  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1312
0
    setToBogus();
1313
0
    return *this;
1314
0
  } else if(buffLength == -1) {
1315
    // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1316
0
    const char16_t *p = buffer, *limit = buffer + buffCapacity;
1317
0
    while(p != limit && *p != 0) {
1318
0
      ++p;
1319
0
    }
1320
0
    buffLength = (int32_t)(p - buffer);
1321
0
  }
1322
1323
0
  releaseArray();
1324
1325
0
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
1326
0
  setArray(buffer, buffLength, buffCapacity);
1327
0
  return *this;
1328
0
}
1329
1330
8.24k
// Replaces the contents of this string with the UTF-16 conversion of `utf8`.
// Input that cannot be converted as-is is represented with U+FFFD.
// - A string with an "open" getBuffer(minCapacity) is left unmodified, in
//   line with the setTo() overloads.
// - If no destination buffer can be obtained, or the conversion fails,
//   this string becomes bogus.
// Returns *this.
UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
    // do not modify a string that has an "open" getBuffer(minCapacity);
    // in particular, do not call releaseBuffer() on a buffer that
    // this function did not open
    return *this;
  }

  unBogus();
  int32_t length = utf8.length();
  int32_t capacity;
  // The UTF-16 string will be at most as long as the UTF-8 string.
  if(length <= US_STACKBUF_SIZE) {
    capacity = US_STACKBUF_SIZE;
  } else {
    capacity = length + 1;  // +1 for the terminating NUL.
  }

  char16_t *utf16 = getBuffer(capacity);
  if(utf16 == nullptr) {
    // getBuffer() could not provide a writable array; do not hand a null
    // destination with a non-zero capacity to the converter.
    setToBogus();
    return *this;
  }

  // Initialized so that releaseBuffer() never sees an indeterminate value,
  // even if the converter returns without storing a length.
  int32_t length16 = 0;
  UErrorCode errorCode = U_ZERO_ERROR;
  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
      utf8.data(), length,
      0xfffd,  // Substitution character.
      nullptr,    // Don't care about number of substitutions.
      &errorCode);
  releaseBuffer(length16);
  if(U_FAILURE(errorCode)) {
    setToBogus();
  }
  return *this;
}
1354
1355
// Overwrites the code unit at `offset` with c. An offset outside the string
// is moved to the nearest valid position (first or last code unit).
// Nothing is written if the array cannot be made writable or if the string
// is empty. Returns *this.
UnicodeString&
UnicodeString::setCharAt(int32_t offset,
             char16_t c)
{
  const int32_t count = length();
  // Always make the array writable/unshared first, even for an empty string.
  const UBool writable = cloneArrayIfNeeded();
  if(!writable || count <= 0) {
    return *this;
  }

  // Clamp the offset into [0, count-1].
  if(offset < 0) {
    offset = 0;
  } else if(offset >= count) {
    offset = count - 1;
  }

  char16_t *chars = getArrayStart();
  chars[offset] = c;
  return *this;
}
1371
1372
// Replaces the range [start, start+_length) with the UTF-16 encoding of the
// code point srcChar. If srcChar cannot be encoded, the range is removed
// rather than replaced. Returns the result of doReplace().
UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               UChar32 srcChar) {
  char16_t units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool isError = false;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, isError);

  // We test isError so that the compiler does not complain that we don't.
  // If isError (srcChar is not a valid code point) then unitCount==0 which
  // means we remove the source segment rather than replacing it with srcChar.
  const int32_t replacementLength = isError ? 0 : unitCount;
  return doReplace(start, _length, units, 0, replacementLength);
}
1385
1386
// Appends the UTF-16 encoding of the code point srcChar.
// A code point that cannot be encoded leaves the string unchanged.
// Returns *this (directly, or via doAppend()).
UnicodeString&
UnicodeString::append(UChar32 srcChar) {
  char16_t units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool isError = false;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, isError);

  // We test isError so that the compiler does not complain that we don't.
  // If isError then unitCount==0 which would turn the doAppend() into a no-op anyway.
  if(isError) {
    return *this;
  }
  return doAppend(units, 0, unitCount);
}
1396
1397
// Replaces the range [start, start+length) of this string with the range
// [srcStart, srcStart+srcLength) of src, after pinning the source range to
// src's bounds. Delegates to the char16_t* overload of doReplace().
UnicodeString&
UnicodeString::doReplace( int32_t start,
              int32_t length,
              const UnicodeString& src,
              int32_t srcStart,
              int32_t srcLength)
{
  // Make the source range legal for src.
  src.pinIndices(srcStart, srcLength);

  // Hand src's characters to the pointer-based implementation.
  const char16_t *srcArray = src.getArrayStart();
  return doReplace(start, length, srcArray, srcStart, srcLength);
}
1411
1412
// Replaces the range [start, start+length) of this string with
// srcChars[srcStart, srcStart+srcLength).
// - A non-writable string is left unchanged.
// - Removing a prefix or suffix from a read-only alias only adjusts the
//   alias (pointer/capacity/length) and copies nothing.
// - A replacement at the very end is handled by doAppend().
// - A null srcChars is treated as an empty replacement; a negative srcLength
//   means that the replacement is NUL-terminated.
// - If the resulting length would exceed INT32_MAX, or a needed temporary
//   copy cannot be made, this string becomes bogus.
// Returns *this (directly, or via doAppend()/a nested doReplace()).
UnicodeString&
UnicodeString::doReplace(int32_t start,
             int32_t length,
             const char16_t *srcChars,
             int32_t srcStart,
             int32_t srcLength)
{
  if(!isWritable()) {
    return *this;
  }

  const int32_t previousLength = this->length();

  // optimize (read-only alias).remove(0, start) and .remove(start, end)
  const UBool removingFromAlias =
      (fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0;
  if(removingFromAlias) {
    if(start == 0) {
      // remove prefix by adjusting the array pointer
      pinIndex(length);
      fUnion.fFields.fArray += length;
      fUnion.fFields.fCapacity -= length;
      setLength(previousLength - length);
      return *this;
    }
    pinIndex(start);
    if(length >= (previousLength - start)) {
      // remove suffix by reducing the length (like truncate())
      setLength(start);
      fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
      return *this;
    }
    // Removal from the middle: fall through to the general code.
  }

  if(start == previousLength) {
    return doAppend(srcChars, srcStart, srcLength);
  }

  if(srcChars != nullptr) {
    // Perform all remaining operations relative to srcChars + srcStart.
    // From this point forward, do not use srcStart.
    srcChars += srcStart;
    if (srcLength < 0) {
      // get the srcLength if necessary
      srcLength = u_strlen(srcChars);
    }
  } else {
    // No replacement text at all.
    srcLength = 0;
  }

  // pin the indices to legal values
  pinIndices(start, length);

  // Calculate the size of the string after the replace.
  // Avoid int32_t overflow.
  int32_t resultLength = previousLength - length;
  if(srcLength > (INT32_MAX - resultLength)) {
    setToBogus();
    return *this;
  }
  resultLength += srcLength;

  // Check for insertion into ourself
  const char16_t *previousArray = getArrayStart();
  if (isBufferWritable() &&
      previousArray < srcChars + srcLength &&
      srcChars < previousArray + previousLength) {
    // Copy into a new UnicodeString and start over
    UnicodeString detached(srcChars, srcLength);
    if (detached.isBogus()) {
      setToBogus();
      return *this;
    }
    return doReplace(start, length, detached.getArrayStart(), 0, srcLength);
  }

  // cloneArrayIfNeeded(doCopyArray=false) may change fArray but will not copy the current contents;
  // therefore we need to keep the current fArray
  char16_t savedStackBuffer[US_STACKBUF_SIZE];
  if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (resultLength > US_STACKBUF_SIZE)) {
    // copy the stack buffer contents because it will be overwritten with
    // fUnion.fFields values
    u_memcpy(savedStackBuffer, previousArray, previousLength);
    previousArray = savedStackBuffer;
  }

  // clone our array and allocate a bigger array if needed
  int32_t *staleBuffer = nullptr;
  const UBool haveRoom = cloneArrayIfNeeded(resultLength, getGrowCapacity(resultLength),
                                            false, &staleBuffer);
  if(!haveRoom) {
    return *this;
  }

  // now do the replace

  char16_t *currentArray = getArrayStart();
  const UBool arrayChanged = currentArray != previousArray;
  const int32_t tailStart = start + length;
  const int32_t tailLength = previousLength - tailStart;

  if(arrayChanged) {
    // if fArray changed, then we need to copy everything except what will change:
    // first the part in front of the replaced range...
    us_arrayCopy(previousArray, 0, currentArray, 0, start);
  }
  if(arrayChanged || length != srcLength) {
    // ...then the part behind it. When fArray did not change, this moves only
    // the portion that isn't changing, leaving a hole for the new text.
    us_arrayCopy(previousArray, tailStart,
                 currentArray, start + srcLength,
                 tailLength);
  }

  // now fill in the hole with the new string
  us_arrayCopy(srcChars, 0, currentArray, start, srcLength);

  setLength(resultLength);

  // delayed delete in case srcChars == fArray when we started, and
  // to keep previousArray alive for the above operations
  if (staleBuffer) {
    uprv_free(staleBuffer);
  }

  return *this;
}
1534
1535
// Versions of doReplace() only for append() variants.
1536
// doReplace() and doAppend() optimize for different cases.
1537
1538
// Appends the range [srcStart, srcStart+srcLength) of src to this string.
// A zero srcLength is a no-op; otherwise the range is pinned to src's bounds
// and the work is delegated to the char16_t* overload of doAppend().
UnicodeString&
UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
  const UBool nothingToAppend = srcLength == 0;
  if(nothingToAppend) {
    return *this;
  }

  // Make the source range legal for src.
  src.pinIndices(srcStart, srcLength);

  const char16_t *srcArray = src.getArrayStart();
  return doAppend(srcArray, srcStart, srcLength);
}
1548
1549
// Appends srcChars[srcStart, srcStart+srcLength) to this string.
// - Nothing happens if this string is not writable, srcChars is null, or
//   there is nothing to append. A negative srcLength means that the source
//   is NUL-terminated.
// - If the combined length overflows int32_t, or a needed temporary copy
//   cannot be made, this string becomes bogus.
// - If the source overlaps this string's own writable buffer, the source is
//   copied first.
// Returns *this (directly, or via a nested doAppend()).
UnicodeString&
UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) {
  if(!isWritable() || srcLength == 0 || srcChars == nullptr) {
    return *this;
  }

  // Perform all remaining operations relative to srcChars + srcStart.
  // From this point forward, do not use srcStart.
  srcChars += srcStart;

  if(srcLength < 0) {
    // get the srcLength if necessary
    srcLength = u_strlen(srcChars);
    if(srcLength == 0) {
      return *this;
    }
  }

  const int32_t previousLength = length();
  int32_t combinedLength;
  if (uprv_add32_overflow(previousLength, srcLength, &combinedLength)) {
    setToBogus();
    return *this;
  }

  // Check for append onto ourself
  const char16_t* previousArray = getArrayStart();
  if (isBufferWritable() &&
      previousArray < srcChars + srcLength &&
      srcChars < previousArray + previousLength) {
    // Copy into a new UnicodeString and start over
    UnicodeString detached(srcChars, srcLength);
    if (detached.isBogus()) {
      setToBogus();
      return *this;
    }
    return doAppend(detached.getArrayStart(), 0, srcLength);
  }

  // optimize append() onto a large-enough, owned string:
  // only fall back to cloneArrayIfNeeded() when the text does not fit in place
  const UBool fitsInPlace = combinedLength <= getCapacity() && isBufferWritable();
  if(!fitsInPlace &&
      !cloneArrayIfNeeded(combinedLength, getGrowCapacity(combinedLength))) {
    return *this;
  }

  char16_t *currentArray = getArrayStart();
  // Do not copy characters when
  //   char16_t *buffer=str.getAppendBuffer(...);
  // is followed by
  //   str.append(buffer, length);
  // or
  //   str.appendString(buffer, length)
  // or similar.
  const UBool alreadyInPlace = srcChars == currentArray + previousLength;
  if(!alreadyInPlace) {
    us_arrayCopy(srcChars, 0, currentArray, previousLength, srcLength);
  }
  setLength(combinedLength);
  return *this;
}
1605
1606
/**
1607
 * Replaceable API
1608
 */
1609
void
1610
UnicodeString::handleReplaceBetween(int32_t start,
1611
                                    int32_t limit,
1612
0
                                    const UnicodeString& text) {
1613
0
    replaceBetween(start, limit, text);
1614
0
}
1615
1616
/**
1617
 * Replaceable API
1618
 */
1619
void 
1620
0
UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1621
0
    if (limit <= start) {
1622
0
        return; // Nothing to do; avoid bogus malloc call
1623
0
    }
1624
0
    char16_t* text = (char16_t*) uprv_malloc( sizeof(char16_t) * (limit - start) );
1625
    // Check to make sure text is not null.
1626
0
    if (text != nullptr) {
1627
0
      extractBetween(start, limit, text, 0);
1628
0
      insert(dest, text, 0, limit - start);    
1629
0
      uprv_free(text);
1630
0
    }
1631
0
}
1632
1633
/**
1634
 * Replaceable API
1635
 *
1636
 * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1637
 * so we implement this function here.
1638
 */
1639
0
UBool Replaceable::hasMetaData() const {
1640
0
    return true;
1641
0
}
1642
1643
/**
1644
 * Replaceable API
1645
 */
1646
0
UBool UnicodeString::hasMetaData() const {
1647
0
    return false;
1648
0
}
1649
1650
// Reverses the code units in the pinned range [start, start+length) in place.
// Surrogate pairs inside the range keep their lead-before-trail order: after
// the plain code unit reversal, every trail+lead sequence is swapped back.
// Nothing happens for ranges shorter than two code units or if the array
// cannot be made writable. Returns *this.
UnicodeString&
UnicodeString::doReverse(int32_t start, int32_t length) {
  if(length <= 1 || !cloneArrayIfNeeded()) {
    return *this;
  }

  // pin the indices to legal values
  pinIndices(start, length);
  if(length <= 1) {  // pinIndices() might have shrunk the length
    return *this;
  }

  char16_t *const first = getArrayStart() + start;
  char16_t *lo = first;
  char16_t *hi = first + length - 1;  // -1 for inclusive boundary (length>=2)
  UBool sawLead = false;

  // Before the loop we know lo<hi because length>=2.
  do {
    const char16_t fromLo = *lo;
    const char16_t fromHi = *hi;
    sawLead |= (UBool)U16_IS_LEAD(fromLo);
    sawLead |= (UBool)U16_IS_LEAD(fromHi);
    *lo++ = fromHi;
    *hi-- = fromLo;
  } while(lo < hi);
  // Make sure to test the middle code unit of an odd-length string.
  // Redundant if the length is even.
  sawLead |= (UBool)U16_IS_LEAD(*lo);

  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
  if(sawLead) {
    lo = first;
    hi = first + length - 1; // -1 so that we can look at lo[1] if lo<hi
    while(lo < hi) {
      const char16_t unit = lo[0];
      if(U16_IS_TRAIL(unit) && U16_IS_LEAD(lo[1])) {
        // trail followed by lead: a reversed pair, put it back in order
        lo[0] = lo[1];
        lo[1] = unit;
        lo += 2;
      } else {
        ++lo;
      }
    }
  }

  return *this;
}
1695
1696
// Pads this string at the front with padChar until it is targetLength long.
// Returns false (and changes nothing) if the string is already at least
// that long or if the array cannot be grown; returns true otherwise.
UBool
UnicodeString::padLeading(int32_t targetLength,
                          char16_t padChar)
{
  const int32_t currentLength = length();
  if(currentLength >= targetLength) {
    return false;
  }
  if(!cloneArrayIfNeeded(targetLength)) {
    return false;
  }

  char16_t *chars = getArrayStart();
  const int32_t padCount = targetLength - currentLength;

  // move contents up by padding width
  us_arrayCopy(chars, 0, chars, padCount, currentLength);

  // fill in padding character
  for(int32_t i = 0; i < padCount; ++i) {
    chars[i] = padChar;
  }

  setLength(targetLength);
  return true;
}
1717
1718
// Pads this string at the end with padChar until it is targetLength long.
// Returns false (and changes nothing) if the string is already at least
// that long or if the array cannot be grown; returns true otherwise.
UBool
UnicodeString::padTrailing(int32_t targetLength,
                           char16_t padChar)
{
  const int32_t currentLength = length();
  if(currentLength >= targetLength) {
    return false;
  }
  if(!cloneArrayIfNeeded(targetLength)) {
    return false;
  }

  // fill in padding character
  char16_t *chars = getArrayStart();
  for(int32_t i = currentLength; i < targetLength; ++i) {
    chars[i] = padChar;
  }

  setLength(targetLength);
  return true;
}
1736
1737
//========================================
1738
// Hashing
1739
//========================================
1740
// Computes the hash code of this string's contents.
// The reserved value kInvalidHashCode is never returned; kEmptyHashCode is
// returned in its place.
int32_t
UnicodeString::doHashCode() const
{
    /* Delegate hash computation to uhash.  This makes UnicodeString
     * hashing consistent with char16_t* hashing.  */
    const int32_t computed = ustr_hashUCharsN(getArrayStart(), length());
    return computed == kInvalidHashCode ? (int32_t)kEmptyHashCode : computed;
}
1751
1752
//========================================
1753
// External Buffer
1754
//========================================
1755
1756
char16_t *
1757
58.2M
UnicodeString::getBuffer(int32_t minCapacity) {
1758
58.2M
  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1759
58.2M
    fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1760
58.2M
    setZeroLength();
1761
58.2M
    return getArrayStart();
1762
58.2M
  } else {
1763
0
    return nullptr;
1764
0
  }
1765
58.2M
}
1766
1767
void
1768
58.2M
UnicodeString::releaseBuffer(int32_t newLength) {
1769
58.2M
  if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1770
    // set the new fLength
1771
58.2M
    int32_t capacity=getCapacity();
1772
58.2M
    if(newLength==-1) {
1773
      // the new length is the string length, capped by fCapacity
1774
0
      const char16_t *array=getArrayStart(), *p=array, *limit=array+capacity;
1775
0
      while(p<limit && *p!=0) {
1776
0
        ++p;
1777
0
      }
1778
0
      newLength=(int32_t)(p-array);
1779
58.2M
    } else if(newLength>capacity) {
1780
0
      newLength=capacity;
1781
0
    }
1782
58.2M
    setLength(newLength);
1783
58.2M
    fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1784
58.2M
  }
1785
58.2M
}
1786
1787
//========================================
1788
// Miscellaneous
1789
//========================================
1790
// Makes sure this string owns an array that may be written to and that has
// room for at least newCapacity code units, allocating a new array when the
// current one cannot be used as is.
//
// newCapacity      required capacity; -1 means "the current capacity".
// growCapacity     capacity to try first when a new array must be allocated;
//                  a negative value means "same as newCapacity".
// doCopyArray      if true, the old contents are copied into the new array
//                  (truncated to what fits); if false, the length becomes 0.
// pBufferToDelete  if non-null and the old reference-counted block drops to
//                  a count of 0, the block is not freed here; its address is
//                  stored in *pBufferToDelete for the caller to free.
// forceClone       if true, a new array is allocated unconditionally.
//
// Returns true if the string is usable afterwards. Returns false if the
// string is not writable, or if allocation failed (in which case the string
// has been set to bogus).
UBool
1791
UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1792
                                  int32_t growCapacity,
1793
                                  UBool doCopyArray,
1794
                                  int32_t **pBufferToDelete,
1795
78.0M
                                  UBool forceClone) {
1796
  // default parameters need to be static, therefore
1797
  // the defaults are -1 to have convenience defaults
1798
78.0M
  if(newCapacity == -1) {
1799
616k
    newCapacity = getCapacity();
1800
616k
  }
1801
1802
  // while a getBuffer(minCapacity) is "open",
1803
  // prevent any modifications of the string by returning false here
1804
  // if the string is bogus, then only an assignment or similar can revive it
1805
78.0M
  if(!isWritable()) {
1806
0
    return false;
1807
0
  }
1808
1809
  /*
1810
   * We need to make a copy of the array if
1811
   * the buffer is read-only, or
1812
   * the buffer is refCounted (shared), and refCount>1, or
1813
   * the buffer is too small.
1814
   * Return false if memory could not be allocated.
1815
   */
1816
78.0M
  if(forceClone ||
1817
78.0M
     fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1818
78.0M
     (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
1819
78.0M
     newCapacity > getCapacity()
1820
78.0M
  ) {
1821
    // check growCapacity for default value and use of the stack buffer
    // (a request that fits into the stack buffer is not grown beyond it)
1822
10.7M
    if(growCapacity < 0) {
1823
41.6k
      growCapacity = newCapacity;
1824
10.6M
    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1825
17.7k
      growCapacity = US_STACKBUF_SIZE;
1826
17.7k
    }
1827
1828
    // save old values
    // (length, flags and array are captured before allocate() is called,
    // so they can be used for copying and restored on failure)
1829
10.7M
    char16_t oldStackBuffer[US_STACKBUF_SIZE];
1830
10.7M
    char16_t *oldArray;
1831
10.7M
    int32_t oldLength = length();
1832
10.7M
    int16_t flags = fUnion.fFields.fLengthAndFlags;
1833
1834
10.7M
    if(flags&kUsingStackBuffer) {
1835
8.96M
      U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
1836
8.96M
      if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1837
        // copy the stack buffer contents because it will be overwritten with
1838
        // fUnion.fFields values
1839
8.96M
        us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
1840
8.96M
        oldArray = oldStackBuffer;
1841
8.96M
      } else {
1842
3.99k
        oldArray = nullptr; // no need to copy from the stack buffer to itself
1843
3.99k
      }
1844
8.96M
    } else {
1845
1.73M
      oldArray = fUnion.fFields.fArray;
1846
1.73M
      U_ASSERT(oldArray!=nullptr); /* when stack buffer is not used, oldArray must have a non-nullptr reference */
1847
1.73M
    }
1848
1849
    // allocate a new array
    // (try growCapacity first; if that fails and newCapacity is smaller,
    // retry with just newCapacity)
1850
10.7M
    if(allocate(growCapacity) ||
1851
10.7M
       (newCapacity < growCapacity && allocate(newCapacity))
1852
10.7M
    ) {
1853
10.7M
      if(doCopyArray) {
1854
        // copy the contents
1855
        // do not copy more than what fits - it may be smaller than before
1856
9.90M
        int32_t minLength = oldLength;
1857
9.90M
        newCapacity = getCapacity();
1858
9.90M
        if(newCapacity < minLength) {
1859
0
          minLength = newCapacity;
1860
0
        }
1861
9.90M
        if(oldArray != nullptr) {
1862
9.90M
          us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1863
9.90M
        }
1864
9.90M
        setLength(minLength);
1865
9.90M
      } else {
1866
797k
        setZeroLength();
1867
797k
      }
1868
1869
      // release the old array
1870
10.7M
      if(flags & kRefCounted) {
1871
        // the array is refCounted; decrement and release if 0
        // (the counter is the 32-bit slot immediately before the old array)
1872
1.73M
        u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1873
1.73M
        if(umtx_atomic_dec(pRefCount) == 0) {
1874
928k
          if(pBufferToDelete == 0) {
1875
              // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1876
              // is defined as volatile. (Volatile has useful non-standard behavior
1877
              //   with this compiler.)
1878
927k
            uprv_free((void *)pRefCount);
1879
927k
          } else {
1880
            // the caller requested to delete it himself
1881
1.84k
            *pBufferToDelete = (int32_t *)pRefCount;
1882
1.84k
          }
1883
928k
        }
1884
1.73M
      }
1885
10.7M
    } else {
1886
      // not enough memory for growCapacity and not even for the smaller newCapacity
1887
      // reset the old values for setToBogus() to release the array
1888
0
      if(!(flags&kUsingStackBuffer)) {
1889
0
        fUnion.fFields.fArray = oldArray;
1890
0
      }
1891
0
      fUnion.fFields.fLengthAndFlags = flags;
1892
0
      setToBogus();
1893
0
      return false;
1894
0
    }
1895
10.7M
  }
1896
78.0M
  return true;
1897
78.0M
}
1898
1899
// UnicodeStringAppendable ------------------------------------------------- ***
1900
1901
0
// Out-of-line destructor definition; there is nothing to clean up here.
UnicodeStringAppendable::~UnicodeStringAppendable() = default;
1902
1903
UBool
UnicodeStringAppendable::appendCodeUnit(char16_t c) {
  // Append the single code unit as a one-element array, then report
  // whether the resulting string is writable.
  const UnicodeString &appended = str.doAppend(&c, 0, 1);
  return appended.isWritable();
}
1907
1908
UBool
UnicodeStringAppendable::appendCodePoint(UChar32 c) {
  // Encode the code point into a small local UTF-16 buffer first.
  char16_t units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool encodeFailed = false;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, c, encodeFailed);

  // U16_APPEND flagged an error: nothing is appended.
  if(encodeFailed) {
    return false;
  }

  // Append the encoded units and report whether the string is writable.
  return str.doAppend(units, 0, unitCount).isWritable();
}
1916
1917
UBool
UnicodeStringAppendable::appendString(const char16_t *s, int32_t length) {
  // Forward the array to doAppend() starting at offset 0, then report
  // whether the resulting string is writable.
  const UnicodeString &appended = str.doAppend(s, 0, length);
  return appended.isWritable();
}
1921
1922
// Tries to make room for appendCapacity more code units after the current
// contents of the wrapped string.
//
// appendCapacity  number of additional code units the caller intends to append.
//
// Returns the result of cloneArrayIfNeeded() for the combined capacity, or
// false without touching the string when current length + appendCapacity
// would exceed kMaxCapacity.
UBool
UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
  const int32_t oldLength = str.length();
  // Guard the addition below: without this check a large appendCapacity
  // overflows int32_t (undefined behaviour) and the request could be
  // reported as successful without reserving anything. This is the same
  // bound that getAppendBuffer() applies before adding to the length.
  if(appendCapacity > (kMaxCapacity - oldLength)) {
    return false;
  }
  return str.cloneArrayIfNeeded(oldLength + appendCapacity);
}
1926
1927
char16_t *
1928
UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1929
                                         int32_t desiredCapacityHint,
1930
                                         char16_t *scratch, int32_t scratchCapacity,
1931
0
                                         int32_t *resultCapacity) {
1932
0
  if(minCapacity < 1 || scratchCapacity < minCapacity) {
1933
0
    *resultCapacity = 0;
1934
0
    return nullptr;
1935
0
  }
1936
0
  int32_t oldLength = str.length();
1937
0
  if(minCapacity <= (kMaxCapacity - oldLength) &&
1938
0
      desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1939
0
      str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1940
0
    *resultCapacity = str.getCapacity() - oldLength;
1941
0
    return str.getArrayStart() + oldLength;
1942
0
  }
1943
0
  *resultCapacity = scratchCapacity;
1944
0
  return scratch;
1945
0
}
1946
1947
U_NAMESPACE_END
1948
1949
U_NAMESPACE_USE
1950
1951
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key) {
    // The element's pointer is interpreted as a UnicodeString; a null
    // pointer hashes to 0.
    const UnicodeString *ustr = static_cast<const UnicodeString *>(key.pointer);
    if (ustr == nullptr) {
        return 0;
    }
    return ustr->hashCode();
}
1956
1957
// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1958
// does not depend on hashtable code.
1959
U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1, const UElement key2) {
    const UnicodeString *lhs = static_cast<const UnicodeString *>(key1.pointer);
    const UnicodeString *rhs = static_cast<const UnicodeString *>(key2.pointer);

    // Same object (or both null): equal without looking at the contents.
    if (lhs == rhs) {
        return true;
    }
    // Exactly one side null: not equal. Otherwise compare the strings.
    return lhs != nullptr && rhs != nullptr && *lhs == *rhs;
}
1971
1972
// Only compiled when U_STATIC_IMPLEMENTATION is defined.
#ifdef U_STATIC_IMPLEMENTATION
1973
/*
1974
This should never be called. It is defined here to make sure that the
1975
virtual vector deleting destructor is defined within unistr.cpp.
1976
The vector deleting destructor is already a part of UObject,
1977
but defining it here makes sure that it is included with this object file.
1978
This makes sure that static library dependencies are kept to a minimum.
1979
*/
1980
// NOTE(review): this compiler check encloses the function definition as well
// as the pragmas, so the dummy function only exists for clang or for
// U_GCC_MAJOR_MINOR >= 1100. If the guard was meant only for the diagnostic
// pragmas, its placement should be confirmed.
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
1981
// The function is deliberately never referenced, so silence the
// unused-function diagnostic for its definition only (push ... pop).
#pragma GCC diagnostic push
1982
#pragma GCC diagnostic ignored "-Wunused-function"
1983
static void uprv_UnicodeStringDummy() {
1984
    // array new immediately followed by array delete of UnicodeString objects
    delete [] (new UnicodeString[2]);
1985
}
1986
#pragma GCC diagnostic pop
1987
#endif
1988
#endif