Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/source/common/unistr.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
* Copyright (C) 1999-2016, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
******************************************************************************
8
*
9
* File unistr.cpp
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   09/25/98    stephen     Creation.
15
*   04/20/99    stephen     Overhauled per 4/16 code review.
16
*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17
*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
18
*                           Replaceable.
19
*   06/25/01    grhoten     Removed the dependency on iostream
20
******************************************************************************
21
*/
22
23
#include <string_view>
24
25
#include "unicode/utypes.h"
26
#include "unicode/appendable.h"
27
#include "unicode/putil.h"
28
#include "cstring.h"
29
#include "cmemory.h"
30
#include "unicode/ustring.h"
31
#include "unicode/unistr.h"
32
#include "unicode/utf.h"
33
#include "unicode/utf16.h"
34
#include "uelement.h"
35
#include "ustr_imp.h"
36
#include "umutex.h"
37
#include "uassert.h"
38
39
#if 0
40
41
#include <iostream>
42
using namespace std;
43
44
//DEBUGGING
45
void
46
print(const UnicodeString& s,
47
      const char *name)
48
{
49
  char16_t c;
50
  cout << name << ":|";
51
  for(int i = 0; i < s.length(); ++i) {
52
    c = s[i];
53
    if(c>= 0x007E || c < 0x0020)
54
      cout << "[0x" << hex << s[i] << "]";
55
    else
56
      cout << (char) s[i];
57
  }
58
  cout << '|' << endl;
59
}
60
61
void
62
print(const char16_t *s,
63
      int32_t len,
64
      const char *name)
65
{
66
  char16_t c;
67
  cout << name << ":|";
68
  for(int i = 0; i < len; ++i) {
69
    c = s[i];
70
    if(c>= 0x007E || c < 0x0020)
71
      cout << "[0x" << hex << s[i] << "]";
72
    else
73
      cout << (char) s[i];
74
  }
75
  cout << '|' << endl;
76
}
77
// END DEBUGGING
78
#endif
79
80
// Local function definitions for now
81
82
// need to copy areas that may overlap
83
static
84
inline void
85
us_arrayCopy(const char16_t *src, int32_t srcStart,
86
         char16_t *dst, int32_t dstStart, int32_t count)
87
0
{
88
0
  if(count>0) {
89
0
    uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
90
0
  }
91
0
}
92
93
// u_unescapeAt() callback to get a char16_t from a UnicodeString
94
U_CDECL_BEGIN
95
static char16_t U_CALLCONV
96
0
UnicodeString_charAt(int32_t offset, void *context) {
97
0
    return ((icu::UnicodeString*) context)->charAt(offset);
98
0
}
99
U_CDECL_END
100
101
U_NAMESPACE_BEGIN
102
103
/* The Replaceable virtual destructor can't be defined in the header
104
   due to how AIX works with multiple definitions of virtual functions.
105
*/
106
0
Replaceable::~Replaceable() {}
107
108
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
109
110
// Concatenates s1 and s2 into a new string.
// Returns a bogus string if the combined length overflows int32_t.
UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
  int32_t capacity;
  if (uprv_add32_overflow(s1.length(), s2.length(), &capacity)) {
    UnicodeString failed;
    failed.setToBogus();
    return failed;
  }
  // Reserve one more unit for a terminating NUL if we need one,
  // unless that increment itself would overflow.
  if (capacity < INT32_MAX) {
    ++capacity;
  }
  // Construct with the full capacity up front, then append both operands.
  UnicodeString buffer(capacity, static_cast<UChar32>(0), 0);
  return buffer.append(s1).append(s2);
}
123
124
// Concatenates s1 and the UTF-16 view s2 into a new string.
// Returns a bogus string if s2 is longer than INT32_MAX units or if the
// combined length overflows int32_t.
U_COMMON_API UnicodeString U_EXPORT2
unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) {
  int32_t capacity;
  if (s2.length() > INT32_MAX ||
      uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &capacity)) {
    UnicodeString failed;
    failed.setToBogus();
    return failed;
  }
  // Reserve one more unit for a terminating NUL if we need one,
  // unless that increment itself would overflow.
  if (capacity < INT32_MAX) {
    ++capacity;
  }
  // Construct with the full capacity up front, then append both operands.
  UnicodeString buffer(capacity, static_cast<UChar32>(0), 0);
  return buffer.append(s1).append(s2);
}
138
139
140
//========================================
141
// Reference Counting functions, put at top of file so that optimizing compilers
142
//                               have a chance to automatically inline.
143
//========================================
144
145
void
146
0
UnicodeString::addRef() {
147
0
  umtx_atomic_inc(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
148
0
}
149
150
int32_t
151
0
UnicodeString::removeRef() {
152
0
  return umtx_atomic_dec(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
153
0
}
154
155
int32_t
156
0
UnicodeString::refCount() const {
157
0
  return umtx_loadAcquire(*(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1));
158
0
}
159
160
void
161
0
UnicodeString::releaseArray() {
162
0
  if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
163
0
    uprv_free(reinterpret_cast<int32_t*>(fUnion.fFields.fArray) - 1);
164
0
  }
165
0
}
166
167
168
169
//========================================
170
// Constructors
171
//========================================
172
173
// The default constructor is inline in unistr.h.
174
175
0
// Constructs a string with at least the given capacity, filled with count
// copies of code point c.
// If count <= 0, c is not a code point in 0..0x10ffff, or the filled string
// would exceed INT32_MAX code units, only the capacity is allocated and no
// contents are written.
UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
  fUnion.fFields.fLengthAndFlags = 0;
  if (count <= 0 || static_cast<uint32_t>(c) > 0x10ffff) {
    // just allocate and do not do anything else
    allocate(capacity);
    return;
  }

  // Supplementary code points need a surrogate pair, i.e. two units each.
  const bool supplementary = c > 0xffff;
  if (supplementary && count > (INT32_MAX / 2)) {
    // We would get more than 2G UChars.
    allocate(capacity);
    return;
  }

  const int32_t unitCount = supplementary ? count * 2 : count;
  if (capacity < unitCount) {
    capacity = unitCount;
  }
  if (!allocate(capacity)) {
    return;
  }

  char16_t *p = getArrayStart();
  char16_t *const limit = p + unitCount;
  if (supplementary) {
    const char16_t lead = U16_LEAD(c);
    const char16_t trail = U16_TRAIL(c);
    while (p != limit) {
      *p++ = lead;
      *p++ = trail;
    }
  } else {
    const char16_t unit = static_cast<char16_t>(c);
    while (p != limit) {
      *p++ = unit;
    }
  }
  setLength(unitCount);
}
215
216
0
// Constructs a one-unit string held in the stack buffer.
UnicodeString::UnicodeString(char16_t ch) {
  fUnion.fStackFields.fBuffer[0] = ch;
  // length 1, short (stack-buffer) storage
  fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
}
220
221
0
// Constructs a string from one code point, written into the stack buffer
// with U16_APPEND. If U16_APPEND reports an error the string stays empty.
UnicodeString::UnicodeString(UChar32 ch) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  int32_t written = 0;
  UBool failed = false;
  U16_APPEND(fUnion.fStackFields.fBuffer, written, US_STACKBUF_SIZE, ch, failed);
  // We test the error flag so that the compiler does not complain that we don't.
  // On error nothing was written, so the empty length set above is already right.
  if(failed) {
    return;
  }
  setShortLength(written);
}
232
233
// Constructs a string by starting from an empty short string and appending
// text (from offset 0, with the given textLength) via doAppend().
UnicodeString::UnicodeString(const char16_t *text,
                             int32_t textLength) {
  // start out empty, using the stack buffer
  fUnion.fFields.fLengthAndFlags = kShortString;
  doAppend(text, 0, textLength);
}
238
239
// Read-only alias constructor.
// - nullptr text: empty string, no alias.
// - textLength < -1, textLength == -1 without isTerminated, or isTerminated
//   with a non-NUL unit at text[textLength]: bogus string.
// - otherwise aliases text; textLength == -1 means "use u_strlen(text)".
UnicodeString::UnicodeString(UBool isTerminated,
                             ConstChar16Ptr textPtr,
                             int32_t textLength) {
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
  const char16_t *text = textPtr;
  if(text == nullptr) {
    // treat as an empty string, do not alias
    setToEmpty();
    return;
  }
  if(textLength < -1) {
    setToBogus();
    return;
  }
  if(textLength == -1) {
    if(!isTerminated) {
      // cannot determine the length of unterminated text
      setToBogus();
      return;
    }
    textLength = u_strlen(text);
  } else if(isTerminated && text[textLength] != 0) {
    // caller claimed termination but there is no NUL at the given length
    setToBogus();
    return;
  }
  // For terminated text the capacity also covers the NUL.
  setArray(const_cast<char16_t *>(text), textLength,
           isTerminated ? textLength + 1 : textLength);
}
261
262
// Writable alias constructor.
// - nullptr buff: empty string, no alias.
// - buffLength < -1, buffCapacity < 0, or buffLength > buffCapacity: bogus.
// - buffLength == -1: the length is the offset of the first NUL, searched
//   for only within buffCapacity units.
UnicodeString::UnicodeString(char16_t *buff,
                             int32_t buffLength,
                             int32_t buffCapacity) {
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
  if(buff == nullptr) {
    // treat as an empty string, do not alias
    setToEmpty();
    return;
  }
  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    setToBogus();
    return;
  }
  if(buffLength == -1) {
    // like u_strlen(buff), but do not look beyond buffCapacity
    int32_t n = 0;
    while(n < buffCapacity && buff[n] != 0) {
      ++n;
    }
    buffLength = n;
  }
  setArray(buff, buffLength, buffCapacity);
}
283
284
0
// Constructs a string from char data converted with u_charsToUChars().
// A nullptr src yields an empty string; a negative length means "use
// uprv_strlen(src)". The string becomes bogus if cloneArrayIfNeeded() fails.
UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  if(src == nullptr) {
    // treat as an empty string
    return;
  }
  if(length < 0) {
    length = static_cast<int32_t>(uprv_strlen(src));
  }
  if(!cloneArrayIfNeeded(length, length, false)) {
    setToBogus();
    return;
  }
  u_charsToUChars(src, getArrayStart(), length);
  setLength(length);
}
300
301
0
// Returns a string set via setTo(false, data, length) from the given view,
// or a bogus string if the view is longer than INT32_MAX units.
UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) {
  UnicodeString alias;
  if (text.length() > INT32_MAX) {
    alias.setToBogus();
    return alias;
  }
  alias.setTo(false, text.data(), static_cast<int32_t>(text.length()));
  return alias;
}
310
311
0
// Returns a string set via setTo(false, buffer, length) from text's buffer,
// or a bogus string if text itself is bogus.
UnicodeString UnicodeString::readOnlyAliasFromUnicodeString(const UnicodeString &text) {
  UnicodeString alias;
  if (!text.isBogus()) {
    alias.setTo(false, text.getBuffer(), text.length());
  } else {
    alias.setToBogus();
  }
  return alias;
}
320
321
#if U_CHARSET_IS_UTF8
322
323
0
// U_CHARSET_IS_UTF8 build only: constructs a string from char data via
// setToUTF8(). A nullptr argument yields an empty string.
UnicodeString::UnicodeString(const char *codepageData) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  if (codepageData == nullptr) {
    return;
  }
  setToUTF8(codepageData);
}
329
330
0
// U_CHARSET_IS_UTF8 build only: constructs a string from dataLength chars
// via setToUTF8(). dataLength == -1 means "use uprv_strlen(codepageData)".
// The string stays empty for nullptr data, zero length, or a length below -1.
UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  // if there's nothing to convert, do nothing
  const bool nothingToConvert =
      codepageData == nullptr || dataLength == 0 || dataLength < -1;
  if (nothingToConvert) {
    return;
  }
  const int32_t byteCount =
      dataLength == -1 ? static_cast<int32_t>(uprv_strlen(codepageData)) : dataLength;
  setToUTF8(StringPiece(codepageData, byteCount));
}
341
342
// else see unistr_cnv.cpp
343
#endif
344
345
0
// Copy constructor: starts as an empty short string so that copyFrom()
// sees a valid object, then copies from that.
UnicodeString::UnicodeString(const UnicodeString& that) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  copyFrom(that);
}
349
350
0
// Move constructor: takes over src's fields. The second argument asks
// copyFieldsFrom() to set src to bogus when src does not use its stack buffer.
UnicodeString::UnicodeString(UnicodeString &&src) noexcept {
  copyFieldsFrom(src, true);
}
353
354
// Constructs a string by starting empty and calling setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart) {
  // start out empty, using the stack buffer
  fUnion.fFields.fLengthAndFlags = kShortString;
  setTo(that, srcStart);
}
359
360
// Constructs a string by starting empty and calling
// setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength) {
  // start out empty, using the stack buffer
  fUnion.fFields.fLengthAndFlags = kShortString;
  setTo(that, srcStart, srcLength);
}
366
367
// Replaceable base class clone() default implementation, does not clone
368
Replaceable *
369
0
Replaceable::clone() const {
370
0
  return nullptr;
371
0
}
372
373
// UnicodeString overrides clone() with a real implementation
374
UnicodeString *
375
0
UnicodeString::clone() const {
376
0
  LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
377
0
  return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
378
0
}
379
380
//========================================
381
// array allocation
382
//========================================
383
384
namespace {

// Fixed part of the extra capacity added by getGrowCapacity().
const int32_t kGrowSize = 128;

// The number of bytes for one int32_t reference counter and capacity UChars
// must fit into a 32-bit size_t (at least when on a 32-bit platform).
// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
// and round up to a multiple of 16 bytes.
// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns newLength plus a quarter of newLength plus kGrowSize,
// or kMaxCapacity if that sum would exceed kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t extra = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return extra <= headroom ? newLength + extra : kMaxCapacity;
}

}  // namespace
407
408
// Sets up storage for at least capacity code units.
// - capacity <= US_STACKBUF_SIZE: use the stack buffer (kShortString).
// - capacity <= kMaxCapacity: heap-allocate a block holding an int32_t
//   reference counter (initialized to 1) followed by the code units, with
//   room for a NUL and the byte size rounded up to a multiple of 16.
// - otherwise, or if the allocation fails: set the fields to bogus.
// Returns true on success, false if the string was set to bogus.
// Does not release any previously held array.
UBool
UnicodeString::allocate(int32_t capacity) {
  if(capacity <= US_STACKBUF_SIZE) {
    fUnion.fFields.fLengthAndFlags = kShortString;
    return true;
  }

  int32_t *block = nullptr;
  size_t byteCount = 0;
  if(capacity <= kMaxCapacity) {
    // Switch to size_t which is unsigned so that we can allocate up to 4GB.
    // Reference counter + UChars, including one more for the NUL.
    byteCount = sizeof(int32_t) + (static_cast<size_t>(capacity) + 1) * U_SIZEOF_UCHAR;
    // Round up to a multiple of 16.
    byteCount = (byteCount + 15) & ~static_cast<size_t>(15);
    block = static_cast<int32_t*>(uprv_malloc(byteCount));
  }

  if(block == nullptr) {
    // too large, or out of memory
    fUnion.fFields.fLengthAndFlags = kIsBogus;
    fUnion.fFields.fArray = nullptr;
    fUnion.fFields.fCapacity = 0;
    return false;
  }

  // set initial refCount; fArray points behind it, to the first char16_t
  block[0] = 1;
  fUnion.fFields.fArray = reinterpret_cast<char16_t*>(block + 1);
  // the usable capacity is whatever remains after the counter
  fUnion.fFields.fCapacity =
      static_cast<int32_t>((byteCount - sizeof(int32_t)) / U_SIZEOF_UCHAR);
  fUnion.fFields.fLengthAndFlags = kLongString;
  return true;
}
439
440
//========================================
441
// Destructor
442
//========================================
443
444
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
445
static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
446
static u_atomic_int32_t beyondCount(0);
447
448
// Statistics helper (only built with UNISTR_COUNT_FINAL_STRING_LENGTHS):
// prints one line per final string length 0..59 with its count, then one
// ">59" line that sums beyondCount and all remaining finalLengthCounts slots.
U_CAPI void unistr_printLengths() {
  int32_t i = 0;
  while(i <= 59) {
    printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
    ++i;
  }
  int32_t beyond = beyondCount;
  while(i < UPRV_LENGTHOF(finalLengthCounts)) {
    beyond += finalLengthCounts[i];
    ++i;
  }
  printf(">59, %9d\n", beyond);
}
459
#endif
460
461
// Destructor: releases this string's reference to its array, if any.
UnicodeString::~UnicodeString()
{
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
  // Count lengths of strings at the end of their lifetime.
  // Useful for discussion of a desirable stack buffer size.
  // Count the contents length, not the optional NUL terminator nor further capacity.
  // Ignore open-buffer strings and strings which alias external storage.
  const int32_t skipMask = kOpenGetBuffer|kReadonlyAlias|kWritableAlias;
  if((fUnion.fFields.fLengthAndFlags & skipMask) == 0) {
    umtx_atomic_inc(hasShortLength() ? finalLengthCounts + getShortLength() : &beyondCount);
  }
#endif

  releaseArray();
}
479
480
//========================================
481
// Factory methods
482
//========================================
483
484
0
// Factory: returns a new string set from the given UTF-8 bytes via setToUTF8().
UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
  UnicodeString converted;
  converted.setToUTF8(utf8);
  return converted;
}
489
490
0
// Factory: converts length UTF-32 code points to a new UTF-16 string using
// u_strFromUTF32WithSub(), substituting U+FFFD for whatever that function
// treats as invalid input.
// Returns a bogus string if the conversion fails or no buffer can be obtained.
UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
  UnicodeString result;
  int32_t capacity;
  // Most UTF-32 strings will be BMP-only and result in a same-length
  // UTF-16 string. We overestimate the capacity just slightly,
  // just in case there are a few supplementary characters.
  if(length <= US_STACKBUF_SIZE) {
    capacity = US_STACKBUF_SIZE;
  } else {
    const int32_t extra = (length >> 4) + 4;
    // Clamp instead of letting length + extra overflow int32_t for huge
    // inputs; allocate() cannot provide more than kMaxCapacity anyway.
    capacity = extra <= (kMaxCapacity - length) ? length + extra : kMaxCapacity;
  }
  for(;;) {
    char16_t *utf16 = result.getBuffer(capacity);
    if(utf16 == nullptr) {
      // getBuffer() is documented to return nullptr on failure (e.g. out of
      // memory). Without this check the conversion below would merely
      // preflight into a zero-capacity destination, report a buffer
      // overflow, and the retry would fail to get a buffer again.
      result.setToBogus();
      break;
    }
    int32_t length16;
    UErrorCode errorCode = U_ZERO_ERROR;
    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
        utf32, length,
        0xfffd,  // Substitution character.
        nullptr,    // Don't care about number of substitutions.
        &errorCode);
    result.releaseBuffer(length16);
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
      // Retry with exactly the required size.
      capacity = length16 + 1;  // +1 for the terminating NUL.
      continue;
    }
    if(U_FAILURE(errorCode)) {
      result.setToBogus();
    }
    break;
  }
  return result;
}
521
522
//========================================
523
// Assignment
524
//========================================
525
526
// Copy assignment: delegates to copyFrom() with its default fastCopy setting.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src) {
  UnicodeString &self = copyFrom(src);
  return self;
}
530
531
// Like assignment, but passes fastCopy=true to copyFrom(), which keeps a
// read-only alias as a read-only alias instead of copying its contents.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src) {
  UnicodeString &self = copyFrom(src, true);
  return self;
}
535
536
// Makes this string a copy of src and returns *this.
// - Self-assignment is a no-op; a bogus src makes this string bogus.
// - An empty src makes this an empty stack-buffer string.
// - Stack-buffer contents are copied; a reference-counted buffer is shared
//   (its reference count is incremented).
// - A read-only alias is kept as an alias only if fastCopy is set; otherwise,
//   and for writable aliases, the contents are copied into a new buffer.
// - If that buffer cannot be allocated, this string becomes bogus.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  if(this == &src) {
    return *this;
  }

  // is the right side bogus?
  if(src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
  const int32_t storage = src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags;

  if(storage == kShortString) {
    // short string using the stack buffer, do the same
    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
                getShortLength() * U_SIZEOF_UCHAR);
    return *this;
  }

  const bool shareRefCounted = storage == kLongString;
  const bool keepAlias = storage == kReadonlyAlias && fastCopy;
  if(shareRefCounted || keepAlias) {
    if(shareRefCounted) {
      // src uses a refCounted string buffer, use that buffer with refCount
      // src is const, use a cast - we don't actually change it
      const_cast<UnicodeString &>(src).addRef();
    }
    // copy all fields: share the reference-counted buffer,
    // or maintain the readonly alias as such
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    if(!hasShortLength()) {
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
    }
    return *this;
  }

  if(storage == kReadonlyAlias || storage == kWritableAlias) {
    // src is an alias that we must not keep:
    // allocate a new buffer and copy the contents
    const int32_t srcLength = src.length();
    if(allocate(srcLength)) {
      u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
      setLength(srcLength);
      return *this;
    }
    // not enough memory: continue below and set ourselves to bogus
  }

  // Unknown storage flags or failed allocation: set ourselves to bogus.
  // Do not call setToBogus() here because fArray and flags are not consistent here.
  fUnion.fFields.fLengthAndFlags = kIsBogus;
  fUnion.fFields.fArray = nullptr;
  fUnion.fFields.fCapacity = 0;
  return *this;
}
613
614
0
// Move assignment: releases the current array, then takes over src's fields.
UnicodeString &UnicodeString::operator=(UnicodeString &&src) noexcept {
  // No explicit check for self move assignment, consistent with standard library.
  // Self move assignment causes no crash nor leak but might make the object bogus.
  releaseArray();
  // true: copyFieldsFrom() sets src to bogus when it hands over heap or alias storage
  copyFieldsFrom(src, true);
  return *this;
}
621
622
// Same as move assignment except without memory management.
623
0
void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept {
624
0
  int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
625
0
  if(lengthAndFlags & kUsingStackBuffer) {
626
    // Short string using the stack buffer, copy the contents.
627
    // Check for self assignment to prevent "overlap in memcpy" warnings,
628
    // although it should be harmless to copy a buffer to itself exactly.
629
0
    if(this != &src) {
630
0
      uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
631
0
                  getShortLength() * U_SIZEOF_UCHAR);
632
0
    }
633
0
  } else {
634
    // In all other cases, copy all fields.
635
0
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
636
0
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
637
0
    if(!hasShortLength()) {
638
0
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
639
0
    }
640
0
    if(setSrcToBogus) {
641
      // Set src to bogus without releasing any memory.
642
0
      src.fUnion.fFields.fLengthAndFlags = kIsBogus;
643
0
      src.fUnion.fFields.fArray = nullptr;
644
0
      src.fUnion.fFields.fCapacity = 0;
645
0
    }
646
0
  }
647
0
}
648
649
0
// Exchanges the contents of this string and other by rotating the raw
// fields through a temporary; no memory is allocated, released or
// reference-counted.
void UnicodeString::swap(UnicodeString &other) noexcept {
  UnicodeString holder;  // Empty short string: Known not to need releaseArray().
  // Copy fields without resetting source values in between.
  holder.copyFieldsFrom(*this, false);
  this->copyFieldsFrom(other, false);
  other.copyFieldsFrom(holder, false);
  // Reset the temporary to an empty string so that its destructor
  // does not release the memory that other now holds.
  holder.fUnion.fFields.fLengthAndFlags = kShortString;
}
658
659
//========================================
660
// Miscellaneous operations
661
//========================================
662
663
0
// Returns a copy of this string in which each backslash escape has been
// replaced by the code point that unescapeAt() decodes for it.
// Returns an empty string if any escape sequence is invalid, and a bogus
// string if the result buffer could not be set up.
UnicodeString UnicodeString::unescape() const {
    UnicodeString result(length(), static_cast<UChar32>(0), static_cast<int32_t>(0)); // construct with capacity
    if (result.isBogus()) {
        return result;
    }
    const char16_t *array = getBuffer();
    const int32_t len = length();
    int32_t pending = 0;  // start of the literal run not yet appended
    int32_t i = 0;
    while (i != len) {
        if (array[i++] != 0x5C /*'\\'*/) {
            continue;
        }
        // flush the literal text before the backslash
        result.append(array, pending, (i - 1) - pending);
        UChar32 c = unescapeAt(i); // advances i
        if (c < 0) {
            // invalid escape sequence: return empty string
            result.remove();
            return result;
        }
        result.append(c);
        pending = i;
    }
    // flush the trailing literal text
    result.append(array, pending, len - pending);
    return result;
}
689
690
0
// Decodes one escape sequence via u_unescapeAt(), which reads this string
// through the UnicodeString_charAt callback and updates offset.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    // The callback context is a non-const void*; the callback only calls charAt().
    void *context = const_cast<UnicodeString *>(this);
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), context);
}
693
694
//========================================
695
// Read-only implementation
696
//========================================
697
// Compares the first len code units of this string with text for equality.
// Requires: this not bogus and have same lengths.
UBool
UnicodeString::doEquals(const char16_t *text, int32_t len) const {
  // Byte-wise comparison works for equality regardless of endianness.
  // Widen to size_t before multiplying, as allocate() does, so that the byte
  // count cannot overflow int arithmetic for very long strings.
  const size_t numBytes = static_cast<size_t>(len) * U_SIZEOF_UCHAR;
  return uprv_memcmp(getArrayStart(), text, numBytes) == 0;
}
703
704
// Tests whether the substring [start, start+length) of this string equals
// the srcLength code units at srcChars+srcStart.
// - A bogus string never equals anything.
// - start/length are pinned to legal values first.
// - srcChars==nullptr is treated as an empty string.
// - srcLength<0 means that the text at srcChars+srcStart is NUL-terminated.
UBool
UnicodeString::doEqualsSubstring( int32_t start,
              int32_t length,
              const char16_t *srcChars,
              int32_t srcStart,
              int32_t srcLength) const
{
  // compare illegal string values
  if(isBogus()) {
    return false;
  }

  // pin indices to legal values
  pinIndices(start, length);

  if(srcChars == nullptr) {
    // treat const char16_t *srcChars==nullptr as an empty string
    return length == 0;
  }

  // get the correct pointers
  const char16_t *chars = getArrayStart() + start;
  srcChars += srcStart;

  // get the srcLength if necessary
  if(srcLength < 0) {
    // srcChars already includes srcStart; do not add the offset a second time
    srcLength = u_strlen(srcChars);
  }

  if(length != srcLength) {
    return false;
  }

  if(length == 0 || chars == srcChars) {
    return true;
  }

  return u_memcmp(chars, srcChars, srcLength) == 0;
}
745
746
int8_t
747
UnicodeString::doCompare( int32_t start,
748
              int32_t length,
749
              const char16_t *srcChars,
750
              int32_t srcStart,
751
              int32_t srcLength) const
752
0
{
753
  // compare illegal string values
754
0
  if(isBogus()) {
755
0
    return -1;
756
0
  }
757
  
758
  // pin indices to legal values
759
0
  pinIndices(start, length);
760
761
0
  if(srcChars == nullptr) {
762
    // treat const char16_t *srcChars==nullptr as an empty string
763
0
    return length == 0 ? 0 : 1;
764
0
  }
765
766
  // get the correct pointer
767
0
  const char16_t *chars = getArrayStart();
768
769
0
  chars += start;
770
0
  srcChars += srcStart;
771
772
0
  int32_t minLength;
773
0
  int8_t lengthResult;
774
775
  // get the srcLength if necessary
776
0
  if(srcLength < 0) {
777
0
    srcLength = u_strlen(srcChars + srcStart);
778
0
  }
779
780
  // are we comparing different lengths?
781
0
  if(length != srcLength) {
782
0
    if(length < srcLength) {
783
0
      minLength = length;
784
0
      lengthResult = -1;
785
0
    } else {
786
0
      minLength = srcLength;
787
0
      lengthResult = 1;
788
0
    }
789
0
  } else {
790
0
    minLength = length;
791
0
    lengthResult = 0;
792
0
  }
793
794
  /*
795
   * note that uprv_memcmp() returns an int but we return an int8_t;
796
   * we need to take care not to truncate the result -
797
   * one way to do this is to right-shift the value to
798
   * move the sign bit into the lower 8 bits and making sure that this
799
   * does not become 0 itself
800
   */
801
802
0
  if(minLength > 0 && chars != srcChars) {
803
0
    int32_t result;
804
805
#   if U_IS_BIG_ENDIAN 
806
      // big-endian: byte comparison works
807
      result = uprv_memcmp(chars, srcChars, minLength * sizeof(char16_t));
808
      if(result != 0) {
809
        return (int8_t)(result >> 15 | 1);
810
      }
811
#   else
812
      // little-endian: compare char16_t units
813
0
      do {
814
0
        result = static_cast<int32_t>(*(chars++)) - static_cast<int32_t>(*(srcChars++));
815
0
        if(result != 0) {
816
0
          return static_cast<int8_t>(result >> 15 | 1);
817
0
        }
818
0
      } while(--minLength > 0);
819
0
#   endif
820
0
  }
821
0
  return lengthResult;
822
0
}
823
824
/* String compare in code point order - doCompare() compares in code unit order. */
825
int8_t
826
UnicodeString::doCompareCodePointOrder(int32_t start,
827
                                       int32_t length,
828
                                       const char16_t *srcChars,
829
                                       int32_t srcStart,
830
                                       int32_t srcLength) const
831
0
{
832
  // compare illegal string values
833
  // treat const char16_t *srcChars==nullptr as an empty string
834
0
  if(isBogus()) {
835
0
    return -1;
836
0
  }
837
838
  // pin indices to legal values
839
0
  pinIndices(start, length);
840
841
0
  if(srcChars == nullptr) {
842
0
    srcStart = srcLength = 0;
843
0
  }
844
845
0
  int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
846
  /* translate the 32-bit result into an 8-bit one */
847
0
  if(diff!=0) {
848
0
    return static_cast<int8_t>(diff >> 15 | 1);
849
0
  } else {
850
0
    return 0;
851
0
  }
852
0
}
853
854
int32_t
855
0
UnicodeString::getLength() const {
856
0
    return length();
857
0
}
858
859
char16_t
860
0
UnicodeString::getCharAt(int32_t offset) const {
861
0
  return charAt(offset);
862
0
}
863
864
// Out-of-line accessor that simply forwards to char32At(offset).
UChar32
UnicodeString::getChar32At(int32_t offset) const {
  const UChar32 cp = char32At(offset);
  return cp;
}
868
869
// Returns the code point that U16_GET reads at offset,
// or kInvalidUChar if offset is outside [0, length()).
UChar32
UnicodeString::char32At(int32_t offset) const
{
  int32_t len = length();
  // one unsigned comparison rejects both negative and too-large offsets
  if (static_cast<uint32_t>(offset) >= static_cast<uint32_t>(len)) {
    return kInvalidUChar;
  }
  const char16_t *array = getArrayStart();
  UChar32 c;
  U16_GET(array, 0, offset, len, c);
  return c;
}
882
883
int32_t
884
0
UnicodeString::getChar32Start(int32_t offset) const {
885
0
  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
886
0
    const char16_t *array = getArrayStart();
887
0
    U16_SET_CP_START(array, 0, offset);
888
0
    return offset;
889
0
  } else {
890
0
    return 0;
891
0
  }
892
0
}
893
894
int32_t
895
0
UnicodeString::getChar32Limit(int32_t offset) const {
896
0
  int32_t len = length();
897
0
  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
898
0
    const char16_t *array = getArrayStart();
899
0
    U16_SET_CP_LIMIT(array, 0, offset, len);
900
0
    return offset;
901
0
  } else {
902
0
    return len;
903
0
  }
904
0
}
905
906
int32_t
907
0
UnicodeString::countChar32(int32_t start, int32_t length) const {
908
0
  pinIndices(start, length);
909
  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for nullptr
910
0
  return u_countChar32(getArrayStart()+start, length);
911
0
}
912
913
// Returns u_strHasMoreChar32Than() for the substring [start, start+length)
// and the given number, after pinning start/length to legal values.
UBool
UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
  pinIndices(start, length);
  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for nullptr
  const char16_t *text = getArrayStart() + start;
  return u_strHasMoreChar32Than(text, length, number);
}
919
920
int32_t
921
0
UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
922
  // pin index
923
0
  int32_t len = length();
924
0
  if(index<0) {
925
0
    index=0;
926
0
  } else if(index>len) {
927
0
    index=len;
928
0
  }
929
930
0
  const char16_t *array = getArrayStart();
931
0
  if(delta>0) {
932
0
    U16_FWD_N(array, index, len, delta);
933
0
  } else {
934
0
    U16_BACK_N(array, 0, index, -delta);
935
0
  }
936
937
0
  return index;
938
0
}
939
940
void
941
UnicodeString::doExtract(int32_t start,
942
             int32_t length,
943
             char16_t *dst,
944
             int32_t dstStart) const
945
0
{
946
  // pin indices to legal values
947
0
  pinIndices(start, length);
948
949
  // do not copy anything if we alias dst itself
950
0
  const char16_t *array = getArrayStart();
951
0
  if(array + start != dst + dstStart) {
952
0
    us_arrayCopy(array, start, dst, dstStart, length);
953
0
  }
954
0
}
955
956
int32_t
957
UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
958
0
                       UErrorCode &errorCode) const {
959
0
  int32_t len = length();
960
0
  if(U_SUCCESS(errorCode)) {
961
0
    if (isBogus() || destCapacity < 0 || (destCapacity > 0 && dest == nullptr)) {
962
0
      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
963
0
    } else {
964
0
      const char16_t *array = getArrayStart();
965
0
      if(len>0 && len<=destCapacity && array!=dest) {
966
0
        u_memcpy(dest, array, len);
967
0
      }
968
0
      return u_terminateUChars(dest, destCapacity, len, &errorCode);
969
0
    }
970
0
  }
971
972
0
  return len;
973
0
}
974
975
int32_t
976
UnicodeString::extract(int32_t start,
977
                       int32_t length,
978
                       char *target,
979
                       int32_t targetCapacity,
980
                       enum EInvariant) const
981
0
{
982
  // if the arguments are illegal, then do nothing
983
0
  if(targetCapacity < 0 || (targetCapacity > 0 && target == nullptr)) {
984
0
    return 0;
985
0
  }
986
987
  // pin the indices to legal values
988
0
  pinIndices(start, length);
989
990
0
  if(length <= targetCapacity) {
991
0
    u_UCharsToChars(getArrayStart() + start, target, length);
992
0
  }
993
0
  UErrorCode status = U_ZERO_ERROR;
994
0
  return u_terminateChars(target, targetCapacity, length, &status);
995
0
}
996
997
// Returns a read-only alias of the substring [start, start+len) of this
// string, after pinning start/len to legal values.
// If getBuffer() returns nullptr the result is a bogus string.
UnicodeString
UnicodeString::tempSubString(int32_t start, int32_t len) const {
  pinIndices(start, len);
  const char16_t *base = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
  if(base==nullptr) {
    // Use anything not nullptr, because nullptr would make an empty string,
    // together with a length of -2 which makes the aliasing constructor
    // produce a bogus result string.
    base=fUnion.fStackFields.fBuffer;
    len=-2;
  }
  return UnicodeString(false, base + start, len);
}
1007
1008
int32_t
1009
UnicodeString::toUTF8(int32_t start, int32_t len,
1010
0
                      char *target, int32_t capacity) const {
1011
0
  pinIndices(start, len);
1012
0
  int32_t length8;
1013
0
  UErrorCode errorCode = U_ZERO_ERROR;
1014
0
  u_strToUTF8WithSub(target, capacity, &length8,
1015
0
                     getBuffer() + start, len,
1016
0
                     0xFFFD,  // Standard substitution character.
1017
0
                     nullptr,    // Don't care about number of substitutions.
1018
0
                     &errorCode);
1019
0
  return length8;
1020
0
}
1021
1022
#if U_CHARSET_IS_UTF8
1023
1024
int32_t
1025
UnicodeString::extract(int32_t start, int32_t len,
1026
0
                       char *target, uint32_t dstSize) const {
1027
  // if the arguments are illegal, then do nothing
1028
0
  if (/*dstSize < 0 || */(dstSize > 0 && target == nullptr)) {
1029
0
    return 0;
1030
0
  }
1031
0
  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? static_cast<int32_t>(dstSize) : 0x7fffffff);
1032
0
}
1033
1034
// else see unistr_cnv.cpp
1035
#endif
1036
1037
void 
1038
UnicodeString::extractBetween(int32_t start,
1039
                  int32_t limit,
1040
0
                  UnicodeString& target) const {
1041
0
  pinIndex(start);
1042
0
  pinIndex(limit);
1043
0
  doExtract(start, limit - start, target);
1044
0
}
1045
1046
// When converting from UTF-16 to UTF-8, the result will have at most 3 times
1047
// as many bytes as the source has UChars.
1048
// The "worst cases" are writing systems like Indic, Thai and CJK with
1049
// 3:1 bytes:UChars.
1050
void
1051
0
UnicodeString::toUTF8(ByteSink &sink) const {
1052
0
  int32_t length16 = length();
1053
0
  if(length16 != 0) {
1054
0
    char stackBuffer[1024];
1055
0
    int32_t capacity = static_cast<int32_t>(sizeof(stackBuffer));
1056
0
    UBool utf8IsOwned = false;
1057
0
    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
1058
0
                                      3*length16,
1059
0
                                      stackBuffer, capacity,
1060
0
                                      &capacity);
1061
0
    int32_t length8 = 0;
1062
0
    UErrorCode errorCode = U_ZERO_ERROR;
1063
0
    u_strToUTF8WithSub(utf8, capacity, &length8,
1064
0
                       getBuffer(), length16,
1065
0
                       0xFFFD,  // Standard substitution character.
1066
0
                       nullptr,    // Don't care about number of substitutions.
1067
0
                       &errorCode);
1068
0
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
1069
0
      utf8 = static_cast<char*>(uprv_malloc(length8));
1070
0
      if(utf8 != nullptr) {
1071
0
        utf8IsOwned = true;
1072
0
        errorCode = U_ZERO_ERROR;
1073
0
        u_strToUTF8WithSub(utf8, length8, &length8,
1074
0
                           getBuffer(), length16,
1075
0
                           0xFFFD,  // Standard substitution character.
1076
0
                           nullptr,    // Don't care about number of substitutions.
1077
0
                           &errorCode);
1078
0
      } else {
1079
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1080
0
      }
1081
0
    }
1082
0
    if(U_SUCCESS(errorCode)) {
1083
0
      sink.Append(utf8, length8);
1084
0
      sink.Flush();
1085
0
    }
1086
0
    if(utf8IsOwned) {
1087
0
      uprv_free(utf8);
1088
0
    }
1089
0
  }
1090
0
}
1091
1092
int32_t
UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
  // Do nothing if the caller already has a failure recorded.
  if(U_FAILURE(errorCode)) {
    return 0;
  }

  int32_t length32 = 0;
  // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
  u_strToUTF32WithSub(utf32, capacity, &length32,
      getBuffer(), length(),
      0xfffd,  // Substitution character.
      nullptr,    // Don't care about number of substitutions.
      &errorCode);
  return length32;
}
1105
1106
int32_t
UnicodeString::indexOf(const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t length) const
{
  // Nothing can be found in a bogus string, or with a missing/empty pattern
  // or a negative pattern offset.
  if (srcLength == 0 || srcStart < 0 || srcChars == nullptr || isBogus()) {
    return -1;
  }

  // UnicodeString does not find empty substrings: a negative srcLength with
  // a NUL at the pattern start is treated as an empty pattern.
  if(srcLength < 0 && srcChars[srcStart] == 0) {
    return -1;
  }

  // Clamp the search range to the string contents.
  pinIndices(start, length);

  // Search for the first occurrence and convert the pointer into an index.
  const char16_t *base = getArrayStart();
  const char16_t *hit = u_strFindFirst(base + start, length, srcChars + srcStart, srcLength);
  if(hit != nullptr) {
    return static_cast<int32_t>(hit - base);
  }
  return -1;
}
1134
1135
int32_t
UnicodeString::doIndexOf(char16_t c,
             int32_t start,
             int32_t length) const
{
  // Clamp the search range to the string contents.
  pinIndices(start, length);

  // Look for the first occurrence of the code unit c in the range.
  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memchr(base + start, c, length);
  if(hit != nullptr) {
    // Index relative to the start of the string.
    return static_cast<int32_t>(hit - base);
  }
  return -1;
}
1152
1153
int32_t
UnicodeString::doIndexOf(UChar32 c,
                         int32_t start,
                         int32_t length) const {
  // Clamp the search range to the string contents.
  pinIndices(start, length);

  // Look for the first occurrence of the code point c in the range.
  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memchr32(base + start, c, length);
  if(hit != nullptr) {
    // Index relative to the start of the string.
    return static_cast<int32_t>(hit - base);
  }
  return -1;
}
1169
1170
int32_t
UnicodeString::lastIndexOf(const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t length) const
{
  // Nothing can be found in a bogus string, or with a missing/empty pattern
  // or a negative pattern offset.
  if (srcLength == 0 || srcStart < 0 || srcChars == nullptr || isBogus()) {
    return -1;
  }

  // UnicodeString does not find empty substrings: a negative srcLength with
  // a NUL at the pattern start is treated as an empty pattern.
  if(srcLength < 0 && srcChars[srcStart] == 0) {
    return -1;
  }

  // Clamp the search range to the string contents.
  pinIndices(start, length);

  // Search for the last occurrence and convert the pointer into an index.
  const char16_t *base = getArrayStart();
  const char16_t *hit = u_strFindLast(base + start, length, srcChars + srcStart, srcLength);
  if(hit != nullptr) {
    return static_cast<int32_t>(hit - base);
  }
  return -1;
}
1198
1199
int32_t
UnicodeString::doLastIndexOf(char16_t c,
                 int32_t start,
                 int32_t length) const
{
  // A bogus string contains nothing to find.
  if(isBogus()) {
    return -1;
  }

  // Clamp the search range to the string contents.
  pinIndices(start, length);

  // Look for the last occurrence of the code unit c in the range.
  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memrchr(base + start, c, length);
  if(hit != nullptr) {
    // Index relative to the start of the string.
    return static_cast<int32_t>(hit - base);
  }
  return -1;
}
1220
1221
int32_t
UnicodeString::doLastIndexOf(UChar32 c,
                             int32_t start,
                             int32_t length) const {
  // Clamp the search range to the string contents.
  pinIndices(start, length);

  // Look for the last occurrence of the code point c in the range.
  const char16_t *base = getArrayStart();
  const char16_t *hit = u_memrchr32(base + start, c, length);
  if(hit != nullptr) {
    // Index relative to the start of the string.
    return static_cast<int32_t>(hit - base);
  }
  return -1;
}
1237
1238
//========================================
1239
// Write implementation
1240
//========================================
1241
1242
UnicodeString&
UnicodeString::findAndReplace(int32_t start,
                  int32_t length,
                  const UnicodeString& oldText,
                  int32_t oldStart,
                  int32_t oldLength,
                  const UnicodeString& newText,
                  int32_t newStart,
                  int32_t newLength)
{
  // Leave this string untouched if any participant is bogus.
  const bool anyBogus = isBogus() || oldText.isBogus() || newText.isBogus();
  if(anyBogus) {
    return *this;
  }

  // Clamp all three ranges to their respective strings.
  pinIndices(start, length);
  oldText.pinIndices(oldStart, oldLength);
  newText.pinIndices(newStart, newLength);

  // An empty search pattern never matches.
  if(oldLength == 0) {
    return *this;
  }

  for(;;) {
    // Stop when the remaining range is empty or too short for oldText.
    if(length <= 0 || length < oldLength) {
      break;
    }

    const int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
    if(pos < 0) {
      // no more oldText's here: done
      break;
    }

    // Found oldText: replace it by newText and continue behind the replacement.
    replace(pos, oldLength, newText, newStart, newLength);
    const int32_t consumed = (pos + oldLength) - start;
    length -= consumed;
    start = pos + newLength;
  }

  return *this;
}
1279
1280
1281
void
1282
UnicodeString::setToBogus()
1283
0
{
1284
0
  releaseArray();
1285
1286
0
  fUnion.fFields.fLengthAndFlags = kIsBogus;
1287
0
  fUnion.fFields.fArray = nullptr;
1288
0
  fUnion.fFields.fCapacity = 0;
1289
0
}
1290
1291
// turn a bogus string into an empty one
1292
void
1293
0
UnicodeString::unBogus() {
1294
0
  if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1295
0
    setToEmpty();
1296
0
  }
1297
0
}
1298
1299
const char16_t *
1300
0
UnicodeString::getTerminatedBuffer() {
1301
0
  if(!isWritable()) {
1302
0
    return nullptr;
1303
0
  }
1304
0
  char16_t *array = getArrayStart();
1305
0
  int32_t len = length();
1306
0
  if(len < getCapacity()) {
1307
0
    if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1308
      // If len<capacity on a read-only alias, then array[len] is
1309
      // either the original NUL (if constructed with (true, s, length))
1310
      // or one of the original string contents characters (if later truncated),
1311
      // therefore we can assume that array[len] is initialized memory.
1312
0
      if(array[len] == 0) {
1313
0
        return array;
1314
0
      }
1315
0
    } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1316
      // kRefCounted: Do not write the NUL if the buffer is shared.
1317
      // That is mostly safe, except when the length of one copy was modified
1318
      // without copy-on-write, e.g., via truncate(newLength) or remove().
1319
      // Then the NUL would be written into the middle of another copy's string.
1320
1321
      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1322
      // Do not test if there is a NUL already because it might be uninitialized memory.
1323
      // (That would be safe, but tools like valgrind & Purify would complain.)
1324
0
      array[len] = 0;
1325
0
      return array;
1326
0
    }
1327
0
  }
1328
0
  if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1329
0
    array = getArrayStart();
1330
0
    array[len] = 0;
1331
0
    return array;
1332
0
  } else {
1333
0
    return nullptr;
1334
0
  }
1335
0
}
1336
1337
// setTo() analogous to the readonly-aliasing constructor with the same signature
1338
UnicodeString &
1339
UnicodeString::setTo(UBool isTerminated,
1340
                     ConstChar16Ptr textPtr,
1341
                     int32_t textLength)
1342
0
{
1343
0
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1344
    // do not modify a string that has an "open" getBuffer(minCapacity)
1345
0
    return *this;
1346
0
  }
1347
1348
0
  const char16_t *text = textPtr;
1349
0
  if(text == nullptr) {
1350
    // treat as an empty string, do not alias
1351
0
    releaseArray();
1352
0
    setToEmpty();
1353
0
    return *this;
1354
0
  }
1355
1356
0
  if( textLength < -1 ||
1357
0
      (textLength == -1 && !isTerminated) ||
1358
0
      (textLength >= 0 && isTerminated && text[textLength] != 0)
1359
0
  ) {
1360
0
    setToBogus();
1361
0
    return *this;
1362
0
  }
1363
1364
0
  releaseArray();
1365
1366
0
  if(textLength == -1) {
1367
    // text is terminated, or else it would have failed the above test
1368
0
    textLength = u_strlen(text);
1369
0
  }
1370
0
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1371
0
  setArray(const_cast<char16_t*>(text), textLength, isTerminated ? textLength + 1 : textLength);
1372
0
  return *this;
1373
0
}
1374
1375
// setTo() analogous to the writable-aliasing constructor with the same signature
1376
UnicodeString &
1377
UnicodeString::setTo(char16_t *buffer,
1378
                     int32_t buffLength,
1379
0
                     int32_t buffCapacity) {
1380
0
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1381
    // do not modify a string that has an "open" getBuffer(minCapacity)
1382
0
    return *this;
1383
0
  }
1384
1385
0
  if(buffer == nullptr) {
1386
    // treat as an empty string, do not alias
1387
0
    releaseArray();
1388
0
    setToEmpty();
1389
0
    return *this;
1390
0
  }
1391
1392
0
  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1393
0
    setToBogus();
1394
0
    return *this;
1395
0
  } else if(buffLength == -1) {
1396
    // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1397
0
    const char16_t *p = buffer, *limit = buffer + buffCapacity;
1398
0
    while(p != limit && *p != 0) {
1399
0
      ++p;
1400
0
    }
1401
0
    buffLength = static_cast<int32_t>(p - buffer);
1402
0
  }
1403
1404
0
  releaseArray();
1405
1406
0
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
1407
0
  setArray(buffer, buffLength, buffCapacity);
1408
0
  return *this;
1409
0
}
1410
1411
0
// Replaces the contents of this string with the UTF-16 conversion of utf8.
// The string becomes bogus if the conversion reports a failure.
UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
  unBogus();
  int32_t length = utf8.length();
  int32_t capacity;
  // The UTF-16 string will be at most as long as the UTF-8 string.
  if(length <= US_STACKBUF_SIZE) {
    capacity = US_STACKBUF_SIZE;
  } else {
    capacity = length + 1;  // +1 for the terminating NUL.
  }
  char16_t *utf16 = getBuffer(capacity);
  // Initialize to 0: if the conversion fails its argument check (for example
  // because getBuffer() returned nullptr), it may not store a length, and
  // releaseBuffer() below must not be handed an indeterminate value.
  int32_t length16 = 0;
  UErrorCode errorCode = U_ZERO_ERROR;
  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
      utf8.data(), length,
      0xfffd,  // Substitution character.
      nullptr,    // Don't care about number of substitutions.
      &errorCode);
  releaseBuffer(length16);
  if(U_FAILURE(errorCode)) {
    setToBogus();
  }
  return *this;
}
1435
1436
UnicodeString&
UnicodeString::setCharAt(int32_t offset,
             char16_t c)
{
  const int32_t len = length();

  // cloneArrayIfNeeded() is called first, even for an empty string; nothing
  // is written if it fails or if there is no code unit to overwrite.
  if(!cloneArrayIfNeeded() || len <= 0) {
    return *this;
  }

  // Clamp the offset into [0, len-1].
  int32_t index = offset;
  if(index < 0) {
    index = 0;
  } else if(index >= len) {
    index = len - 1;
  }

  getArrayStart()[index] = c;
  return *this;
}
1452
1453
UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               UChar32 srcChar) {
  // Encode srcChar as UTF-16 code units.
  char16_t units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool invalid = false;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, invalid);

  // We test the error flag so that the compiler does not complain that we don't.
  // If srcChar is not a valid code point then the replacement is empty, which
  // means we remove the source segment rather than replacing it with srcChar.
  const int32_t replacementLength = invalid ? 0 : unitCount;
  return doReplace(start, _length, units, 0, replacementLength);
}
1466
1467
UnicodeString&
UnicodeString::append(UChar32 srcChar) {
  // Encode srcChar as UTF-16 code units.
  char16_t units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool invalid = false;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, invalid);

  // We test the error flag so that the compiler does not complain that we don't.
  // An invalid code point appends nothing.
  if(invalid) {
    return *this;
  }
  return doAppend(units, 0, unitCount);
}
1477
1478
UnicodeString&
UnicodeString::doReplace( int32_t start,
              int32_t length,
              const UnicodeString& src,
              int32_t srcStart,
              int32_t srcLength)
{
  // Clamp the source range to src's contents.
  src.pinIndices(srcStart, srcLength);

  // Delegate to the char16_t* overload with src's array and the pinned range.
  const char16_t *srcChars = src.getArrayStart();
  return doReplace(start, length, srcChars, srcStart, srcLength);
}
1492
1493
UnicodeString&
UnicodeString::doReplace(int32_t start,
             int32_t length,
             const char16_t *srcChars,
             int32_t srcStart,
             int32_t srcLength)
{
  if(!isWritable()) {
    return *this;
  }

  const int32_t prevLength = this->length();

  // optimize (read-only alias).remove(0, start) and .remove(start, end)
  const bool readonlyAlias =
      (fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) != 0;
  if(readonlyAlias && srcLength == 0) {
    if(start == 0) {
      // remove prefix by adjusting the array pointer
      pinIndex(length);
      fUnion.fFields.fArray += length;
      fUnion.fFields.fCapacity -= length;
      setLength(prevLength - length);
      return *this;
    }
    pinIndex(start);
    if(length >= (prevLength - start)) {
      // remove suffix by reducing the length (like truncate())
      setLength(start);
      fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
      return *this;
    }
  }

  // Replacing at the very end is an append.
  if(start == prevLength) {
    return doAppend(srcChars, srcStart, srcLength);
  }

  if (srcChars != nullptr) {
    // Perform all remaining operations relative to srcChars + srcStart.
    // From this point forward, do not use srcStart.
    srcChars += srcStart;
    if (srcLength < 0) {
      // get the srcLength if necessary
      srcLength = u_strlen(srcChars);
    }
  } else {
    // No source text: nothing to insert.
    srcLength = 0;
  }

  // pin the indices to legal values
  pinIndices(start, length);

  // Calculate the size of the string after the replace.
  // Avoid int32_t overflow.
  int32_t resultLength = prevLength - length;
  if(srcLength > (INT32_MAX - resultLength)) {
    setToBogus();
    return *this;
  }
  resultLength += srcLength;

  // Check for insertion into ourself
  const char16_t *prevArray = getArrayStart();
  if (isBufferWritable() &&
      prevArray < srcChars + srcLength &&
      srcChars < prevArray + prevLength) {
    // Copy into a new UnicodeString and start over
    UnicodeString copy(srcChars, srcLength);
    if (copy.isBogus()) {
      setToBogus();
      return *this;
    }
    return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
  }

  // cloneArrayIfNeeded(doCopyArray=false) may change fArray but will not copy the current contents;
  // therefore we need to keep the current fArray
  char16_t savedStackBuffer[US_STACKBUF_SIZE];
  const bool usingStack =
      (fUnion.fFields.fLengthAndFlags & kUsingStackBuffer) != 0;
  if(usingStack && (resultLength > US_STACKBUF_SIZE)) {
    // copy the stack buffer contents because it will be overwritten with
    // fUnion.fFields values
    u_memcpy(savedStackBuffer, prevArray, prevLength);
    prevArray = savedStackBuffer;
  }

  // clone our array and allocate a bigger array if needed
  int32_t *bufferToDelete = nullptr;
  if(!cloneArrayIfNeeded(resultLength, getGrowCapacity(resultLength),
                         false, &bufferToDelete)
  ) {
    return *this;
  }

  // now do the replace

  char16_t *destArray = getArrayStart();
  const bool relocated = (destArray != prevArray);
  if(relocated) {
    // if fArray changed, then we need to copy everything except what will change:
    // first the part in front of the replaced range
    us_arrayCopy(prevArray, 0, destArray, 0, start);
  }
  if(relocated || length != srcLength) {
    // Move the part behind the replaced range to its new position, leaving
    // a hole for the new text. (If fArray did not change and the lengths are
    // equal, the tail is already in place.)
    us_arrayCopy(prevArray, start + length,
                 destArray, start + srcLength,
                 prevLength - (start + length));
  }

  // now fill in the hole with the new string
  us_arrayCopy(srcChars, 0, destArray, start, srcLength);

  setLength(resultLength);

  // delayed delete in case srcChars == fArray when we started, and
  // to keep prevArray alive for the above operations
  if (bufferToDelete) {
    uprv_free(bufferToDelete);
  }

  return *this;
}
1615
1616
UnicodeString&
UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) {
  if (!isWritable()) {
    return *this;
  }

  // The char16_t* overload takes an int32_t length; a longer view cannot be
  // represented and makes this string bogus.
  const auto srcLength = src.length();
  if (srcLength > INT32_MAX) {
    setToBogus();
    return *this;
  }

  const int32_t srcLength32 = static_cast<int32_t>(srcLength);
  return doReplace(start, length, src.data(), 0, srcLength32);
}
1627
1628
// Versions of doReplace() only for append() variants.
1629
// doReplace() and doAppend() optimize for different cases.
1630
1631
UnicodeString&
UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
  // Appending nothing is a no-op.
  if(srcLength == 0) {
    return *this;
  }

  // Clamp the source range to src's contents, then delegate to the
  // char16_t* overload.
  src.pinIndices(srcStart, srcLength);
  const char16_t *srcChars = src.getArrayStart();
  return doAppend(srcChars, srcStart, srcLength);
}
1641
1642
UnicodeString&
UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) {
  // Nothing to do for a non-writable string or missing/empty source text.
  if(srcChars == nullptr || srcLength == 0 || !isWritable()) {
    return *this;
  }

  // Perform all remaining operations relative to srcChars + srcStart.
  // From this point forward, do not use srcStart.
  srcChars += srcStart;

  if(srcLength < 0) {
    // get the srcLength if necessary
    srcLength = u_strlen(srcChars);
    if(srcLength == 0) {
      return *this;
    }
  }

  const int32_t prevLength = length();
  int32_t resultLength;

  if (srcLength <= getCapacity() - prevLength && isBufferWritable()) {
    // The new text fits into the existing writable buffer.
    resultLength = prevLength + srcLength;
    // Faster than a memmove
    if (srcLength <= 4) {
      // srcLength is at least 1 here; copy up to four units one by one,
      // in ascending order.
      char16_t *tail = getArrayStart() + prevLength;
      tail[0] = srcChars[0];
      if (srcLength >= 2) {
        tail[1] = srcChars[1];
        if (srcLength >= 3) {
          tail[2] = srcChars[2];
          if (srcLength >= 4) {
            tail[3] = srcChars[3];
          }
        }
      }
      setLength(resultLength);
      return *this;
    }
  } else {
    // A result longer than int32_t can express makes the string bogus.
    if (uprv_add32_overflow(prevLength, srcLength, &resultLength)) {
      setToBogus();
      return *this;
    }

    // Check for append onto ourself
    const char16_t* prevArray = getArrayStart();
    if (isBufferWritable() &&
        prevArray < srcChars + srcLength &&
        srcChars < prevArray + prevLength) {
      // Copy into a new UnicodeString and start over
      UnicodeString copy(srcChars, srcLength);
      if (copy.isBogus()) {
        setToBogus();
        return *this;
      }
      return doAppend(copy.getArrayStart(), 0, srcLength);
    }

    // optimize append() onto a large-enough, owned string
    if (!cloneArrayIfNeeded(resultLength, getGrowCapacity(resultLength))) {
      return *this;
    }
  }

  char16_t *destArray = getArrayStart();
  // Do not copy characters when
  //   char16_t *buffer=str.getAppendBuffer(...);
  // is followed by
  //   str.append(buffer, length);
  // or
  //   str.appendString(buffer, length)
  // or similar.
  const bool alreadyInPlace = (srcChars == destArray + prevLength);
  if(!alreadyInPlace) {
    us_arrayCopy(srcChars, 0, destArray, prevLength, srcLength);
  }
  setLength(resultLength);

  return *this;
}
1715
1716
UnicodeString&
UnicodeString::doAppend(std::u16string_view src) {
  // Nothing to do for an empty view or a non-writable string.
  if (src.empty() || !isWritable()) {
    return *this;
  }

  // The char16_t* overload takes an int32_t length; a longer view cannot be
  // represented and makes this string bogus.
  const auto srcLength = src.length();
  if (srcLength > INT32_MAX) {
    setToBogus();
    return *this;
  }

  const int32_t srcLength32 = static_cast<int32_t>(srcLength);
  return doAppend(src.data(), 0, srcLength32);
}
1727
1728
/**
1729
 * Replaceable API
1730
 */
1731
void
1732
UnicodeString::handleReplaceBetween(int32_t start,
1733
                                    int32_t limit,
1734
0
                                    const UnicodeString& text) {
1735
0
    replaceBetween(start, limit, text);
1736
0
}
1737
1738
/**
1739
 * Replaceable API
1740
 */
1741
void 
1742
0
UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1743
0
    if (limit <= start) {
1744
0
        return; // Nothing to do; avoid bogus malloc call
1745
0
    }
1746
0
    char16_t* text = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * (limit - start)));
1747
    // Check to make sure text is not null.
1748
0
    if (text != nullptr) {
1749
0
      extractBetween(start, limit, text, 0);
1750
0
      insert(dest, text, 0, limit - start);    
1751
0
      uprv_free(text);
1752
0
    }
1753
0
}
1754
1755
/**
1756
 * Replaceable API
1757
 *
1758
 * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1759
 * so we implement this function here.
1760
 */
1761
0
UBool Replaceable::hasMetaData() const {
1762
0
    return true;
1763
0
}
1764
1765
/**
1766
 * Replaceable API
1767
 */
1768
0
UBool UnicodeString::hasMetaData() const {
1769
0
    return false;
1770
0
}
1771
1772
UnicodeString&
UnicodeString::doReverse(int32_t start, int32_t length) {
  // Fewer than two code units need no work; otherwise we need a buffer we
  // are allowed to modify.
  if(length <= 1 || !cloneArrayIfNeeded()) {
    return *this;
  }

  // pin the indices to legal values
  pinIndices(start, length);
  if(length <= 1) {  // pinIndices() might have shrunk the length
    return *this;
  }

  char16_t *const first = getArrayStart() + start;
  char16_t *left = first;
  char16_t *right = first + length - 1;  // -1 for inclusive boundary (length>=2)
  bool sawLead = false;

  // First pass: swap code units pairwise from both ends towards the middle
  // and remember whether any lead surrogate was seen.
  // Before the loop we know left<right because length>=2.
  do {
    const char16_t fromLeft = *left;
    const char16_t fromRight = *right;
    *left++ = fromRight;
    *right-- = fromLeft;
    if(U16_IS_LEAD(fromLeft) || U16_IS_LEAD(fromRight)) {
      sawLead = true;
    }
  } while(left < right);
  // Make sure to test the middle code unit of an odd-length string.
  // Redundant if the length is even.
  if(U16_IS_LEAD(*left)) {
    sawLead = true;
  }

  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
  if(sawLead) {
    char16_t *p = first;
    char16_t *const last = first + length - 1; // -1 so that we can look at p[1] if p<last
    while(p < last) {
      const char16_t unit = p[0];
      if(U16_IS_TRAIL(unit) && U16_IS_LEAD(p[1])) {
        // trail followed by lead: put the pair back into lead, trail order
        const char16_t next = p[1];
        p[0] = next;
        p[1] = unit;
        p += 2;
      } else {
        ++p;
      }
    }
  }

  return *this;
}
1817
1818
UBool
UnicodeString::padLeading(int32_t targetLength,
                          char16_t padChar)
{
  const int32_t oldLength = length();

  // Nothing to pad if the string is already long enough; also give up if a
  // writable buffer of targetLength cannot be obtained.
  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
    return false;
  }

  // move contents up by padding width
  char16_t *array = getArrayStart();
  const int32_t padCount = targetLength - oldLength;
  us_arrayCopy(array, 0, array, padCount, oldLength);

  // fill in padding character
  for(int32_t i = 0; i < padCount; ++i) {
    array[i] = padChar;
  }

  setLength(targetLength);
  return true;
}
1839
1840
UBool
UnicodeString::padTrailing(int32_t targetLength,
                           char16_t padChar)
{
  const int32_t oldLength = length();

  // Nothing to pad if the string is already long enough; also give up if a
  // writable buffer of targetLength cannot be obtained.
  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
    return false;
  }

  // fill in padding character behind the existing contents
  char16_t *array = getArrayStart();
  for(int32_t i = oldLength; i < targetLength; ++i) {
    array[i] = padChar;
  }

  setLength(targetLength);
  return true;
}
1858
1859
//========================================
1860
// Hashing
1861
//========================================
1862
int32_t
UnicodeString::doHashCode() const
{
    /* Delegate hash computation to uhash.  This makes UnicodeString
     * hashing consistent with char16_t* hashing.  */
    const int32_t computed = ustr_hashUCharsN(getArrayStart(), length());

    // Never hand out kInvalidHashCode: substitute kEmptyHashCode for it.
    if (computed == kInvalidHashCode) {
        return kEmptyHashCode;
    }
    return computed;
}
1873
1874
//========================================
1875
// External Buffer
1876
//========================================
1877
1878
char16_t *
1879
0
UnicodeString::getBuffer(int32_t minCapacity) {
1880
0
  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1881
0
    fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1882
0
    setZeroLength();
1883
0
    return getArrayStart();
1884
0
  } else {
1885
0
    return nullptr;
1886
0
  }
1887
0
}
1888
1889
void
1890
0
UnicodeString::releaseBuffer(int32_t newLength) {
1891
0
  if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1892
    // set the new fLength
1893
0
    int32_t capacity=getCapacity();
1894
0
    if(newLength==-1) {
1895
      // the new length is the string length, capped by fCapacity
1896
0
      const char16_t *array=getArrayStart(), *p=array, *limit=array+capacity;
1897
0
      while(p<limit && *p!=0) {
1898
0
        ++p;
1899
0
      }
1900
0
      newLength = static_cast<int32_t>(p - array);
1901
0
    } else if(newLength>capacity) {
1902
0
      newLength=capacity;
1903
0
    }
1904
0
    setLength(newLength);
1905
0
    fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1906
0
  }
1907
0
}
1908
1909
//========================================
1910
// Miscellaneous
1911
//========================================
1912
UBool
UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
                                  int32_t growCapacity,
                                  UBool doCopyArray,
                                  int32_t **pBufferToDelete,
                                  UBool forceClone) {
  // Default arguments must be compile-time constants, so -1 stands in for
  // "the current capacity".
  if(newCapacity == -1) {
    newCapacity = getCapacity();
  }

  // A string with an open getBuffer(), or a bogus string, must not be
  // modified here; only an assignment or similar can revive a bogus string.
  if(!isWritable()) {
    return false;
  }

  // A fresh array is required when the caller forces it, when the current
  // buffer is read-only, when it is shared (refCounted with refCount>1),
  // or when it is too small.
  const bool needsNewArray =
      forceClone ||
      (fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) ||
      ((fUnion.fFields.fLengthAndFlags & kRefCounted) && refCount() > 1) ||
      newCapacity > getCapacity();
  if(!needsNewArray) {
    return true;
  }

  // Normalize growCapacity: negative means "same as newCapacity"; if the
  // required size fits the stack buffer, do not grow beyond it.
  if(growCapacity < 0) {
    growCapacity = newCapacity;
  } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
    growCapacity = US_STACKBUF_SIZE;
  } else if(newCapacity > growCapacity) {
    setToBogus();
    return false;  // bad inputs
  }
  if(growCapacity > kMaxCapacity) {
    setToBogus();
    return false;
  }

  // Remember the previous state so it can be copied from, released, or
  // restored after the allocation attempt.
  char16_t stackCopy[US_STACKBUF_SIZE];
  char16_t *previousArray;
  const int32_t previousLength = length();
  const int16_t previousFlags = fUnion.fFields.fLengthAndFlags;

  if(!(previousFlags & kUsingStackBuffer)) {
    previousArray = fUnion.fFields.fArray;
    U_ASSERT(previousArray != nullptr); /* when the stack buffer is not used, there must be a non-null array */
  } else {
    U_ASSERT(!(previousFlags & kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
    if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
      // Save the stack buffer contents first: they will be overwritten
      // with fUnion.fFields values.
      us_arrayCopy(fUnion.fStackFields.fBuffer, 0, stackCopy, 0, previousLength);
      previousArray = stackCopy;
    } else {
      previousArray = nullptr; // no need to copy from the stack buffer to itself
    }
  }

  // Try the generous growCapacity first, then fall back to the strictly
  // required newCapacity if that is smaller.
  const bool allocated =
      allocate(growCapacity) ||
      (newCapacity < growCapacity && allocate(newCapacity));
  if(!allocated) {
    // Out of memory for both sizes: put the old values back so that
    // setToBogus() releases the old array.
    if(!(previousFlags & kUsingStackBuffer)) {
      fUnion.fFields.fArray = previousArray;
    }
    fUnion.fFields.fLengthAndFlags = previousFlags;
    setToBogus();
    return false;
  }

  if(!doCopyArray) {
    setZeroLength();
  } else {
    // Copy the old contents, but no more than fits: the new array may be
    // smaller than the old one.
    const int32_t allocatedCapacity = getCapacity();
    const int32_t copyLength =
        allocatedCapacity < previousLength ? allocatedCapacity : previousLength;
    if(previousArray != nullptr) {
      us_arrayCopy(previousArray, 0, getArrayStart(), 0, copyLength);
    }
    setLength(copyLength);
  }

  // Drop this string's reference to the old array if it was refCounted.
  if(previousFlags & kRefCounted) {
    // the reference counter sits immediately in front of the array
    u_atomic_int32_t *pRefCount = reinterpret_cast<u_atomic_int32_t*>(previousArray) - 1;
    if(umtx_atomic_dec(pRefCount) == 0) {
      if(pBufferToDelete != nullptr) {
        // the caller asked to free the old allocation itself
        *pBufferToDelete = reinterpret_cast<int32_t*>(pRefCount);
      } else {
        // Note: the cast to (void *) is needed with MSVC, where
        // u_atomic_int32_t is defined as volatile. (Volatile has useful
        // non-standard behavior with this compiler.)
        uprv_free((void *)pRefCount);
      }
    }
  }
  return true;
}
2027
2028
// UnicodeStringAppendable ------------------------------------------------- ***
2029
2030
0
// Out-of-line destructor definition; there is nothing to clean up explicitly.
UnicodeStringAppendable::~UnicodeStringAppendable() = default;
2031
2032
UBool
UnicodeStringAppendable::appendCodeUnit(char16_t c) {
  // Append the single code unit as a one-element array, then report
  // whether the target string is writable afterwards.
  const char16_t *unit = &c;
  UnicodeString &target = str.doAppend(unit, 0, 1);
  return target.isWritable();
}
2036
2037
UBool
UnicodeStringAppendable::appendCodePoint(UChar32 c) {
  // Encode the code point into a small local buffer; U16_APPEND sets the
  // error flag if it cannot encode c.
  char16_t units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool encodeFailed = false;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, c, encodeFailed);
  if(encodeFailed) {
    return false;
  }
  // Success means the string is writable after the append.
  return str.doAppend(units, 0, unitCount).isWritable();
}
2045
2046
UBool
UnicodeStringAppendable::appendString(const char16_t *s, int32_t length) {
  // Forward s and length to doAppend() starting at offset 0, then report
  // whether the target string is writable afterwards.
  UnicodeString &target = str.doAppend(s, 0, length);
  return target.isWritable();
}
2050
2051
UBool
UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
  // Ask the string to make room for its current contents plus
  // appendCapacity more code units.
  //
  // The sum must not be computed blindly: signed int32_t overflow is
  // undefined behavior, and a wrapped-around negative capacity would make
  // cloneArrayIfNeeded() report success without reserving anything.
  // Saturate at INT32_MAX instead, so that an impossible request is handled
  // by cloneArrayIfNeeded() exactly like any other oversized request.
  const int32_t oldLength = str.length();
  const int32_t requiredCapacity =
      appendCapacity > INT32_MAX - oldLength ? INT32_MAX : oldLength + appendCapacity;
  return str.cloneArrayIfNeeded(requiredCapacity);
}
2055
2056
char16_t *
2057
UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
2058
                                         int32_t desiredCapacityHint,
2059
                                         char16_t *scratch, int32_t scratchCapacity,
2060
0
                                         int32_t *resultCapacity) {
2061
0
  if(minCapacity < 1 || scratchCapacity < minCapacity) {
2062
0
    *resultCapacity = 0;
2063
0
    return nullptr;
2064
0
  }
2065
0
  int32_t oldLength = str.length();
2066
0
  if(minCapacity <= (kMaxCapacity - oldLength) &&
2067
0
      desiredCapacityHint <= (kMaxCapacity - oldLength) &&
2068
0
      str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
2069
0
    *resultCapacity = str.getCapacity() - oldLength;
2070
0
    return str.getArrayStart() + oldLength;
2071
0
  }
2072
0
  *resultCapacity = scratchCapacity;
2073
0
  return scratch;
2074
0
}
2075
2076
U_NAMESPACE_END
2077
2078
U_NAMESPACE_USE
2079
2080
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key) {
    // The element's pointer is interpreted as a UnicodeString;
    // a null key hashes to 0.
    const UnicodeString *text = static_cast<const UnicodeString*>(key.pointer);
    if (text == nullptr) {
        return 0;
    }
    return text->hashCode();
}
2085
2086
// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
2087
// does not depend on hashtable code.
2088
U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1, const UElement key2) {
    const UnicodeString *lhs = static_cast<const UnicodeString*>(key1.pointer);
    const UnicodeString *rhs = static_cast<const UnicodeString*>(key2.pointer);

    // Identical pointers (including two nulls) are equal without looking
    // at the contents.
    if (lhs == rhs) {
        return true;
    }
    // Exactly one null pointer: not equal. Otherwise compare the strings.
    const bool bothPresent = lhs != nullptr && rhs != nullptr;
    return bothPresent ? (*lhs == *rhs) : false;
}
2100
2101
#ifdef U_STATIC_IMPLEMENTATION
2102
/*
2103
This should never be called. It is defined here to make sure that the
2104
virtual vector deleting destructor is defined within unistr.cpp.
2105
The vector deleting destructor is already a part of UObject,
2106
but defining it here makes sure that it is included with this object file.
2107
This makes sure that static library dependencies are kept to a minimum.
2108
*/
2109
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
2110
#pragma GCC diagnostic push
2111
#pragma GCC diagnostic ignored "-Wunused-function"
2112
static void uprv_UnicodeStringDummy() {
2113
    delete [] (new UnicodeString[2]);
2114
}
2115
#pragma GCC diagnostic pop
2116
#endif
2117
#endif