Coverage Report

Created: 2023-06-07 07:18

/src/icu/source/common/unistr.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
* Copyright (C) 1999-2016, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
******************************************************************************
8
*
9
* File unistr.cpp
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   09/25/98    stephen     Creation.
15
*   04/20/99    stephen     Overhauled per 4/16 code review.
16
*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17
*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
18
*                           Replaceable.
19
*   06/25/01    grhoten     Removed the dependency on iostream
20
******************************************************************************
21
*/
22
23
#include "unicode/utypes.h"
24
#include "unicode/appendable.h"
25
#include "unicode/putil.h"
26
#include "cstring.h"
27
#include "cmemory.h"
28
#include "unicode/ustring.h"
29
#include "unicode/unistr.h"
30
#include "unicode/utf.h"
31
#include "unicode/utf16.h"
32
#include "uelement.h"
33
#include "ustr_imp.h"
34
#include "umutex.h"
35
#include "uassert.h"
36
37
#if 0
38
39
#include <iostream>
40
using namespace std;
41
42
//DEBUGGING
43
void
44
print(const UnicodeString& s,
45
      const char *name)
46
{
47
  UChar c;
48
  cout << name << ":|";
49
  for(int i = 0; i < s.length(); ++i) {
50
    c = s[i];
51
    if(c>= 0x007E || c < 0x0020)
52
      cout << "[0x" << hex << s[i] << "]";
53
    else
54
      cout << (char) s[i];
55
  }
56
  cout << '|' << endl;
57
}
58
59
void
60
print(const UChar *s,
61
      int32_t len,
62
      const char *name)
63
{
64
  UChar c;
65
  cout << name << ":|";
66
  for(int i = 0; i < len; ++i) {
67
    c = s[i];
68
    if(c>= 0x007E || c < 0x0020)
69
      cout << "[0x" << hex << s[i] << "]";
70
    else
71
      cout << (char) s[i];
72
  }
73
  cout << '|' << endl;
74
}
75
// END DEBUGGING
76
#endif
77
78
// Local function definitions for now
79
80
// need to copy areas that may overlap
81
static
82
inline void
83
us_arrayCopy(const UChar *src, int32_t srcStart,
84
         UChar *dst, int32_t dstStart, int32_t count)
85
24.8M
{
86
24.8M
  if(count>0) {
87
7.32M
    uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88
7.32M
  }
89
24.8M
}
90
91
// u_unescapeAt() callback to get a UChar from a UnicodeString
92
U_CDECL_BEGIN
93
static UChar U_CALLCONV
94
0
UnicodeString_charAt(int32_t offset, void *context) {
95
0
    return ((icu::UnicodeString*) context)->charAt(offset);
96
0
}
97
U_CDECL_END
98
99
U_NAMESPACE_BEGIN
100
101
/* The Replaceable virtual destructor can't be defined in the header
102
   due to how AIX works with multiple definitions of virtual functions.
103
*/
104
27.8M
Replaceable::~Replaceable() {}
105
106
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108
UnicodeString U_EXPORT2
109
0
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110
0
    return
111
0
        UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
112
0
            append(s1).
113
0
                append(s2);
114
0
}
115
116
//========================================
117
// Reference Counting functions, put at top of file so that optimizing compilers
118
//                               have a chance to automatically inline.
119
//========================================
120
121
void
122
0
UnicodeString::addRef() {
123
0
  umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124
0
}
125
126
int32_t
127
5.86M
UnicodeString::removeRef() {
128
5.86M
  return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129
5.86M
}
130
131
int32_t
132
4.77k
UnicodeString::refCount() const {
133
4.77k
  return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134
4.77k
}
135
136
void
137
28.2M
UnicodeString::releaseArray() {
138
28.2M
  if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
139
5.86M
    uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140
5.86M
  }
141
28.2M
}
142
143
144
145
//========================================
146
// Constructors
147
//========================================
148
149
// The default constructor is inline in unistr.h.
150
151
0
UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152
0
  fUnion.fFields.fLengthAndFlags = 0;
153
0
  if(count <= 0 || (uint32_t)c > 0x10ffff) {
154
    // just allocate and do not do anything else
155
0
    allocate(capacity);
156
0
  } else if(c <= 0xffff) {
157
0
    int32_t length = count;
158
0
    if(capacity < length) {
159
0
      capacity = length;
160
0
    }
161
0
    if(allocate(capacity)) {
162
0
      UChar *array = getArrayStart();
163
0
      UChar unit = (UChar)c;
164
0
      for(int32_t i = 0; i < length; ++i) {
165
0
        array[i] = unit;
166
0
      }
167
0
      setLength(length);
168
0
    }
169
0
  } else {  // supplementary code point, write surrogate pairs
170
0
    if(count > (INT32_MAX / 2)) {
171
      // We would get more than 2G UChars.
172
0
      allocate(capacity);
173
0
      return;
174
0
    }
175
0
    int32_t length = count * 2;
176
0
    if(capacity < length) {
177
0
      capacity = length;
178
0
    }
179
0
    if(allocate(capacity)) {
180
0
      UChar *array = getArrayStart();
181
0
      UChar lead = U16_LEAD(c);
182
0
      UChar trail = U16_TRAIL(c);
183
0
      for(int32_t i = 0; i < length; i += 2) {
184
0
        array[i] = lead;
185
0
        array[i + 1] = trail;
186
0
      }
187
0
      setLength(length);
188
0
    }
189
0
  }
190
0
}
191
192
0
UnicodeString::UnicodeString(UChar ch) {
193
0
  fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
194
0
  fUnion.fStackFields.fBuffer[0] = ch;
195
0
}
196
197
0
UnicodeString::UnicodeString(UChar32 ch) {
198
0
  fUnion.fFields.fLengthAndFlags = kShortString;
199
0
  int32_t i = 0;
200
0
  UBool isError = FALSE;
201
0
  U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
202
  // We test isError so that the compiler does not complain that we don't.
203
  // If isError then i==0 which is what we want anyway.
204
0
  if(!isError) {
205
0
    setShortLength(i);
206
0
  }
207
0
}
208
209
0
UnicodeString::UnicodeString(const UChar *text) {
210
0
  fUnion.fFields.fLengthAndFlags = kShortString;
211
0
  doAppend(text, 0, -1);
212
0
}
213
214
UnicodeString::UnicodeString(const UChar *text,
215
371k
                             int32_t textLength) {
216
371k
  fUnion.fFields.fLengthAndFlags = kShortString;
217
371k
  doAppend(text, 0, textLength);
218
371k
}
219
220
UnicodeString::UnicodeString(UBool isTerminated,
221
                             ConstChar16Ptr textPtr,
222
7.33M
                             int32_t textLength) {
223
7.33M
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
224
7.33M
  const UChar *text = textPtr;
225
7.33M
  if(text == NULL) {
226
    // treat as an empty string, do not alias
227
0
    setToEmpty();
228
7.33M
  } else if(textLength < -1 ||
229
7.33M
            (textLength == -1 && !isTerminated) ||
230
7.33M
            (textLength >= 0 && isTerminated && text[textLength] != 0)
231
7.33M
  ) {
232
0
    setToBogus();
233
7.33M
  } else {
234
7.33M
    if(textLength == -1) {
235
      // text is terminated, or else it would have failed the above test
236
0
      textLength = u_strlen(text);
237
0
    }
238
7.33M
    setArray(const_cast<UChar *>(text), textLength,
239
7.33M
             isTerminated ? textLength + 1 : textLength);
240
7.33M
  }
241
7.33M
}
242
243
UnicodeString::UnicodeString(UChar *buff,
244
                             int32_t buffLength,
245
6.95M
                             int32_t buffCapacity) {
246
6.95M
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
247
6.95M
  if(buff == NULL) {
248
    // treat as an empty string, do not alias
249
0
    setToEmpty();
250
6.95M
  } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
251
0
    setToBogus();
252
6.95M
  } else {
253
6.95M
    if(buffLength == -1) {
254
      // fLength = u_strlen(buff); but do not look beyond buffCapacity
255
0
      const UChar *p = buff, *limit = buff + buffCapacity;
256
0
      while(p != limit && *p != 0) {
257
0
        ++p;
258
0
      }
259
0
      buffLength = (int32_t)(p - buff);
260
0
    }
261
6.95M
    setArray(buff, buffLength, buffCapacity);
262
6.95M
  }
263
6.95M
}
264
265
0
UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266
0
  fUnion.fFields.fLengthAndFlags = kShortString;
267
0
  if(src==NULL) {
268
    // treat as an empty string
269
0
  } else {
270
0
    if(length<0) {
271
0
      length=(int32_t)uprv_strlen(src);
272
0
    }
273
0
    if(cloneArrayIfNeeded(length, length, FALSE)) {
274
0
      u_charsToUChars(src, getArrayStart(), length);
275
0
      setLength(length);
276
0
    } else {
277
0
      setToBogus();
278
0
    }
279
0
  }
280
0
}
281
282
#if U_CHARSET_IS_UTF8
283
284
UnicodeString::UnicodeString(const char *codepageData) {
285
  fUnion.fFields.fLengthAndFlags = kShortString;
286
  if(codepageData != 0) {
287
    setToUTF8(codepageData);
288
  }
289
}
290
291
UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292
  fUnion.fFields.fLengthAndFlags = kShortString;
293
  // if there's nothing to convert, do nothing
294
  if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
295
    return;
296
  }
297
  if(dataLength == -1) {
298
    dataLength = (int32_t)uprv_strlen(codepageData);
299
  }
300
  setToUTF8(StringPiece(codepageData, dataLength));
301
}
302
303
// else see unistr_cnv.cpp
304
#endif
305
306
0
UnicodeString::UnicodeString(const UnicodeString& that) {
307
0
  fUnion.fFields.fLengthAndFlags = kShortString;
308
0
  copyFrom(that);
309
0
}
310
311
#if U_HAVE_RVALUE_REFERENCES
312
0
UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
313
0
  fUnion.fFields.fLengthAndFlags = kShortString;
314
0
  moveFrom(src);
315
0
}
316
#endif
317
318
UnicodeString::UnicodeString(const UnicodeString& that,
319
0
                             int32_t srcStart) {
320
0
  fUnion.fFields.fLengthAndFlags = kShortString;
321
0
  setTo(that, srcStart);
322
0
}
323
324
UnicodeString::UnicodeString(const UnicodeString& that,
325
                             int32_t srcStart,
326
0
                             int32_t srcLength) {
327
0
  fUnion.fFields.fLengthAndFlags = kShortString;
328
0
  setTo(that, srcStart, srcLength);
329
0
}
330
331
// Replaceable base class clone() default implementation, does not clone
332
Replaceable *
333
0
Replaceable::clone() const {
334
0
  return NULL;
335
0
}
336
337
// UnicodeString overrides clone() with a real implementation
338
Replaceable *
339
0
UnicodeString::clone() const {
340
0
  return new UnicodeString(*this);
341
0
}
342
343
//========================================
344
// array allocation
345
//========================================
346
347
namespace {
348
349
const int32_t kGrowSize = 128;
350
351
// The number of bytes for one int32_t reference counter and capacity UChars
352
// must fit into a 32-bit size_t (at least when on a 32-bit platform).
353
// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
354
// and round up to a multiple of 16 bytes.
355
// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
356
// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
357
// but that does not seem worth it.)
358
const int32_t kMaxCapacity = 0x7ffffff5;
359
360
6.55M
int32_t getGrowCapacity(int32_t newLength) {
361
6.55M
  int32_t growSize = (newLength >> 2) + kGrowSize;
362
6.55M
  if(growSize <= (kMaxCapacity - newLength)) {
363
6.55M
    return newLength + growSize;
364
6.55M
  } else {
365
0
    return kMaxCapacity;
366
0
  }
367
6.55M
}
368
369
}  // namespace
370
371
UBool
372
5.86M
UnicodeString::allocate(int32_t capacity) {
373
5.86M
  if(capacity <= US_STACKBUF_SIZE) {
374
0
    fUnion.fFields.fLengthAndFlags = kShortString;
375
0
    return TRUE;
376
0
  }
377
5.86M
  if(capacity <= kMaxCapacity) {
378
5.86M
    ++capacity;  // for the NUL
379
    // Switch to size_t which is unsigned so that we can allocate up to 4GB.
380
    // Reference counter + UChars.
381
5.86M
    size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
382
    // Round up to a multiple of 16.
383
5.86M
    numBytes = (numBytes + 15) & ~15;
384
5.86M
    int32_t *array = (int32_t *) uprv_malloc(numBytes);
385
5.86M
    if(array != NULL) {
386
      // set initial refCount and point behind the refCount
387
5.86M
      *array++ = 1;
388
5.86M
      numBytes -= sizeof(int32_t);
389
390
      // have fArray point to the first UChar
391
5.86M
      fUnion.fFields.fArray = (UChar *)array;
392
5.86M
      fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
393
5.86M
      fUnion.fFields.fLengthAndFlags = kLongString;
394
5.86M
      return TRUE;
395
5.86M
    }
396
5.86M
  }
397
0
  fUnion.fFields.fLengthAndFlags = kIsBogus;
398
0
  fUnion.fFields.fArray = 0;
399
0
  fUnion.fFields.fCapacity = 0;
400
0
  return FALSE;
401
5.86M
}
402
403
//========================================
404
// Destructor
405
//========================================
406
407
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
408
static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
409
static u_atomic_int32_t beyondCount(0);
410
411
U_CAPI void unistr_printLengths() {
412
  int32_t i;
413
  for(i = 0; i <= 59; ++i) {
414
    printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
415
  }
416
  int32_t beyond = beyondCount;
417
  for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
418
    beyond += finalLengthCounts[i];
419
  }
420
  printf(">59, %9d\n", beyond);
421
}
422
#endif
423
424
UnicodeString::~UnicodeString()
425
27.8M
{
426
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
427
  // Count lengths of strings at the end of their lifetime.
428
  // Useful for discussion of a desirable stack buffer size.
429
  // Count the contents length, not the optional NUL terminator nor further capacity.
430
  // Ignore open-buffer strings and strings which alias external storage.
431
  if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
432
    if(hasShortLength()) {
433
      umtx_atomic_inc(finalLengthCounts + getShortLength());
434
    } else {
435
      umtx_atomic_inc(&beyondCount);
436
    }
437
  }
438
#endif
439
440
27.8M
  releaseArray();
441
27.8M
}
442
443
//========================================
444
// Factory methods
445
//========================================
446
447
0
UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
448
0
  UnicodeString result;
449
0
  result.setToUTF8(utf8);
450
0
  return result;
451
0
}
452
453
0
UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
454
0
  UnicodeString result;
455
0
  int32_t capacity;
456
  // Most UTF-32 strings will be BMP-only and result in a same-length
457
  // UTF-16 string. We overestimate the capacity just slightly,
458
  // just in case there are a few supplementary characters.
459
0
  if(length <= US_STACKBUF_SIZE) {
460
0
    capacity = US_STACKBUF_SIZE;
461
0
  } else {
462
0
    capacity = length + (length >> 4) + 4;
463
0
  }
464
0
  do {
465
0
    UChar *utf16 = result.getBuffer(capacity);
466
0
    int32_t length16;
467
0
    UErrorCode errorCode = U_ZERO_ERROR;
468
0
    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
469
0
        utf32, length,
470
0
        0xfffd,  // Substitution character.
471
0
        NULL,    // Don't care about number of substitutions.
472
0
        &errorCode);
473
0
    result.releaseBuffer(length16);
474
0
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
475
0
      capacity = length16 + 1;  // +1 for the terminating NUL.
476
0
      continue;
477
0
    } else if(U_FAILURE(errorCode)) {
478
0
      result.setToBogus();
479
0
    }
480
0
    break;
481
0
  } while(TRUE);
482
0
  return result;
483
0
}
484
485
//========================================
486
// Assignment
487
//========================================
488
489
UnicodeString &
490
371k
UnicodeString::operator=(const UnicodeString &src) {
491
371k
  return copyFrom(src);
492
371k
}
493
494
UnicodeString &
495
0
UnicodeString::fastCopyFrom(const UnicodeString &src) {
496
0
  return copyFrom(src, TRUE);
497
0
}
498
499
UnicodeString &
500
371k
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
501
  // if assigning to ourselves, do nothing
502
371k
  if(this == &src) {
503
0
    return *this;
504
0
  }
505
506
  // is the right side bogus?
507
371k
  if(src.isBogus()) {
508
0
    setToBogus();
509
0
    return *this;
510
0
  }
511
512
  // delete the current contents
513
371k
  releaseArray();
514
515
371k
  if(src.isEmpty()) {
516
    // empty string - use the stack buffer
517
0
    setToEmpty();
518
0
    return *this;
519
0
  }
520
521
  // fLength>0 and not an "open" src.getBuffer(minCapacity)
522
371k
  fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
523
371k
  switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
524
371k
  case kShortString:
525
    // short string using the stack buffer, do the same
526
371k
    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
527
371k
                getShortLength() * U_SIZEOF_UCHAR);
528
371k
    break;
529
0
  case kLongString:
530
    // src uses a refCounted string buffer, use that buffer with refCount
531
    // src is const, use a cast - we don't actually change it
532
0
    ((UnicodeString &)src).addRef();
533
    // copy all fields, share the reference-counted buffer
534
0
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
535
0
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
536
0
    if(!hasShortLength()) {
537
0
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
538
0
    }
539
0
    break;
540
0
  case kReadonlyAlias:
541
0
    if(fastCopy) {
542
      // src is a readonly alias, do the same
543
      // -> maintain the readonly alias as such
544
0
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
545
0
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
546
0
      if(!hasShortLength()) {
547
0
        fUnion.fFields.fLength = src.fUnion.fFields.fLength;
548
0
      }
549
0
      break;
550
0
    }
551
    // else if(!fastCopy) fall through to case kWritableAlias
552
    // -> allocate a new buffer and copy the contents
553
0
    U_FALLTHROUGH;
554
0
  case kWritableAlias: {
555
    // src is a writable alias; we make a copy of that instead
556
0
    int32_t srcLength = src.length();
557
0
    if(allocate(srcLength)) {
558
0
      u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
559
0
      setLength(srcLength);
560
0
      break;
561
0
    }
562
    // if there is not enough memory, then fall through to setting to bogus
563
0
    U_FALLTHROUGH;
564
0
  }
565
0
  default:
566
    // if src is bogus, set ourselves to bogus
567
    // do not call setToBogus() here because fArray and flags are not consistent here
568
0
    fUnion.fFields.fLengthAndFlags = kIsBogus;
569
0
    fUnion.fFields.fArray = 0;
570
0
    fUnion.fFields.fCapacity = 0;
571
0
    break;
572
371k
  }
573
574
371k
  return *this;
575
371k
}
576
577
0
UnicodeString &UnicodeString::moveFrom(UnicodeString &src) U_NOEXCEPT {
578
  // No explicit check for self move assignment, consistent with standard library.
579
  // Self move assignment causes no crash nor leak but might make the object bogus.
580
0
  releaseArray();
581
0
  copyFieldsFrom(src, TRUE);
582
0
  return *this;
583
0
}
584
585
// Same as moveFrom() except without memory management.
586
0
void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
587
0
  int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
588
0
  if(lengthAndFlags & kUsingStackBuffer) {
589
    // Short string using the stack buffer, copy the contents.
590
    // Check for self assignment to prevent "overlap in memcpy" warnings,
591
    // although it should be harmless to copy a buffer to itself exactly.
592
0
    if(this != &src) {
593
0
      uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
594
0
                  getShortLength() * U_SIZEOF_UCHAR);
595
0
    }
596
0
  } else {
597
    // In all other cases, copy all fields.
598
0
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
599
0
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
600
0
    if(!hasShortLength()) {
601
0
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
602
0
    }
603
0
    if(setSrcToBogus) {
604
      // Set src to bogus without releasing any memory.
605
0
      src.fUnion.fFields.fLengthAndFlags = kIsBogus;
606
0
      src.fUnion.fFields.fArray = NULL;
607
0
      src.fUnion.fFields.fCapacity = 0;
608
0
    }
609
0
  }
610
0
}
611
612
0
void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
613
0
  UnicodeString temp;  // Empty short string: Known not to need releaseArray().
614
  // Copy fields without resetting source values in between.
615
0
  temp.copyFieldsFrom(*this, FALSE);
616
0
  this->copyFieldsFrom(other, FALSE);
617
0
  other.copyFieldsFrom(temp, FALSE);
618
  // Set temp to an empty string so that other's memory is not released twice.
619
0
  temp.fUnion.fFields.fLengthAndFlags = kShortString;
620
0
}
621
622
//========================================
623
// Miscellaneous operations
624
//========================================
625
626
0
UnicodeString UnicodeString::unescape() const {
627
0
    UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
628
0
    if (result.isBogus()) {
629
0
        return result;
630
0
    }
631
0
    const UChar *array = getBuffer();
632
0
    int32_t len = length();
633
0
    int32_t prev = 0;
634
0
    for (int32_t i=0;;) {
635
0
        if (i == len) {
636
0
            result.append(array, prev, len - prev);
637
0
            break;
638
0
        }
639
0
        if (array[i++] == 0x5C /*'\\'*/) {
640
0
            result.append(array, prev, (i - 1) - prev);
641
0
            UChar32 c = unescapeAt(i); // advances i
642
0
            if (c < 0) {
643
0
                result.remove(); // return empty string
644
0
                break; // invalid escape sequence
645
0
            }
646
0
            result.append(c);
647
0
            prev = i;
648
0
        }
649
0
    }
650
0
    return result;
651
0
}
652
653
0
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
654
0
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
655
0
}
656
657
//========================================
658
// Read-only implementation
659
//========================================
660
UBool
661
0
UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
662
  // Requires: this & text not bogus and have same lengths.
663
  // Byte-wise comparison works for equality regardless of endianness.
664
0
  return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
665
0
}
666
667
int8_t
668
UnicodeString::doCompare( int32_t start,
669
              int32_t length,
670
              const UChar *srcChars,
671
              int32_t srcStart,
672
              int32_t srcLength) const
673
0
{
674
  // compare illegal string values
675
0
  if(isBogus()) {
676
0
    return -1;
677
0
  }
678
  
679
  // pin indices to legal values
680
0
  pinIndices(start, length);
681
682
0
  if(srcChars == NULL) {
683
    // treat const UChar *srcChars==NULL as an empty string
684
0
    return length == 0 ? 0 : 1;
685
0
  }
686
687
  // get the correct pointer
688
0
  const UChar *chars = getArrayStart();
689
690
0
  chars += start;
691
0
  srcChars += srcStart;
692
693
0
  int32_t minLength;
694
0
  int8_t lengthResult;
695
696
  // get the srcLength if necessary
697
0
  if(srcLength < 0) {
698
0
    srcLength = u_strlen(srcChars + srcStart);
699
0
  }
700
701
  // are we comparing different lengths?
702
0
  if(length != srcLength) {
703
0
    if(length < srcLength) {
704
0
      minLength = length;
705
0
      lengthResult = -1;
706
0
    } else {
707
0
      minLength = srcLength;
708
0
      lengthResult = 1;
709
0
    }
710
0
  } else {
711
0
    minLength = length;
712
0
    lengthResult = 0;
713
0
  }
714
715
  /*
716
   * note that uprv_memcmp() returns an int but we return an int8_t;
717
   * we need to take care not to truncate the result -
718
   * one way to do this is to right-shift the value to
719
   * move the sign bit into the lower 8 bits and making sure that this
720
   * does not become 0 itself
721
   */
722
723
0
  if(minLength > 0 && chars != srcChars) {
724
0
    int32_t result;
725
726
#   if U_IS_BIG_ENDIAN 
727
      // big-endian: byte comparison works
728
      result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
729
      if(result != 0) {
730
        return (int8_t)(result >> 15 | 1);
731
      }
732
#   else
733
      // little-endian: compare UChar units
734
0
      do {
735
0
        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
736
0
        if(result != 0) {
737
0
          return (int8_t)(result >> 15 | 1);
738
0
        }
739
0
      } while(--minLength > 0);
740
0
#   endif
741
0
  }
742
0
  return lengthResult;
743
0
}
744
745
/* String compare in code point order - doCompare() compares in code unit order. */
746
int8_t
747
UnicodeString::doCompareCodePointOrder(int32_t start,
748
                                       int32_t length,
749
                                       const UChar *srcChars,
750
                                       int32_t srcStart,
751
                                       int32_t srcLength) const
752
0
{
753
  // compare illegal string values
754
  // treat const UChar *srcChars==NULL as an empty string
755
0
  if(isBogus()) {
756
0
    return -1;
757
0
  }
758
759
  // pin indices to legal values
760
0
  pinIndices(start, length);
761
762
0
  if(srcChars == NULL) {
763
0
    srcStart = srcLength = 0;
764
0
  }
765
766
0
  int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
767
  /* translate the 32-bit result into an 8-bit one */
768
0
  if(diff!=0) {
769
0
    return (int8_t)(diff >> 15 | 1);
770
0
  } else {
771
0
    return 0;
772
0
  }
773
0
}
774
775
int32_t
776
0
UnicodeString::getLength() const {
777
0
    return length();
778
0
}
779
780
UChar
781
0
UnicodeString::getCharAt(int32_t offset) const {
782
0
  return charAt(offset);
783
0
}
784
785
UChar32
786
0
UnicodeString::getChar32At(int32_t offset) const {
787
0
  return char32At(offset);
788
0
}
789
790
UChar32
791
UnicodeString::char32At(int32_t offset) const
792
0
{
793
0
  int32_t len = length();
794
0
  if((uint32_t)offset < (uint32_t)len) {
795
0
    const UChar *array = getArrayStart();
796
0
    UChar32 c;
797
0
    U16_GET(array, 0, offset, len, c);
798
0
    return c;
799
0
  } else {
800
0
    return kInvalidUChar;
801
0
  }
802
0
}
803
804
int32_t
805
0
UnicodeString::getChar32Start(int32_t offset) const {
806
0
  if((uint32_t)offset < (uint32_t)length()) {
807
0
    const UChar *array = getArrayStart();
808
0
    U16_SET_CP_START(array, 0, offset);
809
0
    return offset;
810
0
  } else {
811
0
    return 0;
812
0
  }
813
0
}
814
815
int32_t
816
0
UnicodeString::getChar32Limit(int32_t offset) const {
817
0
  int32_t len = length();
818
0
  if((uint32_t)offset < (uint32_t)len) {
819
0
    const UChar *array = getArrayStart();
820
0
    U16_SET_CP_LIMIT(array, 0, offset, len);
821
0
    return offset;
822
0
  } else {
823
0
    return len;
824
0
  }
825
0
}
826
827
int32_t
828
0
UnicodeString::countChar32(int32_t start, int32_t length) const {
829
0
  pinIndices(start, length);
830
  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
831
0
  return u_countChar32(getArrayStart()+start, length);
832
0
}
833
834
UBool
835
0
UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
836
0
  pinIndices(start, length);
837
  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
838
0
  return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
839
0
}
840
841
int32_t
842
0
UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
843
  // pin index
844
0
  int32_t len = length();
845
0
  if(index<0) {
846
0
    index=0;
847
0
  } else if(index>len) {
848
0
    index=len;
849
0
  }
850
851
0
  const UChar *array = getArrayStart();
852
0
  if(delta>0) {
853
0
    U16_FWD_N(array, index, len, delta);
854
0
  } else {
855
0
    U16_BACK_N(array, 0, index, -delta);
856
0
  }
857
858
0
  return index;
859
0
}
860
861
void
862
UnicodeString::doExtract(int32_t start,
863
             int32_t length,
864
             UChar *dst,
865
             int32_t dstStart) const
866
0
{
867
  // pin indices to legal values
868
0
  pinIndices(start, length);
869
870
  // do not copy anything if we alias dst itself
871
0
  const UChar *array = getArrayStart();
872
0
  if(array + start != dst + dstStart) {
873
0
    us_arrayCopy(array, start, dst, dstStart, length);
874
0
  }
875
0
}
876
877
int32_t
878
UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
879
6.95M
                       UErrorCode &errorCode) const {
880
6.95M
  int32_t len = length();
881
6.95M
  if(U_SUCCESS(errorCode)) {
882
6.95M
    if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
883
0
      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
884
6.95M
    } else {
885
6.95M
      const UChar *array = getArrayStart();
886
6.95M
      if(len>0 && len<=destCapacity && array!=dest) {
887
202
        u_memcpy(dest, array, len);
888
202
      }
889
6.95M
      return u_terminateUChars(dest, destCapacity, len, &errorCode);
890
6.95M
    }
891
6.95M
  }
892
893
207
  return len;
894
6.95M
}
895
896
int32_t
897
UnicodeString::extract(int32_t start,
898
                       int32_t length,
899
                       char *target,
900
                       int32_t targetCapacity,
901
                       enum EInvariant) const
902
0
{
903
  // if the arguments are illegal, then do nothing
904
0
  if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
905
0
    return 0;
906
0
  }
907
908
  // pin the indices to legal values
909
0
  pinIndices(start, length);
910
911
0
  if(length <= targetCapacity) {
912
0
    u_UCharsToChars(getArrayStart() + start, target, length);
913
0
  }
914
0
  UErrorCode status = U_ZERO_ERROR;
915
0
  return u_terminateChars(target, targetCapacity, length, &status);
916
0
}
917
918
UnicodeString
919
379k
UnicodeString::tempSubString(int32_t start, int32_t len) const {
920
379k
  pinIndices(start, len);
921
379k
  const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
922
379k
  if(array==NULL) {
923
0
    array=fUnion.fStackFields.fBuffer;  // anything not NULL because that would make an empty string
924
0
    len=-2;  // bogus result string
925
0
  }
926
379k
  return UnicodeString(FALSE, array + start, len);
927
379k
}
928
929
int32_t
930
UnicodeString::toUTF8(int32_t start, int32_t len,
931
0
                      char *target, int32_t capacity) const {
932
0
  pinIndices(start, len);
933
0
  int32_t length8;
934
0
  UErrorCode errorCode = U_ZERO_ERROR;
935
0
  u_strToUTF8WithSub(target, capacity, &length8,
936
0
                     getBuffer() + start, len,
937
0
                     0xFFFD,  // Standard substitution character.
938
0
                     NULL,    // Don't care about number of substitutions.
939
0
                     &errorCode);
940
0
  return length8;
941
0
}
942
943
#if U_CHARSET_IS_UTF8
944
945
int32_t
946
UnicodeString::extract(int32_t start, int32_t len,
947
                       char *target, uint32_t dstSize) const {
948
  // if the arguments are illegal, then do nothing
949
  if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
950
    return 0;
951
  }
952
  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
953
}
954
955
// else see unistr_cnv.cpp
956
#endif
957
958
void 
959
UnicodeString::extractBetween(int32_t start,
960
                  int32_t limit,
961
0
                  UnicodeString& target) const {
962
0
  pinIndex(start);
963
0
  pinIndex(limit);
964
0
  doExtract(start, limit - start, target);
965
0
}
966
967
// When converting from UTF-16 to UTF-8, the result will have at most 3 times
968
// as many bytes as the source has UChars.
969
// The "worst cases" are writing systems like Indic, Thai and CJK with
970
// 3:1 bytes:UChars.
971
void
972
0
UnicodeString::toUTF8(ByteSink &sink) const {
973
0
  int32_t length16 = length();
974
0
  if(length16 != 0) {
975
0
    char stackBuffer[1024];
976
0
    int32_t capacity = (int32_t)sizeof(stackBuffer);
977
0
    UBool utf8IsOwned = FALSE;
978
0
    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
979
0
                                      3*length16,
980
0
                                      stackBuffer, capacity,
981
0
                                      &capacity);
982
0
    int32_t length8 = 0;
983
0
    UErrorCode errorCode = U_ZERO_ERROR;
984
0
    u_strToUTF8WithSub(utf8, capacity, &length8,
985
0
                       getBuffer(), length16,
986
0
                       0xFFFD,  // Standard substitution character.
987
0
                       NULL,    // Don't care about number of substitutions.
988
0
                       &errorCode);
989
0
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
990
0
      utf8 = (char *)uprv_malloc(length8);
991
0
      if(utf8 != NULL) {
992
0
        utf8IsOwned = TRUE;
993
0
        errorCode = U_ZERO_ERROR;
994
0
        u_strToUTF8WithSub(utf8, length8, &length8,
995
0
                           getBuffer(), length16,
996
0
                           0xFFFD,  // Standard substitution character.
997
0
                           NULL,    // Don't care about number of substitutions.
998
0
                           &errorCode);
999
0
      } else {
1000
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1001
0
      }
1002
0
    }
1003
0
    if(U_SUCCESS(errorCode)) {
1004
0
      sink.Append(utf8, length8);
1005
0
      sink.Flush();
1006
0
    }
1007
0
    if(utf8IsOwned) {
1008
0
      uprv_free(utf8);
1009
0
    }
1010
0
  }
1011
0
}
1012
1013
int32_t
1014
0
UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
1015
0
  int32_t length32=0;
1016
0
  if(U_SUCCESS(errorCode)) {
1017
    // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1018
0
    u_strToUTF32WithSub(utf32, capacity, &length32,
1019
0
        getBuffer(), length(),
1020
0
        0xfffd,  // Substitution character.
1021
0
        NULL,    // Don't care about number of substitutions.
1022
0
        &errorCode);
1023
0
  }
1024
0
  return length32;
1025
0
}
1026
1027
int32_t 
1028
UnicodeString::indexOf(const UChar *srcChars,
1029
               int32_t srcStart,
1030
               int32_t srcLength,
1031
               int32_t start,
1032
               int32_t length) const
1033
0
{
1034
0
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1035
0
    return -1;
1036
0
  }
1037
1038
  // UnicodeString does not find empty substrings
1039
0
  if(srcLength < 0 && srcChars[srcStart] == 0) {
1040
0
    return -1;
1041
0
  }
1042
1043
  // get the indices within bounds
1044
0
  pinIndices(start, length);
1045
1046
  // find the first occurrence of the substring
1047
0
  const UChar *array = getArrayStart();
1048
0
  const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
1049
0
  if(match == NULL) {
1050
0
    return -1;
1051
0
  } else {
1052
0
    return (int32_t)(match - array);
1053
0
  }
1054
0
}
1055
1056
int32_t
1057
UnicodeString::doIndexOf(UChar c,
1058
             int32_t start,
1059
             int32_t length) const
1060
0
{
1061
  // pin indices
1062
0
  pinIndices(start, length);
1063
1064
  // find the first occurrence of c
1065
0
  const UChar *array = getArrayStart();
1066
0
  const UChar *match = u_memchr(array + start, c, length);
1067
0
  if(match == NULL) {
1068
0
    return -1;
1069
0
  } else {
1070
0
    return (int32_t)(match - array);
1071
0
  }
1072
0
}
1073
1074
int32_t
1075
UnicodeString::doIndexOf(UChar32 c,
1076
                         int32_t start,
1077
0
                         int32_t length) const {
1078
  // pin indices
1079
0
  pinIndices(start, length);
1080
1081
  // find the first occurrence of c
1082
0
  const UChar *array = getArrayStart();
1083
0
  const UChar *match = u_memchr32(array + start, c, length);
1084
0
  if(match == NULL) {
1085
0
    return -1;
1086
0
  } else {
1087
0
    return (int32_t)(match - array);
1088
0
  }
1089
0
}
1090
1091
int32_t 
1092
UnicodeString::lastIndexOf(const UChar *srcChars,
1093
               int32_t srcStart,
1094
               int32_t srcLength,
1095
               int32_t start,
1096
               int32_t length) const
1097
0
{
1098
0
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1099
0
    return -1;
1100
0
  }
1101
1102
  // UnicodeString does not find empty substrings
1103
0
  if(srcLength < 0 && srcChars[srcStart] == 0) {
1104
0
    return -1;
1105
0
  }
1106
1107
  // get the indices within bounds
1108
0
  pinIndices(start, length);
1109
1110
  // find the last occurrence of the substring
1111
0
  const UChar *array = getArrayStart();
1112
0
  const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1113
0
  if(match == NULL) {
1114
0
    return -1;
1115
0
  } else {
1116
0
    return (int32_t)(match - array);
1117
0
  }
1118
0
}
1119
1120
int32_t
1121
UnicodeString::doLastIndexOf(UChar c,
1122
                 int32_t start,
1123
                 int32_t length) const
1124
0
{
1125
0
  if(isBogus()) {
1126
0
    return -1;
1127
0
  }
1128
1129
  // pin indices
1130
0
  pinIndices(start, length);
1131
1132
  // find the last occurrence of c
1133
0
  const UChar *array = getArrayStart();
1134
0
  const UChar *match = u_memrchr(array + start, c, length);
1135
0
  if(match == NULL) {
1136
0
    return -1;
1137
0
  } else {
1138
0
    return (int32_t)(match - array);
1139
0
  }
1140
0
}
1141
1142
int32_t
1143
UnicodeString::doLastIndexOf(UChar32 c,
1144
                             int32_t start,
1145
0
                             int32_t length) const {
1146
  // pin indices
1147
0
  pinIndices(start, length);
1148
1149
  // find the last occurrence of c
1150
0
  const UChar *array = getArrayStart();
1151
0
  const UChar *match = u_memrchr32(array + start, c, length);
1152
0
  if(match == NULL) {
1153
0
    return -1;
1154
0
  } else {
1155
0
    return (int32_t)(match - array);
1156
0
  }
1157
0
}
1158
1159
//========================================
1160
// Write implementation
1161
//========================================
1162
1163
UnicodeString& 
1164
UnicodeString::findAndReplace(int32_t start,
1165
                  int32_t length,
1166
                  const UnicodeString& oldText,
1167
                  int32_t oldStart,
1168
                  int32_t oldLength,
1169
                  const UnicodeString& newText,
1170
                  int32_t newStart,
1171
                  int32_t newLength)
1172
0
{
1173
0
  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1174
0
    return *this;
1175
0
  }
1176
1177
0
  pinIndices(start, length);
1178
0
  oldText.pinIndices(oldStart, oldLength);
1179
0
  newText.pinIndices(newStart, newLength);
1180
1181
0
  if(oldLength == 0) {
1182
0
    return *this;
1183
0
  }
1184
1185
0
  while(length > 0 && length >= oldLength) {
1186
0
    int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1187
0
    if(pos < 0) {
1188
      // no more oldText's here: done
1189
0
      break;
1190
0
    } else {
1191
      // we found oldText, replace it by newText and go beyond it
1192
0
      replace(pos, oldLength, newText, newStart, newLength);
1193
0
      length -= pos + oldLength - start;
1194
0
      start = pos + newLength;
1195
0
    }
1196
0
  }
1197
1198
0
  return *this;
1199
0
}
1200
1201
1202
void
1203
UnicodeString::setToBogus()
1204
0
{
1205
0
  releaseArray();
1206
1207
0
  fUnion.fFields.fLengthAndFlags = kIsBogus;
1208
0
  fUnion.fFields.fArray = 0;
1209
0
  fUnion.fFields.fCapacity = 0;
1210
0
}
1211
1212
// turn a bogus string into an empty one
1213
void
1214
0
UnicodeString::unBogus() {
1215
0
  if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1216
0
    setToEmpty();
1217
0
  }
1218
0
}
1219
1220
const char16_t *
1221
0
UnicodeString::getTerminatedBuffer() {
1222
0
  if(!isWritable()) {
1223
0
    return nullptr;
1224
0
  }
1225
0
  UChar *array = getArrayStart();
1226
0
  int32_t len = length();
1227
0
  if(len < getCapacity()) {
1228
0
    if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1229
      // If len<capacity on a read-only alias, then array[len] is
1230
      // either the original NUL (if constructed with (TRUE, s, length))
1231
      // or one of the original string contents characters (if later truncated),
1232
      // therefore we can assume that array[len] is initialized memory.
1233
0
      if(array[len] == 0) {
1234
0
        return array;
1235
0
      }
1236
0
    } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1237
      // kRefCounted: Do not write the NUL if the buffer is shared.
1238
      // That is mostly safe, except when the length of one copy was modified
1239
      // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1240
      // Then the NUL would be written into the middle of another copy's string.
1241
1242
      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1243
      // Do not test if there is a NUL already because it might be uninitialized memory.
1244
      // (That would be safe, but tools like valgrind & Purify would complain.)
1245
0
      array[len] = 0;
1246
0
      return array;
1247
0
    }
1248
0
  }
1249
0
  if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1250
0
    array = getArrayStart();
1251
0
    array[len] = 0;
1252
0
    return array;
1253
0
  } else {
1254
0
    return nullptr;
1255
0
  }
1256
0
}
1257
1258
// setTo() analogous to the readonly-aliasing constructor with the same signature
1259
UnicodeString &
1260
UnicodeString::setTo(UBool isTerminated,
1261
                     ConstChar16Ptr textPtr,
1262
                     int32_t textLength)
1263
0
{
1264
0
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1265
    // do not modify a string that has an "open" getBuffer(minCapacity)
1266
0
    return *this;
1267
0
  }
1268
1269
0
  const UChar *text = textPtr;
1270
0
  if(text == NULL) {
1271
    // treat as an empty string, do not alias
1272
0
    releaseArray();
1273
0
    setToEmpty();
1274
0
    return *this;
1275
0
  }
1276
1277
0
  if( textLength < -1 ||
1278
0
      (textLength == -1 && !isTerminated) ||
1279
0
      (textLength >= 0 && isTerminated && text[textLength] != 0)
1280
0
  ) {
1281
0
    setToBogus();
1282
0
    return *this;
1283
0
  }
1284
1285
0
  releaseArray();
1286
1287
0
  if(textLength == -1) {
1288
    // text is terminated, or else it would have failed the above test
1289
0
    textLength = u_strlen(text);
1290
0
  }
1291
0
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1292
0
  setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1293
0
  return *this;
1294
0
}
1295
1296
// setTo() analogous to the writable-aliasing constructor with the same signature
1297
UnicodeString &
1298
UnicodeString::setTo(UChar *buffer,
1299
                     int32_t buffLength,
1300
0
                     int32_t buffCapacity) {
1301
0
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1302
    // do not modify a string that has an "open" getBuffer(minCapacity)
1303
0
    return *this;
1304
0
  }
1305
1306
0
  if(buffer == NULL) {
1307
    // treat as an empty string, do not alias
1308
0
    releaseArray();
1309
0
    setToEmpty();
1310
0
    return *this;
1311
0
  }
1312
1313
0
  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1314
0
    setToBogus();
1315
0
    return *this;
1316
0
  } else if(buffLength == -1) {
1317
    // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1318
0
    const UChar *p = buffer, *limit = buffer + buffCapacity;
1319
0
    while(p != limit && *p != 0) {
1320
0
      ++p;
1321
0
    }
1322
0
    buffLength = (int32_t)(p - buffer);
1323
0
  }
1324
1325
0
  releaseArray();
1326
1327
0
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
1328
0
  setArray(buffer, buffLength, buffCapacity);
1329
0
  return *this;
1330
0
}
1331
1332
0
UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
1333
0
  unBogus();
1334
0
  int32_t length = utf8.length();
1335
0
  int32_t capacity;
1336
  // The UTF-16 string will be at most as long as the UTF-8 string.
1337
0
  if(length <= US_STACKBUF_SIZE) {
1338
0
    capacity = US_STACKBUF_SIZE;
1339
0
  } else {
1340
0
    capacity = length + 1;  // +1 for the terminating NUL.
1341
0
  }
1342
0
  UChar *utf16 = getBuffer(capacity);
1343
0
  int32_t length16;
1344
0
  UErrorCode errorCode = U_ZERO_ERROR;
1345
0
  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1346
0
      utf8.data(), length,
1347
0
      0xfffd,  // Substitution character.
1348
0
      NULL,    // Don't care about number of substitutions.
1349
0
      &errorCode);
1350
0
  releaseBuffer(length16);
1351
0
  if(U_FAILURE(errorCode)) {
1352
0
    setToBogus();
1353
0
  }
1354
0
  return *this;
1355
0
}
1356
1357
UnicodeString&
1358
UnicodeString::setCharAt(int32_t offset,
1359
             UChar c)
1360
0
{
1361
0
  int32_t len = length();
1362
0
  if(cloneArrayIfNeeded() && len > 0) {
1363
0
    if(offset < 0) {
1364
0
      offset = 0;
1365
0
    } else if(offset >= len) {
1366
0
      offset = len - 1;
1367
0
    }
1368
1369
0
    getArrayStart()[offset] = c;
1370
0
  }
1371
0
  return *this;
1372
0
}
1373
1374
UnicodeString&
1375
UnicodeString::replace(int32_t start,
1376
               int32_t _length,
1377
0
               UChar32 srcChar) {
1378
0
  UChar buffer[U16_MAX_LENGTH];
1379
0
  int32_t count = 0;
1380
0
  UBool isError = FALSE;
1381
0
  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1382
  // We test isError so that the compiler does not complain that we don't.
1383
  // If isError (srcChar is not a valid code point) then count==0 which means
1384
  // we remove the source segment rather than replacing it with srcChar.
1385
0
  return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1386
0
}
1387
1388
UnicodeString&
1389
0
UnicodeString::append(UChar32 srcChar) {
1390
0
  UChar buffer[U16_MAX_LENGTH];
1391
0
  int32_t _length = 0;
1392
0
  UBool isError = FALSE;
1393
0
  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1394
  // We test isError so that the compiler does not complain that we don't.
1395
  // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1396
0
  return isError ? *this : doAppend(buffer, 0, _length);
1397
0
}
1398
1399
UnicodeString&
1400
UnicodeString::doReplace( int32_t start,
1401
              int32_t length,
1402
              const UnicodeString& src,
1403
              int32_t srcStart,
1404
              int32_t srcLength)
1405
5.86M
{
1406
  // pin the indices to legal values
1407
5.86M
  src.pinIndices(srcStart, srcLength);
1408
1409
  // get the characters from src
1410
  // and replace the range in ourselves with them
1411
5.86M
  return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1412
5.86M
}
1413
1414
UnicodeString&
1415
UnicodeString::doReplace(int32_t start,
1416
             int32_t length,
1417
             const UChar *srcChars,
1418
             int32_t srcStart,
1419
             int32_t srcLength)
1420
6.55M
{
1421
6.55M
  if(!isWritable()) {
1422
0
    return *this;
1423
0
  }
1424
1425
6.55M
  int32_t oldLength = this->length();
1426
1427
  // optimize (read-only alias).remove(0, start) and .remove(start, end)
1428
6.55M
  if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
1429
0
    if(start == 0) {
1430
      // remove prefix by adjusting the array pointer
1431
0
      pinIndex(length);
1432
0
      fUnion.fFields.fArray += length;
1433
0
      fUnion.fFields.fCapacity -= length;
1434
0
      setLength(oldLength - length);
1435
0
      return *this;
1436
0
    } else {
1437
0
      pinIndex(start);
1438
0
      if(length >= (oldLength - start)) {
1439
        // remove suffix by reducing the length (like truncate())
1440
0
        setLength(start);
1441
0
        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1442
0
        return *this;
1443
0
      }
1444
0
    }
1445
0
  }
1446
1447
6.55M
  if(start == oldLength) {
1448
1.56k
    return doAppend(srcChars, srcStart, srcLength);
1449
1.56k
  }
1450
1451
6.55M
  if(srcChars == 0) {
1452
0
    srcStart = srcLength = 0;
1453
6.55M
  } else if(srcLength < 0) {
1454
    // get the srcLength if necessary
1455
0
    srcLength = u_strlen(srcChars + srcStart);
1456
0
  }
1457
1458
  // pin the indices to legal values
1459
6.55M
  pinIndices(start, length);
1460
1461
  // Calculate the size of the string after the replace.
1462
  // Avoid int32_t overflow.
1463
6.55M
  int32_t newLength = oldLength - length;
1464
6.55M
  if(srcLength > (INT32_MAX - newLength)) {
1465
0
    setToBogus();
1466
0
    return *this;
1467
0
  }
1468
6.55M
  newLength += srcLength;
1469
1470
  // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1471
  // therefore we need to keep the current fArray
1472
6.55M
  UChar oldStackBuffer[US_STACKBUF_SIZE];
1473
6.55M
  UChar *oldArray;
1474
6.55M
  if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1475
    // copy the stack buffer contents because it will be overwritten with
1476
    // fUnion.fFields values
1477
0
    u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
1478
0
    oldArray = oldStackBuffer;
1479
6.55M
  } else {
1480
6.55M
    oldArray = getArrayStart();
1481
6.55M
  }
1482
1483
  // clone our array and allocate a bigger array if needed
1484
6.55M
  int32_t *bufferToDelete = 0;
1485
6.55M
  if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
1486
6.55M
                         FALSE, &bufferToDelete)
1487
6.55M
  ) {
1488
0
    return *this;
1489
0
  }
1490
1491
  // now do the replace
1492
1493
6.55M
  UChar *newArray = getArrayStart();
1494
6.55M
  if(newArray != oldArray) {
1495
    // if fArray changed, then we need to copy everything except what will change
1496
1.34k
    us_arrayCopy(oldArray, 0, newArray, 0, start);
1497
1.34k
    us_arrayCopy(oldArray, start + length,
1498
1.34k
                 newArray, start + srcLength,
1499
1.34k
                 oldLength - (start + length));
1500
6.55M
  } else if(length != srcLength) {
1501
    // fArray did not change; copy only the portion that isn't changing, leaving a hole
1502
5.86M
    us_arrayCopy(oldArray, start + length,
1503
5.86M
                 newArray, start + srcLength,
1504
5.86M
                 oldLength - (start + length));
1505
5.86M
  }
1506
1507
  // now fill in the hole with the new string
1508
6.55M
  us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1509
1510
6.55M
  setLength(newLength);
1511
1512
  // delayed delete in case srcChars == fArray when we started, and
1513
  // to keep oldArray alive for the above operations
1514
6.55M
  if (bufferToDelete) {
1515
215
    uprv_free(bufferToDelete);
1516
215
  }
1517
1518
6.55M
  return *this;
1519
6.55M
}
1520
1521
// Versions of doReplace() only for append() variants.
1522
// doReplace() and doAppend() optimize for different cases.
1523
1524
UnicodeString&
1525
0
UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1526
0
  if(srcLength == 0) {
1527
0
    return *this;
1528
0
  }
1529
1530
  // pin the indices to legal values
1531
0
  src.pinIndices(srcStart, srcLength);
1532
0
  return doAppend(src.getArrayStart(), srcStart, srcLength);
1533
0
}
1534
1535
UnicodeString&
1536
744k
UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
1537
744k
  if(!isWritable() || srcLength == 0 || srcChars == NULL) {
1538
0
    return *this;
1539
0
  }
1540
1541
744k
  if(srcLength < 0) {
1542
    // get the srcLength if necessary
1543
0
    if((srcLength = u_strlen(srcChars + srcStart)) == 0) {
1544
0
      return *this;
1545
0
    }
1546
0
  }
1547
1548
744k
  int32_t oldLength = length();
1549
744k
  int32_t newLength = oldLength + srcLength;
1550
  // optimize append() onto a large-enough, owned string
1551
744k
  if((newLength <= getCapacity() && isBufferWritable()) ||
1552
744k
      cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
1553
744k
    UChar *newArray = getArrayStart();
1554
    // Do not copy characters when
1555
    //   UChar *buffer=str.getAppendBuffer(...);
1556
    // is followed by
1557
    //   str.append(buffer, length);
1558
    // or
1559
    //   str.appendString(buffer, length)
1560
    // or similar.
1561
744k
    if(srcChars + srcStart != newArray + oldLength) {
1562
744k
      us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength);
1563
744k
    }
1564
744k
    setLength(newLength);
1565
744k
  }
1566
744k
  return *this;
1567
744k
}
1568
1569
/**
1570
 * Replaceable API
1571
 */
1572
void
1573
UnicodeString::handleReplaceBetween(int32_t start,
1574
                                    int32_t limit,
1575
0
                                    const UnicodeString& text) {
1576
0
    replaceBetween(start, limit, text);
1577
0
}
1578
1579
/**
1580
 * Replaceable API
1581
 */
1582
void 
1583
0
UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1584
0
    if (limit <= start) {
1585
0
        return; // Nothing to do; avoid bogus malloc call
1586
0
    }
1587
0
    UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1588
    // Check to make sure text is not null.
1589
0
    if (text != NULL) {
1590
0
      extractBetween(start, limit, text, 0);
1591
0
      insert(dest, text, 0, limit - start);    
1592
0
      uprv_free(text);
1593
0
    }
1594
0
}
1595
1596
/**
1597
 * Replaceable API
1598
 *
1599
 * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1600
 * so we implement this function here.
1601
 */
1602
0
UBool Replaceable::hasMetaData() const {
1603
0
    return TRUE;
1604
0
}
1605
1606
/**
1607
 * Replaceable API
1608
 */
1609
0
UBool UnicodeString::hasMetaData() const {
1610
0
    return FALSE;
1611
0
}
1612
1613
UnicodeString&
1614
0
UnicodeString::doReverse(int32_t start, int32_t length) {
1615
0
  if(length <= 1 || !cloneArrayIfNeeded()) {
1616
0
    return *this;
1617
0
  }
1618
1619
  // pin the indices to legal values
1620
0
  pinIndices(start, length);
1621
0
  if(length <= 1) {  // pinIndices() might have shrunk the length
1622
0
    return *this;
1623
0
  }
1624
1625
0
  UChar *left = getArrayStart() + start;
1626
0
  UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1627
0
  UChar swap;
1628
0
  UBool hasSupplementary = FALSE;
1629
1630
  // Before the loop we know left<right because length>=2.
1631
0
  do {
1632
0
    hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1633
0
    hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1634
0
    *right-- = swap;
1635
0
  } while(left < right);
1636
  // Make sure to test the middle code unit of an odd-length string.
1637
  // Redundant if the length is even.
1638
0
  hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1639
1640
  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1641
0
  if(hasSupplementary) {
1642
0
    UChar swap2;
1643
1644
0
    left = getArrayStart() + start;
1645
0
    right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1646
0
    while(left < right) {
1647
0
      if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1648
0
        *left++ = swap2;
1649
0
        *left++ = swap;
1650
0
      } else {
1651
0
        ++left;
1652
0
      }
1653
0
    }
1654
0
  }
1655
1656
0
  return *this;
1657
0
}
1658
1659
UBool 
1660
UnicodeString::padLeading(int32_t targetLength,
1661
                          UChar padChar)
1662
0
{
1663
0
  int32_t oldLength = length();
1664
0
  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1665
0
    return FALSE;
1666
0
  } else {
1667
    // move contents up by padding width
1668
0
    UChar *array = getArrayStart();
1669
0
    int32_t start = targetLength - oldLength;
1670
0
    us_arrayCopy(array, 0, array, start, oldLength);
1671
1672
    // fill in padding character
1673
0
    while(--start >= 0) {
1674
0
      array[start] = padChar;
1675
0
    }
1676
0
    setLength(targetLength);
1677
0
    return TRUE;
1678
0
  }
1679
0
}
1680
1681
UBool 
1682
UnicodeString::padTrailing(int32_t targetLength,
1683
                           UChar padChar)
1684
0
{
1685
0
  int32_t oldLength = length();
1686
0
  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1687
0
    return FALSE;
1688
0
  } else {
1689
    // fill in padding character
1690
0
    UChar *array = getArrayStart();
1691
0
    int32_t length = targetLength;
1692
0
    while(--length >= oldLength) {
1693
0
      array[length] = padChar;
1694
0
    }
1695
0
    setLength(targetLength);
1696
0
    return TRUE;
1697
0
  }
1698
0
}
1699
1700
//========================================
1701
// Hashing
1702
//========================================
1703
int32_t
1704
UnicodeString::doHashCode() const
1705
0
{
1706
    /* Delegate hash computation to uhash.  This makes UnicodeString
1707
     * hashing consistent with UChar* hashing.  */
1708
0
    int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1709
0
    if (hashCode == kInvalidHashCode) {
1710
0
        hashCode = kEmptyHashCode;
1711
0
    }
1712
0
    return hashCode;
1713
0
}
1714
1715
//========================================
1716
// External Buffer
1717
//========================================
1718
1719
char16_t *
1720
19.7M
UnicodeString::getBuffer(int32_t minCapacity) {
1721
19.7M
  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1722
19.7M
    fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1723
19.7M
    setZeroLength();
1724
19.7M
    return getArrayStart();
1725
19.7M
  } else {
1726
0
    return nullptr;
1727
0
  }
1728
19.7M
}
1729
1730
void
1731
19.7M
UnicodeString::releaseBuffer(int32_t newLength) {
1732
19.7M
  if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1733
    // set the new fLength
1734
19.7M
    int32_t capacity=getCapacity();
1735
19.7M
    if(newLength==-1) {
1736
      // the new length is the string length, capped by fCapacity
1737
0
      const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1738
0
      while(p<limit && *p!=0) {
1739
0
        ++p;
1740
0
      }
1741
0
      newLength=(int32_t)(p-array);
1742
19.7M
    } else if(newLength>capacity) {
1743
0
      newLength=capacity;
1744
0
    }
1745
19.7M
    setLength(newLength);
1746
19.7M
    fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1747
19.7M
  }
1748
19.7M
}
1749
1750
//========================================
1751
// Miscellaneous
1752
//========================================
1753
UBool
1754
UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1755
                                  int32_t growCapacity,
1756
                                  UBool doCopyArray,
1757
                                  int32_t **pBufferToDelete,
1758
26.3M
                                  UBool forceClone) {
1759
  // default parameters need to be static, therefore
1760
  // the defaults are -1 to have convenience defaults
1761
26.3M
  if(newCapacity == -1) {
1762
7.56k
    newCapacity = getCapacity();
1763
7.56k
  }
1764
1765
  // while a getBuffer(minCapacity) is "open",
1766
  // prevent any modifications of the string by returning FALSE here
1767
  // if the string is bogus, then only an assignment or similar can revive it
1768
26.3M
  if(!isWritable()) {
1769
0
    return FALSE;
1770
0
  }
1771
1772
  /*
1773
   * We need to make a copy of the array if
1774
   * the buffer is read-only, or
1775
   * the buffer is refCounted (shared), and refCount>1, or
1776
   * the buffer is too small.
1777
   * Return FALSE if memory could not be allocated.
1778
   */
1779
26.3M
  if(forceClone ||
1780
26.3M
     fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1781
26.3M
     (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
1782
26.3M
     newCapacity > getCapacity()
1783
26.3M
  ) {
1784
    // check growCapacity for default value and use of the stack buffer
1785
5.86M
    if(growCapacity < 0) {
1786
5.86M
      growCapacity = newCapacity;
1787
5.86M
    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1788
0
      growCapacity = US_STACKBUF_SIZE;
1789
0
    }
1790
1791
    // save old values
1792
5.86M
    UChar oldStackBuffer[US_STACKBUF_SIZE];
1793
5.86M
    UChar *oldArray;
1794
5.86M
    int32_t oldLength = length();
1795
5.86M
    int16_t flags = fUnion.fFields.fLengthAndFlags;
1796
1797
5.86M
    if(flags&kUsingStackBuffer) {
1798
5.86M
      U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1799
5.86M
      if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1800
        // copy the stack buffer contents because it will be overwritten with
1801
        // fUnion.fFields values
1802
5.86M
        us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
1803
5.86M
        oldArray = oldStackBuffer;
1804
5.86M
      } else {
1805
0
        oldArray = NULL; // no need to copy from the stack buffer to itself
1806
0
      }
1807
5.86M
    } else {
1808
4.63k
      oldArray = fUnion.fFields.fArray;
1809
4.63k
      U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1810
4.63k
    }
1811
1812
    // allocate a new array
1813
5.86M
    if(allocate(growCapacity) ||
1814
5.86M
       (newCapacity < growCapacity && allocate(newCapacity))
1815
5.86M
    ) {
1816
5.86M
      if(doCopyArray) {
1817
        // copy the contents
1818
        // do not copy more than what fits - it may be smaller than before
1819
5.86M
        int32_t minLength = oldLength;
1820
5.86M
        newCapacity = getCapacity();
1821
5.86M
        if(newCapacity < minLength) {
1822
0
          minLength = newCapacity;
1823
0
        }
1824
5.86M
        if(oldArray != NULL) {
1825
5.86M
          us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1826
5.86M
        }
1827
5.86M
        setLength(minLength);
1828
5.86M
      } else {
1829
1.34k
        setZeroLength();
1830
1.34k
      }
1831
1832
      // release the old array
1833
5.86M
      if(flags & kRefCounted) {
1834
        // the array is refCounted; decrement and release if 0
1835
1.35k
        u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1836
1.35k
        if(umtx_atomic_dec(pRefCount) == 0) {
1837
1.35k
          if(pBufferToDelete == 0) {
1838
              // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1839
              // is defined as volatile. (Volatile has useful non-standard behavior
1840
              //   with this compiler.)
1841
1.14k
            uprv_free((void *)pRefCount);
1842
1.14k
          } else {
1843
            // the caller requested to delete it himself
1844
215
            *pBufferToDelete = (int32_t *)pRefCount;
1845
215
          }
1846
1.35k
        }
1847
1.35k
      }
1848
5.86M
    } else {
1849
      // not enough memory for growCapacity and not even for the smaller newCapacity
1850
      // reset the old values for setToBogus() to release the array
1851
0
      if(!(flags&kUsingStackBuffer)) {
1852
0
        fUnion.fFields.fArray = oldArray;
1853
0
      }
1854
0
      fUnion.fFields.fLengthAndFlags = flags;
1855
0
      setToBogus();
1856
0
      return FALSE;
1857
0
    }
1858
5.86M
  }
1859
26.3M
  return TRUE;
1860
26.3M
}
1861
1862
// UnicodeStringAppendable ------------------------------------------------- ***
1863
1864
0
UnicodeStringAppendable::~UnicodeStringAppendable() {}
1865
1866
UBool
1867
0
UnicodeStringAppendable::appendCodeUnit(UChar c) {
1868
0
  return str.doAppend(&c, 0, 1).isWritable();
1869
0
}
1870
1871
UBool
1872
0
UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1873
0
  UChar buffer[U16_MAX_LENGTH];
1874
0
  int32_t cLength = 0;
1875
0
  UBool isError = FALSE;
1876
0
  U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1877
0
  return !isError && str.doAppend(buffer, 0, cLength).isWritable();
1878
0
}
1879
1880
UBool
1881
0
UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1882
0
  return str.doAppend(s, 0, length).isWritable();
1883
0
}
1884
1885
UBool
1886
0
UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1887
0
  return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1888
0
}
1889
1890
UChar *
1891
UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1892
                                         int32_t desiredCapacityHint,
1893
                                         UChar *scratch, int32_t scratchCapacity,
1894
0
                                         int32_t *resultCapacity) {
1895
0
  if(minCapacity < 1 || scratchCapacity < minCapacity) {
1896
0
    *resultCapacity = 0;
1897
0
    return NULL;
1898
0
  }
1899
0
  int32_t oldLength = str.length();
1900
0
  if(minCapacity <= (kMaxCapacity - oldLength) &&
1901
0
      desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1902
0
      str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1903
0
    *resultCapacity = str.getCapacity() - oldLength;
1904
0
    return str.getArrayStart() + oldLength;
1905
0
  }
1906
0
  *resultCapacity = scratchCapacity;
1907
0
  return scratch;
1908
0
}
1909
1910
U_NAMESPACE_END
1911
1912
U_NAMESPACE_USE
1913
1914
U_CAPI int32_t U_EXPORT2
1915
0
uhash_hashUnicodeString(const UElement key) {
1916
0
    const UnicodeString *str = (const UnicodeString*) key.pointer;
1917
0
    return (str == NULL) ? 0 : str->hashCode();
1918
0
}
1919
1920
// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1921
// does not depend on hashtable code.
1922
U_CAPI UBool U_EXPORT2
1923
0
uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1924
0
    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1925
0
    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1926
0
    if (str1 == str2) {
1927
0
        return TRUE;
1928
0
    }
1929
0
    if (str1 == NULL || str2 == NULL) {
1930
0
        return FALSE;
1931
0
    }
1932
0
    return *str1 == *str2;
1933
0
}
1934
1935
#ifdef U_STATIC_IMPLEMENTATION
1936
/*
1937
This should never be called. It is defined here to make sure that the
1938
virtual vector deleting destructor is defined within unistr.cpp.
1939
The vector deleting destructor is already a part of UObject,
1940
but defining it here makes sure that it is included with this object file.
1941
This makes sure that static library dependencies are kept to a minimum.
1942
*/
1943
static void uprv_UnicodeStringDummy(void) {
1944
    delete [] (new UnicodeString[2]);
1945
}
1946
#endif