Coverage Report

Created: 2023-02-22 06:51

/src/icu/source/common/unistr.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
* Copyright (C) 1999-2016, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
******************************************************************************
8
*
9
* File unistr.cpp
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   09/25/98    stephen     Creation.
15
*   04/20/99    stephen     Overhauled per 4/16 code review.
16
*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17
*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
18
*                           Replaceable.
19
*   06/25/01    grhoten     Removed the dependency on iostream
20
******************************************************************************
21
*/
22
23
#include "unicode/utypes.h"
24
#include "unicode/appendable.h"
25
#include "unicode/putil.h"
26
#include "cstring.h"
27
#include "cmemory.h"
28
#include "unicode/ustring.h"
29
#include "unicode/unistr.h"
30
#include "unicode/utf.h"
31
#include "unicode/utf16.h"
32
#include "uelement.h"
33
#include "ustr_imp.h"
34
#include "umutex.h"
35
#include "uassert.h"
36
37
#if 0
38
39
#include <iostream>
40
using namespace std;
41
42
//DEBUGGING
43
void
44
print(const UnicodeString& s,
45
      const char *name)
46
{
47
  UChar c;
48
  cout << name << ":|";
49
  for(int i = 0; i < s.length(); ++i) {
50
    c = s[i];
51
    if(c>= 0x007E || c < 0x0020)
52
      cout << "[0x" << hex << s[i] << "]";
53
    else
54
      cout << (char) s[i];
55
  }
56
  cout << '|' << endl;
57
}
58
59
void
60
print(const UChar *s,
61
      int32_t len,
62
      const char *name)
63
{
64
  UChar c;
65
  cout << name << ":|";
66
  for(int i = 0; i < len; ++i) {
67
    c = s[i];
68
    if(c>= 0x007E || c < 0x0020)
69
      cout << "[0x" << hex << s[i] << "]";
70
    else
71
      cout << (char) s[i];
72
  }
73
  cout << '|' << endl;
74
}
75
// END DEBUGGING
76
#endif
77
78
// Local function definitions for now
79
80
// need to copy areas that may overlap
81
static
82
inline void
83
us_arrayCopy(const UChar *src, int32_t srcStart,
84
         UChar *dst, int32_t dstStart, int32_t count)
85
0
{
86
0
  if(count>0) {
87
0
    uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88
0
  }
89
0
}
90
91
// u_unescapeAt() callback to get a UChar from a UnicodeString
92
U_CDECL_BEGIN
93
static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset, void *context) {
    // context is the UnicodeString that was passed as the opaque callback argument.
    icu::UnicodeString *str = static_cast<icu::UnicodeString *>(context);
    return str->charAt(offset);
}
97
U_CDECL_END
98
99
U_NAMESPACE_BEGIN
100
101
/* The Replaceable virtual destructor can't be defined in the header
102
   due to how AIX works with multiple definitions of virtual functions.
103
*/
104
0
Replaceable::~Replaceable() {
  // Intentionally empty; defined out of line.
}
105
106
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108
// Concatenation: returns a new string holding the contents of s1 followed by s2.
UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    // Request capacity for both operands plus one extra unit up front,
    // with an initial length of zero.
    const int32_t capacity = s1.length()+s2.length()+1;
    UnicodeString result(capacity, (UChar32)0, 0);
    result.append(s1);
    result.append(s2);
    return result;
}
115
116
//========================================
117
// Reference Counting functions, put at top of file so that optimizing compilers
118
//                               have a chance to automatically inline.
119
//========================================
120
121
void
122
0
UnicodeString::addRef() {
123
0
  umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124
0
}
125
126
// Atomically decrements the reference counter stored immediately before fArray
// and returns the value reported by umtx_atomic_dec().
int32_t
UnicodeString::removeRef() {
  u_atomic_int32_t *counter = (u_atomic_int32_t *)fUnion.fFields.fArray - 1;
  return umtx_atomic_dec(counter);
}
130
131
// Reads the reference counter stored immediately before fArray
// with an acquire load.
int32_t
UnicodeString::refCount() const {
  u_atomic_int32_t *counter = (u_atomic_int32_t *)fUnion.fFields.fArray - 1;
  return umtx_loadAcquire(*counter);
}
135
136
void
137
0
UnicodeString::releaseArray() {
138
0
  if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
139
0
    uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140
0
  }
141
0
}
142
143
144
145
//========================================
146
// Constructors
147
//========================================
148
149
// The default constructor is inline in unistr.h.
150
151
0
// Constructs a string with at least the requested capacity, filled with
// count copies of code point c.
// If count<=0 or c is not a code point (> 0x10ffff), or if a supplementary
// c would need more than INT32_MAX UChars, only the buffer is allocated
// and nothing is written.
UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
  fUnion.fFields.fLengthAndFlags = 0;
  if(count <= 0 || (uint32_t)c > 0x10ffff) {
    // just allocate and do not do anything else
    allocate(capacity);
    return;
  }

  const UBool isBMP = c <= 0xffff;
  if(!isBMP && count > (INT32_MAX / 2)) {
    // Surrogate pairs would give us more than 2G UChars.
    allocate(capacity);
    return;
  }

  // One unit per BMP code point, two (a surrogate pair) per supplementary one.
  const int32_t unitCount = isBMP ? count : count * 2;
  if(capacity < unitCount) {
    capacity = unitCount;
  }
  if(!allocate(capacity)) {
    return;
  }

  UChar *p = getArrayStart();
  if(isBMP) {
    const UChar unit = (UChar)c;
    for(int32_t n = 0; n < count; ++n) {
      *p++ = unit;
    }
  } else {
    const UChar lead = U16_LEAD(c);
    const UChar trail = U16_TRAIL(c);
    for(int32_t n = 0; n < count; ++n) {
      *p++ = lead;
      *p++ = trail;
    }
  }
  setLength(unitCount);
}
191
192
0
// Constructs a string from a single code unit: stores ch in the first slot of
// the stack buffer and sets the flags to kLength1 | kShortString.
UnicodeString::UnicodeString(UChar ch) {
  fUnion.fStackFields.fBuffer[0] = ch;
  fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
}
196
197
0
// Constructs a string from a single code point, encoded into the stack buffer
// with U16_APPEND. If the macro reports an error, the length is left untouched.
UnicodeString::UnicodeString(UChar32 ch) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  int32_t unitsWritten = 0;
  UBool failed = FALSE;
  U16_APPEND(fUnion.fStackFields.fBuffer, unitsWritten, US_STACKBUF_SIZE, ch, failed);
  // The error flag is tested so that the compiler does not complain about it
  // being unused; on error unitsWritten is still 0, which is what we want anyway.
  if(failed) {
    return;
  }
  setShortLength(unitsWritten);
}
208
209
0
// Constructs a string by appending text to an initially empty short string.
// The length argument -1 is passed on to doAppend().
UnicodeString::UnicodeString(const UChar *text)
{
  fUnion.fFields.fLengthAndFlags = kShortString;
  doAppend(text, 0, -1);
}
213
214
// Constructs a string by appending textLength units of text
// to an initially empty short string.
UnicodeString::UnicodeString(const UChar *text, int32_t textLength)
{
  fUnion.fFields.fLengthAndFlags = kShortString;
  doAppend(text, 0, textLength);
}
219
220
// Read-only aliasing constructor.
// - NULL text yields an empty string that does not alias anything.
// - Inconsistent arguments (textLength < -1, textLength == -1 without
//   isTerminated, or isTerminated but text[textLength] != 0) yield a bogus string.
// - Otherwise the string aliases text; textLength == -1 means "measure with u_strlen".
UnicodeString::UnicodeString(UBool isTerminated,
                             ConstChar16Ptr textPtr,
                             int32_t textLength) {
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
  const UChar *text = textPtr;
  if(text == NULL) {
    // treat as an empty string, do not alias
    setToEmpty();
    return;
  }
  if(textLength < -1 ||
     (textLength == -1 && !isTerminated) ||
     (textLength >= 0 && isTerminated && text[textLength] != 0)) {
    setToBogus();
    return;
  }
  if(textLength == -1) {
    // text is terminated, or else it would have failed the test above
    textLength = u_strlen(text);
  }
  // A terminated buffer has one more unit (the NUL) of capacity than length.
  const int32_t capacity = isTerminated ? textLength + 1 : textLength;
  setArray(const_cast<UChar *>(text), textLength, capacity);
}
242
243
// Writable aliasing constructor.
// - NULL buff yields an empty string that does not alias anything.
// - buffLength < -1, buffCapacity < 0 or buffLength > buffCapacity yield a bogus string.
// - buffLength == -1 means "find the first NUL", without looking beyond buffCapacity.
UnicodeString::UnicodeString(UChar *buff,
                             int32_t buffLength,
                             int32_t buffCapacity) {
  fUnion.fFields.fLengthAndFlags = kWritableAlias;
  if(buff == NULL) {
    // treat as an empty string, do not alias
    setToEmpty();
    return;
  }
  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    setToBogus();
    return;
  }
  if(buffLength == -1) {
    // like u_strlen(buff), but bounded by buffCapacity
    int32_t n = 0;
    while(n < buffCapacity && buff[n] != 0) {
      ++n;
    }
    buffLength = n;
  }
  setArray(buff, buffLength, buffCapacity);
}
264
265
0
// Constructs a string from length chars of src, converted with u_charsToUChars().
// NULL src yields an empty string; a negative length means "use strlen(src)".
// The string becomes bogus if the buffer cannot be provided.
UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  if(src==NULL) {
    // treat as an empty string
    return;
  }
  if(length<0) {
    length=(int32_t)uprv_strlen(src);
  }
  if(!cloneArrayIfNeeded(length, length, FALSE)) {
    setToBogus();
    return;
  }
  u_charsToUChars(src, getArrayStart(), length);
  setLength(length);
}
281
282
#if U_CHARSET_IS_UTF8
283
284
0
// Constructs a string from codepageData via setToUTF8()
// (this overload is only compiled when U_CHARSET_IS_UTF8).
// A null pointer yields an empty string.
UnicodeString::UnicodeString(const char *codepageData) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  if(codepageData == 0) {
    return;
  }
  setToUTF8(codepageData);
}
290
291
0
// Constructs a string from dataLength bytes of codepageData via setToUTF8()
// (this overload is only compiled when U_CHARSET_IS_UTF8).
// dataLength == -1 means "use strlen(codepageData)". A null pointer, a zero
// length or a length below -1 yields an empty string.
UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  // if there's nothing to convert, do nothing
  const UBool nothingToDo = codepageData == 0 || dataLength == 0 || dataLength < -1;
  if(nothingToDo) {
    return;
  }
  const int32_t byteCount =
      (dataLength == -1) ? (int32_t)uprv_strlen(codepageData) : dataLength;
  setToUTF8(StringPiece(codepageData, byteCount));
}
302
303
// else see unistr_cnv.cpp
304
#endif
305
306
0
// Copy constructor: starts as an empty short string, then delegates to copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
{
  fUnion.fFields.fLengthAndFlags = kShortString;
  copyFrom(that);
}
310
311
0
// Move constructor: takes over src's fields via copyFieldsFrom(),
// asking it to set src to bogus.
UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT
{
  copyFieldsFrom(src, TRUE);
}
314
315
// Substring constructor: starts as an empty short string,
// then delegates to setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that, int32_t srcStart)
{
  fUnion.fFields.fLengthAndFlags = kShortString;
  setTo(that, srcStart);
}
320
321
// Substring constructor: starts as an empty short string,
// then delegates to setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that, int32_t srcStart, int32_t srcLength)
{
  fUnion.fFields.fLengthAndFlags = kShortString;
  setTo(that, srcStart, srcLength);
}
327
328
// Replaceable base class clone() default implementation, does not clone
329
Replaceable *
Replaceable::clone() const
{
  // The base class does not know how to clone itself; it always returns NULL.
  return NULL;
}
333
334
// UnicodeString overrides clone() with a real implementation
335
UnicodeString *
UnicodeString::clone() const
{
  // Heap-allocates a copy of this string using the copy constructor.
  UnicodeString *copy = new UnicodeString(*this);
  return copy;
}
339
340
//========================================
341
// array allocation
342
//========================================
343
344
namespace {

// Fixed part of the extra capacity added by getGrowCapacity().
const int32_t kGrowSize = 128;

// The number of bytes for one int32_t reference counter and capacity UChars
// must fit into a 32-bit size_t (at least when on a 32-bit platform).
// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
// and round up to a multiple of 16 bytes.
// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns newLength plus a quarter of newLength plus kGrowSize,
// capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t extra = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return (extra <= headroom) ? (newLength + extra) : kMaxCapacity;
}

}  // namespace
367
368
// Sets up storage for at least capacity UChars.
// - capacity <= US_STACKBUF_SIZE: switches to the stack buffer (kShortString).
// - capacity <= kMaxCapacity: heap-allocates a block that starts with an
//   int32_t reference counter (initialized to 1) followed by the UChars.
// - otherwise, or if the allocation fails: the string becomes bogus.
// Returns TRUE on success, FALSE when the string was set to bogus.
UBool
UnicodeString::allocate(int32_t capacity) {
  if(capacity <= US_STACKBUF_SIZE) {
    fUnion.fFields.fLengthAndFlags = kShortString;
    return TRUE;
  }
  if(capacity <= kMaxCapacity) {
    ++capacity;  // for the NUL
    // Switch to size_t which is unsigned so that we can allocate up to 4GB.
    // Reference counter + UChars, rounded up to a multiple of 16.
    const size_t rawBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
    const size_t totalBytes = (rawBytes + 15) & ~15;
    int32_t *block = (int32_t *) uprv_malloc(totalBytes);
    if(block != NULL) {
      // set the initial refCount
      block[0] = 1;
      // have fArray point to the first UChar, just behind the refCount
      fUnion.fFields.fArray = (UChar *)(block + 1);
      // the rounding may have produced more room than was asked for
      fUnion.fFields.fCapacity = (int32_t)((totalBytes - sizeof(int32_t)) / U_SIZEOF_UCHAR);
      fUnion.fFields.fLengthAndFlags = kLongString;
      return TRUE;
    }
  }
  fUnion.fFields.fLengthAndFlags = kIsBogus;
  fUnion.fFields.fArray = 0;
  fUnion.fFields.fCapacity = 0;
  return FALSE;
}
399
400
//========================================
401
// Destructor
402
//========================================
403
404
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
405
static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
406
static u_atomic_int32_t beyondCount(0);
407
408
// Prints the counters for final lengths 0..59 individually, then a single
// total for everything longer (remaining counters plus beyondCount).
U_CAPI void unistr_printLengths() {
  int32_t i = 0;
  while(i <= 59) {
    printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
    ++i;
  }
  int32_t beyond = beyondCount;
  while(i < UPRV_LENGTHOF(finalLengthCounts)) {
    beyond += finalLengthCounts[i];
    ++i;
  }
  printf(">59, %9d\n", beyond);
}
419
#endif
420
421
// Destructor: releases the reference-counted buffer, if any.
UnicodeString::~UnicodeString()
{
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
  // Count lengths of strings at the end of their lifetime.
  // Useful for discussion of a desirable stack buffer size.
  // Count the contents length, not the optional NUL terminator nor further capacity.
  // Ignore open-buffer strings and strings which alias external storage.
  const int32_t ignoredKinds = kOpenGetBuffer|kReadonlyAlias|kWritableAlias;
  if((fUnion.fFields.fLengthAndFlags & ignoredKinds) == 0) {
    u_atomic_int32_t *counter =
        hasShortLength() ? finalLengthCounts + getShortLength() : &beyondCount;
    umtx_atomic_inc(counter);
  }
#endif

  releaseArray();
}
439
440
//========================================
441
// Factory methods
442
//========================================
443
444
0
// Factory: returns a new string set from utf8 via setToUTF8().
UnicodeString UnicodeString::fromUTF8(StringPiece utf8)
{
  UnicodeString converted;
  converted.setToUTF8(utf8);
  return converted;
}
449
450
0
// Factory: converts length UTF-32 code points to UTF-16 with
// u_strFromUTF32WithSub(), using U+FFFD as the substitution character.
// On buffer overflow the conversion is retried with the reported length;
// any other failure sets the result to bogus.
UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
  UnicodeString result;
  // Most UTF-32 strings will be BMP-only and result in a same-length
  // UTF-16 string. We overestimate the capacity just slightly,
  // just in case there are a few supplementary characters.
  int32_t capacity =
      (length <= US_STACKBUF_SIZE) ? US_STACKBUF_SIZE : length + (length >> 4) + 4;
  for(;;) {
    UChar *utf16 = result.getBuffer(capacity);
    int32_t length16;
    UErrorCode errorCode = U_ZERO_ERROR;
    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
        utf32, length,
        0xfffd,  // Substitution character.
        NULL,    // Don't care about number of substitutions.
        &errorCode);
    result.releaseBuffer(length16);
    if(errorCode != U_BUFFER_OVERFLOW_ERROR) {
      if(U_FAILURE(errorCode)) {
        result.setToBogus();
      }
      break;
    }
    // Too small: try again with the length that was reported.
    capacity = length16 + 1;  // +1 for the terminating NUL.
  }
  return result;
}
481
482
//========================================
483
// Assignment
484
//========================================
485
486
// Copy assignment: delegates to copyFrom() with its default fastCopy argument.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src)
{
  return copyFrom(src);
}
490
491
// Like assignment, but calls copyFrom() with fastCopy=TRUE,
// which keeps a read-only alias as an alias instead of copying its contents.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src)
{
  return copyFrom(src, TRUE);
}
495
496
// Makes this string a copy of src and returns *this.
// - Self-assignment is a no-op; a bogus src makes this string bogus;
//   an empty src makes this string empty.
// - Stack-buffer strings are copied; reference-counted buffers are shared.
// - A read-only alias is kept as an alias only if fastCopy is set;
//   otherwise, and for writable aliases, the contents are copied into
//   newly allocated storage. If that allocation fails, this string becomes bogus.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  if(this == &src) {
    return *this;
  }

  // is the right side bogus?
  if(src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
  switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
                getShortLength() * U_SIZEOF_UCHAR);
    return *this;

  case kLongString:
    // src uses a refCounted string buffer: bump its counter and share the buffer.
    // src is const, cast it away - we don't actually change the string itself
    const_cast<UnicodeString &>(src).addRef();
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    if(!hasShortLength()) {
      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
    }
    return *this;

  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      if(!hasShortLength()) {
        fUnion.fFields.fLength = src.fUnion.fFields.fLength;
      }
      return *this;
    }
    // !fastCopy: fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
    U_FALLTHROUGH;
  case kWritableAlias: {
    // src aliases external storage; we make our own copy of the contents
    const int32_t srcLength = src.length();
    if(allocate(srcLength)) {
      u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
      setLength(srcLength);
      return *this;
    }
    // if there is not enough memory, then fall through to setting to bogus
    U_FALLTHROUGH;
  }
  default:
    // do not call setToBogus() here because fArray and flags are not consistent here
    fUnion.fFields.fLengthAndFlags = kIsBogus;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
    break;
  }

  return *this;
}
573
574
0
// Move assignment: releases the current buffer, then takes over src's fields.
UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT
{
  // Like the standard library, this does not check for self move assignment.
  // A self move causes no crash nor leak but might make the object bogus.
  releaseArray();
  copyFieldsFrom(src, TRUE);
  return *this;
}
581
582
// Same as move assignment except without memory management.
583
0
// Copies src's fields into this object without any memory management:
// nothing is released and no reference counter is touched.
// If src does not use its stack buffer and setSrcToBogus is set, src is
// marked bogus afterwards so that the buffer has a single owner.
void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
  const int16_t flags = src.fUnion.fFields.fLengthAndFlags;
  fUnion.fFields.fLengthAndFlags = flags;

  if(flags & kUsingStackBuffer) {
    // Short string using the stack buffer, copy the contents.
    // Skip the copy on self assignment to prevent "overlap in memcpy" warnings,
    // although it should be harmless to copy a buffer to itself exactly.
    if(this != &src) {
      uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
                  getShortLength() * U_SIZEOF_UCHAR);
    }
    return;
  }

  // In all other cases, copy all fields.
  fUnion.fFields.fArray = src.fUnion.fFields.fArray;
  fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
  if(!hasShortLength()) {
    fUnion.fFields.fLength = src.fUnion.fFields.fLength;
  }
  if(!setSrcToBogus) {
    return;
  }
  // Set src to bogus without releasing any memory.
  src.fUnion.fFields.fLengthAndFlags = kIsBogus;
  src.fUnion.fFields.fArray = NULL;
  src.fUnion.fFields.fCapacity = 0;
}
608
609
0
// Exchanges the contents of this string and other by rotating the raw fields
// through a temporary; no buffer is copied or released.
void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT
{
  // A default-constructed string is an empty short string,
  // which is known not to need releaseArray().
  UnicodeString scratch;
  // Copy fields without resetting source values in between.
  scratch.copyFieldsFrom(*this, FALSE);
  this->copyFieldsFrom(other, FALSE);
  other.copyFieldsFrom(scratch, FALSE);
  // Reset scratch to an empty string so that its destructor
  // does not release the memory that other now holds.
  scratch.fUnion.fFields.fLengthAndFlags = kShortString;
}
618
619
//========================================
620
// Miscellaneous operations
621
//========================================
622
623
0
// Returns a copy of this string with backslash escape sequences replaced by
// the code points that unescapeAt() decodes for them.
// Returns an empty string if any escape sequence is invalid, and a bogus
// string if the result buffer could not be set up.
UnicodeString UnicodeString::unescape() const {
    UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
    if (result.isBogus()) {
        return result;
    }
    const UChar *array = getBuffer();
    const int32_t len = length();
    int32_t prev = 0;  // start of the literal run that has not been appended yet
    int32_t i = 0;
    while (i != len) {
        if (array[i++] != 0x5C /*'\\'*/) {
            continue;
        }
        // Flush the literal text before the backslash, then decode the escape.
        result.append(array, prev, (i - 1) - prev);
        UChar32 c = unescapeAt(i); // advances i
        if (c < 0) {
            // invalid escape sequence: return empty string
            result.remove();
            return result;
        }
        result.append(c);
        prev = i;
    }
    // Flush whatever follows the last escape sequence.
    result.append(array, prev, len - prev);
    return result;
}
649
650
0
// Decodes one escape sequence at offset by calling u_unescapeAt() with
// UnicodeString_charAt as the character callback and this string as its
// context. offset is passed by address so that u_unescapeAt() can update it.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const
{
    void *context = (void*)this;
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), context);
}
653
654
//========================================
655
// Read-only implementation
656
//========================================
657
// Returns whether the first len UChars of this string and text are identical.
// Requires: this & text not bogus and have same lengths.
UBool
UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
  // Byte-wise comparison works for equality regardless of endianness.
  const UChar *mine = getArrayStart();
  const UChar *theirs = text.getArrayStart();
  return uprv_memcmp(mine, theirs, len * U_SIZEOF_UCHAR) == 0;
}
663
664
// Compares the substring [start, start+length) of this string with
// srcLength UChars of srcChars starting at srcStart, in code unit order.
//
// start/length are pinned to this string's bounds first.
// A negative srcLength means that the source is NUL-terminated and is
// measured with u_strlen().
//
// Returns -1 if this string is bogus. If srcChars is NULL it is treated as
// an empty string: the result is 0 if the pinned length is 0, else 1.
// Otherwise returns -1 or 1 according to the first differing code unit, or,
// if one range is a prefix of the other, according to the lengths (0 if equal).
int8_t
UnicodeString::doCompare( int32_t start,
              int32_t length,
              const UChar *srcChars,
              int32_t srcStart,
              int32_t srcLength) const
{
  // compare illegal string values
  if(isBogus()) {
    return -1;
  }

  // pin indices to legal values
  pinIndices(start, length);

  if(srcChars == NULL) {
    // treat const UChar *srcChars==NULL as an empty string
    return length == 0 ? 0 : 1;
  }

  // get the correct pointers
  const UChar *chars = getArrayStart();

  chars += start;
  srcChars += srcStart;

  int32_t minLength;
  int8_t lengthResult;

  // get the srcLength if necessary
  if(srcLength < 0) {
    // srcChars has already been advanced by srcStart above; adding srcStart
    // again would measure from the wrong position (possibly beyond the
    // terminator) whenever srcStart != 0.
    srcLength = u_strlen(srcChars);
  }

  // are we comparing different lengths?
  if(length != srcLength) {
    if(length < srcLength) {
      minLength = length;
      lengthResult = -1;
    } else {
      minLength = srcLength;
      lengthResult = 1;
    }
  } else {
    minLength = length;
    lengthResult = 0;
  }

  /*
   * note that uprv_memcmp() returns an int but we return an int8_t;
   * we need to take care not to truncate the result -
   * one way to do this is to right-shift the value to
   * move the sign bit into the lower 8 bits and making sure that this
   * does not become 0 itself
   */

  // Nothing to compare unit by unit if either range is empty
  // or both pointers refer to the same memory.
  if(minLength > 0 && chars != srcChars) {
    int32_t result;

#   if U_IS_BIG_ENDIAN 
      // big-endian: byte comparison works
      result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
      if(result != 0) {
        return (int8_t)(result >> 15 | 1);
      }
#   else
      // little-endian: compare UChar units
      do {
        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
        if(result != 0) {
          return (int8_t)(result >> 15 | 1);
        }
      } while(--minLength > 0);
#   endif
  }
  return lengthResult;
}
741
742
/* String compare in code point order - doCompare() compares in code unit order. */
743
// Compares the substring [start, start+length) of this string with
// srcLength UChars of srcChars starting at srcStart, using uprv_strCompare()
// with its last two arguments FALSE and TRUE.
// Returns -1 if this string is bogus; otherwise -1, 0 or 1.
int8_t
UnicodeString::doCompareCodePointOrder(int32_t start,
                                       int32_t length,
                                       const UChar *srcChars,
                                       int32_t srcStart,
                                       int32_t srcLength) const
{
  // compare illegal string values
  if(isBogus()) {
    return -1;
  }

  // pin indices to legal values
  pinIndices(start, length);

  // treat const UChar *srcChars==NULL as an empty string
  const UChar *other = NULL;
  if(srcChars != NULL) {
    other = srcChars + srcStart;
  } else {
    srcStart = srcLength = 0;
  }

  const int32_t diff = uprv_strCompare(getArrayStart() + start, length, other, srcLength, FALSE, TRUE);
  if(diff == 0) {
    return 0;
  }
  /* translate the 32-bit result into an 8-bit one */
  return (int8_t)(diff >> 15 | 1);
}
771
772
// Out-of-line accessor that forwards to length().
int32_t
UnicodeString::getLength() const
{
    const int32_t len = length();
    return len;
}
776
777
// Out-of-line accessor that forwards to charAt().
UChar
UnicodeString::getCharAt(int32_t offset) const
{
  const UChar unit = charAt(offset);
  return unit;
}
781
782
// Out-of-line accessor that forwards to char32At().
UChar32
UnicodeString::getChar32At(int32_t offset) const
{
  const UChar32 codePoint = char32At(offset);
  return codePoint;
}
786
787
// Returns the code point read by U16_GET at offset,
// or kInvalidUChar if offset is not within [0, length()).
UChar32
UnicodeString::char32At(int32_t offset) const
{
  const int32_t len = length();
  // The unsigned comparison also rejects negative offsets.
  if((uint32_t)offset >= (uint32_t)len) {
    return kInvalidUChar;
  }
  const UChar *array = getArrayStart();
  UChar32 c;
  U16_GET(array, 0, offset, len, c);
  return c;
}
800
801
// Returns offset after adjusting it with U16_SET_CP_START,
// or 0 if offset is not within [0, length()).
int32_t
UnicodeString::getChar32Start(int32_t offset) const {
  // The unsigned comparison also rejects negative offsets.
  if((uint32_t)offset >= (uint32_t)length()) {
    return 0;
  }
  const UChar *array = getArrayStart();
  U16_SET_CP_START(array, 0, offset);
  return offset;
}
811
812
// Returns offset after adjusting it with U16_SET_CP_LIMIT,
// or length() if offset is not within [0, length()).
int32_t
UnicodeString::getChar32Limit(int32_t offset) const {
  const int32_t len = length();
  // The unsigned comparison also rejects negative offsets.
  if((uint32_t)offset >= (uint32_t)len) {
    return len;
  }
  const UChar *array = getArrayStart();
  U16_SET_CP_LIMIT(array, 0, offset, len);
  return offset;
}
823
824
// Returns u_countChar32() for the pinned range [start, start+length).
int32_t
UnicodeString::countChar32(int32_t start, int32_t length) const {
  pinIndices(start, length);
  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
  const UChar *rangeStart = getArrayStart()+start;
  return u_countChar32(rangeStart, length);
}
830
831
// Returns u_strHasMoreChar32Than() for the pinned range [start, start+length)
// and the given number.
UBool
UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
  pinIndices(start, length);
  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
  const UChar *rangeStart = getArrayStart()+start;
  return u_strHasMoreChar32Than(rangeStart, length, number);
}
837
838
// Pins index to [0, length()], then moves it forward with U16_FWD_N for a
// positive delta, or backward with U16_BACK_N otherwise.
// Returns the resulting index.
int32_t
UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
  const int32_t len = length();
  // pin index
  index = (index<0) ? 0 : ((index>len) ? len : index);

  const UChar *array = getArrayStart();
  if(delta<=0) {
    U16_BACK_N(array, 0, index, -delta);
  } else {
    U16_FWD_N(array, index, len, delta);
  }

  return index;
}
857
858
void
859
UnicodeString::doExtract(int32_t start,
860
             int32_t length,
861
             UChar *dst,
862
             int32_t dstStart) const
863
0
{
864
  // pin indices to legal values
865
0
  pinIndices(start, length);
866
867
  // do not copy anything if we alias dst itself
868
0
  const UChar *array = getArrayStart();
869
0
  if(array + start != dst + dstStart) {
870
0
    us_arrayCopy(array, start, dst, dstStart, length);
871
0
  }
872
0
}
873
874
// Copies the whole string to dest if it fits into destCapacity, and returns
// the result of u_terminateUChars() for (dest, destCapacity, length()).
// If errorCode already indicates a failure, nothing is done.
// A bogus string, a negative destCapacity, or a null dest with a positive
// destCapacity sets U_ILLEGAL_ARGUMENT_ERROR. In both cases length() is returned.
int32_t
UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
                       UErrorCode &errorCode) const {
  const int32_t len = length();
  if(U_FAILURE(errorCode)) {
    return len;
  }
  if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    return len;
  }

  const UChar *array = getArrayStart();
  // Skip the copy when it would not fit, or when dest is our own buffer.
  if(len>0 && len<=destCapacity && array!=dest) {
    u_memcpy(dest, array, len);
  }
  return u_terminateUChars(dest, destCapacity, len, &errorCode);
}
892
893
// Converts the pinned range [start, start+length) with u_UCharsToChars()
// into target if it fits into targetCapacity, and returns the result of
// u_terminateChars() for (target, targetCapacity, length).
// Returns 0 without doing anything if targetCapacity is negative, or if
// target is NULL while targetCapacity is positive.
int32_t
UnicodeString::extract(int32_t start,
                       int32_t length,
                       char *target,
                       int32_t targetCapacity,
                       enum EInvariant) const
{
  // if the arguments are illegal, then do nothing
  const UBool argsAreLegal =
      targetCapacity >= 0 && (targetCapacity == 0 || target != NULL);
  if(!argsAreLegal) {
    return 0;
  }

  // pin the indices to legal values
  pinIndices(start, length);

  if(length <= targetCapacity) {
    const UChar *source = getArrayStart() + start;
    u_UCharsToChars(source, target, length);
  }
  // The status set by u_terminateChars() is discarded; only its return value is used.
  UErrorCode status = U_ZERO_ERROR;
  return u_terminateChars(target, targetCapacity, length, &status);
}
914
915
// Returns a string constructed with the read-only aliasing constructor
// (isTerminated=FALSE) over the pinned range [start, start+len) of this
// string's buffer. If getBuffer() returns NULL, the result is a bogus string.
UnicodeString
UnicodeString::tempSubString(int32_t start, int32_t len) const {
  pinIndices(start, len);
  const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
  if(array==NULL) {
    // Pass any non-NULL pointer, because NULL would make an empty string,
    // together with an illegal length, which makes a bogus result string.
    len=-2;
    array=fUnion.fStackFields.fBuffer;
  }
  const UChar *subStart = array + start;
  return UnicodeString(FALSE, subStart, len);
}
925
926
// Converts the pinned range [start, start+len) to UTF-8 in target using
// u_strToUTF8WithSub() with U+FFFD as the substitution character.
// Returns the length that u_strToUTF8WithSub() reports; its error code is discarded.
int32_t
UnicodeString::toUTF8(int32_t start, int32_t len,
                      char *target, int32_t capacity) const {
  pinIndices(start, len);
  const UChar *source = getBuffer() + start;
  int32_t length8;
  UErrorCode errorCode = U_ZERO_ERROR;
  u_strToUTF8WithSub(target, capacity, &length8,
                     source, len,
                     0xFFFD,  // Standard substitution character.
                     NULL,    // Don't care about number of substitutions.
                     &errorCode);
  return length8;
}
939
940
#if U_CHARSET_IS_UTF8
941
942
// Extracts [start, start+len) into target via toUTF8()
// (this overload is only compiled when U_CHARSET_IS_UTF8).
// Returns 0 if target is null while dstSize is positive.
int32_t
UnicodeString::extract(int32_t start, int32_t len,
                       char *target, uint32_t dstSize) const {
  // if the arguments are illegal, then do nothing
  // (dstSize is unsigned, so it cannot be negative)
  if(dstSize > 0 && target == 0) {
    return 0;
  }
  // toUTF8() takes a signed capacity: clamp to the largest int32_t value.
  const int32_t capacity = dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff;
  return toUTF8(start, len, target, capacity);
}
951
952
// else see unistr_cnv.cpp
953
#endif
954
955
void 
956
UnicodeString::extractBetween(int32_t start,
957
                  int32_t limit,
958
0
                  UnicodeString& target) const {
959
0
  pinIndex(start);
960
0
  pinIndex(limit);
961
0
  doExtract(start, limit - start, target);
962
0
}
963
964
// When converting from UTF-16 to UTF-8, the result will have at most 3 times
965
// as many bytes as the source has UChars.
966
// The "worst cases" are writing systems like Indic, Thai and CJK with
967
// 3:1 bytes:UChars.
968
void
969
0
UnicodeString::toUTF8(ByteSink &sink) const {
970
0
  int32_t length16 = length();
971
0
  if(length16 != 0) {
972
0
    char stackBuffer[1024];
973
0
    int32_t capacity = (int32_t)sizeof(stackBuffer);
974
0
    UBool utf8IsOwned = FALSE;
975
0
    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
976
0
                                      3*length16,
977
0
                                      stackBuffer, capacity,
978
0
                                      &capacity);
979
0
    int32_t length8 = 0;
980
0
    UErrorCode errorCode = U_ZERO_ERROR;
981
0
    u_strToUTF8WithSub(utf8, capacity, &length8,
982
0
                       getBuffer(), length16,
983
0
                       0xFFFD,  // Standard substitution character.
984
0
                       NULL,    // Don't care about number of substitutions.
985
0
                       &errorCode);
986
0
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
987
0
      utf8 = (char *)uprv_malloc(length8);
988
0
      if(utf8 != NULL) {
989
0
        utf8IsOwned = TRUE;
990
0
        errorCode = U_ZERO_ERROR;
991
0
        u_strToUTF8WithSub(utf8, length8, &length8,
992
0
                           getBuffer(), length16,
993
0
                           0xFFFD,  // Standard substitution character.
994
0
                           NULL,    // Don't care about number of substitutions.
995
0
                           &errorCode);
996
0
      } else {
997
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
998
0
      }
999
0
    }
1000
0
    if(U_SUCCESS(errorCode)) {
1001
0
      sink.Append(utf8, length8);
1002
0
      sink.Flush();
1003
0
    }
1004
0
    if(utf8IsOwned) {
1005
0
      uprv_free(utf8);
1006
0
    }
1007
0
  }
1008
0
}
1009
1010
/**
 * Converts the whole string to UTF-32 into utf32, using U+FFFD as the
 * substitution character.
 *
 * @param utf32     output buffer
 * @param capacity  size of the output buffer, in UChar32 units
 * @param errorCode in/out ICU error code; if it already indicates a failure,
 *                  nothing is done and 0 is returned
 * @return the length reported by u_strToUTF32WithSub(), or 0
 */
int32_t
UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
  int32_t length32=0;
  if(U_FAILURE(errorCode)) {
    return length32;
  }
  // Illegal arguments are checked by getBuffer() and u_strToUTF32WithSub().
  u_strToUTF32WithSub(utf32, capacity, &length32,
      getBuffer(), length(),
      0xfffd,  // Substitution character.
      NULL,    // Don't care about number of substitutions.
      &errorCode);
  return length32;
}
1023
1024
/**
 * Finds the first occurrence of srcChars[srcStart, srcStart+srcLength)
 * within [start, start+length) of this string.
 * A negative srcLength means that the pattern is NUL-terminated.
 *
 * @return the index of the match relative to the start of this string,
 *         or -1 if there is none, if this string is bogus, if srcChars is
 *         null, if srcStart is negative, or if the pattern is empty
 */
int32_t
UnicodeString::indexOf(const UChar *srcChars,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t length) const
{
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    return -1;
  }

  // An empty NUL-terminated pattern is never found either.
  if(srcLength < 0 && srcChars[srcStart] == 0) {
    return -1;
  }

  // Bring the search range within bounds.
  pinIndices(start, length);

  const UChar *base = getArrayStart();
  const UChar *hit = u_strFindFirst(base + start, length, srcChars + srcStart, srcLength);
  return hit != NULL ? (int32_t)(hit - base) : -1;
}
1052
1053
/**
 * Finds the first occurrence of the code unit c within [start, start+length),
 * after pinning the range to this string's bounds.
 *
 * @return the index relative to the start of this string, or -1 if not found
 */
int32_t
UnicodeString::doIndexOf(UChar c,
             int32_t start,
             int32_t length) const
{
  pinIndices(start, length);

  const UChar *base = getArrayStart();
  const UChar *hit = u_memchr(base + start, c, length);
  return hit != NULL ? (int32_t)(hit - base) : -1;
}
1070
1071
/**
 * Finds the first occurrence of the code point c within [start, start+length),
 * after pinning the range to this string's bounds.
 *
 * @return the index relative to the start of this string, or -1 if not found
 */
int32_t
UnicodeString::doIndexOf(UChar32 c,
                         int32_t start,
                         int32_t length) const {
  pinIndices(start, length);

  const UChar *base = getArrayStart();
  const UChar *hit = u_memchr32(base + start, c, length);
  return hit != NULL ? (int32_t)(hit - base) : -1;
}
1087
1088
/**
 * Finds the last occurrence of srcChars[srcStart, srcStart+srcLength)
 * within [start, start+length) of this string.
 * A negative srcLength means that the pattern is NUL-terminated.
 *
 * @return the index of the match relative to the start of this string,
 *         or -1 if there is none, if this string is bogus, if srcChars is
 *         null, if srcStart is negative, or if the pattern is empty
 */
int32_t
UnicodeString::lastIndexOf(const UChar *srcChars,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t length) const
{
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    return -1;
  }

  // An empty NUL-terminated pattern is never found either.
  if(srcLength < 0 && srcChars[srcStart] == 0) {
    return -1;
  }

  // Bring the search range within bounds.
  pinIndices(start, length);

  const UChar *base = getArrayStart();
  const UChar *hit = u_strFindLast(base + start, length, srcChars + srcStart, srcLength);
  return hit != NULL ? (int32_t)(hit - base) : -1;
}
1116
1117
/**
 * Finds the last occurrence of the code unit c within [start, start+length),
 * after pinning the range to this string's bounds.
 *
 * @return the index relative to the start of this string, or -1 if c is
 *         not found or this string is bogus
 */
int32_t
UnicodeString::doLastIndexOf(UChar c,
                 int32_t start,
                 int32_t length) const
{
  if(isBogus()) {
    return -1;
  }

  pinIndices(start, length);

  const UChar *base = getArrayStart();
  const UChar *hit = u_memrchr(base + start, c, length);
  return hit != NULL ? (int32_t)(hit - base) : -1;
}
1138
1139
/**
 * Finds the last occurrence of the code point c within [start, start+length),
 * after pinning the range to this string's bounds.
 *
 * @return the index relative to the start of this string, or -1 if not found
 */
int32_t
UnicodeString::doLastIndexOf(UChar32 c,
                             int32_t start,
                             int32_t length) const {
  pinIndices(start, length);

  const UChar *base = getArrayStart();
  const UChar *hit = u_memrchr32(base + start, c, length);
  return hit != NULL ? (int32_t)(hit - base) : -1;
}
1155
1156
//========================================
1157
// Write implementation
1158
//========================================
1159
1160
/**
 * Replaces, within [start, start+length) of this string, every occurrence of
 * oldText[oldStart, oldStart+oldLength) with newText[newStart, newStart+newLength).
 * All three ranges are pinned to their strings' bounds first.
 * Nothing is done if any of the three strings is bogus or if the (pinned)
 * search pattern is empty.
 *
 * @return *this
 */
UnicodeString&
UnicodeString::findAndReplace(int32_t start,
                  int32_t length,
                  const UnicodeString& oldText,
                  int32_t oldStart,
                  int32_t oldLength,
                  const UnicodeString& newText,
                  int32_t newStart,
                  int32_t newLength)
{
  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
    return *this;
  }

  pinIndices(start, length);
  oldText.pinIndices(oldStart, oldLength);
  newText.pinIndices(newStart, newLength);

  if(oldLength == 0) {
    return *this;
  }

  // Keep searching while the remaining range could still hold the pattern.
  for(; length > 0 && length >= oldLength; ) {
    const int32_t hit = indexOf(oldText, oldStart, oldLength, start, length);
    if(hit < 0) {
      // no further occurrence in the remaining range
      break;
    }

    // Substitute this occurrence, then continue right behind the inserted text:
    // the remaining range shrinks by everything up to the end of the old match.
    replace(hit, oldLength, newText, newStart, newLength);
    length -= hit + oldLength - start;
    start = hit + newLength;
  }

  return *this;
}
1197
1198
1199
void
1200
UnicodeString::setToBogus()
1201
0
{
1202
0
  releaseArray();
1203
1204
0
  fUnion.fFields.fLengthAndFlags = kIsBogus;
1205
0
  fUnion.fFields.fArray = 0;
1206
0
  fUnion.fFields.fCapacity = 0;
1207
0
}
1208
1209
// turn a bogus string into an empty one
1210
void
1211
0
UnicodeString::unBogus() {
1212
0
  if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1213
0
    setToEmpty();
1214
0
  }
1215
0
}
1216
1217
// Returns a pointer to this string's contents with a NUL code unit at
// index length(), or nullptr if that cannot be provided.
//
// - Returns nullptr if the string is not writable.
// - If there is spare capacity, the existing array is reused when either
//   a read-only alias already has a NUL at array[len], or the buffer is
//   not shared (not refcounted, or refCount()==1) so the NUL can be written.
// - Otherwise the array is cloned/grown to hold len+1 units and terminated;
//   nullptr is returned if len==INT32_MAX or cloneArrayIfNeeded() fails.
const char16_t *
1218
0
UnicodeString::getTerminatedBuffer() {
1219
0
  if(!isWritable()) {
1220
0
    return nullptr;
1221
0
  }
1222
0
  UChar *array = getArrayStart();
1223
0
  int32_t len = length();
1224
0
  // Fast paths: only possible when there is room for the NUL at array[len].
  if(len < getCapacity()) {
1225
0
    if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1226
      // If len<capacity on a read-only alias, then array[len] is
1227
      // either the original NUL (if constructed with (TRUE, s, length))
1228
      // or one of the original string contents characters (if later truncated),
1229
      // therefore we can assume that array[len] is initialized memory.
1230
0
      if(array[len] == 0) {
1231
0
        return array;
1232
0
      }
1233
0
    } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1234
      // kRefCounted: Do not write the NUL if the buffer is shared.
1235
      // That is mostly safe, except when the length of one copy was modified
1236
      // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1237
      // Then the NUL would be written into the middle of another copy's string.
1238
1239
      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1240
      // Do not test if there is a NUL already because it might be uninitialized memory.
1241
      // (That would be safe, but tools like valgrind & Purify would complain.)
1242
0
      array[len] = 0;
1243
0
      return array;
1244
0
    }
1245
0
  }
1246
0
  // Slow path: need our own array with room for len+1 units.
  // The len<INT32_MAX test keeps len+1 from overflowing.
  if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1247
0
    // cloneArrayIfNeeded() may have replaced the array: fetch it again.
    array = getArrayStart();
1248
0
    array[len] = 0;
1249
0
    return array;
1250
0
  } else {
1251
0
    return nullptr;
1252
0
  }
1253
0
}
1254
1255
// setTo() analogous to the readonly-aliasing constructor with the same signature
1256
/**
 * Makes this string a read-only alias of the caller's text.
 *
 * - No change while a getBuffer(minCapacity) is "open".
 * - A null text pointer produces an empty string instead of an alias.
 * - The string becomes bogus if textLength < -1, if textLength == -1 without
 *   isTerminated, or if isTerminated is set but text[textLength] is not NUL.
 * - textLength == -1 means that the length is determined with u_strlen().
 *
 * @return *this
 */
UnicodeString &
UnicodeString::setTo(UBool isTerminated,
                     ConstChar16Ptr textPtr,
                     int32_t textLength)
{
  // do not modify a string that has an "open" getBuffer(minCapacity)
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
    return *this;
  }

  const UChar *text = textPtr;
  if(text == NULL) {
    // nothing to alias: become an empty string
    releaseArray();
    setToEmpty();
    return *this;
  }

  // Validate the length/termination combination.
  const UBool badArguments =
      textLength < -1 ||
      (textLength == -1 && !isTerminated) ||
      (textLength >= 0 && isTerminated && text[textLength] != 0);
  if(badArguments) {
    setToBogus();
    return *this;
  }

  releaseArray();

  if(textLength == -1) {
    // the text must be terminated here, otherwise the check above would have failed
    textLength = u_strlen(text);
  }

  // A terminated alias counts the NUL as part of its capacity.
  int32_t aliasCapacity = textLength;
  if(isTerminated) {
    aliasCapacity = textLength + 1;
  }
  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
  setArray((UChar *)text, textLength, aliasCapacity);
  return *this;
}
1292
1293
// setTo() analogous to the writable-aliasing constructor with the same signature
1294
/**
 * Makes this string a writable alias of the caller's buffer.
 *
 * - No change while a getBuffer(minCapacity) is "open".
 * - A null buffer produces an empty string instead of an alias.
 * - The string becomes bogus if buffLength < -1, buffCapacity < 0, or
 *   buffLength > buffCapacity.
 * - buffLength == -1 means that the length is found by scanning for a NUL,
 *   without looking beyond buffCapacity.
 *
 * @return *this
 */
UnicodeString &
UnicodeString::setTo(UChar *buffer,
                     int32_t buffLength,
                     int32_t buffCapacity) {
  // do not modify a string that has an "open" getBuffer(minCapacity)
  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
    return *this;
  }

  if(buffer == NULL) {
    // nothing to alias: become an empty string
    releaseArray();
    setToEmpty();
    return *this;
  }

  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    setToBogus();
    return *this;
  }

  if(buffLength == -1) {
    // like u_strlen(buffer), but bounded by buffCapacity
    int32_t count = 0;
    while(count != buffCapacity && buffer[count] != 0) {
      ++count;
    }
    buffLength = count;
  }

  releaseArray();

  fUnion.fFields.fLengthAndFlags = kWritableAlias;
  setArray(buffer, buffLength, buffCapacity);
  return *this;
}
1328
1329
0
/**
 * Replaces the contents of this string with the UTF-16 conversion of utf8,
 * using U+FFFD as the substitution character.
 * A bogus string is first turned into an empty one. If the conversion
 * reports a failure, the string is set to bogus.
 *
 * @param utf8 the UTF-8 input
 * @return *this
 */
UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
  unBogus();
  const int32_t length = utf8.length();

  // The UTF-16 string will be at most as long as the UTF-8 string.
  int32_t capacity;
  if(length <= US_STACKBUF_SIZE) {
    capacity = US_STACKBUF_SIZE;
  } else {
    capacity = length + 1;  // +1 for the terminating NUL.
  }

  UChar *utf16 = getBuffer(capacity);
  // Initialized so that releaseBuffer() below never reads an indeterminate
  // value if the converter returns without storing a length (for example
  // when getBuffer() could not provide a buffer).
  int32_t length16 = 0;
  UErrorCode errorCode = U_ZERO_ERROR;
  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
      utf8.data(), length,
      0xfffd,  // Substitution character.
      NULL,    // Don't care about number of substitutions.
      &errorCode);
  releaseBuffer(length16);
  if(U_FAILURE(errorCode)) {
    setToBogus();
  }
  return *this;
}
1353
1354
/**
 * Overwrites the code unit at offset with c.
 * An offset outside the string is clamped to the first or last index.
 * Nothing is written if the string is empty or if cloneArrayIfNeeded()
 * fails.
 *
 * @return *this
 */
UnicodeString&
UnicodeString::setCharAt(int32_t offset,
             UChar c)
{
  // Read the length before cloneArrayIfNeeded() gets a chance to change anything.
  const int32_t count = length();
  if(!cloneArrayIfNeeded() || count <= 0) {
    return *this;
  }

  if(offset < 0) {
    offset = 0;
  } else if(offset >= count) {
    offset = count - 1;
  }
  getArrayStart()[offset] = c;
  return *this;
}
1370
1371
/**
 * Replaces [start, start+_length) with the single code point srcChar.
 * If srcChar cannot be encoded (U16_APPEND reports an error), the range is
 * replaced with nothing, i.e. it is removed.
 *
 * @return the result of doReplace()
 */
UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               UChar32 srcChar) {
  UChar units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool invalid = FALSE;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, invalid);

  // An invalid code point leaves nothing to insert, so the source segment is
  // removed rather than replaced. (Testing the flag also keeps compilers
  // from complaining that it is unused.)
  if(invalid) {
    unitCount = 0;
  }
  return doReplace(start, _length, units, 0, unitCount);
}
1384
1385
/**
 * Appends the single code point srcChar.
 * If srcChar cannot be encoded (U16_APPEND reports an error), the string
 * is returned unchanged.
 *
 * @return *this, or the result of doAppend()
 */
UnicodeString&
UnicodeString::append(UChar32 srcChar) {
  UChar units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool invalid = FALSE;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, invalid);

  // Testing the flag keeps compilers from complaining that it is unused;
  // with an error, unitCount==0 and doAppend() would be a no-op anyway.
  if(invalid) {
    return *this;
  }
  return doAppend(units, 0, unitCount);
}
1395
1396
/**
 * Replaces [start, start+length) of this string with
 * src[srcStart, srcStart+srcLength), after pinning the source range to
 * src's bounds. The actual work is done by the UChar-pointer overload.
 *
 * @return the result of the UChar-pointer doReplace()
 */
UnicodeString&
UnicodeString::doReplace( int32_t start,
              int32_t length,
              const UnicodeString& src,
              int32_t srcStart,
              int32_t srcLength)
{
  // make the source range legal for src
  src.pinIndices(srcStart, srcLength);

  // hand over src's code units; the range in this string is replaced with them
  const UChar *srcArray = src.getArrayStart();
  return doReplace(start, length, srcArray, srcStart, srcLength);
}
1410
1411
// Replaces the range [start, start+length) of this string with srcLength
// code units taken from srcChars + srcStart.
//
// - Returns *this unchanged if the string is not writable.
// - On a read-only buffer with an empty replacement, a prefix or suffix is
//   removed by adjusting the array pointer/length without copying.
// - start == oldLength is handled as an append via doAppend().
// - A null srcChars is treated as an empty replacement; a negative srcLength
//   means the replacement is NUL-terminated (measured with u_strlen()).
// - The string is set to bogus if the new length would exceed INT32_MAX or
//   if the temporary copy for a self-overlapping source is bogus.
// - Returns *this unchanged-length if cloneArrayIfNeeded() fails.
UnicodeString&
1412
UnicodeString::doReplace(int32_t start,
1413
             int32_t length,
1414
             const UChar *srcChars,
1415
             int32_t srcStart,
1416
             int32_t srcLength)
1417
0
{
1418
0
  if(!isWritable()) {
1419
0
    return *this;
1420
0
  }
1421
1422
0
  int32_t oldLength = this->length();
1423
1424
  // optimize (read-only alias).remove(0, start) and .remove(start, end)
1425
0
  if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
1426
0
    if(start == 0) {
1427
      // remove prefix by adjusting the array pointer
1428
0
      pinIndex(length);
1429
0
      fUnion.fFields.fArray += length;
1430
0
      fUnion.fFields.fCapacity -= length;
1431
0
      setLength(oldLength - length);
1432
0
      return *this;
1433
0
    } else {
1434
0
      pinIndex(start);
1435
0
      if(length >= (oldLength - start)) {
1436
        // remove suffix by reducing the length (like truncate())
1437
0
        setLength(start);
1438
0
        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1439
0
        return *this;
1440
0
      }
1441
0
    }
1442
0
  }
1443
1444
0
  // Replacing "at the end" is an append, which has its own optimized path.
  if(start == oldLength) {
1445
0
    return doAppend(srcChars, srcStart, srcLength);
1446
0
  }
1447
1448
0
  if(srcChars == 0) {
1449
0
    srcLength = 0;
1450
0
  } else {
1451
    // Perform all remaining operations relative to srcChars + srcStart.
1452
    // From this point forward, do not use srcStart.
1453
0
    srcChars += srcStart;
1454
0
    if (srcLength < 0) {
1455
      // get the srcLength if necessary
1456
0
      srcLength = u_strlen(srcChars);
1457
0
    }
1458
0
  }
1459
1460
  // pin the indices to legal values
1461
0
  pinIndices(start, length);
1462
1463
  // Calculate the size of the string after the replace.
1464
  // Avoid int32_t overflow.
1465
0
  int32_t newLength = oldLength - length;
1466
0
  if(srcLength > (INT32_MAX - newLength)) {
1467
0
    setToBogus();
1468
0
    return *this;
1469
0
  }
1470
0
  newLength += srcLength;
1471
1472
  // Check for insertion into ourself
1473
0
  const UChar *oldArray = getArrayStart();
1474
0
  // The source overlaps our own writable buffer if the two ranges intersect.
  if (isBufferWritable() &&
1475
0
      oldArray < srcChars + srcLength &&
1476
0
      srcChars < oldArray + oldLength) {
1477
    // Copy into a new UnicodeString and start over
1478
0
    UnicodeString copy(srcChars, srcLength);
1479
0
    if (copy.isBogus()) {
1480
0
      setToBogus();
1481
0
      return *this;
1482
0
    }
1483
0
    return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
1484
0
  }
1485
1486
  // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1487
  // therefore we need to keep the current fArray
1488
0
  UChar oldStackBuffer[US_STACKBUF_SIZE];
1489
0
  if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1490
    // copy the stack buffer contents because it will be overwritten with
1491
    // fUnion.fFields values
1492
0
    u_memcpy(oldStackBuffer, oldArray, oldLength);
1493
0
    oldArray = oldStackBuffer;
1494
0
  }
1495
1496
  // clone our array and allocate a bigger array if needed
1497
0
  int32_t *bufferToDelete = 0;
1498
0
  if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
1499
0
                         FALSE, &bufferToDelete)
1500
0
  ) {
1501
0
    return *this;
1502
0
  }
1503
1504
  // now do the replace
1505
1506
0
  UChar *newArray = getArrayStart();
1507
0
  if(newArray != oldArray) {
1508
    // if fArray changed, then we need to copy everything except what will change
1509
0
    us_arrayCopy(oldArray, 0, newArray, 0, start);
1510
0
    us_arrayCopy(oldArray, start + length,
1511
0
                 newArray, start + srcLength,
1512
0
                 oldLength - (start + length));
1513
0
  } else if(length != srcLength) {
1514
    // fArray did not change; copy only the portion that isn't changing, leaving a hole
1515
0
    us_arrayCopy(oldArray, start + length,
1516
0
                 newArray, start + srcLength,
1517
0
                 oldLength - (start + length));
1518
0
  }
1519
1520
  // now fill in the hole with the new string
1521
0
  us_arrayCopy(srcChars, 0, newArray, start, srcLength);
1522
1523
0
  setLength(newLength);
1524
1525
  // delayed delete in case srcChars == fArray when we started, and
1526
  // to keep oldArray alive for the above operations
1527
0
  if (bufferToDelete) {
1528
0
    uprv_free(bufferToDelete);
1529
0
  }
1530
1531
0
  return *this;
1532
0
}
1533
1534
// Versions of doReplace() only for append() variants.
1535
// doReplace() and doAppend() optimize for different cases.
1536
1537
/**
 * Appends src[srcStart, srcStart+srcLength) to this string.
 * A zero srcLength is a no-op; otherwise the source range is pinned to
 * src's bounds and the UChar-pointer overload does the work.
 *
 * @return *this, or the result of the UChar-pointer doAppend()
 */
UnicodeString&
UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
  if(srcLength != 0) {
    // make the source range legal for src
    src.pinIndices(srcStart, srcLength);
    return doAppend(src.getArrayStart(), srcStart, srcLength);
  }
  return *this;
}
1547
1548
/**
 * Appends srcLength code units from srcChars + srcStart to this string.
 *
 * - No-op if the string is not writable, srcLength is 0, or srcChars is null.
 * - A negative srcLength means the source is NUL-terminated.
 * - The string becomes bogus if the combined length overflows int32_t or if
 *   the temporary copy made for a self-overlapping source is bogus.
 * - If cloneArrayIfNeeded() fails, nothing is appended here.
 *
 * @return *this
 */
UnicodeString&
UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
  if(!isWritable() || srcLength == 0 || srcChars == NULL) {
    return *this;
  }

  // Everything below works relative to srcChars + srcStart;
  // srcStart must not be used any more from here on.
  srcChars += srcStart;

  if(srcLength < 0) {
    // NUL-terminated source: measure it
    srcLength = u_strlen(srcChars);
    if(srcLength == 0) {
      return *this;
    }
  }

  const int32_t oldLength = length();
  int32_t newLength;
  if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
    setToBogus();
    return *this;
  }

  // Appending (part of) our own writable buffer: work from a copy instead.
  const UChar* ownArray = getArrayStart();
  if (isBufferWritable() &&
      ownArray < srcChars + srcLength &&
      srcChars < ownArray + oldLength) {
    UnicodeString copy(srcChars, srcLength);
    if (copy.isBogus()) {
      setToBogus();
      return *this;
    }
    return doAppend(copy.getArrayStart(), 0, srcLength);
  }

  // Fast path for a large-enough, owned buffer; otherwise clone/grow.
  const UBool fitsInPlace = newLength <= getCapacity() && isBufferWritable();
  if(!fitsInPlace && !cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
    return *this;
  }

  UChar *dest = getArrayStart();
  // Skip the copy when the caller already wrote the text in place, as in
  //   UChar *buffer=str.getAppendBuffer(...);
  // followed by
  //   str.append(buffer, length);
  // or
  //   str.appendString(buffer, length)
  // or similar.
  if(srcChars != dest + oldLength) {
    us_arrayCopy(srcChars, 0, dest, oldLength, srcLength);
  }
  setLength(newLength);
  return *this;
}
1604
1605
/**
1606
 * Replaceable API
1607
 */
1608
void
1609
UnicodeString::handleReplaceBetween(int32_t start,
1610
                                    int32_t limit,
1611
0
                                    const UnicodeString& text) {
1612
0
    replaceBetween(start, limit, text);
1613
0
}
1614
1615
/**
1616
 * Replaceable API
1617
 */
1618
void 
1619
0
UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1620
0
    if (limit <= start) {
1621
0
        return; // Nothing to do; avoid bogus malloc call
1622
0
    }
1623
0
    UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1624
    // Check to make sure text is not null.
1625
0
    if (text != NULL) {
1626
0
      extractBetween(start, limit, text, 0);
1627
0
      insert(dest, text, 0, limit - start);    
1628
0
      uprv_free(text);
1629
0
    }
1630
0
}
1631
1632
/**
1633
 * Replaceable API
1634
 *
1635
 * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1636
 * so we implement this function here.
1637
 */
1638
0
UBool Replaceable::hasMetaData() const {
    // The Replaceable base-class implementation always answers TRUE.
    return TRUE;
}
1641
1642
/**
1643
 * Replaceable API
1644
 */
1645
0
UBool UnicodeString::hasMetaData() const {
    // The UnicodeString implementation always answers FALSE.
    return FALSE;
}
1648
1649
// Reverses the code units in [start, start+length) in place.
//
// - Returns *this unchanged if length <= 1 (before or after pinning the
//   range) or if cloneArrayIfNeeded() fails.
// - While swapping, records whether any lead surrogate was seen; if so, a
//   second pass swaps each (trail, lead) pair back into (lead, trail) order
//   so that supplementary code points stay intact.
UnicodeString&
1650
0
UnicodeString::doReverse(int32_t start, int32_t length) {
1651
0
  if(length <= 1 || !cloneArrayIfNeeded()) {
1652
0
    return *this;
1653
0
  }
1654
1655
  // pin the indices to legal values
1656
0
  pinIndices(start, length);
1657
0
  if(length <= 1) {  // pinIndices() might have shrunk the length
1658
0
    return *this;
1659
0
  }
1660
1661
0
  UChar *left = getArrayStart() + start;
1662
0
  UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1663
0
  UChar swap;
1664
0
  UBool hasSupplementary = FALSE;
1665
1666
  // Before the loop we know left<right because length>=2.
1667
0
  // Swap the outermost pair and move both pointers inward, checking each
  // moved code unit for a lead surrogate along the way.
  do {
1668
0
    hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1669
0
    hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1670
0
    *right-- = swap;
1671
0
  } while(left < right);
1672
  // Make sure to test the middle code unit of an odd-length string.
1673
  // Redundant if the length is even.
1674
0
  hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1675
1676
  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1677
0
  if(hasSupplementary) {
1678
0
    UChar swap2;
1679
1680
0
    left = getArrayStart() + start;
1681
0
    right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1682
0
    while(left < right) {
1683
0
      // After the reversal a surrogate pair appears as trail followed by lead.
      if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1684
0
        *left++ = swap2;
1685
0
        *left++ = swap;
1686
0
      } else {
1687
0
        ++left;
1688
0
      }
1689
0
    }
1690
0
  }
1691
1692
0
  return *this;
1693
0
}
1694
1695
/**
 * Pads this string at the front with padChar until it is targetLength
 * code units long.
 *
 * @return TRUE if padding was added; FALSE if the string is already at
 *         least targetLength long or cloneArrayIfNeeded() fails
 */
UBool
UnicodeString::padLeading(int32_t targetLength,
                          UChar padChar)
{
  const int32_t oldLength = length();
  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
    return FALSE;
  }

  // shift the existing contents up by the width of the padding
  UChar *array = getArrayStart();
  const int32_t padWidth = targetLength - oldLength;
  us_arrayCopy(array, 0, array, padWidth, oldLength);

  // fill the gap at the front, from its last slot down to index 0
  for(int32_t i = padWidth - 1; i >= 0; --i) {
    array[i] = padChar;
  }
  setLength(targetLength);
  return TRUE;
}
1716
1717
/**
 * Pads this string at the end with padChar until it is targetLength
 * code units long.
 *
 * @return TRUE if padding was added; FALSE if the string is already at
 *         least targetLength long or cloneArrayIfNeeded() fails
 */
UBool
UnicodeString::padTrailing(int32_t targetLength,
                           UChar padChar)
{
  const int32_t oldLength = length();
  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
    return FALSE;
  }

  // fill [oldLength, targetLength) from the back towards the old end
  UChar *array = getArrayStart();
  for(int32_t i = targetLength - 1; i >= oldLength; --i) {
    array[i] = padChar;
  }
  setLength(targetLength);
  return TRUE;
}
1735
1736
//========================================
1737
// Hashing
1738
//========================================
1739
int32_t
UnicodeString::doHashCode() const
{
    /* The hash is computed by ustr_hashUCharsN() so that UnicodeString
     * hashing is consistent with UChar* hashing.
     * A result equal to kInvalidHashCode is mapped to kEmptyHashCode. */
    const int32_t hash = ustr_hashUCharsN(getArrayStart(), length());
    if (hash != kInvalidHashCode) {
        return hash;
    }
    return kEmptyHashCode;
}
1750
1751
//========================================
1752
// External Buffer
1753
//========================================
1754
1755
char16_t *
1756
0
UnicodeString::getBuffer(int32_t minCapacity) {
1757
0
  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1758
0
    fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1759
0
    setZeroLength();
1760
0
    return getArrayStart();
1761
0
  } else {
1762
0
    return nullptr;
1763
0
  }
1764
0
}
1765
1766
void
1767
0
UnicodeString::releaseBuffer(int32_t newLength) {
1768
0
  if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1769
    // set the new fLength
1770
0
    int32_t capacity=getCapacity();
1771
0
    if(newLength==-1) {
1772
      // the new length is the string length, capped by fCapacity
1773
0
      const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1774
0
      while(p<limit && *p!=0) {
1775
0
        ++p;
1776
0
      }
1777
0
      newLength=(int32_t)(p-array);
1778
0
    } else if(newLength>capacity) {
1779
0
      newLength=capacity;
1780
0
    }
1781
0
    setLength(newLength);
1782
0
    fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1783
0
  }
1784
0
}
1785
1786
//========================================
1787
// Miscellaneous
1788
//========================================
1789
// Ensures that this string has its own writable array of at least
// newCapacity code units, allocating a new array if necessary.
//
// Parameters:
//   newCapacity     required capacity; -1 means "the current capacity"
//   growCapacity    preferred (larger) capacity to try first; a negative
//                   value means "same as newCapacity"
//   doCopyArray     if set, the old contents are copied into a new array;
//                   otherwise a new array starts with zero length
//   pBufferToDelete if non-null and the old refcounted array's count drops
//                   to 0, the old allocation is handed to the caller here
//                   instead of being freed
//   forceClone      allocate a new array even if the current one would do
//
// Returns TRUE if the array is usable afterwards. Returns FALSE if the
// string is not writable, or if allocation fails, in which case the string
// is set to bogus.
UBool
1790
UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1791
                                  int32_t growCapacity,
1792
                                  UBool doCopyArray,
1793
                                  int32_t **pBufferToDelete,
1794
0
                                  UBool forceClone) {
1795
  // default parameters need to be static, therefore
1796
  // the defaults are -1 to have convenience defaults
1797
0
  if(newCapacity == -1) {
1798
0
    newCapacity = getCapacity();
1799
0
  }
1800
1801
  // while a getBuffer(minCapacity) is "open",
1802
  // prevent any modifications of the string by returning FALSE here
1803
  // if the string is bogus, then only an assignment or similar can revive it
1804
0
  if(!isWritable()) {
1805
0
    return FALSE;
1806
0
  }
1807
1808
  /*
1809
   * We need to make a copy of the array if
1810
   * the buffer is read-only, or
1811
   * the buffer is refCounted (shared), and refCount>1, or
1812
   * the buffer is too small.
1813
   * Return FALSE if memory could not be allocated.
1814
   */
1815
0
  if(forceClone ||
1816
0
     fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1817
0
     (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
1818
0
     newCapacity > getCapacity()
1819
0
  ) {
1820
    // check growCapacity for default value and use of the stack buffer
1821
0
    if(growCapacity < 0) {
1822
0
      growCapacity = newCapacity;
1823
0
    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1824
0
      // The required size fits the stack buffer: do not ask for more than that.
      growCapacity = US_STACKBUF_SIZE;
1825
0
    }
1826
1827
    // save old values
1828
0
    UChar oldStackBuffer[US_STACKBUF_SIZE];
1829
0
    UChar *oldArray;
1830
0
    int32_t oldLength = length();
1831
0
    int16_t flags = fUnion.fFields.fLengthAndFlags;
1832
1833
0
    if(flags&kUsingStackBuffer) {
1834
0
      U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
1835
0
      if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1836
        // copy the stack buffer contents because it will be overwritten with
1837
        // fUnion.fFields values
1838
0
        us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
1839
0
        oldArray = oldStackBuffer;
1840
0
      } else {
1841
0
        oldArray = NULL; // no need to copy from the stack buffer to itself
1842
0
      }
1843
0
    } else {
1844
0
      oldArray = fUnion.fFields.fArray;
1845
0
      U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1846
0
    }
1847
1848
    // allocate a new array
1849
0
    // Try the preferred size first, then fall back to the minimum size.
    if(allocate(growCapacity) ||
1850
0
       (newCapacity < growCapacity && allocate(newCapacity))
1851
0
    ) {
1852
0
      if(doCopyArray) {
1853
        // copy the contents
1854
        // do not copy more than what fits - it may be smaller than before
1855
0
        int32_t minLength = oldLength;
1856
0
        newCapacity = getCapacity();
1857
0
        if(newCapacity < minLength) {
1858
0
          minLength = newCapacity;
1859
0
        }
1860
0
        if(oldArray != NULL) {
1861
0
          us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1862
0
        }
1863
0
        setLength(minLength);
1864
0
      } else {
1865
0
        setZeroLength();
1866
0
      }
1867
1868
      // release the old array
1869
0
      if(flags & kRefCounted) {
1870
        // the array is refCounted; decrement and release if 0
1871
0
        // The reference counter is stored immediately before the array.
        u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1872
0
        if(umtx_atomic_dec(pRefCount) == 0) {
1873
0
          if(pBufferToDelete == 0) {
1874
              // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1875
              // is defined as volatile. (Volatile has useful non-standard behavior
1876
              //   with this compiler.)
1877
0
            uprv_free((void *)pRefCount);
1878
0
          } else {
1879
            // the caller requested to delete it himself
1880
0
            *pBufferToDelete = (int32_t *)pRefCount;
1881
0
          }
1882
0
        }
1883
0
      }
1884
0
    } else {
1885
      // not enough memory for growCapacity and not even for the smaller newCapacity
1886
      // reset the old values for setToBogus() to release the array
1887
0
      if(!(flags&kUsingStackBuffer)) {
1888
0
        fUnion.fFields.fArray = oldArray;
1889
0
      }
1890
0
      fUnion.fFields.fLengthAndFlags = flags;
1891
0
      setToBogus();
1892
0
      return FALSE;
1893
0
    }
1894
0
  }
1895
0
  return TRUE;
1896
0
}
1897
1898
// UnicodeStringAppendable ------------------------------------------------- ***
1899
1900
0
// Out-of-line destructor definition; the body is intentionally empty.
UnicodeStringAppendable::~UnicodeStringAppendable() {}
1901
1902
// Appends the single UTF-16 code unit c to the wrapped string.
// Returns the string's isWritable() state after the append.
UBool
UnicodeStringAppendable::appendCodeUnit(UChar c) {
  // Treat the by-value parameter as a one-element array.
  const UChar *unit = &c;
  UnicodeString &target = str.doAppend(unit, 0, 1);
  return target.isWritable();
}
1906
1907
// Encodes the code point c as UTF-16 and appends the resulting code units
// to the wrapped string.
// Returns FALSE if U16_APPEND reports an error for c; otherwise returns the
// string's isWritable() state after the append.
UBool
UnicodeStringAppendable::appendCodePoint(UChar32 c) {
  UChar units[U16_MAX_LENGTH];
  int32_t unitCount = 0;
  UBool failed = FALSE;
  U16_APPEND(units, unitCount, U16_MAX_LENGTH, c, failed);
  if(failed) {
    // Nothing is appended when the code point could not be encoded.
    return FALSE;
  }
  return str.doAppend(units, 0, unitCount).isWritable();
}
1915
1916
// Appends length code units starting at s to the wrapped string.
// Returns the string's isWritable() state after the append.
UBool
UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
  UnicodeString &target = str.doAppend(s, 0, length);
  return target.isWritable();
}
1920
1921
// Asks the wrapped string to make room for appendCapacity more code units
// beyond its current length.
//
// Returns FALSE without touching the string when the requested total would
// exceed kMaxCapacity; otherwise returns the result of cloneArrayIfNeeded().
UBool
UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
  int32_t oldLength = str.length();
  // Compare against the remaining headroom instead of computing
  // oldLength + appendCapacity first: that sum can overflow int32_t.
  // This is the same guard that getAppendBuffer() applies.
  if(appendCapacity > (kMaxCapacity - oldLength)) {
    return FALSE;
  }
  return str.cloneArrayIfNeeded(oldLength + appendCapacity);
}
1925
1926
// Returns a buffer into which the caller can write code units to be appended.
//
// minCapacity          required capacity of the returned buffer; must be >= 1
// desiredCapacityHint  preferred capacity; values below minCapacity are
//                      treated as minCapacity
// scratch              caller-owned fallback buffer
// scratchCapacity      capacity of scratch; must be >= minCapacity
// resultCapacity       receives the capacity of the returned buffer
//
// Returns NULL (with *resultCapacity set to 0) if minCapacity < 1 or the
// scratch buffer is smaller than minCapacity. Otherwise returns a pointer just
// past the string's current contents when the string could be grown, or
// scratch when it could not.
UChar *
UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
                                         int32_t desiredCapacityHint,
                                         UChar *scratch, int32_t scratchCapacity,
                                         int32_t *resultCapacity) {
  if(minCapacity < 1 || scratchCapacity < minCapacity) {
    *resultCapacity = 0;
    return NULL;
  }
  // The hint is passed on as the grow capacity. If it were smaller than the
  // required minimum, the string would be asked to grow to less than what is
  // needed, so never let the hint drop below minCapacity.
  if(desiredCapacityHint < minCapacity) {
    desiredCapacityHint = minCapacity;
  }
  int32_t oldLength = str.length();
  // Largest number of code units that can still be added without the total
  // exceeding kMaxCapacity; comparing against it avoids overflowing the sums.
  int32_t maxAppend = kMaxCapacity - oldLength;
  if(minCapacity <= maxAppend &&
      desiredCapacityHint <= maxAppend &&
      str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
    *resultCapacity = str.getCapacity() - oldLength;
    return str.getArrayStart() + oldLength;
  }
  // The string could not provide the space; hand back the caller's scratch buffer.
  *resultCapacity = scratchCapacity;
  return scratch;
}
1945
1946
U_NAMESPACE_END
1947
1948
U_NAMESPACE_USE
1949
1950
// Hash callback for keys that point to a UnicodeString.
// Returns 0 for a NULL key pointer, otherwise the string's hashCode().
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key) {
    const UnicodeString *us = static_cast<const UnicodeString *>(key.pointer);
    if (us == NULL) {
        return 0;
    }
    return us->hashCode();
}
1955
1956
// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1957
// does not depend on hashtable code.
1958
// Equality callback for keys that point to a UnicodeString.
// Identical pointers (including two NULLs) compare equal; a NULL and a
// non-NULL pointer compare unequal; otherwise the strings are compared
// with operator==.
U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1, const UElement key2) {
    const UnicodeString *lhs = static_cast<const UnicodeString *>(key1.pointer);
    const UnicodeString *rhs = static_cast<const UnicodeString *>(key2.pointer);
    if (lhs == rhs) {
        return TRUE;
    }
    if (lhs != NULL && rhs != NULL) {
        return *lhs == *rhs;
    }
    // Exactly one of the two pointers is NULL.
    return FALSE;
}
1970
1971
#ifdef U_STATIC_IMPLEMENTATION
1972
/*
1973
This should never be called. It is defined here to make sure that the
1974
virtual vector deleting destructor is defined within unistr.cpp.
1975
The vector deleting destructor is already a part of UObject,
1976
but defining it here makes sure that it is included with this object file.
1977
This makes sure that static library dependencies are kept to a minimum.
1978
*/
1979
static void uprv_UnicodeStringDummy(void) {
1980
    delete [] (new UnicodeString[2]);
1981
}
1982
#endif