/src/icu/icu4c/source/common/bytesinkutil.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2017 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | // bytesinkutil.cpp  | 
5  |  | // created: 2017sep14 Markus W. Scherer  | 
6  |  |  | 
7  |  | #include "unicode/utypes.h"  | 
8  |  | #include "unicode/bytestream.h"  | 
9  |  | #include "unicode/edits.h"  | 
10  |  | #include "unicode/stringoptions.h"  | 
11  |  | #include "unicode/utf8.h"  | 
12  |  | #include "unicode/utf16.h"  | 
13  |  | #include "bytesinkutil.h"  | 
14  |  | #include "charstr.h"  | 
15  |  | #include "cmemory.h"  | 
16  |  | #include "uassert.h"  | 
17  |  |  | 
18  |  | U_NAMESPACE_BEGIN  | 
19  |  |  | 
20  |  | UBool  | 
21  |  | ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,  | 
22  | 0  |                            ByteSink &sink, Edits *edits, UErrorCode &errorCode) { | 
23  | 0  |     if (U_FAILURE(errorCode)) { return false; } | 
24  | 0  |     char scratch[200];  | 
25  | 0  |     int32_t s8Length = 0;  | 
26  | 0  |     for (int32_t i = 0; i < s16Length;) { | 
27  | 0  |         int32_t capacity;  | 
28  | 0  |         int32_t desiredCapacity = s16Length - i;  | 
29  | 0  |         if (desiredCapacity < (INT32_MAX / 3)) { | 
30  | 0  |             desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit  | 
31  | 0  |         } else if (desiredCapacity < (INT32_MAX / 2)) { | 
32  | 0  |             desiredCapacity *= 2;  | 
33  | 0  |         } else { | 
34  | 0  |             desiredCapacity = INT32_MAX;  | 
35  | 0  |         }  | 
36  | 0  |         char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,  | 
37  | 0  |                                             scratch, UPRV_LENGTHOF(scratch), &capacity);  | 
38  | 0  |         capacity -= U8_MAX_LENGTH - 1;  | 
39  | 0  |         int32_t j = 0;  | 
40  | 0  |         for (; i < s16Length && j < capacity;) { | 
41  | 0  |             UChar32 c;  | 
42  | 0  |             U16_NEXT_UNSAFE(s16, i, c);  | 
43  | 0  |             U8_APPEND_UNSAFE(buffer, j, c);  | 
44  | 0  |         }  | 
45  | 0  |         if (j > (INT32_MAX - s8Length)) { | 
46  | 0  |             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
47  | 0  |             return false;  | 
48  | 0  |         }  | 
49  | 0  |         sink.Append(buffer, j);  | 
50  | 0  |         s8Length += j;  | 
51  | 0  |     }  | 
52  | 0  |     if (edits != nullptr) { | 
53  | 0  |         edits->addReplace(length, s8Length);  | 
54  | 0  |     }  | 
55  | 0  |     return true;  | 
56  | 0  | }  | 
57  |  |  | 
58  |  | UBool  | 
59  |  | ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,  | 
60  |  |                            const char16_t *s16, int32_t s16Length,  | 
61  | 0  |                            ByteSink &sink, Edits *edits, UErrorCode &errorCode) { | 
62  | 0  |     if (U_FAILURE(errorCode)) { return false; } | 
63  | 0  |     if ((limit - s) > INT32_MAX) { | 
64  | 0  |         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
65  | 0  |         return false;  | 
66  | 0  |     }  | 
67  | 0  |     return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);  | 
68  | 0  | }  | 
69  |  |  | 
70  |  | void  | 
71  | 0  | ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) { | 
72  | 0  |     char s8[U8_MAX_LENGTH];  | 
73  | 0  |     int32_t s8Length = 0;  | 
74  | 0  |     U8_APPEND_UNSAFE(s8, s8Length, c);  | 
75  | 0  |     if (edits != nullptr) { | 
76  | 0  |         edits->addReplace(length, s8Length);  | 
77  | 0  |     }  | 
78  | 0  |     sink.Append(s8, s8Length);  | 
79  | 0  | }  | 
80  |  |  | 
81  |  | namespace { | 
82  |  |  | 
83  |  | // See unicode/utf8.h U8_APPEND_UNSAFE().  | 
84  | 0  | inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); } | 
85  | 0  | inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); } | 
86  |  |  | 
87  |  | }  // namespace  | 
88  |  |  | 
89  |  | void  | 
90  | 0  | ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { | 
91  | 0  |     U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8  | 
92  | 0  |     char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))}; | 
93  | 0  |     sink.Append(s8, 2);  | 
94  | 0  | }  | 
95  |  |  | 
96  |  | void  | 
97  |  | ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,  | 
98  | 0  |                                       ByteSink &sink, uint32_t options, Edits *edits) { | 
99  | 0  |     U_ASSERT(length > 0);  | 
100  | 0  |     if (edits != nullptr) { | 
101  | 0  |         edits->addUnchanged(length);  | 
102  | 0  |     }  | 
103  | 0  |     if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { | 
104  | 0  |         sink.Append(reinterpret_cast<const char *>(s), length);  | 
105  | 0  |     }  | 
106  | 0  | }  | 
107  |  |  | 
108  |  | UBool  | 
109  |  | ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,  | 
110  |  |                               ByteSink &sink, uint32_t options, Edits *edits,  | 
111  | 0  |                               UErrorCode &errorCode) { | 
112  | 0  |     if (U_FAILURE(errorCode)) { return false; } | 
113  | 0  |     if ((limit - s) > INT32_MAX) { | 
114  | 0  |         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
115  | 0  |         return false;  | 
116  | 0  |     }  | 
117  | 0  |     int32_t length = static_cast<int32_t>(limit - s);  | 
118  | 0  |     if (length > 0) { | 
119  | 0  |         appendNonEmptyUnchanged(s, length, sink, options, edits);  | 
120  | 0  |     }  | 
121  | 0  |     return true;  | 
122  | 0  | }  | 
123  |  |  | 
124  | 0  | CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) { | 
125  | 0  | }  | 
126  |  |  | 
127  | 0  | CharStringByteSink::~CharStringByteSink() = default;  | 
128  |  |  | 
129  |  | void  | 
130  | 0  | CharStringByteSink::Append(const char* bytes, int32_t n) { | 
131  | 0  |     UErrorCode status = U_ZERO_ERROR;  | 
132  | 0  |     dest_.append(bytes, n, status);  | 
133  |  |     // Any errors are silently ignored.  | 
134  | 0  | }  | 
135  |  |  | 
136  |  | char*  | 
137  |  | CharStringByteSink::GetAppendBuffer(int32_t min_capacity,  | 
138  |  |                                     int32_t desired_capacity_hint,  | 
139  |  |                                     char* scratch,  | 
140  |  |                                     int32_t scratch_capacity,  | 
141  | 0  |                                     int32_t* result_capacity) { | 
142  | 0  |     if (min_capacity < 1 || scratch_capacity < min_capacity) { | 
143  | 0  |         *result_capacity = 0;  | 
144  | 0  |         return nullptr;  | 
145  | 0  |     }  | 
146  |  |  | 
147  | 0  |     UErrorCode status = U_ZERO_ERROR;  | 
148  | 0  |     char* result = dest_.getAppendBuffer(  | 
149  | 0  |             min_capacity,  | 
150  | 0  |             desired_capacity_hint,  | 
151  | 0  |             *result_capacity,  | 
152  | 0  |             status);  | 
153  | 0  |     if (U_SUCCESS(status)) { | 
154  | 0  |         return result;  | 
155  | 0  |     }  | 
156  |  |  | 
157  | 0  |     *result_capacity = scratch_capacity;  | 
158  | 0  |     return scratch;  | 
159  | 0  | }  | 
160  |  |  | 
161  |  | U_NAMESPACE_END  |