/src/assimp/contrib/utf8cpp/source/utf8/checked.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2006-2016 Nemanja Trifunovic |
2 | | |
3 | | /* |
4 | | Permission is hereby granted, free of charge, to any person or organization |
5 | | obtaining a copy of the software and accompanying documentation covered by |
6 | | this license (the "Software") to use, reproduce, display, distribute, |
7 | | execute, and transmit the Software, and to prepare derivative works of the |
8 | | Software, and to permit third-parties to whom the Software is furnished to |
9 | | do so, all subject to the following: |
10 | | |
11 | | The copyright notices in the Software and this entire statement, including |
12 | | the above license grant, this restriction and the following disclaimer, |
13 | | must be included in all copies of the Software, in whole or in part, and |
14 | | all derivative works of the Software, unless such copies or derivative |
15 | | works are solely in the form of machine-executable object code generated by |
16 | | a source language processor. |
17 | | |
18 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
21 | | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
22 | | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
23 | | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
24 | | DEALINGS IN THE SOFTWARE. |
25 | | */ |
26 | | |
27 | | |
28 | | #ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 |
29 | | #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 |
30 | | |
31 | | #include "core.h" |
32 | | #include <stdexcept> |
33 | | |
34 | | namespace utf8 |
35 | | { |
// Common root of the exception types declared in this header. It adds nothing
// to ::std::exception; it exists so that every library error shares one base.
class exception : public ::std::exception
{
};
39 | | |
40 | | // Exceptions that may be thrown from the library functions. |
41 | | class invalid_code_point : public exception { |
42 | | uint32_t cp; |
43 | | public: |
44 | 4 | invalid_code_point(uint32_t codepoint) : cp(codepoint) {} |
45 | 8 | virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; } |
46 | 0 | uint32_t code_point() const {return cp;} |
47 | | }; |
48 | | |
49 | | class invalid_utf8 : public exception { |
50 | | uint8_t u8; |
51 | | public: |
52 | 0 | invalid_utf8 (uint8_t u) : u8(u) {} |
53 | 0 | invalid_utf8 (char c) : u8(static_cast<uint8_t>(c)) {} |
54 | 0 | virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; } |
55 | 0 | uint8_t utf8_octet() const {return u8;} |
56 | | }; |
57 | | |
58 | | class invalid_utf16 : public exception { |
59 | | uint16_t u16; |
60 | | public: |
61 | 0 | invalid_utf16 (uint16_t u) : u16(u) {} |
62 | 0 | virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; } |
63 | 0 | uint16_t utf16_word() const {return u16;} |
64 | | }; |
65 | | |
66 | | class not_enough_room : public exception { |
67 | | public: |
68 | 0 | virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; } |
69 | | }; |
70 | | |
71 | | /// The library API - functions intended to be called by the users |
72 | | |
73 | | template <typename octet_iterator> |
74 | | octet_iterator append(uint32_t cp, octet_iterator result) |
75 | 14.0M | { |
76 | 14.0M | if (!utf8::internal::is_code_point_valid(cp)) |
77 | 4 | throw invalid_code_point(cp); |
78 | | |
79 | 14.0M | return internal::append(cp, result); |
80 | 14.0M | } Unexecuted instantiation: std::__1::back_insert_iterator<std::__1::vector<char, std::__1::allocator<char> > > utf8::append<std::__1::back_insert_iterator<std::__1::vector<char, std::__1::allocator<char> > > >(unsigned int, std::__1::back_insert_iterator<std::__1::vector<char, std::__1::allocator<char> > >) std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > > utf8::append<std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > > >(unsigned int, std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > >) Line | Count | Source | 75 | 14.0M | { | 76 | 14.0M | if (!utf8::internal::is_code_point_valid(cp)) | 77 | 0 | throw invalid_code_point(cp); | 78 | | | 79 | 14.0M | return internal::append(cp, result); | 80 | 14.0M | } |
Unexecuted instantiation: std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > utf8::append<std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >(unsigned int, std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >) Unexecuted instantiation: char* utf8::append<char*>(unsigned int, char*) unsigned char* utf8::append<unsigned char*>(unsigned int, unsigned char*) Line | Count | Source | 75 | 384 | { | 76 | 384 | if (!utf8::internal::is_code_point_valid(cp)) | 77 | 4 | throw invalid_code_point(cp); | 78 | | | 79 | 380 | return internal::append(cp, result); | 80 | 384 | } |
|
81 | | |
82 | | template <typename octet_iterator, typename output_iterator> |
83 | | output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) |
84 | 0 | { |
85 | 0 | while (start != end) { |
86 | 0 | octet_iterator sequence_start = start; |
87 | 0 | internal::utf_error err_code = utf8::internal::validate_next(start, end); |
88 | 0 | switch (err_code) { |
89 | 0 | case internal::UTF8_OK : |
90 | 0 | for (octet_iterator it = sequence_start; it != start; ++it) |
91 | 0 | *out++ = *it; |
92 | 0 | break; |
93 | 0 | case internal::NOT_ENOUGH_ROOM: |
94 | 0 | out = utf8::append (replacement, out); |
95 | 0 | start = end; |
96 | 0 | break; |
97 | 0 | case internal::INVALID_LEAD: |
98 | 0 | out = utf8::append (replacement, out); |
99 | 0 | ++start; |
100 | 0 | break; |
101 | 0 | case internal::INCOMPLETE_SEQUENCE: |
102 | 0 | case internal::OVERLONG_SEQUENCE: |
103 | 0 | case internal::INVALID_CODE_POINT: |
104 | 0 | out = utf8::append (replacement, out); |
105 | 0 | ++start; |
106 | 0 | // just one replacement mark for the sequence |
107 | 0 | while (start != end && utf8::internal::is_trail(*start)) |
108 | 0 | ++start; |
109 | 0 | break; |
110 | 0 | } |
111 | 0 | } |
112 | 0 | return out; |
113 | 0 | } |
114 | | |
115 | | template <typename octet_iterator, typename output_iterator> |
116 | | inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) |
117 | 0 | { |
118 | 0 | static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); |
119 | 0 | return utf8::replace_invalid(start, end, out, replacement_marker); |
120 | 0 | } |
121 | | |
122 | | template <typename octet_iterator> |
123 | | uint32_t next(octet_iterator& it, octet_iterator end) |
124 | 0 | { |
125 | 0 | uint32_t cp = 0; |
126 | 0 | internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); |
127 | 0 | switch (err_code) { |
128 | 0 | case internal::UTF8_OK : |
129 | 0 | break; |
130 | 0 | case internal::NOT_ENOUGH_ROOM : |
131 | 0 | throw not_enough_room(); |
132 | 0 | case internal::INVALID_LEAD : |
133 | 0 | case internal::INCOMPLETE_SEQUENCE : |
134 | 0 | case internal::OVERLONG_SEQUENCE : |
135 | 0 | throw invalid_utf8(static_cast<uint8_t>(*it)); |
136 | 0 | case internal::INVALID_CODE_POINT : |
137 | 0 | throw invalid_code_point(cp); |
138 | 0 | } |
139 | 0 | return cp; |
140 | 0 | } |
141 | | |
142 | | template <typename octet_iterator> |
143 | | uint32_t peek_next(octet_iterator it, octet_iterator end) |
144 | | { |
145 | | return utf8::next(it, end); |
146 | | } |
147 | | |
148 | | template <typename octet_iterator> |
149 | | uint32_t prior(octet_iterator& it, octet_iterator start) |
150 | | { |
151 | | // can't do much if it == start |
152 | | if (it == start) |
153 | | throw not_enough_room(); |
154 | | |
155 | | octet_iterator end = it; |
156 | | // Go back until we hit either a lead octet or start |
157 | | while (utf8::internal::is_trail(*(--it))) |
158 | | if (it == start) |
159 | | throw invalid_utf8(*it); // error - no lead byte in the sequence |
160 | | return utf8::peek_next(it, end); |
161 | | } |
162 | | |
163 | | template <typename octet_iterator, typename distance_type> |
164 | | void advance (octet_iterator& it, distance_type n, octet_iterator end) |
165 | | { |
166 | | const distance_type zero(0); |
167 | | if (n < zero) { |
168 | | // backward |
169 | | for (distance_type i = n; i < zero; ++i) |
170 | | utf8::prior(it, end); |
171 | | } else { |
172 | | // forward |
173 | | for (distance_type i = zero; i < n; ++i) |
174 | | utf8::next(it, end); |
175 | | } |
176 | | } |
177 | | |
178 | | template <typename octet_iterator> |
179 | | typename std::iterator_traits<octet_iterator>::difference_type |
180 | | distance (octet_iterator first, octet_iterator last) |
181 | | { |
182 | | typename std::iterator_traits<octet_iterator>::difference_type dist; |
183 | | for (dist = 0; first < last; ++dist) |
184 | | utf8::next(first, last); |
185 | | return dist; |
186 | | } |
187 | | |
188 | | template <typename u16bit_iterator, typename octet_iterator> |
189 | | octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) |
190 | 321 | { |
191 | 14.0M | while (start != end) { |
192 | 14.0M | uint32_t cp = utf8::internal::mask16(*start++); |
193 | | // Take care of surrogate pairs first |
194 | 14.0M | if (utf8::internal::is_lead_surrogate(cp)) { |
195 | 0 | if (start != end) { |
196 | 0 | uint32_t trail_surrogate = utf8::internal::mask16(*start++); |
197 | 0 | if (utf8::internal::is_trail_surrogate(trail_surrogate)) |
198 | 0 | cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; |
199 | 0 | else |
200 | 0 | throw invalid_utf16(static_cast<uint16_t>(trail_surrogate)); |
201 | 0 | } |
202 | 0 | else |
203 | 0 | throw invalid_utf16(static_cast<uint16_t>(cp)); |
204 | |
|
205 | 0 | } |
206 | | // Lone trail surrogate |
207 | 14.0M | else if (utf8::internal::is_trail_surrogate(cp)) |
208 | 0 | throw invalid_utf16(static_cast<uint16_t>(cp)); |
209 | | |
210 | 14.0M | result = utf8::append(cp, result); |
211 | 14.0M | } |
212 | 321 | return result; |
213 | 321 | } std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > > utf8::utf16to8<std::__1::__wrap_iter<char*>, std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > > >(std::__1::__wrap_iter<char*>, std::__1::__wrap_iter<char*>, std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > >) Line | Count | Source | 190 | 321 | { | 191 | 14.0M | while (start != end) { | 192 | 14.0M | uint32_t cp = utf8::internal::mask16(*start++); | 193 | | // Take care of surrogate pairs first | 194 | 14.0M | if (utf8::internal::is_lead_surrogate(cp)) { | 195 | 0 | if (start != end) { | 196 | 0 | uint32_t trail_surrogate = utf8::internal::mask16(*start++); | 197 | 0 | if (utf8::internal::is_trail_surrogate(trail_surrogate)) | 198 | 0 | cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; | 199 | 0 | else | 200 | 0 | throw invalid_utf16(static_cast<uint16_t>(trail_surrogate)); | 201 | 0 | } | 202 | 0 | else | 203 | 0 | throw invalid_utf16(static_cast<uint16_t>(cp)); | 204 | |
| 205 | 0 | } | 206 | | // Lone trail surrogate | 207 | 14.0M | else if (utf8::internal::is_trail_surrogate(cp)) | 208 | 0 | throw invalid_utf16(static_cast<uint16_t>(cp)); | 209 | | | 210 | 14.0M | result = utf8::append(cp, result); | 211 | 14.0M | } | 212 | 321 | return result; | 213 | 321 | } |
Unexecuted instantiation: std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > utf8::utf16to8<char16_t const*, std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >(char16_t const*, char16_t const*, std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >) Unexecuted instantiation: std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > > utf8::utf16to8<unsigned short const*, std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > > >(unsigned short const*, unsigned short const*, std::__1::back_insert_iterator<std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > >) Unexecuted instantiation: char* utf8::utf16to8<unsigned short const*, char*>(unsigned short const*, unsigned short const*, char*) Unexecuted instantiation: unsigned char* utf8::utf16to8<unsigned short const*, unsigned char*>(unsigned short const*, unsigned short const*, unsigned char*) |
214 | | |
215 | | template <typename u16bit_iterator, typename octet_iterator> |
216 | | u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) |
217 | 0 | { |
218 | 0 | while (start < end) { |
219 | 0 | uint32_t cp = utf8::next(start, end); |
220 | 0 | if (cp > 0xffff) { //make a surrogate pair |
221 | 0 | *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET); |
222 | 0 | *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); |
223 | 0 | } |
224 | 0 | else |
225 | 0 | *result++ = static_cast<uint16_t>(cp); |
226 | 0 | } |
227 | 0 | return result; |
228 | 0 | } |
229 | | |
230 | | template <typename octet_iterator, typename u32bit_iterator> |
231 | | octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) |
232 | 384 | { |
233 | 768 | while (start != end) |
234 | 384 | result = utf8::append(*(start++), result); |
235 | | |
236 | 384 | return result; |
237 | 384 | } Unexecuted instantiation: std::__1::back_insert_iterator<std::__1::vector<char, std::__1::allocator<char> > > utf8::utf32to8<std::__1::back_insert_iterator<std::__1::vector<char, std::__1::allocator<char> > >, unsigned int*>(unsigned int*, unsigned int*, std::__1::back_insert_iterator<std::__1::vector<char, std::__1::allocator<char> > >) Unexecuted instantiation: std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > utf8::utf32to8<std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, char32_t const*>(char32_t const*, char32_t const*, std::__1::back_insert_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >) unsigned char* utf8::utf32to8<unsigned char*, unsigned int const*>(unsigned int const*, unsigned int const*, unsigned char*) Line | Count | Source | 232 | 384 | { | 233 | 768 | while (start != end) | 234 | 384 | result = utf8::append(*(start++), result); | 235 | | | 236 | 384 | return result; | 237 | 384 | } |
|
238 | | |
239 | | template <typename octet_iterator, typename u32bit_iterator> |
240 | | u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) |
241 | 0 | { |
242 | 0 | while (start < end) |
243 | 0 | (*result++) = utf8::next(start, end); |
244 | 0 |
|
245 | 0 | return result; |
246 | 0 | } |
247 | | |
248 | | // The iterator class |
249 | | template <typename octet_iterator> |
250 | | class iterator { |
251 | | octet_iterator it; |
252 | | octet_iterator range_start; |
253 | | octet_iterator range_end; |
254 | | public: |
255 | | typedef uint32_t value_type; |
256 | | typedef uint32_t* pointer; |
257 | | typedef uint32_t& reference; |
258 | | typedef std::ptrdiff_t difference_type; |
259 | | typedef std::bidirectional_iterator_tag iterator_category; |
260 | | iterator () {} |
261 | | explicit iterator (const octet_iterator& octet_it, |
262 | | const octet_iterator& rangestart, |
263 | | const octet_iterator& rangeend) : |
264 | | it(octet_it), range_start(rangestart), range_end(rangeend) |
265 | | { |
266 | | if (it < range_start || it > range_end) |
267 | | throw std::out_of_range("Invalid utf-8 iterator position"); |
268 | | } |
269 | | // the default "big three" are OK |
270 | | octet_iterator base () const { return it; } |
271 | | uint32_t operator * () const |
272 | | { |
273 | | octet_iterator temp = it; |
274 | | return utf8::next(temp, range_end); |
275 | | } |
276 | | bool operator == (const iterator& rhs) const |
277 | | { |
278 | | if (range_start != rhs.range_start || range_end != rhs.range_end) |
279 | | throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); |
280 | | return (it == rhs.it); |
281 | | } |
282 | | bool operator != (const iterator& rhs) const |
283 | | { |
284 | | return !(operator == (rhs)); |
285 | | } |
286 | | iterator& operator ++ () |
287 | | { |
288 | | utf8::next(it, range_end); |
289 | | return *this; |
290 | | } |
291 | | iterator operator ++ (int) |
292 | | { |
293 | | iterator temp = *this; |
294 | | utf8::next(it, range_end); |
295 | | return temp; |
296 | | } |
297 | | iterator& operator -- () |
298 | | { |
299 | | utf8::prior(it, range_start); |
300 | | return *this; |
301 | | } |
302 | | iterator operator -- (int) |
303 | | { |
304 | | iterator temp = *this; |
305 | | utf8::prior(it, range_start); |
306 | | return temp; |
307 | | } |
308 | | }; // class iterator |
309 | | |
310 | | } // namespace utf8 |
311 | | |
312 | | #if UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later |
313 | | #include "cpp17.h" |
314 | | #elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later |
315 | | #include "cpp11.h" |
316 | | #endif // C++ 11 or later |
317 | | |
318 | | #endif //header guard |
319 | | |