Coverage Report

Created: 2024-10-02 06:58

/src/libevt/libuna/libuna_unicode_character.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Unicode character functions
3
 *
4
 * Copyright (C) 2008-2024, Joachim Metz <joachim.metz@gmail.com>
5
 *
6
 * Refer to AUTHORS for acknowledgements.
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20
 */
21
22
#include <common.h>
23
#include <types.h>
24
25
#include "libuna_base64_stream.h"
26
#include "libuna_codepage_iso_8859_2.h"
27
#include "libuna_codepage_iso_8859_3.h"
28
#include "libuna_codepage_iso_8859_4.h"
29
#include "libuna_codepage_iso_8859_5.h"
30
#include "libuna_codepage_iso_8859_6.h"
31
#include "libuna_codepage_iso_8859_7.h"
32
#include "libuna_codepage_iso_8859_8.h"
33
#include "libuna_codepage_iso_8859_9.h"
34
#include "libuna_codepage_iso_8859_10.h"
35
#include "libuna_codepage_iso_8859_13.h"
36
#include "libuna_codepage_iso_8859_14.h"
37
#include "libuna_codepage_iso_8859_15.h"
38
#include "libuna_codepage_iso_8859_16.h"
39
#include "libuna_codepage_koi8_r.h"
40
#include "libuna_codepage_koi8_u.h"
41
#include "libuna_codepage_mac_arabic.h"
42
#include "libuna_codepage_mac_celtic.h"
43
#include "libuna_codepage_mac_centraleurroman.h"
44
#include "libuna_codepage_mac_croatian.h"
45
#include "libuna_codepage_mac_cyrillic.h"
46
#include "libuna_codepage_mac_dingbats.h"
47
#include "libuna_codepage_mac_farsi.h"
48
#include "libuna_codepage_mac_gaelic.h"
49
#include "libuna_codepage_mac_greek.h"
50
#include "libuna_codepage_mac_icelandic.h"
51
#include "libuna_codepage_mac_inuit.h"
52
#include "libuna_codepage_mac_roman.h"
53
#include "libuna_codepage_mac_romanian.h"
54
#include "libuna_codepage_mac_russian.h"
55
#include "libuna_codepage_mac_symbol.h"
56
#include "libuna_codepage_mac_thai.h"
57
#include "libuna_codepage_mac_turkish.h"
58
#include "libuna_codepage_mac_ukrainian.h"
59
#include "libuna_codepage_windows_874.h"
60
#include "libuna_codepage_windows_932.h"
61
#include "libuna_codepage_windows_936.h"
62
#include "libuna_codepage_windows_949.h"
63
#include "libuna_codepage_windows_950.h"
64
#include "libuna_codepage_windows_1250.h"
65
#include "libuna_codepage_windows_1251.h"
66
#include "libuna_codepage_windows_1252.h"
67
#include "libuna_codepage_windows_1253.h"
68
#include "libuna_codepage_windows_1254.h"
69
#include "libuna_codepage_windows_1255.h"
70
#include "libuna_codepage_windows_1256.h"
71
#include "libuna_codepage_windows_1257.h"
72
#include "libuna_codepage_windows_1258.h"
73
#include "libuna_definitions.h"
74
#include "libuna_libcerror.h"
75
#include "libuna_types.h"
76
#include "libuna_unicode_character.h"
77
#include "libuna_unused.h"
78
79
/* Lookup table, indexed by byte value, of characters that may appear directly (unencoded) in UTF-7
 * An entry is 1 if the character may be directly encoded and 0 otherwise
 *
 * Valid directly encoded characters (RFC 2152 Set D): A-Z, a-z, 0-9, '\'', '(', ')', ',', '-', '.', '/', ':', '?'
 * Valid directly encoded whitespace: '\t', '\n', '\r', ' '
 * Valid optional directly encoded characters (RFC 2152 Set O): '!', '"', '#', '$', '%', '&', '*', ';', '<', '=', '>', '@', '[', ']', '^', '_', '`', '{', '|', '}'
 *
 * The apostrophe (0x27) is part of Set D and therefore marked as valid.
 * The backslash (0x5c) is in neither set according to RFC 2152 but is kept as accepted
 * so that input that was accepted before remains accepted.
 * The shift character '+' (0x2b) and '~' (0x7e) are not directly encodable.
 */
uint8_t libuna_unicode_character_utf7_valid_directly_encoded_character[ 256 ] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
100
101
/* Lookup table, indexed by byte value, of characters of the UTF-7 base64 alphabet
 * An entry is 1 for A-Z, a-z, 0-9, '+' and '/' and 0 for every other byte value
 */
uint8_t libuna_unicode_character_utf7_valid_base64_character[ 256 ] = {
  /* 0x00 - 0x0f */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 - 0x1f */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 - 0x2f: '+' and '/' */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 1, 0, 0, 0, 1,
  /* 0x30 - 0x3f: '0' - '9' */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40 - 0x4f: 'A' - 'O' */
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 - 0x5f: 'P' - 'Z' */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
  /* 0x60 - 0x6f: 'a' - 'o' */
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 - 0x7f: 'p' - 'z' */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
  /* 0x80 - 0x8f */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 - 0x9f */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 - 0xaf */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 - 0xbf */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 - 0xcf */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 - 0xdf */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 - 0xef */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 - 0xff */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 };
120
121
/* Determines the size of a byte stream character from an Unicode character
122
 * Adds the size to the byte stream character size value
123
 * Returns 1 if successful, 0 if the byte stream character is valid but not supported since it requires special handling or -1 on error
124
 */
125
int libuna_unicode_character_size_to_byte_stream(
126
     libuna_unicode_character_t unicode_character,
127
     int codepage,
128
     size_t *byte_stream_character_size,
129
     libcerror_error_t **error )
130
0
{
131
0
  static char *function                  = "libuna_unicode_character_size_to_byte_stream";
132
0
  size_t safe_byte_stream_character_size = 0;
133
0
  int result                             = 1;
134
135
0
  if( byte_stream_character_size == NULL )
136
0
  {
137
0
    libcerror_error_set(
138
0
     error,
139
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
140
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
141
0
     "%s: invalid byte stream character size.",
142
0
     function );
143
144
0
    return( -1 );
145
0
  }
146
0
  safe_byte_stream_character_size = *byte_stream_character_size;
147
148
0
  switch( codepage )
149
0
  {
150
0
    case LIBUNA_CODEPAGE_ASCII:
151
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
152
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
153
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
154
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
155
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
156
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
157
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
158
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
159
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
160
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
161
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
162
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
163
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
164
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
165
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
166
0
    case LIBUNA_CODEPAGE_KOI8_R:
167
0
    case LIBUNA_CODEPAGE_KOI8_U:
168
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
169
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
170
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
171
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
172
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
173
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
174
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
175
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
176
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
177
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
178
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
179
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
180
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
181
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
182
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
183
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
184
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
185
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
186
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
187
0
    case LIBUNA_CODEPAGE_WINDOWS_1252:
188
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
189
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
190
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
191
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
192
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
193
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
194
0
      safe_byte_stream_character_size += 1;
195
0
      break;
196
197
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
198
0
      result = libuna_codepage_mac_symbol_unicode_character_size_to_byte_stream(
199
0
                unicode_character,
200
0
                &safe_byte_stream_character_size,
201
0
                error );
202
0
      break;
203
204
0
    case LIBUNA_CODEPAGE_MAC_THAI:
205
0
      result = libuna_codepage_mac_thai_unicode_character_size_to_byte_stream(
206
0
                unicode_character,
207
0
                &safe_byte_stream_character_size,
208
0
                error );
209
0
      break;
210
211
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
212
0
      result = libuna_codepage_windows_932_unicode_character_size_to_byte_stream(
213
0
                unicode_character,
214
0
                &safe_byte_stream_character_size,
215
0
                error );
216
0
      break;
217
218
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
219
0
      result = libuna_codepage_windows_936_unicode_character_size_to_byte_stream(
220
0
                unicode_character,
221
0
                &safe_byte_stream_character_size,
222
0
                error );
223
0
      break;
224
225
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
226
0
      result = libuna_codepage_windows_949_unicode_character_size_to_byte_stream(
227
0
                unicode_character,
228
0
                &safe_byte_stream_character_size,
229
0
                error );
230
0
      break;
231
232
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
233
0
      result = libuna_codepage_windows_950_unicode_character_size_to_byte_stream(
234
0
                unicode_character,
235
0
                &safe_byte_stream_character_size,
236
0
                error );
237
0
      break;
238
239
0
    default:
240
0
      libcerror_error_set(
241
0
       error,
242
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
243
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
244
0
       "%s: unsupported codepage: %d.",
245
0
       function,
246
0
       codepage );
247
248
0
      return( -1 );
249
0
  }
250
0
  if( result == -1 )
251
0
  {
252
0
    libcerror_error_set(
253
0
     error,
254
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
255
0
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
256
0
     "%s: unable to determine byte stream character size.",
257
0
     function );
258
259
0
    return( -1 );
260
0
  }
261
0
  *byte_stream_character_size = safe_byte_stream_character_size;
262
263
0
  return( result );
264
0
}
265
266
/* Copies an Unicode character from a byte stream
267
 * Returns 1 if successful, 0 if the byte stream character is valid but not supported since it requires special handling or -1 on error
268
 */
269
int libuna_unicode_character_copy_from_byte_stream(
270
     libuna_unicode_character_t *unicode_character,
271
     const uint8_t *byte_stream,
272
     size_t byte_stream_size,
273
     size_t *byte_stream_index,
274
     int codepage,
275
     libcerror_error_t **error )
276
205k
{
277
205k
  static char *function                             = "libuna_unicode_character_copy_from_byte_stream";
278
205k
  libuna_unicode_character_t safe_unicode_character = 0;
279
205k
  size_t safe_byte_stream_index                     = 0;
280
205k
  uint8_t byte_stream_character                     = 0;
281
205k
  int result                                        = 1;
282
283
205k
  if( unicode_character == NULL )
284
0
  {
285
0
    libcerror_error_set(
286
0
     error,
287
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
288
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
289
0
     "%s: invalid Unicode character.",
290
0
     function );
291
292
0
    return( -1 );
293
0
  }
294
205k
  if( byte_stream == NULL )
295
0
  {
296
0
    libcerror_error_set(
297
0
     error,
298
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
299
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
300
0
     "%s: invalid byte stream.",
301
0
     function );
302
303
0
    return( -1 );
304
0
  }
305
205k
  if( byte_stream_size > (size_t) SSIZE_MAX )
306
0
  {
307
0
    libcerror_error_set(
308
0
     error,
309
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
310
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
311
0
     "%s: invalid byte stream size value exceeds maximum.",
312
0
     function );
313
314
0
    return( -1 );
315
0
  }
316
205k
  if( byte_stream_index == NULL )
317
0
  {
318
0
    libcerror_error_set(
319
0
     error,
320
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
321
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
322
0
     "%s: invalid byte stream index.",
323
0
     function );
324
325
0
    return( -1 );
326
0
  }
327
205k
  safe_byte_stream_index = *byte_stream_index;
328
329
205k
  if( safe_byte_stream_index >= byte_stream_size )
330
0
  {
331
0
    libcerror_error_set(
332
0
     error,
333
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
334
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
335
0
     "%s: byte stream too small.",
336
0
     function );
337
338
0
    return( -1 );
339
0
  }
340
205k
  byte_stream_character = byte_stream[ safe_byte_stream_index ];
341
342
205k
  switch( codepage )
343
205k
  {
344
113k
    case LIBUNA_CODEPAGE_ASCII:
345
113k
      if( byte_stream_character < 0x80 )
346
110k
      {
347
110k
        safe_unicode_character = byte_stream_character;
348
110k
      }
349
2.83k
      else
350
2.83k
      {
351
2.83k
        safe_unicode_character = 0xfffd;
352
2.83k
      }
353
113k
      safe_byte_stream_index += 1;
354
355
113k
      break;
356
357
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
358
0
      safe_unicode_character = byte_stream_character;
359
360
0
      safe_byte_stream_index += 1;
361
362
0
      break;
363
364
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
365
0
      if( byte_stream_character < 0xa0 )
366
0
      {
367
0
        safe_unicode_character = byte_stream_character;
368
0
      }
369
0
      else
370
0
      {
371
0
        byte_stream_character -= 0xa0;
372
373
0
        safe_unicode_character = libuna_codepage_iso_8859_2_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
374
0
      }
375
0
      safe_byte_stream_index += 1;
376
377
0
      break;
378
379
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
380
0
      if( byte_stream_character < 0xa0 )
381
0
      {
382
0
        safe_unicode_character = byte_stream_character;
383
0
      }
384
0
      else
385
0
      {
386
0
        byte_stream_character -= 0xa0;
387
388
0
        safe_unicode_character = libuna_codepage_iso_8859_3_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
389
0
      }
390
0
      safe_byte_stream_index += 1;
391
392
0
      break;
393
394
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
395
0
      if( byte_stream_character < 0xa0 )
396
0
      {
397
0
        safe_unicode_character = byte_stream_character;
398
0
      }
399
0
      else
400
0
      {
401
0
        byte_stream_character -= 0xa0;
402
403
0
        safe_unicode_character = libuna_codepage_iso_8859_4_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
404
0
      }
405
0
      safe_byte_stream_index += 1;
406
407
0
      break;
408
409
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
410
0
      if( byte_stream_character < 0xa0 )
411
0
      {
412
0
        safe_unicode_character = byte_stream_character;
413
0
      }
414
0
      else
415
0
      {
416
0
        byte_stream_character -= 0xa0;
417
418
0
        safe_unicode_character = libuna_codepage_iso_8859_5_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
419
0
      }
420
0
      safe_byte_stream_index += 1;
421
422
0
      break;
423
424
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
425
0
      if( byte_stream_character < 0xa0 )
426
0
      {
427
0
        safe_unicode_character = byte_stream_character;
428
0
      }
429
0
      else
430
0
      {
431
0
        byte_stream_character -= 0xa0;
432
433
0
        safe_unicode_character = libuna_codepage_iso_8859_6_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
434
0
      }
435
0
      safe_byte_stream_index += 1;
436
437
0
      break;
438
439
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
440
0
      if( byte_stream_character < 0xa0 )
441
0
      {
442
0
        safe_unicode_character = byte_stream_character;
443
0
      }
444
0
      else
445
0
      {
446
0
        byte_stream_character -= 0xa0;
447
448
0
        safe_unicode_character = libuna_codepage_iso_8859_7_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
449
0
      }
450
0
      safe_byte_stream_index += 1;
451
452
0
      break;
453
454
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
455
0
      if( byte_stream_character < 0xa0 )
456
0
      {
457
0
        safe_unicode_character = byte_stream_character;
458
0
      }
459
0
      else
460
0
      {
461
0
        byte_stream_character -= 0xa0;
462
463
0
        safe_unicode_character = libuna_codepage_iso_8859_8_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
464
0
      }
465
0
      safe_byte_stream_index += 1;
466
467
0
      break;
468
469
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
470
0
      if( byte_stream_character < 0xd0 )
471
0
      {
472
0
        safe_unicode_character = byte_stream_character;
473
0
      }
474
0
      else
475
0
      {
476
0
        byte_stream_character -= 0xd0;
477
478
0
        safe_unicode_character = libuna_codepage_iso_8859_9_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
479
0
      }
480
0
      safe_byte_stream_index += 1;
481
482
0
      break;
483
484
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
485
0
      if( byte_stream_character < 0xa0 )
486
0
      {
487
0
        safe_unicode_character = byte_stream_character;
488
0
      }
489
0
      else
490
0
      {
491
0
        byte_stream_character -= 0xa0;
492
493
0
        safe_unicode_character = libuna_codepage_iso_8859_10_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
494
0
      }
495
0
      safe_byte_stream_index += 1;
496
497
0
      break;
498
499
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
500
0
      if( byte_stream_character < 0xa1 )
501
0
      {
502
0
        safe_unicode_character = byte_stream_character;
503
0
      }
504
0
      else if( byte_stream_character < 0xdb )
505
0
      {
506
0
        safe_unicode_character = byte_stream_character + 0x0d60;
507
0
      }
508
0
      else if( byte_stream_character < 0xdf )
509
0
      {
510
0
        safe_unicode_character = 0xfffd;
511
0
      }
512
0
      else if( byte_stream_character < 0xfc )
513
0
      {
514
0
        safe_unicode_character = byte_stream_character + 0x0d60;
515
0
      }
516
0
      else
517
0
      {
518
0
        safe_unicode_character = 0xfffd;
519
0
      }
520
0
      safe_byte_stream_index += 1;
521
522
0
      break;
523
524
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
525
0
      if( byte_stream_character < 0xa0 )
526
0
      {
527
0
        safe_unicode_character = byte_stream_character;
528
0
      }
529
0
      else
530
0
      {
531
0
        byte_stream_character -= 0xa0;
532
533
0
        safe_unicode_character = libuna_codepage_iso_8859_13_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
534
0
      }
535
0
      safe_byte_stream_index += 1;
536
537
0
      break;
538
539
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
540
0
      if( byte_stream_character < 0xa0 )
541
0
      {
542
0
        safe_unicode_character = byte_stream_character;
543
0
      }
544
0
      else
545
0
      {
546
0
        byte_stream_character -= 0xa0;
547
548
0
        safe_unicode_character = libuna_codepage_iso_8859_14_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
549
0
      }
550
0
      safe_byte_stream_index += 1;
551
552
0
      break;
553
554
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
555
0
      if( ( byte_stream_character >= 0xa0 )
556
0
       && ( byte_stream_character < 0xc0 ) )
557
0
      {
558
0
        byte_stream_character -= 0xa0;
559
560
0
        safe_unicode_character = libuna_codepage_iso_8859_15_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
561
0
      }
562
0
      else
563
0
      {
564
0
        safe_unicode_character = byte_stream_character;
565
0
      }
566
0
      safe_byte_stream_index += 1;
567
568
0
      break;
569
570
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
571
0
      if( byte_stream_character < 0xa0 )
572
0
      {
573
0
        safe_unicode_character = byte_stream_character;
574
0
      }
575
0
      else
576
0
      {
577
0
        byte_stream_character -= 0xa0;
578
579
0
        safe_unicode_character = libuna_codepage_iso_8859_16_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
580
0
      }
581
0
      safe_byte_stream_index += 1;
582
583
0
      break;
584
585
0
    case LIBUNA_CODEPAGE_KOI8_R:
586
0
      result = libuna_codepage_koi8_r_copy_from_byte_stream(
587
0
                &safe_unicode_character,
588
0
                byte_stream,
589
0
                byte_stream_size,
590
0
                &safe_byte_stream_index,
591
0
                error );
592
0
      break;
593
594
0
    case LIBUNA_CODEPAGE_KOI8_U:
595
0
      result = libuna_codepage_koi8_u_copy_from_byte_stream(
596
0
                &safe_unicode_character,
597
0
                byte_stream,
598
0
                byte_stream_size,
599
0
                &safe_byte_stream_index,
600
0
                error );
601
0
      break;
602
603
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
604
0
      result = libuna_codepage_mac_arabic_copy_from_byte_stream(
605
0
                &safe_unicode_character,
606
0
                byte_stream,
607
0
                byte_stream_size,
608
0
                &safe_byte_stream_index,
609
0
                error );
610
0
      break;
611
612
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
613
0
      result = libuna_codepage_mac_celtic_copy_from_byte_stream(
614
0
                &safe_unicode_character,
615
0
                byte_stream,
616
0
                byte_stream_size,
617
0
                &safe_byte_stream_index,
618
0
                error );
619
0
      break;
620
621
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
622
0
      result = libuna_codepage_mac_centraleurroman_copy_from_byte_stream(
623
0
                &safe_unicode_character,
624
0
                byte_stream,
625
0
                byte_stream_size,
626
0
                &safe_byte_stream_index,
627
0
                error );
628
0
      break;
629
630
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
631
0
      result = libuna_codepage_mac_croatian_copy_from_byte_stream(
632
0
                &safe_unicode_character,
633
0
                byte_stream,
634
0
                byte_stream_size,
635
0
                &safe_byte_stream_index,
636
0
                error );
637
0
      break;
638
639
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
640
0
      result = libuna_codepage_mac_cyrillic_copy_from_byte_stream(
641
0
                &safe_unicode_character,
642
0
                byte_stream,
643
0
                byte_stream_size,
644
0
                &safe_byte_stream_index,
645
0
                error );
646
0
      break;
647
648
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
649
0
      result = libuna_codepage_mac_dingbats_copy_from_byte_stream(
650
0
                &safe_unicode_character,
651
0
                byte_stream,
652
0
                byte_stream_size,
653
0
                &safe_byte_stream_index,
654
0
                error );
655
0
      break;
656
657
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
658
0
      result = libuna_codepage_mac_farsi_copy_from_byte_stream(
659
0
                &safe_unicode_character,
660
0
                byte_stream,
661
0
                byte_stream_size,
662
0
                &safe_byte_stream_index,
663
0
                error );
664
0
      break;
665
666
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
667
0
      result = libuna_codepage_mac_gaelic_copy_from_byte_stream(
668
0
                &safe_unicode_character,
669
0
                byte_stream,
670
0
                byte_stream_size,
671
0
                &safe_byte_stream_index,
672
0
                error );
673
0
      break;
674
675
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
676
0
      result = libuna_codepage_mac_greek_copy_from_byte_stream(
677
0
                &safe_unicode_character,
678
0
                byte_stream,
679
0
                byte_stream_size,
680
0
                &safe_byte_stream_index,
681
0
                error );
682
0
      break;
683
684
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
685
0
      result = libuna_codepage_mac_icelandic_copy_from_byte_stream(
686
0
                &safe_unicode_character,
687
0
                byte_stream,
688
0
                byte_stream_size,
689
0
                &safe_byte_stream_index,
690
0
                error );
691
0
      break;
692
693
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
694
0
      result = libuna_codepage_mac_inuit_copy_from_byte_stream(
695
0
                &safe_unicode_character,
696
0
                byte_stream,
697
0
                byte_stream_size,
698
0
                &safe_byte_stream_index,
699
0
                error );
700
0
      break;
701
702
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
703
0
      result = libuna_codepage_mac_roman_copy_from_byte_stream(
704
0
                &safe_unicode_character,
705
0
                byte_stream,
706
0
                byte_stream_size,
707
0
                &safe_byte_stream_index,
708
0
                error );
709
0
      break;
710
711
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
712
0
      result = libuna_codepage_mac_romanian_copy_from_byte_stream(
713
0
                &safe_unicode_character,
714
0
                byte_stream,
715
0
                byte_stream_size,
716
0
                &safe_byte_stream_index,
717
0
                error );
718
0
      break;
719
720
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
721
0
      result = libuna_codepage_mac_russian_copy_from_byte_stream(
722
0
                &safe_unicode_character,
723
0
                byte_stream,
724
0
                byte_stream_size,
725
0
                &safe_byte_stream_index,
726
0
                error );
727
0
      break;
728
729
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
730
0
      result = libuna_codepage_mac_symbol_copy_from_byte_stream(
731
0
                &safe_unicode_character,
732
0
                byte_stream,
733
0
                byte_stream_size,
734
0
                &safe_byte_stream_index,
735
0
                error );
736
0
      break;
737
738
0
    case LIBUNA_CODEPAGE_MAC_THAI:
739
0
      result = libuna_codepage_mac_thai_copy_from_byte_stream(
740
0
                &safe_unicode_character,
741
0
                byte_stream,
742
0
                byte_stream_size,
743
0
                &safe_byte_stream_index,
744
0
                error );
745
0
      break;
746
747
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
748
0
      result = libuna_codepage_mac_turkish_copy_from_byte_stream(
749
0
                &safe_unicode_character,
750
0
                byte_stream,
751
0
                byte_stream_size,
752
0
                &safe_byte_stream_index,
753
0
                error );
754
0
      break;
755
756
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
757
0
      result = libuna_codepage_mac_ukrainian_copy_from_byte_stream(
758
0
                &safe_unicode_character,
759
0
                byte_stream,
760
0
                byte_stream_size,
761
0
                &safe_byte_stream_index,
762
0
                error );
763
0
      break;
764
765
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
766
0
      result = libuna_codepage_windows_874_copy_from_byte_stream(
767
0
                &safe_unicode_character,
768
0
                byte_stream,
769
0
                byte_stream_size,
770
0
                &safe_byte_stream_index,
771
0
                error );
772
0
      break;
773
774
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
775
0
      result = libuna_codepage_windows_932_copy_from_byte_stream(
776
0
                &safe_unicode_character,
777
0
                byte_stream,
778
0
                byte_stream_size,
779
0
                &safe_byte_stream_index,
780
0
                error );
781
0
      break;
782
783
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
784
0
      result = libuna_codepage_windows_936_copy_from_byte_stream(
785
0
                &safe_unicode_character,
786
0
                byte_stream,
787
0
                byte_stream_size,
788
0
                &safe_byte_stream_index,
789
0
                error );
790
0
      break;
791
792
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
793
0
      result = libuna_codepage_windows_949_copy_from_byte_stream(
794
0
                &safe_unicode_character,
795
0
                byte_stream,
796
0
                byte_stream_size,
797
0
                &safe_byte_stream_index,
798
0
                error );
799
0
      break;
800
801
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
802
0
      result = libuna_codepage_windows_950_copy_from_byte_stream(
803
0
                &safe_unicode_character,
804
0
                byte_stream,
805
0
                byte_stream_size,
806
0
                &safe_byte_stream_index,
807
0
                error );
808
0
      break;
809
810
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
811
0
      result = libuna_codepage_windows_1250_copy_from_byte_stream(
812
0
                &safe_unicode_character,
813
0
                byte_stream,
814
0
                byte_stream_size,
815
0
                &safe_byte_stream_index,
816
0
                error );
817
0
      break;
818
819
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
820
0
      result = libuna_codepage_windows_1251_copy_from_byte_stream(
821
0
                &safe_unicode_character,
822
0
                byte_stream,
823
0
                byte_stream_size,
824
0
                &safe_byte_stream_index,
825
0
                error );
826
0
      break;
827
828
92.1k
    case LIBUNA_CODEPAGE_WINDOWS_1252:
829
92.1k
      result = libuna_codepage_windows_1252_copy_from_byte_stream(
830
92.1k
                &safe_unicode_character,
831
92.1k
                byte_stream,
832
92.1k
                byte_stream_size,
833
92.1k
                &safe_byte_stream_index,
834
92.1k
                error );
835
92.1k
      break;
836
837
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
838
0
      result = libuna_codepage_windows_1253_copy_from_byte_stream(
839
0
                &safe_unicode_character,
840
0
                byte_stream,
841
0
                byte_stream_size,
842
0
                &safe_byte_stream_index,
843
0
                error );
844
0
      break;
845
846
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
847
0
      result = libuna_codepage_windows_1254_copy_from_byte_stream(
848
0
                &safe_unicode_character,
849
0
                byte_stream,
850
0
                byte_stream_size,
851
0
                &safe_byte_stream_index,
852
0
                error );
853
0
      break;
854
855
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
856
0
      result = libuna_codepage_windows_1255_copy_from_byte_stream(
857
0
                &safe_unicode_character,
858
0
                byte_stream,
859
0
                byte_stream_size,
860
0
                &safe_byte_stream_index,
861
0
                error );
862
0
      break;
863
864
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
865
0
      result = libuna_codepage_windows_1256_copy_from_byte_stream(
866
0
                &safe_unicode_character,
867
0
                byte_stream,
868
0
                byte_stream_size,
869
0
                &safe_byte_stream_index,
870
0
                error );
871
0
      break;
872
873
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
874
0
      result = libuna_codepage_windows_1257_copy_from_byte_stream(
875
0
                &safe_unicode_character,
876
0
                byte_stream,
877
0
                byte_stream_size,
878
0
                &safe_byte_stream_index,
879
0
                error );
880
0
      break;
881
882
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
883
0
      result = libuna_codepage_windows_1258_copy_from_byte_stream(
884
0
                &safe_unicode_character,
885
0
                byte_stream,
886
0
                byte_stream_size,
887
0
                &safe_byte_stream_index,
888
0
                error );
889
0
      break;
890
891
0
    default:
892
0
      libcerror_error_set(
893
0
       error,
894
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
895
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
896
0
      "%s: unsupported codepage: %d.",
897
0
       function,
898
0
       codepage );
899
900
0
      return( -1 );
901
205k
  }
902
205k
  if( result == -1 )
903
0
  {
904
0
    libcerror_error_set(
905
0
     error,
906
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
907
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
908
0
     "%s: unable to copy Unicode character from byte stream.",
909
0
     function );
910
911
0
    return( -1 );
912
0
  }
913
205k
  *unicode_character = safe_unicode_character;
914
205k
  *byte_stream_index = safe_byte_stream_index;
915
916
205k
  return( result );
917
205k
}
918
919
/* Copies an Unicode character to a byte stream
920
 * Returns 1 if successful, 0 if the Unicode character is valid but not supported since it requires special handling or -1 on error
921
 */
922
int libuna_unicode_character_copy_to_byte_stream(
923
     libuna_unicode_character_t unicode_character,
924
     uint8_t *byte_stream,
925
     size_t byte_stream_size,
926
     size_t *byte_stream_index,
927
     int codepage,
928
     libcerror_error_t **error )
929
0
{
930
0
  static char *function         = "libuna_unicode_character_copy_to_byte_stream";
931
0
  size_t safe_byte_stream_index = 0;
932
0
  int result                    = 1;
933
934
0
  if( byte_stream == NULL )
935
0
  {
936
0
    libcerror_error_set(
937
0
     error,
938
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
939
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
940
0
     "%s: invalid byte stream.",
941
0
     function );
942
943
0
    return( -1 );
944
0
  }
945
0
  if( byte_stream_size > (size_t) SSIZE_MAX )
946
0
  {
947
0
    libcerror_error_set(
948
0
     error,
949
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
950
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
951
0
     "%s: invalid byte stream size value exceeds maximum.",
952
0
     function );
953
954
0
    return( -1 );
955
0
  }
956
0
  if( byte_stream_index == NULL )
957
0
  {
958
0
    libcerror_error_set(
959
0
     error,
960
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
961
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
962
0
     "%s: invalid byte stream index.",
963
0
     function );
964
965
0
    return( -1 );
966
0
  }
967
0
  safe_byte_stream_index = *byte_stream_index;
968
969
0
  if( safe_byte_stream_index >= byte_stream_size )
970
0
  {
971
0
    libcerror_error_set(
972
0
     error,
973
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
974
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
975
0
     "%s: byte stream too small.",
976
0
     function );
977
978
0
    return( -1 );
979
0
  }
980
0
  switch( codepage )
981
0
  {
982
0
    case LIBUNA_CODEPAGE_ASCII:
983
0
      if( unicode_character < 0x0080 )
984
0
      {
985
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
986
0
      }
987
0
      else
988
0
      {
989
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
990
0
      }
991
0
      safe_byte_stream_index += 1;
992
993
0
      break;
994
995
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
996
0
      if( unicode_character < 0x0100 )
997
0
      {
998
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
999
0
      }
1000
0
      else
1001
0
      {
1002
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1003
0
      }
1004
0
      safe_byte_stream_index += 1;
1005
1006
0
      break;
1007
1008
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
1009
0
      if( unicode_character < 0x00a0 )
1010
0
      {
1011
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1012
0
      }
1013
0
      else if( ( unicode_character >= 0x00a0 )
1014
0
            && ( unicode_character < 0x0120 ) )
1015
0
      {
1016
0
        unicode_character -= 0x00a0;
1017
1018
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1019
0
      }
1020
0
      else if( ( unicode_character >= 0x0138 )
1021
0
            && ( unicode_character < 0x0180 ) )
1022
0
      {
1023
0
        unicode_character -= 0x0138;
1024
1025
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x0138[ unicode_character ];
1026
0
      }
1027
0
      else if( ( unicode_character >= 0x02d8 )
1028
0
            && ( unicode_character < 0x02e0 ) )
1029
0
      {
1030
0
        unicode_character -= 0x02d8;
1031
1032
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
1033
0
      }
1034
0
      else if( unicode_character == 0x02c7 )
1035
0
      {
1036
0
        byte_stream[ safe_byte_stream_index ] = 0xb7;
1037
0
      }
1038
0
      else
1039
0
      {
1040
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1041
0
      }
1042
0
      safe_byte_stream_index += 1;
1043
1044
0
      break;
1045
1046
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
1047
0
      if( unicode_character < 0x00a0 )
1048
0
      {
1049
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1050
0
      }
1051
0
      else if( ( unicode_character >= 0x00a0 )
1052
0
            && ( unicode_character < 0x0100 ) )
1053
0
      {
1054
0
        unicode_character -= 0x00a0;
1055
1056
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1057
0
      }
1058
0
      else if( ( unicode_character >= 0x0108 )
1059
0
            && ( unicode_character < 0x0110 ) )
1060
0
      {
1061
0
        unicode_character -= 0x0108;
1062
1063
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0108[ unicode_character ];
1064
0
      }
1065
0
      else if( ( unicode_character >= 0x0118 )
1066
0
            && ( unicode_character < 0x0128 ) )
1067
0
      {
1068
0
        unicode_character -= 0x0118;
1069
1070
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0118[ unicode_character ];
1071
0
      }
1072
0
      else if( ( unicode_character >= 0x0130 )
1073
0
            && ( unicode_character < 0x0138 ) )
1074
0
      {
1075
0
        unicode_character -= 0x0130;
1076
1077
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0130[ unicode_character ];
1078
0
      }
1079
0
      else if( ( unicode_character >= 0x0158 )
1080
0
            && ( unicode_character < 0x0160 ) )
1081
0
      {
1082
0
        unicode_character -= 0x0158;
1083
1084
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0158[ unicode_character ];
1085
0
      }
1086
0
      else switch( unicode_character )
1087
0
      {
1088
0
        case 0x016c:
1089
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1090
0
          break;
1091
1092
0
        case 0x016d:
1093
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1094
0
          break;
1095
1096
0
        case 0x017b:
1097
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1098
0
          break;
1099
1100
0
        case 0x017c:
1101
0
          byte_stream[ safe_byte_stream_index ] = 0xbf;
1102
0
          break;
1103
1104
0
        case 0x02d8:
1105
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1106
0
          break;
1107
1108
0
        case 0x02d9:
1109
0
          byte_stream[ safe_byte_stream_index ] = 0xff;
1110
0
          break;
1111
1112
0
        default:
1113
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1114
0
          break;
1115
0
      }
1116
0
      safe_byte_stream_index += 1;
1117
1118
0
      break;
1119
1120
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
1121
0
      if( unicode_character < 0x00a0 )
1122
0
      {
1123
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1124
0
      }
1125
0
      else if( ( unicode_character >= 0x00a0 )
1126
0
            && ( unicode_character < 0x0158 ) )
1127
0
      {
1128
0
        unicode_character -= 0x00a0;
1129
1130
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1131
0
      }
1132
0
      else if( ( unicode_character >= 0x0160 )
1133
0
            && ( unicode_character < 0x0180 ) )
1134
0
      {
1135
0
        unicode_character -= 0x0160;
1136
1137
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1138
0
      }
1139
0
      else switch( unicode_character )
1140
0
      {
1141
0
        case 0x02c7:
1142
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1143
0
          break;
1144
1145
0
        case 0x02d9:
1146
0
          byte_stream[ safe_byte_stream_index ] = 0xff;
1147
0
          break;
1148
1149
0
        case 0x02db:
1150
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1151
0
          break;
1152
1153
0
        default:
1154
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1155
0
          break;
1156
0
      }
1157
0
      safe_byte_stream_index += 1;
1158
1159
0
      break;
1160
1161
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
1162
0
      if( unicode_character < 0x00a1 )
1163
0
      {
1164
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1165
0
      }
1166
0
      else if( ( unicode_character >= 0x0400 )
1167
0
            && ( unicode_character < 0x0460 ) )
1168
0
      {
1169
0
        unicode_character -= 0x0400;
1170
1171
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_5_unicode_to_byte_stream_base_0x0400[ unicode_character ];
1172
0
      }
1173
0
      else switch( unicode_character )
1174
0
      {
1175
0
        case 0x00a7:
1176
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1177
0
          break;
1178
1179
0
        case 0x00ad:
1180
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1181
0
          break;
1182
1183
0
        case 0x2116:
1184
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1185
0
          break;
1186
1187
0
        default:
1188
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1189
0
          break;
1190
0
      }
1191
0
      safe_byte_stream_index += 1;
1192
1193
0
      break;
1194
1195
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
1196
0
      if( unicode_character < 0x00a1 )
1197
0
      {
1198
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1199
0
      }
1200
0
      else if( ( unicode_character >= 0x0618 )
1201
0
            && ( unicode_character < 0x658 ) )
1202
0
      {
1203
0
        unicode_character -= 0x0618;
1204
1205
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_6_unicode_to_byte_stream_base_0x0618[ unicode_character ];
1206
0
      }
1207
0
      else switch( unicode_character )
1208
0
      {
1209
0
        case 0x00a4:
1210
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1211
0
          break;
1212
1213
0
        case 0x00ad:
1214
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1215
0
          break;
1216
1217
0
        case 0x060c:
1218
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1219
0
          break;
1220
1221
0
        default:
1222
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1223
0
          break;
1224
0
      }
1225
0
      safe_byte_stream_index += 1;
1226
1227
0
      break;
1228
1229
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
1230
0
      if( unicode_character < 0x00a0 )
1231
0
      {
1232
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1233
0
      }
1234
0
      else if( ( unicode_character >= 0x00a0 )
1235
0
            && ( unicode_character < 0x00b8 ) )
1236
0
      {
1237
0
        unicode_character -= 0x00a0;
1238
1239
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1240
0
      }
1241
0
      else if( ( unicode_character >= 0x0380 )
1242
0
            && ( unicode_character < 0x03d0 ) )
1243
0
      {
1244
0
        unicode_character -= 0x0380;
1245
1246
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x0380[ unicode_character ];
1247
0
      }
1248
0
      else switch( unicode_character )
1249
0
      {
1250
0
        case 0x00bb:
1251
0
          byte_stream[ safe_byte_stream_index ] = 0xbb;
1252
0
          break;
1253
1254
0
        case 0x00bd:
1255
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1256
0
          break;
1257
1258
0
        case 0x037a:
1259
0
          byte_stream[ safe_byte_stream_index ] = 0xaa;
1260
0
          break;
1261
1262
0
        case 0x2015:
1263
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1264
0
          break;
1265
1266
0
        case 0x2018:
1267
0
          byte_stream[ safe_byte_stream_index ] = 0xa1;
1268
0
          break;
1269
1270
0
        case 0x2019:
1271
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1272
0
          break;
1273
1274
0
        case 0x20ac:
1275
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1276
0
          break;
1277
1278
0
        case 0x20af:
1279
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1280
0
          break;
1281
1282
0
        default:
1283
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1284
0
          break;
1285
0
      }
1286
0
      safe_byte_stream_index += 1;
1287
1288
0
      break;
1289
1290
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
1291
0
      if( unicode_character < 0x00a0 )
1292
0
      {
1293
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1294
0
      }
1295
0
      else if( ( unicode_character >= 0x00a0 )
1296
0
            && ( unicode_character < 0x00c0 ) )
1297
0
      {
1298
0
        unicode_character -= 0x00a0;
1299
1300
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1301
0
      }
1302
0
      else if( ( unicode_character >= 0x05d0 )
1303
0
            && ( unicode_character < 0x05f0 ) )
1304
0
      {
1305
0
        unicode_character -= 0x05d0;
1306
1307
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
1308
0
      }
1309
0
      else switch( unicode_character )
1310
0
      {
1311
0
        case 0x00d7:
1312
0
          byte_stream[ safe_byte_stream_index ] = 0xaa;
1313
0
          break;
1314
1315
0
        case 0x00f7:
1316
0
          byte_stream[ safe_byte_stream_index ] = 0xba;
1317
0
          break;
1318
1319
0
        case 0x200e:
1320
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1321
0
          break;
1322
1323
0
        case 0x200f:
1324
0
          byte_stream[ safe_byte_stream_index ] = 0xfe;
1325
0
          break;
1326
1327
0
        case 0x2017:
1328
0
          byte_stream[ safe_byte_stream_index ] = 0xdf;
1329
0
          break;
1330
1331
0
        default:
1332
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1333
0
          break;
1334
0
      }
1335
0
      safe_byte_stream_index += 1;
1336
1337
0
      break;
1338
1339
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
1340
0
      if( unicode_character < 0x00d0 )
1341
0
      {
1342
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1343
0
      }
1344
0
      else if( ( unicode_character >= 0x00d0 )
1345
0
            && ( unicode_character < 0x0100 ) )
1346
0
      {
1347
0
        unicode_character -= 0x00d0;
1348
1349
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_9_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
1350
0
      }
1351
0
      else switch( unicode_character )
1352
0
      {
1353
0
        case 0x011e:
1354
0
          byte_stream[ safe_byte_stream_index ] = 0xd0;
1355
0
          break;
1356
1357
0
        case 0x011f:
1358
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1359
0
          break;
1360
1361
0
        case 0x0130:
1362
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1363
0
          break;
1364
1365
0
        case 0x0131:
1366
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1367
0
          break;
1368
1369
0
        case 0x015e:
1370
0
          byte_stream[ safe_byte_stream_index ] = 0xde;
1371
0
          break;
1372
1373
0
        case 0x015f:
1374
0
          byte_stream[ safe_byte_stream_index ] = 0xfe;
1375
0
          break;
1376
1377
0
        default:
1378
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1379
0
          break;
1380
0
      }
1381
0
      safe_byte_stream_index += 1;
1382
1383
0
      break;
1384
1385
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
1386
0
      if( unicode_character < 0x00a1 )
1387
0
      {
1388
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1389
0
      }
1390
0
      else if( ( unicode_character >= 0x00c0 )
1391
0
            && ( unicode_character < 0x0150 ) )
1392
0
      {
1393
0
        unicode_character -= 0x00c0;
1394
1395
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1396
0
      }
1397
0
      else if( ( unicode_character >= 0x0160 )
1398
0
            && ( unicode_character < 0x0170 ) )
1399
0
      {
1400
0
        unicode_character -= 0x0160;
1401
1402
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1403
0
      }
1404
0
      else switch( unicode_character )
1405
0
      {
1406
0
        case 0x00a7:
1407
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1408
0
          break;
1409
1410
0
        case 0x00ad:
1411
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1412
0
          break;
1413
1414
0
        case 0x00b0:
1415
0
          byte_stream[ safe_byte_stream_index ] = 0xb0;
1416
0
          break;
1417
1418
0
        case 0x00b7:
1419
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1420
0
          break;
1421
1422
0
        case 0x0172:
1423
0
          byte_stream[ safe_byte_stream_index ] = 0xd9;
1424
0
          break;
1425
1426
0
        case 0x0173:
1427
0
          byte_stream[ safe_byte_stream_index ] = 0xf9;
1428
0
          break;
1429
1430
0
        case 0x017d:
1431
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1432
0
          break;
1433
1434
0
        case 0x017e:
1435
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1436
0
          break;
1437
1438
0
        case 0x2015:
1439
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1440
0
          break;
1441
1442
0
        default:
1443
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1444
0
          break;
1445
0
      }
1446
0
      safe_byte_stream_index += 1;
1447
1448
0
      break;
1449
1450
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
1451
0
      if( unicode_character < 0x00a1 )
1452
0
      {
1453
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1454
0
      }
1455
0
      else if( ( unicode_character >= 0x0e01 )
1456
0
            && ( unicode_character < 0x0e3b ) )
1457
0
      {
1458
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1459
0
      }
1460
0
      else if( ( unicode_character >= 0x0e3f )
1461
0
            && ( unicode_character < 0x0e5c ) )
1462
0
      {
1463
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1464
0
      }
1465
0
      else
1466
0
      {
1467
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1468
0
      }
1469
0
      safe_byte_stream_index += 1;
1470
1471
0
      break;
1472
1473
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
1474
0
      if( unicode_character < 0x00a0 )
1475
0
      {
1476
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1477
0
      }
1478
0
      else if( ( unicode_character >= 0x00a0 )
1479
0
            && ( unicode_character < 0x0180 ) )
1480
0
      {
1481
0
        unicode_character -= 0x00a0;
1482
1483
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1484
0
      }
1485
0
      else if( ( unicode_character >= 0x2018 )
1486
0
             && ( unicode_character < 0x2020 ) )
1487
0
      {
1488
0
        unicode_character -= 0x2018;
1489
1490
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x2018[ unicode_character ];
1491
0
      }
1492
0
      else
1493
0
      {
1494
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1495
0
      }
1496
0
      safe_byte_stream_index += 1;
1497
1498
0
      break;
1499
1500
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
1501
0
      if( unicode_character < 0x00a1 )
1502
0
      {
1503
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1504
0
      }
1505
0
      else if( ( unicode_character >= 0x00c0 )
1506
0
            && ( unicode_character < 0x0100 ) )
1507
0
      {
1508
0
        unicode_character -= 0x00c0;
1509
1510
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1511
0
      }
1512
0
      else if( ( unicode_character >= 0x0170 )
1513
0
            && ( unicode_character < 0x0178 ) )
1514
0
      {
1515
0
        unicode_character -= 0x0170;
1516
1517
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x0170[ unicode_character ];
1518
0
      }
1519
0
      else if( ( unicode_character >= 0x1e80 )
1520
0
            && ( unicode_character < 0x1e88 ) )
1521
0
      {
1522
0
        unicode_character -= 0x1e80;
1523
1524
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x1e80[ unicode_character ];
1525
0
      }
1526
0
      else switch( unicode_character )
1527
0
      {
1528
0
        case 0x00a3:
1529
0
          byte_stream[ safe_byte_stream_index ] = 0xa3;
1530
0
          break;
1531
1532
0
        case 0x00a7:
1533
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1534
0
          break;
1535
1536
0
        case 0x00a9:
1537
0
          byte_stream[ safe_byte_stream_index ] = 0xa9;
1538
0
          break;
1539
1540
0
        case 0x00ad:
1541
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1542
0
          break;
1543
1544
0
        case 0x00ae:
1545
0
          byte_stream[ safe_byte_stream_index ] = 0xae;
1546
0
          break;
1547
1548
0
        case 0x00b6:
1549
0
          byte_stream[ safe_byte_stream_index ] = 0xb6;
1550
0
          break;
1551
1552
0
        case 0x010a:
1553
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1554
0
          break;
1555
1556
0
        case 0x010b:
1557
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1558
0
          break;
1559
1560
0
        case 0x0120:
1561
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1562
0
          break;
1563
1564
0
        case 0x0121:
1565
0
          byte_stream[ safe_byte_stream_index ] = 0xb3;
1566
0
          break;
1567
1568
0
        case 0x0178:
1569
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1570
0
          break;
1571
1572
0
        case 0x1e02:
1573
0
          byte_stream[ safe_byte_stream_index ] = 0xa1;
1574
0
          break;
1575
1576
0
        case 0x1e03:
1577
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1578
0
          break;
1579
1580
0
        case 0x1e0a:
1581
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1582
0
          break;
1583
1584
0
        case 0x1e0b:
1585
0
          byte_stream[ safe_byte_stream_index ] = 0xab;
1586
0
          break;
1587
1588
0
        case 0x1e1e:
1589
0
          byte_stream[ safe_byte_stream_index ] = 0xb0;
1590
0
          break;
1591
1592
0
        case 0x1e1f:
1593
0
          byte_stream[ safe_byte_stream_index ] = 0xb1;
1594
0
          break;
1595
1596
0
        case 0x1e40:
1597
0
          byte_stream[ safe_byte_stream_index ] = 0xb4;
1598
0
          break;
1599
1600
0
        case 0x1e41:
1601
0
          byte_stream[ safe_byte_stream_index ] = 0xb5;
1602
0
          break;
1603
1604
0
        case 0x1e56:
1605
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1606
0
          break;
1607
1608
0
        case 0x1e57:
1609
0
          byte_stream[ safe_byte_stream_index ] = 0xb9;
1610
0
          break;
1611
1612
0
        case 0x1e60:
1613
0
          byte_stream[ safe_byte_stream_index ] = 0xbb;
1614
0
          break;
1615
1616
0
        case 0x1e61:
1617
0
          byte_stream[ safe_byte_stream_index ] = 0xbf;
1618
0
          break;
1619
1620
0
        case 0x1e6a:
1621
0
          byte_stream[ safe_byte_stream_index ] = 0xd7;
1622
0
          break;
1623
1624
0
        case 0x1e6b:
1625
0
          byte_stream[ safe_byte_stream_index ] = 0xf7;
1626
0
          break;
1627
1628
0
        case 0x1ef2:
1629
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1630
0
          break;
1631
1632
0
        case 0x1ef3:
1633
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1634
0
          break;
1635
1636
0
        default:
1637
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1638
0
          break;
1639
0
      }
1640
0
      safe_byte_stream_index += 1;
1641
1642
0
      break;
1643
1644
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
1645
0
      if( unicode_character < 0x00a0 )
1646
0
      {
1647
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1648
0
      }
1649
0
      else if( ( unicode_character >= 0x00a0 )
1650
0
            && ( unicode_character < 0x00c0 ) )
1651
0
      {
1652
0
        unicode_character -= 0x00a0;
1653
1654
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_15_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1655
0
      }
1656
0
      else if( unicode_character < 0x0100 )
1657
0
      {
1658
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1659
0
      }
1660
0
      else switch( unicode_character )
1661
0
      {
1662
0
        case 0x0152:
1663
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1664
0
          break;
1665
1666
0
        case 0x0153:
1667
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1668
0
          break;
1669
1670
0
        case 0x0160:
1671
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1672
0
          break;
1673
1674
0
        case 0x0161:
1675
0
          byte_stream[ safe_byte_stream_index ] = 0xa8;
1676
0
          break;
1677
1678
0
        case 0x0178:
1679
0
          byte_stream[ safe_byte_stream_index ] = 0xbe;
1680
0
          break;
1681
1682
0
        case 0x017d:
1683
0
          byte_stream[ safe_byte_stream_index ] = 0xb4;
1684
0
          break;
1685
1686
0
        case 0x017e:
1687
0
          byte_stream[ safe_byte_stream_index ] = 0xb8;
1688
0
          break;
1689
1690
0
        case 0x20ac:
1691
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1692
0
          break;
1693
1694
0
        default:
1695
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1696
0
          break;
1697
0
      }
1698
0
      safe_byte_stream_index += 1;
1699
1700
0
      break;
1701
1702
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
1703
0
      if( unicode_character < 0x00a1 )
1704
0
      {
1705
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1706
0
      }
1707
0
      else if( ( unicode_character >= 0x00a8 )
1708
0
            && ( unicode_character < 0x0108 ) )
1709
0
      {
1710
0
        unicode_character -= 0x00a8;
1711
1712
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x00a8[ unicode_character ];
1713
0
      }
1714
0
      else if( ( unicode_character >= 0x0140 )
1715
0
            && ( unicode_character < 0x0148 ) )
1716
0
      {
1717
0
        unicode_character -= 0x0140;
1718
1719
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0140[ unicode_character ];
1720
0
      }
1721
0
      else if( ( unicode_character >= 0x0150 )
1722
0
            && ( unicode_character < 0x0158 ) )
1723
0
      {
1724
0
        unicode_character -= 0x0150;
1725
1726
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0150[ unicode_character ];
1727
0
      }
1728
0
      else if( ( unicode_character >= 0x0178 )
1729
0
            && ( unicode_character < 0x0180 ) )
1730
0
      {
1731
0
        unicode_character -= 0x0178;
1732
1733
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0178[ unicode_character ];
1734
0
      }
1735
0
      else if( ( unicode_character >= 0x0218 )
1736
0
            && ( unicode_character < 0x0220 ) )
1737
0
      {
1738
0
        unicode_character -= 0x0218;
1739
1740
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0218[ unicode_character ];
1741
0
      }
1742
0
      else switch( unicode_character )
1743
0
      {
1744
0
        case 0x00a7:
1745
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1746
0
          break;
1747
1748
0
        case 0x010c:
1749
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1750
0
          break;
1751
1752
0
        case 0x010d:
1753
0
          byte_stream[ safe_byte_stream_index ] = 0xb9;
1754
0
          break;
1755
1756
0
        case 0x0110:
1757
0
          byte_stream[ safe_byte_stream_index ] = 0xd0;
1758
0
          break;
1759
1760
0
        case 0x0111:
1761
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1762
0
          break;
1763
1764
0
        case 0x0118:
1765
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1766
0
          break;
1767
1768
0
        case 0x0119:
1769
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1770
0
          break;
1771
1772
0
        case 0x015a:
1773
0
          byte_stream[ safe_byte_stream_index ] = 0xd7;
1774
0
          break;
1775
1776
0
        case 0x015b:
1777
0
          byte_stream[ safe_byte_stream_index ] = 0xf7;
1778
0
          break;
1779
1780
0
        case 0x0160:
1781
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1782
0
          break;
1783
1784
0
        case 0x0161:
1785
0
          byte_stream[ safe_byte_stream_index ] = 0xa8;
1786
0
          break;
1787
1788
0
        case 0x0170:
1789
0
          byte_stream[ safe_byte_stream_index ] = 0xd8;
1790
0
          break;
1791
1792
0
        case 0x0171:
1793
0
          byte_stream[ safe_byte_stream_index ] = 0xf8;
1794
0
          break;
1795
1796
0
        case 0x201d:
1797
0
          byte_stream[ safe_byte_stream_index ] = 0xb5;
1798
0
          break;
1799
1800
0
        case 0x201e:
1801
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1802
0
          break;
1803
1804
0
        case 0x20ac:
1805
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1806
0
          break;
1807
1808
0
        default:
1809
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1810
0
          break;
1811
0
      }
1812
0
      safe_byte_stream_index += 1;
1813
1814
0
      break;
1815
1816
0
    case LIBUNA_CODEPAGE_KOI8_R:
1817
0
      result = libuna_codepage_koi8_r_copy_to_byte_stream(
1818
0
                unicode_character,
1819
0
                byte_stream,
1820
0
                byte_stream_size,
1821
0
                &safe_byte_stream_index,
1822
0
                error );
1823
0
      break;
1824
1825
0
    case LIBUNA_CODEPAGE_KOI8_U:
1826
0
      result = libuna_codepage_koi8_u_copy_to_byte_stream(
1827
0
                unicode_character,
1828
0
                byte_stream,
1829
0
                byte_stream_size,
1830
0
                &safe_byte_stream_index,
1831
0
                error );
1832
0
      break;
1833
1834
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
1835
0
      result = libuna_codepage_mac_arabic_copy_to_byte_stream(
1836
0
                unicode_character,
1837
0
                byte_stream,
1838
0
                byte_stream_size,
1839
0
                &safe_byte_stream_index,
1840
0
                error );
1841
0
      break;
1842
1843
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
1844
0
      result = libuna_codepage_mac_celtic_copy_to_byte_stream(
1845
0
                unicode_character,
1846
0
                byte_stream,
1847
0
                byte_stream_size,
1848
0
                &safe_byte_stream_index,
1849
0
                error );
1850
0
      break;
1851
1852
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
1853
0
      result = libuna_codepage_mac_centraleurroman_copy_to_byte_stream(
1854
0
                unicode_character,
1855
0
                byte_stream,
1856
0
                byte_stream_size,
1857
0
                &safe_byte_stream_index,
1858
0
                error );
1859
0
      break;
1860
1861
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
1862
0
      result = libuna_codepage_mac_croatian_copy_to_byte_stream(
1863
0
                unicode_character,
1864
0
                byte_stream,
1865
0
                byte_stream_size,
1866
0
                &safe_byte_stream_index,
1867
0
                error );
1868
0
      break;
1869
1870
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
1871
0
      result = libuna_codepage_mac_cyrillic_copy_to_byte_stream(
1872
0
                unicode_character,
1873
0
                byte_stream,
1874
0
                byte_stream_size,
1875
0
                &safe_byte_stream_index,
1876
0
                error );
1877
0
      break;
1878
1879
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
1880
0
      result = libuna_codepage_mac_dingbats_copy_to_byte_stream(
1881
0
                unicode_character,
1882
0
                byte_stream,
1883
0
                byte_stream_size,
1884
0
                &safe_byte_stream_index,
1885
0
                error );
1886
0
      break;
1887
1888
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
1889
0
      result = libuna_codepage_mac_farsi_copy_to_byte_stream(
1890
0
                unicode_character,
1891
0
                byte_stream,
1892
0
                byte_stream_size,
1893
0
                &safe_byte_stream_index,
1894
0
                error );
1895
0
      break;
1896
1897
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
1898
0
      result = libuna_codepage_mac_gaelic_copy_to_byte_stream(
1899
0
                unicode_character,
1900
0
                byte_stream,
1901
0
                byte_stream_size,
1902
0
                &safe_byte_stream_index,
1903
0
                error );
1904
0
      break;
1905
1906
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
1907
0
      result = libuna_codepage_mac_greek_copy_to_byte_stream(
1908
0
                unicode_character,
1909
0
                byte_stream,
1910
0
                byte_stream_size,
1911
0
                &safe_byte_stream_index,
1912
0
                error );
1913
0
      break;
1914
1915
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
1916
0
      result = libuna_codepage_mac_icelandic_copy_to_byte_stream(
1917
0
                unicode_character,
1918
0
                byte_stream,
1919
0
                byte_stream_size,
1920
0
                &safe_byte_stream_index,
1921
0
                error );
1922
0
      break;
1923
1924
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
1925
0
      result = libuna_codepage_mac_inuit_copy_to_byte_stream(
1926
0
                unicode_character,
1927
0
                byte_stream,
1928
0
                byte_stream_size,
1929
0
                &safe_byte_stream_index,
1930
0
                error );
1931
0
      break;
1932
1933
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
1934
0
      result = libuna_codepage_mac_roman_copy_to_byte_stream(
1935
0
                unicode_character,
1936
0
                byte_stream,
1937
0
                byte_stream_size,
1938
0
                &safe_byte_stream_index,
1939
0
                error );
1940
0
      break;
1941
1942
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
1943
0
      result = libuna_codepage_mac_romanian_copy_to_byte_stream(
1944
0
                unicode_character,
1945
0
                byte_stream,
1946
0
                byte_stream_size,
1947
0
                &safe_byte_stream_index,
1948
0
                error );
1949
0
      break;
1950
1951
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
1952
0
      result = libuna_codepage_mac_russian_copy_to_byte_stream(
1953
0
                unicode_character,
1954
0
                byte_stream,
1955
0
                byte_stream_size,
1956
0
                &safe_byte_stream_index,
1957
0
                error );
1958
0
      break;
1959
1960
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
1961
0
      result = libuna_codepage_mac_symbol_copy_to_byte_stream(
1962
0
                unicode_character,
1963
0
                byte_stream,
1964
0
                byte_stream_size,
1965
0
                &safe_byte_stream_index,
1966
0
                error );
1967
0
      break;
1968
1969
0
    case LIBUNA_CODEPAGE_MAC_THAI:
1970
0
      result = libuna_codepage_mac_thai_copy_to_byte_stream(
1971
0
                unicode_character,
1972
0
                byte_stream,
1973
0
                byte_stream_size,
1974
0
                &safe_byte_stream_index,
1975
0
                error );
1976
0
      break;
1977
1978
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
1979
0
      result = libuna_codepage_mac_turkish_copy_to_byte_stream(
1980
0
                unicode_character,
1981
0
                byte_stream,
1982
0
                byte_stream_size,
1983
0
                &safe_byte_stream_index,
1984
0
                error );
1985
0
      break;
1986
1987
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
1988
0
      result = libuna_codepage_mac_ukrainian_copy_to_byte_stream(
1989
0
                unicode_character,
1990
0
                byte_stream,
1991
0
                byte_stream_size,
1992
0
                &safe_byte_stream_index,
1993
0
                error );
1994
0
      break;
1995
1996
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
1997
0
      result = libuna_codepage_windows_874_copy_to_byte_stream(
1998
0
                unicode_character,
1999
0
                byte_stream,
2000
0
                byte_stream_size,
2001
0
                &safe_byte_stream_index,
2002
0
                error );
2003
0
      break;
2004
2005
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
2006
0
      result = libuna_codepage_windows_932_copy_to_byte_stream(
2007
0
                unicode_character,
2008
0
                byte_stream,
2009
0
                byte_stream_size,
2010
0
                &safe_byte_stream_index,
2011
0
                error );
2012
0
      break;
2013
2014
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
2015
0
      result = libuna_codepage_windows_936_copy_to_byte_stream(
2016
0
                unicode_character,
2017
0
                byte_stream,
2018
0
                byte_stream_size,
2019
0
                &safe_byte_stream_index,
2020
0
                error );
2021
0
      break;
2022
2023
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
2024
0
      result = libuna_codepage_windows_949_copy_to_byte_stream(
2025
0
                unicode_character,
2026
0
                byte_stream,
2027
0
                byte_stream_size,
2028
0
                &safe_byte_stream_index,
2029
0
                error );
2030
0
      break;
2031
2032
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
2033
0
      result = libuna_codepage_windows_950_copy_to_byte_stream(
2034
0
                unicode_character,
2035
0
                byte_stream,
2036
0
                byte_stream_size,
2037
0
                &safe_byte_stream_index,
2038
0
                error );
2039
0
      break;
2040
2041
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
2042
0
      result = libuna_codepage_windows_1250_copy_to_byte_stream(
2043
0
                unicode_character,
2044
0
                byte_stream,
2045
0
                byte_stream_size,
2046
0
                &safe_byte_stream_index,
2047
0
                error );
2048
0
      break;
2049
2050
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
2051
0
      result = libuna_codepage_windows_1251_copy_to_byte_stream(
2052
0
                unicode_character,
2053
0
                byte_stream,
2054
0
                byte_stream_size,
2055
0
                &safe_byte_stream_index,
2056
0
                error );
2057
0
      break;
2058
2059
0
    case LIBUNA_CODEPAGE_WINDOWS_1252:
2060
0
      result = libuna_codepage_windows_1252_copy_to_byte_stream(
2061
0
                unicode_character,
2062
0
                byte_stream,
2063
0
                byte_stream_size,
2064
0
                &safe_byte_stream_index,
2065
0
                error );
2066
0
      break;
2067
2068
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
2069
0
      result = libuna_codepage_windows_1253_copy_to_byte_stream(
2070
0
                unicode_character,
2071
0
                byte_stream,
2072
0
                byte_stream_size,
2073
0
                &safe_byte_stream_index,
2074
0
                error );
2075
0
      break;
2076
2077
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
2078
0
      result = libuna_codepage_windows_1254_copy_to_byte_stream(
2079
0
                unicode_character,
2080
0
                byte_stream,
2081
0
                byte_stream_size,
2082
0
                &safe_byte_stream_index,
2083
0
                error );
2084
0
      break;
2085
2086
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
2087
0
      result = libuna_codepage_windows_1255_copy_to_byte_stream(
2088
0
                unicode_character,
2089
0
                byte_stream,
2090
0
                byte_stream_size,
2091
0
                &safe_byte_stream_index,
2092
0
                error );
2093
0
      break;
2094
2095
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
2096
0
      result = libuna_codepage_windows_1256_copy_to_byte_stream(
2097
0
                unicode_character,
2098
0
                byte_stream,
2099
0
                byte_stream_size,
2100
0
                &safe_byte_stream_index,
2101
0
                error );
2102
0
      break;
2103
2104
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
2105
0
      result = libuna_codepage_windows_1257_copy_to_byte_stream(
2106
0
                unicode_character,
2107
0
                byte_stream,
2108
0
                byte_stream_size,
2109
0
                &safe_byte_stream_index,
2110
0
                error );
2111
0
      break;
2112
2113
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
2114
0
      result = libuna_codepage_windows_1258_copy_to_byte_stream(
2115
0
                unicode_character,
2116
0
                byte_stream,
2117
0
                byte_stream_size,
2118
0
                &safe_byte_stream_index,
2119
0
                error );
2120
0
      break;
2121
2122
0
    default:
2123
0
      libcerror_error_set(
2124
0
       error,
2125
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2126
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2127
0
      "%s: unsupported codepage: %d.",
2128
0
       function,
2129
0
             codepage );
2130
2131
0
      return( -1 );
2132
0
  }
2133
0
  if( result == -1 )
2134
0
  {
2135
0
    libcerror_error_set(
2136
0
     error,
2137
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2138
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2139
0
     "%s: unable to copy Unicode character to byte stream.",
2140
0
     function );
2141
2142
0
    return( -1 );
2143
0
  }
2144
0
  *byte_stream_index = safe_byte_stream_index;
2145
2146
0
  return( result );
2147
0
}
2148
2149
/* Determines the size of an UCS-2 character from an Unicode character
2150
 * Adds the size to the UCS-2 character size value
2151
 * Returns 1 if successful or -1 on error
2152
 */
2153
int libuna_unicode_character_size_to_ucs2(
2154
     libuna_unicode_character_t unicode_character,
2155
     size_t *ucs2_character_size,
2156
     libcerror_error_t **error )
2157
0
{
2158
0
  static char *function = "libuna_unicode_character_size_to_ucs2";
2159
2160
0
  if( ucs2_character_size == NULL )
2161
0
  {
2162
0
    libcerror_error_set(
2163
0
     error,
2164
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2165
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2166
0
     "%s: invalid UCS-2 character size.",
2167
0
     function );
2168
2169
0
    return( -1 );
2170
0
  }
2171
  /* Determine if the Unicode character is valid
2172
   * UCS-2 with surrogate pairs supports upto 0x10ffff characters
2173
   */
2174
0
  if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2175
0
  {
2176
0
    libcerror_error_set(
2177
0
     error,
2178
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2179
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2180
0
     "%s: unsupported Unicode character.",
2181
0
     function );
2182
2183
0
    return( -1 );
2184
0
  }
2185
0
  if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2186
0
  {
2187
0
    *ucs2_character_size += 2;
2188
0
  }
2189
0
  else
2190
0
  {
2191
0
    *ucs2_character_size += 1;
2192
0
  }
2193
0
  return( 1 );
2194
0
}
2195
2196
/* Copies an Unicode character from an UCS-2 string
2197
 * Returns 1 if successful or -1 on error
2198
 */
2199
int libuna_unicode_character_copy_from_ucs2(
2200
     libuna_unicode_character_t *unicode_character,
2201
     const libuna_utf16_character_t *ucs2_string,
2202
     size_t ucs2_string_size,
2203
     size_t *ucs2_string_index,
2204
     libcerror_error_t **error )
2205
0
{
2206
0
  static char *function                             = "libuna_unicode_character_copy_from_ucs2";
2207
0
  libuna_utf16_character_t ucs2_surrogate           = 0;
2208
0
  libuna_unicode_character_t safe_unicode_character = 0;
2209
0
  size_t safe_ucs2_string_index                     = 0;
2210
2211
0
  if( unicode_character == NULL )
2212
0
  {
2213
0
    libcerror_error_set(
2214
0
     error,
2215
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2216
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2217
0
     "%s: invalid Unicode character.",
2218
0
     function );
2219
2220
0
    return( -1 );
2221
0
  }
2222
0
  if( ucs2_string == NULL )
2223
0
  {
2224
0
    libcerror_error_set(
2225
0
     error,
2226
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2227
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2228
0
     "%s: invalid UCS-2 string.",
2229
0
     function );
2230
2231
0
    return( -1 );
2232
0
  }
2233
0
  if( ucs2_string_size > (size_t) SSIZE_MAX )
2234
0
  {
2235
0
    libcerror_error_set(
2236
0
     error,
2237
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2238
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2239
0
     "%s: invalid UCS-2 string size value exceeds maximum.",
2240
0
     function );
2241
2242
0
    return( -1 );
2243
0
  }
2244
0
  if( ucs2_string_index == NULL )
2245
0
  {
2246
0
    libcerror_error_set(
2247
0
     error,
2248
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2249
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2250
0
     "%s: invalid UCS-2 string index.",
2251
0
     function );
2252
2253
0
    return( -1 );
2254
0
  }
2255
0
  safe_ucs2_string_index = *ucs2_string_index;
2256
2257
0
  if( safe_ucs2_string_index >= ucs2_string_size )
2258
0
  {
2259
0
    libcerror_error_set(
2260
0
     error,
2261
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2262
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2263
0
     "%s: UCS-2 string too small.",
2264
0
     function );
2265
2266
0
    return( -1 );
2267
0
  }
2268
0
  safe_unicode_character  = ucs2_string[ safe_ucs2_string_index ];
2269
0
  safe_ucs2_string_index += 1;
2270
2271
  /* Determine if the UCS-2 character is within the high surrogate range
2272
   */
2273
0
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
2274
0
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
2275
0
  {
2276
0
    if( safe_ucs2_string_index >= ucs2_string_size )
2277
0
    {
2278
0
      libcerror_error_set(
2279
0
       error,
2280
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2281
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2282
0
       "%s: missing surrogate UCS-2 character bytes.",
2283
0
       function );
2284
2285
0
      return( -1 );
2286
0
    }
2287
0
    ucs2_surrogate = ucs2_string[ safe_ucs2_string_index ];
2288
2289
    /* Determine if the UCS-2 character is within the low surrogate range
2290
     */
2291
0
    if( ( ucs2_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
2292
0
     && ( ucs2_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
2293
0
    {
2294
0
      safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
2295
0
      safe_unicode_character <<= 10;
2296
0
      safe_unicode_character  += ucs2_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
2297
0
      safe_unicode_character  += 0x010000;
2298
2299
0
      safe_ucs2_string_index += 1;
2300
0
    }
2301
0
  }
2302
  /* Determine if the Unicode character is valid
2303
   * UCS-2 with surrogate pairs supports upto 0x10ffff characters
2304
   */
2305
0
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2306
0
  {
2307
0
    libcerror_error_set(
2308
0
     error,
2309
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2310
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2311
0
     "%s: unsupported Unicode character.",
2312
0
     function );
2313
2314
0
    return( -1 );
2315
0
  }
2316
0
  *unicode_character = safe_unicode_character;
2317
0
  *ucs2_string_index = safe_ucs2_string_index;
2318
2319
0
  return( 1 );
2320
0
}
2321
2322
/* Copies an Unicode character into a UCS-2 string
2323
 * Returns 1 if successful or -1 on error
2324
 */
2325
int libuna_unicode_character_copy_to_ucs2(
2326
     libuna_unicode_character_t unicode_character,
2327
     libuna_utf16_character_t *ucs2_string,
2328
     size_t ucs2_string_size,
2329
     size_t *ucs2_string_index,
2330
     libcerror_error_t **error )
2331
0
{
2332
0
  static char *function         = "libuna_unicode_character_copy_to_ucs2";
2333
0
  size_t safe_ucs2_string_index = 0;
2334
2335
0
  if( ucs2_string == NULL )
2336
0
  {
2337
0
    libcerror_error_set(
2338
0
     error,
2339
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2340
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2341
0
     "%s: invalid UCS-2 string.",
2342
0
     function );
2343
2344
0
    return( -1 );
2345
0
  }
2346
0
  if( ucs2_string_size > (size_t) SSIZE_MAX )
2347
0
  {
2348
0
    libcerror_error_set(
2349
0
     error,
2350
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2351
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2352
0
     "%s: invalid UCS-2 string size value exceeds maximum.",
2353
0
     function );
2354
2355
0
    return( -1 );
2356
0
  }
2357
0
  if( ucs2_string_index == NULL )
2358
0
  {
2359
0
    libcerror_error_set(
2360
0
     error,
2361
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2362
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2363
0
     "%s: invalid UCS-2 string index.",
2364
0
     function );
2365
2366
0
    return( -1 );
2367
0
  }
2368
0
  safe_ucs2_string_index = *ucs2_string_index;
2369
2370
0
  if( safe_ucs2_string_index >= ucs2_string_size )
2371
0
  {
2372
0
    libcerror_error_set(
2373
0
     error,
2374
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2375
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2376
0
     "%s: UCS-2 string too small.",
2377
0
     function );
2378
2379
0
    return( -1 );
2380
0
  }
2381
  /* Determine if the Unicode character is valid
2382
   */
2383
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2384
0
  {
2385
0
    libcerror_error_set(
2386
0
     error,
2387
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2388
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2389
0
     "%s: unsupported Unicode character.",
2390
0
     function );
2391
2392
0
    return( -1 );
2393
0
  }
2394
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2395
0
  {
2396
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) unicode_character;
2397
0
  }
2398
0
  else
2399
0
  {
2400
0
    if( ( ucs2_string_size < 2 )
2401
0
     || ( safe_ucs2_string_index > ( ucs2_string_size - 2 ) ) )
2402
0
    {
2403
0
      libcerror_error_set(
2404
0
       error,
2405
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2406
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2407
0
       "%s: UCS-2 string too small.",
2408
0
       function );
2409
2410
0
      return( -1 );
2411
0
    }
2412
0
    unicode_character                      -= 0x010000;
2413
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
2414
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
2415
0
  }
2416
0
  *ucs2_string_index = safe_ucs2_string_index;
2417
2418
0
  return( 1 );
2419
0
}
2420
2421
/* Determines the size of an UCS-4 character from an Unicode character
2422
 * Adds the size to the UCS-4 character size value
2423
 * Returns 1 if successful or -1 on error
2424
 */
2425
int libuna_unicode_character_size_to_ucs4(
2426
     libuna_unicode_character_t unicode_character,
2427
     size_t *ucs4_character_size,
2428
     libcerror_error_t **error )
2429
0
{
2430
0
  static char *function = "libuna_unicode_character_size_to_ucs4";
2431
2432
0
  LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
2433
2434
0
  if( ucs4_character_size == NULL )
2435
0
  {
2436
0
    libcerror_error_set(
2437
0
     error,
2438
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2439
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2440
0
     "%s: invalid UCS-4 character size.",
2441
0
     function );
2442
2443
0
    return( -1 );
2444
0
  }
2445
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2446
0
  {
2447
0
    libcerror_error_set(
2448
0
     error,
2449
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2450
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2451
0
     "%s: unsupported Unicode character.",
2452
0
     function );
2453
2454
0
    return( -1 );
2455
0
  }
2456
0
  *ucs4_character_size += 1;
2457
2458
0
  return( 1 );
2459
0
}
2460
2461
/* Copies an Unicode character from an UCS-4 string
2462
 * Returns 1 if successful or -1 on error
2463
 */
2464
int libuna_unicode_character_copy_from_ucs4(
2465
     libuna_unicode_character_t *unicode_character,
2466
     const libuna_utf32_character_t *ucs4_string,
2467
     size_t ucs4_string_size,
2468
     size_t *ucs4_string_index,
2469
     libcerror_error_t **error )
2470
0
{
2471
0
  static char *function                             = "libuna_unicode_character_copy_from_ucs4";
2472
0
  libuna_unicode_character_t safe_unicode_character = 0;
2473
0
  size_t safe_ucs4_string_index                     = 0;
2474
2475
0
  if( unicode_character == NULL )
2476
0
  {
2477
0
    libcerror_error_set(
2478
0
     error,
2479
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2480
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2481
0
     "%s: invalid Unicode character.",
2482
0
     function );
2483
2484
0
    return( -1 );
2485
0
  }
2486
0
  if( ucs4_string == NULL )
2487
0
  {
2488
0
    libcerror_error_set(
2489
0
     error,
2490
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2491
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2492
0
     "%s: invalid UCS-4 string.",
2493
0
     function );
2494
2495
0
    return( -1 );
2496
0
  }
2497
0
  if( ucs4_string_size > (size_t) SSIZE_MAX )
2498
0
  {
2499
0
    libcerror_error_set(
2500
0
     error,
2501
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2502
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2503
0
     "%s: invalid UCS-4 string size value exceeds maximum.",
2504
0
     function );
2505
2506
0
    return( -1 );
2507
0
  }
2508
0
  if( ucs4_string_index == NULL )
2509
0
  {
2510
0
    libcerror_error_set(
2511
0
     error,
2512
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2513
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2514
0
     "%s: invalid UCS-4 string index.",
2515
0
     function );
2516
2517
0
    return( -1 );
2518
0
  }
2519
0
  safe_ucs4_string_index = *ucs4_string_index;
2520
2521
0
  if( safe_ucs4_string_index >= ucs4_string_size )
2522
0
  {
2523
0
    libcerror_error_set(
2524
0
     error,
2525
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2526
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2527
0
     "%s: UCS-4 string too small.",
2528
0
     function );
2529
2530
0
    return( -1 );
2531
0
  }
2532
0
  safe_unicode_character = ucs4_string[ safe_ucs4_string_index ];
2533
2534
  /* Determine if the Unicode character is valid
2535
   */
2536
0
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2537
0
  {
2538
0
    libcerror_error_set(
2539
0
     error,
2540
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2541
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2542
0
     "%s: unsupported Unicode character.",
2543
0
     function );
2544
2545
0
    return( -1 );
2546
0
  }
2547
0
  *unicode_character = safe_unicode_character;
2548
0
  *ucs4_string_index = safe_ucs4_string_index + 1;
2549
2550
0
  return( 1 );
2551
0
}
2552
2553
/* Copies an Unicode character into a UCS-4 string
2554
 * Returns 1 if successful or -1 on error
2555
 */
2556
int libuna_unicode_character_copy_to_ucs4(
2557
     libuna_unicode_character_t unicode_character,
2558
     libuna_utf32_character_t *ucs4_string,
2559
     size_t ucs4_string_size,
2560
     size_t *ucs4_string_index,
2561
     libcerror_error_t **error )
2562
0
{
2563
0
  static char *function         = "libuna_unicode_character_copy_to_ucs4";
2564
0
  size_t safe_ucs4_string_index = 0;
2565
2566
0
  if( ucs4_string == NULL )
2567
0
  {
2568
0
    libcerror_error_set(
2569
0
     error,
2570
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2571
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2572
0
     "%s: invalid UCS-4 string.",
2573
0
     function );
2574
2575
0
    return( -1 );
2576
0
  }
2577
0
  if( ucs4_string_size > (size_t) SSIZE_MAX )
2578
0
  {
2579
0
    libcerror_error_set(
2580
0
     error,
2581
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2582
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2583
0
     "%s: invalid UCS-4 string size value exceeds maximum.",
2584
0
     function );
2585
2586
0
    return( -1 );
2587
0
  }
2588
0
  if( ucs4_string_index == NULL )
2589
0
  {
2590
0
    libcerror_error_set(
2591
0
     error,
2592
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2593
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2594
0
     "%s: invalid UCS-4 string index.",
2595
0
     function );
2596
2597
0
    return( -1 );
2598
0
  }
2599
0
  safe_ucs4_string_index = *ucs4_string_index;
2600
2601
0
  if( safe_ucs4_string_index >= ucs4_string_size )
2602
0
  {
2603
0
    libcerror_error_set(
2604
0
     error,
2605
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2606
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2607
0
     "%s: UCS-4 string too small.",
2608
0
     function );
2609
2610
0
    return( -1 );
2611
0
  }
2612
  /* Determine if the Unicode character is valid
2613
   */
2614
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2615
0
  {
2616
0
    libcerror_error_set(
2617
0
     error,
2618
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2619
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2620
0
     "%s: unsupported Unicode character.",
2621
0
     function );
2622
2623
0
    return( -1 );
2624
0
  }
2625
0
  ucs4_string[ safe_ucs4_string_index ] = (libuna_utf32_character_t) unicode_character;
2626
2627
0
  *ucs4_string_index = safe_ucs4_string_index + 1;
2628
2629
0
  return( 1 );
2630
0
}
2631
2632
/* Determines the size of an UTF-7 stream character from an Unicode character
2633
 * Adds the size to the UTF-7 stream character size value
2634
 * Returns 1 if successful or -1 on error
2635
 */
2636
int libuna_unicode_character_size_to_utf7_stream(
2637
     libuna_unicode_character_t unicode_character,
2638
     size_t *utf7_stream_character_size,
2639
     uint32_t *utf7_stream_base64_data,
2640
     libcerror_error_t **error )
2641
0
{
2642
0
  static char *function                    = "libuna_unicode_character_size_to_utf7_stream";
2643
0
  libuna_utf16_character_t utf16_surrogate = 0;
2644
0
  size_t safe_utf7_stream_character_size   = 0;
2645
0
  uint32_t base64_triplet                  = 0;
2646
0
  uint32_t safe_utf7_stream_base64_data    = 0;
2647
0
  uint8_t base64_encode_character          = 0;
2648
0
  uint8_t byte_bit_shift                   = 0;
2649
0
  uint8_t current_byte                     = 0;
2650
0
  uint8_t number_of_bytes                  = 0;
2651
2652
0
  if( utf7_stream_character_size == NULL )
2653
0
  {
2654
0
    libcerror_error_set(
2655
0
     error,
2656
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2657
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2658
0
     "%s: invalid UTF-7 stream character size.",
2659
0
     function );
2660
2661
0
    return( -1 );
2662
0
  }
2663
0
  if( utf7_stream_base64_data == NULL )
2664
0
  {
2665
0
    libcerror_error_set(
2666
0
     error,
2667
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2668
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2669
0
     "%s: invalid UTF-7 stream base64 data.",
2670
0
     function );
2671
2672
0
    return( -1 );
2673
0
  }
2674
0
  safe_utf7_stream_character_size = *utf7_stream_character_size;
2675
0
  safe_utf7_stream_base64_data    = *utf7_stream_base64_data;
2676
2677
  /* Determine if the Unicode character is valid
2678
   */
2679
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
2680
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
2681
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
2682
0
  {
2683
0
    libcerror_error_set(
2684
0
     error,
2685
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2686
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2687
0
     "%s: unsupported Unicode character.",
2688
0
     function );
2689
2690
0
    return( -1 );
2691
0
  }
2692
  /* The + character must be escaped
2693
   */
2694
0
  if( unicode_character == (libuna_unicode_character_t) '+' )
2695
0
  {
2696
0
  }
2697
  /* Allow for the end of string character
2698
   */
2699
0
  else if( unicode_character == 0 )
2700
0
  {
2701
0
  }
2702
0
  else if( ( unicode_character >= 256 )
2703
0
        || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
2704
0
  {
2705
0
    base64_encode_character = 1;
2706
0
  }
2707
0
  if( base64_encode_character == 0 )
2708
0
  {
2709
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2710
0
    {
2711
0
      safe_utf7_stream_base64_data = 0;
2712
0
    }
2713
0
    safe_utf7_stream_character_size += 1;
2714
2715
    /* The + character must be escaped
2716
     */
2717
0
    if( unicode_character == (libuna_unicode_character_t) '+' )
2718
0
    {
2719
0
      safe_utf7_stream_character_size += 1;
2720
0
    }
2721
0
  }
2722
0
  else
2723
0
  {
2724
    /* Escape the base64 encoded characters with a +
2725
     */
2726
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
2727
0
    {
2728
0
      safe_utf7_stream_character_size += 1;
2729
0
    }
2730
    /* Otherwise continue the previously base64 encoded characters
2731
     */
2732
0
    else
2733
0
    {
2734
0
      base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
2735
0
      number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
2736
0
      current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
2737
2738
0
      if( number_of_bytes > 0 )
2739
0
      {
2740
0
        if( safe_utf7_stream_character_size < (size_t) ( number_of_bytes + 1 ) )
2741
0
        {
2742
0
          libcerror_error_set(
2743
0
           error,
2744
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
2745
0
           LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2746
0
           "%s: invalid UTF-7 stream character size value out of bounds.",
2747
0
           function );
2748
2749
0
          return( -1 );
2750
0
        }
2751
        /* Correct the size for the last partial base64 stream
2752
         */
2753
0
        safe_utf7_stream_character_size -= number_of_bytes + 1;
2754
0
      }
2755
0
      if( safe_utf7_stream_character_size < 1 )
2756
0
      {
2757
0
        libcerror_error_set(
2758
0
         error,
2759
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
2760
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2761
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
2762
0
         function );
2763
2764
0
        return( -1 );
2765
0
      }
2766
      /* Correct the size for the base64 stream termination character
2767
       */
2768
0
      safe_utf7_stream_character_size -= 1;
2769
0
    }
2770
0
    safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
2771
2772
0
    if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2773
0
    {
2774
0
      unicode_character -= 0x010000;
2775
2776
0
      utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
2777
2778
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
2779
0
      base64_triplet  += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
2780
0
      current_byte    += 1;
2781
0
      number_of_bytes += 1;
2782
2783
0
      if( number_of_bytes == 3 )
2784
0
      {
2785
0
        safe_utf7_stream_character_size += 4;
2786
0
        number_of_bytes                  = 0;
2787
0
        current_byte                     = 0;
2788
0
        base64_triplet                   = 0;
2789
0
      }
2790
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
2791
0
      base64_triplet  += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
2792
0
      current_byte    += 1;
2793
0
      number_of_bytes += 1;
2794
2795
0
      if( number_of_bytes == 3 )
2796
0
      {
2797
0
        safe_utf7_stream_character_size += 4;
2798
0
        number_of_bytes                  = 0;
2799
0
        current_byte                     = 0;
2800
0
        base64_triplet                   = 0;
2801
0
      }
2802
0
      unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
2803
0
    }
2804
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
2805
0
    base64_triplet  += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
2806
0
    current_byte    += 1;
2807
0
    number_of_bytes += 1;
2808
2809
0
    if( number_of_bytes == 3 )
2810
0
    {
2811
0
      safe_utf7_stream_character_size += 4;
2812
0
      number_of_bytes                  = 0;
2813
0
      current_byte                     = 0;
2814
0
      base64_triplet                   = 0;
2815
0
    }
2816
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
2817
0
    base64_triplet  += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
2818
0
    current_byte    += 1;
2819
0
    number_of_bytes += 1;
2820
2821
0
    if( number_of_bytes == 3 )
2822
0
    {
2823
0
      safe_utf7_stream_character_size += 4;
2824
0
      number_of_bytes                  = 0;
2825
0
      current_byte                     = 0;
2826
0
      base64_triplet                   = 0;
2827
0
    }
2828
    /* Terminate the base64 encoded characters
2829
     */
2830
0
    if( number_of_bytes > 0 )
2831
0
    {
2832
0
      safe_utf7_stream_character_size += number_of_bytes + 1;
2833
0
    }
2834
0
    safe_utf7_stream_character_size += 1;
2835
0
  }
2836
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2837
0
  {
2838
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
2839
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
2840
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
2841
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
2842
0
  }
2843
0
  *utf7_stream_character_size = safe_utf7_stream_character_size;
2844
0
  *utf7_stream_base64_data    = safe_utf7_stream_base64_data;
2845
2846
0
  return( 1 );
2847
0
}
2848
2849
/* Copies an Unicode character from an UTF-7 stream
2850
 * The bits of the base64 data contain:
2851
 *   0 - 23 the base64 triplet
2852
 *  24 - 25 the number of bytes in the triplet
2853
 *  26 - 27 unused
2854
 *  28 - 29 the current byte
2855
 *       30 unused
2856
 *       31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
2857
 *
2858
 * Returns 1 if successful or -1 on error
2859
 */
2860
int libuna_unicode_character_copy_from_utf7_stream(
2861
     libuna_unicode_character_t *unicode_character,
2862
     const uint8_t *utf7_stream,
2863
     size_t utf7_stream_size,
2864
     size_t *utf7_stream_index,
2865
     uint32_t *utf7_stream_base64_data,
2866
     libcerror_error_t **error )
2867
0
{
2868
0
  static char *function                             = "libuna_unicode_character_copy_from_utf7_stream";
2869
0
  libuna_unicode_character_t safe_unicode_character = 0;
2870
0
  libuna_utf16_character_t utf16_surrogate          = 0;
2871
0
  size_t safe_utf7_stream_index                     = 0;
2872
0
  uint32_t base64_triplet                           = 0;
2873
0
  uint32_t safe_utf7_stream_base64_data             = 0;
2874
0
  uint8_t byte_bit_shift                            = 0;
2875
0
  uint8_t current_byte                              = 0;
2876
0
  uint8_t number_of_bytes                           = 0;
2877
0
  uint8_t padding_size                              = 0;
2878
0
  uint8_t utf7_character_value                      = 0;
2879
2880
0
  if( unicode_character == NULL )
2881
0
  {
2882
0
    libcerror_error_set(
2883
0
     error,
2884
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2885
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2886
0
     "%s: invalid Unicode character.",
2887
0
     function );
2888
2889
0
    return( -1 );
2890
0
  }
2891
0
  if( utf7_stream == NULL )
2892
0
  {
2893
0
    libcerror_error_set(
2894
0
     error,
2895
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2896
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2897
0
     "%s: invalid UTF-7 stream.",
2898
0
     function );
2899
2900
0
    return( -1 );
2901
0
  }
2902
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
2903
0
  {
2904
0
    libcerror_error_set(
2905
0
     error,
2906
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2907
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2908
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
2909
0
     function );
2910
2911
0
    return( -1 );
2912
0
  }
2913
0
  if( utf7_stream_index == NULL )
2914
0
  {
2915
0
    libcerror_error_set(
2916
0
     error,
2917
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2918
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2919
0
     "%s: invalid UTF-7 stream index.",
2920
0
     function );
2921
2922
0
    return( -1 );
2923
0
  }
2924
0
  if( utf7_stream_base64_data == NULL )
2925
0
  {
2926
0
    libcerror_error_set(
2927
0
     error,
2928
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2929
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2930
0
     "%s: invalid UTF-7 base64 data.",
2931
0
     function );
2932
2933
0
    return( -1 );
2934
0
  }
2935
0
  safe_utf7_stream_index       = *utf7_stream_index;
2936
0
  safe_utf7_stream_base64_data = *utf7_stream_base64_data;
2937
2938
0
  if( safe_utf7_stream_index >= utf7_stream_size )
2939
0
  {
2940
0
    libcerror_error_set(
2941
0
     error,
2942
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2943
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2944
0
     "%s: UTF-7 stream too small.",
2945
0
     function );
2946
2947
0
    return( -1 );
2948
0
  }
2949
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2950
0
  {
2951
0
    base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
2952
0
    number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
2953
0
    current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
2954
2955
0
    if( current_byte >= number_of_bytes )
2956
0
    {
2957
0
      if( safe_utf7_stream_index >= utf7_stream_size )
2958
0
      {
2959
0
        libcerror_error_set(
2960
0
         error,
2961
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
2962
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2963
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
2964
0
         function );
2965
2966
0
        return( -1 );
2967
0
      }
2968
0
      utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
2969
2970
      /* Any character not in the modified base64 alphabet terminates the base64 encoded sequence
2971
       */
2972
0
      if( libuna_unicode_character_utf7_valid_base64_character[ utf7_character_value ] == 0 )
2973
0
      {
2974
0
        safe_utf7_stream_base64_data = 0;
2975
0
      }
2976
0
    }
2977
0
  }
2978
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
2979
0
  {
2980
0
    if( safe_utf7_stream_index >= utf7_stream_size )
2981
0
    {
2982
0
      libcerror_error_set(
2983
0
       error,
2984
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
2985
0
       LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2986
0
       "%s: invalid UTF-7 stream character size value out of bounds.",
2987
0
       function );
2988
2989
0
      return( -1 );
2990
0
    }
2991
0
    utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
2992
2993
    /* Determine if the character is modified base64 encoded
2994
     * or a + character
2995
     */
2996
0
    if( utf7_character_value == (uint8_t) '+' )
2997
0
    {
2998
0
      if( ( safe_utf7_stream_index + 1 ) >= utf7_stream_size )
2999
0
      {
3000
0
        libcerror_error_set(
3001
0
         error,
3002
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3003
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3004
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
3005
0
         function );
3006
3007
0
        return( -1 );
3008
0
      }
3009
0
      if( utf7_stream[ safe_utf7_stream_index + 1 ] != (uint8_t) '-' )
3010
0
      {
3011
0
        safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3012
3013
0
        safe_utf7_stream_index++;
3014
0
      }
3015
0
    }
3016
    /* Allow for the end of string character
3017
     */
3018
0
    else if( utf7_character_value == 0 )
3019
0
    {
3020
0
    }
3021
0
    else if( libuna_unicode_character_utf7_valid_directly_encoded_character[ utf7_character_value ] == 0 )
3022
0
    {
3023
0
      libcerror_error_set(
3024
0
       error,
3025
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3026
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3027
0
       "%s: invalid directly encoded UTF-7 character byte: 0x%02" PRIx8 ".",
3028
0
       function,
3029
0
       utf7_character_value );
3030
3031
0
      return( -1 );
3032
0
    }
3033
0
  }
3034
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3035
0
  {
3036
0
    safe_unicode_character = utf7_stream[ safe_utf7_stream_index++ ];
3037
3038
0
    if( ( safe_unicode_character == (libuna_unicode_character_t) '+' )
3039
0
     && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3040
0
    {
3041
0
      safe_utf7_stream_index++;
3042
0
    }
3043
0
  }
3044
0
  else if( ( number_of_bytes == 0 )
3045
0
        || ( current_byte >= number_of_bytes ) )
3046
0
  {
3047
0
    if( libuna_base64_triplet_copy_from_base64_stream(
3048
0
         &base64_triplet,
3049
0
         utf7_stream,
3050
0
         utf7_stream_size - 1,
3051
0
         &safe_utf7_stream_index,
3052
0
         &padding_size,
3053
0
         LIBUNA_BASE64_VARIANT_UTF7,
3054
0
         error ) != 1 )
3055
0
    {
3056
0
      libcerror_error_set(
3057
0
       error,
3058
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3059
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3060
0
       "%s: unable to copy base64 encoded UTF-7 characters.",
3061
0
       function );
3062
3063
0
      return( -1 );
3064
0
    }
3065
0
    if( padding_size > 2 )
3066
0
    {
3067
0
      libcerror_error_set(
3068
0
       error,
3069
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3070
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3071
0
       "%s: unsupported padding in base64 encoded UTF-7 characters.",
3072
0
       function );
3073
3074
0
      return( -1 );
3075
0
    }
3076
0
    number_of_bytes = 3 - padding_size;
3077
0
    current_byte    = 0;
3078
0
  }
3079
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3080
0
  {
3081
0
    byte_bit_shift         = 16 - ( current_byte * 8 );
3082
0
    safe_unicode_character = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3083
0
    current_byte          += 1;
3084
3085
0
    if( current_byte >= number_of_bytes )
3086
0
    {
3087
0
      if( libuna_base64_triplet_copy_from_base64_stream(
3088
0
           &base64_triplet,
3089
0
           utf7_stream,
3090
0
           utf7_stream_size - 1,
3091
0
           &safe_utf7_stream_index,
3092
0
           &padding_size,
3093
0
           LIBUNA_BASE64_VARIANT_UTF7,
3094
0
           error ) != 1 )
3095
0
      {
3096
0
        libcerror_error_set(
3097
0
         error,
3098
0
         LIBCERROR_ERROR_DOMAIN_CONVERSION,
3099
0
         LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3100
0
         "%s: unable to copy base64 encoded UTF-7 characters.",
3101
0
         function );
3102
3103
0
        return( -1 );
3104
0
      }
3105
0
      if( padding_size > 2 )
3106
0
      {
3107
0
        libcerror_error_set(
3108
0
         error,
3109
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3110
0
         LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3111
0
         "%s: unsupported padding in base64 encoded UTF-7 characters.",
3112
0
         function );
3113
3114
0
        return( -1 );
3115
0
      }
3116
0
      number_of_bytes = 3 - padding_size;
3117
0
      current_byte    = 0;
3118
0
    }
3119
0
    byte_bit_shift          = 16 - ( current_byte * 8 );
3120
0
    safe_unicode_character += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3121
0
    current_byte           += 1;
3122
3123
0
    if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3124
0
     && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
3125
0
    {
3126
0
      if( current_byte >= number_of_bytes )
3127
0
      {
3128
0
        if( libuna_base64_triplet_copy_from_base64_stream(
3129
0
             &base64_triplet,
3130
0
             utf7_stream,
3131
0
             utf7_stream_size - 1,
3132
0
             &safe_utf7_stream_index,
3133
0
             &padding_size,
3134
0
             LIBUNA_BASE64_VARIANT_UTF7,
3135
0
             error ) != 1 )
3136
0
        {
3137
0
          libcerror_error_set(
3138
0
           error,
3139
0
           LIBCERROR_ERROR_DOMAIN_CONVERSION,
3140
0
           LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3141
0
           "%s: unable to copy base64 encoded UTF-7 characters.",
3142
0
           function );
3143
3144
0
          return( -1 );
3145
0
        }
3146
0
        if( padding_size > 2 )
3147
0
        {
3148
0
          libcerror_error_set(
3149
0
           error,
3150
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3151
0
           LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3152
0
           "%s: unsupported padding in base64 encoded UTF-7 characters.",
3153
0
           function );
3154
3155
0
          return( -1 );
3156
0
        }
3157
0
        number_of_bytes = 3 - padding_size;
3158
0
        current_byte    = 0;
3159
0
      }
3160
0
      byte_bit_shift  = 16 - ( current_byte * 8 );
3161
0
      utf16_surrogate = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3162
0
      current_byte   += 1;
3163
3164
0
      if( current_byte >= number_of_bytes )
3165
0
      {
3166
0
        if( libuna_base64_triplet_copy_from_base64_stream(
3167
0
             &base64_triplet,
3168
0
             utf7_stream,
3169
0
             utf7_stream_size - 1,
3170
0
             &safe_utf7_stream_index,
3171
0
             &padding_size,
3172
0
             LIBUNA_BASE64_VARIANT_UTF7,
3173
0
             error ) != 1 )
3174
0
        {
3175
0
          libcerror_error_set(
3176
0
           error,
3177
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3178
0
           LIBCERROR_RUNTIME_ERROR_GET_FAILED,
3179
0
           "%s: unable to retrieve base64 encoded UTF-7 characters.",
3180
0
           function );
3181
3182
0
          return( -1 );
3183
0
        }
3184
0
        if( padding_size > 2 )
3185
0
        {
3186
0
          libcerror_error_set(
3187
0
           error,
3188
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3189
0
           LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3190
0
           "%s: unsupported padding in base64 encoded UTF-7 characters.",
3191
0
           function );
3192
3193
0
          return( -1 );
3194
0
        }
3195
0
        number_of_bytes = 3 - padding_size;
3196
0
        current_byte    = 0;
3197
0
      }
3198
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3199
0
      utf16_surrogate += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3200
0
      current_byte    += 1;
3201
3202
      /* Determine if the UTF-16 character is within the low surrogate range
3203
       */
3204
0
      if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
3205
0
       && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3206
0
      {
3207
0
        safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
3208
0
        safe_unicode_character <<= 10;
3209
0
        safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
3210
0
        safe_unicode_character  += 0x010000;
3211
0
      }
3212
0
      else
3213
0
      {
3214
0
        libcerror_error_set(
3215
0
         error,
3216
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3217
0
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3218
0
         "%s: unsupported low surrogate UTF-16 character.",
3219
0
         function );
3220
3221
0
        return( -1 );
3222
0
      }
3223
0
    }
3224
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3225
0
    {
3226
0
      libcerror_error_set(
3227
0
       error,
3228
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3229
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3230
0
       "%s: UTF-7 stream too small.",
3231
0
       function );
3232
3233
0
      return( -1 );
3234
0
    }
3235
0
    if( ( current_byte >= number_of_bytes )
3236
0
     && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3237
0
    {
3238
0
      safe_utf7_stream_base64_data = 0;
3239
3240
0
      safe_utf7_stream_index++;
3241
0
    }
3242
0
  }
3243
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3244
0
  {
3245
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
3246
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3247
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3248
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3249
0
  }
3250
0
  *unicode_character       = safe_unicode_character;
3251
0
  *utf7_stream_index       = safe_utf7_stream_index;
3252
0
  *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3253
3254
0
  return( 1 );
3255
0
}
3256
3257
/* Copies an Unicode character into a UTF-7 stream
3258
 * The bits of the base64 data contain:
3259
 *   0 - 23 the base64 triplet
3260
 *  24 - 25 the number of bytes in the triplet
3261
 *  26 - 27 unused
3262
 *  28 - 29 the current byte
3263
 *       30 unused
3264
 *       31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3265
 *
3266
 * Returns 1 if successful or -1 on error
3267
 */
3268
int libuna_unicode_character_copy_to_utf7_stream(
3269
     libuna_unicode_character_t unicode_character,
3270
     uint8_t *utf7_stream,
3271
     size_t utf7_stream_size,
3272
     size_t *utf7_stream_index,
3273
     uint32_t *utf7_stream_base64_data,
3274
     libcerror_error_t **error )
3275
0
{
3276
0
  static char *function                    = "libuna_unicode_character_copy_to_utf7_stream";
3277
0
  libuna_utf16_character_t utf16_surrogate = 0;
3278
0
  size_t safe_utf7_stream_index            = 0;
3279
0
  uint32_t base64_triplet                  = 0;
3280
0
  uint32_t safe_utf7_stream_base64_data    = 0;
3281
0
  uint8_t base64_encode_character          = 0;
3282
0
  uint8_t byte_bit_shift                   = 0;
3283
0
  uint8_t current_byte                     = 0;
3284
0
  uint8_t number_of_bytes                  = 0;
3285
3286
0
  if( utf7_stream == NULL )
3287
0
  {
3288
0
    libcerror_error_set(
3289
0
     error,
3290
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3291
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3292
0
     "%s: invalid UTF-7 stream.",
3293
0
     function );
3294
3295
0
    return( -1 );
3296
0
  }
3297
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
3298
0
  {
3299
0
    libcerror_error_set(
3300
0
     error,
3301
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3302
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3303
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
3304
0
     function );
3305
3306
0
    return( -1 );
3307
0
  }
3308
0
  if( utf7_stream_index == NULL )
3309
0
  {
3310
0
    libcerror_error_set(
3311
0
     error,
3312
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3313
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3314
0
     "%s: invalid UTF-7 stream index.",
3315
0
     function );
3316
3317
0
    return( -1 );
3318
0
  }
3319
0
  if( utf7_stream_base64_data == NULL )
3320
0
  {
3321
0
    libcerror_error_set(
3322
0
     error,
3323
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3324
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3325
0
     "%s: invalid UTF-7 stream base64 data.",
3326
0
     function );
3327
3328
0
    return( -1 );
3329
0
  }
3330
0
  safe_utf7_stream_index       = *utf7_stream_index;
3331
0
  safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3332
3333
  /* Determine if the Unicode character is valid
3334
   */
3335
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3336
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3337
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
3338
0
  {
3339
0
    libcerror_error_set(
3340
0
     error,
3341
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3342
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3343
0
     "%s: unsupported Unicode character.",
3344
0
     function );
3345
3346
0
    return( -1 );
3347
0
  }
3348
  /* A-Z is not a continous range on an EBCDIC based system
3349
   * it consists of the ranges: A-I, J-R, S-Z
3350
   */
3351
0
  if( ( unicode_character >= 0x41 )
3352
0
   && ( unicode_character <= 0x49 ) )
3353
0
  {
3354
0
    unicode_character = ( unicode_character - 0x41 ) + (libuna_unicode_character_t) 'A';
3355
0
  }
3356
0
  else if( ( unicode_character >= 0x4a )
3357
0
        && ( unicode_character <= 0x52 ) )
3358
0
  {
3359
0
    unicode_character = ( unicode_character - 0x4a ) + (libuna_unicode_character_t) 'J';
3360
0
  }
3361
0
  else if( ( unicode_character >= 0x53 )
3362
0
        && ( unicode_character <= 0x5a ) )
3363
0
  {
3364
0
    unicode_character = ( unicode_character - 0x53 ) + (libuna_unicode_character_t) 'S';
3365
0
  }
3366
  /* a-z is not a continous range on an EBCDIC based system
3367
   * it consists of the ranges: a-i, j-r, s-z
3368
   */
3369
0
  else if( ( unicode_character >= 0x61 )
3370
0
        && ( unicode_character <= 0x69 ) )
3371
0
  {
3372
0
    unicode_character = ( unicode_character - 0x61 ) + (libuna_unicode_character_t) 'a';
3373
0
  }
3374
0
  else if( ( unicode_character >= 0x6a )
3375
0
        && ( unicode_character <= 0x72 ) )
3376
0
  {
3377
0
    unicode_character = ( unicode_character - 0x6a ) + (libuna_unicode_character_t) 'j';
3378
0
  }
3379
0
  else if( ( unicode_character >= 0x73 )
3380
0
        && ( unicode_character <= 0x7a ) )
3381
0
  {
3382
0
    unicode_character = ( unicode_character - 0x73 ) + (libuna_unicode_character_t) 's';
3383
0
  }
3384
  /* 0-9
3385
   */
3386
0
  else if( ( unicode_character >= 0x30 )
3387
0
        && ( unicode_character <= 0x39 ) )
3388
0
  {
3389
0
    unicode_character = ( unicode_character - 0x30 ) + (libuna_unicode_character_t) '0';
3390
0
  }
3391
  /* The + character must be escaped
3392
   */
3393
0
  else if( unicode_character == (libuna_unicode_character_t) '+' )
3394
0
  {
3395
0
  }
3396
  /* Allow for the end of string character
3397
   */
3398
0
  else if( unicode_character == 0 )
3399
0
  {
3400
0
  }
3401
0
  else if( ( unicode_character >= 256 )
3402
0
        || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3403
0
  {
3404
0
    base64_encode_character = 1;
3405
0
  }
3406
0
  if( base64_encode_character == 0 )
3407
0
  {
3408
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3409
0
    {
3410
0
      safe_utf7_stream_base64_data = 0;
3411
0
    }
3412
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3413
0
    {
3414
0
      libcerror_error_set(
3415
0
       error,
3416
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3417
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3418
0
       "%s: UTF-7 stream too small.",
3419
0
       function );
3420
3421
0
      return( -1 );
3422
0
    }
3423
0
    utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) unicode_character;
3424
3425
    /* The + character must be escaped
3426
     */
3427
0
    if( unicode_character == (libuna_unicode_character_t) '+' )
3428
0
    {
3429
0
      if( safe_utf7_stream_index >= utf7_stream_size )
3430
0
      {
3431
0
        libcerror_error_set(
3432
0
         error,
3433
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3434
0
         LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3435
0
         "%s: UTF-7 stream too small.",
3436
0
         function );
3437
3438
0
        return( -1 );
3439
0
      }
3440
0
      utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3441
0
    }
3442
0
  }
3443
0
  else
3444
0
  {
3445
    /* Escape the base64 encoded chracters with a +
3446
     */
3447
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3448
0
    {
3449
0
      if( safe_utf7_stream_index >= utf7_stream_size )
3450
0
      {
3451
0
        libcerror_error_set(
3452
0
         error,
3453
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3454
0
         LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3455
0
         "%s: UTF-7 stream too small.",
3456
0
         function );
3457
3458
0
        return( -1 );
3459
0
      }
3460
0
      utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '+';
3461
0
    }
3462
    /* Otherwise continue the previously base64 encoded characters
3463
     */
3464
0
    else
3465
0
    {
3466
0
      base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
3467
0
      number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3468
0
      current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3469
3470
0
      if( number_of_bytes > 0 )
3471
0
      {
3472
        /* Correct the index for the last partial base64 stream
3473
         */
3474
0
        safe_utf7_stream_index -= number_of_bytes + 1;
3475
0
      }
3476
      /* Correct the index for the base64 stream termination character
3477
       */
3478
0
      safe_utf7_stream_index -= 1;
3479
0
    }
3480
0
    safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3481
3482
0
    if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3483
0
    {
3484
0
      unicode_character -= 0x010000;
3485
3486
0
      utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 )
3487
0
                      + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3488
3489
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3490
0
      base64_triplet  += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3491
0
      current_byte    += 1;
3492
0
      number_of_bytes += 1;
3493
3494
0
      if( number_of_bytes == 3 )
3495
0
      {
3496
0
        if( libuna_base64_triplet_copy_to_base64_stream(
3497
0
             base64_triplet,
3498
0
             utf7_stream,
3499
0
             utf7_stream_size,
3500
0
             &safe_utf7_stream_index,
3501
0
             0,
3502
0
             LIBUNA_BASE64_VARIANT_UTF7,
3503
0
             error ) != 1 )
3504
0
        {
3505
0
          libcerror_error_set(
3506
0
           error,
3507
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3508
0
           LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3509
0
           "%s: unable to set base64 encoded UTF-7 characters.",
3510
0
           function );
3511
3512
0
          return( -1 );
3513
0
        }
3514
0
        number_of_bytes = 0;
3515
0
        current_byte    = 0;
3516
0
        base64_triplet  = 0;
3517
0
      }
3518
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3519
0
      base64_triplet  += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3520
0
      current_byte    += 1;
3521
0
      number_of_bytes += 1;
3522
3523
0
      if( number_of_bytes == 3 )
3524
0
      {
3525
0
        if( libuna_base64_triplet_copy_to_base64_stream(
3526
0
             base64_triplet,
3527
0
             utf7_stream,
3528
0
             utf7_stream_size,
3529
0
             &safe_utf7_stream_index,
3530
0
             0,
3531
0
             LIBUNA_BASE64_VARIANT_UTF7,
3532
0
             error ) != 1 )
3533
0
        {
3534
0
          libcerror_error_set(
3535
0
           error,
3536
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3537
0
           LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3538
0
           "%s: unable to set base64 encoded UTF-7 characters.",
3539
0
           function );
3540
3541
0
          return( -1 );
3542
0
        }
3543
0
        number_of_bytes = 0;
3544
0
        current_byte    = 0;
3545
0
        base64_triplet  = 0;
3546
0
      }
3547
0
      unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff )
3548
0
                        + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3549
0
    }
3550
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
3551
0
    base64_triplet  += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3552
0
    current_byte    += 1;
3553
0
    number_of_bytes += 1;
3554
3555
0
    if( number_of_bytes == 3 )
3556
0
    {
3557
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3558
0
           base64_triplet,
3559
0
           utf7_stream,
3560
0
           utf7_stream_size,
3561
0
           &safe_utf7_stream_index,
3562
0
           0,
3563
0
           LIBUNA_BASE64_VARIANT_UTF7,
3564
0
           error ) != 1 )
3565
0
      {
3566
0
        libcerror_error_set(
3567
0
         error,
3568
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3569
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3570
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3571
0
         function );
3572
3573
0
        return( -1 );
3574
0
      }
3575
0
      number_of_bytes = 0;
3576
0
      current_byte    = 0;
3577
0
      base64_triplet  = 0;
3578
0
    }
3579
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
3580
0
    base64_triplet  += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3581
0
    current_byte    += 1;
3582
0
    number_of_bytes += 1;
3583
3584
0
    if( number_of_bytes == 3 )
3585
0
    {
3586
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3587
0
           base64_triplet,
3588
0
           utf7_stream,
3589
0
           utf7_stream_size,
3590
0
           &safe_utf7_stream_index,
3591
0
           0,
3592
0
           LIBUNA_BASE64_VARIANT_UTF7,
3593
0
           error ) != 1 )
3594
0
      {
3595
0
        libcerror_error_set(
3596
0
         error,
3597
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3598
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3599
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3600
0
         function );
3601
3602
0
        return( -1 );
3603
0
      }
3604
0
      number_of_bytes = 0;
3605
0
      current_byte    = 0;
3606
0
      base64_triplet  = 0;
3607
0
    }
3608
    /* Terminate the base64 encoded characters
3609
     */
3610
0
    if( number_of_bytes > 0 )
3611
0
    {
3612
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3613
0
           base64_triplet,
3614
0
           utf7_stream,
3615
0
           utf7_stream_size,
3616
0
           &safe_utf7_stream_index,
3617
0
           3 - number_of_bytes,
3618
0
           LIBUNA_BASE64_VARIANT_UTF7,
3619
0
           error ) != 1 )
3620
0
      {
3621
0
        libcerror_error_set(
3622
0
         error,
3623
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3624
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3625
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3626
0
         function );
3627
3628
0
        return( -1 );
3629
0
      }
3630
0
    }
3631
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3632
0
    {
3633
0
      libcerror_error_set(
3634
0
       error,
3635
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3636
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3637
0
       "%s: UTF-7 stream too small.",
3638
0
       function );
3639
3640
0
      return( -1 );
3641
0
    }
3642
0
    utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3643
0
  }
3644
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3645
0
  {
3646
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
3647
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3648
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3649
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3650
0
  }
3651
0
  *utf7_stream_index       = safe_utf7_stream_index;
3652
0
  *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3653
3654
0
  return( 1 );
3655
0
}
3656
3657
/* Determines the size of an UTF-8 character from an Unicode character
3658
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
3659
 * Adds the size to the UTF-8 character size value
3660
 * Returns 1 if successful or -1 on error
3661
 */
3662
int libuna_unicode_character_size_to_utf8(
3663
     libuna_unicode_character_t unicode_character,
3664
     size_t *utf8_character_size,
3665
     libcerror_error_t **error )
3666
3.94M
{
3667
3.94M
  static char *function           = "libuna_unicode_character_size_to_utf8";
3668
3.94M
  size_t safe_utf8_character_size = 0;
3669
3670
3.94M
  if( utf8_character_size == NULL )
3671
0
  {
3672
0
    libcerror_error_set(
3673
0
     error,
3674
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3675
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3676
0
     "%s: invalid UTF-8 character size.",
3677
0
     function );
3678
3679
0
    return( -1 );
3680
0
  }
3681
  /* Determine if the Unicode character is valid
3682
   */
3683
3.94M
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3684
3.94M
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3685
3.94M
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
3686
0
  {
3687
0
    libcerror_error_set(
3688
0
     error,
3689
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3690
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3691
0
     "%s: unsupported Unicode character.",
3692
0
     function );
3693
3694
0
    return( -1 );
3695
0
  }
3696
  /* RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
3697
   * while its predecessor RFC 2279 allowed up to 6 bytes
3698
   */
3699
3.94M
  if( unicode_character < 0x00000080UL )
3700
940k
  {
3701
940k
    safe_utf8_character_size += 1;
3702
940k
  }
3703
3.00M
  else if( unicode_character < 0x00000800UL )
3704
192k
  {
3705
192k
    safe_utf8_character_size += 2;
3706
192k
  }
3707
2.80M
  else if( unicode_character < 0x00010000UL )
3708
2.79M
  {
3709
2.79M
    safe_utf8_character_size += 3;
3710
2.79M
  }
3711
18.4k
  else
3712
18.4k
  {
3713
18.4k
    safe_utf8_character_size += 4;
3714
18.4k
  }
3715
3.94M
  *utf8_character_size += safe_utf8_character_size;
3716
3717
3.94M
  return( 1 );
3718
3.94M
}
3719
3720
/* Copies a Unicode character from a UTF-8 string
3721
 * This function supports up to U+10FFFF (4 byte UTF-8 characters)
3722
 * Returns 1 if successful or -1 on error
3723
 */
3724
int libuna_unicode_character_copy_from_utf8(
3725
     libuna_unicode_character_t *unicode_character,
3726
     const libuna_utf8_character_t *utf8_string,
3727
     size_t utf8_string_size,
3728
     size_t *utf8_string_index,
3729
     libcerror_error_t **error )
3730
288k
{
3731
288k
  static char *function                             = "libuna_unicode_character_copy_from_utf8";
3732
288k
  libuna_unicode_character_t safe_unicode_character = 0;
3733
288k
  size_t safe_utf8_string_index                     = 0;
3734
288k
  uint8_t byte_value1                               = 0;
3735
288k
  uint8_t byte_value2                               = 0;
3736
288k
  uint8_t byte_value3                               = 0;
3737
288k
  uint8_t byte_value4                               = 0;
3738
288k
  uint8_t utf8_character_additional_bytes           = 0;
3739
288k
  int result                                        = 0;
3740
3741
288k
  if( unicode_character == NULL )
3742
0
  {
3743
0
    libcerror_error_set(
3744
0
     error,
3745
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3746
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3747
0
     "%s: invalid Unicode character.",
3748
0
     function );
3749
3750
0
    return( -1 );
3751
0
  }
3752
288k
  if( utf8_string == NULL )
3753
0
  {
3754
0
    libcerror_error_set(
3755
0
     error,
3756
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3757
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3758
0
     "%s: invalid UTF-8 string.",
3759
0
     function );
3760
3761
0
    return( -1 );
3762
0
  }
3763
288k
  if( utf8_string_size > (size_t) SSIZE_MAX )
3764
0
  {
3765
0
    libcerror_error_set(
3766
0
     error,
3767
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3768
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3769
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
3770
0
     function );
3771
3772
0
    return( -1 );
3773
0
  }
3774
288k
  if( utf8_string_index == NULL )
3775
0
  {
3776
0
    libcerror_error_set(
3777
0
     error,
3778
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3779
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3780
0
     "%s: invalid UTF-8 string index.",
3781
0
     function );
3782
3783
0
    return( -1 );
3784
0
  }
3785
288k
  safe_utf8_string_index = *utf8_string_index;
3786
3787
288k
  if( safe_utf8_string_index >= utf8_string_size )
3788
0
  {
3789
0
    libcerror_error_set(
3790
0
     error,
3791
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3792
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3793
0
     "%s: UTF-8 string too small.",
3794
0
     function );
3795
3796
0
    return( -1 );
3797
0
  }
3798
  /* Determine the number of additional bytes of the UTF-8 character
3799
   */
3800
288k
  byte_value1 = utf8_string[ safe_utf8_string_index ];
3801
3802
  /* Determine the UTF-8 character and make sure it is valid
3803
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
3804
   * while its predecessor RFC 2279 allowed up to 6 bytes
3805
   */
3806
288k
  if( byte_value1 > 0xf4 )
3807
683
  {
3808
683
    libcerror_error_set(
3809
683
     error,
3810
683
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3811
683
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3812
683
     "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
3813
683
     function,
3814
683
     byte_value1 );
3815
3816
683
    return( -1 );
3817
683
  }
3818
287k
  if( byte_value1 < 0xc0 )
3819
266k
  {
3820
266k
    utf8_character_additional_bytes = 0;
3821
266k
  }
3822
21.2k
  else if( byte_value1 < 0xe0 )
3823
4.79k
  {
3824
4.79k
    utf8_character_additional_bytes = 1;
3825
4.79k
  }
3826
16.4k
  else if( byte_value1 < 0xf0 )
3827
11.7k
  {
3828
11.7k
    utf8_character_additional_bytes = 2;
3829
11.7k
  }
3830
4.74k
  else
3831
4.74k
  {
3832
4.74k
    utf8_character_additional_bytes = 3;
3833
4.74k
  }
3834
287k
  if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
3835
287k
   || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
3836
151
  {
3837
151
    libcerror_error_set(
3838
151
     error,
3839
151
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3840
151
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3841
151
     "%s: missing UTF-8 character bytes.",
3842
151
     function );
3843
3844
151
    return( -1 );
3845
151
  }
3846
287k
  safe_unicode_character = byte_value1;
3847
3848
287k
  if( utf8_character_additional_bytes == 0 )
3849
266k
  {
3850
266k
    if( byte_value1 >= 0x80 )
3851
597
    {
3852
597
      libcerror_error_set(
3853
597
       error,
3854
597
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3855
597
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3856
597
       "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
3857
597
       function,
3858
597
       byte_value1 );
3859
3860
597
      return( -1 );
3861
597
    }
3862
266k
  }
3863
287k
  if( utf8_character_additional_bytes >= 1 )
3864
21.1k
  {
3865
21.1k
    byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
3866
3867
21.1k
    if( ( byte_value2 < 0x80 )
3868
21.1k
     || ( byte_value2 > 0xbf ) )
3869
657
    {
3870
657
      libcerror_error_set(
3871
657
       error,
3872
657
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3873
657
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3874
657
       "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
3875
657
       function,
3876
657
       byte_value2 );
3877
3878
657
      return( -1 );
3879
657
    }
3880
20.4k
    result = 1;
3881
3882
20.4k
    switch( byte_value1 )
3883
20.4k
    {
3884
2.27k
      case 0xe0:
3885
2.27k
        if( ( byte_value2 < 0xa0 )
3886
2.27k
         || ( byte_value2 > 0xbf ) )
3887
85
        {
3888
85
          result = 0;
3889
85
        }
3890
2.27k
        break;
3891
3892
2.43k
      case 0xed:
3893
2.43k
        if( ( byte_value2 < 0x80 )
3894
2.43k
         || ( byte_value2 > 0x9f ) )
3895
56
        {
3896
56
          result = 0;
3897
56
        }
3898
2.43k
        break;
3899
3900
2.48k
      case 0xf0:
3901
2.48k
        if( ( byte_value2 < 0x90 )
3902
2.48k
         || ( byte_value2 > 0xbf ) )
3903
84
        {
3904
84
          result = 0;
3905
84
        }
3906
2.48k
        break;
3907
3908
1.09k
      case 0xf4:
3909
1.09k
        if( ( byte_value2 < 0x80 )
3910
1.09k
         || ( byte_value2 > 0xbf ) )
3911
0
        {
3912
0
          result = 0;
3913
0
        }
3914
1.09k
        break;
3915
3916
12.2k
      default:
3917
12.2k
        break;
3918
20.4k
    }
3919
20.4k
    if( result == 0 )
3920
225
    {
3921
225
      libcerror_error_set(
3922
225
       error,
3923
225
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3924
225
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3925
225
       "%s: invalid 1st and 2nd UTF-8 character byte pair: 0x%02" PRIx8 " 0x%02" PRIx8 ".",
3926
225
       function,
3927
225
       byte_value1,
3928
225
       byte_value2 );
3929
3930
225
      return( -1 );
3931
225
    }
3932
20.2k
    safe_unicode_character <<= 6;
3933
20.2k
    safe_unicode_character += byte_value2;
3934
3935
20.2k
    if( utf8_character_additional_bytes == 1 )
3936
4.49k
    {
3937
4.49k
      safe_unicode_character -= 0x03080;
3938
4.49k
    }
3939
20.2k
  }
3940
286k
  if( utf8_character_additional_bytes >= 2 )
3941
15.7k
  {
3942
15.7k
    byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
3943
3944
15.7k
    if( ( byte_value3 < 0x80 )
3945
15.7k
     || ( byte_value3 > 0xbf ) )
3946
304
    {
3947
304
      libcerror_error_set(
3948
304
       error,
3949
304
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3950
304
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3951
304
       "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
3952
304
       function,
3953
304
       byte_value3 );
3954
3955
304
      return( -1 );
3956
304
    }
3957
15.4k
    result = 1;
3958
3959
15.4k
    switch( byte_value2 )
3960
15.4k
    {
3961
0
      case 0xe0:
3962
0
        if( ( byte_value2 < 0xa0 )
3963
0
         || ( byte_value2 > 0xbf ) )
3964
0
        {
3965
0
          result = 0;
3966
0
        }
3967
0
        break;
3968
3969
0
      case 0xed:
3970
0
        if( ( byte_value2 < 0x80 )
3971
0
         || ( byte_value2 > 0x9f ) )
3972
0
        {
3973
0
          result = 0;
3974
0
        }
3975
0
        break;
3976
3977
15.4k
      default:
3978
15.4k
        break;
3979
15.4k
    }
3980
15.4k
    if( result == 0 )
3981
0
    {
3982
0
      libcerror_error_set(
3983
0
       error,
3984
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3985
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3986
0
       "%s: invalid 2nd and 3rd UTF-8 character byte pair: 0x%02" PRIx8 " 0x%02" PRIx8 ".",
3987
0
       function,
3988
0
       byte_value2,
3989
0
       byte_value3 );
3990
3991
0
      return( -1 );
3992
0
    }
3993
15.4k
    safe_unicode_character <<= 6;
3994
15.4k
    safe_unicode_character += byte_value3;
3995
3996
15.4k
    if( utf8_character_additional_bytes == 2 )
3997
11.0k
    {
3998
11.0k
      safe_unicode_character -= 0x0e2080;
3999
11.0k
    }
4000
15.4k
  }
4001
285k
  if( utf8_character_additional_bytes >= 3 )
4002
4.40k
  {
4003
4.40k
    byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4004
4005
4.40k
    if( ( byte_value4 < 0x80 )
4006
4.40k
     || ( byte_value4 > 0xbf ) )
4007
151
    {
4008
151
      libcerror_error_set(
4009
151
       error,
4010
151
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4011
151
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4012
151
       "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4013
151
       function,
4014
151
       byte_value4 );
4015
4016
151
      return( -1 );
4017
151
    }
4018
4.25k
    safe_unicode_character <<= 6;
4019
4.25k
    safe_unicode_character += byte_value4;
4020
4021
4.25k
    if( utf8_character_additional_bytes == 3 )
4022
4.25k
    {
4023
4.25k
      safe_unicode_character -= 0x03c82080;
4024
4.25k
    }
4025
4.25k
  }
4026
  /* Determine if the Unicode character is valid
4027
   */
4028
285k
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4029
285k
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4030
285k
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4031
47
  {
4032
47
    libcerror_error_set(
4033
47
     error,
4034
47
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4035
47
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4036
47
     "%s: unsupported Unicode character.",
4037
47
     function );
4038
4039
47
    return( -1 );
4040
47
  }
4041
285k
  *unicode_character = safe_unicode_character;
4042
285k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4043
4044
285k
  return( 1 );
4045
285k
}
4046
4047
/* Copies an Unicode character into a UTF-8 string
4048
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
4049
 * Returns 1 if successful or -1 on error
4050
 */
4051
int libuna_unicode_character_copy_to_utf8(
4052
     libuna_unicode_character_t unicode_character,
4053
     libuna_utf8_character_t *utf8_string,
4054
     size_t utf8_string_size,
4055
     size_t *utf8_string_index,
4056
     libcerror_error_t **error )
4057
3.34M
{
4058
3.34M
  static char *function                   = "libuna_unicode_character_copy_to_utf8";
4059
3.34M
  size_t safe_utf8_string_index           = 0;
4060
3.34M
  size_t utf8_character_iterator          = 0;
4061
3.34M
  uint8_t utf8_character_additional_bytes = 0;
4062
3.34M
  uint8_t utf8_first_character_mark       = 0;
4063
4064
3.34M
  if( utf8_string == NULL )
4065
0
  {
4066
0
    libcerror_error_set(
4067
0
     error,
4068
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4069
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4070
0
     "%s: invalid UTF-8 string.",
4071
0
     function );
4072
4073
0
    return( -1 );
4074
0
  }
4075
3.34M
  if( utf8_string_size > (size_t) SSIZE_MAX )
4076
0
  {
4077
0
    libcerror_error_set(
4078
0
     error,
4079
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4080
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4081
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4082
0
     function );
4083
4084
0
    return( -1 );
4085
0
  }
4086
3.34M
  if( utf8_string_index == NULL )
4087
0
  {
4088
0
    libcerror_error_set(
4089
0
     error,
4090
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4091
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4092
0
     "%s: invalid UTF-8 string index.",
4093
0
     function );
4094
4095
0
    return( -1 );
4096
0
  }
4097
3.34M
  safe_utf8_string_index = *utf8_string_index;
4098
4099
3.34M
  if( safe_utf8_string_index >= utf8_string_size )
4100
301
  {
4101
301
    libcerror_error_set(
4102
301
     error,
4103
301
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4104
301
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4105
301
     "%s: UTF-8 string too small.",
4106
301
     function );
4107
4108
301
    return( -1 );
4109
301
  }
4110
  /* Determine if the Unicode character is valid
4111
   */
4112
3.34M
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4113
3.34M
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4114
3.34M
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4115
0
  {
4116
0
    libcerror_error_set(
4117
0
     error,
4118
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4119
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4120
0
     "%s: unsupported Unicode character.",
4121
0
     function );
4122
4123
0
    return( -1 );
4124
0
  }
4125
  /* Determine how many UTF-8 character bytes are required
4126
   */
4127
3.34M
  if( unicode_character < 0x080 )
4128
858k
  {
4129
858k
    utf8_character_additional_bytes = 0;
4130
858k
    utf8_first_character_mark       = 0;
4131
858k
  }
4132
2.48M
  else if( unicode_character < 0x0800 )
4133
99.4k
  {
4134
99.4k
    utf8_character_additional_bytes = 1;
4135
99.4k
    utf8_first_character_mark       = 0x0c0;
4136
99.4k
  }
4137
2.38M
  else if( unicode_character < 0x010000 )
4138
2.38M
  {
4139
2.38M
    utf8_character_additional_bytes = 2;
4140
2.38M
    utf8_first_character_mark       = 0x0e0;
4141
2.38M
  }
4142
3.43k
  else
4143
3.43k
  {
4144
3.43k
    utf8_character_additional_bytes = 3;
4145
3.43k
    utf8_first_character_mark       = 0x0f0;
4146
3.43k
  }
4147
  /* Convert Unicode character into UTF-8 character bytes
4148
   */
4149
3.34M
  if( ( utf8_character_additional_bytes > utf8_string_size )
4150
3.34M
   || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4151
386
  {
4152
386
    libcerror_error_set(
4153
386
     error,
4154
386
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4155
386
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4156
386
     "%s: UTF-8 string too small.",
4157
386
     function );
4158
4159
386
    return( -1 );
4160
386
  }
4161
3.34M
  for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4162
8.22M
       utf8_character_iterator > safe_utf8_string_index;
4163
4.87M
       utf8_character_iterator-- )
4164
4.87M
  {
4165
4.87M
    utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4166
4167
4.87M
    unicode_character >>= 6;
4168
4.87M
  }
4169
3.34M
  utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4170
4171
3.34M
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4172
4173
3.34M
  return( 1 );
4174
3.34M
}
4175
4176
/* Determines the size of an UTF-8 character from an Unicode character
4177
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4178
 * Adds the size to the UTF-8 character size value
4179
 * Returns 1 if successful or -1 on error
4180
 */
4181
int libuna_unicode_character_size_to_utf8_rfc2279(
4182
     libuna_unicode_character_t unicode_character,
4183
     size_t *utf8_character_size,
4184
     libcerror_error_t **error )
4185
0
{
4186
0
  static char *function           = "libuna_unicode_character_size_to_utf8_rfc2279";
4187
0
  size_t safe_utf8_character_size = 0;
4188
4189
0
  if( utf8_character_size == NULL )
4190
0
  {
4191
0
    libcerror_error_set(
4192
0
     error,
4193
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4194
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4195
0
     "%s: invalid UTF-8 character size.",
4196
0
     function );
4197
4198
0
    return( -1 );
4199
0
  }
4200
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4201
0
  {
4202
0
    libcerror_error_set(
4203
0
     error,
4204
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4205
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4206
0
     "%s: unsupported Unicode character.",
4207
0
     function );
4208
4209
0
    return( -1 );
4210
0
  }
4211
  /* RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4212
   * while its predecessor RFC 2279 allowed up to 6 bytes
4213
   */
4214
0
  if( unicode_character < 0x00000080UL )
4215
0
  {
4216
0
    safe_utf8_character_size += 1;
4217
0
  }
4218
0
  else if( unicode_character < 0x00000800UL )
4219
0
  {
4220
0
    safe_utf8_character_size += 2;
4221
0
  }
4222
0
  else if( unicode_character < 0x00010000UL )
4223
0
  {
4224
0
    safe_utf8_character_size += 3;
4225
0
  }
4226
0
  else if( unicode_character < 0x00200000UL )
4227
0
  {
4228
0
    safe_utf8_character_size += 4;
4229
0
  }
4230
0
  else if( unicode_character < 0x04000000UL )
4231
0
  {
4232
0
    safe_utf8_character_size += 5;
4233
0
  }
4234
0
  else
4235
0
  {
4236
0
    safe_utf8_character_size += 6;
4237
0
  }
4238
0
  *utf8_character_size += safe_utf8_character_size;
4239
4240
0
  return( 1 );
4241
0
}
4242
4243
/* Copies an Unicode character from an UTF-8 string
4244
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4245
 * Returns 1 if successful or -1 on error
4246
 */
4247
int libuna_unicode_character_copy_from_utf8_rfc2279(
4248
     libuna_unicode_character_t *unicode_character,
4249
     const libuna_utf8_character_t *utf8_string,
4250
     size_t utf8_string_size,
4251
     size_t *utf8_string_index,
4252
     libcerror_error_t **error )
4253
75.9k
{
4254
75.9k
  static char *function                             = "libuna_unicode_character_copy_from_utf8_rfc2279";
4255
75.9k
  libuna_unicode_character_t safe_unicode_character = 0;
4256
75.9k
  size_t safe_utf8_string_index                     = 0;
4257
75.9k
  uint8_t byte_value1                               = 0;
4258
75.9k
  uint8_t byte_value2                               = 0;
4259
75.9k
  uint8_t byte_value3                               = 0;
4260
75.9k
  uint8_t byte_value4                               = 0;
4261
75.9k
  uint8_t byte_value5                               = 0;
4262
75.9k
  uint8_t byte_value6                               = 0;
4263
75.9k
  uint8_t utf8_character_additional_bytes           = 0;
4264
4265
75.9k
  if( unicode_character == NULL )
4266
0
  {
4267
0
    libcerror_error_set(
4268
0
     error,
4269
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4270
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4271
0
     "%s: invalid Unicode character.",
4272
0
     function );
4273
4274
0
    return( -1 );
4275
0
  }
4276
75.9k
  if( utf8_string == NULL )
4277
0
  {
4278
0
    libcerror_error_set(
4279
0
     error,
4280
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4281
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4282
0
     "%s: invalid UTF-8 string.",
4283
0
     function );
4284
4285
0
    return( -1 );
4286
0
  }
4287
75.9k
  if( utf8_string_size > (size_t) SSIZE_MAX )
4288
0
  {
4289
0
    libcerror_error_set(
4290
0
     error,
4291
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4292
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4293
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4294
0
     function );
4295
4296
0
    return( -1 );
4297
0
  }
4298
75.9k
  if( utf8_string_index == NULL )
4299
0
  {
4300
0
    libcerror_error_set(
4301
0
     error,
4302
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4303
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4304
0
     "%s: invalid UTF-8 string index.",
4305
0
     function );
4306
4307
0
    return( -1 );
4308
0
  }
4309
75.9k
  safe_utf8_string_index = *utf8_string_index;
4310
4311
75.9k
  if( safe_utf8_string_index >= utf8_string_size )
4312
0
  {
4313
0
    libcerror_error_set(
4314
0
     error,
4315
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4316
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4317
0
     "%s: UTF-8 string too small.",
4318
0
     function );
4319
4320
0
    return( -1 );
4321
0
  }
4322
  /* Determine the number of additional bytes of the UTF-8 character
4323
   */
4324
75.9k
  byte_value1 = utf8_string[ safe_utf8_string_index ];
4325
4326
  /* Determine the UTF-8 character and make sure it is valid
4327
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4328
   * while its predecessor RFC 2279 allowed up to 6 bytes
4329
   */
4330
75.9k
  if( byte_value1 > 0xfd )
4331
0
  {
4332
0
    libcerror_error_set(
4333
0
     error,
4334
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4335
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4336
0
     "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4337
0
     function,
4338
0
     byte_value1 );
4339
4340
0
    return( -1 );
4341
0
  }
4342
75.9k
  if( byte_value1 < 0xc0 )
4343
49.9k
  {
4344
49.9k
    utf8_character_additional_bytes = 0;
4345
49.9k
  }
4346
25.9k
  else if( byte_value1 < 0xe0 )
4347
6.02k
  {
4348
6.02k
    utf8_character_additional_bytes = 1;
4349
6.02k
  }
4350
19.9k
  else if( byte_value1 < 0xf0 )
4351
15.9k
  {
4352
15.9k
    utf8_character_additional_bytes = 2;
4353
15.9k
  }
4354
3.99k
  else if( byte_value1 < 0xf8 )
4355
3.99k
  {
4356
3.99k
    utf8_character_additional_bytes = 3;
4357
3.99k
  }
4358
0
  else if( byte_value1 < 0xfc )
4359
0
  {
4360
0
    utf8_character_additional_bytes = 4;
4361
0
  }
4362
0
  else
4363
0
  {
4364
0
    utf8_character_additional_bytes = 5;
4365
0
  }
4366
75.9k
  if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
4367
75.9k
   || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
4368
0
  {
4369
0
    libcerror_error_set(
4370
0
     error,
4371
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4372
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4373
0
     "%s: missing UTF-8 character bytes.",
4374
0
     function );
4375
4376
0
    return( -1 );
4377
0
  }
4378
  /* Determine the UTF-8 character and make sure it is valid
4379
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4380
   * while its predecessor RFC 2279 allowed up to 6 bytes
4381
   */
4382
75.9k
  safe_unicode_character = byte_value1;
4383
4384
75.9k
  if( utf8_character_additional_bytes == 0 )
4385
49.9k
  {
4386
49.9k
    if( byte_value1 >= 0x80 )
4387
0
    {
4388
0
      libcerror_error_set(
4389
0
       error,
4390
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4391
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4392
0
       "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4393
0
       function,
4394
0
       byte_value1 );
4395
4396
0
      return( -1 );
4397
0
    }
4398
49.9k
  }
4399
75.9k
  if( utf8_character_additional_bytes >= 1 )
4400
25.9k
  {
4401
25.9k
    byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
4402
4403
25.9k
    if( ( byte_value2 < 0x80 )
4404
25.9k
     || ( byte_value2 > 0xbf ) )
4405
0
    {
4406
0
      libcerror_error_set(
4407
0
       error,
4408
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4409
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4410
0
       "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4411
0
       function,
4412
0
       byte_value2 );
4413
4414
0
      return( -1 );
4415
0
    }
4416
25.9k
    safe_unicode_character <<= 6;
4417
25.9k
    safe_unicode_character += byte_value2;
4418
4419
25.9k
    if( utf8_character_additional_bytes == 1 )
4420
6.02k
    {
4421
6.02k
      safe_unicode_character -= 0x03080;
4422
6.02k
    }
4423
25.9k
  }
4424
75.9k
  if( utf8_character_additional_bytes >= 2 )
4425
19.9k
  {
4426
19.9k
    byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
4427
4428
19.9k
    if( ( byte_value3 < 0x80 )
4429
19.9k
     || ( byte_value3 > 0xbf ) )
4430
0
    {
4431
0
      libcerror_error_set(
4432
0
       error,
4433
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4434
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4435
0
       "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
4436
0
       function,
4437
0
       byte_value3 );
4438
4439
0
      return( -1 );
4440
0
    }
4441
19.9k
    safe_unicode_character <<= 6;
4442
19.9k
    safe_unicode_character += byte_value3;
4443
4444
19.9k
    if( utf8_character_additional_bytes == 2 )
4445
15.9k
    {
4446
15.9k
      safe_unicode_character -= 0x0e2080;
4447
15.9k
    }
4448
19.9k
  }
4449
75.9k
  if( utf8_character_additional_bytes >= 3 )
4450
3.99k
  {
4451
3.99k
    byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4452
4453
3.99k
    if( ( byte_value4 < 0x80 )
4454
3.99k
     || ( byte_value4 > 0xbf ) )
4455
0
    {
4456
0
      libcerror_error_set(
4457
0
       error,
4458
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4459
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4460
0
       "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4461
0
       function,
4462
0
       byte_value4 );
4463
4464
0
      return( -1 );
4465
0
    }
4466
3.99k
    safe_unicode_character <<= 6;
4467
3.99k
    safe_unicode_character += byte_value4;
4468
4469
3.99k
    if( utf8_character_additional_bytes == 3 )
4470
3.99k
    {
4471
3.99k
      safe_unicode_character -= 0x03c82080;
4472
3.99k
    }
4473
3.99k
  }
4474
75.9k
  if( utf8_character_additional_bytes >= 4 )
4475
0
  {
4476
0
    byte_value5 = utf8_string[ safe_utf8_string_index + 4 ];
4477
4478
0
    if( ( byte_value5 < 0x80 )
4479
0
     || ( byte_value5 > 0xbf ) )
4480
0
    {
4481
0
      libcerror_error_set(
4482
0
       error,
4483
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4484
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4485
0
       "%s: invalid 5th UTF-8 character byte: 0x%02" PRIx8 ".",
4486
0
       function,
4487
0
       byte_value5 );
4488
4489
0
      return( -1 );
4490
0
    }
4491
0
    safe_unicode_character <<= 6;
4492
0
    safe_unicode_character += byte_value5;
4493
4494
0
    if( utf8_character_additional_bytes == 4 )
4495
0
    {
4496
0
      safe_unicode_character -= 0x0fa082080;
4497
0
    }
4498
0
  }
4499
75.9k
  if( utf8_character_additional_bytes == 5 )
4500
0
  {
4501
0
    byte_value6 = utf8_string[ safe_utf8_string_index + 5 ];
4502
4503
0
    if( ( byte_value6 < 0x80 )
4504
0
      || ( byte_value6 > 0xbf ) )
4505
0
    {
4506
0
      libcerror_error_set(
4507
0
       error,
4508
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4509
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4510
0
       "%s: invalid 6th UTF-8 character byte: 0x%02" PRIx8 ".",
4511
0
       function,
4512
0
       byte_value6 );
4513
4514
0
      return( -1 );
4515
0
    }
4516
0
    safe_unicode_character <<= 6;
4517
0
    safe_unicode_character += byte_value6;
4518
0
    safe_unicode_character -= 0x082082080;
4519
0
  }
4520
  /* Determine if the Unicode character is valid
4521
   */
4522
75.9k
  if( safe_unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4523
0
  {
4524
0
    libcerror_error_set(
4525
0
     error,
4526
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4527
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4528
0
     "%s: unsupported Unicode character.",
4529
0
     function );
4530
4531
0
    return( -1 );
4532
0
  }
4533
75.9k
  *unicode_character = safe_unicode_character;
4534
75.9k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4535
4536
75.9k
  return( 1 );
4537
75.9k
}
4538
4539
/* Copies an Unicode character into a UTF-8 string
4540
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4541
 * Returns 1 if successful or -1 on error
4542
 */
4543
int libuna_unicode_character_copy_to_utf8_rfc2279(
4544
     libuna_unicode_character_t unicode_character,
4545
     libuna_utf8_character_t *utf8_string,
4546
     size_t utf8_string_size,
4547
     size_t *utf8_string_index,
4548
     libcerror_error_t **error )
4549
0
{
4550
0
  static char *function                   = "libuna_unicode_character_copy_to_utf8_rfc2279";
4551
0
  size_t safe_utf8_string_index           = 0;
4552
0
  size_t utf8_character_iterator          = 0;
4553
0
  uint8_t utf8_character_additional_bytes = 0;
4554
0
  uint8_t utf8_first_character_mark       = 0;
4555
4556
0
  if( utf8_string == NULL )
4557
0
  {
4558
0
    libcerror_error_set(
4559
0
     error,
4560
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4561
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4562
0
     "%s: invalid UTF-8 string.",
4563
0
     function );
4564
4565
0
    return( -1 );
4566
0
  }
4567
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
4568
0
  {
4569
0
    libcerror_error_set(
4570
0
     error,
4571
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4572
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4573
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4574
0
     function );
4575
4576
0
    return( -1 );
4577
0
  }
4578
0
  if( utf8_string_index == NULL )
4579
0
  {
4580
0
    libcerror_error_set(
4581
0
     error,
4582
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4583
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4584
0
     "%s: invalid UTF-8 string index.",
4585
0
     function );
4586
4587
0
    return( -1 );
4588
0
  }
4589
0
  safe_utf8_string_index = *utf8_string_index;
4590
4591
0
  if( safe_utf8_string_index >= utf8_string_size )
4592
0
  {
4593
0
    libcerror_error_set(
4594
0
     error,
4595
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4596
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4597
0
     "%s: UTF-8 string too small.",
4598
0
     function );
4599
4600
0
    return( -1 );
4601
0
  }
4602
  /* Determine if the Unicode character is valid
4603
   */
4604
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4605
0
  {
4606
0
    libcerror_error_set(
4607
0
     error,
4608
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4609
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4610
0
     "%s: unsupported Unicode character.",
4611
0
     function );
4612
4613
0
    return( -1 );
4614
0
  }
4615
  /* Determine how many UTF-8 character bytes are required
4616
   */
4617
0
  if( unicode_character < 0x080 )
4618
0
  {
4619
0
    utf8_character_additional_bytes = 0;
4620
0
    utf8_first_character_mark       = 0;
4621
0
  }
4622
0
  else if( unicode_character < 0x0800 )
4623
0
  {
4624
0
    utf8_character_additional_bytes = 1;
4625
0
    utf8_first_character_mark       = 0x0c0;
4626
0
  }
4627
0
  else if( unicode_character < 0x010000 )
4628
0
  {
4629
0
    utf8_character_additional_bytes = 2;
4630
0
    utf8_first_character_mark       = 0x0e0;
4631
0
  }
4632
0
  else if( unicode_character < 0x0200000 )
4633
0
  {
4634
0
    utf8_character_additional_bytes = 3;
4635
0
    utf8_first_character_mark       = 0x0f0;
4636
0
  }
4637
0
  else if( unicode_character < 0x0400000 )
4638
0
  {
4639
0
    utf8_character_additional_bytes = 4;
4640
0
    utf8_first_character_mark       = 0x0f8;
4641
0
  }
4642
0
  else
4643
0
  {
4644
0
    utf8_character_additional_bytes = 5;
4645
0
    utf8_first_character_mark       = 0x0fc;
4646
0
  }
4647
  /* Convert Unicode character into UTF-8 character bytes
4648
   */
4649
0
  if( ( utf8_character_additional_bytes > utf8_string_size )
4650
0
   || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4651
0
  {
4652
0
    libcerror_error_set(
4653
0
     error,
4654
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4655
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4656
0
     "%s: UTF-8 string too small.",
4657
0
     function );
4658
4659
0
    return( -1 );
4660
0
  }
4661
0
  for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4662
0
       utf8_character_iterator > safe_utf8_string_index;
4663
0
       utf8_character_iterator-- )
4664
0
  {
4665
0
    utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4666
4667
0
    unicode_character >>= 6;
4668
0
  }
4669
0
  utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4670
4671
0
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4672
4673
0
  return( 1 );
4674
0
}
4675
4676
/* Determines the size of an UTF-16 character from an Unicode character
4677
 * Adds the size to the UTF-16 character size value
4678
 * Returns 1 if successful or -1 on error
4679
 */
4680
int libuna_unicode_character_size_to_utf16(
4681
     libuna_unicode_character_t unicode_character,
4682
     size_t *utf16_character_size,
4683
     libcerror_error_t **error )
4684
1.76k
{
4685
1.76k
  static char *function = "libuna_unicode_character_size_to_utf16";
4686
4687
1.76k
  if( utf16_character_size == NULL )
4688
0
  {
4689
0
    libcerror_error_set(
4690
0
     error,
4691
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4692
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4693
0
     "%s: invalid UTF-16 character size.",
4694
0
     function );
4695
4696
0
    return( -1 );
4697
0
  }
4698
1.76k
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4699
1.76k
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4700
1.76k
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4701
0
  {
4702
0
    libcerror_error_set(
4703
0
     error,
4704
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4705
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4706
0
     "%s: unsupported Unicode character.",
4707
0
     function );
4708
4709
0
    return( -1 );
4710
0
  }
4711
1.76k
  if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4712
6
  {
4713
6
    *utf16_character_size += 2;
4714
6
  }
4715
1.76k
  else
4716
1.76k
  {
4717
1.76k
    *utf16_character_size += 1;
4718
1.76k
  }
4719
1.76k
  return( 1 );
4720
1.76k
}
4721
4722
/* Copies an Unicode character from an UTF-16 string
4723
 * Returns 1 if successful or -1 on error
4724
 */
4725
int libuna_unicode_character_copy_from_utf16(
4726
     libuna_unicode_character_t *unicode_character,
4727
     const libuna_utf16_character_t *utf16_string,
4728
     size_t utf16_string_size,
4729
     size_t *utf16_string_index,
4730
     libcerror_error_t **error )
4731
0
{
4732
0
  static char *function                             = "libuna_unicode_character_copy_from_utf16";
4733
0
  libuna_unicode_character_t safe_unicode_character = 0;
4734
0
  libuna_utf16_character_t utf16_surrogate          = 0;
4735
0
  size_t safe_utf16_string_index                    = 0;
4736
4737
0
  if( unicode_character == NULL )
4738
0
  {
4739
0
    libcerror_error_set(
4740
0
     error,
4741
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4742
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4743
0
     "%s: invalid Unicode character.",
4744
0
     function );
4745
4746
0
    return( -1 );
4747
0
  }
4748
0
  if( utf16_string == NULL )
4749
0
  {
4750
0
    libcerror_error_set(
4751
0
     error,
4752
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4753
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4754
0
     "%s: invalid UTF-16 string.",
4755
0
     function );
4756
4757
0
    return( -1 );
4758
0
  }
4759
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
4760
0
  {
4761
0
    libcerror_error_set(
4762
0
     error,
4763
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4764
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4765
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
4766
0
     function );
4767
4768
0
    return( -1 );
4769
0
  }
4770
0
  if( utf16_string_index == NULL )
4771
0
  {
4772
0
    libcerror_error_set(
4773
0
     error,
4774
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4775
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4776
0
     "%s: invalid UTF-16 string index.",
4777
0
     function );
4778
4779
0
    return( -1 );
4780
0
  }
4781
0
  safe_utf16_string_index = *utf16_string_index;
4782
4783
0
  if( safe_utf16_string_index >= utf16_string_size )
4784
0
  {
4785
0
    libcerror_error_set(
4786
0
     error,
4787
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4788
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4789
0
     "%s: UTF-16 string too small.",
4790
0
     function );
4791
4792
0
    return( -1 );
4793
0
  }
4794
0
  safe_unicode_character   = utf16_string[ safe_utf16_string_index ];
4795
0
  safe_utf16_string_index += 1;
4796
4797
  /* Determine if the UTF-16 character is within the high surrogate range
4798
   */
4799
0
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4800
0
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4801
0
  {
4802
0
    if( safe_utf16_string_index >= utf16_string_size )
4803
0
    {
4804
0
      libcerror_error_set(
4805
0
       error,
4806
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4807
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4808
0
       "%s: missing surrogate UTF-16 character bytes.",
4809
0
       function );
4810
4811
0
      return( -1 );
4812
0
    }
4813
0
    utf16_surrogate          = utf16_string[ safe_utf16_string_index ];
4814
0
    safe_utf16_string_index += 1;
4815
4816
    /* Determine if the UTF-16 character is within the low surrogate range
4817
     */
4818
0
    if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4819
0
     && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4820
0
    {
4821
0
      safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4822
0
      safe_unicode_character <<= 10;
4823
0
      safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
4824
0
      safe_unicode_character  += 0x010000;
4825
0
    }
4826
0
    else
4827
0
    {
4828
0
      libcerror_error_set(
4829
0
       error,
4830
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
4831
0
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4832
0
       "%s: unsupported low surrogate UTF-16 character.",
4833
0
       function );
4834
4835
0
      return( -1 );
4836
0
    }
4837
0
  }
4838
  /* Determine if the Unicode character is valid
4839
   */
4840
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4841
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4842
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4843
0
  {
4844
0
    libcerror_error_set(
4845
0
     error,
4846
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4847
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4848
0
     "%s: unsupported Unicode character.",
4849
0
     function );
4850
4851
0
    return( -1 );
4852
0
  }
4853
0
  *unicode_character  = safe_unicode_character;
4854
0
  *utf16_string_index = safe_utf16_string_index;
4855
4856
0
  return( 1 );
4857
0
}
4858
4859
/* Copies an Unicode character into a UTF-16 string
4860
 * Returns 1 if successful or -1 on error
4861
 */
4862
int libuna_unicode_character_copy_to_utf16(
4863
     libuna_unicode_character_t unicode_character,
4864
     libuna_utf16_character_t *utf16_string,
4865
     size_t utf16_string_size,
4866
     size_t *utf16_string_index,
4867
     libcerror_error_t **error )
4868
0
{
4869
0
  static char *function          = "libuna_unicode_character_copy_to_utf16";
4870
0
  size_t safe_utf16_string_index = 0;
4871
4872
0
  if( utf16_string == NULL )
4873
0
  {
4874
0
    libcerror_error_set(
4875
0
     error,
4876
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4877
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4878
0
     "%s: invalid UTF-16 string.",
4879
0
     function );
4880
4881
0
    return( -1 );
4882
0
  }
4883
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
4884
0
  {
4885
0
    libcerror_error_set(
4886
0
     error,
4887
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4888
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4889
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
4890
0
     function );
4891
4892
0
    return( -1 );
4893
0
  }
4894
0
  if( utf16_string_index == NULL )
4895
0
  {
4896
0
    libcerror_error_set(
4897
0
     error,
4898
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4899
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4900
0
     "%s: invalid UTF-16 string index.",
4901
0
     function );
4902
4903
0
    return( -1 );
4904
0
  }
4905
0
  safe_utf16_string_index = *utf16_string_index;
4906
4907
0
  if( safe_utf16_string_index >= utf16_string_size )
4908
0
  {
4909
0
    libcerror_error_set(
4910
0
     error,
4911
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4912
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4913
0
     "%s: UTF-16 string too small.",
4914
0
     function );
4915
4916
0
    return( -1 );
4917
0
  }
4918
  /* Determine if the Unicode character is valid
4919
   */
4920
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4921
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4922
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4923
0
  {
4924
0
    libcerror_error_set(
4925
0
     error,
4926
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4927
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4928
0
     "%s: unsupported Unicode character.",
4929
0
     function );
4930
4931
0
    return( -1 );
4932
0
  }
4933
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4934
0
  {
4935
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) unicode_character;
4936
0
  }
4937
0
  else
4938
0
  {
4939
0
    if( ( utf16_string_size < 2 )
4940
0
     || ( safe_utf16_string_index > ( utf16_string_size - 2 ) ) )
4941
0
    {
4942
0
      libcerror_error_set(
4943
0
       error,
4944
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4945
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4946
0
       "%s: UTF-16 string too small.",
4947
0
       function );
4948
4949
0
      return( -1 );
4950
0
    }
4951
0
    unicode_character                        -= 0x010000;
4952
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
4953
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
4954
0
  }
4955
0
  *utf16_string_index = safe_utf16_string_index;
4956
4957
0
  return( 1 );
4958
0
}
4959
4960
/* Copies an Unicode character from an UTF-16 stream
4961
 * Returns 1 if successful or -1 on error
4962
 */
4963
int libuna_unicode_character_copy_from_utf16_stream(
4964
     libuna_unicode_character_t *unicode_character,
4965
     const uint8_t *utf16_stream,
4966
     size_t utf16_stream_size,
4967
     size_t *utf16_stream_index,
4968
     int byte_order,
4969
     libcerror_error_t **error )
4970
9.30M
{
4971
9.30M
  static char *function                             = "libuna_unicode_character_copy_from_utf16_stream";
4972
9.30M
  libuna_unicode_character_t safe_unicode_character = 0;
4973
9.30M
  libuna_utf16_character_t utf16_surrogate          = 0;
4974
9.30M
  size_t safe_utf16_stream_index                    = 0;
4975
9.30M
  int byte_order_without_flags                      = 0;
4976
4977
9.30M
  if( unicode_character == NULL )
4978
0
  {
4979
0
    libcerror_error_set(
4980
0
     error,
4981
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4982
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4983
0
     "%s: invalid Unicode character.",
4984
0
     function );
4985
4986
0
    return( -1 );
4987
0
  }
4988
9.30M
  if( utf16_stream == NULL )
4989
0
  {
4990
0
    libcerror_error_set(
4991
0
     error,
4992
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4993
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4994
0
     "%s: invalid UTF-16 stream.",
4995
0
     function );
4996
4997
0
    return( -1 );
4998
0
  }
4999
9.30M
  if( utf16_stream_size > (size_t) SSIZE_MAX )
5000
0
  {
5001
0
    libcerror_error_set(
5002
0
     error,
5003
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5004
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5005
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
5006
0
     function );
5007
5008
0
    return( -1 );
5009
0
  }
5010
9.30M
  if( utf16_stream_index == NULL )
5011
0
  {
5012
0
    libcerror_error_set(
5013
0
     error,
5014
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5015
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5016
0
     "%s: invalid UTF-16 stream index.",
5017
0
     function );
5018
5019
0
    return( -1 );
5020
0
  }
5021
9.30M
  byte_order_without_flags = byte_order & 0xff;
5022
5023
9.30M
  if( ( byte_order_without_flags != LIBUNA_ENDIAN_BIG )
5024
9.30M
   && ( byte_order_without_flags != LIBUNA_ENDIAN_LITTLE ) )
5025
0
  {
5026
0
    libcerror_error_set(
5027
0
     error,
5028
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5029
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5030
0
     "%s: unsupported byte order.",
5031
0
     function );
5032
5033
0
    return( -1 );
5034
0
  }
5035
9.30M
  safe_utf16_stream_index = *utf16_stream_index;
5036
5037
9.30M
  if( ( utf16_stream_size < 2 )
5038
9.30M
   || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5039
64
  {
5040
64
    libcerror_error_set(
5041
64
     error,
5042
64
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5043
64
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5044
64
     "%s: UTF-16 stream too small.",
5045
64
     function );
5046
5047
64
    return( -1 );
5048
64
  }
5049
9.30M
  if( byte_order_without_flags == LIBUNA_ENDIAN_BIG )
5050
2.49M
  {
5051
2.49M
    safe_unicode_character   = utf16_stream[ safe_utf16_stream_index ];
5052
2.49M
    safe_unicode_character <<= 8;
5053
2.49M
    safe_unicode_character  += utf16_stream[ safe_utf16_stream_index + 1 ];
5054
2.49M
  }
5055
6.81M
  else if( byte_order_without_flags == LIBUNA_ENDIAN_LITTLE )
5056
6.81M
  {
5057
6.81M
    safe_unicode_character   = utf16_stream[ safe_utf16_stream_index + 1 ];
5058
6.81M
    safe_unicode_character <<= 8;
5059
6.81M
    safe_unicode_character  += utf16_stream[ safe_utf16_stream_index ];
5060
6.81M
  }
5061
9.30M
  safe_utf16_stream_index += 2;
5062
5063
  /* Determine if the Unicode character is valid
5064
   */
5065
9.30M
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5066
9.30M
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5067
486
  {
5068
486
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5069
464
    {
5070
464
      libcerror_error_set(
5071
464
       error,
5072
464
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
5073
464
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5074
464
       "%s: unsupported UTF-16 character.",
5075
464
       function );
5076
5077
464
      return( -1 );
5078
464
    }
5079
486
  }
5080
  /* Determine if the UTF-16 character is within the high surrogate range
5081
   */
5082
9.30M
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5083
9.30M
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
5084
78.6k
  {
5085
78.6k
    if( safe_utf16_stream_index > ( utf16_stream_size - 2 ) )
5086
57
    {
5087
57
      if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5088
53
      {
5089
53
        libcerror_error_set(
5090
53
         error,
5091
53
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
5092
53
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5093
53
         "%s: missing surrogate UTF-16 character bytes.",
5094
53
         function );
5095
5096
53
        return( -1 );
5097
53
      }
5098
57
    }
5099
78.5k
    else
5100
78.5k
    {
5101
78.5k
      if( byte_order_without_flags == LIBUNA_ENDIAN_BIG )
5102
66.5k
      {
5103
66.5k
        utf16_surrogate   = utf16_stream[ safe_utf16_stream_index ];
5104
66.5k
        utf16_surrogate <<= 8;
5105
66.5k
        utf16_surrogate  += utf16_stream[ safe_utf16_stream_index + 1 ];
5106
66.5k
      }
5107
11.9k
      else if( byte_order_without_flags == LIBUNA_ENDIAN_LITTLE )
5108
11.9k
      {
5109
11.9k
        utf16_surrogate   = utf16_stream[ safe_utf16_stream_index + 1 ];
5110
11.9k
        utf16_surrogate <<= 8;
5111
11.9k
        utf16_surrogate  += utf16_stream[ safe_utf16_stream_index ];
5112
11.9k
      }
5113
      /* Determine if the UTF-16 character is within the low surrogate range
5114
       */
5115
78.5k
      if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5116
78.5k
       && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5117
77.8k
      {
5118
77.8k
        safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
5119
77.8k
        safe_unicode_character <<= 10;
5120
77.8k
        safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
5121
77.8k
        safe_unicode_character  += 0x010000;
5122
5123
77.8k
        safe_utf16_stream_index += 2;
5124
77.8k
      }
5125
708
      else if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5126
701
      {
5127
701
        libcerror_error_set(
5128
701
         error,
5129
701
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
5130
701
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5131
701
         "%s: unsupported low surrogate UTF-16 character.",
5132
701
         function );
5133
5134
701
        return( -1 );
5135
701
      }
5136
78.5k
    }
5137
78.6k
  }
5138
9.30M
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
5139
0
  {
5140
0
    libcerror_error_set(
5141
0
     error,
5142
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5143
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5144
0
     "%s: unsupported Unicode character.",
5145
0
     function );
5146
5147
0
    return( -1 );
5148
0
  }
5149
9.30M
  *unicode_character  = safe_unicode_character;
5150
9.30M
  *utf16_stream_index = safe_utf16_stream_index;
5151
5152
9.30M
  return( 1 );
5153
9.30M
}
5154
5155
/* Copies an Unicode character to an UTF-16 stream
5156
 * Returns 1 if successful or -1 on error
5157
 */
5158
int libuna_unicode_character_copy_to_utf16_stream(
5159
     libuna_unicode_character_t unicode_character,
5160
     uint8_t *utf16_stream,
5161
     size_t utf16_stream_size,
5162
     size_t *utf16_stream_index,
5163
     int byte_order,
5164
     libcerror_error_t **error )
5165
0
{
5166
0
  static char *function                    = "libuna_unicode_character_copy_to_utf16_stream";
5167
0
  libuna_utf16_character_t utf16_surrogate = 0;
5168
0
  size_t safe_utf16_stream_index           = 0;
5169
0
  int byte_order_without_flags             = 0;
5170
5171
0
  if( utf16_stream == NULL )
5172
0
  {
5173
0
    libcerror_error_set(
5174
0
     error,
5175
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5176
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5177
0
     "%s: invalid UTF-16 stream.",
5178
0
     function );
5179
5180
0
    return( -1 );
5181
0
  }
5182
0
  if( utf16_stream_size > (size_t) SSIZE_MAX )
5183
0
  {
5184
0
    libcerror_error_set(
5185
0
     error,
5186
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5187
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5188
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
5189
0
     function );
5190
5191
0
    return( -1 );
5192
0
  }
5193
0
  if( utf16_stream_index == NULL )
5194
0
  {
5195
0
    libcerror_error_set(
5196
0
     error,
5197
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5198
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5199
0
     "%s: invalid UTF-16 stream index.",
5200
0
     function );
5201
5202
0
    return( -1 );
5203
0
  }
5204
0
  byte_order_without_flags = byte_order & 0xff;
5205
5206
0
  if( ( byte_order_without_flags != LIBUNA_ENDIAN_BIG )
5207
0
   && ( byte_order_without_flags != LIBUNA_ENDIAN_LITTLE ) )
5208
0
  {
5209
0
    libcerror_error_set(
5210
0
     error,
5211
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5212
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5213
0
     "%s: unsupported byte order.",
5214
0
     function );
5215
5216
0
    return( -1 );
5217
0
  }
5218
0
  safe_utf16_stream_index = *utf16_stream_index;
5219
5220
  /* Determine if the Unicode character is valid
5221
   */
5222
0
  if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
5223
0
  {
5224
0
    libcerror_error_set(
5225
0
     error,
5226
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5227
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5228
0
     "%s: unsupported Unicode character.",
5229
0
     function );
5230
5231
0
    return( -1 );
5232
0
  }
5233
0
  if( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5234
0
   && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5235
0
  {
5236
0
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5237
0
    {
5238
0
      libcerror_error_set(
5239
0
       error,
5240
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
5241
0
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5242
0
       "%s: unsupported Unicode character.",
5243
0
       function );
5244
5245
0
      return( -1 );
5246
0
    }
5247
0
  }
5248
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
5249
0
  {
5250
0
    if( ( utf16_stream_size < 2 )
5251
0
     || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5252
0
    {
5253
0
      libcerror_error_set(
5254
0
       error,
5255
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5256
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5257
0
       "%s: UTF-16 stream too small.",
5258
0
       function );
5259
5260
0
      return( -1 );
5261
0
    }
5262
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5263
0
    {
5264
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5265
0
      unicode_character                         >>= 8;
5266
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5267
0
    }
5268
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5269
0
    {
5270
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5271
0
      unicode_character                         >>= 8;
5272
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5273
0
    }
5274
0
    safe_utf16_stream_index += 2;
5275
0
  }
5276
0
  else
5277
0
  {
5278
0
    if( ( utf16_stream_size < 4 )
5279
0
     || ( safe_utf16_stream_index > ( utf16_stream_size - 4 ) ) )
5280
0
    {
5281
0
      libcerror_error_set(
5282
0
       error,
5283
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5284
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5285
0
       "%s: UTF-16 stream too small.",
5286
0
       function );
5287
5288
0
      return( -1 );
5289
0
    }
5290
0
    unicode_character -= 0x010000;
5291
5292
0
    utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
5293
5294
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5295
0
    {
5296
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5297
0
      utf16_surrogate                           >>= 8;
5298
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5299
0
    }
5300
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5301
0
    {
5302
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5303
0
      utf16_surrogate                           >>= 8;
5304
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5305
0
    }
5306
0
    safe_utf16_stream_index += 2;
5307
5308
0
    utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
5309
5310
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5311
0
    {
5312
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5313
0
      utf16_surrogate                           >>= 8;
5314
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5315
0
    }
5316
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5317
0
    {
5318
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5319
0
      utf16_surrogate                           >>= 8;
5320
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5321
0
    }
5322
0
    safe_utf16_stream_index += 2;
5323
0
  }
5324
0
  *utf16_stream_index = safe_utf16_stream_index;
5325
5326
0
  return( 1 );
5327
0
}
5328
5329
/* Determines the size of an UTF-32 character from an Unicode character
5330
 * Adds the size to the UTF-32 character size value
5331
 * Returns 1 if successful or -1 on error
5332
 */
5333
int libuna_unicode_character_size_to_utf32(
5334
     libuna_unicode_character_t unicode_character,
5335
     size_t *utf32_character_size,
5336
     libcerror_error_t **error )
5337
0
{
5338
0
  static char *function = "libuna_unicode_character_size_to_utf32";
5339
5340
0
  LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
5341
5342
0
  if( utf32_character_size == NULL )
5343
0
  {
5344
0
    libcerror_error_set(
5345
0
     error,
5346
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5347
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5348
0
     "%s: invalid UTF-32 character size.",
5349
0
     function );
5350
5351
0
    return( -1 );
5352
0
  }
5353
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5354
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5355
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5356
0
  {
5357
0
    libcerror_error_set(
5358
0
     error,
5359
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5360
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5361
0
     "%s: unsupported Unicode character.",
5362
0
     function );
5363
5364
0
    return( -1 );
5365
0
  }
5366
0
  *utf32_character_size += 1;
5367
5368
0
  return( 1 );
5369
0
}
5370
5371
/* Copies an Unicode character from an UTF-32 string
5372
 * Returns 1 if successful or -1 on error
5373
 */
5374
int libuna_unicode_character_copy_from_utf32(
5375
     libuna_unicode_character_t *unicode_character,
5376
     const libuna_utf32_character_t *utf32_string,
5377
     size_t utf32_string_size,
5378
     size_t *utf32_string_index,
5379
     libcerror_error_t **error )
5380
0
{
5381
0
  static char *function                             = "libuna_unicode_character_copy_from_utf32";
5382
0
  libuna_unicode_character_t safe_unicode_character = 0;
5383
0
  size_t safe_utf32_string_index                    = 0;
5384
5385
0
  if( unicode_character == NULL )
5386
0
  {
5387
0
    libcerror_error_set(
5388
0
     error,
5389
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5390
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5391
0
     "%s: invalid Unicode character.",
5392
0
     function );
5393
5394
0
    return( -1 );
5395
0
  }
5396
0
  if( utf32_string == NULL )
5397
0
  {
5398
0
    libcerror_error_set(
5399
0
     error,
5400
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5401
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5402
0
     "%s: invalid UTF-32 string.",
5403
0
     function );
5404
5405
0
    return( -1 );
5406
0
  }
5407
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
5408
0
  {
5409
0
    libcerror_error_set(
5410
0
     error,
5411
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5412
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5413
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
5414
0
     function );
5415
5416
0
    return( -1 );
5417
0
  }
5418
0
  if( utf32_string_index == NULL )
5419
0
  {
5420
0
    libcerror_error_set(
5421
0
     error,
5422
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5423
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5424
0
     "%s: invalid UTF-32 string index.",
5425
0
     function );
5426
5427
0
    return( -1 );
5428
0
  }
5429
0
  safe_utf32_string_index = *utf32_string_index;
5430
5431
0
  if( safe_utf32_string_index >= utf32_string_size )
5432
0
  {
5433
0
    libcerror_error_set(
5434
0
     error,
5435
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5436
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5437
0
     "%s: UTF-32 string too small.",
5438
0
     function );
5439
5440
0
    return( -1 );
5441
0
  }
5442
0
  safe_unicode_character = utf32_string[ safe_utf32_string_index ];
5443
5444
  /* Determine if the Unicode character is valid
5445
   */
5446
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5447
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5448
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5449
0
  {
5450
0
    libcerror_error_set(
5451
0
     error,
5452
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5453
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5454
0
     "%s: unsupported Unicode character.",
5455
0
     function );
5456
5457
0
    return( -1 );
5458
0
  }
5459
0
  *unicode_character  = safe_unicode_character;
5460
0
  *utf32_string_index = safe_utf32_string_index + 1;
5461
5462
0
  return( 1 );
5463
0
}
5464
5465
/* Copies an Unicode character into a UTF-32 string
5466
 * Returns 1 if successful or -1 on error
5467
 */
5468
int libuna_unicode_character_copy_to_utf32(
5469
     libuna_unicode_character_t unicode_character,
5470
     libuna_utf32_character_t *utf32_string,
5471
     size_t utf32_string_size,
5472
     size_t *utf32_string_index,
5473
     libcerror_error_t **error )
5474
0
{
5475
0
  static char *function          = "libuna_unicode_character_copy_to_utf32";
5476
0
  size_t safe_utf32_string_index = 0;
5477
5478
0
  if( utf32_string == NULL )
5479
0
  {
5480
0
    libcerror_error_set(
5481
0
     error,
5482
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5483
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5484
0
     "%s: invalid UTF-32 string.",
5485
0
     function );
5486
5487
0
    return( -1 );
5488
0
  }
5489
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
5490
0
  {
5491
0
    libcerror_error_set(
5492
0
     error,
5493
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5494
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5495
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
5496
0
     function );
5497
5498
0
    return( -1 );
5499
0
  }
5500
0
  if( utf32_string_index == NULL )
5501
0
  {
5502
0
    libcerror_error_set(
5503
0
     error,
5504
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5505
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5506
0
     "%s: invalid UTF-32 string index.",
5507
0
     function );
5508
5509
0
    return( -1 );
5510
0
  }
5511
0
  safe_utf32_string_index = *utf32_string_index;
5512
5513
0
  if( safe_utf32_string_index >= utf32_string_size )
5514
0
  {
5515
0
    libcerror_error_set(
5516
0
     error,
5517
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5518
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5519
0
     "%s: UTF-32 string too small.",
5520
0
     function );
5521
5522
0
    return( -1 );
5523
0
  }
5524
  /* Determine if the Unicode character is valid
5525
   */
5526
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5527
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5528
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5529
0
  {
5530
0
    libcerror_error_set(
5531
0
     error,
5532
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5533
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5534
0
     "%s: unsupported Unicode character.",
5535
0
     function );
5536
5537
0
    return( -1 );
5538
0
  }
5539
0
  utf32_string[ safe_utf32_string_index ] = (libuna_utf32_character_t) unicode_character;
5540
5541
0
  *utf32_string_index = safe_utf32_string_index + 1;
5542
5543
0
  return( 1 );
5544
0
}
5545
5546
/* Copies an Unicode character from an UTF-32 stream
5547
 * Returns 1 if successful or -1 on error
5548
 */
5549
int libuna_unicode_character_copy_from_utf32_stream(
5550
     libuna_unicode_character_t *unicode_character,
5551
     const uint8_t *utf32_stream,
5552
     size_t utf32_stream_size,
5553
     size_t *utf32_stream_index,
5554
     int byte_order,
5555
     libcerror_error_t **error )
5556
0
{
5557
0
  static char *function                             = "libuna_unicode_character_copy_from_utf32_stream";
5558
0
  libuna_unicode_character_t safe_unicode_character = 0;
5559
0
  size_t safe_utf32_stream_index                    = 0;
5560
5561
0
  if( unicode_character == NULL )
5562
0
  {
5563
0
    libcerror_error_set(
5564
0
     error,
5565
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5566
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5567
0
     "%s: invalid Unicode character.",
5568
0
     function );
5569
5570
0
    return( -1 );
5571
0
  }
5572
0
  if( utf32_stream == NULL )
5573
0
  {
5574
0
    libcerror_error_set(
5575
0
     error,
5576
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5577
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5578
0
     "%s: invalid UTF-32 stream.",
5579
0
     function );
5580
5581
0
    return( -1 );
5582
0
  }
5583
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
5584
0
  {
5585
0
    libcerror_error_set(
5586
0
     error,
5587
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5588
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5589
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
5590
0
     function );
5591
5592
0
    return( -1 );
5593
0
  }
5594
0
  if( utf32_stream_index == NULL )
5595
0
  {
5596
0
    libcerror_error_set(
5597
0
     error,
5598
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5599
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5600
0
     "%s: invalid UTF-32 stream index.",
5601
0
     function );
5602
5603
0
    return( -1 );
5604
0
  }
5605
0
  safe_utf32_stream_index = *utf32_stream_index;
5606
5607
0
  if( ( utf32_stream_size < 4 )
5608
0
   || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5609
0
  {
5610
0
    libcerror_error_set(
5611
0
     error,
5612
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5613
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5614
0
     "%s: UTF-32 stream too small.",
5615
0
     function );
5616
5617
0
    return( -1 );
5618
0
  }
5619
0
  if( ( byte_order != LIBUNA_ENDIAN_BIG )
5620
0
   && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5621
0
  {
5622
0
    libcerror_error_set(
5623
0
     error,
5624
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5625
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5626
0
     "%s: unsupported byte order.",
5627
0
     function );
5628
5629
0
    return( -1 );
5630
0
  }
5631
0
  if( byte_order == LIBUNA_ENDIAN_BIG )
5632
0
  {
5633
0
    safe_unicode_character   = utf32_stream[ safe_utf32_stream_index ];
5634
0
    safe_unicode_character <<= 8;
5635
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 1 ];
5636
0
    safe_unicode_character <<= 8;
5637
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 2 ];
5638
0
    safe_unicode_character <<= 8;
5639
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 3 ];
5640
0
  }
5641
0
  else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5642
0
  {
5643
0
    safe_unicode_character   = utf32_stream[ safe_utf32_stream_index + 3 ];
5644
0
    safe_unicode_character <<= 8;
5645
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 2 ];
5646
0
    safe_unicode_character <<= 8;
5647
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 1 ];
5648
0
    safe_unicode_character <<= 8;
5649
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index ];
5650
0
  }
5651
  /* Determine if the Unicode character is valid
5652
   */
5653
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5654
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5655
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5656
0
  {
5657
0
    libcerror_error_set(
5658
0
     error,
5659
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5660
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5661
0
     "%s: unsupported Unicode character.",
5662
0
     function );
5663
5664
0
    return( -1 );
5665
0
  }
5666
0
  *unicode_character  = safe_unicode_character;
5667
0
  *utf32_stream_index = safe_utf32_stream_index + 4;
5668
5669
0
  return( 1 );
5670
0
}
5671
5672
/* Copies an Unicode character to an UTF-32 stream
5673
 * Returns 1 if successful or -1 on error
5674
 */
5675
int libuna_unicode_character_copy_to_utf32_stream(
5676
     libuna_unicode_character_t unicode_character,
5677
     uint8_t *utf32_stream,
5678
     size_t utf32_stream_size,
5679
     size_t *utf32_stream_index,
5680
     int byte_order,
5681
     libcerror_error_t **error )
5682
0
{
5683
0
  static char *function          = "libuna_unicode_character_copy_to_utf32_stream";
5684
0
  size_t safe_utf32_stream_index = 0;
5685
5686
0
  if( utf32_stream == NULL )
5687
0
  {
5688
0
    libcerror_error_set(
5689
0
     error,
5690
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5691
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5692
0
     "%s: invalid UTF-32 stream.",
5693
0
     function );
5694
5695
0
    return( -1 );
5696
0
  }
5697
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
5698
0
  {
5699
0
    libcerror_error_set(
5700
0
     error,
5701
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5702
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5703
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
5704
0
     function );
5705
5706
0
    return( -1 );
5707
0
  }
5708
0
  if( utf32_stream_index == NULL )
5709
0
  {
5710
0
    libcerror_error_set(
5711
0
     error,
5712
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5713
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5714
0
     "%s: invalid UTF-32 stream index.",
5715
0
     function );
5716
5717
0
    return( -1 );
5718
0
  }
5719
0
  safe_utf32_stream_index = *utf32_stream_index;
5720
5721
0
  if( ( utf32_stream_size < 4 )
5722
0
   || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5723
0
  {
5724
0
    libcerror_error_set(
5725
0
     error,
5726
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5727
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5728
0
     "%s: UTF-32 stream too small.",
5729
0
     function );
5730
5731
0
    return( -1 );
5732
0
  }
5733
0
  if( ( byte_order != LIBUNA_ENDIAN_BIG )
5734
0
   && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5735
0
  {
5736
0
    libcerror_error_set(
5737
0
     error,
5738
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5739
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5740
0
     "%s: unsupported byte order.",
5741
0
     function );
5742
5743
0
    return( -1 );
5744
0
  }
5745
  /* Determine if the Unicode character is valid
5746
   */
5747
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5748
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5749
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5750
0
  {
5751
0
    libcerror_error_set(
5752
0
     error,
5753
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5754
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5755
0
     "%s: unsupported Unicode character.",
5756
0
     function );
5757
5758
0
    return( -1 );
5759
0
  }
5760
0
  if( byte_order == LIBUNA_ENDIAN_BIG )
5761
0
  {
5762
0
    utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5763
0
    unicode_character                         >>= 8;
5764
0
    utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5765
0
    unicode_character                         >>= 8;
5766
0
    utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5767
0
    unicode_character                         >>= 8;
5768
0
    utf32_stream[ safe_utf32_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5769
0
  }
5770
0
  else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5771
0
  {
5772
0
    utf32_stream[ safe_utf32_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5773
0
    unicode_character                         >>= 8;
5774
0
    utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5775
0
    unicode_character                         >>= 8;
5776
0
    utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5777
0
    unicode_character                        >>= 8;
5778
0
    utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5779
0
  }
5780
0
  *utf32_stream_index = safe_utf32_stream_index + 4;
5781
5782
0
  return( 1 );
5783
0
}
5784