Coverage Report

Created: 2025-06-13 07:22

/src/libluksde/libuna/libuna_unicode_character.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Unicode character functions
3
 *
4
 * Copyright (C) 2008-2024, Joachim Metz <joachim.metz@gmail.com>
5
 *
6
 * Refer to AUTHORS for acknowledgements.
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20
 */
21
22
#include <common.h>
23
#include <types.h>
24
25
#include "libuna_base64_stream.h"
26
#include "libuna_codepage_iso_8859_2.h"
27
#include "libuna_codepage_iso_8859_3.h"
28
#include "libuna_codepage_iso_8859_4.h"
29
#include "libuna_codepage_iso_8859_5.h"
30
#include "libuna_codepage_iso_8859_6.h"
31
#include "libuna_codepage_iso_8859_7.h"
32
#include "libuna_codepage_iso_8859_8.h"
33
#include "libuna_codepage_iso_8859_9.h"
34
#include "libuna_codepage_iso_8859_10.h"
35
#include "libuna_codepage_iso_8859_13.h"
36
#include "libuna_codepage_iso_8859_14.h"
37
#include "libuna_codepage_iso_8859_15.h"
38
#include "libuna_codepage_iso_8859_16.h"
39
#include "libuna_codepage_koi8_r.h"
40
#include "libuna_codepage_koi8_u.h"
41
#include "libuna_codepage_mac_arabic.h"
42
#include "libuna_codepage_mac_celtic.h"
43
#include "libuna_codepage_mac_centraleurroman.h"
44
#include "libuna_codepage_mac_croatian.h"
45
#include "libuna_codepage_mac_cyrillic.h"
46
#include "libuna_codepage_mac_dingbats.h"
47
#include "libuna_codepage_mac_farsi.h"
48
#include "libuna_codepage_mac_gaelic.h"
49
#include "libuna_codepage_mac_greek.h"
50
#include "libuna_codepage_mac_icelandic.h"
51
#include "libuna_codepage_mac_inuit.h"
52
#include "libuna_codepage_mac_roman.h"
53
#include "libuna_codepage_mac_romanian.h"
54
#include "libuna_codepage_mac_russian.h"
55
#include "libuna_codepage_mac_symbol.h"
56
#include "libuna_codepage_mac_thai.h"
57
#include "libuna_codepage_mac_turkish.h"
58
#include "libuna_codepage_mac_ukrainian.h"
59
#include "libuna_codepage_windows_874.h"
60
#include "libuna_codepage_windows_932.h"
61
#include "libuna_codepage_windows_936.h"
62
#include "libuna_codepage_windows_949.h"
63
#include "libuna_codepage_windows_950.h"
64
#include "libuna_codepage_windows_1250.h"
65
#include "libuna_codepage_windows_1251.h"
66
#include "libuna_codepage_windows_1252.h"
67
#include "libuna_codepage_windows_1253.h"
68
#include "libuna_codepage_windows_1254.h"
69
#include "libuna_codepage_windows_1255.h"
70
#include "libuna_codepage_windows_1256.h"
71
#include "libuna_codepage_windows_1257.h"
72
#include "libuna_codepage_windows_1258.h"
73
#include "libuna_definitions.h"
74
#include "libuna_libcerror.h"
75
#include "libuna_types.h"
76
#include "libuna_unicode_character.h"
77
#include "libuna_unused.h"
78
79
/* Lookup table of the byte values that may be directly encoded in an UTF-7 stream (RFC 2152)
 * An entry is 1 if the byte value is valid as a directly encoded character or 0 if not
 *
 * Valid directly encoded characters (Set D): A-Z, a-z, 0-9, '\'', '(', ')', ',', '-', '.', '/', ':', '?'
 * Valid directly encoded whitespace: '\t', '\n', '\r', ' '
 * Valid optional directly encoded characters (Set O): '!', '"', '#', '$', '%', '&', '*', ';', '<', '=', '>', '@', '[', ']', '^', '_', '`', '{', '|', '}'
 *
 * Not valid as directly encoded character: '+' (the shift character), '\\' and '~'
 * which RFC 2152 explicitly excludes from both sets
 */
uint8_t libuna_unicode_character_utf7_valid_directly_encoded_character[ 256 ] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20: the apostrophe (0x27) is part of Set D, only '+' (0x2b) is excluded */
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50: the backslash (0x5c) is not part of Set D or Set O */
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70: the tilde (0x7e) and DEL (0x7f) are excluded */
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
100
101
/* Lookup table of the byte values that are part of the UTF-7 base64 alphabet
 * An entry is 1 for: A-Z, a-z, 0-9, '+' and '/' or 0 for any other byte value
 */
uint8_t libuna_unicode_character_utf7_valid_base64_character[ 256 ] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20: '+' (0x2b) and '/' (0x2f) */
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
	/* 0x30: 0-9 */
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40: A-O */
	/* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50: P-Z */
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x60: a-o */
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70: p-z */
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
120
121
/* Determines the size of a byte stream character from an Unicode character
122
 * Adds the size to the byte stream character size value
123
 * Returns 1 if successful, 0 if the byte stream character is valid but not supported since it requires special handling or -1 on error
124
 */
125
int libuna_unicode_character_size_to_byte_stream(
126
     libuna_unicode_character_t unicode_character,
127
     int codepage,
128
     size_t *byte_stream_character_size,
129
     libcerror_error_t **error )
130
0
{
131
0
  static char *function                  = "libuna_unicode_character_size_to_byte_stream";
132
0
  size_t safe_byte_stream_character_size = 0;
133
0
  int result                             = 1;
134
135
0
  if( byte_stream_character_size == NULL )
136
0
  {
137
0
    libcerror_error_set(
138
0
     error,
139
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
140
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
141
0
     "%s: invalid byte stream character size.",
142
0
     function );
143
144
0
    return( -1 );
145
0
  }
146
0
  safe_byte_stream_character_size = *byte_stream_character_size;
147
148
0
  switch( codepage )
149
0
  {
150
0
    case LIBUNA_CODEPAGE_ASCII:
151
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
152
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
153
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
154
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
155
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
156
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
157
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
158
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
159
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
160
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
161
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
162
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
163
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
164
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
165
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
166
0
    case LIBUNA_CODEPAGE_KOI8_R:
167
0
    case LIBUNA_CODEPAGE_KOI8_U:
168
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
169
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
170
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
171
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
172
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
173
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
174
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
175
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
176
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
177
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
178
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
179
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
180
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
181
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
182
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
183
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
184
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
185
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
186
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
187
0
    case LIBUNA_CODEPAGE_WINDOWS_1252:
188
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
189
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
190
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
191
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
192
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
193
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
194
0
      safe_byte_stream_character_size += 1;
195
0
      break;
196
197
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
198
0
      result = libuna_codepage_mac_symbol_unicode_character_size_to_byte_stream(
199
0
                unicode_character,
200
0
                &safe_byte_stream_character_size,
201
0
                error );
202
0
      break;
203
204
0
    case LIBUNA_CODEPAGE_MAC_THAI:
205
0
      result = libuna_codepage_mac_thai_unicode_character_size_to_byte_stream(
206
0
                unicode_character,
207
0
                &safe_byte_stream_character_size,
208
0
                error );
209
0
      break;
210
211
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
212
0
      result = libuna_codepage_windows_932_unicode_character_size_to_byte_stream(
213
0
                unicode_character,
214
0
                &safe_byte_stream_character_size,
215
0
                error );
216
0
      break;
217
218
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
219
0
      result = libuna_codepage_windows_936_unicode_character_size_to_byte_stream(
220
0
                unicode_character,
221
0
                &safe_byte_stream_character_size,
222
0
                error );
223
0
      break;
224
225
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
226
0
      result = libuna_codepage_windows_949_unicode_character_size_to_byte_stream(
227
0
                unicode_character,
228
0
                &safe_byte_stream_character_size,
229
0
                error );
230
0
      break;
231
232
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
233
0
      result = libuna_codepage_windows_950_unicode_character_size_to_byte_stream(
234
0
                unicode_character,
235
0
                &safe_byte_stream_character_size,
236
0
                error );
237
0
      break;
238
239
0
    default:
240
0
      libcerror_error_set(
241
0
       error,
242
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
243
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
244
0
       "%s: unsupported codepage: %d.",
245
0
       function,
246
0
       codepage );
247
248
0
      return( -1 );
249
0
  }
250
0
  if( result == -1 )
251
0
  {
252
0
    libcerror_error_set(
253
0
     error,
254
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
255
0
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
256
0
     "%s: unable to determine byte stream character size.",
257
0
     function );
258
259
0
    return( -1 );
260
0
  }
261
0
  *byte_stream_character_size = safe_byte_stream_character_size;
262
263
0
  return( result );
264
0
}
265
266
/* Copies an Unicode character from a byte stream
267
 * Returns 1 if successful, 0 if the byte stream character is valid but not supported since it requires special handling or -1 on error
268
 */
269
int libuna_unicode_character_copy_from_byte_stream(
270
     libuna_unicode_character_t *unicode_character,
271
     const uint8_t *byte_stream,
272
     size_t byte_stream_size,
273
     size_t *byte_stream_index,
274
     int codepage,
275
     libcerror_error_t **error )
276
208k
{
277
208k
  static char *function                             = "libuna_unicode_character_copy_from_byte_stream";
278
208k
  libuna_unicode_character_t safe_unicode_character = 0;
279
208k
  size_t safe_byte_stream_index                     = 0;
280
208k
  uint8_t byte_stream_character                     = 0;
281
208k
  int result                                        = 1;
282
283
208k
  if( unicode_character == NULL )
284
0
  {
285
0
    libcerror_error_set(
286
0
     error,
287
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
288
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
289
0
     "%s: invalid Unicode character.",
290
0
     function );
291
292
0
    return( -1 );
293
0
  }
294
208k
  if( byte_stream == NULL )
295
0
  {
296
0
    libcerror_error_set(
297
0
     error,
298
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
299
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
300
0
     "%s: invalid byte stream.",
301
0
     function );
302
303
0
    return( -1 );
304
0
  }
305
208k
  if( byte_stream_size > (size_t) SSIZE_MAX )
306
0
  {
307
0
    libcerror_error_set(
308
0
     error,
309
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
310
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
311
0
     "%s: invalid byte stream size value exceeds maximum.",
312
0
     function );
313
314
0
    return( -1 );
315
0
  }
316
208k
  if( byte_stream_index == NULL )
317
0
  {
318
0
    libcerror_error_set(
319
0
     error,
320
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
321
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
322
0
     "%s: invalid byte stream index.",
323
0
     function );
324
325
0
    return( -1 );
326
0
  }
327
208k
  safe_byte_stream_index = *byte_stream_index;
328
329
208k
  if( safe_byte_stream_index >= byte_stream_size )
330
0
  {
331
0
    libcerror_error_set(
332
0
     error,
333
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
334
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
335
0
     "%s: byte stream too small.",
336
0
     function );
337
338
0
    return( -1 );
339
0
  }
340
208k
  byte_stream_character = byte_stream[ safe_byte_stream_index ];
341
342
208k
  switch( codepage )
343
208k
  {
344
116k
    case LIBUNA_CODEPAGE_ASCII:
345
116k
      if( byte_stream_character < 0x80 )
346
114k
      {
347
114k
        safe_unicode_character = byte_stream_character;
348
114k
      }
349
2.54k
      else
350
2.54k
      {
351
2.54k
        safe_unicode_character = 0xfffd;
352
2.54k
      }
353
116k
      safe_byte_stream_index += 1;
354
355
116k
      break;
356
357
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
358
0
      safe_unicode_character = byte_stream_character;
359
360
0
      safe_byte_stream_index += 1;
361
362
0
      break;
363
364
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
365
0
      if( byte_stream_character < 0xa0 )
366
0
      {
367
0
        safe_unicode_character = byte_stream_character;
368
0
      }
369
0
      else
370
0
      {
371
0
        byte_stream_character -= 0xa0;
372
373
0
        safe_unicode_character = libuna_codepage_iso_8859_2_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
374
0
      }
375
0
      safe_byte_stream_index += 1;
376
377
0
      break;
378
379
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
380
0
      if( byte_stream_character < 0xa0 )
381
0
      {
382
0
        safe_unicode_character = byte_stream_character;
383
0
      }
384
0
      else
385
0
      {
386
0
        byte_stream_character -= 0xa0;
387
388
0
        safe_unicode_character = libuna_codepage_iso_8859_3_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
389
0
      }
390
0
      safe_byte_stream_index += 1;
391
392
0
      break;
393
394
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
395
0
      if( byte_stream_character < 0xa0 )
396
0
      {
397
0
        safe_unicode_character = byte_stream_character;
398
0
      }
399
0
      else
400
0
      {
401
0
        byte_stream_character -= 0xa0;
402
403
0
        safe_unicode_character = libuna_codepage_iso_8859_4_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
404
0
      }
405
0
      safe_byte_stream_index += 1;
406
407
0
      break;
408
409
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
410
0
      if( byte_stream_character < 0xa0 )
411
0
      {
412
0
        safe_unicode_character = byte_stream_character;
413
0
      }
414
0
      else
415
0
      {
416
0
        byte_stream_character -= 0xa0;
417
418
0
        safe_unicode_character = libuna_codepage_iso_8859_5_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
419
0
      }
420
0
      safe_byte_stream_index += 1;
421
422
0
      break;
423
424
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
425
0
      if( byte_stream_character < 0xa0 )
426
0
      {
427
0
        safe_unicode_character = byte_stream_character;
428
0
      }
429
0
      else
430
0
      {
431
0
        byte_stream_character -= 0xa0;
432
433
0
        safe_unicode_character = libuna_codepage_iso_8859_6_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
434
0
      }
435
0
      safe_byte_stream_index += 1;
436
437
0
      break;
438
439
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
440
0
      if( byte_stream_character < 0xa0 )
441
0
      {
442
0
        safe_unicode_character = byte_stream_character;
443
0
      }
444
0
      else
445
0
      {
446
0
        byte_stream_character -= 0xa0;
447
448
0
        safe_unicode_character = libuna_codepage_iso_8859_7_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
449
0
      }
450
0
      safe_byte_stream_index += 1;
451
452
0
      break;
453
454
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
455
0
      if( byte_stream_character < 0xa0 )
456
0
      {
457
0
        safe_unicode_character = byte_stream_character;
458
0
      }
459
0
      else
460
0
      {
461
0
        byte_stream_character -= 0xa0;
462
463
0
        safe_unicode_character = libuna_codepage_iso_8859_8_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
464
0
      }
465
0
      safe_byte_stream_index += 1;
466
467
0
      break;
468
469
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
470
0
      if( byte_stream_character < 0xd0 )
471
0
      {
472
0
        safe_unicode_character = byte_stream_character;
473
0
      }
474
0
      else
475
0
      {
476
0
        byte_stream_character -= 0xd0;
477
478
0
        safe_unicode_character = libuna_codepage_iso_8859_9_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
479
0
      }
480
0
      safe_byte_stream_index += 1;
481
482
0
      break;
483
484
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
485
0
      if( byte_stream_character < 0xa0 )
486
0
      {
487
0
        safe_unicode_character = byte_stream_character;
488
0
      }
489
0
      else
490
0
      {
491
0
        byte_stream_character -= 0xa0;
492
493
0
        safe_unicode_character = libuna_codepage_iso_8859_10_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
494
0
      }
495
0
      safe_byte_stream_index += 1;
496
497
0
      break;
498
499
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
500
0
      if( byte_stream_character < 0xa1 )
501
0
      {
502
0
        safe_unicode_character = byte_stream_character;
503
0
      }
504
0
      else if( byte_stream_character < 0xdb )
505
0
      {
506
0
        safe_unicode_character = byte_stream_character + 0x0d60;
507
0
      }
508
0
      else if( byte_stream_character < 0xdf )
509
0
      {
510
0
        safe_unicode_character = 0xfffd;
511
0
      }
512
0
      else if( byte_stream_character < 0xfc )
513
0
      {
514
0
        safe_unicode_character = byte_stream_character + 0x0d60;
515
0
      }
516
0
      else
517
0
      {
518
0
        safe_unicode_character = 0xfffd;
519
0
      }
520
0
      safe_byte_stream_index += 1;
521
522
0
      break;
523
524
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
525
0
      if( byte_stream_character < 0xa0 )
526
0
      {
527
0
        safe_unicode_character = byte_stream_character;
528
0
      }
529
0
      else
530
0
      {
531
0
        byte_stream_character -= 0xa0;
532
533
0
        safe_unicode_character = libuna_codepage_iso_8859_13_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
534
0
      }
535
0
      safe_byte_stream_index += 1;
536
537
0
      break;
538
539
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
540
0
      if( byte_stream_character < 0xa0 )
541
0
      {
542
0
        safe_unicode_character = byte_stream_character;
543
0
      }
544
0
      else
545
0
      {
546
0
        byte_stream_character -= 0xa0;
547
548
0
        safe_unicode_character = libuna_codepage_iso_8859_14_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
549
0
      }
550
0
      safe_byte_stream_index += 1;
551
552
0
      break;
553
554
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
555
0
      if( ( byte_stream_character >= 0xa0 )
556
0
       && ( byte_stream_character < 0xc0 ) )
557
0
      {
558
0
        byte_stream_character -= 0xa0;
559
560
0
        safe_unicode_character = libuna_codepage_iso_8859_15_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
561
0
      }
562
0
      else
563
0
      {
564
0
        safe_unicode_character = byte_stream_character;
565
0
      }
566
0
      safe_byte_stream_index += 1;
567
568
0
      break;
569
570
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
571
0
      if( byte_stream_character < 0xa0 )
572
0
      {
573
0
        safe_unicode_character = byte_stream_character;
574
0
      }
575
0
      else
576
0
      {
577
0
        byte_stream_character -= 0xa0;
578
579
0
        safe_unicode_character = libuna_codepage_iso_8859_16_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
580
0
      }
581
0
      safe_byte_stream_index += 1;
582
583
0
      break;
584
585
0
    case LIBUNA_CODEPAGE_KOI8_R:
586
0
      result = libuna_codepage_koi8_r_copy_from_byte_stream(
587
0
                &safe_unicode_character,
588
0
                byte_stream,
589
0
                byte_stream_size,
590
0
                &safe_byte_stream_index,
591
0
                error );
592
0
      break;
593
594
0
    case LIBUNA_CODEPAGE_KOI8_U:
595
0
      result = libuna_codepage_koi8_u_copy_from_byte_stream(
596
0
                &safe_unicode_character,
597
0
                byte_stream,
598
0
                byte_stream_size,
599
0
                &safe_byte_stream_index,
600
0
                error );
601
0
      break;
602
603
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
604
0
      result = libuna_codepage_mac_arabic_copy_from_byte_stream(
605
0
                &safe_unicode_character,
606
0
                byte_stream,
607
0
                byte_stream_size,
608
0
                &safe_byte_stream_index,
609
0
                error );
610
0
      break;
611
612
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
613
0
      result = libuna_codepage_mac_celtic_copy_from_byte_stream(
614
0
                &safe_unicode_character,
615
0
                byte_stream,
616
0
                byte_stream_size,
617
0
                &safe_byte_stream_index,
618
0
                error );
619
0
      break;
620
621
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
622
0
      result = libuna_codepage_mac_centraleurroman_copy_from_byte_stream(
623
0
                &safe_unicode_character,
624
0
                byte_stream,
625
0
                byte_stream_size,
626
0
                &safe_byte_stream_index,
627
0
                error );
628
0
      break;
629
630
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
631
0
      result = libuna_codepage_mac_croatian_copy_from_byte_stream(
632
0
                &safe_unicode_character,
633
0
                byte_stream,
634
0
                byte_stream_size,
635
0
                &safe_byte_stream_index,
636
0
                error );
637
0
      break;
638
639
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
640
0
      result = libuna_codepage_mac_cyrillic_copy_from_byte_stream(
641
0
                &safe_unicode_character,
642
0
                byte_stream,
643
0
                byte_stream_size,
644
0
                &safe_byte_stream_index,
645
0
                error );
646
0
      break;
647
648
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
649
0
      result = libuna_codepage_mac_dingbats_copy_from_byte_stream(
650
0
                &safe_unicode_character,
651
0
                byte_stream,
652
0
                byte_stream_size,
653
0
                &safe_byte_stream_index,
654
0
                error );
655
0
      break;
656
657
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
658
0
      result = libuna_codepage_mac_farsi_copy_from_byte_stream(
659
0
                &safe_unicode_character,
660
0
                byte_stream,
661
0
                byte_stream_size,
662
0
                &safe_byte_stream_index,
663
0
                error );
664
0
      break;
665
666
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
667
0
      result = libuna_codepage_mac_gaelic_copy_from_byte_stream(
668
0
                &safe_unicode_character,
669
0
                byte_stream,
670
0
                byte_stream_size,
671
0
                &safe_byte_stream_index,
672
0
                error );
673
0
      break;
674
675
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
676
0
      result = libuna_codepage_mac_greek_copy_from_byte_stream(
677
0
                &safe_unicode_character,
678
0
                byte_stream,
679
0
                byte_stream_size,
680
0
                &safe_byte_stream_index,
681
0
                error );
682
0
      break;
683
684
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
685
0
      result = libuna_codepage_mac_icelandic_copy_from_byte_stream(
686
0
                &safe_unicode_character,
687
0
                byte_stream,
688
0
                byte_stream_size,
689
0
                &safe_byte_stream_index,
690
0
                error );
691
0
      break;
692
693
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
694
0
      result = libuna_codepage_mac_inuit_copy_from_byte_stream(
695
0
                &safe_unicode_character,
696
0
                byte_stream,
697
0
                byte_stream_size,
698
0
                &safe_byte_stream_index,
699
0
                error );
700
0
      break;
701
702
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
703
0
      result = libuna_codepage_mac_roman_copy_from_byte_stream(
704
0
                &safe_unicode_character,
705
0
                byte_stream,
706
0
                byte_stream_size,
707
0
                &safe_byte_stream_index,
708
0
                error );
709
0
      break;
710
711
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
712
0
      result = libuna_codepage_mac_romanian_copy_from_byte_stream(
713
0
                &safe_unicode_character,
714
0
                byte_stream,
715
0
                byte_stream_size,
716
0
                &safe_byte_stream_index,
717
0
                error );
718
0
      break;
719
720
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
721
0
      result = libuna_codepage_mac_russian_copy_from_byte_stream(
722
0
                &safe_unicode_character,
723
0
                byte_stream,
724
0
                byte_stream_size,
725
0
                &safe_byte_stream_index,
726
0
                error );
727
0
      break;
728
729
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
730
0
      result = libuna_codepage_mac_symbol_copy_from_byte_stream(
731
0
                &safe_unicode_character,
732
0
                byte_stream,
733
0
                byte_stream_size,
734
0
                &safe_byte_stream_index,
735
0
                error );
736
0
      break;
737
738
0
    case LIBUNA_CODEPAGE_MAC_THAI:
739
0
      result = libuna_codepage_mac_thai_copy_from_byte_stream(
740
0
                &safe_unicode_character,
741
0
                byte_stream,
742
0
                byte_stream_size,
743
0
                &safe_byte_stream_index,
744
0
                error );
745
0
      break;
746
747
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
748
0
      result = libuna_codepage_mac_turkish_copy_from_byte_stream(
749
0
                &safe_unicode_character,
750
0
                byte_stream,
751
0
                byte_stream_size,
752
0
                &safe_byte_stream_index,
753
0
                error );
754
0
      break;
755
756
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
757
0
      result = libuna_codepage_mac_ukrainian_copy_from_byte_stream(
758
0
                &safe_unicode_character,
759
0
                byte_stream,
760
0
                byte_stream_size,
761
0
                &safe_byte_stream_index,
762
0
                error );
763
0
      break;
764
765
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
766
0
      result = libuna_codepage_windows_874_copy_from_byte_stream(
767
0
                &safe_unicode_character,
768
0
                byte_stream,
769
0
                byte_stream_size,
770
0
                &safe_byte_stream_index,
771
0
                error );
772
0
      break;
773
774
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
775
0
      result = libuna_codepage_windows_932_copy_from_byte_stream(
776
0
                &safe_unicode_character,
777
0
                byte_stream,
778
0
                byte_stream_size,
779
0
                &safe_byte_stream_index,
780
0
                error );
781
0
      break;
782
783
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
784
0
      result = libuna_codepage_windows_936_copy_from_byte_stream(
785
0
                &safe_unicode_character,
786
0
                byte_stream,
787
0
                byte_stream_size,
788
0
                &safe_byte_stream_index,
789
0
                error );
790
0
      break;
791
792
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
793
0
      result = libuna_codepage_windows_949_copy_from_byte_stream(
794
0
                &safe_unicode_character,
795
0
                byte_stream,
796
0
                byte_stream_size,
797
0
                &safe_byte_stream_index,
798
0
                error );
799
0
      break;
800
801
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
802
0
      result = libuna_codepage_windows_950_copy_from_byte_stream(
803
0
                &safe_unicode_character,
804
0
                byte_stream,
805
0
                byte_stream_size,
806
0
                &safe_byte_stream_index,
807
0
                error );
808
0
      break;
809
810
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
811
0
      result = libuna_codepage_windows_1250_copy_from_byte_stream(
812
0
                &safe_unicode_character,
813
0
                byte_stream,
814
0
                byte_stream_size,
815
0
                &safe_byte_stream_index,
816
0
                error );
817
0
      break;
818
819
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
820
0
      result = libuna_codepage_windows_1251_copy_from_byte_stream(
821
0
                &safe_unicode_character,
822
0
                byte_stream,
823
0
                byte_stream_size,
824
0
                &safe_byte_stream_index,
825
0
                error );
826
0
      break;
827
828
91.5k
    case LIBUNA_CODEPAGE_WINDOWS_1252:
829
91.5k
      result = libuna_codepage_windows_1252_copy_from_byte_stream(
830
91.5k
                &safe_unicode_character,
831
91.5k
                byte_stream,
832
91.5k
                byte_stream_size,
833
91.5k
                &safe_byte_stream_index,
834
91.5k
                error );
835
91.5k
      break;
836
837
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
838
0
      result = libuna_codepage_windows_1253_copy_from_byte_stream(
839
0
                &safe_unicode_character,
840
0
                byte_stream,
841
0
                byte_stream_size,
842
0
                &safe_byte_stream_index,
843
0
                error );
844
0
      break;
845
846
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
847
0
      result = libuna_codepage_windows_1254_copy_from_byte_stream(
848
0
                &safe_unicode_character,
849
0
                byte_stream,
850
0
                byte_stream_size,
851
0
                &safe_byte_stream_index,
852
0
                error );
853
0
      break;
854
855
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
856
0
      result = libuna_codepage_windows_1255_copy_from_byte_stream(
857
0
                &safe_unicode_character,
858
0
                byte_stream,
859
0
                byte_stream_size,
860
0
                &safe_byte_stream_index,
861
0
                error );
862
0
      break;
863
864
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
865
0
      result = libuna_codepage_windows_1256_copy_from_byte_stream(
866
0
                &safe_unicode_character,
867
0
                byte_stream,
868
0
                byte_stream_size,
869
0
                &safe_byte_stream_index,
870
0
                error );
871
0
      break;
872
873
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
874
0
      result = libuna_codepage_windows_1257_copy_from_byte_stream(
875
0
                &safe_unicode_character,
876
0
                byte_stream,
877
0
                byte_stream_size,
878
0
                &safe_byte_stream_index,
879
0
                error );
880
0
      break;
881
882
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
883
0
      result = libuna_codepage_windows_1258_copy_from_byte_stream(
884
0
                &safe_unicode_character,
885
0
                byte_stream,
886
0
                byte_stream_size,
887
0
                &safe_byte_stream_index,
888
0
                error );
889
0
      break;
890
891
0
    default:
892
0
      libcerror_error_set(
893
0
       error,
894
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
895
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
896
0
      "%s: unsupported codepage: %d.",
897
0
       function,
898
0
       codepage );
899
900
0
      return( -1 );
901
208k
  }
902
208k
  if( result == -1 )
903
0
  {
904
0
    libcerror_error_set(
905
0
     error,
906
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
907
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
908
0
     "%s: unable to copy Unicode character from byte stream.",
909
0
     function );
910
911
0
    return( -1 );
912
0
  }
913
208k
  *unicode_character = safe_unicode_character;
914
208k
  *byte_stream_index = safe_byte_stream_index;
915
916
208k
  return( result );
917
208k
}
918
919
/* Copies an Unicode character to a byte stream
920
 * Returns 1 if successful, 0 if the Unicode character is valid but not supported since it requires special handling or -1 on error
921
 */
922
int libuna_unicode_character_copy_to_byte_stream(
923
     libuna_unicode_character_t unicode_character,
924
     uint8_t *byte_stream,
925
     size_t byte_stream_size,
926
     size_t *byte_stream_index,
927
     int codepage,
928
     libcerror_error_t **error )
929
0
{
930
0
  static char *function         = "libuna_unicode_character_copy_to_byte_stream";
931
0
  size_t safe_byte_stream_index = 0;
932
0
  int result                    = 1;
933
934
0
  if( byte_stream == NULL )
935
0
  {
936
0
    libcerror_error_set(
937
0
     error,
938
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
939
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
940
0
     "%s: invalid byte stream.",
941
0
     function );
942
943
0
    return( -1 );
944
0
  }
945
0
  if( byte_stream_size > (size_t) SSIZE_MAX )
946
0
  {
947
0
    libcerror_error_set(
948
0
     error,
949
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
950
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
951
0
     "%s: invalid byte stream size value exceeds maximum.",
952
0
     function );
953
954
0
    return( -1 );
955
0
  }
956
0
  if( byte_stream_index == NULL )
957
0
  {
958
0
    libcerror_error_set(
959
0
     error,
960
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
961
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
962
0
     "%s: invalid byte stream index.",
963
0
     function );
964
965
0
    return( -1 );
966
0
  }
967
0
  safe_byte_stream_index = *byte_stream_index;
968
969
0
  if( safe_byte_stream_index >= byte_stream_size )
970
0
  {
971
0
    libcerror_error_set(
972
0
     error,
973
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
974
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
975
0
     "%s: byte stream too small.",
976
0
     function );
977
978
0
    return( -1 );
979
0
  }
980
0
  switch( codepage )
981
0
  {
982
0
    case LIBUNA_CODEPAGE_ASCII:
983
0
      if( unicode_character < 0x0080 )
984
0
      {
985
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
986
0
      }
987
0
      else
988
0
      {
989
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
990
0
      }
991
0
      safe_byte_stream_index += 1;
992
993
0
      break;
994
995
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
996
0
      if( unicode_character < 0x0100 )
997
0
      {
998
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
999
0
      }
1000
0
      else
1001
0
      {
1002
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1003
0
      }
1004
0
      safe_byte_stream_index += 1;
1005
1006
0
      break;
1007
1008
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
1009
0
      if( unicode_character < 0x00a0 )
1010
0
      {
1011
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1012
0
      }
1013
0
      else if( ( unicode_character >= 0x00a0 )
1014
0
            && ( unicode_character < 0x0120 ) )
1015
0
      {
1016
0
        unicode_character -= 0x00a0;
1017
1018
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1019
0
      }
1020
0
      else if( ( unicode_character >= 0x0138 )
1021
0
            && ( unicode_character < 0x0180 ) )
1022
0
      {
1023
0
        unicode_character -= 0x0138;
1024
1025
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x0138[ unicode_character ];
1026
0
      }
1027
0
      else if( ( unicode_character >= 0x02d8 )
1028
0
            && ( unicode_character < 0x02e0 ) )
1029
0
      {
1030
0
        unicode_character -= 0x02d8;
1031
1032
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
1033
0
      }
1034
0
      else if( unicode_character == 0x02c7 )
1035
0
      {
1036
0
        byte_stream[ safe_byte_stream_index ] = 0xb7;
1037
0
      }
1038
0
      else
1039
0
      {
1040
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1041
0
      }
1042
0
      safe_byte_stream_index += 1;
1043
1044
0
      break;
1045
1046
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
1047
0
      if( unicode_character < 0x00a0 )
1048
0
      {
1049
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1050
0
      }
1051
0
      else if( ( unicode_character >= 0x00a0 )
1052
0
            && ( unicode_character < 0x0100 ) )
1053
0
      {
1054
0
        unicode_character -= 0x00a0;
1055
1056
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1057
0
      }
1058
0
      else if( ( unicode_character >= 0x0108 )
1059
0
            && ( unicode_character < 0x0110 ) )
1060
0
      {
1061
0
        unicode_character -= 0x0108;
1062
1063
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0108[ unicode_character ];
1064
0
      }
1065
0
      else if( ( unicode_character >= 0x0118 )
1066
0
            && ( unicode_character < 0x0128 ) )
1067
0
      {
1068
0
        unicode_character -= 0x0118;
1069
1070
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0118[ unicode_character ];
1071
0
      }
1072
0
      else if( ( unicode_character >= 0x0130 )
1073
0
            && ( unicode_character < 0x0138 ) )
1074
0
      {
1075
0
        unicode_character -= 0x0130;
1076
1077
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0130[ unicode_character ];
1078
0
      }
1079
0
      else if( ( unicode_character >= 0x0158 )
1080
0
            && ( unicode_character < 0x0160 ) )
1081
0
      {
1082
0
        unicode_character -= 0x0158;
1083
1084
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0158[ unicode_character ];
1085
0
      }
1086
0
      else switch( unicode_character )
1087
0
      {
1088
0
        case 0x016c:
1089
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1090
0
          break;
1091
1092
0
        case 0x016d:
1093
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1094
0
          break;
1095
1096
0
        case 0x017b:
1097
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1098
0
          break;
1099
1100
0
        case 0x017c:
1101
0
          byte_stream[ safe_byte_stream_index ] = 0xbf;
1102
0
          break;
1103
1104
0
        case 0x02d8:
1105
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1106
0
          break;
1107
1108
0
        case 0x02d9:
1109
0
          byte_stream[ safe_byte_stream_index ] = 0xff;
1110
0
          break;
1111
1112
0
        default:
1113
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1114
0
          break;
1115
0
      }
1116
0
      safe_byte_stream_index += 1;
1117
1118
0
      break;
1119
1120
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
1121
0
      if( unicode_character < 0x00a0 )
1122
0
      {
1123
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1124
0
      }
1125
0
      else if( ( unicode_character >= 0x00a0 )
1126
0
            && ( unicode_character < 0x0158 ) )
1127
0
      {
1128
0
        unicode_character -= 0x00a0;
1129
1130
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1131
0
      }
1132
0
      else if( ( unicode_character >= 0x0160 )
1133
0
            && ( unicode_character < 0x0180 ) )
1134
0
      {
1135
0
        unicode_character -= 0x0160;
1136
1137
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1138
0
      }
1139
0
      else switch( unicode_character )
1140
0
      {
1141
0
        case 0x02c7:
1142
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1143
0
          break;
1144
1145
0
        case 0x02d9:
1146
0
          byte_stream[ safe_byte_stream_index ] = 0xff;
1147
0
          break;
1148
1149
0
        case 0x02db:
1150
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1151
0
          break;
1152
1153
0
        default:
1154
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1155
0
          break;
1156
0
      }
1157
0
      safe_byte_stream_index += 1;
1158
1159
0
      break;
1160
1161
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
1162
0
      if( unicode_character < 0x00a1 )
1163
0
      {
1164
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1165
0
      }
1166
0
      else if( ( unicode_character >= 0x0400 )
1167
0
            && ( unicode_character < 0x0460 ) )
1168
0
      {
1169
0
        unicode_character -= 0x0400;
1170
1171
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_5_unicode_to_byte_stream_base_0x0400[ unicode_character ];
1172
0
      }
1173
0
      else switch( unicode_character )
1174
0
      {
1175
0
        case 0x00a7:
1176
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1177
0
          break;
1178
1179
0
        case 0x00ad:
1180
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1181
0
          break;
1182
1183
0
        case 0x2116:
1184
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1185
0
          break;
1186
1187
0
        default:
1188
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1189
0
          break;
1190
0
      }
1191
0
      safe_byte_stream_index += 1;
1192
1193
0
      break;
1194
1195
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
1196
0
      if( unicode_character < 0x00a1 )
1197
0
      {
1198
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1199
0
      }
1200
0
      else if( ( unicode_character >= 0x0618 )
1201
0
            && ( unicode_character < 0x658 ) )
1202
0
      {
1203
0
        unicode_character -= 0x0618;
1204
1205
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_6_unicode_to_byte_stream_base_0x0618[ unicode_character ];
1206
0
      }
1207
0
      else switch( unicode_character )
1208
0
      {
1209
0
        case 0x00a4:
1210
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1211
0
          break;
1212
1213
0
        case 0x00ad:
1214
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1215
0
          break;
1216
1217
0
        case 0x060c:
1218
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1219
0
          break;
1220
1221
0
        default:
1222
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1223
0
          break;
1224
0
      }
1225
0
      safe_byte_stream_index += 1;
1226
1227
0
      break;
1228
1229
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
1230
0
      if( unicode_character < 0x00a0 )
1231
0
      {
1232
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1233
0
      }
1234
0
      else if( ( unicode_character >= 0x00a0 )
1235
0
            && ( unicode_character < 0x00b8 ) )
1236
0
      {
1237
0
        unicode_character -= 0x00a0;
1238
1239
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1240
0
      }
1241
0
      else if( ( unicode_character >= 0x0380 )
1242
0
            && ( unicode_character < 0x03d0 ) )
1243
0
      {
1244
0
        unicode_character -= 0x0380;
1245
1246
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x0380[ unicode_character ];
1247
0
      }
1248
0
      else switch( unicode_character )
1249
0
      {
1250
0
        case 0x00bb:
1251
0
          byte_stream[ safe_byte_stream_index ] = 0xbb;
1252
0
          break;
1253
1254
0
        case 0x00bd:
1255
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1256
0
          break;
1257
1258
0
        case 0x037a:
1259
0
          byte_stream[ safe_byte_stream_index ] = 0xaa;
1260
0
          break;
1261
1262
0
        case 0x2015:
1263
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1264
0
          break;
1265
1266
0
        case 0x2018:
1267
0
          byte_stream[ safe_byte_stream_index ] = 0xa1;
1268
0
          break;
1269
1270
0
        case 0x2019:
1271
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1272
0
          break;
1273
1274
0
        case 0x20ac:
1275
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1276
0
          break;
1277
1278
0
        case 0x20af:
1279
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1280
0
          break;
1281
1282
0
        default:
1283
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1284
0
          break;
1285
0
      }
1286
0
      safe_byte_stream_index += 1;
1287
1288
0
      break;
1289
1290
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
1291
0
      if( unicode_character < 0x00a0 )
1292
0
      {
1293
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1294
0
      }
1295
0
      else if( ( unicode_character >= 0x00a0 )
1296
0
            && ( unicode_character < 0x00c0 ) )
1297
0
      {
1298
0
        unicode_character -= 0x00a0;
1299
1300
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1301
0
      }
1302
0
      else if( ( unicode_character >= 0x05d0 )
1303
0
            && ( unicode_character < 0x05f0 ) )
1304
0
      {
1305
0
        unicode_character -= 0x05d0;
1306
1307
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
1308
0
      }
1309
0
      else switch( unicode_character )
1310
0
      {
1311
0
        case 0x00d7:
1312
0
          byte_stream[ safe_byte_stream_index ] = 0xaa;
1313
0
          break;
1314
1315
0
        case 0x00f7:
1316
0
          byte_stream[ safe_byte_stream_index ] = 0xba;
1317
0
          break;
1318
1319
0
        case 0x200e:
1320
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1321
0
          break;
1322
1323
0
        case 0x200f:
1324
0
          byte_stream[ safe_byte_stream_index ] = 0xfe;
1325
0
          break;
1326
1327
0
        case 0x2017:
1328
0
          byte_stream[ safe_byte_stream_index ] = 0xdf;
1329
0
          break;
1330
1331
0
        default:
1332
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1333
0
          break;
1334
0
      }
1335
0
      safe_byte_stream_index += 1;
1336
1337
0
      break;
1338
1339
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
1340
0
      if( unicode_character < 0x00d0 )
1341
0
      {
1342
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1343
0
      }
1344
0
      else if( ( unicode_character >= 0x00d0 )
1345
0
            && ( unicode_character < 0x0100 ) )
1346
0
      {
1347
0
        unicode_character -= 0x00d0;
1348
1349
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_9_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
1350
0
      }
1351
0
      else switch( unicode_character )
1352
0
      {
1353
0
        case 0x011e:
1354
0
          byte_stream[ safe_byte_stream_index ] = 0xd0;
1355
0
          break;
1356
1357
0
        case 0x011f:
1358
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1359
0
          break;
1360
1361
0
        case 0x0130:
1362
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1363
0
          break;
1364
1365
0
        case 0x0131:
1366
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1367
0
          break;
1368
1369
0
        case 0x015e:
1370
0
          byte_stream[ safe_byte_stream_index ] = 0xde;
1371
0
          break;
1372
1373
0
        case 0x015f:
1374
0
          byte_stream[ safe_byte_stream_index ] = 0xfe;
1375
0
          break;
1376
1377
0
        default:
1378
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1379
0
          break;
1380
0
      }
1381
0
      safe_byte_stream_index += 1;
1382
1383
0
      break;
1384
1385
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
1386
0
      if( unicode_character < 0x00a1 )
1387
0
      {
1388
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1389
0
      }
1390
0
      else if( ( unicode_character >= 0x00c0 )
1391
0
            && ( unicode_character < 0x0150 ) )
1392
0
      {
1393
0
        unicode_character -= 0x00c0;
1394
1395
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1396
0
      }
1397
0
      else if( ( unicode_character >= 0x0160 )
1398
0
            && ( unicode_character < 0x0170 ) )
1399
0
      {
1400
0
        unicode_character -= 0x0160;
1401
1402
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1403
0
      }
1404
0
      else switch( unicode_character )
1405
0
      {
1406
0
        case 0x00a7:
1407
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1408
0
          break;
1409
1410
0
        case 0x00ad:
1411
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1412
0
          break;
1413
1414
0
        case 0x00b0:
1415
0
          byte_stream[ safe_byte_stream_index ] = 0xb0;
1416
0
          break;
1417
1418
0
        case 0x00b7:
1419
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1420
0
          break;
1421
1422
0
        case 0x0172:
1423
0
          byte_stream[ safe_byte_stream_index ] = 0xd9;
1424
0
          break;
1425
1426
0
        case 0x0173:
1427
0
          byte_stream[ safe_byte_stream_index ] = 0xf9;
1428
0
          break;
1429
1430
0
        case 0x017d:
1431
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1432
0
          break;
1433
1434
0
        case 0x017e:
1435
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1436
0
          break;
1437
1438
0
        case 0x2015:
1439
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1440
0
          break;
1441
1442
0
        default:
1443
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1444
0
          break;
1445
0
      }
1446
0
      safe_byte_stream_index += 1;
1447
1448
0
      break;
1449
1450
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
1451
0
      if( unicode_character < 0x00a1 )
1452
0
      {
1453
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1454
0
      }
1455
0
      else if( ( unicode_character >= 0x0e01 )
1456
0
            && ( unicode_character < 0x0e3b ) )
1457
0
      {
1458
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1459
0
      }
1460
0
      else if( ( unicode_character >= 0x0e3f )
1461
0
            && ( unicode_character < 0x0e5c ) )
1462
0
      {
1463
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1464
0
      }
1465
0
      else
1466
0
      {
1467
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1468
0
      }
1469
0
      safe_byte_stream_index += 1;
1470
1471
0
      break;
1472
1473
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
1474
0
      if( unicode_character < 0x00a0 )
1475
0
      {
1476
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1477
0
      }
1478
0
      else if( ( unicode_character >= 0x00a0 )
1479
0
            && ( unicode_character < 0x0180 ) )
1480
0
      {
1481
0
        unicode_character -= 0x00a0;
1482
1483
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1484
0
      }
1485
0
      else if( ( unicode_character >= 0x2018 )
1486
0
             && ( unicode_character < 0x2020 ) )
1487
0
      {
1488
0
        unicode_character -= 0x2018;
1489
1490
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x2018[ unicode_character ];
1491
0
      }
1492
0
      else
1493
0
      {
1494
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1495
0
      }
1496
0
      safe_byte_stream_index += 1;
1497
1498
0
      break;
1499
1500
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
1501
0
      if( unicode_character < 0x00a1 )
1502
0
      {
1503
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1504
0
      }
1505
0
      else if( ( unicode_character >= 0x00c0 )
1506
0
            && ( unicode_character < 0x0100 ) )
1507
0
      {
1508
0
        unicode_character -= 0x00c0;
1509
1510
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1511
0
      }
1512
0
      else if( ( unicode_character >= 0x0170 )
1513
0
            && ( unicode_character < 0x0178 ) )
1514
0
      {
1515
0
        unicode_character -= 0x0170;
1516
1517
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x0170[ unicode_character ];
1518
0
      }
1519
0
      else if( ( unicode_character >= 0x1e80 )
1520
0
            && ( unicode_character < 0x1e88 ) )
1521
0
      {
1522
0
        unicode_character -= 0x1e80;
1523
1524
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x1e80[ unicode_character ];
1525
0
      }
1526
0
      else switch( unicode_character )
1527
0
      {
1528
0
        case 0x00a3:
1529
0
          byte_stream[ safe_byte_stream_index ] = 0xa3;
1530
0
          break;
1531
1532
0
        case 0x00a7:
1533
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1534
0
          break;
1535
1536
0
        case 0x00a9:
1537
0
          byte_stream[ safe_byte_stream_index ] = 0xa9;
1538
0
          break;
1539
1540
0
        case 0x00ad:
1541
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1542
0
          break;
1543
1544
0
        case 0x00ae:
1545
0
          byte_stream[ safe_byte_stream_index ] = 0xae;
1546
0
          break;
1547
1548
0
        case 0x00b6:
1549
0
          byte_stream[ safe_byte_stream_index ] = 0xb6;
1550
0
          break;
1551
1552
0
        case 0x010a:
1553
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1554
0
          break;
1555
1556
0
        case 0x010b:
1557
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1558
0
          break;
1559
1560
0
        case 0x0120:
1561
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1562
0
          break;
1563
1564
0
        case 0x0121:
1565
0
          byte_stream[ safe_byte_stream_index ] = 0xb3;
1566
0
          break;
1567
1568
0
        case 0x0178:
1569
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1570
0
          break;
1571
1572
0
        case 0x1e02:
1573
0
          byte_stream[ safe_byte_stream_index ] = 0xa1;
1574
0
          break;
1575
1576
0
        case 0x1e03:
1577
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1578
0
          break;
1579
1580
0
        case 0x1e0a:
1581
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1582
0
          break;
1583
1584
0
        case 0x1e0b:
1585
0
          byte_stream[ safe_byte_stream_index ] = 0xab;
1586
0
          break;
1587
1588
0
        case 0x1e1e:
1589
0
          byte_stream[ safe_byte_stream_index ] = 0xb0;
1590
0
          break;
1591
1592
0
        case 0x1e1f:
1593
0
          byte_stream[ safe_byte_stream_index ] = 0xb1;
1594
0
          break;
1595
1596
0
        case 0x1e40:
1597
0
          byte_stream[ safe_byte_stream_index ] = 0xb4;
1598
0
          break;
1599
1600
0
        case 0x1e41:
1601
0
          byte_stream[ safe_byte_stream_index ] = 0xb5;
1602
0
          break;
1603
1604
0
        case 0x1e56:
1605
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1606
0
          break;
1607
1608
0
        case 0x1e57:
1609
0
          byte_stream[ safe_byte_stream_index ] = 0xb9;
1610
0
          break;
1611
1612
0
        case 0x1e60:
1613
0
          byte_stream[ safe_byte_stream_index ] = 0xbb;
1614
0
          break;
1615
1616
0
        case 0x1e61:
1617
0
          byte_stream[ safe_byte_stream_index ] = 0xbf;
1618
0
          break;
1619
1620
0
        case 0x1e6a:
1621
0
          byte_stream[ safe_byte_stream_index ] = 0xd7;
1622
0
          break;
1623
1624
0
        case 0x1e6b:
1625
0
          byte_stream[ safe_byte_stream_index ] = 0xf7;
1626
0
          break;
1627
1628
0
        case 0x1ef2:
1629
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1630
0
          break;
1631
1632
0
        case 0x1ef3:
1633
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1634
0
          break;
1635
1636
0
        default:
1637
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1638
0
          break;
1639
0
      }
1640
0
      safe_byte_stream_index += 1;
1641
1642
0
      break;
1643
1644
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
1645
0
      if( unicode_character < 0x00a0 )
1646
0
      {
1647
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1648
0
      }
1649
0
      else if( ( unicode_character >= 0x00a0 )
1650
0
            && ( unicode_character < 0x00c0 ) )
1651
0
      {
1652
0
        unicode_character -= 0x00a0;
1653
1654
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_15_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1655
0
      }
1656
0
      else if( unicode_character < 0x0100 )
1657
0
      {
1658
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1659
0
      }
1660
0
      else switch( unicode_character )
1661
0
      {
1662
0
        case 0x0152:
1663
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1664
0
          break;
1665
1666
0
        case 0x0153:
1667
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1668
0
          break;
1669
1670
0
        case 0x0160:
1671
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1672
0
          break;
1673
1674
0
        case 0x0161:
1675
0
          byte_stream[ safe_byte_stream_index ] = 0xa8;
1676
0
          break;
1677
1678
0
        case 0x0178:
1679
0
          byte_stream[ safe_byte_stream_index ] = 0xbe;
1680
0
          break;
1681
1682
0
        case 0x017d:
1683
0
          byte_stream[ safe_byte_stream_index ] = 0xb4;
1684
0
          break;
1685
1686
0
        case 0x017e:
1687
0
          byte_stream[ safe_byte_stream_index ] = 0xb8;
1688
0
          break;
1689
1690
0
        case 0x20ac:
1691
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1692
0
          break;
1693
1694
0
        default:
1695
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1696
0
          break;
1697
0
      }
1698
0
      safe_byte_stream_index += 1;
1699
1700
0
      break;
1701
1702
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
1703
0
      if( unicode_character < 0x00a1 )
1704
0
      {
1705
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1706
0
      }
1707
0
      else if( ( unicode_character >= 0x00a8 )
1708
0
            && ( unicode_character < 0x0108 ) )
1709
0
      {
1710
0
        unicode_character -= 0x00a8;
1711
1712
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x00a8[ unicode_character ];
1713
0
      }
1714
0
      else if( ( unicode_character >= 0x0140 )
1715
0
            && ( unicode_character < 0x0148 ) )
1716
0
      {
1717
0
        unicode_character -= 0x0140;
1718
1719
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0140[ unicode_character ];
1720
0
      }
1721
0
      else if( ( unicode_character >= 0x0150 )
1722
0
            && ( unicode_character < 0x0158 ) )
1723
0
      {
1724
0
        unicode_character -= 0x0150;
1725
1726
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0150[ unicode_character ];
1727
0
      }
1728
0
      else if( ( unicode_character >= 0x0178 )
1729
0
            && ( unicode_character < 0x0180 ) )
1730
0
      {
1731
0
        unicode_character -= 0x0178;
1732
1733
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0178[ unicode_character ];
1734
0
      }
1735
0
      else if( ( unicode_character >= 0x0218 )
1736
0
            && ( unicode_character < 0x0220 ) )
1737
0
      {
1738
0
        unicode_character -= 0x0218;
1739
1740
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0218[ unicode_character ];
1741
0
      }
1742
0
      else switch( unicode_character )
1743
0
      {
1744
0
        case 0x00a7:
1745
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1746
0
          break;
1747
1748
0
        case 0x010c:
1749
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1750
0
          break;
1751
1752
0
        case 0x010d:
1753
0
          byte_stream[ safe_byte_stream_index ] = 0xb9;
1754
0
          break;
1755
1756
0
        case 0x0110:
1757
0
          byte_stream[ safe_byte_stream_index ] = 0xd0;
1758
0
          break;
1759
1760
0
        case 0x0111:
1761
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1762
0
          break;
1763
1764
0
        case 0x0118:
1765
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1766
0
          break;
1767
1768
0
        case 0x0119:
1769
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1770
0
          break;
1771
1772
0
        case 0x015a:
1773
0
          byte_stream[ safe_byte_stream_index ] = 0xd7;
1774
0
          break;
1775
1776
0
        case 0x015b:
1777
0
          byte_stream[ safe_byte_stream_index ] = 0xf7;
1778
0
          break;
1779
1780
0
        case 0x0160:
1781
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1782
0
          break;
1783
1784
0
        case 0x0161:
1785
0
          byte_stream[ safe_byte_stream_index ] = 0xa8;
1786
0
          break;
1787
1788
0
        case 0x0170:
1789
0
          byte_stream[ safe_byte_stream_index ] = 0xd8;
1790
0
          break;
1791
1792
0
        case 0x0171:
1793
0
          byte_stream[ safe_byte_stream_index ] = 0xf8;
1794
0
          break;
1795
1796
0
        case 0x201d:
1797
0
          byte_stream[ safe_byte_stream_index ] = 0xb5;
1798
0
          break;
1799
1800
0
        case 0x201e:
1801
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1802
0
          break;
1803
1804
0
        case 0x20ac:
1805
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1806
0
          break;
1807
1808
0
        default:
1809
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1810
0
          break;
1811
0
      }
1812
0
      safe_byte_stream_index += 1;
1813
1814
0
      break;
1815
1816
0
    case LIBUNA_CODEPAGE_KOI8_R:
1817
0
      result = libuna_codepage_koi8_r_copy_to_byte_stream(
1818
0
                unicode_character,
1819
0
                byte_stream,
1820
0
                byte_stream_size,
1821
0
                &safe_byte_stream_index,
1822
0
                error );
1823
0
      break;
1824
1825
0
    case LIBUNA_CODEPAGE_KOI8_U:
1826
0
      result = libuna_codepage_koi8_u_copy_to_byte_stream(
1827
0
                unicode_character,
1828
0
                byte_stream,
1829
0
                byte_stream_size,
1830
0
                &safe_byte_stream_index,
1831
0
                error );
1832
0
      break;
1833
1834
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
1835
0
      result = libuna_codepage_mac_arabic_copy_to_byte_stream(
1836
0
                unicode_character,
1837
0
                byte_stream,
1838
0
                byte_stream_size,
1839
0
                &safe_byte_stream_index,
1840
0
                error );
1841
0
      break;
1842
1843
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
1844
0
      result = libuna_codepage_mac_celtic_copy_to_byte_stream(
1845
0
                unicode_character,
1846
0
                byte_stream,
1847
0
                byte_stream_size,
1848
0
                &safe_byte_stream_index,
1849
0
                error );
1850
0
      break;
1851
1852
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
1853
0
      result = libuna_codepage_mac_centraleurroman_copy_to_byte_stream(
1854
0
                unicode_character,
1855
0
                byte_stream,
1856
0
                byte_stream_size,
1857
0
                &safe_byte_stream_index,
1858
0
                error );
1859
0
      break;
1860
1861
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
1862
0
      result = libuna_codepage_mac_croatian_copy_to_byte_stream(
1863
0
                unicode_character,
1864
0
                byte_stream,
1865
0
                byte_stream_size,
1866
0
                &safe_byte_stream_index,
1867
0
                error );
1868
0
      break;
1869
1870
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
1871
0
      result = libuna_codepage_mac_cyrillic_copy_to_byte_stream(
1872
0
                unicode_character,
1873
0
                byte_stream,
1874
0
                byte_stream_size,
1875
0
                &safe_byte_stream_index,
1876
0
                error );
1877
0
      break;
1878
1879
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
1880
0
      result = libuna_codepage_mac_dingbats_copy_to_byte_stream(
1881
0
                unicode_character,
1882
0
                byte_stream,
1883
0
                byte_stream_size,
1884
0
                &safe_byte_stream_index,
1885
0
                error );
1886
0
      break;
1887
1888
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
1889
0
      result = libuna_codepage_mac_farsi_copy_to_byte_stream(
1890
0
                unicode_character,
1891
0
                byte_stream,
1892
0
                byte_stream_size,
1893
0
                &safe_byte_stream_index,
1894
0
                error );
1895
0
      break;
1896
1897
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
1898
0
      result = libuna_codepage_mac_gaelic_copy_to_byte_stream(
1899
0
                unicode_character,
1900
0
                byte_stream,
1901
0
                byte_stream_size,
1902
0
                &safe_byte_stream_index,
1903
0
                error );
1904
0
      break;
1905
1906
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
1907
0
      result = libuna_codepage_mac_greek_copy_to_byte_stream(
1908
0
                unicode_character,
1909
0
                byte_stream,
1910
0
                byte_stream_size,
1911
0
                &safe_byte_stream_index,
1912
0
                error );
1913
0
      break;
1914
1915
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
1916
0
      result = libuna_codepage_mac_icelandic_copy_to_byte_stream(
1917
0
                unicode_character,
1918
0
                byte_stream,
1919
0
                byte_stream_size,
1920
0
                &safe_byte_stream_index,
1921
0
                error );
1922
0
      break;
1923
1924
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
1925
0
      result = libuna_codepage_mac_inuit_copy_to_byte_stream(
1926
0
                unicode_character,
1927
0
                byte_stream,
1928
0
                byte_stream_size,
1929
0
                &safe_byte_stream_index,
1930
0
                error );
1931
0
      break;
1932
1933
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
1934
0
      result = libuna_codepage_mac_roman_copy_to_byte_stream(
1935
0
                unicode_character,
1936
0
                byte_stream,
1937
0
                byte_stream_size,
1938
0
                &safe_byte_stream_index,
1939
0
                error );
1940
0
      break;
1941
1942
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
1943
0
      result = libuna_codepage_mac_romanian_copy_to_byte_stream(
1944
0
                unicode_character,
1945
0
                byte_stream,
1946
0
                byte_stream_size,
1947
0
                &safe_byte_stream_index,
1948
0
                error );
1949
0
      break;
1950
1951
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
1952
0
      result = libuna_codepage_mac_russian_copy_to_byte_stream(
1953
0
                unicode_character,
1954
0
                byte_stream,
1955
0
                byte_stream_size,
1956
0
                &safe_byte_stream_index,
1957
0
                error );
1958
0
      break;
1959
1960
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
1961
0
      result = libuna_codepage_mac_symbol_copy_to_byte_stream(
1962
0
                unicode_character,
1963
0
                byte_stream,
1964
0
                byte_stream_size,
1965
0
                &safe_byte_stream_index,
1966
0
                error );
1967
0
      break;
1968
1969
0
    case LIBUNA_CODEPAGE_MAC_THAI:
1970
0
      result = libuna_codepage_mac_thai_copy_to_byte_stream(
1971
0
                unicode_character,
1972
0
                byte_stream,
1973
0
                byte_stream_size,
1974
0
                &safe_byte_stream_index,
1975
0
                error );
1976
0
      break;
1977
1978
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
1979
0
      result = libuna_codepage_mac_turkish_copy_to_byte_stream(
1980
0
                unicode_character,
1981
0
                byte_stream,
1982
0
                byte_stream_size,
1983
0
                &safe_byte_stream_index,
1984
0
                error );
1985
0
      break;
1986
1987
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
1988
0
      result = libuna_codepage_mac_ukrainian_copy_to_byte_stream(
1989
0
                unicode_character,
1990
0
                byte_stream,
1991
0
                byte_stream_size,
1992
0
                &safe_byte_stream_index,
1993
0
                error );
1994
0
      break;
1995
1996
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
1997
0
      result = libuna_codepage_windows_874_copy_to_byte_stream(
1998
0
                unicode_character,
1999
0
                byte_stream,
2000
0
                byte_stream_size,
2001
0
                &safe_byte_stream_index,
2002
0
                error );
2003
0
      break;
2004
2005
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
2006
0
      result = libuna_codepage_windows_932_copy_to_byte_stream(
2007
0
                unicode_character,
2008
0
                byte_stream,
2009
0
                byte_stream_size,
2010
0
                &safe_byte_stream_index,
2011
0
                error );
2012
0
      break;
2013
2014
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
2015
0
      result = libuna_codepage_windows_936_copy_to_byte_stream(
2016
0
                unicode_character,
2017
0
                byte_stream,
2018
0
                byte_stream_size,
2019
0
                &safe_byte_stream_index,
2020
0
                error );
2021
0
      break;
2022
2023
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
2024
0
      result = libuna_codepage_windows_949_copy_to_byte_stream(
2025
0
                unicode_character,
2026
0
                byte_stream,
2027
0
                byte_stream_size,
2028
0
                &safe_byte_stream_index,
2029
0
                error );
2030
0
      break;
2031
2032
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
2033
0
      result = libuna_codepage_windows_950_copy_to_byte_stream(
2034
0
                unicode_character,
2035
0
                byte_stream,
2036
0
                byte_stream_size,
2037
0
                &safe_byte_stream_index,
2038
0
                error );
2039
0
      break;
2040
2041
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
2042
0
      result = libuna_codepage_windows_1250_copy_to_byte_stream(
2043
0
                unicode_character,
2044
0
                byte_stream,
2045
0
                byte_stream_size,
2046
0
                &safe_byte_stream_index,
2047
0
                error );
2048
0
      break;
2049
2050
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
2051
0
      result = libuna_codepage_windows_1251_copy_to_byte_stream(
2052
0
                unicode_character,
2053
0
                byte_stream,
2054
0
                byte_stream_size,
2055
0
                &safe_byte_stream_index,
2056
0
                error );
2057
0
      break;
2058
2059
0
    case LIBUNA_CODEPAGE_WINDOWS_1252:
2060
0
      result = libuna_codepage_windows_1252_copy_to_byte_stream(
2061
0
                unicode_character,
2062
0
                byte_stream,
2063
0
                byte_stream_size,
2064
0
                &safe_byte_stream_index,
2065
0
                error );
2066
0
      break;
2067
2068
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
2069
0
      result = libuna_codepage_windows_1253_copy_to_byte_stream(
2070
0
                unicode_character,
2071
0
                byte_stream,
2072
0
                byte_stream_size,
2073
0
                &safe_byte_stream_index,
2074
0
                error );
2075
0
      break;
2076
2077
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
2078
0
      result = libuna_codepage_windows_1254_copy_to_byte_stream(
2079
0
                unicode_character,
2080
0
                byte_stream,
2081
0
                byte_stream_size,
2082
0
                &safe_byte_stream_index,
2083
0
                error );
2084
0
      break;
2085
2086
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
2087
0
      result = libuna_codepage_windows_1255_copy_to_byte_stream(
2088
0
                unicode_character,
2089
0
                byte_stream,
2090
0
                byte_stream_size,
2091
0
                &safe_byte_stream_index,
2092
0
                error );
2093
0
      break;
2094
2095
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
2096
0
      result = libuna_codepage_windows_1256_copy_to_byte_stream(
2097
0
                unicode_character,
2098
0
                byte_stream,
2099
0
                byte_stream_size,
2100
0
                &safe_byte_stream_index,
2101
0
                error );
2102
0
      break;
2103
2104
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
2105
0
      result = libuna_codepage_windows_1257_copy_to_byte_stream(
2106
0
                unicode_character,
2107
0
                byte_stream,
2108
0
                byte_stream_size,
2109
0
                &safe_byte_stream_index,
2110
0
                error );
2111
0
      break;
2112
2113
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
2114
0
      result = libuna_codepage_windows_1258_copy_to_byte_stream(
2115
0
                unicode_character,
2116
0
                byte_stream,
2117
0
                byte_stream_size,
2118
0
                &safe_byte_stream_index,
2119
0
                error );
2120
0
      break;
2121
2122
0
    default:
2123
0
      libcerror_error_set(
2124
0
       error,
2125
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2126
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2127
0
      "%s: unsupported codepage: %d.",
2128
0
       function,
2129
0
             codepage );
2130
2131
0
      return( -1 );
2132
0
  }
2133
0
  if( result == -1 )
2134
0
  {
2135
0
    libcerror_error_set(
2136
0
     error,
2137
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2138
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2139
0
     "%s: unable to copy Unicode character to byte stream.",
2140
0
     function );
2141
2142
0
    return( -1 );
2143
0
  }
2144
0
  *byte_stream_index = safe_byte_stream_index;
2145
2146
0
  return( result );
2147
0
}
2148
2149
/* Determines the size of an UCS-2 character from an Unicode character
2150
 * Adds the size to the UCS-2 character size value
2151
 * Returns 1 if successful or -1 on error
2152
 */
2153
int libuna_unicode_character_size_to_ucs2(
2154
     libuna_unicode_character_t unicode_character,
2155
     size_t *ucs2_character_size,
2156
     libcerror_error_t **error )
2157
0
{
2158
0
  static char *function = "libuna_unicode_character_size_to_ucs2";
2159
2160
0
  if( ucs2_character_size == NULL )
2161
0
  {
2162
0
    libcerror_error_set(
2163
0
     error,
2164
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2165
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2166
0
     "%s: invalid UCS-2 character size.",
2167
0
     function );
2168
2169
0
    return( -1 );
2170
0
  }
2171
  /* Determine if the Unicode character is valid
2172
   * UCS-2 with surrogate pairs supports upto 0x10ffff characters
2173
   */
2174
0
  if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2175
0
  {
2176
0
    libcerror_error_set(
2177
0
     error,
2178
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2179
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2180
0
     "%s: unsupported Unicode character.",
2181
0
     function );
2182
2183
0
    return( -1 );
2184
0
  }
2185
0
  if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2186
0
  {
2187
0
    *ucs2_character_size += 2;
2188
0
  }
2189
0
  else
2190
0
  {
2191
0
    *ucs2_character_size += 1;
2192
0
  }
2193
0
  return( 1 );
2194
0
}
2195
2196
/* Copies an Unicode character from an UCS-2 string
2197
 * Returns 1 if successful or -1 on error
2198
 */
2199
int libuna_unicode_character_copy_from_ucs2(
2200
     libuna_unicode_character_t *unicode_character,
2201
     const libuna_utf16_character_t *ucs2_string,
2202
     size_t ucs2_string_size,
2203
     size_t *ucs2_string_index,
2204
     libcerror_error_t **error )
2205
0
{
2206
0
  static char *function                             = "libuna_unicode_character_copy_from_ucs2";
2207
0
  libuna_utf16_character_t ucs2_surrogate           = 0;
2208
0
  libuna_unicode_character_t safe_unicode_character = 0;
2209
0
  size_t safe_ucs2_string_index                     = 0;
2210
2211
0
  if( unicode_character == NULL )
2212
0
  {
2213
0
    libcerror_error_set(
2214
0
     error,
2215
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2216
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2217
0
     "%s: invalid Unicode character.",
2218
0
     function );
2219
2220
0
    return( -1 );
2221
0
  }
2222
0
  if( ucs2_string == NULL )
2223
0
  {
2224
0
    libcerror_error_set(
2225
0
     error,
2226
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2227
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2228
0
     "%s: invalid UCS-2 string.",
2229
0
     function );
2230
2231
0
    return( -1 );
2232
0
  }
2233
0
  if( ucs2_string_size > (size_t) SSIZE_MAX )
2234
0
  {
2235
0
    libcerror_error_set(
2236
0
     error,
2237
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2238
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2239
0
     "%s: invalid UCS-2 string size value exceeds maximum.",
2240
0
     function );
2241
2242
0
    return( -1 );
2243
0
  }
2244
0
  if( ucs2_string_index == NULL )
2245
0
  {
2246
0
    libcerror_error_set(
2247
0
     error,
2248
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2249
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2250
0
     "%s: invalid UCS-2 string index.",
2251
0
     function );
2252
2253
0
    return( -1 );
2254
0
  }
2255
0
  safe_ucs2_string_index = *ucs2_string_index;
2256
2257
0
  if( safe_ucs2_string_index >= ucs2_string_size )
2258
0
  {
2259
0
    libcerror_error_set(
2260
0
     error,
2261
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2262
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2263
0
     "%s: UCS-2 string too small.",
2264
0
     function );
2265
2266
0
    return( -1 );
2267
0
  }
2268
0
  safe_unicode_character  = ucs2_string[ safe_ucs2_string_index ];
2269
0
  safe_ucs2_string_index += 1;
2270
2271
  /* Determine if the UCS-2 character is within the high surrogate range
2272
   */
2273
0
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
2274
0
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
2275
0
  {
2276
0
    if( safe_ucs2_string_index >= ucs2_string_size )
2277
0
    {
2278
0
      libcerror_error_set(
2279
0
       error,
2280
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2281
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2282
0
       "%s: missing surrogate UCS-2 character bytes.",
2283
0
       function );
2284
2285
0
      return( -1 );
2286
0
    }
2287
0
    ucs2_surrogate = ucs2_string[ safe_ucs2_string_index ];
2288
2289
    /* Determine if the UCS-2 character is within the low surrogate range
2290
     */
2291
0
    if( ( ucs2_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
2292
0
     && ( ucs2_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
2293
0
    {
2294
0
      safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
2295
0
      safe_unicode_character <<= 10;
2296
0
      safe_unicode_character  += ucs2_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
2297
0
      safe_unicode_character  += 0x010000;
2298
2299
0
      safe_ucs2_string_index += 1;
2300
0
    }
2301
0
  }
2302
  /* Determine if the Unicode character is valid
2303
   * UCS-2 with surrogate pairs supports upto 0x10ffff characters
2304
   */
2305
0
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2306
0
  {
2307
0
    libcerror_error_set(
2308
0
     error,
2309
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2310
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2311
0
     "%s: unsupported Unicode character.",
2312
0
     function );
2313
2314
0
    return( -1 );
2315
0
  }
2316
0
  *unicode_character = safe_unicode_character;
2317
0
  *ucs2_string_index = safe_ucs2_string_index;
2318
2319
0
  return( 1 );
2320
0
}
2321
2322
/* Copies an Unicode character into a UCS-2 string
2323
 * Returns 1 if successful or -1 on error
2324
 */
2325
int libuna_unicode_character_copy_to_ucs2(
2326
     libuna_unicode_character_t unicode_character,
2327
     libuna_utf16_character_t *ucs2_string,
2328
     size_t ucs2_string_size,
2329
     size_t *ucs2_string_index,
2330
     libcerror_error_t **error )
2331
0
{
2332
0
  static char *function         = "libuna_unicode_character_copy_to_ucs2";
2333
0
  size_t safe_ucs2_string_index = 0;
2334
2335
0
  if( ucs2_string == NULL )
2336
0
  {
2337
0
    libcerror_error_set(
2338
0
     error,
2339
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2340
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2341
0
     "%s: invalid UCS-2 string.",
2342
0
     function );
2343
2344
0
    return( -1 );
2345
0
  }
2346
0
  if( ucs2_string_size > (size_t) SSIZE_MAX )
2347
0
  {
2348
0
    libcerror_error_set(
2349
0
     error,
2350
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2351
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2352
0
     "%s: invalid UCS-2 string size value exceeds maximum.",
2353
0
     function );
2354
2355
0
    return( -1 );
2356
0
  }
2357
0
  if( ucs2_string_index == NULL )
2358
0
  {
2359
0
    libcerror_error_set(
2360
0
     error,
2361
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2362
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2363
0
     "%s: invalid UCS-2 string index.",
2364
0
     function );
2365
2366
0
    return( -1 );
2367
0
  }
2368
0
  safe_ucs2_string_index = *ucs2_string_index;
2369
2370
0
  if( safe_ucs2_string_index >= ucs2_string_size )
2371
0
  {
2372
0
    libcerror_error_set(
2373
0
     error,
2374
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2375
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2376
0
     "%s: UCS-2 string too small.",
2377
0
     function );
2378
2379
0
    return( -1 );
2380
0
  }
2381
  /* Determine if the Unicode character is valid
2382
   */
2383
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2384
0
  {
2385
0
    libcerror_error_set(
2386
0
     error,
2387
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2388
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2389
0
     "%s: unsupported Unicode character.",
2390
0
     function );
2391
2392
0
    return( -1 );
2393
0
  }
2394
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2395
0
  {
2396
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) unicode_character;
2397
0
  }
2398
0
  else
2399
0
  {
2400
0
    if( ( ucs2_string_size < 2 )
2401
0
     || ( safe_ucs2_string_index > ( ucs2_string_size - 2 ) ) )
2402
0
    {
2403
0
      libcerror_error_set(
2404
0
       error,
2405
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2406
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2407
0
       "%s: UCS-2 string too small.",
2408
0
       function );
2409
2410
0
      return( -1 );
2411
0
    }
2412
0
    unicode_character                      -= 0x010000;
2413
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
2414
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
2415
0
  }
2416
0
  *ucs2_string_index = safe_ucs2_string_index;
2417
2418
0
  return( 1 );
2419
0
}
2420
2421
/* Determines the size of an UCS-4 character from an Unicode character
2422
 * Adds the size to the UCS-4 character size value
2423
 * Returns 1 if successful or -1 on error
2424
 */
2425
int libuna_unicode_character_size_to_ucs4(
2426
     libuna_unicode_character_t unicode_character,
2427
     size_t *ucs4_character_size,
2428
     libcerror_error_t **error )
2429
0
{
2430
0
  static char *function = "libuna_unicode_character_size_to_ucs4";
2431
2432
0
  LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
2433
2434
0
  if( ucs4_character_size == NULL )
2435
0
  {
2436
0
    libcerror_error_set(
2437
0
     error,
2438
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2439
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2440
0
     "%s: invalid UCS-4 character size.",
2441
0
     function );
2442
2443
0
    return( -1 );
2444
0
  }
2445
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2446
0
  {
2447
0
    libcerror_error_set(
2448
0
     error,
2449
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2450
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2451
0
     "%s: unsupported Unicode character.",
2452
0
     function );
2453
2454
0
    return( -1 );
2455
0
  }
2456
0
  *ucs4_character_size += 1;
2457
2458
0
  return( 1 );
2459
0
}
2460
2461
/* Copies an Unicode character from an UCS-4 string
2462
 * Returns 1 if successful or -1 on error
2463
 */
2464
int libuna_unicode_character_copy_from_ucs4(
2465
     libuna_unicode_character_t *unicode_character,
2466
     const libuna_utf32_character_t *ucs4_string,
2467
     size_t ucs4_string_size,
2468
     size_t *ucs4_string_index,
2469
     libcerror_error_t **error )
2470
0
{
2471
0
  static char *function                             = "libuna_unicode_character_copy_from_ucs4";
2472
0
  libuna_unicode_character_t safe_unicode_character = 0;
2473
0
  size_t safe_ucs4_string_index                     = 0;
2474
2475
0
  if( unicode_character == NULL )
2476
0
  {
2477
0
    libcerror_error_set(
2478
0
     error,
2479
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2480
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2481
0
     "%s: invalid Unicode character.",
2482
0
     function );
2483
2484
0
    return( -1 );
2485
0
  }
2486
0
  if( ucs4_string == NULL )
2487
0
  {
2488
0
    libcerror_error_set(
2489
0
     error,
2490
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2491
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2492
0
     "%s: invalid UCS-4 string.",
2493
0
     function );
2494
2495
0
    return( -1 );
2496
0
  }
2497
0
  if( ucs4_string_size > (size_t) SSIZE_MAX )
2498
0
  {
2499
0
    libcerror_error_set(
2500
0
     error,
2501
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2502
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2503
0
     "%s: invalid UCS-4 string size value exceeds maximum.",
2504
0
     function );
2505
2506
0
    return( -1 );
2507
0
  }
2508
0
  if( ucs4_string_index == NULL )
2509
0
  {
2510
0
    libcerror_error_set(
2511
0
     error,
2512
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2513
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2514
0
     "%s: invalid UCS-4 string index.",
2515
0
     function );
2516
2517
0
    return( -1 );
2518
0
  }
2519
0
  safe_ucs4_string_index = *ucs4_string_index;
2520
2521
0
  if( safe_ucs4_string_index >= ucs4_string_size )
2522
0
  {
2523
0
    libcerror_error_set(
2524
0
     error,
2525
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2526
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2527
0
     "%s: UCS-4 string too small.",
2528
0
     function );
2529
2530
0
    return( -1 );
2531
0
  }
2532
0
  safe_unicode_character = ucs4_string[ safe_ucs4_string_index ];
2533
2534
  /* Determine if the Unicode character is valid
2535
   */
2536
0
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2537
0
  {
2538
0
    libcerror_error_set(
2539
0
     error,
2540
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2541
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2542
0
     "%s: unsupported Unicode character.",
2543
0
     function );
2544
2545
0
    return( -1 );
2546
0
  }
2547
0
  *unicode_character = safe_unicode_character;
2548
0
  *ucs4_string_index = safe_ucs4_string_index + 1;
2549
2550
0
  return( 1 );
2551
0
}
2552
2553
/* Copies an Unicode character into a UCS-4 string
2554
 * Returns 1 if successful or -1 on error
2555
 */
2556
int libuna_unicode_character_copy_to_ucs4(
2557
     libuna_unicode_character_t unicode_character,
2558
     libuna_utf32_character_t *ucs4_string,
2559
     size_t ucs4_string_size,
2560
     size_t *ucs4_string_index,
2561
     libcerror_error_t **error )
2562
0
{
2563
0
  static char *function         = "libuna_unicode_character_copy_to_ucs4";
2564
0
  size_t safe_ucs4_string_index = 0;
2565
2566
0
  if( ucs4_string == NULL )
2567
0
  {
2568
0
    libcerror_error_set(
2569
0
     error,
2570
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2571
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2572
0
     "%s: invalid UCS-4 string.",
2573
0
     function );
2574
2575
0
    return( -1 );
2576
0
  }
2577
0
  if( ucs4_string_size > (size_t) SSIZE_MAX )
2578
0
  {
2579
0
    libcerror_error_set(
2580
0
     error,
2581
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2582
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2583
0
     "%s: invalid UCS-4 string size value exceeds maximum.",
2584
0
     function );
2585
2586
0
    return( -1 );
2587
0
  }
2588
0
  if( ucs4_string_index == NULL )
2589
0
  {
2590
0
    libcerror_error_set(
2591
0
     error,
2592
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2593
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2594
0
     "%s: invalid UCS-4 string index.",
2595
0
     function );
2596
2597
0
    return( -1 );
2598
0
  }
2599
0
  safe_ucs4_string_index = *ucs4_string_index;
2600
2601
0
  if( safe_ucs4_string_index >= ucs4_string_size )
2602
0
  {
2603
0
    libcerror_error_set(
2604
0
     error,
2605
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2606
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2607
0
     "%s: UCS-4 string too small.",
2608
0
     function );
2609
2610
0
    return( -1 );
2611
0
  }
2612
  /* Determine if the Unicode character is valid
2613
   */
2614
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2615
0
  {
2616
0
    libcerror_error_set(
2617
0
     error,
2618
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2619
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2620
0
     "%s: unsupported Unicode character.",
2621
0
     function );
2622
2623
0
    return( -1 );
2624
0
  }
2625
0
  ucs4_string[ safe_ucs4_string_index ] = (libuna_utf32_character_t) unicode_character;
2626
2627
0
  *ucs4_string_index = safe_ucs4_string_index + 1;
2628
2629
0
  return( 1 );
2630
0
}
2631
2632
/* Determines the size of an UTF-7 stream character from an Unicode character
2633
 * Adds the size to the UTF-7 stream character size value
2634
 * Returns 1 if successful or -1 on error
2635
 */
2636
int libuna_unicode_character_size_to_utf7_stream(
2637
     libuna_unicode_character_t unicode_character,
2638
     size_t *utf7_stream_character_size,
2639
     uint32_t *utf7_stream_base64_data,
2640
     libcerror_error_t **error )
2641
0
{
2642
0
  static char *function                    = "libuna_unicode_character_size_to_utf7_stream";
2643
0
  libuna_utf16_character_t utf16_surrogate = 0;
2644
0
  size_t safe_utf7_stream_character_size   = 0;
2645
0
  uint32_t base64_triplet                  = 0;
2646
0
  uint32_t safe_utf7_stream_base64_data    = 0;
2647
0
  uint8_t base64_encode_character          = 0;
2648
0
  uint8_t byte_bit_shift                   = 0;
2649
0
  uint8_t current_byte                     = 0;
2650
0
  uint8_t number_of_bytes                  = 0;
2651
2652
0
  if( utf7_stream_character_size == NULL )
2653
0
  {
2654
0
    libcerror_error_set(
2655
0
     error,
2656
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2657
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2658
0
     "%s: invalid UTF-7 stream character size.",
2659
0
     function );
2660
2661
0
    return( -1 );
2662
0
  }
2663
0
  if( utf7_stream_base64_data == NULL )
2664
0
  {
2665
0
    libcerror_error_set(
2666
0
     error,
2667
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2668
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2669
0
     "%s: invalid UTF-7 stream base64 data.",
2670
0
     function );
2671
2672
0
    return( -1 );
2673
0
  }
2674
0
  safe_utf7_stream_character_size = *utf7_stream_character_size;
2675
0
  safe_utf7_stream_base64_data    = *utf7_stream_base64_data;
2676
2677
  /* Determine if the Unicode character is valid
2678
   */
2679
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
2680
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
2681
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
2682
0
  {
2683
0
    libcerror_error_set(
2684
0
     error,
2685
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2686
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2687
0
     "%s: unsupported Unicode character.",
2688
0
     function );
2689
2690
0
    return( -1 );
2691
0
  }
2692
  /* The + character must be escaped
2693
   */
2694
0
  if( unicode_character == (libuna_unicode_character_t) '+' )
2695
0
  {
2696
0
  }
2697
  /* Allow for the end of string character
2698
   */
2699
0
  else if( unicode_character == 0 )
2700
0
  {
2701
0
  }
2702
0
  else if( ( unicode_character >= 256 )
2703
0
        || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
2704
0
  {
2705
0
    base64_encode_character = 1;
2706
0
  }
2707
0
  if( base64_encode_character == 0 )
2708
0
  {
2709
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2710
0
    {
2711
0
      safe_utf7_stream_base64_data = 0;
2712
0
    }
2713
0
    safe_utf7_stream_character_size += 1;
2714
2715
    /* The + character must be escaped
2716
     */
2717
0
    if( unicode_character == (libuna_unicode_character_t) '+' )
2718
0
    {
2719
0
      safe_utf7_stream_character_size += 1;
2720
0
    }
2721
0
  }
2722
0
  else
2723
0
  {
2724
    /* Escape the base64 encoded characters with a +
2725
     */
2726
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
2727
0
    {
2728
0
      safe_utf7_stream_character_size += 1;
2729
0
    }
2730
    /* Otherwise continue the previously base64 encoded characters
2731
     */
2732
0
    else
2733
0
    {
2734
0
      base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
2735
0
      number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
2736
0
      current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
2737
2738
0
      if( number_of_bytes > 0 )
2739
0
      {
2740
0
        if( safe_utf7_stream_character_size < (size_t) ( number_of_bytes + 1 ) )
2741
0
        {
2742
0
          libcerror_error_set(
2743
0
           error,
2744
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
2745
0
           LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2746
0
           "%s: invalid UTF-7 stream character size value out of bounds.",
2747
0
           function );
2748
2749
0
          return( -1 );
2750
0
        }
2751
        /* Correct the size for the last partial base64 stream
2752
         */
2753
0
        safe_utf7_stream_character_size -= number_of_bytes + 1;
2754
0
      }
2755
0
      if( safe_utf7_stream_character_size < 1 )
2756
0
      {
2757
0
        libcerror_error_set(
2758
0
         error,
2759
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
2760
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2761
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
2762
0
         function );
2763
2764
0
        return( -1 );
2765
0
      }
2766
      /* Correct the size for the base64 stream termination character
2767
       */
2768
0
      safe_utf7_stream_character_size -= 1;
2769
0
    }
2770
0
    safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
2771
2772
0
    if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2773
0
    {
2774
0
      unicode_character -= 0x010000;
2775
2776
0
      utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
2777
2778
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
2779
0
      base64_triplet  += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
2780
0
      current_byte    += 1;
2781
0
      number_of_bytes += 1;
2782
2783
0
      if( number_of_bytes == 3 )
2784
0
      {
2785
0
        safe_utf7_stream_character_size += 4;
2786
0
        number_of_bytes                  = 0;
2787
0
        current_byte                     = 0;
2788
0
        base64_triplet                   = 0;
2789
0
      }
2790
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
2791
0
      base64_triplet  += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
2792
0
      current_byte    += 1;
2793
0
      number_of_bytes += 1;
2794
2795
0
      if( number_of_bytes == 3 )
2796
0
      {
2797
0
        safe_utf7_stream_character_size += 4;
2798
0
        number_of_bytes                  = 0;
2799
0
        current_byte                     = 0;
2800
0
        base64_triplet                   = 0;
2801
0
      }
2802
0
      unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
2803
0
    }
2804
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
2805
0
    base64_triplet  += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
2806
0
    current_byte    += 1;
2807
0
    number_of_bytes += 1;
2808
2809
0
    if( number_of_bytes == 3 )
2810
0
    {
2811
0
      safe_utf7_stream_character_size += 4;
2812
0
      number_of_bytes                  = 0;
2813
0
      current_byte                     = 0;
2814
0
      base64_triplet                   = 0;
2815
0
    }
2816
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
2817
0
    base64_triplet  += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
2818
0
    current_byte    += 1;
2819
0
    number_of_bytes += 1;
2820
2821
0
    if( number_of_bytes == 3 )
2822
0
    {
2823
0
      safe_utf7_stream_character_size += 4;
2824
0
      number_of_bytes                  = 0;
2825
0
      current_byte                     = 0;
2826
0
      base64_triplet                   = 0;
2827
0
    }
2828
    /* Terminate the base64 encoded characters
2829
     */
2830
0
    if( number_of_bytes > 0 )
2831
0
    {
2832
0
      safe_utf7_stream_character_size += number_of_bytes + 1;
2833
0
    }
2834
0
    safe_utf7_stream_character_size += 1;
2835
0
  }
2836
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2837
0
  {
2838
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
2839
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
2840
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
2841
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
2842
0
  }
2843
0
  *utf7_stream_character_size = safe_utf7_stream_character_size;
2844
0
  *utf7_stream_base64_data    = safe_utf7_stream_base64_data;
2845
2846
0
  return( 1 );
2847
0
}
2848
2849
/* Copies an Unicode character from an UTF-7 stream
2850
 * The bits of the base64 data contain:
2851
 *   0 - 23 the base64 triplet
2852
 *  24 - 25 the number of bytes in the triplet
2853
 *  26 - 27 unused
2854
 *  28 - 29 the current byte
2855
 *       30 unused
2856
 *       31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
2857
 *
2858
 * Returns 1 if successful or -1 on error
2859
 */
2860
int libuna_unicode_character_copy_from_utf7_stream(
2861
     libuna_unicode_character_t *unicode_character,
2862
     const uint8_t *utf7_stream,
2863
     size_t utf7_stream_size,
2864
     size_t *utf7_stream_index,
2865
     uint32_t *utf7_stream_base64_data,
2866
     libcerror_error_t **error )
2867
0
{
2868
0
  static char *function                             = "libuna_unicode_character_copy_from_utf7_stream";
2869
0
  libuna_unicode_character_t safe_unicode_character = 0;
2870
0
  libuna_utf16_character_t utf16_surrogate          = 0;
2871
0
  size_t safe_utf7_stream_index                     = 0;
2872
0
  uint32_t base64_triplet                           = 0;
2873
0
  uint32_t safe_utf7_stream_base64_data             = 0;
2874
0
  uint8_t byte_bit_shift                            = 0;
2875
0
  uint8_t current_byte                              = 0;
2876
0
  uint8_t number_of_bytes                           = 0;
2877
0
  uint8_t padding_size                              = 0;
2878
0
  uint8_t utf7_character_value                      = 0;
2879
2880
0
  if( unicode_character == NULL )
2881
0
  {
2882
0
    libcerror_error_set(
2883
0
     error,
2884
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2885
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2886
0
     "%s: invalid Unicode character.",
2887
0
     function );
2888
2889
0
    return( -1 );
2890
0
  }
2891
0
  if( utf7_stream == NULL )
2892
0
  {
2893
0
    libcerror_error_set(
2894
0
     error,
2895
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2896
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2897
0
     "%s: invalid UTF-7 stream.",
2898
0
     function );
2899
2900
0
    return( -1 );
2901
0
  }
2902
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
2903
0
  {
2904
0
    libcerror_error_set(
2905
0
     error,
2906
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2907
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2908
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
2909
0
     function );
2910
2911
0
    return( -1 );
2912
0
  }
2913
0
  if( utf7_stream_index == NULL )
2914
0
  {
2915
0
    libcerror_error_set(
2916
0
     error,
2917
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2918
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2919
0
     "%s: invalid UTF-7 stream index.",
2920
0
     function );
2921
2922
0
    return( -1 );
2923
0
  }
2924
0
  if( utf7_stream_base64_data == NULL )
2925
0
  {
2926
0
    libcerror_error_set(
2927
0
     error,
2928
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2929
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2930
0
     "%s: invalid UTF-7 base64 data.",
2931
0
     function );
2932
2933
0
    return( -1 );
2934
0
  }
2935
0
  safe_utf7_stream_index       = *utf7_stream_index;
2936
0
  safe_utf7_stream_base64_data = *utf7_stream_base64_data;
2937
2938
0
  if( safe_utf7_stream_index >= utf7_stream_size )
2939
0
  {
2940
0
    libcerror_error_set(
2941
0
     error,
2942
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2943
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2944
0
     "%s: UTF-7 stream too small.",
2945
0
     function );
2946
2947
0
    return( -1 );
2948
0
  }
2949
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2950
0
  {
2951
0
    base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
2952
0
    number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
2953
0
    current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
2954
2955
0
    if( current_byte >= number_of_bytes )
2956
0
    {
2957
0
      if( safe_utf7_stream_index >= utf7_stream_size )
2958
0
      {
2959
0
        libcerror_error_set(
2960
0
         error,
2961
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
2962
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2963
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
2964
0
         function );
2965
2966
0
        return( -1 );
2967
0
      }
2968
0
      utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
2969
2970
      /* Any character not in the modified base64 alphabet terminates the base64 encoded sequence
2971
       */
2972
0
      if( libuna_unicode_character_utf7_valid_base64_character[ utf7_character_value ] == 0 )
2973
0
      {
2974
0
        safe_utf7_stream_base64_data = 0;
2975
0
      }
2976
0
    }
2977
0
  }
2978
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
2979
0
  {
2980
0
    if( safe_utf7_stream_index >= utf7_stream_size )
2981
0
    {
2982
0
      libcerror_error_set(
2983
0
       error,
2984
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
2985
0
       LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2986
0
       "%s: invalid UTF-7 stream character size value out of bounds.",
2987
0
       function );
2988
2989
0
      return( -1 );
2990
0
    }
2991
0
    utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
2992
2993
    /* Determine if the character is modified base64 encoded
2994
     * or a + character
2995
     */
2996
0
    if( utf7_character_value == (uint8_t) '+' )
2997
0
    {
2998
0
      if( ( safe_utf7_stream_index + 1 ) >= utf7_stream_size )
2999
0
      {
3000
0
        libcerror_error_set(
3001
0
         error,
3002
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3003
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3004
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
3005
0
         function );
3006
3007
0
        return( -1 );
3008
0
      }
3009
0
      if( utf7_stream[ safe_utf7_stream_index + 1 ] != (uint8_t) '-' )
3010
0
      {
3011
0
        safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3012
3013
0
        safe_utf7_stream_index++;
3014
0
      }
3015
0
    }
3016
    /* Allow for the end of string character
3017
     */
3018
0
    else if( utf7_character_value == 0 )
3019
0
    {
3020
0
    }
3021
0
    else if( libuna_unicode_character_utf7_valid_directly_encoded_character[ utf7_character_value ] == 0 )
3022
0
    {
3023
0
      libcerror_error_set(
3024
0
       error,
3025
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3026
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3027
0
       "%s: invalid directly encoded UTF-7 character byte: 0x%02" PRIx8 ".",
3028
0
       function,
3029
0
       utf7_character_value );
3030
3031
0
      return( -1 );
3032
0
    }
3033
0
  }
3034
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3035
0
  {
3036
0
    safe_unicode_character = utf7_stream[ safe_utf7_stream_index++ ];
3037
3038
0
    if( ( safe_unicode_character == (libuna_unicode_character_t) '+' )
3039
0
     && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3040
0
    {
3041
0
      safe_utf7_stream_index++;
3042
0
    }
3043
0
  }
3044
0
  else if( ( number_of_bytes == 0 )
3045
0
        || ( current_byte >= number_of_bytes ) )
3046
0
  {
3047
0
    if( libuna_base64_triplet_copy_from_base64_stream(
3048
0
         &base64_triplet,
3049
0
         utf7_stream,
3050
0
         utf7_stream_size - 1,
3051
0
         &safe_utf7_stream_index,
3052
0
         &padding_size,
3053
0
         LIBUNA_BASE64_VARIANT_UTF7,
3054
0
         error ) != 1 )
3055
0
    {
3056
0
      libcerror_error_set(
3057
0
       error,
3058
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3059
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3060
0
       "%s: unable to copy base64 encoded UTF-7 characters.",
3061
0
       function );
3062
3063
0
      return( -1 );
3064
0
    }
3065
0
    if( padding_size > 2 )
3066
0
    {
3067
0
      libcerror_error_set(
3068
0
       error,
3069
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3070
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3071
0
       "%s: unsupported padding in base64 encoded UTF-7 characters.",
3072
0
       function );
3073
3074
0
      return( -1 );
3075
0
    }
3076
0
    number_of_bytes = 3 - padding_size;
3077
0
    current_byte    = 0;
3078
0
  }
3079
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3080
0
  {
3081
0
    byte_bit_shift         = 16 - ( current_byte * 8 );
3082
0
    safe_unicode_character = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3083
0
    current_byte          += 1;
3084
3085
0
    if( current_byte >= number_of_bytes )
3086
0
    {
3087
0
      if( libuna_base64_triplet_copy_from_base64_stream(
3088
0
           &base64_triplet,
3089
0
           utf7_stream,
3090
0
           utf7_stream_size - 1,
3091
0
           &safe_utf7_stream_index,
3092
0
           &padding_size,
3093
0
           LIBUNA_BASE64_VARIANT_UTF7,
3094
0
           error ) != 1 )
3095
0
      {
3096
0
        libcerror_error_set(
3097
0
         error,
3098
0
         LIBCERROR_ERROR_DOMAIN_CONVERSION,
3099
0
         LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3100
0
         "%s: unable to copy base64 encoded UTF-7 characters.",
3101
0
         function );
3102
3103
0
        return( -1 );
3104
0
      }
3105
0
      if( padding_size > 2 )
3106
0
      {
3107
0
        libcerror_error_set(
3108
0
         error,
3109
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3110
0
         LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3111
0
         "%s: unsupported padding in base64 encoded UTF-7 characters.",
3112
0
         function );
3113
3114
0
        return( -1 );
3115
0
      }
3116
0
      number_of_bytes = 3 - padding_size;
3117
0
      current_byte    = 0;
3118
0
    }
3119
0
    byte_bit_shift          = 16 - ( current_byte * 8 );
3120
0
    safe_unicode_character += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3121
0
    current_byte           += 1;
3122
3123
0
    if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3124
0
     && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
3125
0
    {
3126
0
      if( current_byte >= number_of_bytes )
3127
0
      {
3128
0
        if( libuna_base64_triplet_copy_from_base64_stream(
3129
0
             &base64_triplet,
3130
0
             utf7_stream,
3131
0
             utf7_stream_size - 1,
3132
0
             &safe_utf7_stream_index,
3133
0
             &padding_size,
3134
0
             LIBUNA_BASE64_VARIANT_UTF7,
3135
0
             error ) != 1 )
3136
0
        {
3137
0
          libcerror_error_set(
3138
0
           error,
3139
0
           LIBCERROR_ERROR_DOMAIN_CONVERSION,
3140
0
           LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3141
0
           "%s: unable to copy base64 encoded UTF-7 characters.",
3142
0
           function );
3143
3144
0
          return( -1 );
3145
0
        }
3146
0
        if( padding_size > 2 )
3147
0
        {
3148
0
          libcerror_error_set(
3149
0
           error,
3150
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3151
0
           LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3152
0
           "%s: unsupported padding in base64 encoded UTF-7 characters.",
3153
0
           function );
3154
3155
0
          return( -1 );
3156
0
        }
3157
0
        number_of_bytes = 3 - padding_size;
3158
0
        current_byte    = 0;
3159
0
      }
3160
0
      byte_bit_shift  = 16 - ( current_byte * 8 );
3161
0
      utf16_surrogate = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3162
0
      current_byte   += 1;
3163
3164
0
      if( current_byte >= number_of_bytes )
3165
0
      {
3166
0
        if( libuna_base64_triplet_copy_from_base64_stream(
3167
0
             &base64_triplet,
3168
0
             utf7_stream,
3169
0
             utf7_stream_size - 1,
3170
0
             &safe_utf7_stream_index,
3171
0
             &padding_size,
3172
0
             LIBUNA_BASE64_VARIANT_UTF7,
3173
0
             error ) != 1 )
3174
0
        {
3175
0
          libcerror_error_set(
3176
0
           error,
3177
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3178
0
           LIBCERROR_RUNTIME_ERROR_GET_FAILED,
3179
0
           "%s: unable to retrieve base64 encoded UTF-7 characters.",
3180
0
           function );
3181
3182
0
          return( -1 );
3183
0
        }
3184
0
        if( padding_size > 2 )
3185
0
        {
3186
0
          libcerror_error_set(
3187
0
           error,
3188
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3189
0
           LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3190
0
           "%s: unsupported padding in base64 encoded UTF-7 characters.",
3191
0
           function );
3192
3193
0
          return( -1 );
3194
0
        }
3195
0
        number_of_bytes = 3 - padding_size;
3196
0
        current_byte    = 0;
3197
0
      }
3198
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3199
0
      utf16_surrogate += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3200
0
      current_byte    += 1;
3201
3202
      /* Determine if the UTF-16 character is within the low surrogate range
3203
       */
3204
0
      if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
3205
0
       && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3206
0
      {
3207
0
        safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
3208
0
        safe_unicode_character <<= 10;
3209
0
        safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
3210
0
        safe_unicode_character  += 0x010000;
3211
0
      }
3212
0
      else
3213
0
      {
3214
0
        libcerror_error_set(
3215
0
         error,
3216
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3217
0
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3218
0
         "%s: unsupported low surrogate UTF-16 character.",
3219
0
         function );
3220
3221
0
        return( -1 );
3222
0
      }
3223
0
    }
3224
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3225
0
    {
3226
0
      libcerror_error_set(
3227
0
       error,
3228
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3229
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3230
0
       "%s: UTF-7 stream too small.",
3231
0
       function );
3232
3233
0
      return( -1 );
3234
0
    }
3235
0
    if( ( current_byte >= number_of_bytes )
3236
0
     && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3237
0
    {
3238
0
      safe_utf7_stream_base64_data = 0;
3239
3240
0
      safe_utf7_stream_index++;
3241
0
    }
3242
0
  }
3243
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3244
0
  {
3245
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
3246
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3247
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3248
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3249
0
  }
3250
0
  *unicode_character       = safe_unicode_character;
3251
0
  *utf7_stream_index       = safe_utf7_stream_index;
3252
0
  *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3253
3254
0
  return( 1 );
3255
0
}
3256
3257
/* Copies an Unicode character into a UTF-7 stream
3258
 * The bits of the base64 data contain:
3259
 *   0 - 23 the base64 triplet
3260
 *  24 - 25 the number of bytes in the triplet
3261
 *  26 - 27 unused
3262
 *  28 - 29 the current byte
3263
 *       30 unused
3264
 *       31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3265
 *
3266
 * Returns 1 if successful or -1 on error
3267
 */
3268
int libuna_unicode_character_copy_to_utf7_stream(
3269
     libuna_unicode_character_t unicode_character,
3270
     uint8_t *utf7_stream,
3271
     size_t utf7_stream_size,
3272
     size_t *utf7_stream_index,
3273
     uint32_t *utf7_stream_base64_data,
3274
     libcerror_error_t **error )
3275
0
{
3276
0
  static char *function                    = "libuna_unicode_character_copy_to_utf7_stream";
3277
0
  libuna_utf16_character_t utf16_surrogate = 0;
3278
0
  size_t safe_utf7_stream_index            = 0;
3279
0
  uint32_t base64_triplet                  = 0;
3280
0
  uint32_t safe_utf7_stream_base64_data    = 0;
3281
0
  uint8_t base64_encode_character          = 0;
3282
0
  uint8_t byte_bit_shift                   = 0;
3283
0
  uint8_t current_byte                     = 0;
3284
0
  uint8_t number_of_bytes                  = 0;
3285
3286
0
  if( utf7_stream == NULL )
3287
0
  {
3288
0
    libcerror_error_set(
3289
0
     error,
3290
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3291
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3292
0
     "%s: invalid UTF-7 stream.",
3293
0
     function );
3294
3295
0
    return( -1 );
3296
0
  }
3297
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
3298
0
  {
3299
0
    libcerror_error_set(
3300
0
     error,
3301
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3302
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3303
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
3304
0
     function );
3305
3306
0
    return( -1 );
3307
0
  }
3308
0
  if( utf7_stream_index == NULL )
3309
0
  {
3310
0
    libcerror_error_set(
3311
0
     error,
3312
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3313
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3314
0
     "%s: invalid UTF-7 stream index.",
3315
0
     function );
3316
3317
0
    return( -1 );
3318
0
  }
3319
0
  if( utf7_stream_base64_data == NULL )
3320
0
  {
3321
0
    libcerror_error_set(
3322
0
     error,
3323
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3324
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3325
0
     "%s: invalid UTF-7 stream base64 data.",
3326
0
     function );
3327
3328
0
    return( -1 );
3329
0
  }
3330
0
  safe_utf7_stream_index       = *utf7_stream_index;
3331
0
  safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3332
3333
  /* Determine if the Unicode character is valid
3334
   */
3335
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3336
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3337
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
3338
0
  {
3339
0
    libcerror_error_set(
3340
0
     error,
3341
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3342
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3343
0
     "%s: unsupported Unicode character.",
3344
0
     function );
3345
3346
0
    return( -1 );
3347
0
  }
3348
  /* A-Z is not a continous range on an EBCDIC based system
3349
   * it consists of the ranges: A-I, J-R, S-Z
3350
   */
3351
0
  if( ( unicode_character >= 0x41 )
3352
0
   && ( unicode_character <= 0x49 ) )
3353
0
  {
3354
0
    unicode_character = ( unicode_character - 0x41 ) + (libuna_unicode_character_t) 'A';
3355
0
  }
3356
0
  else if( ( unicode_character >= 0x4a )
3357
0
        && ( unicode_character <= 0x52 ) )
3358
0
  {
3359
0
    unicode_character = ( unicode_character - 0x4a ) + (libuna_unicode_character_t) 'J';
3360
0
  }
3361
0
  else if( ( unicode_character >= 0x53 )
3362
0
        && ( unicode_character <= 0x5a ) )
3363
0
  {
3364
0
    unicode_character = ( unicode_character - 0x53 ) + (libuna_unicode_character_t) 'S';
3365
0
  }
3366
  /* a-z is not a continous range on an EBCDIC based system
3367
   * it consists of the ranges: a-i, j-r, s-z
3368
   */
3369
0
  else if( ( unicode_character >= 0x61 )
3370
0
        && ( unicode_character <= 0x69 ) )
3371
0
  {
3372
0
    unicode_character = ( unicode_character - 0x61 ) + (libuna_unicode_character_t) 'a';
3373
0
  }
3374
0
  else if( ( unicode_character >= 0x6a )
3375
0
        && ( unicode_character <= 0x72 ) )
3376
0
  {
3377
0
    unicode_character = ( unicode_character - 0x6a ) + (libuna_unicode_character_t) 'j';
3378
0
  }
3379
0
  else if( ( unicode_character >= 0x73 )
3380
0
        && ( unicode_character <= 0x7a ) )
3381
0
  {
3382
0
    unicode_character = ( unicode_character - 0x73 ) + (libuna_unicode_character_t) 's';
3383
0
  }
3384
  /* 0-9
3385
   */
3386
0
  else if( ( unicode_character >= 0x30 )
3387
0
        && ( unicode_character <= 0x39 ) )
3388
0
  {
3389
0
    unicode_character = ( unicode_character - 0x30 ) + (libuna_unicode_character_t) '0';
3390
0
  }
3391
  /* The + character must be escaped
3392
   */
3393
0
  else if( unicode_character == (libuna_unicode_character_t) '+' )
3394
0
  {
3395
0
  }
3396
  /* Allow for the end of string character
3397
   */
3398
0
  else if( unicode_character == 0 )
3399
0
  {
3400
0
  }
3401
0
  else if( ( unicode_character >= 256 )
3402
0
        || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3403
0
  {
3404
0
    base64_encode_character = 1;
3405
0
  }
3406
0
  if( base64_encode_character == 0 )
3407
0
  {
3408
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3409
0
    {
3410
0
      safe_utf7_stream_base64_data = 0;
3411
0
    }
3412
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3413
0
    {
3414
0
      libcerror_error_set(
3415
0
       error,
3416
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3417
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3418
0
       "%s: UTF-7 stream too small.",
3419
0
       function );
3420
3421
0
      return( -1 );
3422
0
    }
3423
0
    utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) unicode_character;
3424
3425
    /* The + character must be escaped
3426
     */
3427
0
    if( unicode_character == (libuna_unicode_character_t) '+' )
3428
0
    {
3429
0
      if( safe_utf7_stream_index >= utf7_stream_size )
3430
0
      {
3431
0
        libcerror_error_set(
3432
0
         error,
3433
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3434
0
         LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3435
0
         "%s: UTF-7 stream too small.",
3436
0
         function );
3437
3438
0
        return( -1 );
3439
0
      }
3440
0
      utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3441
0
    }
3442
0
  }
3443
0
  else
3444
0
  {
3445
    /* Escape the base64 encoded chracters with a +
3446
     */
3447
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3448
0
    {
3449
0
      if( safe_utf7_stream_index >= utf7_stream_size )
3450
0
      {
3451
0
        libcerror_error_set(
3452
0
         error,
3453
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3454
0
         LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3455
0
         "%s: UTF-7 stream too small.",
3456
0
         function );
3457
3458
0
        return( -1 );
3459
0
      }
3460
0
      utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '+';
3461
0
    }
3462
    /* Otherwise continue the previously base64 encoded characters
3463
     */
3464
0
    else
3465
0
    {
3466
0
      base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
3467
0
      number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3468
0
      current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3469
3470
0
      if( number_of_bytes > 0 )
3471
0
      {
3472
        /* Correct the index for the last partial base64 stream
3473
         */
3474
0
        safe_utf7_stream_index -= number_of_bytes + 1;
3475
0
      }
3476
      /* Correct the index for the base64 stream termination character
3477
       */
3478
0
      safe_utf7_stream_index -= 1;
3479
0
    }
3480
0
    safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3481
3482
0
    if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3483
0
    {
3484
0
      unicode_character -= 0x010000;
3485
3486
0
      utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 )
3487
0
                      + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3488
3489
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3490
0
      base64_triplet  += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3491
0
      current_byte    += 1;
3492
0
      number_of_bytes += 1;
3493
3494
0
      if( number_of_bytes == 3 )
3495
0
      {
3496
0
        if( libuna_base64_triplet_copy_to_base64_stream(
3497
0
             base64_triplet,
3498
0
             utf7_stream,
3499
0
             utf7_stream_size,
3500
0
             &safe_utf7_stream_index,
3501
0
             0,
3502
0
             LIBUNA_BASE64_VARIANT_UTF7,
3503
0
             error ) != 1 )
3504
0
        {
3505
0
          libcerror_error_set(
3506
0
           error,
3507
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3508
0
           LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3509
0
           "%s: unable to set base64 encoded UTF-7 characters.",
3510
0
           function );
3511
3512
0
          return( -1 );
3513
0
        }
3514
0
        number_of_bytes = 0;
3515
0
        current_byte    = 0;
3516
0
        base64_triplet  = 0;
3517
0
      }
3518
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3519
0
      base64_triplet  += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3520
0
      current_byte    += 1;
3521
0
      number_of_bytes += 1;
3522
3523
0
      if( number_of_bytes == 3 )
3524
0
      {
3525
0
        if( libuna_base64_triplet_copy_to_base64_stream(
3526
0
             base64_triplet,
3527
0
             utf7_stream,
3528
0
             utf7_stream_size,
3529
0
             &safe_utf7_stream_index,
3530
0
             0,
3531
0
             LIBUNA_BASE64_VARIANT_UTF7,
3532
0
             error ) != 1 )
3533
0
        {
3534
0
          libcerror_error_set(
3535
0
           error,
3536
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3537
0
           LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3538
0
           "%s: unable to set base64 encoded UTF-7 characters.",
3539
0
           function );
3540
3541
0
          return( -1 );
3542
0
        }
3543
0
        number_of_bytes = 0;
3544
0
        current_byte    = 0;
3545
0
        base64_triplet  = 0;
3546
0
      }
3547
0
      unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff )
3548
0
                        + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3549
0
    }
3550
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
3551
0
    base64_triplet  += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3552
0
    current_byte    += 1;
3553
0
    number_of_bytes += 1;
3554
3555
0
    if( number_of_bytes == 3 )
3556
0
    {
3557
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3558
0
           base64_triplet,
3559
0
           utf7_stream,
3560
0
           utf7_stream_size,
3561
0
           &safe_utf7_stream_index,
3562
0
           0,
3563
0
           LIBUNA_BASE64_VARIANT_UTF7,
3564
0
           error ) != 1 )
3565
0
      {
3566
0
        libcerror_error_set(
3567
0
         error,
3568
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3569
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3570
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3571
0
         function );
3572
3573
0
        return( -1 );
3574
0
      }
3575
0
      number_of_bytes = 0;
3576
0
      current_byte    = 0;
3577
0
      base64_triplet  = 0;
3578
0
    }
3579
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
3580
0
    base64_triplet  += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3581
0
    current_byte    += 1;
3582
0
    number_of_bytes += 1;
3583
3584
0
    if( number_of_bytes == 3 )
3585
0
    {
3586
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3587
0
           base64_triplet,
3588
0
           utf7_stream,
3589
0
           utf7_stream_size,
3590
0
           &safe_utf7_stream_index,
3591
0
           0,
3592
0
           LIBUNA_BASE64_VARIANT_UTF7,
3593
0
           error ) != 1 )
3594
0
      {
3595
0
        libcerror_error_set(
3596
0
         error,
3597
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3598
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3599
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3600
0
         function );
3601
3602
0
        return( -1 );
3603
0
      }
3604
0
      number_of_bytes = 0;
3605
0
      current_byte    = 0;
3606
0
      base64_triplet  = 0;
3607
0
    }
3608
    /* Terminate the base64 encoded characters
3609
     */
3610
0
    if( number_of_bytes > 0 )
3611
0
    {
3612
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3613
0
           base64_triplet,
3614
0
           utf7_stream,
3615
0
           utf7_stream_size,
3616
0
           &safe_utf7_stream_index,
3617
0
           3 - number_of_bytes,
3618
0
           LIBUNA_BASE64_VARIANT_UTF7,
3619
0
           error ) != 1 )
3620
0
      {
3621
0
        libcerror_error_set(
3622
0
         error,
3623
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3624
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3625
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3626
0
         function );
3627
3628
0
        return( -1 );
3629
0
      }
3630
0
    }
3631
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3632
0
    {
3633
0
      libcerror_error_set(
3634
0
       error,
3635
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3636
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3637
0
       "%s: UTF-7 stream too small.",
3638
0
       function );
3639
3640
0
      return( -1 );
3641
0
    }
3642
0
    utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3643
0
  }
3644
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3645
0
  {
3646
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
3647
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3648
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3649
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3650
0
  }
3651
0
  *utf7_stream_index       = safe_utf7_stream_index;
3652
0
  *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3653
3654
0
  return( 1 );
3655
0
}
3656
3657
/* Determines the size of an UTF-8 character from an Unicode character
3658
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
3659
 * Adds the size to the UTF-8 character size value
3660
 * Returns 1 if successful or -1 on error
3661
 */
3662
int libuna_unicode_character_size_to_utf8(
3663
     libuna_unicode_character_t unicode_character,
3664
     size_t *utf8_character_size,
3665
     libcerror_error_t **error )
3666
4.12M
{
3667
4.12M
  static char *function           = "libuna_unicode_character_size_to_utf8";
3668
4.12M
  size_t safe_utf8_character_size = 0;
3669
3670
4.12M
  if( utf8_character_size == NULL )
3671
0
  {
3672
0
    libcerror_error_set(
3673
0
     error,
3674
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3675
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3676
0
     "%s: invalid UTF-8 character size.",
3677
0
     function );
3678
3679
0
    return( -1 );
3680
0
  }
3681
  /* Determine if the Unicode character is valid
3682
   */
3683
4.12M
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3684
4.12M
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3685
4.12M
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
3686
0
  {
3687
0
    libcerror_error_set(
3688
0
     error,
3689
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3690
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3691
0
     "%s: unsupported Unicode character.",
3692
0
     function );
3693
3694
0
    return( -1 );
3695
0
  }
3696
  /* RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
3697
   * while its predecessor RFC 2279 allowed up to 6 bytes
3698
   */
3699
4.12M
  if( unicode_character < 0x00000080UL )
3700
1.12M
  {
3701
1.12M
    safe_utf8_character_size += 1;
3702
1.12M
  }
3703
3.00M
  else if( unicode_character < 0x00000800UL )
3704
211k
  {
3705
211k
    safe_utf8_character_size += 2;
3706
211k
  }
3707
2.79M
  else if( unicode_character < 0x00010000UL )
3708
2.77M
  {
3709
2.77M
    safe_utf8_character_size += 3;
3710
2.77M
  }
3711
19.6k
  else
3712
19.6k
  {
3713
19.6k
    safe_utf8_character_size += 4;
3714
19.6k
  }
3715
4.12M
  *utf8_character_size += safe_utf8_character_size;
3716
3717
4.12M
  return( 1 );
3718
4.12M
}
3719
3720
/* Copies an Unicode character from an UTF-8 string
3721
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
3722
 * Returns 1 if successful or -1 on error
3723
 */
3724
int libuna_unicode_character_copy_from_utf8(
3725
     libuna_unicode_character_t *unicode_character,
3726
     const libuna_utf8_character_t *utf8_string,
3727
     size_t utf8_string_size,
3728
     size_t *utf8_string_index,
3729
     libcerror_error_t **error )
3730
605k
{
3731
605k
  static char *function                             = "libuna_unicode_character_copy_from_utf8";
3732
605k
  libuna_unicode_character_t safe_unicode_character = 0;
3733
605k
  size_t safe_utf8_string_index                     = 0;
3734
605k
  uint8_t byte_value1                               = 0;
3735
605k
  uint8_t byte_value2                               = 0;
3736
605k
  uint8_t byte_value3                               = 0;
3737
605k
  uint8_t byte_value4                               = 0;
3738
605k
  uint8_t utf8_character_additional_bytes           = 0;
3739
605k
  int result                                        = 0;
3740
3741
605k
  if( unicode_character == NULL )
3742
0
  {
3743
0
    libcerror_error_set(
3744
0
     error,
3745
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3746
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3747
0
     "%s: invalid Unicode character.",
3748
0
     function );
3749
3750
0
    return( -1 );
3751
0
  }
3752
605k
  if( utf8_string == NULL )
3753
0
  {
3754
0
    libcerror_error_set(
3755
0
     error,
3756
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3757
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3758
0
     "%s: invalid UTF-8 string.",
3759
0
     function );
3760
3761
0
    return( -1 );
3762
0
  }
3763
605k
  if( utf8_string_size > (size_t) SSIZE_MAX )
3764
0
  {
3765
0
    libcerror_error_set(
3766
0
     error,
3767
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3768
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3769
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
3770
0
     function );
3771
3772
0
    return( -1 );
3773
0
  }
3774
605k
  if( utf8_string_index == NULL )
3775
0
  {
3776
0
    libcerror_error_set(
3777
0
     error,
3778
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3779
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3780
0
     "%s: invalid UTF-8 string index.",
3781
0
     function );
3782
3783
0
    return( -1 );
3784
0
  }
3785
605k
  safe_utf8_string_index = *utf8_string_index;
3786
3787
605k
  if( safe_utf8_string_index >= utf8_string_size )
3788
0
  {
3789
0
    libcerror_error_set(
3790
0
     error,
3791
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3792
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3793
0
     "%s: UTF-8 string too small.",
3794
0
     function );
3795
3796
0
    return( -1 );
3797
0
  }
3798
  /* Determine the number of additional bytes of the UTF-8 character
3799
   */
3800
605k
  byte_value1 = utf8_string[ safe_utf8_string_index ];
3801
3802
  /* Determine the UTF-8 character and make sure it is valid
3803
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
3804
   * while its predecessor RFC 2279 allowed up to 6 bytes
3805
   */
3806
605k
  if( byte_value1 > 0xf4 )
3807
697
  {
3808
697
    libcerror_error_set(
3809
697
     error,
3810
697
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3811
697
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3812
697
     "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
3813
697
     function,
3814
697
     byte_value1 );
3815
3816
697
    return( -1 );
3817
697
  }
3818
605k
  if( byte_value1 < 0xc0 )
3819
580k
  {
3820
580k
    utf8_character_additional_bytes = 0;
3821
580k
  }
3822
24.2k
  else if( byte_value1 < 0xe0 )
3823
5.57k
  {
3824
5.57k
    utf8_character_additional_bytes = 1;
3825
5.57k
  }
3826
18.6k
  else if( byte_value1 < 0xf0 )
3827
13.1k
  {
3828
13.1k
    utf8_character_additional_bytes = 2;
3829
13.1k
  }
3830
5.50k
  else
3831
5.50k
  {
3832
5.50k
    utf8_character_additional_bytes = 3;
3833
5.50k
  }
3834
605k
  if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
3835
605k
   || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
3836
174
  {
3837
174
    libcerror_error_set(
3838
174
     error,
3839
174
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3840
174
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3841
174
     "%s: missing UTF-8 character bytes.",
3842
174
     function );
3843
3844
174
    return( -1 );
3845
174
  }
3846
604k
  safe_unicode_character = byte_value1;
3847
3848
604k
  if( utf8_character_additional_bytes == 0 )
3849
580k
  {
3850
580k
    if( byte_value1 >= 0x80 )
3851
600
    {
3852
600
      libcerror_error_set(
3853
600
       error,
3854
600
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3855
600
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3856
600
       "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
3857
600
       function,
3858
600
       byte_value1 );
3859
3860
600
      return( -1 );
3861
600
    }
3862
580k
  }
3863
604k
  if( utf8_character_additional_bytes >= 1 )
3864
24.0k
  {
3865
24.0k
    byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
3866
3867
24.0k
    if( ( byte_value2 < 0x80 )
3868
24.0k
     || ( byte_value2 > 0xbf ) )
3869
779
    {
3870
779
      libcerror_error_set(
3871
779
       error,
3872
779
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3873
779
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3874
779
       "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
3875
779
       function,
3876
779
       byte_value2 );
3877
3878
779
      return( -1 );
3879
779
    }
3880
23.2k
    result = 1;
3881
3882
23.2k
    switch( byte_value1 )
3883
23.2k
    {
3884
2.33k
      case 0xe0:
3885
2.33k
        if( ( byte_value2 < 0xa0 )
3886
2.33k
         || ( byte_value2 > 0xbf ) )
3887
84
        {
3888
84
          result = 0;
3889
84
        }
3890
2.33k
        break;
3891
3892
2.91k
      case 0xed:
3893
2.91k
        if( ( byte_value2 < 0x80 )
3894
2.91k
         || ( byte_value2 > 0x9f ) )
3895
50
        {
3896
50
          result = 0;
3897
50
        }
3898
2.91k
        break;
3899
3900
2.90k
      case 0xf0:
3901
2.90k
        if( ( byte_value2 < 0x90 )
3902
2.90k
         || ( byte_value2 > 0xbf ) )
3903
82
        {
3904
82
          result = 0;
3905
82
        }
3906
2.90k
        break;
3907
3908
1.30k
      case 0xf4:
3909
1.30k
        if( ( byte_value2 < 0x80 )
3910
1.30k
         || ( byte_value2 > 0xbf ) )
3911
0
        {
3912
0
          result = 0;
3913
0
        }
3914
1.30k
        break;
3915
3916
13.8k
      default:
3917
13.8k
        break;
3918
23.2k
    }
3919
23.2k
    if( result == 0 )
3920
216
    {
3921
216
      libcerror_error_set(
3922
216
       error,
3923
216
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3924
216
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3925
216
       "%s: invalid 1st and 2nd UTF-8 character byte pair: 0x%02" PRIx8 " 0x%02" PRIx8 ".",
3926
216
       function,
3927
216
       byte_value1,
3928
216
       byte_value2 );
3929
3930
216
      return( -1 );
3931
216
    }
3932
23.0k
    safe_unicode_character <<= 6;
3933
23.0k
    safe_unicode_character += byte_value2;
3934
3935
23.0k
    if( utf8_character_additional_bytes == 1 )
3936
5.21k
    {
3937
5.21k
      safe_unicode_character -= 0x03080;
3938
5.21k
    }
3939
23.0k
  }
3940
603k
  if( utf8_character_additional_bytes >= 2 )
3941
17.8k
  {
3942
17.8k
    byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
3943
3944
17.8k
    if( ( byte_value3 < 0x80 )
3945
17.8k
     || ( byte_value3 > 0xbf ) )
3946
330
    {
3947
330
      libcerror_error_set(
3948
330
       error,
3949
330
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3950
330
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3951
330
       "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
3952
330
       function,
3953
330
       byte_value3 );
3954
3955
330
      return( -1 );
3956
330
    }
3957
17.5k
    result = 1;
3958
3959
17.5k
    switch( byte_value2 )
3960
17.5k
    {
3961
0
      case 0xe0:
3962
0
        if( ( byte_value2 < 0xa0 )
3963
0
         || ( byte_value2 > 0xbf ) )
3964
0
        {
3965
0
          result = 0;
3966
0
        }
3967
0
        break;
3968
3969
0
      case 0xed:
3970
0
        if( ( byte_value2 < 0x80 )
3971
0
         || ( byte_value2 > 0x9f ) )
3972
0
        {
3973
0
          result = 0;
3974
0
        }
3975
0
        break;
3976
3977
17.5k
      default:
3978
17.5k
        break;
3979
17.5k
    }
3980
17.5k
    if( result == 0 )
3981
0
    {
3982
0
      libcerror_error_set(
3983
0
       error,
3984
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3985
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3986
0
       "%s: invalid 2nd and 3rd UTF-8 character byte pair: 0x%02" PRIx8 " 0x%02" PRIx8 ".",
3987
0
       function,
3988
0
       byte_value2,
3989
0
       byte_value3 );
3990
3991
0
      return( -1 );
3992
0
    }
3993
17.5k
    safe_unicode_character <<= 6;
3994
17.5k
    safe_unicode_character += byte_value3;
3995
3996
17.5k
    if( utf8_character_additional_bytes == 2 )
3997
12.3k
    {
3998
12.3k
      safe_unicode_character -= 0x0e2080;
3999
12.3k
    }
4000
17.5k
  }
4001
602k
  if( utf8_character_additional_bytes >= 3 )
4002
5.18k
  {
4003
5.18k
    byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4004
4005
5.18k
    if( ( byte_value4 < 0x80 )
4006
5.18k
     || ( byte_value4 > 0xbf ) )
4007
168
    {
4008
168
      libcerror_error_set(
4009
168
       error,
4010
168
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4011
168
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4012
168
       "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4013
168
       function,
4014
168
       byte_value4 );
4015
4016
168
      return( -1 );
4017
168
    }
4018
5.02k
    safe_unicode_character <<= 6;
4019
5.02k
    safe_unicode_character += byte_value4;
4020
4021
5.02k
    if( utf8_character_additional_bytes == 3 )
4022
5.02k
    {
4023
5.02k
      safe_unicode_character -= 0x03c82080;
4024
5.02k
    }
4025
5.02k
  }
4026
  /* Determine if the Unicode character is valid
4027
   */
4028
602k
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4029
602k
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4030
602k
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4031
56
  {
4032
56
    libcerror_error_set(
4033
56
     error,
4034
56
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4035
56
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4036
56
     "%s: unsupported Unicode character.",
4037
56
     function );
4038
4039
56
    return( -1 );
4040
56
  }
4041
602k
  *unicode_character = safe_unicode_character;
4042
602k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4043
4044
602k
  return( 1 );
4045
602k
}
4046
4047
/* Copies an Unicode character into a UTF-8 string
4048
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
4049
 * Returns 1 if successful or -1 on error
4050
 */
4051
int libuna_unicode_character_copy_to_utf8(
4052
     libuna_unicode_character_t unicode_character,
4053
     libuna_utf8_character_t *utf8_string,
4054
     size_t utf8_string_size,
4055
     size_t *utf8_string_index,
4056
     libcerror_error_t **error )
4057
3.21M
{
4058
3.21M
  static char *function                   = "libuna_unicode_character_copy_to_utf8";
4059
3.21M
  size_t safe_utf8_string_index           = 0;
4060
3.21M
  size_t utf8_character_iterator          = 0;
4061
3.21M
  uint8_t utf8_character_additional_bytes = 0;
4062
3.21M
  uint8_t utf8_first_character_mark       = 0;
4063
4064
3.21M
  if( utf8_string == NULL )
4065
0
  {
4066
0
    libcerror_error_set(
4067
0
     error,
4068
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4069
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4070
0
     "%s: invalid UTF-8 string.",
4071
0
     function );
4072
4073
0
    return( -1 );
4074
0
  }
4075
3.21M
  if( utf8_string_size > (size_t) SSIZE_MAX )
4076
0
  {
4077
0
    libcerror_error_set(
4078
0
     error,
4079
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4080
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4081
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4082
0
     function );
4083
4084
0
    return( -1 );
4085
0
  }
4086
3.21M
  if( utf8_string_index == NULL )
4087
0
  {
4088
0
    libcerror_error_set(
4089
0
     error,
4090
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4091
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4092
0
     "%s: invalid UTF-8 string index.",
4093
0
     function );
4094
4095
0
    return( -1 );
4096
0
  }
4097
3.21M
  safe_utf8_string_index = *utf8_string_index;
4098
4099
3.21M
  if( safe_utf8_string_index >= utf8_string_size )
4100
325
  {
4101
325
    libcerror_error_set(
4102
325
     error,
4103
325
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4104
325
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4105
325
     "%s: UTF-8 string too small.",
4106
325
     function );
4107
4108
325
    return( -1 );
4109
325
  }
4110
  /* Determine if the Unicode character is valid
4111
   */
4112
3.21M
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4113
3.21M
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4114
3.21M
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4115
0
  {
4116
0
    libcerror_error_set(
4117
0
     error,
4118
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4119
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4120
0
     "%s: unsupported Unicode character.",
4121
0
     function );
4122
4123
0
    return( -1 );
4124
0
  }
4125
  /* Determine how many UTF-8 character bytes are required
4126
   */
4127
3.21M
  if( unicode_character < 0x080 )
4128
733k
  {
4129
733k
    utf8_character_additional_bytes = 0;
4130
733k
    utf8_first_character_mark       = 0;
4131
733k
  }
4132
2.48M
  else if( unicode_character < 0x0800 )
4133
108k
  {
4134
108k
    utf8_character_additional_bytes = 1;
4135
108k
    utf8_first_character_mark       = 0x0c0;
4136
108k
  }
4137
2.37M
  else if( unicode_character < 0x010000 )
4138
2.36M
  {
4139
2.36M
    utf8_character_additional_bytes = 2;
4140
2.36M
    utf8_first_character_mark       = 0x0e0;
4141
2.36M
  }
4142
3.81k
  else
4143
3.81k
  {
4144
3.81k
    utf8_character_additional_bytes = 3;
4145
3.81k
    utf8_first_character_mark       = 0x0f0;
4146
3.81k
  }
4147
  /* Convert Unicode character into UTF-8 character bytes
4148
   */
4149
3.21M
  if( ( utf8_character_additional_bytes > utf8_string_size )
4150
3.21M
   || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4151
476
  {
4152
476
    libcerror_error_set(
4153
476
     error,
4154
476
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4155
476
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4156
476
     "%s: UTF-8 string too small.",
4157
476
     function );
4158
4159
476
    return( -1 );
4160
476
  }
4161
3.21M
  for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4162
8.07M
       utf8_character_iterator > safe_utf8_string_index;
4163
4.85M
       utf8_character_iterator-- )
4164
4.85M
  {
4165
4.85M
    utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4166
4167
4.85M
    unicode_character >>= 6;
4168
4.85M
  }
4169
3.21M
  utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4170
4171
3.21M
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4172
4173
3.21M
  return( 1 );
4174
3.21M
}
4175
4176
/* Determines the size of an UTF-8 character from an Unicode character
4177
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4178
 * Adds the size to the UTF-8 character size value
4179
 * Returns 1 if successful or -1 on error
4180
 */
4181
int libuna_unicode_character_size_to_utf8_rfc2279(
4182
     libuna_unicode_character_t unicode_character,
4183
     size_t *utf8_character_size,
4184
     libcerror_error_t **error )
4185
4.13k
{
4186
4.13k
  static char *function           = "libuna_unicode_character_size_to_utf8_rfc2279";
4187
4.13k
  size_t safe_utf8_character_size = 0;
4188
4189
4.13k
  if( utf8_character_size == NULL )
4190
0
  {
4191
0
    libcerror_error_set(
4192
0
     error,
4193
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4194
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4195
0
     "%s: invalid UTF-8 character size.",
4196
0
     function );
4197
4198
0
    return( -1 );
4199
0
  }
4200
4.13k
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4201
0
  {
4202
0
    libcerror_error_set(
4203
0
     error,
4204
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4205
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4206
0
     "%s: unsupported Unicode character.",
4207
0
     function );
4208
4209
0
    return( -1 );
4210
0
  }
4211
  /* RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4212
   * while its predecessor RFC 2279 allowed up to 6 bytes
4213
   */
4214
4.13k
  if( unicode_character < 0x00000080UL )
4215
293
  {
4216
293
    safe_utf8_character_size += 1;
4217
293
  }
4218
3.83k
  else if( unicode_character < 0x00000800UL )
4219
333
  {
4220
333
    safe_utf8_character_size += 2;
4221
333
  }
4222
3.50k
  else if( unicode_character < 0x00010000UL )
4223
3.39k
  {
4224
3.39k
    safe_utf8_character_size += 3;
4225
3.39k
  }
4226
111
  else if( unicode_character < 0x00200000UL )
4227
111
  {
4228
111
    safe_utf8_character_size += 4;
4229
111
  }
4230
0
  else if( unicode_character < 0x04000000UL )
4231
0
  {
4232
0
    safe_utf8_character_size += 5;
4233
0
  }
4234
0
  else
4235
0
  {
4236
0
    safe_utf8_character_size += 6;
4237
0
  }
4238
4.13k
  *utf8_character_size += safe_utf8_character_size;
4239
4240
4.13k
  return( 1 );
4241
4.13k
}
4242
4243
/* Copies an Unicode character from an UTF-8 string
4244
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4245
 * Returns 1 if successful or -1 on error
4246
 */
4247
int libuna_unicode_character_copy_from_utf8_rfc2279(
4248
     libuna_unicode_character_t *unicode_character,
4249
     const libuna_utf8_character_t *utf8_string,
4250
     size_t utf8_string_size,
4251
     size_t *utf8_string_index,
4252
     libcerror_error_t **error )
4253
83.9k
{
4254
83.9k
  static char *function                             = "libuna_unicode_character_copy_from_utf8_rfc2279";
4255
83.9k
  libuna_unicode_character_t safe_unicode_character = 0;
4256
83.9k
  size_t safe_utf8_string_index                     = 0;
4257
83.9k
  uint8_t byte_value1                               = 0;
4258
83.9k
  uint8_t byte_value2                               = 0;
4259
83.9k
  uint8_t byte_value3                               = 0;
4260
83.9k
  uint8_t byte_value4                               = 0;
4261
83.9k
  uint8_t byte_value5                               = 0;
4262
83.9k
  uint8_t byte_value6                               = 0;
4263
83.9k
  uint8_t utf8_character_additional_bytes           = 0;
4264
4265
83.9k
  if( unicode_character == NULL )
4266
0
  {
4267
0
    libcerror_error_set(
4268
0
     error,
4269
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4270
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4271
0
     "%s: invalid Unicode character.",
4272
0
     function );
4273
4274
0
    return( -1 );
4275
0
  }
4276
83.9k
  if( utf8_string == NULL )
4277
0
  {
4278
0
    libcerror_error_set(
4279
0
     error,
4280
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4281
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4282
0
     "%s: invalid UTF-8 string.",
4283
0
     function );
4284
4285
0
    return( -1 );
4286
0
  }
4287
83.9k
  if( utf8_string_size > (size_t) SSIZE_MAX )
4288
0
  {
4289
0
    libcerror_error_set(
4290
0
     error,
4291
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4292
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4293
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4294
0
     function );
4295
4296
0
    return( -1 );
4297
0
  }
4298
83.9k
  if( utf8_string_index == NULL )
4299
0
  {
4300
0
    libcerror_error_set(
4301
0
     error,
4302
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4303
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4304
0
     "%s: invalid UTF-8 string index.",
4305
0
     function );
4306
4307
0
    return( -1 );
4308
0
  }
4309
83.9k
  safe_utf8_string_index = *utf8_string_index;
4310
4311
83.9k
  if( safe_utf8_string_index >= utf8_string_size )
4312
0
  {
4313
0
    libcerror_error_set(
4314
0
     error,
4315
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4316
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4317
0
     "%s: UTF-8 string too small.",
4318
0
     function );
4319
4320
0
    return( -1 );
4321
0
  }
4322
  /* Determine the number of additional bytes of the UTF-8 character
4323
   */
4324
83.9k
  byte_value1 = utf8_string[ safe_utf8_string_index ];
4325
4326
  /* Determine the UTF-8 character and make sure it is valid
4327
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4328
   * while its predecessor RFC 2279 allowed up to 6 bytes
4329
   */
4330
83.9k
  if( byte_value1 > 0xfd )
4331
0
  {
4332
0
    libcerror_error_set(
4333
0
     error,
4334
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4335
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4336
0
     "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4337
0
     function,
4338
0
     byte_value1 );
4339
4340
0
    return( -1 );
4341
0
  }
4342
83.9k
  if( byte_value1 < 0xc0 )
4343
50.3k
  {
4344
50.3k
    utf8_character_additional_bytes = 0;
4345
50.3k
  }
4346
33.5k
  else if( byte_value1 < 0xe0 )
4347
8.76k
  {
4348
8.76k
    utf8_character_additional_bytes = 1;
4349
8.76k
  }
4350
24.7k
  else if( byte_value1 < 0xf0 )
4351
19.8k
  {
4352
19.8k
    utf8_character_additional_bytes = 2;
4353
19.8k
  }
4354
4.93k
  else if( byte_value1 < 0xf8 )
4355
4.93k
  {
4356
4.93k
    utf8_character_additional_bytes = 3;
4357
4.93k
  }
4358
0
  else if( byte_value1 < 0xfc )
4359
0
  {
4360
0
    utf8_character_additional_bytes = 4;
4361
0
  }
4362
0
  else
4363
0
  {
4364
0
    utf8_character_additional_bytes = 5;
4365
0
  }
4366
83.9k
  if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
4367
83.9k
   || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
4368
0
  {
4369
0
    libcerror_error_set(
4370
0
     error,
4371
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4372
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4373
0
     "%s: missing UTF-8 character bytes.",
4374
0
     function );
4375
4376
0
    return( -1 );
4377
0
  }
4378
  /* Determine the UTF-8 character and make sure it is valid
4379
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4380
   * while its predecessor RFC 2279 allowed up to 6 bytes
4381
   */
4382
83.9k
  safe_unicode_character = byte_value1;
4383
4384
83.9k
  if( utf8_character_additional_bytes == 0 )
4385
50.3k
  {
4386
50.3k
    if( byte_value1 >= 0x80 )
4387
0
    {
4388
0
      libcerror_error_set(
4389
0
       error,
4390
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4391
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4392
0
       "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4393
0
       function,
4394
0
       byte_value1 );
4395
4396
0
      return( -1 );
4397
0
    }
4398
50.3k
  }
4399
83.9k
  if( utf8_character_additional_bytes >= 1 )
4400
33.5k
  {
4401
33.5k
    byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
4402
4403
33.5k
    if( ( byte_value2 < 0x80 )
4404
33.5k
     || ( byte_value2 > 0xbf ) )
4405
0
    {
4406
0
      libcerror_error_set(
4407
0
       error,
4408
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4409
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4410
0
       "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4411
0
       function,
4412
0
       byte_value2 );
4413
4414
0
      return( -1 );
4415
0
    }
4416
33.5k
    safe_unicode_character <<= 6;
4417
33.5k
    safe_unicode_character += byte_value2;
4418
4419
33.5k
    if( utf8_character_additional_bytes == 1 )
4420
8.76k
    {
4421
8.76k
      safe_unicode_character -= 0x03080;
4422
8.76k
    }
4423
33.5k
  }
4424
83.9k
  if( utf8_character_additional_bytes >= 2 )
4425
24.7k
  {
4426
24.7k
    byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
4427
4428
24.7k
    if( ( byte_value3 < 0x80 )
4429
24.7k
     || ( byte_value3 > 0xbf ) )
4430
0
    {
4431
0
      libcerror_error_set(
4432
0
       error,
4433
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4434
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4435
0
       "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
4436
0
       function,
4437
0
       byte_value3 );
4438
4439
0
      return( -1 );
4440
0
    }
4441
24.7k
    safe_unicode_character <<= 6;
4442
24.7k
    safe_unicode_character += byte_value3;
4443
4444
24.7k
    if( utf8_character_additional_bytes == 2 )
4445
19.8k
    {
4446
19.8k
      safe_unicode_character -= 0x0e2080;
4447
19.8k
    }
4448
24.7k
  }
4449
83.9k
  if( utf8_character_additional_bytes >= 3 )
4450
4.93k
  {
4451
4.93k
    byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4452
4453
4.93k
    if( ( byte_value4 < 0x80 )
4454
4.93k
     || ( byte_value4 > 0xbf ) )
4455
0
    {
4456
0
      libcerror_error_set(
4457
0
       error,
4458
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4459
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4460
0
       "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4461
0
       function,
4462
0
       byte_value4 );
4463
4464
0
      return( -1 );
4465
0
    }
4466
4.93k
    safe_unicode_character <<= 6;
4467
4.93k
    safe_unicode_character += byte_value4;
4468
4469
4.93k
    if( utf8_character_additional_bytes == 3 )
4470
4.93k
    {
4471
4.93k
      safe_unicode_character -= 0x03c82080;
4472
4.93k
    }
4473
4.93k
  }
4474
83.9k
  if( utf8_character_additional_bytes >= 4 )
4475
0
  {
4476
0
    byte_value5 = utf8_string[ safe_utf8_string_index + 4 ];
4477
4478
0
    if( ( byte_value5 < 0x80 )
4479
0
     || ( byte_value5 > 0xbf ) )
4480
0
    {
4481
0
      libcerror_error_set(
4482
0
       error,
4483
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4484
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4485
0
       "%s: invalid 5th UTF-8 character byte: 0x%02" PRIx8 ".",
4486
0
       function,
4487
0
       byte_value5 );
4488
4489
0
      return( -1 );
4490
0
    }
4491
0
    safe_unicode_character <<= 6;
4492
0
    safe_unicode_character += byte_value5;
4493
4494
0
    if( utf8_character_additional_bytes == 4 )
4495
0
    {
4496
0
      safe_unicode_character -= 0x0fa082080;
4497
0
    }
4498
0
  }
4499
83.9k
  if( utf8_character_additional_bytes == 5 )
4500
0
  {
4501
0
    byte_value6 = utf8_string[ safe_utf8_string_index + 5 ];
4502
4503
0
    if( ( byte_value6 < 0x80 )
4504
0
      || ( byte_value6 > 0xbf ) )
4505
0
    {
4506
0
      libcerror_error_set(
4507
0
       error,
4508
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4509
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4510
0
       "%s: invalid 6th UTF-8 character byte: 0x%02" PRIx8 ".",
4511
0
       function,
4512
0
       byte_value6 );
4513
4514
0
      return( -1 );
4515
0
    }
4516
0
    safe_unicode_character <<= 6;
4517
0
    safe_unicode_character += byte_value6;
4518
0
    safe_unicode_character -= 0x082082080;
4519
0
  }
4520
  /* Determine if the Unicode character is valid
4521
   */
4522
83.9k
  if( safe_unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4523
0
  {
4524
0
    libcerror_error_set(
4525
0
     error,
4526
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4527
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4528
0
     "%s: unsupported Unicode character.",
4529
0
     function );
4530
4531
0
    return( -1 );
4532
0
  }
4533
83.9k
  *unicode_character = safe_unicode_character;
4534
83.9k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4535
4536
83.9k
  return( 1 );
4537
83.9k
}
4538
4539
/* Copies an Unicode character into a UTF-8 string
4540
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4541
 * Returns 1 if successful or -1 on error
4542
 */
4543
int libuna_unicode_character_copy_to_utf8_rfc2279(
4544
     libuna_unicode_character_t unicode_character,
4545
     libuna_utf8_character_t *utf8_string,
4546
     size_t utf8_string_size,
4547
     size_t *utf8_string_index,
4548
     libcerror_error_t **error )
4549
4.13k
{
4550
4.13k
  static char *function                   = "libuna_unicode_character_copy_to_utf8_rfc2279";
4551
4.13k
  size_t safe_utf8_string_index           = 0;
4552
4.13k
  size_t utf8_character_iterator          = 0;
4553
4.13k
  uint8_t utf8_character_additional_bytes = 0;
4554
4.13k
  uint8_t utf8_first_character_mark       = 0;
4555
4556
4.13k
  if( utf8_string == NULL )
4557
0
  {
4558
0
    libcerror_error_set(
4559
0
     error,
4560
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4561
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4562
0
     "%s: invalid UTF-8 string.",
4563
0
     function );
4564
4565
0
    return( -1 );
4566
0
  }
4567
4.13k
  if( utf8_string_size > (size_t) SSIZE_MAX )
4568
0
  {
4569
0
    libcerror_error_set(
4570
0
     error,
4571
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4572
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4573
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4574
0
     function );
4575
4576
0
    return( -1 );
4577
0
  }
4578
4.13k
  if( utf8_string_index == NULL )
4579
0
  {
4580
0
    libcerror_error_set(
4581
0
     error,
4582
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4583
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4584
0
     "%s: invalid UTF-8 string index.",
4585
0
     function );
4586
4587
0
    return( -1 );
4588
0
  }
4589
4.13k
  safe_utf8_string_index = *utf8_string_index;
4590
4591
4.13k
  if( safe_utf8_string_index >= utf8_string_size )
4592
0
  {
4593
0
    libcerror_error_set(
4594
0
     error,
4595
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4596
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4597
0
     "%s: UTF-8 string too small.",
4598
0
     function );
4599
4600
0
    return( -1 );
4601
0
  }
4602
  /* Determine if the Unicode character is valid
4603
   */
4604
4.13k
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4605
0
  {
4606
0
    libcerror_error_set(
4607
0
     error,
4608
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4609
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4610
0
     "%s: unsupported Unicode character.",
4611
0
     function );
4612
4613
0
    return( -1 );
4614
0
  }
4615
  /* Determine how many UTF-8 character bytes are required
4616
   */
4617
4.13k
  if( unicode_character < 0x080 )
4618
293
  {
4619
293
    utf8_character_additional_bytes = 0;
4620
293
    utf8_first_character_mark       = 0;
4621
293
  }
4622
3.83k
  else if( unicode_character < 0x0800 )
4623
333
  {
4624
333
    utf8_character_additional_bytes = 1;
4625
333
    utf8_first_character_mark       = 0x0c0;
4626
333
  }
4627
3.50k
  else if( unicode_character < 0x010000 )
4628
3.39k
  {
4629
3.39k
    utf8_character_additional_bytes = 2;
4630
3.39k
    utf8_first_character_mark       = 0x0e0;
4631
3.39k
  }
4632
111
  else if( unicode_character < 0x0200000 )
4633
111
  {
4634
111
    utf8_character_additional_bytes = 3;
4635
111
    utf8_first_character_mark       = 0x0f0;
4636
111
  }
4637
0
  else if( unicode_character < 0x0400000 )
4638
0
  {
4639
0
    utf8_character_additional_bytes = 4;
4640
0
    utf8_first_character_mark       = 0x0f8;
4641
0
  }
4642
0
  else
4643
0
  {
4644
0
    utf8_character_additional_bytes = 5;
4645
0
    utf8_first_character_mark       = 0x0fc;
4646
0
  }
4647
  /* Convert Unicode character into UTF-8 character bytes
4648
   */
4649
4.13k
  if( ( utf8_character_additional_bytes > utf8_string_size )
4650
4.13k
   || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4651
0
  {
4652
0
    libcerror_error_set(
4653
0
     error,
4654
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4655
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4656
0
     "%s: UTF-8 string too small.",
4657
0
     function );
4658
4659
0
    return( -1 );
4660
0
  }
4661
4.13k
  for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4662
11.5k
       utf8_character_iterator > safe_utf8_string_index;
4663
7.45k
       utf8_character_iterator-- )
4664
7.45k
  {
4665
7.45k
    utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4666
4667
7.45k
    unicode_character >>= 6;
4668
7.45k
  }
4669
4.13k
  utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4670
4671
4.13k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4672
4673
4.13k
  return( 1 );
4674
4.13k
}
4675
4676
/* Determines the size of an UTF-16 character from an Unicode character
4677
 * Adds the size to the UTF-16 character size value
4678
 * Returns 1 if successful or -1 on error
4679
 */
4680
int libuna_unicode_character_size_to_utf16(
4681
     libuna_unicode_character_t unicode_character,
4682
     size_t *utf16_character_size,
4683
     libcerror_error_t **error )
4684
1.88k
{
4685
1.88k
  static char *function = "libuna_unicode_character_size_to_utf16";
4686
4687
1.88k
  if( utf16_character_size == NULL )
4688
0
  {
4689
0
    libcerror_error_set(
4690
0
     error,
4691
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4692
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4693
0
     "%s: invalid UTF-16 character size.",
4694
0
     function );
4695
4696
0
    return( -1 );
4697
0
  }
4698
1.88k
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4699
1.88k
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4700
1.88k
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4701
0
  {
4702
0
    libcerror_error_set(
4703
0
     error,
4704
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4705
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4706
0
     "%s: unsupported Unicode character.",
4707
0
     function );
4708
4709
0
    return( -1 );
4710
0
  }
4711
1.88k
  if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4712
6
  {
4713
6
    *utf16_character_size += 2;
4714
6
  }
4715
1.87k
  else
4716
1.87k
  {
4717
1.87k
    *utf16_character_size += 1;
4718
1.87k
  }
4719
1.88k
  return( 1 );
4720
1.88k
}
4721
4722
/* Copies an Unicode character from an UTF-16 string
4723
 * Returns 1 if successful or -1 on error
4724
 */
4725
int libuna_unicode_character_copy_from_utf16(
4726
     libuna_unicode_character_t *unicode_character,
4727
     const libuna_utf16_character_t *utf16_string,
4728
     size_t utf16_string_size,
4729
     size_t *utf16_string_index,
4730
     libcerror_error_t **error )
4731
0
{
4732
0
  static char *function                             = "libuna_unicode_character_copy_from_utf16";
4733
0
  libuna_unicode_character_t safe_unicode_character = 0;
4734
0
  libuna_utf16_character_t utf16_surrogate          = 0;
4735
0
  size_t safe_utf16_string_index                    = 0;
4736
4737
0
  if( unicode_character == NULL )
4738
0
  {
4739
0
    libcerror_error_set(
4740
0
     error,
4741
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4742
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4743
0
     "%s: invalid Unicode character.",
4744
0
     function );
4745
4746
0
    return( -1 );
4747
0
  }
4748
0
  if( utf16_string == NULL )
4749
0
  {
4750
0
    libcerror_error_set(
4751
0
     error,
4752
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4753
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4754
0
     "%s: invalid UTF-16 string.",
4755
0
     function );
4756
4757
0
    return( -1 );
4758
0
  }
4759
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
4760
0
  {
4761
0
    libcerror_error_set(
4762
0
     error,
4763
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4764
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4765
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
4766
0
     function );
4767
4768
0
    return( -1 );
4769
0
  }
4770
0
  if( utf16_string_index == NULL )
4771
0
  {
4772
0
    libcerror_error_set(
4773
0
     error,
4774
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4775
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4776
0
     "%s: invalid UTF-16 string index.",
4777
0
     function );
4778
4779
0
    return( -1 );
4780
0
  }
4781
0
  safe_utf16_string_index = *utf16_string_index;
4782
4783
0
  if( safe_utf16_string_index >= utf16_string_size )
4784
0
  {
4785
0
    libcerror_error_set(
4786
0
     error,
4787
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4788
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4789
0
     "%s: UTF-16 string too small.",
4790
0
     function );
4791
4792
0
    return( -1 );
4793
0
  }
4794
0
  safe_unicode_character   = utf16_string[ safe_utf16_string_index ];
4795
0
  safe_utf16_string_index += 1;
4796
4797
  /* Determine if the UTF-16 character is within the high surrogate range
4798
   */
4799
0
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4800
0
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4801
0
  {
4802
0
    if( safe_utf16_string_index >= utf16_string_size )
4803
0
    {
4804
0
      libcerror_error_set(
4805
0
       error,
4806
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4807
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4808
0
       "%s: missing surrogate UTF-16 character bytes.",
4809
0
       function );
4810
4811
0
      return( -1 );
4812
0
    }
4813
0
    utf16_surrogate          = utf16_string[ safe_utf16_string_index ];
4814
0
    safe_utf16_string_index += 1;
4815
4816
    /* Determine if the UTF-16 character is within the low surrogate range
4817
     */
4818
0
    if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4819
0
     && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4820
0
    {
4821
0
      safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4822
0
      safe_unicode_character <<= 10;
4823
0
      safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
4824
0
      safe_unicode_character  += 0x010000;
4825
0
    }
4826
0
    else
4827
0
    {
4828
0
      libcerror_error_set(
4829
0
       error,
4830
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
4831
0
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4832
0
       "%s: unsupported low surrogate UTF-16 character.",
4833
0
       function );
4834
4835
0
      return( -1 );
4836
0
    }
4837
0
  }
4838
  /* Determine if the Unicode character is valid
4839
   */
4840
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4841
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4842
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4843
0
  {
4844
0
    libcerror_error_set(
4845
0
     error,
4846
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4847
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4848
0
     "%s: unsupported Unicode character.",
4849
0
     function );
4850
4851
0
    return( -1 );
4852
0
  }
4853
0
  *unicode_character  = safe_unicode_character;
4854
0
  *utf16_string_index = safe_utf16_string_index;
4855
4856
0
  return( 1 );
4857
0
}
4858
4859
/* Copies an Unicode character into a UTF-16 string
4860
 * Returns 1 if successful or -1 on error
4861
 */
4862
int libuna_unicode_character_copy_to_utf16(
4863
     libuna_unicode_character_t unicode_character,
4864
     libuna_utf16_character_t *utf16_string,
4865
     size_t utf16_string_size,
4866
     size_t *utf16_string_index,
4867
     libcerror_error_t **error )
4868
0
{
4869
0
  static char *function          = "libuna_unicode_character_copy_to_utf16";
4870
0
  size_t safe_utf16_string_index = 0;
4871
4872
0
  if( utf16_string == NULL )
4873
0
  {
4874
0
    libcerror_error_set(
4875
0
     error,
4876
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4877
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4878
0
     "%s: invalid UTF-16 string.",
4879
0
     function );
4880
4881
0
    return( -1 );
4882
0
  }
4883
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
4884
0
  {
4885
0
    libcerror_error_set(
4886
0
     error,
4887
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4888
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4889
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
4890
0
     function );
4891
4892
0
    return( -1 );
4893
0
  }
4894
0
  if( utf16_string_index == NULL )
4895
0
  {
4896
0
    libcerror_error_set(
4897
0
     error,
4898
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4899
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4900
0
     "%s: invalid UTF-16 string index.",
4901
0
     function );
4902
4903
0
    return( -1 );
4904
0
  }
4905
0
  safe_utf16_string_index = *utf16_string_index;
4906
4907
0
  if( safe_utf16_string_index >= utf16_string_size )
4908
0
  {
4909
0
    libcerror_error_set(
4910
0
     error,
4911
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4912
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4913
0
     "%s: UTF-16 string too small.",
4914
0
     function );
4915
4916
0
    return( -1 );
4917
0
  }
4918
  /* Determine if the Unicode character is valid
4919
   */
4920
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4921
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4922
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4923
0
  {
4924
0
    libcerror_error_set(
4925
0
     error,
4926
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4927
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4928
0
     "%s: unsupported Unicode character.",
4929
0
     function );
4930
4931
0
    return( -1 );
4932
0
  }
4933
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4934
0
  {
4935
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) unicode_character;
4936
0
  }
4937
0
  else
4938
0
  {
4939
0
    if( ( utf16_string_size < 2 )
4940
0
     || ( safe_utf16_string_index > ( utf16_string_size - 2 ) ) )
4941
0
    {
4942
0
      libcerror_error_set(
4943
0
       error,
4944
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4945
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4946
0
       "%s: UTF-16 string too small.",
4947
0
       function );
4948
4949
0
      return( -1 );
4950
0
    }
4951
0
    unicode_character                        -= 0x010000;
4952
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
4953
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
4954
0
  }
4955
0
  *utf16_string_index = safe_utf16_string_index;
4956
4957
0
  return( 1 );
4958
0
}
4959
4960
/* Copies an Unicode character from an UTF-16 stream
4961
 * Returns 1 if successful or -1 on error
4962
 */
4963
int libuna_unicode_character_copy_from_utf16_stream(
4964
     libuna_unicode_character_t *unicode_character,
4965
     const uint8_t *utf16_stream,
4966
     size_t utf16_stream_size,
4967
     size_t *utf16_stream_index,
4968
     int byte_order,
4969
     libcerror_error_t **error )
4970
9.25M
{
4971
9.25M
  static char *function                             = "libuna_unicode_character_copy_from_utf16_stream";
4972
9.25M
  libuna_unicode_character_t safe_unicode_character = 0;
4973
9.25M
  libuna_utf16_character_t utf16_surrogate          = 0;
4974
9.25M
  size_t safe_utf16_stream_index                    = 0;
4975
9.25M
  int byte_order_without_flags                      = 0;
4976
4977
9.25M
  if( unicode_character == NULL )
4978
0
  {
4979
0
    libcerror_error_set(
4980
0
     error,
4981
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4982
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4983
0
     "%s: invalid Unicode character.",
4984
0
     function );
4985
4986
0
    return( -1 );
4987
0
  }
4988
9.25M
  if( utf16_stream == NULL )
4989
0
  {
4990
0
    libcerror_error_set(
4991
0
     error,
4992
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4993
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4994
0
     "%s: invalid UTF-16 stream.",
4995
0
     function );
4996
4997
0
    return( -1 );
4998
0
  }
4999
9.25M
  if( utf16_stream_size > (size_t) SSIZE_MAX )
5000
0
  {
5001
0
    libcerror_error_set(
5002
0
     error,
5003
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5004
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5005
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
5006
0
     function );
5007
5008
0
    return( -1 );
5009
0
  }
5010
9.25M
  if( utf16_stream_index == NULL )
5011
0
  {
5012
0
    libcerror_error_set(
5013
0
     error,
5014
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5015
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5016
0
     "%s: invalid UTF-16 stream index.",
5017
0
     function );
5018
5019
0
    return( -1 );
5020
0
  }
5021
9.25M
  byte_order_without_flags = byte_order & 0xff;
5022
5023
9.25M
  if( ( byte_order_without_flags != LIBUNA_ENDIAN_BIG )
5024
9.25M
   && ( byte_order_without_flags != LIBUNA_ENDIAN_LITTLE ) )
5025
0
  {
5026
0
    libcerror_error_set(
5027
0
     error,
5028
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5029
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5030
0
     "%s: unsupported byte order.",
5031
0
     function );
5032
5033
0
    return( -1 );
5034
0
  }
5035
9.25M
  safe_utf16_stream_index = *utf16_stream_index;
5036
5037
9.25M
  if( ( utf16_stream_size < 2 )
5038
9.25M
   || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5039
60
  {
5040
60
    libcerror_error_set(
5041
60
     error,
5042
60
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5043
60
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5044
60
     "%s: UTF-16 stream too small.",
5045
60
     function );
5046
5047
60
    return( -1 );
5048
60
  }
5049
9.25M
  if( byte_order_without_flags == LIBUNA_ENDIAN_BIG )
5050
2.76M
  {
5051
2.76M
    safe_unicode_character   = utf16_stream[ safe_utf16_stream_index ];
5052
2.76M
    safe_unicode_character <<= 8;
5053
2.76M
    safe_unicode_character  += utf16_stream[ safe_utf16_stream_index + 1 ];
5054
2.76M
  }
5055
6.48M
  else if( byte_order_without_flags == LIBUNA_ENDIAN_LITTLE )
5056
6.48M
  {
5057
6.48M
    safe_unicode_character   = utf16_stream[ safe_utf16_stream_index + 1 ];
5058
6.48M
    safe_unicode_character <<= 8;
5059
6.48M
    safe_unicode_character  += utf16_stream[ safe_utf16_stream_index ];
5060
6.48M
  }
5061
9.25M
  safe_utf16_stream_index += 2;
5062
5063
  /* Determine if the Unicode character is valid
5064
   */
5065
9.25M
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5066
9.25M
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5067
1.00k
  {
5068
1.00k
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5069
470
    {
5070
470
      libcerror_error_set(
5071
470
       error,
5072
470
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
5073
470
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5074
470
       "%s: unsupported UTF-16 character.",
5075
470
       function );
5076
5077
470
      return( -1 );
5078
470
    }
5079
1.00k
  }
5080
  /* Determine if the UTF-16 character is within the high surrogate range
5081
   */
5082
9.25M
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5083
9.25M
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
5084
82.0k
  {
5085
82.0k
    if( safe_utf16_stream_index > ( utf16_stream_size - 2 ) )
5086
66
    {
5087
66
      if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5088
52
      {
5089
52
        libcerror_error_set(
5090
52
         error,
5091
52
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
5092
52
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5093
52
         "%s: missing surrogate UTF-16 character bytes.",
5094
52
         function );
5095
5096
52
        return( -1 );
5097
52
      }
5098
66
    }
5099
81.9k
    else
5100
81.9k
    {
5101
81.9k
      if( byte_order_without_flags == LIBUNA_ENDIAN_BIG )
5102
66.5k
      {
5103
66.5k
        utf16_surrogate   = utf16_stream[ safe_utf16_stream_index ];
5104
66.5k
        utf16_surrogate <<= 8;
5105
66.5k
        utf16_surrogate  += utf16_stream[ safe_utf16_stream_index + 1 ];
5106
66.5k
      }
5107
15.4k
      else if( byte_order_without_flags == LIBUNA_ENDIAN_LITTLE )
5108
15.4k
      {
5109
15.4k
        utf16_surrogate   = utf16_stream[ safe_utf16_stream_index + 1 ];
5110
15.4k
        utf16_surrogate <<= 8;
5111
15.4k
        utf16_surrogate  += utf16_stream[ safe_utf16_stream_index ];
5112
15.4k
      }
5113
      /* Determine if the UTF-16 character is within the low surrogate range
5114
       */
5115
81.9k
      if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5116
81.9k
       && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5117
79.8k
      {
5118
79.8k
        safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
5119
79.8k
        safe_unicode_character <<= 10;
5120
79.8k
        safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
5121
79.8k
        safe_unicode_character  += 0x010000;
5122
5123
79.8k
        safe_utf16_stream_index += 2;
5124
79.8k
      }
5125
2.11k
      else if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5126
710
      {
5127
710
        libcerror_error_set(
5128
710
         error,
5129
710
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
5130
710
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5131
710
         "%s: unsupported low surrogate UTF-16 character.",
5132
710
         function );
5133
5134
710
        return( -1 );
5135
710
      }
5136
81.9k
    }
5137
82.0k
  }
5138
9.24M
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
5139
0
  {
5140
0
    libcerror_error_set(
5141
0
     error,
5142
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5143
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5144
0
     "%s: unsupported Unicode character.",
5145
0
     function );
5146
5147
0
    return( -1 );
5148
0
  }
5149
9.24M
  *unicode_character  = safe_unicode_character;
5150
9.24M
  *utf16_stream_index = safe_utf16_stream_index;
5151
5152
9.24M
  return( 1 );
5153
9.24M
}
5154
5155
/* Copies an Unicode character to an UTF-16 stream
5156
 * Returns 1 if successful or -1 on error
5157
 */
5158
int libuna_unicode_character_copy_to_utf16_stream(
5159
     libuna_unicode_character_t unicode_character,
5160
     uint8_t *utf16_stream,
5161
     size_t utf16_stream_size,
5162
     size_t *utf16_stream_index,
5163
     int byte_order,
5164
     libcerror_error_t **error )
5165
0
{
5166
0
  static char *function                    = "libuna_unicode_character_copy_to_utf16_stream";
5167
0
  libuna_utf16_character_t utf16_surrogate = 0;
5168
0
  size_t safe_utf16_stream_index           = 0;
5169
0
  int byte_order_without_flags             = 0;
5170
5171
0
  if( utf16_stream == NULL )
5172
0
  {
5173
0
    libcerror_error_set(
5174
0
     error,
5175
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5176
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5177
0
     "%s: invalid UTF-16 stream.",
5178
0
     function );
5179
5180
0
    return( -1 );
5181
0
  }
5182
0
  if( utf16_stream_size > (size_t) SSIZE_MAX )
5183
0
  {
5184
0
    libcerror_error_set(
5185
0
     error,
5186
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5187
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5188
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
5189
0
     function );
5190
5191
0
    return( -1 );
5192
0
  }
5193
0
  if( utf16_stream_index == NULL )
5194
0
  {
5195
0
    libcerror_error_set(
5196
0
     error,
5197
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5198
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5199
0
     "%s: invalid UTF-16 stream index.",
5200
0
     function );
5201
5202
0
    return( -1 );
5203
0
  }
5204
0
  byte_order_without_flags = byte_order & 0xff;
5205
5206
0
  if( ( byte_order_without_flags != LIBUNA_ENDIAN_BIG )
5207
0
   && ( byte_order_without_flags != LIBUNA_ENDIAN_LITTLE ) )
5208
0
  {
5209
0
    libcerror_error_set(
5210
0
     error,
5211
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5212
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5213
0
     "%s: unsupported byte order.",
5214
0
     function );
5215
5216
0
    return( -1 );
5217
0
  }
5218
0
  safe_utf16_stream_index = *utf16_stream_index;
5219
5220
  /* Determine if the Unicode character is valid
5221
   */
5222
0
  if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
5223
0
  {
5224
0
    libcerror_error_set(
5225
0
     error,
5226
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5227
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5228
0
     "%s: unsupported Unicode character.",
5229
0
     function );
5230
5231
0
    return( -1 );
5232
0
  }
5233
0
  if( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5234
0
   && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5235
0
  {
5236
0
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5237
0
    {
5238
0
      libcerror_error_set(
5239
0
       error,
5240
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
5241
0
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5242
0
       "%s: unsupported Unicode character.",
5243
0
       function );
5244
5245
0
      return( -1 );
5246
0
    }
5247
0
  }
5248
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
5249
0
  {
5250
0
    if( ( utf16_stream_size < 2 )
5251
0
     || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5252
0
    {
5253
0
      libcerror_error_set(
5254
0
       error,
5255
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5256
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5257
0
       "%s: UTF-16 stream too small.",
5258
0
       function );
5259
5260
0
      return( -1 );
5261
0
    }
5262
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5263
0
    {
5264
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5265
0
      unicode_character                         >>= 8;
5266
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5267
0
    }
5268
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5269
0
    {
5270
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5271
0
      unicode_character                         >>= 8;
5272
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5273
0
    }
5274
0
    safe_utf16_stream_index += 2;
5275
0
  }
5276
0
  else
5277
0
  {
5278
0
    if( ( utf16_stream_size < 4 )
5279
0
     || ( safe_utf16_stream_index > ( utf16_stream_size - 4 ) ) )
5280
0
    {
5281
0
      libcerror_error_set(
5282
0
       error,
5283
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5284
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5285
0
       "%s: UTF-16 stream too small.",
5286
0
       function );
5287
5288
0
      return( -1 );
5289
0
    }
5290
0
    unicode_character -= 0x010000;
5291
5292
0
    utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
5293
5294
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5295
0
    {
5296
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5297
0
      utf16_surrogate                           >>= 8;
5298
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5299
0
    }
5300
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5301
0
    {
5302
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5303
0
      utf16_surrogate                           >>= 8;
5304
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5305
0
    }
5306
0
    safe_utf16_stream_index += 2;
5307
5308
0
    utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
5309
5310
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5311
0
    {
5312
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5313
0
      utf16_surrogate                           >>= 8;
5314
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5315
0
    }
5316
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5317
0
    {
5318
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5319
0
      utf16_surrogate                           >>= 8;
5320
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5321
0
    }
5322
0
    safe_utf16_stream_index += 2;
5323
0
  }
5324
0
  *utf16_stream_index = safe_utf16_stream_index;
5325
5326
0
  return( 1 );
5327
0
}
5328
5329
/* Determines the size of an UTF-32 character from an Unicode character
5330
 * Adds the size to the UTF-32 character size value
5331
 * Returns 1 if successful or -1 on error
5332
 */
5333
int libuna_unicode_character_size_to_utf32(
5334
     libuna_unicode_character_t unicode_character,
5335
     size_t *utf32_character_size,
5336
     libcerror_error_t **error )
5337
0
{
5338
0
  static char *function = "libuna_unicode_character_size_to_utf32";
5339
5340
0
  LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
5341
5342
0
  if( utf32_character_size == NULL )
5343
0
  {
5344
0
    libcerror_error_set(
5345
0
     error,
5346
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5347
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5348
0
     "%s: invalid UTF-32 character size.",
5349
0
     function );
5350
5351
0
    return( -1 );
5352
0
  }
5353
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5354
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5355
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5356
0
  {
5357
0
    libcerror_error_set(
5358
0
     error,
5359
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5360
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5361
0
     "%s: unsupported Unicode character.",
5362
0
     function );
5363
5364
0
    return( -1 );
5365
0
  }
5366
0
  *utf32_character_size += 1;
5367
5368
0
  return( 1 );
5369
0
}
5370
5371
/* Copies an Unicode character from an UTF-32 string
5372
 * Returns 1 if successful or -1 on error
5373
 */
5374
int libuna_unicode_character_copy_from_utf32(
5375
     libuna_unicode_character_t *unicode_character,
5376
     const libuna_utf32_character_t *utf32_string,
5377
     size_t utf32_string_size,
5378
     size_t *utf32_string_index,
5379
     libcerror_error_t **error )
5380
0
{
5381
0
  static char *function                             = "libuna_unicode_character_copy_from_utf32";
5382
0
  libuna_unicode_character_t safe_unicode_character = 0;
5383
0
  size_t safe_utf32_string_index                    = 0;
5384
5385
0
  if( unicode_character == NULL )
5386
0
  {
5387
0
    libcerror_error_set(
5388
0
     error,
5389
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5390
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5391
0
     "%s: invalid Unicode character.",
5392
0
     function );
5393
5394
0
    return( -1 );
5395
0
  }
5396
0
  if( utf32_string == NULL )
5397
0
  {
5398
0
    libcerror_error_set(
5399
0
     error,
5400
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5401
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5402
0
     "%s: invalid UTF-32 string.",
5403
0
     function );
5404
5405
0
    return( -1 );
5406
0
  }
5407
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
5408
0
  {
5409
0
    libcerror_error_set(
5410
0
     error,
5411
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5412
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5413
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
5414
0
     function );
5415
5416
0
    return( -1 );
5417
0
  }
5418
0
  if( utf32_string_index == NULL )
5419
0
  {
5420
0
    libcerror_error_set(
5421
0
     error,
5422
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5423
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5424
0
     "%s: invalid UTF-32 string index.",
5425
0
     function );
5426
5427
0
    return( -1 );
5428
0
  }
5429
0
  safe_utf32_string_index = *utf32_string_index;
5430
5431
0
  if( safe_utf32_string_index >= utf32_string_size )
5432
0
  {
5433
0
    libcerror_error_set(
5434
0
     error,
5435
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5436
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5437
0
     "%s: UTF-32 string too small.",
5438
0
     function );
5439
5440
0
    return( -1 );
5441
0
  }
5442
0
  safe_unicode_character = utf32_string[ safe_utf32_string_index ];
5443
5444
  /* Determine if the Unicode character is valid
5445
   */
5446
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5447
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5448
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5449
0
  {
5450
0
    libcerror_error_set(
5451
0
     error,
5452
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5453
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5454
0
     "%s: unsupported Unicode character.",
5455
0
     function );
5456
5457
0
    return( -1 );
5458
0
  }
5459
0
  *unicode_character  = safe_unicode_character;
5460
0
  *utf32_string_index = safe_utf32_string_index + 1;
5461
5462
0
  return( 1 );
5463
0
}
5464
5465
/* Copies an Unicode character into a UTF-32 string
5466
 * Returns 1 if successful or -1 on error
5467
 */
5468
int libuna_unicode_character_copy_to_utf32(
5469
     libuna_unicode_character_t unicode_character,
5470
     libuna_utf32_character_t *utf32_string,
5471
     size_t utf32_string_size,
5472
     size_t *utf32_string_index,
5473
     libcerror_error_t **error )
5474
0
{
5475
0
  static char *function          = "libuna_unicode_character_copy_to_utf32";
5476
0
  size_t safe_utf32_string_index = 0;
5477
5478
0
  if( utf32_string == NULL )
5479
0
  {
5480
0
    libcerror_error_set(
5481
0
     error,
5482
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5483
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5484
0
     "%s: invalid UTF-32 string.",
5485
0
     function );
5486
5487
0
    return( -1 );
5488
0
  }
5489
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
5490
0
  {
5491
0
    libcerror_error_set(
5492
0
     error,
5493
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5494
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5495
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
5496
0
     function );
5497
5498
0
    return( -1 );
5499
0
  }
5500
0
  if( utf32_string_index == NULL )
5501
0
  {
5502
0
    libcerror_error_set(
5503
0
     error,
5504
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5505
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5506
0
     "%s: invalid UTF-32 string index.",
5507
0
     function );
5508
5509
0
    return( -1 );
5510
0
  }
5511
0
  safe_utf32_string_index = *utf32_string_index;
5512
5513
0
  if( safe_utf32_string_index >= utf32_string_size )
5514
0
  {
5515
0
    libcerror_error_set(
5516
0
     error,
5517
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5518
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5519
0
     "%s: UTF-32 string too small.",
5520
0
     function );
5521
5522
0
    return( -1 );
5523
0
  }
5524
  /* Determine if the Unicode character is valid
5525
   */
5526
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5527
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5528
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5529
0
  {
5530
0
    libcerror_error_set(
5531
0
     error,
5532
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5533
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5534
0
     "%s: unsupported Unicode character.",
5535
0
     function );
5536
5537
0
    return( -1 );
5538
0
  }
5539
0
  utf32_string[ safe_utf32_string_index ] = (libuna_utf32_character_t) unicode_character;
5540
5541
0
  *utf32_string_index = safe_utf32_string_index + 1;
5542
5543
0
  return( 1 );
5544
0
}
5545
5546
/* Copies an Unicode character from an UTF-32 stream
5547
 * Returns 1 if successful or -1 on error
5548
 */
5549
int libuna_unicode_character_copy_from_utf32_stream(
5550
     libuna_unicode_character_t *unicode_character,
5551
     const uint8_t *utf32_stream,
5552
     size_t utf32_stream_size,
5553
     size_t *utf32_stream_index,
5554
     int byte_order,
5555
     libcerror_error_t **error )
5556
0
{
5557
0
  static char *function                             = "libuna_unicode_character_copy_from_utf32_stream";
5558
0
  libuna_unicode_character_t safe_unicode_character = 0;
5559
0
  size_t safe_utf32_stream_index                    = 0;
5560
5561
0
  if( unicode_character == NULL )
5562
0
  {
5563
0
    libcerror_error_set(
5564
0
     error,
5565
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5566
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5567
0
     "%s: invalid Unicode character.",
5568
0
     function );
5569
5570
0
    return( -1 );
5571
0
  }
5572
0
  if( utf32_stream == NULL )
5573
0
  {
5574
0
    libcerror_error_set(
5575
0
     error,
5576
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5577
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5578
0
     "%s: invalid UTF-32 stream.",
5579
0
     function );
5580
5581
0
    return( -1 );
5582
0
  }
5583
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
5584
0
  {
5585
0
    libcerror_error_set(
5586
0
     error,
5587
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5588
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5589
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
5590
0
     function );
5591
5592
0
    return( -1 );
5593
0
  }
5594
0
  if( utf32_stream_index == NULL )
5595
0
  {
5596
0
    libcerror_error_set(
5597
0
     error,
5598
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5599
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5600
0
     "%s: invalid UTF-32 stream index.",
5601
0
     function );
5602
5603
0
    return( -1 );
5604
0
  }
5605
0
  safe_utf32_stream_index = *utf32_stream_index;
5606
5607
0
  if( ( utf32_stream_size < 4 )
5608
0
   || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5609
0
  {
5610
0
    libcerror_error_set(
5611
0
     error,
5612
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5613
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5614
0
     "%s: UTF-32 stream too small.",
5615
0
     function );
5616
5617
0
    return( -1 );
5618
0
  }
5619
0
  if( ( byte_order != LIBUNA_ENDIAN_BIG )
5620
0
   && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5621
0
  {
5622
0
    libcerror_error_set(
5623
0
     error,
5624
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5625
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5626
0
     "%s: unsupported byte order.",
5627
0
     function );
5628
5629
0
    return( -1 );
5630
0
  }
5631
0
  if( byte_order == LIBUNA_ENDIAN_BIG )
5632
0
  {
5633
0
    safe_unicode_character   = utf32_stream[ safe_utf32_stream_index ];
5634
0
    safe_unicode_character <<= 8;
5635
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 1 ];
5636
0
    safe_unicode_character <<= 8;
5637
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 2 ];
5638
0
    safe_unicode_character <<= 8;
5639
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 3 ];
5640
0
  }
5641
0
  else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5642
0
  {
5643
0
    safe_unicode_character   = utf32_stream[ safe_utf32_stream_index + 3 ];
5644
0
    safe_unicode_character <<= 8;
5645
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 2 ];
5646
0
    safe_unicode_character <<= 8;
5647
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 1 ];
5648
0
    safe_unicode_character <<= 8;
5649
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index ];
5650
0
  }
5651
  /* Determine if the Unicode character is valid
5652
   */
5653
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5654
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5655
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5656
0
  {
5657
0
    libcerror_error_set(
5658
0
     error,
5659
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5660
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5661
0
     "%s: unsupported Unicode character.",
5662
0
     function );
5663
5664
0
    return( -1 );
5665
0
  }
5666
0
  *unicode_character  = safe_unicode_character;
5667
0
  *utf32_stream_index = safe_utf32_stream_index + 4;
5668
5669
0
  return( 1 );
5670
0
}
5671
5672
/* Copies an Unicode character to an UTF-32 stream
5673
 * Returns 1 if successful or -1 on error
5674
 */
5675
int libuna_unicode_character_copy_to_utf32_stream(
5676
     libuna_unicode_character_t unicode_character,
5677
     uint8_t *utf32_stream,
5678
     size_t utf32_stream_size,
5679
     size_t *utf32_stream_index,
5680
     int byte_order,
5681
     libcerror_error_t **error )
5682
0
{
5683
0
  static char *function          = "libuna_unicode_character_copy_to_utf32_stream";
5684
0
  size_t safe_utf32_stream_index = 0;
5685
5686
0
  if( utf32_stream == NULL )
5687
0
  {
5688
0
    libcerror_error_set(
5689
0
     error,
5690
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5691
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5692
0
     "%s: invalid UTF-32 stream.",
5693
0
     function );
5694
5695
0
    return( -1 );
5696
0
  }
5697
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
5698
0
  {
5699
0
    libcerror_error_set(
5700
0
     error,
5701
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5702
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5703
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
5704
0
     function );
5705
5706
0
    return( -1 );
5707
0
  }
5708
0
  if( utf32_stream_index == NULL )
5709
0
  {
5710
0
    libcerror_error_set(
5711
0
     error,
5712
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5713
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5714
0
     "%s: invalid UTF-32 stream index.",
5715
0
     function );
5716
5717
0
    return( -1 );
5718
0
  }
5719
0
  safe_utf32_stream_index = *utf32_stream_index;
5720
5721
0
  if( ( utf32_stream_size < 4 )
5722
0
   || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5723
0
  {
5724
0
    libcerror_error_set(
5725
0
     error,
5726
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5727
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5728
0
     "%s: UTF-32 stream too small.",
5729
0
     function );
5730
5731
0
    return( -1 );
5732
0
  }
5733
0
  if( ( byte_order != LIBUNA_ENDIAN_BIG )
5734
0
   && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5735
0
  {
5736
0
    libcerror_error_set(
5737
0
     error,
5738
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5739
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5740
0
     "%s: unsupported byte order.",
5741
0
     function );
5742
5743
0
    return( -1 );
5744
0
  }
5745
  /* Determine if the Unicode character is valid
5746
   */
5747
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5748
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5749
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5750
0
  {
5751
0
    libcerror_error_set(
5752
0
     error,
5753
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5754
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5755
0
     "%s: unsupported Unicode character.",
5756
0
     function );
5757
5758
0
    return( -1 );
5759
0
  }
5760
0
  if( byte_order == LIBUNA_ENDIAN_BIG )
5761
0
  {
5762
0
    utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5763
0
    unicode_character                         >>= 8;
5764
0
    utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5765
0
    unicode_character                         >>= 8;
5766
0
    utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5767
0
    unicode_character                         >>= 8;
5768
0
    utf32_stream[ safe_utf32_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5769
0
  }
5770
0
  else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5771
0
  {
5772
0
    utf32_stream[ safe_utf32_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5773
0
    unicode_character                         >>= 8;
5774
0
    utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5775
0
    unicode_character                         >>= 8;
5776
0
    utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5777
0
    unicode_character                        >>= 8;
5778
0
    utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5779
0
  }
5780
0
  *utf32_stream_index = safe_utf32_stream_index + 4;
5781
5782
0
  return( 1 );
5783
0
}
5784