Coverage Report

Created: 2024-02-25 07:20

/src/libfshfs/libuna/libuna_unicode_character.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Unicode character functions
3
 *
4
 * Copyright (C) 2008-2024, Joachim Metz <joachim.metz@gmail.com>
5
 *
6
 * Refer to AUTHORS for acknowledgements.
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20
 */
21
22
#include <common.h>
23
#include <types.h>
24
25
#include "libuna_base64_stream.h"
26
#include "libuna_codepage_iso_8859_2.h"
27
#include "libuna_codepage_iso_8859_3.h"
28
#include "libuna_codepage_iso_8859_4.h"
29
#include "libuna_codepage_iso_8859_5.h"
30
#include "libuna_codepage_iso_8859_6.h"
31
#include "libuna_codepage_iso_8859_7.h"
32
#include "libuna_codepage_iso_8859_8.h"
33
#include "libuna_codepage_iso_8859_9.h"
34
#include "libuna_codepage_iso_8859_10.h"
35
#include "libuna_codepage_iso_8859_13.h"
36
#include "libuna_codepage_iso_8859_14.h"
37
#include "libuna_codepage_iso_8859_15.h"
38
#include "libuna_codepage_iso_8859_16.h"
39
#include "libuna_codepage_koi8_r.h"
40
#include "libuna_codepage_koi8_u.h"
41
#include "libuna_codepage_mac_arabic.h"
42
#include "libuna_codepage_mac_celtic.h"
43
#include "libuna_codepage_mac_centraleurroman.h"
44
#include "libuna_codepage_mac_croatian.h"
45
#include "libuna_codepage_mac_cyrillic.h"
46
#include "libuna_codepage_mac_dingbats.h"
47
#include "libuna_codepage_mac_farsi.h"
48
#include "libuna_codepage_mac_gaelic.h"
49
#include "libuna_codepage_mac_greek.h"
50
#include "libuna_codepage_mac_icelandic.h"
51
#include "libuna_codepage_mac_inuit.h"
52
#include "libuna_codepage_mac_roman.h"
53
#include "libuna_codepage_mac_romanian.h"
54
#include "libuna_codepage_mac_russian.h"
55
#include "libuna_codepage_mac_symbol.h"
56
#include "libuna_codepage_mac_thai.h"
57
#include "libuna_codepage_mac_turkish.h"
58
#include "libuna_codepage_mac_ukrainian.h"
59
#include "libuna_codepage_windows_874.h"
60
#include "libuna_codepage_windows_932.h"
61
#include "libuna_codepage_windows_936.h"
62
#include "libuna_codepage_windows_949.h"
63
#include "libuna_codepage_windows_950.h"
64
#include "libuna_codepage_windows_1250.h"
65
#include "libuna_codepage_windows_1251.h"
66
#include "libuna_codepage_windows_1252.h"
67
#include "libuna_codepage_windows_1253.h"
68
#include "libuna_codepage_windows_1254.h"
69
#include "libuna_codepage_windows_1255.h"
70
#include "libuna_codepage_windows_1256.h"
71
#include "libuna_codepage_windows_1257.h"
72
#include "libuna_codepage_windows_1258.h"
73
#include "libuna_definitions.h"
74
#include "libuna_libcerror.h"
75
#include "libuna_types.h"
76
#include "libuna_unicode_character.h"
77
#include "libuna_unused.h"
78
79
/* Lookup table of the byte values that can be stored directly (not base64 encoded) in an UTF-7 stream
 * The table is indexed by the byte value, 1 means valid and 0 means not valid
 *
 * Valid directly encoded characters: A-Z, a-z, 0-9, '\'', '(', ')', ',', '-', '.', '/', ':', '?'
 * Valid directly encoded whitespace: '\t', '\n', '\r', ' '
 * Valid optional directly encoded characters: '!', '"', '#', '$', '%', '&', '*', ';', '<', '=', '>', '@', '[', ']', '^', '_', '`', '{', '|', '}'
 *
 * RFC 2152 lists the apostrophe (0x27) as directly encoded character, it therefore is marked as valid
 * The backslash (0x5c) is not part of the RFC 2152 character sets but remains marked as valid
 * in this table, so that input that was accepted before is still accepted
 */
uint8_t libuna_unicode_character_utf7_valid_directly_encoded_character[ 256 ] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
100
101
/* Lookup table of the byte values that are valid UTF-7 base64 characters
 * The table is indexed by the byte value, 1 means valid and 0 means not valid
 *
 * Valid UTF-7 base64 characters: A-Z, a-z, 0-9, '+' and '/'
 */
uint8_t libuna_unicode_character_utf7_valid_base64_character[ 256 ] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
120
121
/* Determines the size of a byte stream character from an Unicode character
122
 * Adds the size to the byte stream character size value
123
 * Returns 1 if successful, 0 if the byte stream character is valid but not supported since it requires special handling or -1 on error
124
 */
125
int libuna_unicode_character_size_to_byte_stream(
126
     libuna_unicode_character_t unicode_character,
127
     int codepage,
128
     size_t *byte_stream_character_size,
129
     libcerror_error_t **error )
130
0
{
131
0
  static char *function                  = "libuna_unicode_character_size_to_byte_stream";
132
0
  size_t safe_byte_stream_character_size = 0;
133
0
  int result                             = 1;
134
135
0
  if( byte_stream_character_size == NULL )
136
0
  {
137
0
    libcerror_error_set(
138
0
     error,
139
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
140
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
141
0
     "%s: invalid byte stream character size.",
142
0
     function );
143
144
0
    return( -1 );
145
0
  }
146
0
  safe_byte_stream_character_size = *byte_stream_character_size;
147
148
0
  switch( codepage )
149
0
  {
150
0
    case LIBUNA_CODEPAGE_ASCII:
151
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
152
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
153
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
154
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
155
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
156
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
157
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
158
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
159
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
160
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
161
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
162
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
163
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
164
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
165
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
166
0
    case LIBUNA_CODEPAGE_KOI8_R:
167
0
    case LIBUNA_CODEPAGE_KOI8_U:
168
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
169
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
170
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
171
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
172
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
173
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
174
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
175
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
176
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
177
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
178
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
179
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
180
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
181
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
182
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
183
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
184
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
185
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
186
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
187
0
    case LIBUNA_CODEPAGE_WINDOWS_1252:
188
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
189
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
190
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
191
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
192
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
193
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
194
0
      safe_byte_stream_character_size += 1;
195
0
      break;
196
197
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
198
0
      result = libuna_codepage_mac_symbol_unicode_character_size_to_byte_stream(
199
0
                unicode_character,
200
0
                &safe_byte_stream_character_size,
201
0
                error );
202
0
      break;
203
204
0
    case LIBUNA_CODEPAGE_MAC_THAI:
205
0
      result = libuna_codepage_mac_thai_unicode_character_size_to_byte_stream(
206
0
                unicode_character,
207
0
                &safe_byte_stream_character_size,
208
0
                error );
209
0
      break;
210
211
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
212
0
      result = libuna_codepage_windows_932_unicode_character_size_to_byte_stream(
213
0
                unicode_character,
214
0
                &safe_byte_stream_character_size,
215
0
                error );
216
0
      break;
217
218
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
219
0
      result = libuna_codepage_windows_936_unicode_character_size_to_byte_stream(
220
0
                unicode_character,
221
0
                &safe_byte_stream_character_size,
222
0
                error );
223
0
      break;
224
225
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
226
0
      result = libuna_codepage_windows_949_unicode_character_size_to_byte_stream(
227
0
                unicode_character,
228
0
                &safe_byte_stream_character_size,
229
0
                error );
230
0
      break;
231
232
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
233
0
      result = libuna_codepage_windows_950_unicode_character_size_to_byte_stream(
234
0
                unicode_character,
235
0
                &safe_byte_stream_character_size,
236
0
                error );
237
0
      break;
238
239
0
    default:
240
0
      libcerror_error_set(
241
0
       error,
242
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
243
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
244
0
       "%s: unsupported codepage: %d.",
245
0
       function,
246
0
       codepage );
247
248
0
      return( -1 );
249
0
  }
250
0
  if( result == -1 )
251
0
  {
252
0
    libcerror_error_set(
253
0
     error,
254
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
255
0
     LIBCERROR_RUNTIME_ERROR_GET_FAILED,
256
0
     "%s: unable to determine byte stream character size.",
257
0
     function );
258
259
0
    return( -1 );
260
0
  }
261
0
  *byte_stream_character_size = safe_byte_stream_character_size;
262
263
0
  return( result );
264
0
}
265
266
/* Copies an Unicode character from a byte stream
267
 * Returns 1 if successful, 0 if the byte stream character is valid but not supported since it requires special handling or -1 on error
268
 */
269
int libuna_unicode_character_copy_from_byte_stream(
270
     libuna_unicode_character_t *unicode_character,
271
     const uint8_t *byte_stream,
272
     size_t byte_stream_size,
273
     size_t *byte_stream_index,
274
     int codepage,
275
     libcerror_error_t **error )
276
209k
{
277
209k
  static char *function                             = "libuna_unicode_character_copy_from_byte_stream";
278
209k
  libuna_unicode_character_t safe_unicode_character = 0;
279
209k
  size_t safe_byte_stream_index                     = 0;
280
209k
  uint8_t byte_stream_character                     = 0;
281
209k
  int result                                        = 1;
282
283
209k
  if( unicode_character == NULL )
284
0
  {
285
0
    libcerror_error_set(
286
0
     error,
287
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
288
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
289
0
     "%s: invalid Unicode character.",
290
0
     function );
291
292
0
    return( -1 );
293
0
  }
294
209k
  if( byte_stream == NULL )
295
0
  {
296
0
    libcerror_error_set(
297
0
     error,
298
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
299
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
300
0
     "%s: invalid byte stream.",
301
0
     function );
302
303
0
    return( -1 );
304
0
  }
305
209k
  if( byte_stream_size > (size_t) SSIZE_MAX )
306
0
  {
307
0
    libcerror_error_set(
308
0
     error,
309
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
310
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
311
0
     "%s: invalid byte stream size value exceeds maximum.",
312
0
     function );
313
314
0
    return( -1 );
315
0
  }
316
209k
  if( byte_stream_index == NULL )
317
0
  {
318
0
    libcerror_error_set(
319
0
     error,
320
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
321
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
322
0
     "%s: invalid byte stream index.",
323
0
     function );
324
325
0
    return( -1 );
326
0
  }
327
209k
  safe_byte_stream_index = *byte_stream_index;
328
329
209k
  if( safe_byte_stream_index >= byte_stream_size )
330
0
  {
331
0
    libcerror_error_set(
332
0
     error,
333
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
334
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
335
0
     "%s: byte stream too small.",
336
0
     function );
337
338
0
    return( -1 );
339
0
  }
340
209k
  byte_stream_character = byte_stream[ safe_byte_stream_index ];
341
342
209k
  switch( codepage )
343
209k
  {
344
119k
    case LIBUNA_CODEPAGE_ASCII:
345
119k
      if( byte_stream_character < 0x80 )
346
116k
      {
347
116k
        safe_unicode_character = byte_stream_character;
348
116k
      }
349
2.88k
      else
350
2.88k
      {
351
2.88k
        safe_unicode_character = 0xfffd;
352
2.88k
      }
353
119k
      safe_byte_stream_index += 1;
354
355
119k
      break;
356
357
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
358
0
      safe_unicode_character = byte_stream_character;
359
360
0
      safe_byte_stream_index += 1;
361
362
0
      break;
363
364
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
365
0
      if( byte_stream_character < 0xa0 )
366
0
      {
367
0
        safe_unicode_character = byte_stream_character;
368
0
      }
369
0
      else
370
0
      {
371
0
        byte_stream_character -= 0xa0;
372
373
0
        safe_unicode_character = libuna_codepage_iso_8859_2_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
374
0
      }
375
0
      safe_byte_stream_index += 1;
376
377
0
      break;
378
379
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
380
0
      if( byte_stream_character < 0xa0 )
381
0
      {
382
0
        safe_unicode_character = byte_stream_character;
383
0
      }
384
0
      else
385
0
      {
386
0
        byte_stream_character -= 0xa0;
387
388
0
        safe_unicode_character = libuna_codepage_iso_8859_3_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
389
0
      }
390
0
      safe_byte_stream_index += 1;
391
392
0
      break;
393
394
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
395
0
      if( byte_stream_character < 0xa0 )
396
0
      {
397
0
        safe_unicode_character = byte_stream_character;
398
0
      }
399
0
      else
400
0
      {
401
0
        byte_stream_character -= 0xa0;
402
403
0
        safe_unicode_character = libuna_codepage_iso_8859_4_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
404
0
      }
405
0
      safe_byte_stream_index += 1;
406
407
0
      break;
408
409
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
410
0
      if( byte_stream_character < 0xa0 )
411
0
      {
412
0
        safe_unicode_character = byte_stream_character;
413
0
      }
414
0
      else
415
0
      {
416
0
        byte_stream_character -= 0xa0;
417
418
0
        safe_unicode_character = libuna_codepage_iso_8859_5_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
419
0
      }
420
0
      safe_byte_stream_index += 1;
421
422
0
      break;
423
424
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
425
0
      if( byte_stream_character < 0xa0 )
426
0
      {
427
0
        safe_unicode_character = byte_stream_character;
428
0
      }
429
0
      else
430
0
      {
431
0
        byte_stream_character -= 0xa0;
432
433
0
        safe_unicode_character = libuna_codepage_iso_8859_6_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
434
0
      }
435
0
      safe_byte_stream_index += 1;
436
437
0
      break;
438
439
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
440
0
      if( byte_stream_character < 0xa0 )
441
0
      {
442
0
        safe_unicode_character = byte_stream_character;
443
0
      }
444
0
      else
445
0
      {
446
0
        byte_stream_character -= 0xa0;
447
448
0
        safe_unicode_character = libuna_codepage_iso_8859_7_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
449
0
      }
450
0
      safe_byte_stream_index += 1;
451
452
0
      break;
453
454
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
455
0
      if( byte_stream_character < 0xa0 )
456
0
      {
457
0
        safe_unicode_character = byte_stream_character;
458
0
      }
459
0
      else
460
0
      {
461
0
        byte_stream_character -= 0xa0;
462
463
0
        safe_unicode_character = libuna_codepage_iso_8859_8_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
464
0
      }
465
0
      safe_byte_stream_index += 1;
466
467
0
      break;
468
469
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
470
0
      if( byte_stream_character < 0xd0 )
471
0
      {
472
0
        safe_unicode_character = byte_stream_character;
473
0
      }
474
0
      else
475
0
      {
476
0
        byte_stream_character -= 0xd0;
477
478
0
        safe_unicode_character = libuna_codepage_iso_8859_9_byte_stream_to_unicode_base_0xd0[ byte_stream_character ];
479
0
      }
480
0
      safe_byte_stream_index += 1;
481
482
0
      break;
483
484
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
485
0
      if( byte_stream_character < 0xa0 )
486
0
      {
487
0
        safe_unicode_character = byte_stream_character;
488
0
      }
489
0
      else
490
0
      {
491
0
        byte_stream_character -= 0xa0;
492
493
0
        safe_unicode_character = libuna_codepage_iso_8859_10_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
494
0
      }
495
0
      safe_byte_stream_index += 1;
496
497
0
      break;
498
499
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
500
0
      if( byte_stream_character < 0xa1 )
501
0
      {
502
0
        safe_unicode_character = byte_stream_character;
503
0
      }
504
0
      else if( byte_stream_character < 0xdb )
505
0
      {
506
0
        safe_unicode_character = byte_stream_character + 0x0d60;
507
0
      }
508
0
      else if( byte_stream_character < 0xdf )
509
0
      {
510
0
        safe_unicode_character = 0xfffd;
511
0
      }
512
0
      else if( byte_stream_character < 0xfc )
513
0
      {
514
0
        safe_unicode_character = byte_stream_character + 0x0d60;
515
0
      }
516
0
      else
517
0
      {
518
0
        safe_unicode_character = 0xfffd;
519
0
      }
520
0
      safe_byte_stream_index += 1;
521
522
0
      break;
523
524
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
525
0
      if( byte_stream_character < 0xa0 )
526
0
      {
527
0
        safe_unicode_character = byte_stream_character;
528
0
      }
529
0
      else
530
0
      {
531
0
        byte_stream_character -= 0xa0;
532
533
0
        safe_unicode_character = libuna_codepage_iso_8859_13_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
534
0
      }
535
0
      safe_byte_stream_index += 1;
536
537
0
      break;
538
539
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
540
0
      if( byte_stream_character < 0xa0 )
541
0
      {
542
0
        safe_unicode_character = byte_stream_character;
543
0
      }
544
0
      else
545
0
      {
546
0
        byte_stream_character -= 0xa0;
547
548
0
        safe_unicode_character = libuna_codepage_iso_8859_14_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
549
0
      }
550
0
      safe_byte_stream_index += 1;
551
552
0
      break;
553
554
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
555
0
      if( ( byte_stream_character >= 0xa0 )
556
0
       && ( byte_stream_character < 0xc0 ) )
557
0
      {
558
0
        byte_stream_character -= 0xa0;
559
560
0
        safe_unicode_character = libuna_codepage_iso_8859_15_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
561
0
      }
562
0
      else
563
0
      {
564
0
        safe_unicode_character = byte_stream_character;
565
0
      }
566
0
      safe_byte_stream_index += 1;
567
568
0
      break;
569
570
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
571
0
      if( byte_stream_character < 0xa0 )
572
0
      {
573
0
        safe_unicode_character = byte_stream_character;
574
0
      }
575
0
      else
576
0
      {
577
0
        byte_stream_character -= 0xa0;
578
579
0
        safe_unicode_character = libuna_codepage_iso_8859_16_byte_stream_to_unicode_base_0xa0[ byte_stream_character ];
580
0
      }
581
0
      safe_byte_stream_index += 1;
582
583
0
      break;
584
585
0
    case LIBUNA_CODEPAGE_KOI8_R:
586
0
      result = libuna_codepage_koi8_r_copy_from_byte_stream(
587
0
                &safe_unicode_character,
588
0
                byte_stream,
589
0
                byte_stream_size,
590
0
                &safe_byte_stream_index,
591
0
                error );
592
0
      break;
593
594
0
    case LIBUNA_CODEPAGE_KOI8_U:
595
0
      result = libuna_codepage_koi8_u_copy_from_byte_stream(
596
0
                &safe_unicode_character,
597
0
                byte_stream,
598
0
                byte_stream_size,
599
0
                &safe_byte_stream_index,
600
0
                error );
601
0
      break;
602
603
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
604
0
      result = libuna_codepage_mac_arabic_copy_from_byte_stream(
605
0
                &safe_unicode_character,
606
0
                byte_stream,
607
0
                byte_stream_size,
608
0
                &safe_byte_stream_index,
609
0
                error );
610
0
      break;
611
612
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
613
0
      result = libuna_codepage_mac_celtic_copy_from_byte_stream(
614
0
                &safe_unicode_character,
615
0
                byte_stream,
616
0
                byte_stream_size,
617
0
                &safe_byte_stream_index,
618
0
                error );
619
0
      break;
620
621
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
622
0
      result = libuna_codepage_mac_centraleurroman_copy_from_byte_stream(
623
0
                &safe_unicode_character,
624
0
                byte_stream,
625
0
                byte_stream_size,
626
0
                &safe_byte_stream_index,
627
0
                error );
628
0
      break;
629
630
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
631
0
      result = libuna_codepage_mac_croatian_copy_from_byte_stream(
632
0
                &safe_unicode_character,
633
0
                byte_stream,
634
0
                byte_stream_size,
635
0
                &safe_byte_stream_index,
636
0
                error );
637
0
      break;
638
639
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
640
0
      result = libuna_codepage_mac_cyrillic_copy_from_byte_stream(
641
0
                &safe_unicode_character,
642
0
                byte_stream,
643
0
                byte_stream_size,
644
0
                &safe_byte_stream_index,
645
0
                error );
646
0
      break;
647
648
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
649
0
      result = libuna_codepage_mac_dingbats_copy_from_byte_stream(
650
0
                &safe_unicode_character,
651
0
                byte_stream,
652
0
                byte_stream_size,
653
0
                &safe_byte_stream_index,
654
0
                error );
655
0
      break;
656
657
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
658
0
      result = libuna_codepage_mac_farsi_copy_from_byte_stream(
659
0
                &safe_unicode_character,
660
0
                byte_stream,
661
0
                byte_stream_size,
662
0
                &safe_byte_stream_index,
663
0
                error );
664
0
      break;
665
666
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
667
0
      result = libuna_codepage_mac_gaelic_copy_from_byte_stream(
668
0
                &safe_unicode_character,
669
0
                byte_stream,
670
0
                byte_stream_size,
671
0
                &safe_byte_stream_index,
672
0
                error );
673
0
      break;
674
675
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
676
0
      result = libuna_codepage_mac_greek_copy_from_byte_stream(
677
0
                &safe_unicode_character,
678
0
                byte_stream,
679
0
                byte_stream_size,
680
0
                &safe_byte_stream_index,
681
0
                error );
682
0
      break;
683
684
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
685
0
      result = libuna_codepage_mac_icelandic_copy_from_byte_stream(
686
0
                &safe_unicode_character,
687
0
                byte_stream,
688
0
                byte_stream_size,
689
0
                &safe_byte_stream_index,
690
0
                error );
691
0
      break;
692
693
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
694
0
      result = libuna_codepage_mac_inuit_copy_from_byte_stream(
695
0
                &safe_unicode_character,
696
0
                byte_stream,
697
0
                byte_stream_size,
698
0
                &safe_byte_stream_index,
699
0
                error );
700
0
      break;
701
702
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
703
0
      result = libuna_codepage_mac_roman_copy_from_byte_stream(
704
0
                &safe_unicode_character,
705
0
                byte_stream,
706
0
                byte_stream_size,
707
0
                &safe_byte_stream_index,
708
0
                error );
709
0
      break;
710
711
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
712
0
      result = libuna_codepage_mac_romanian_copy_from_byte_stream(
713
0
                &safe_unicode_character,
714
0
                byte_stream,
715
0
                byte_stream_size,
716
0
                &safe_byte_stream_index,
717
0
                error );
718
0
      break;
719
720
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
721
0
      result = libuna_codepage_mac_russian_copy_from_byte_stream(
722
0
                &safe_unicode_character,
723
0
                byte_stream,
724
0
                byte_stream_size,
725
0
                &safe_byte_stream_index,
726
0
                error );
727
0
      break;
728
729
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
730
0
      result = libuna_codepage_mac_symbol_copy_from_byte_stream(
731
0
                &safe_unicode_character,
732
0
                byte_stream,
733
0
                byte_stream_size,
734
0
                &safe_byte_stream_index,
735
0
                error );
736
0
      break;
737
738
0
    case LIBUNA_CODEPAGE_MAC_THAI:
739
0
      result = libuna_codepage_mac_thai_copy_from_byte_stream(
740
0
                &safe_unicode_character,
741
0
                byte_stream,
742
0
                byte_stream_size,
743
0
                &safe_byte_stream_index,
744
0
                error );
745
0
      break;
746
747
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
748
0
      result = libuna_codepage_mac_turkish_copy_from_byte_stream(
749
0
                &safe_unicode_character,
750
0
                byte_stream,
751
0
                byte_stream_size,
752
0
                &safe_byte_stream_index,
753
0
                error );
754
0
      break;
755
756
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
757
0
      result = libuna_codepage_mac_ukrainian_copy_from_byte_stream(
758
0
                &safe_unicode_character,
759
0
                byte_stream,
760
0
                byte_stream_size,
761
0
                &safe_byte_stream_index,
762
0
                error );
763
0
      break;
764
765
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
766
0
      result = libuna_codepage_windows_874_copy_from_byte_stream(
767
0
                &safe_unicode_character,
768
0
                byte_stream,
769
0
                byte_stream_size,
770
0
                &safe_byte_stream_index,
771
0
                error );
772
0
      break;
773
774
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
775
0
      result = libuna_codepage_windows_932_copy_from_byte_stream(
776
0
                &safe_unicode_character,
777
0
                byte_stream,
778
0
                byte_stream_size,
779
0
                &safe_byte_stream_index,
780
0
                error );
781
0
      break;
782
783
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
784
0
      result = libuna_codepage_windows_936_copy_from_byte_stream(
785
0
                &safe_unicode_character,
786
0
                byte_stream,
787
0
                byte_stream_size,
788
0
                &safe_byte_stream_index,
789
0
                error );
790
0
      break;
791
792
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
793
0
      result = libuna_codepage_windows_949_copy_from_byte_stream(
794
0
                &safe_unicode_character,
795
0
                byte_stream,
796
0
                byte_stream_size,
797
0
                &safe_byte_stream_index,
798
0
                error );
799
0
      break;
800
801
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
802
0
      result = libuna_codepage_windows_950_copy_from_byte_stream(
803
0
                &safe_unicode_character,
804
0
                byte_stream,
805
0
                byte_stream_size,
806
0
                &safe_byte_stream_index,
807
0
                error );
808
0
      break;
809
810
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
811
0
      result = libuna_codepage_windows_1250_copy_from_byte_stream(
812
0
                &safe_unicode_character,
813
0
                byte_stream,
814
0
                byte_stream_size,
815
0
                &safe_byte_stream_index,
816
0
                error );
817
0
      break;
818
819
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
820
0
      result = libuna_codepage_windows_1251_copy_from_byte_stream(
821
0
                &safe_unicode_character,
822
0
                byte_stream,
823
0
                byte_stream_size,
824
0
                &safe_byte_stream_index,
825
0
                error );
826
0
      break;
827
828
90.1k
    case LIBUNA_CODEPAGE_WINDOWS_1252:
829
90.1k
      result = libuna_codepage_windows_1252_copy_from_byte_stream(
830
90.1k
                &safe_unicode_character,
831
90.1k
                byte_stream,
832
90.1k
                byte_stream_size,
833
90.1k
                &safe_byte_stream_index,
834
90.1k
                error );
835
90.1k
      break;
836
837
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
838
0
      result = libuna_codepage_windows_1253_copy_from_byte_stream(
839
0
                &safe_unicode_character,
840
0
                byte_stream,
841
0
                byte_stream_size,
842
0
                &safe_byte_stream_index,
843
0
                error );
844
0
      break;
845
846
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
847
0
      result = libuna_codepage_windows_1254_copy_from_byte_stream(
848
0
                &safe_unicode_character,
849
0
                byte_stream,
850
0
                byte_stream_size,
851
0
                &safe_byte_stream_index,
852
0
                error );
853
0
      break;
854
855
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
856
0
      result = libuna_codepage_windows_1255_copy_from_byte_stream(
857
0
                &safe_unicode_character,
858
0
                byte_stream,
859
0
                byte_stream_size,
860
0
                &safe_byte_stream_index,
861
0
                error );
862
0
      break;
863
864
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
865
0
      result = libuna_codepage_windows_1256_copy_from_byte_stream(
866
0
                &safe_unicode_character,
867
0
                byte_stream,
868
0
                byte_stream_size,
869
0
                &safe_byte_stream_index,
870
0
                error );
871
0
      break;
872
873
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
874
0
      result = libuna_codepage_windows_1257_copy_from_byte_stream(
875
0
                &safe_unicode_character,
876
0
                byte_stream,
877
0
                byte_stream_size,
878
0
                &safe_byte_stream_index,
879
0
                error );
880
0
      break;
881
882
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
883
0
      result = libuna_codepage_windows_1258_copy_from_byte_stream(
884
0
                &safe_unicode_character,
885
0
                byte_stream,
886
0
                byte_stream_size,
887
0
                &safe_byte_stream_index,
888
0
                error );
889
0
      break;
890
891
0
    default:
892
0
      libcerror_error_set(
893
0
       error,
894
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
895
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
896
0
      "%s: unsupported codepage: %d.",
897
0
       function,
898
0
       codepage );
899
900
0
      return( -1 );
901
209k
  }
902
209k
  if( result == -1 )
903
0
  {
904
0
    libcerror_error_set(
905
0
     error,
906
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
907
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
908
0
     "%s: unable to copy Unicode character from byte stream.",
909
0
     function );
910
911
0
    return( -1 );
912
0
  }
913
209k
  *unicode_character = safe_unicode_character;
914
209k
  *byte_stream_index = safe_byte_stream_index;
915
916
209k
  return( result );
917
209k
}
918
919
/* Copies an Unicode character to a byte stream
920
 * Returns 1 if successful, 0 if the Unicode character is valid but not supported since it requires special handling or -1 on error
921
 */
922
int libuna_unicode_character_copy_to_byte_stream(
923
     libuna_unicode_character_t unicode_character,
924
     uint8_t *byte_stream,
925
     size_t byte_stream_size,
926
     size_t *byte_stream_index,
927
     int codepage,
928
     libcerror_error_t **error )
929
0
{
930
0
  static char *function         = "libuna_unicode_character_copy_to_byte_stream";
931
0
  size_t safe_byte_stream_index = 0;
932
0
  int result                    = 1;
933
934
0
  if( byte_stream == NULL )
935
0
  {
936
0
    libcerror_error_set(
937
0
     error,
938
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
939
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
940
0
     "%s: invalid byte stream.",
941
0
     function );
942
943
0
    return( -1 );
944
0
  }
945
0
  if( byte_stream_size > (size_t) SSIZE_MAX )
946
0
  {
947
0
    libcerror_error_set(
948
0
     error,
949
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
950
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
951
0
     "%s: invalid byte stream size value exceeds maximum.",
952
0
     function );
953
954
0
    return( -1 );
955
0
  }
956
0
  if( byte_stream_index == NULL )
957
0
  {
958
0
    libcerror_error_set(
959
0
     error,
960
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
961
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
962
0
     "%s: invalid byte stream index.",
963
0
     function );
964
965
0
    return( -1 );
966
0
  }
967
0
  safe_byte_stream_index = *byte_stream_index;
968
969
0
  if( safe_byte_stream_index >= byte_stream_size )
970
0
  {
971
0
    libcerror_error_set(
972
0
     error,
973
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
974
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
975
0
     "%s: byte stream too small.",
976
0
     function );
977
978
0
    return( -1 );
979
0
  }
980
0
  switch( codepage )
981
0
  {
982
0
    case LIBUNA_CODEPAGE_ASCII:
983
0
      if( unicode_character < 0x0080 )
984
0
      {
985
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
986
0
      }
987
0
      else
988
0
      {
989
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
990
0
      }
991
0
      safe_byte_stream_index += 1;
992
993
0
      break;
994
995
0
    case LIBUNA_CODEPAGE_ISO_8859_1:
996
0
      if( unicode_character < 0x0100 )
997
0
      {
998
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
999
0
      }
1000
0
      else
1001
0
      {
1002
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1003
0
      }
1004
0
      safe_byte_stream_index += 1;
1005
1006
0
      break;
1007
1008
0
    case LIBUNA_CODEPAGE_ISO_8859_2:
1009
0
      if( unicode_character < 0x00a0 )
1010
0
      {
1011
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1012
0
      }
1013
0
      else if( ( unicode_character >= 0x00a0 )
1014
0
            && ( unicode_character < 0x0120 ) )
1015
0
      {
1016
0
        unicode_character -= 0x00a0;
1017
1018
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1019
0
      }
1020
0
      else if( ( unicode_character >= 0x0138 )
1021
0
            && ( unicode_character < 0x0180 ) )
1022
0
      {
1023
0
        unicode_character -= 0x0138;
1024
1025
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x0138[ unicode_character ];
1026
0
      }
1027
0
      else if( ( unicode_character >= 0x02d8 )
1028
0
            && ( unicode_character < 0x02e0 ) )
1029
0
      {
1030
0
        unicode_character -= 0x02d8;
1031
1032
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_2_unicode_to_byte_stream_base_0x02d8[ unicode_character ];
1033
0
      }
1034
0
      else if( unicode_character == 0x02c7 )
1035
0
      {
1036
0
        byte_stream[ safe_byte_stream_index ] = 0xb7;
1037
0
      }
1038
0
      else
1039
0
      {
1040
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1041
0
      }
1042
0
      safe_byte_stream_index += 1;
1043
1044
0
      break;
1045
1046
0
    case LIBUNA_CODEPAGE_ISO_8859_3:
1047
0
      if( unicode_character < 0x00a0 )
1048
0
      {
1049
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1050
0
      }
1051
0
      else if( ( unicode_character >= 0x00a0 )
1052
0
            && ( unicode_character < 0x0100 ) )
1053
0
      {
1054
0
        unicode_character -= 0x00a0;
1055
1056
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1057
0
      }
1058
0
      else if( ( unicode_character >= 0x0108 )
1059
0
            && ( unicode_character < 0x0110 ) )
1060
0
      {
1061
0
        unicode_character -= 0x0108;
1062
1063
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0108[ unicode_character ];
1064
0
      }
1065
0
      else if( ( unicode_character >= 0x0118 )
1066
0
            && ( unicode_character < 0x0128 ) )
1067
0
      {
1068
0
        unicode_character -= 0x0118;
1069
1070
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0118[ unicode_character ];
1071
0
      }
1072
0
      else if( ( unicode_character >= 0x0130 )
1073
0
            && ( unicode_character < 0x0138 ) )
1074
0
      {
1075
0
        unicode_character -= 0x0130;
1076
1077
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0130[ unicode_character ];
1078
0
      }
1079
0
      else if( ( unicode_character >= 0x0158 )
1080
0
            && ( unicode_character < 0x0160 ) )
1081
0
      {
1082
0
        unicode_character -= 0x0158;
1083
1084
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_3_unicode_to_byte_stream_base_0x0158[ unicode_character ];
1085
0
      }
1086
0
      else switch( unicode_character )
1087
0
      {
1088
0
        case 0x016c:
1089
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1090
0
          break;
1091
1092
0
        case 0x016d:
1093
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1094
0
          break;
1095
1096
0
        case 0x017b:
1097
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1098
0
          break;
1099
1100
0
        case 0x017c:
1101
0
          byte_stream[ safe_byte_stream_index ] = 0xbf;
1102
0
          break;
1103
1104
0
        case 0x02d8:
1105
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1106
0
          break;
1107
1108
0
        case 0x02d9:
1109
0
          byte_stream[ safe_byte_stream_index ] = 0xff;
1110
0
          break;
1111
1112
0
        default:
1113
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1114
0
          break;
1115
0
      }
1116
0
      safe_byte_stream_index += 1;
1117
1118
0
      break;
1119
1120
0
    case LIBUNA_CODEPAGE_ISO_8859_4:
1121
0
      if( unicode_character < 0x00a0 )
1122
0
      {
1123
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1124
0
      }
1125
0
      else if( ( unicode_character >= 0x00a0 )
1126
0
            && ( unicode_character < 0x0158 ) )
1127
0
      {
1128
0
        unicode_character -= 0x00a0;
1129
1130
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1131
0
      }
1132
0
      else if( ( unicode_character >= 0x0160 )
1133
0
            && ( unicode_character < 0x0180 ) )
1134
0
      {
1135
0
        unicode_character -= 0x0160;
1136
1137
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_4_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1138
0
      }
1139
0
      else switch( unicode_character )
1140
0
      {
1141
0
        case 0x02c7:
1142
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1143
0
          break;
1144
1145
0
        case 0x02d9:
1146
0
          byte_stream[ safe_byte_stream_index ] = 0xff;
1147
0
          break;
1148
1149
0
        case 0x02db:
1150
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1151
0
          break;
1152
1153
0
        default:
1154
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1155
0
          break;
1156
0
      }
1157
0
      safe_byte_stream_index += 1;
1158
1159
0
      break;
1160
1161
0
    case LIBUNA_CODEPAGE_ISO_8859_5:
1162
0
      if( unicode_character < 0x00a1 )
1163
0
      {
1164
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1165
0
      }
1166
0
      else if( ( unicode_character >= 0x0400 )
1167
0
            && ( unicode_character < 0x0460 ) )
1168
0
      {
1169
0
        unicode_character -= 0x0400;
1170
1171
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_5_unicode_to_byte_stream_base_0x0400[ unicode_character ];
1172
0
      }
1173
0
      else switch( unicode_character )
1174
0
      {
1175
0
        case 0x00a7:
1176
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1177
0
          break;
1178
1179
0
        case 0x00ad:
1180
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1181
0
          break;
1182
1183
0
        case 0x2116:
1184
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1185
0
          break;
1186
1187
0
        default:
1188
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1189
0
          break;
1190
0
      }
1191
0
      safe_byte_stream_index += 1;
1192
1193
0
      break;
1194
1195
0
    case LIBUNA_CODEPAGE_ISO_8859_6:
1196
0
      if( unicode_character < 0x00a1 )
1197
0
      {
1198
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1199
0
      }
1200
0
      else if( ( unicode_character >= 0x0618 )
1201
0
            && ( unicode_character < 0x658 ) )
1202
0
      {
1203
0
        unicode_character -= 0x0618;
1204
1205
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_6_unicode_to_byte_stream_base_0x0618[ unicode_character ];
1206
0
      }
1207
0
      else switch( unicode_character )
1208
0
      {
1209
0
        case 0x00a4:
1210
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1211
0
          break;
1212
1213
0
        case 0x00ad:
1214
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1215
0
          break;
1216
1217
0
        case 0x060c:
1218
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1219
0
          break;
1220
1221
0
        default:
1222
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1223
0
          break;
1224
0
      }
1225
0
      safe_byte_stream_index += 1;
1226
1227
0
      break;
1228
1229
0
    case LIBUNA_CODEPAGE_ISO_8859_7:
1230
0
      if( unicode_character < 0x00a0 )
1231
0
      {
1232
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1233
0
      }
1234
0
      else if( ( unicode_character >= 0x00a0 )
1235
0
            && ( unicode_character < 0x00b8 ) )
1236
0
      {
1237
0
        unicode_character -= 0x00a0;
1238
1239
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1240
0
      }
1241
0
      else if( ( unicode_character >= 0x0380 )
1242
0
            && ( unicode_character < 0x03d0 ) )
1243
0
      {
1244
0
        unicode_character -= 0x0380;
1245
1246
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_7_unicode_to_byte_stream_base_0x0380[ unicode_character ];
1247
0
      }
1248
0
      else switch( unicode_character )
1249
0
      {
1250
0
        case 0x00bb:
1251
0
          byte_stream[ safe_byte_stream_index ] = 0xbb;
1252
0
          break;
1253
1254
0
        case 0x00bd:
1255
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1256
0
          break;
1257
1258
0
        case 0x037a:
1259
0
          byte_stream[ safe_byte_stream_index ] = 0xaa;
1260
0
          break;
1261
1262
0
        case 0x2015:
1263
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1264
0
          break;
1265
1266
0
        case 0x2018:
1267
0
          byte_stream[ safe_byte_stream_index ] = 0xa1;
1268
0
          break;
1269
1270
0
        case 0x2019:
1271
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1272
0
          break;
1273
1274
0
        case 0x20ac:
1275
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1276
0
          break;
1277
1278
0
        case 0x20af:
1279
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1280
0
          break;
1281
1282
0
        default:
1283
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1284
0
          break;
1285
0
      }
1286
0
      safe_byte_stream_index += 1;
1287
1288
0
      break;
1289
1290
0
    case LIBUNA_CODEPAGE_ISO_8859_8:
1291
0
      if( unicode_character < 0x00a0 )
1292
0
      {
1293
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1294
0
      }
1295
0
      else if( ( unicode_character >= 0x00a0 )
1296
0
            && ( unicode_character < 0x00c0 ) )
1297
0
      {
1298
0
        unicode_character -= 0x00a0;
1299
1300
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1301
0
      }
1302
0
      else if( ( unicode_character >= 0x05d0 )
1303
0
            && ( unicode_character < 0x05f0 ) )
1304
0
      {
1305
0
        unicode_character -= 0x05d0;
1306
1307
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_8_unicode_to_byte_stream_base_0x05d0[ unicode_character ];
1308
0
      }
1309
0
      else switch( unicode_character )
1310
0
      {
1311
0
        case 0x00d7:
1312
0
          byte_stream[ safe_byte_stream_index ] = 0xaa;
1313
0
          break;
1314
1315
0
        case 0x00f7:
1316
0
          byte_stream[ safe_byte_stream_index ] = 0xba;
1317
0
          break;
1318
1319
0
        case 0x200e:
1320
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1321
0
          break;
1322
1323
0
        case 0x200f:
1324
0
          byte_stream[ safe_byte_stream_index ] = 0xfe;
1325
0
          break;
1326
1327
0
        case 0x2017:
1328
0
          byte_stream[ safe_byte_stream_index ] = 0xdf;
1329
0
          break;
1330
1331
0
        default:
1332
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1333
0
          break;
1334
0
      }
1335
0
      safe_byte_stream_index += 1;
1336
1337
0
      break;
1338
1339
0
    case LIBUNA_CODEPAGE_ISO_8859_9:
1340
0
      if( unicode_character < 0x00d0 )
1341
0
      {
1342
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1343
0
      }
1344
0
      else if( ( unicode_character >= 0x00d0 )
1345
0
            && ( unicode_character < 0x0100 ) )
1346
0
      {
1347
0
        unicode_character -= 0x00d0;
1348
1349
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_9_unicode_to_byte_stream_base_0x00d0[ unicode_character ];
1350
0
      }
1351
0
      else switch( unicode_character )
1352
0
      {
1353
0
        case 0x011e:
1354
0
          byte_stream[ safe_byte_stream_index ] = 0xd0;
1355
0
          break;
1356
1357
0
        case 0x011f:
1358
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1359
0
          break;
1360
1361
0
        case 0x0130:
1362
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1363
0
          break;
1364
1365
0
        case 0x0131:
1366
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1367
0
          break;
1368
1369
0
        case 0x015e:
1370
0
          byte_stream[ safe_byte_stream_index ] = 0xde;
1371
0
          break;
1372
1373
0
        case 0x015f:
1374
0
          byte_stream[ safe_byte_stream_index ] = 0xfe;
1375
0
          break;
1376
1377
0
        default:
1378
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1379
0
          break;
1380
0
      }
1381
0
      safe_byte_stream_index += 1;
1382
1383
0
      break;
1384
1385
0
    case LIBUNA_CODEPAGE_ISO_8859_10:
1386
0
      if( unicode_character < 0x00a1 )
1387
0
      {
1388
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1389
0
      }
1390
0
      else if( ( unicode_character >= 0x00c0 )
1391
0
            && ( unicode_character < 0x0150 ) )
1392
0
      {
1393
0
        unicode_character -= 0x00c0;
1394
1395
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1396
0
      }
1397
0
      else if( ( unicode_character >= 0x0160 )
1398
0
            && ( unicode_character < 0x0170 ) )
1399
0
      {
1400
0
        unicode_character -= 0x0160;
1401
1402
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_10_unicode_to_byte_stream_base_0x0160[ unicode_character ];
1403
0
      }
1404
0
      else switch( unicode_character )
1405
0
      {
1406
0
        case 0x00a7:
1407
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1408
0
          break;
1409
1410
0
        case 0x00ad:
1411
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1412
0
          break;
1413
1414
0
        case 0x00b0:
1415
0
          byte_stream[ safe_byte_stream_index ] = 0xb0;
1416
0
          break;
1417
1418
0
        case 0x00b7:
1419
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1420
0
          break;
1421
1422
0
        case 0x0172:
1423
0
          byte_stream[ safe_byte_stream_index ] = 0xd9;
1424
0
          break;
1425
1426
0
        case 0x0173:
1427
0
          byte_stream[ safe_byte_stream_index ] = 0xf9;
1428
0
          break;
1429
1430
0
        case 0x017d:
1431
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1432
0
          break;
1433
1434
0
        case 0x017e:
1435
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1436
0
          break;
1437
1438
0
        case 0x2015:
1439
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1440
0
          break;
1441
1442
0
        default:
1443
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1444
0
          break;
1445
0
      }
1446
0
      safe_byte_stream_index += 1;
1447
1448
0
      break;
1449
1450
0
    case LIBUNA_CODEPAGE_ISO_8859_11:
1451
0
      if( unicode_character < 0x00a1 )
1452
0
      {
1453
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1454
0
      }
1455
0
      else if( ( unicode_character >= 0x0e01 )
1456
0
            && ( unicode_character < 0x0e3b ) )
1457
0
      {
1458
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1459
0
      }
1460
0
      else if( ( unicode_character >= 0x0e3f )
1461
0
            && ( unicode_character < 0x0e5c ) )
1462
0
      {
1463
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) ( unicode_character - 0x0d60 );
1464
0
      }
1465
0
      else
1466
0
      {
1467
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1468
0
      }
1469
0
      safe_byte_stream_index += 1;
1470
1471
0
      break;
1472
1473
0
    case LIBUNA_CODEPAGE_ISO_8859_13:
1474
0
      if( unicode_character < 0x00a0 )
1475
0
      {
1476
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1477
0
      }
1478
0
      else if( ( unicode_character >= 0x00a0 )
1479
0
            && ( unicode_character < 0x0180 ) )
1480
0
      {
1481
0
        unicode_character -= 0x00a0;
1482
1483
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1484
0
      }
1485
0
      else if( ( unicode_character >= 0x2018 )
1486
0
             && ( unicode_character < 0x2020 ) )
1487
0
      {
1488
0
        unicode_character -= 0x2018;
1489
1490
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_13_unicode_to_byte_stream_base_0x2018[ unicode_character ];
1491
0
      }
1492
0
      else
1493
0
      {
1494
0
        byte_stream[ safe_byte_stream_index ] = 0x1a;
1495
0
      }
1496
0
      safe_byte_stream_index += 1;
1497
1498
0
      break;
1499
1500
0
    case LIBUNA_CODEPAGE_ISO_8859_14:
1501
0
      if( unicode_character < 0x00a1 )
1502
0
      {
1503
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1504
0
      }
1505
0
      else if( ( unicode_character >= 0x00c0 )
1506
0
            && ( unicode_character < 0x0100 ) )
1507
0
      {
1508
0
        unicode_character -= 0x00c0;
1509
1510
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x00c0[ unicode_character ];
1511
0
      }
1512
0
      else if( ( unicode_character >= 0x0170 )
1513
0
            && ( unicode_character < 0x0178 ) )
1514
0
      {
1515
0
        unicode_character -= 0x0170;
1516
1517
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x0170[ unicode_character ];
1518
0
      }
1519
0
      else if( ( unicode_character >= 0x1e80 )
1520
0
            && ( unicode_character < 0x1e88 ) )
1521
0
      {
1522
0
        unicode_character -= 0x1e80;
1523
1524
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_14_unicode_to_byte_stream_base_0x1e80[ unicode_character ];
1525
0
      }
1526
0
      else switch( unicode_character )
1527
0
      {
1528
0
        case 0x00a3:
1529
0
          byte_stream[ safe_byte_stream_index ] = 0xa3;
1530
0
          break;
1531
1532
0
        case 0x00a7:
1533
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1534
0
          break;
1535
1536
0
        case 0x00a9:
1537
0
          byte_stream[ safe_byte_stream_index ] = 0xa9;
1538
0
          break;
1539
1540
0
        case 0x00ad:
1541
0
          byte_stream[ safe_byte_stream_index ] = 0xad;
1542
0
          break;
1543
1544
0
        case 0x00ae:
1545
0
          byte_stream[ safe_byte_stream_index ] = 0xae;
1546
0
          break;
1547
1548
0
        case 0x00b6:
1549
0
          byte_stream[ safe_byte_stream_index ] = 0xb6;
1550
0
          break;
1551
1552
0
        case 0x010a:
1553
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1554
0
          break;
1555
1556
0
        case 0x010b:
1557
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1558
0
          break;
1559
1560
0
        case 0x0120:
1561
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1562
0
          break;
1563
1564
0
        case 0x0121:
1565
0
          byte_stream[ safe_byte_stream_index ] = 0xb3;
1566
0
          break;
1567
1568
0
        case 0x0178:
1569
0
          byte_stream[ safe_byte_stream_index ] = 0xaf;
1570
0
          break;
1571
1572
0
        case 0x1e02:
1573
0
          byte_stream[ safe_byte_stream_index ] = 0xa1;
1574
0
          break;
1575
1576
0
        case 0x1e03:
1577
0
          byte_stream[ safe_byte_stream_index ] = 0xa2;
1578
0
          break;
1579
1580
0
        case 0x1e0a:
1581
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1582
0
          break;
1583
1584
0
        case 0x1e0b:
1585
0
          byte_stream[ safe_byte_stream_index ] = 0xab;
1586
0
          break;
1587
1588
0
        case 0x1e1e:
1589
0
          byte_stream[ safe_byte_stream_index ] = 0xb0;
1590
0
          break;
1591
1592
0
        case 0x1e1f:
1593
0
          byte_stream[ safe_byte_stream_index ] = 0xb1;
1594
0
          break;
1595
1596
0
        case 0x1e40:
1597
0
          byte_stream[ safe_byte_stream_index ] = 0xb4;
1598
0
          break;
1599
1600
0
        case 0x1e41:
1601
0
          byte_stream[ safe_byte_stream_index ] = 0xb5;
1602
0
          break;
1603
1604
0
        case 0x1e56:
1605
0
          byte_stream[ safe_byte_stream_index ] = 0xb7;
1606
0
          break;
1607
1608
0
        case 0x1e57:
1609
0
          byte_stream[ safe_byte_stream_index ] = 0xb9;
1610
0
          break;
1611
1612
0
        case 0x1e60:
1613
0
          byte_stream[ safe_byte_stream_index ] = 0xbb;
1614
0
          break;
1615
1616
0
        case 0x1e61:
1617
0
          byte_stream[ safe_byte_stream_index ] = 0xbf;
1618
0
          break;
1619
1620
0
        case 0x1e6a:
1621
0
          byte_stream[ safe_byte_stream_index ] = 0xd7;
1622
0
          break;
1623
1624
0
        case 0x1e6b:
1625
0
          byte_stream[ safe_byte_stream_index ] = 0xf7;
1626
0
          break;
1627
1628
0
        case 0x1ef2:
1629
0
          byte_stream[ safe_byte_stream_index ] = 0xac;
1630
0
          break;
1631
1632
0
        case 0x1ef3:
1633
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1634
0
          break;
1635
1636
0
        default:
1637
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1638
0
          break;
1639
0
      }
1640
0
      safe_byte_stream_index += 1;
1641
1642
0
      break;
1643
1644
0
    case LIBUNA_CODEPAGE_ISO_8859_15:
1645
0
      if( unicode_character < 0x00a0 )
1646
0
      {
1647
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1648
0
      }
1649
0
      else if( ( unicode_character >= 0x00a0 )
1650
0
            && ( unicode_character < 0x00c0 ) )
1651
0
      {
1652
0
        unicode_character -= 0x00a0;
1653
1654
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_15_unicode_to_byte_stream_base_0x00a0[ unicode_character ];
1655
0
      }
1656
0
      else if( unicode_character < 0x0100 )
1657
0
      {
1658
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1659
0
      }
1660
0
      else switch( unicode_character )
1661
0
      {
1662
0
        case 0x0152:
1663
0
          byte_stream[ safe_byte_stream_index ] = 0xbc;
1664
0
          break;
1665
1666
0
        case 0x0153:
1667
0
          byte_stream[ safe_byte_stream_index ] = 0xbd;
1668
0
          break;
1669
1670
0
        case 0x0160:
1671
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1672
0
          break;
1673
1674
0
        case 0x0161:
1675
0
          byte_stream[ safe_byte_stream_index ] = 0xa8;
1676
0
          break;
1677
1678
0
        case 0x0178:
1679
0
          byte_stream[ safe_byte_stream_index ] = 0xbe;
1680
0
          break;
1681
1682
0
        case 0x017d:
1683
0
          byte_stream[ safe_byte_stream_index ] = 0xb4;
1684
0
          break;
1685
1686
0
        case 0x017e:
1687
0
          byte_stream[ safe_byte_stream_index ] = 0xb8;
1688
0
          break;
1689
1690
0
        case 0x20ac:
1691
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1692
0
          break;
1693
1694
0
        default:
1695
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1696
0
          break;
1697
0
      }
1698
0
      safe_byte_stream_index += 1;
1699
1700
0
      break;
1701
1702
0
    case LIBUNA_CODEPAGE_ISO_8859_16:
1703
0
      if( unicode_character < 0x00a1 )
1704
0
      {
1705
0
        byte_stream[ safe_byte_stream_index ] = (uint8_t) unicode_character;
1706
0
      }
1707
0
      else if( ( unicode_character >= 0x00a8 )
1708
0
            && ( unicode_character < 0x0108 ) )
1709
0
      {
1710
0
        unicode_character -= 0x00a8;
1711
1712
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x00a8[ unicode_character ];
1713
0
      }
1714
0
      else if( ( unicode_character >= 0x0140 )
1715
0
            && ( unicode_character < 0x0148 ) )
1716
0
      {
1717
0
        unicode_character -= 0x0140;
1718
1719
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0140[ unicode_character ];
1720
0
      }
1721
0
      else if( ( unicode_character >= 0x0150 )
1722
0
            && ( unicode_character < 0x0158 ) )
1723
0
      {
1724
0
        unicode_character -= 0x0150;
1725
1726
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0150[ unicode_character ];
1727
0
      }
1728
0
      else if( ( unicode_character >= 0x0178 )
1729
0
            && ( unicode_character < 0x0180 ) )
1730
0
      {
1731
0
        unicode_character -= 0x0178;
1732
1733
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0178[ unicode_character ];
1734
0
      }
1735
0
      else if( ( unicode_character >= 0x0218 )
1736
0
            && ( unicode_character < 0x0220 ) )
1737
0
      {
1738
0
        unicode_character -= 0x0218;
1739
1740
0
        byte_stream[ safe_byte_stream_index ] = libuna_codepage_iso_8859_16_unicode_to_byte_stream_base_0x0218[ unicode_character ];
1741
0
      }
1742
0
      else switch( unicode_character )
1743
0
      {
1744
0
        case 0x00a7:
1745
0
          byte_stream[ safe_byte_stream_index ] = 0xa7;
1746
0
          break;
1747
1748
0
        case 0x010c:
1749
0
          byte_stream[ safe_byte_stream_index ] = 0xb2;
1750
0
          break;
1751
1752
0
        case 0x010d:
1753
0
          byte_stream[ safe_byte_stream_index ] = 0xb9;
1754
0
          break;
1755
1756
0
        case 0x0110:
1757
0
          byte_stream[ safe_byte_stream_index ] = 0xd0;
1758
0
          break;
1759
1760
0
        case 0x0111:
1761
0
          byte_stream[ safe_byte_stream_index ] = 0xf0;
1762
0
          break;
1763
1764
0
        case 0x0118:
1765
0
          byte_stream[ safe_byte_stream_index ] = 0xdd;
1766
0
          break;
1767
1768
0
        case 0x0119:
1769
0
          byte_stream[ safe_byte_stream_index ] = 0xfd;
1770
0
          break;
1771
1772
0
        case 0x015a:
1773
0
          byte_stream[ safe_byte_stream_index ] = 0xd7;
1774
0
          break;
1775
1776
0
        case 0x015b:
1777
0
          byte_stream[ safe_byte_stream_index ] = 0xf7;
1778
0
          break;
1779
1780
0
        case 0x0160:
1781
0
          byte_stream[ safe_byte_stream_index ] = 0xa6;
1782
0
          break;
1783
1784
0
        case 0x0161:
1785
0
          byte_stream[ safe_byte_stream_index ] = 0xa8;
1786
0
          break;
1787
1788
0
        case 0x0170:
1789
0
          byte_stream[ safe_byte_stream_index ] = 0xd8;
1790
0
          break;
1791
1792
0
        case 0x0171:
1793
0
          byte_stream[ safe_byte_stream_index ] = 0xf8;
1794
0
          break;
1795
1796
0
        case 0x201d:
1797
0
          byte_stream[ safe_byte_stream_index ] = 0xb5;
1798
0
          break;
1799
1800
0
        case 0x201e:
1801
0
          byte_stream[ safe_byte_stream_index ] = 0xa5;
1802
0
          break;
1803
1804
0
        case 0x20ac:
1805
0
          byte_stream[ safe_byte_stream_index ] = 0xa4;
1806
0
          break;
1807
1808
0
        default:
1809
0
          byte_stream[ safe_byte_stream_index ] = 0x1a;
1810
0
          break;
1811
0
      }
1812
0
      safe_byte_stream_index += 1;
1813
1814
0
      break;
1815
1816
0
    case LIBUNA_CODEPAGE_KOI8_R:
1817
0
      result = libuna_codepage_koi8_r_copy_to_byte_stream(
1818
0
                unicode_character,
1819
0
                byte_stream,
1820
0
                byte_stream_size,
1821
0
                &safe_byte_stream_index,
1822
0
                error );
1823
0
      break;
1824
1825
0
    case LIBUNA_CODEPAGE_KOI8_U:
1826
0
      result = libuna_codepage_koi8_u_copy_to_byte_stream(
1827
0
                unicode_character,
1828
0
                byte_stream,
1829
0
                byte_stream_size,
1830
0
                &safe_byte_stream_index,
1831
0
                error );
1832
0
      break;
1833
1834
0
    case LIBUNA_CODEPAGE_MAC_ARABIC:
1835
0
      result = libuna_codepage_mac_arabic_copy_to_byte_stream(
1836
0
                unicode_character,
1837
0
                byte_stream,
1838
0
                byte_stream_size,
1839
0
                &safe_byte_stream_index,
1840
0
                error );
1841
0
      break;
1842
1843
0
    case LIBUNA_CODEPAGE_MAC_CELTIC:
1844
0
      result = libuna_codepage_mac_celtic_copy_to_byte_stream(
1845
0
                unicode_character,
1846
0
                byte_stream,
1847
0
                byte_stream_size,
1848
0
                &safe_byte_stream_index,
1849
0
                error );
1850
0
      break;
1851
1852
0
    case LIBUNA_CODEPAGE_MAC_CENTRALEURROMAN:
1853
0
      result = libuna_codepage_mac_centraleurroman_copy_to_byte_stream(
1854
0
                unicode_character,
1855
0
                byte_stream,
1856
0
                byte_stream_size,
1857
0
                &safe_byte_stream_index,
1858
0
                error );
1859
0
      break;
1860
1861
0
    case LIBUNA_CODEPAGE_MAC_CROATIAN:
1862
0
      result = libuna_codepage_mac_croatian_copy_to_byte_stream(
1863
0
                unicode_character,
1864
0
                byte_stream,
1865
0
                byte_stream_size,
1866
0
                &safe_byte_stream_index,
1867
0
                error );
1868
0
      break;
1869
1870
0
    case LIBUNA_CODEPAGE_MAC_CYRILLIC:
1871
0
      result = libuna_codepage_mac_cyrillic_copy_to_byte_stream(
1872
0
                unicode_character,
1873
0
                byte_stream,
1874
0
                byte_stream_size,
1875
0
                &safe_byte_stream_index,
1876
0
                error );
1877
0
      break;
1878
1879
0
    case LIBUNA_CODEPAGE_MAC_DINGBATS:
1880
0
      result = libuna_codepage_mac_dingbats_copy_to_byte_stream(
1881
0
                unicode_character,
1882
0
                byte_stream,
1883
0
                byte_stream_size,
1884
0
                &safe_byte_stream_index,
1885
0
                error );
1886
0
      break;
1887
1888
0
    case LIBUNA_CODEPAGE_MAC_FARSI:
1889
0
      result = libuna_codepage_mac_farsi_copy_to_byte_stream(
1890
0
                unicode_character,
1891
0
                byte_stream,
1892
0
                byte_stream_size,
1893
0
                &safe_byte_stream_index,
1894
0
                error );
1895
0
      break;
1896
1897
0
    case LIBUNA_CODEPAGE_MAC_GAELIC:
1898
0
      result = libuna_codepage_mac_gaelic_copy_to_byte_stream(
1899
0
                unicode_character,
1900
0
                byte_stream,
1901
0
                byte_stream_size,
1902
0
                &safe_byte_stream_index,
1903
0
                error );
1904
0
      break;
1905
1906
0
    case LIBUNA_CODEPAGE_MAC_GREEK:
1907
0
      result = libuna_codepage_mac_greek_copy_to_byte_stream(
1908
0
                unicode_character,
1909
0
                byte_stream,
1910
0
                byte_stream_size,
1911
0
                &safe_byte_stream_index,
1912
0
                error );
1913
0
      break;
1914
1915
0
    case LIBUNA_CODEPAGE_MAC_ICELANDIC:
1916
0
      result = libuna_codepage_mac_icelandic_copy_to_byte_stream(
1917
0
                unicode_character,
1918
0
                byte_stream,
1919
0
                byte_stream_size,
1920
0
                &safe_byte_stream_index,
1921
0
                error );
1922
0
      break;
1923
1924
0
    case LIBUNA_CODEPAGE_MAC_INUIT:
1925
0
      result = libuna_codepage_mac_inuit_copy_to_byte_stream(
1926
0
                unicode_character,
1927
0
                byte_stream,
1928
0
                byte_stream_size,
1929
0
                &safe_byte_stream_index,
1930
0
                error );
1931
0
      break;
1932
1933
0
    case LIBUNA_CODEPAGE_MAC_ROMAN:
1934
0
      result = libuna_codepage_mac_roman_copy_to_byte_stream(
1935
0
                unicode_character,
1936
0
                byte_stream,
1937
0
                byte_stream_size,
1938
0
                &safe_byte_stream_index,
1939
0
                error );
1940
0
      break;
1941
1942
0
    case LIBUNA_CODEPAGE_MAC_ROMANIAN:
1943
0
      result = libuna_codepage_mac_romanian_copy_to_byte_stream(
1944
0
                unicode_character,
1945
0
                byte_stream,
1946
0
                byte_stream_size,
1947
0
                &safe_byte_stream_index,
1948
0
                error );
1949
0
      break;
1950
1951
0
    case LIBUNA_CODEPAGE_MAC_RUSSIAN:
1952
0
      result = libuna_codepage_mac_russian_copy_to_byte_stream(
1953
0
                unicode_character,
1954
0
                byte_stream,
1955
0
                byte_stream_size,
1956
0
                &safe_byte_stream_index,
1957
0
                error );
1958
0
      break;
1959
1960
0
    case LIBUNA_CODEPAGE_MAC_SYMBOL:
1961
0
      result = libuna_codepage_mac_symbol_copy_to_byte_stream(
1962
0
                unicode_character,
1963
0
                byte_stream,
1964
0
                byte_stream_size,
1965
0
                &safe_byte_stream_index,
1966
0
                error );
1967
0
      break;
1968
1969
0
    case LIBUNA_CODEPAGE_MAC_THAI:
1970
0
      result = libuna_codepage_mac_thai_copy_to_byte_stream(
1971
0
                unicode_character,
1972
0
                byte_stream,
1973
0
                byte_stream_size,
1974
0
                &safe_byte_stream_index,
1975
0
                error );
1976
0
      break;
1977
1978
0
    case LIBUNA_CODEPAGE_MAC_TURKISH:
1979
0
      result = libuna_codepage_mac_turkish_copy_to_byte_stream(
1980
0
                unicode_character,
1981
0
                byte_stream,
1982
0
                byte_stream_size,
1983
0
                &safe_byte_stream_index,
1984
0
                error );
1985
0
      break;
1986
1987
0
    case LIBUNA_CODEPAGE_MAC_UKRAINIAN:
1988
0
      result = libuna_codepage_mac_ukrainian_copy_to_byte_stream(
1989
0
                unicode_character,
1990
0
                byte_stream,
1991
0
                byte_stream_size,
1992
0
                &safe_byte_stream_index,
1993
0
                error );
1994
0
      break;
1995
1996
0
    case LIBUNA_CODEPAGE_WINDOWS_874:
1997
0
      result = libuna_codepage_windows_874_copy_to_byte_stream(
1998
0
                unicode_character,
1999
0
                byte_stream,
2000
0
                byte_stream_size,
2001
0
                &safe_byte_stream_index,
2002
0
                error );
2003
0
      break;
2004
2005
0
    case LIBUNA_CODEPAGE_WINDOWS_932:
2006
0
      result = libuna_codepage_windows_932_copy_to_byte_stream(
2007
0
                unicode_character,
2008
0
                byte_stream,
2009
0
                byte_stream_size,
2010
0
                &safe_byte_stream_index,
2011
0
                error );
2012
0
      break;
2013
2014
0
    case LIBUNA_CODEPAGE_WINDOWS_936:
2015
0
      result = libuna_codepage_windows_936_copy_to_byte_stream(
2016
0
                unicode_character,
2017
0
                byte_stream,
2018
0
                byte_stream_size,
2019
0
                &safe_byte_stream_index,
2020
0
                error );
2021
0
      break;
2022
2023
0
    case LIBUNA_CODEPAGE_WINDOWS_949:
2024
0
      result = libuna_codepage_windows_949_copy_to_byte_stream(
2025
0
                unicode_character,
2026
0
                byte_stream,
2027
0
                byte_stream_size,
2028
0
                &safe_byte_stream_index,
2029
0
                error );
2030
0
      break;
2031
2032
0
    case LIBUNA_CODEPAGE_WINDOWS_950:
2033
0
      result = libuna_codepage_windows_950_copy_to_byte_stream(
2034
0
                unicode_character,
2035
0
                byte_stream,
2036
0
                byte_stream_size,
2037
0
                &safe_byte_stream_index,
2038
0
                error );
2039
0
      break;
2040
2041
0
    case LIBUNA_CODEPAGE_WINDOWS_1250:
2042
0
      result = libuna_codepage_windows_1250_copy_to_byte_stream(
2043
0
                unicode_character,
2044
0
                byte_stream,
2045
0
                byte_stream_size,
2046
0
                &safe_byte_stream_index,
2047
0
                error );
2048
0
      break;
2049
2050
0
    case LIBUNA_CODEPAGE_WINDOWS_1251:
2051
0
      result = libuna_codepage_windows_1251_copy_to_byte_stream(
2052
0
                unicode_character,
2053
0
                byte_stream,
2054
0
                byte_stream_size,
2055
0
                &safe_byte_stream_index,
2056
0
                error );
2057
0
      break;
2058
2059
0
    case LIBUNA_CODEPAGE_WINDOWS_1252:
2060
0
      result = libuna_codepage_windows_1252_copy_to_byte_stream(
2061
0
                unicode_character,
2062
0
                byte_stream,
2063
0
                byte_stream_size,
2064
0
                &safe_byte_stream_index,
2065
0
                error );
2066
0
      break;
2067
2068
0
    case LIBUNA_CODEPAGE_WINDOWS_1253:
2069
0
      result = libuna_codepage_windows_1253_copy_to_byte_stream(
2070
0
                unicode_character,
2071
0
                byte_stream,
2072
0
                byte_stream_size,
2073
0
                &safe_byte_stream_index,
2074
0
                error );
2075
0
      break;
2076
2077
0
    case LIBUNA_CODEPAGE_WINDOWS_1254:
2078
0
      result = libuna_codepage_windows_1254_copy_to_byte_stream(
2079
0
                unicode_character,
2080
0
                byte_stream,
2081
0
                byte_stream_size,
2082
0
                &safe_byte_stream_index,
2083
0
                error );
2084
0
      break;
2085
2086
0
    case LIBUNA_CODEPAGE_WINDOWS_1255:
2087
0
      result = libuna_codepage_windows_1255_copy_to_byte_stream(
2088
0
                unicode_character,
2089
0
                byte_stream,
2090
0
                byte_stream_size,
2091
0
                &safe_byte_stream_index,
2092
0
                error );
2093
0
      break;
2094
2095
0
    case LIBUNA_CODEPAGE_WINDOWS_1256:
2096
0
      result = libuna_codepage_windows_1256_copy_to_byte_stream(
2097
0
                unicode_character,
2098
0
                byte_stream,
2099
0
                byte_stream_size,
2100
0
                &safe_byte_stream_index,
2101
0
                error );
2102
0
      break;
2103
2104
0
    case LIBUNA_CODEPAGE_WINDOWS_1257:
2105
0
      result = libuna_codepage_windows_1257_copy_to_byte_stream(
2106
0
                unicode_character,
2107
0
                byte_stream,
2108
0
                byte_stream_size,
2109
0
                &safe_byte_stream_index,
2110
0
                error );
2111
0
      break;
2112
2113
0
    case LIBUNA_CODEPAGE_WINDOWS_1258:
2114
0
      result = libuna_codepage_windows_1258_copy_to_byte_stream(
2115
0
                unicode_character,
2116
0
                byte_stream,
2117
0
                byte_stream_size,
2118
0
                &safe_byte_stream_index,
2119
0
                error );
2120
0
      break;
2121
2122
0
    default:
2123
0
      libcerror_error_set(
2124
0
       error,
2125
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2126
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2127
0
      "%s: unsupported codepage: %d.",
2128
0
       function,
2129
0
             codepage );
2130
2131
0
      return( -1 );
2132
0
  }
2133
0
  if( result == -1 )
2134
0
  {
2135
0
    libcerror_error_set(
2136
0
     error,
2137
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2138
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2139
0
     "%s: unable to copy Unicode character to byte stream.",
2140
0
     function );
2141
2142
0
    return( -1 );
2143
0
  }
2144
0
  *byte_stream_index = safe_byte_stream_index;
2145
2146
0
  return( result );
2147
0
}
2148
2149
/* Determines the size of an UCS-2 character from an Unicode character
2150
 * Adds the size to the UCS-2 character size value
2151
 * Returns 1 if successful or -1 on error
2152
 */
2153
int libuna_unicode_character_size_to_ucs2(
2154
     libuna_unicode_character_t unicode_character,
2155
     size_t *ucs2_character_size,
2156
     libcerror_error_t **error )
2157
0
{
2158
0
  static char *function = "libuna_unicode_character_size_to_ucs2";
2159
2160
0
  if( ucs2_character_size == NULL )
2161
0
  {
2162
0
    libcerror_error_set(
2163
0
     error,
2164
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2165
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2166
0
     "%s: invalid UCS-2 character size.",
2167
0
     function );
2168
2169
0
    return( -1 );
2170
0
  }
2171
  /* Determine if the Unicode character is valid
2172
   * UCS-2 with surrogate pairs supports upto 0x10ffff characters
2173
   */
2174
0
  if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2175
0
  {
2176
0
    libcerror_error_set(
2177
0
     error,
2178
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2179
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2180
0
     "%s: unsupported Unicode character.",
2181
0
     function );
2182
2183
0
    return( -1 );
2184
0
  }
2185
0
  if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2186
0
  {
2187
0
    *ucs2_character_size += 2;
2188
0
  }
2189
0
  else
2190
0
  {
2191
0
    *ucs2_character_size += 1;
2192
0
  }
2193
0
  return( 1 );
2194
0
}
2195
2196
/* Copies an Unicode character from an UCS-2 string
2197
 * Returns 1 if successful or -1 on error
2198
 */
2199
int libuna_unicode_character_copy_from_ucs2(
2200
     libuna_unicode_character_t *unicode_character,
2201
     const libuna_utf16_character_t *ucs2_string,
2202
     size_t ucs2_string_size,
2203
     size_t *ucs2_string_index,
2204
     libcerror_error_t **error )
2205
0
{
2206
0
  static char *function                             = "libuna_unicode_character_copy_from_ucs2";
2207
0
  libuna_utf16_character_t ucs2_surrogate           = 0;
2208
0
  libuna_unicode_character_t safe_unicode_character = 0;
2209
0
  size_t safe_ucs2_string_index                     = 0;
2210
2211
0
  if( unicode_character == NULL )
2212
0
  {
2213
0
    libcerror_error_set(
2214
0
     error,
2215
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2216
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2217
0
     "%s: invalid Unicode character.",
2218
0
     function );
2219
2220
0
    return( -1 );
2221
0
  }
2222
0
  if( ucs2_string == NULL )
2223
0
  {
2224
0
    libcerror_error_set(
2225
0
     error,
2226
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2227
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2228
0
     "%s: invalid UCS-2 string.",
2229
0
     function );
2230
2231
0
    return( -1 );
2232
0
  }
2233
0
  if( ucs2_string_size > (size_t) SSIZE_MAX )
2234
0
  {
2235
0
    libcerror_error_set(
2236
0
     error,
2237
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2238
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2239
0
     "%s: invalid UCS-2 string size value exceeds maximum.",
2240
0
     function );
2241
2242
0
    return( -1 );
2243
0
  }
2244
0
  if( ucs2_string_index == NULL )
2245
0
  {
2246
0
    libcerror_error_set(
2247
0
     error,
2248
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2249
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2250
0
     "%s: invalid UCS-2 string index.",
2251
0
     function );
2252
2253
0
    return( -1 );
2254
0
  }
2255
0
  safe_ucs2_string_index = *ucs2_string_index;
2256
2257
0
  if( safe_ucs2_string_index >= ucs2_string_size )
2258
0
  {
2259
0
    libcerror_error_set(
2260
0
     error,
2261
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2262
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2263
0
     "%s: UCS-2 string too small.",
2264
0
     function );
2265
2266
0
    return( -1 );
2267
0
  }
2268
0
  safe_unicode_character  = ucs2_string[ safe_ucs2_string_index ];
2269
0
  safe_ucs2_string_index += 1;
2270
2271
  /* Determine if the UCS-2 character is within the high surrogate range
2272
   */
2273
0
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
2274
0
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
2275
0
  {
2276
0
    if( safe_ucs2_string_index >= ucs2_string_size )
2277
0
    {
2278
0
      libcerror_error_set(
2279
0
       error,
2280
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2281
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
2282
0
       "%s: missing surrogate UCS-2 character bytes.",
2283
0
       function );
2284
2285
0
      return( -1 );
2286
0
    }
2287
0
    ucs2_surrogate = ucs2_string[ safe_ucs2_string_index ];
2288
2289
    /* Determine if the UCS-2 character is within the low surrogate range
2290
     */
2291
0
    if( ( ucs2_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
2292
0
     && ( ucs2_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
2293
0
    {
2294
0
      safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
2295
0
      safe_unicode_character <<= 10;
2296
0
      safe_unicode_character  += ucs2_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
2297
0
      safe_unicode_character  += 0x010000;
2298
2299
0
      safe_ucs2_string_index += 1;
2300
0
    }
2301
0
  }
2302
  /* Determine if the Unicode character is valid
2303
   * UCS-2 with surrogate pairs supports upto 0x10ffff characters
2304
   */
2305
0
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2306
0
  {
2307
0
    libcerror_error_set(
2308
0
     error,
2309
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2310
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2311
0
     "%s: unsupported Unicode character.",
2312
0
     function );
2313
2314
0
    return( -1 );
2315
0
  }
2316
0
  *unicode_character = safe_unicode_character;
2317
0
  *ucs2_string_index = safe_ucs2_string_index;
2318
2319
0
  return( 1 );
2320
0
}
2321
2322
/* Copies an Unicode character into a UCS-2 string
2323
 * Returns 1 if successful or -1 on error
2324
 */
2325
int libuna_unicode_character_copy_to_ucs2(
2326
     libuna_unicode_character_t unicode_character,
2327
     libuna_utf16_character_t *ucs2_string,
2328
     size_t ucs2_string_size,
2329
     size_t *ucs2_string_index,
2330
     libcerror_error_t **error )
2331
0
{
2332
0
  static char *function         = "libuna_unicode_character_copy_to_ucs2";
2333
0
  size_t safe_ucs2_string_index = 0;
2334
2335
0
  if( ucs2_string == NULL )
2336
0
  {
2337
0
    libcerror_error_set(
2338
0
     error,
2339
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2340
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2341
0
     "%s: invalid UCS-2 string.",
2342
0
     function );
2343
2344
0
    return( -1 );
2345
0
  }
2346
0
  if( ucs2_string_size > (size_t) SSIZE_MAX )
2347
0
  {
2348
0
    libcerror_error_set(
2349
0
     error,
2350
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2351
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2352
0
     "%s: invalid UCS-2 string size value exceeds maximum.",
2353
0
     function );
2354
2355
0
    return( -1 );
2356
0
  }
2357
0
  if( ucs2_string_index == NULL )
2358
0
  {
2359
0
    libcerror_error_set(
2360
0
     error,
2361
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2362
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2363
0
     "%s: invalid UCS-2 string index.",
2364
0
     function );
2365
2366
0
    return( -1 );
2367
0
  }
2368
0
  safe_ucs2_string_index = *ucs2_string_index;
2369
2370
0
  if( safe_ucs2_string_index >= ucs2_string_size )
2371
0
  {
2372
0
    libcerror_error_set(
2373
0
     error,
2374
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2375
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2376
0
     "%s: UCS-2 string too small.",
2377
0
     function );
2378
2379
0
    return( -1 );
2380
0
  }
2381
  /* Determine if the Unicode character is valid
2382
   */
2383
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2384
0
  {
2385
0
    libcerror_error_set(
2386
0
     error,
2387
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2388
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2389
0
     "%s: unsupported Unicode character.",
2390
0
     function );
2391
2392
0
    return( -1 );
2393
0
  }
2394
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2395
0
  {
2396
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) unicode_character;
2397
0
  }
2398
0
  else
2399
0
  {
2400
0
    if( ( ucs2_string_size < 2 )
2401
0
     || ( safe_ucs2_string_index > ( ucs2_string_size - 2 ) ) )
2402
0
    {
2403
0
      libcerror_error_set(
2404
0
       error,
2405
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2406
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2407
0
       "%s: UCS-2 string too small.",
2408
0
       function );
2409
2410
0
      return( -1 );
2411
0
    }
2412
0
    unicode_character                      -= 0x010000;
2413
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
2414
0
    ucs2_string[ safe_ucs2_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
2415
0
  }
2416
0
  *ucs2_string_index = safe_ucs2_string_index;
2417
2418
0
  return( 1 );
2419
0
}
2420
2421
/* Determines the size of an UCS-4 character from an Unicode character
2422
 * Adds the size to the UCS-4 character size value
2423
 * Returns 1 if successful or -1 on error
2424
 */
2425
int libuna_unicode_character_size_to_ucs4(
2426
     libuna_unicode_character_t unicode_character,
2427
     size_t *ucs4_character_size,
2428
     libcerror_error_t **error )
2429
0
{
2430
0
  static char *function = "libuna_unicode_character_size_to_ucs4";
2431
2432
0
  LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
2433
2434
0
  if( ucs4_character_size == NULL )
2435
0
  {
2436
0
    libcerror_error_set(
2437
0
     error,
2438
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2439
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2440
0
     "%s: invalid UCS-4 character size.",
2441
0
     function );
2442
2443
0
    return( -1 );
2444
0
  }
2445
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2446
0
  {
2447
0
    libcerror_error_set(
2448
0
     error,
2449
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2450
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2451
0
     "%s: unsupported Unicode character.",
2452
0
     function );
2453
2454
0
    return( -1 );
2455
0
  }
2456
0
  *ucs4_character_size += 1;
2457
2458
0
  return( 1 );
2459
0
}
2460
2461
/* Copies an Unicode character from an UCS-4 string
2462
 * Returns 1 if successful or -1 on error
2463
 */
2464
int libuna_unicode_character_copy_from_ucs4(
2465
     libuna_unicode_character_t *unicode_character,
2466
     const libuna_utf32_character_t *ucs4_string,
2467
     size_t ucs4_string_size,
2468
     size_t *ucs4_string_index,
2469
     libcerror_error_t **error )
2470
0
{
2471
0
  static char *function                             = "libuna_unicode_character_copy_from_ucs4";
2472
0
  libuna_unicode_character_t safe_unicode_character = 0;
2473
0
  size_t safe_ucs4_string_index                     = 0;
2474
2475
0
  if( unicode_character == NULL )
2476
0
  {
2477
0
    libcerror_error_set(
2478
0
     error,
2479
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2480
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2481
0
     "%s: invalid Unicode character.",
2482
0
     function );
2483
2484
0
    return( -1 );
2485
0
  }
2486
0
  if( ucs4_string == NULL )
2487
0
  {
2488
0
    libcerror_error_set(
2489
0
     error,
2490
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2491
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2492
0
     "%s: invalid UCS-4 string.",
2493
0
     function );
2494
2495
0
    return( -1 );
2496
0
  }
2497
0
  if( ucs4_string_size > (size_t) SSIZE_MAX )
2498
0
  {
2499
0
    libcerror_error_set(
2500
0
     error,
2501
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2502
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2503
0
     "%s: invalid UCS-4 string size value exceeds maximum.",
2504
0
     function );
2505
2506
0
    return( -1 );
2507
0
  }
2508
0
  if( ucs4_string_index == NULL )
2509
0
  {
2510
0
    libcerror_error_set(
2511
0
     error,
2512
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2513
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2514
0
     "%s: invalid UCS-4 string index.",
2515
0
     function );
2516
2517
0
    return( -1 );
2518
0
  }
2519
0
  safe_ucs4_string_index = *ucs4_string_index;
2520
2521
0
  if( safe_ucs4_string_index >= ucs4_string_size )
2522
0
  {
2523
0
    libcerror_error_set(
2524
0
     error,
2525
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2526
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2527
0
     "%s: UCS-4 string too small.",
2528
0
     function );
2529
2530
0
    return( -1 );
2531
0
  }
2532
0
  safe_unicode_character = ucs4_string[ safe_ucs4_string_index ];
2533
2534
  /* Determine if the Unicode character is valid
2535
   */
2536
0
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
2537
0
  {
2538
0
    libcerror_error_set(
2539
0
     error,
2540
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2541
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2542
0
     "%s: unsupported Unicode character.",
2543
0
     function );
2544
2545
0
    return( -1 );
2546
0
  }
2547
0
  *unicode_character = safe_unicode_character;
2548
0
  *ucs4_string_index = safe_ucs4_string_index + 1;
2549
2550
0
  return( 1 );
2551
0
}
2552
2553
/* Copies an Unicode character into a UCS-4 string
2554
 * Returns 1 if successful or -1 on error
2555
 */
2556
int libuna_unicode_character_copy_to_ucs4(
2557
     libuna_unicode_character_t unicode_character,
2558
     libuna_utf32_character_t *ucs4_string,
2559
     size_t ucs4_string_size,
2560
     size_t *ucs4_string_index,
2561
     libcerror_error_t **error )
2562
0
{
2563
0
  static char *function         = "libuna_unicode_character_copy_to_ucs4";
2564
0
  size_t safe_ucs4_string_index = 0;
2565
2566
0
  if( ucs4_string == NULL )
2567
0
  {
2568
0
    libcerror_error_set(
2569
0
     error,
2570
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2571
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2572
0
     "%s: invalid UCS-4 string.",
2573
0
     function );
2574
2575
0
    return( -1 );
2576
0
  }
2577
0
  if( ucs4_string_size > (size_t) SSIZE_MAX )
2578
0
  {
2579
0
    libcerror_error_set(
2580
0
     error,
2581
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2582
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2583
0
     "%s: invalid UCS-4 string size value exceeds maximum.",
2584
0
     function );
2585
2586
0
    return( -1 );
2587
0
  }
2588
0
  if( ucs4_string_index == NULL )
2589
0
  {
2590
0
    libcerror_error_set(
2591
0
     error,
2592
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2593
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2594
0
     "%s: invalid UCS-4 string index.",
2595
0
     function );
2596
2597
0
    return( -1 );
2598
0
  }
2599
0
  safe_ucs4_string_index = *ucs4_string_index;
2600
2601
0
  if( safe_ucs4_string_index >= ucs4_string_size )
2602
0
  {
2603
0
    libcerror_error_set(
2604
0
     error,
2605
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2606
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2607
0
     "%s: UCS-4 string too small.",
2608
0
     function );
2609
2610
0
    return( -1 );
2611
0
  }
2612
  /* Determine if the Unicode character is valid
2613
   */
2614
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
2615
0
  {
2616
0
    libcerror_error_set(
2617
0
     error,
2618
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2619
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2620
0
     "%s: unsupported Unicode character.",
2621
0
     function );
2622
2623
0
    return( -1 );
2624
0
  }
2625
0
  ucs4_string[ safe_ucs4_string_index ] = (libuna_utf32_character_t) unicode_character;
2626
2627
0
  *ucs4_string_index = safe_ucs4_string_index + 1;
2628
2629
0
  return( 1 );
2630
0
}
2631
2632
/* Determines the size of an UTF-7 stream character from an Unicode character
2633
 * Adds the size to the UTF-7 stream character size value
2634
 * Returns 1 if successful or -1 on error
2635
 */
2636
int libuna_unicode_character_size_to_utf7_stream(
2637
     libuna_unicode_character_t unicode_character,
2638
     size_t *utf7_stream_character_size,
2639
     uint32_t *utf7_stream_base64_data,
2640
     libcerror_error_t **error )
2641
0
{
2642
0
  static char *function                    = "libuna_unicode_character_size_to_utf7_stream";
2643
0
  libuna_utf16_character_t utf16_surrogate = 0;
2644
0
  size_t safe_utf7_stream_character_size   = 0;
2645
0
  uint32_t base64_triplet                  = 0;
2646
0
  uint32_t safe_utf7_stream_base64_data    = 0;
2647
0
  uint8_t base64_encode_character          = 0;
2648
0
  uint8_t byte_bit_shift                   = 0;
2649
0
  uint8_t current_byte                     = 0;
2650
0
  uint8_t number_of_bytes                  = 0;
2651
2652
0
  if( utf7_stream_character_size == NULL )
2653
0
  {
2654
0
    libcerror_error_set(
2655
0
     error,
2656
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2657
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2658
0
     "%s: invalid UTF-7 stream character size.",
2659
0
     function );
2660
2661
0
    return( -1 );
2662
0
  }
2663
0
  if( utf7_stream_base64_data == NULL )
2664
0
  {
2665
0
    libcerror_error_set(
2666
0
     error,
2667
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2668
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2669
0
     "%s: invalid UTF-7 stream base64 data.",
2670
0
     function );
2671
2672
0
    return( -1 );
2673
0
  }
2674
0
  safe_utf7_stream_character_size = *utf7_stream_character_size;
2675
0
  safe_utf7_stream_base64_data    = *utf7_stream_base64_data;
2676
2677
  /* Determine if the Unicode character is valid
2678
   */
2679
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
2680
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
2681
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
2682
0
  {
2683
0
    libcerror_error_set(
2684
0
     error,
2685
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2686
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
2687
0
     "%s: unsupported Unicode character.",
2688
0
     function );
2689
2690
0
    return( -1 );
2691
0
  }
2692
  /* The + character must be escaped
2693
   */
2694
0
  if( unicode_character == (libuna_unicode_character_t) '+' )
2695
0
  {
2696
0
  }
2697
  /* Allow for the end of string character
2698
   */
2699
0
  else if( unicode_character == 0 )
2700
0
  {
2701
0
  }
2702
0
  else if( ( unicode_character >= 256 )
2703
0
        || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
2704
0
  {
2705
0
    base64_encode_character = 1;
2706
0
  }
2707
0
  if( base64_encode_character == 0 )
2708
0
  {
2709
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2710
0
    {
2711
0
      safe_utf7_stream_base64_data = 0;
2712
0
    }
2713
0
    safe_utf7_stream_character_size += 1;
2714
2715
    /* The + character must be escaped
2716
     */
2717
0
    if( unicode_character == (libuna_unicode_character_t) '+' )
2718
0
    {
2719
0
      safe_utf7_stream_character_size += 1;
2720
0
    }
2721
0
  }
2722
0
  else
2723
0
  {
2724
    /* Escape the base64 encoded characters with a +
2725
     */
2726
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
2727
0
    {
2728
0
      safe_utf7_stream_character_size += 1;
2729
0
    }
2730
    /* Otherwise continue the previously base64 encoded characters
2731
     */
2732
0
    else
2733
0
    {
2734
0
      base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
2735
0
      number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
2736
0
      current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
2737
2738
0
      if( number_of_bytes > 0 )
2739
0
      {
2740
0
        if( safe_utf7_stream_character_size < (size_t) ( number_of_bytes + 1 ) )
2741
0
        {
2742
0
          libcerror_error_set(
2743
0
           error,
2744
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
2745
0
           LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2746
0
           "%s: invalid UTF-7 stream character size value out of bounds.",
2747
0
           function );
2748
2749
0
          return( -1 );
2750
0
        }
2751
        /* Correct the size for the last partial base64 stream
2752
         */
2753
0
        safe_utf7_stream_character_size -= number_of_bytes + 1;
2754
0
      }
2755
0
      if( safe_utf7_stream_character_size < 1 )
2756
0
      {
2757
0
        libcerror_error_set(
2758
0
         error,
2759
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
2760
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2761
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
2762
0
         function );
2763
2764
0
        return( -1 );
2765
0
      }
2766
      /* Correct the size for the base64 stream termination character
2767
       */
2768
0
      safe_utf7_stream_character_size -= 1;
2769
0
    }
2770
0
    safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
2771
2772
0
    if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
2773
0
    {
2774
0
      unicode_character -= 0x010000;
2775
2776
0
      utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
2777
2778
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
2779
0
      base64_triplet  += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
2780
0
      current_byte    += 1;
2781
0
      number_of_bytes += 1;
2782
2783
0
      if( number_of_bytes == 3 )
2784
0
      {
2785
0
        safe_utf7_stream_character_size += 4;
2786
0
        number_of_bytes                  = 0;
2787
0
        current_byte                     = 0;
2788
0
        base64_triplet                   = 0;
2789
0
      }
2790
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
2791
0
      base64_triplet  += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
2792
0
      current_byte    += 1;
2793
0
      number_of_bytes += 1;
2794
2795
0
      if( number_of_bytes == 3 )
2796
0
      {
2797
0
        safe_utf7_stream_character_size += 4;
2798
0
        number_of_bytes                  = 0;
2799
0
        current_byte                     = 0;
2800
0
        base64_triplet                   = 0;
2801
0
      }
2802
0
      unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
2803
0
    }
2804
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
2805
0
    base64_triplet  += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
2806
0
    current_byte    += 1;
2807
0
    number_of_bytes += 1;
2808
2809
0
    if( number_of_bytes == 3 )
2810
0
    {
2811
0
      safe_utf7_stream_character_size += 4;
2812
0
      number_of_bytes                  = 0;
2813
0
      current_byte                     = 0;
2814
0
      base64_triplet                   = 0;
2815
0
    }
2816
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
2817
0
    base64_triplet  += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
2818
0
    current_byte    += 1;
2819
0
    number_of_bytes += 1;
2820
2821
0
    if( number_of_bytes == 3 )
2822
0
    {
2823
0
      safe_utf7_stream_character_size += 4;
2824
0
      number_of_bytes                  = 0;
2825
0
      current_byte                     = 0;
2826
0
      base64_triplet                   = 0;
2827
0
    }
2828
    /* Terminate the base64 encoded characters
2829
     */
2830
0
    if( number_of_bytes > 0 )
2831
0
    {
2832
0
      safe_utf7_stream_character_size += number_of_bytes + 1;
2833
0
    }
2834
0
    safe_utf7_stream_character_size += 1;
2835
0
  }
2836
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2837
0
  {
2838
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
2839
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
2840
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
2841
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
2842
0
  }
2843
0
  *utf7_stream_character_size = safe_utf7_stream_character_size;
2844
0
  *utf7_stream_base64_data    = safe_utf7_stream_base64_data;
2845
2846
0
  return( 1 );
2847
0
}
2848
2849
/* Copies an Unicode character from an UTF-7 stream
2850
 * The bits of the base64 data contain:
2851
 *   0 - 23 the base64 triplet
2852
 *  24 - 25 the number of bytes in the triplet
2853
 *  26 - 27 unused
2854
 *  28 - 29 the current byte
2855
 *       30 unused
2856
 *       31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
2857
 *
2858
 * Returns 1 if successful or -1 on error
2859
 */
2860
int libuna_unicode_character_copy_from_utf7_stream(
2861
     libuna_unicode_character_t *unicode_character,
2862
     const uint8_t *utf7_stream,
2863
     size_t utf7_stream_size,
2864
     size_t *utf7_stream_index,
2865
     uint32_t *utf7_stream_base64_data,
2866
     libcerror_error_t **error )
2867
0
{
2868
0
  static char *function                             = "libuna_unicode_character_copy_from_utf7_stream";
2869
0
  libuna_unicode_character_t safe_unicode_character = 0;
2870
0
  libuna_utf16_character_t utf16_surrogate          = 0;
2871
0
  size_t safe_utf7_stream_index                     = 0;
2872
0
  uint32_t base64_triplet                           = 0;
2873
0
  uint32_t safe_utf7_stream_base64_data             = 0;
2874
0
  uint8_t byte_bit_shift                            = 0;
2875
0
  uint8_t current_byte                              = 0;
2876
0
  uint8_t number_of_bytes                           = 0;
2877
0
  uint8_t padding_size                              = 0;
2878
0
  uint8_t utf7_character_value                      = 0;
2879
2880
0
  if( unicode_character == NULL )
2881
0
  {
2882
0
    libcerror_error_set(
2883
0
     error,
2884
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2885
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2886
0
     "%s: invalid Unicode character.",
2887
0
     function );
2888
2889
0
    return( -1 );
2890
0
  }
2891
0
  if( utf7_stream == NULL )
2892
0
  {
2893
0
    libcerror_error_set(
2894
0
     error,
2895
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2896
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2897
0
     "%s: invalid UTF-7 stream.",
2898
0
     function );
2899
2900
0
    return( -1 );
2901
0
  }
2902
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
2903
0
  {
2904
0
    libcerror_error_set(
2905
0
     error,
2906
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2907
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2908
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
2909
0
     function );
2910
2911
0
    return( -1 );
2912
0
  }
2913
0
  if( utf7_stream_index == NULL )
2914
0
  {
2915
0
    libcerror_error_set(
2916
0
     error,
2917
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2918
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2919
0
     "%s: invalid UTF-7 stream index.",
2920
0
     function );
2921
2922
0
    return( -1 );
2923
0
  }
2924
0
  if( utf7_stream_base64_data == NULL )
2925
0
  {
2926
0
    libcerror_error_set(
2927
0
     error,
2928
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2929
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2930
0
     "%s: invalid UTF-7 base64 data.",
2931
0
     function );
2932
2933
0
    return( -1 );
2934
0
  }
2935
0
  safe_utf7_stream_index       = *utf7_stream_index;
2936
0
  safe_utf7_stream_base64_data = *utf7_stream_base64_data;
2937
2938
0
  if( safe_utf7_stream_index >= utf7_stream_size )
2939
0
  {
2940
0
    libcerror_error_set(
2941
0
     error,
2942
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2943
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2944
0
     "%s: UTF-7 stream too small.",
2945
0
     function );
2946
2947
0
    return( -1 );
2948
0
  }
2949
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
2950
0
  {
2951
0
    base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
2952
0
    number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
2953
0
    current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
2954
2955
0
    if( current_byte >= number_of_bytes )
2956
0
    {
2957
0
      if( safe_utf7_stream_index >= utf7_stream_size )
2958
0
      {
2959
0
        libcerror_error_set(
2960
0
         error,
2961
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
2962
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2963
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
2964
0
         function );
2965
2966
0
        return( -1 );
2967
0
      }
2968
0
      utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
2969
2970
      /* Any character not in the modified base64 alphabet terminates the base64 encoded sequence
2971
       */
2972
0
      if( libuna_unicode_character_utf7_valid_base64_character[ utf7_character_value ] == 0 )
2973
0
      {
2974
0
        safe_utf7_stream_base64_data = 0;
2975
0
      }
2976
0
    }
2977
0
  }
2978
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
2979
0
  {
2980
0
    if( safe_utf7_stream_index >= utf7_stream_size )
2981
0
    {
2982
0
      libcerror_error_set(
2983
0
       error,
2984
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
2985
0
       LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
2986
0
       "%s: invalid UTF-7 stream character size value out of bounds.",
2987
0
       function );
2988
2989
0
      return( -1 );
2990
0
    }
2991
0
    utf7_character_value = utf7_stream[ safe_utf7_stream_index ];
2992
2993
    /* Determine if the character is modified base64 encoded
2994
     * or a + character
2995
     */
2996
0
    if( utf7_character_value == (uint8_t) '+' )
2997
0
    {
2998
0
      if( ( safe_utf7_stream_index + 1 ) >= utf7_stream_size )
2999
0
      {
3000
0
        libcerror_error_set(
3001
0
         error,
3002
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3003
0
         LIBCERROR_RUNTIME_ERROR_VALUE_OUT_OF_BOUNDS,
3004
0
         "%s: invalid UTF-7 stream character size value out of bounds.",
3005
0
         function );
3006
3007
0
        return( -1 );
3008
0
      }
3009
0
      if( utf7_stream[ safe_utf7_stream_index + 1 ] != (uint8_t) '-' )
3010
0
      {
3011
0
        safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3012
3013
0
        safe_utf7_stream_index++;
3014
0
      }
3015
0
    }
3016
    /* Allow for the end of string character
3017
     */
3018
0
    else if( utf7_character_value == 0 )
3019
0
    {
3020
0
    }
3021
0
    else if( libuna_unicode_character_utf7_valid_directly_encoded_character[ utf7_character_value ] == 0 )
3022
0
    {
3023
0
      libcerror_error_set(
3024
0
       error,
3025
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3026
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3027
0
       "%s: invalid directly encoded UTF-7 character byte: 0x%02" PRIx8 ".",
3028
0
       function,
3029
0
       utf7_character_value );
3030
3031
0
      return( -1 );
3032
0
    }
3033
0
  }
3034
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3035
0
  {
3036
0
    safe_unicode_character = utf7_stream[ safe_utf7_stream_index++ ];
3037
3038
0
    if( ( safe_unicode_character == (libuna_unicode_character_t) '+' )
3039
0
     && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3040
0
    {
3041
0
      safe_utf7_stream_index++;
3042
0
    }
3043
0
  }
3044
0
  else if( ( number_of_bytes == 0 )
3045
0
        || ( current_byte >= number_of_bytes ) )
3046
0
  {
3047
0
    if( libuna_base64_triplet_copy_from_base64_stream(
3048
0
         &base64_triplet,
3049
0
         utf7_stream,
3050
0
         utf7_stream_size - 1,
3051
0
         &safe_utf7_stream_index,
3052
0
         &padding_size,
3053
0
         LIBUNA_BASE64_VARIANT_UTF7,
3054
0
         error ) != 1 )
3055
0
    {
3056
0
      libcerror_error_set(
3057
0
       error,
3058
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3059
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3060
0
       "%s: unable to copy base64 encoded UTF-7 characters.",
3061
0
       function );
3062
3063
0
      return( -1 );
3064
0
    }
3065
0
    if( padding_size > 2 )
3066
0
    {
3067
0
      libcerror_error_set(
3068
0
       error,
3069
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3070
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3071
0
       "%s: unsupported padding in base64 encoded UTF-7 characters.",
3072
0
       function );
3073
3074
0
      return( -1 );
3075
0
    }
3076
0
    number_of_bytes = 3 - padding_size;
3077
0
    current_byte    = 0;
3078
0
  }
3079
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3080
0
  {
3081
0
    byte_bit_shift         = 16 - ( current_byte * 8 );
3082
0
    safe_unicode_character = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3083
0
    current_byte          += 1;
3084
3085
0
    if( current_byte >= number_of_bytes )
3086
0
    {
3087
0
      if( libuna_base64_triplet_copy_from_base64_stream(
3088
0
           &base64_triplet,
3089
0
           utf7_stream,
3090
0
           utf7_stream_size - 1,
3091
0
           &safe_utf7_stream_index,
3092
0
           &padding_size,
3093
0
           LIBUNA_BASE64_VARIANT_UTF7,
3094
0
           error ) != 1 )
3095
0
      {
3096
0
        libcerror_error_set(
3097
0
         error,
3098
0
         LIBCERROR_ERROR_DOMAIN_CONVERSION,
3099
0
         LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3100
0
         "%s: unable to copy base64 encoded UTF-7 characters.",
3101
0
         function );
3102
3103
0
        return( -1 );
3104
0
      }
3105
0
      if( padding_size > 2 )
3106
0
      {
3107
0
        libcerror_error_set(
3108
0
         error,
3109
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3110
0
         LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3111
0
         "%s: unsupported padding in base64 encoded UTF-7 characters.",
3112
0
         function );
3113
3114
0
        return( -1 );
3115
0
      }
3116
0
      number_of_bytes = 3 - padding_size;
3117
0
      current_byte    = 0;
3118
0
    }
3119
0
    byte_bit_shift          = 16 - ( current_byte * 8 );
3120
0
    safe_unicode_character += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3121
0
    current_byte           += 1;
3122
3123
0
    if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3124
0
     && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
3125
0
    {
3126
0
      if( current_byte >= number_of_bytes )
3127
0
      {
3128
0
        if( libuna_base64_triplet_copy_from_base64_stream(
3129
0
             &base64_triplet,
3130
0
             utf7_stream,
3131
0
             utf7_stream_size - 1,
3132
0
             &safe_utf7_stream_index,
3133
0
             &padding_size,
3134
0
             LIBUNA_BASE64_VARIANT_UTF7,
3135
0
             error ) != 1 )
3136
0
        {
3137
0
          libcerror_error_set(
3138
0
           error,
3139
0
           LIBCERROR_ERROR_DOMAIN_CONVERSION,
3140
0
           LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3141
0
           "%s: unable to copy base64 encoded UTF-7 characters.",
3142
0
           function );
3143
3144
0
          return( -1 );
3145
0
        }
3146
0
        if( padding_size > 2 )
3147
0
        {
3148
0
          libcerror_error_set(
3149
0
           error,
3150
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3151
0
           LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3152
0
           "%s: unsupported padding in base64 encoded UTF-7 characters.",
3153
0
           function );
3154
3155
0
          return( -1 );
3156
0
        }
3157
0
        number_of_bytes = 3 - padding_size;
3158
0
        current_byte    = 0;
3159
0
      }
3160
0
      byte_bit_shift  = 16 - ( current_byte * 8 );
3161
0
      utf16_surrogate = ( ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL ) << 8;
3162
0
      current_byte   += 1;
3163
3164
0
      if( current_byte >= number_of_bytes )
3165
0
      {
3166
0
        if( libuna_base64_triplet_copy_from_base64_stream(
3167
0
             &base64_triplet,
3168
0
             utf7_stream,
3169
0
             utf7_stream_size - 1,
3170
0
             &safe_utf7_stream_index,
3171
0
             &padding_size,
3172
0
             LIBUNA_BASE64_VARIANT_UTF7,
3173
0
             error ) != 1 )
3174
0
        {
3175
0
          libcerror_error_set(
3176
0
           error,
3177
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3178
0
           LIBCERROR_RUNTIME_ERROR_GET_FAILED,
3179
0
           "%s: unable to retrieve base64 encoded UTF-7 characters.",
3180
0
           function );
3181
3182
0
          return( -1 );
3183
0
        }
3184
0
        if( padding_size > 2 )
3185
0
        {
3186
0
          libcerror_error_set(
3187
0
           error,
3188
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3189
0
           LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3190
0
           "%s: unsupported padding in base64 encoded UTF-7 characters.",
3191
0
           function );
3192
3193
0
          return( -1 );
3194
0
        }
3195
0
        number_of_bytes = 3 - padding_size;
3196
0
        current_byte    = 0;
3197
0
      }
3198
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3199
0
      utf16_surrogate += ( base64_triplet >> byte_bit_shift ) & 0x000000ffUL;
3200
0
      current_byte    += 1;
3201
3202
      /* Determine if the UTF-16 character is within the low surrogate range
3203
       */
3204
0
      if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
3205
0
       && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3206
0
      {
3207
0
        safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
3208
0
        safe_unicode_character <<= 10;
3209
0
        safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
3210
0
        safe_unicode_character  += 0x010000;
3211
0
      }
3212
0
      else
3213
0
      {
3214
0
        libcerror_error_set(
3215
0
         error,
3216
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3217
0
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3218
0
         "%s: unsupported low surrogate UTF-16 character.",
3219
0
         function );
3220
3221
0
        return( -1 );
3222
0
      }
3223
0
    }
3224
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3225
0
    {
3226
0
      libcerror_error_set(
3227
0
       error,
3228
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3229
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3230
0
       "%s: UTF-7 stream too small.",
3231
0
       function );
3232
3233
0
      return( -1 );
3234
0
    }
3235
0
    if( ( current_byte >= number_of_bytes )
3236
0
     && ( utf7_stream[ safe_utf7_stream_index ] == (uint8_t) '-' ) )
3237
0
    {
3238
0
      safe_utf7_stream_base64_data = 0;
3239
3240
0
      safe_utf7_stream_index++;
3241
0
    }
3242
0
  }
3243
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3244
0
  {
3245
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
3246
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3247
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3248
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3249
0
  }
3250
0
  *unicode_character       = safe_unicode_character;
3251
0
  *utf7_stream_index       = safe_utf7_stream_index;
3252
0
  *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3253
3254
0
  return( 1 );
3255
0
}
3256
3257
/* Copies an Unicode character into a UTF-7 stream
3258
 * The bits of the base64 data contain:
3259
 *   0 - 23 the base64 triplet
3260
 *  24 - 25 the number of bytes in the triplet
3261
 *  26 - 27 unused
3262
 *  28 - 29 the current byte
3263
 *       30 unused
3264
 *       31 flag to indicate the current UTF-7 characters are (modified) base64 encoded
3265
 *
3266
 * Returns 1 if successful or -1 on error
3267
 */
3268
int libuna_unicode_character_copy_to_utf7_stream(
3269
     libuna_unicode_character_t unicode_character,
3270
     uint8_t *utf7_stream,
3271
     size_t utf7_stream_size,
3272
     size_t *utf7_stream_index,
3273
     uint32_t *utf7_stream_base64_data,
3274
     libcerror_error_t **error )
3275
0
{
3276
0
  static char *function                    = "libuna_unicode_character_copy_to_utf7_stream";
3277
0
  libuna_utf16_character_t utf16_surrogate = 0;
3278
0
  size_t safe_utf7_stream_index            = 0;
3279
0
  uint32_t base64_triplet                  = 0;
3280
0
  uint32_t safe_utf7_stream_base64_data    = 0;
3281
0
  uint8_t base64_encode_character          = 0;
3282
0
  uint8_t byte_bit_shift                   = 0;
3283
0
  uint8_t current_byte                     = 0;
3284
0
  uint8_t number_of_bytes                  = 0;
3285
3286
0
  if( utf7_stream == NULL )
3287
0
  {
3288
0
    libcerror_error_set(
3289
0
     error,
3290
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3291
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3292
0
     "%s: invalid UTF-7 stream.",
3293
0
     function );
3294
3295
0
    return( -1 );
3296
0
  }
3297
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
3298
0
  {
3299
0
    libcerror_error_set(
3300
0
     error,
3301
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3302
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3303
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
3304
0
     function );
3305
3306
0
    return( -1 );
3307
0
  }
3308
0
  if( utf7_stream_index == NULL )
3309
0
  {
3310
0
    libcerror_error_set(
3311
0
     error,
3312
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3313
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3314
0
     "%s: invalid UTF-7 stream index.",
3315
0
     function );
3316
3317
0
    return( -1 );
3318
0
  }
3319
0
  if( utf7_stream_base64_data == NULL )
3320
0
  {
3321
0
    libcerror_error_set(
3322
0
     error,
3323
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3324
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3325
0
     "%s: invalid UTF-7 stream base64 data.",
3326
0
     function );
3327
3328
0
    return( -1 );
3329
0
  }
3330
0
  safe_utf7_stream_index       = *utf7_stream_index;
3331
0
  safe_utf7_stream_base64_data = *utf7_stream_base64_data;
3332
3333
  /* Determine if the Unicode character is valid
3334
   */
3335
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3336
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3337
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
3338
0
  {
3339
0
    libcerror_error_set(
3340
0
     error,
3341
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3342
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3343
0
     "%s: unsupported Unicode character.",
3344
0
     function );
3345
3346
0
    return( -1 );
3347
0
  }
3348
  /* A-Z is not a continous range on an EBCDIC based system
3349
   * it consists of the ranges: A-I, J-R, S-Z
3350
   */
3351
0
  if( ( unicode_character >= 0x41 )
3352
0
   && ( unicode_character <= 0x49 ) )
3353
0
  {
3354
0
    unicode_character = ( unicode_character - 0x41 ) + (libuna_unicode_character_t) 'A';
3355
0
  }
3356
0
  else if( ( unicode_character >= 0x4a )
3357
0
        && ( unicode_character <= 0x52 ) )
3358
0
  {
3359
0
    unicode_character = ( unicode_character - 0x4a ) + (libuna_unicode_character_t) 'J';
3360
0
  }
3361
0
  else if( ( unicode_character >= 0x53 )
3362
0
        && ( unicode_character <= 0x5a ) )
3363
0
  {
3364
0
    unicode_character = ( unicode_character - 0x53 ) + (libuna_unicode_character_t) 'S';
3365
0
  }
3366
  /* a-z is not a continous range on an EBCDIC based system
3367
   * it consists of the ranges: a-i, j-r, s-z
3368
   */
3369
0
  else if( ( unicode_character >= 0x61 )
3370
0
        && ( unicode_character <= 0x69 ) )
3371
0
  {
3372
0
    unicode_character = ( unicode_character - 0x61 ) + (libuna_unicode_character_t) 'a';
3373
0
  }
3374
0
  else if( ( unicode_character >= 0x6a )
3375
0
        && ( unicode_character <= 0x72 ) )
3376
0
  {
3377
0
    unicode_character = ( unicode_character - 0x6a ) + (libuna_unicode_character_t) 'j';
3378
0
  }
3379
0
  else if( ( unicode_character >= 0x73 )
3380
0
        && ( unicode_character <= 0x7a ) )
3381
0
  {
3382
0
    unicode_character = ( unicode_character - 0x73 ) + (libuna_unicode_character_t) 's';
3383
0
  }
3384
  /* 0-9
3385
   */
3386
0
  else if( ( unicode_character >= 0x30 )
3387
0
        && ( unicode_character <= 0x39 ) )
3388
0
  {
3389
0
    unicode_character = ( unicode_character - 0x30 ) + (libuna_unicode_character_t) '0';
3390
0
  }
3391
  /* The + character must be escaped
3392
   */
3393
0
  else if( unicode_character == (libuna_unicode_character_t) '+' )
3394
0
  {
3395
0
  }
3396
  /* Allow for the end of string character
3397
   */
3398
0
  else if( unicode_character == 0 )
3399
0
  {
3400
0
  }
3401
0
  else if( ( unicode_character >= 256 )
3402
0
        || ( libuna_unicode_character_utf7_valid_directly_encoded_character[ (uint8_t) unicode_character ] == 0 ) )
3403
0
  {
3404
0
    base64_encode_character = 1;
3405
0
  }
3406
0
  if( base64_encode_character == 0 )
3407
0
  {
3408
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3409
0
    {
3410
0
      safe_utf7_stream_base64_data = 0;
3411
0
    }
3412
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3413
0
    {
3414
0
      libcerror_error_set(
3415
0
       error,
3416
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3417
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3418
0
       "%s: UTF-7 stream too small.",
3419
0
       function );
3420
3421
0
      return( -1 );
3422
0
    }
3423
0
    utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) unicode_character;
3424
3425
    /* The + character must be escaped
3426
     */
3427
0
    if( unicode_character == (libuna_unicode_character_t) '+' )
3428
0
    {
3429
0
      if( safe_utf7_stream_index >= utf7_stream_size )
3430
0
      {
3431
0
        libcerror_error_set(
3432
0
         error,
3433
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3434
0
         LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3435
0
         "%s: UTF-7 stream too small.",
3436
0
         function );
3437
3438
0
        return( -1 );
3439
0
      }
3440
0
      utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3441
0
    }
3442
0
  }
3443
0
  else
3444
0
  {
3445
    /* Escape the base64 encoded chracters with a +
3446
     */
3447
0
    if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) == 0 )
3448
0
    {
3449
0
      if( safe_utf7_stream_index >= utf7_stream_size )
3450
0
      {
3451
0
        libcerror_error_set(
3452
0
         error,
3453
0
         LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3454
0
         LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3455
0
         "%s: UTF-7 stream too small.",
3456
0
         function );
3457
3458
0
        return( -1 );
3459
0
      }
3460
0
      utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '+';
3461
0
    }
3462
    /* Otherwise continue the previously base64 encoded characters
3463
     */
3464
0
    else
3465
0
    {
3466
0
      base64_triplet  = safe_utf7_stream_base64_data & 0x00ffffff;
3467
0
      number_of_bytes = ( safe_utf7_stream_base64_data >> 24 ) & 0x03;
3468
0
      current_byte    = ( safe_utf7_stream_base64_data >> 28 ) & 0x03;
3469
3470
0
      if( number_of_bytes > 0 )
3471
0
      {
3472
        /* Correct the index for the last partial base64 stream
3473
         */
3474
0
        safe_utf7_stream_index -= number_of_bytes + 1;
3475
0
      }
3476
      /* Correct the index for the base64 stream termination character
3477
       */
3478
0
      safe_utf7_stream_index -= 1;
3479
0
    }
3480
0
    safe_utf7_stream_base64_data = LIBUNA_UTF7_IS_BASE64_ENCODED;
3481
3482
0
    if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
3483
0
    {
3484
0
      unicode_character -= 0x010000;
3485
3486
0
      utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 )
3487
0
                      + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
3488
3489
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3490
0
      base64_triplet  += (uint32_t) ( ( utf16_surrogate >> 8 ) & 0xff ) << byte_bit_shift;
3491
0
      current_byte    += 1;
3492
0
      number_of_bytes += 1;
3493
3494
0
      if( number_of_bytes == 3 )
3495
0
      {
3496
0
        if( libuna_base64_triplet_copy_to_base64_stream(
3497
0
             base64_triplet,
3498
0
             utf7_stream,
3499
0
             utf7_stream_size,
3500
0
             &safe_utf7_stream_index,
3501
0
             0,
3502
0
             LIBUNA_BASE64_VARIANT_UTF7,
3503
0
             error ) != 1 )
3504
0
        {
3505
0
          libcerror_error_set(
3506
0
           error,
3507
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3508
0
           LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3509
0
           "%s: unable to set base64 encoded UTF-7 characters.",
3510
0
           function );
3511
3512
0
          return( -1 );
3513
0
        }
3514
0
        number_of_bytes = 0;
3515
0
        current_byte    = 0;
3516
0
        base64_triplet  = 0;
3517
0
      }
3518
0
      byte_bit_shift   = 16 - ( current_byte * 8 );
3519
0
      base64_triplet  += (uint32_t) ( utf16_surrogate & 0xff ) << byte_bit_shift;
3520
0
      current_byte    += 1;
3521
0
      number_of_bytes += 1;
3522
3523
0
      if( number_of_bytes == 3 )
3524
0
      {
3525
0
        if( libuna_base64_triplet_copy_to_base64_stream(
3526
0
             base64_triplet,
3527
0
             utf7_stream,
3528
0
             utf7_stream_size,
3529
0
             &safe_utf7_stream_index,
3530
0
             0,
3531
0
             LIBUNA_BASE64_VARIANT_UTF7,
3532
0
             error ) != 1 )
3533
0
        {
3534
0
          libcerror_error_set(
3535
0
           error,
3536
0
           LIBCERROR_ERROR_DOMAIN_RUNTIME,
3537
0
           LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3538
0
           "%s: unable to set base64 encoded UTF-7 characters.",
3539
0
           function );
3540
3541
0
          return( -1 );
3542
0
        }
3543
0
        number_of_bytes = 0;
3544
0
        current_byte    = 0;
3545
0
        base64_triplet  = 0;
3546
0
      }
3547
0
      unicode_character = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff )
3548
0
                        + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
3549
0
    }
3550
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
3551
0
    base64_triplet  += (uint32_t) ( ( unicode_character >> 8 ) & 0xff ) << byte_bit_shift;
3552
0
    current_byte    += 1;
3553
0
    number_of_bytes += 1;
3554
3555
0
    if( number_of_bytes == 3 )
3556
0
    {
3557
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3558
0
           base64_triplet,
3559
0
           utf7_stream,
3560
0
           utf7_stream_size,
3561
0
           &safe_utf7_stream_index,
3562
0
           0,
3563
0
           LIBUNA_BASE64_VARIANT_UTF7,
3564
0
           error ) != 1 )
3565
0
      {
3566
0
        libcerror_error_set(
3567
0
         error,
3568
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3569
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3570
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3571
0
         function );
3572
3573
0
        return( -1 );
3574
0
      }
3575
0
      number_of_bytes = 0;
3576
0
      current_byte    = 0;
3577
0
      base64_triplet  = 0;
3578
0
    }
3579
0
    byte_bit_shift   = 16 - ( current_byte * 8 );
3580
0
    base64_triplet  += (uint32_t) ( unicode_character & 0xff ) << byte_bit_shift;
3581
0
    current_byte    += 1;
3582
0
    number_of_bytes += 1;
3583
3584
0
    if( number_of_bytes == 3 )
3585
0
    {
3586
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3587
0
           base64_triplet,
3588
0
           utf7_stream,
3589
0
           utf7_stream_size,
3590
0
           &safe_utf7_stream_index,
3591
0
           0,
3592
0
           LIBUNA_BASE64_VARIANT_UTF7,
3593
0
           error ) != 1 )
3594
0
      {
3595
0
        libcerror_error_set(
3596
0
         error,
3597
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3598
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3599
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3600
0
         function );
3601
3602
0
        return( -1 );
3603
0
      }
3604
0
      number_of_bytes = 0;
3605
0
      current_byte    = 0;
3606
0
      base64_triplet  = 0;
3607
0
    }
3608
    /* Terminate the base64 encoded characters
3609
     */
3610
0
    if( number_of_bytes > 0 )
3611
0
    {
3612
0
      if( libuna_base64_triplet_copy_to_base64_stream(
3613
0
           base64_triplet,
3614
0
           utf7_stream,
3615
0
           utf7_stream_size,
3616
0
           &safe_utf7_stream_index,
3617
0
           3 - number_of_bytes,
3618
0
           LIBUNA_BASE64_VARIANT_UTF7,
3619
0
           error ) != 1 )
3620
0
      {
3621
0
        libcerror_error_set(
3622
0
         error,
3623
0
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
3624
0
         LIBCERROR_RUNTIME_ERROR_SET_FAILED,
3625
0
         "%s: unable to set base64 encoded UTF-7 characters.",
3626
0
         function );
3627
3628
0
        return( -1 );
3629
0
      }
3630
0
    }
3631
0
    if( safe_utf7_stream_index >= utf7_stream_size )
3632
0
    {
3633
0
      libcerror_error_set(
3634
0
       error,
3635
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3636
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3637
0
       "%s: UTF-7 stream too small.",
3638
0
       function );
3639
3640
0
      return( -1 );
3641
0
    }
3642
0
    utf7_stream[ safe_utf7_stream_index++ ] = (uint8_t) '-';
3643
0
  }
3644
0
  if( ( safe_utf7_stream_base64_data & LIBUNA_UTF7_IS_BASE64_ENCODED ) != 0 )
3645
0
  {
3646
0
    safe_utf7_stream_base64_data  = LIBUNA_UTF7_IS_BASE64_ENCODED;
3647
0
    safe_utf7_stream_base64_data |= (uint32_t) current_byte << 28;
3648
0
    safe_utf7_stream_base64_data |= (uint32_t) number_of_bytes << 24;
3649
0
    safe_utf7_stream_base64_data |= base64_triplet & 0x00ffffff;
3650
0
  }
3651
0
  *utf7_stream_index       = safe_utf7_stream_index;
3652
0
  *utf7_stream_base64_data = safe_utf7_stream_base64_data;
3653
3654
0
  return( 1 );
3655
0
}
3656
3657
/* Determines the size of an UTF-8 character from an Unicode character
3658
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
3659
 * Adds the size to the UTF-8 character size value
3660
 * Returns 1 if successful or -1 on error
3661
 */
3662
int libuna_unicode_character_size_to_utf8(
3663
     libuna_unicode_character_t unicode_character,
3664
     size_t *utf8_character_size,
3665
     libcerror_error_t **error )
3666
4.17M
{
3667
4.17M
  static char *function           = "libuna_unicode_character_size_to_utf8";
3668
4.17M
  size_t safe_utf8_character_size = 0;
3669
3670
4.17M
  if( utf8_character_size == NULL )
3671
0
  {
3672
0
    libcerror_error_set(
3673
0
     error,
3674
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3675
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3676
0
     "%s: invalid UTF-8 character size.",
3677
0
     function );
3678
3679
0
    return( -1 );
3680
0
  }
3681
  /* Determine if the Unicode character is valid
3682
   */
3683
4.17M
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
3684
4.17M
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
3685
4.17M
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
3686
0
  {
3687
0
    libcerror_error_set(
3688
0
     error,
3689
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3690
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
3691
0
     "%s: unsupported Unicode character.",
3692
0
     function );
3693
3694
0
    return( -1 );
3695
0
  }
3696
  /* RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
3697
   * while its predecessor RFC 2279 allowed up to 6 bytes
3698
   */
3699
4.17M
  if( unicode_character < 0x00000080UL )
3700
1.12M
  {
3701
1.12M
    safe_utf8_character_size += 1;
3702
1.12M
  }
3703
3.05M
  else if( unicode_character < 0x00000800UL )
3704
174k
  {
3705
174k
    safe_utf8_character_size += 2;
3706
174k
  }
3707
2.87M
  else if( unicode_character < 0x00010000UL )
3708
2.86M
  {
3709
2.86M
    safe_utf8_character_size += 3;
3710
2.86M
  }
3711
9.92k
  else
3712
9.92k
  {
3713
9.92k
    safe_utf8_character_size += 4;
3714
9.92k
  }
3715
4.17M
  *utf8_character_size += safe_utf8_character_size;
3716
3717
4.17M
  return( 1 );
3718
4.17M
}
3719
3720
/* Copies an Unicode character from an UTF-8 string
3721
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
3722
 * Returns 1 if successful or -1 on error
3723
 */
3724
int libuna_unicode_character_copy_from_utf8(
3725
     libuna_unicode_character_t *unicode_character,
3726
     const libuna_utf8_character_t *utf8_string,
3727
     size_t utf8_string_size,
3728
     size_t *utf8_string_index,
3729
     libcerror_error_t **error )
3730
261k
{
3731
261k
  static char *function                             = "libuna_unicode_character_copy_from_utf8";
3732
261k
  libuna_unicode_character_t safe_unicode_character = 0;
3733
261k
  size_t safe_utf8_string_index                     = 0;
3734
261k
  uint8_t byte_value1                               = 0;
3735
261k
  uint8_t byte_value2                               = 0;
3736
261k
  uint8_t byte_value3                               = 0;
3737
261k
  uint8_t byte_value4                               = 0;
3738
261k
  uint8_t utf8_character_additional_bytes           = 0;
3739
261k
  int result                                        = 0;
3740
3741
261k
  if( unicode_character == NULL )
3742
0
  {
3743
0
    libcerror_error_set(
3744
0
     error,
3745
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3746
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3747
0
     "%s: invalid Unicode character.",
3748
0
     function );
3749
3750
0
    return( -1 );
3751
0
  }
3752
261k
  if( utf8_string == NULL )
3753
0
  {
3754
0
    libcerror_error_set(
3755
0
     error,
3756
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3757
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3758
0
     "%s: invalid UTF-8 string.",
3759
0
     function );
3760
3761
0
    return( -1 );
3762
0
  }
3763
261k
  if( utf8_string_size > (size_t) SSIZE_MAX )
3764
0
  {
3765
0
    libcerror_error_set(
3766
0
     error,
3767
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3768
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3769
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
3770
0
     function );
3771
3772
0
    return( -1 );
3773
0
  }
3774
261k
  if( utf8_string_index == NULL )
3775
0
  {
3776
0
    libcerror_error_set(
3777
0
     error,
3778
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3779
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3780
0
     "%s: invalid UTF-8 string index.",
3781
0
     function );
3782
3783
0
    return( -1 );
3784
0
  }
3785
261k
  safe_utf8_string_index = *utf8_string_index;
3786
3787
261k
  if( safe_utf8_string_index >= utf8_string_size )
3788
0
  {
3789
0
    libcerror_error_set(
3790
0
     error,
3791
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3792
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3793
0
     "%s: UTF-8 string too small.",
3794
0
     function );
3795
3796
0
    return( -1 );
3797
0
  }
3798
  /* Determine the number of additional bytes of the UTF-8 character
3799
   */
3800
261k
  byte_value1 = utf8_string[ safe_utf8_string_index ];
3801
3802
  /* Determine the UTF-8 character and make sure it is valid
3803
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
3804
   * while its predecessor RFC 2279 allowed up to 6 bytes
3805
   */
3806
261k
  if( byte_value1 > 0xf4 )
3807
780
  {
3808
780
    libcerror_error_set(
3809
780
     error,
3810
780
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3811
780
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3812
780
     "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
3813
780
     function,
3814
780
     byte_value1 );
3815
3816
780
    return( -1 );
3817
780
  }
3818
260k
  if( byte_value1 < 0xc0 )
3819
244k
  {
3820
244k
    utf8_character_additional_bytes = 0;
3821
244k
  }
3822
16.3k
  else if( byte_value1 < 0xe0 )
3823
3.66k
  {
3824
3.66k
    utf8_character_additional_bytes = 1;
3825
3.66k
  }
3826
12.6k
  else if( byte_value1 < 0xf0 )
3827
8.45k
  {
3828
8.45k
    utf8_character_additional_bytes = 2;
3829
8.45k
  }
3830
4.22k
  else
3831
4.22k
  {
3832
4.22k
    utf8_character_additional_bytes = 3;
3833
4.22k
  }
3834
260k
  if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
3835
260k
   || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
3836
140
  {
3837
140
    libcerror_error_set(
3838
140
     error,
3839
140
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3840
140
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3841
140
     "%s: missing UTF-8 character bytes.",
3842
140
     function );
3843
3844
140
    return( -1 );
3845
140
  }
3846
260k
  safe_unicode_character = byte_value1;
3847
3848
260k
  if( utf8_character_additional_bytes == 0 )
3849
244k
  {
3850
244k
    if( byte_value1 >= 0x80 )
3851
616
    {
3852
616
      libcerror_error_set(
3853
616
       error,
3854
616
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3855
616
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3856
616
       "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
3857
616
       function,
3858
616
       byte_value1 );
3859
3860
616
      return( -1 );
3861
616
    }
3862
244k
  }
3863
260k
  if( utf8_character_additional_bytes >= 1 )
3864
16.2k
  {
3865
16.2k
    byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
3866
3867
16.2k
    if( ( byte_value2 < 0x80 )
3868
16.2k
     || ( byte_value2 > 0xbf ) )
3869
725
    {
3870
725
      libcerror_error_set(
3871
725
       error,
3872
725
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3873
725
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3874
725
       "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
3875
725
       function,
3876
725
       byte_value2 );
3877
3878
725
      return( -1 );
3879
725
    }
3880
15.4k
    result = 1;
3881
3882
15.4k
    switch( byte_value1 )
3883
15.4k
    {
3884
2.15k
      case 0xe0:
3885
2.15k
        if( ( byte_value2 < 0xa0 )
3886
2.15k
         || ( byte_value2 > 0xbf ) )
3887
87
        {
3888
87
          result = 0;
3889
87
        }
3890
2.15k
        break;
3891
3892
1.84k
      case 0xed:
3893
1.84k
        if( ( byte_value2 < 0x80 )
3894
1.84k
         || ( byte_value2 > 0x9f ) )
3895
64
        {
3896
64
          result = 0;
3897
64
        }
3898
1.84k
        break;
3899
3900
2.05k
      case 0xf0:
3901
2.05k
        if( ( byte_value2 < 0x90 )
3902
2.05k
         || ( byte_value2 > 0xbf ) )
3903
72
        {
3904
72
          result = 0;
3905
72
        }
3906
2.05k
        break;
3907
3908
937
      case 0xf4:
3909
937
        if( ( byte_value2 < 0x80 )
3910
937
         || ( byte_value2 > 0xbf ) )
3911
0
        {
3912
0
          result = 0;
3913
0
        }
3914
937
        break;
3915
3916
8.48k
      default:
3917
8.48k
        break;
3918
15.4k
    }
3919
15.4k
    if( result == 0 )
3920
223
    {
3921
223
      libcerror_error_set(
3922
223
       error,
3923
223
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3924
223
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3925
223
       "%s: invalid 1st and 2nd UTF-8 character byte pair: 0x%02" PRIx8 " 0x%02" PRIx8 ".",
3926
223
       function,
3927
223
       byte_value1,
3928
223
       byte_value2 );
3929
3930
223
      return( -1 );
3931
223
    }
3932
15.2k
    safe_unicode_character <<= 6;
3933
15.2k
    safe_unicode_character += byte_value2;
3934
3935
15.2k
    if( utf8_character_additional_bytes == 1 )
3936
3.37k
    {
3937
3.37k
      safe_unicode_character -= 0x03080;
3938
3.37k
    }
3939
15.2k
  }
3940
259k
  if( utf8_character_additional_bytes >= 2 )
3941
11.8k
  {
3942
11.8k
    byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
3943
3944
11.8k
    if( ( byte_value3 < 0x80 )
3945
11.8k
     || ( byte_value3 > 0xbf ) )
3946
330
    {
3947
330
      libcerror_error_set(
3948
330
       error,
3949
330
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3950
330
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3951
330
       "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
3952
330
       function,
3953
330
       byte_value3 );
3954
3955
330
      return( -1 );
3956
330
    }
3957
11.5k
    result = 1;
3958
3959
11.5k
    switch( byte_value2 )
3960
11.5k
    {
3961
0
      case 0xe0:
3962
0
        if( ( byte_value2 < 0xa0 )
3963
0
         || ( byte_value2 > 0xbf ) )
3964
0
        {
3965
0
          result = 0;
3966
0
        }
3967
0
        break;
3968
3969
0
      case 0xed:
3970
0
        if( ( byte_value2 < 0x80 )
3971
0
         || ( byte_value2 > 0x9f ) )
3972
0
        {
3973
0
          result = 0;
3974
0
        }
3975
0
        break;
3976
3977
11.5k
      default:
3978
11.5k
        break;
3979
11.5k
    }
3980
11.5k
    if( result == 0 )
3981
0
    {
3982
0
      libcerror_error_set(
3983
0
       error,
3984
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3985
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
3986
0
       "%s: invalid 2nd and 3rd UTF-8 character byte pair: 0x%02" PRIx8 " 0x%02" PRIx8 ".",
3987
0
       function,
3988
0
       byte_value2,
3989
0
       byte_value3 );
3990
3991
0
      return( -1 );
3992
0
    }
3993
11.5k
    safe_unicode_character <<= 6;
3994
11.5k
    safe_unicode_character += byte_value3;
3995
3996
11.5k
    if( utf8_character_additional_bytes == 2 )
3997
7.65k
    {
3998
7.65k
      safe_unicode_character -= 0x0e2080;
3999
7.65k
    }
4000
11.5k
  }
4001
258k
  if( utf8_character_additional_bytes >= 3 )
4002
3.90k
  {
4003
3.90k
    byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4004
4005
3.90k
    if( ( byte_value4 < 0x80 )
4006
3.90k
     || ( byte_value4 > 0xbf ) )
4007
163
    {
4008
163
      libcerror_error_set(
4009
163
       error,
4010
163
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4011
163
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4012
163
       "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4013
163
       function,
4014
163
       byte_value4 );
4015
4016
163
      return( -1 );
4017
163
    }
4018
3.73k
    safe_unicode_character <<= 6;
4019
3.73k
    safe_unicode_character += byte_value4;
4020
4021
3.73k
    if( utf8_character_additional_bytes == 3 )
4022
3.73k
    {
4023
3.73k
      safe_unicode_character -= 0x03c82080;
4024
3.73k
    }
4025
3.73k
  }
4026
  /* Determine if the Unicode character is valid
4027
   */
4028
258k
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4029
258k
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4030
258k
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4031
49
  {
4032
49
    libcerror_error_set(
4033
49
     error,
4034
49
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4035
49
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4036
49
     "%s: unsupported Unicode character.",
4037
49
     function );
4038
4039
49
    return( -1 );
4040
49
  }
4041
258k
  *unicode_character = safe_unicode_character;
4042
258k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4043
4044
258k
  return( 1 );
4045
258k
}
4046
4047
/* Copies an Unicode character into a UTF-8 string
4048
 * This function supports upto U+10FFFF (4 byte UTF-8 characters)
4049
 * Returns 1 if successful or -1 on error
4050
 */
4051
int libuna_unicode_character_copy_to_utf8(
4052
     libuna_unicode_character_t unicode_character,
4053
     libuna_utf8_character_t *utf8_string,
4054
     size_t utf8_string_size,
4055
     size_t *utf8_string_index,
4056
     libcerror_error_t **error )
4057
3.77M
{
4058
3.77M
  static char *function                   = "libuna_unicode_character_copy_to_utf8";
4059
3.77M
  size_t safe_utf8_string_index           = 0;
4060
3.77M
  size_t utf8_character_iterator          = 0;
4061
3.77M
  uint8_t utf8_character_additional_bytes = 0;
4062
3.77M
  uint8_t utf8_first_character_mark       = 0;
4063
4064
3.77M
  if( utf8_string == NULL )
4065
0
  {
4066
0
    libcerror_error_set(
4067
0
     error,
4068
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4069
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4070
0
     "%s: invalid UTF-8 string.",
4071
0
     function );
4072
4073
0
    return( -1 );
4074
0
  }
4075
3.77M
  if( utf8_string_size > (size_t) SSIZE_MAX )
4076
0
  {
4077
0
    libcerror_error_set(
4078
0
     error,
4079
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4080
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4081
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4082
0
     function );
4083
4084
0
    return( -1 );
4085
0
  }
4086
3.77M
  if( utf8_string_index == NULL )
4087
0
  {
4088
0
    libcerror_error_set(
4089
0
     error,
4090
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4091
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4092
0
     "%s: invalid UTF-8 string index.",
4093
0
     function );
4094
4095
0
    return( -1 );
4096
0
  }
4097
3.77M
  safe_utf8_string_index = *utf8_string_index;
4098
4099
3.77M
  if( safe_utf8_string_index >= utf8_string_size )
4100
341
  {
4101
341
    libcerror_error_set(
4102
341
     error,
4103
341
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4104
341
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4105
341
     "%s: UTF-8 string too small.",
4106
341
     function );
4107
4108
341
    return( -1 );
4109
341
  }
4110
  /* Determine if the Unicode character is valid
4111
   */
4112
3.77M
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4113
3.77M
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4114
3.77M
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4115
0
  {
4116
0
    libcerror_error_set(
4117
0
     error,
4118
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4119
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4120
0
     "%s: unsupported Unicode character.",
4121
0
     function );
4122
4123
0
    return( -1 );
4124
0
  }
4125
  /* Determine how many UTF-8 character bytes are required
4126
   */
4127
3.77M
  if( unicode_character < 0x080 )
4128
1.04M
  {
4129
1.04M
    utf8_character_additional_bytes = 0;
4130
1.04M
    utf8_first_character_mark       = 0;
4131
1.04M
  }
4132
2.72M
  else if( unicode_character < 0x0800 )
4133
107k
  {
4134
107k
    utf8_character_additional_bytes = 1;
4135
107k
    utf8_first_character_mark       = 0x0c0;
4136
107k
  }
4137
2.61M
  else if( unicode_character < 0x010000 )
4138
2.61M
  {
4139
2.61M
    utf8_character_additional_bytes = 2;
4140
2.61M
    utf8_first_character_mark       = 0x0e0;
4141
2.61M
  }
4142
3.79k
  else
4143
3.79k
  {
4144
3.79k
    utf8_character_additional_bytes = 3;
4145
3.79k
    utf8_first_character_mark       = 0x0f0;
4146
3.79k
  }
4147
  /* Convert Unicode character into UTF-8 character bytes
4148
   */
4149
3.77M
  if( ( utf8_character_additional_bytes > utf8_string_size )
4150
3.77M
   || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4151
523
  {
4152
523
    libcerror_error_set(
4153
523
     error,
4154
523
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4155
523
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4156
523
     "%s: UTF-8 string too small.",
4157
523
     function );
4158
4159
523
    return( -1 );
4160
523
  }
4161
3.77M
  for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4162
9.11M
       utf8_character_iterator > safe_utf8_string_index;
4163
5.34M
       utf8_character_iterator-- )
4164
5.34M
  {
4165
5.34M
    utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4166
4167
5.34M
    unicode_character >>= 6;
4168
5.34M
  }
4169
3.77M
  utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4170
4171
3.77M
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4172
4173
3.77M
  return( 1 );
4174
3.77M
}
4175
4176
/* Determines the size of an UTF-8 character from an Unicode character
4177
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4178
 * Adds the size to the UTF-8 character size value
4179
 * Returns 1 if successful or -1 on error
4180
 */
4181
int libuna_unicode_character_size_to_utf8_rfc2279(
4182
     libuna_unicode_character_t unicode_character,
4183
     size_t *utf8_character_size,
4184
     libcerror_error_t **error )
4185
0
{
4186
0
  static char *function           = "libuna_unicode_character_size_to_utf8_rfc2279";
4187
0
  size_t safe_utf8_character_size = 0;
4188
4189
0
  if( utf8_character_size == NULL )
4190
0
  {
4191
0
    libcerror_error_set(
4192
0
     error,
4193
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4194
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4195
0
     "%s: invalid UTF-8 character size.",
4196
0
     function );
4197
4198
0
    return( -1 );
4199
0
  }
4200
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4201
0
  {
4202
0
    libcerror_error_set(
4203
0
     error,
4204
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4205
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4206
0
     "%s: unsupported Unicode character.",
4207
0
     function );
4208
4209
0
    return( -1 );
4210
0
  }
4211
  /* RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4212
   * while its predecessor RFC 2279 allowed up to 6 bytes
4213
   */
4214
0
  if( unicode_character < 0x00000080UL )
4215
0
  {
4216
0
    safe_utf8_character_size += 1;
4217
0
  }
4218
0
  else if( unicode_character < 0x00000800UL )
4219
0
  {
4220
0
    safe_utf8_character_size += 2;
4221
0
  }
4222
0
  else if( unicode_character < 0x00010000UL )
4223
0
  {
4224
0
    safe_utf8_character_size += 3;
4225
0
  }
4226
0
  else if( unicode_character < 0x00200000UL )
4227
0
  {
4228
0
    safe_utf8_character_size += 4;
4229
0
  }
4230
0
  else if( unicode_character < 0x04000000UL )
4231
0
  {
4232
0
    safe_utf8_character_size += 5;
4233
0
  }
4234
0
  else
4235
0
  {
4236
0
    safe_utf8_character_size += 6;
4237
0
  }
4238
0
  *utf8_character_size += safe_utf8_character_size;
4239
4240
0
  return( 1 );
4241
0
}
4242
4243
/* Copies an Unicode character from an UTF-8 string
4244
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4245
 * Returns 1 if successful or -1 on error
4246
 */
4247
int libuna_unicode_character_copy_from_utf8_rfc2279(
4248
     libuna_unicode_character_t *unicode_character,
4249
     const libuna_utf8_character_t *utf8_string,
4250
     size_t utf8_string_size,
4251
     size_t *utf8_string_index,
4252
     libcerror_error_t **error )
4253
82.0k
{
4254
82.0k
  static char *function                             = "libuna_unicode_character_copy_from_utf8_rfc2279";
4255
82.0k
  libuna_unicode_character_t safe_unicode_character = 0;
4256
82.0k
  size_t safe_utf8_string_index                     = 0;
4257
82.0k
  uint8_t byte_value1                               = 0;
4258
82.0k
  uint8_t byte_value2                               = 0;
4259
82.0k
  uint8_t byte_value3                               = 0;
4260
82.0k
  uint8_t byte_value4                               = 0;
4261
82.0k
  uint8_t byte_value5                               = 0;
4262
82.0k
  uint8_t byte_value6                               = 0;
4263
82.0k
  uint8_t utf8_character_additional_bytes           = 0;
4264
4265
82.0k
  if( unicode_character == NULL )
4266
0
  {
4267
0
    libcerror_error_set(
4268
0
     error,
4269
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4270
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4271
0
     "%s: invalid Unicode character.",
4272
0
     function );
4273
4274
0
    return( -1 );
4275
0
  }
4276
82.0k
  if( utf8_string == NULL )
4277
0
  {
4278
0
    libcerror_error_set(
4279
0
     error,
4280
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4281
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4282
0
     "%s: invalid UTF-8 string.",
4283
0
     function );
4284
4285
0
    return( -1 );
4286
0
  }
4287
82.0k
  if( utf8_string_size > (size_t) SSIZE_MAX )
4288
0
  {
4289
0
    libcerror_error_set(
4290
0
     error,
4291
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4292
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4293
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4294
0
     function );
4295
4296
0
    return( -1 );
4297
0
  }
4298
82.0k
  if( utf8_string_index == NULL )
4299
0
  {
4300
0
    libcerror_error_set(
4301
0
     error,
4302
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4303
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4304
0
     "%s: invalid UTF-8 string index.",
4305
0
     function );
4306
4307
0
    return( -1 );
4308
0
  }
4309
82.0k
  safe_utf8_string_index = *utf8_string_index;
4310
4311
82.0k
  if( safe_utf8_string_index >= utf8_string_size )
4312
0
  {
4313
0
    libcerror_error_set(
4314
0
     error,
4315
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4316
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4317
0
     "%s: UTF-8 string too small.",
4318
0
     function );
4319
4320
0
    return( -1 );
4321
0
  }
4322
  /* Determine the number of additional bytes of the UTF-8 character
4323
   */
4324
82.0k
  byte_value1 = utf8_string[ safe_utf8_string_index ];
4325
4326
  /* Determine the UTF-8 character and make sure it is valid
4327
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4328
   * while its predecessor RFC 2279 allowed up to 6 bytes
4329
   */
4330
82.0k
  if( byte_value1 > 0xfd )
4331
0
  {
4332
0
    libcerror_error_set(
4333
0
     error,
4334
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4335
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4336
0
     "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4337
0
     function,
4338
0
     byte_value1 );
4339
4340
0
    return( -1 );
4341
0
  }
4342
82.0k
  if( byte_value1 < 0xc0 )
4343
53.2k
  {
4344
53.2k
    utf8_character_additional_bytes = 0;
4345
53.2k
  }
4346
28.8k
  else if( byte_value1 < 0xe0 )
4347
7.64k
  {
4348
7.64k
    utf8_character_additional_bytes = 1;
4349
7.64k
  }
4350
21.2k
  else if( byte_value1 < 0xf0 )
4351
16.6k
  {
4352
16.6k
    utf8_character_additional_bytes = 2;
4353
16.6k
  }
4354
4.54k
  else if( byte_value1 < 0xf8 )
4355
4.54k
  {
4356
4.54k
    utf8_character_additional_bytes = 3;
4357
4.54k
  }
4358
0
  else if( byte_value1 < 0xfc )
4359
0
  {
4360
0
    utf8_character_additional_bytes = 4;
4361
0
  }
4362
0
  else
4363
0
  {
4364
0
    utf8_character_additional_bytes = 5;
4365
0
  }
4366
82.0k
  if( ( ( (size_t) utf8_character_additional_bytes + 1 ) > utf8_string_size )
4367
82.0k
   || ( safe_utf8_string_index > ( utf8_string_size - ( utf8_character_additional_bytes + 1 ) ) ) )
4368
0
  {
4369
0
    libcerror_error_set(
4370
0
     error,
4371
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4372
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4373
0
     "%s: missing UTF-8 character bytes.",
4374
0
     function );
4375
4376
0
    return( -1 );
4377
0
  }
4378
  /* Determine the UTF-8 character and make sure it is valid
4379
   * RFC 3629 limits the UTF-8 character to consist of a maximum of 4 bytes
4380
   * while its predecessor RFC 2279 allowed up to 6 bytes
4381
   */
4382
82.0k
  safe_unicode_character = byte_value1;
4383
4384
82.0k
  if( utf8_character_additional_bytes == 0 )
4385
53.2k
  {
4386
53.2k
    if( byte_value1 >= 0x80 )
4387
0
    {
4388
0
      libcerror_error_set(
4389
0
       error,
4390
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4391
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4392
0
       "%s: invalid 1st UTF-8 character byte: 0x%02" PRIx8 ".",
4393
0
       function,
4394
0
       byte_value1 );
4395
4396
0
      return( -1 );
4397
0
    }
4398
53.2k
  }
4399
82.0k
  if( utf8_character_additional_bytes >= 1 )
4400
28.8k
  {
4401
28.8k
    byte_value2 = utf8_string[ safe_utf8_string_index + 1 ];
4402
4403
28.8k
    if( ( byte_value2 < 0x80 )
4404
28.8k
     || ( byte_value2 > 0xbf ) )
4405
0
    {
4406
0
      libcerror_error_set(
4407
0
       error,
4408
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4409
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4410
0
       "%s: invalid 2nd UTF-8 character byte: 0x%02" PRIx8 ".",
4411
0
       function,
4412
0
       byte_value2 );
4413
4414
0
      return( -1 );
4415
0
    }
4416
28.8k
    safe_unicode_character <<= 6;
4417
28.8k
    safe_unicode_character += byte_value2;
4418
4419
28.8k
    if( utf8_character_additional_bytes == 1 )
4420
7.64k
    {
4421
7.64k
      safe_unicode_character -= 0x03080;
4422
7.64k
    }
4423
28.8k
  }
4424
82.0k
  if( utf8_character_additional_bytes >= 2 )
4425
21.2k
  {
4426
21.2k
    byte_value3 = utf8_string[ safe_utf8_string_index + 2 ];
4427
4428
21.2k
    if( ( byte_value3 < 0x80 )
4429
21.2k
     || ( byte_value3 > 0xbf ) )
4430
0
    {
4431
0
      libcerror_error_set(
4432
0
       error,
4433
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4434
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4435
0
       "%s: invalid 3rd UTF-8 character byte: 0x%02" PRIx8 ".",
4436
0
       function,
4437
0
       byte_value3 );
4438
4439
0
      return( -1 );
4440
0
    }
4441
21.2k
    safe_unicode_character <<= 6;
4442
21.2k
    safe_unicode_character += byte_value3;
4443
4444
21.2k
    if( utf8_character_additional_bytes == 2 )
4445
16.6k
    {
4446
16.6k
      safe_unicode_character -= 0x0e2080;
4447
16.6k
    }
4448
21.2k
  }
4449
82.0k
  if( utf8_character_additional_bytes >= 3 )
4450
4.54k
  {
4451
4.54k
    byte_value4 = utf8_string[ safe_utf8_string_index + 3 ];
4452
4453
4.54k
    if( ( byte_value4 < 0x80 )
4454
4.54k
     || ( byte_value4 > 0xbf ) )
4455
0
    {
4456
0
      libcerror_error_set(
4457
0
       error,
4458
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4459
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4460
0
       "%s: invalid 4th UTF-8 character byte: 0x%02" PRIx8 ".",
4461
0
       function,
4462
0
       byte_value4 );
4463
4464
0
      return( -1 );
4465
0
    }
4466
4.54k
    safe_unicode_character <<= 6;
4467
4.54k
    safe_unicode_character += byte_value4;
4468
4469
4.54k
    if( utf8_character_additional_bytes == 3 )
4470
4.54k
    {
4471
4.54k
      safe_unicode_character -= 0x03c82080;
4472
4.54k
    }
4473
4.54k
  }
4474
82.0k
  if( utf8_character_additional_bytes >= 4 )
4475
0
  {
4476
0
    byte_value5 = utf8_string[ safe_utf8_string_index + 4 ];
4477
4478
0
    if( ( byte_value5 < 0x80 )
4479
0
     || ( byte_value5 > 0xbf ) )
4480
0
    {
4481
0
      libcerror_error_set(
4482
0
       error,
4483
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4484
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4485
0
       "%s: invalid 5th UTF-8 character byte: 0x%02" PRIx8 ".",
4486
0
       function,
4487
0
       byte_value5 );
4488
4489
0
      return( -1 );
4490
0
    }
4491
0
    safe_unicode_character <<= 6;
4492
0
    safe_unicode_character += byte_value5;
4493
4494
0
    if( utf8_character_additional_bytes == 4 )
4495
0
    {
4496
0
      safe_unicode_character -= 0x0fa082080;
4497
0
    }
4498
0
  }
4499
82.0k
  if( utf8_character_additional_bytes == 5 )
4500
0
  {
4501
0
    byte_value6 = utf8_string[ safe_utf8_string_index + 5 ];
4502
4503
0
    if( ( byte_value6 < 0x80 )
4504
0
      || ( byte_value6 > 0xbf ) )
4505
0
    {
4506
0
      libcerror_error_set(
4507
0
       error,
4508
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4509
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4510
0
       "%s: invalid 6th UTF-8 character byte: 0x%02" PRIx8 ".",
4511
0
       function,
4512
0
       byte_value6 );
4513
4514
0
      return( -1 );
4515
0
    }
4516
0
    safe_unicode_character <<= 6;
4517
0
    safe_unicode_character += byte_value6;
4518
0
    safe_unicode_character -= 0x082082080;
4519
0
  }
4520
  /* Determine if the Unicode character is valid
4521
   */
4522
82.0k
  if( safe_unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4523
0
  {
4524
0
    libcerror_error_set(
4525
0
     error,
4526
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4527
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4528
0
     "%s: unsupported Unicode character.",
4529
0
     function );
4530
4531
0
    return( -1 );
4532
0
  }
4533
82.0k
  *unicode_character = safe_unicode_character;
4534
82.0k
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4535
4536
82.0k
  return( 1 );
4537
82.0k
}
4538
4539
/* Copies an Unicode character into a UTF-8 string
4540
 * This function supports upto U+7FFFFFF (6 byte UTF-8 characters)
4541
 * Returns 1 if successful or -1 on error
4542
 */
4543
int libuna_unicode_character_copy_to_utf8_rfc2279(
4544
     libuna_unicode_character_t unicode_character,
4545
     libuna_utf8_character_t *utf8_string,
4546
     size_t utf8_string_size,
4547
     size_t *utf8_string_index,
4548
     libcerror_error_t **error )
4549
0
{
4550
0
  static char *function                   = "libuna_unicode_character_copy_to_utf8_rfc2279";
4551
0
  size_t safe_utf8_string_index           = 0;
4552
0
  size_t utf8_character_iterator          = 0;
4553
0
  uint8_t utf8_character_additional_bytes = 0;
4554
0
  uint8_t utf8_first_character_mark       = 0;
4555
4556
0
  if( utf8_string == NULL )
4557
0
  {
4558
0
    libcerror_error_set(
4559
0
     error,
4560
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4561
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4562
0
     "%s: invalid UTF-8 string.",
4563
0
     function );
4564
4565
0
    return( -1 );
4566
0
  }
4567
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
4568
0
  {
4569
0
    libcerror_error_set(
4570
0
     error,
4571
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4572
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4573
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
4574
0
     function );
4575
4576
0
    return( -1 );
4577
0
  }
4578
0
  if( utf8_string_index == NULL )
4579
0
  {
4580
0
    libcerror_error_set(
4581
0
     error,
4582
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4583
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4584
0
     "%s: invalid UTF-8 string index.",
4585
0
     function );
4586
4587
0
    return( -1 );
4588
0
  }
4589
0
  safe_utf8_string_index = *utf8_string_index;
4590
4591
0
  if( safe_utf8_string_index >= utf8_string_size )
4592
0
  {
4593
0
    libcerror_error_set(
4594
0
     error,
4595
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4596
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4597
0
     "%s: UTF-8 string too small.",
4598
0
     function );
4599
4600
0
    return( -1 );
4601
0
  }
4602
  /* Determine if the Unicode character is valid
4603
   */
4604
0
  if( unicode_character > LIBUNA_UCS_CHARACTER_MAX )
4605
0
  {
4606
0
    libcerror_error_set(
4607
0
     error,
4608
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4609
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4610
0
     "%s: unsupported Unicode character.",
4611
0
     function );
4612
4613
0
    return( -1 );
4614
0
  }
4615
  /* Determine how many UTF-8 character bytes are required
4616
   */
4617
0
  if( unicode_character < 0x080 )
4618
0
  {
4619
0
    utf8_character_additional_bytes = 0;
4620
0
    utf8_first_character_mark       = 0;
4621
0
  }
4622
0
  else if( unicode_character < 0x0800 )
4623
0
  {
4624
0
    utf8_character_additional_bytes = 1;
4625
0
    utf8_first_character_mark       = 0x0c0;
4626
0
  }
4627
0
  else if( unicode_character < 0x010000 )
4628
0
  {
4629
0
    utf8_character_additional_bytes = 2;
4630
0
    utf8_first_character_mark       = 0x0e0;
4631
0
  }
4632
0
  else if( unicode_character < 0x0200000 )
4633
0
  {
4634
0
    utf8_character_additional_bytes = 3;
4635
0
    utf8_first_character_mark       = 0x0f0;
4636
0
  }
4637
0
  else if( unicode_character < 0x0400000 )
4638
0
  {
4639
0
    utf8_character_additional_bytes = 4;
4640
0
    utf8_first_character_mark       = 0x0f8;
4641
0
  }
4642
0
  else
4643
0
  {
4644
0
    utf8_character_additional_bytes = 5;
4645
0
    utf8_first_character_mark       = 0x0fc;
4646
0
  }
4647
  /* Convert Unicode character into UTF-8 character bytes
4648
   */
4649
0
  if( ( utf8_character_additional_bytes > utf8_string_size )
4650
0
   || ( safe_utf8_string_index >= ( utf8_string_size - utf8_character_additional_bytes ) ) )
4651
0
  {
4652
0
    libcerror_error_set(
4653
0
     error,
4654
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4655
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4656
0
     "%s: UTF-8 string too small.",
4657
0
     function );
4658
4659
0
    return( -1 );
4660
0
  }
4661
0
  for( utf8_character_iterator = safe_utf8_string_index + utf8_character_additional_bytes;
4662
0
       utf8_character_iterator > safe_utf8_string_index;
4663
0
       utf8_character_iterator-- )
4664
0
  {
4665
0
    utf8_string[ utf8_character_iterator ] = (libuna_utf8_character_t) ( ( unicode_character & 0x0bf ) | 0x080 );
4666
4667
0
    unicode_character >>= 6;
4668
0
  }
4669
0
  utf8_string[ safe_utf8_string_index ] = (libuna_utf8_character_t) ( unicode_character | utf8_first_character_mark );
4670
4671
0
  *utf8_string_index = safe_utf8_string_index + 1 + utf8_character_additional_bytes;
4672
4673
0
  return( 1 );
4674
0
}
4675
4676
/* Determines the size of an UTF-16 character from an Unicode character
4677
 * Adds the size to the UTF-16 character size value
4678
 * Returns 1 if successful or -1 on error
4679
 */
4680
int libuna_unicode_character_size_to_utf16(
4681
     libuna_unicode_character_t unicode_character,
4682
     size_t *utf16_character_size,
4683
     libcerror_error_t **error )
4684
1.39k
{
4685
1.39k
  static char *function = "libuna_unicode_character_size_to_utf16";
4686
4687
1.39k
  if( utf16_character_size == NULL )
4688
0
  {
4689
0
    libcerror_error_set(
4690
0
     error,
4691
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4692
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4693
0
     "%s: invalid UTF-16 character size.",
4694
0
     function );
4695
4696
0
    return( -1 );
4697
0
  }
4698
1.39k
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4699
1.39k
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4700
1.39k
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4701
0
  {
4702
0
    libcerror_error_set(
4703
0
     error,
4704
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4705
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4706
0
     "%s: unsupported Unicode character.",
4707
0
     function );
4708
4709
0
    return( -1 );
4710
0
  }
4711
1.39k
  if( unicode_character > LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4712
6
  {
4713
6
    *utf16_character_size += 2;
4714
6
  }
4715
1.38k
  else
4716
1.38k
  {
4717
1.38k
    *utf16_character_size += 1;
4718
1.38k
  }
4719
1.39k
  return( 1 );
4720
1.39k
}
4721
4722
/* Copies an Unicode character from an UTF-16 string
4723
 * Returns 1 if successful or -1 on error
4724
 */
4725
int libuna_unicode_character_copy_from_utf16(
4726
     libuna_unicode_character_t *unicode_character,
4727
     const libuna_utf16_character_t *utf16_string,
4728
     size_t utf16_string_size,
4729
     size_t *utf16_string_index,
4730
     libcerror_error_t **error )
4731
0
{
4732
0
  static char *function                             = "libuna_unicode_character_copy_from_utf16";
4733
0
  libuna_unicode_character_t safe_unicode_character = 0;
4734
0
  libuna_utf16_character_t utf16_surrogate          = 0;
4735
0
  size_t safe_utf16_string_index                    = 0;
4736
4737
0
  if( unicode_character == NULL )
4738
0
  {
4739
0
    libcerror_error_set(
4740
0
     error,
4741
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4742
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4743
0
     "%s: invalid Unicode character.",
4744
0
     function );
4745
4746
0
    return( -1 );
4747
0
  }
4748
0
  if( utf16_string == NULL )
4749
0
  {
4750
0
    libcerror_error_set(
4751
0
     error,
4752
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4753
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4754
0
     "%s: invalid UTF-16 string.",
4755
0
     function );
4756
4757
0
    return( -1 );
4758
0
  }
4759
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
4760
0
  {
4761
0
    libcerror_error_set(
4762
0
     error,
4763
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4764
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4765
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
4766
0
     function );
4767
4768
0
    return( -1 );
4769
0
  }
4770
0
  if( utf16_string_index == NULL )
4771
0
  {
4772
0
    libcerror_error_set(
4773
0
     error,
4774
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4775
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4776
0
     "%s: invalid UTF-16 string index.",
4777
0
     function );
4778
4779
0
    return( -1 );
4780
0
  }
4781
0
  safe_utf16_string_index = *utf16_string_index;
4782
4783
0
  if( safe_utf16_string_index >= utf16_string_size )
4784
0
  {
4785
0
    libcerror_error_set(
4786
0
     error,
4787
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4788
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4789
0
     "%s: UTF-16 string too small.",
4790
0
     function );
4791
4792
0
    return( -1 );
4793
0
  }
4794
0
  safe_unicode_character   = utf16_string[ safe_utf16_string_index ];
4795
0
  safe_utf16_string_index += 1;
4796
4797
  /* Determine if the UTF-16 character is within the high surrogate range
4798
   */
4799
0
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4800
0
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
4801
0
  {
4802
0
    if( safe_utf16_string_index >= utf16_string_size )
4803
0
    {
4804
0
      libcerror_error_set(
4805
0
       error,
4806
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4807
0
       LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
4808
0
       "%s: missing surrogate UTF-16 character bytes.",
4809
0
       function );
4810
4811
0
      return( -1 );
4812
0
    }
4813
0
    utf16_surrogate          = utf16_string[ safe_utf16_string_index ];
4814
0
    safe_utf16_string_index += 1;
4815
4816
    /* Determine if the UTF-16 character is within the low surrogate range
4817
     */
4818
0
    if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
4819
0
     && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4820
0
    {
4821
0
      safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
4822
0
      safe_unicode_character <<= 10;
4823
0
      safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
4824
0
      safe_unicode_character  += 0x010000;
4825
0
    }
4826
0
    else
4827
0
    {
4828
0
      libcerror_error_set(
4829
0
       error,
4830
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
4831
0
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4832
0
       "%s: unsupported low surrogate UTF-16 character.",
4833
0
       function );
4834
4835
0
      return( -1 );
4836
0
    }
4837
0
  }
4838
  /* Determine if the Unicode character is valid
4839
   */
4840
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4841
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4842
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4843
0
  {
4844
0
    libcerror_error_set(
4845
0
     error,
4846
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4847
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4848
0
     "%s: unsupported Unicode character.",
4849
0
     function );
4850
4851
0
    return( -1 );
4852
0
  }
4853
0
  *unicode_character  = safe_unicode_character;
4854
0
  *utf16_string_index = safe_utf16_string_index;
4855
4856
0
  return( 1 );
4857
0
}
4858
4859
/* Copies an Unicode character into a UTF-16 string
4860
 * Returns 1 if successful or -1 on error
4861
 */
4862
int libuna_unicode_character_copy_to_utf16(
4863
     libuna_unicode_character_t unicode_character,
4864
     libuna_utf16_character_t *utf16_string,
4865
     size_t utf16_string_size,
4866
     size_t *utf16_string_index,
4867
     libcerror_error_t **error )
4868
0
{
4869
0
  static char *function          = "libuna_unicode_character_copy_to_utf16";
4870
0
  size_t safe_utf16_string_index = 0;
4871
4872
0
  if( utf16_string == NULL )
4873
0
  {
4874
0
    libcerror_error_set(
4875
0
     error,
4876
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4877
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4878
0
     "%s: invalid UTF-16 string.",
4879
0
     function );
4880
4881
0
    return( -1 );
4882
0
  }
4883
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
4884
0
  {
4885
0
    libcerror_error_set(
4886
0
     error,
4887
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4888
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
4889
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
4890
0
     function );
4891
4892
0
    return( -1 );
4893
0
  }
4894
0
  if( utf16_string_index == NULL )
4895
0
  {
4896
0
    libcerror_error_set(
4897
0
     error,
4898
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4899
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4900
0
     "%s: invalid UTF-16 string index.",
4901
0
     function );
4902
4903
0
    return( -1 );
4904
0
  }
4905
0
  safe_utf16_string_index = *utf16_string_index;
4906
4907
0
  if( safe_utf16_string_index >= utf16_string_size )
4908
0
  {
4909
0
    libcerror_error_set(
4910
0
     error,
4911
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4912
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4913
0
     "%s: UTF-16 string too small.",
4914
0
     function );
4915
4916
0
    return( -1 );
4917
0
  }
4918
  /* Determine if the Unicode character is valid
4919
   */
4920
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
4921
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
4922
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
4923
0
  {
4924
0
    libcerror_error_set(
4925
0
     error,
4926
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
4927
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
4928
0
     "%s: unsupported Unicode character.",
4929
0
     function );
4930
4931
0
    return( -1 );
4932
0
  }
4933
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
4934
0
  {
4935
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) unicode_character;
4936
0
  }
4937
0
  else
4938
0
  {
4939
0
    if( ( utf16_string_size < 2 )
4940
0
     || ( safe_utf16_string_index > ( utf16_string_size - 2 ) ) )
4941
0
    {
4942
0
      libcerror_error_set(
4943
0
       error,
4944
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4945
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4946
0
       "%s: UTF-16 string too small.",
4947
0
       function );
4948
4949
0
      return( -1 );
4950
0
    }
4951
0
    unicode_character                        -= 0x010000;
4952
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
4953
0
    utf16_string[ safe_utf16_string_index++ ] = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
4954
0
  }
4955
0
  *utf16_string_index = safe_utf16_string_index;
4956
4957
0
  return( 1 );
4958
0
}
4959
4960
/* Copies an Unicode character from an UTF-16 stream
4961
 * Returns 1 if successful or -1 on error
4962
 */
4963
int libuna_unicode_character_copy_from_utf16_stream(
4964
     libuna_unicode_character_t *unicode_character,
4965
     const uint8_t *utf16_stream,
4966
     size_t utf16_stream_size,
4967
     size_t *utf16_stream_index,
4968
     int byte_order,
4969
     libcerror_error_t **error )
4970
9.68M
{
4971
9.68M
  static char *function                             = "libuna_unicode_character_copy_from_utf16_stream";
4972
9.68M
  libuna_unicode_character_t safe_unicode_character = 0;
4973
9.68M
  libuna_utf16_character_t utf16_surrogate          = 0;
4974
9.68M
  size_t safe_utf16_stream_index                    = 0;
4975
9.68M
  int byte_order_without_flags                      = 0;
4976
4977
9.68M
  if( unicode_character == NULL )
4978
0
  {
4979
0
    libcerror_error_set(
4980
0
     error,
4981
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4982
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4983
0
     "%s: invalid Unicode character.",
4984
0
     function );
4985
4986
0
    return( -1 );
4987
0
  }
4988
9.68M
  if( utf16_stream == NULL )
4989
0
  {
4990
0
    libcerror_error_set(
4991
0
     error,
4992
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4993
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
4994
0
     "%s: invalid UTF-16 stream.",
4995
0
     function );
4996
4997
0
    return( -1 );
4998
0
  }
4999
9.68M
  if( utf16_stream_size > (size_t) SSIZE_MAX )
5000
0
  {
5001
0
    libcerror_error_set(
5002
0
     error,
5003
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5004
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5005
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
5006
0
     function );
5007
5008
0
    return( -1 );
5009
0
  }
5010
9.68M
  if( utf16_stream_index == NULL )
5011
0
  {
5012
0
    libcerror_error_set(
5013
0
     error,
5014
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5015
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5016
0
     "%s: invalid UTF-16 stream index.",
5017
0
     function );
5018
5019
0
    return( -1 );
5020
0
  }
5021
9.68M
  byte_order_without_flags = byte_order & 0xff;
5022
5023
9.68M
  if( ( byte_order_without_flags != LIBUNA_ENDIAN_BIG )
5024
9.68M
   && ( byte_order_without_flags != LIBUNA_ENDIAN_LITTLE ) )
5025
0
  {
5026
0
    libcerror_error_set(
5027
0
     error,
5028
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5029
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5030
0
     "%s: unsupported byte order.",
5031
0
     function );
5032
5033
0
    return( -1 );
5034
0
  }
5035
9.68M
  safe_utf16_stream_index = *utf16_stream_index;
5036
5037
9.68M
  if( ( utf16_stream_size < 2 )
5038
9.68M
   || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5039
51
  {
5040
51
    libcerror_error_set(
5041
51
     error,
5042
51
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5043
51
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5044
51
     "%s: UTF-16 stream too small.",
5045
51
     function );
5046
5047
51
    return( -1 );
5048
51
  }
5049
9.68M
  if( byte_order_without_flags == LIBUNA_ENDIAN_BIG )
5050
2.07M
  {
5051
2.07M
    safe_unicode_character   = utf16_stream[ safe_utf16_stream_index ];
5052
2.07M
    safe_unicode_character <<= 8;
5053
2.07M
    safe_unicode_character  += utf16_stream[ safe_utf16_stream_index + 1 ];
5054
2.07M
  }
5055
7.60M
  else if( byte_order_without_flags == LIBUNA_ENDIAN_LITTLE )
5056
7.60M
  {
5057
7.60M
    safe_unicode_character   = utf16_stream[ safe_utf16_stream_index + 1 ];
5058
7.60M
    safe_unicode_character <<= 8;
5059
7.60M
    safe_unicode_character  += utf16_stream[ safe_utf16_stream_index ];
5060
7.60M
  }
5061
9.68M
  safe_utf16_stream_index += 2;
5062
5063
  /* Determine if the Unicode character is valid
5064
   */
5065
9.68M
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5066
9.68M
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5067
478
  {
5068
478
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5069
472
    {
5070
472
      libcerror_error_set(
5071
472
       error,
5072
472
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
5073
472
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5074
472
       "%s: unsupported UTF-16 character.",
5075
472
       function );
5076
5077
472
      return( -1 );
5078
472
    }
5079
478
  }
5080
  /* Determine if the UTF-16 character is within the high surrogate range
5081
   */
5082
9.68M
  if( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5083
9.68M
   && ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_END ) )
5084
54.5k
  {
5085
54.5k
    if( safe_utf16_stream_index > ( utf16_stream_size - 2 ) )
5086
68
    {
5087
68
      if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5088
64
      {
5089
64
        libcerror_error_set(
5090
64
         error,
5091
64
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
5092
64
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5093
64
         "%s: missing surrogate UTF-16 character bytes.",
5094
64
         function );
5095
5096
64
        return( -1 );
5097
64
      }
5098
68
    }
5099
54.5k
    else
5100
54.5k
    {
5101
54.5k
      if( byte_order_without_flags == LIBUNA_ENDIAN_BIG )
5102
41.4k
      {
5103
41.4k
        utf16_surrogate   = utf16_stream[ safe_utf16_stream_index ];
5104
41.4k
        utf16_surrogate <<= 8;
5105
41.4k
        utf16_surrogate  += utf16_stream[ safe_utf16_stream_index + 1 ];
5106
41.4k
      }
5107
13.1k
      else if( byte_order_without_flags == LIBUNA_ENDIAN_LITTLE )
5108
13.1k
      {
5109
13.1k
        utf16_surrogate   = utf16_stream[ safe_utf16_stream_index + 1 ];
5110
13.1k
        utf16_surrogate <<= 8;
5111
13.1k
        utf16_surrogate  += utf16_stream[ safe_utf16_stream_index ];
5112
13.1k
      }
5113
      /* Determine if the UTF-16 character is within the low surrogate range
5114
       */
5115
54.5k
      if( ( utf16_surrogate >= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START )
5116
54.5k
       && ( utf16_surrogate <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5117
53.7k
      {
5118
53.7k
        safe_unicode_character  -= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START;
5119
53.7k
        safe_unicode_character <<= 10;
5120
53.7k
        safe_unicode_character  += utf16_surrogate - LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START;
5121
53.7k
        safe_unicode_character  += 0x010000;
5122
5123
53.7k
        safe_utf16_stream_index += 2;
5124
53.7k
      }
5125
788
      else if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5126
778
      {
5127
778
        libcerror_error_set(
5128
778
         error,
5129
778
         LIBCERROR_ERROR_DOMAIN_RUNTIME,
5130
778
         LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5131
778
         "%s: unsupported low surrogate UTF-16 character.",
5132
778
         function );
5133
5134
778
        return( -1 );
5135
778
      }
5136
54.5k
    }
5137
54.5k
  }
5138
9.68M
  if( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
5139
0
  {
5140
0
    libcerror_error_set(
5141
0
     error,
5142
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5143
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5144
0
     "%s: unsupported Unicode character.",
5145
0
     function );
5146
5147
0
    return( -1 );
5148
0
  }
5149
9.68M
  *unicode_character  = safe_unicode_character;
5150
9.68M
  *utf16_stream_index = safe_utf16_stream_index;
5151
5152
9.68M
  return( 1 );
5153
9.68M
}
5154
5155
/* Copies an Unicode character to an UTF-16 stream
5156
 * Returns 1 if successful or -1 on error
5157
 */
5158
int libuna_unicode_character_copy_to_utf16_stream(
5159
     libuna_unicode_character_t unicode_character,
5160
     uint8_t *utf16_stream,
5161
     size_t utf16_stream_size,
5162
     size_t *utf16_stream_index,
5163
     int byte_order,
5164
     libcerror_error_t **error )
5165
0
{
5166
0
  static char *function                    = "libuna_unicode_character_copy_to_utf16_stream";
5167
0
  libuna_utf16_character_t utf16_surrogate = 0;
5168
0
  size_t safe_utf16_stream_index           = 0;
5169
0
  int byte_order_without_flags             = 0;
5170
5171
0
  if( utf16_stream == NULL )
5172
0
  {
5173
0
    libcerror_error_set(
5174
0
     error,
5175
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5176
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5177
0
     "%s: invalid UTF-16 stream.",
5178
0
     function );
5179
5180
0
    return( -1 );
5181
0
  }
5182
0
  if( utf16_stream_size > (size_t) SSIZE_MAX )
5183
0
  {
5184
0
    libcerror_error_set(
5185
0
     error,
5186
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5187
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5188
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
5189
0
     function );
5190
5191
0
    return( -1 );
5192
0
  }
5193
0
  if( utf16_stream_index == NULL )
5194
0
  {
5195
0
    libcerror_error_set(
5196
0
     error,
5197
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5198
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5199
0
     "%s: invalid UTF-16 stream index.",
5200
0
     function );
5201
5202
0
    return( -1 );
5203
0
  }
5204
0
  byte_order_without_flags = byte_order & 0xff;
5205
5206
0
  if( ( byte_order_without_flags != LIBUNA_ENDIAN_BIG )
5207
0
   && ( byte_order_without_flags != LIBUNA_ENDIAN_LITTLE ) )
5208
0
  {
5209
0
    libcerror_error_set(
5210
0
     error,
5211
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5212
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5213
0
     "%s: unsupported byte order.",
5214
0
     function );
5215
5216
0
    return( -1 );
5217
0
  }
5218
0
  safe_utf16_stream_index = *utf16_stream_index;
5219
5220
  /* Determine if the Unicode character is valid
5221
   */
5222
0
  if( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX )
5223
0
  {
5224
0
    libcerror_error_set(
5225
0
     error,
5226
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5227
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5228
0
     "%s: unsupported Unicode character.",
5229
0
     function );
5230
5231
0
    return( -1 );
5232
0
  }
5233
0
  if( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5234
0
   && ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5235
0
  {
5236
0
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
5237
0
    {
5238
0
      libcerror_error_set(
5239
0
       error,
5240
0
       LIBCERROR_ERROR_DOMAIN_RUNTIME,
5241
0
       LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5242
0
       "%s: unsupported Unicode character.",
5243
0
       function );
5244
5245
0
      return( -1 );
5246
0
    }
5247
0
  }
5248
0
  if( unicode_character <= LIBUNA_UNICODE_BASIC_MULTILINGUAL_PLANE_MAX )
5249
0
  {
5250
0
    if( ( utf16_stream_size < 2 )
5251
0
     || ( safe_utf16_stream_index > ( utf16_stream_size - 2 ) ) )
5252
0
    {
5253
0
      libcerror_error_set(
5254
0
       error,
5255
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5256
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5257
0
       "%s: UTF-16 stream too small.",
5258
0
       function );
5259
5260
0
      return( -1 );
5261
0
    }
5262
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5263
0
    {
5264
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5265
0
      unicode_character                         >>= 8;
5266
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5267
0
    }
5268
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5269
0
    {
5270
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5271
0
      unicode_character                         >>= 8;
5272
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5273
0
    }
5274
0
    safe_utf16_stream_index += 2;
5275
0
  }
5276
0
  else
5277
0
  {
5278
0
    if( ( utf16_stream_size < 4 )
5279
0
     || ( safe_utf16_stream_index > ( utf16_stream_size - 4 ) ) )
5280
0
    {
5281
0
      libcerror_error_set(
5282
0
       error,
5283
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5284
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5285
0
       "%s: UTF-16 stream too small.",
5286
0
       function );
5287
5288
0
      return( -1 );
5289
0
    }
5290
0
    unicode_character -= 0x010000;
5291
5292
0
    utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character >> 10 ) + LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START );
5293
5294
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5295
0
    {
5296
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5297
0
      utf16_surrogate                           >>= 8;
5298
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5299
0
    }
5300
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5301
0
    {
5302
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5303
0
      utf16_surrogate                           >>= 8;
5304
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5305
0
    }
5306
0
    safe_utf16_stream_index += 2;
5307
5308
0
    utf16_surrogate = (libuna_utf16_character_t) ( ( unicode_character & 0x03ff ) + LIBUNA_UNICODE_SURROGATE_LOW_RANGE_START );
5309
5310
0
    if( byte_order == LIBUNA_ENDIAN_BIG )
5311
0
    {
5312
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5313
0
      utf16_surrogate                           >>= 8;
5314
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5315
0
    }
5316
0
    else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5317
0
    {
5318
0
      utf16_stream[ safe_utf16_stream_index     ] = (uint8_t) ( utf16_surrogate & 0xff );
5319
0
      utf16_surrogate                           >>= 8;
5320
0
      utf16_stream[ safe_utf16_stream_index + 1 ] = (uint8_t) ( utf16_surrogate & 0xff );
5321
0
    }
5322
0
    safe_utf16_stream_index += 2;
5323
0
  }
5324
0
  *utf16_stream_index = safe_utf16_stream_index;
5325
5326
0
  return( 1 );
5327
0
}
5328
5329
/* Determines the size of an UTF-32 character from an Unicode character
5330
 * Adds the size to the UTF-32 character size value
5331
 * Returns 1 if successful or -1 on error
5332
 */
5333
int libuna_unicode_character_size_to_utf32(
5334
     libuna_unicode_character_t unicode_character,
5335
     size_t *utf32_character_size,
5336
     libcerror_error_t **error )
5337
0
{
5338
0
  static char *function = "libuna_unicode_character_size_to_utf32";
5339
5340
0
  LIBUNA_UNREFERENCED_PARAMETER( unicode_character )
5341
5342
0
  if( utf32_character_size == NULL )
5343
0
  {
5344
0
    libcerror_error_set(
5345
0
     error,
5346
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5347
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5348
0
     "%s: invalid UTF-32 character size.",
5349
0
     function );
5350
5351
0
    return( -1 );
5352
0
  }
5353
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5354
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5355
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5356
0
  {
5357
0
    libcerror_error_set(
5358
0
     error,
5359
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5360
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5361
0
     "%s: unsupported Unicode character.",
5362
0
     function );
5363
5364
0
    return( -1 );
5365
0
  }
5366
0
  *utf32_character_size += 1;
5367
5368
0
  return( 1 );
5369
0
}
5370
5371
/* Copies an Unicode character from an UTF-32 string
5372
 * Returns 1 if successful or -1 on error
5373
 */
5374
int libuna_unicode_character_copy_from_utf32(
5375
     libuna_unicode_character_t *unicode_character,
5376
     const libuna_utf32_character_t *utf32_string,
5377
     size_t utf32_string_size,
5378
     size_t *utf32_string_index,
5379
     libcerror_error_t **error )
5380
0
{
5381
0
  static char *function                             = "libuna_unicode_character_copy_from_utf32";
5382
0
  libuna_unicode_character_t safe_unicode_character = 0;
5383
0
  size_t safe_utf32_string_index                    = 0;
5384
5385
0
  if( unicode_character == NULL )
5386
0
  {
5387
0
    libcerror_error_set(
5388
0
     error,
5389
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5390
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5391
0
     "%s: invalid Unicode character.",
5392
0
     function );
5393
5394
0
    return( -1 );
5395
0
  }
5396
0
  if( utf32_string == NULL )
5397
0
  {
5398
0
    libcerror_error_set(
5399
0
     error,
5400
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5401
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5402
0
     "%s: invalid UTF-32 string.",
5403
0
     function );
5404
5405
0
    return( -1 );
5406
0
  }
5407
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
5408
0
  {
5409
0
    libcerror_error_set(
5410
0
     error,
5411
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5412
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5413
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
5414
0
     function );
5415
5416
0
    return( -1 );
5417
0
  }
5418
0
  if( utf32_string_index == NULL )
5419
0
  {
5420
0
    libcerror_error_set(
5421
0
     error,
5422
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5423
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5424
0
     "%s: invalid UTF-32 string index.",
5425
0
     function );
5426
5427
0
    return( -1 );
5428
0
  }
5429
0
  safe_utf32_string_index = *utf32_string_index;
5430
5431
0
  if( safe_utf32_string_index >= utf32_string_size )
5432
0
  {
5433
0
    libcerror_error_set(
5434
0
     error,
5435
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5436
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5437
0
     "%s: UTF-32 string too small.",
5438
0
     function );
5439
5440
0
    return( -1 );
5441
0
  }
5442
0
  safe_unicode_character = utf32_string[ safe_utf32_string_index ];
5443
5444
  /* Determine if the Unicode character is valid
5445
   */
5446
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5447
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5448
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5449
0
  {
5450
0
    libcerror_error_set(
5451
0
     error,
5452
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5453
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5454
0
     "%s: unsupported Unicode character.",
5455
0
     function );
5456
5457
0
    return( -1 );
5458
0
  }
5459
0
  *unicode_character  = safe_unicode_character;
5460
0
  *utf32_string_index = safe_utf32_string_index + 1;
5461
5462
0
  return( 1 );
5463
0
}
5464
5465
/* Copies an Unicode character into a UTF-32 string
5466
 * Returns 1 if successful or -1 on error
5467
 */
5468
int libuna_unicode_character_copy_to_utf32(
5469
     libuna_unicode_character_t unicode_character,
5470
     libuna_utf32_character_t *utf32_string,
5471
     size_t utf32_string_size,
5472
     size_t *utf32_string_index,
5473
     libcerror_error_t **error )
5474
0
{
5475
0
  static char *function          = "libuna_unicode_character_copy_to_utf32";
5476
0
  size_t safe_utf32_string_index = 0;
5477
5478
0
  if( utf32_string == NULL )
5479
0
  {
5480
0
    libcerror_error_set(
5481
0
     error,
5482
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5483
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5484
0
     "%s: invalid UTF-32 string.",
5485
0
     function );
5486
5487
0
    return( -1 );
5488
0
  }
5489
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
5490
0
  {
5491
0
    libcerror_error_set(
5492
0
     error,
5493
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5494
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5495
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
5496
0
     function );
5497
5498
0
    return( -1 );
5499
0
  }
5500
0
  if( utf32_string_index == NULL )
5501
0
  {
5502
0
    libcerror_error_set(
5503
0
     error,
5504
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5505
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5506
0
     "%s: invalid UTF-32 string index.",
5507
0
     function );
5508
5509
0
    return( -1 );
5510
0
  }
5511
0
  safe_utf32_string_index = *utf32_string_index;
5512
5513
0
  if( safe_utf32_string_index >= utf32_string_size )
5514
0
  {
5515
0
    libcerror_error_set(
5516
0
     error,
5517
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5518
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5519
0
     "%s: UTF-32 string too small.",
5520
0
     function );
5521
5522
0
    return( -1 );
5523
0
  }
5524
  /* Determine if the Unicode character is valid
5525
   */
5526
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5527
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5528
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5529
0
  {
5530
0
    libcerror_error_set(
5531
0
     error,
5532
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5533
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5534
0
     "%s: unsupported Unicode character.",
5535
0
     function );
5536
5537
0
    return( -1 );
5538
0
  }
5539
0
  utf32_string[ safe_utf32_string_index ] = (libuna_utf32_character_t) unicode_character;
5540
5541
0
  *utf32_string_index = safe_utf32_string_index + 1;
5542
5543
0
  return( 1 );
5544
0
}
5545
5546
/* Copies an Unicode character from an UTF-32 stream
5547
 * Returns 1 if successful or -1 on error
5548
 */
5549
int libuna_unicode_character_copy_from_utf32_stream(
5550
     libuna_unicode_character_t *unicode_character,
5551
     const uint8_t *utf32_stream,
5552
     size_t utf32_stream_size,
5553
     size_t *utf32_stream_index,
5554
     int byte_order,
5555
     libcerror_error_t **error )
5556
0
{
5557
0
  static char *function                             = "libuna_unicode_character_copy_from_utf32_stream";
5558
0
  libuna_unicode_character_t safe_unicode_character = 0;
5559
0
  size_t safe_utf32_stream_index                    = 0;
5560
5561
0
  if( unicode_character == NULL )
5562
0
  {
5563
0
    libcerror_error_set(
5564
0
     error,
5565
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5566
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5567
0
     "%s: invalid Unicode character.",
5568
0
     function );
5569
5570
0
    return( -1 );
5571
0
  }
5572
0
  if( utf32_stream == NULL )
5573
0
  {
5574
0
    libcerror_error_set(
5575
0
     error,
5576
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5577
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5578
0
     "%s: invalid UTF-32 stream.",
5579
0
     function );
5580
5581
0
    return( -1 );
5582
0
  }
5583
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
5584
0
  {
5585
0
    libcerror_error_set(
5586
0
     error,
5587
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5588
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5589
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
5590
0
     function );
5591
5592
0
    return( -1 );
5593
0
  }
5594
0
  if( utf32_stream_index == NULL )
5595
0
  {
5596
0
    libcerror_error_set(
5597
0
     error,
5598
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5599
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5600
0
     "%s: invalid UTF-32 stream index.",
5601
0
     function );
5602
5603
0
    return( -1 );
5604
0
  }
5605
0
  safe_utf32_stream_index = *utf32_stream_index;
5606
5607
0
  if( ( utf32_stream_size < 4 )
5608
0
   || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5609
0
  {
5610
0
    libcerror_error_set(
5611
0
     error,
5612
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5613
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5614
0
     "%s: UTF-32 stream too small.",
5615
0
     function );
5616
5617
0
    return( -1 );
5618
0
  }
5619
0
  if( ( byte_order != LIBUNA_ENDIAN_BIG )
5620
0
   && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5621
0
  {
5622
0
    libcerror_error_set(
5623
0
     error,
5624
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5625
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5626
0
     "%s: unsupported byte order.",
5627
0
     function );
5628
5629
0
    return( -1 );
5630
0
  }
5631
0
  if( byte_order == LIBUNA_ENDIAN_BIG )
5632
0
  {
5633
0
    safe_unicode_character   = utf32_stream[ safe_utf32_stream_index ];
5634
0
    safe_unicode_character <<= 8;
5635
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 1 ];
5636
0
    safe_unicode_character <<= 8;
5637
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 2 ];
5638
0
    safe_unicode_character <<= 8;
5639
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 3 ];
5640
0
  }
5641
0
  else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5642
0
  {
5643
0
    safe_unicode_character   = utf32_stream[ safe_utf32_stream_index + 3 ];
5644
0
    safe_unicode_character <<= 8;
5645
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 2 ];
5646
0
    safe_unicode_character <<= 8;
5647
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index + 1 ];
5648
0
    safe_unicode_character <<= 8;
5649
0
    safe_unicode_character  += utf32_stream[ safe_utf32_stream_index ];
5650
0
  }
5651
  /* Determine if the Unicode character is valid
5652
   */
5653
0
  if( ( ( safe_unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5654
0
    &&  ( safe_unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5655
0
   || ( safe_unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5656
0
  {
5657
0
    libcerror_error_set(
5658
0
     error,
5659
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5660
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5661
0
     "%s: unsupported Unicode character.",
5662
0
     function );
5663
5664
0
    return( -1 );
5665
0
  }
5666
0
  *unicode_character  = safe_unicode_character;
5667
0
  *utf32_stream_index = safe_utf32_stream_index + 4;
5668
5669
0
  return( 1 );
5670
0
}
5671
5672
/* Copies an Unicode character to an UTF-32 stream
5673
 * Returns 1 if successful or -1 on error
5674
 */
5675
int libuna_unicode_character_copy_to_utf32_stream(
5676
     libuna_unicode_character_t unicode_character,
5677
     uint8_t *utf32_stream,
5678
     size_t utf32_stream_size,
5679
     size_t *utf32_stream_index,
5680
     int byte_order,
5681
     libcerror_error_t **error )
5682
0
{
5683
0
  static char *function          = "libuna_unicode_character_copy_to_utf32_stream";
5684
0
  size_t safe_utf32_stream_index = 0;
5685
5686
0
  if( utf32_stream == NULL )
5687
0
  {
5688
0
    libcerror_error_set(
5689
0
     error,
5690
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5691
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5692
0
     "%s: invalid UTF-32 stream.",
5693
0
     function );
5694
5695
0
    return( -1 );
5696
0
  }
5697
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
5698
0
  {
5699
0
    libcerror_error_set(
5700
0
     error,
5701
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5702
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
5703
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
5704
0
     function );
5705
5706
0
    return( -1 );
5707
0
  }
5708
0
  if( utf32_stream_index == NULL )
5709
0
  {
5710
0
    libcerror_error_set(
5711
0
     error,
5712
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5713
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
5714
0
     "%s: invalid UTF-32 stream index.",
5715
0
     function );
5716
5717
0
    return( -1 );
5718
0
  }
5719
0
  safe_utf32_stream_index = *utf32_stream_index;
5720
5721
0
  if( ( utf32_stream_size < 4 )
5722
0
   || ( safe_utf32_stream_index > ( utf32_stream_size - 4 ) ) )
5723
0
  {
5724
0
    libcerror_error_set(
5725
0
     error,
5726
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5727
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
5728
0
     "%s: UTF-32 stream too small.",
5729
0
     function );
5730
5731
0
    return( -1 );
5732
0
  }
5733
0
  if( ( byte_order != LIBUNA_ENDIAN_BIG )
5734
0
   && ( byte_order != LIBUNA_ENDIAN_LITTLE ) )
5735
0
  {
5736
0
    libcerror_error_set(
5737
0
     error,
5738
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
5739
0
     LIBCERROR_ARGUMENT_ERROR_UNSUPPORTED_VALUE,
5740
0
     "%s: unsupported byte order.",
5741
0
     function );
5742
5743
0
    return( -1 );
5744
0
  }
5745
  /* Determine if the Unicode character is valid
5746
   */
5747
0
  if( ( ( unicode_character >= LIBUNA_UNICODE_SURROGATE_HIGH_RANGE_START )
5748
0
    &&  ( unicode_character <= LIBUNA_UNICODE_SURROGATE_LOW_RANGE_END ) )
5749
0
   || ( unicode_character > LIBUNA_UNICODE_CHARACTER_MAX ) )
5750
0
  {
5751
0
    libcerror_error_set(
5752
0
     error,
5753
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
5754
0
     LIBCERROR_RUNTIME_ERROR_UNSUPPORTED_VALUE,
5755
0
     "%s: unsupported Unicode character.",
5756
0
     function );
5757
5758
0
    return( -1 );
5759
0
  }
5760
0
  if( byte_order == LIBUNA_ENDIAN_BIG )
5761
0
  {
5762
0
    utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5763
0
    unicode_character                         >>= 8;
5764
0
    utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5765
0
    unicode_character                         >>= 8;
5766
0
    utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5767
0
    unicode_character                         >>= 8;
5768
0
    utf32_stream[ safe_utf32_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5769
0
  }
5770
0
  else if( byte_order == LIBUNA_ENDIAN_LITTLE )
5771
0
  {
5772
0
    utf32_stream[ safe_utf32_stream_index     ] = (uint8_t) ( unicode_character & 0xff );
5773
0
    unicode_character                         >>= 8;
5774
0
    utf32_stream[ safe_utf32_stream_index + 1 ] = (uint8_t) ( unicode_character & 0xff );
5775
0
    unicode_character                         >>= 8;
5776
0
    utf32_stream[ safe_utf32_stream_index + 2 ] = (uint8_t) ( unicode_character & 0xff );
5777
0
    unicode_character                        >>= 8;
5778
0
    utf32_stream[ safe_utf32_stream_index + 3 ] = (uint8_t) ( unicode_character & 0xff );
5779
0
  }
5780
0
  *utf32_stream_index = safe_utf32_stream_index + 4;
5781
5782
0
  return( 1 );
5783
0
}
5784