Coverage Report

Created: 2024-02-25 07:20

/src/libcreg/libuna/libuna_utf8_string.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * UTF-8 string functions
3
 *
4
 * Copyright (C) 2008-2024, Joachim Metz <joachim.metz@gmail.com>
5
 *
6
 * Refer to AUTHORS for acknowledgements.
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public License
19
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20
 */
21
22
#include <common.h>
23
#include <types.h>
24
25
#include "libuna_definitions.h"
26
#include "libuna_libcerror.h"
27
#include "libuna_scsu.h"
28
#include "libuna_types.h"
29
#include "libuna_unicode_character.h"
30
#include "libuna_utf8_string.h"
31
32
/* Determines the size of an UTF-8 string from a byte stream
33
 * Returns 1 if successful or -1 on error
34
 */
35
int libuna_utf8_string_size_from_byte_stream(
36
     const uint8_t *byte_stream,
37
     size_t byte_stream_size,
38
     int codepage,
39
     size_t *utf8_string_size,
40
     libcerror_error_t **error )
41
394
{
42
394
  static char *function                        = "libuna_utf8_string_size_from_byte_stream";
43
394
  size_t byte_stream_index                     = 0;
44
394
  libuna_unicode_character_t unicode_character = 0;
45
46
394
  if( byte_stream == NULL )
47
0
  {
48
0
    libcerror_error_set(
49
0
     error,
50
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
51
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
52
0
     "%s: invalid byte stream.",
53
0
     function );
54
55
0
    return( -1 );
56
0
  }
57
394
  if( byte_stream_size > (size_t) SSIZE_MAX )
58
0
  {
59
0
    libcerror_error_set(
60
0
     error,
61
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
62
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
63
0
     "%s: invalid byte stream size value exceeds maximum.",
64
0
     function );
65
66
0
    return( -1 );
67
0
  }
68
394
  if( utf8_string_size == NULL )
69
0
  {
70
0
    libcerror_error_set(
71
0
     error,
72
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
73
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
74
0
     "%s: invalid UTF-8 string size.",
75
0
     function );
76
77
0
    return( -1 );
78
0
  }
79
394
  *utf8_string_size = 0;
80
81
394
  if( byte_stream_size == 0 )
82
1
  {
83
1
    return( 1 );
84
1
  }
85
60.2k
  while( byte_stream_index < byte_stream_size )
86
59.8k
  {
87
    /* Convert the byte stream bytes into an Unicode character
88
     */
89
59.8k
    if( libuna_unicode_character_copy_from_byte_stream(
90
59.8k
         &unicode_character,
91
59.8k
         byte_stream,
92
59.8k
         byte_stream_size,
93
59.8k
         &byte_stream_index,
94
59.8k
         codepage,
95
59.8k
         error ) != 1 )
96
0
    {
97
0
      libcerror_error_set(
98
0
       error,
99
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
100
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
101
0
       "%s: unable to copy Unicode character from byte stream.",
102
0
       function );
103
104
0
      return( -1 );
105
0
    }
106
    /* Determine how many UTF-8 character bytes are required
107
     */
108
59.8k
    if( libuna_unicode_character_size_to_utf8(
109
59.8k
         unicode_character,
110
59.8k
         utf8_string_size,
111
59.8k
         error ) != 1 )
112
0
    {
113
0
      libcerror_error_set(
114
0
       error,
115
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
116
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
117
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
118
0
       function );
119
120
0
      return( -1 );
121
0
    }
122
59.8k
    if( unicode_character == 0 )
123
13
    {
124
13
      break;
125
13
    }
126
59.8k
  }
127
  /* Check if the string is terminated with an end-of-string character
128
   */
129
393
  if( unicode_character != 0 )
130
380
  {
131
380
    *utf8_string_size += 1;
132
380
  }
133
393
  return( 1 );
134
393
}
135
136
/* Copies an UTF-8 string from a byte stream
137
 * Returns 1 if successful or -1 on error
138
 */
139
int libuna_utf8_string_copy_from_byte_stream(
140
     libuna_utf8_character_t *utf8_string,
141
     size_t utf8_string_size,
142
     const uint8_t *byte_stream,
143
     size_t byte_stream_size,
144
     int codepage,
145
     libcerror_error_t **error )
146
393
{
147
393
  static char *function    = "libuna_utf8_string_copy_from_byte_stream";
148
393
  size_t utf8_string_index = 0;
149
150
393
  if( libuna_utf8_string_with_index_copy_from_byte_stream(
151
393
       utf8_string,
152
393
       utf8_string_size,
153
393
       &utf8_string_index,
154
393
       byte_stream,
155
393
       byte_stream_size,
156
393
       codepage,
157
393
       error ) != 1 )
158
0
  {
159
0
    libcerror_error_set(
160
0
     error,
161
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
162
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
163
0
     "%s: unable to copy byte stream to UTF-8 string.",
164
0
     function );
165
166
0
    return( -1 );
167
0
  }
168
393
  return( 1 );
169
393
}
170
171
/* Copies an UTF-8 string from a byte stream
172
 * Returns 1 if successful or -1 on error
173
 */
174
int libuna_utf8_string_with_index_copy_from_byte_stream(
175
     libuna_utf8_character_t *utf8_string,
176
     size_t utf8_string_size,
177
     size_t *utf8_string_index,
178
     const uint8_t *byte_stream,
179
     size_t byte_stream_size,
180
     int codepage,
181
     libcerror_error_t **error )
182
393
{
183
393
  static char *function                        = "libuna_utf8_string_with_index_copy_from_byte_stream";
184
393
  size_t byte_stream_index                     = 0;
185
393
  libuna_unicode_character_t unicode_character = 0;
186
187
393
  if( utf8_string == NULL )
188
0
  {
189
0
    libcerror_error_set(
190
0
     error,
191
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
192
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
193
0
     "%s: invalid UTF-8 string.",
194
0
     function );
195
196
0
    return( -1 );
197
0
  }
198
393
  if( utf8_string_size > (size_t) SSIZE_MAX )
199
0
  {
200
0
    libcerror_error_set(
201
0
     error,
202
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
203
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
204
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
205
0
     function );
206
207
0
    return( -1 );
208
0
  }
209
393
  if( utf8_string_index == NULL )
210
0
  {
211
0
    libcerror_error_set(
212
0
     error,
213
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
214
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
215
0
     "%s: invalid UTF-8 string index.",
216
0
     function );
217
218
0
    return( -1 );
219
0
  }
220
393
  if( byte_stream == NULL )
221
0
  {
222
0
    libcerror_error_set(
223
0
     error,
224
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
225
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
226
0
     "%s: invalid byte stream.",
227
0
     function );
228
229
0
    return( -1 );
230
0
  }
231
393
  if( byte_stream_size > (size_t) SSIZE_MAX )
232
0
  {
233
0
    libcerror_error_set(
234
0
     error,
235
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
236
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
237
0
     "%s: invalid byte stream size value exceeds maximum.",
238
0
     function );
239
240
0
    return( -1 );
241
0
  }
242
393
  if( byte_stream_size == 0 )
243
0
  {
244
0
    libcerror_error_set(
245
0
     error,
246
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
247
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
248
0
     "%s: missing byte stream value.",
249
0
     function );
250
251
0
    return( -1 );
252
0
  }
253
60.2k
  while( byte_stream_index < byte_stream_size )
254
59.8k
  {
255
    /* Convert the byte stream bytes into an Unicode character
256
     */
257
59.8k
    if( libuna_unicode_character_copy_from_byte_stream(
258
59.8k
         &unicode_character,
259
59.8k
         byte_stream,
260
59.8k
         byte_stream_size,
261
59.8k
         &byte_stream_index,
262
59.8k
         codepage,
263
59.8k
         error ) != 1 )
264
0
    {
265
0
      libcerror_error_set(
266
0
       error,
267
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
268
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
269
0
       "%s: unable to copy Unicode character from byte stream.",
270
0
       function );
271
272
0
      return( -1 );
273
0
    }
274
    /* Convert the Unicode character into UTF-8 character bytes
275
     */
276
59.8k
    if( libuna_unicode_character_copy_to_utf8(
277
59.8k
         unicode_character,
278
59.8k
         utf8_string,
279
59.8k
         utf8_string_size,
280
59.8k
         utf8_string_index,
281
59.8k
         error ) != 1 )
282
0
    {
283
0
      libcerror_error_set(
284
0
       error,
285
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
286
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
287
0
       "%s: unable to copy Unicode character to UTF-8.",
288
0
       function );
289
290
0
      return( -1 );
291
0
    }
292
59.8k
    if( unicode_character == 0 )
293
13
    {
294
13
      break;
295
13
    }
296
59.8k
  }
297
  /* Check if the string is terminated with an end-of-string character
298
   */
299
393
  if( unicode_character != 0 )
300
380
  {
301
380
    if( *utf8_string_index >= utf8_string_size )
302
0
    {
303
0
      libcerror_error_set(
304
0
       error,
305
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
306
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
307
0
       "%s: UTF-8 string too small.",
308
0
       function );
309
310
0
      return( -1 );
311
0
    }
312
380
    utf8_string[ *utf8_string_index ] = 0;
313
314
380
    *utf8_string_index += 1;
315
380
  }
316
393
  return( 1 );
317
393
}
318
319
/* Compares an UTF-8 string with a byte stream
320
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
321
 */
322
int libuna_utf8_string_compare_with_byte_stream(
323
     const libuna_utf8_character_t *utf8_string,
324
     size_t utf8_string_size,
325
     const uint8_t *byte_stream,
326
     size_t byte_stream_size,
327
     int codepage,
328
     libcerror_error_t **error )
329
4.91k
{
330
4.91k
  static char *function                                    = "libuna_utf8_string_compare_with_byte_stream";
331
4.91k
  size_t byte_stream_index                                 = 0;
332
4.91k
  size_t utf8_string_index                                 = 0;
333
4.91k
  libuna_unicode_character_t utf8_unicode_character        = 0;
334
4.91k
  libuna_unicode_character_t byte_stream_unicode_character = 0;
335
336
4.91k
  if( utf8_string == NULL )
337
0
  {
338
0
    libcerror_error_set(
339
0
     error,
340
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
341
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
342
0
     "%s: invalid UTF-8 string.",
343
0
     function );
344
345
0
    return( -1 );
346
0
  }
347
4.91k
  if( utf8_string_size > (size_t) SSIZE_MAX )
348
0
  {
349
0
    libcerror_error_set(
350
0
     error,
351
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
352
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
353
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
354
0
     function );
355
356
0
    return( -1 );
357
0
  }
358
4.91k
  if( byte_stream == NULL )
359
0
  {
360
0
    libcerror_error_set(
361
0
     error,
362
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
363
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
364
0
     "%s: invalid byte stream.",
365
0
     function );
366
367
0
    return( -1 );
368
0
  }
369
4.91k
  if( byte_stream_size > (size_t) SSIZE_MAX )
370
0
  {
371
0
    libcerror_error_set(
372
0
     error,
373
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
374
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
375
0
     "%s: invalid byte stream size value exceeds maximum.",
376
0
     function );
377
378
0
    return( -1 );
379
0
  }
380
4.91k
  if( byte_stream_size == 0 )
381
0
  {
382
0
    libcerror_error_set(
383
0
     error,
384
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
385
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
386
0
     "%s: missing byte stream value.",
387
0
     function );
388
389
0
    return( -1 );
390
0
  }
391
4.91k
  if( ( utf8_string_size >= 1 )
392
4.91k
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
393
3.00k
  {
394
3.00k
    utf8_string_size -= 1;
395
3.00k
  }
396
  /* Check if the byte stream is terminated with zero bytes
397
   */
398
4.91k
  if( ( byte_stream_size >= 1 )
399
4.91k
   && ( byte_stream[ byte_stream_size - 1 ] == 0 ) )
400
1.09k
  {
401
1.09k
    byte_stream_size -= 1;
402
1.09k
  }
403
8.83k
  while( ( utf8_string_index < utf8_string_size )
404
8.83k
      && ( byte_stream_index < byte_stream_size ) )
405
7.88k
  {
406
    /* Convert the UTF-8 character bytes into an Unicode character
407
     */
408
7.88k
    if( libuna_unicode_character_copy_from_utf8(
409
7.88k
         &utf8_unicode_character,
410
7.88k
         utf8_string,
411
7.88k
         utf8_string_size,
412
7.88k
         &utf8_string_index,
413
7.88k
         error ) != 1 )
414
102
    {
415
102
      libcerror_error_set(
416
102
       error,
417
102
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
418
102
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
419
102
       "%s: unable to copy Unicode character from UTF-8.",
420
102
       function );
421
422
102
      return( -1 );
423
102
    }
424
    /* Convert the byte stream bytes into an Unicode character
425
     */
426
7.77k
    if( libuna_unicode_character_copy_from_byte_stream(
427
7.77k
         &byte_stream_unicode_character,
428
7.77k
         byte_stream,
429
7.77k
         byte_stream_size,
430
7.77k
         &byte_stream_index,
431
7.77k
         codepage,
432
7.77k
         error ) != 1 )
433
0
    {
434
0
      libcerror_error_set(
435
0
       error,
436
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
437
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
438
0
       "%s: unable to copy Unicode character from byte stream.",
439
0
       function );
440
441
0
      return( -1 );
442
0
    }
443
7.77k
    if( utf8_unicode_character < byte_stream_unicode_character )
444
791
    {
445
791
      return( LIBUNA_COMPARE_LESS );
446
791
    }
447
6.98k
    else if( utf8_unicode_character > byte_stream_unicode_character )
448
3.07k
    {
449
3.07k
      return( LIBUNA_COMPARE_GREATER );
450
3.07k
    }
451
7.77k
  }
452
  /* Check if both strings were entirely processed
453
   */
454
954
  if( utf8_string_index < utf8_string_size )
455
522
  {
456
522
    return( LIBUNA_COMPARE_GREATER );
457
522
  }
458
432
  else if( byte_stream_index < byte_stream_size )
459
430
  {
460
430
    return( LIBUNA_COMPARE_LESS );
461
430
  }
462
2
  return( LIBUNA_COMPARE_EQUAL );
463
954
}
464
465
/* Determines the size of an UTF-8 string from an UTF-7 stream
466
 * Returns 1 if successful or -1 on error
467
 */
468
int libuna_utf8_string_size_from_utf7_stream(
469
     const uint8_t *utf7_stream,
470
     size_t utf7_stream_size,
471
     size_t *utf8_string_size,
472
     libcerror_error_t **error )
473
0
{
474
0
  static char *function                        = "libuna_utf8_string_size_from_utf7_stream";
475
0
  size_t utf7_stream_index                     = 0;
476
0
  libuna_unicode_character_t unicode_character = 0;
477
0
  uint32_t utf7_stream_base64_data             = 0;
478
479
0
  if( utf7_stream == NULL )
480
0
  {
481
0
    libcerror_error_set(
482
0
     error,
483
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
484
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
485
0
     "%s: invalid UTF-7 stream.",
486
0
     function );
487
488
0
    return( -1 );
489
0
  }
490
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
491
0
  {
492
0
    libcerror_error_set(
493
0
     error,
494
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
495
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
496
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
497
0
     function );
498
499
0
    return( -1 );
500
0
  }
501
0
  if( utf8_string_size == NULL )
502
0
  {
503
0
    libcerror_error_set(
504
0
     error,
505
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
506
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
507
0
     "%s: invalid UTF-8 string size.",
508
0
     function );
509
510
0
    return( -1 );
511
0
  }
512
0
  *utf8_string_size = 0;
513
514
0
  if( utf7_stream_size == 0 )
515
0
  {
516
0
    return( 1 );
517
0
  }
518
0
  while( utf7_stream_index < utf7_stream_size )
519
0
  {
520
    /* Convert the UTF-7 stream bytes into an Unicode character
521
     */
522
0
    if( libuna_unicode_character_copy_from_utf7_stream(
523
0
         &unicode_character,
524
0
         utf7_stream,
525
0
         utf7_stream_size,
526
0
         &utf7_stream_index,
527
0
         &utf7_stream_base64_data,
528
0
         error ) != 1 )
529
0
    {
530
0
      libcerror_error_set(
531
0
       error,
532
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
533
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
534
0
       "%s: unable to copy Unicode character from UTF-7 stream.",
535
0
       function );
536
537
0
      return( -1 );
538
0
    }
539
    /* Determine how many UTF-8 character bytes are required
540
     */
541
0
    if( libuna_unicode_character_size_to_utf8(
542
0
         unicode_character,
543
0
         utf8_string_size,
544
0
         error ) != 1 )
545
0
    {
546
0
      libcerror_error_set(
547
0
       error,
548
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
549
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
550
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
551
0
       function );
552
553
0
      return( -1 );
554
0
    }
555
0
    if( unicode_character == 0 )
556
0
    {
557
0
      break;
558
0
    }
559
0
  }
560
  /* Check if the string is terminated with an end-of-string character
561
   */
562
0
  if( unicode_character != 0 )
563
0
  {
564
0
    *utf8_string_size += 1;
565
0
  }
566
0
  return( 1 );
567
0
}
568
569
/* Copies an UTF-8 string from an UTF-7 stream
570
 * Returns 1 if successful or -1 on error
571
 */
572
int libuna_utf8_string_copy_from_utf7_stream(
573
     libuna_utf8_character_t *utf8_string,
574
     size_t utf8_string_size,
575
     const uint8_t *utf7_stream,
576
     size_t utf7_stream_size,
577
     libcerror_error_t **error )
578
0
{
579
0
  static char *function    = "libuna_utf8_string_copy_from_utf7_stream";
580
0
  size_t utf8_string_index = 0;
581
582
0
  if( libuna_utf8_string_with_index_copy_from_utf7_stream(
583
0
       utf8_string,
584
0
       utf8_string_size,
585
0
       &utf8_string_index,
586
0
       utf7_stream,
587
0
       utf7_stream_size,
588
0
       error ) != 1 )
589
0
  {
590
0
    libcerror_error_set(
591
0
     error,
592
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
593
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
594
0
     "%s: unable to UTF-7 stream to UTF-8 string.",
595
0
     function );
596
597
0
    return( -1 );
598
0
  }
599
0
  return( 1 );
600
0
}
601
602
/* Copies an UTF-8 string from an UTF-7 stream
603
 * Returns 1 if successful or -1 on error
604
 */
605
int libuna_utf8_string_with_index_copy_from_utf7_stream(
606
     libuna_utf8_character_t *utf8_string,
607
     size_t utf8_string_size,
608
     size_t *utf8_string_index,
609
     const uint8_t *utf7_stream,
610
     size_t utf7_stream_size,
611
     libcerror_error_t **error )
612
0
{
613
0
  static char *function                        = "libuna_utf8_string_with_index_copy_from_utf7_stream";
614
0
  size_t utf7_stream_index                     = 0;
615
0
  libuna_unicode_character_t unicode_character = 0;
616
0
  uint32_t utf7_stream_base64_data             = 0;
617
618
0
  if( utf8_string == NULL )
619
0
  {
620
0
    libcerror_error_set(
621
0
     error,
622
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
623
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
624
0
     "%s: invalid UTF-8 string.",
625
0
     function );
626
627
0
    return( -1 );
628
0
  }
629
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
630
0
  {
631
0
    libcerror_error_set(
632
0
     error,
633
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
634
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
635
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
636
0
     function );
637
638
0
    return( -1 );
639
0
  }
640
0
  if( utf8_string_index == NULL )
641
0
  {
642
0
    libcerror_error_set(
643
0
     error,
644
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
645
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
646
0
     "%s: invalid UTF-8 string index.",
647
0
     function );
648
649
0
    return( -1 );
650
0
  }
651
0
  if( utf7_stream == NULL )
652
0
  {
653
0
    libcerror_error_set(
654
0
     error,
655
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
656
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
657
0
     "%s: invalid UTF-7 stream.",
658
0
     function );
659
660
0
    return( -1 );
661
0
  }
662
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
663
0
  {
664
0
    libcerror_error_set(
665
0
     error,
666
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
667
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
668
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
669
0
     function );
670
671
0
    return( -1 );
672
0
  }
673
0
  if( utf7_stream_size == 0 )
674
0
  {
675
0
    libcerror_error_set(
676
0
     error,
677
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
678
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
679
0
     "%s: missing UTF-7 stream value.",
680
0
     function );
681
682
0
    return( -1 );
683
0
  }
684
0
  while( utf7_stream_index < utf7_stream_size )
685
0
  {
686
    /* Convert the UTF-7 stream bytes into an Unicode character
687
     */
688
0
    if( libuna_unicode_character_copy_from_utf7_stream(
689
0
         &unicode_character,
690
0
         utf7_stream,
691
0
         utf7_stream_size,
692
0
         &utf7_stream_index,
693
0
         &utf7_stream_base64_data,
694
0
         error ) != 1 )
695
0
    {
696
0
      libcerror_error_set(
697
0
       error,
698
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
699
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
700
0
       "%s: unable to copy Unicode character from UTF-7 stream.",
701
0
       function );
702
703
0
      return( -1 );
704
0
    }
705
    /* Convert the Unicode character into UTF-8 character bytes
706
     */
707
0
    if( libuna_unicode_character_copy_to_utf8(
708
0
         unicode_character,
709
0
         utf8_string,
710
0
         utf8_string_size,
711
0
         utf8_string_index,
712
0
         error ) != 1 )
713
0
    {
714
0
      libcerror_error_set(
715
0
       error,
716
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
717
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
718
0
       "%s: unable to copy Unicode character to UTF-8.",
719
0
       function );
720
721
0
      return( -1 );
722
0
    }
723
0
    if( unicode_character == 0 )
724
0
    {
725
0
      break;
726
0
    }
727
0
  }
728
  /* Check if the string is terminated with an end-of-string character
729
   */
730
0
  if( unicode_character != 0 )
731
0
  {
732
0
    if( *utf8_string_index >= utf8_string_size )
733
0
    {
734
0
      libcerror_error_set(
735
0
       error,
736
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
737
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
738
0
       "%s: UTF-8 string too small.",
739
0
       function );
740
741
0
      return( -1 );
742
0
    }
743
0
    utf8_string[ *utf8_string_index ] = 0;
744
745
0
    *utf8_string_index += 1;
746
0
  }
747
0
  return( 1 );
748
0
}
749
750
/* Compares an UTF-8 string with an UTF-7 stream
751
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
752
 */
753
int libuna_utf8_string_compare_with_utf7_stream(
754
     const libuna_utf8_character_t *utf8_string,
755
     size_t utf8_string_size,
756
     const uint8_t *utf7_stream,
757
     size_t utf7_stream_size,
758
     libcerror_error_t **error )
759
0
{
760
0
  static char *function                                    = "libuna_utf8_string_compare_with_utf7_stream";
761
0
  size_t utf7_stream_index                                 = 0;
762
0
  size_t utf8_string_index                                 = 0;
763
0
  libuna_unicode_character_t utf8_unicode_character        = 0;
764
0
  libuna_unicode_character_t utf7_stream_unicode_character = 0;
765
0
  uint32_t utf7_stream_base64_data                         = 0;
766
767
0
  if( utf8_string == NULL )
768
0
  {
769
0
    libcerror_error_set(
770
0
     error,
771
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
772
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
773
0
     "%s: invalid UTF-8 string.",
774
0
     function );
775
776
0
    return( -1 );
777
0
  }
778
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
779
0
  {
780
0
    libcerror_error_set(
781
0
     error,
782
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
783
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
784
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
785
0
     function );
786
787
0
    return( -1 );
788
0
  }
789
0
  if( utf7_stream == NULL )
790
0
  {
791
0
    libcerror_error_set(
792
0
     error,
793
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
794
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
795
0
     "%s: invalid UTF-7 stream.",
796
0
     function );
797
798
0
    return( -1 );
799
0
  }
800
0
  if( utf7_stream_size > (size_t) SSIZE_MAX )
801
0
  {
802
0
    libcerror_error_set(
803
0
     error,
804
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
805
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
806
0
     "%s: invalid UTF-7 stream size value exceeds maximum.",
807
0
     function );
808
809
0
    return( -1 );
810
0
  }
811
0
  if( utf7_stream_size == 0 )
812
0
  {
813
0
    libcerror_error_set(
814
0
     error,
815
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
816
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
817
0
     "%s: missing UTF-7 stream value.",
818
0
     function );
819
820
0
    return( -1 );
821
0
  }
822
0
  if( ( utf8_string_size >= 1 )
823
0
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
824
0
  {
825
0
    utf8_string_size -= 1;
826
0
  }
827
  /* Check if the UTF-7 stream is terminated with zero bytes
828
   */
829
0
  if( ( utf7_stream_size >= 1 )
830
0
   && ( utf7_stream[ utf7_stream_size - 1 ] == 0 ) )
831
0
  {
832
0
    utf7_stream_size -= 1;
833
0
  }
834
0
  while( ( utf8_string_index < utf8_string_size )
835
0
      && ( utf7_stream_index < utf7_stream_size ) )
836
0
  {
837
    /* Convert the UTF-8 character bytes into an Unicode character
838
     */
839
0
    if( libuna_unicode_character_copy_from_utf8(
840
0
         &utf8_unicode_character,
841
0
         utf8_string,
842
0
         utf8_string_size,
843
0
         &utf8_string_index,
844
0
         error ) != 1 )
845
0
    {
846
0
      libcerror_error_set(
847
0
       error,
848
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
849
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
850
0
       "%s: unable to copy Unicode character from UTF-8.",
851
0
       function );
852
853
0
      return( -1 );
854
0
    }
855
    /* Convert the UTF-7 character bytes into an Unicode character
856
     */
857
0
    if( libuna_unicode_character_copy_from_utf7_stream(
858
0
         &utf7_stream_unicode_character,
859
0
         utf7_stream,
860
0
         utf7_stream_size,
861
0
         &utf7_stream_index,
862
0
         &utf7_stream_base64_data,
863
0
                     error ) != 1 )
864
0
    {
865
0
      libcerror_error_set(
866
0
       error,
867
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
868
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
869
0
       "%s: unable to copy Unicode character from UTF-7 stream.",
870
0
       function );
871
872
0
      return( -1 );
873
0
    }
874
0
    if( utf8_unicode_character < utf7_stream_unicode_character )
875
0
    {
876
0
      return( LIBUNA_COMPARE_LESS );
877
0
    }
878
0
    else if( utf8_unicode_character > utf7_stream_unicode_character )
879
0
    {
880
0
      return( LIBUNA_COMPARE_GREATER );
881
0
    }
882
0
  }
883
  /* Check if both strings were entirely processed
884
   */
885
0
  if( utf8_string_index < utf8_string_size )
886
0
  {
887
0
    return( LIBUNA_COMPARE_GREATER );
888
0
  }
889
0
  else if( utf7_stream_index < utf7_stream_size )
890
0
  {
891
0
    return( LIBUNA_COMPARE_LESS );
892
0
  }
893
0
  return( LIBUNA_COMPARE_EQUAL );
894
0
}
895
896
/* Determines the size of an UTF-8 string from an UTF-8 stream
897
 * Returns 1 if successful or -1 on error
898
 */
899
int libuna_utf8_string_size_from_utf8_stream(
900
     const uint8_t *utf8_stream,
901
     size_t utf8_stream_size,
902
     size_t *utf8_string_size,
903
     libcerror_error_t **error )
904
2.30k
{
905
2.30k
  static char *function                        = "libuna_utf8_string_size_from_utf8_stream";
906
2.30k
  size_t utf8_stream_index                     = 0;
907
2.30k
  libuna_unicode_character_t unicode_character = 0;
908
909
2.30k
  if( utf8_stream == NULL )
910
0
  {
911
0
    libcerror_error_set(
912
0
     error,
913
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
914
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
915
0
     "%s: invalid UTF-8 stream.",
916
0
     function );
917
918
0
    return( -1 );
919
0
  }
920
2.30k
  if( utf8_stream_size > (size_t) SSIZE_MAX )
921
0
  {
922
0
    libcerror_error_set(
923
0
     error,
924
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
925
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
926
0
     "%s: invalid UTF-8 stream size value exceeds maximum.",
927
0
     function );
928
929
0
    return( -1 );
930
0
  }
931
2.30k
  if( utf8_string_size == NULL )
932
0
  {
933
0
    libcerror_error_set(
934
0
     error,
935
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
936
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
937
0
     "%s: invalid UTF-8 string size.",
938
0
     function );
939
940
0
    return( -1 );
941
0
  }
942
2.30k
  *utf8_string_size = 0;
943
944
2.30k
  if( utf8_stream_size == 0 )
945
15
  {
946
15
    return( 1 );
947
15
  }
948
  /* Check if UTF-8 stream starts with a byte order mark (BOM)
949
   */
950
2.28k
  if( utf8_stream_size >= 3 )
951
2.17k
  {
952
2.17k
    if( ( utf8_stream[ 0 ] == 0x0ef )
953
2.17k
     && ( utf8_stream[ 1 ] == 0x0bb )
954
2.17k
     && ( utf8_stream[ 2 ] == 0x0bf ) )
955
15
    {
956
15
      utf8_stream_index += 3;
957
15
    }
958
2.17k
  }
959
31.0k
  while( utf8_stream_index < utf8_stream_size )
960
30.7k
  {
961
    /* Convert the UTF-8 stream bytes into an Unicode character
962
     */
963
30.7k
    if( libuna_unicode_character_copy_from_utf8(
964
30.7k
         &unicode_character,
965
30.7k
         utf8_stream,
966
30.7k
         utf8_stream_size,
967
30.7k
         &utf8_stream_index,
968
30.7k
         error ) != 1 )
969
1.37k
    {
970
1.37k
      libcerror_error_set(
971
1.37k
       error,
972
1.37k
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
973
1.37k
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
974
1.37k
       "%s: unable to copy Unicode character from UTF-8 stream.",
975
1.37k
       function );
976
977
1.37k
      return( -1 );
978
1.37k
    }
979
    /* Determine how many UTF-8 character bytes are required
980
     */
981
29.3k
    if( libuna_unicode_character_size_to_utf8(
982
29.3k
         unicode_character,
983
29.3k
         utf8_string_size,
984
29.3k
         error ) != 1 )
985
0
    {
986
0
      libcerror_error_set(
987
0
       error,
988
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
989
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
990
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
991
0
       function );
992
993
0
      return( -1 );
994
0
    }
995
29.3k
    if( unicode_character == 0 )
996
621
    {
997
621
      break;
998
621
    }
999
29.3k
  }
1000
  /* Check if the string is terminated with an end-of-string character
1001
   */
1002
911
  if( unicode_character != 0 )
1003
290
  {
1004
290
    *utf8_string_size += 1;
1005
290
  }
1006
911
  return( 1 );
1007
2.28k
}
1008
1009
/* Copies an UTF-8 string from an UTF-8 stream
1010
 * Returns 1 if successful or -1 on error
1011
 */
1012
int libuna_utf8_string_copy_from_utf8_stream(
1013
     libuna_utf8_character_t *utf8_string,
1014
     size_t utf8_string_size,
1015
     const uint8_t *utf8_stream,
1016
     size_t utf8_stream_size,
1017
     libcerror_error_t **error )
1018
2.30k
{
1019
2.30k
  static char *function    = "libuna_utf8_string_copy_from_utf8_stream";
1020
2.30k
  size_t utf8_string_index = 0;
1021
1022
2.30k
  if( libuna_utf8_string_with_index_copy_from_utf8_stream(
1023
2.30k
       utf8_string,
1024
2.30k
       utf8_string_size,
1025
2.30k
       &utf8_string_index,
1026
2.30k
       utf8_stream,
1027
2.30k
       utf8_stream_size,
1028
2.30k
       error ) != 1 )
1029
1.41k
  {
1030
1.41k
    libcerror_error_set(
1031
1.41k
     error,
1032
1.41k
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
1033
1.41k
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1034
1.41k
     "%s: unable to UTF-8 stream to UTF-8 string.",
1035
1.41k
     function );
1036
1037
1.41k
    return( -1 );
1038
1.41k
  }
1039
885
  return( 1 );
1040
2.30k
}
1041
1042
/* Copies an UTF-8 string from an UTF-8 stream
1043
 * Returns 1 if successful or -1 on error
1044
 */
1045
int libuna_utf8_string_with_index_copy_from_utf8_stream(
1046
     libuna_utf8_character_t *utf8_string,
1047
     size_t utf8_string_size,
1048
     size_t *utf8_string_index,
1049
     const uint8_t *utf8_stream,
1050
     size_t utf8_stream_size,
1051
     libcerror_error_t **error )
1052
2.30k
{
1053
2.30k
  static char *function                        = "libuna_utf8_string_with_index_copy_from_utf8_stream";
1054
2.30k
  size_t utf8_stream_index                     = 0;
1055
2.30k
  libuna_unicode_character_t unicode_character = 0;
1056
1057
2.30k
  if( utf8_string == NULL )
1058
0
  {
1059
0
    libcerror_error_set(
1060
0
     error,
1061
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1062
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1063
0
     "%s: invalid UTF-8 string.",
1064
0
     function );
1065
1066
0
    return( -1 );
1067
0
  }
1068
2.30k
  if( utf8_string_size > (size_t) SSIZE_MAX )
1069
0
  {
1070
0
    libcerror_error_set(
1071
0
     error,
1072
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1073
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1074
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
1075
0
     function );
1076
1077
0
    return( -1 );
1078
0
  }
1079
2.30k
  if( utf8_string_index == NULL )
1080
0
  {
1081
0
    libcerror_error_set(
1082
0
     error,
1083
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1084
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1085
0
     "%s: invalid UTF-8 string index.",
1086
0
     function );
1087
1088
0
    return( -1 );
1089
0
  }
1090
2.30k
  if( utf8_stream == NULL )
1091
0
  {
1092
0
    libcerror_error_set(
1093
0
     error,
1094
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1095
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1096
0
     "%s: invalid UTF-8 stream.",
1097
0
     function );
1098
1099
0
    return( -1 );
1100
0
  }
1101
2.30k
  if( utf8_stream_size > (size_t) SSIZE_MAX )
1102
0
  {
1103
0
    libcerror_error_set(
1104
0
     error,
1105
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1106
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1107
0
     "%s: invalid UTF-8 stream size value exceeds maximum.",
1108
0
     function );
1109
1110
0
    return( -1 );
1111
0
  }
1112
2.30k
  if( utf8_stream_size == 0 )
1113
15
  {
1114
15
    libcerror_error_set(
1115
15
     error,
1116
15
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1117
15
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1118
15
     "%s: missing UTF-8 stream value.",
1119
15
     function );
1120
1121
15
    return( -1 );
1122
15
  }
1123
  /* Check if UTF-8 stream starts with a byte order mark (BOM)
1124
   */
1125
2.28k
  if( utf8_stream_size >= 3 )
1126
2.17k
  {
1127
2.17k
    if( ( utf8_stream[ 0 ] == 0x0ef )
1128
2.17k
     && ( utf8_stream[ 1 ] == 0x0bb )
1129
2.17k
     && ( utf8_stream[ 2 ] == 0x0bf ) )
1130
15
    {
1131
15
      utf8_stream_index += 3;
1132
15
    }
1133
2.17k
  }
1134
18.6k
  while( utf8_stream_index < utf8_stream_size )
1135
18.3k
  {
1136
    /* Convert the UTF-8 stream bytes into an Unicode character
1137
     */
1138
18.3k
    if( libuna_unicode_character_copy_from_utf8(
1139
18.3k
         &unicode_character,
1140
18.3k
         utf8_stream,
1141
18.3k
         utf8_stream_size,
1142
18.3k
         &utf8_stream_index,
1143
18.3k
         error ) != 1 )
1144
1.25k
    {
1145
1.25k
      libcerror_error_set(
1146
1.25k
       error,
1147
1.25k
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1148
1.25k
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1149
1.25k
       "%s: unable to copy Unicode character from UTF-8 stream.",
1150
1.25k
       function );
1151
1152
1.25k
      return( -1 );
1153
1.25k
    }
1154
    /* Convert the Unicode character into UTF-8 character bytes
1155
     */
1156
17.0k
    if( libuna_unicode_character_copy_to_utf8(
1157
17.0k
         unicode_character,
1158
17.0k
         utf8_string,
1159
17.0k
         utf8_string_size,
1160
17.0k
         utf8_string_index,
1161
17.0k
         error ) != 1 )
1162
131
    {
1163
131
      libcerror_error_set(
1164
131
       error,
1165
131
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1166
131
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1167
131
       "%s: unable to copy Unicode character to UTF-8.",
1168
131
       function );
1169
1170
131
      return( -1 );
1171
131
    }
1172
16.9k
    if( unicode_character == 0 )
1173
611
    {
1174
611
      break;
1175
611
    }
1176
16.9k
  }
1177
  /* Check if the string is terminated with an end-of-string character
1178
   */
1179
898
  if( unicode_character != 0 )
1180
287
  {
1181
287
    if( *utf8_string_index >= utf8_string_size )
1182
13
    {
1183
13
      libcerror_error_set(
1184
13
       error,
1185
13
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1186
13
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1187
13
       "%s: UTF-8 string too small.",
1188
13
       function );
1189
1190
13
      return( -1 );
1191
13
    }
1192
274
    utf8_string[ *utf8_string_index ] = 0;
1193
1194
274
    *utf8_string_index += 1;
1195
274
  }
1196
885
  return( 1 );
1197
898
}
1198
1199
/* Compares an UTF-8 string with an UTF-8 stream
1200
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1201
 */
1202
int libuna_utf8_string_compare_with_utf8_stream(
1203
     const libuna_utf8_character_t *utf8_string,
1204
     size_t utf8_string_size,
1205
     const uint8_t *utf8_stream,
1206
     size_t utf8_stream_size,
1207
     libcerror_error_t **error )
1208
24.4k
{
1209
24.4k
  static char *function                                    = "libuna_utf8_string_compare_with_utf8_stream";
1210
24.4k
  size_t utf8_stream_index                                 = 0;
1211
24.4k
  size_t utf8_string_index                                 = 0;
1212
24.4k
  libuna_unicode_character_t utf8_unicode_character        = 0;
1213
24.4k
  libuna_unicode_character_t utf8_stream_unicode_character = 0;
1214
1215
24.4k
  if( utf8_string == NULL )
1216
0
  {
1217
0
    libcerror_error_set(
1218
0
     error,
1219
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1220
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1221
0
     "%s: invalid UTF-8 string.",
1222
0
     function );
1223
1224
0
    return( -1 );
1225
0
  }
1226
24.4k
  if( utf8_string_size > (size_t) SSIZE_MAX )
1227
0
  {
1228
0
    libcerror_error_set(
1229
0
     error,
1230
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1231
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1232
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
1233
0
     function );
1234
1235
0
    return( -1 );
1236
0
  }
1237
24.4k
  if( utf8_stream == NULL )
1238
0
  {
1239
0
    libcerror_error_set(
1240
0
     error,
1241
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1242
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1243
0
     "%s: invalid UTF-8 stream.",
1244
0
     function );
1245
1246
0
    return( -1 );
1247
0
  }
1248
24.4k
  if( utf8_stream_size > (size_t) SSIZE_MAX )
1249
0
  {
1250
0
    libcerror_error_set(
1251
0
     error,
1252
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1253
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1254
0
     "%s: invalid UTF-8 stream size value exceeds maximum.",
1255
0
     function );
1256
1257
0
    return( -1 );
1258
0
  }
1259
24.4k
  if( utf8_stream_size == 0 )
1260
53
  {
1261
53
    libcerror_error_set(
1262
53
     error,
1263
53
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1264
53
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1265
53
     "%s: missing UTF-8 stream value.",
1266
53
     function );
1267
1268
53
    return( -1 );
1269
53
  }
1270
  /* Check if UTF-8 stream starts with a byte order mark (BOM)
1271
   */
1272
24.3k
  if( utf8_stream_size >= 3 )
1273
19.2k
  {
1274
19.2k
    if( ( utf8_stream[ 0 ] == 0x0ef )
1275
19.2k
     && ( utf8_stream[ 1 ] == 0x0bb )
1276
19.2k
     && ( utf8_stream[ 2 ] == 0x0bf ) )
1277
240
    {
1278
240
      utf8_stream_index += 3;
1279
240
    }
1280
19.2k
  }
1281
24.3k
  if( ( utf8_string_size >= 1 )
1282
24.3k
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
1283
5.46k
  {
1284
5.46k
    utf8_string_size -= 1;
1285
5.46k
  }
1286
  /* Check if the UTF-8 stream is terminated with zero bytes
1287
   */
1288
24.3k
  if( ( utf8_stream_size >= 1 )
1289
24.3k
   && ( utf8_stream[ utf8_stream_size - 1 ] == 0 ) )
1290
14.1k
  {
1291
14.1k
    utf8_stream_size -= 1;
1292
14.1k
  }
1293
58.6k
  while( ( utf8_string_index < utf8_string_size )
1294
58.6k
      && ( utf8_stream_index < utf8_stream_size ) )
1295
51.3k
  {
1296
    /* Convert the UTF-8 character bytes into an Unicode character
1297
     */
1298
51.3k
    if( libuna_unicode_character_copy_from_utf8(
1299
51.3k
         &utf8_unicode_character,
1300
51.3k
         utf8_string,
1301
51.3k
         utf8_string_size,
1302
51.3k
         &utf8_string_index,
1303
51.3k
         error ) != 1 )
1304
0
    {
1305
0
      libcerror_error_set(
1306
0
       error,
1307
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1308
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1309
0
       "%s: unable to copy Unicode character from UTF-8.",
1310
0
       function );
1311
1312
0
      return( -1 );
1313
0
    }
1314
    /* Convert the UTF-8 character bytes into an Unicode character
1315
     */
1316
51.3k
    if( libuna_unicode_character_copy_from_utf8(
1317
51.3k
         &utf8_stream_unicode_character,
1318
51.3k
         utf8_stream,
1319
51.3k
         utf8_stream_size,
1320
51.3k
         &utf8_stream_index,
1321
51.3k
                     error ) != 1 )
1322
246
    {
1323
246
      libcerror_error_set(
1324
246
       error,
1325
246
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1326
246
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1327
246
       "%s: unable to copy Unicode character from UTF-8 stream.",
1328
246
       function );
1329
1330
246
      return( -1 );
1331
246
    }
1332
51.1k
    if( utf8_unicode_character < utf8_stream_unicode_character )
1333
6.65k
    {
1334
6.65k
      return( LIBUNA_COMPARE_LESS );
1335
6.65k
    }
1336
44.4k
    else if( utf8_unicode_character > utf8_stream_unicode_character )
1337
10.1k
    {
1338
10.1k
      return( LIBUNA_COMPARE_GREATER );
1339
10.1k
    }
1340
51.1k
  }
1341
  /* Check if both strings were entirely processed
1342
   */
1343
7.32k
  if( utf8_string_index < utf8_string_size )
1344
2.93k
  {
1345
2.93k
    return( LIBUNA_COMPARE_GREATER );
1346
2.93k
  }
1347
4.39k
  else if( utf8_stream_index < utf8_stream_size )
1348
521
  {
1349
521
    return( LIBUNA_COMPARE_LESS );
1350
521
  }
1351
3.87k
  return( LIBUNA_COMPARE_EQUAL );
1352
7.32k
}
1353
1354
/* Determines the size of an UTF-8 string from an UTF-16 string
1355
 * Returns 1 if successful or -1 on error
1356
 */
1357
int libuna_utf8_string_size_from_utf16(
1358
     const libuna_utf16_character_t *utf16_string,
1359
     size_t utf16_string_size,
1360
     size_t *utf8_string_size,
1361
     libcerror_error_t **error )
1362
0
{
1363
0
  static char *function                        = "libuna_utf8_string_size_from_utf16";
1364
0
  size_t utf16_string_index                    = 0;
1365
0
  libuna_unicode_character_t unicode_character = 0;
1366
1367
0
  if( utf16_string == NULL )
1368
0
  {
1369
0
    libcerror_error_set(
1370
0
     error,
1371
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1372
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1373
0
     "%s: invalid UTF-16 string.",
1374
0
     function );
1375
1376
0
    return( -1 );
1377
0
  }
1378
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
1379
0
  {
1380
0
    libcerror_error_set(
1381
0
     error,
1382
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1383
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1384
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
1385
0
     function );
1386
1387
0
    return( -1 );
1388
0
  }
1389
0
  if( utf8_string_size == NULL )
1390
0
  {
1391
0
    libcerror_error_set(
1392
0
     error,
1393
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1394
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1395
0
     "%s: invalid UTF-8 string size.",
1396
0
     function );
1397
1398
0
    return( -1 );
1399
0
  }
1400
0
  *utf8_string_size = 0;
1401
1402
0
  if( utf16_string_size == 0 )
1403
0
  {
1404
0
    return( 1 );
1405
0
  }
1406
0
  while( utf16_string_index < utf16_string_size )
1407
0
  {
1408
    /* Convert the UTF-16 character bytes into an Unicode character
1409
     */
1410
0
    if( libuna_unicode_character_copy_from_utf16(
1411
0
         &unicode_character,
1412
0
         utf16_string,
1413
0
         utf16_string_size,
1414
0
         &utf16_string_index,
1415
0
         error ) != 1 )
1416
0
    {
1417
0
      libcerror_error_set(
1418
0
       error,
1419
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1420
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1421
0
       "%s: unable to copy Unicode character from UTF-16.",
1422
0
       function );
1423
1424
0
      return( -1 );
1425
0
    }
1426
    /* Determine how many UTF-8 character bytes are required
1427
     */
1428
0
    if( libuna_unicode_character_size_to_utf8(
1429
0
         unicode_character,
1430
0
         utf8_string_size,
1431
0
         error ) != 1 )
1432
0
    {
1433
0
      libcerror_error_set(
1434
0
       error,
1435
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1436
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1437
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
1438
0
       function );
1439
1440
0
      return( -1 );
1441
0
    }
1442
0
    if( unicode_character == 0 )
1443
0
    {
1444
0
      break;
1445
0
    }
1446
0
  }
1447
  /* Check if the string is terminated with an end-of-string character
1448
   */
1449
0
  if( unicode_character != 0 )
1450
0
  {
1451
0
    *utf8_string_size += 1;
1452
0
  }
1453
0
  return( 1 );
1454
0
}
1455
1456
/* Copies an UTF-8 string from an UTF-16 string
1457
 * Returns 1 if successful or -1 on error
1458
 */
1459
int libuna_utf8_string_copy_from_utf16(
1460
     libuna_utf8_character_t *utf8_string,
1461
     size_t utf8_string_size,
1462
     const libuna_utf16_character_t *utf16_string,
1463
     size_t utf16_string_size,
1464
     libcerror_error_t **error )
1465
0
{
1466
0
  static char *function    = "libuna_utf8_string_copy_from_utf16";
1467
0
  size_t utf8_string_index = 0;
1468
1469
0
  if( libuna_utf8_string_with_index_copy_from_utf16(
1470
0
       utf8_string,
1471
0
       utf8_string_size,
1472
0
       &utf8_string_index,
1473
0
       utf16_string,
1474
0
       utf16_string_size,
1475
0
       error ) != 1 )
1476
0
  {
1477
0
    libcerror_error_set(
1478
0
     error,
1479
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
1480
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1481
0
     "%s: unable to copy UTF-16 string to UTF-8 string.",
1482
0
     function );
1483
1484
0
    return( -1 );
1485
0
  }
1486
0
  return( 1 );
1487
0
}
1488
1489
/* Copies an UTF-8 string from an UTF-16 string
1490
 * Returns 1 if successful or -1 on error
1491
 */
1492
int libuna_utf8_string_with_index_copy_from_utf16(
1493
     libuna_utf8_character_t *utf8_string,
1494
     size_t utf8_string_size,
1495
     size_t *utf8_string_index,
1496
     const libuna_utf16_character_t *utf16_string,
1497
     size_t utf16_string_size,
1498
     libcerror_error_t **error )
1499
0
{
1500
0
  static char *function                        = "libuna_utf8_string_with_index_copy_from_utf16";
1501
0
  size_t utf16_string_index                    = 0;
1502
0
  libuna_unicode_character_t unicode_character = 0;
1503
1504
0
  if( utf8_string == NULL )
1505
0
  {
1506
0
    libcerror_error_set(
1507
0
     error,
1508
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1509
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1510
0
     "%s: invalid UTF-8 string.",
1511
0
     function );
1512
1513
0
    return( -1 );
1514
0
  }
1515
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
1516
0
  {
1517
0
    libcerror_error_set(
1518
0
     error,
1519
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1520
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1521
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
1522
0
     function );
1523
1524
0
    return( -1 );
1525
0
  }
1526
0
  if( utf8_string_index == NULL )
1527
0
  {
1528
0
    libcerror_error_set(
1529
0
     error,
1530
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1531
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1532
0
     "%s: invalid UTF-8 string index.",
1533
0
     function );
1534
1535
0
    return( -1 );
1536
0
  }
1537
0
  if( utf16_string == NULL )
1538
0
  {
1539
0
    libcerror_error_set(
1540
0
     error,
1541
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1542
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1543
0
     "%s: invalid UTF-16 string.",
1544
0
     function );
1545
1546
0
    return( -1 );
1547
0
  }
1548
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
1549
0
  {
1550
0
    libcerror_error_set(
1551
0
     error,
1552
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1553
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1554
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
1555
0
     function );
1556
1557
0
    return( -1 );
1558
0
  }
1559
0
  if( utf16_string_size == 0 )
1560
0
  {
1561
0
    libcerror_error_set(
1562
0
     error,
1563
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1564
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1565
0
     "%s: missing UTF-16 string value.",
1566
0
     function );
1567
1568
0
    return( -1 );
1569
0
  }
1570
0
  while( utf16_string_index < utf16_string_size )
1571
0
  {
1572
    /* Convert the UTF-16 character bytes into an Unicode character
1573
     */
1574
0
    if( libuna_unicode_character_copy_from_utf16(
1575
0
         &unicode_character,
1576
0
         utf16_string,
1577
0
         utf16_string_size,
1578
0
         &utf16_string_index,
1579
0
         error ) != 1 )
1580
0
    {
1581
0
      libcerror_error_set(
1582
0
       error,
1583
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1584
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1585
0
       "%s: unable to copy Unicode character from UTF-16.",
1586
0
       function );
1587
1588
0
      return( -1 );
1589
0
    }
1590
    /* Convert the Unicode character into UTF-8 character bytes
1591
     */
1592
0
    if( libuna_unicode_character_copy_to_utf8(
1593
0
         unicode_character,
1594
0
         utf8_string,
1595
0
         utf8_string_size,
1596
0
         utf8_string_index,
1597
0
         error ) != 1 )
1598
0
    {
1599
0
      libcerror_error_set(
1600
0
       error,
1601
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1602
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1603
0
       "%s: unable to copy Unicode character to UTF-8.",
1604
0
       function );
1605
1606
0
      return( -1 );
1607
0
    }
1608
0
    if( unicode_character == 0 )
1609
0
    {
1610
0
      break;
1611
0
    }
1612
0
  }
1613
  /* Check if the string is terminated with an end-of-string character
1614
   */
1615
0
  if( unicode_character != 0 )
1616
0
  {
1617
0
    if( *utf8_string_index >= utf8_string_size )
1618
0
    {
1619
0
      libcerror_error_set(
1620
0
       error,
1621
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1622
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1623
0
       "%s: UTF-8 string too small.",
1624
0
       function );
1625
1626
0
      return( -1 );
1627
0
    }
1628
0
    utf8_string[ *utf8_string_index ] = 0;
1629
1630
0
    *utf8_string_index += 1;
1631
0
  }
1632
0
  return( 1 );
1633
0
}
1634
1635
/* Compares an UTF-8 string with an UTF-16 string
1636
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
1637
 */
1638
int libuna_utf8_string_compare_with_utf16(
1639
     const libuna_utf8_character_t *utf8_string,
1640
     size_t utf8_string_size,
1641
     const libuna_utf16_character_t *utf16_string,
1642
     size_t utf16_string_size,
1643
     libcerror_error_t **error )
1644
0
{
1645
0
  static char *function                              = "libuna_utf8_string_compare_with_utf16";
1646
0
  size_t utf16_string_index                          = 0;
1647
0
  size_t utf8_string_index                           = 0;
1648
0
  libuna_unicode_character_t utf8_unicode_character  = 0;
1649
0
  libuna_unicode_character_t utf16_unicode_character = 0;
1650
1651
0
  if( utf8_string == NULL )
1652
0
  {
1653
0
    libcerror_error_set(
1654
0
     error,
1655
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1656
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1657
0
     "%s: invalid UTF-8 string.",
1658
0
     function );
1659
1660
0
    return( -1 );
1661
0
  }
1662
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
1663
0
  {
1664
0
    libcerror_error_set(
1665
0
     error,
1666
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1667
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1668
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
1669
0
     function );
1670
1671
0
    return( -1 );
1672
0
  }
1673
0
  if( utf16_string == NULL )
1674
0
  {
1675
0
    libcerror_error_set(
1676
0
     error,
1677
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1678
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1679
0
     "%s: invalid UTF-16 string.",
1680
0
     function );
1681
1682
0
    return( -1 );
1683
0
  }
1684
0
  if( utf16_string_size > (size_t) SSIZE_MAX )
1685
0
  {
1686
0
    libcerror_error_set(
1687
0
     error,
1688
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1689
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1690
0
     "%s: invalid UTF-16 string size value exceeds maximum.",
1691
0
     function );
1692
1693
0
    return( -1 );
1694
0
  }
1695
0
  if( utf16_string_size == 0 )
1696
0
  {
1697
0
    libcerror_error_set(
1698
0
     error,
1699
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1700
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
1701
0
     "%s: missing UTF-16 string value.",
1702
0
     function );
1703
1704
0
    return( -1 );
1705
0
  }
1706
0
  if( ( utf8_string_size >= 1 )
1707
0
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
1708
0
  {
1709
0
    utf8_string_size -= 1;
1710
0
  }
1711
0
  if( ( utf16_string_size >= 1 )
1712
0
   && ( utf16_string[ utf16_string_size - 1 ] == 0 ) )
1713
0
  {
1714
0
    utf16_string_size -= 1;
1715
0
  }
1716
0
  while( ( utf8_string_index < utf8_string_size )
1717
0
      && ( utf16_string_index < utf16_string_size ) )
1718
0
  {
1719
    /* Convert the UTF-8 character bytes into an Unicode character
1720
     */
1721
0
    if( libuna_unicode_character_copy_from_utf8(
1722
0
         &utf8_unicode_character,
1723
0
         utf8_string,
1724
0
         utf8_string_size,
1725
0
         &utf8_string_index,
1726
0
         error ) != 1 )
1727
0
    {
1728
0
      libcerror_error_set(
1729
0
       error,
1730
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1731
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
1732
0
       "%s: unable to copy Unicode character from UTF-8.",
1733
0
       function );
1734
1735
0
      return( -1 );
1736
0
    }
1737
    /* Convert the UTF-16 character bytes into an Unicode character
1738
     */
1739
0
    if( libuna_unicode_character_copy_from_utf16(
1740
0
         &utf16_unicode_character,
1741
0
         utf16_string,
1742
0
         utf16_string_size,
1743
0
         &utf16_string_index,
1744
0
                     error ) != 1 )
1745
0
    {
1746
0
      libcerror_error_set(
1747
0
       error,
1748
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1749
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1750
0
       "%s: unable to copy Unicode character from UTF-16.",
1751
0
       function );
1752
1753
0
      return( -1 );
1754
0
    }
1755
0
    if( utf8_unicode_character < utf16_unicode_character )
1756
0
    {
1757
0
      return( LIBUNA_COMPARE_LESS );
1758
0
    }
1759
0
    else if( utf8_unicode_character > utf16_unicode_character )
1760
0
    {
1761
0
      return( LIBUNA_COMPARE_GREATER );
1762
0
    }
1763
0
  }
1764
  /* Check if both strings were entirely processed
1765
   */
1766
0
  if( utf8_string_index < utf8_string_size )
1767
0
  {
1768
0
    return( LIBUNA_COMPARE_GREATER );
1769
0
  }
1770
0
  else if( utf16_string_index < utf16_string_size )
1771
0
  {
1772
0
    return( LIBUNA_COMPARE_LESS );
1773
0
  }
1774
0
  return( LIBUNA_COMPARE_EQUAL );
1775
0
}
1776
1777
/* Determines the size of an UTF-8 string from an UTF-16 stream
1778
 * Returns 1 if successful or -1 on error
1779
 */
1780
int libuna_utf8_string_size_from_utf16_stream(
1781
     const uint8_t *utf16_stream,
1782
     size_t utf16_stream_size,
1783
     int byte_order,
1784
     size_t *utf8_string_size,
1785
     libcerror_error_t **error )
1786
120k
{
1787
120k
  static char *function                        = "libuna_utf8_string_size_from_utf16_stream";
1788
120k
  libuna_unicode_character_t unicode_character = 0;
1789
120k
  size_t utf16_stream_index                    = 0;
1790
120k
  int read_byte_order                          = 0;
1791
120k
  int result                                   = 0;
1792
1793
120k
  if( utf16_stream == NULL )
1794
0
  {
1795
0
    libcerror_error_set(
1796
0
     error,
1797
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1798
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1799
0
     "%s: invalid UTF-16 stream.",
1800
0
     function );
1801
1802
0
    return( -1 );
1803
0
  }
1804
120k
  if( utf16_stream_size > (size_t) SSIZE_MAX )
1805
0
  {
1806
0
    libcerror_error_set(
1807
0
     error,
1808
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1809
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1810
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
1811
0
     function );
1812
1813
0
    return( -1 );
1814
0
  }
1815
120k
  if( ( utf16_stream_size % 2 ) != 0 )
1816
12
  {
1817
12
    libcerror_error_set(
1818
12
     error,
1819
12
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1820
12
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
1821
12
     "%s: missing UTF-16 stream value.",
1822
12
     function );
1823
1824
12
    return( -1 );
1825
12
  }
1826
120k
  if( utf8_string_size == NULL )
1827
0
  {
1828
0
    libcerror_error_set(
1829
0
     error,
1830
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1831
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1832
0
     "%s: invalid UTF-8 string size.",
1833
0
     function );
1834
1835
0
    return( -1 );
1836
0
  }
1837
120k
  *utf8_string_size = 0;
1838
1839
120k
  if( utf16_stream_size == 0 )
1840
1
  {
1841
1
    return( 1 );
1842
1
  }
1843
  /* Check if UTF-16 stream is in big or little endian
1844
   */
1845
120k
  if( utf16_stream_size >= 2 )
1846
120k
  {
1847
120k
    if( ( utf16_stream[ 0 ] == 0xfe )
1848
120k
     && ( utf16_stream[ 1 ] == 0xff ) )
1849
846
    {
1850
846
      read_byte_order    = LIBUNA_ENDIAN_BIG;
1851
846
      utf16_stream_index = 2;
1852
846
    }
1853
119k
    else if( ( utf16_stream[ 0 ] == 0xff )
1854
119k
          && ( utf16_stream[ 1 ] == 0xfe ) )
1855
1.57k
    {
1856
1.57k
      read_byte_order    = LIBUNA_ENDIAN_LITTLE;
1857
1.57k
      utf16_stream_index = 2;
1858
1.57k
    }
1859
120k
    if( byte_order == 0 )
1860
0
    {
1861
0
      byte_order = read_byte_order;
1862
0
    }
1863
120k
  }
1864
3.93M
  while( ( utf16_stream_index + 1 ) < utf16_stream_size )
1865
3.83M
  {
1866
    /* Convert the UTF-16 stream bytes into an Unicode character
1867
     */
1868
3.83M
    if( libuna_unicode_character_copy_from_utf16_stream(
1869
3.83M
         &unicode_character,
1870
3.83M
         utf16_stream,
1871
3.83M
         utf16_stream_size,
1872
3.83M
         &utf16_stream_index,
1873
3.83M
         byte_order,
1874
3.83M
         error ) != 1 )
1875
200
    {
1876
200
      libcerror_error_set(
1877
200
       error,
1878
200
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1879
200
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1880
200
       "%s: unable to copy Unicode character from UTF-16 stream.",
1881
200
       function );
1882
1883
200
      return( -1 );
1884
200
    }
1885
    /* Determine how many UTF-8 character bytes are required
1886
     */
1887
3.82M
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
1888
3.82M
    {
1889
3.82M
      result = libuna_unicode_character_size_to_utf8(
1890
3.82M
                unicode_character,
1891
3.82M
                utf8_string_size,
1892
3.82M
                error );
1893
3.82M
    }
1894
0
    else
1895
0
    {
1896
0
      result = libuna_unicode_character_size_to_utf8_rfc2279(
1897
0
                unicode_character,
1898
0
                utf8_string_size,
1899
0
                error );
1900
0
    }
1901
3.82M
    if( result != 1 )
1902
0
    {
1903
0
      libcerror_error_set(
1904
0
       error,
1905
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
1906
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
1907
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
1908
0
       function );
1909
1910
0
      return( -1 );
1911
0
    }
1912
3.82M
    if( unicode_character == 0 )
1913
10.4k
    {
1914
10.4k
      break;
1915
10.4k
    }
1916
3.82M
  }
1917
  /* Check if the string is terminated with an end-of-string character
1918
   */
1919
119k
  if( unicode_character != 0 )
1920
109k
  {
1921
109k
    *utf8_string_size += 1;
1922
109k
  }
1923
119k
  return( 1 );
1924
120k
}
1925
1926
/* Copies an UTF-8 string from an UTF-16 stream
1927
 * Returns 1 if successful or -1 on error
1928
 */
1929
int libuna_utf8_string_copy_from_utf16_stream(
1930
     libuna_utf8_character_t *utf8_string,
1931
     size_t utf8_string_size,
1932
     const uint8_t *utf16_stream,
1933
     size_t utf16_stream_size,
1934
     int byte_order,
1935
     libcerror_error_t **error )
1936
12.9k
{
1937
12.9k
  static char *function    = "libuna_utf8_string_copy_from_utf16_stream";
1938
12.9k
  size_t utf8_string_index = 0;
1939
1940
12.9k
  if( libuna_utf8_string_with_index_copy_from_utf16_stream(
1941
12.9k
       utf8_string,
1942
12.9k
       utf8_string_size,
1943
12.9k
       &utf8_string_index,
1944
12.9k
       utf16_stream,
1945
12.9k
       utf16_stream_size,
1946
12.9k
       byte_order,
1947
12.9k
       error ) != 1 )
1948
0
  {
1949
0
    libcerror_error_set(
1950
0
     error,
1951
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
1952
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
1953
0
     "%s: unable to copy UTF-16 stream to UTF-8 string.",
1954
0
     function );
1955
1956
0
    return( -1 );
1957
0
  }
1958
12.9k
  return( 1 );
1959
12.9k
}
1960
1961
/* Copies an UTF-8 string from an UTF-16 stream
1962
 * Returns 1 if successful or -1 on error
1963
 */
1964
int libuna_utf8_string_with_index_copy_from_utf16_stream(
1965
     libuna_utf8_character_t *utf8_string,
1966
     size_t utf8_string_size,
1967
     size_t *utf8_string_index,
1968
     const uint8_t *utf16_stream,
1969
     size_t utf16_stream_size,
1970
     int byte_order,
1971
     libcerror_error_t **error )
1972
104k
{
1973
104k
  static char *function                        = "libuna_utf8_string_with_index_copy_from_utf16_stream";
1974
104k
  libuna_unicode_character_t unicode_character = 0;
1975
104k
  size_t utf16_stream_index                    = 0;
1976
104k
  int read_byte_order                          = 0;
1977
104k
  int result                                   = 0;
1978
1979
104k
  if( utf8_string == NULL )
1980
0
  {
1981
0
    libcerror_error_set(
1982
0
     error,
1983
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1984
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
1985
0
     "%s: invalid UTF-8 string.",
1986
0
     function );
1987
1988
0
    return( -1 );
1989
0
  }
1990
104k
  if( utf8_string_size > (size_t) SSIZE_MAX )
1991
0
  {
1992
0
    libcerror_error_set(
1993
0
     error,
1994
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
1995
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
1996
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
1997
0
     function );
1998
1999
0
    return( -1 );
2000
0
  }
2001
104k
  if( utf8_string_index == NULL )
2002
0
  {
2003
0
    libcerror_error_set(
2004
0
     error,
2005
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2006
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2007
0
     "%s: invalid UTF-8 string index.",
2008
0
     function );
2009
2010
0
    return( -1 );
2011
0
  }
2012
104k
  if( utf16_stream == NULL )
2013
0
  {
2014
0
    libcerror_error_set(
2015
0
     error,
2016
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2017
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2018
0
     "%s: invalid UTF-16 stream.",
2019
0
     function );
2020
2021
0
    return( -1 );
2022
0
  }
2023
104k
  if( utf16_stream_size > (size_t) SSIZE_MAX )
2024
0
  {
2025
0
    libcerror_error_set(
2026
0
     error,
2027
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2028
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2029
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
2030
0
     function );
2031
2032
0
    return( -1 );
2033
0
  }
2034
104k
  if( ( utf16_stream_size == 0 )
2035
104k
   || ( ( utf16_stream_size % 2 ) != 0 ) )
2036
0
  {
2037
0
    libcerror_error_set(
2038
0
     error,
2039
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2040
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2041
0
     "%s: missing UTF-16 stream value.",
2042
0
     function );
2043
2044
0
    return( -1 );
2045
0
  }
2046
  /* Check if UTF-16 stream is in big or little endian
2047
   */
2048
104k
  if( utf16_stream_size >= 2 )
2049
104k
  {
2050
104k
    if( ( utf16_stream[ 0 ] == 0xfe )
2051
104k
     && ( utf16_stream[ 1 ] == 0xff ) )
2052
507
    {
2053
507
      read_byte_order    = LIBUNA_ENDIAN_BIG;
2054
507
      utf16_stream_index = 2;
2055
507
    }
2056
104k
    else if( ( utf16_stream[ 0 ] == 0xff )
2057
104k
          && ( utf16_stream[ 1 ] == 0xfe ) )
2058
1.19k
    {
2059
1.19k
      read_byte_order    = LIBUNA_ENDIAN_LITTLE;
2060
1.19k
      utf16_stream_index = 2;
2061
1.19k
    }
2062
104k
    if( byte_order == 0 )
2063
0
    {
2064
0
      byte_order = read_byte_order;
2065
0
    }
2066
104k
  }
2067
3.77M
  while( ( utf16_stream_index + 1 ) < utf16_stream_size )
2068
3.67M
  {
2069
    /* Convert the UTF-16 stream bytes into an Unicode character
2070
     */
2071
3.67M
    if( libuna_unicode_character_copy_from_utf16_stream(
2072
3.67M
         &unicode_character,
2073
3.67M
         utf16_stream,
2074
3.67M
         utf16_stream_size,
2075
3.67M
         &utf16_stream_index,
2076
3.67M
         byte_order,
2077
3.67M
         error ) != 1 )
2078
0
    {
2079
0
      libcerror_error_set(
2080
0
       error,
2081
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2082
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2083
0
       "%s: unable to copy Unicode character from UTF-16 stream.",
2084
0
       function );
2085
2086
0
      return( -1 );
2087
0
    }
2088
    /* Convert the Unicode character into UTF-8 character bytes
2089
     */
2090
3.67M
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
2091
3.67M
    {
2092
3.67M
      result = libuna_unicode_character_copy_to_utf8(
2093
3.67M
                unicode_character,
2094
3.67M
                utf8_string,
2095
3.67M
                utf8_string_size,
2096
3.67M
                utf8_string_index,
2097
3.67M
                error );
2098
3.67M
    }
2099
0
    else
2100
0
    {
2101
0
      result = libuna_unicode_character_copy_to_utf8_rfc2279(
2102
0
                unicode_character,
2103
0
                utf8_string,
2104
0
                utf8_string_size,
2105
0
                utf8_string_index,
2106
0
                error );
2107
0
    }
2108
3.67M
    if( result != 1 )
2109
0
    {
2110
0
      libcerror_error_set(
2111
0
       error,
2112
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2113
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2114
0
       "%s: unable to copy Unicode character to UTF-8.",
2115
0
       function );
2116
2117
0
      return( -1 );
2118
0
    }
2119
3.67M
    if( unicode_character == 0 )
2120
4.95k
    {
2121
4.95k
      break;
2122
4.95k
    }
2123
3.67M
  }
2124
  /* Check if the string is terminated with an end-of-string character
2125
   */
2126
104k
  if( unicode_character != 0 )
2127
99.6k
  {
2128
99.6k
    if( *utf8_string_index >= utf8_string_size )
2129
0
    {
2130
0
      libcerror_error_set(
2131
0
       error,
2132
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2133
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2134
0
       "%s: UTF-8 string too small.",
2135
0
       function );
2136
2137
0
      return( -1 );
2138
0
    }
2139
99.6k
    utf8_string[ *utf8_string_index ] = 0;
2140
2141
99.6k
    *utf8_string_index += 1;
2142
99.6k
  }
2143
104k
  return( 1 );
2144
104k
}
2145
2146
/* Compares an UTF-8 string with an UTF-16 stream
2147
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2148
 */
2149
int libuna_utf8_string_compare_with_utf16_stream(
2150
     const libuna_utf8_character_t *utf8_string,
2151
     size_t utf8_string_size,
2152
     const uint8_t *utf16_stream,
2153
     size_t utf16_stream_size,
2154
     int byte_order,
2155
     libcerror_error_t **error )
2156
2.16k
{
2157
2.16k
  static char *function                                     = "libuna_utf8_string_compare_with_utf16_stream";
2158
2.16k
  libuna_unicode_character_t utf16_stream_unicode_character = 0;
2159
2.16k
  libuna_unicode_character_t utf8_unicode_character         = 0;
2160
2.16k
  size_t utf16_stream_index                                 = 0;
2161
2.16k
  size_t utf8_string_index                                  = 0;
2162
2.16k
  int read_byte_order                                       = 0;
2163
2.16k
  int result                                                = 0;
2164
2165
2.16k
  if( utf8_string == NULL )
2166
0
  {
2167
0
    libcerror_error_set(
2168
0
     error,
2169
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2170
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2171
0
     "%s: invalid UTF-8 string.",
2172
0
     function );
2173
2174
0
    return( -1 );
2175
0
  }
2176
2.16k
  if( utf8_string_size > (size_t) SSIZE_MAX )
2177
0
  {
2178
0
    libcerror_error_set(
2179
0
     error,
2180
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2181
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2182
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
2183
0
     function );
2184
2185
0
    return( -1 );
2186
0
  }
2187
2.16k
  if( utf16_stream == NULL )
2188
0
  {
2189
0
    libcerror_error_set(
2190
0
     error,
2191
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2192
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2193
0
     "%s: invalid UTF-16 stream.",
2194
0
     function );
2195
2196
0
    return( -1 );
2197
0
  }
2198
2.16k
  if( utf16_stream_size > (size_t) SSIZE_MAX )
2199
0
  {
2200
0
    libcerror_error_set(
2201
0
     error,
2202
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2203
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2204
0
     "%s: invalid UTF-16 stream size value exceeds maximum.",
2205
0
     function );
2206
2207
0
    return( -1 );
2208
0
  }
2209
2.16k
  if( ( utf16_stream_size == 0 )
2210
2.16k
   || ( ( utf16_stream_size % 2 ) != 0 ) )
2211
3
  {
2212
3
    libcerror_error_set(
2213
3
     error,
2214
3
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2215
3
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2216
3
     "%s: missing UTF-16 stream value.",
2217
3
     function );
2218
2219
3
    return( -1 );
2220
3
  }
2221
  /* Check if UTF-16 stream is in big or little endian
2222
   */
2223
2.16k
  if( utf16_stream_size >= 2 )
2224
2.16k
  {
2225
2.16k
    if( ( utf16_stream[ 0 ] == 0xfe )
2226
2.16k
     && ( utf16_stream[ 1 ] == 0xff ) )
2227
151
    {
2228
151
      read_byte_order    = LIBUNA_ENDIAN_BIG;
2229
151
      utf16_stream_index = 2;
2230
151
    }
2231
2.01k
    else if( ( utf16_stream[ 0 ] == 0xff )
2232
2.01k
          && ( utf16_stream[ 1 ] == 0xfe ) )
2233
112
    {
2234
112
      read_byte_order    = LIBUNA_ENDIAN_LITTLE;
2235
112
      utf16_stream_index = 2;
2236
112
    }
2237
2.16k
    if( byte_order == 0 )
2238
0
    {
2239
0
      byte_order = read_byte_order;
2240
0
    }
2241
2.16k
  }
2242
2.16k
  if( ( utf8_string_size >= 1 )
2243
2.16k
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
2244
590
  {
2245
590
    utf8_string_size -= 1;
2246
590
  }
2247
  /* Check if the UTF-16 stream is terminated with zero bytes
2248
   */
2249
2.16k
  if( ( utf16_stream_size >= 2 )
2250
2.16k
   && ( utf16_stream[ utf16_stream_size - 2 ] == 0 )
2251
2.16k
   && ( utf16_stream[ utf16_stream_size - 1 ] == 0 ) )
2252
843
  {
2253
843
    utf16_stream_size -= 2;
2254
843
  }
2255
2.88k
  while( ( utf8_string_index < utf8_string_size )
2256
2.88k
      && ( utf16_stream_index < utf16_stream_size ) )
2257
2.74k
  {
2258
    /* Convert the UTF-8 character bytes into an Unicode character
2259
     */
2260
2.74k
    if( ( byte_order & LIBUNA_UTF16_STREAM_ALLOW_UNPAIRED_SURROGATE ) == 0 )
2261
2.74k
    {
2262
2.74k
      result = libuna_unicode_character_copy_from_utf8(
2263
2.74k
                &utf8_unicode_character,
2264
2.74k
                utf8_string,
2265
2.74k
                utf8_string_size,
2266
2.74k
                &utf8_string_index,
2267
2.74k
                error );
2268
2.74k
    }
2269
0
    else
2270
0
    {
2271
0
      result = libuna_unicode_character_copy_from_utf8_rfc2279(
2272
0
                &utf8_unicode_character,
2273
0
                utf8_string,
2274
0
                utf8_string_size,
2275
0
                &utf8_string_index,
2276
0
                error );
2277
0
    }
2278
2.74k
    if( result != 1 )
2279
0
    {
2280
0
      libcerror_error_set(
2281
0
       error,
2282
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2283
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2284
0
       "%s: unable to copy Unicode character from UTF-8.",
2285
0
       function );
2286
2287
0
      return( -1 );
2288
0
    }
2289
    /* Convert the UTF-16 stream bytes into an Unicode character
2290
     */
2291
2.74k
    if( libuna_unicode_character_copy_from_utf16_stream(
2292
2.74k
         &utf16_stream_unicode_character,
2293
2.74k
         utf16_stream,
2294
2.74k
         utf16_stream_size,
2295
2.74k
         &utf16_stream_index,
2296
2.74k
         byte_order,
2297
2.74k
                     error ) != 1 )
2298
115
    {
2299
115
      libcerror_error_set(
2300
115
       error,
2301
115
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2302
115
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2303
115
       "%s: unable to copy Unicode character from UTF-16 stream.",
2304
115
       function );
2305
2306
115
      return( -1 );
2307
115
    }
2308
2.63k
    if( utf8_unicode_character < utf16_stream_unicode_character )
2309
1.32k
    {
2310
1.32k
      return( LIBUNA_COMPARE_LESS );
2311
1.32k
    }
2312
1.30k
    else if( utf8_unicode_character > utf16_stream_unicode_character )
2313
580
    {
2314
580
      return( LIBUNA_COMPARE_GREATER );
2315
580
    }
2316
2.63k
  }
2317
  /* Check if both strings were entirely processed
2318
   */
2319
142
  if( utf8_string_index < utf8_string_size )
2320
142
  {
2321
142
    return( LIBUNA_COMPARE_GREATER );
2322
142
  }
2323
0
  else if( utf16_stream_index < utf16_stream_size )
2324
0
  {
2325
0
    return( LIBUNA_COMPARE_LESS );
2326
0
  }
2327
0
  return( LIBUNA_COMPARE_EQUAL );
2328
142
}
2329
2330
/* Determines the size of an UTF-8 string from an UTF-32 string
2331
 * Returns 1 if successful or -1 on error
2332
 */
2333
int libuna_utf8_string_size_from_utf32(
2334
     const libuna_utf32_character_t *utf32_string,
2335
     size_t utf32_string_size,
2336
     size_t *utf8_string_size,
2337
     libcerror_error_t **error )
2338
0
{
2339
0
  static char *function                        = "libuna_utf8_string_size_from_utf32";
2340
0
  size_t utf32_string_index                    = 0;
2341
0
  libuna_unicode_character_t unicode_character = 0;
2342
2343
0
  if( utf32_string == NULL )
2344
0
  {
2345
0
    libcerror_error_set(
2346
0
     error,
2347
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2348
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2349
0
     "%s: invalid UTF-32 string.",
2350
0
     function );
2351
2352
0
    return( -1 );
2353
0
  }
2354
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
2355
0
  {
2356
0
    libcerror_error_set(
2357
0
     error,
2358
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2359
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2360
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
2361
0
     function );
2362
2363
0
    return( -1 );
2364
0
  }
2365
0
  if( utf8_string_size == NULL )
2366
0
  {
2367
0
    libcerror_error_set(
2368
0
     error,
2369
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2370
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2371
0
     "%s: invalid UTF-8 string size.",
2372
0
     function );
2373
2374
0
    return( -1 );
2375
0
  }
2376
0
  *utf8_string_size = 0;
2377
2378
0
  if( utf32_string_size == 0 )
2379
0
  {
2380
0
    return( 1 );
2381
0
  }
2382
0
  while( utf32_string_index < utf32_string_size )
2383
0
  {
2384
    /* Convert the UTF-32 character bytes into an Unicode character
2385
     */
2386
0
    if( libuna_unicode_character_copy_from_utf32(
2387
0
         &unicode_character,
2388
0
         utf32_string,
2389
0
         utf32_string_size,
2390
0
         &utf32_string_index,
2391
0
         error ) != 1 )
2392
0
    {
2393
0
      libcerror_error_set(
2394
0
       error,
2395
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2396
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2397
0
       "%s: unable to copy Unicode character from UTF-32.",
2398
0
       function );
2399
2400
0
      return( -1 );
2401
0
    }
2402
    /* Determine how many UTF-8 character bytes are required
2403
     */
2404
0
    if( libuna_unicode_character_size_to_utf8(
2405
0
         unicode_character,
2406
0
         utf8_string_size,
2407
0
         error ) != 1 )
2408
0
    {
2409
0
      libcerror_error_set(
2410
0
       error,
2411
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2412
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2413
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
2414
0
       function );
2415
2416
0
      return( -1 );
2417
0
    }
2418
0
    if( unicode_character == 0 )
2419
0
    {
2420
0
      break;
2421
0
    }
2422
0
  }
2423
  /* Check if the string is terminated with an end-of-string character
2424
   */
2425
0
  if( unicode_character != 0 )
2426
0
  {
2427
0
    *utf8_string_size += 1;
2428
0
  }
2429
0
  return( 1 );
2430
0
}
2431
2432
/* Copies an UTF-8 string from an UTF-32 string
2433
 * Returns 1 if successful or -1 on error
2434
 */
2435
int libuna_utf8_string_copy_from_utf32(
2436
     libuna_utf8_character_t *utf8_string,
2437
     size_t utf8_string_size,
2438
     const libuna_utf32_character_t *utf32_string,
2439
     size_t utf32_string_size,
2440
     libcerror_error_t **error )
2441
0
{
2442
0
  static char *function    = "libuna_utf8_string_copy_from_utf32";
2443
0
  size_t utf8_string_index = 0;
2444
2445
0
  if( libuna_utf8_string_with_index_copy_from_utf32(
2446
0
       utf8_string,
2447
0
       utf8_string_size,
2448
0
       &utf8_string_index,
2449
0
       utf32_string,
2450
0
       utf32_string_size,
2451
0
       error ) != 1 )
2452
0
  {
2453
0
    libcerror_error_set(
2454
0
     error,
2455
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2456
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2457
0
     "%s: unable to copy UTF-32 string to UTF-8 string.",
2458
0
     function );
2459
2460
0
    return( -1 );
2461
0
  }
2462
0
  return( 1 );
2463
0
}
2464
2465
/* Copies an UTF-8 string from an UTF-32 string
2466
 * Returns 1 if successful or -1 on error
2467
 */
2468
int libuna_utf8_string_with_index_copy_from_utf32(
2469
     libuna_utf8_character_t *utf8_string,
2470
     size_t utf8_string_size,
2471
     size_t *utf8_string_index,
2472
     const libuna_utf32_character_t *utf32_string,
2473
     size_t utf32_string_size,
2474
     libcerror_error_t **error )
2475
0
{
2476
0
  static char *function                        = "libuna_utf8_string_with_index_copy_from_utf32";
2477
0
  size_t utf32_string_index                    = 0;
2478
0
  libuna_unicode_character_t unicode_character = 0;
2479
2480
0
  if( utf8_string == NULL )
2481
0
  {
2482
0
    libcerror_error_set(
2483
0
     error,
2484
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2485
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2486
0
     "%s: invalid UTF-8 string.",
2487
0
     function );
2488
2489
0
    return( -1 );
2490
0
  }
2491
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
2492
0
  {
2493
0
    libcerror_error_set(
2494
0
     error,
2495
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2496
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2497
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
2498
0
     function );
2499
2500
0
    return( -1 );
2501
0
  }
2502
0
  if( utf8_string_index == NULL )
2503
0
  {
2504
0
    libcerror_error_set(
2505
0
     error,
2506
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2507
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2508
0
     "%s: invalid UTF-8 string index.",
2509
0
     function );
2510
2511
0
    return( -1 );
2512
0
  }
2513
0
  if( utf32_string == NULL )
2514
0
  {
2515
0
    libcerror_error_set(
2516
0
     error,
2517
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2518
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2519
0
     "%s: invalid UTF-32 string.",
2520
0
     function );
2521
2522
0
    return( -1 );
2523
0
  }
2524
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
2525
0
  {
2526
0
    libcerror_error_set(
2527
0
     error,
2528
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2529
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2530
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
2531
0
     function );
2532
2533
0
    return( -1 );
2534
0
  }
2535
0
  if( utf32_string_size == 0 )
2536
0
  {
2537
0
    libcerror_error_set(
2538
0
     error,
2539
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2540
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
2541
0
     "%s: missing UTF-32 string value.",
2542
0
     function );
2543
2544
0
    return( -1 );
2545
0
  }
2546
0
  while( utf32_string_index < utf32_string_size )
2547
0
  {
2548
    /* Convert the UTF-32 character bytes into an Unicode character
2549
     */
2550
0
    if( libuna_unicode_character_copy_from_utf32(
2551
0
         &unicode_character,
2552
0
         utf32_string,
2553
0
         utf32_string_size,
2554
0
         &utf32_string_index,
2555
0
         error ) != 1 )
2556
0
    {
2557
0
      libcerror_error_set(
2558
0
       error,
2559
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2560
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2561
0
       "%s: unable to copy Unicode character from UTF-32.",
2562
0
       function );
2563
2564
0
      return( -1 );
2565
0
    }
2566
    /* Convert the Unicode character into UTF-8 character bytes
2567
     */
2568
0
    if( libuna_unicode_character_copy_to_utf8(
2569
0
         unicode_character,
2570
0
         utf8_string,
2571
0
         utf8_string_size,
2572
0
         utf8_string_index,
2573
0
         error ) != 1 )
2574
0
    {
2575
0
      libcerror_error_set(
2576
0
       error,
2577
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2578
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2579
0
       "%s: unable to copy Unicode character to UTF-8.",
2580
0
       function );
2581
2582
0
      return( -1 );
2583
0
    }
2584
0
    if( unicode_character == 0 )
2585
0
    {
2586
0
      break;
2587
0
    }
2588
0
  }
2589
  /* Check if the string is terminated with an end-of-string character
2590
   */
2591
0
  if( unicode_character != 0 )
2592
0
  {
2593
0
    if( *utf8_string_index >= utf8_string_size )
2594
0
    {
2595
0
      libcerror_error_set(
2596
0
       error,
2597
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2598
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2599
0
       "%s: UTF-8 string too small.",
2600
0
       function );
2601
2602
0
      return( -1 );
2603
0
    }
2604
0
    utf8_string[ *utf8_string_index ] = 0;
2605
2606
0
    *utf8_string_index += 1;
2607
0
  }
2608
0
  return( 1 );
2609
0
}
2610
2611
/* Compares an UTF-8 string with an UTF-32 string
2612
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
2613
 */
2614
int libuna_utf8_string_compare_with_utf32(
2615
     const libuna_utf8_character_t *utf8_string,
2616
     size_t utf8_string_size,
2617
     const libuna_utf32_character_t *utf32_string,
2618
     size_t utf32_string_size,
2619
     libcerror_error_t **error )
2620
0
{
2621
0
  static char *function                              = "libuna_utf8_string_compare_with_utf32";
2622
0
  size_t utf32_string_index                          = 0;
2623
0
  size_t utf8_string_index                           = 0;
2624
0
  libuna_unicode_character_t utf8_unicode_character  = 0;
2625
0
  libuna_unicode_character_t utf32_unicode_character = 0;
2626
2627
0
  if( utf8_string == NULL )
2628
0
  {
2629
0
    libcerror_error_set(
2630
0
     error,
2631
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2632
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2633
0
     "%s: invalid UTF-8 string.",
2634
0
     function );
2635
2636
0
    return( -1 );
2637
0
  }
2638
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
2639
0
  {
2640
0
    libcerror_error_set(
2641
0
     error,
2642
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2643
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2644
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
2645
0
     function );
2646
2647
0
    return( -1 );
2648
0
  }
2649
0
  if( utf32_string == NULL )
2650
0
  {
2651
0
    libcerror_error_set(
2652
0
     error,
2653
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2654
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2655
0
     "%s: invalid UTF-32 string.",
2656
0
     function );
2657
2658
0
    return( -1 );
2659
0
  }
2660
0
  if( utf32_string_size > (size_t) SSIZE_MAX )
2661
0
  {
2662
0
    libcerror_error_set(
2663
0
     error,
2664
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2665
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2666
0
     "%s: invalid UTF-32 string size value exceeds maximum.",
2667
0
     function );
2668
2669
0
    return( -1 );
2670
0
  }
2671
0
  if( utf32_string_size == 0 )
2672
0
  {
2673
0
    libcerror_error_set(
2674
0
     error,
2675
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2676
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
2677
0
     "%s: missing UTF-32 string value.",
2678
0
     function );
2679
2680
0
    return( -1 );
2681
0
  }
2682
0
  if( ( utf8_string_size >= 1 )
2683
0
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
2684
0
  {
2685
0
    utf8_string_size -= 1;
2686
0
  }
2687
0
  if( ( utf32_string_size >= 1 )
2688
0
   && ( utf32_string[ utf32_string_size - 1 ] == 0 ) )
2689
0
  {
2690
0
    utf32_string_size -= 1;
2691
0
  }
2692
0
  while( ( utf8_string_index < utf8_string_size )
2693
0
      && ( utf32_string_index < utf32_string_size ) )
2694
0
  {
2695
    /* Convert the UTF-8 character bytes into an Unicode character
2696
     */
2697
0
    if( libuna_unicode_character_copy_from_utf8(
2698
0
         &utf8_unicode_character,
2699
0
         utf8_string,
2700
0
         utf8_string_size,
2701
0
         &utf8_string_index,
2702
0
         error ) != 1 )
2703
0
    {
2704
0
      libcerror_error_set(
2705
0
       error,
2706
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2707
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
2708
0
       "%s: unable to copy Unicode character from UTF-8.",
2709
0
       function );
2710
2711
0
      return( -1 );
2712
0
    }
2713
    /* Convert the UTF-32 character bytes into an Unicode character
2714
     */
2715
0
    if( libuna_unicode_character_copy_from_utf32(
2716
0
         &utf32_unicode_character,
2717
0
         utf32_string,
2718
0
         utf32_string_size,
2719
0
         &utf32_string_index,
2720
0
                     error ) != 1 )
2721
0
    {
2722
0
      libcerror_error_set(
2723
0
       error,
2724
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2725
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2726
0
       "%s: unable to copy Unicode character from UTF-32.",
2727
0
       function );
2728
2729
0
      return( -1 );
2730
0
    }
2731
0
    if( utf8_unicode_character < utf32_unicode_character )
2732
0
    {
2733
0
      return( LIBUNA_COMPARE_LESS );
2734
0
    }
2735
0
    else if( utf8_unicode_character > utf32_unicode_character )
2736
0
    {
2737
0
      return( LIBUNA_COMPARE_GREATER );
2738
0
    }
2739
0
  }
2740
  /* Check if both strings were entirely processed
2741
   */
2742
0
  if( utf8_string_index < utf8_string_size )
2743
0
  {
2744
0
    return( LIBUNA_COMPARE_GREATER );
2745
0
  }
2746
0
  else if( utf32_string_index < utf32_string_size )
2747
0
  {
2748
0
    return( LIBUNA_COMPARE_LESS );
2749
0
  }
2750
0
  return( LIBUNA_COMPARE_EQUAL );
2751
0
}
2752
2753
/* Determines the size of an UTF-8 string from an UTF-32 stream
2754
 * Returns 1 if successful or -1 on error
2755
 */
2756
int libuna_utf8_string_size_from_utf32_stream(
2757
     const uint8_t *utf32_stream,
2758
     size_t utf32_stream_size,
2759
     int byte_order,
2760
     size_t *utf8_string_size,
2761
     libcerror_error_t **error )
2762
0
{
2763
0
  static char *function                        = "libuna_utf8_string_size_from_utf32_stream";
2764
0
  size_t utf32_stream_index                    = 0;
2765
0
  libuna_unicode_character_t unicode_character = 0;
2766
0
  int read_byte_order                          = 0;
2767
2768
0
  if( utf32_stream == NULL )
2769
0
  {
2770
0
    libcerror_error_set(
2771
0
     error,
2772
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2773
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2774
0
     "%s: invalid UTF-32 stream.",
2775
0
     function );
2776
2777
0
    return( -1 );
2778
0
  }
2779
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
2780
0
  {
2781
0
    libcerror_error_set(
2782
0
     error,
2783
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2784
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2785
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
2786
0
     function );
2787
2788
0
    return( -1 );
2789
0
  }
2790
0
  if( ( utf32_stream_size % 4 ) != 0 )
2791
0
  {
2792
0
    libcerror_error_set(
2793
0
     error,
2794
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2795
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
2796
0
     "%s: missing UTF-32 stream value.",
2797
0
     function );
2798
2799
0
    return( -1 );
2800
0
  }
2801
0
  if( utf8_string_size == NULL )
2802
0
  {
2803
0
    libcerror_error_set(
2804
0
     error,
2805
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2806
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2807
0
     "%s: invalid UTF-8 string size.",
2808
0
     function );
2809
2810
0
    return( -1 );
2811
0
  }
2812
0
  *utf8_string_size = 0;
2813
2814
0
  if( utf32_stream_size == 0 )
2815
0
  {
2816
0
    return( 1 );
2817
0
  }
2818
  /* Check if UTF-32 stream is in big or little endian
2819
   */
2820
0
  if( utf32_stream_size >= 4 )
2821
0
  {
2822
0
    if( ( utf32_stream[ 0 ] == 0x00 )
2823
0
     && ( utf32_stream[ 1 ] == 0x00 )
2824
0
     && ( utf32_stream[ 2 ] == 0xfe )
2825
0
     && ( utf32_stream[ 3 ] == 0xff ) )
2826
0
    {
2827
0
      read_byte_order    = LIBUNA_ENDIAN_BIG;
2828
0
      utf32_stream_index = 4;
2829
0
    }
2830
0
    else if( ( utf32_stream[ 0 ] == 0xff )
2831
0
          && ( utf32_stream[ 1 ] == 0xfe )
2832
0
          && ( utf32_stream[ 2 ] == 0x00 )
2833
0
          && ( utf32_stream[ 3 ] == 0x00 ) )
2834
0
    {
2835
0
      read_byte_order    = LIBUNA_ENDIAN_LITTLE;
2836
0
      utf32_stream_index = 4;
2837
0
    }
2838
0
    if( byte_order == 0 )
2839
0
    {
2840
0
      byte_order = read_byte_order;
2841
0
    }
2842
0
  }
2843
0
  while( ( utf32_stream_index + 3 ) < utf32_stream_size )
2844
0
  {
2845
    /* Convert the UTF-32 stream bytes into an Unicode character
2846
     */
2847
0
    if( libuna_unicode_character_copy_from_utf32_stream(
2848
0
         &unicode_character,
2849
0
         utf32_stream,
2850
0
         utf32_stream_size,
2851
0
         &utf32_stream_index,
2852
0
         byte_order,
2853
0
         error ) != 1 )
2854
0
    {
2855
0
      libcerror_error_set(
2856
0
       error,
2857
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2858
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2859
0
       "%s: unable to copy Unicode character from UTF-32 stream.",
2860
0
       function );
2861
2862
0
      return( -1 );
2863
0
    }
2864
    /* Determine how many UTF-8 character bytes are required
2865
     */
2866
0
    if( libuna_unicode_character_size_to_utf8(
2867
0
         unicode_character,
2868
0
         utf8_string_size,
2869
0
         error ) != 1 )
2870
0
    {
2871
0
      libcerror_error_set(
2872
0
       error,
2873
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
2874
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
2875
0
       "%s: unable to unable to determine size of Unicode character in UTF-8.",
2876
0
       function );
2877
2878
0
      return( -1 );
2879
0
    }
2880
0
    if( unicode_character == 0 )
2881
0
    {
2882
0
      break;
2883
0
    }
2884
0
  }
2885
  /* Check if the string is terminated with an end-of-string character
2886
   */
2887
0
  if( unicode_character != 0 )
2888
0
  {
2889
0
    *utf8_string_size += 1;
2890
0
  }
2891
0
  return( 1 );
2892
0
}
2893
2894
/* Copies an UTF-8 string from an UTF-32 stream
2895
 * Returns 1 if successful or -1 on error
2896
 */
2897
int libuna_utf8_string_copy_from_utf32_stream(
2898
     libuna_utf8_character_t *utf8_string,
2899
     size_t utf8_string_size,
2900
     const uint8_t *utf32_stream,
2901
     size_t utf32_stream_size,
2902
     int byte_order,
2903
     libcerror_error_t **error )
2904
0
{
2905
0
  static char *function    = "libuna_utf8_string_copy_from_utf32_stream";
2906
0
  size_t utf8_string_index = 0;
2907
2908
0
  if( libuna_utf8_string_with_index_copy_from_utf32_stream(
2909
0
       utf8_string,
2910
0
       utf8_string_size,
2911
0
       &utf8_string_index,
2912
0
       utf32_stream,
2913
0
       utf32_stream_size,
2914
0
       byte_order,
2915
0
       error ) != 1 )
2916
0
  {
2917
0
    libcerror_error_set(
2918
0
     error,
2919
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
2920
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
2921
0
     "%s: unable to copy UTF-32 stream to UTF-8 string.",
2922
0
     function );
2923
2924
0
    return( -1 );
2925
0
  }
2926
0
  return( 1 );
2927
0
}
2928
2929
/* Copies an UTF-8 string from an UTF-32 stream
2930
 * Returns 1 if successful or -1 on error
2931
 */
2932
int libuna_utf8_string_with_index_copy_from_utf32_stream(
2933
     libuna_utf8_character_t *utf8_string,
2934
     size_t utf8_string_size,
2935
     size_t *utf8_string_index,
2936
     const uint8_t *utf32_stream,
2937
     size_t utf32_stream_size,
2938
     int byte_order,
2939
     libcerror_error_t **error )
2940
0
{
2941
0
  static char *function                        = "libuna_utf8_string_with_index_copy_from_utf32_stream";
2942
0
  size_t utf32_stream_index                    = 0;
2943
0
  libuna_unicode_character_t unicode_character = 0;
2944
0
  int read_byte_order                          = 0;
2945
2946
0
  if( utf8_string == NULL )
2947
0
  {
2948
0
    libcerror_error_set(
2949
0
     error,
2950
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2951
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2952
0
     "%s: invalid UTF-8 string.",
2953
0
     function );
2954
2955
0
    return( -1 );
2956
0
  }
2957
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
2958
0
  {
2959
0
    libcerror_error_set(
2960
0
     error,
2961
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2962
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2963
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
2964
0
     function );
2965
2966
0
    return( -1 );
2967
0
  }
2968
0
  if( utf8_string_index == NULL )
2969
0
  {
2970
0
    libcerror_error_set(
2971
0
     error,
2972
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2973
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2974
0
     "%s: invalid UTF-8 string index.",
2975
0
     function );
2976
2977
0
    return( -1 );
2978
0
  }
2979
0
  if( utf32_stream == NULL )
2980
0
  {
2981
0
    libcerror_error_set(
2982
0
     error,
2983
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2984
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
2985
0
     "%s: invalid UTF-32 stream.",
2986
0
     function );
2987
2988
0
    return( -1 );
2989
0
  }
2990
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
2991
0
  {
2992
0
    libcerror_error_set(
2993
0
     error,
2994
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
2995
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
2996
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
2997
0
     function );
2998
2999
0
    return( -1 );
3000
0
  }
3001
0
  if( ( utf32_stream_size == 0 )
3002
0
   || ( ( utf32_stream_size % 4 ) != 0 ) )
3003
0
  {
3004
0
    libcerror_error_set(
3005
0
     error,
3006
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3007
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3008
0
     "%s: missing UTF-32 stream value.",
3009
0
     function );
3010
3011
0
    return( -1 );
3012
0
  }
3013
  /* Check if UTF-32 stream is in big or little endian
3014
   */
3015
0
  if( utf32_stream_size >= 4 )
3016
0
  {
3017
0
    if( ( utf32_stream[ 0 ] == 0x00 )
3018
0
     && ( utf32_stream[ 1 ] == 0x00 )
3019
0
     && ( utf32_stream[ 2 ] == 0xfe )
3020
0
     && ( utf32_stream[ 3 ] == 0xff ) )
3021
0
    {
3022
0
      read_byte_order    = LIBUNA_ENDIAN_BIG;
3023
0
      utf32_stream_index = 4;
3024
0
    }
3025
0
    else if( ( utf32_stream[ 0 ] == 0xff )
3026
0
          && ( utf32_stream[ 1 ] == 0xfe )
3027
0
          && ( utf32_stream[ 2 ] == 0x00 )
3028
0
          && ( utf32_stream[ 3 ] == 0x00 ) )
3029
0
    {
3030
0
      read_byte_order    = LIBUNA_ENDIAN_LITTLE;
3031
0
      utf32_stream_index = 4;
3032
0
    }
3033
0
    if( byte_order == 0 )
3034
0
    {
3035
0
      byte_order = read_byte_order;
3036
0
    }
3037
0
  }
3038
0
  while( ( utf32_stream_index + 3 ) < utf32_stream_size )
3039
0
  {
3040
    /* Convert the UTF-32 stream bytes into an Unicode character
3041
     */
3042
0
    if( libuna_unicode_character_copy_from_utf32_stream(
3043
0
         &unicode_character,
3044
0
         utf32_stream,
3045
0
         utf32_stream_size,
3046
0
         &utf32_stream_index,
3047
0
         byte_order,
3048
0
         error ) != 1 )
3049
0
    {
3050
0
      libcerror_error_set(
3051
0
       error,
3052
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3053
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3054
0
       "%s: unable to copy Unicode character from UTF-32 stream.",
3055
0
       function );
3056
3057
0
      return( -1 );
3058
0
    }
3059
    /* Convert the Unicode character into UTF-8 character bytes
3060
     */
3061
0
    if( libuna_unicode_character_copy_to_utf8(
3062
0
         unicode_character,
3063
0
         utf8_string,
3064
0
         utf8_string_size,
3065
0
         utf8_string_index,
3066
0
         error ) != 1 )
3067
0
    {
3068
0
      libcerror_error_set(
3069
0
       error,
3070
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3071
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3072
0
       "%s: unable to copy Unicode character to UTF-8.",
3073
0
       function );
3074
3075
0
      return( -1 );
3076
0
    }
3077
0
    if( unicode_character == 0 )
3078
0
    {
3079
0
      break;
3080
0
    }
3081
0
  }
3082
  /* Check if the string is terminated with an end-of-string character
3083
   */
3084
0
  if( unicode_character != 0 )
3085
0
  {
3086
0
    if( *utf8_string_index >= utf8_string_size )
3087
0
    {
3088
0
      libcerror_error_set(
3089
0
       error,
3090
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3091
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3092
0
       "%s: UTF-8 string too small.",
3093
0
       function );
3094
3095
0
      return( -1 );
3096
0
    }
3097
0
    utf8_string[ *utf8_string_index ] = 0;
3098
3099
0
    *utf8_string_index += 1;
3100
0
  }
3101
0
  return( 1 );
3102
0
}
3103
3104
/* Compares an UTF-8 string with an UTF-32 stream
3105
 * Returns LIBUNA_COMPARE_LESS, LIBUNA_COMPARE_EQUAL, LIBUNA_COMPARE_GREATER if successful or -1 on error
3106
 */
3107
int libuna_utf8_string_compare_with_utf32_stream(
3108
     const libuna_utf8_character_t *utf8_string,
3109
     size_t utf8_string_size,
3110
     const uint8_t *utf32_stream,
3111
     size_t utf32_stream_size,
3112
     int byte_order,
3113
     libcerror_error_t **error )
3114
0
{
3115
0
  static char *function                                     = "libuna_utf8_string_compare_with_utf32_stream";
3116
0
  size_t utf32_stream_index                                 = 0;
3117
0
  size_t utf8_string_index                                  = 0;
3118
0
  libuna_unicode_character_t utf8_unicode_character         = 0;
3119
0
  libuna_unicode_character_t utf32_stream_unicode_character = 0;
3120
0
  int read_byte_order                                       = 0;
3121
3122
0
  if( utf8_string == NULL )
3123
0
  {
3124
0
    libcerror_error_set(
3125
0
     error,
3126
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3127
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3128
0
     "%s: invalid UTF-8 string.",
3129
0
     function );
3130
3131
0
    return( -1 );
3132
0
  }
3133
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
3134
0
  {
3135
0
    libcerror_error_set(
3136
0
     error,
3137
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3138
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3139
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
3140
0
     function );
3141
3142
0
    return( -1 );
3143
0
  }
3144
0
  if( utf32_stream == NULL )
3145
0
  {
3146
0
    libcerror_error_set(
3147
0
     error,
3148
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3149
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3150
0
     "%s: invalid UTF-32 stream.",
3151
0
     function );
3152
3153
0
    return( -1 );
3154
0
  }
3155
0
  if( utf32_stream_size > (size_t) SSIZE_MAX )
3156
0
  {
3157
0
    libcerror_error_set(
3158
0
     error,
3159
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3160
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3161
0
     "%s: invalid UTF-32 stream size value exceeds maximum.",
3162
0
     function );
3163
3164
0
    return( -1 );
3165
0
  }
3166
0
  if( ( utf32_stream_size == 0 )
3167
0
   || ( ( utf32_stream_size % 4 ) != 0 ) )
3168
0
  {
3169
0
    libcerror_error_set(
3170
0
     error,
3171
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3172
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3173
0
     "%s: missing UTF-32 stream value.",
3174
0
     function );
3175
3176
0
    return( -1 );
3177
0
  }
3178
  /* Check if UTF-32 stream is in big or little endian
3179
   */
3180
0
  if( utf32_stream_size >= 4 )
3181
0
  {
3182
0
    if( ( utf32_stream[ 0 ] == 0x00 )
3183
0
     && ( utf32_stream[ 1 ] == 0x00 )
3184
0
     && ( utf32_stream[ 2 ] == 0xfe )
3185
0
     && ( utf32_stream[ 3 ] == 0xff ) )
3186
0
    {
3187
0
      read_byte_order    = LIBUNA_ENDIAN_BIG;
3188
0
      utf32_stream_index = 4;
3189
0
    }
3190
0
    else if( ( utf32_stream[ 0 ] == 0xff )
3191
0
          && ( utf32_stream[ 1 ] == 0xfe )
3192
0
          && ( utf32_stream[ 2 ] == 0x00 )
3193
0
          && ( utf32_stream[ 3 ] == 0x00 ) )
3194
0
    {
3195
0
      read_byte_order    = LIBUNA_ENDIAN_LITTLE;
3196
0
      utf32_stream_index = 4;
3197
0
    }
3198
0
    if( byte_order == 0 )
3199
0
    {
3200
0
      byte_order = read_byte_order;
3201
0
    }
3202
0
  }
3203
0
  if( ( utf8_string_size >= 1 )
3204
0
   && ( utf8_string[ utf8_string_size - 1 ] == 0 ) )
3205
0
  {
3206
0
    utf8_string_size -= 1;
3207
0
  }
3208
  /* Check if the UTF-32 stream is terminated with zero bytes
3209
   */
3210
0
  if( ( utf32_stream_size >= 4 )
3211
0
   && ( utf32_stream[ utf32_stream_size - 4 ] == 0 )
3212
0
   && ( utf32_stream[ utf32_stream_size - 3 ] == 0 )
3213
0
   && ( utf32_stream[ utf32_stream_size - 2 ] == 0 )
3214
0
   && ( utf32_stream[ utf32_stream_size - 1 ] == 0 ) )
3215
0
  {
3216
0
    utf32_stream_size -= 1;
3217
0
  }
3218
0
  while( ( utf8_string_index < utf8_string_size )
3219
0
      && ( utf32_stream_index < utf32_stream_size ) )
3220
0
  {
3221
    /* Convert the UTF-8 character bytes into an Unicode character
3222
     */
3223
0
    if( libuna_unicode_character_copy_from_utf8(
3224
0
         &utf8_unicode_character,
3225
0
         utf8_string,
3226
0
         utf8_string_size,
3227
0
         &utf8_string_index,
3228
0
         error ) != 1 )
3229
0
    {
3230
0
      libcerror_error_set(
3231
0
       error,
3232
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3233
0
       LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
3234
0
       "%s: unable to copy Unicode character from UTF-8.",
3235
0
       function );
3236
3237
0
      return( -1 );
3238
0
    }
3239
    /* Convert the UTF-32 stream bytes into an Unicode character
3240
     */
3241
0
    if( libuna_unicode_character_copy_from_utf32_stream(
3242
0
         &utf32_stream_unicode_character,
3243
0
         utf32_stream,
3244
0
         utf32_stream_size,
3245
0
         &utf32_stream_index,
3246
0
         byte_order,
3247
0
                     error ) != 1 )
3248
0
    {
3249
0
      libcerror_error_set(
3250
0
       error,
3251
0
       LIBCERROR_ERROR_DOMAIN_CONVERSION,
3252
0
       LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3253
0
       "%s: unable to copy Unicode character from UTF-32 stream.",
3254
0
       function );
3255
3256
0
      return( -1 );
3257
0
    }
3258
0
    if( utf8_unicode_character < utf32_stream_unicode_character )
3259
0
    {
3260
0
      return( LIBUNA_COMPARE_LESS );
3261
0
    }
3262
0
    else if( utf8_unicode_character > utf32_stream_unicode_character )
3263
0
    {
3264
0
      return( LIBUNA_COMPARE_GREATER );
3265
0
    }
3266
0
  }
3267
  /* Check if both strings were entirely processed
3268
   */
3269
0
  if( utf8_string_index < utf8_string_size )
3270
0
  {
3271
0
    return( LIBUNA_COMPARE_GREATER );
3272
0
  }
3273
0
  else if( utf32_stream_index < utf32_stream_size )
3274
0
  {
3275
0
    return( LIBUNA_COMPARE_LESS );
3276
0
  }
3277
0
  return( LIBUNA_COMPARE_EQUAL );
3278
0
}
3279
3280
/* Determines the size of an UTF-8 string from a Standard Compression Scheme for Unicode (SCSU) stream
3281
 * Returns 1 if successful or -1 on error
3282
 */
3283
int libuna_utf8_string_size_from_scsu_stream(
3284
     const uint8_t *scsu_stream,
3285
     size_t scsu_stream_size,
3286
     size_t *utf8_string_size,
3287
     libcerror_error_t **error )
3288
0
{
3289
0
  uint32_t scsu_dynamic_window_positions[ 8 ] = {
3290
0
    0x0080, 0x00c0, 0x0400, 0x0600, 0x0900, 0x3040, 0x30a0, 0xff00 };
3291
3292
0
  static char *function                        = "libuna_utf8_string_size_from_scsu_stream";
3293
0
  libuna_unicode_character_t unicode_character = 0;
3294
0
  size_t scsu_stream_index                     = 0;
3295
0
  uint32_t scsu_window_position                = 0;
3296
0
  uint8_t byte_value1                          = 0;
3297
0
  uint8_t byte_value2                          = 0;
3298
0
  uint8_t byte_value3                          = 0;
3299
0
  uint8_t dynamic_window_position_index        = 0;
3300
0
  uint8_t in_unicode_mode                      = 0;
3301
0
  uint8_t unicode_character_set                = 0;
3302
3303
0
  if( scsu_stream == NULL )
3304
0
  {
3305
0
    libcerror_error_set(
3306
0
     error,
3307
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3308
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3309
0
     "%s: invalid SCSU stream.",
3310
0
     function );
3311
3312
0
    return( -1 );
3313
0
  }
3314
0
  if( scsu_stream_size > (size_t) SSIZE_MAX )
3315
0
  {
3316
0
    libcerror_error_set(
3317
0
     error,
3318
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3319
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3320
0
     "%s: invalid SCSU stream size value exceeds maximum.",
3321
0
     function );
3322
3323
0
    return( -1 );
3324
0
  }
3325
0
  if( utf8_string_size == NULL )
3326
0
  {
3327
0
    libcerror_error_set(
3328
0
     error,
3329
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3330
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3331
0
     "%s: invalid UTF-8 string size.",
3332
0
     function );
3333
3334
0
    return( -1 );
3335
0
  }
3336
0
  *utf8_string_size = 0;
3337
3338
0
  if( scsu_stream_size == 0 )
3339
0
  {
3340
0
    return( 1 );
3341
0
  }
3342
0
  while( scsu_stream_index < scsu_stream_size )
3343
0
  {
3344
0
    unicode_character_set = 0;
3345
3346
0
    if( scsu_stream_index >= scsu_stream_size )
3347
0
    {
3348
0
      libcerror_error_set(
3349
0
       error,
3350
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3351
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3352
0
       "%s: SCSU stream too small.",
3353
0
       function );
3354
3355
0
      return( -1 );
3356
0
    }
3357
0
    byte_value1 = scsu_stream[ scsu_stream_index++ ];
3358
3359
0
    if( in_unicode_mode != 0 )
3360
0
    {
3361
0
      if( ( byte_value1 <= 0xdf )
3362
0
       || ( byte_value1 >= 0xf3 ) )
3363
0
      {
3364
0
        if( scsu_stream_index >= scsu_stream_size )
3365
0
        {
3366
0
          libcerror_error_set(
3367
0
           error,
3368
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3369
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3370
0
           "%s: SCSU stream too small.",
3371
0
           function );
3372
3373
0
          return( -1 );
3374
0
        }
3375
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3376
3377
0
        unicode_character   = byte_value1;
3378
0
        unicode_character <<= 8;
3379
0
        unicode_character  |= byte_value2;
3380
3381
0
        unicode_character_set = 1;
3382
0
      }
3383
      /* UCn tags
3384
       */
3385
0
      else if( ( byte_value1 >= 0xe0 )
3386
0
            && ( byte_value1 <= 0xe7 ) )
3387
0
      {
3388
0
        dynamic_window_position_index = byte_value1 - 0xe0;
3389
3390
0
        in_unicode_mode = 0;
3391
0
      }
3392
      /* UDn tags
3393
       */
3394
0
      else if( ( byte_value1 >= 0xe8 )
3395
0
            && ( byte_value1 <= 0xef ) )
3396
0
      {
3397
0
        if( scsu_stream_index >= scsu_stream_size )
3398
0
        {
3399
0
          libcerror_error_set(
3400
0
           error,
3401
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3402
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3403
0
           "%s: SCSU stream too small.",
3404
0
           function );
3405
3406
0
          return( -1 );
3407
0
        }
3408
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3409
3410
0
        dynamic_window_position_index = byte_value1 - 0xe8;
3411
0
        scsu_window_position          = libuna_scsu_window_offset_table[ byte_value2 ];
3412
3413
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3414
3415
0
        in_unicode_mode = 0;
3416
0
      }
3417
      /* UQU tag
3418
       */
3419
0
      else if( byte_value1 == 0xf0 )
3420
0
      {
3421
0
        if( ( scsu_stream_size < 2 )
3422
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3423
0
        {
3424
0
          libcerror_error_set(
3425
0
           error,
3426
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3427
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3428
0
           "%s: SCSU stream too small.",
3429
0
           function );
3430
3431
0
          return( -1 );
3432
0
        }
3433
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3434
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3435
3436
0
        unicode_character   = byte_value2;
3437
0
        unicode_character <<= 8;
3438
0
        unicode_character  |= byte_value3;
3439
3440
0
        unicode_character_set = 1;
3441
0
      }
3442
      /* UDX tag
3443
       */
3444
0
      else if( byte_value1 == 0xf1 )
3445
0
      {
3446
0
        if( ( scsu_stream_size < 2 )
3447
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3448
0
        {
3449
0
          libcerror_error_set(
3450
0
           error,
3451
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3452
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3453
0
           "%s: SCSU stream too small.",
3454
0
           function );
3455
3456
0
          return( -1 );
3457
0
        }
3458
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3459
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3460
3461
0
        dynamic_window_position_index = byte_value2 >> 5;
3462
0
        scsu_window_position          = byte_value2 & 0x1f;
3463
0
        scsu_window_position        <<= 8;
3464
0
        scsu_window_position         |= byte_value3;
3465
0
        scsu_window_position        <<= 7;
3466
0
        scsu_window_position         += 0x00010000UL;
3467
3468
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3469
3470
0
        in_unicode_mode = 0;
3471
0
      }
3472
0
    }
3473
0
    else
3474
0
    {
3475
0
      if( ( byte_value1 == 0x00 )
3476
0
       || ( byte_value1 == 0x09 )
3477
0
       || ( byte_value1 == 0x0a )
3478
0
       || ( byte_value1 == 0x0c )
3479
0
       || ( byte_value1 == 0x0d )
3480
0
       || ( ( byte_value1 >= 0x20 )
3481
0
        &&  ( byte_value1 <= 0x7f ) ) )
3482
0
      {
3483
0
        unicode_character = byte_value1;
3484
3485
0
        unicode_character_set = 1;
3486
0
      }
3487
      /* SQn tags
3488
       */
3489
0
      else if( ( byte_value1 >= 0x01 )
3490
0
            && ( byte_value1 <= 0x08 ) )
3491
0
      {
3492
0
        if( scsu_stream_index >= scsu_stream_size )
3493
0
        {
3494
0
          libcerror_error_set(
3495
0
           error,
3496
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3497
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3498
0
           "%s: SCSU stream too small.",
3499
0
           function );
3500
3501
0
          return( -1 );
3502
0
        }
3503
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3504
3505
0
        unicode_character = byte_value2;
3506
3507
0
        if( byte_value2 < 0x80 )
3508
0
        {
3509
0
          unicode_character += libuna_scsu_static_window_positions[ byte_value1 - 0x01 ];
3510
0
        }
3511
0
        else
3512
0
        {
3513
0
          unicode_character -= 0x80;
3514
0
          unicode_character += scsu_dynamic_window_positions[ byte_value1 - 0x01 ];
3515
0
        }
3516
0
        unicode_character_set = 1;
3517
0
      }
3518
      /* SDX tag
3519
       */
3520
0
      else if( byte_value1 == 0x0b )
3521
0
      {
3522
0
        if( ( scsu_stream_size < 2 )
3523
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3524
0
        {
3525
0
          libcerror_error_set(
3526
0
           error,
3527
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3528
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3529
0
           "%s: SCSU stream too small.",
3530
0
           function );
3531
3532
0
          return( -1 );
3533
0
        }
3534
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3535
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3536
3537
0
        dynamic_window_position_index = byte_value2 >> 5;
3538
0
        scsu_window_position          = byte_value2 & 0x1f;
3539
0
        scsu_window_position        <<= 8;
3540
0
        scsu_window_position         |= byte_value3;
3541
0
        scsu_window_position        <<= 7;
3542
0
        scsu_window_position         += 0x00010000UL;
3543
3544
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3545
0
      }
3546
      /* SQU tag
3547
       */
3548
0
      else if( byte_value1 == 0x0e )
3549
0
      {
3550
0
        if( ( scsu_stream_size < 2 )
3551
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3552
0
        {
3553
0
          libcerror_error_set(
3554
0
           error,
3555
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3556
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3557
0
           "%s: SCSU stream too small.",
3558
0
           function );
3559
3560
0
          return( -1 );
3561
0
        }
3562
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3563
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3564
3565
0
        unicode_character   = byte_value2;
3566
0
        unicode_character <<= 8;
3567
0
        unicode_character  |= byte_value3;
3568
3569
0
        unicode_character_set = 1;
3570
0
      }
3571
      /* SCU tag
3572
       */
3573
0
      else if( byte_value1 == 0x0f )
3574
0
      {
3575
0
        in_unicode_mode = 1;
3576
0
      }
3577
      /* SCn tags
3578
       */
3579
0
      else if( ( byte_value1 >= 0x10 )
3580
0
            && ( byte_value1 <= 0x17 ) )
3581
0
      {
3582
0
        dynamic_window_position_index = byte_value1 - 0x10;
3583
0
      }
3584
      /* SDn tags
3585
       */
3586
0
      else if( ( byte_value1 >= 0x18 )
3587
0
            && ( byte_value1 <= 0x1f ) )
3588
0
      {
3589
0
        if( scsu_stream_index >= scsu_stream_size )
3590
0
        {
3591
0
          libcerror_error_set(
3592
0
           error,
3593
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3594
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3595
0
           "%s: SCSU stream too small.",
3596
0
           function );
3597
3598
0
          return( -1 );
3599
0
        }
3600
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3601
3602
0
        dynamic_window_position_index = byte_value1 - 0x18;
3603
0
        scsu_window_position          = libuna_scsu_window_offset_table[ byte_value2 ];
3604
3605
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3606
0
      }
3607
0
      else if( byte_value1 >= 0x80 )
3608
0
      {
3609
0
        unicode_character  = byte_value1 - 0x80;
3610
0
        unicode_character += scsu_dynamic_window_positions[ dynamic_window_position_index ];
3611
3612
0
        unicode_character_set = 1;
3613
0
      }
3614
0
    }
3615
0
    if( unicode_character_set != 0 )
3616
0
    {
3617
      /* Determine how many UTF-8 character bytes are required
3618
       */
3619
0
      if( libuna_unicode_character_size_to_utf8(
3620
0
           unicode_character,
3621
0
           utf8_string_size,
3622
0
           error ) != 1 )
3623
0
      {
3624
0
        libcerror_error_set(
3625
0
         error,
3626
0
         LIBCERROR_ERROR_DOMAIN_CONVERSION,
3627
0
         LIBCERROR_CONVERSION_ERROR_INPUT_FAILED,
3628
0
         "%s: unable to unable to determine size of Unicode character in UTF-8.",
3629
0
         function );
3630
3631
0
        return( -1 );
3632
0
      }
3633
0
      if( unicode_character == 0 )
3634
0
      {
3635
0
        break;
3636
0
      }
3637
0
    }
3638
0
  }
3639
  /* Check if the string is terminated with an end-of-string character
3640
   */
3641
0
  if( unicode_character != 0 )
3642
0
  {
3643
0
    *utf8_string_size += 1;
3644
0
  }
3645
0
  return( 1 );
3646
0
}
3647
3648
/* Copies an UTF-8 string from a Standard Compression Scheme for Unicode (SCSU) stream
3649
 * Returns 1 if successful or -1 on error
3650
 */
3651
int libuna_utf8_string_copy_from_scsu_stream(
3652
     libuna_utf8_character_t *utf8_string,
3653
     size_t utf8_string_size,
3654
     const uint8_t *scsu_stream,
3655
     size_t scsu_stream_size,
3656
     libcerror_error_t **error )
3657
0
{
3658
0
  static char *function    = "libuna_utf8_string_copy_from_scsu_stream";
3659
0
  size_t utf8_string_index = 0;
3660
3661
0
  if( libuna_utf8_string_with_index_copy_from_scsu_stream(
3662
0
       utf8_string,
3663
0
       utf8_string_size,
3664
0
       &utf8_string_index,
3665
0
       scsu_stream,
3666
0
       scsu_stream_size,
3667
0
       error ) != 1 )
3668
0
  {
3669
0
    libcerror_error_set(
3670
0
     error,
3671
0
     LIBCERROR_ERROR_DOMAIN_RUNTIME,
3672
0
     LIBCERROR_RUNTIME_ERROR_COPY_FAILED,
3673
0
     "%s: unable to SCSU stream to UTF-8 string.",
3674
0
     function );
3675
3676
0
    return( -1 );
3677
0
  }
3678
0
  return( 1 );
3679
0
}
3680
3681
/* Copies an UTF-8 string from a Standard Compression Scheme for Unicode (SCSU) stream
3682
 * Returns 1 if successful or -1 on error
3683
 */
3684
int libuna_utf8_string_with_index_copy_from_scsu_stream(
3685
     libuna_utf8_character_t *utf8_string,
3686
     size_t utf8_string_size,
3687
     size_t *utf8_string_index,
3688
     const uint8_t *scsu_stream,
3689
     size_t scsu_stream_size,
3690
     libcerror_error_t **error )
3691
0
{
3692
0
  uint32_t scsu_dynamic_window_positions[ 8 ] = {
3693
0
    0x0080, 0x00c0, 0x0400, 0x0600, 0x0900, 0x3040, 0x30a0, 0xff00 };
3694
3695
0
  static char *function                        = "libuna_utf8_string_with_index_copy_from_scsu_stream";
3696
0
  libuna_unicode_character_t unicode_character = 0;
3697
0
  size_t scsu_stream_index                     = 0;
3698
0
  uint32_t scsu_window_position                = 0;
3699
0
  uint8_t byte_value1                          = 0;
3700
0
  uint8_t byte_value2                          = 0;
3701
0
  uint8_t byte_value3                          = 0;
3702
0
  uint8_t dynamic_window_position_index        = 0;
3703
0
  uint8_t in_unicode_mode                      = 0;
3704
0
  uint8_t unicode_character_set                = 0;
3705
3706
0
  if( utf8_string == NULL )
3707
0
  {
3708
0
    libcerror_error_set(
3709
0
     error,
3710
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3711
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3712
0
     "%s: invalid UTF-8 string.",
3713
0
     function );
3714
3715
0
    return( -1 );
3716
0
  }
3717
0
  if( utf8_string_size > (size_t) SSIZE_MAX )
3718
0
  {
3719
0
    libcerror_error_set(
3720
0
     error,
3721
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3722
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3723
0
     "%s: invalid UTF-8 string size value exceeds maximum.",
3724
0
     function );
3725
3726
0
    return( -1 );
3727
0
  }
3728
0
  if( utf8_string_index == NULL )
3729
0
  {
3730
0
    libcerror_error_set(
3731
0
     error,
3732
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3733
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3734
0
     "%s: invalid UTF-8 string index.",
3735
0
     function );
3736
3737
0
    return( -1 );
3738
0
  }
3739
0
  if( scsu_stream == NULL )
3740
0
  {
3741
0
    libcerror_error_set(
3742
0
     error,
3743
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3744
0
     LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE,
3745
0
     "%s: invalid SCSU stream.",
3746
0
     function );
3747
3748
0
    return( -1 );
3749
0
  }
3750
0
  if( scsu_stream_size > (size_t) SSIZE_MAX )
3751
0
  {
3752
0
    libcerror_error_set(
3753
0
     error,
3754
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3755
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
3756
0
     "%s: invalid SCSU stream size value exceeds maximum.",
3757
0
     function );
3758
3759
0
    return( -1 );
3760
0
  }
3761
0
  if( scsu_stream_size == 0 )
3762
0
  {
3763
0
    libcerror_error_set(
3764
0
     error,
3765
0
     LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3766
0
     LIBCERROR_ARGUMENT_ERROR_VALUE_ZERO_OR_LESS,
3767
0
     "%s: missing SCSU stream value.",
3768
0
     function );
3769
3770
0
    return( -1 );
3771
0
  }
3772
0
  while( scsu_stream_index < scsu_stream_size )
3773
0
  {
3774
0
    unicode_character_set = 0;
3775
3776
0
    if( scsu_stream_index >= scsu_stream_size )
3777
0
    {
3778
0
      libcerror_error_set(
3779
0
       error,
3780
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3781
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3782
0
       "%s: SCSU stream too small.",
3783
0
       function );
3784
3785
0
      return( -1 );
3786
0
    }
3787
0
    byte_value1 = scsu_stream[ scsu_stream_index++ ];
3788
3789
0
    if( in_unicode_mode != 0 )
3790
0
    {
3791
0
      if( ( byte_value1 <= 0xdf )
3792
0
       || ( byte_value1 >= 0xf3 ) )
3793
0
      {
3794
0
        if( scsu_stream_index >= scsu_stream_size )
3795
0
        {
3796
0
          libcerror_error_set(
3797
0
           error,
3798
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3799
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3800
0
           "%s: SCSU stream too small.",
3801
0
           function );
3802
3803
0
          return( -1 );
3804
0
        }
3805
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3806
3807
0
        unicode_character   = byte_value1;
3808
0
        unicode_character <<= 8;
3809
0
        unicode_character  |= byte_value2;
3810
3811
0
        unicode_character_set = 1;
3812
0
      }
3813
      /* UCn tags
3814
       */
3815
0
      else if( ( byte_value1 >= 0xe0 )
3816
0
            && ( byte_value1 <= 0xe7 ) )
3817
0
      {
3818
0
        dynamic_window_position_index = byte_value1 - 0xe0;
3819
3820
0
        in_unicode_mode = 0;
3821
0
      }
3822
      /* UDn tags
3823
       */
3824
0
      else if( ( byte_value1 >= 0xe8 )
3825
0
            && ( byte_value1 <= 0xef ) )
3826
0
      {
3827
0
        if( scsu_stream_index >= scsu_stream_size )
3828
0
        {
3829
0
          libcerror_error_set(
3830
0
           error,
3831
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3832
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3833
0
           "%s: SCSU stream too small.",
3834
0
           function );
3835
3836
0
          return( -1 );
3837
0
        }
3838
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3839
3840
0
        dynamic_window_position_index = byte_value1 - 0xe8;
3841
0
        scsu_window_position          = libuna_scsu_window_offset_table[ byte_value2 ];
3842
3843
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3844
3845
0
        in_unicode_mode = 0;
3846
0
      }
3847
      /* UQU tag
3848
       */
3849
0
      else if( byte_value1 == 0xf0 )
3850
0
      {
3851
0
        if( ( scsu_stream_size < 2 )
3852
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3853
0
        {
3854
0
          libcerror_error_set(
3855
0
           error,
3856
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3857
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3858
0
           "%s: SCSU stream too small.",
3859
0
           function );
3860
3861
0
          return( -1 );
3862
0
        }
3863
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3864
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3865
3866
0
        unicode_character   = byte_value2;
3867
0
        unicode_character <<= 8;
3868
0
        unicode_character  |= byte_value3;
3869
3870
0
        unicode_character_set = 1;
3871
0
      }
3872
      /* UDX tag
3873
       */
3874
0
      else if( byte_value1 == 0xf1 )
3875
0
      {
3876
0
        if( ( scsu_stream_size < 2 )
3877
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3878
0
        {
3879
0
          libcerror_error_set(
3880
0
           error,
3881
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3882
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3883
0
           "%s: SCSU stream too small.",
3884
0
           function );
3885
3886
0
          return( -1 );
3887
0
        }
3888
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3889
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3890
3891
0
        dynamic_window_position_index = byte_value2 >> 5;
3892
0
        scsu_window_position          = byte_value2 & 0x1f;
3893
0
        scsu_window_position        <<= 8;
3894
0
        scsu_window_position         |= byte_value3;
3895
0
        scsu_window_position        <<= 7;
3896
0
        scsu_window_position         += 0x00010000UL;
3897
3898
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3899
3900
0
        in_unicode_mode = 0;
3901
0
      }
3902
0
    }
3903
0
    else
3904
0
    {
3905
0
      if( ( byte_value1 == 0x00 )
3906
0
       || ( byte_value1 == 0x09 )
3907
0
       || ( byte_value1 == 0x0a )
3908
0
       || ( byte_value1 == 0x0c )
3909
0
       || ( byte_value1 == 0x0d )
3910
0
       || ( ( byte_value1 >= 0x20 )
3911
0
        &&  ( byte_value1 <= 0x7f ) ) )
3912
0
      {
3913
0
        unicode_character = byte_value1;
3914
3915
0
        unicode_character_set = 1;
3916
0
      }
3917
      /* SQn tags
3918
       */
3919
0
      else if( ( byte_value1 >= 0x01 )
3920
0
            && ( byte_value1 <= 0x08 ) )
3921
0
      {
3922
0
        if( scsu_stream_index >= scsu_stream_size )
3923
0
        {
3924
0
          libcerror_error_set(
3925
0
           error,
3926
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3927
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3928
0
           "%s: SCSU stream too small.",
3929
0
           function );
3930
3931
0
          return( -1 );
3932
0
        }
3933
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3934
3935
0
        unicode_character = byte_value2;
3936
3937
0
        if( byte_value2 < 0x80 )
3938
0
        {
3939
0
          unicode_character += libuna_scsu_static_window_positions[ byte_value1 - 0x01 ];
3940
0
        }
3941
0
        else
3942
0
        {
3943
0
          unicode_character -= 0x80;
3944
0
          unicode_character += scsu_dynamic_window_positions[ byte_value1 - 0x01 ];
3945
0
        }
3946
0
        unicode_character_set = 1;
3947
0
      }
3948
      /* SDX tag
3949
       */
3950
0
      else if( byte_value1 == 0x0b )
3951
0
      {
3952
0
        if( ( scsu_stream_size < 2 )
3953
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3954
0
        {
3955
0
          libcerror_error_set(
3956
0
           error,
3957
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3958
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3959
0
           "%s: SCSU stream too small.",
3960
0
           function );
3961
3962
0
          return( -1 );
3963
0
        }
3964
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3965
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3966
3967
0
        dynamic_window_position_index = byte_value2 >> 5;
3968
0
        scsu_window_position          = byte_value2 & 0x1f;
3969
0
        scsu_window_position        <<= 8;
3970
0
        scsu_window_position         |= byte_value3;
3971
0
        scsu_window_position        <<= 7;
3972
0
        scsu_window_position         += 0x00010000UL;
3973
3974
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
3975
0
      }
3976
      /* SQU tag
3977
       */
3978
0
      else if( byte_value1 == 0x0e )
3979
0
      {
3980
0
        if( ( scsu_stream_size < 2 )
3981
0
         || ( scsu_stream_index > ( scsu_stream_size - 2 ) ) )
3982
0
        {
3983
0
          libcerror_error_set(
3984
0
           error,
3985
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
3986
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
3987
0
           "%s: SCSU stream too small.",
3988
0
           function );
3989
3990
0
          return( -1 );
3991
0
        }
3992
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
3993
0
        byte_value3 = scsu_stream[ scsu_stream_index++ ];
3994
3995
0
        unicode_character   = byte_value2;
3996
0
        unicode_character <<= 8;
3997
0
        unicode_character  |= byte_value3;
3998
3999
0
        unicode_character_set = 1;
4000
0
      }
4001
      /* SCU tag
4002
       */
4003
0
      else if( byte_value1 == 0x0f )
4004
0
      {
4005
0
        in_unicode_mode = 1;
4006
0
      }
4007
      /* SCn tags
4008
       */
4009
0
      else if( ( byte_value1 >= 0x10 )
4010
0
            && ( byte_value1 <= 0x17 ) )
4011
0
      {
4012
0
        dynamic_window_position_index = byte_value1 - 0x10;
4013
0
      }
4014
      /* SDn tags
4015
       */
4016
0
      else if( ( byte_value1 >= 0x18 )
4017
0
            && ( byte_value1 <= 0x1f ) )
4018
0
      {
4019
0
        if( scsu_stream_index >= scsu_stream_size )
4020
0
        {
4021
0
          libcerror_error_set(
4022
0
           error,
4023
0
           LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4024
0
           LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4025
0
           "%s: SCSU stream too small.",
4026
0
           function );
4027
4028
0
          return( -1 );
4029
0
        }
4030
0
        byte_value2 = scsu_stream[ scsu_stream_index++ ];
4031
4032
0
        dynamic_window_position_index = byte_value1 - 0x18;
4033
0
        scsu_window_position          = libuna_scsu_window_offset_table[ byte_value2 ];
4034
4035
0
        scsu_dynamic_window_positions[ dynamic_window_position_index ] = scsu_window_position;
4036
0
      }
4037
0
      else if( byte_value1 >= 0x80 )
4038
0
      {
4039
0
        unicode_character  = byte_value1 - 0x80;
4040
0
        unicode_character += scsu_dynamic_window_positions[ dynamic_window_position_index ];
4041
4042
0
        unicode_character_set = 1;
4043
0
      }
4044
0
    }
4045
0
    if( unicode_character_set != 0 )
4046
0
    {
4047
      /* Convert the Unicode character into UTF-8 character bytes
4048
       */
4049
0
      if( libuna_unicode_character_copy_to_utf8(
4050
0
           unicode_character,
4051
0
           utf8_string,
4052
0
           utf8_string_size,
4053
0
           utf8_string_index,
4054
0
           error ) != 1 )
4055
0
      {
4056
0
        libcerror_error_set(
4057
0
         error,
4058
0
         LIBCERROR_ERROR_DOMAIN_CONVERSION,
4059
0
         LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED,
4060
0
         "%s: unable to copy Unicode character to UTF-8.",
4061
0
         function );
4062
4063
0
        return( -1 );
4064
0
      }
4065
0
      if( unicode_character == 0 )
4066
0
      {
4067
0
        break;
4068
0
      }
4069
0
    }
4070
0
  }
4071
  /* Check if the string is terminated with an end-of-string character
4072
   */
4073
0
  if( unicode_character != 0 )
4074
0
  {
4075
0
    if( *utf8_string_index >= utf8_string_size )
4076
0
    {
4077
0
      libcerror_error_set(
4078
0
       error,
4079
0
       LIBCERROR_ERROR_DOMAIN_ARGUMENTS,
4080
0
       LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
4081
0
       "%s: UTF-8 string too small.",
4082
0
       function );
4083
4084
0
      return( -1 );
4085
0
    }
4086
0
    utf8_string[ *utf8_string_index ] = 0;
4087
4088
0
    *utf8_string_index += 1;
4089
0
  }
4090
0
  return( 1 );
4091
0
}
4092