/src/util-linux/lib/encode.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Based on code from libblkid, |
3 | | * |
4 | | * Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org> |
5 | | * Copyright (C) 2009 Karel Zak <kzak@redhat.com> |
6 | | * Copyright (C) 2020 Pali Rohár <pali.rohar@gmail.com> |
7 | | * |
8 | | * This file may be redistributed under the terms of the |
9 | | * GNU Lesser General Public License. |
10 | | */ |
11 | | #include "c.h" |
12 | | #include "encode.h" |
13 | | |
14 | | size_t ul_encode_to_utf8(int enc, unsigned char *dest, size_t len, |
15 | | const unsigned char *src, size_t count) |
16 | 0 | { |
17 | 0 | size_t i, j; |
18 | 0 | uint32_t c; |
19 | 0 | uint16_t c2; |
20 | |
|
21 | 0 | for (j = i = 0; i < count; i++) { |
22 | 0 | if (enc == UL_ENCODE_UTF16LE) { |
23 | 0 | if (i+2 > count) |
24 | 0 | break; |
25 | 0 | c = (src[i+1] << 8) | src[i]; |
26 | 0 | i++; |
27 | 0 | } else if (enc == UL_ENCODE_UTF16BE) { |
28 | 0 | if (i+2 > count) |
29 | 0 | break; |
30 | 0 | c = (src[i] << 8) | src[i+1]; |
31 | 0 | i++; |
32 | 0 | } else if (enc == UL_ENCODE_LATIN1) { |
33 | 0 | c = src[i]; |
34 | 0 | } else { |
35 | 0 | return 0; |
36 | 0 | } |
37 | 0 | if ((enc == UL_ENCODE_UTF16LE || enc == UL_ENCODE_UTF16BE) && |
38 | 0 | c >= 0xD800 && c <= 0xDBFF && i+2 < count) { |
39 | 0 | if (enc == UL_ENCODE_UTF16LE) |
40 | 0 | c2 = (src[i+2] << 8) | src[i+1]; |
41 | 0 | else |
42 | 0 | c2 = (src[i+1] << 8) | src[i+2]; |
43 | 0 | if (c2 >= 0xDC00 && c2 <= 0xDFFF) { |
44 | 0 | c = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00); |
45 | 0 | i += 2; |
46 | 0 | } |
47 | 0 | } |
48 | 0 | if (c == 0) { |
49 | 0 | dest[j] = '\0'; |
50 | 0 | break; |
51 | 0 | } |
52 | | |
53 | 0 | if (c < 0x80) { |
54 | 0 | if (j+1 >= len) |
55 | 0 | break; |
56 | 0 | dest[j++] = (uint8_t) c; |
57 | 0 | } else if (c < 0x800) { |
58 | 0 | if (j+2 >= len) |
59 | 0 | break; |
60 | 0 | dest[j++] = (uint8_t) (0xc0 | (c >> 6)); |
61 | 0 | dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); |
62 | 0 | } else if (c < 0x10000) { |
63 | 0 | if (j+3 >= len) |
64 | 0 | break; |
65 | 0 | dest[j++] = (uint8_t) (0xe0 | (c >> 12)); |
66 | 0 | dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f)); |
67 | 0 | dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); |
68 | 0 | } else { |
69 | 0 | if (j+4 >= len) |
70 | 0 | break; |
71 | 0 | dest[j++] = (uint8_t) (0xf0 | (c >> 18)); |
72 | 0 | dest[j++] = (uint8_t) (0x80 | ((c >> 12) & 0x3f)); |
73 | 0 | dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f)); |
74 | 0 | dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); |
75 | 0 | } |
76 | 0 | } |
77 | 0 | dest[j] = '\0'; |
78 | 0 | return j; |
79 | 0 | } |