/src/libreoffice/sal/textenc/handleundefinedunicodetotextchar.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
10 | | #include <sal/config.h> |
11 | | |
12 | | #include <rtl/character.hxx> |
13 | | #include <rtl/textcvt.h> |
14 | | #include <sal/types.h> |
15 | | |
16 | | #include "handleundefinedunicodetotextchar.hxx" |
17 | | #include "tenchelp.hxx" |
18 | | |
19 | | namespace { |
20 | | |
21 | | bool ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags) |
22 | 28.3M | { |
23 | 28.3M | return |
24 | 28.3M | ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0 |
25 | 0 | && ImplIsZeroWidth(c)) |
26 | 28.3M | || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0 |
27 | 0 | && ImplIsControlOrFormat(c)) |
28 | 28.3M | || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0 |
29 | 0 | && ImplIsPrivateUse(c)); |
30 | 28.3M | } |
31 | | |
32 | | bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, |
33 | | char * pBuf, |
34 | | sal_Size nMaxLen) |
35 | 28.0M | { |
36 | 28.0M | if (nMaxLen == 0) |
37 | 0 | return false; |
38 | 28.0M | switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) |
39 | 28.0M | { |
40 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: |
41 | 0 | *pBuf = 0x00; |
42 | 0 | break; |
43 | | |
44 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: |
45 | 28.0M | default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ |
46 | 28.0M | *pBuf = 0x3F; |
47 | 28.0M | break; |
48 | | |
49 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: |
50 | 0 | *pBuf = 0x5F; |
51 | 0 | break; |
52 | 28.0M | } |
53 | 28.0M | return true; |
54 | 28.0M | } |
55 | | |
56 | | bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, |
57 | | char * pBuf, |
58 | | sal_Size nMaxLen) |
59 | 243k | { |
60 | 243k | if (nMaxLen == 0) |
61 | 0 | return false; |
62 | 243k | switch (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) |
63 | 243k | { |
64 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_0: |
65 | 0 | *pBuf = 0x00; |
66 | 0 | break; |
67 | | |
68 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: |
69 | 243k | default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ |
70 | 243k | *pBuf = 0x3F; |
71 | 243k | break; |
72 | | |
73 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: |
74 | 0 | *pBuf = 0x5F; |
75 | 0 | break; |
76 | 243k | } |
77 | 243k | return true; |
78 | 243k | } |
79 | | |
80 | | } |
81 | | |
82 | | bool sal::detail::textenc::handleUndefinedUnicodeToTextChar( |
83 | | sal_Unicode const ** ppSrcBuf, sal_Unicode const * pEndSrcBuf, |
84 | | char ** ppDestBuf, char const * pEndDestBuf, sal_uInt32 nFlags, |
85 | | sal_uInt32 * pInfo) |
86 | 28.3M | { |
87 | 28.3M | sal_Unicode c = **ppSrcBuf; |
88 | | |
89 | | /* Should the private character map to one byte */ |
90 | 28.3M | if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) ) |
91 | 75.4k | { |
92 | 75.4k | if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) |
93 | 75.4k | { |
94 | 75.4k | **ppDestBuf = static_cast<char>(static_cast<unsigned char>(c-RTL_TEXTCVT_BYTE_PRIVATE_START)); |
95 | 75.4k | (*ppDestBuf)++; |
96 | 75.4k | (*ppSrcBuf)++; |
97 | 75.4k | return true; |
98 | 75.4k | } |
99 | 75.4k | } |
100 | | |
101 | | /* Should this character ignored (Private, Non Spacing, Control) */ |
102 | 28.3M | if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) |
103 | 0 | { |
104 | 0 | (*ppSrcBuf)++; |
105 | 0 | return true; |
106 | 0 | } |
107 | | |
108 | | /* Surrogates Characters should result in */ |
109 | | /* one replacement character */ |
110 | 28.3M | if (rtl::isHighSurrogate(c)) |
111 | 246k | { |
112 | 246k | if ( ((*ppSrcBuf) + 1) == pEndSrcBuf ) |
113 | 241 | { |
114 | 241 | *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; |
115 | 241 | return false; |
116 | 241 | } |
117 | | |
118 | 246k | c = *((*ppSrcBuf)+1); |
119 | 246k | if (rtl::isLowSurrogate(c)) |
120 | 2.94k | (*ppSrcBuf)++; |
121 | 243k | else |
122 | 243k | { |
123 | 243k | *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID; |
124 | 243k | if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) |
125 | 0 | { |
126 | 0 | *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; |
127 | 0 | return false; |
128 | 0 | } |
129 | 243k | if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) |
130 | 0 | { |
131 | 0 | (*ppSrcBuf)++; |
132 | 0 | return true; |
133 | 0 | } |
134 | 243k | if (ImplGetInvalidAsciiMultiByte(nFlags, |
135 | 243k | *ppDestBuf, |
136 | 243k | pEndDestBuf - *ppDestBuf)) |
137 | 243k | { |
138 | 243k | ++*ppSrcBuf; |
139 | 243k | ++*ppDestBuf; |
140 | 243k | return true; |
141 | 243k | } |
142 | 0 | *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; |
143 | 0 | return false; |
144 | 243k | } |
145 | 246k | } |
146 | | |
147 | 28.0M | *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED; |
148 | 28.0M | if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) |
149 | 1.58k | { |
150 | 1.58k | *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; |
151 | 1.58k | return false; |
152 | 1.58k | } |
153 | 28.0M | if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) |
154 | 0 | (*ppSrcBuf)++; |
155 | 28.0M | else if (ImplGetUndefinedAsciiMultiByte(nFlags, |
156 | 28.0M | *ppDestBuf, |
157 | 28.0M | pEndDestBuf - *ppDestBuf)) |
158 | 28.0M | { |
159 | 28.0M | ++*ppSrcBuf; |
160 | 28.0M | ++*ppDestBuf; |
161 | 28.0M | } |
162 | 0 | else |
163 | 0 | { |
164 | 0 | *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR |
165 | 0 | | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; |
166 | 0 | return false; |
167 | 0 | } |
168 | | |
169 | 28.0M | return true; |
170 | 28.0M | } |
171 | | |
172 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |