/src/libreoffice/sal/textenc/converter.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | |
22 | | #include <o3tl/safeint.hxx> |
23 | | #include <rtl/textcvt.h> |
24 | | #include <sal/types.h> |
25 | | |
26 | | #include "converter.hxx" |
27 | | #include "tenchelp.hxx" |
28 | | #include "unichars.hxx" |
29 | | |
30 | | sal::detail::textenc::BadInputConversionAction |
31 | | sal::detail::textenc::handleBadInputTextToUnicodeConversion( |
32 | | bool bUndefined, bool bMultiByte, char cByte, sal_uInt32 nFlags, |
33 | | sal_Unicode ** pDestBufPtr, const sal_Unicode * pDestBufEnd, sal_uInt32 * pInfo) |
34 | 253M | { |
35 | 253M | *pInfo |= bUndefined |
36 | 253M | ? (bMultiByte |
37 | 38.4k | ? RTL_TEXTTOUNICODE_INFO_MBUNDEFINED |
38 | 38.4k | : RTL_TEXTTOUNICODE_INFO_UNDEFINED) |
39 | 253M | : RTL_TEXTTOUNICODE_INFO_INVALID; |
40 | 253M | switch (nFlags |
41 | 253M | & (bUndefined |
42 | 253M | ? (bMultiByte |
43 | 38.4k | ? RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK |
44 | 38.4k | : RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) |
45 | 253M | : RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK)) |
46 | 253M | { |
47 | 398 | case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR: |
48 | 398 | case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR: |
49 | 1.10M | case RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR: |
50 | 1.10M | *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; |
51 | 1.10M | return BAD_INPUT_STOP; |
52 | | |
53 | 0 | case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE: |
54 | 0 | case RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE: |
55 | 0 | case RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE: |
56 | 0 | return BAD_INPUT_CONTINUE; |
57 | | |
58 | 3.20k | case RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE: |
59 | 3.20k | if (*pDestBufPtr != pDestBufEnd) |
60 | 3.20k | { |
61 | 3.20k | *(*pDestBufPtr)++ = RTL_TEXTCVT_BYTE_PRIVATE_START |
62 | 3.20k | | static_cast<unsigned char>(cByte); |
63 | 3.20k | return BAD_INPUT_CONTINUE; |
64 | 3.20k | } |
65 | 0 | else |
66 | 0 | return BAD_INPUT_NO_OUTPUT; |
67 | | |
68 | 252M | default: // RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT, |
69 | | // RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT, |
70 | | // RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
71 | 252M | if (*pDestBufPtr != pDestBufEnd) |
72 | 252M | { |
73 | 252M | *(*pDestBufPtr)++ = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; |
74 | 252M | return BAD_INPUT_CONTINUE; |
75 | 252M | } |
76 | 31.5k | else |
77 | 31.5k | return BAD_INPUT_NO_OUTPUT; |
78 | 253M | } |
79 | 253M | } |
80 | | |
81 | | sal::detail::textenc::BadInputConversionAction |
82 | | sal::detail::textenc::handleBadInputUnicodeToTextConversion( |
83 | | bool bUndefined, sal_uInt32 nUtf32, sal_uInt32 nFlags, char ** pDestBufPtr, |
84 | | const char * pDestBufEnd, sal_uInt32 * pInfo, char const * pPrefix, |
85 | | sal_Size nPrefixLen, bool * pPrefixWritten) |
86 | 57.3k | { |
87 | | // TODO! RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE |
88 | | // RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR |
89 | | |
90 | 57.3k | char cReplace; |
91 | | |
92 | 57.3k | if (bUndefined) |
93 | 0 | { |
94 | 0 | if (ImplIsControlOrFormat(nUtf32)) |
95 | 0 | { |
96 | 0 | if ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0) |
97 | 0 | nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; |
98 | 0 | } |
99 | 0 | else if (ImplIsPrivateUse(nUtf32)) |
100 | 0 | { |
101 | 0 | if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0) |
102 | 0 | nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; |
103 | 0 | else if ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0) != 0) |
104 | 0 | nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0; |
105 | 0 | } |
106 | 0 | else if (ImplIsZeroWidth(nUtf32)) |
107 | 0 | { |
108 | 0 | if ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0) |
109 | 0 | nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE; |
110 | 0 | } |
111 | 0 | } |
112 | 57.3k | *pInfo |= bUndefined ? RTL_UNICODETOTEXT_INFO_UNDEFINED : |
113 | 57.3k | RTL_UNICODETOTEXT_INFO_INVALID; |
114 | 57.3k | switch (nFlags & (bUndefined ? RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK : |
115 | 57.3k | RTL_UNICODETOTEXT_FLAGS_INVALID_MASK)) |
116 | 57.3k | { |
117 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR: |
118 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR: |
119 | 0 | *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; |
120 | 0 | return BAD_INPUT_STOP; |
121 | | |
122 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE: |
123 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE: |
124 | 0 | if (pPrefixWritten) |
125 | 0 | *pPrefixWritten = false; |
126 | 0 | return BAD_INPUT_CONTINUE; |
127 | | |
128 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: |
129 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_0: |
130 | 0 | cReplace = 0; |
131 | 0 | break; |
132 | | |
133 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: |
134 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: |
135 | 57.3k | default: // RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT, |
136 | | // RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT |
137 | 57.3k | cReplace = '?'; |
138 | 57.3k | break; |
139 | | |
140 | 0 | case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: |
141 | 0 | case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: |
142 | 0 | cReplace = '_'; |
143 | 0 | break; |
144 | 57.3k | } |
145 | 57.3k | if (o3tl::make_unsigned(pDestBufEnd - *pDestBufPtr) > nPrefixLen) |
146 | 57.3k | { |
147 | 57.3k | while (nPrefixLen-- > 0) |
148 | 0 | *(*pDestBufPtr)++ = *pPrefix++; |
149 | 57.3k | *(*pDestBufPtr)++ = cReplace; |
150 | 57.3k | if (pPrefixWritten) |
151 | 0 | *pPrefixWritten = true; |
152 | 57.3k | return BAD_INPUT_CONTINUE; |
153 | 57.3k | } |
154 | 0 | return BAD_INPUT_NO_OUTPUT; |
155 | 57.3k | } |
156 | | |
157 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |