/src/xerces-c/src/xercesc/util/XMLUCS4Transcoder.cpp
Line | Count | Source |
1 | | /* |
2 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
3 | | * contributor license agreements. See the NOTICE file distributed with |
4 | | * this work for additional information regarding copyright ownership. |
5 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 | | * (the "License"); you may not use this file except in compliance with |
7 | | * the License. You may obtain a copy of the License at |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | |
19 | | // --------------------------------------------------------------------------- |
20 | | // Includes |
21 | | // --------------------------------------------------------------------------- |
22 | | #include <xercesc/util/BitOps.hpp> |
23 | | #include <xercesc/util/XMLUCS4Transcoder.hpp> |
24 | | #include <xercesc/util/TranscodingException.hpp> |
25 | | #include <string.h> |
26 | | |
27 | | XERCES_CPP_NAMESPACE_BEGIN |
28 | | |
29 | | // --------------------------------------------------------------------------- |
30 | | // XMLUCS4Transcoder: Constructors and Destructor |
31 | | // --------------------------------------------------------------------------- |
32 | | XMLUCS4Transcoder::XMLUCS4Transcoder(const XMLCh* const encodingName |
33 | | , const XMLSize_t blockSize |
34 | | , const bool swapped |
35 | | , MemoryManager* const manager) : |
36 | | |
37 | 101 | XMLTranscoder(encodingName, blockSize, manager) |
38 | 101 | , fSwapped(swapped) |
39 | 101 | { |
40 | 101 | } |
41 | | |
42 | | |
43 | | XMLUCS4Transcoder::~XMLUCS4Transcoder() |
44 | 101 | { |
45 | 101 | } |
46 | | |
47 | | |
48 | | // --------------------------------------------------------------------------- |
49 | | // XMLUCS4Transcoder: Implementation of the transcoder API |
50 | | // --------------------------------------------------------------------------- |
51 | | XMLSize_t |
52 | | XMLUCS4Transcoder::transcodeFrom(const XMLByte* const srcData |
53 | | , const XMLSize_t srcCount |
54 | | , XMLCh* const toFill |
55 | | , const XMLSize_t maxChars |
56 | | , XMLSize_t& bytesEaten |
57 | | , unsigned char* const charSizes) |
58 | 512 | { |
59 | | // |
60 | | // Get pointers to the start and end of the source buffer in terms of |
61 | | // UCS-4 characters. |
62 | | // |
63 | 512 | const UCS4Ch* srcPtr = reinterpret_cast<const UCS4Ch*>(srcData); |
64 | 512 | const UCS4Ch* srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch)); |
65 | | |
66 | | // |
67 | | // Get pointers to the start and end of the target buffer, which is |
68 | | // in terms of the XMLCh chars we output. |
69 | | // |
70 | 512 | XMLCh* outPtr = toFill; |
71 | 512 | XMLCh* outEnd = toFill + maxChars; |
72 | | |
73 | | // |
74 | | // And get a pointer into the char sizes buffer. We will run this |
75 | | // up as we put chars into the output buffer. |
76 | | // |
77 | 512 | unsigned char* sizePtr = charSizes; |
78 | | |
79 | | // |
80 | | // Now process chars until we either use up all our source or all of |
81 | | // our output space. |
82 | | // |
83 | 2.33M | while ((outPtr < outEnd) && (srcPtr < srcEnd)) |
84 | 2.33M | { |
85 | | // |
86 | | // Get the next UCS char out of the buffer. Don't bump the ptr |
87 | | // yet since we might not have enough storage for it in the target |
88 | | // (if its causes a surrogate pair to be created. |
89 | | // |
90 | 2.33M | UCS4Ch nextVal = *srcPtr; |
91 | | |
92 | | // If it needs to be swapped, then do it |
93 | 2.33M | if (fSwapped) |
94 | 2.32M | nextVal = BitOps::swapBytes(nextVal); |
95 | | |
96 | | // Handle a surrogate pair if needed |
97 | 2.33M | if (nextVal & 0xFFFF0000) |
98 | 2.11M | { |
99 | | // |
100 | | // If we don't have room for both of the chars, then we |
101 | | // bail out now. |
102 | | // |
103 | 2.11M | if (outPtr + 1 == outEnd) |
104 | 50 | break; |
105 | | |
106 | 2.11M | const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10); |
107 | 2.11M | const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10)); |
108 | 2.11M | const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF)); |
109 | | |
110 | | // |
111 | | // We have room so store them both. But note that the |
112 | | // second one took up no source bytes! |
113 | | // |
114 | 2.11M | *sizePtr++ = sizeof(UCS4Ch); |
115 | 2.11M | *outPtr++ = ch1; |
116 | 2.11M | *sizePtr++ = 0; |
117 | 2.11M | *outPtr++ = ch2; |
118 | 2.11M | } |
119 | 212k | else |
120 | 212k | { |
121 | | // |
122 | | // No surrogate, so just store it and bump the count of chars |
123 | | // read. Update the char sizes buffer for this char's entry. |
124 | | // |
125 | 212k | *sizePtr++ = sizeof(UCS4Ch); |
126 | 212k | *outPtr++ = XMLCh(nextVal); |
127 | 212k | } |
128 | | |
129 | | // Indicate that we ate another UCS char's worth of bytes |
130 | 2.33M | srcPtr++; |
131 | 2.33M | } |
132 | | |
133 | | // Set the bytes eaten parameter |
134 | 512 | bytesEaten = ((const XMLByte*)srcPtr) - srcData; |
135 | | |
136 | | // And return the chars written into the output buffer |
137 | 512 | return outPtr - toFill; |
138 | 512 | } |
139 | | |
140 | | |
141 | | XMLSize_t |
142 | | XMLUCS4Transcoder::transcodeTo( const XMLCh* const srcData |
143 | | , const XMLSize_t srcCount |
144 | | , XMLByte* const toFill |
145 | | , const XMLSize_t maxBytes |
146 | | , XMLSize_t& charsEaten |
147 | | , const UnRepOpts) |
148 | 0 | { |
149 | | // |
150 | | // Get pointers to the start and end of the source buffer, which |
151 | | // is in terms of XMLCh chars. |
152 | | // |
153 | 0 | const XMLCh* srcPtr = srcData; |
154 | 0 | const XMLCh* srcEnd = srcData + srcCount; |
155 | | |
156 | | // |
157 | | // Get pointers to the start and end of the target buffer, in terms |
158 | | // of UCS-4 chars. |
159 | | // |
160 | 0 | UCS4Ch* outPtr = reinterpret_cast<UCS4Ch*>(toFill); |
161 | 0 | UCS4Ch* outEnd = outPtr + (maxBytes / sizeof(UCS4Ch)); |
162 | | |
163 | | // |
164 | | // Now loop until we either run out of source characters or we |
165 | | // fill up our output buffer. |
166 | | // |
167 | 0 | XMLCh trailCh; |
168 | 0 | while ((outPtr < outEnd) && (srcPtr < srcEnd)) |
169 | 0 | { |
170 | | // |
171 | | // Get out an XMLCh char from the source. Don't bump up the |
172 | | // pointer yet, since it might be a leading for which we don't |
173 | | // have the trailing. |
174 | | // |
175 | 0 | const XMLCh curCh = *srcPtr; |
176 | | |
177 | | // |
178 | | // If its a leading char of a surrogate pair handle it one way, |
179 | | // else just cast it over into the target. |
180 | | // |
181 | 0 | if ((curCh >= 0xD800) && (curCh <= 0xDBFF)) |
182 | 0 | { |
183 | | // |
184 | | // Ok, we have to have another source char available or we |
185 | | // just give up without eating the leading char. |
186 | | // |
187 | 0 | if (srcPtr + 1 == srcEnd) |
188 | 0 | break; |
189 | | |
190 | | // |
191 | | // We have the trailing char, so eat the first char and the |
192 | | // trailing char from the source. |
193 | | // |
194 | 0 | srcPtr++; |
195 | 0 | trailCh = *srcPtr++; |
196 | | |
197 | | // |
198 | | // Then make sure its a legal trailing char. If not, throw |
199 | | // an exception. |
200 | | // |
201 | 0 | if ( !( (trailCh >= 0xDC00) && (trailCh <= 0xDFFF) ) ) |
202 | 0 | ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadTrailingSurrogate, getMemoryManager()); |
203 | | |
204 | | // And now combine the two into a single output char |
205 | 0 | const XMLInt32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; |
206 | 0 | *outPtr++ = (curCh << 10) + trailCh + SURROGATE_OFFSET; |
207 | 0 | } |
208 | 0 | else |
209 | 0 | { |
210 | | // |
211 | | // Its just a char, so we can take it as is. If we need to |
212 | | // swap it, then swap it. Because of flakey compilers, use |
213 | | // a temp first. |
214 | | // |
215 | 0 | const UCS4Ch tmpCh = UCS4Ch(curCh); |
216 | 0 | if (fSwapped) |
217 | 0 | *outPtr++ = BitOps::swapBytes(tmpCh); |
218 | 0 | else |
219 | 0 | *outPtr++ = tmpCh; |
220 | | |
221 | | // Bump the source pointer |
222 | 0 | srcPtr++; |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | | // Set the chars we ate from the source |
227 | 0 | charsEaten = srcPtr - srcData; |
228 | | |
229 | | // Return the bytes we wrote to the output |
230 | 0 | return ((XMLByte*)outPtr) - toFill; |
231 | 0 | } |
232 | | |
233 | | |
234 | | bool XMLUCS4Transcoder::canTranscodeTo(const unsigned int) |
235 | 0 | { |
236 | | // We can handle anything |
237 | 0 | return true; |
238 | 0 | } |
239 | | |
240 | | XERCES_CPP_NAMESPACE_END |