Coverage Report

Created: 2026-02-11 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xerces-c/src/xercesc/util/XMLUCS4Transcoder.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 * 
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 * 
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
19
// ---------------------------------------------------------------------------
20
//  Includes
21
// ---------------------------------------------------------------------------
22
#include <xercesc/util/BitOps.hpp>
23
#include <xercesc/util/XMLUCS4Transcoder.hpp>
24
#include <xercesc/util/TranscodingException.hpp>
25
#include <string.h>
26
27
XERCES_CPP_NAMESPACE_BEGIN
28
29
// ---------------------------------------------------------------------------
30
//  XMLUCS4Transcoder: Constructors and Destructor
31
// ---------------------------------------------------------------------------
32
XMLUCS4Transcoder::XMLUCS4Transcoder(const  XMLCh* const    encodingName // Constructor: all work happens in the initializer list below
33
                                    , const XMLSize_t       blockSize
34
                                    , const bool            swapped
35
                                    , MemoryManager* const manager) :
36
37
101
    XMLTranscoder(encodingName, blockSize, manager) // encoding name, block size and memory manager go to the XMLTranscoder base
38
101
    , fSwapped(swapped) // kept so transcodeFrom/transcodeTo know whether to call BitOps::swapBytes
39
101
{
40
101
}
41
42
43
XMLUCS4Transcoder::~XMLUCS4Transcoder() // Destructor: the body is empty, nothing is released here
44
101
{
45
101
}
46
47
48
// ---------------------------------------------------------------------------
49
//  XMLUCS4Transcoder: Implementation of the transcoder API
50
// ---------------------------------------------------------------------------
51
XMLSize_t // returns the number of XMLCh units written into toFill
52
XMLUCS4Transcoder::transcodeFrom(const  XMLByte* const          srcData // source bytes, read as an array of UCS4Ch
53
                                , const XMLSize_t               srcCount // source size in bytes
54
                                ,       XMLCh* const            toFill // output buffer of XMLCh units
55
                                , const XMLSize_t               maxChars // capacity of toFill in XMLCh units
56
                                ,       XMLSize_t&              bytesEaten // out: number of source bytes consumed
57
                                ,       unsigned char* const    charSizes) // out: per written XMLCh, the source bytes it consumed
58
512
{
59
    //
60
    //  Get pointers to the start and end of the source buffer in terms of
61
    //  UCS-4 characters. The integer division means trailing bytes that do not form a whole UCS4Ch are never read. NOTE(review): the cast assumes srcData is aligned for UCS4Ch access - confirm with callers.
62
    //
63
512
    const UCS4Ch*   srcPtr = reinterpret_cast<const UCS4Ch*>(srcData);
64
512
    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
65
66
    //
67
    //  Get pointers to the start and end of the target buffer, which is
68
    //  in terms of the XMLCh chars we output.
69
    //
70
512
    XMLCh*  outPtr = toFill;
71
512
    XMLCh*  outEnd = toFill + maxChars;
72
73
    //
74
    //  And get a pointer into the char sizes buffer. We will run this
75
    //  up as we put chars into the output buffer.
76
    //
77
512
    unsigned char* sizePtr = charSizes;
78
79
    //
80
    //  Now process chars until we either use up all our source or all of
81
    //  our output space.
82
    //
83
2.33M
    while ((outPtr < outEnd) && (srcPtr < srcEnd))
84
2.33M
    {
85
        //
86
        //  Get the next UCS char out of the buffer. Do not bump the ptr
87
        //  yet since we might not have enough storage for it in the target
88
        //  (if it causes a surrogate pair to be created).
89
        //
90
2.33M
        UCS4Ch nextVal = *srcPtr;
91
92
        // If it needs to be swapped, then do it
93
2.33M
        if (fSwapped)
94
2.32M
            nextVal = BitOps::swapBytes(nextVal);
95
96
        // Any bit set above the low 16 means a surrogate pair is needed. NOTE(review): no upper bound (0x10FFFF) is checked before the pair is built - confirm callers guarantee it.
97
2.33M
        if (nextVal & 0xFFFF0000)
98
2.11M
        {
99
            //
100
            //  If we do not have room for both of the chars, then we
101
            //  bail out now. The loop test already guarantees one free slot, so only the exactly-one-slot case is checked.
102
            //
103
2.11M
            if (outPtr + 1 == outEnd)
104
50
                break;
105
106
2.11M
            const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10); // folds the 0x10000 subtraction into the lead surrogate base
107
2.11M
          const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10)); // leading surrogate
108
2.11M
          const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF)); // trailing surrogate carries the low 10 bits
109
110
            //
111
            //  We have room so store them both. But note that the
112
            //  second one took up no source bytes!
113
            //
114
2.11M
            *sizePtr++ = sizeof(UCS4Ch);
115
2.11M
            *outPtr++ = ch1;
116
2.11M
            *sizePtr++ = 0;
117
2.11M
            *outPtr++ = ch2;
118
2.11M
        }
119
212k
         else
120
212k
        {
121
            //
122
            //  No surrogate, so just store it and bump the count of chars
123
            //  read. Update the char sizes buffer for this char's entry.
124
            //
125
212k
            *sizePtr++ = sizeof(UCS4Ch);
126
212k
            *outPtr++ = XMLCh(nextVal);
127
212k
        }
128
129
        // Indicate that we ate another UCS char's worth of bytes
130
2.33M
        srcPtr++;
131
2.33M
    }
132
133
    // Set the bytes eaten parameter (a character that did not fit above was not counted, since srcPtr was not advanced for it)
134
512
    bytesEaten = ((const XMLByte*)srcPtr) - srcData;
135
136
    // And return the chars written into the output buffer
137
512
    return outPtr - toFill;
138
512
}
139
140
141
XMLSize_t // returns the number of bytes written into toFill
142
XMLUCS4Transcoder::transcodeTo( const   XMLCh* const    srcData // source XMLCh units
143
                                , const XMLSize_t       srcCount // number of XMLCh units in srcData
144
                                ,       XMLByte* const  toFill // output bytes, written as UCS4Ch values
145
                                , const XMLSize_t       maxBytes // capacity of toFill in bytes
146
                                ,       XMLSize_t&      charsEaten // out: number of XMLCh units consumed
147
                                , const UnRepOpts) // unnamed, not used in this body
148
0
{
149
    //
150
    //  Get pointers to the start and end of the source buffer, which
151
    //  is in terms of XMLCh chars.
152
    //
153
0
    const XMLCh*  srcPtr = srcData;
154
0
    const XMLCh*  srcEnd = srcData + srcCount;
155
156
    //
157
    //  Get pointers to the start and end of the target buffer, in terms
158
    //  of UCS-4 chars. The integer division leaves any trailing partial UCS4Ch slot unused.
159
    //
160
0
    UCS4Ch*   outPtr = reinterpret_cast<UCS4Ch*>(toFill);
161
0
    UCS4Ch*   outEnd = outPtr + (maxBytes / sizeof(UCS4Ch));
162
163
    //
164
    //  Now loop until we either run out of source characters or we
165
    //  fill up our output buffer.
166
    //
167
0
    XMLCh trailCh;
168
0
    while ((outPtr < outEnd) && (srcPtr < srcEnd))
169
0
    {
170
        //
171
        //  Get out an XMLCh char from the source. Do not bump up the
172
        //  pointer yet, since it might be a leading surrogate for which we do not
173
        //  have the trailing one.
174
        //
175
0
        const XMLCh curCh = *srcPtr;
176
177
        //
178
        //  If it is a leading char of a surrogate pair (0xD800-0xDBFF) handle it one way,
179
        //  else just cast it over into the target.
180
        //
181
0
        if ((curCh >= 0xD800) && (curCh <= 0xDBFF))
182
0
        {
183
            //
184
            //  Ok, we have to have another source char available or we
185
            //  just give up without eating the leading char.
186
            //
187
0
            if (srcPtr + 1 == srcEnd)
188
0
                break;
189
190
            //
191
            //  We have the trailing char, so eat the first char and the
192
            //  trailing char from the source.
193
            //
194
0
            srcPtr++;
195
0
            trailCh = *srcPtr++;
196
197
            //
198
            //  Then make sure it is a legal trailing char (0xDC00-0xDFFF). If not, throw
199
            //  an exception.
200
            //
201
0
            if ( !( (trailCh >= 0xDC00) && (trailCh <= 0xDFFF) ) )
202
0
                ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadTrailingSurrogate, getMemoryManager());
203
204
            // And now combine the two into a single output char. NOTE(review): this store never consults fSwapped, while the non-surrogate branch below swaps - confirm whether a BitOps::swapBytes call is missing here.
205
0
            const XMLInt32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
206
0
            *outPtr++ = (curCh << 10) + trailCh + SURROGATE_OFFSET;
207
0
        }
208
0
         else
209
0
        {
210
            //
211
            //  It is just a char, so we can take it as is. If we need to
212
            //  swap it, then swap it. Because of flaky compilers, use
213
            //  a temp first. A lone trailing surrogate (0xDC00-0xDFFF) also lands here and is copied through unchanged.
214
            //
215
0
            const UCS4Ch tmpCh = UCS4Ch(curCh);
216
0
            if (fSwapped)
217
0
                *outPtr++ = BitOps::swapBytes(tmpCh);
218
0
            else
219
0
                *outPtr++ = tmpCh;
220
221
            // Bump the source pointer
222
0
            srcPtr++;
223
0
        }
224
0
    }
225
226
    // Set the chars we ate from the source
227
0
    charsEaten = srcPtr - srcData;
228
229
    // Return the bytes we wrote to the output
230
0
    return ((XMLByte*)outPtr) - toFill;
231
0
}
232
233
234
bool XMLUCS4Transcoder::canTranscodeTo(const unsigned int)
235
0
{
236
    // We can handle anything: the unnamed unsigned int argument is ignored and true is always returned
237
0
    return true;
238
0
}
239
240
XERCES_CPP_NAMESPACE_END