/src/xerces-c/src/xercesc/framework/XMLRecognizer.cpp

Source
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *  $Id: XMLRecognizer.cpp 555320 2007-07-11 16:05:13Z amassari $
 */


// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/framework/XMLRecognizer.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/XMLString.hpp>

XERCES_CPP_NAMESPACE_BEGIN

// ---------------------------------------------------------------------------
//  Local data
//
//  gEncodingNameMap
//      Lookup table indexed by an XMLRecognizer::Encodings value; each slot
//      holds the canonical name string for that encoding. The table has
//      Encodings_Count slots, and the order of the entries below must follow
//      the order of the enum, so update both together.
// ---------------------------------------------------------------------------
static const XMLCh* gEncodingNameMap[XMLRecognizer::Encodings_Count] =
{
    XMLUni::fgEBCDICEncodingString,
    XMLUni::fgUCS4BEncodingString,
    XMLUni::fgUCS4LEncodingString,
    XMLUni::fgUSASCIIEncodingString,
    XMLUni::fgUTF8EncodingString,
    XMLUni::fgUTF16BEncodingString,
    XMLUni::fgUTF16LEncodingString,
    XMLUni::fgXMLChEncodingString
};



// ---------------------------------------------------------------------------
//  XMLRecognizer: Public, const static data
//
//  fgXXXPre
//  fgXXXPreLen
//      Leading byte sequences used to auto-sense an encoding, one per
//      supported encoding, together with the length in bytes of each
//      sequence. The ASCII sequence is the six characters '<?xml '; the
//      UTF-16 and UCS-4 tables hold the same six code points widened to
//      2 and 4 bytes in big- and little-endian order.
//
//  fgUTF8BOM
//  fgUTF8BOMLen
//      The three byte UTF-8 byte order mark (EF BB BF) and its length.
// ---------------------------------------------------------------------------

// Single byte prefixes
const char           XMLRecognizer::fgASCIIPre[]  =
{
    0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20
};
const XMLSize_t      XMLRecognizer::fgASCIIPreLen = 6;

const XMLByte        XMLRecognizer::fgEBCDICPre[] =
{
    0x4C, 0x6F, 0xA7, 0x94, 0x93, 0x40
};
const XMLSize_t      XMLRecognizer::fgEBCDICPreLen = 6;

// Two bytes per character: big-endian, then little-endian
const XMLByte        XMLRecognizer::fgUTF16BPre[] =
{
    0x00, 0x3C,  0x00, 0x3F,  0x00, 0x78,
    0x00, 0x6D,  0x00, 0x6C,  0x00, 0x20
};
const XMLByte        XMLRecognizer::fgUTF16LPre[] =
{
    0x3C, 0x00,  0x3F, 0x00,  0x78, 0x00,
    0x6D, 0x00,  0x6C, 0x00,  0x20, 0x00
};
const XMLSize_t      XMLRecognizer::fgUTF16PreLen = 12;

// Four bytes per character: big-endian, then little-endian
const XMLByte        XMLRecognizer::fgUCS4BPre[]  =
{
    0x00, 0x00, 0x00, 0x3C,
    0x00, 0x00, 0x00, 0x3F,
    0x00, 0x00, 0x00, 0x78,
    0x00, 0x00, 0x00, 0x6D,
    0x00, 0x00, 0x00, 0x6C,
    0x00, 0x00, 0x00, 0x20
};
const XMLByte        XMLRecognizer::fgUCS4LPre[]  =
{
    0x3C, 0x00, 0x00, 0x00,
    0x3F, 0x00, 0x00, 0x00,
    0x78, 0x00, 0x00, 0x00,
    0x6D, 0x00, 0x00, 0x00,
    0x6C, 0x00, 0x00, 0x00,
    0x20, 0x00, 0x00, 0x00
};
const XMLSize_t      XMLRecognizer::fgUCS4PreLen = 24;

// UTF-8 byte order mark
const char           XMLRecognizer::fgUTF8BOM[] =
{
    static_cast<char>(0xEF), static_cast<char>(0xBB), static_cast<char>(0xBF)
};
const XMLSize_t      XMLRecognizer::fgUTF8BOMLen = 3;

// ---------------------------------------------------------------------------
//  XMLRecognizer: Encoding recognition methods
// ---------------------------------------------------------------------------
//
//  Makes a best guess at the encoding of an entity from its first raw bytes.
//
//  rawBuffer       The leading bytes of the entity. Only read when
//                  rawByteCount is large enough for the check being made.
//  rawByteCount    The number of valid bytes in rawBuffer.
//
//  Returns one of UTF_8, UTF_16B, UTF_16L, UCS_4B, UCS_4L or EBCDIC. UTF_8
//  is also the fallback whenever nothing more specific is recognized, so
//  that the caller can at least get through the first line of the entity.
//
XMLRecognizer::Encodings
XMLRecognizer::basicEncodingProbe(  const   XMLByte* const  rawBuffer
                                    , const XMLSize_t       rawByteCount)
{
    //
    //  As an optimization to check the 90% case, check first for the ASCII
    //  sequence '<?xml', which means it's either US-ASCII, UTF-8, or some
    //  other encoding that we don't do manually but which happens to share
    //  the US-ASCII code points for these characters. So just return UTF-8
    //  to get us through the first line.
    //
    if (rawByteCount >= fgASCIIPreLen)
    {
        if (!memcmp(rawBuffer, fgASCIIPre, fgASCIIPreLen))
            return UTF_8;
    }

    //
    //  If the count of raw bytes is less than 2, it cannot be anything
    //  we understand, so return UTF-8 as a fallback.
    //
    if (rawByteCount < 2)
        return UTF_8;

    //
    //  We have only two or three bytes, so the only thing we can check for
    //  is a UTF-16 BOM. That is quick to check and enough to identify two
    //  major encodings.
    //
    if (rawByteCount < 4)
    {
        if ((rawBuffer[0] == 0xFE) && (rawBuffer[1] == 0xFF))
            return UTF_16B;
        else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE))
            return UTF_16L;
        else
            return UTF_8;
    }

    /***
     *    F.1 Detection Without External Encoding Information
     *
     *    Because each XML entity not accompanied by external encoding information and
     *    not in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
     *    in which the first characters must be '<?xml', any conforming processor can detect,
     *    after two to four octets of input, which of the following cases apply.
     *
     *    In reading this list, it may help to know that in UCS-4, '<' is "#x0000003C" and
     *    '?' is "#x0000003F", and the Byte Order Mark required of UTF-16 data streams is
     *    "#xFEFF". The notation ## is used to denote any byte value except that two consecutive
     *    ##s cannot be both 00.
     *
     *    With a Byte Order Mark:
     *
     *    00 00 FE FF           UCS-4,    big-endian machine    (1234 order)
     *    FF FE 00 00           UCS-4,    little-endian machine (4321 order)
     *    00 00 FF FE           UCS-4,    unusual octet order   (2143)
     *    FE FF 00 00           UCS-4,    unusual octet order   (3412)
     *    FE FF ## ##           UTF-16,   big-endian
     *    FF FE ## ##           UTF-16,   little-endian
     *    EF BB BF              UTF-8
     *
     ***/

    //
    //  We have at least four bytes, so we can check all BOM
    //  for UCS-4BE, UCS-4LE, UTF-16BE and UTF-16LE as well. The UCS-4LE
    //  test must come before the UTF-16LE one because both start with
    //  FF FE. The two unusual UCS-4 octet orders are not handled here.
    //
    if ((rawBuffer[0] == 0x00) && (rawBuffer[1] == 0x00) && (rawBuffer[2] == 0xFE) && (rawBuffer[3] == 0xFF))
        return UCS_4B;
    else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE) && (rawBuffer[2] == 0x00) && (rawBuffer[3] == 0x00))
        return UCS_4L;
    else if ((rawBuffer[0] == 0xFE) && (rawBuffer[1] == 0xFF))
        return UTF_16B;
    else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE))
        return UTF_16L;

    //
    //  No BOM. Check for the '<?xml ' sequence in its UCS-4 and UTF-16
    //  forms. All four of those prefixes start with either 0x00 or 0x3C,
    //  so the first byte is used to skip the comparisons cheaply. Each
    //  comparison is guarded by the length of the prefix it reads.
    //
    if ((rawBuffer[0] == 0x00) || (rawBuffer[0] == 0x3C))
    {
        if (rawByteCount >= fgUCS4PreLen && !memcmp(rawBuffer, fgUCS4BPre, fgUCS4PreLen))
            return UCS_4B;
        else if (rawByteCount >= fgUCS4PreLen && !memcmp(rawBuffer, fgUCS4LPre, fgUCS4PreLen))
            return UCS_4L;
        else if (rawByteCount >= fgUTF16PreLen && !memcmp(rawBuffer, fgUTF16BPre, fgUTF16PreLen))
            return UTF_16B;
        else if (rawByteCount >= fgUTF16PreLen && !memcmp(rawBuffer, fgUTF16LPre, fgUTF16PreLen))
            return UTF_16L;
    }

    //
    //  See if we have enough bytes to possibly match the EBCDIC prefix.
    //  If so, try it. A buffer holding exactly the prefix is enough, so
    //  the test is '>=' like the other prefix checks above.
    //
    if (rawByteCount >= fgEBCDICPreLen)
    {
        if (!memcmp(rawBuffer, fgEBCDICPre, fgEBCDICPreLen))
            return EBCDIC;
    }

    //
    //  Does not seem to be anything we know, so go with UTF-8 to get at
    //  least through the first line and see what it really is.
    //
    return UTF_8;
}


//
//  Maps an encoding name onto the Encodings value that we handle
//  intrinsically, or OtherEncoding when the name is not one of ours.
//
XMLRecognizer::Encodings
XMLRecognizer::encodingForName(const XMLCh* const encName)
{
    //
    //  The name is matched with plain string comparisons against each of
    //  the spellings we know, on the assumption that the caller has already
    //  uppercased it.
    //
    //  !!NOTE: EBCDIC is deliberately absent here because we don't handle
    //  that one ourselves. It ends up as 'other'.
    //

    //  Internal XMLCh encoding: try pointer identity, then the string itself
    if (encName == XMLUni::fgXMLChEncodingString)
        return XMLRecognizer::XERCES_XMLCH;
    if (XMLString::compareString(encName, XMLUni::fgXMLChEncodingString) == 0)
        return XMLRecognizer::XERCES_XMLCH;

    //  UTF-8
    if (XMLString::compareString(encName, XMLUni::fgUTF8EncodingString) == 0)
        return XMLRecognizer::UTF_8;
    if (XMLString::compareString(encName, XMLUni::fgUTF8EncodingString2) == 0)
        return XMLRecognizer::UTF_8;

    //  US-ASCII
    if (XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString) == 0)
        return XMLRecognizer::US_ASCII;
    if (XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString2) == 0)
        return XMLRecognizer::US_ASCII;
    if (XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString3) == 0)
        return XMLRecognizer::US_ASCII;
    if (XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString4) == 0)
        return XMLRecognizer::US_ASCII;

    //  UTF-16 with an explicit byte order
    if (XMLString::compareString(encName, XMLUni::fgUTF16LEncodingString) == 0)
        return XMLRecognizer::UTF_16L;
    if (XMLString::compareString(encName, XMLUni::fgUTF16LEncodingString2) == 0)
        return XMLRecognizer::UTF_16L;
    if (XMLString::compareString(encName, XMLUni::fgUTF16BEncodingString) == 0)
        return XMLRecognizer::UTF_16B;
    if (XMLString::compareString(encName, XMLUni::fgUTF16BEncodingString2) == 0)
        return XMLRecognizer::UTF_16B;

    //  UTF-16 without a byte order: follow the endianness flag for XMLCh
    if (XMLString::compareString(encName, XMLUni::fgUTF16EncodingString) == 0)
    {
        if (XMLPlatformUtils::fgXMLChBigEndian)
            return XMLRecognizer::UTF_16B;
        return XMLRecognizer::UTF_16L;
    }

    //  UCS-4 with an explicit byte order
    if (XMLString::compareString(encName, XMLUni::fgUCS4LEncodingString) == 0)
        return XMLRecognizer::UCS_4L;
    if (XMLString::compareString(encName, XMLUni::fgUCS4LEncodingString2) == 0)
        return XMLRecognizer::UCS_4L;
    if (XMLString::compareString(encName, XMLUni::fgUCS4BEncodingString) == 0)
        return XMLRecognizer::UCS_4B;
    if (XMLString::compareString(encName, XMLUni::fgUCS4BEncodingString2) == 0)
        return XMLRecognizer::UCS_4B;

    //  UCS-4 without a byte order: follow the endianness flag for XMLCh
    if (XMLString::compareString(encName, XMLUni::fgUCS4EncodingString) == 0)
    {
        if (XMLPlatformUtils::fgXMLChBigEndian)
            return XMLRecognizer::UCS_4B;
        return XMLRecognizer::UCS_4L;
    }

    // Return 'other' since we don't recognize it
    return XMLRecognizer::OtherEncoding;
}


//
//  Returns the canonical name stored in gEncodingNameMap for the given
//  encoding. Values that are not below Encodings_Count have no slot in the
//  map; for those a RuntimeException with the XMLRec_UnknownEncoding code
//  is raised through ThrowXMLwithMemMgr, passing along the given manager.
//
const XMLCh*
XMLRecognizer::nameForEncoding(const XMLRecognizer::Encodings theEncoding
                               , MemoryManager* const manager)
{
    const bool hasMapEntry = (theEncoding < Encodings_Count);
    if (!hasMapEntry)
    {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::XMLRec_UnknownEncoding, manager);
    }

    return gEncodingNameMap[theEncoding];
}

XERCES_CPP_NAMESPACE_END

Coverage Report

Created: 2026-04-01 06:57

Line	Count	Source
1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		/**
19		* $Id: XMLRecognizer.cpp 555320 2007-07-11 16:05:13Z amassari $
20		*/
21
22
23		// ---------------------------------------------------------------------------
24		// Includes
25		// ---------------------------------------------------------------------------
26		#include <xercesc/framework/XMLRecognizer.hpp>
27		#include <xercesc/util/RuntimeException.hpp>
28		#include <xercesc/util/XMLString.hpp>
29
30		XERCES_CPP_NAMESPACE_BEGIN
31
32		// ---------------------------------------------------------------------------
33		// Local data
34		//
35		// gEncodingNameMap
36		// This array maps the Encodings enum values to their canonical names.
37		// Be sure to keep this in sync with that enum!
38		// ---------------------------------------------------------------------------
39		static const XMLCh* gEncodingNameMap[XMLRecognizer::Encodings_Count] =
40		{
41		XMLUni::fgEBCDICEncodingString
42		, XMLUni::fgUCS4BEncodingString
43		, XMLUni::fgUCS4LEncodingString
44		, XMLUni::fgUSASCIIEncodingString
45		, XMLUni::fgUTF8EncodingString
46		, XMLUni::fgUTF16BEncodingString
47		, XMLUni::fgUTF16LEncodingString
48		, XMLUni::fgXMLChEncodingString
49		};
50
51
52
53		// ---------------------------------------------------------------------------
54		// XMLRecognizer: Public, const static data
55		//
56		// gXXXPre
57		// gXXXPreLen
58		// The byte sequence prefixes for all of the encodings that we can
59		// auto sense. Also included is the length of each sequence.
60		// ---------------------------------------------------------------------------
61		const char XMLRecognizer::fgASCIIPre[] = { 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20 };
62		const XMLSize_t XMLRecognizer::fgASCIIPreLen = 6;
63		const XMLByte XMLRecognizer::fgEBCDICPre[] = { 0x4C, 0x6F, 0xA7, 0x94, 0x93, 0x40 };
64		const XMLSize_t XMLRecognizer::fgEBCDICPreLen = 6;
65		const XMLByte XMLRecognizer::fgUTF16BPre[] = { 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20 };
66		const XMLByte XMLRecognizer::fgUTF16LPre[] = { 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, 0x00 };
67		const XMLSize_t XMLRecognizer::fgUTF16PreLen = 12;
68		const XMLByte XMLRecognizer::fgUCS4BPre[] =
69		{
70		0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3F
71		, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D
72		, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x20
73		};
74		const XMLByte XMLRecognizer::fgUCS4LPre[] =
75		{
76		0x3C, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00
77		, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00
78		, 0x6C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00
79		};
80		const XMLSize_t XMLRecognizer::fgUCS4PreLen = 24;
81
82		const char XMLRecognizer::fgUTF8BOM[] = {(char)0xEF, (char)0xBB, (char)0xBF};
83		const XMLSize_t XMLRecognizer::fgUTF8BOMLen = 3;
84
85		// ---------------------------------------------------------------------------
86		// XMLRecognizer: Encoding recognition methods
87		// ---------------------------------------------------------------------------
88		XMLRecognizer::Encodings
89		XMLRecognizer::basicEncodingProbe( const XMLByte* const rawBuffer
90		, const XMLSize_t rawByteCount)
91	17.6k	{
92		//
93		// As an optimization to check the 90% case, check first for the ASCII
94		// sequence '<?xml', which means its either US-ASCII, UTF-8, or some
95		// other encoding that we don't do manually but which happens to share
96		// the US-ASCII code points for these characters. So just return UTF-8
97		// to get us through the first line.
98		//
99	17.6k	if (rawByteCount >= fgASCIIPreLen)
100	17.6k	{
101	17.6k	if (!memcmp(rawBuffer, fgASCIIPre, fgASCIIPreLen))
102	7.81k	return UTF_8;
103	17.6k	}
104
105		//
106		// If the count of raw bytes is less than 2, it cannot be anything
107		// we understand, so return UTF-8 as a fallback.
108		//
109	9.85k	if (rawByteCount < 2)
110	0	return UTF_8;
111
112		//
113		// We have two to four bytes, so lets check for a UTF-16 BOM. That
114		// is quick to check and enough to identify two major encodings.
115		//
116
117	9.85k	if (rawByteCount < 4)
118	0	{
119	0	if ((rawBuffer[0] == 0xFE) && (rawBuffer[1] == 0xFF))
120	0	return UTF_16B;
121	0	else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE))
122	0	return UTF_16L;
123	0	else
124	0	return UTF_8;
125	0	}
126
127		/***
128		* F.1 Detection Without External Encoding Information
129		*
130		* Because each XML entity not accompanied by external encoding information and
131		* not in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
132		* in which the first characters must be '<?xml', any conforming processor can detect,
133		* after two to four octets of input, which of the following cases apply.
134		*
135		* In reading this list, it may help to know that in UCS-4, '<' is "#x0000003C" and
136		* '?' is "#x0000003F", and the Byte Order Mark required of UTF-16 data streams is
137		* "#xFEFF". The notation ## is used to denote any byte value except that two consecutive
138		* ##s cannot be both 00.
139		*
140		* With a Byte Order Mark:
141		*
142		* 00 00 FE FF UCS-4, big-endian machine (1234 order)
143		* FF FE 00 00 UCS-4, little-endian machine (4321 order)
144		* 00 00 FF FE UCS-4, unusual octet order (2143)
145		* FE FF 00 00 UCS-4, unusual octet order (3412)
146		* FE FF ## ## UTF-16, big-endian
147		* FF FE ## ## UTF-16, little-endian
148		* EF BB BF UTF-8
149		*
150		***/
151
152		//
153		// We have at least four bytes, so we can check all BOM
154		// for UCS-4BE, UCS-4LE, UTF-16BE and UTF-16LE as well.
155		//
156	9.85k	if ((rawBuffer[0] == 0x00) && (rawBuffer[1] == 0x00) && (rawBuffer[2] == 0xFE) && (rawBuffer[3] == 0xFF))
157	4	return UCS_4B;
158	9.85k	else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE) && (rawBuffer[2] == 0x00) && (rawBuffer[3] == 0x00))
159	13	return UCS_4L;
160	9.83k	else if ((rawBuffer[0] == 0xFE) && (rawBuffer[1] == 0xFF))
161	46	return UTF_16B;
162	9.79k	else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE))
163	83	return UTF_16L;
164
165		//
166		// We have at least 4 bytes. So lets check the 4 byte sequences that
167		// indicate other UTF-16 and UCS encodings.
168		//
169	9.70k	if ((rawBuffer[0] == 0x00) \|\| (rawBuffer[0] == 0x3C))
170	2.90k	{
171	2.90k	if (rawByteCount >= fgUCS4PreLen && !memcmp(rawBuffer, fgUCS4BPre, fgUCS4PreLen))
172	22	return UCS_4B;
173	2.87k	else if (rawByteCount >= fgUCS4PreLen && !memcmp(rawBuffer, fgUCS4LPre, fgUCS4PreLen))
174	3	return UCS_4L;
175	2.87k	else if (rawByteCount >= fgUTF16PreLen && !memcmp(rawBuffer, fgUTF16BPre, fgUTF16PreLen))
176	37	return UTF_16B;
177	2.83k	else if (rawByteCount >= fgUTF16PreLen && !memcmp(rawBuffer, fgUTF16LPre, fgUTF16PreLen))
178	21	return UTF_16L;
179	2.90k	}
180
181		//
182		// See if we have enough bytes to possibly match the EBCDIC prefix.
183		// If so, try it.
184		//
185	9.62k	if (rawByteCount > fgEBCDICPreLen)
186	9.62k	{
187	9.62k	if (!memcmp(rawBuffer, fgEBCDICPre, fgEBCDICPreLen))
188	18	return EBCDIC;
189	9.62k	}
190
191		//
192		// Does not seem to be anything we know, so go with UTF-8 to get at
193		// least through the first line and see what it really is.
194		//
195	9.60k	return UTF_8;
196	9.62k	}
197
198
199		XMLRecognizer::Encodings
200		XMLRecognizer::encodingForName(const XMLCh* const encName)
201	7.10k	{
202		//
203		// Compare the passed string, assume input string is already uppercased,
204		// to the variations that we recognize.
205		//
206		// !!NOTE: Note that we don't handle EBCDIC here because we don't handle
207		// that one ourselves. It is allowed to fall into 'other'.
208		//
209	7.10k	if (encName == XMLUni::fgXMLChEncodingString \|\|
210	7.10k	!XMLString::compareString(encName, XMLUni::fgXMLChEncodingString))
211	0	{
212	0	return XMLRecognizer::XERCES_XMLCH;
213	0	}
214	7.10k	else if (!XMLString::compareString(encName, XMLUni::fgUTF8EncodingString)
215	7.01k	\|\| !XMLString::compareString(encName, XMLUni::fgUTF8EncodingString2))
216	84	{
217	84	return XMLRecognizer::UTF_8;
218	84	}
219	7.01k	else if (!XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString)
220	6.94k	\|\| !XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString2)
221	6.94k	\|\| !XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString3)
222	6.87k	\|\| !XMLString::compareString(encName, XMLUni::fgUSASCIIEncodingString4))
223	138	{
224	138	return XMLRecognizer::US_ASCII;
225	138	}
226	6.87k	else if (!XMLString::compareString(encName, XMLUni::fgUTF16LEncodingString)
227	6.87k	\|\| !XMLString::compareString(encName, XMLUni::fgUTF16LEncodingString2))
228	8	{
229	8	return XMLRecognizer::UTF_16L;
230	8	}
231	6.87k	else if (!XMLString::compareString(encName, XMLUni::fgUTF16BEncodingString)
232	6.87k	\|\| !XMLString::compareString(encName, XMLUni::fgUTF16BEncodingString2))
233	24	{
234	24	return XMLRecognizer::UTF_16B;
235	24	}
236	6.84k	else if (!XMLString::compareString(encName, XMLUni::fgUTF16EncodingString))
237	0	{
238	0	return XMLPlatformUtils::fgXMLChBigEndian?XMLRecognizer::UTF_16B:XMLRecognizer::UTF_16L;
239	0	}
240	6.84k	else if (!XMLString::compareString(encName, XMLUni::fgUCS4LEncodingString)
241	6.84k	\|\| !XMLString::compareString(encName, XMLUni::fgUCS4LEncodingString2))
242	0	{
243	0	return XMLRecognizer::UCS_4L;
244	0	}
245	6.84k	else if (!XMLString::compareString(encName, XMLUni::fgUCS4BEncodingString)
246	6.84k	\|\| !XMLString::compareString(encName, XMLUni::fgUCS4BEncodingString2))
247	0	{
248	0	return XMLRecognizer::UCS_4B;
249	0	}
250	6.84k	else if (!XMLString::compareString(encName, XMLUni::fgUCS4EncodingString))
251	0	{
252	0	return XMLPlatformUtils::fgXMLChBigEndian?XMLRecognizer::UCS_4B:XMLRecognizer::UCS_4L;
253	0	}
254
255		// Return 'other' since we don't recognizer it
256	6.84k	return XMLRecognizer::OtherEncoding;
257	7.10k	}
258
259
260		const XMLCh*
261		XMLRecognizer::nameForEncoding(const XMLRecognizer::Encodings theEncoding
262		, MemoryManager* const manager)
263	426k	{
264	426k	if (theEncoding >= Encodings_Count)
265	0	ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::XMLRec_UnknownEncoding, manager);
266
267	426k	return gEncodingNameMap[theEncoding];
268	426k	}
269
270		XERCES_CPP_NAMESPACE_END