TestXmlReader.java
/*
* Copyright 2004 Sun Microsystems, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.rometools.opml;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;
import junit.framework.TestCase;
import com.rometools.rome.io.XmlReader;
/**
 * Tests the encoding reported by {@link XmlReader} for raw streams and for streams accompanied
 * by an HTTP Content-Type value.
 * <p>
 * The XML documents are generated in memory by
 * {@link #getXmlStream(String, String, String, String)}, which combines an optional BOM, an
 * optional XML prolog and the charset used to encode the document body.
 */
public class TestXmlReader extends TestCase {

    /** Text every "invalid encoding" failure message is expected to contain. */
    private static final String INVALID_ENCODING_MARKER = "Invalid encoding,";

    /**
     * Runs the three test methods directly on a fresh instance, without a JUnit runner.
     *
     * @param args ignored
     * @throws Exception if any of the tests fails
     */
    public static void main(final String[] args) throws Exception {
        final TestXmlReader test = new TestXmlReader();
        test.testRawBom();
        test.testRawNoBom();
        test.testHttp();
    }

    /**
     * Checks the encoding reported for BOM-less documents written in {@code encoding}: without an
     * encoding declaration the expected result is UTF-8, with one it is the declared encoding.
     * Each document gets its own reader, which is closed once it has been checked.
     *
     * @param encoding charset used both to write the document and, where present, in the prolog
     * @throws Exception if a document cannot be generated or read
     */
    protected void _testRawNoBomValid(final String encoding) throws Exception {
        assertEncoding("UTF-8", new XmlReader(getXmlStream("no-bom", "xml", encoding, encoding), false));
        assertEncoding("UTF-8", new XmlReader(getXmlStream("no-bom", "xml-prolog", encoding, encoding)));
        assertEncoding(encoding, new XmlReader(getXmlStream("no-bom", "xml-prolog-encoding", encoding, encoding)));
    }

    /**
     * Expects the construction of a reader over a BOM-less document with an encoding declaration
     * to be rejected with an "Invalid encoding," error.
     *
     * @param encoding charset used both to write the document and in its prolog
     * @throws Exception if the document cannot be generated
     */
    protected void _testRawNoBomInvalid(final String encoding) throws Exception {
        final InputStream is = getXmlStream("no-bom", "xml-prolog-encoding", encoding, encoding);
        final XmlReader xmlReader;
        try {
            xmlReader = new XmlReader(is, false);
        } catch (final IOException ex) {
            assertInvalidEncoding(ex);
            return;
        }
        // Release the unexpectedly created reader before reporting the failure.
        xmlReader.close();
        fail("It should have failed");
    }

    /**
     * Runs the BOM-less checks for US-ASCII, UTF-8 and ISO-8859-1 documents.
     */
    public void testRawNoBom() throws Exception {
        _testRawNoBomValid("US-ASCII");
        _testRawNoBomValid("UTF-8");
        _testRawNoBomValid("ISO-8859-1");
    }

    /**
     * Checks that a document carrying the BOM for {@code encoding} and declaring the same
     * encoding in its prolog is reported with that encoding. For plain "UTF-16" only the prefix
     * of the reported encoding is compared.
     *
     * @param encoding charset used for the BOM, the stream and the prolog
     * @throws Exception if the document cannot be generated or read
     */
    protected void _testRawBomValid(final String encoding) throws Exception {
        final InputStream is = getXmlStream(encoding + "-bom", "xml-prolog-encoding", encoding, encoding);
        final XmlReader xmlReader = new XmlReader(is, false);
        if (encoding.equals("UTF-16")) {
            assertEncodingStartsWith(encoding, xmlReader);
        } else {
            assertEncoding(encoding, xmlReader);
        }
    }

    /**
     * Expects the construction of a reader to be rejected with an "Invalid encoding," error for
     * the given combination of BOM, stream charset and prolog encoding.
     *
     * @param bomEnc key of the BOM to write (see {@link #getXmlStream})
     * @param streamEnc charset used to encode the document
     * @param prologEnc encoding declared in the XML prolog
     * @throws Exception if the document cannot be generated
     */
    protected void _testRawBomInvalid(final String bomEnc, final String streamEnc, final String prologEnc) throws Exception {
        final InputStream is = getXmlStream(bomEnc, "xml-prolog-encoding", streamEnc, prologEnc);
        final XmlReader xmlReader;
        try {
            xmlReader = new XmlReader(is, false);
        } catch (final IOException ex) {
            assertInvalidEncoding(ex);
            return;
        }
        // Release the unexpectedly created reader before reporting the failure.
        xmlReader.close();
        fail("It should have failed for BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc);
    }

    /**
     * Runs the accepted and rejected BOM / stream charset / prolog encoding combinations.
     */
    public void testRawBom() throws Exception {
        _testRawBomValid("UTF-8");
        _testRawBomValid("UTF-16BE");
        _testRawBomValid("UTF-16LE");
        _testRawBomValid("UTF-16");

        _testRawBomInvalid("UTF-8-bom", "US-ASCII", "US-ASCII");
        _testRawBomInvalid("UTF-8-bom", "ISO-8859-1", "ISO-8859-1");
        _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16");
        _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16BE");
        _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16LE");
        _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
        _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
        _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
    }

    /**
     * Runs the Content-Type driven checks: accepted combinations, rejected combinations and the
     * combinations read with the third constructor argument set to {@code true}.
     */
    public void testHttp() throws Exception {
        _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
        _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
        _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
        _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
        _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
        _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
        _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
        _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
        _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");

        _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
        _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
        _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
        _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
        // NOTE(review): "UTF-16LE" is not a registered BOM key, so getXmlStream writes no BOM for
        // this case - confirm whether "UTF-16LE-bom" was intended.
        _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
        _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");

        _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
        _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
        _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
        _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
        _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
        _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
        _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);

        _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
        _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
        _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
        _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
        _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);

        _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
        _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
        _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
        _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
        _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
        _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
        _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");

        _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
        _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
        _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
        _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
        _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");

        _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
        _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
        _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
        _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
    }

    /**
     * Checks that a reader can be created for the given Content-Type and document. The reported
     * encoding is only compared (by prefix) when the stream charset is plain "UTF-16".
     *
     * @param cT HTTP Content-Type value handed to the reader
     * @param bomEnc key of the BOM to write (see {@link #getXmlStream})
     * @param streamEnc charset used to encode the document
     * @param prologEnc encoding declared in the prolog, or {@code null} for a document without
     *            prolog
     * @throws Exception if the document cannot be generated or read
     */
    public void _testHttpValid(final String cT, final String bomEnc, final String streamEnc, final String prologEnc) throws Exception {
        final InputStream is = getXmlStream(bomEnc, prologEnc == null ? "xml" : "xml-prolog-encoding", streamEnc, prologEnc);
        final XmlReader xmlReader = new XmlReader(is, cT, false);
        if (streamEnc.equals("UTF-16")) {
            assertEncodingStartsWith(streamEnc, xmlReader);
        } else {
            // Nothing can be asserted here because UTF-8, US-ASCII and ISO-8859-1 look alike for
            // the chars used for detection.
            xmlReader.close();
        }
    }

    /**
     * Expects the construction of a reader to be rejected with an "Invalid encoding," error for
     * the given Content-Type and document.
     *
     * @param cT HTTP Content-Type value handed to the reader
     * @param bomEnc key of the BOM to write (see {@link #getXmlStream})
     * @param streamEnc charset used to encode the document
     * @param prologEnc encoding declared in the prolog, or {@code null} for a prolog without
     *            encoding declaration
     * @throws Exception if the document cannot be generated
     */
    protected void _testHttpInvalid(final String cT, final String bomEnc, final String streamEnc, final String prologEnc) throws Exception {
        final InputStream is = getXmlStream(bomEnc, prologEnc == null ? "xml-prolog" : "xml-prolog-encoding", streamEnc, prologEnc);
        final XmlReader xmlReader;
        try {
            xmlReader = new XmlReader(is, cT, false);
        } catch (final IOException ex) {
            assertInvalidEncoding(ex);
            return;
        }
        // Release the unexpectedly created reader before reporting the failure.
        xmlReader.close();
        fail("It should have failed for HTTP Content-type " + cT + ", BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc);
    }

    /**
     * Checks the encoding reported when the reader is created with its third constructor
     * argument set to {@code true} (presumably the lenient flag - confirm against XmlReader).
     *
     * @param cT HTTP Content-Type value handed to the reader
     * @param bomEnc key of the BOM to write (see {@link #getXmlStream})
     * @param streamEnc charset used to encode the document
     * @param prologEnc encoding declared in the prolog, or {@code null} for a prolog without
     *            encoding declaration
     * @param shouldbe encoding the reader is expected to report
     * @throws Exception if the document cannot be generated or read
     */
    protected void _testHttpLenient(final String cT, final String bomEnc, final String streamEnc, final String prologEnc, final String shouldbe)
            throws Exception {
        final InputStream is = getXmlStream(bomEnc, prologEnc == null ? "xml-prolog" : "xml-prolog-encoding", streamEnc, prologEnc);
        assertEncoding(shouldbe, new XmlReader(is, cT, true));
    }

    /** Asserts that the reader reports exactly {@code expected}; always closes the reader. */
    private static void assertEncoding(final String expected, final XmlReader xmlReader) throws IOException {
        try {
            assertEquals(expected, xmlReader.getEncoding());
        } finally {
            xmlReader.close();
        }
    }

    /** Asserts that the reported encoding starts with {@code prefix}; always closes the reader. */
    private static void assertEncodingStartsWith(final String prefix, final XmlReader xmlReader) throws IOException {
        try {
            final String actual = xmlReader.getEncoding();
            assertTrue("Expected an encoding starting with " + prefix + " but was " + actual, actual != null && actual.startsWith(prefix));
        } finally {
            xmlReader.close();
        }
    }

    /** Asserts that the exception message contains the "Invalid encoding," marker. */
    private static void assertInvalidEncoding(final IOException ex) {
        final String message = ex.getMessage();
        assertTrue("Unexpected exception message: " + message, message != null && message.indexOf(INVALID_ENCODING_MARKER) > -1);
    }

    // XML Stream generator

    private static final int[] NO_BOM_BYTES = {};
    private static final int[] UTF_16BE_BOM_BYTES = { 0xFE, 0xFF };
    private static final int[] UTF_16LE_BOM_BYTES = { 0xFF, 0XFE };
    private static final int[] UTF_8_BOM_BYTES = { 0xEF, 0xBB, 0xBF };

    /** BOM bytes to emit for each supported {@code bomType} key. */
    private static final Map<String, int[]> BOMs = new HashMap<String, int[]>();

    static {
        BOMs.put("no-bom", NO_BOM_BYTES);
        BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
        BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
        BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
        BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
    }

    // Template arguments: {0} stream encoding (unused), {1} prolog encoding, {2} info text.
    private static final MessageFormat XML = new MessageFormat("<root>{2}</root>");
    private static final MessageFormat XML_WITH_PROLOG = new MessageFormat("<?xml version=\"1.0\"?>\n<root>{2}</root>");
    private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING = new MessageFormat("<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");

    // Arguments: {0} BOM type, {1} XML type, {2} stream encoding, {3} prolog encoding.
    private static final MessageFormat INFO = new MessageFormat("\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");

    /** Document template for each supported {@code xmlType} key. */
    private static final Map<String, MessageFormat> XMLs = new HashMap<String, MessageFormat>();

    static {
        XMLs.put("xml", XML);
        XMLs.put("xml-prolog", XML_WITH_PROLOG);
        XMLs.put("xml-prolog-encoding", XML_WITH_PROLOG_AND_ENCODING);
    }

    /**
     * Builds an in-memory XML document: the BOM bytes for {@code bomType}, followed by the
     * template for {@code xmlType} and a large block of padding elements, all encoded with
     * {@code streamEnc}.
     *
     * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-16-bom, UTF-8-bom; any other value
     *            results in no BOM bytes being written
     * @param xmlType xml, xml-prolog, xml-prolog-encoding
     * @param streamEnc charset used to encode the document
     * @param prologEnc encoding written into the prolog by the xml-prolog-encoding template; may
     *            be {@code null}
     * @return XML stream
     * @throws IllegalArgumentException if {@code xmlType} is not one of the supported values
     * @throws IOException if {@code streamEnc} is not supported or writing fails
     */
    protected InputStream getXmlStream(final String bomType, final String xmlType, final String streamEnc, final String prologEnc) throws IOException {
        final MessageFormat xml = XMLs.get(xmlType);
        if (xml == null) {
            throw new IllegalArgumentException("Unknown XML type: " + xmlType);
        }
        int[] bom = BOMs.get(bomType);
        if (bom == null) {
            bom = NO_BOM_BYTES;
        }
        final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
        // The BOM goes in as raw bytes, ahead of anything the charset-aware writer emits.
        for (final int element : bom) {
            baos.write(element);
        }
        final Writer writer = new OutputStreamWriter(baos, streamEnc);
        try {
            // INFO declares four placeholders, so all four values must be supplied.
            final String info = INFO.format(new Object[] { bomType, xmlType, streamEnc, prologEnc });
            final String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info });
            writer.write(xmlDoc);
            // PADDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
            writer.write("<da>\n");
            for (int i = 0; i < 10000; i++) {
                writer.write("<do/>\n");
            }
            writer.write("</da>\n");
        } finally {
            writer.close();
        }
        return new ByteArrayInputStream(baos.toByteArray());
    }
}