TestInvalidChars.java
package wstxtest.wstream;
import java.io.*;
import javax.xml.stream.*;
import org.codehaus.stax2.*;
import com.ctc.wstx.api.InvalidCharHandler;
import com.ctc.wstx.api.WstxOutputProperties;
import org.junit.jupiter.api.Test;
/**
* This unit test suite verifies handling of invalid/illegal xml
* characters, with or without explicit handlers
*
* @since 4.0
*/
public class TestInvalidChars
extends BaseWriterTest
{
final static String INVALID_TEXT = "\u0003";
final static Character REPL_CHAR = '*';
// // First let's verify that we do catch problematic chars
@Test
public void testInvalidCatchingCharacters() throws XMLStreamException
{
doTestInvalid(CHARACTERS);
}
@Test
public void testInvalidCatchingCData() throws XMLStreamException
{
doTestInvalid(CDATA);
}
@Test
public void testInvalidCatchingComment() throws XMLStreamException
{
doTestInvalid(COMMENT);
}
@Test
public void testInvalidCatchingPI() throws XMLStreamException
{
doTestInvalid(PROCESSING_INSTRUCTION);
}
@Test
public void testInvalidCatchingAttribute() throws XMLStreamException
{
doTestInvalid(ATTRIBUTE);
}
// // And then also that we can fix problems
@Test
public void testValidReplacingCharacters() throws Exception
{
doTestValid(CHARACTERS);
}
@Test
public void testValidReplacingCData() throws Exception
{
doTestValid(CDATA);
}
@Test
public void testValidReplacingComment() throws Exception
{
doTestValid(COMMENT);
}
@Test
public void testValidReplacingPI() throws Exception
{
doTestValid(PROCESSING_INSTRUCTION);
}
@Test
public void testValidReplacingAttribute() throws Exception
{
doTestValid(ATTRIBUTE);
}
// // [woodstox#201] regression coverage: char[] overload of writeCData,
// // and XML 1.1 mode (RestrictedChars cannot be escaped inside CDATA).
/**
* Exercises the {@code writeCData(char[], int, int)} overload, which was
* not covered by the parameterized {@link #doTestInvalid}/{@link #doTestValid}
* paths (those only call the {@code String} overload).
*/
@Test
public void testCDataCharArrayInvalidIsCaught() throws Exception
{
XMLOutputFactory2 f = getFactory(null);
// Embed INVALID_TEXT in middle of a larger buffer, with non-zero offset:
char[] cbuf = ("xx" + INVALID_TEXT + "yy").toCharArray();
for (XMLStreamWriter sw : new XMLStreamWriter[] {
f.createXMLStreamWriter(new StringWriter()),
f.createXMLStreamWriter(new ByteArrayOutputStream(), "UTF-8"),
f.createXMLStreamWriter(new ByteArrayOutputStream(), "ISO-8859-1"),
f.createXMLStreamWriter(new ByteArrayOutputStream(), "US-ASCII"),
}) {
XMLStreamWriter2 sw2 = (XMLStreamWriter2) sw;
sw2.writeStartDocument();
sw2.writeStartElement("root");
try {
sw2.writeCData(cbuf, 0, cbuf.length);
fail("Expected exception for invalid char in writeCData(char[]) (writer: " + sw2 + ")");
} catch (XMLStreamException expected) {
sw2.closeCompletely();
}
}
}
@Test
public void testCDataCharArrayInvalidIsReplaced() throws Exception
{
XMLOutputFactory2 f = getFactory(REPL_CHAR);
char[] cbuf = ("xx" + INVALID_TEXT + "yy").toCharArray();
// Cover both encoded byte-stream and raw Writer backends:
StringWriter strw = new StringWriter();
ByteArrayOutputStream utf8Out = new ByteArrayOutputStream();
OutputDest[] dests = new OutputDest[] {
new OutputDest("StringWriter", f.createXMLStreamWriter(strw), strw, null, null),
new OutputDest("UTF-8", f.createXMLStreamWriter(utf8Out, "UTF-8"), null, utf8Out, "UTF-8"),
};
for (OutputDest d : dests) {
d.sw.writeStartDocument();
d.sw.writeStartElement("root");
d.sw.writeCData(cbuf, 0, cbuf.length);
d.sw.writeEndElement();
d.sw.writeEndDocument();
d.sw.closeCompletely();
String out = d.text();
if (out.indexOf(INVALID_TEXT) >= 0) {
fail(d.name + ": invalid char (U+0003) still present in output: '" + out + "'");
}
if (out.indexOf("xx" + REPL_CHAR + "yy") < 0) {
fail(d.name + ": expected '" + REPL_CHAR + "' between 'xx'/'yy' in CDATA. Got: '" + out + "'");
}
}
}
/**
* XML 1.1 RestrictedChars (0x01-0x1F minus tab/LF/CR) must appear as
* character references in content, which is not possible inside
* CDATA / comment / PI. Verify they're still caught when the document
* is XML 1.1, for all of CDATA, COMMENT and PROCESSING_INSTRUCTION.
*/
@Test
public void testCDataXml11RestrictedCharIsCaught() throws Exception {
doTestXml11UnescapableInvalidIsCaught(CDATA);
}
@Test
public void testCommentXml11RestrictedCharIsCaught() throws Exception {
doTestXml11UnescapableInvalidIsCaught(COMMENT);
}
@Test
public void testPIXml11RestrictedCharIsCaught() throws Exception {
doTestXml11UnescapableInvalidIsCaught(PROCESSING_INSTRUCTION);
}
@Test
public void testCDataXml11RestrictedCharIsReplaced() throws Exception {
doTestXml11UnescapableInvalidIsReplaced(CDATA);
}
@Test
public void testCommentXml11RestrictedCharIsReplaced() throws Exception {
doTestXml11UnescapableInvalidIsReplaced(COMMENT);
}
@Test
public void testPIXml11RestrictedCharIsReplaced() throws Exception {
doTestXml11UnescapableInvalidIsReplaced(PROCESSING_INSTRUCTION);
}
private void doTestXml11UnescapableInvalidIsCaught(int evtType) throws Exception
{
XMLOutputFactory2 f = getFactory(null);
for (XMLStreamWriter sw : new XMLStreamWriter[] {
f.createXMLStreamWriter(new StringWriter()),
f.createXMLStreamWriter(new ByteArrayOutputStream(), "UTF-8"),
}) {
XMLStreamWriter2 sw2 = (XMLStreamWriter2) sw;
sw2.writeStartDocument("1.1");
sw2.writeStartElement("root");
try {
writeUnescapable(sw2, evtType);
fail("Expected exception for XML 1.1 RestrictedChar in "
+ tokenTypeDesc(evtType) + " (writer: " + sw2 + ")");
} catch (XMLStreamException expected) {
sw2.closeCompletely();
}
}
}
private void doTestXml11UnescapableInvalidIsReplaced(int evtType) throws Exception
{
XMLOutputFactory2 f = getFactory(REPL_CHAR);
StringWriter strw = new StringWriter();
XMLStreamWriter2 sw = (XMLStreamWriter2) f.createXMLStreamWriter(strw);
sw.writeStartDocument("1.1");
sw.writeStartElement("root");
writeUnescapable(sw, evtType);
sw.writeEndElement();
sw.writeEndDocument();
sw.closeCompletely();
String out = strw.toString();
if (out.indexOf(INVALID_TEXT) >= 0) {
fail("XML 1.1 " + tokenTypeDesc(evtType)
+ ": invalid char still present in output: '" + out + "'");
}
if (out.indexOf(REPL_CHAR) < 0) {
fail("XML 1.1 " + tokenTypeDesc(evtType)
+ ": expected replacement '" + REPL_CHAR + "' in output. Got: '" + out + "'");
}
}
private void writeUnescapable(XMLStreamWriter2 sw, int evtType) throws XMLStreamException {
switch (evtType) {
case CDATA:
sw.writeCData(INVALID_TEXT);
break;
case COMMENT:
sw.writeComment(INVALID_TEXT);
break;
case PROCESSING_INSTRUCTION:
sw.writeProcessingInstruction("pi", INVALID_TEXT);
break;
default:
throw new IllegalArgumentException("evtType=" + evtType);
}
}
/** Small struct so the replaced-CDATA test can iterate over backends with mixed output destinations. */
private static final class OutputDest {
final String name;
final XMLStreamWriter2 sw;
final StringWriter strw;
final ByteArrayOutputStream baos;
final String encoding;
OutputDest(String name, XMLStreamWriter sw, StringWriter strw, ByteArrayOutputStream baos, String encoding) {
this.name = name;
this.sw = (XMLStreamWriter2) sw;
this.strw = strw;
this.baos = baos;
this.encoding = encoding;
}
String text() throws IOException {
return (strw != null) ? strw.toString() : baos.toString(encoding);
}
}
/*
//////////////////////////////////////////////
// Shared test code
//////////////////////////////////////////////
*/
private void doTestInvalid(int evtType)
throws XMLStreamException
{
XMLOutputFactory2 f = getFactory(null);
doTestInvalid(evtType, f.createXMLStreamWriter(new ByteArrayOutputStream(), "ISO-8859-1"), true);
doTestInvalid(evtType, f.createXMLStreamWriter(new ByteArrayOutputStream(), "US-ASCII"), true);
// [WSTX-173] / [woodstox#201]: BufferingXmlWriter (StringWriter and
// UTF-8 byte-stream backends) used to skip invalid-char checks for
// CDATA/COMMENT/PI; the failures were suppressed via this flag rather
// than uncovered. Now fixed -- strict for all backends.
doTestInvalid(evtType, f.createXMLStreamWriter(new StringWriter()), true);
doTestInvalid(evtType, f.createXMLStreamWriter(new ByteArrayOutputStream(), "UTF-8"), true);
}
/**
* @param strictChecks Due to [WSTX-173], may need to relax some checks to pass
* for now. Not needed once bug is fixed.
*/
private void doTestInvalid(int evtType, XMLStreamWriter sw1, boolean strictChecks) throws XMLStreamException {
XMLStreamWriter2 sw = (XMLStreamWriter2) sw1;
sw.writeStartDocument();
sw.writeStartElement("root");
try {
switch (evtType) {
case ATTRIBUTE:
sw.writeAttribute("attr", INVALID_TEXT);
// always strict for attributes and characters
handleFailure(sw, "Expected an exception for ATTRIBUTE", true);
break;
case CHARACTERS:
sw.writeCharacters(INVALID_TEXT);
handleFailure(sw, "Expected an exception for CHARACTERS", true);
break;
case CDATA:
sw.writeCData(INVALID_TEXT);
handleFailure(sw, "Expected an exception for CDATA", strictChecks);
break;
case COMMENT:
sw.writeComment(INVALID_TEXT);
handleFailure(sw, "Expected an exception for COMMENT", strictChecks);
break;
case PROCESSING_INSTRUCTION:
sw.writeProcessingInstruction("pi", INVALID_TEXT);
handleFailure(sw, "Expected an exception for PROCESSING_INSTRUCTION", strictChecks);
break;
}
} catch (XMLStreamException xse) {
sw.closeCompletely();
}
}
private void doTestValid(int evtType) throws IOException, XMLStreamException {
XMLOutputFactory2 f = getFactory(REPL_CHAR);
doTestValid(f, evtType, "ISO-8859-1", true);
doTestValid(f, evtType, "US-ASCII", true);
// [WSTX-173] / [woodstox#201]: BufferingXmlWriter (UTF-8 and raw
// Writer backends) used to skip CDATA/COMMENT/PI invalid-char
// handling; failures were suppressed here. Now fixed -- strict.
doTestValid(f, evtType, "UTF-8", true);
StringWriter strw = new StringWriter();
XMLStreamWriter sw = f.createXMLStreamWriter(strw);
buildValid(evtType, sw);
verifyValidReplacement(evtType, sw, strw.toString(), true);
}
private void doTestValid(XMLOutputFactory2 f, int evtType, String enc, boolean strict)
throws IOException, XMLStreamException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
XMLStreamWriter sw = f.createXMLStreamWriter(out, enc);
buildValid(evtType, sw);
verifyValidReplacement(evtType, sw, out.toString(enc), strict);
}
private void verifyValidReplacement(int evtType, XMLStreamWriter sw, String doc, boolean strict) {
if (doc.indexOf(REPL_CHAR) < 0) { // no replacement...
handleFailure(sw,
"Failed to replace invalid char, event " + tokenTypeDesc(evtType) + ", xml = '" + doc + "'",
strict);
}
}
private void buildValid(int evtType, XMLStreamWriter sw1) throws XMLStreamException {
XMLStreamWriter2 sw = (XMLStreamWriter2) sw1;
sw.writeStartDocument();
sw.writeStartElement("root");
switch (evtType) {
case ATTRIBUTE:
sw.writeAttribute("attr", INVALID_TEXT);
break;
case CHARACTERS:
sw.writeCharacters(INVALID_TEXT);
break;
case CDATA:
sw.writeCData(INVALID_TEXT);
break;
case COMMENT:
sw.writeComment(INVALID_TEXT);
break;
case PROCESSING_INSTRUCTION:
sw.writeProcessingInstruction("pi", INVALID_TEXT);
break;
}
sw.writeEndElement();
sw.writeEndDocument();
sw.closeCompletely();
}
private void handleFailure(XMLStreamWriter sw, String msg, boolean doFail) {
if (doFail) {
fail(msg + " (stream writer: " + sw + ")");
} else {
warn("suppressing failure '" + msg + "' (stream writer: " + sw + ")");
}
}
/*
//////////////////////////////////////////////
// Helper methods, low-level
//////////////////////////////////////////////
*/
private XMLOutputFactory2 getFactory(Character replChar) throws XMLStreamException {
XMLOutputFactory2 f = getOutputFactory();
setRepairing(f, false);
setValidateContent(f, true);
f.setProperty(WstxOutputProperties.P_OUTPUT_INVALID_CHAR_HANDLER,
(replChar == null) ? null : new InvalidCharHandler.ReplacingHandler(replChar));
return f;
}
}