// BaseStreamTest.java
package wstxtest.stream;
import java.io.*;
import java.util.Random;
import javax.xml.stream.*;
import org.codehaus.stax2.XMLStreamReader2;
import wstxtest.BaseWstxTest;
import wstxtest.cfg.*;
public abstract class BaseStreamTest
extends BaseWstxTest
{
protected BaseStreamTest() { super(); }
/*
///////////////////////////////////////////////////////////
// "Special" accessors
///////////////////////////////////////////////////////////
*/
/**
 * Fetches the text of the reader's current event and, while doing so,
 * verifies that the text is consistently accessible through the different
 * StAX accessors: the length reported by {@code getTextLength()} must
 * match {@code getText()}, and the slice described by
 * {@code getTextCharacters()} / {@code getTextStart()} must contain the
 * very same characters.
 *
 * @param sr reader positioned at the event whose text is to be read
 * @return the text as returned by {@code sr.getText()}
 * @throws XMLStreamException if the reader fails while producing the text
 */
protected static String getAndVerifyText(XMLStreamReader sr)
    throws XMLStreamException
{
    final int reportedLength = sr.getTextLength();

    // Empty text is tolerated for the DTD event only
    final boolean emptyAllowed = (sr.getEventType() == DTD);
    if (!emptyAllowed) {
        assertTrue("Stream reader should never return empty Strings.", (reportedLength > 0));
    }

    final String viaGetText = sr.getText();
    assertNotNull("getText() should never return null.", viaGetText);
    assertEquals(reportedLength, viaGetText.length());

    // Rebuild the same text from the raw character buffer and compare
    final char[] buffer = sr.getTextCharacters();
    final int offset = sr.getTextStart();
    final String viaBuffer = new String(buffer, offset, reportedLength);
    assertEquals(viaGetText, viaBuffer);

    return viaGetText;
}
/**
 * Reads the text of the current event through the streaming
 * {@code getText(Writer, boolean)} accessor of {@link XMLStreamReader2}
 * and returns everything that was written.
 *
 * @param sr reader to read from; it is cast to {@link XMLStreamReader2}
 * @return the characters the reader wrote out
 * @throws IOException if writing to the intermediate buffer fails
 * @throws XMLStreamException if the reader fails to produce the text
 */
protected static String getStreamingText(XMLStreamReader sr)
    throws IOException, XMLStreamException
{
    final XMLStreamReader2 streamingReader = (XMLStreamReader2) sr;
    final StringWriter collected = new StringWriter();
    // Second argument 'false': contents need not be preserved by the reader
    streamingReader.getText(collected, false);
    return collected.toString();
}
/*
///////////////////////////////////////////////////////////
// Higher-level test methods
///////////////////////////////////////////////////////////
*/
/**
 * Runs {@link #streamAndCheck(XMLStreamReader, InputConfigIterator, String, String, boolean)}
 * three times over the same document, each time feeding the parser in a
 * different way: (0) through {@code constructStreamReader(f, input)},
 * (1) as a UTF-8 byte stream handed directly to the factory, and (2) as a
 * {@code Reader} explicitly decoding UTF-8 bytes.
 *
 * @param f factory used to create the stream readers
 * @param it configuration iterator, passed through for failure reporting
 * @param input the document to parse
 * @param expOutput the serialization the document is expected to produce
 * @param reallyStreaming whether text is to be read via the streaming accessor
 * @return sum of the values returned by the three individual runs
 */
protected int streamAndCheck(XMLInputFactory f, InputConfigIterator it,
                             String input, String expOutput,
                             boolean reallyStreaming)
    throws IOException, XMLStreamException, UnsupportedEncodingException
{
    int total = 0;

    /* Contents should not really contain anything outside ISO-Latin, but
     * since encoding detection may be tricky, byte-based variants stick
     * to UTF-8 for now.
     */
    for (int mode = 0; mode < 3; ++mode) {
        final XMLStreamReader reader;

        if (mode == 0) {
            // simple StringReader
            reader = constructStreamReader(f, input);
        } else if (mode == 1) {
            // InputStream; encoding is left for the factory to figure out
            ByteArrayInputStream bytes = new ByteArrayInputStream(input.getBytes("UTF-8"));
            reader = f.createXMLStreamReader(bytes);
        } else {
            // explicit UTF-8 decoding in front of the parser
            ByteArrayInputStream bytes = new ByteArrayInputStream(input.getBytes("UTF-8"));
            Reader decoded = new InputStreamReader(bytes, "UTF-8");
            reader = f.createXMLStreamReader(decoded);
        }

        total += streamAndCheck(reader, it, input, expOutput, reallyStreaming);
    }
    return total;
}
/**
 * Reads the whole document from {@code sr}, re-serializes the events into
 * a simple textual form and fails the test if that form differs from
 * {@code expOutput}.
 * <p>
 * Leading SPACE and DTD events are skipped. Element events are written as
 * {@code <[prefix:]local>} / {@code </[prefix:]local>}; text (CHARACTERS,
 * SPACE, CDATA) and entity references are written as is, without any
 * quoting; comments are wrapped in {@code <!--} and {@code -->}. Any
 * other event type fails the test.
 *
 * @param sr reader to consume; read up to (not including) END_DOCUMENT
 * @param it configuration iterator; only used for the failure message
 * @param input the original document; only used for the failure message
 * @param expOutput expected serialization
 * @param reallyStreaming if true, text and comments are read through the
 *   streaming {@code getText(Writer, boolean)} accessor of
 *   {@code XMLStreamReader2} (and its return value is verified); if false,
 *   through plain {@code getText()}
 * @return sum of the type codes of all events that were serialized
 */
protected int streamAndCheck(XMLStreamReader sr, InputConfigIterator it,
                             String input, String expOutput,
                             boolean reallyStreaming)
    throws IOException, XMLStreamException
{
    // Ignore leading white space and DTD; stop at the first other event
    int type = sr.next();
    while (type == SPACE || type == DTD) {
        type = sr.next();
    }

    StringBuilder act = new StringBuilder(1000);
    int count = 0;

    while (true) {
        count += type;

        if (type == START_ELEMENT || type == END_ELEMENT) {
            act.append((type == END_ELEMENT) ? "</" : "<");
            String prefix = sr.getPrefix();
            if (prefix != null && prefix.length() > 0) {
                act.append(prefix).append(':');
            }
            act.append(sr.getLocalName()).append('>');
        } else if (type == CHARACTERS || type == SPACE || type == CDATA) {
            // No quoting, doesn't have to result in legal XML
            act.append(reallyStreaming ? streamTextAndVerifyLength(sr, type) : sr.getText());
        } else if (type == COMMENT) {
            act.append("<!--");
            act.append(reallyStreaming ? streamTextAndVerifyLength(sr, type) : sr.getText());
            act.append("-->");
        } else if (type == PROCESSING_INSTRUCTION) {
            // NOTE(review): "<!?" looks like a typo for "<?" -- left as is,
            // since expected outputs of existing callers may depend on it.
            act.append("<!?");
            act.append(sr.getPITarget());
            String data = sr.getPIData();
            if (data != null) {
                act.append(' ').append(data.trim());
            }
            act.append("?>");
        } else if (type == ENTITY_REFERENCE) {
            act.append(sr.getText());
        } else {
            fail("Unexpected event type "+tokenTypeDesc(type));
        }

        type = sr.next();
        if (type == END_DOCUMENT) {
            break;
        }
    }

    String result = act.toString();
    if (!result.equals(expOutput)) {
        String desc = it.toString();
        int round = it.getIndex();

        // uncomment for debugging:
        /*
        System.err.println("FAIL: round "+round+" ["+desc+"]");
        System.err.println("Input: '"+input.toString()+"'");
        System.err.println("Exp: '"+expOutput.toString()+"'");
        System.err.println("Actual: '"+act.toString()+"'");
        */

        fail("Failure with '"+desc+"' (round #"+round+"):\n<br />"
             +"Input : {"+printableWithSpaces(input)+"}\n<br />"
             +"Output: {"+printableWithSpaces(result)+"}\n<br />"
             +"Exp. : {"+printableWithSpaces(expOutput)+"}\n<br />");
    }
    return count;
}

/**
 * Reads the text of the current event through the streaming accessor and
 * asserts that the count it returns equals the number of characters that
 * were actually written. On mismatch, texts longer than 60 characters
 * are abbreviated (first and last 30 characters) in the failure message.
 */
private String streamTextAndVerifyLength(XMLStreamReader sr, int type)
    throws IOException, XMLStreamException
{
    StringWriter sink = new StringWriter();
    // important: false to indicate 'don't preserve contents'
    int gotLen = ((XMLStreamReader2) sr).getText(sink, false);
    String text = sink.toString();
    int textLen = text.length();

    if (textLen != gotLen) {
        if (text.length() > 60) {
            text = text.substring(0, 30) + "<...>" + text.substring(textLen-30);
        }
        assertEquals("Incorrect return value from streaming getText() for "+
                     tokenTypeDesc(type)+" [string '"+text+"']", textLen, gotLen);
    }
    return text;
}
/**
 * Runs {@link #streamAndSkip(XMLStreamReader)} three times over the same
 * document, each time feeding the parser in a different way: (0) through
 * {@code constructStreamReader(f, input)}, (1) as a UTF-8 byte stream
 * handed directly to the factory, and (2) as a {@code Reader} explicitly
 * decoding UTF-8 bytes.
 *
 * @param f factory used to create the stream readers
 * @param it configuration iterator (not used by this method)
 * @param input the document to parse
 * @return sum of the values returned by the three individual runs
 */
protected int streamAndSkip(XMLInputFactory f, InputConfigIterator it,
                            String input)
    throws XMLStreamException, UnsupportedEncodingException
{
    int total = 0;

    for (int mode = 0; mode < 3; ++mode) {
        final XMLStreamReader reader;

        if (mode == 0) {
            // simple StringReader
            reader = constructStreamReader(f, input);
        } else if (mode == 1) {
            // InputStream; encoding is left for the factory to figure out
            ByteArrayInputStream bytes = new ByteArrayInputStream(input.getBytes("UTF-8"));
            reader = f.createXMLStreamReader(bytes);
        } else {
            // explicit UTF-8 decoding in front of the parser
            ByteArrayInputStream bytes = new ByteArrayInputStream(input.getBytes("UTF-8"));
            Reader decoded = new InputStreamReader(bytes, "UTF-8");
            reader = f.createXMLStreamReader(decoded);
        }

        total += streamAndSkip(reader);
    }
    return total;
}
/**
 * Advances the reader until it reports no more events, without looking
 * at any event contents.
 *
 * @param sr reader to exhaust
 * @return sum of the event type codes returned by {@code sr.next()}
 * @throws XMLStreamException if the reader fails while advancing
 */
protected int streamAndSkip(XMLStreamReader sr)
    throws XMLStreamException
{
    int typeSum = 0;
    for (boolean more = sr.hasNext(); more; more = sr.hasNext()) {
        typeSum += sr.next();
    }
    return typeSum;
}
/**
 * Generates a pseudo-random test document together with its expected
 * serialization.
 * <p>
 * A fixed template is appended to both buffers; each {@code '*'} in the
 * template is then replaced (last placeholder first, so that indexes of
 * the remaining ones stay valid) via
 * {@code replaceEntity(input, output, autoEnt, r, index)}. Finally the XML
 * declaration and a DOCTYPE declaring entities {@code ent1}, {@code x}
 * and {@code both} are inserted in front of {@code input} only.
 * <p>
 * Placeholder indexes are taken from the template itself, so both buffers
 * are expected to be empty on entry.
 *
 * @param r source of pseudo-random choices
 * @param input buffer that receives the document to parse
 * @param output buffer that receives the expected output
 * @param autoEnt passed through to {@code replaceEntity}
 */
protected void generateData(Random r, StringBuffer input,
                            StringBuffer output, boolean autoEnt)
{
    final String preamble =
        "<?xml version='1.0' encoding='UTF-8'?>"
        +"<!DOCTYPE root [\n"
        +" <!ENTITY ent1 'ent1Value'>\n"
        +" <!ENTITY x 'Y'>\n"
        +" <!ENTITY both '&ent1;&x;'>\n"
        +"]>";

    /* '*' characters are placeholders, each to be replaced by a
     * pseudo-randomly selected choice.
     *
     * For trouble shooting, a short template can be used instead:
     *   "<root>" + " * Text ****<empty></empty>\n</root>"
     */
    final String template =
        "<root>"
        +" * Text ****<empty></empty>\n"
        +"<empty>*</empty>* * xx<empty></empty>\n"
        +"<tag>Text ******</tag>\n"
        +"<a>*...</a><b>...*</b><c>*</c>"
        +"<c>*</c><c>*</c><c>*</c><c>*</c><c>*</c><c>*</c>"
        +"<c>*<d>** *<e>*</e>**</d></c>"
        +"<c><d><e>*</e> **</d>*</c>"
        +"a*b*c*d*e*f*g*h*i*j*k"
        +"</root>"
        ;

    input.append(template);
    output.append(template);

    // Walk the placeholders from the last one towards the first
    for (int pos = template.lastIndexOf('*'); pos >= 0;
         pos = template.lastIndexOf('*', pos - 1)) {
        replaceEntity(input, output, autoEnt, r, pos);
    }

    // The preamble goes into the input only
    input.insert(0, preamble);
}
protected void replaceEntity(StringBuffer input, StringBuffer output,
boolean autoEnt,
Random r, int index)
{
String in, out;
switch (Math.abs(r.nextInt()) % 6) {
case 0: // Let's use one of pre-def'd entities:
switch (Math.abs(r.nextInt()) % 5) {
case 0:
in = "&"; out = "&";
break;
case 1:
in = "'"; out = "'";
break;
case 2:
in = "<"; out = "<";
break;
case 3:
in = ">"; out = ">";
break;
case 4:
in = """; out = "\"";
break;
default: throw new Error("Internal error!");
}
break;
case 1: // How about some CDATA?
switch (Math.abs(r.nextInt()) % 5) {
case 0:
in = "<![CDATA[]] >]]>";
out = "]] >";
break;
case 1:
in = "<![CDATA[xyz&abc]]>";
out = "xyz&abc";
break;
case 2:
in = "<![CDATA[ ]]>";
out = " ";
break;
case 3:
in = "<![CDATA[]]>";
out = "";
break;
case 4:
in = "<![CDATA[ \nxyz]]>";
out = " \nxyz";
break;
default: throw new Error("Internal error!");
}
case 2: // and COMMENTS
switch (Math.abs(r.nextInt()) % 5) {
case 0:
in = "<!--comment-->";
out = "<!--comment-->";
break;
case 1:
in = out = "<!---->";
break;
case 2:
in = out = "<!-- \n-->";
break;
case 3:
//in = out = "<!--a\nb \r\n \rhah\r \n-->";
in = out = "<!-- \r -->";
break;
case 4:
in = out = "<!-- a<>B -->";
break;
default: throw new Error("Internal error!");
}
break;
case 3: // Char entities?
switch (Math.abs(r.nextInt()) % 4) {
case 0:
in = "#";
out = "#";
break;
case 1:
in = "$";
out = "$";
break;
case 2:
in = "©"; // above US-Ascii, copyright symbol
out = "\u00A9";
break;
case 3:
in = "Ä"; // Upper-case a with umlauts
out = "\u00C4";
break;
default: throw new Error("Internal error!");
}
break;
case 4: // Full entities
switch (Math.abs(r.nextInt()) % 3) {
case 0:
in = "&ent1;";
out = "ent1Value";
break;
case 1:
in = "&x;";
out = "Y";
break;
case 2:
in = "&both;";
out = autoEnt ? "ent1ValueY" : "&ent1;&x;";
break;
default: throw new Error("Internal error!");
}
break;
case 5: // Plain text, ISO-Latin chars:
in = out = "(\u00A9)"; // copyright symbol
break;
case 6: // Proc. instr?
switch (Math.abs(r.nextInt()) % 5) {
case 0:
in = out = "<?myTarget?>";
break;
case 1:
in = out = "<?my data?>";
break;
case 2:
in = out = "<?a -ha!?>";
break;
case 3:
in = out = "<?xy_z ? ? <>? ?>";
break;
case 4:
in = out = "<?proc instr\nwith a\r\nlinefeed or <b>two</b> \r\r\r";
break;
default: throw new Error("Internal error!");
}
default:
throw new Error("Internal error!");
}
input.replace(index, index+1, in);
output.replace(index, index+1, out);
}
/**
 * Normalizes, in place, all unnormalized linefeeds into the normalized
 * one: each {@code \r\n} pair and each stand-alone {@code \r} is replaced
 * by a single {@code \n}.
 * <p>
 * Every linefeed of the original content yields exactly one {@code \n};
 * specifically, consecutive carriage returns are not merged
 * ({@code "\r\r\n"} becomes {@code "\n\n"}).
 *
 * @param input buffer to normalize; must not be null
 */
protected void normalizeLFs(StringBuffer input)
{
    final int len = input.length();
    int dst = 0;

    /* Single forward pass that compacts the buffer as it goes. The write
     * position never passes the read position, so the look-ahead below
     * always sees original (not yet normalized) content; a '\n' that was
     * produced from a '\r' can thus never be mistaken for the second half
     * of a "\r\n" pair.
     */
    for (int src = 0; src < len; ++src) {
        char c = input.charAt(src);
        if (c == '\r') {
            if (src + 1 < len && input.charAt(src + 1) == '\n') {
                ++src; // "\r\n": the '\n' is consumed along with the '\r'
            }
            c = '\n';
        }
        input.setCharAt(dst++, c);
    }
    input.setLength(dst);
}
}