// TestGetSegmentedText.java
package org.codehaus.stax.test.stream;
import java.io.*;
import javax.xml.stream.*;
/**
 * Unit test suite that ensures that the 'segmented' text accessors
 * (multi-argument getTextCharacters) work as expected, with various
 * combinations of access lengths, and orderings.
 *
 * @author Tatu Saloranta
 */
public class TestGetSegmentedText
    extends BaseStreamTest
{
    /**
     * XML document used as input by {@link #doTest}; built lazily by
     * {@link #initData} on first use and then shared by all tests.
     */
    static String sXmlInput = null;

    /**
     * Text content a reader reported for {@link #sXmlInput} when accessed
     * via plain getText(); used as the expected result for the segmented
     * reads. Set together with {@link #sXmlInput}.
     */
    static String sExpResult = null;

    public void testCoalescingAutoEntity()
        throws Exception
    {
        doTest(false, true, true); // non-ns
        doTest(true, true, true); // ns-aware
    }

    public void testNonCoalescingAutoEntity()
        throws Exception
    {
        doTest(false, false, true); // non-ns
        doTest(true, false, true); // ns-aware
    }

    public void testCoalescingNonAutoEntity()
        throws Exception
    {
        doTest(false, true, false); // non-ns
        doTest(true, true, false); // ns-aware
    }

    public void testNonCoalescingNonAutoEntity()
        throws Exception
    {
        doTest(false, false, false); // non-ns
        doTest(true, false, false); // ns-aware
    }

    /**
     * Reads the text content of a small document using requests of two
     * different sizes and destination offsets, and verifies that the
     * pieces concatenate back to the original text.
     */
    public void testSegmentedGetCharacters()
        throws XMLStreamException
    {
        final String TEXT = "Let's just add some content in here ('') to fill some of the parser buffers, to test multi-argument getTextCharacters() method";
        final String XML = "<!--comment--><root><?proc instr?>"+TEXT+"</root>";

        XMLInputFactory f = getFactory(true, false, true);
        XMLStreamReader sr = constructStreamReader(f, XML);
        try {
            // May or may not get the prolog comment
            int type = sr.next();
            if (type == COMMENT) {
                type = sr.next();
            }
            assertTokenType(START_ELEMENT, type);
            assertTokenType(PROCESSING_INSTRUCTION, sr.next());
            type = sr.next();
            assertTokenType(CHARACTERS, type);

            /* Ok... let's just access all the text, using small reads,
             * from possibly multiple events:
             */
            StringBuilder sb = new StringBuilder();
            while (type == CHARACTERS) {
                char[] buf = new char[5];
                int offset = 0;

                while (true) {
                    /* Let's use 2 different sizes of requests, chosen by
                     * parity of the running offset: a single char into
                     * the middle of the buffer, or a full buffer.
                     */
                    int start, len;
                    if ((offset & 1) == 0) {
                        start = 2;
                        len = 1;
                    } else {
                        start = 0;
                        len = buf.length;
                    }
                    int count = sr.getTextCharacters(offset, buf, start, len);
                    if (count > 0) {
                        sb.append(buf, start, count);
                        offset += count;
                    }
                    // A short read is taken to mean this event is exhausted
                    if (count < len) {
                        break;
                    }
                }
                type = sr.next();
            }
            assertEquals(TEXT, sb.toString());
            assertTokenType(END_ELEMENT, type);
        } finally {
            sr.close();
        }
    }

    /*
    ////////////////////////////////////////
    // Private methods, common test code
    ////////////////////////////////////////
    */

    /**
     * Runs the segmented-read check over the shared big document, for
     * each combination of 3 read buffer sizes and 3 ways of feeding the
     * input to the factory.
     *
     * @param ns Whether the factory is configured as namespace-aware
     * @param coalescing Whether the factory is configured as coalescing;
     *   if so, at most one CHARACTERS event is accepted
     * @param autoEntity Whether the factory is configured to replace
     *   entity references
     */
    private void doTest(boolean ns, boolean coalescing, boolean autoEntity)
        throws Exception
    {
        // This is bit hacky, but speeds up testing...
        if (sXmlInput == null) {
            initData();
        }

        // And let's also check using different buffer sizes:
        for (int sz = 0; sz < 3; ++sz) {
            // Let's test different input methods too:
            for (int j = 0; j < 3; ++j) {
                XMLInputFactory f = getFactory(ns, coalescing, autoEntity);
                XMLStreamReader sr = createReader(f, j);
                // Close the reader whether or not the checks pass
                try {
                    verifySegmentedReads(sr, allocBuffer(sz), coalescing);
                } finally {
                    sr.close();
                }
            }
        }
    }

    /**
     * Constructs a stream reader for the shared input document, using
     * the input method selected by the index.
     *
     * @param f Factory to construct the reader with
     * @param method 0 for a reader built via constructStreamReader from
     *   the String; 1 for a byte stream of UTF-8 bytes (encoding left
     *   for the parser to determine); 2 for an explicit UTF-8 Reader
     */
    private XMLStreamReader createReader(XMLInputFactory f, int method)
        throws XMLStreamException, UnsupportedEncodingException
    {
        switch (method) {
        case 0: // simple StringReader:
            return constructStreamReader(f, sXmlInput);
        case 1: // via InputStream and auto-detection
            /* It shouldn't really contain anything outside ISO-Latin;
             * however, detection may be tricky.. so let's just
             * test with UTF-8, for now?
             */
            {
                ByteArrayInputStream bin = new ByteArrayInputStream
                    (sXmlInput.getBytes("UTF-8"));
                return f.createXMLStreamReader(bin);
            }
        case 2: // explicit UTF-8 stream
            {
                ByteArrayInputStream bin = new ByteArrayInputStream
                    (sXmlInput.getBytes("UTF-8"));
                Reader br = new InputStreamReader(bin, "UTF-8");
                return f.createXMLStreamReader(br);
            }
        default:
            throw new IllegalArgumentException("Internal error: unknown input method "+method);
        }
    }

    /**
     * Allocates the destination buffer for given size index: 23 chars
     * for 0, 384 for 1, and 4005 for anything else.
     */
    private static char[] allocBuffer(int sizeIndex)
    {
        if (sizeIndex == 0) {
            return new char[23];
        }
        if (sizeIndex == 1) {
            return new char[384];
        }
        return new char[4005];
    }

    /**
     * Reads all CHARACTERS events following the root start element with
     * repeated full-buffer getTextCharacters() calls; checks each
     * segment's length against getTextLength(), and the concatenation
     * of all segments against {@link #sExpResult}.
     *
     * @param sr Reader positioned before the root start element
     * @param cbuf Destination buffer; its length is the request size
     * @param coalescing If true, a second CHARACTERS event is a failure
     */
    private void verifySegmentedReads(XMLStreamReader sr, char[] cbuf,
                                      boolean coalescing)
        throws XMLStreamException
    {
        assertTokenType(START_ELEMENT, sr.next());

        int segCount = 0;
        int totalLen = sExpResult.length();
        StringBuilder totalBuf = new StringBuilder(totalLen);

        /* Ok; for each segment let's test separately first,
         * and then combine all the results together as well
         */
        while (sr.next() == CHARACTERS) {
            // Where are we within the whole String?
            int segOffset = totalBuf.length();

            ++segCount;
            // Should not get multiple when coalescing...
            if (coalescing && segCount > 1) {
                fail("Didn't expect multiple CHARACTERS segments when coalescing: first segment contained "+segOffset+" chars from the whole expected "+totalLen+" chars");
            }

            StringBuilder sb = new StringBuilder();
            int count;
            int offset = 0;
            int readCount = 0;

            while ((count = sr.getTextCharacters(offset, cbuf, 0, cbuf.length)) > 0) {
                ++readCount;
                sb.append(cbuf, 0, count);
                offset += count;
            }

            int expLen = sr.getTextLength();

            // Sanity check #1: should get matching totals
            assertEquals
                ("Expected segment #"+segCount+" (one-based; read with "+readCount+" reads) to have length of "
                 +expLen+"; reported to have gotten just "+offset+" chars",
                 expLen, offset);

            // Sanity check #2: and string buf should have it too
            assertEquals
                ("Expected segment #"+segCount+" (one-based; read with "+readCount+" reads) to get "
                 +expLen+" chars; StringBuilder only has "+sb.length(),
                 expLen, sb.length());

            totalBuf.append(sb);
        }
        assertTokenType(END_ELEMENT, sr.getEventType());

        // Ok; all gotten, does it match?
        assertEquals("Expected total of "+totalLen+" chars, got "+totalBuf.length(),
                     sExpResult.length(), totalBuf.length());

        // Lengths are ok, but how about content?
        if (!sExpResult.equals(totalBuf.toString())) {
            // Locate the first differing character for the failure message
            String str1 = sExpResult;
            String str2 = totalBuf.toString();
            int len = str1.length();
            int i = 0;
            char c1 = 'x', c2 = 'x';

            for (; i < len; ++i) {
                c1 = str1.charAt(i);
                c2 = str2.charAt(i);
                if (c1 != c2) {
                    break;
                }
            }
            fail("Expected Strings to equal; differed at character #"+i+" (length "+len+" was correct); expected '"+c1+"' ("+((int) c1)+"), got '"+c2+"' ("+((int) c2)+")");
        }
    }

    /*
    ////////////////////////////////////////
    // Private methods, other
    ////////////////////////////////////////
    */

    /**
     * Obtains an input factory and configures it with given settings;
     * DTD support is always enabled, and validation always disabled.
     */
    private XMLInputFactory getFactory(boolean nsAware,
                                       boolean coalescing, boolean autoEntity)
        throws XMLStreamException
    {
        XMLInputFactory f = getInputFactory();
        setNamespaceAware(f, nsAware);
        setCoalescing(f, coalescing);
        setReplaceEntities(f, autoEntity);
        setSupportDTD(f, true);
        setValidating(f, false);
        return f;
    }

    /**
     * Builds the shared input document and the expected text result,
     * and stores them in {@link #sXmlInput} and {@link #sExpResult}.
     * The expected result is obtained by parsing the document with a
     * namespace-aware, non-coalescing, entity-replacing reader and
     * concatenating getText() of all CHARACTERS events.
     */
    private void initData()
        throws XMLStreamException
    {
        StringBuilder sb = new StringBuilder("<?xml version='1.0'?>");
        sb.append("<root>");

        /* Let's create a ~64kchar text segment for testing, first; and one
         * including stuff like linefeeds and (pre-defined) entities.
         * Note: ampersand and angle brackets must be written as entities
         * here; as bare characters in element content they would make
         * the document not well-formed.
         */
        while (sb.length() < 65000) {
            sb.append("abcd efgh\r\nijkl &amp; mnop &lt; &gt; qrst\n uvwx\r yz A");
        }
        sb.append("</root>");
        final String XML = sb.toString();

        /* But more than that, let's also see what we should get
         * as a result...
         */
        XMLInputFactory f = getFactory(true, false, true);
        XMLStreamReader sr = constructStreamReader(f, XML);
        StringBuilder sb2 = new StringBuilder(XML.length());
        try {
            assertTokenType(START_ELEMENT, sr.next());
            while (sr.next() == CHARACTERS) {
                sb2.append(sr.getText());
            }
        } finally {
            sr.close();
        }

        // Only publish once the reference parse has succeeded
        sXmlInput = XML;
        sExpResult = sb2.toString();
    }
}