// XmlNameProcessors.java
package com.fasterxml.jackson.dataformat.xml;
import java.util.Base64;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Contains default {@link XmlNameProcessor} implementations.
* <p>
* Processors should be registered via {@link XmlMapper#setXmlNameProcessor(XmlNameProcessor)}
* and/or {@link XmlMapper.Builder#xmlNameProcessor(XmlNameProcessor)}.
*
* @since 2.14
*/
public final class XmlNameProcessors
{
/**
 * Private constructor: this class only exposes static factory methods
 * and is not meant to be instantiated.
 */
private XmlNameProcessors() {
// Nothing to do here; just to prevent instantiation
}
/**
 * Creates a processor that leaves every name untouched, both when encoding
 * and when decoding. Because nothing is escaped, using this processor may
 * generate invalid XML.
 * <p>
 * For example, with this processor set, a map with the keys {@code "123"}
 * and {@code "$ I am <fancy>! &;"} is written as:
 *
 * <pre>{@code
 * <DTO>
 *   <badMap>
 *     <$ I am <fancy>! &;>xyz</$ I am <fancy>! &;>
 *     <123>bar</123>
 *   </badMap>
 * </DTO>
 * }</pre>
 * <p>
 * This is the default behavior, kept for backwards compatibility.
 *
 * @return a new pass-through processor
 *
 * @since 2.14
 */
public static XmlNameProcessor newPassthroughProcessor() {
    final XmlNameProcessor passthrough = new PassthroughProcessor();
    return passthrough;
}
/**
 * Creates a processor that substitutes a replacement string for every
 * character of an XML name that is NOT one of:
 *<ul>
 * <li>Lower- or upper-case ASCII letter (a to z, A to Z)</li>
 * <li>Digit (0 to 9), in any position other than the first</li>
 * <li>Underscore</li>
 * <li>Hyphen ({@code -}), in any position other than the first</li>
 * <li>Colon (only exposed if underlying parser is in non-namespace-aware mode)</li>
 *</ul>
 * The transformation is one-way: the original name cannot be recovered
 * from the replaced one, so decoding leaves names unchanged.
 * <p>
 * With this processor set (and {@code "_"} as the replacement string), a map
 * with the keys {@code "123"} and {@code "$ I am <fancy>! &;"} is written as:
 *
 * <pre>{@code
 * <DTO>
 *   <badMap>
 *     <__I_am__fancy_____>xyz</__I_am__fancy_____>
 *     <_23>bar</_23>
 *   </badMap>
 * </DTO>
 * }</pre>
 * <p>
 * NOTE: this processor works for US-ASCII based element and attribute names
 * but is unlikely to work well for many "international" use cases.
 *
 * @param replacement The string that takes the place of each invalid character
 *
 * @return a new replacing processor
 *
 * @since 2.14
 */
public static XmlNameProcessor newReplacementProcessor(String replacement) {
    final XmlNameProcessor replacing = new ReplaceNameProcessor(replacement);
    return replacing;
}
/**
 * Convenience variant of {@link #newReplacementProcessor(String)} that
 * uses {@code "_"} as the replacement string.
 *
 * @return a new replacing processor that substitutes {@code "_"}
 *
 * @since 2.14
 */
public static XmlNameProcessor newReplacementProcessor() {
    // Same result as newReplacementProcessor("_"), constructed directly
    return new ReplaceNameProcessor("_");
}
/**
 * Creates a processor that escapes, as a whole, every name containing a
 * character OTHER than the following:
 *<ul>
 * <li>Lower- or upper-case ASCII letter (a to z, A to Z)</li>
 * <li>Digit (0 to 9), in any position other than the first</li>
 * <li>Underscore</li>
 * <li>Hyphen ({@code -}), in any position other than the first</li>
 * <li>Period ({@code .}), in any position other than the first</li>
 * <li>Colon (only exposed if underlying parser is in non-namespace-aware mode)</li>
 *</ul>
 * An escaped name consists of {@code prefix} followed by the base64
 * encoding of the original name's UTF-8 bytes. The
 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-5">base64url</a>
 * encoder and decoder are used, and the {@code =} padding characters are
 * always omitted. Names that need no escaping are written as-is.
 * <p>
 * With this processor set, a map with the keys {@code "123"} and
 * {@code "$ I am <fancy>! &;"} is written as:
 *
 * <pre>{@code
 * <DTO>
 *   <badMap>
 *     <base64_tag_JCBJIGFtIDxmYW5jeT4hICY7>xyz</base64_tag_JCBJIGFtIDxmYW5jeT4hICY7>
 *     <base64_tag_MTIz>bar</base64_tag_MTIz>
 *   </badMap>
 * </DTO>
 * }</pre>
 *<p>
 * NOTE: when decoding, every name that starts with {@code prefix} is treated
 * as escaped. You must therefore ensure that no incoming element or
 * attribute name starts with {@code prefix} unless it was produced by this
 * processor, otherwise decoding will not work.
 *
 * @param prefix The prefix to prepend to names that are escaped
 *
 * @return a new prefix-based base64 processor
 *
 * @since 2.14
 */
public static XmlNameProcessor newBase64Processor(String prefix) {
    final XmlNameProcessor escaping = new Base64NameProcessor(prefix);
    return escaping;
}
/**
 * Convenience variant of {@link #newBase64Processor(String)} that uses
 * {@code "base64_tag_"} as the prefix.
 *
 * @return a new base64 processor using the {@code "base64_tag_"} prefix
 *
 * @since 2.14
 */
public static XmlNameProcessor newBase64Processor() {
    // Same result as newBase64Processor("base64_tag_"), constructed directly
    return new Base64NameProcessor("base64_tag_");
}
/**
 * Variant of {@link #newBase64Processor(String)} in which <b>every</b>
 * name is base64-encoded, whether or not it contains characters that
 * would need escaping. Since all names are encoded, no marker prefix is
 * needed to tell escaped names apart, and none is added.
 * <p>
 * As with the prefix-based variant, the base64url alphabet is used and
 * {@code =} padding is omitted.
 * <p>
 * NOTE(review): the base64url alphabet includes digits, so an encoded name
 * may begin with a digit; confirm this is acceptable for the XML writer
 * in use.
 *
 * @return a new processor that base64-encodes all names
 */
public static XmlNameProcessor newAlwaysOnBase64Processor() {
    final XmlNameProcessor alwaysEncoding = new AlwaysOnBase64NameProcessor();
    return alwaysEncoding;
}
/**
 * {@link XmlNameProcessor} whose encode and decode steps are both no-ops,
 * so names are left exactly as given.
 */
static class PassthroughProcessor implements XmlNameProcessor {
private static final long serialVersionUID = 1L;
public PassthroughProcessor() { }
/** Intentionally empty: {@code name} is left unmodified. */
@Override
public void encodeName(XmlName name) { }
/** Intentionally empty: {@code name} is left unmodified. */
@Override
public void decodeName(XmlName name) { }
}
/**
 * {@link XmlNameProcessor} that rewrites the local part of a name by
 * substituting a fixed replacement string for every character outside the
 * accepted ASCII set. The rewrite is one-way; decoding is a no-op.
 */
static class ReplaceNameProcessor implements XmlNameProcessor {
    private static final long serialVersionUID = 1L;

    /** Matches a first character that is not an ASCII letter, underscore or colon. */
    private static final Pattern BEGIN_MATCHER = Pattern.compile("^[^a-zA-Z_:]");

    /** Matches any character that is not an ASCII letter, digit, underscore, colon or hyphen. */
    private static final Pattern MAIN_MATCHER = Pattern.compile("[^a-zA-Z0-9_:-]");

    /** Text substituted, literally, for each rejected character. */
    private final String _replacement;

    /**
     * @param replacement The string to substitute for each invalid character;
     *   it is inserted literally
     */
    public ReplaceNameProcessor(String replacement) {
        _replacement = replacement;
    }

    /**
     * Replaces an invalid leading character, then every remaining invalid
     * character, of {@code name.localPart} with the replacement string.
     *
     * @param name The name whose local part is rewritten in place
     */
    @Override
    public void encodeName(XmlName name) {
        // Matcher.replaceAll() interprets '$' and '\' in its argument as group
        // references / escapes; quote the replacement so that a value such as
        // "$" is inserted as-is instead of failing or being misinterpreted.
        final String literal = Matcher.quoteReplacement(_replacement);

        String newLocalPart = name.localPart;
        // First pass: the stricter rule that applies to the first character only
        newLocalPart = BEGIN_MATCHER.matcher(newLocalPart).replaceAll(literal);
        // Second pass: everything else that is not allowed anywhere in the name
        name.localPart = MAIN_MATCHER.matcher(newLocalPart).replaceAll(literal);
    }

    /**
     * Leaves {@code name} unchanged, since the replacement cannot be reversed.
     *
     * @param name The name to decode (not modified)
     */
    @Override
    public void decodeName(XmlName name) {
        // One-way transformation; cannot decode
    }
}
/**
 * {@link XmlNameProcessor} that base64-encodes (base64url alphabet, no
 * padding) the local part of any name that does not match
 * {@link #VALID_XML_NAME}, marking encoded names with a prefix so they
 * can be recognized and decoded again.
 */
static class Base64NameProcessor implements XmlNameProcessor {
    private static final long serialVersionUID = 1L;

    private static final Base64.Decoder BASE64_DECODER = Base64.getUrlDecoder();
    private static final Base64.Encoder BASE64_ENCODER = Base64.getUrlEncoder().withoutPadding();

    /** Names fully matching this pattern are written without escaping. */
    private static final Pattern VALID_XML_NAME = Pattern.compile("[a-zA-Z_:]([a-zA-Z0-9_:.-])*");

    /** Marker prepended to every escaped name. */
    private final String _prefix;

    /**
     * @param prefix The marker to put in front of escaped names
     */
    public Base64NameProcessor(String prefix) {
        _prefix = prefix;
    }

    /**
     * Replaces {@code name.localPart} with the prefix followed by the
     * base64 form of its UTF-8 bytes, unless the local part already
     * matches {@link #VALID_XML_NAME}.
     */
    @Override
    public void encodeName(XmlName name) {
        final String original = name.localPart;
        if (VALID_XML_NAME.matcher(original).matches()) {
            // Already acceptable: leave untouched
            return;
        }
        final byte[] utf8Bytes = original.getBytes(UTF_8);
        name.localPart = _prefix + BASE64_ENCODER.encodeToString(utf8Bytes);
    }

    /**
     * If {@code name.localPart} starts with the prefix, strips it and
     * base64-decodes the remainder back into a UTF-8 string; otherwise
     * leaves the name as-is.
     */
    @Override
    public void decodeName(XmlName name) {
        final String candidate = name.localPart;
        if (!candidate.startsWith(_prefix)) {
            // Not marked as escaped
            return;
        }
        final String encodedPart = candidate.substring(_prefix.length());
        final byte[] decodedBytes = BASE64_DECODER.decode(encodedPart);
        name.localPart = new String(decodedBytes, UTF_8);
    }
}
/**
 * {@link XmlNameProcessor} that unconditionally base64-encodes (base64url
 * alphabet, no padding) the local part of every name when encoding, and
 * unconditionally base64-decodes it when decoding.
 */
static class AlwaysOnBase64NameProcessor implements XmlNameProcessor {
    private static final long serialVersionUID = 1L;

    private static final Base64.Decoder BASE64_DECODER = Base64.getUrlDecoder();
    private static final Base64.Encoder BASE64_ENCODER = Base64.getUrlEncoder().withoutPadding();

    public AlwaysOnBase64NameProcessor() { }

    /**
     * Replaces {@code name.localPart} with the base64 form of its UTF-8 bytes.
     */
    @Override
    public void encodeName(XmlName name) {
        final byte[] utf8Bytes = name.localPart.getBytes(UTF_8);
        name.localPart = BASE64_ENCODER.encodeToString(utf8Bytes);
    }

    /**
     * Replaces {@code name.localPart} with the UTF-8 string obtained by
     * base64-decoding it.
     */
    @Override
    public void decodeName(XmlName name) {
        final byte[] decodedBytes = BASE64_DECODER.decode(name.localPart);
        name.localPart = new String(decodedBytes, UTF_8);
    }
}
}