RTFToken.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.rtf.jflex;
/**
* A single token produced by the RTF tokenizer.
* <p>
* Mutable and reused by the tokenizer to avoid allocation in the hot loop.
* Consumers must copy any data they need before requesting the next token.
* <p>
* For TEXT and CONTROL_SYMBOL tokens (single character), use {@link #getChar()}
* to avoid String allocation. For CONTROL_WORD tokens, use {@link #getName()}.
*/
public class RTFToken {
private RTFTokenType type;
private String name;
private char ch;
private int parameter;
private boolean hasParameter;
public void reset(RTFTokenType type) {
this.type = type;
this.name = null;
this.ch = 0;
this.parameter = -1;
this.hasParameter = false;
}
public void set(RTFTokenType type, String name, int parameter, boolean hasParameter) {
this.type = type;
this.name = name;
this.ch = 0;
this.parameter = parameter;
this.hasParameter = hasParameter;
}
public void setChar(RTFTokenType type, char ch) {
this.type = type;
this.name = null;
this.ch = ch;
this.parameter = -1;
this.hasParameter = false;
}
public RTFTokenType getType() {
return type;
}
/** For CONTROL_WORD tokens: the control word name. */
public String getName() {
return name;
}
/**
* For TEXT and CONTROL_SYMBOL tokens: the single character, without
* allocating a String.
*/
public char getChar() {
return ch;
}
public int getParameter() {
return parameter;
}
public boolean hasParameter() {
return hasParameter;
}
public int getHexValue() {
return parameter;
}
@Override
public String toString() {
switch (type) {
case GROUP_OPEN:
return "{";
case GROUP_CLOSE:
return "}";
case CONTROL_WORD:
return "\\" + name + (hasParameter ? String.valueOf(parameter) : "");
case CONTROL_SYMBOL:
return "\\" + ch;
case HEX_ESCAPE:
return String.format(java.util.Locale.ROOT, "\\'%02x", parameter);
case UNICODE_ESCAPE:
return "\\u" + parameter;
case TEXT:
return "TEXT[" + ch + "]";
case BIN:
return "\\bin" + parameter;
case CRLF:
return "CRLF";
case EOF:
return "EOF";
default:
return type.name();
}
}
}