RTFPictStreamParser.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.rtf.jflex;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
/**
 * Streams decoded bytes from an RTF {@code \pict} group to a temp file.
 *
 * <p>Pict data is raw image bytes (after hex-pair decoding). There is no
 * header to parse -- bytes are written directly to a temp file. On
 * {@link #onComplete(Metadata)}, a {@link TikaInputStream} is returned
 * whose close will clean up the temp file via {@link TemporaryResources}.</p>
 *
 * <p>Lifecycle: construct, call {@link #onByte(int)} for each decoded byte,
 * then call {@link #onComplete(Metadata)} exactly once. {@link #close()} may
 * be called at any point (and more than once); it releases the temp file
 * unless ownership has already been handed to the stream returned by
 * {@code onComplete}.</p>
 */
public class RTFPictStreamParser implements Closeable {

    /** Upper bound on accepted bytes; values {@code <= 0} disable the limit. */
    private final long maxBytes;

    /** Tracks the temp file so it can be deleted when no longer needed. */
    private final TemporaryResources tmp = new TemporaryResources();

    /** Temp file that receives the decoded pict bytes. */
    private final Path tempFile;

    /** Sink for the temp file; {@code null} once completed or closed. */
    private OutputStream out;

    /** Number of bytes accepted by {@link #onByte(int)} so far. */
    private long bytesWritten;

    /**
     * Set once {@link #onComplete(Metadata)} has returned a stream that now
     * owns {@link #tmp}; from then on this parser must not delete the file.
     */
    private boolean ownershipTransferred;

    /**
     * Creates the backing temp file and opens it for writing.
     *
     * @param maxBytes maximum number of bytes to accept; zero or any negative
     *                 value (e.g. -1) disables the limit
     * @throws IOException if the temp file cannot be created or opened; any
     *                     temp file created before the failure is released
     */
    public RTFPictStreamParser(long maxBytes) throws IOException {
        this.maxBytes = maxBytes;
        Path file;
        OutputStream stream;
        try {
            file = tmp.createTempFile(".bin");
            stream = new BufferedOutputStream(Files.newOutputStream(file));
        } catch (IOException | RuntimeException e) {
            // The caller never receives an instance to close, so clean up here.
            releaseTempResources(e);
            throw e;
        }
        this.tempFile = file;
        this.out = stream;
    }

    /**
     * Receive a single decoded byte from the pict hex stream.
     *
     * @param b the byte to append (written via {@link OutputStream#write(int)})
     * @throws TikaMemoryLimitException if accepting the byte would exceed the
     *                                  configured limit
     * @throws IOException              if writing to the temp file fails
     * @throws IllegalStateException    if the parser was already completed or closed
     */
    public void onByte(int b) throws IOException, TikaException {
        if (out == null) {
            throw new IllegalStateException("pict stream already completed or closed");
        }
        if (maxBytes > 0 && bytesWritten >= maxBytes) {
            throw new TikaMemoryLimitException(bytesWritten + 1, maxBytes);
        }
        out.write(b);
        bytesWritten++;
    }

    /**
     * Called when the pict group closes. Returns a TikaInputStream backed
     * by the temp file. The caller owns the TikaInputStream -- closing it
     * will delete the temp file. After a successful hand-off,
     * {@link #close()} on this parser no longer touches the temp file.
     *
     * <p>If no bytes were written, or if flushing the data or opening the
     * stream fails, the temp file is released before this method exits.</p>
     *
     * @param metadata metadata passed through to the returned stream
     * @return a TikaInputStream, or null if no bytes were written
     * @throws IOException           if flushing the temp file or opening the
     *                               stream fails
     * @throws IllegalStateException if the parser was already completed or closed
     */
    public TikaInputStream onComplete(Metadata metadata) throws IOException {
        if (out == null) {
            throw new IllegalStateException("pict stream already completed or closed");
        }
        OutputStream pending = out;
        // Clear the field first so a failed close is never retried by close().
        out = null;
        try {
            pending.close();
            if (bytesWritten > 0) {
                // Hand ownership of the temp file to the TikaInputStream.
                // TikaInputStream.close() will close the TemporaryResources,
                // which deletes the temp file.
                TikaInputStream stream = TikaInputStream.get(tempFile, metadata, tmp);
                ownershipTransferred = true;
                return stream;
            }
        } catch (IOException | RuntimeException e) {
            releaseTempResources(e);
            throw e;
        }
        // Nothing was written: there is no stream to own the file, so drop it now.
        tmp.close();
        return null;
    }

    /** Returns the number of bytes written so far. */
    public long getBytesWritten() {
        return bytesWritten;
    }

    /**
     * Closes the output stream if it is still open and releases the temp file,
     * unless {@link #onComplete(Metadata)} already handed the file to a
     * returned stream. The temp file is released even if closing the output
     * stream fails.
     *
     * @throws IOException if closing the output stream or releasing the temp
     *                     file fails
     */
    @Override
    public void close() throws IOException {
        OutputStream pending = out;
        out = null;
        try {
            if (pending != null) {
                pending.close();
            }
        } catch (IOException | RuntimeException e) {
            if (!ownershipTransferred) {
                releaseTempResources(e);
            }
            throw e;
        }
        if (!ownershipTransferred) {
            tmp.close();
        }
    }

    /**
     * Releases temp resources while another failure is already propagating;
     * any secondary failure is attached to {@code primary} as suppressed.
     */
    private void releaseTempResources(Throwable primary) {
        try {
            tmp.close();
        } catch (IOException | RuntimeException suppressed) {
            primary.addSuppressed(suppressed);
        }
    }
}