// FileSource.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.io;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.IOUtils;
/**
 * Input source backed by a file.
 * <p>
 * Data is already on disk. No caching is needed.
 * {@link #getPath} returns the existing path immediately.
 * Mark/reset works by reopening the file and skipping to the marked position.
 * <p>
 * The current read offset is tracked in {@code position} so that
 * {@link #mark(int)} can remember it and {@link #reset()} can return to it
 * via {@link #seekTo(long)}.
 */
class FileSource extends InputStream implements TikaInputSource {

    /** The file all reads are served from. */
    private final Path path;

    /** File size in bytes, captured once at construction time. */
    private final long length;

    /** Stream currently delegated to; replaced on every {@link #seekTo(long)}. */
    private InputStream currentStream;

    /** Number of bytes consumed so far, i.e. the offset of the next byte to read. */
    private long position;

    /** Offset recorded by {@link #mark(int)}; {@code -1} while no mark has been set. */
    private long markPosition = -1;

    /**
     * Opens the given file for buffered reading, starting at offset 0.
     *
     * @param path the file to read from
     * @throws IOException if the file size cannot be determined or the file cannot be opened
     */
    FileSource(Path path) throws IOException {
        this.path = path;
        this.length = Files.size(path);
        this.currentStream = new BufferedInputStream(Files.newInputStream(path));
        this.position = 0;
    }

    /**
     * Reads a single byte and advances the tracked position when one was read.
     *
     * @return the byte read, or {@code -1} at end of stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        int b = currentStream.read();
        if (b != -1) {
            position++;
        }
        return b;
    }

    /**
     * Reads up to {@code len} bytes into {@code b} and advances the tracked
     * position by the number of bytes actually read.
     *
     * @return the number of bytes read, or {@code -1} at end of stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        int n = currentStream.read(b, off, len);
        if (n > 0) {
            position += n;
        }
        return n;
    }

    /**
     * Skips up to {@code n} bytes and advances the tracked position accordingly.
     * <p>
     * A non-positive {@code n} skips nothing and returns 0, following the
     * {@link InputStream#skip(long)} convention, instead of being forwarded to
     * {@code IOUtils.skip}.
     *
     * @param n the number of bytes to skip
     * @return the number of bytes actually skipped
     * @throws IOException if the underlying stream fails
     */
    @Override
    public long skip(long n) throws IOException {
        if (n <= 0) {
            return 0;
        }
        long skipped = IOUtils.skip(currentStream, n);
        position += skipped;
        return skipped;
    }

    /**
     * @return the underlying stream's estimate of bytes readable without blocking
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int available() throws IOException {
        return currentStream.available();
    }

    /**
     * Repositions this source at the given absolute offset by reopening the
     * file and skipping forward to {@code newPosition}.
     * <p>
     * The replacement stream is opened and positioned before the current one
     * is released. If reopening or skipping fails, this object keeps its
     * previous stream and position.
     *
     * @param newPosition absolute offset in bytes, between 0 and {@link #getLength()} inclusive
     * @throws IOException if {@code newPosition} is out of range, or the file
     *                     cannot be reopened or skipped to the requested offset
     */
    @Override
    public void seekTo(long newPosition) throws IOException {
        if (newPosition < 0) {
            throw new IOException("Cannot seek to negative position: " + newPosition);
        }
        if (newPosition > length) {
            throw new IOException("Cannot seek past end of file. Position: " +
                    newPosition + ", length: " + length);
        }

        // Build the replacement first so a failure cannot leave us holding a
        // closed stream with a stale position.
        InputStream reopened = new BufferedInputStream(Files.newInputStream(path));
        try {
            if (newPosition > 0) {
                IOUtils.skipFully(reopened, newPosition);
            }
        } catch (IOException | RuntimeException e) {
            // Do not leak the half-initialised stream; keep the original failure primary.
            try {
                reopened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }

        InputStream previous = currentStream;
        currentStream = reopened;
        position = newPosition;
        // State is already consistent; an error closing the old stream is still reported.
        previous.close();
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean hasPath() {
        return true;
    }

    /**
     * Returns the backing file's path.
     *
     * @param suffix ignored by this implementation; the existing path is returned as-is
     * @return the path this source was created with
     */
    @Override
    public Path getPath(String suffix) throws IOException {
        // Already file-backed, just return the path
        return path;
    }

    /**
     * @return the file size in bytes as measured when this source was constructed
     */
    @Override
    public long getLength() {
        return length;
    }

    /**
     * Closes the stream currently in use.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        if (currentStream != null) {
            currentStream.close();
        }
    }

    /**
     * Does nothing for this implementation.
     */
    @Override
    public void enableRewind() {
        // No-op: file is always rewindable
    }

    /**
     * Remembers the current position for a later {@link #reset()}.
     *
     * @param readlimit ignored; the mark is held as a file offset and is not
     *                  invalidated by the number of bytes read afterwards
     */
    @Override
    public synchronized void mark(int readlimit) {
        markPosition = position;
    }

    /**
     * Returns to the position recorded by the last {@link #mark(int)} call.
     *
     * @throws IOException if no mark has been set, or if seeking back fails
     */
    @Override
    public synchronized void reset() throws IOException {
        if (markPosition < 0) {
            throw new IOException("Mark not set");
        }
        seekTo(markPosition);
    }

    /**
     * @return always {@code true}; mark/reset is implemented via {@link #seekTo(long)}
     */
    @Override
    public boolean markSupported() {
        return true;
    }
}