// ArArchiveInputStream.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.archivers.ar;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.ParsingUtils;
import org.apache.commons.lang3.ArrayUtils;
/**
* Implements the "ar" archive format as an input stream.
*
* @NotThreadSafe
*/
public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> {
/**
 * Fluent builder for {@link ArArchiveInputStream} instances.
 * <p>
 * For example:
 * </p>
 * <pre>{@code
 * ArArchiveInputStream in = ArArchiveInputStream.builder()
 *     .setPath(inputPath)
 *     .get();
 * }</pre>
 *
 * @since 1.29.0
 */
public static final class Builder extends AbstractBuilder<ArArchiveInputStream, Builder> {

    /** Creates a builder whose charset is preset to US-ASCII. */
    private Builder() {
        super();
        setCharset(StandardCharsets.US_ASCII);
    }

    /**
     * Creates the stream from this builder's current settings.
     *
     * @return a new {@link ArArchiveInputStream}.
     * @throws IOException if the stream cannot be constructed.
     */
    @Override
    public ArArchiveInputStream get() throws IOException {
        final ArArchiveInputStream stream = new ArArchiveInputStream(this);
        return stream;
    }
}
// Offsets and lengths of the fixed-width metadata fields of an entry header.
// Each offset is the previous field's offset plus the previous field's length,
// so the fields are laid out back to back in declaration order.
/** Offset of the name field within the header. */
private static final int NAME_OFFSET = 0;
/** Length of the name field. */
private static final int NAME_LEN = 16;
/** Offset of the last-modified field (directly after the name). */
private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
/** Length of the last-modified field. */
private static final int LAST_MODIFIED_LEN = 12;
/** Offset of the user id field. */
private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
/** Length of the user id field. */
private static final int USER_ID_LEN = 6;
/** Offset of the group id field. */
private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
/** Length of the group id field. */
private static final int GROUP_ID_LEN = 6;
/** Offset of the file mode field. */
private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
/** Length of the file mode field. */
private static final int FILE_MODE_LEN = 8;
/** Offset of the entry length field. */
private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
/** Length of the entry length field. */
private static final int LENGTH_LEN = 10;
/** Prefix of a BSD long-name reference; the decimal length of the real name follows it. */
static final String BSD_LONGNAME_PREFIX = "#1/";
/** Number of characters to strip from a BSD long-name reference to reach the decimal name length. */
private static final int BSD_LONGNAME_PREFIX_LEN = BSD_LONGNAME_PREFIX.length();
/** Matches a BSD long-name reference: the prefix followed by one or more decimal digits. */
private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+");
/** Name of the SVR4/GNU "Archive String Table" entry that stores long file names. */
private static final String GNU_STRING_TABLE_NAME = "//";
/** Matches a GNU long-name reference: a "/" followed by one or more decimal digits (the string table offset). */
private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+");
/**
 * Obtains a fresh {@link Builder} for configuring an {@link ArArchiveInputStream}.
 *
 * @return A new builder.
 * @since 1.29.0
 */
public static Builder builder() {
    final Builder fresh = new Builder();
    return fresh;
}
/**
 * Tests whether a header name is a BSD ar reference to a long name (or a name containing spaces).
 * <p>
 * From the FreeBSD ar(5) man page:
 * </p>
 * <pre>
 * BSD   In the BSD variant, names that are shorter than 16
 *       characters and without embedded spaces are stored
 *       directly in this field.  If a name has an embedded
 *       space, or if it is longer than 16 characters, then
 *       the string "#1/" followed by the decimal represen-
 *       tation of the length of the file name is placed in
 *       this field.  The actual file name is stored immedi-
 *       ately after the archive header.  The content of the
 *       archive member follows the file name.  The ar_size
 *       field of the header (see below) will then hold the
 *       sum of the size of the file name and the size of
 *       the member.
 * </pre>
 *
 * @param name the name read from the entry header, may be {@code null}.
 * @return {@code true} if the whole name is "#1/" followed by decimal digits, {@code false} otherwise (including for {@code null}).
 * @since 1.3
 */
private static boolean isBSDLongName(final String name) {
    if (name == null) {
        return false;
    }
    return BSD_LONGNAME_PATTERN.matcher(name).matches();
}
/**
 * Tests whether an entry is the "Archive String Table" that SVR4/GNU ar uses to store long file names.
 * <p>
 * GNU ar stores multiple extended file names in the data section of a file with the name "//", this record is referred to by future headers.
 * </p>
 * <p>
 * A header references an extended file name by storing a "/" followed by a decimal offset to the start of the file name in the extended file name data
 * section.
 * </p>
 * <p>
 * The format of the "//" file itself is simply a list of the long file names, each separated by one or more LF characters. Note that the decimal offsets
 * are number of characters, not line or string number within the "//" file.
 * </p>
 *
 * @param entry the entry whose name is inspected.
 * @return {@code true} if the entry's name is exactly "//".
 */
private static boolean isGNUStringTable(final ArArchiveEntry entry) {
    final String entryName = entry.getName();
    return GNU_STRING_TABLE_NAME.equals(entryName);
}
/**
 * Tests whether the given bytes begin with the ar global signature, i.e. ASCII "!<arch>" followed by a single LF control character.
 *
 * @param buffer  the bytes to check.
 * @param ignored ignored.
 * @return true, if this stream is an Ar archive stream, false otherwise.
 */
public static boolean matches(final byte[] buffer, final int ignored) {
    final byte[] signature = ArArchiveEntry.HEADER_BYTES;
    return ArrayUtils.startsWith(buffer, signature);
}
/** Whether {@link #close()} has already closed the underlying stream. */
private boolean closed;
/*
* If getNextEntry has been called, the entry metadata is stored in currentEntry.
* It is reset to null when the end of the archive is reached and when the stream is closed.
*/
private ArArchiveEntry currentEntry;
/** Storage area for extra long names (GNU ar); assigned when a GNU string table entry is read. */
private byte[] namebuffer;
/**
* The offset where the data for the current entry starts, measured in bytes read from this stream ({@code -1} until an entry has been read).
*/
private long entryOffset = -1;
/** Cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection). */
private final byte[] metaData = new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
/**
* Constructs an Ar input stream from a builder, reading from the builder's input stream.
*
* @param builder supplies the input stream and the charset.
* @throws IOException declared because obtaining the builder's input stream may fail.
*/
private ArArchiveInputStream(final Builder builder) throws IOException {
this(builder.getInputStream(), builder);
}
/**
* Constructs an Ar input stream with the referenced stream, using the settings of a default {@link #builder() builder}.
*
* @param inputStream the ar input stream
*/
public ArArchiveInputStream(final InputStream inputStream) {
this(inputStream, builder());
}
/**
* Constructs an Ar input stream over the given stream, taking only the charset from the builder.
*
* @param inputStream the ar input stream.
* @param builder supplies the charset passed to the superclass.
*/
private ArArchiveInputStream(final InputStream inputStream, final Builder builder) {
super(inputStream, builder.getCharset());
}
/**
 * Parses a base-10 integer from an ASCII header field.
 *
 * @param byteArray        the buffer holding the field.
 * @param offset           start of the field within the buffer.
 * @param len              length of the field.
 * @param treatBlankAsZero whether a field that is empty after trimming yields {@code 0}.
 * @return the parsed value.
 * @throws IOException if the field cannot be parsed.
 */
private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) throws IOException {
    final int decimal = 10;
    return asInt(byteArray, offset, len, decimal, treatBlankAsZero);
}

/**
 * Parses an integer in the given base from an ASCII header field; a blank field is not accepted.
 *
 * @param byteArray the buffer holding the field.
 * @param offset    start of the field within the buffer.
 * @param len       length of the field.
 * @param base      the radix to parse with.
 * @return the parsed value.
 * @throws IOException if the field cannot be parsed.
 */
private int asInt(final byte[] byteArray, final int offset, final int len, final int base) throws IOException {
    final boolean treatBlankAsZero = false;
    return asInt(byteArray, offset, len, base, treatBlankAsZero);
}

/**
 * Parses an integer in the given base from an ASCII header field, after trimming surrounding whitespace.
 *
 * @param byteArray        the buffer holding the field.
 * @param offset           start of the field within the buffer.
 * @param len              length of the field.
 * @param base             the radix to parse with.
 * @param treatBlankAsZero whether a field that is empty after trimming yields {@code 0}.
 * @return the parsed value.
 * @throws IOException if the field cannot be parsed.
 */
private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) throws IOException {
    final String text = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
    final boolean blankMeansZero = treatBlankAsZero && text.isEmpty();
    return blankMeansZero ? 0 : ParsingUtils.parseIntValue(text, base);
}
/**
 * Parses a long value from an ASCII header field, after trimming surrounding whitespace.
 *
 * @param byteArray the buffer holding the field.
 * @param offset    start of the field within the buffer.
 * @param len       length of the field.
 * @return the parsed value.
 * @throws IOException if the field cannot be parsed.
 */
private long asLong(final byte[] byteArray, final int offset, final int len) throws IOException {
    final String rawField = ArchiveUtils.toAsciiString(byteArray, offset, len);
    final String trimmed = rawField.trim();
    return ParsingUtils.parseLongValue(trimmed);
}
/**
 * Checks and skips the trailer of the current entry.
 * <p>
 * The bytes consumed from the underlying stream are always added to the read count, even when the trailer turns out to be truncated or invalid, so that
 * {@link #getBytesRead()} keeps reflecting the real stream position.
 * </p>
 *
 * @throws EOFException     if the stream ends before the complete trailer could be read.
 * @throws ArchiveException if the trailer bytes differ from the expected trailer.
 * @throws IOException      if an I/O error occurs while reading the stream.
 */
private void checkTrailer() throws IOException {
    final byte[] expectedTrailer = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
    final byte[] actualTrailer = IOUtils.readRange(in, expectedTrailer.length);
    // Count first: the bytes have been consumed regardless of whether they form a valid trailer.
    count(actualTrailer.length);
    if (actualTrailer.length < expectedTrailer.length) {
        // metaData still holds the header that was just read, so it provides the raw entry name for the message.
        throw new EOFException(String.format(
                "Premature end of ar archive: invalid or incomplete trailer for entry '%s'.",
                ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim()));
    }
    if (!Arrays.equals(expectedTrailer, actualTrailer)) {
        throw new ArchiveException("Invalid ar archive entry trailer: " + ArchiveUtils.toAsciiString(actualTrailer));
    }
}
/**
 * Closes the underlying stream the first time it is called and forgets the current entry.
 *
 * @throws IOException if closing the underlying stream fails.
 * @see InputStream#close()
 */
@Override
public void close() throws IOException {
    if (closed) {
        // Already closed: only drop the entry reference.
        currentEntry = null;
        return;
    }
    closed = true;
    in.close();
    currentEntry = null;
}
/**
 * Reads the real name from the current stream assuming the very first bytes to be read are the real file name.
 *
 * @param bsdLongName the header name, i.e. the BSD long-name prefix followed by the decimal length of the real name.
 * @return the real name, decoded as ASCII.
 * @throws EOFException if the stream ends before the announced number of name bytes could be read.
 * @throws IOException  if the length cannot be parsed or an I/O error occurs.
 * @see #isBSDLongName
 * @since 1.3
 */
private String getBSDLongName(final String bsdLongName) throws IOException {
    final String lengthDigits = bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN);
    final int expectedLen = ParsingUtils.parseIntValue(lengthDigits);
    final byte[] rawName = IOUtils.readRange(in, expectedLen);
    count(rawName.length);
    if (rawName.length == expectedLen) {
        return ArchiveUtils.toAsciiString(rawName);
    }
    throw new EOFException(bsdLongName);
}
/**
 * Looks up an extended name in the GNU extended name buffer.
 * <p>
 * The name runs from {@code offset} up to the next LF or NUL byte; a name that is empty or not terminated inside the buffer is rejected.
 * </p>
 *
 * @param offset pointer to entry within the buffer
 * @return the extended file name; without trailing "/" if present.
 * @throws IOException if name not found or buffer not set up
 */
private String getExtendedName(final int offset) throws IOException {
    final byte[] table = namebuffer;
    if (table == null) {
        throw new ArchiveException("Cannot process GNU long file name as no GNU string table was found");
    }
    if (offset >= table.length) {
        throw new ArchiveException("GNU long file name offset out of range: " + offset);
    }
    // Advance to the first terminator (LF or NUL) at or after the offset.
    int end = offset;
    while (end < table.length && table[end] != '\n' && table[end] != 0) {
        end++;
    }
    if (end < table.length) {
        // A terminator was found; drop a trailing '/' that precedes it.
        if (end > offset && table[end - 1] == '/') {
            end--;
        }
        // Only return when something is left of the name.
        if (end > offset) {
            return ArchiveUtils.toAsciiString(table, offset, end - offset);
        }
    }
    throw new ArchiveException("Failed to read GNU long file name at offset " + offset);
}
/**
 * Returns the next AR entry in this stream by delegating to {@link #getNextEntry()}.
 *
 * @return the next AR entry.
 * @throws IOException if the entry could not be read
 * @deprecated Use {@link #getNextEntry()}.
 */
@Deprecated
public ArArchiveEntry getNextArEntry() throws IOException {
    final ArArchiveEntry next = getNextEntry();
    return next;
}
/**
* Returns the next AR file entry in this stream.
* <p>
* The method skips special AR file entries, such as those used by GNU.
* </p>
* <p>
* On the first call the global archive signature is consumed first. Any unread data and padding of the
* previously returned entry is skipped before the next header is read.
* </p>
*
* @return The next AR file entry, or {@code null} if the end of the archive has been reached.
* @throws IOException if the entry could not be read or is malformed.
*/
@Override
public ArArchiveEntry getNextEntry() throws IOException {
skipGlobalSignature();
// Handle special GNU ar entries
boolean foundGNUStringTable = false;
do {
// If there is a current entry, skip any unread data and padding
if (currentEntry != null) {
IOUtils.skip(this, Long.MAX_VALUE); // Skip to end of current entry
skipRecordPadding(); // Skip padding to align to the next record
}
// Read the next header record
final byte[] headerBuf = getRecord();
if (headerBuf == null) {
// If we encounter a GNU string table but no subsequent file member, the archive is malformed.
// GNU does not document the ordering of the GNU string table, but the FreeBSD ar(5) manual does:
//
// "If present, this member immediately follows the archive symbol table if an archive symbol
// table is present, or is the first member otherwise."
//
// Reference: https://man.freebsd.org/cgi/man.cgi?query=ar&sektion=5
if (foundGNUStringTable) {
throw new EOFException("Premature end of ar archive: no regular entry after GNU string table.");
}
currentEntry = null;
return null; // End of archive
}
// The trailer is read directly after the header fields, before the header is parsed
checkTrailer();
// Parse the header into a new entry
currentEntry = parseEntry(headerBuf);
entryOffset = getBytesRead(); // Store the offset of the entry
foundGNUStringTable = isGNUStringTable(currentEntry);
if (foundGNUStringTable) {
// If this is a GNU string table entry, read the extended names and continue
namebuffer = readGNUStringTable(currentEntry);
}
} while (foundGNUStringTable);
// Handle long file names and other special cases
String name = currentEntry.getName();
long len = currentEntry.getLength();
// Handle GNU ar: names ending with '/' are terminated (allows spaces in names)
if (name.endsWith("/")) {
name = name.substring(0, name.length() - 1);
} else if (isGNULongName(name)) {
// GNU ar: name is a reference to the string table (e.g., "/42"), resolve the actual name
final int off = ParsingUtils.parseIntValue(name.substring(1));
name = getExtendedName(off);
} else if (isBSDLongName(name)) {
// BSD ar: name is stored after the header, retrieve it
name = getBSDLongName(name);
// The entry length includes the file name length; adjust to get the actual file data length
final int nameLen = name.length();
if (nameLen > len) {
throw new ArchiveException(
"Invalid BSD long name: file name length (" + nameLen + ") exceeds entry length (" + len + ")");
}
len -= nameLen;
// The name bytes have already been consumed, so the entry data starts after them
entryOffset += nameLen;
}
// Replace the raw header entry with one carrying the resolved name and the adjusted length
currentEntry = new ArArchiveEntry(name, len, currentEntry.getUserId(), currentEntry.getGroupId(),
currentEntry.getMode(), currentEntry.getLastModified());
return currentEntry;
}
/**
 * Reads the next raw header record from the input stream into the shared metadata buffer.
 * <p>
 * The record is expected to be of a fixed size defined by the AR format.
 * </p>
 *
 * @return the byte array containing the record data, or null if the end of the stream is reached.
 * @throws IOException if an I/O error occurs while reading the stream or if the record is malformed.
 */
private byte[] getRecord() throws IOException {
    final int bytesRead = IOUtils.readFully(in, metaData);
    count(bytesRead);
    if (bytesRead == 0) {
        // Nothing left to read: clean end of archive.
        return null;
    }
    if (bytesRead >= metaData.length) {
        return metaData;
    }
    throw new EOFException(String.format(
            "Premature end of ar archive: incomplete entry header (expected %d bytes, got %d).",
            metaData.length, bytesRead));
}
/**
 * Does the name look like it is a long name (or a name containing spaces) as encoded by SVR4/GNU ar?
 * <p>
 * Such a name is a "/" followed by the decimal offset of the real name within the GNU string table.
 * </p>
 *
 * @param name the name read from the entry header, may be {@code null}.
 * @return {@code true} if the whole name is "/" followed by decimal digits, {@code false} otherwise (including for {@code null}).
 * @see #isGNUStringTable
 */
private static boolean isGNULongName(final String name) {
    // Static: the check depends only on its argument and a class constant.
    return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches();
}
/**
 * Parses the entry metadata from the provided raw record.
 * <p>
 * All fields are read from {@code headerBuf}. For the GNU string table entry only the name and length are parsed, as its remaining fields are not used
 * and may be blank.
 * </p>
 *
 * @param headerBuf the buffer containing the entry metadata.
 * @return an {@link ArArchiveEntry} object containing the parsed metadata.
 * @throws IOException if the metadata cannot be parsed correctly.
 */
private ArArchiveEntry parseEntry(final byte[] headerBuf) throws IOException {
    try {
        final String name = ArchiveUtils.toAsciiString(headerBuf, NAME_OFFSET, NAME_LEN).trim();
        final long length = asLong(headerBuf, LENGTH_OFFSET, LENGTH_LEN);
        // The remaining fields in the GNU string table entry are not used and may be blank.
        if (GNU_STRING_TABLE_NAME.equals(name)) {
            return new ArArchiveEntry(name, length);
        }
        // User and group id may be blank and then default to 0; mode is octal.
        final int userId = asInt(headerBuf, USER_ID_OFFSET, USER_ID_LEN, true);
        final int groupId = asInt(headerBuf, GROUP_ID_OFFSET, GROUP_ID_LEN, true);
        final int mode = asInt(headerBuf, FILE_MODE_OFFSET, FILE_MODE_LEN, 8);
        final long lastModified = asLong(headerBuf, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN);
        return new ArArchiveEntry(name, length, userId, groupId, mode, lastModified);
    } catch (final IllegalArgumentException e) {
        // NOTE(review): presumably raised by the ArArchiveEntry constructor for a negative size - confirm.
        throw new ArchiveException("Broken archive, entry with negative size", (Throwable) e);
    } catch (final IOException e) {
        throw new ArchiveException("Failed to parse ar entry.", (Throwable) e);
    }
}
/**
 * Reads data of the current entry, never reading past the end of that entry.
 *
 * @param b   the buffer to read into.
 * @param off the start offset in {@code b}.
 * @param len the maximum number of bytes to read.
 * @return the number of bytes read, {@code 0} if {@code len} is {@code 0}, or {@code -1} if {@code len} is negative or the entry has been read completely.
 * @throws IllegalStateException if there is no current entry.
 * @throws EOFException          if the underlying stream ends before the end of the entry.
 * @throws IOException           if an I/O error occurs.
 * @see InputStream#read(byte[], int, int)
 */
@Override
public int read(final byte[] b, final int off, final int len) throws IOException {
    if (len == 0) {
        return 0;
    }
    final ArArchiveEntry entry = currentEntry;
    if (entry == null) {
        throw new IllegalStateException("No current ar entry");
    }
    final long endOfEntry = entryOffset + entry.getLength();
    final long position = getBytesRead();
    if (len < 0) {
        return -1;
    }
    if (position >= endOfEntry) {
        return -1;
    }
    // Cap the request at what is left of the entry.
    final long remaining = endOfEntry - position;
    final int chunk = ArchiveException.toIntExact(Math.min(len, remaining));
    final int n = in.read(b, off, chunk);
    if (n >= 0) {
        count(n);
        return n;
    }
    throw new EOFException(String.format(
            "Premature end of ar archive: entry '%s' is truncated or incomplete.", entry.getName()));
}
/**
 * Reads the GNU archive String Table.
 * <p>
 * The bytes consumed from the underlying stream are always added to the read count, even when the table turns out to be truncated, so that
 * {@link #getBytesRead()} keeps reflecting the real stream position.
 * </p>
 *
 * @param entry the string table entry; its length is the size of the table in bytes.
 * @return the raw bytes of the string table.
 * @throws ArchiveException if the entry length exceeds {@link Integer#MAX_VALUE}.
 * @throws EOFException     if the stream ends before the complete table could be read.
 * @throws IOException      if an I/O error occurs while reading the stream.
 * @see #isGNUStringTable
 */
private byte[] readGNUStringTable(final ArArchiveEntry entry) throws IOException {
    final long length = entry.getLength();
    if (length > Integer.MAX_VALUE) {
        throw new ArchiveException("Invalid GNU string table entry size: " + length);
    }
    final int size = (int) length;
    // Named "table" so that it does not shadow the namebuffer field.
    final byte[] table = IOUtils.readRange(in, size);
    // Count first: the bytes have been consumed regardless of whether the table is complete.
    count(table.length);
    if (table.length < size) {
        throw new EOFException("Premature end of ar archive: truncated or incomplete GNU string table.");
    }
    return table;
}
/**
 * Consumes and validates the global archive signature when nothing has been read from the stream yet; otherwise does nothing.
 *
 * @throws IOException if an I/O error occurs while reading the stream or if the signature is invalid.
 */
private void skipGlobalSignature() throws IOException {
    if (getBytesRead() != 0) {
        // Not at the beginning of the stream.
        return;
    }
    final byte[] magic = ArArchiveEntry.HEADER_BYTES;
    final byte[] head = IOUtils.readRange(in, magic.length);
    final int got = head.length;
    count(got);
    if (got != magic.length) {
        throw new EOFException(String.format(
                "Premature end of ar archive: incomplete global header (expected %d bytes, got %d).",
                magic.length, got));
    }
    if (!Arrays.equals(magic, head)) {
        throw new ArchiveException(
                "Invalid global ar archive header: " + ArchiveUtils.toAsciiString(head));
    }
}
/**
 * Skips the padding byte at the end of a record, if there is one.
 * <p>
 * The AR format requires that each record is padded to an even number of bytes, so if the current offset is odd,
 * we skip one byte.
 * </p>
 *
 * @throws IOException if an I/O error occurs while reading the stream.
 */
private void skipRecordPadding() throws IOException {
    final boolean evenOffset = (getBytesRead() & 1L) == 0;
    if (evenOffset) {
        return;
    }
    // Odd offset: exactly one padding byte must follow.
    if (in.read() < 0) {
        throw new EOFException(String.format(
                "Premature end of ar archive: missing padding for entry '%s'.", currentEntry.getName()));
    }
    count(1);
}
}