ExtraField.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.gzip;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
/**
 * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes.
 *
 * <pre>
 * +---+---+=================================+
 * | XLEN  |...XLEN bytes of "extra field"...| (more...)
 * +---+---+=================================+
 * </pre>
 *
 * This class represents the extra field payload (excluding the XLEN 2 bytes). The ExtraField payload consists of a series of subfields, each of the form:
 *
 * <pre>
 * +---+---+---+---+==================================+
 * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
 * +---+---+---+---+==================================+
 * </pre>
 *
 * This class does not expose the internal subfields list to prevent adding subfields without total extra length validation. The class is iterable, but
 * the iterator it returns does not allow modification.
 *
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
 * @since 1.28.0
 */
public class ExtraField implements Iterable<ExtraField.SubField> {

    /**
     * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields,
     * each of the form:
     *
     * <pre>
     * +---+---+---+---+==================================+
     * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
     * +---+---+---+---+==================================+
     * </pre>
     * <p>
     * The reserved IDs are:
     * </p>
     *
     * <pre>
     * SI1         SI2         Data
     * ----------  ----------  ----
     * 0x41 ('A')  0x70 ('P')  Apollo file type information
     * </pre>
     * <p>
     * Subfield IDs with {@code SI2 = 0} are reserved for future use.
     * </p>
     * <p>
     * LEN gives the length of the subfield data, excluding the 4 initial bytes.
     * </p>
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
     */
    public static class SubField {

        /** First byte of the subfield ID. */
        private final byte si1;

        /** Second byte of the subfield ID. */
        private final byte si2;

        /** Subfield data; stored as given, without copying. */
        private final byte[] payload;

        /**
         * Constructs a subfield from its two ID bytes and its data.
         *
         * @param si1     first ID byte.
         * @param si2     second ID byte.
         * @param payload subfield data; the array is kept by reference.
         */
        SubField(final byte si1, final byte si2, final byte[] payload) {
            this.si1 = si1;
            this.si2 = si2;
            this.payload = payload;
        }

        /**
         * Gets the 2 character ISO-8859-1 string made from the si1 and si2 bytes of the subfield ID.
         *
         * @return Two character ID.
         */
        public String getId() {
            // Mask with 0xff so bytes >= 0x80 map to chars 0x80..0xff instead of being sign-extended.
            return String.valueOf(new char[] { (char) (si1 & 0xff), (char) (si2 & 0xff) });
        }

        /**
         * Gets the subfield payload.
         * <p>
         * The returned array is the one held by this instance, not a copy.
         * </p>
         *
         * @return The payload.
         */
        public byte[] getPayload() {
            return payload;
        }
    }

    /** Largest encodable extra field, since XLEN is an unsigned 16-bit value. */
    private static final int MAX_SIZE = 0xFFFF;

    /** Bytes preceding each subfield's data: SI1, SI2 and the 2-byte LEN. */
    private static final int SUBFIELD_HEADER_SIZE = 4;

    private static final byte[] ZERO_BYTES = {};

    /**
     * Parses the raw extra field bytes (without the leading XLEN) into subfields.
     *
     * @param bytes the encoded extra field, may be {@code null}.
     * @return the parsed instance, or {@code null} if {@code bytes} is {@code null}.
     * @throws IOException if a subfield declares more data than remains, or if trailing bytes are too few to form a subfield header.
     */
    static ExtraField fromBytes(final byte[] bytes) throws IOException {
        if (bytes == null) {
            return null;
        }
        final ExtraField extra = new ExtraField();
        int pos = 0;
        while (bytes.length - pos >= SUBFIELD_HEADER_SIZE) {
            final byte si1 = bytes[pos++];
            final byte si2 = bytes[pos++];
            // LEN is stored little endian: low byte first.
            final int sublen = bytes[pos++] & 0xff | (bytes[pos++] & 0xff) << 8;
            final int remaining = bytes.length - pos;
            if (sublen > remaining) {
                throw new IOException("Extra subfield length exceeds remaining bytes in extra: " + sublen + " > " + remaining);
            }
            final byte[] payload = new byte[sublen];
            System.arraycopy(bytes, pos, payload, 0, sublen);
            pos += sublen;
            extra.subFields.add(new SubField(si1, si2, payload));
            extra.totalSize = pos;
        }
        if (pos < bytes.length) {
            // 1 to 3 bytes are left over: not enough for SI1, SI2 and LEN.
            throw new IOException((bytes.length - pos) + " remaining bytes not used to parse an extra subfield.");
        }
        return extra;
    }

    private final List<SubField> subFields = new ArrayList<>();

    /** Encoded size in bytes of all subfields, headers included. */
    private int totalSize;

    /**
     * Constructs a new instance.
     */
    public ExtraField() {
    }

    /**
     * Appends a subfield identified by a 2 character ISO-8859-1 string. The chars at index 0 and 1 are respectively si1 and si2 (subfield ID 1 and 2).
     * <p>
     * The payload array is stored by reference, not copied.
     * </p>
     *
     * @param id      The subfield ID.
     * @param payload The subfield payload.
     * @return this instance.
     * @throws NullPointerException     if {@code id} is {@code null}.
     * @throws NullPointerException     if {@code payload} is {@code null}.
     * @throws IllegalArgumentException if the ID is not exactly 2 characters long or contains a character outside ISO-8859-1.
     * @throws IOException              if appending this subfield would exceed the max size 65535 of the extra header.
     */
    public ExtraField addSubField(final String id, final byte[] payload) throws IOException {
        Objects.requireNonNull(id, "id");
        Objects.requireNonNull(payload, "payload");
        if (id.length() != 2) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final char si1 = id.charAt(0);
        final char si2 = id.charAt(1);
        if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload);
        // Compare against the remaining room rather than summing, so a huge payload length cannot overflow int.
        if (payload.length > MAX_SIZE - SUBFIELD_HEADER_SIZE - totalSize) {
            throw new IOException("Extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")");
        }
        subFields.add(f);
        totalSize += SUBFIELD_HEADER_SIZE + payload.length;
        return this;
    }

    /**
     * Removes all subfields from this instance.
     */
    public void clear() {
        subFields.clear();
        totalSize = 0;
    }

    /**
     * Finds the first subfield whose ID equals the given ID.
     *
     * @param id The ID to find.
     * @return The first matching SubField, or {@code null} if none matches.
     */
    public SubField findFirstSubField(final String id) {
        return subFields.stream().filter(f -> f.getId().equals(id)).findFirst().orElse(null);
    }

    /**
     * Gets the size in bytes of the encoded extra field. This does not include its own 16 bits size when embedded in the gzip header. For N subfields,
     * the total is all subfields payloads bytes + 4N.
     *
     * @return the bytes count of this extra payload when encoded.
     */
    public int getEncodedSize() {
        return totalSize;
    }

    /**
     * Gets the subfield at the given index.
     *
     * @param index index of the element to return.
     * @return the subfield at the specified position in this list.
     * @throws IndexOutOfBoundsException if the index is out of range ({@code index < 0 || index >= size()}).
     */
    public SubField getSubField(final int index) {
        return subFields.get(index);
    }

    /**
     * Tests if this extra field has no subfields.
     *
     * @return true if there are no subfields, false otherwise.
     */
    public boolean isEmpty() {
        return subFields.isEmpty();
    }

    /**
     * Returns an unmodifiable iterator over the elements in the SubField list in proper sequence.
     *
     * @return an unmodifiable iterator over the SubField elements, in insertion order.
     */
    @Override
    public Iterator<SubField> iterator() {
        return Collections.unmodifiableList(subFields).iterator();
    }

    /**
     * Gets the count of subfields currently in this extra field.
     *
     * @return the count of subfields contained in this instance.
     */
    public int size() {
        return subFields.size();
    }

    /**
     * Encodes all subfields, in order, as SI1, SI2, little endian LEN, then the data.
     *
     * @return the encoded bytes (without the leading XLEN); an empty array if there are no subfields.
     */
    byte[] toByteArray() {
        if (subFields.isEmpty()) {
            return ZERO_BYTES;
        }
        final byte[] ba = new byte[totalSize];
        int pos = 0;
        for (final SubField f : subFields) {
            final int len = f.payload.length;
            ba[pos++] = f.si1;
            ba[pos++] = f.si2;
            ba[pos++] = (byte) (len & 0xff); // little endian expected
            ba[pos++] = (byte) (len >>> 8);
            System.arraycopy(f.payload, 0, ba, pos, len);
            pos += len;
        }
        return ba;
    }
}