BPGParser.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.image;
import java.io.EOFException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.io.EndianUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Photoshop;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
/**
 * Parser for the Better Portable Graphics (BPG) File Format.
 * <p>
 * Documentation on the file format is available from
 * http://bellard.org/bpg/bpg_spec.txt
 * <p>
 * Only the container header and the EXIF / XMP extension records are read;
 * the HEVC payload that follows is left untouched.
 */
@TikaComponent
public class BPGParser extends AbstractImageParser {
    protected static final int EXTENSION_TAG_EXIF = 1;
    protected static final int EXTENSION_TAG_ICC_PROFILE = 2;
    protected static final int EXTENSION_TAG_XMP = 3;
    protected static final int EXTENSION_TAG_THUMBNAIL = 4;

    //50 MB -- throw TikaMemoryLimitException if xmp or exif is allegedly longer than this
    private static final int DEFAULT_MAX_RECORD_LENGTH = 50 * 1024 * 1024;
    private static final long serialVersionUID = -161736541253892772L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<>(
                    Arrays.asList(MediaType.image("x-bpg"), MediaType.image("bpg"))));

    /** Largest single extension record (in bytes) this parser is willing to process. */
    private int maxRecordLength = DEFAULT_MAX_RECORD_LENGTH;

    /**
     * Returns the media types handled by this parser.
     *
     * @param context the parse context (not consulted)
     * @return the unmodifiable set containing {@code image/x-bpg} and {@code image/bpg}
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Reads the BPG header and extension records from {@code stream} and stores
     * bit depth, colour mode, image dimensions and any EXIF / XMP data in
     * {@code metadata}.
     *
     * @param stream         the stream positioned at the start of the BPG file
     * @param contentHandler not used by this implementation
     * @param metadata       receives the extracted values
     * @param parseContext   not used by this implementation
     * @throws IOException   if the stream ends prematurely or cannot be read
     * @throws SAXException  propagated from the XMP handling
     * @throws TikaException if the magic signature is wrong, an extension record is
     *                       malformed, or a record exceeds {@code maxRecordLength}
     */
    @Override
    void extractMetadata(InputStream stream, ContentHandler contentHandler, Metadata metadata,
                         ParseContext parseContext)
            throws IOException, SAXException, TikaException {
        // Check for the magic header signature
        byte[] signature = new byte[4];
        IOUtils.readFully(stream, signature);
        if (signature[0] != (byte) 'B' || signature[1] != (byte) 'P' ||
                signature[2] != (byte) 'G' || signature[3] != (byte) 0xfb) {
            throw new TikaException("BPG magic signature invalid");
        }

        // The format stores bit fields most significant bit first.
        // First flag byte: pixel_format u(3) | alpha1_flag u(1) | bit_depth_minus_8 u(4)
        int pdf = readHeaderByte(stream);
        // TODO Identify a suitable metadata key for the pixel format, (pdf >> 5) & 0x7
        // TODO Identify a suitable metadata key for the alpha flags,
        //      alpha1 = (pdf & 0x10) != 0 and alpha2 = (cer & 0x04) != 0
        int bitDepth = (pdf & 0x0F) + 8;
        metadata.set(TIFF.BITS_PER_SAMPLE, Integer.toString(bitDepth));

        // Second flag byte: color_space u(4) | extension_present_flag u(1) |
        //                   alpha2_flag u(1) | limited_range_flag u(1) | animation/reserved u(1)
        int cer = readHeaderByte(stream);
        int colourSpace = (cer >> 4) & 0x0F;
        setColourMode(metadata, colourSpace);
        boolean hasExtensions = (cer & 0x08) != 0;

        // Width and height next
        int width = (int) EndianUtils.readUE7(stream);
        int height = (int) EndianUtils.readUE7(stream);
        metadata.set(TIFF.IMAGE_LENGTH, height);
        metadata.set(TIFF.IMAGE_WIDTH, width);

        // Picture Data length (value not needed, but the field must be consumed)
        EndianUtils.readUE7(stream);

        // Extension data follows its own length field directly.
        // NOTE(review): early drafts of the format carried a separate alpha data length
        // here; the published bpg_spec.txt header has no such field -- confirm against
        // the spec revision you need to support.
        if (hasExtensions) {
            long extensionDataLength = EndianUtils.readUE7(stream);
            parseExtensions(stream, extensionDataLength, metadata);
        }

        // HEVC Header + Data
        // Alpha HEVC Header + Data
        // We can't do anything with these parts
    }

    /**
     * Reads one mandatory header byte.
     *
     * @throws EOFException if the stream is already exhausted
     */
    private static int readHeaderByte(InputStream stream) throws IOException {
        int value = stream.read();
        if (value < 0) {
            throw new EOFException("Unexpected end of stream in BPG header");
        }
        return value;
    }

    /**
     * Maps the numeric colour space from the header onto {@link Photoshop#COLOR_MODE}.
     * Unknown values leave the metadata untouched.
     */
    private static void setColourMode(Metadata metadata, int colourSpace) {
        // NOTE(review): later spec revisions may assign other meanings to values 3 and 4
        // (additional YCbCr matrices) -- confirm before relying on these two labels.
        switch (colourSpace) {
            case 0:
                metadata.set(Photoshop.COLOR_MODE, "YCbCr Colour");
                break;
            case 1:
                metadata.set(Photoshop.COLOR_MODE, "RGB Colour");
                break;
            case 2:
                metadata.set(Photoshop.COLOR_MODE, "YCgCo Colour");
                break;
            case 3:
                metadata.set(Photoshop.COLOR_MODE, "YCbCrK Colour");
                break;
            case 4:
                metadata.set(Photoshop.COLOR_MODE, "CMYK Colour");
                break;
            default:
                break;
        }
    }

    /**
     * Walks the extension block, handing EXIF and XMP records to the metadata
     * extractor and skipping every other record.
     * <p>
     * Progress is measured in bytes actually consumed from the stream, so the tag and
     * length fields of each record count towards {@code extensionDataLength} and the
     * loop always advances.
     */
    private void parseExtensions(InputStream stream, long extensionDataLength, Metadata metadata)
            throws IOException, SAXException, TikaException {
        CountingStream counted = new CountingStream(stream);
        ImageMetadataExtractor metadataExtractor = new ImageMetadataExtractor(metadata);

        while (counted.getCount() < extensionDataLength) {
            long tag = EndianUtils.readUE7(counted);
            long extensionLength = EndianUtils.readUE7(counted);
            // Do not rely on the varint reader to report end of stream itself.
            if (counted.reachedEof()) {
                throw new EOFException("Unexpected end of stream in BPG extension data");
            }
            if (extensionLength < 0) {
                throw new TikaException("BPG extension length must be >= 0");
            }
            if (extensionLength > maxRecordLength) {
                throw new TikaMemoryLimitException("extension length (" +
                        extensionLength + " bytes) is greater than 'maxRecordLength' (" +
                        maxRecordLength + " bytes).  If this file is not corrupt, " +
                        "consider bumping the maxRecordLength via tika-config.xml");
            }
            long remaining = extensionDataLength - counted.getCount();
            if (extensionLength > remaining) {
                throw new TikaException("BPG extension record (" + extensionLength +
                        " bytes) overruns the extension data block (" + remaining +
                        " bytes remaining)");
            }

            // Bounded by maxRecordLength above, so the narrowing is safe.
            int length = (int) extensionLength;
            // Tags that do not fit an int cannot be any known tag; treat them as unknown.
            int extensionType = (tag < 0 || tag > Integer.MAX_VALUE) ? -1 : (int) tag;
            long recordEnd = counted.getCount() + length;

            switch (extensionType) {
                case EXTENSION_TAG_EXIF:
                    metadataExtractor.parseRawExif(counted, length, true);
                    break;
                case EXTENSION_TAG_XMP:
                    handleXMP(counted, length, metadataExtractor);
                    break;
                default:
                    IOUtils.skipFully(counted, length);
                    break;
            }

            // Re-align on the record boundary if a handler consumed less than declared.
            long unread = recordEnd - counted.getCount();
            if (unread > 0) {
                IOUtils.skipFully(counted, unread);
            }
        }
    }

    /**
     * Sets the maximum size, in bytes, accepted for a single extension record.
     *
     * @param maxRecordLength the new limit
     */
    public void setMaxRecordLength(int maxRecordLength) {
        this.maxRecordLength = maxRecordLength;
    }

    /**
     * @return the maximum size, in bytes, accepted for a single extension record
     */
    public int getMaxRecordLength() {
        return this.maxRecordLength;
    }

    /**
     * Reads {@code xmpLength} bytes of raw XMP from the stream and passes them to
     * the extractor.
     *
     * @param stream    the stream positioned at the first XMP byte
     * @param xmpLength the number of XMP bytes to read
     * @param extractor the extractor that parses the raw packet
     * @throws IOException   if the stream ends before {@code xmpLength} bytes are read
     * @throws TikaException if {@code xmpLength} is negative or exceeds
     *                       {@code maxRecordLength}
     * @throws SAXException  propagated from the extractor
     */
    protected void handleXMP(InputStream stream, int xmpLength, ImageMetadataExtractor extractor)
            throws IOException, TikaException, SAXException {
        if (xmpLength < 0) {
            throw new TikaException("xmp length must be >= 0");
        }
        if (xmpLength > maxRecordLength) {
            throw new TikaMemoryLimitException("xmp length (" + xmpLength +
                    " bytes) is larger than maxRecordLength (" + maxRecordLength +
                    " bytes). Consider setting maxRecordLength to a greater value for " +
                    "this parser via tika-config.xml if this file is not corrupt.");
        }
        byte[] xmp = new byte[xmpLength];
        IOUtils.readFully(stream, xmp);
        extractor.parseRawXMP(xmp);
    }

    /**
     * Pass-through stream that records how many bytes were consumed and whether
     * the underlying stream reported end of input. Mark/reset is disabled so the
     * count cannot run out of step with the real stream position.
     */
    private static final class CountingStream extends FilterInputStream {
        private long count;
        private boolean eof;

        CountingStream(InputStream in) {
            super(in);
        }

        long getCount() {
            return count;
        }

        boolean reachedEof() {
            return eof;
        }

        @Override
        public int read() throws IOException {
            int value = in.read();
            if (value < 0) {
                eof = true;
            } else {
                count++;
            }
            return value;
        }

        @Override
        public int read(byte[] buffer, int offset, int length) throws IOException {
            int n = in.read(buffer, offset, length);
            if (n < 0) {
                eof = true;
            } else {
                count += n;
            }
            return n;
        }

        @Override
        public long skip(long n) throws IOException {
            long skipped = in.skip(n);
            if (skipped > 0) {
                count += skipped;
            }
            return skipped;
        }

        @Override
        public boolean markSupported() {
            return false;
        }

        @Override
        public synchronized void mark(int readLimit) {
            // intentionally unsupported
        }

        @Override
        public synchronized void reset() throws IOException {
            throw new IOException("mark/reset not supported");
        }
    }
}