ImageParserTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.image;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.TikaTest;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
public class ImageParserTest extends TikaTest {
private final Parser parser = new ImageParser();
@Test
public void testBMP() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
try (TikaInputStream tis = getResourceAsStream("/test-documents/testBMP.bmp")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height"));
assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width"));
assertEquals("8 8 8", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample"));
assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension PixelAspectRatio"));
//TODO: figure out why we're getting 0.35273367 in Ubuntu, but not Windows
//assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
//assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
assertEquals("BI_RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName"));
assertEquals("image/bmp", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES));
}
@Test
public void testGIF() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/gif");
try (TikaInputStream tis = getResourceAsStream("/test-documents/testGIF.gif")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height"));
assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width"));
assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless"));
assertEquals("Normal", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension ImageOrientation"));
assertEquals("lzw", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName"));
assertEquals("0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension HorizontalPixelOffset"));
assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, " +
"imageHeight=75, interlaceFlag=false", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "ImageDescriptor"));
assertEquals("Index", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat"));
assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma NumChannels"));
assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression NumProgressiveScans"));
assertEquals("RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType"));
assertEquals("Licensed to the Apache Software Foundation (ASF) under " +
"one or more contributor license agreements. See the NOTICE file " +
"distributed with this work for additional information regarding " +
"copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "CommentExtensions CommentExtension"));
assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one " +
"or more contributor license agreements. See the NOTICE file " +
"distributed with this work for additional information regarding " +
"copyright ownership., encoding=ISO-8859-1, compression=none",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry"));
assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma BlackIsZero"));
assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, " +
"delayTime=0, transparentColorIndex=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "GraphicControlExtension"));
assertEquals("0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension VerticalPixelOffset"));
assertEquals("image/gif", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
"more contributor license agreements. See the NOTICE file distributed " +
"with this work for additional information regarding copyright ownership.",
metadata.get(TikaCoreProperties.COMMENTS));
assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES));
}
@Test
public void testJPEG() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
try (TikaInputStream tis = getResourceAsStream("/test-documents/testJPEG.jpg")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height"));
assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width"));
assertEquals("0.35277778", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension VerticalPixelSize"));
assertEquals("false", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless"));
assertEquals("class=0, htableId=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence dht dhtable"));
assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, " +
"Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "JPEGvariety app0JFIF"));
assertEquals("225", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence unknown"));
assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sos scanComponentSpec"));
assertEquals("normal", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension ImageOrientation"));
assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension PixelAspectRatio"));
assertEquals("elementPrecision=0, qtableId=0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence dqt dqtable"));
assertEquals("numScanComponents=3, startSpectralSelection=0, " +
"endSpectralSelection=63, approxHigh=0, approxLow=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sos"));
assertEquals("componentId=1, HsamplingFactor=1, " + "VsamplingFactor=1, QtableSelector=0",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sof componentSpec"));
assertEquals("JPEG", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName"));
assertEquals("0.35277778", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension HorizontalPixelSize"));
assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
"more contributor license agreements. See the NOTICE file " +
"distributed with this work for additional information " +
"regarding copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence com"));
assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma NumChannels"));
assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression NumProgressiveScans"));
assertEquals("YCbCr", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType"));
assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation " +
"(ASF) under one or more contributor license agreements. See the NOTICE" +
" file distributed with this work for additional information regarding " +
"copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry"));
assertEquals("image/jpeg", metadata.get("Content-Type"));
assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, " +
"numFrameComponents=3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "markerSequence sof"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
"more contributor license agreements. See the NOTICE file distributed " +
"with this work for additional information regarding copyright ownership.",
metadata.get(TikaCoreProperties.COMMENTS));
assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES));
}
@Test
public void testPNG() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/png");
try (TikaInputStream tis = getResourceAsStream("/test-documents/testPNG.png")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
assertEquals("75", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height"));
assertEquals("100", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width"));
assertEquals("0.35273367", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension VerticalPixelSize"));
assertEquals("8 8 8", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data BitsPerSample"));
assertEquals("Perceptual", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "sRGB"));
assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression Lossless"));
assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "tIME"));
assertEquals("Normal", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension ImageOrientation"));
assertEquals("1.0", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension PixelAspectRatio"));
assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " +
"(ASF) under one or more contributor license agreements. See the " +
"NOTICE file distributed with this work for additional information " +
"regarding copyright ownership.", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "tEXt tEXtEntry"));
assertEquals("deflate", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression CompressionTypeName"));
assertEquals("UnsignedIntegral", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data SampleFormat"));
assertEquals("0.35273367", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Dimension HorizontalPixelSize"));
assertEquals("none", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Transparency Alpha"));
assertEquals("pixelsPerUnitXAxis=2835, pixelsPerUnitYAxis=2835, unitSpecifier=meter",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "pHYs"));
assertEquals("3", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma NumChannels"));
assertEquals("1", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Compression NumProgressiveScans"));
assertEquals("RGB", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma ColorSpaceType"));
assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " +
"(ASF) under one or more contributor license agreements. See the " +
"NOTICE file distributed with this work for additional information " +
"regarding copyright ownership., encoding=ISO-8859-1, compression=none",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Text TextEntry"));
assertEquals("PixelInterleaved", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Data PlanarConfiguration"));
assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, " +
"compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "IHDR"));
assertEquals("true", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Chroma BlackIsZero"));
assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47",
metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "Document ImageModificationTime"));
assertEquals("image/png", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES));
}
@Test // TIKA-2232
public void testJBIG2() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/x-jbig2");
try (TikaInputStream tis = getResourceAsStream("/test-documents/testJBIG2.jb2")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
assertEquals("78", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "height"));
assertEquals("328", metadata.get(ImageMetadataExtractor.UNKNOWN_IMG_NS + "width"));
assertEquals("image/x-jbig2", metadata.get("Content-Type"));
assertEquals(1, metadata.getInt(TikaCoreProperties.NUM_IMAGES));
}
@Test
public void testMimeTypeToOCRMimeTypeConversion() throws Exception {
assertEquals(new MediaType("image", "OCR-png"),
AbstractImageParser.convertToOCRMediaType(MediaType.image("png")));
}
@Test
public void testNPEOnEmptyContentType() throws Exception {
//test no NPE TIKA-3569
Metadata metadata = new Metadata();
try (TikaInputStream tis = getResourceAsStream("/test-documents/testBMP.bmp")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "unparseablegarbage");
try (TikaInputStream tis = getResourceAsStream("/test-documents/testBMP.bmp")) {
parser.parse(tis, new DefaultHandler(), metadata, new ParseContext());
}
}
}