DWGParserTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.dwg;



import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;

import org.junit.jupiter.api.Test;
import org.xml.sax.ContentHandler;

import org.apache.tika.TikaTest;
import org.apache.tika.config.loader.TikaLoader;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.utils.StringUtils;

/**
 * Tests for {@link DWGParser}.
 * <p>
 * Most tests parse sample drawings from the test-documents folder on the classpath and
 * check the extracted metadata and body text. The two DWGRead tests load a parser from
 * a JSON configuration and are skipped (via a JUnit assumption) when the configured
 * external executable cannot be run.
 */
public class DWGParserTest extends TikaTest {

    /** Classpath folder that holds the sample drawings. */
    private static final String TEST_DOCUMENTS = "/test-documents/";

    /** Sample drawing used by both DWGRead tests. */
    private static final String DWG_READ_SAMPLE =
            "architectural_-_annotation_scaling_and_multileaders.dwg";

    /**
     * Tells whether the DWGRead executable configured on the given parser can be run.
     *
     * @param parser parser whose default config names the DWGRead executable
     * @return {@code true} only if an executable is configured, it is a regular file and
     *         {@code ExternalParser.check} reports that the command works
     */
    public boolean canRun(DWGParser parser) {
        String dwgRead = parser.getDefaultConfig().getDwgReadExecutable();

        // Without a configured executable there is no command to launch; bail out instead
        // of handing a blank or null command to ExternalParser.check.
        if (StringUtils.isBlank(dwgRead)) {
            return false;
        }
        if (!Files.isRegularFile(Paths.get(dwgRead))) {
            return false;
        }

        // Try running DWGRead from there, and see if it exists + works
        String[] checkCmd = {dwgRead};
        return ExternalParser.check(checkCmd);
    }

    /** Parses the DWG 2000 sample and checks it against the "Test Title" expectations. */
    @Test
    public void testDWG2000Parser() throws Exception {
        testParserAlt(openTestDocument("testDWG2000.dwg"));
    }

    /** Parses the DWG 2004 sample and checks it against the "quick brown fox" expectations. */
    @Test
    public void testDWG2004Parser() throws Exception {
        testParser(openTestDocument("testDWG2004.dwg"));
    }

    /** Parses the DWG 2004 sample without a header; only the content type is expected. */
    @Test
    public void testDWG2004ParserNoHeaderAddress() throws Exception {
        testParserNoHeader(openTestDocument("testDWG2004_no_header.dwg"));
    }

    /** Parses the DWG 2007 sample and checks it against the "quick brown fox" expectations. */
    @Test
    public void testDWG2007Parser() throws Exception {
        testParser(openTestDocument("testDWG2007.dwg"));
    }

    /** Parses the DWG 2010 sample and checks it against the "quick brown fox" expectations. */
    @Test
    public void testDWG2010Parser() throws Exception {
        testParser(openTestDocument("testDWG2010.dwg"));
    }

    /**
     * Parses the DWG 2010 sample carrying custom properties: first the standard
     * expectations, then the two custom property values.
     */
    @Test
    public void testDWG2010CustomPropertiesParser() throws Exception {
        // Check that standard parsing works
        testParser(openTestDocument("testDWG2010_custom_props.dwg"));

        // Check that custom properties with alternate padding work
        try (TikaInputStream tis = openTestDocument("testDWG2010_custom_props.dwg")) {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new DWGParser().parse(tis, handler, metadata, new ParseContext());

            assertEquals("valueforcustomprop1",
                    metadata.get(DWGParser.DWG_CUSTOM_META_PREFIX + "customprop1"));
            assertEquals("valueforcustomprop2",
                    metadata.get(DWGParser.DWG_CUSTOM_META_PREFIX + "customprop2"));
        }
    }

    /** Runs the "Test Title" expectations over every {@code testDWGmech*.dwg} sample. */
    @Test
    public void testDWGMechParser() throws Exception {
        String[] types =
                new String[]{"6", "2004", "2004DX", "2005", "2006", "2007", "2008", "2009", "2010",
                        "2011"};
        for (String type : types) {
            testParserAlt(openTestDocument("testDWGmech" + type + ".dwg"));
        }
    }

    /** Checks the last-modifier metadata extracted from the AC1027 sample. */
    @Test
    public void testAC1027() throws Exception {
        Metadata metadata = getXML("testDWG-AC1027.dwg").metadata;
        assertEquals("hlu", metadata.get(TikaCoreProperties.MODIFIER));
    }

    /** Checks the last-modifier and a custom property extracted from the AC1032 sample. */
    @Test
    public void testAC1032() throws Exception {
        Metadata metadata = getXML("testDWG-AC1032.dwg").metadata;
        assertEquals("jlakshvi", metadata.get(TikaCoreProperties.MODIFIER));
        assertEquals("CUSTOMER'S ADDRESS", metadata.get("dwg-custom:CUSTOMER'S ADDRESS"));
    }

    /**
     * Extracts text with the parser loaded from {@code tika-config-dwgRead.json}.
     * Skipped when the configured DWGRead executable cannot be run.
     */
    @Test
    public void testDWGReadexe() throws Exception {
        DWGParser parser = loadDwgParser("tika-config-dwgRead.json");
        assumeTrue(canRun(parser), "Can't run DWGRead.exe");
        String output = getText(DWG_READ_SAMPLE, parser);
        assertContains("ELEV. 11'-9\" TOP OF SECOND FLR.", output);
    }

    /**
     * Expects a {@link TikaException} mentioning the timeout when the parser is loaded from
     * {@code tika-config-dwgRead-Timeout.json}. Skipped when the configured DWGRead
     * executable cannot be run.
     */
    @Test
    public void testDWGReadtimeout() throws Exception {
        DWGParser parser = loadDwgParser("tika-config-dwgRead-Timeout.json");
        assumeTrue(canRun(parser), "Can't run DWGRead.exe");
        TikaException thrown = assertThrows(
                TikaException.class,
                () -> getText(DWG_READ_SAMPLE, parser),
                "Expected getText() to throw TikaException, but nothing was thrown"
        );
        // assertContains reports the actual message when the expected text is missing
        assertContains("Timeout setting exceeded current setting of", thrown.getMessage());
    }

    /**
     * Opens a sample drawing from the test-documents folder on the classpath.
     *
     * @param fileName file name inside the test-documents folder
     * @return a stream over the resource; the caller is responsible for closing it
     */
    private static TikaInputStream openTestDocument(String fileName) throws Exception {
        String resource = TEST_DOCUMENTS + fileName;
        // Fail with the resource name rather than with an anonymous null stream
        assertTrue(DWGParserTest.class.getResource(resource) != null,
                "Missing test resource: " + resource);
        return TikaInputStream.get(DWGParserTest.class.getResourceAsStream(resource));
    }

    /**
     * Loads the parsers described by the given configuration and returns the first
     * component parser as a {@link DWGParser}.
     *
     * @param configName configuration file name, resolved through {@code getConfigPath}
     * @return the first component parser of the loaded composite parser
     */
    private DWGParser loadDwgParser(String configName) throws Exception {
        CompositeParser composite = (CompositeParser) TikaLoader.load(
                getConfigPath(DWGParserTest.class, configName)).loadParsers();
        return (DWGParser) composite.getAllComponentParsers().get(0);
    }

    /**
     * Parses the stream and checks the "quick brown fox" metadata and body text.
     * The stream is closed when the method returns.
     *
     * @param tis stream over the drawing to parse
     */
    private void testParser(TikaInputStream tis) throws Exception {
        try (TikaInputStream in = tis) {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new DWGParser().parse(in, handler, metadata, new ParseContext());

            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));

            assertEquals("The quick brown fox jumps over the lazy dog",
                    metadata.get(TikaCoreProperties.TITLE));
            assertEquals("Gym class featuring a brown fox and lazy dog",
                    metadata.get(TikaCoreProperties.DESCRIPTION));
            assertEquals("Nevin Nollop", metadata.get(TikaCoreProperties.CREATOR));
            assertContains("Pangram, fox, dog",
                    Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));

            // Only the beginning of the comment is checked; a missing or short value
            // fails the assertion instead of throwing from substring().
            String comments = metadata.get(TikaCoreProperties.COMMENTS);
            assertTrue(comments != null && comments.startsWith("Lorem ipsum"),
                    "Comments should start with 'Lorem ipsum' but were: " + comments);
            assertEquals("http://www.alfresco.com", metadata.get(TikaCoreProperties.RELATION));

            String content = handler.toString();
            assertContains("The quick brown fox jumps over the lazy dog", content);
            assertContains("Gym class", content);
            assertContains("www.alfresco.com", content);
        }
    }

    /**
     * Parses the stream and checks that only the content type is set: the descriptive
     * metadata must be absent and the body text empty.
     * The stream is closed when the method returns.
     *
     * @param tis stream over the drawing to parse
     */
    private void testParserNoHeader(TikaInputStream tis) throws Exception {
        try (TikaInputStream in = tis) {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new DWGParser().parse(in, handler, metadata, new ParseContext());

            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));

            assertNull(metadata.get(TikaCoreProperties.TITLE));
            assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
            assertNull(metadata.get(TikaCoreProperties.CREATOR));
            assertNull(metadata.get(TikaCoreProperties.SUBJECT));
            assertNull(metadata.get(TikaCoreProperties.COMMENTS));
            assertNull(metadata.get(TikaCoreProperties.RELATION));

            String content = handler.toString();
            assertEquals("", content);
        }
    }

    /**
     * Parses the stream and checks the "Test Title" metadata, the custom property and
     * the body text. The stream is closed when the method returns.
     *
     * @param tis stream over the drawing to parse
     */
    private void testParserAlt(TikaInputStream tis) throws Exception {
        try (TikaInputStream in = tis) {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new DWGParser().parse(in, handler, metadata, new ParseContext());

            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));

            assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
            assertEquals("Test Subject", metadata.get(TikaCoreProperties.DESCRIPTION));
            assertEquals("My Author", metadata.get(TikaCoreProperties.CREATOR));
            assertEquals("My keyword1, MyKeyword2", metadata.get(TikaCoreProperties.SUBJECT));
            assertEquals("This is a comment", metadata.get(TikaCoreProperties.COMMENTS));
            assertEquals("bejanpol", metadata.get(TikaCoreProperties.MODIFIER));
            assertEquals("http://mycompany/drawings", metadata.get(TikaCoreProperties.RELATION));
            assertEquals("MyCustomPropertyValue",
                    metadata.get(DWGParser.DWG_CUSTOM_META_PREFIX + "MyCustomProperty"));

            String content = handler.toString();
            assertContains("This is a comment", content);
            assertContains("mycompany", content);
        }
    }

}