Office.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.metadata;

/**
 * Metadata property definitions for Office / Productivity Documents of all
 * forms, including (but not limited to) MS Office and OpenDocument formats.
 * The collection is a logical grouping: its properties may be drawn from a
 * few different external definitions.
 *
 * @since Apache Tika 1.2
 */
public interface Office {
    // Namespace URI and prefix, taken from the OpenDocumentFormat specification
    String NAMESPACE_URI_DOC_META = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
    String PREFIX_DOC_META = "meta";

    /**
     * Prefix attached to the key names of user defined metadata entries
     * found in the document. For example,
     * {@code <meta:user-defined meta:name="Info1">Text1</meta:user-defined>}
     * becomes {@code custom:Info1=Text1}.
     */
    String USER_DEFINED_METADATA_NAME_PREFIX = "custom:";


    /**
     * Keywords pertaining to a document. This is a composite property, so it
     * also populates {@link DublinCore#SUBJECT}.
     */
    Property KEYWORDS = Property.composite(
            Property.internalTextBag(
                    PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "keyword"),
            new Property[] {DublinCore.SUBJECT});

    /**
     * Name of the initial creator/author of a document.
     */
    Property INITIAL_AUTHOR = Property.internalText(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "initial-author");

    /**
     * Name of the last (most recent) author of a document.
     */
    Property LAST_AUTHOR = Property.internalText(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "last-author");

    /**
     * Name of the principal author(s) of a document.
     */
    Property AUTHOR = Property.internalTextBag(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "author");


    /**
     * Date on which the document was created.
     */
    Property CREATION_DATE = Property.internalDate(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "creation-date");

    /**
     * Date on which the document was last saved.
     */
    Property SAVE_DATE = Property.internalDate(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "save-date");

    /**
     * Date on which the document was last printed.
     */
    Property PRINT_DATE = Property.internalDate(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "print-date");


    /**
     * Number of Slides in the (presentation) document.
     */
    Property SLIDE_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "slide-count");

    /**
     * Number of Pages in the (paged) document.
     */
    Property PAGE_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "page-count");

    /**
     * Number of individual Paragraphs in the document.
     */
    Property PARAGRAPH_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "paragraph-count");

    /**
     * Number of lines in the document.
     */
    Property LINE_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "line-count");

    /**
     * Number of Words in the document.
     */
    Property WORD_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "word-count");

    /**
     * Number of Characters in the document.
     */
    Property CHARACTER_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "character-count");

    /**
     * Number of Characters in the document, including spaces.
     */
    Property CHARACTER_COUNT_WITH_SPACES = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "character-count-with-spaces");

    /**
     * Number of Tables in the document.
     */
    Property TABLE_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "table-count");

    /**
     * Number of Images in the document.
     */
    Property IMAGE_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "image-count");

    /**
     * Number of Objects in the document. These are typically non-Image
     * resources embedded in the document, such as other documents or
     * non-Image media.
     */
    Property OBJECT_COUNT = Property.internalInteger(PREFIX_DOC_META +
            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "object-count");

    /**
     * The "progID" that may be associated with an embedded file, such as
     * {@code Word.Document.12} or {@code AcroExch.Document.DC}.
     */
    Property PROG_ID =
            Property.internalText("msoffice:progID");

    // NOTE(review): the summaries on the properties from here down to
    // HAS_TRACK_CHANGES are inferred from the constant names and keys only;
    // confirm each against the parser that populates it.

    /**
     * Text property keyed {@code msoffice:ocxName}; presumably the name of an
     * OCX control associated with the document.
     */
    Property OCX_NAME =
            Property.internalText("msoffice:ocxName");

    /**
     * Text property keyed {@code msoffice:embeddedStorageClassId}; presumably
     * the class id of an embedded storage.
     */
    Property EMBEDDED_STORAGE_CLASS_ID =
            Property.internalText("msoffice:embeddedStorageClassId");

    /**
     * Excel flag; presumably set when a workbook contains hidden sheets.
     */
    Property HAS_HIDDEN_SHEETS =
            Property.internalBoolean("msoffice:excel:has-hidden-sheets");

    /**
     * Excel flag; presumably set when a workbook contains hidden columns.
     */
    Property HAS_HIDDEN_COLUMNS =
            Property.internalBoolean("msoffice:excel:has-hidden-cols");

    /**
     * Excel flag; presumably set when a workbook contains hidden rows.
     */
    Property HAS_HIDDEN_ROWS =
            Property.internalBoolean("msoffice:excel:has-hidden-rows");

    /**
     * Excel flag; presumably set when a workbook contains "very hidden" sheets.
     */
    Property HAS_VERY_HIDDEN_SHEETS =
            Property.internalBoolean("msoffice:excel:has-very-hidden-sheets");

    /**
     * Excel text bag; presumably the names of the hidden sheets.
     */
    Property HIDDEN_SHEET_NAMES =
            Property.internalTextBag("msoffice:excel:hidden-sheet-names");

    /**
     * Excel text bag; presumably the names of the "very hidden" sheets.
     */
    Property VERY_HIDDEN_SHEET_NAMES =
            Property.internalTextBag("msoffice:excel:very-hidden-sheet-names");

    /**
     * Excel flag; presumably set when a worksheet is protected.
     */
    Property PROTECTED_WORKSHEET =
            Property.internalBoolean("msoffice:excel:protected-worksheet");

    /**
     * Excel text property; presumably the code name of the workbook.
     */
    Property WORKBOOK_CODENAME =
            Property.internalText("msoffice:excel:workbook-codename");

    /**
     * Excel flag; presumably set when a workbook defines data connections.
     */
    Property HAS_DATA_CONNECTIONS =
            Property.internalBoolean("msoffice:excel:has-data-connections");

    /**
     * Excel flag; presumably set when a workbook contains external links.
     */
    Property HAS_EXTERNAL_LINKS =
            Property.internalBoolean("msoffice:excel:has-external-links");

    /**
     * Excel flag; presumably set when a workbook contains web queries.
     */
    Property HAS_WEB_QUERIES =
            Property.internalBoolean("msoffice:excel:has-web-queries");

    /**
     * Flag; presumably set when a document references external OLE objects.
     */
    Property HAS_EXTERNAL_OLE_OBJECTS =
            Property.internalBoolean("msoffice:has-external-ole-objects");

    /**
     * Flag; presumably set when a document contains field-based hyperlinks.
     */
    Property HAS_FIELD_HYPERLINKS =
            Property.internalBoolean("msoffice:has-field-hyperlinks");

    /**
     * Flag; presumably set when a document contains hover hyperlinks.
     */
    Property HAS_HOVER_HYPERLINKS =
            Property.internalBoolean("msoffice:has-hover-hyperlinks");

    /**
     * Flag; presumably set when a document contains hyperlinks in VML content.
     */
    Property HAS_VML_HYPERLINKS =
            Property.internalBoolean("msoffice:has-vml-hyperlinks");

    /**
     * Flag; presumably set when a document contains comments.
     */
    Property HAS_COMMENTS =
            Property.internalBoolean("msoffice:has-comments");

    /**
     * Text bag keyed {@code msoffice:comment-person-display-name}; presumably
     * the display names of the persons who wrote comments.
     */
    Property COMMENT_PERSONS =
            Property.internalTextBag("msoffice:comment-person-display-name");

    /**
     * PowerPoint flag; presumably set when a presentation has hidden slides.
     */
    Property HAS_HIDDEN_SLIDES =
            Property.internalBoolean("msoffice:ppt:has-hidden-slides");

    /**
     * PowerPoint integer; presumably the number of hidden slides.
     */
    Property NUM_HIDDEN_SLIDES =
            Property.internalInteger("msoffice:ppt:num-hidden-slides");

    /**
     * PowerPoint flag; presumably set when a presentation has animations.
     */
    Property HAS_ANIMATIONS =
            Property.internalBoolean("msoffice:ppt:has-animations");

    /**
     * Word-processing flag; presumably set when a document contains hidden
     * text. Related markers: w:vanish or isVanish or isFldVanish.
     */
    Property HAS_HIDDEN_TEXT =
            Property.internalBoolean("msoffice:doc:has-hidden-text");

    /**
     * Flag; presumably set when a document has track changes.
     */
    Property HAS_TRACK_CHANGES =
            Property.internalBoolean("msoffice:has-track-changes");

    /**
     * Security-relevant: DDE (Dynamic Data Exchange) links can execute commands.
     */
    Property HAS_DDE_LINKS =
            Property.internalBoolean("msoffice:excel:has-dde-links");

    /**
     * Security-relevant: Mail merge can reference external data sources.
     */
    Property HAS_MAIL_MERGE =
            Property.internalBoolean("msoffice:doc:has-mail-merge");

    /**
     * Security-relevant: Attached templates can be fetched from external URLs.
     */
    Property HAS_ATTACHED_TEMPLATE =
            Property.internalBoolean("msoffice:doc:has-attached-template");

    /**
     * Security-relevant: SubDocuments reference external documents in master docs.
     */
    Property HAS_SUBDOCUMENTS =
            Property.internalBoolean("msoffice:doc:has-subdocuments");

    /**
     * Security-relevant: Pivot tables can reference external OLAP/database sources.
     */
    Property HAS_EXTERNAL_PIVOT_DATA =
            Property.internalBoolean("msoffice:excel:has-external-pivot-data");

    /**
     * Security-relevant: Power Query can contain URLs and connection strings.
     */
    Property HAS_POWER_QUERY =
            Property.internalBoolean("msoffice:excel:has-power-query");

    /**
     * Security-relevant: OLE objects can link to external files (vs embedded).
     */
    Property HAS_LINKED_OLE_OBJECTS =
            Property.internalBoolean("msoffice:has-linked-ole-objects");

    /**
     * Security-relevant: Charts can reference external workbook data.
     */
    Property HAS_EXTERNAL_CHART_DATA =
            Property.internalBoolean("msoffice:has-external-chart-data");

    /**
     * Security-relevant: Framesets can load external URLs.
     */
    Property HAS_FRAMESETS =
            Property.internalBoolean("msoffice:doc:has-framesets");
}