// AbstractUnpackHandler.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.pipes.core.extractor;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.tika.extractor.UnpackHandler;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.utils.StringUtils;

/**
 * Skeleton {@link UnpackHandler} that remembers the id of every embedded
 * document handed to {@link #add(int, Metadata, InputStream)} and knows how to
 * build the emit key for an embedded document from an {@link UnpackConfig}.
 * <p>
 * This base class does not read the byte stream passed to {@code add}; it only
 * records the id. NOTE(review): concrete subclasses are presumably expected to
 * override {@code add}, call {@code super.add(...)}, and persist the bytes
 * themselves -- confirm against the subclasses.
 */
public abstract class AbstractUnpackHandler implements UnpackHandler {

    // Ids in the order they were passed to add(); package-private and returned
    // as-is (not copied) by getIds().
    List<Integer> ids = new ArrayList<>();

    /**
     * Builds the emit key for one embedded document.
     * <p>
     * With {@code KEY_BASE_STRATEGY.DEFAULT} the key is
     * {@code {containerEmitKey}-embed/{id}{suffix}}; with any other strategy it
     * is {@code {emitKeyBase}{embeddedIdPrefix}{id}{suffix}}, both taken from
     * the config. The id is left-padded with {@code '0'} to
     * {@code getZeroPadName()} characters when that value is positive. The
     * suffix is chosen by the config's suffix strategy.
     *
     * @param containerEmitKey emit key of the container document; only used
     *                         with the default key base strategy
     * @param embeddedId       numeric id of the embedded document
     * @param unpackConfig     configuration supplying padding, key base and
     *                         suffix strategy
     * @param metadata         metadata of the embedded document, consulted when
     *                         computing the suffix
     * @return the assembled emit key
     */
    public String getEmitKey(String containerEmitKey, int embeddedId,
                             UnpackConfig unpackConfig,
                             Metadata metadata) {
        int padWidth = unpackConfig.getZeroPadName();
        String idText = Integer.toString(embeddedId);
        if (padWidth > 0) {
            idText = StringUtils.leftPad(idText, padWidth, "0");
        }

        StringBuilder key = new StringBuilder();
        if (unpackConfig.getKeyBaseStrategy() != UnpackConfig.KEY_BASE_STRATEGY.DEFAULT) {
            // CUSTOM: configured base followed by the configured id prefix
            key.append(unpackConfig.getEmitKeyBase())
                    .append(unpackConfig.getEmbeddedIdPrefix());
        } else {
            // DEFAULT: {containerKey}-embed/
            key.append(containerEmitKey).append("-embed/");
        }
        key.append(idText);

        appendSuffix(key, metadata, unpackConfig);
        return key.toString();
    }

    /**
     * Records {@code id}. The metadata and the byte stream are not touched by
     * this implementation.
     *
     * @param id       id of the embedded document
     * @param metadata metadata of the embedded document (unused here)
     * @param bytes    content of the embedded document (unused here)
     * @throws IOException never thrown by this implementation; declared for
     *                     overriding implementations
     */
    @Override
    public void add(int id, Metadata metadata, InputStream bytes) throws IOException {
        ids.add(id);
    }

    /**
     * @return the live list of ids recorded so far, in insertion order
     */
    @Override
    public List<Integer> getIds() {
        return ids;
    }

    /**
     * Appends a file suffix to {@code emitKey} according to the configured
     * suffix strategy: {@code EXISTING} takes the suffix of the resource name
     * stored in the metadata, lower-cased; {@code DETECTED} asks
     * {@link FilenameUtils#calculateExtension} with {@code ".bin"} as the
     * fallback argument; any other strategy appends nothing.
     */
    private void appendSuffix(StringBuilder emitKey, Metadata metadata, UnpackConfig unpackConfig) {
        if (unpackConfig.getSuffixStrategy().equals(UnpackConfig.SUFFIX_STRATEGY.EXISTING)) {
            String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
            String existingSuffix = FilenameUtils.getSuffixFromPath(resourceName);
            emitKey.append(existingSuffix.toLowerCase(Locale.US));
            return;
        }
        if (unpackConfig.getSuffixStrategy().equals(UnpackConfig.SUFFIX_STRATEGY.DETECTED)) {
            emitKey.append(FilenameUtils.calculateExtension(metadata, ".bin"));
        }
    }
}