// Mp3Parser.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.mp3;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import org.apache.tika.config.TikaComponent;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TailStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mp3.ID3Tags.ID3Comment;
import org.apache.tika.sax.XHTMLContentHandler;

/**
 * The <code>Mp3Parser</code> extracts metadata and text from MP3 files. It
 * reads any ID3v2 tag blocks (versions 2.2, 2.3 and 2.4) found at the start
 * of the stream, walks the audio frames to determine the duration and basic
 * audio properties, and finally looks for lyrics and an ID3v1 tag in the
 * tail of the stream.
 *
 * @see <a href="https://id3.org/ID3v1">MP3 ID3 Version 1 specification</a>
 * @see <a href="https://id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 Structure Specification</a>
 * @see <a href="https://id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 Frames Specification</a>
 */
@TikaComponent(name = "mp3-parser")
public class Mp3Parser implements Parser {

    /**
     * Serial version UID
     */
    private static final long serialVersionUID = 8537074922934844370L;

    private static final Set<MediaType> SUPPORTED_TYPES =
            Collections.singleton(MediaType.audio("mpeg"));

    /**
     * Scans the MP3 stream for ID3 tags, and creates ID3Tag Handlers
     * for each supported set of tags. The audio frames are iterated as
     * well, so that the first audio frame and the total duration are
     * available to the caller.
     *
     * @param tis     the stream to read the MP3 data from; it is consumed but
     *                not closed by this method
     * @param handler not used by this method
     * @return the tag handlers that reported tags (ordered newest version
     *         first), the lyrics handler, the first audio frame (or
     *         <code>null</code> if none was found) and the summed duration
     *         of all audio frames in milliseconds
     */
    protected static ID3TagsAndAudio getAllTagHandlers(InputStream tis, ContentHandler handler)
            throws IOException, SAXException, TikaException {
        ID3v24Handler v24 = null;
        ID3v23Handler v23 = null;
        ID3v22Handler v22 = null;
        AudioFrame firstAudio = null;

        // The tail of the stream is retained so that the trailing lyrics and
        // ID3v1 tag can be inspected after the audio frames have been read.
        // NOTE(review): 128 presumably is the ID3v1 tag size and 10240 the
        // room reserved for lyrics - confirm against LyricsHandler.
        TailStream tailStream = new TailStream(tis, 10240 + 128);
        MpegStream mpegStream = new MpegStream(tailStream);

        // ID3v2 tags live at the start of the file
        // You can apparently have several different ID3 tag blocks
        // So, keep going until we don't find any more
        // (a later block of the same version replaces an earlier one)
        MP3Frame f;
        while ((f = ID3v2Frame.createFrameIfPresent(mpegStream)) != null) {
            if (f instanceof ID3v2Frame) {
                ID3v2Frame id3F = (ID3v2Frame) f;
                if (id3F.getMajorVersion() == 4) {
                    v24 = new ID3v24Handler(id3F);
                } else if (id3F.getMajorVersion() == 3) {
                    v23 = new ID3v23Handler(id3F);
                } else if (id3F.getMajorVersion() == 2) {
                    v22 = new ID3v22Handler(id3F);
                }
            }
        }

        // Now iterate over all audio frames in the file, summing up their
        // durations. Stop when no further frame is found or when the
        // current frame could not be skipped.
        AudioFrame frame = mpegStream.nextFrame();
        float duration = 0;
        boolean skipped = true;
        while (frame != null && skipped) {
            duration += frame.getDuration();
            if (firstAudio == null) {
                firstAudio = frame;
            }
            skipped = mpegStream.skipFrame();
            if (skipped) {
                frame = mpegStream.nextFrame();
            }
        }

        // ID3v1 tags live at the end of the file
        // Lyrics live just before ID3v1, at the end of the file
        // Search for both (handlers seek to the end for us)
        LyricsHandler lyrics = new LyricsHandler(tailStream.getTail());
        ID3v1Handler v1 = lyrics.id3v1;

        // Go in order of preference
        // Currently, that's newest to oldest
        List<ID3Tags> tags = new ArrayList<>();

        if (v24 != null && v24.getTagsPresent()) {
            tags.add(v24);
        }
        if (v23 != null && v23.getTagsPresent()) {
            tags.add(v23);
        }
        if (v22 != null && v22.getTagsPresent()) {
            tags.add(v22);
        }
        if (v1 != null && v1.getTagsPresent()) {
            tags.add(v1);
        }

        ID3TagsAndAudio ret = new ID3TagsAndAudio();
        ret.audio = firstAudio;
        ret.lyrics = lyrics;
        ret.tags = tags.toArray(new ID3Tags[0]);
        ret.duration = duration;
        return ret;
    }

    /**
     * Copies the properties of the given audio frame (sample rate, channel
     * count, MPEG version) into the metadata.
     */
    private static void addAudioMetadata(Metadata metadata, AudioFrame audio) {
        metadata.set("samplerate", String.valueOf(audio.getSampleRate()));
        metadata.set("channels", String.valueOf(audio.getChannels()));
        metadata.set("version", audio.getVersion());

        metadata.set(XMPDM.AUDIO_SAMPLE_RATE, Integer.toString(audio.getSampleRate()));
        // Channel counts without a mapping leave the channel type unset
        if (audio.getChannels() == 1) {
            metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Mono");
        } else if (audio.getChannels() == 2) {
            metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Stereo");
        } else if (audio.getChannels() == 5) {
            metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "5.1");
        } else if (audio.getChannels() == 7) {
            metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "7.1");
        }
    }

    /**
     * Renders a comment as <code>language - description\ntext</code>,
     * leaving out every part that is <code>null</code>.
     */
    private static String formatComment(ID3Comment comment) {
        StringBuilder cmt = new StringBuilder();
        if (comment.getLanguage() != null) {
            cmt.append(comment.getLanguage());
            cmt.append(" - ");
        }
        if (comment.getDescription() != null) {
            cmt.append(comment.getDescription());
            if (comment.getText() != null) {
                cmt.append("\n");
            }
        }
        if (comment.getText() != null) {
            cmt.append(comment.getText());
        }
        return cmt.toString();
    }

    /**
     * Builds the <code>album, track N, disc M</code> line of the text output
     * from the values that are present. Missing values are left out instead
     * of being rendered as the text "null".
     *
     * @return the combined line, or an empty string if all values are missing
     */
    private static String describeAlbum(String album, String track, String disc) {
        StringBuilder sb = new StringBuilder();
        if (album != null) {
            sb.append(album);
        }
        if (track != null) {
            if (sb.length() > 0) {
                sb.append(", ");
            }
            sb.append("track ").append(track);
        }
        if (disc != null) {
            if (sb.length() > 0) {
                sb.append(", ");
            }
            sb.append("disc ").append(disc);
        }
        return sb.toString();
    }

    @Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses the MP3 stream: stores the audio properties and tag values in
     * the metadata and writes title, artist, album line, year, genre,
     * duration, comments and lyrics as XHTML to the handler.
     *
     * @param tis      the MP3 data
     * @param handler  receives the XHTML SAX events
     * @param metadata receives the extracted metadata
     * @param context  the parse context, passed on to the XHTML handler
     */
    @Override
    public void parse(TikaInputStream tis, ContentHandler handler, Metadata metadata,
                      ParseContext context) throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");
        metadata.set(XMPDM.AUDIO_COMPRESSOR, "MP3");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata, context);

        // Create handlers for the various kinds of ID3 tags
        ID3TagsAndAudio audioAndTags = getAllTagHandlers(tis, handler);

        // Before we start on the XHTML output, process and store
        //  as much metadata as possible
        if (audioAndTags.duration > 0) {
            metadata.set(XMPDM.DURATION, audioAndTags.durationSeconds());
        }
        if (audioAndTags.audio != null) {
            addAudioMetadata(metadata, audioAndTags.audio);
        }

        xhtml.startDocument();
        // Process tags metadata if the file has supported tags
        List<String> comments = new ArrayList<>();
        if (audioAndTags.tags.length > 0) {
            CompositeTagHandler tag = new CompositeTagHandler(audioAndTags.tags);

            metadata.set(TikaCoreProperties.TITLE, tag.getTitle());
            metadata.set(TikaCoreProperties.CREATOR, tag.getArtist());
            metadata.set(XMPDM.ARTIST, tag.getArtist());
            metadata.set(XMPDM.ALBUM_ARTIST, tag.getAlbumArtist());
            metadata.set(XMPDM.COMPOSER, tag.getComposer());
            metadata.set(XMPDM.ALBUM, tag.getAlbum());
            metadata.set(XMPDM.COMPILATION, tag.getCompilation());
            metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
            metadata.set(XMPDM.GENRE, tag.getGenre());

            for (ID3Comment comment : tag.getComments()) {
                String text = formatComment(comment);
                comments.add(text);
                metadata.add(XMPDM.LOG_COMMENT.getName(), text);
            }

            // ID3v1.1 Track addition
            String track = tag.getTrackNumber();
            if (track != null) {
                metadata.set(XMPDM.TRACK_NUMBER, track);
            }
            String disc = tag.getDisc();
            if (disc != null) {
                metadata.set(XMPDM.DISC_NUMBER, disc);
            }

            xhtml.element("h1", tag.getTitle());
            xhtml.element("p", tag.getArtist());

            // Only emit the album line if there is something to show
            String albumLine = describeAlbum(tag.getAlbum(), track, disc);
            if (!albumLine.isEmpty()) {
                xhtml.element("p", albumLine);
            }

            xhtml.element("p", tag.getYear());
            xhtml.element("p", tag.getGenre());
        }
        // The duration is written even when no tags were found
        xhtml.element("p", String.valueOf(audioAndTags.durationSeconds()));
        for (String comment : comments) {
            xhtml.element("p", comment);
        }

        if (audioAndTags.lyrics != null && audioAndTags.lyrics.hasLyrics()) {
            xhtml.startElement("p", "class", "lyrics");
            xhtml.characters(audioAndTags.lyrics.lyricsText);
            xhtml.endElement("p");
        }

        xhtml.endDocument();
    }

    /**
     * This statically sets the max record size in {@link ID3v2Frame}.
     * As the value is stored statically, it is not specific to this
     * parser instance.
     *
     * @param maxRecordSize the new maximum record size, handed on unchanged
     *                      to {@link ID3v2Frame}
     */
    public void setMaxRecordSize(int maxRecordSize) {
        ID3v2Frame.setMaxRecordSize(maxRecordSize);
    }

    /**
     * Returns the max record size currently set in {@link ID3v2Frame}.
     *
     * @return the maximum record size
     */
    public int getMaxRecordSize() {
        return ID3v2Frame.getMaxRecordSize();
    }

    /**
     * Holder for the results of {@link #getAllTagHandlers(InputStream, ContentHandler)}.
     */
    protected static class ID3TagsAndAudio {
        // Handlers that reported tags, in order of preference
        private ID3Tags[] tags;
        // First audio frame found, or null if there was none
        private AudioFrame audio;
        private LyricsHandler lyrics;
        private float duration; // Milliseconds

        /**
         * Returns the duration converted from milliseconds to seconds.
         */
        private float durationSeconds() {
            return duration / 1000;
        }
    }
}