UnifiedDiffReader.java

/*
 * Copyright 2019 java-diff-utils.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.difflib.unifieddiff;

import com.github.difflib.patch.ChangeDelta;
import com.github.difflib.patch.Chunk;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.BiConsumer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 *
 * @author Tobias Warneke (t.warneke@gmx.net)
 */
public final class UnifiedDiffReader {

    static final Pattern UNIFIED_DIFF_CHUNK_REGEXP = Pattern.compile("^@@\\s+-(?:(\\d+)(?:,(\\d+))?)\\s+\\+(?:(\\d+)(?:,(\\d+))?)\\s+@@");
    static final Pattern TIMESTAMP_REGEXP = Pattern.compile("(\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}:\\d{2}\\.\\d{3,})(?: [+-]\\d+)?");

    private final InternalUnifiedDiffReader READER;
    private final UnifiedDiff data = new UnifiedDiff();

    private final UnifiedDiffLine DIFF_COMMAND = new UnifiedDiffLine(true, "^diff\\s", this::processDiff);
    private final UnifiedDiffLine SIMILARITY_INDEX = new UnifiedDiffLine(true, "^similarity index (\\d+)%$", this::processSimilarityIndex);
    private final UnifiedDiffLine INDEX = new UnifiedDiffLine(true, "^index\\s[\\da-zA-Z]+\\.\\.[\\da-zA-Z]+(\\s(\\d+))?$", this::processIndex);
    private final UnifiedDiffLine FROM_FILE = new UnifiedDiffLine(true, "^---\\s", this::processFromFile);
    private final UnifiedDiffLine TO_FILE = new UnifiedDiffLine(true, "^\\+\\+\\+\\s", this::processToFile);
    private final UnifiedDiffLine RENAME_FROM = new UnifiedDiffLine(true, "^rename\\sfrom\\s(.+)$", this::processRenameFrom);
    private final UnifiedDiffLine RENAME_TO = new UnifiedDiffLine(true, "^rename\\sto\\s(.+)$", this::processRenameTo);

    private final UnifiedDiffLine NEW_FILE_MODE = new UnifiedDiffLine(true, "^new\\sfile\\smode\\s(\\d+)", this::processNewFileMode);

    private final UnifiedDiffLine DELETED_FILE_MODE = new UnifiedDiffLine(true, "^deleted\\sfile\\smode\\s(\\d+)", this::processDeletedFileMode);
    private final UnifiedDiffLine OLD_MODE = new UnifiedDiffLine(true, "^old\\smode\\s(\\d+)", this::processOldMode);
    private final UnifiedDiffLine NEW_MODE = new UnifiedDiffLine(true, "^new\\smode\\s(\\d+)", this::processNewMode);
    private final UnifiedDiffLine BINARY_ADDED = new UnifiedDiffLine(true, "^Binary\\sfiles\\s/dev/null\\sand\\sb/(.+)\\sdiffer", this::processBinaryAdded);
    private final UnifiedDiffLine BINARY_DELETED = new UnifiedDiffLine(true, "^Binary\\sfiles\\sa/(.+)\\sand\\s/dev/null\\sdiffer", this::processBinaryDeleted);
    private final UnifiedDiffLine BINARY_EDITED = new UnifiedDiffLine(true, "^Binary\\sfiles\\sa/(.+)\\sand\\sb/(.+)\\sdiffer", this::processBinaryEdited);
    private final UnifiedDiffLine CHUNK = new UnifiedDiffLine(false, UNIFIED_DIFF_CHUNK_REGEXP, this::processChunk);
    private final UnifiedDiffLine LINE_NORMAL = new UnifiedDiffLine("^\\s", this::processNormalLine);
    private final UnifiedDiffLine LINE_DEL = new UnifiedDiffLine("^-", this::processDelLine);
    private final UnifiedDiffLine LINE_ADD = new UnifiedDiffLine("^\\+", this::processAddLine);

    private UnifiedDiffFile actualFile;

    UnifiedDiffReader(Reader reader) {
        this.READER = new InternalUnifiedDiffReader(reader);
    }

    // schema = [[/^\s+/, normal], [/^diff\s/, start], [/^new file mode \d+$/, new_file], 
    // [/^deleted file mode \d+$/, deleted_file], [/^index\s[\da-zA-Z]+\.\.[\da-zA-Z]+(\s(\d+))?$/, index], 
    // [/^---\s/, from_file], [/^\+\+\+\s/, to_file], [/^@@\s+\-(\d+),?(\d+)?\s+\+(\d+),?(\d+)?\s@@/, chunk], 
    // [/^-/, del], [/^\+/, add], [/^\\ No newline at end of file$/, eof]];
    private UnifiedDiff parse() throws IOException, UnifiedDiffParserException {
//        String headerTxt = "";
//        LOG.log(Level.FINE, "header parsing");
//        String line = null;
//        while (READER.ready()) {
//            line = READER.readLine();
//            LOG.log(Level.FINE, "parsing line {0}", line);
//            if (DIFF_COMMAND.validLine(line) || INDEX.validLine(line)
//                    || FROM_FILE.validLine(line) || TO_FILE.validLine(line)
//                    || NEW_FILE_MODE.validLine(line)) {
//                break;
//            } else {
//                headerTxt += line + "\n";
//            }
//        }
//        if (!"".equals(headerTxt)) {
//            data.setHeader(headerTxt);
//        }

        String line = READER.readLine();
        while (line != null) {
            String headerTxt = "";
            LOG.log(Level.FINE, "header parsing");
            while (line != null) {
                LOG.log(Level.FINE, "parsing line {0}", line);
                if (validLine(line, DIFF_COMMAND, SIMILARITY_INDEX, INDEX,
                            FROM_FILE, TO_FILE,
                            RENAME_FROM, RENAME_TO,
                            NEW_FILE_MODE, DELETED_FILE_MODE,
                            OLD_MODE, NEW_MODE,
                            BINARY_ADDED, BINARY_DELETED,
                            BINARY_EDITED, CHUNK)) {
                    break;
                } else {
                    headerTxt += line + "\n";
                }
                line = READER.readLine();
            }
            if (!"".equals(headerTxt)) {
                data.setHeader(headerTxt);
            }
            if (line != null && !CHUNK.validLine(line)) {
                initFileIfNecessary();
                while (line != null && !CHUNK.validLine(line)) {
                    if (!processLine(line, DIFF_COMMAND, SIMILARITY_INDEX, INDEX,
                            FROM_FILE, TO_FILE,
                            RENAME_FROM, RENAME_TO,
                            NEW_FILE_MODE, DELETED_FILE_MODE,
                            OLD_MODE, NEW_MODE,
                            BINARY_ADDED , BINARY_DELETED,
                            BINARY_EDITED)) {
                        throw new UnifiedDiffParserException("expected file start line not found");
                    }
                    line = READER.readLine();
                }
            }
            if (line != null) {
                processLine(line, CHUNK);
                while ((line = READER.readLine()) != null) {
                    line = checkForNoNewLineAtTheEndOfTheFile(line);

                    if (!processLine(line, LINE_NORMAL, LINE_ADD, LINE_DEL)) {
                        throw new UnifiedDiffParserException("expected data line not found");
                    }
                    if ((originalTxt.size() == old_size && revisedTxt.size() == new_size)
                            || (old_size == 0 && new_size == 0 && originalTxt.size() == this.old_ln
                            && revisedTxt.size() == this.new_ln)) {
                        finalizeChunk();
                        break;
                    }
                }
                line = READER.readLine();

                line = checkForNoNewLineAtTheEndOfTheFile(line);
            }
            if (line == null || (line.startsWith("--") && !line.startsWith("---"))) {
                break;
            }
        }

        if (READER.ready()) {
            String tailTxt = "";
            while (READER.ready()) {
                if (tailTxt.length() > 0) {
                    tailTxt += "\n";
                }
                tailTxt += READER.readLine();
            }
            data.setTailTxt(tailTxt);
        }

        return data;
    }

    private String checkForNoNewLineAtTheEndOfTheFile(String line) throws IOException {
        if ("\\ No newline at end of file".equals(line)) {
            actualFile.setNoNewLineAtTheEndOfTheFile(true);
            return READER.readLine();
        }
        return line;
    }

    static String[] parseFileNames(String line) {
        String[] split = line.split(" ");
        return new String[]{
            split[2].replaceAll("^a/", ""),
            split[3].replaceAll("^b/", "")
        };
    }

    private static final Logger LOG = Logger.getLogger(UnifiedDiffReader.class.getName());

    /**
     * To parse a diff file use this method.
     *
     * @param stream This is the diff file data.
     * @return In a UnifiedDiff structure this diff file data is returned.
     * @throws IOException
     * @throws UnifiedDiffParserException
     */
    public static UnifiedDiff parseUnifiedDiff(InputStream stream) throws IOException, UnifiedDiffParserException {
        UnifiedDiffReader parser = new UnifiedDiffReader(new BufferedReader(new InputStreamReader(stream)));
        return parser.parse();
    }

    private boolean processLine(String line, UnifiedDiffLine... rules) throws UnifiedDiffParserException {
        if (line == null) {
            return false;
        }
        for (UnifiedDiffLine rule : rules) {
            if (rule.processLine(line)) {
                LOG.fine("  >>> processed rule " + rule.toString());
                return true;
            }
        }
        LOG.warning("  >>> no rule matched " + line);
        return false;
        //throw new UnifiedDiffParserException("parsing error at line " + line);
    }
    
    private boolean validLine(String line, UnifiedDiffLine ... rules) {
        if (line == null) {
            return false;
        }
        for (UnifiedDiffLine rule : rules) {
            if (rule.validLine(line)) {
                LOG.fine("  >>> accepted rule " + rule.toString());
                return true;
            }
        }
        return false;
    }

    private void initFileIfNecessary() {
        if (!originalTxt.isEmpty() || !revisedTxt.isEmpty()) {
            throw new IllegalStateException();
        }
        actualFile = null;
        if (actualFile == null) {
            actualFile = new UnifiedDiffFile();
            data.addFile(actualFile);
        }
    }

    private void processDiff(MatchResult match, String line) {
        //initFileIfNecessary();
        LOG.log(Level.FINE, "start {0}", line);
        String[] fromTo = parseFileNames(READER.lastLine());
        actualFile.setFromFile(fromTo[0]);
        actualFile.setToFile(fromTo[1]);
        actualFile.setDiffCommand(line);
    }

    private void processSimilarityIndex(MatchResult match, String line) {
        actualFile.setSimilarityIndex(Integer.valueOf(match.group(1)));
    }

    private List<String> originalTxt = new ArrayList<>();
    private List<String> revisedTxt = new ArrayList<>();
    private List<Integer> addLineIdxList = new ArrayList<>();
    private List<Integer> delLineIdxList = new ArrayList<>();
    private int old_ln;
    private int old_size;
    private int new_ln;
    private int new_size;
    private int delLineIdx = 0;
    private int addLineIdx = 0;

    private void finalizeChunk() {
        if (!originalTxt.isEmpty() || !revisedTxt.isEmpty()) {
            actualFile.getPatch().addDelta(new ChangeDelta<>(new Chunk<>(
                    old_ln - 1, originalTxt, delLineIdxList), new Chunk<>(
                    new_ln - 1, revisedTxt, addLineIdxList)));
            old_ln = 0;
            new_ln = 0;
            originalTxt.clear();
            revisedTxt.clear();
            addLineIdxList.clear();
            delLineIdxList.clear();
            delLineIdx = 0;
            addLineIdx = 0;
        }
    }

    private void processNormalLine(MatchResult match, String line) {
        String cline = line.substring(1);
        originalTxt.add(cline);
        revisedTxt.add(cline);
        delLineIdx++;
        addLineIdx++;
    }

    private void processAddLine(MatchResult match, String line) {
        String cline = line.substring(1);
        revisedTxt.add(cline);
        addLineIdx++;
        addLineIdxList.add(new_ln - 1 + addLineIdx);
    }

    private void processDelLine(MatchResult match, String line) {
        String cline = line.substring(1);
        originalTxt.add(cline);
        delLineIdx++;
        delLineIdxList.add(old_ln - 1 + delLineIdx);
    }

    private void processChunk(MatchResult match, String chunkStart) {
        // finalizeChunk();
        old_ln = toInteger(match, 1, 1);
        old_size = toInteger(match, 2, 1);
        new_ln = toInteger(match, 3, 1);
        new_size = toInteger(match, 4, 1);
        if (old_ln == 0) {
            old_ln = 1;
        }
        if (new_ln == 0) {
            new_ln = 1;
        }
    }

    private static Integer toInteger(MatchResult match, int group, int defValue) throws NumberFormatException {
        return Integer.valueOf(Objects.toString(match.group(group), "" + defValue));
    }

    private void processIndex(MatchResult match, String line) {
        //initFileIfNecessary();
        LOG.log(Level.FINE, "index {0}", line);
        actualFile.setIndex(line.substring(6));
    }

    private void processFromFile(MatchResult match, String line) {
        //initFileIfNecessary();
        actualFile.setFromFile(extractFileName(line));
        actualFile.setFromTimestamp(extractTimestamp(line));
    }

    private void processToFile(MatchResult match, String line) {
        //initFileIfNecessary();
        actualFile.setToFile(extractFileName(line));
        actualFile.setToTimestamp(extractTimestamp(line));
    }

    private void processRenameFrom(MatchResult match, String line) {
        actualFile.setRenameFrom(match.group(1));
    }

    private void processRenameTo(MatchResult match, String line) {
        actualFile.setRenameTo(match.group(1));
    }

    private void processNewFileMode(MatchResult match, String line) {
        //initFileIfNecessary();
        actualFile.setNewFileMode(match.group(1));
    }

    private void processDeletedFileMode(MatchResult match, String line) {
        //initFileIfNecessary();
        actualFile.setDeletedFileMode(match.group(1));
    }

    private void processOldMode(MatchResult match, String line) {
        actualFile.setOldMode(match.group(1));
    }

    private void processNewMode(MatchResult match, String line) {
        actualFile.setNewMode(match.group(1));
    }

    private void processBinaryAdded(MatchResult match, String line) {
        actualFile.setBinaryAdded(match.group(1));
    }

    private void processBinaryDeleted(MatchResult match, String line) {
        actualFile.setBinaryDeleted(match.group(1));
    }

    private void processBinaryEdited(MatchResult match, String line) {
        actualFile.setBinaryEdited(match.group(1));
    }

    private String extractFileName(String _line) {
        Matcher matcher = TIMESTAMP_REGEXP.matcher(_line);
        String line = _line;
        if (matcher.find()) {
            line = line.substring(0, matcher.start());
        }
        line = line.split("\t")[0];
        return line.substring(4).replaceFirst("^(a|b|old|new)/", "")
                .trim();
    }

    private String extractTimestamp(String line) {
        Matcher matcher = TIMESTAMP_REGEXP.matcher(line);
        if (matcher.find()) {
            return matcher.group();
        }
        return null;
    }

    final class UnifiedDiffLine {

        private final Pattern pattern;
        private final BiConsumer<MatchResult, String> command;
        private final boolean stopsHeaderParsing;

        public UnifiedDiffLine(String pattern, BiConsumer<MatchResult, String> command) {
            this(false, pattern, command);
        }

        public UnifiedDiffLine(boolean stopsHeaderParsing, String pattern, BiConsumer<MatchResult, String> command) {
            this.pattern = Pattern.compile(pattern);
            this.command = command;
            this.stopsHeaderParsing = stopsHeaderParsing;
        }

        public UnifiedDiffLine(boolean stopsHeaderParsing, Pattern pattern, BiConsumer<MatchResult, String> command) {
            this.pattern = pattern;
            this.command = command;
            this.stopsHeaderParsing = stopsHeaderParsing;
        }

        public boolean validLine(String line) {
            Matcher m = pattern.matcher(line);
            return m.find();
        }

        public boolean processLine(String line) throws UnifiedDiffParserException {
            Matcher m = pattern.matcher(line);
            if (m.find()) {
                command.accept(m.toMatchResult(), line);
                return true;
            } else {
                return false;
            }
        }

        public boolean isStopsHeaderParsing() {
            return stopsHeaderParsing;
        }

        @Override
        public String toString() {
            return "UnifiedDiffLine{" + "pattern=" + pattern + ", stopsHeaderParsing=" + stopsHeaderParsing + '}';
        }
    }
}

class InternalUnifiedDiffReader extends BufferedReader {

    private String lastLine;

    public InternalUnifiedDiffReader(Reader reader) {
        super(reader);
    }

    @Override
    public String readLine() throws IOException {
        lastLine = super.readLine();
        return lastLine();
    }

    String lastLine() {
        return lastLine;
    }
}