DiffRowGenerator.java
/*
* Copyright 2009-2017 java-diff-utils.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.difflib.text;
import com.github.difflib.DiffUtils;
import com.github.difflib.patch.AbstractDelta;
import com.github.difflib.patch.ChangeDelta;
import com.github.difflib.patch.Chunk;
import com.github.difflib.patch.DeleteDelta;
import com.github.difflib.patch.DeltaType;
import com.github.difflib.patch.InsertDelta;
import com.github.difflib.patch.Patch;
import com.github.difflib.text.DiffRow.Tag;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.util.stream.Collectors.toList;
/**
* This class for generating DiffRows for side-by-sidy view. You can customize
* the way of generating. For example, show inline diffs on not, ignoring white
* spaces or/and blank lines and so on. All parameters for generating are
* optional. If you do not specify them, the class will use the default values.
*
* These values are: showInlineDiffs = false; ignoreWhiteSpaces = true;
* ignoreBlankLines = true; ...
*
* For instantiating the DiffRowGenerator you should use the its builder. Like
* in example <code>
* DiffRowGenerator generator = new DiffRowGenerator.Builder().showInlineDiffs(true).
* ignoreWhiteSpaces(true).columnWidth(100).build();
* </code>
*/
public final class DiffRowGenerator {
public static final BiPredicate<String, String> DEFAULT_EQUALIZER = Object::equals;
public static final BiPredicate<String, String> IGNORE_WHITESPACE_EQUALIZER = (original, revised)
-> adjustWhitespace(original).equals(adjustWhitespace(revised));
public static final Function<String, String> LINE_NORMALIZER_FOR_HTML = StringUtils::normalize;
/**
* Splitting lines by character to achieve char by char diff checking.
*/
public static final Function<String, List<String>> SPLITTER_BY_CHARACTER = line -> {
List<String> list = new ArrayList<>(line.length());
for (Character character : line.toCharArray()) {
list.add(character.toString());
}
return list;
};
public static final Pattern SPLIT_BY_WORD_PATTERN = Pattern.compile("\\s+|[,.\\[\\](){}/\\\\*+\\-#]");
/**
* Splitting lines by word to achieve word by word diff checking.
*/
public static final Function<String, List<String>> SPLITTER_BY_WORD = line -> splitStringPreserveDelimiter(line, SPLIT_BY_WORD_PATTERN);
public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
public static Builder create() {
return new Builder();
}
private static String adjustWhitespace(String raw) {
return WHITESPACE_PATTERN.matcher(raw.trim()).replaceAll(" ");
}
protected final static List<String> splitStringPreserveDelimiter(String str, Pattern SPLIT_PATTERN) {
List<String> list = new ArrayList<>();
if (str != null) {
Matcher matcher = SPLIT_PATTERN.matcher(str);
int pos = 0;
while (matcher.find()) {
if (pos < matcher.start()) {
list.add(str.substring(pos, matcher.start()));
}
list.add(matcher.group());
pos = matcher.end();
}
if (pos < str.length()) {
list.add(str.substring(pos));
}
}
return list;
}
/**
* Wrap the elements in the sequence with the given tag
*
* @param startPosition the position from which tag should start. The
* counting start from a zero.
* @param endPosition the position before which tag should should be closed.
* @param tagGenerator the tag generator
*/
static void wrapInTag(List<String> sequence, int startPosition,
int endPosition, Tag tag, BiFunction<Tag, Boolean, String> tagGenerator,
Function<String, String> processDiffs, boolean replaceLinefeedWithSpace) {
int endPos = endPosition;
while (endPos >= startPosition) {
//search position for end tag
while (endPos > startPosition) {
if (!"\n".equals(sequence.get(endPos - 1))) {
break;
} else if (replaceLinefeedWithSpace) {
sequence.set(endPos - 1, " ");
break;
}
endPos--;
}
if (endPos == startPosition) {
break;
}
sequence.add(endPos, tagGenerator.apply(tag, false));
if (processDiffs != null) {
sequence.set(endPos - 1,
processDiffs.apply(sequence.get(endPos - 1)));
}
endPos--;
//search position for end tag
while (endPos > startPosition) {
if ("\n".equals(sequence.get(endPos - 1))) {
if (replaceLinefeedWithSpace) {
sequence.set(endPos - 1, " ");
} else {
break;
}
}
if (processDiffs != null) {
sequence.set(endPos - 1,
processDiffs.apply(sequence.get(endPos - 1)));
}
endPos--;
}
sequence.add(endPos, tagGenerator.apply(tag, true));
endPos--;
}
}
private final int columnWidth;
private final BiPredicate<String, String> equalizer;
private final boolean ignoreWhiteSpaces;
private final Function<String, List<String>> inlineDiffSplitter;
private final boolean mergeOriginalRevised;
private final BiFunction<Tag, Boolean, String> newTag;
private final BiFunction<Tag, Boolean, String> oldTag;
private final boolean reportLinesUnchanged;
private final Function<String, String> lineNormalizer;
private final Function<String, String> processDiffs;
private final boolean showInlineDiffs;
private final boolean replaceOriginalLinefeedInChangesWithSpaces;
private final boolean decompressDeltas;
private DiffRowGenerator(Builder builder) {
showInlineDiffs = builder.showInlineDiffs;
ignoreWhiteSpaces = builder.ignoreWhiteSpaces;
oldTag = builder.oldTag;
newTag = builder.newTag;
columnWidth = builder.columnWidth;
mergeOriginalRevised = builder.mergeOriginalRevised;
inlineDiffSplitter = builder.inlineDiffSplitter;
decompressDeltas = builder.decompressDeltas;
if (builder.equalizer != null) {
equalizer = builder.equalizer;
} else {
equalizer = ignoreWhiteSpaces ? IGNORE_WHITESPACE_EQUALIZER : DEFAULT_EQUALIZER;
}
reportLinesUnchanged = builder.reportLinesUnchanged;
lineNormalizer = builder.lineNormalizer;
processDiffs = builder.processDiffs;
replaceOriginalLinefeedInChangesWithSpaces = builder.replaceOriginalLinefeedInChangesWithSpaces;
Objects.requireNonNull(inlineDiffSplitter);
Objects.requireNonNull(lineNormalizer);
}
/**
* Get the DiffRows describing the difference between original and revised
* texts using the given patch. Useful for displaying side-by-side diff.
*
* @param original the original text
* @param revised the revised text
* @return the DiffRows between original and revised texts
*/
public List<DiffRow> generateDiffRows(List<String> original, List<String> revised) {
return generateDiffRows(original, DiffUtils.diff(original, revised, equalizer));
}
/**
* Generates the DiffRows describing the difference between original and
* revised texts using the given patch. Useful for displaying side-by-side
* diff.
*
* @param original the original text
* @param patch the given patch
* @return the DiffRows between original and revised texts
*/
public List<DiffRow> generateDiffRows(final List<String> original, Patch<String> patch) {
List<DiffRow> diffRows = new ArrayList<>();
int endPos = 0;
final List<AbstractDelta<String>> deltaList = patch.getDeltas();
if (decompressDeltas) {
for (AbstractDelta<String> originalDelta : deltaList) {
for (AbstractDelta<String> delta : decompressDeltas(originalDelta)) {
endPos = transformDeltaIntoDiffRow(original, endPos, diffRows, delta);
}
}
} else {
for (AbstractDelta<String> delta : deltaList) {
endPos = transformDeltaIntoDiffRow(original, endPos, diffRows, delta);
}
}
// Copy the final matching chunk if any.
for (String line : original.subList(endPos, original.size())) {
diffRows.add(buildDiffRow(Tag.EQUAL, line, line));
}
return diffRows;
}
/**
* Transforms one patch delta into a DiffRow object.
*/
private int transformDeltaIntoDiffRow(final List<String> original, int endPos, List<DiffRow> diffRows, AbstractDelta<String> delta) {
Chunk<String> orig = delta.getSource();
Chunk<String> rev = delta.getTarget();
for (String line : original.subList(endPos, orig.getPosition())) {
diffRows.add(buildDiffRow(Tag.EQUAL, line, line));
}
switch (delta.getType()) {
case INSERT:
for (String line : rev.getLines()) {
diffRows.add(buildDiffRow(Tag.INSERT, "", line));
}
break;
case DELETE:
for (String line : orig.getLines()) {
diffRows.add(buildDiffRow(Tag.DELETE, line, ""));
}
break;
default:
if (showInlineDiffs) {
diffRows.addAll(generateInlineDiffs(delta));
} else {
for (int j = 0; j < Math.max(orig.size(), rev.size()); j++) {
diffRows.add(buildDiffRow(Tag.CHANGE,
orig.getLines().size() > j ? orig.getLines().get(j) : "",
rev.getLines().size() > j ? rev.getLines().get(j) : ""));
}
}
}
return orig.last() + 1;
}
/**
* Decompresses ChangeDeltas with different source and target size to a
* ChangeDelta with same size and a following InsertDelta or DeleteDelta.
* With this problems of building DiffRows getting smaller.
*
* @param deltaList
*/
private List<AbstractDelta<String>> decompressDeltas(AbstractDelta<String> delta) {
if (delta.getType() == DeltaType.CHANGE && delta.getSource().size() != delta.getTarget().size()) {
List<AbstractDelta<String>> deltas = new ArrayList<>();
//System.out.println("decompress this " + delta);
int minSize = Math.min(delta.getSource().size(), delta.getTarget().size());
Chunk<String> orig = delta.getSource();
Chunk<String> rev = delta.getTarget();
deltas.add(new ChangeDelta<String>(
new Chunk<>(orig.getPosition(), orig.getLines().subList(0, minSize)),
new Chunk<>(rev.getPosition(), rev.getLines().subList(0, minSize))));
if (orig.getLines().size() < rev.getLines().size()) {
deltas.add(new InsertDelta<String>(
new Chunk<>(orig.getPosition() + minSize, Collections.emptyList()),
new Chunk<>(rev.getPosition() + minSize, rev.getLines().subList(minSize, rev.getLines().size()))));
} else {
deltas.add(new DeleteDelta<String>(
new Chunk<>(orig.getPosition() + minSize, orig.getLines().subList(minSize, orig.getLines().size())),
new Chunk<>(rev.getPosition() + minSize, Collections.emptyList())));
}
return deltas;
}
return Collections.singletonList(delta);
}
private DiffRow buildDiffRow(Tag type, String orgline, String newline) {
if (reportLinesUnchanged) {
return new DiffRow(type, orgline, newline);
} else {
String wrapOrg = preprocessLine(orgline);
if (Tag.DELETE == type) {
if (mergeOriginalRevised || showInlineDiffs) {
wrapOrg = oldTag.apply(type, true) + wrapOrg + oldTag.apply(type, false);
}
}
String wrapNew = preprocessLine(newline);
if (Tag.INSERT == type) {
if (mergeOriginalRevised) {
wrapOrg = newTag.apply(type, true) + wrapNew + newTag.apply(type, false);
} else if (showInlineDiffs) {
wrapNew = newTag.apply(type, true) + wrapNew + newTag.apply(type, false);
}
}
return new DiffRow(type, wrapOrg, wrapNew);
}
}
private DiffRow buildDiffRowWithoutNormalizing(Tag type, String orgline, String newline) {
return new DiffRow(type,
StringUtils.wrapText(orgline, columnWidth),
StringUtils.wrapText(newline, columnWidth));
}
List<String> normalizeLines(List<String> list) {
return reportLinesUnchanged
? list
: list.stream()
.map(lineNormalizer::apply)
.collect(toList());
}
/**
* Add the inline diffs for given delta
*
* @param delta the given delta
*/
private List<DiffRow> generateInlineDiffs(AbstractDelta<String> delta) {
List<String> orig = normalizeLines(delta.getSource().getLines());
List<String> rev = normalizeLines(delta.getTarget().getLines());
List<String> origList;
List<String> revList;
String joinedOrig = String.join("\n", orig);
String joinedRev = String.join("\n", rev);
origList = inlineDiffSplitter.apply(joinedOrig);
revList = inlineDiffSplitter.apply(joinedRev);
List<AbstractDelta<String>> inlineDeltas = DiffUtils.diff(origList, revList, equalizer).getDeltas();
Collections.reverse(inlineDeltas);
for (AbstractDelta<String> inlineDelta : inlineDeltas) {
Chunk<String> inlineOrig = inlineDelta.getSource();
Chunk<String> inlineRev = inlineDelta.getTarget();
if (inlineDelta.getType() == DeltaType.DELETE) {
wrapInTag(origList, inlineOrig.getPosition(), inlineOrig
.getPosition()
+ inlineOrig.size(), Tag.DELETE, oldTag, processDiffs, replaceOriginalLinefeedInChangesWithSpaces && mergeOriginalRevised);
} else if (inlineDelta.getType() == DeltaType.INSERT) {
if (mergeOriginalRevised) {
origList.addAll(inlineOrig.getPosition(),
revList.subList(inlineRev.getPosition(),
inlineRev.getPosition() + inlineRev.size()));
wrapInTag(origList, inlineOrig.getPosition(),
inlineOrig.getPosition() + inlineRev.size(),
Tag.INSERT, newTag, processDiffs, false);
} else {
wrapInTag(revList, inlineRev.getPosition(),
inlineRev.getPosition() + inlineRev.size(),
Tag.INSERT, newTag, processDiffs, false);
}
} else if (inlineDelta.getType() == DeltaType.CHANGE) {
if (mergeOriginalRevised) {
origList.addAll(inlineOrig.getPosition() + inlineOrig.size(),
revList.subList(inlineRev.getPosition(),
inlineRev.getPosition() + inlineRev.size()));
wrapInTag(origList, inlineOrig.getPosition() + inlineOrig.size(),
inlineOrig.getPosition() + inlineOrig.size() + inlineRev.size(),
Tag.CHANGE, newTag, processDiffs, false);
} else {
wrapInTag(revList, inlineRev.getPosition(),
inlineRev.getPosition() + inlineRev.size(),
Tag.CHANGE, newTag, processDiffs, false);
}
wrapInTag(origList, inlineOrig.getPosition(),
inlineOrig.getPosition() + inlineOrig.size(),
Tag.CHANGE, oldTag, processDiffs, replaceOriginalLinefeedInChangesWithSpaces && mergeOriginalRevised);
}
}
StringBuilder origResult = new StringBuilder();
StringBuilder revResult = new StringBuilder();
for (String character : origList) {
origResult.append(character);
}
for (String character : revList) {
revResult.append(character);
}
List<String> original = Arrays.asList(origResult.toString().split("\n"));
List<String> revised = Arrays.asList(revResult.toString().split("\n"));
List<DiffRow> diffRows = new ArrayList<>();
for (int j = 0; j < Math.max(original.size(), revised.size()); j++) {
diffRows.
add(buildDiffRowWithoutNormalizing(Tag.CHANGE,
original.size() > j ? original.get(j) : "",
revised.size() > j ? revised.get(j) : ""));
}
return diffRows;
}
private String preprocessLine(String line) {
if (columnWidth == 0) {
return lineNormalizer.apply(line);
} else {
return StringUtils.wrapText(lineNormalizer.apply(line), columnWidth);
}
}
/**
* This class used for building the DiffRowGenerator.
*
* @author dmitry
*
*/
public static class Builder {
private boolean showInlineDiffs = false;
private boolean ignoreWhiteSpaces = false;
private boolean decompressDeltas = true;
private BiFunction<Tag, Boolean, String> oldTag
= (tag, f) -> f ? "<span class=\"editOldInline\">" : "</span>";
private BiFunction<Tag, Boolean, String> newTag
= (tag, f) -> f ? "<span class=\"editNewInline\">" : "</span>";
private int columnWidth = 0;
private boolean mergeOriginalRevised = false;
private boolean reportLinesUnchanged = false;
private Function<String, List<String>> inlineDiffSplitter = SPLITTER_BY_CHARACTER;
private Function<String, String> lineNormalizer = LINE_NORMALIZER_FOR_HTML;
private Function<String, String> processDiffs = null;
private BiPredicate<String, String> equalizer = null;
private boolean replaceOriginalLinefeedInChangesWithSpaces = false;
private Builder() {
}
/**
* Show inline diffs in generating diff rows or not.
*
* @param val the value to set. Default: false.
* @return builder with configured showInlineDiff parameter
*/
public Builder showInlineDiffs(boolean val) {
showInlineDiffs = val;
return this;
}
/**
* Ignore white spaces in generating diff rows or not.
*
* @param val the value to set. Default: true.
* @return builder with configured ignoreWhiteSpaces parameter
*/
public Builder ignoreWhiteSpaces(boolean val) {
ignoreWhiteSpaces = val;
return this;
}
/**
* Report all lines without markup on the old or new text.
*
* @param val the value to set. Default: false.
* @return builder with configured reportLinesUnchanged parameter
*/
public Builder reportLinesUnchanged(final boolean val) {
reportLinesUnchanged = val;
return this;
}
/**
* Generator for Old-Text-Tags.
*
* @param generator the tag generator
* @return builder with configured ignoreBlankLines parameter
*/
public Builder oldTag(BiFunction<Tag, Boolean, String> generator) {
this.oldTag = generator;
return this;
}
/**
* Generator for Old-Text-Tags.
*
* @param generator the tag generator
* @return builder with configured ignoreBlankLines parameter
*/
public Builder oldTag(Function<Boolean, String> generator) {
this.oldTag = (tag, f) -> generator.apply(f);
return this;
}
/**
* Generator for New-Text-Tags.
*
* @param generator
* @return
*/
public Builder newTag(BiFunction<Tag, Boolean, String> generator) {
this.newTag = generator;
return this;
}
/**
* Generator for New-Text-Tags.
*
* @param generator
* @return
*/
public Builder newTag(Function<Boolean, String> generator) {
this.newTag = (tag, f) -> generator.apply(f);
return this;
}
/**
* Processor for diffed text parts. Here e.g. whitecharacters could be
* replaced by something visible.
*
* @param processDiffs
* @return
*/
public Builder processDiffs(Function<String, String> processDiffs) {
this.processDiffs = processDiffs;
return this;
}
/**
* Set the column width of generated lines of original and revised
* texts.
*
* @param width the width to set. Making it < 0 doesn't make any
* sense. Default 80.
* @return builder with config of column width
*/
public Builder columnWidth(int width) {
if (width >= 0) {
columnWidth = width;
}
return this;
}
/**
* Build the DiffRowGenerator. If some parameters is not set, the
* default values are used.
*
* @return the customized DiffRowGenerator
*/
public DiffRowGenerator build() {
return new DiffRowGenerator(this);
}
/**
* Merge the complete result within the original text. This makes sense
* for one line display.
*
* @param mergeOriginalRevised
* @return
*/
public Builder mergeOriginalRevised(boolean mergeOriginalRevised) {
this.mergeOriginalRevised = mergeOriginalRevised;
return this;
}
/**
* Deltas could be in a state, that would produce some unreasonable
* results within an inline diff. So the deltas are decompressed into
* smaller parts and rebuild. But this could result in more differences.
*
* @param decompressDeltas
* @return
*/
public Builder decompressDeltas(boolean decompressDeltas) {
this.decompressDeltas = decompressDeltas;
return this;
}
/**
* Per default each character is separatly processed. This variant
* introduces processing by word, which does not deliver in word
* changes. Therefore the whole word will be tagged as changed:
*
* <pre>
* false: (aBa : aba) -- changed: a(B)a : a(b)a
* true: (aBa : aba) -- changed: (aBa) : (aba)
* </pre>
*/
public Builder inlineDiffByWord(boolean inlineDiffByWord) {
inlineDiffSplitter = inlineDiffByWord ? SPLITTER_BY_WORD : SPLITTER_BY_CHARACTER;
return this;
}
/**
* To provide some customized splitting a splitter can be provided. Here
* someone could think about sentence splitter, comma splitter or stuff
* like that.
*
* @param inlineDiffSplitter
* @return
*/
public Builder inlineDiffBySplitter(Function<String, List<String>> inlineDiffSplitter) {
this.inlineDiffSplitter = inlineDiffSplitter;
return this;
}
/**
* By default DiffRowGenerator preprocesses lines for HTML output. Tabs
* and special HTML characters like "<" are replaced with its encoded
* value. To change this you can provide a customized line normalizer
* here.
*
* @param lineNormalizer
* @return
*/
public Builder lineNormalizer(Function<String, String> lineNormalizer) {
this.lineNormalizer = lineNormalizer;
return this;
}
/**
* Provide an equalizer for diff processing.
*
* @param equalizer equalizer for diff processing.
* @return builder with configured equalizer parameter
*/
public Builder equalizer(BiPredicate<String, String> equalizer) {
this.equalizer = equalizer;
return this;
}
/**
* Sometimes it happens that a change contains multiple lines. If there
* is no correspondence in old and new. To keep the merged line more
* readable the linefeeds could be replaced by spaces.
*
* @param replace
* @return
*/
public Builder replaceOriginalLinefeedInChangesWithSpaces(boolean replace) {
this.replaceOriginalLinefeedInChangesWithSpaces = replace;
return this;
}
}
}