DirectoryScanner.java
package org.codehaus.plexus.util;
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowledgement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.codehaus.org/)."
* Alternately, this acknowledgement may appear in the software itself,
* if and wherever such third-party acknowledgements normally appear.
*
* 4. The names "Ant" and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact codehaus@codehaus.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Group.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.codehaus.org/>.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
/**
* <p>Class for scanning a directory for files/directories which match certain criteria.</p>
*
* <p>These criteria consist of selectors and patterns which have been specified. With the selectors you can select which
* files you want to have included. Files which are not selected are excluded. With patterns you can include or exclude
* files based on their filename.</p>
*
* <p>The idea is simple. A given directory is recursively scanned for all files and directories. Each file/directory is
* matched against a set of selectors, including special support for matching against filenames with include and and
* exclude patterns. Only files/directories which match at least one pattern of the include pattern list or other file
* selector, and don't match any pattern of the exclude pattern list or fail to match against a required selector will
* be placed in the list of files/directories found.</p>
*
* <p>When no list of include patterns is supplied, "**" will be used, which means that everything will be matched. When no
* list of exclude patterns is supplied, an empty list is used, such that nothing will be excluded. When no selectors
* are supplied, none are applied.</p>
*
* <p>The filename pattern matching is done as follows: The name to be matched is split up in path segments. A path segment
* is the name of a directory or file, which is bounded by <code>File.separator</code> ('/' under UNIX, '\' under
* Windows). For example, "abc/def/ghi/xyz.java" is split up in the segments "abc", "def","ghi" and "xyz.java". The same
* is done for the pattern against which should be matched.</p>
*
* <p>The segments of the name and the pattern are then matched against each other. When '**' is used for a path segment in
* the pattern, it matches zero or more path segments of the name.</p>
*
* <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of the pattern and the
* string to match:<br>
* When a pattern starts with a <code>File.separator</code>, the string to match must also start with a
* <code>File.separator</code>. When a pattern does not start with a <code>File.separator</code>, the string to match
* may not start with a <code>File.separator</code>. When one of these rules is not obeyed, the string will not match.</p>
*
* <p>When a name path segment is matched against a pattern path segment, the following special characters can be used:<br>
* '*' matches zero or more characters<br>
* '?' matches one character.</p>
*
* Examples:
* <ul>
* <li>"**\*.class" matches all .class files/dirs in a directory tree.</li>
* <li>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and then ".java", in a
* directory called test.</li>
* <li>"**" matches everything in a directory tree.</li>
* <li>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent directory called test
* (e.g. "abc\test\def\ghi\XYZ123").</li>
* </ul>
*
* <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
* Example of usage:
* <pre>
* String[] includes = { "**\\*.class" };
* String[] excludes = { "modules\\*\\**" };
* ds.setIncludes( includes );
* ds.setExcludes( excludes );
* ds.setBasedir( new File( "test" ) );
* ds.setCaseSensitive( true );
* ds.scan();
*
* System.out.println( "FILES:" );
* String[] files = ds.getIncludedFiles();
* for ( int i = 0; i < files.length; i++ )
* {
* System.out.println( files[i] );
* }
* </pre>
*
* <p>This will scan a directory called test for .class files, but excludes all files in all proper subdirectories of a
* directory called "modules"</p>
*
* @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
* @author Magesh Umasankar
* @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
* @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
*/
public class DirectoryScanner extends AbstractScanner {
private static final String[] EMPTY_STRING_ARRAY = new String[0];
/**
* The base directory to be scanned.
*/
protected File basedir;
/**
* The files which matched at least one include and no excludes and were selected.
*/
protected ArrayList<String> filesIncluded;
/**
* The files which did not match any includes or selectors.
*/
protected ArrayList<String> filesNotIncluded;
/**
* The files which matched at least one include and at least one exclude.
*/
protected ArrayList<String> filesExcluded;
/**
* The directories which matched at least one include and no excludes and were selected.
*/
protected ArrayList<String> dirsIncluded;
/**
* The directories which were found and did not match any includes.
*/
protected ArrayList<String> dirsNotIncluded;
/**
* The directories which matched at least one include and at least one exclude.
*/
protected ArrayList<String> dirsExcluded;
/**
* The files which matched at least one include and no excludes and which a selector discarded.
*/
protected ArrayList<String> filesDeselected;
/**
* The directories which matched at least one include and no excludes but which a selector discarded.
*/
protected ArrayList<String> dirsDeselected;
/**
* Whether or not our results were built by a slow scan.
*/
protected boolean haveSlowResults = false;
/**
* Whether or not symbolic links should be followed.
*
* @since Ant 1.5
*/
private boolean followSymlinks = true;
/**
* Whether or not everything tested so far has been included.
*/
protected boolean everythingIncluded = true;
private final char[][] tokenizedEmpty = MatchPattern.tokenizePathToCharArray("", File.separator);
/**
* Sole constructor.
*/
public DirectoryScanner() {}
/**
* Sets the base directory to be scanned. This is the directory which is scanned recursively. All '/' and '\'
* characters are replaced by <code>File.separatorChar</code>, so the separator used need not match
* <code>File.separatorChar</code>.
*
* @param basedir The base directory to scan. Must not be <code>null</code>.
*/
public void setBasedir(String basedir) {
setBasedir(new File(basedir.replace('/', File.separatorChar).replace('\\', File.separatorChar)));
}
/**
* Sets the base directory to be scanned. This is the directory which is scanned recursively.
*
* @param basedir The base directory for scanning. Should not be <code>null</code>.
*/
public void setBasedir(File basedir) {
this.basedir = basedir;
}
/**
* Returns the base directory to be scanned. This is the directory which is scanned recursively.
*
* @return the base directory to be scanned
*/
@Override
public File getBasedir() {
return basedir;
}
/**
* Sets whether or not symbolic links should be followed.
*
* @param followSymlinks whether or not symbolic links should be followed
*/
public void setFollowSymlinks(boolean followSymlinks) {
this.followSymlinks = followSymlinks;
}
/**
* Returns whether or not the scanner has included all the files or directories it has come across so far.
*
* @return <code>true</code> if all files and directories which have been found so far have been included.
*/
public boolean isEverythingIncluded() {
return everythingIncluded;
}
/**
* Scans the base directory for files which match at least one include pattern and don't match any exclude patterns.
* If there are selectors then the files must pass muster there, as well.
*
* @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is <code>null</code>, doesn't
* exist, or isn't a directory).
*/
@Override
public void scan() throws IllegalStateException {
if (basedir == null) {
throw new IllegalStateException("No basedir set");
}
if (!basedir.exists()) {
throw new IllegalStateException("basedir " + basedir + " does not exist");
}
if (!basedir.isDirectory()) {
throw new IllegalStateException("basedir " + basedir + " is not a directory");
}
setupDefaultFilters();
setupMatchPatterns();
filesIncluded = new ArrayList<String>();
filesNotIncluded = new ArrayList<String>();
filesExcluded = new ArrayList<String>();
filesDeselected = new ArrayList<String>();
dirsIncluded = new ArrayList<String>();
dirsNotIncluded = new ArrayList<String>();
dirsExcluded = new ArrayList<String>();
dirsDeselected = new ArrayList<String>();
if (isIncluded("", tokenizedEmpty)) {
if (!isExcluded("", tokenizedEmpty)) {
if (isSelected("", basedir)) {
dirsIncluded.add("");
} else {
dirsDeselected.add("");
}
} else {
dirsExcluded.add("");
}
} else {
dirsNotIncluded.add("");
}
scandir(basedir, "", true);
}
/**
* <p>Top level invocation for a slow scan. A slow scan builds up a full list of excluded/included files/directories,
* whereas a fast scan will only have full results for included files, as it ignores directories which can't
* possibly hold any included files/directories.</p>
*
* <p>Returns immediately if a slow scan has already been completed.</p>
*/
protected void slowScan() {
if (haveSlowResults) {
return;
}
String[] excl = dirsExcluded.toArray(EMPTY_STRING_ARRAY);
String[] notIncl = dirsNotIncluded.toArray(EMPTY_STRING_ARRAY);
for (String anExcl : excl) {
if (!couldHoldIncluded(anExcl)) {
scandir(new File(basedir, anExcl), anExcl + File.separator, false);
}
}
for (String aNotIncl : notIncl) {
if (!couldHoldIncluded(aNotIncl)) {
scandir(new File(basedir, aNotIncl), aNotIncl + File.separator, false);
}
}
haveSlowResults = true;
}
/**
* Scans the given directory for files and directories. Found files and directories are placed in their respective
* collections, based on the matching of includes, excludes, and the selectors. When a directory is found, it is
* scanned recursively.
*
* @param dir The directory to scan. Must not be <code>null</code>.
* @param vpath The path relative to the base directory (needed to prevent problems with an absolute path when using
* dir). Must not be <code>null</code>.
* @param fast Whether or not this call is part of a fast scan.
* @see #filesIncluded
* @see #filesNotIncluded
* @see #filesExcluded
* @see #dirsIncluded
* @see #dirsNotIncluded
* @see #dirsExcluded
* @see #slowScan
*/
protected void scandir(File dir, String vpath, boolean fast) {
String[] newfiles = dir.list();
if (newfiles == null) {
/*
* two reasons are mentioned in the API docs for File.list (1) dir is not a directory. This is impossible as
* we wouldn't get here in this case. (2) an IO error occurred (why doesn't it throw an exception then???)
*/
/*
* [jdcasey] (2) is apparently happening to me, as this is killing one of my tests... this is affecting the
* assembly plugin, fwiw. I will initialize the newfiles array as zero-length for now. NOTE: I can't find
* the problematic code, as it appears to come from a native method in UnixFileSystem...
*/
/*
* [bentmann] A null array will also be returned from list() on NTFS when dir refers to a soft link or
* junction point whose target is not existent.
*/
newfiles = EMPTY_STRING_ARRAY;
// throw new IOException( "IO error scanning directory " + dir.getAbsolutePath() );
}
if (!followSymlinks) {
try {
if (isParentSymbolicLink(dir, null)) {
for (String newfile : newfiles) {
String name = vpath + newfile;
File file = new File(dir, newfile);
if (file.isDirectory()) {
dirsExcluded.add(name);
} else {
filesExcluded.add(name);
}
}
return;
}
} catch (IOException ioe) {
String msg = "IOException caught while checking for links!";
// will be caught and redirected to Ant's logging system
System.err.println(msg);
}
}
if (filenameComparator != null) {
Arrays.sort(newfiles, filenameComparator);
}
for (String newfile : newfiles) {
String name = vpath + newfile;
char[][] tokenizedName = MatchPattern.tokenizePathToCharArray(name, File.separator);
File file = new File(dir, newfile);
if (file.isDirectory()) {
if (isIncluded(name, tokenizedName)) {
if (!isExcluded(name, tokenizedName)) {
if (isSelected(name, file)) {
dirsIncluded.add(name);
if (fast) {
scandir(file, name + File.separator, fast);
}
} else {
everythingIncluded = false;
dirsDeselected.add(name);
if (fast && couldHoldIncluded(name)) {
scandir(file, name + File.separator, fast);
}
}
} else {
everythingIncluded = false;
dirsExcluded.add(name);
if (fast && couldHoldIncluded(name)) {
scandir(file, name + File.separator, fast);
}
}
} else {
everythingIncluded = false;
dirsNotIncluded.add(name);
if (fast && couldHoldIncluded(name)) {
scandir(file, name + File.separator, fast);
}
}
if (!fast) {
scandir(file, name + File.separator, fast);
}
} else if (file.isFile()) {
if (isIncluded(name, tokenizedName)) {
if (!isExcluded(name, tokenizedName)) {
if (isSelected(name, file)) {
filesIncluded.add(name);
} else {
everythingIncluded = false;
filesDeselected.add(name);
}
} else {
everythingIncluded = false;
filesExcluded.add(name);
}
} else {
everythingIncluded = false;
filesNotIncluded.add(name);
}
}
}
}
/**
* Tests whether a name should be selected.
*
* @param name the filename to check for selecting
* @param file the java.io.File object for this filename
* @return <code>false</code> when the selectors says that the file should not be selected, <code>true</code>
* otherwise.
*/
protected boolean isSelected(String name, File file) {
return true;
}
/**
* Returns the names of the files which matched at least one of the include patterns and none of the exclude
* patterns. The names are relative to the base directory.
*
* @return the names of the files which matched at least one of the include patterns and none of the exclude
* patterns.
*/
@Override
public String[] getIncludedFiles() {
return filesIncluded.toArray(EMPTY_STRING_ARRAY);
}
/**
* Returns the names of the files which matched none of the include patterns. The names are relative to the base
* directory. This involves performing a slow scan if one has not already been completed.
*
* @return the names of the files which matched none of the include patterns.
* @see #slowScan
*/
public String[] getNotIncludedFiles() {
slowScan();
return filesNotIncluded.toArray(EMPTY_STRING_ARRAY);
}
/**
* Returns the names of the files which matched at least one of the include patterns and at least one of the exclude
* patterns. The names are relative to the base directory. This involves performing a slow scan if one has not
* already been completed.
*
* @return the names of the files which matched at least one of the include patterns and at at least one of the
* exclude patterns.
* @see #slowScan
*/
public String[] getExcludedFiles() {
slowScan();
return filesExcluded.toArray(EMPTY_STRING_ARRAY);
}
/**
* <p>Returns the names of the files which were selected out and therefore not ultimately included.</p>
*
* <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
* completed.</p>
*
* @return the names of the files which were deselected.
* @see #slowScan
*/
public String[] getDeselectedFiles() {
slowScan();
return filesDeselected.toArray(EMPTY_STRING_ARRAY);
}
/**
* Returns the names of the directories which matched at least one of the include patterns and none of the exclude
* patterns. The names are relative to the base directory.
*
* @return the names of the directories which matched at least one of the include patterns and none of the exclude
* patterns.
*/
@Override
public String[] getIncludedDirectories() {
return dirsIncluded.toArray(EMPTY_STRING_ARRAY);
}
/**
* Returns the names of the directories which matched none of the include patterns. The names are relative to the
* base directory. This involves performing a slow scan if one has not already been completed.
*
* @return the names of the directories which matched none of the include patterns.
* @see #slowScan
*/
public String[] getNotIncludedDirectories() {
slowScan();
return dirsNotIncluded.toArray(EMPTY_STRING_ARRAY);
}
/**
* Returns the names of the directories which matched at least one of the include patterns and at least one of the
* exclude patterns. The names are relative to the base directory. This involves performing a slow scan if one has
* not already been completed.
*
* @return the names of the directories which matched at least one of the include patterns and at least one of the
* exclude patterns.
* @see #slowScan
*/
public String[] getExcludedDirectories() {
slowScan();
return dirsExcluded.toArray(EMPTY_STRING_ARRAY);
}
/**
* <p>Returns the names of the directories which were selected out and therefore not ultimately included.</p>
*
* <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
* completed.</p>
*
* @return the names of the directories which were deselected.
* @see #slowScan
*/
public String[] getDeselectedDirectories() {
slowScan();
return dirsDeselected.toArray(EMPTY_STRING_ARRAY);
}
/**
* <p>Checks whether a given file is a symbolic link.</p>
*
* <p>It doesn't really test for symbolic links but whether the canonical and absolute paths of the file are identical
* - this may lead to false positives on some platforms.
* </p>
*
* @param parent the parent directory of the file to test
* @param name the name of the file to test.
* @return true if it's a symbolic link
* @throws java.io.IOException .
* @since Ant 1.5
*/
public boolean isSymbolicLink(File parent, String name) throws IOException {
return NioFiles.isSymbolicLink(new File(parent, name));
}
/**
* <p>Checks whether the parent of this file is a symbolic link.</p>
*
* <p>For java versions prior to 7 It doesn't really test for symbolic links but whether the canonical and absolute
* paths of the file are identical - this may lead to false positives on some platforms.</p>
*
* @param parent the parent directory of the file to test
* @param name the name of the file to test.
* @return true if it's a symbolic link
* @throws java.io.IOException .
* @since Ant 1.5
*/
public boolean isParentSymbolicLink(File parent, String name) throws IOException {
return NioFiles.isSymbolicLink(parent);
}
}