LinkCheck.java
/* *******************************************************************
* Copyright (c) 2003 Contributors.
* All rights reserved.
* This program and the accompanying materials are made available
* under the terms of the Eclipse Public License v 2.0
* which accompanies this distribution and is available at
* https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt
*
* Contributors:
* Wes Isberg initial implementation
* ******************************************************************/
package org.aspectj.testing.util;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit;
import org.aspectj.bridge.IMessage;
import org.aspectj.bridge.IMessageHandler;
import org.aspectj.bridge.MessageHandler;
import org.aspectj.bridge.MessageUtil;
import org.aspectj.util.LangUtil;
//import org.aspectj.util.FileUtil;
/**
* Quick and dirty link checker.
* This checks that links into file: and https: targets work,
* and that links out of file: targets work.
*/
public class LinkCheck {
/*
* Known issues:
* - main interface only, though API's easily upgraded
* - https MalformedUrlExceptions on redirect
* - Swing won't quit without System.exit
* - single-threaded
*/
static final URL COMMAND_LINE;
static {
URL commandLine = null;
try {
commandLine = new URL("file://commandLine");
} catch (Throwable t) {
}
COMMAND_LINE = commandLine;
}
/** @param args file {-logFile {file} | -printInfo } */
public static void main(String[] args) {
final String syntax = "java "
+ LinkCheck.class.getName()
+ " file {-log <file> | -printInfo}..";
if ((null == args) || (0 >= args.length)) {
System.err.println(syntax);
System.exit(1);
}
final String startingURL = "file:///" + args[0].replace('\\', '/');
String logFile = null;
boolean printInfo = false;
for (int i = 1; i < args.length; i++) {
if ("-log".equals(args[i]) && ((i+1) < args.length)) {
logFile = args[++i];
} else if ("-printInfo".equals(args[i])) {
printInfo = true;
} else {
System.err.println(syntax);
System.exit(1);
}
}
final boolean useSystemOut = (null == logFile);
final MessageHandler mh;
final OutputStream out;
if (useSystemOut) {
mh = new MessageHandler();
out = null;
} else {
try {
out = new FileOutputStream(logFile);
} catch (FileNotFoundException e) {
e.printStackTrace();
return;
}
final PrintStream ps = new PrintStream(out, true);
final boolean printAll = printInfo;
mh = new MessageHandler() {
public boolean handleMessage(IMessage message) {
if (printAll || !message.isInfo()) {
ps.println(message.toString());
}
return super.handleMessage(message);
}
};
}
Link.Check exists
= Link.getProtocolChecker(new String[] {"file", "http"});
Link.Check contents
= Link.getProtocolChecker(new String[] {"file"});
LinkCheck me = new LinkCheck(mh, exists, contents);
me.addLinkToCheck(COMMAND_LINE, startingURL); // pwd as base?
try {
String label = "checking URLs from " + startingURL;
if (useSystemOut) {
System.out.println(label);
}
MessageUtil.info("START " + label);
long start = System.currentTimeMillis();
me.run();
long duration = (System.currentTimeMillis() - start)/1000;
long numChecked = me.checkedUrls.size();
if (numChecked > 0) {
float cps = (float) duration / (float) numChecked;
StringBuilder sb = new StringBuilder();
sb.append("DONE. Checked " + numChecked);
sb.append(" URL's in " + duration);
sb.append(" seconds (" + cps);
sb.append(" seconds per URL).");
MessageUtil.info("END " + label + ": " + sb);
if (useSystemOut) {
System.out.println(sb.toString());
}
}
MessageUtil.info(MessageUtil.renderCounts(mh));
try {
if (null != out) {
out.flush();
}
} catch (IOException e) {
// ignore
}
if (useSystemOut && (null != logFile)) {
System.out.println("Find log in " + logFile);
}
} finally {
if (null != out) {
try {
out.close();
} catch (IOException e1) {
}
}
System.exit(mh.numMessages(IMessage.ERROR, true)); // XXX dumb swing
}
}
// private static boolean isCheckedFileType(URL url) {
// if (null == url) {
// return false;
// }
// String file = url.getFile();
// return !FileUtil.hasZipSuffix(file)
// && !file.endsWith(".pdf");
// }
private final Messages messages;
private final HTMLEditorKit.Parser parser; // XXX untested - stateful
private final List<Link> linksToCheck;
private final List<String> checkedUrls; // String (URL.toString)
private final List<String> validRefs; // String (URL.toString)
private final List<String> refsToCheck; // String (URL.toString)
private final Link.Check checkExists;
private final Link.Check checkContents;
public LinkCheck(IMessageHandler handler,
Link.Check checkExists,
Link.Check checkContents) {
LangUtil.throwIaxIfNull(handler, "handler");
LangUtil.throwIaxIfNull(checkExists, "checkExists");
LangUtil.throwIaxIfNull(checkContents, "checkContents");
this.messages = new Messages(handler);
linksToCheck = new ArrayList<>();
checkedUrls = new ArrayList<>();
refsToCheck = new ArrayList<>();
validRefs = new ArrayList<>();
parser = new HTMLEditorKit() {
public HTMLEditorKit.Parser getParser() {
return super.getParser();
}
}
.getParser();
this.checkExists = checkExists;
this.checkContents = checkContents;
}
public synchronized void addLinkToCheck(URL doc, String link) {
URL linkURL = makeURL(doc, link);
if (null == linkURL) {
// messages.addingNullLinkFrom(doc);
return;
}
String linkString = linkURL.toString();
if ((null != link) && !checkedUrls.contains(linkString) ) {
if (!checkExists.check(linkURL)) {
checkedUrls.add(linkString);
messages.acceptingUncheckedLink(doc, linkURL);
} else {
Link toAdd = new Link(doc, linkURL);
if (!linksToCheck.contains(toAdd)) { // equals overridden
linksToCheck.add(toAdd);
}
}
}
}
public synchronized void run() {
List<Link> list = new ArrayList<>();
while (0 < linksToCheck.size()) {
messages.checkingLinks(linksToCheck.size());
list.clear();
list.addAll(linksToCheck);
for (final Link link : list) {
String urlString = link.url.toString();
if (!checkedUrls.contains(urlString)) {
checkedUrls.add(urlString);
messages.checkingLink(link);
checkLink(link);
}
}
linksToCheck.removeAll(list);
}
// now check that all named references are accounted for
for (String ref : refsToCheck) {
if (!validRefs.contains(ref)) {
messages.namedReferenceNotFound(ref);
}
}
}
/** @return null if link known or if unable to create */
private URL makeURL(URL doc, String link) {
if (checkedUrls.contains(link)) {
return null;
}
URL result = null;
try {
result = new URL(link);
} catch (MalformedURLException e) {
if (null == doc) {
messages.malformedUrl(null, link, e);
} else {
try {
URL res = new URL(doc, link);
String resultString = res.toString();
if (checkedUrls.contains(resultString)) {
return null;
}
result = res;
} catch (MalformedURLException me) {
messages.malformedUrl(doc, link, me);
}
}
}
return result;
}
/** @param link a Link with a url we can handle */
private void checkLink(final Link link) {
if (handleAsRef(link)) {
return;
}
URL url = link.url;
InputStream input = null;
try {
URLConnection connection = url.openConnection();
if (null == connection) {
messages.cantOpenConnection(url);
return;
}
// get bad urls to fail on read before skipping by type
input = connection.getInputStream();
String type = connection.getContentType();
if (null == type) {
messages.noContentType(link);
} else if (!type.toLowerCase().startsWith("text/")) {
messages.notTextContentType(link);
} else {
boolean addingLinks = checkContents.check(url);
parser.parse(
new InputStreamReader(input),
new LinkListener(url, addingLinks), true);
}
} catch (IOException e) {
messages.exceptionReading(link, e);
} finally {
if (null != input) {
try {
input.close();
} catch (IOException e1) {
// ignore
}
}
}
}
/** @return true if link is to an internal ...#name */
private boolean handleAsRef(Link link) {
String ref = link.url.getRef();
if (!LangUtil.isEmpty(ref)) {
String refString = link.url.toString(); // XXX canonicalize?
if (!refsToCheck.contains(refString)) {
refsToCheck.add(refString);
// add pseudo-link to force reading of ref'd doc XXX hmm
int refLoc = refString.indexOf("#");
if (-1 == refLoc) {
messages.uncommentedReference(link);
} else {
refString = refString.substring(0, refLoc);
addLinkToCheck(link.doc, refString);
}
return true;
}
}
return false;
}
/** LinkListener callback */
private boolean addKnownNamedAnchor(URL doc, String name) {
String namedRef = "#" + name;
try {
String ref = new URL(doc + namedRef).toString();
if (!validRefs.contains(ref)) {
validRefs.add(ref);
}
return true;
} catch (MalformedURLException e) {
messages.malformedUrl(doc, namedRef, e);
return false;
}
}
private class Messages {
private final IMessageHandler handler;
private Messages(IMessageHandler handler) {
LangUtil.throwIaxIfNull(handler, "handler");
this.handler = handler;
}
private void info(String label, Object more) {
MessageUtil.info(handler, label + " " + more);
}
private void fail(String label, Object more, Throwable thrown) {
MessageUtil.fail(handler, label + " " + more, thrown);
}
private void uncommentedReference(Link link) {
info("uncommentedReference", link); // XXX bug?
}
// private void addingNullLinkFrom(URL doc) {
// info("addingNullLinkFrom", doc);
// }
//
// private void noContentCheck(Link link) {
// info("noContentCheck", link);
// }
private void notTextContentType(Link link) {
info("notTextContentType", link);
}
private void noContentType(Link link) {
info("noContentType", link);
}
private void checkingLinks(int i) {
info("checkingLinks", i);
}
private void checkingLink(Link link) {
info("checkingLink", link);
}
private void acceptingUncheckedLink(URL doc, URL link) {
info("acceptingUncheckedLink", "doc=" + doc + " link=" + link);
}
// private void cantHandleRefsYet(Link link) {
// info("cantHandleRefsYet", link.url);
// }
private void namedReferenceNotFound(String ref) {
// XXX find all references to this unfound named reference
fail("namedReferenceNotFound", ref, null);
}
private void malformedUrl(URL doc, String link, MalformedURLException e) {
fail("malformedUrl", "doc=" + doc + " link=" + link, e);
}
private void cantOpenConnection(URL url) {
fail("cantOpenConnection", url, null);
}
private void exceptionReading(Link link, IOException e) {
// only info if redirect from http to https
String m = e.getMessage();
if ((m != null)
&& (m.contains("protocol"))
&& (m.contains("https"))
&& "http".equals(link.url.getProtocol())) {
info("httpsRedirect", link);
return;
}
fail("exceptionReading", link, e);
}
private void nullLink(URL doc, Tag tag) {
// ignore - many tags do not have links
}
private void emptyLink(URL doc, Tag tag) {
fail("emptyLink", "doc=" + doc + " tag=" + tag, null);
}
}
/**
* Register named anchors and add any hrefs to the links to check.
*/
private class LinkListener extends HTMLEditorKit.ParserCallback {
private final URL doc;
private final boolean addingLinks;
private LinkListener(URL doc, boolean addingLinks) {
this.doc = doc;
this.addingLinks = addingLinks;
}
public void handleStartTag(
HTML.Tag tag,
MutableAttributeSet attributes,
int position) {
handleSimpleTag(tag, attributes, position);
}
public void handleSimpleTag(
HTML.Tag tag,
MutableAttributeSet attributes,
int position) { // XXX use position to emit context?
boolean isNameAnchor = registerIfNamedAnchor(tag, attributes);
if (!addingLinks) {
return;
}
Object key = HTML.Tag.FRAME == tag
? HTML.Attribute.SRC
: HTML.Attribute.HREF;
String link = (String) attributes.getAttribute(key);
if (null == link) {
if (!isNameAnchor) {
messages.nullLink(doc, tag);
}
} else if (0 == link.length()) {
if (!isNameAnchor) {
messages.emptyLink(doc, tag);
}
} else {
addLinkToCheck(doc, link);
}
}
private boolean registerIfNamedAnchor(
HTML.Tag tag,
MutableAttributeSet attributes) {
if (HTML.Tag.A.equals(tag)) {
String name
= (String) attributes.getAttribute(HTML.Attribute.NAME);
if (null != name) {
addKnownNamedAnchor(doc, name);
return true;
}
}
return false;
}
}
private static class Link {
private static final Check FALSE_CHECKER = new Check() {
public boolean check(Link link) { return false; }
public boolean check(URL url) { return false; }
};
private static Check getProtocolChecker(String[] protocols) {
final String[] input
= (String[]) LangUtil.safeCopy(protocols, protocols);
if (0 == input.length) {
return FALSE_CHECKER;
}
return new Check() {
final List list = Arrays.asList(input);
public boolean check(URL url) {
return (null != url) && list.contains(url.getProtocol());
}
};
}
private final URL doc;
private final URL url;
private String toString;
private Link(URL doc, URL url) {
LangUtil.throwIaxIfNull(doc, "doc");
LangUtil.throwIaxIfNull(url, "url");
this.doc = doc;
this.url = url;
}
public boolean equals(Object o) {
if (null == o) {
return false;
}
if (this == o) {
return true;
}
if (Link.class != o.getClass()) {
return false; // exact class
}
Link other = (Link) o;
return doc.equals(other) && url.equals(other);
//return toString().equals(o.toString());
}
public int hashCode() { // XXX
return doc.hashCode() + (url.hashCode() >> 4);
// return toString.hashCode();
}
public String toString() {
if (null == toString) {
toString = url + " linked from " + doc;
}
return toString;
}
private static class Check {
public boolean check(Link link) {
return (null != link) && check(link.url);
}
public boolean check(URL url) {
return (null != url);
}
}
}
}