// This program is taken from the book "Java Tools for Extreme Programming".
import com.meterware.httpunit.*; import java.util.HashSet; import java.util.Set;
/**
 * Walks the links of a web site with HttpUnit, starting at the home page of
 * {@link #host}, and prints progress plus a final summary to standard output.
 *
 * <p>Only responses whose content type is {@code text/html} are scanned for
 * links, and only links whose host matches {@link #host} are fetched and
 * followed. Links to other hosts are recorded and printed but not requested.
 *
 * <p>The crawl is recursive ({@code checkAllLinks} -> {@code checkLink} ->
 * {@code checkAllLinks}), so the call depth grows with the length of the chain
 * of newly discovered pages.
 */
public class CheckSite {
    /** Conversation through which every page is requested. */
    private WebConversation conversation = new WebConversation();

    /**
     * Absolute URLs of every link encountered so far. A link is added before it
     * is checked, which is what stops the recursion from revisiting pages.
     */
    private Set<String> checkedLinks = new HashSet<String>();

    /** Host whose pages are crawled; links to any other host are not followed. */
    private String host = "www.sohu.com";

    /**
     * Runs a full check of the configured host.
     *
     * @param args ignored
     * @throws Exception if any request or link resolution fails
     */
    public static void main(String[] args) throws Exception {
        CheckSite cs = new CheckSite();
        cs.setUp();
        cs.testEntireSite();
    }

    /**
     * Resets the checker to a fresh state: a new conversation and an empty set
     * of checked links. The fields are also initialized at declaration, so
     * calling this before the first run is optional.
     */
    public void setUp() {
        conversation = new WebConversation();
        checkedLinks = new HashSet<String>();
    }

    /**
     * Fetches the home page of {@link #host}, checks every link reachable from
     * it, and prints how many links were recorded together with the links
     * themselves.
     *
     * @throws Exception if any request or link resolution fails
     */
    public void testEntireSite() throws Exception {
        WebResponse response = conversation.getResponse("http://" + host);
        checkAllLinks(response);
        System.out.println("Site check finished. Link's checked: " + checkedLinks.size()
                + " : " + checkedLinks);
    }

    /**
     * Checks every not-yet-seen link found in the given response. Non-HTML
     * responses are ignored.
     *
     * @param response the page whose links are to be checked
     * @throws Exception if a link cannot be resolved or a request fails
     */
    private void checkAllLinks(WebResponse response) throws Exception {
        if (!isHtml(response)) {
            return;
        }

        WebLink[] links = response.getLinks();
        System.out.println(response.getTitle() + " -- links found = " + links.length);

        for (int i = 0; i < links.length; i++) {
            // De-duplicate on the resolved absolute URL rather than on the raw
            // href text: the same relative href on two different pages can point
            // at two different targets, and keying on the raw text would skip
            // the second one.
            String absoluteUrl = links[i].getRequest().getURL().toExternalForm();
            boolean newLink = checkedLinks.add(absoluteUrl);
            if (newLink) {
                System.out.println("Total links checked so far: " + checkedLinks.size());
                checkLink(links[i]);
            }
        }
    }

    /**
     * Tells whether the response is an HTML page.
     *
     * @param response the response to inspect
     * @return {@code true} if the content type is {@code text/html}
     *         (compared case-insensitively; a {@code null} type yields {@code false})
     */
    private boolean isHtml(WebResponse response) {
        return "text/html".equalsIgnoreCase(response.getContentType());
    }

    /**
     * Prints the link's target and, if it points at {@link #host}, fetches it
     * and recursively checks the links of the resulting page.
     *
     * @param link the link to check
     * @throws Exception if the link cannot be resolved or the request fails
     */
    private void checkLink(WebLink link) throws Exception {
        WebRequest request = link.getRequest();
        java.net.URL url = request.getURL();
        System.out.println("checking link: " + url);

        // Host names are case-insensitive, so "WWW.Sohu.com" is still this site.
        String linkHost = url.getHost();
        if (this.host.equalsIgnoreCase(linkHost)) {
            WebResponse response = conversation.getResponse(request);
            this.checkAllLinks(response);
        }
    }
}
|