花了一个中午搞定了一个单词计数器,可以按照字典和频率两种排序,功能还比较强大。 package treeroot.util; //anthor treeroot //since 2004-12-3 public class Word{ private String value; public Word(String value){ this.value=value.toLowerCase(); } private int count=1; protected void increase(){ count++; } public String getWord(){ return value; } public int getCount(){ return count; } public boolean equals(Object o){ return (o instanceof Word)&&(((Word)o).value.equals(value)); } public int hashCode(){ return value.hashCode(); } }
package treeroot.util;

//author treeroot
//since 2004-12-3
import java.util.*;
import java.util.regex.Pattern;
/**
 * Counts how often each word occurs in a piece of text and returns the words sorted
 * by a caller-supplied order.
 *
 * <p>A word is a run of ASCII letters. A single hyphen, underscore or apostrophe is
 * kept as part of a word only when it has a letter on both sides ("it's",
 * "well-known"); in every other position it separates words. Everything else
 * (digits, whitespace, punctuation) separates words. Words are compared without
 * regard to letter case.
 */
public class WordCount {

    /** One character that may join two letters inside a word. */
    private static final String JOINER = "[\\-_']";

    /** One character that is not an ASCII letter. */
    private static final String NON_LETTER = "[^a-zA-Z]";

    /**
     * Separator between words, compiled once instead of on every call. The alternatives
     * are tried in this order at each position:
     * a joiner preceded by at least one non-letter; a joiner followed by at least one
     * non-letter; a run of characters that are neither letters nor joiners; a joiner at
     * the very start of the text; a joiner at the very end of the text.
     */
    private static final Pattern SEPARATOR = Pattern.compile(
            "(" + NON_LETTER + "+" + JOINER + NON_LETTER + "*)"
            + "|(" + NON_LETTER + "*" + JOINER + NON_LETTER + "+)"
            + "|([^a-zA-Z\\-_']+)"
            + "|(^" + JOINER + ")"
            + "|(" + JOINER + "$)");

    /** Orders words alphabetically by their lower-cased text. */
    public static final Comparator DICTIONARY_ORDER = new Comparator() {
        public int compare(Object o1, Object o2) {
            Word first = (Word) o1;
            Word second = (Word) o2;
            return first.getWord().compareTo(second.getWord());
        }
    };

    /** Orders words by descending count; words with equal counts are ordered alphabetically. */
    public static final Comparator FREQUENCY_ORDER = new Comparator() {
        public int compare(Object o1, Object o2) {
            Word first = (Word) o1;
            Word second = (Word) o2;
            int firstCount = first.getCount();
            int secondCount = second.getCount();
            if (firstCount != secondCount) {
                // The higher count sorts first.
                return firstCount > secondCount ? -1 : 1;
            }
            return first.getWord().compareTo(second.getWord());
        }
    };

    /**
     * Splits the text into words, counts the occurrences of each distinct word and
     * returns the resulting {@code Word} objects sorted by {@code order}.
     *
     * @param s     the text to analyse; an empty text yields an empty set
     * @param order the comparator used to sort the words, for example
     *              {@link #DICTIONARY_ORDER} or {@link #FREQUENCY_ORDER}
     * @return a synchronized sorted set holding one {@code Word} per distinct word
     * @throws NullPointerException if {@code s} is null
     */
    public static Set getWordCount(String s, Comparator order) {
        if (s == null) {
            throw new NullPointerException("s");
        }

        // Maps each word to the instance that carries its running count.
        Map tally = new HashMap();
        String[] tokens = SEPARATOR.split(s);
        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i];
            if (token.length() == 0) {
                // split() yields an empty first token when the text starts with a
                // separator, and a single empty token for empty text; neither is a word.
                continue;
            }
            Word candidate = new Word(token);
            Word known = (Word) tally.get(candidate);
            if (known == null) {
                tally.put(candidate, candidate);
            } else {
                known.increase();
            }
        }

        // Sorting happens only after counting is complete, so the counts used by the
        // comparator no longer change while the words sit in the tree.
        Set sorted = new TreeSet(order);
        sorted.addAll(tally.values());
        return Collections.synchronizedSet(sorted);
    }

    /**
     * Demo: prints every word of a sample text with its count, most frequent first.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s="A regular expression, specified as a string, must first be compiled into an instance of this class. The resulting pattern can then be used to create a Matcher object that can match arbitrary character sequences against the regular expression. All of the state involved in performing a match resides in the matcher, so many matchers can share the same pattern. ";
        Set words = WordCount.getWordCount(s, WordCount.FREQUENCY_ORDER);
        for (Iterator it = words.iterator(); it.hasNext();) {
            Word w = (Word) it.next();
            // Longer words get fewer tabs so the counts roughly line up in one column.
            int tabs = 4 - w.getWord().length() / 8;
            System.out.print(w.getWord());
            for (int j = 0; j < tabs; j++) {
                System.out.print("\t");
            }
            System.out.println(w.getCount());
        }
    }
}

|