HashMap是Java新的Collection Framework中用来代替Hashtable的一个实现。HashMap和Hashtable的区别是:HashMap是未经同步的,而且允许null键和null值。Hashtable继承自Dictionary,而且使用了Enumeration,所以不建议使用。 HashMap的声明如下: public class HashMap extends AbstractMap implements Map, Cloneable, Serializable 有关AbstractMap:http://blog.csdn.net/treeroot/archive/2004/09/20/110343.aspx 有关Map:http://blog.csdn.net/treeroot/archive/2004/09/20/110331.aspx 有关Cloneable:http://blog.csdn.net/treeroot/archive/2004/09/07/96936.aspx 这个类比较复杂,这里只重点分析了几个方法,后面涉及到的很多内部类都没有解释,不过它们都比较简单。
static final int DEFAULT_INITIAL_CAPACITY = 16; 默认初始化大小
static final int MAXIMUM_CAPACITY = 1 << 30; 最大初始化大小
static final float DEFAULT_LOAD_FACTOR = 0.75f; 默认加载因子
transient Entry[] table; 一个Entry类型的数组,数组的长度为2的指数。
transient int size; 映射的个数
int threshold; 下一次扩容时的值
final float loadFactor; 加载因子
transient volatile int modCount; 修改次数
/**
 * Constructs an empty map with the requested initial capacity and load factor.
 * The real capacity is the smallest power of two that is not less than the
 * (clamped) request.
 *
 * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY are clamped
 * @param loadFactor      load factor; must be positive and not NaN
 * @throws IllegalArgumentException if initialCapacity is negative or loadFactor is
 *                                  non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    }
    if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
        throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
    }

    int wanted = Math.min(initialCapacity, MAXIMUM_CAPACITY);

    // Round up to a power of two so that indexFor() can use a bit mask.
    int capacity = 1;
    while (capacity < wanted) {
        capacity <<= 1;
    }

    this.loadFactor = loadFactor;
    this.threshold = (int) (capacity * loadFactor);
    this.table = new Entry[capacity];
    init();
}
/**
 * Constructs an empty map with the requested initial capacity and the
 * default load factor.
 *
 * @param initialCapacity requested capacity
 */
public HashMap(int initialCapacity) {
    this(initialCapacity, DEFAULT_LOAD_FACTOR);
}
/**
 * Constructs an empty map with the default capacity and load factor.
 *
 * The version quoted in the original article set the threshold to
 * DEFAULT_INITIAL_CAPACITY itself, which the article flags as a slip: the
 * first resize would then happen at 16 entries instead of 16 * 0.75 = 12.
 * The threshold is computed here the same way as in the two-argument
 * constructor.
 */
public HashMap() {
    this.loadFactor = DEFAULT_LOAD_FACTOR;
    threshold = (int) (DEFAULT_INITIAL_CAPACITY * DEFAULT_LOAD_FACTOR);
    table = new Entry[DEFAULT_INITIAL_CAPACITY];
    init();
}
/**
 * Constructs a map holding the same mappings as {@code m}. The capacity is
 * chosen so that all of m's entries fit under the default load factor, but
 * is never below the default initial capacity.
 *
 * @param m the map whose mappings are copied
 */
public HashMap(Map m) {
    this(
        Math.max((int) (m.size() / DEFAULT_LOAD_FACTOR) + 1, DEFAULT_INITIAL_CAPACITY),
        DEFAULT_LOAD_FACTOR);
    putAllForCreate(m);
}
/**
 * Initialization hook. It is called by the constructors, by clone() and by
 * readObject() once the bucket array exists; this implementation does nothing.
 */
void init() {
}
/** Sentinel stored in the table in place of a null key. */
static final Object NULL_KEY = new Object();

/**
 * Replaces a null key with the NULL_KEY sentinel; any other key is returned as is.
 */
static Object maskNull(Object key) {
    if (key == null) {
        return NULL_KEY;
    }
    return key;
}

/**
 * Reverses maskNull: the NULL_KEY sentinel becomes null again, any other key
 * is returned as is.
 */
static Object unmaskNull(Object key) {
    if (key == NULL_KEY) {
        return null;
    }
    return key;
}
static int hash(Object x) { int h = x.hashCode(); h += ~(h << 9); h ^= (h >>> 14); h += (h << 4); h ^= (h >>> 10); return h; } 在HashTable中没有这个方法,也就是说HashTable中是直接用对象的hashCode值,但是HashMap做了改进 用这个算法来获得哈希值。
/**
 * Key equality test: true when both references are identical or when
 * {@code x.equals(y)} holds. {@code x} must not be null.
 */
static boolean eq(Object x, Object y) {
    if (x == y) {
        return true;
    }
    return x.equals(y);
}
static int indexFor(int h, int length) { return h & (length-1); } 根据哈希值和数组的长度来返回该hash值在数组中的位置,只是简单的与关系。
/** Returns the number of key-value mappings in this map. */
public int size() {
    return this.size;
}
/** Returns true when this map holds no mappings. */
public boolean isEmpty() {
    return 0 == this.size;
}
public Object get(Object key) { Object k = maskNull(key); int hash = hash(k); int i = indexFor(hash, table.length); Entry e = table[i]; while (true) { if (e == null) return e; if (e.hash == hash && eq(k, e.key)) return e.value; e = e.next; } } 这个方法是获取数据的方法,首先获得哈希值,这里把null值掩饰了,并且hash值经过函数hash()修正。 然后计算该哈希值在数组中的索引值。如果该索引处的引用为null,表示HashMap中不存在这个映射。 否则的话遍历整个链表,这里找到了就返回,如果没有找到就遍历到链表末尾,返回null。这里的比较是这样的:e.hash==hash && eq(k,e.key) 也就是说如果hash不同就肯定认为不相等,eq就被短路了,只有在 hash相同的情况下才调用equals方法。现在我们该明白Object中说的如果两个对象equals返回true,他们的 hashCode应该相同的道理了吧。假如两个对象调用equals返回true,但是hashCode不一样,那么在HashMap 里就认为他们不相等。
public boolean containsKey(Object key) { Object k = maskNull(key); int hash = hash(k); int i = indexFor(hash, table.length); Entry e = table[i]; while (e != null) { if (e.hash == hash && eq(k, e.key)) return true; e = e.next; } return false; } 这个方法比上面的简单,先找到哈希位置,再遍历整个链表,如果找到就返回true。 Entry getEntry(Object key) { Object k = maskNull(key); int hash = hash(k); int i = indexFor(hash, table.length); Entry e = table[i]; while (e != null && !(e.hash == hash && eq(k, e.key))) e = e.next; return e; } 这个方法根据key值返回Entry节点,也是先获得索引位置,再遍历链表,如果没有找到返回的是null。
public Object put(Object key, Object value) { Object k = maskNull(key); int hash = hash(k); int i = indexFor(hash, table.length); for (Entry e = table[i]; e != null; e = e.next) { if (e.hash == hash && eq(k, e.key)) { Object oldValue = e.value; e.value = value; e.recordAccess(this); return oldValue; } } modCount++; addEntry(hash, k, value, i); return null; } 首先获得hash索引位置,如果该位置的引用为null,那么直接插入一个映射,返回null。如果此处的引用不是null,必须遍历链表,如果找到一个相同的key,那么就更新该value,同时返回原来的value值。如果遍历完了没有找到,说明该key值不存在,还是插入一个映射。如果hash值足够离散的话,也就是说该索引没有被使用的话,那么不不用遍历链表了。相反,如果hash值不离散,极端的说如果是常数的话,所有的映射都会在这一个链表上,效率会极其低下。这里举一个最简单的例子,写两 个不同的类作为key插入到HashMap中,效率会远远不同。 class Good{ int i; public Good(int i){ this.i=i; } public boolean equals(Object o){ return (o instanceof Good) && (this.i==((Good)o).i) } public int hashCode(){ return i; } } class Bad{ int i; public Good(int i){ this.i=i; } public boolean equals(Object o){ return (o instanceof Good) && (this.i==((Good)o).i) } public int hashCode(){ return 0; } } 执行代码: Map m1=new HashMap(); Map m2=new HashMap(); for(int i=0;i<100;i++){ m1.put(new Good(i),new Integer(i)); //这里效率非常高 } for(int i=0;i<100;i++){ m2.put(new Bad(i),new Integer(i)); //这里几乎要崩溃 } 上面的是两个非常极端的例子,执行一下就知道差别有多大。
/**
 * Variant of put used while populating a freshly created table: it replaces
 * the value of an existing key or adds a new entry through createEntry, and
 * touches neither modCount nor the resize logic.
 */
private void putForCreate(Object key, Object value) {
    Object masked = maskNull(key);
    int h = hash(masked);
    int bucket = indexFor(h, table.length);

    Entry node = table[bucket];
    while (node != null) {
        if (node.hash == h && eq(masked, node.key)) {
            node.value = value;
            return;
        }
        node = node.next;
    }

    createEntry(h, masked, value, bucket);
}
void putAllForCreate(Map m) { for (Iterator i = m.entrySet().iterator(); i.hasNext(); ) { Map.Entry e = (Map.Entry) i.next(); putForCreate(e.getKey(), e.getValue()); } } 上面的两个方法是被构造函数和clone方法调用的。
void resize(int newCapacity) { Entry[] oldTable = table; int oldCapacity = oldTable.length; if (size < threshold || oldCapacity > newCapacity) return; Entry[] newTable = new Entry[newCapacity]; transfer(newTable); table = newTable; threshold = (int)(newCapacity * loadFactor); } 这个方法在需要的时候重新分配空间,相当于ArrayList的ensureCapacity方法,不过这个更加复杂。
void transfer(Entry[] newTable) { Entry[] src = table; int newCapacity = newTable.length; for (int j = 0; j < src.length; j++) { Entry e = src[j]; if (e != null) { src[j] = null; do { Entry next = e.next; int i = indexFor(e.hash, newCapacity); e.next = newTable[i]; newTable[i] = e; e = next; } while (e != null); } } } 遍历原来的数组,如果该Entry不是null的话,说明有映射,然后遍历这个链表,把所有的映射插入到新的数组中,注意这里要从新计算索引位置。
public void putAll(Map t) { int n = t.size(); if (n == 0) return; if (n >= threshold) { n = (int)(n / loadFactor + 1); if (n > MAXIMUM_CAPACITY) n = MAXIMUM_CAPACITY; int capacity = table.length; while (capacity < n) capacity <<= 1; resize(capacity); } for (Iterator i = t.entrySet().iterator(); i.hasNext(); ) { Map.Entry e = (Map.Entry) i.next(); put(e.getKey(), e.getValue()); } } 这个方法先确定是否需要扩大空间,然后循环调用put方法。
/**
 * Removes the mapping for {@code key}, if any.
 *
 * @param key key whose mapping is removed (may be null)
 * @return the value that was mapped to key, or null if there was no mapping
 */
public Object remove(Object key) {
    Entry removed = removeEntryForKey(key);
    if (removed == null) {
        return null;
    }
    return removed.value;
}
Entry removeEntryForKey(Object key) { Object k = maskNull(key); int hash = hash(k); int i = indexFor(hash, table.length); Entry prev = table[i]; Entry e = prev; while (e != null) { 如果e==null表示不存在 Entry next = e.next; if (e.hash == hash && eq(k, e.key)) { modCount++; size--; if (prev == e) table[i] = next; 链表的第一个元素就是要删除的,这里最好加一句 e.next=null. else prev.next = next; 存在担不是链表的第一个元素, 这里最好加一句 e.next=null. e.recordRemoval(this); return e; } prev = e; e = next; } return e; 这里其实就是return null; } 这个方法其实也不复杂,也是遍历链表,这里建议加一句e.next=null,可以改为 if(prev==e) table[i]=next; else prev.next=next; e.next=null; 这一句是多加的,可以提高效率。 这里简单说明我的看法: 因为e是被删除的节点,删除它其实就是指向它的指针指向它的后面一个节点。所以e可以作为GC回收的对象。 可以e还有一个next指针指向我们的数据,如果e没有被回收。而且此时e.next指向的节点也变为没用的了,但是 却有一个它的引用(e.next),所以虽然e的下一个节点没用了,但是却不能作为GC回收的对象,除非e先被回收。 虽然不一定会引起很大的问题,但是至少会影响GC的回收效率。就像数据库中的外键引用一样,删除起来很麻烦呀。
Entry removeMapping(Object o) { if (!(o instanceof Map.Entry)) return null; Map.Entry entry = (Map.Entry)o; Object k = maskNull(entry.getKey()); int hash = hash(k); int i = indexFor(hash, table.length); Entry prev = table[i]; Entry e = prev; while (e != null) { Entry next = e.next; if (e.hash == hash && e.equals(entry)) { modCount++; size--; if (prev == e) table[i] = next; else prev.next = next; e.recordRemoval(this); return e; } prev = e; e = next; } return e; } 这个方法和上面的一样。
public void clear() { modCount++; Entry tab[] = table; for (int i = 0; i < tab.length; i++) tab[i] = null; size = 0; } 同样可以改进
/**
 * Returns true when at least one mapping has a value equal to {@code value}.
 * Every chain of every bucket is scanned; a null argument is delegated to
 * containsNullValue().
 */
public boolean containsValue(Object value) {
    if (value == null) {
        return containsNullValue();
    }

    Entry[] buckets = table;
    for (int b = 0; b < buckets.length; b++) {
        Entry node = buckets[b];
        while (node != null) {
            if (value.equals(node.value)) {
                return true;
            }
            node = node.next;
        }
    }
    return false;
}
/** Returns true when at least one mapping has a null value. */
private boolean containsNullValue() {
    Entry[] buckets = table;
    for (int b = 0; b < buckets.length; b++) {
        Entry node = buckets[b];
        while (node != null) {
            if (node.value == null) {
                return true;
            }
            node = node.next;
        }
    }
    return false;
}
public Object clone() { HashMap result = null; try { result = (HashMap)super.clone(); } catch (CloneNotSupportedException e) { // assert false; } result.table = new Entry[table.length]; result.entrySet = null; result.modCount = 0; result.size = 0; result.init(); result.putAllForCreate(this); return result; }
static class Entry implements Map.Entry { final Object key; Object value; final int hash; Entry next; Entry(int h, Object k, Object v, Entry n) { value = v; next = n; key = k; hash = h; } public Object getKey() { return unmaskNull(key); } public Object getValue() { return value; } public Object setValue(Object newValue) { Object oldValue = value; value = newValue; return oldValue; } public boolean equals(Object o) { if (!(o instanceof Map.Entry)) return false; Map.Entry e = (Map.Entry)o; Object k1 = getKey(); Object k2 = e.getKey(); if (k1 == k2 || (k1 != null && k1.equals(k2))) { Object v1 = getValue(); Object v2 = e.getValue(); if (v1 == v2 || (v1 != null && v1.equals(v2))) return true; } return false; } public int hashCode() { return (key==NULL_KEY ? 0 : key.hashCode()) ^ (value==null ? 0 : value.hashCode()); } public String toString() { return getKey() + "=" + getValue(); } void recordAccess(HashMap m) { } void recordRemoval(HashMap m) { } } 一个静态内部类
void addEntry(int hash, Object key, Object value, int bucketIndex) { table[bucketIndex] = new Entry(hash, key, value, table[bucketIndex]); if (size++ >= threshold) resize(2 * table.length); } 注意这个方法,插入连表的头。 可以写成这样更好理解: Entry oldHead=table[bucketIndex]; Entry newHead = new Entry(hash,key,value,oldHead); table[bucketIndex]=newHead;
/**
 * Like addEntry, but never resizes: the new entry becomes the head of the
 * chain of bucket {@code bucketIndex} and size is incremented.
 */
void createEntry(int hash, Object key, Object value, int bucketIndex) {
    Entry formerHead = table[bucketIndex];
    table[bucketIndex] = new Entry(hash, key, value, formerHead);
    size += 1;
}
/**
 * Common base of the key, value and entry iterators. Buckets are visited
 * from the highest index down to 0, each chain from head to tail. The
 * iterator fails with ConcurrentModificationException when modCount no
 * longer matches the value it recorded.
 */
private abstract class HashIterator implements Iterator {
    /** Entry the next call to nextEntry() will return, or null when exhausted. */
    Entry next;
    /** modCount value this iterator considers valid. */
    int expectedModCount;
    /** Index of the bucket that holds {@code next}. */
    int index;
    /** Entry returned last, or null if none or already removed. */
    Entry current;

    HashIterator() {
        expectedModCount = modCount;
        Entry[] buckets = table;
        int slot = buckets.length;
        Entry first = null;
        if (size != 0) {
            // Walk down from the top until a non-empty bucket is found.
            while (slot > 0) {
                first = buckets[--slot];
                if (first != null) {
                    break;
                }
            }
        }
        next = first;
        index = slot;
    }

    public boolean hasNext() {
        return next != null;
    }

    /** Returns the next entry and advances to the one after it. */
    Entry nextEntry() {
        if (modCount != expectedModCount) {
            throw new ConcurrentModificationException();
        }
        Entry result = next;
        if (result == null) {
            throw new NoSuchElementException();
        }

        // Look ahead: rest of this chain first, then lower buckets.
        Entry following = result.next;
        Entry[] buckets = table;
        int slot = index;
        while (following == null && slot > 0) {
            following = buckets[--slot];
        }
        index = slot;
        next = following;

        current = result;
        return result;
    }

    /** Removes the entry returned last and re-synchronizes expectedModCount. */
    public void remove() {
        if (current == null) {
            throw new IllegalStateException();
        }
        if (modCount != expectedModCount) {
            throw new ConcurrentModificationException();
        }
        Object removedKey = current.key;
        current = null;
        HashMap.this.removeEntryForKey(removedKey);
        expectedModCount = modCount;
    }
}
/** Iterator over the values of the map. */
private class ValueIterator extends HashIterator {
    public Object next() {
        Entry e = nextEntry();
        return e.value;
    }
}
/** Iterator over the keys of the map (a null key is reported as null). */
private class KeyIterator extends HashIterator {
    public Object next() {
        Entry e = nextEntry();
        return e.getKey();
    }
}
/** Iterator over the entries of the map. */
private class EntryIterator extends HashIterator {
    public Object next() {
        Entry e = nextEntry();
        return e;
    }
}
/** Creates a new iterator over the keys. */
Iterator newKeyIterator() {
    Iterator it = new KeyIterator();
    return it;
}

/** Creates a new iterator over the values. */
Iterator newValueIterator() {
    Iterator it = new ValueIterator();
    return it;
}

/** Creates a new iterator over the entries. */
Iterator newEntryIterator() {
    Iterator it = new EntryIterator();
    return it;
}
/** Cached entry-set view; created on first use by entrySet() and reset to null in clone(). */
private transient Set entrySet = null;
/**
 * Returns the key-set view of this map, creating and caching it in the
 * {@code keySet} field on first use.
 * NOTE(review): the keySet field is not declared in this file; it is
 * presumably inherited from AbstractMap.
 */
public Set keySet() {
    Set ks = keySet;
    if (ks == null) {
        ks = new KeySet();
        keySet = ks;
    }
    return ks;
}
/** Set view of the keys; every operation is forwarded to the enclosing map. */
private class KeySet extends AbstractSet {
    public Iterator iterator() {
        return newKeyIterator();
    }

    public int size() {
        return size;
    }

    public boolean contains(Object o) {
        return containsKey(o);
    }

    /** Removes the mapping for key {@code o}; true if one existed. */
    public boolean remove(Object o) {
        Entry removed = HashMap.this.removeEntryForKey(o);
        return removed != null;
    }

    public void clear() {
        HashMap.this.clear();
    }
}
/**
 * Returns the collection view of the values, creating and caching it in the
 * {@code values} field on first use.
 * NOTE(review): the values field is not declared in this file; it is
 * presumably inherited from AbstractMap.
 */
public Collection values() {
    Collection vs = values;
    if (vs == null) {
        vs = new Values();
        values = vs;
    }
    return vs;
}
/** Collection view of the values; every operation is forwarded to the enclosing map. */
private class Values extends AbstractCollection {
    public Iterator iterator() {
        return newValueIterator();
    }

    public int size() {
        return size;
    }

    public boolean contains(Object o) {
        return containsValue(o);
    }

    public void clear() {
        HashMap.this.clear();
    }
}
/**
 * Returns the set view of the entries, creating and caching it on first use.
 */
public Set entrySet() {
    Set es = entrySet;
    if (es == null) {
        es = new EntrySet();
        entrySet = es;
    }
    return es;
}
/** Set view of the entries; every operation is forwarded to the enclosing map. */
private class EntrySet extends AbstractSet {
    public Iterator iterator() {
        return newEntryIterator();
    }

    /** True when {@code o} is a Map.Entry whose key maps to an equal entry in this map. */
    public boolean contains(Object o) {
        if (!(o instanceof Map.Entry)) {
            return false;
        }
        Map.Entry probe = (Map.Entry) o;
        Entry found = getEntry(probe.getKey());
        if (found == null) {
            return false;
        }
        return found.equals(probe);
    }

    public boolean remove(Object o) {
        Entry removed = removeMapping(o);
        return removed != null;
    }

    public int size() {
        return size;
    }

    public void clear() {
        HashMap.this.clear();
    }
}
/**
 * Serializes the map: the default fields first, then the bucket count, the
 * number of mappings, and finally each key followed by its value.
 *
 * @param s stream to write to
 * @throws IOException if writing to the stream fails
 */
private void writeObject(java.io.ObjectOutputStream s) throws IOException {
    s.defaultWriteObject();

    s.writeInt(table.length);
    s.writeInt(size);

    Iterator it = entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry mapping = (Map.Entry) it.next();
        s.writeObject(mapping.getKey());
        s.writeObject(mapping.getValue());
    }
}
/** Serialization version identifier of this class. */
private static final long serialVersionUID = 362498820763181265L;
private void readObject(java.io.ObjectInputStream s) throws IOException, ClassNotFoundException { s.defaultReadObject(); int numBuckets = s.readInt(); table = new Entry[numBuckets]; init(); size = s.readInt(); for (int i=0; for (int i=0; i<size; i++) { Object key = s.readObject(); Object value = s.readObject(); putForCreate(key, value); } }
/** Returns the current length of the bucket array. */
int capacity() {
    return this.table.length;
}

/** Returns the load factor of this map. */
float loadFactor() {
    return this.loadFactor;
}
|