aboutsummaryrefslogtreecommitdiff
path: root/src/com/sun/org/apache/xerces/internal/util/SymbolTable.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/com/sun/org/apache/xerces/internal/util/SymbolTable.java')
-rw-r--r--src/com/sun/org/apache/xerces/internal/util/SymbolTable.java285
1 files changed, 216 insertions, 69 deletions
diff --git a/src/com/sun/org/apache/xerces/internal/util/SymbolTable.java b/src/com/sun/org/apache/xerces/internal/util/SymbolTable.java
index 3398318..9eeef66 100644
--- a/src/com/sun/org/apache/xerces/internal/util/SymbolTable.java
+++ b/src/com/sun/org/apache/xerces/internal/util/SymbolTable.java
@@ -1,13 +1,13 @@
/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
-
/*
- * Copyright 2005 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -55,28 +55,99 @@ public class SymbolTable {
//
/** Default table size. */
- protected static final int TABLE_SIZE = 173;
+ protected static final int TABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket for a table with load factor == 1. */
+ protected static final int MAX_HASH_COLLISIONS = 40;
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
+
+ //
+ // Data
+ //
/** Buckets. */
protected Entry[] fBuckets = null;
- // actual table size
+ /** actual table size */
protected int fTableSize;
+ /** The total number of entries in the hash table. */
+ protected transient int fCount;
+
+ /** The table is rehashed when its size exceeds this threshold. (The
+ * value of this field is (int)(capacity * loadFactor).) */
+ protected int fThreshold;
+
+ /** The load factor for the SymbolTable. */
+ protected float fLoadFactor;
+
+ /**
+ * A new hash function is selected and the table is rehashed when
+ * the number of keys in the bucket exceeds this threshold.
+ */
+ protected final int fCollisionThreshold;
+
+ /**
+ * Array of randomly selected hash function multipliers or <code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
+
//
// Constructors
//
- /** Constructs a symbol table with a default number of buckets. */
- public SymbolTable() {
- this(TABLE_SIZE);
- }
+ /**
+ * Constructs a new, empty SymbolTable with the specified initial
+ * capacity and the specified load factor.
+ *
+ * @param initialCapacity the initial capacity of the SymbolTable.
+ * @param loadFactor the load factor of the SymbolTable.
+ * @throws IllegalArgumentException if the initial capacity is less
+ * than zero, or if the load factor is nonpositive.
+ */
+ public SymbolTable(int initialCapacity, float loadFactor) {
+
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity);
+ }
+
+ if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
+ throw new IllegalArgumentException("Illegal Load: " + loadFactor);
+ }
+
+ if (initialCapacity == 0) {
+ initialCapacity = 1;
+ }
- /** Constructs a symbol table with a specified number of buckets. */
- public SymbolTable(int tableSize) {
- fTableSize = tableSize;
+ fLoadFactor = loadFactor;
+ fTableSize = initialCapacity;
fBuckets = new Entry[fTableSize];
+ fThreshold = (int)(fTableSize * loadFactor);
+ fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor);
+ fCount = 0;
+ }
+
+ /**
+ * Constructs a new, empty SymbolTable with the specified initial capacity
+ * and default load factor, which is <tt>0.75</tt>.
+ *
+ * @param initialCapacity the initial capacity of the hashtable.
+ * @throws IllegalArgumentException if the initial capacity is less
+ * than zero.
+ */
+ public SymbolTable(int initialCapacity) {
+ this(initialCapacity, 0.75f);
+ }
+
+ /**
+ * Constructs a new, empty SymbolTable with a default initial capacity (101)
+ * and load factor, which is <tt>0.75</tt>.
+ */
+ public SymbolTable() {
+ this(TABLE_SIZE, 0.75f);
}
//
@@ -94,36 +165,39 @@ public class SymbolTable {
public String addSymbol(String symbol) {
// search for identical symbol
- final int hash = hash(symbol);
- final int bucket = hash % fTableSize;
- final int length = symbol.length();
- OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
- if (length == entry.characters.length && hash == entry.hashCode) {
- if(symbol.regionMatches(0,entry.symbol,0,length)){
- return entry.symbol;
- }
- else{
- continue OUTER;
- }
- /**
- for (int i = 0; i < length; i++) {
- if (symbol.charAt(i) != entry.characters[i]) {
- continue OUTER;
- }
- }
- symbolAsArray = entry.characters;
+ int collisionCount = 0;
+ int bucket = hash(symbol) % fTableSize;
+ for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
+ if (entry.symbol.equals(symbol)) {
return entry.symbol;
- */
}
+ ++collisionCount;
+ }
+ return addSymbol0(symbol, bucket, collisionCount);
+
+ } // addSymbol(String):String
+
+ private String addSymbol0(String symbol, int bucket, int collisionCount) {
+
+ if (fCount >= fThreshold) {
+ // Rehash the table if the threshold is exceeded
+ rehash();
+ bucket = hash(symbol) % fTableSize;
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(symbol) % fTableSize;
}
// create new entry
Entry entry = new Entry(symbol, fBuckets[bucket]);
- entry.hashCode = hash;
fBuckets[bucket] = entry;
+ ++fCount;
return entry.symbol;
- } // addSymbol(String):String
+ } // addSymbol0(String,int,int):String
/**
* Adds the specified symbol to the symbol table and returns a
@@ -136,27 +210,47 @@ public class SymbolTable {
* @param length The length of the new symbol in the buffer.
*/
public String addSymbol(char[] buffer, int offset, int length) {
+
// search for identical symbol
- int hash = hash(buffer, offset, length);
- int bucket = hash % fTableSize;
+ int collisionCount = 0;
+ int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
- if (length == entry.characters.length && hash ==entry.hashCode) {
+ if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != entry.characters[i]) {
+ ++collisionCount;
continue OUTER;
}
}
return entry.symbol;
}
+ ++collisionCount;
+ }
+ return addSymbol0(buffer, offset, length, bucket, collisionCount);
+
+ } // addSymbol(char[],int,int):String
+
+ private String addSymbol0(char[] buffer, int offset, int length, int bucket, int collisionCount) {
+
+ if (fCount >= fThreshold) {
+ // Rehash the table if the threshold is exceeded
+ rehash();
+ bucket = hash(buffer, offset, length) % fTableSize;
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(buffer, offset, length) % fTableSize;
}
// add new entry
Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
fBuckets[bucket] = entry;
- entry.hashCode = hash;
+ ++fCount;
return entry.symbol;
- } // addSymbol(char[],int,int):String
+ } // addSymbol0(char[],int,int,int,int):String
/**
* Returns a hashcode value for the specified symbol. The value
@@ -167,15 +261,21 @@ public class SymbolTable {
* @param symbol The symbol to hash.
*/
public int hash(String symbol) {
+ if (fHashMultipliers == null) {
+ return symbol.hashCode() & 0x7FFFFFFF;
+ }
+ return hash0(symbol);
+ } // hash(String):int
+ private int hash0(String symbol) {
int code = 0;
- int length = symbol.length();
- for (int i = 0; i < length; i++) {
- code = code * 37 + symbol.charAt(i);
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
}
return code & 0x7FFFFFFF;
-
- } // hash(String):int
+ } // hash0(String):int
/**
* Returns a hashcode value for the specified symbol information.
@@ -189,14 +289,73 @@ public class SymbolTable {
* @param length The length of the symbol.
*/
public int hash(char[] buffer, int offset, int length) {
+ if (fHashMultipliers == null) {
+ int code = 0;
+ for (int i = 0; i < length; ++i) {
+ code = code * 31 + buffer[offset + i];
+ }
+ return code & 0x7FFFFFFF;
+ }
+ return hash0(buffer, offset, length);
+
+ } // hash(char[],int,int):int
+ private int hash0(char[] buffer, int offset, int length) {
int code = 0;
- for (int i = 0; i < length; i++) {
- code = code * 37 + buffer[offset + i];
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i];
}
return code & 0x7FFFFFFF;
+ } // hash0(char[],int,int):int
- } // hash(char[],int,int):int
+ /**
+ * Increases the capacity of and internally reorganizes this
+ * SymbolTable, in order to accommodate and access its entries more
+ * efficiently. This method is called automatically when the
+ * number of keys in the SymbolTable exceeds this hashtable's capacity
+ * and load factor.
+ */
+ protected void rehash() {
+ rehashCommon(fBuckets.length * 2 + 1);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolTable
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolTable's buckets exceeds the given collision threshold.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
+ private void rehashCommon(final int newCapacity) {
+
+ int oldCapacity = fBuckets.length;
+ Entry[] oldTable = fBuckets;
+
+ Entry[] newTable = new Entry[newCapacity];
+
+ fThreshold = (int)(newCapacity * fLoadFactor);
+ fBuckets = newTable;
+ fTableSize = fBuckets.length;
+
+ for (int i = oldCapacity ; i-- > 0 ;) {
+ for (Entry old = oldTable[i] ; old != null ; ) {
+ Entry e = old;
+ old = old.next;
+
+ int index = hash(e.symbol) % newCapacity;
+ e.next = newTable[index];
+ newTable[index] = e;
+ }
+ }
+ }
/**
* Returns true if the symbol table already contains the specified
@@ -207,25 +366,16 @@ public class SymbolTable {
public boolean containsSymbol(String symbol) {
// search for identical symbol
- int hash = hash(symbol);
- int bucket = hash % fTableSize;
+ int bucket = hash(symbol) % fTableSize;
int length = symbol.length();
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
- if (length == entry.characters.length && hash == entry.hashCode) {
- if(symbol.regionMatches(0,entry.symbol,0,length)){
- return true;
- }
- else {
- continue OUTER;
- }
- /**
+ if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (symbol.charAt(i) != entry.characters[i]) {
continue OUTER;
}
}
- return true;
- */
+ return true;
}
}
@@ -244,10 +394,9 @@ public class SymbolTable {
public boolean containsSymbol(char[] buffer, int offset, int length) {
// search for identical symbol
- int hash = hash(buffer, offset, length) ;
- int bucket = hash % fTableSize;
+ int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
- if (length == entry.characters.length && hash == entry.hashCode) {
+ if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != entry.characters[i]) {
continue OUTER;
@@ -261,7 +410,6 @@ public class SymbolTable {
} // containsSymbol(char[],int,int):boolean
-
//
// Classes
//
@@ -277,14 +425,13 @@ public class SymbolTable {
//
/** Symbol. */
- public String symbol;
- int hashCode = 0;
+ public final String symbol;
/**
* Symbol characters. This information is duplicated here for
* comparison performance.
*/
- public char[] characters;
+ public final char[] characters;
/** The next entry. */
public Entry next;