diff options
Diffstat (limited to 'starlark/hashtable.go')
-rw-r--r-- | starlark/hashtable.go | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/starlark/hashtable.go b/starlark/hashtable.go new file mode 100644 index 0000000..27990b5 --- /dev/null +++ b/starlark/hashtable.go @@ -0,0 +1,373 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package starlark + +import ( + "fmt" + _ "unsafe" // for go:linkname hack +) + +// hashtable is used to represent Starlark dict and set values. +// It is a hash table whose key/value entries form a doubly-linked list +// in the order the entries were inserted. +type hashtable struct { + table []bucket // len is zero or a power of two + bucket0 [1]bucket // inline allocation for small maps. + len uint32 + itercount uint32 // number of active iterators (ignored if frozen) + head *entry // insertion order doubly-linked list; may be nil + tailLink **entry // address of nil link at end of list (perhaps &head) + frozen bool +} + +const bucketSize = 8 + +type bucket struct { + entries [bucketSize]entry + next *bucket // linked list of buckets +} + +type entry struct { + hash uint32 // nonzero => in use + key, value Value + next *entry // insertion order doubly-linked list; may be nil + prevLink **entry // address of link to this entry (perhaps &head) +} + +func (ht *hashtable) init(size int) { + if size < 0 { + panic("size < 0") + } + nb := 1 + for overloaded(size, nb) { + nb = nb << 1 + } + if nb < 2 { + ht.table = ht.bucket0[:1] + } else { + ht.table = make([]bucket, nb) + } + ht.tailLink = &ht.head +} + +func (ht *hashtable) freeze() { + if !ht.frozen { + ht.frozen = true + for i := range ht.table { + for p := &ht.table[i]; p != nil; p = p.next { + for i := range p.entries { + e := &p.entries[i] + if e.hash != 0 { + e.key.Freeze() + e.value.Freeze() + } + } + } + } + } +} + +func (ht *hashtable) insert(k, v Value) error { + if ht.frozen { + return fmt.Errorf("cannot insert into frozen hash table") + } + if ht.itercount > 0 { + return fmt.Errorf("cannot insert into hash table during iteration") + } + if ht.table == nil { + ht.init(1) + } + h, err := k.Hash() + if err != nil { + return err + } + if h == 0 { + h = 1 // zero is reserved + } + +retry: + var insert *entry + + // Inspect each bucket in the bucket list. + p := &ht.table[h&(uint32(len(ht.table)-1))] + for { + for i := range p.entries { + e := &p.entries[i] + if e.hash != h { + if e.hash == 0 { + // Found empty entry; make a note. + insert = e + } + continue + } + if eq, err := Equal(k, e.key); err != nil { + return err // e.g. excessively recursive tuple + } else if !eq { + continue + } + // Key already present; update value. + e.value = v + return nil + } + if p.next == nil { + break + } + p = p.next + } + + // Key not found. p points to the last bucket. + + // Does the number of elements exceed the buckets' load factor? + if overloaded(int(ht.len), len(ht.table)) { + ht.grow() + goto retry + } + + if insert == nil { + // No space in existing buckets. Add a new one to the bucket list. + b := new(bucket) + p.next = b + insert = &b.entries[0] + } + + // Insert key/value pair. + insert.hash = h + insert.key = k + insert.value = v + + // Append entry to doubly-linked list. + insert.prevLink = ht.tailLink + *ht.tailLink = insert + ht.tailLink = &insert.next + + ht.len++ + + return nil +} + +func overloaded(elems, buckets int) bool { + const loadFactor = 6.5 // just a guess + return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) +} + +func (ht *hashtable) grow() { + // Double the number of buckets and rehash. + // TODO(adonovan): opt: + // - avoid reentrant calls to ht.insert, and specialize it. + // e.g. we know the calls to Equals will return false since + // there are no duplicates among the old keys. + // - saving the entire hash in the bucket would avoid the need to + // recompute the hash. + // - save the old buckets on a free list. + ht.table = make([]bucket, len(ht.table)<<1) + oldhead := ht.head + ht.head = nil + ht.tailLink = &ht.head + ht.len = 0 + for e := oldhead; e != nil; e = e.next { + ht.insert(e.key, e.value) + } + ht.bucket0[0] = bucket{} // clear out unused initial bucket +} + +func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { + h, err := k.Hash() + if err != nil { + return nil, false, err // unhashable + } + if h == 0 { + h = 1 // zero is reserved + } + if ht.table == nil { + return None, false, nil // empty + } + + // Inspect each bucket in the bucket list. + for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { + for i := range p.entries { + e := &p.entries[i] + if e.hash == h { + if eq, err := Equal(k, e.key); err != nil { + return nil, false, err // e.g. excessively recursive tuple + } else if eq { + return e.value, true, nil // found + } + } + } + } + return None, false, nil // not found +} + +// Items returns all the items in the map (as key/value pairs) in insertion order. +func (ht *hashtable) items() []Tuple { + items := make([]Tuple, 0, ht.len) + array := make([]Value, ht.len*2) // allocate a single backing array + for e := ht.head; e != nil; e = e.next { + pair := Tuple(array[:2:2]) + array = array[2:] + pair[0] = e.key + pair[1] = e.value + items = append(items, pair) + } + return items +} + +func (ht *hashtable) first() (Value, bool) { + if ht.head != nil { + return ht.head.key, true + } + return None, false +} + +func (ht *hashtable) keys() []Value { + keys := make([]Value, 0, ht.len) + for e := ht.head; e != nil; e = e.next { + keys = append(keys, e.key) + } + return keys +} + +func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { + if ht.frozen { + return nil, false, fmt.Errorf("cannot delete from frozen hash table") + } + if ht.itercount > 0 { + return nil, false, fmt.Errorf("cannot delete from hash table during iteration") + } + if ht.table == nil { + return None, false, nil // empty + } + h, err := k.Hash() + if err != nil { + return nil, false, err // unhashable + } + if h == 0 { + h = 1 // zero is reserved + } + + // Inspect each bucket in the bucket list. + for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { + for i := range p.entries { + e := &p.entries[i] + if e.hash == h { + if eq, err := Equal(k, e.key); err != nil { + return nil, false, err + } else if eq { + // Remove e from doubly-linked list. + *e.prevLink = e.next + if e.next == nil { + ht.tailLink = e.prevLink // deletion of last entry + } else { + e.next.prevLink = e.prevLink + } + + v := e.value + *e = entry{} + ht.len-- + return v, true, nil // found + } + } + } + } + + // TODO(adonovan): opt: remove completely empty bucket from bucket list. + + return None, false, nil // not found +} + +func (ht *hashtable) clear() error { + if ht.frozen { + return fmt.Errorf("cannot clear frozen hash table") + } + if ht.itercount > 0 { + return fmt.Errorf("cannot clear hash table during iteration") + } + if ht.table != nil { + for i := range ht.table { + ht.table[i] = bucket{} + } + } + ht.head = nil + ht.tailLink = &ht.head + ht.len = 0 + return nil +} + +// dump is provided as an aid to debugging. +func (ht *hashtable) dump() { + fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", + ht, ht.len, ht.head, ht.tailLink) + if ht.tailLink != nil { + fmt.Printf(" *tailLink=%p", *ht.tailLink) + } + fmt.Println() + for j := range ht.table { + fmt.Printf("bucket chain %d\n", j) + for p := &ht.table[j]; p != nil; p = p.next { + fmt.Printf("bucket %p\n", p) + for i := range p.entries { + e := &p.entries[i] + fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", + i, e, e.hash, e.key, e.value) + fmt.Printf("\t\tnext=%p &next=%p prev=%p", + e.next, &e.next, e.prevLink) + if e.prevLink != nil { + fmt.Printf(" *prev=%p", *e.prevLink) + } + fmt.Println() + } + } + } +} + +func (ht *hashtable) iterate() *keyIterator { + if !ht.frozen { + ht.itercount++ + } + return &keyIterator{ht: ht, e: ht.head} +} + +type keyIterator struct { + ht *hashtable + e *entry +} + +func (it *keyIterator) Next(k *Value) bool { + if it.e != nil { + *k = it.e.key + it.e = it.e.next + return true + } + return false +} + +func (it *keyIterator) Done() { + if !it.ht.frozen { + it.ht.itercount-- + } +} + +// hashString computes the hash of s. +func hashString(s string) uint32 { + if len(s) >= 12 { + // Call the Go runtime's optimized hash implementation, + // which uses the AESENC instruction on amd64 machines. + return uint32(goStringHash(s, 0)) + } + return softHashString(s) +} + +//go:linkname goStringHash runtime.stringHash +func goStringHash(s string, seed uintptr) uintptr + +// softHashString computes the 32-bit FNV-1a hash of s in software. +func softHashString(s string) uint32 { + var h uint32 = 2166136261 + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} |