diff options
Diffstat (limited to 'stringclassifier/internal')
-rw-r--r-- | stringclassifier/internal/pq/priority.go | 111 | ||||
-rw-r--r-- | stringclassifier/internal/pq/priority_test.go | 181 | ||||
-rw-r--r-- | stringclassifier/internal/sets/intset.go | 227 | ||||
-rw-r--r-- | stringclassifier/internal/sets/intset_test.go | 438 | ||||
-rw-r--r-- | stringclassifier/internal/sets/sets.go | 20 |
5 files changed, 977 insertions, 0 deletions
diff --git a/stringclassifier/internal/pq/priority.go b/stringclassifier/internal/pq/priority.go new file mode 100644 index 0000000..d1797c7 --- /dev/null +++ b/stringclassifier/internal/pq/priority.go @@ -0,0 +1,111 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pq provides a priority queue. +package pq + +import "container/heap" + +// NewQueue returns an unbounded priority queue that compares elements using +// less; the minimal element is at the top of the queue. +// +// If setIndex is not nil, the queue calls setIndex to inform each element of +// its position in the queue. If an element's priority changes, its position in +// the queue may be incorrect. Call Fix on the element's index to update the +// queue. Call Remove on the element's index to remove it from the queue. +func NewQueue(less func(x, y interface{}) bool, setIndex func(x interface{}, idx int)) *Queue { + return &Queue{ + heap: pqHeap{ + less: less, + setIndex: setIndex, + }, + } +} + +// Queue is a priority queue that supports updating the priority of an element. +// A Queue must be created with NewQueue. +type Queue struct { + heap pqHeap +} + +// Len returns the number of elements in the queue. +func (pq *Queue) Len() int { + return pq.heap.Len() +} + +// Push adds x to the queue. +func (pq *Queue) Push(x interface{}) { + heap.Push(&pq.heap, x) +} + +// Min returns the minimal element. +// Min panics if the queue is empty. +func (pq *Queue) Min() interface{} { + return pq.heap.a[0] +} + +// Pop removes and returns the minimal element. +// Pop panics if the queue is empty. +func (pq *Queue) Pop() interface{} { + return heap.Pop(&pq.heap) +} + +// Fix adjusts the heap to reflect that the element at index has changed priority. +func (pq *Queue) Fix(index int) { + heap.Fix(&pq.heap, index) +} + +// Remove removes the element at index i from the heap. +func (pq *Queue) Remove(index int) { + heap.Remove(&pq.heap, index) +} + +// pqHeap implements heap.Interface. +type pqHeap struct { + a []interface{} + less func(x, y interface{}) bool + setIndex func(x interface{}, idx int) +} + +func (h pqHeap) Len() int { + return len(h.a) +} + +func (h pqHeap) Less(i, j int) bool { + return h.less(h.a[i], h.a[j]) +} + +func (h pqHeap) Swap(i, j int) { + h.a[i], h.a[j] = h.a[j], h.a[i] + if h.setIndex != nil { + h.setIndex(h.a[i], i) + h.setIndex(h.a[j], j) + } +} + +func (h *pqHeap) Push(x interface{}) { + n := len(h.a) + if h.setIndex != nil { + h.setIndex(x, n) + } + h.a = append(h.a, x) +} + +func (h *pqHeap) Pop() interface{} { + old := h.a + n := len(old) + x := old[n-1] + h.a = old[:n-1] + return x +} diff --git a/stringclassifier/internal/pq/priority_test.go b/stringclassifier/internal/pq/priority_test.go new file mode 100644 index 0000000..a77a301 --- /dev/null +++ b/stringclassifier/internal/pq/priority_test.go @@ -0,0 +1,181 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package pq + +import "testing" + +func TestQueue(t *testing.T) { + pq := NewQueue(func(x, y interface{}) bool { + return x.(string) < y.(string) + }, nil) + if n := pq.Len(); n != 0 { + t.Fatalf("pq.Len() = %d want 0", n) + } + pq.Push("Go") + pq.Push("C++") + pq.Push("Java") + for i, want := range []string{"C++", "Go", "Java"} { + wantLen := 3 - i + if n := pq.Len(); n != wantLen { + t.Fatalf("pq.Len() = %d want %d", n, wantLen) + } + if s := pq.Min().(string); s != want { + t.Fatalf("pq.Min() = %q want %q", s, want) + } + if n := pq.Len(); n != wantLen { + t.Fatalf("pq.Len() = %d want %d", n, wantLen) + } + if s := pq.Pop().(string); s != want { + t.Fatalf("pq.Pop() = %q want %q", s, want) + } + if n := pq.Len(); n != wantLen-1 { + t.Fatalf("pq.Len() = %d want %d", n, wantLen-1) + } + } + if n := pq.Len(); n != 0 { + t.Fatalf("pq.Len() = %d want 0", n) + } +} + +// Test that reprioritizing an item works. +func TestFix(t *testing.T) { + type item struct { + value int + index int + } + pq := NewQueue(func(x, y interface{}) bool { + return x.(*item).value < y.(*item).value + }, func(x interface{}, index int) { + x.(*item).index = index + }) + if n := pq.Len(); n != 0 { + t.Fatalf("pq.Len() = %d want 0", n) + } + i1 := &item{value: 1} + i2 := &item{value: 2} + i3 := &item{value: 3} + pq.Push(i3) + pq.Push(i1) + pq.Push(i2) + if n := pq.Len(); n != 3 { + t.Fatalf("pq.Len() = %d want 3", n) + } + for i, it := range []*item{i1, i2, i3} { + if i == 0 && it.index != 0 { + t.Errorf("item %+v want index 0", it) + } + if it.value != i+1 { + t.Errorf("item %+v want value %d", it, i+1) + } + } + i1.value = 4 + pq.Fix(i1.index) + if n := pq.Len(); n != 3 { + t.Fatalf("pq.Len() = %d want 3", n) + } + for i, it := range []*item{i2, i3, i1} { + if i == 0 && it.index != 0 { + t.Errorf("item %+v want index 0", it) + } + if it.value != i+2 { + t.Errorf("item %+v want value %d", it, i+2) + } + } + for i, want := range []int{2, 3, 4} { + wantLen := 3 - i + if n := pq.Len(); n != wantLen { + t.Fatalf("pq.Len() = %d want %d", n, wantLen) + } + if it := pq.Min().(*item); it.value != want { + t.Fatalf("pq.Min() = %+v want value %d", it, want) + } + if n := pq.Len(); n != wantLen { + t.Fatalf("pq.Len() = %d want %d", n, wantLen) + } + if it := pq.Pop().(*item); it.value != want { + t.Fatalf("pq.Pop() = %+v want value %d", it, want) + } + if n := pq.Len(); n != wantLen-1 { + t.Fatalf("pq.Len() = %d want %d", n, wantLen-1) + } + } + if n := pq.Len(); n != 0 { + t.Fatalf("pq.Len() = %d want 0", n) + } +} + +func TestRemove(t *testing.T) { + type item struct { + value int + index int + } + pq := NewQueue(func(x, y interface{}) bool { + return x.(*item).value < y.(*item).value + }, func(x interface{}, index int) { + x.(*item).index = index + }) + if n := pq.Len(); n != 0 { + t.Fatalf("pq.Len() = %d want 0", n) + } + i1 := &item{value: 1} + i2 := &item{value: 2} + i3 := &item{value: 3} + pq.Push(i3) + pq.Push(i1) + pq.Push(i2) + if n := pq.Len(); n != 3 { + t.Fatalf("pq.Len() = %d want 3", n) + } + for i, it := range []*item{i1, i2, i3} { + if i == 0 && it.index != 0 { + t.Errorf("item %+v want index 0", it) + } + if it.value != i+1 { + t.Errorf("item %+v want value %d", it, i+1) + } + } + pq.Remove(i3.index) + if n := pq.Len(); n != 2 { + t.Fatalf("pq.Len() = %d want 2", n) + } + for i, it := range []*item{i1, i2} { + if i == 0 && it.index != 0 { + t.Errorf("item %+v want index 0", it) + } + if it.value != i+1 { + t.Errorf("item %+v want value %d", it, i+2) + } + } + for i, want := range []int{1, 2} { + wantLen := 2 - i + if n := pq.Len(); n != wantLen { + t.Fatalf("pq.Len() = %d want %d", n, wantLen) + } + if it := pq.Min().(*item); it.value != want { + t.Fatalf("pq.Min() = %+v want value %d", it, want) + } + if n := pq.Len(); n != wantLen { + t.Fatalf("pq.Len() = %d want %d", n, wantLen) + } + if it := pq.Pop().(*item); it.value != want { + t.Fatalf("pq.Pop() = %+v want value %d", it, want) + } + if n := pq.Len(); n != wantLen-1 { + t.Fatalf("pq.Len() = %d want %d", n, wantLen-1) + } + } + if n := pq.Len(); n != 0 { + t.Fatalf("pq.Len() = %d want 0", n) + } +} diff --git a/stringclassifier/internal/sets/intset.go b/stringclassifier/internal/sets/intset.go new file mode 100644 index 0000000..42f7368 --- /dev/null +++ b/stringclassifier/internal/sets/intset.go @@ -0,0 +1,227 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "fmt" + "sort" + "strings" +) + +// IntSet stores a set of unique int elements. +type IntSet struct { + set map[int]present +} + +// NewIntSet creates an IntSet containing the supplied initial int elements. +func NewIntSet(elements ...int) *IntSet { + s := &IntSet{} + s.set = make(map[int]present) + s.Insert(elements...) + return s +} + +// Copy returns a newly allocated copy of the supplied IntSet. +func (s *IntSet) Copy() *IntSet { + c := NewIntSet() + if s != nil { + for e := range s.set { + c.set[e] = present{} + } + } + return c +} + +// Insert zero or more int elements into the IntSet. As expected for a Set, +// elements already present in the IntSet are simply ignored. +func (s *IntSet) Insert(elements ...int) { + for _, e := range elements { + s.set[e] = present{} + } +} + +// Delete zero or more int elements from the IntSet. Any elements not present +// in the IntSet are simply ignored. +func (s *IntSet) Delete(elements ...int) { + for _, e := range elements { + delete(s.set, e) + } +} + +// Intersect returns a new IntSet containing the intersection of the receiver +// and argument IntSets. Returns an empty set if the argument is nil. +func (s *IntSet) Intersect(other *IntSet) *IntSet { + if other == nil { + return NewIntSet() + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Perform the intersection. + intersect := NewIntSet() + for e := range a { + if _, ok := b[e]; ok { + intersect.set[e] = present{} + } + } + return intersect +} + +// Disjoint returns true if the intersection of the receiver and the argument +// IntSets is the empty set. Returns true if the argument is nil or either +// IntSet is the empty set. +func (s *IntSet) Disjoint(other *IntSet) bool { + if other == nil || len(other.set) == 0 || len(s.set) == 0 { + return true + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Check for non-empty intersection. + for e := range a { + if _, ok := b[e]; ok { + return false // Early-exit because intersecting. + } + } + return true +} + +// Difference returns a new IntSet containing the elements in the receiver that +// are not present in the argument IntSet. Returns a copy of the receiver if +// the argument is nil. +func (s *IntSet) Difference(other *IntSet) *IntSet { + if other == nil { + return s.Copy() + } + + // Insert only the elements in the receiver that are not present in the + // argument IntSet. + diff := NewIntSet() + for e := range s.set { + if _, ok := other.set[e]; !ok { + diff.set[e] = present{} + } + } + return diff +} + +// Unique returns a new IntSet containing the elements in the receiver that are +// not present in the argument IntSet *and* the elements in the argument IntSet +// that are not in the receiver. Returns a copy of the receiver if the argument +// is nil. +func (s *IntSet) Unique(other *IntSet) *IntSet { + if other == nil { + return s.Copy() + } + + sNotInOther := s.Difference(other) + otherNotInS := other.Difference(s) + + // Duplicate Union implementation here to avoid extra Copy, since both + // sNotInOther and otherNotInS are already copies. + unique := sNotInOther + for e := range otherNotInS.set { + unique.set[e] = present{} + } + return unique +} + +// Equal returns true if the receiver and the argument IntSet contain exactly +// the same elements. Returns false if the argument is nil. +func (s *IntSet) Equal(other *IntSet) bool { + if s == nil || other == nil { + return s == nil && other == nil + } + + // Two sets of different length cannot have the exact same unique + // elements. + if len(s.set) != len(other.set) { + return false + } + + // Only one loop is needed. If the two sets are known to be of equal + // length, then the two sets are equal only if exactly all of the + // elements in the first set are found in the second. + for e := range s.set { + if _, ok := other.set[e]; !ok { + return false + } + } + + return true +} + +// Union returns a new IntSet containing the union of the receiver and argument +// IntSets. Returns a copy of the receiver if the argument is nil. +func (s *IntSet) Union(other *IntSet) *IntSet { + union := s.Copy() + if other != nil { + for e := range other.set { + union.set[e] = present{} + } + } + return union +} + +// Contains returns true if element is in the IntSet. +func (s *IntSet) Contains(element int) bool { + _, in := s.set[element] + return in +} + +// Len returns the number of unique elements in the IntSet. +func (s *IntSet) Len() int { + return len(s.set) +} + +// Empty returns true if the receiver is the empty set. +func (s *IntSet) Empty() bool { + return len(s.set) == 0 +} + +// Elements returns a []int of the elements in the IntSet, in no particular (or +// consistent) order. +func (s *IntSet) Elements() []int { + elements := []int{} // Return at least an empty slice rather than nil. + for e := range s.set { + elements = append(elements, e) + } + return elements +} + +// Sorted returns a sorted []int of the elements in the IntSet. +func (s *IntSet) Sorted() []int { + elements := s.Elements() + sort.Ints(elements) + return elements +} + +// String formats the IntSet elements as sorted ints, representing them in +// "array initializer" syntax. +func (s *IntSet) String() string { + elements := s.Sorted() + var quoted []string + for _, e := range elements { + quoted = append(quoted, fmt.Sprintf("\"%d\"", e)) + } + return fmt.Sprintf("{%s}", strings.Join(quoted, ", ")) +} diff --git a/stringclassifier/internal/sets/intset_test.go b/stringclassifier/internal/sets/intset_test.go new file mode 100644 index 0000000..e77a1f9 --- /dev/null +++ b/stringclassifier/internal/sets/intset_test.go @@ -0,0 +1,438 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "sort" + "testing" +) + +func checkSameIntSet(t *testing.T, set *IntSet, unique []int) { + // Check that lengths are the same. + want := len(unique) + got := set.Len() + + if got != want { + t.Errorf("NewIntSet(%v) want length %v, got %v", unique, want, got) + } + + // Check that all ints are present in set. + for _, s := range unique { + want := true + got := set.Contains(s) + + if got != want { + t.Errorf("Contains(%v) want %v, got %v", s, want, got) + } + } + + // Check that all elements are present in ints. + sort.Ints(unique) + + for i, got := range set.Sorted() { + want := unique[i] + if got != want { + t.Errorf("Sorted(%d) want %v, got %v", i, want, got) + } + } +} + +type enumTest int + +const ( + et0 enumTest = iota + et1 + et2 + et3 + et4 +) + +func TestNewIntSet(t *testing.T) { + empty := NewIntSet() + want := 0 + got := empty.Len() + + if got != want { + t.Errorf("NewIntSet() want length %v, got %v", want, got) + } + + unique := []int{0, 1, 2} + set := NewIntSet(unique...) + checkSameIntSet(t, set, unique) + + // Append an already-present element. + nonUnique := append(unique, unique[0]) + set = NewIntSet(nonUnique...) + + // Non-unique unique should collapse to one. + want = len(unique) + got = set.Len() + + if got != want { + t.Errorf("NewIntSet(%v) want length %v, got %v", nonUnique, want, got) + } + + // Initialize with enum values cast to int. + set = NewIntSet(int(et0), int(et1), int(et2)) + checkSameIntSet(t, set, unique) +} + +func TestIntSet_Copy(t *testing.T) { + // Check both copies represent the same set. + base := []int{1, 2, 3} + orig := NewIntSet(base...) + cpy := orig.Copy() + checkSameIntSet(t, orig, base) + checkSameIntSet(t, cpy, base) + + // Check the two copies are independent. + more := []int{4} + orig.Insert(more...) + more = append(base, more...) + checkSameIntSet(t, orig, more) + checkSameIntSet(t, cpy, base) +} + +func TestIntSet_Insert(t *testing.T) { + unique := []int{0, 1, 2} + set := NewIntSet(unique...) + + // Insert existing element, which should basically be a no-op. + set.Insert(unique[0]) + checkSameIntSet(t, set, unique) + + // Actually insert new unique elements (cast from enum values this time). + additional := []int{int(et3), int(et4)} + longer := append(unique, additional...) + set.Insert(additional...) + checkSameIntSet(t, set, longer) +} + +func TestIntSet_Delete(t *testing.T) { + unique := []int{0, 1, 2} + set := NewIntSet(unique...) + + // Delete non-existent element, which should basically be a no-op. + set.Delete(int(et4)) + checkSameIntSet(t, set, unique) + + // Actually delete existing elements. + set.Delete(unique[1:]...) + checkSameIntSet(t, set, unique[:1]) +} + +func TestIntSet_Intersect(t *testing.T) { + input1 := []int{1, 3, 4, 5, 6} + input2 := []int{2, 3, 5} + + // Check Intersect(nil) returns an empty set. + setA := NewIntSet(input1...) + got := setA.Intersect(nil) + checkSameIntSet(t, got, []int{}) + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check Intersect returns the correct result. + setB := NewIntSet(input2...) + got = setA.Intersect(setB) + want := []int{3, 5} + checkSameIntSet(t, got, want) + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) + + // Reverse the inputs and verify Intersect produces the same results. + setA = NewIntSet(input2...) + setB = NewIntSet(input1...) + got = setA.Intersect(setB) + checkSameIntSet(t, got, want) + // Check the sources are again unchanged. + checkSameIntSet(t, setA, input2) + checkSameIntSet(t, setB, input1) +} + +func TestIntSet_Disjoint(t *testing.T) { + input1 := []int{1, 3, 4, 5, 6} + input2 := []int{2, 3, 5} + input3 := []int{98, 99, 100} + + // Check that sets are always disjoint with the empty set or nil + setA := NewIntSet(input1...) + emptySet := NewIntSet() + + if disjoint := setA.Disjoint(nil); !disjoint { + t.Errorf("Disjoint(%s, %v) want %v, got %v", setA, nil, true, disjoint) + } + + if disjoint := setA.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, emptySet, true, disjoint) + } + + if disjoint := emptySet.Disjoint(setA); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, setA, true, disjoint) + } + + if disjoint := emptySet.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, emptySet, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, emptySet, []int{}) + + // Check two non-empty, non-nil disjoint sets. + setC := NewIntSet(input3...) + + if disjoint := setA.Disjoint(setC); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setC, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setC, input3) + + // Check that two intersecting sets are not Disjoint. + setB := NewIntSet(input2...) + + if disjoint := setA.Disjoint(setB); disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setB, false, disjoint) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) +} + +func TestIntSet_Difference(t *testing.T) { + input1 := []int{1, 3, 4, 5, 6} + input2 := []int{2, 3, 5} + input3 := []int{98, 99, 100} + + // Check Difference(nil) returns a copy of the receiver. + setA := NewIntSet(input1...) + got := setA.Difference(nil) + checkSameIntSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check A - A returns the empty set. + got = setA.Difference(setA) + + if !got.Empty() { + t.Errorf("Difference(%s, %s).Empty() want %v, got %v", + setA, setA, true, false) + } + + checkSameIntSet(t, got, []int{}) + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check A - C simply returns elements in A if A and C are disjoint. + setC := NewIntSet(input3...) + got = setA.Difference(setC) + checkSameIntSet(t, got, input1) + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setC, input3) + + // Check A - B returns elements in A not in B. + setB := NewIntSet(input2...) + got = setA.Difference(setB) + want := []int{1, 4, 6} + checkSameIntSet(t, got, want) + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) + + // Check B - A returns elements in B not in A. + got = setB.Difference(setA) + want = []int{2} + checkSameIntSet(t, got, want) + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) +} + +func TestIntSet_Unique(t *testing.T) { + input1 := []int{1, 3, 4, 5, 6} + input2 := []int{2, 3, 5} + input3 := []int{98, 99, 100} + + // Check Unique(nil) returns a copy of the receiver. + setA := NewIntSet(input1...) + got := setA.Unique(nil) + checkSameIntSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check Unique returns only elements in A and B not in both A and B. + setB := NewIntSet(input2...) + got = setA.Unique(setB) + want := []int{1, 2, 4, 6} + checkSameIntSet(t, got, want) + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) + + // Check Unique of two disjoint sets is the Union of those sets. + setC := NewIntSet(input3...) + got = setA.Unique(setC) + union := setA.Union(setC) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of disjoint Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Check Unique is the Union of A - B and B - A. + aNotInB := setA.Difference(setB) + bNotInA := setB.Difference(setA) + union = aNotInB.Union(bNotInA) + want = []int{1, 2, 4, 6} + checkSameIntSet(t, union, want) + got = setA.Unique(setB) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of differences Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) +} + +func TestIntSet_Equal(t *testing.T) { + input1 := []int{1, 3, 4, 5, 6} + input2 := []int{2, 3, 5} + input3 := []int{1, 3, 4, 5, 7} + + // Check Equal(nil) returns false. + setA := NewIntSet(input1...) + + if equal := setA.Equal(nil); equal { + t.Errorf("Equal(%s, %v) want %v, got %v", setA, nil, false, true) + } + + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check Equal returns true for a set and itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check Equal returns false for sets of non-equal length. + setB := NewIntSet(input2...) + + if equal := setA.Equal(setB); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setB, false, true) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) + + // Check Equal returns false for equal-length sets with different elements. + setC := NewIntSet(input3...) + + if equal := setA.Equal(setC); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setC, false, true) + } + + if equal := setC.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setC, setA, false, true) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setC, input3) + + // Check Equal returns true for a set with itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Also check the source is unchanged. + checkSameIntSet(t, setA, input1) + + // Check Equal returns true for two separate equal sets. + anotherA := NewIntSet(input1...) + + if equal := setA.Equal(anotherA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, anotherA, true, false) + } + + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, anotherA, input1) + + // Check for equality comparing to nil struct. + var nilSet *IntSet + if equal := nilSet.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, setA, false, true) + } + if equal := setA.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, nilSet, false, true) + } + if equal := nilSet.Equal(nilSet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, nilSet, true, false) + } + + // Edge case: consider the empty set to be different than the nil set. + emptySet := NewIntSet() + if equal := nilSet.Equal(emptySet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, emptySet, false, true) + } + if equal := emptySet.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, nilSet, false, true) + } + if equal := emptySet.Equal(emptySet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, emptySet, true, false) + } +} + +func TestIntSet_Union(t *testing.T) { + input1 := []int{1, 3, 4, 5, 6} + input2 := []int{2, 3, 5} + + // Check Union(nil) returns a copy of the receiver. + setA := NewIntSet(input1...) + got := setA.Union(nil) + checkSameIntSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameIntSet(t, setA, input1) + + // Check Union returns the correct result. + setB := NewIntSet(input2...) + got = setA.Union(setB) + want := []int{1, 2, 3, 4, 5, 6} + checkSameIntSet(t, got, want) + // Also check the sources are unchanged. + checkSameIntSet(t, setA, input1) + checkSameIntSet(t, setB, input2) + + // Reverse the inputs and verify Union produces the same results. + setA = NewIntSet(input2...) + setB = NewIntSet(input1...) + got = setA.Union(setB) + checkSameIntSet(t, got, want) + // Check the sources are again unchanged. + checkSameIntSet(t, setA, input2) + checkSameIntSet(t, setB, input1) +} diff --git a/stringclassifier/internal/sets/sets.go b/stringclassifier/internal/sets/sets.go new file mode 100644 index 0000000..d06947d --- /dev/null +++ b/stringclassifier/internal/sets/sets.go @@ -0,0 +1,20 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sets provides sets for storing collections of unique elements. +package sets + +// present is an empty struct used as the "value" in the map[int], since empty +// structs consume zero bytes (unlike one unnecessary byte per bool). +type present struct{} |