aboutsummaryrefslogtreecommitdiff
path: root/stringclassifier/internal
diff options
context:
space:
mode:
Diffstat (limited to 'stringclassifier/internal')
-rw-r--r--stringclassifier/internal/pq/priority.go111
-rw-r--r--stringclassifier/internal/pq/priority_test.go181
-rw-r--r--stringclassifier/internal/sets/intset.go227
-rw-r--r--stringclassifier/internal/sets/intset_test.go438
-rw-r--r--stringclassifier/internal/sets/sets.go20
5 files changed, 977 insertions, 0 deletions
diff --git a/stringclassifier/internal/pq/priority.go b/stringclassifier/internal/pq/priority.go
new file mode 100644
index 0000000..d1797c7
--- /dev/null
+++ b/stringclassifier/internal/pq/priority.go
@@ -0,0 +1,111 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pq provides a priority queue.
+package pq
+
+import "container/heap"
+
+// NewQueue returns an unbounded priority queue that compares elements using
+// less; the minimal element is at the top of the queue.
+//
+// If setIndex is not nil, the queue calls setIndex to inform each element of
+// its position in the queue. If an element's priority changes, its position in
+// the queue may be incorrect. Call Fix on the element's index to update the
+// queue. Call Remove on the element's index to remove it from the queue.
+func NewQueue(less func(x, y interface{}) bool, setIndex func(x interface{}, idx int)) *Queue {
+ return &Queue{
+ heap: pqHeap{
+ less: less,
+ setIndex: setIndex,
+ },
+ }
+}
+
+// Queue is a priority queue that supports updating the priority of an element.
+// A Queue must be created with NewQueue.
+type Queue struct {
+ heap pqHeap
+}
+
+// Len returns the number of elements in the queue.
+func (pq *Queue) Len() int {
+ return pq.heap.Len()
+}
+
+// Push adds x to the queue.
+func (pq *Queue) Push(x interface{}) {
+ heap.Push(&pq.heap, x)
+}
+
+// Min returns the minimal element.
+// Min panics if the queue is empty.
+func (pq *Queue) Min() interface{} {
+ return pq.heap.a[0]
+}
+
+// Pop removes and returns the minimal element.
+// Pop panics if the queue is empty.
+func (pq *Queue) Pop() interface{} {
+ return heap.Pop(&pq.heap)
+}
+
+// Fix adjusts the heap to reflect that the element at index has changed priority.
+func (pq *Queue) Fix(index int) {
+ heap.Fix(&pq.heap, index)
+}
+
+// Remove removes the element at index i from the heap.
+func (pq *Queue) Remove(index int) {
+ heap.Remove(&pq.heap, index)
+}
+
+// pqHeap implements heap.Interface.
+type pqHeap struct {
+ a []interface{}
+ less func(x, y interface{}) bool
+ setIndex func(x interface{}, idx int)
+}
+
+func (h pqHeap) Len() int {
+ return len(h.a)
+}
+
+func (h pqHeap) Less(i, j int) bool {
+ return h.less(h.a[i], h.a[j])
+}
+
+func (h pqHeap) Swap(i, j int) {
+ h.a[i], h.a[j] = h.a[j], h.a[i]
+ if h.setIndex != nil {
+ h.setIndex(h.a[i], i)
+ h.setIndex(h.a[j], j)
+ }
+}
+
+func (h *pqHeap) Push(x interface{}) {
+ n := len(h.a)
+ if h.setIndex != nil {
+ h.setIndex(x, n)
+ }
+ h.a = append(h.a, x)
+}
+
+func (h *pqHeap) Pop() interface{} {
+ old := h.a
+ n := len(old)
+ x := old[n-1]
+ h.a = old[:n-1]
+ return x
+}
diff --git a/stringclassifier/internal/pq/priority_test.go b/stringclassifier/internal/pq/priority_test.go
new file mode 100644
index 0000000..a77a301
--- /dev/null
+++ b/stringclassifier/internal/pq/priority_test.go
@@ -0,0 +1,181 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package pq
+
+import "testing"
+
+func TestQueue(t *testing.T) {
+ pq := NewQueue(func(x, y interface{}) bool {
+ return x.(string) < y.(string)
+ }, nil)
+ if n := pq.Len(); n != 0 {
+ t.Fatalf("pq.Len() = %d want 0", n)
+ }
+ pq.Push("Go")
+ pq.Push("C++")
+ pq.Push("Java")
+ for i, want := range []string{"C++", "Go", "Java"} {
+ wantLen := 3 - i
+ if n := pq.Len(); n != wantLen {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen)
+ }
+ if s := pq.Min().(string); s != want {
+ t.Fatalf("pq.Min() = %q want %q", s, want)
+ }
+ if n := pq.Len(); n != wantLen {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen)
+ }
+ if s := pq.Pop().(string); s != want {
+ t.Fatalf("pq.Pop() = %q want %q", s, want)
+ }
+ if n := pq.Len(); n != wantLen-1 {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen-1)
+ }
+ }
+ if n := pq.Len(); n != 0 {
+ t.Fatalf("pq.Len() = %d want 0", n)
+ }
+}
+
+// Test that reprioritizing an item works.
+func TestFix(t *testing.T) {
+ type item struct {
+ value int
+ index int
+ }
+ pq := NewQueue(func(x, y interface{}) bool {
+ return x.(*item).value < y.(*item).value
+ }, func(x interface{}, index int) {
+ x.(*item).index = index
+ })
+ if n := pq.Len(); n != 0 {
+ t.Fatalf("pq.Len() = %d want 0", n)
+ }
+ i1 := &item{value: 1}
+ i2 := &item{value: 2}
+ i3 := &item{value: 3}
+ pq.Push(i3)
+ pq.Push(i1)
+ pq.Push(i2)
+ if n := pq.Len(); n != 3 {
+ t.Fatalf("pq.Len() = %d want 3", n)
+ }
+ for i, it := range []*item{i1, i2, i3} {
+ if i == 0 && it.index != 0 {
+ t.Errorf("item %+v want index 0", it)
+ }
+ if it.value != i+1 {
+ t.Errorf("item %+v want value %d", it, i+1)
+ }
+ }
+ i1.value = 4
+ pq.Fix(i1.index)
+ if n := pq.Len(); n != 3 {
+ t.Fatalf("pq.Len() = %d want 3", n)
+ }
+ for i, it := range []*item{i2, i3, i1} {
+ if i == 0 && it.index != 0 {
+ t.Errorf("item %+v want index 0", it)
+ }
+ if it.value != i+2 {
+ t.Errorf("item %+v want value %d", it, i+2)
+ }
+ }
+ for i, want := range []int{2, 3, 4} {
+ wantLen := 3 - i
+ if n := pq.Len(); n != wantLen {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen)
+ }
+ if it := pq.Min().(*item); it.value != want {
+ t.Fatalf("pq.Min() = %+v want value %d", it, want)
+ }
+ if n := pq.Len(); n != wantLen {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen)
+ }
+ if it := pq.Pop().(*item); it.value != want {
+ t.Fatalf("pq.Pop() = %+v want value %d", it, want)
+ }
+ if n := pq.Len(); n != wantLen-1 {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen-1)
+ }
+ }
+ if n := pq.Len(); n != 0 {
+ t.Fatalf("pq.Len() = %d want 0", n)
+ }
+}
+
+func TestRemove(t *testing.T) {
+ type item struct {
+ value int
+ index int
+ }
+ pq := NewQueue(func(x, y interface{}) bool {
+ return x.(*item).value < y.(*item).value
+ }, func(x interface{}, index int) {
+ x.(*item).index = index
+ })
+ if n := pq.Len(); n != 0 {
+ t.Fatalf("pq.Len() = %d want 0", n)
+ }
+ i1 := &item{value: 1}
+ i2 := &item{value: 2}
+ i3 := &item{value: 3}
+ pq.Push(i3)
+ pq.Push(i1)
+ pq.Push(i2)
+ if n := pq.Len(); n != 3 {
+ t.Fatalf("pq.Len() = %d want 3", n)
+ }
+ for i, it := range []*item{i1, i2, i3} {
+ if i == 0 && it.index != 0 {
+ t.Errorf("item %+v want index 0", it)
+ }
+ if it.value != i+1 {
+ t.Errorf("item %+v want value %d", it, i+1)
+ }
+ }
+ pq.Remove(i3.index)
+ if n := pq.Len(); n != 2 {
+ t.Fatalf("pq.Len() = %d want 2", n)
+ }
+ for i, it := range []*item{i1, i2} {
+ if i == 0 && it.index != 0 {
+ t.Errorf("item %+v want index 0", it)
+ }
+ if it.value != i+1 {
+ t.Errorf("item %+v want value %d", it, i+2)
+ }
+ }
+ for i, want := range []int{1, 2} {
+ wantLen := 2 - i
+ if n := pq.Len(); n != wantLen {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen)
+ }
+ if it := pq.Min().(*item); it.value != want {
+ t.Fatalf("pq.Min() = %+v want value %d", it, want)
+ }
+ if n := pq.Len(); n != wantLen {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen)
+ }
+ if it := pq.Pop().(*item); it.value != want {
+ t.Fatalf("pq.Pop() = %+v want value %d", it, want)
+ }
+ if n := pq.Len(); n != wantLen-1 {
+ t.Fatalf("pq.Len() = %d want %d", n, wantLen-1)
+ }
+ }
+ if n := pq.Len(); n != 0 {
+ t.Fatalf("pq.Len() = %d want 0", n)
+ }
+}
diff --git a/stringclassifier/internal/sets/intset.go b/stringclassifier/internal/sets/intset.go
new file mode 100644
index 0000000..42f7368
--- /dev/null
+++ b/stringclassifier/internal/sets/intset.go
@@ -0,0 +1,227 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package sets
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+)
+
+// IntSet stores a set of unique int elements.
+type IntSet struct {
+ set map[int]present
+}
+
+// NewIntSet creates an IntSet containing the supplied initial int elements.
+func NewIntSet(elements ...int) *IntSet {
+ s := &IntSet{}
+ s.set = make(map[int]present)
+ s.Insert(elements...)
+ return s
+}
+
+// Copy returns a newly allocated copy of the supplied IntSet.
+func (s *IntSet) Copy() *IntSet {
+ c := NewIntSet()
+ if s != nil {
+ for e := range s.set {
+ c.set[e] = present{}
+ }
+ }
+ return c
+}
+
+// Insert zero or more int elements into the IntSet. As expected for a Set,
+// elements already present in the IntSet are simply ignored.
+func (s *IntSet) Insert(elements ...int) {
+ for _, e := range elements {
+ s.set[e] = present{}
+ }
+}
+
+// Delete zero or more int elements from the IntSet. Any elements not present
+// in the IntSet are simply ignored.
+func (s *IntSet) Delete(elements ...int) {
+ for _, e := range elements {
+ delete(s.set, e)
+ }
+}
+
+// Intersect returns a new IntSet containing the intersection of the receiver
+// and argument IntSets. Returns an empty set if the argument is nil.
+func (s *IntSet) Intersect(other *IntSet) *IntSet {
+ if other == nil {
+ return NewIntSet()
+ }
+
+ // Point a and b to the maps, setting a to the smaller of the two.
+ a, b := s.set, other.set
+ if len(b) < len(a) {
+ a, b = b, a
+ }
+
+ // Perform the intersection.
+ intersect := NewIntSet()
+ for e := range a {
+ if _, ok := b[e]; ok {
+ intersect.set[e] = present{}
+ }
+ }
+ return intersect
+}
+
+// Disjoint returns true if the intersection of the receiver and the argument
+// IntSets is the empty set. Returns true if the argument is nil or either
+// IntSet is the empty set.
+func (s *IntSet) Disjoint(other *IntSet) bool {
+ if other == nil || len(other.set) == 0 || len(s.set) == 0 {
+ return true
+ }
+
+ // Point a and b to the maps, setting a to the smaller of the two.
+ a, b := s.set, other.set
+ if len(b) < len(a) {
+ a, b = b, a
+ }
+
+ // Check for non-empty intersection.
+ for e := range a {
+ if _, ok := b[e]; ok {
+ return false // Early-exit because intersecting.
+ }
+ }
+ return true
+}
+
+// Difference returns a new IntSet containing the elements in the receiver that
+// are not present in the argument IntSet. Returns a copy of the receiver if
+// the argument is nil.
+func (s *IntSet) Difference(other *IntSet) *IntSet {
+ if other == nil {
+ return s.Copy()
+ }
+
+ // Insert only the elements in the receiver that are not present in the
+ // argument IntSet.
+ diff := NewIntSet()
+ for e := range s.set {
+ if _, ok := other.set[e]; !ok {
+ diff.set[e] = present{}
+ }
+ }
+ return diff
+}
+
+// Unique returns a new IntSet containing the elements in the receiver that are
+// not present in the argument IntSet *and* the elements in the argument IntSet
+// that are not in the receiver. Returns a copy of the receiver if the argument
+// is nil.
+func (s *IntSet) Unique(other *IntSet) *IntSet {
+ if other == nil {
+ return s.Copy()
+ }
+
+ sNotInOther := s.Difference(other)
+ otherNotInS := other.Difference(s)
+
+ // Duplicate Union implementation here to avoid extra Copy, since both
+ // sNotInOther and otherNotInS are already copies.
+ unique := sNotInOther
+ for e := range otherNotInS.set {
+ unique.set[e] = present{}
+ }
+ return unique
+}
+
+// Equal returns true if the receiver and the argument IntSet contain exactly
+// the same elements. Returns false if the argument is nil.
+func (s *IntSet) Equal(other *IntSet) bool {
+ if s == nil || other == nil {
+ return s == nil && other == nil
+ }
+
+ // Two sets of different length cannot have the exact same unique
+ // elements.
+ if len(s.set) != len(other.set) {
+ return false
+ }
+
+ // Only one loop is needed. If the two sets are known to be of equal
+ // length, then the two sets are equal only if exactly all of the
+ // elements in the first set are found in the second.
+ for e := range s.set {
+ if _, ok := other.set[e]; !ok {
+ return false
+ }
+ }
+
+ return true
+}
+
+// Union returns a new IntSet containing the union of the receiver and argument
+// IntSets. Returns a copy of the receiver if the argument is nil.
+func (s *IntSet) Union(other *IntSet) *IntSet {
+ union := s.Copy()
+ if other != nil {
+ for e := range other.set {
+ union.set[e] = present{}
+ }
+ }
+ return union
+}
+
+// Contains returns true if element is in the IntSet.
+func (s *IntSet) Contains(element int) bool {
+ _, in := s.set[element]
+ return in
+}
+
+// Len returns the number of unique elements in the IntSet.
+func (s *IntSet) Len() int {
+ return len(s.set)
+}
+
+// Empty returns true if the receiver is the empty set.
+func (s *IntSet) Empty() bool {
+ return len(s.set) == 0
+}
+
+// Elements returns a []int of the elements in the IntSet, in no particular (or
+// consistent) order.
+func (s *IntSet) Elements() []int {
+ elements := []int{} // Return at least an empty slice rather than nil.
+ for e := range s.set {
+ elements = append(elements, e)
+ }
+ return elements
+}
+
+// Sorted returns a sorted []int of the elements in the IntSet.
+func (s *IntSet) Sorted() []int {
+ elements := s.Elements()
+ sort.Ints(elements)
+ return elements
+}
+
+// String formats the IntSet elements as sorted ints, representing them in
+// "array initializer" syntax.
+func (s *IntSet) String() string {
+ elements := s.Sorted()
+ var quoted []string
+ for _, e := range elements {
+ quoted = append(quoted, fmt.Sprintf("\"%d\"", e))
+ }
+ return fmt.Sprintf("{%s}", strings.Join(quoted, ", "))
+}
diff --git a/stringclassifier/internal/sets/intset_test.go b/stringclassifier/internal/sets/intset_test.go
new file mode 100644
index 0000000..e77a1f9
--- /dev/null
+++ b/stringclassifier/internal/sets/intset_test.go
@@ -0,0 +1,438 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package sets
+
+import (
+ "sort"
+ "testing"
+)
+
+func checkSameIntSet(t *testing.T, set *IntSet, unique []int) {
+ // Check that lengths are the same.
+ want := len(unique)
+ got := set.Len()
+
+ if got != want {
+ t.Errorf("NewIntSet(%v) want length %v, got %v", unique, want, got)
+ }
+
+ // Check that all ints are present in set.
+ for _, s := range unique {
+ want := true
+ got := set.Contains(s)
+
+ if got != want {
+ t.Errorf("Contains(%v) want %v, got %v", s, want, got)
+ }
+ }
+
+ // Check that all elements are present in ints.
+ sort.Ints(unique)
+
+ for i, got := range set.Sorted() {
+ want := unique[i]
+ if got != want {
+ t.Errorf("Sorted(%d) want %v, got %v", i, want, got)
+ }
+ }
+}
+
+type enumTest int
+
+const (
+ et0 enumTest = iota
+ et1
+ et2
+ et3
+ et4
+)
+
+func TestNewIntSet(t *testing.T) {
+ empty := NewIntSet()
+ want := 0
+ got := empty.Len()
+
+ if got != want {
+ t.Errorf("NewIntSet() want length %v, got %v", want, got)
+ }
+
+ unique := []int{0, 1, 2}
+ set := NewIntSet(unique...)
+ checkSameIntSet(t, set, unique)
+
+ // Append an already-present element.
+ nonUnique := append(unique, unique[0])
+ set = NewIntSet(nonUnique...)
+
+ // Non-unique unique should collapse to one.
+ want = len(unique)
+ got = set.Len()
+
+ if got != want {
+ t.Errorf("NewIntSet(%v) want length %v, got %v", nonUnique, want, got)
+ }
+
+ // Initialize with enum values cast to int.
+ set = NewIntSet(int(et0), int(et1), int(et2))
+ checkSameIntSet(t, set, unique)
+}
+
+func TestIntSet_Copy(t *testing.T) {
+ // Check both copies represent the same set.
+ base := []int{1, 2, 3}
+ orig := NewIntSet(base...)
+ cpy := orig.Copy()
+ checkSameIntSet(t, orig, base)
+ checkSameIntSet(t, cpy, base)
+
+ // Check the two copies are independent.
+ more := []int{4}
+ orig.Insert(more...)
+ more = append(base, more...)
+ checkSameIntSet(t, orig, more)
+ checkSameIntSet(t, cpy, base)
+}
+
+func TestIntSet_Insert(t *testing.T) {
+ unique := []int{0, 1, 2}
+ set := NewIntSet(unique...)
+
+ // Insert existing element, which should basically be a no-op.
+ set.Insert(unique[0])
+ checkSameIntSet(t, set, unique)
+
+ // Actually insert new unique elements (cast from enum values this time).
+ additional := []int{int(et3), int(et4)}
+ longer := append(unique, additional...)
+ set.Insert(additional...)
+ checkSameIntSet(t, set, longer)
+}
+
+func TestIntSet_Delete(t *testing.T) {
+ unique := []int{0, 1, 2}
+ set := NewIntSet(unique...)
+
+ // Delete non-existent element, which should basically be a no-op.
+ set.Delete(int(et4))
+ checkSameIntSet(t, set, unique)
+
+ // Actually delete existing elements.
+ set.Delete(unique[1:]...)
+ checkSameIntSet(t, set, unique[:1])
+}
+
+func TestIntSet_Intersect(t *testing.T) {
+ input1 := []int{1, 3, 4, 5, 6}
+ input2 := []int{2, 3, 5}
+
+ // Check Intersect(nil) returns an empty set.
+ setA := NewIntSet(input1...)
+ got := setA.Intersect(nil)
+ checkSameIntSet(t, got, []int{})
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check Intersect returns the correct result.
+ setB := NewIntSet(input2...)
+ got = setA.Intersect(setB)
+ want := []int{3, 5}
+ checkSameIntSet(t, got, want)
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+
+ // Reverse the inputs and verify Intersect produces the same results.
+ setA = NewIntSet(input2...)
+ setB = NewIntSet(input1...)
+ got = setA.Intersect(setB)
+ checkSameIntSet(t, got, want)
+ // Check the sources are again unchanged.
+ checkSameIntSet(t, setA, input2)
+ checkSameIntSet(t, setB, input1)
+}
+
+func TestIntSet_Disjoint(t *testing.T) {
+ input1 := []int{1, 3, 4, 5, 6}
+ input2 := []int{2, 3, 5}
+ input3 := []int{98, 99, 100}
+
+ // Check that sets are always disjoint with the empty set or nil
+ setA := NewIntSet(input1...)
+ emptySet := NewIntSet()
+
+ if disjoint := setA.Disjoint(nil); !disjoint {
+ t.Errorf("Disjoint(%s, %v) want %v, got %v", setA, nil, true, disjoint)
+ }
+
+ if disjoint := setA.Disjoint(emptySet); !disjoint {
+ t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, emptySet, true, disjoint)
+ }
+
+ if disjoint := emptySet.Disjoint(setA); !disjoint {
+ t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, setA, true, disjoint)
+ }
+
+ if disjoint := emptySet.Disjoint(emptySet); !disjoint {
+ t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, emptySet, true, disjoint)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, emptySet, []int{})
+
+ // Check two non-empty, non-nil disjoint sets.
+ setC := NewIntSet(input3...)
+
+ if disjoint := setA.Disjoint(setC); !disjoint {
+ t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setC, true, disjoint)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setC, input3)
+
+ // Check that two intersecting sets are not Disjoint.
+ setB := NewIntSet(input2...)
+
+ if disjoint := setA.Disjoint(setB); disjoint {
+ t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setB, false, disjoint)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+}
+
+func TestIntSet_Difference(t *testing.T) {
+ input1 := []int{1, 3, 4, 5, 6}
+ input2 := []int{2, 3, 5}
+ input3 := []int{98, 99, 100}
+
+ // Check Difference(nil) returns a copy of the receiver.
+ setA := NewIntSet(input1...)
+ got := setA.Difference(nil)
+ checkSameIntSet(t, got, input1)
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check A - A returns the empty set.
+ got = setA.Difference(setA)
+
+ if !got.Empty() {
+ t.Errorf("Difference(%s, %s).Empty() want %v, got %v",
+ setA, setA, true, false)
+ }
+
+ checkSameIntSet(t, got, []int{})
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check A - C simply returns elements in A if A and C are disjoint.
+ setC := NewIntSet(input3...)
+ got = setA.Difference(setC)
+ checkSameIntSet(t, got, input1)
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setC, input3)
+
+ // Check A - B returns elements in A not in B.
+ setB := NewIntSet(input2...)
+ got = setA.Difference(setB)
+ want := []int{1, 4, 6}
+ checkSameIntSet(t, got, want)
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+
+ // Check B - A returns elements in B not in A.
+ got = setB.Difference(setA)
+ want = []int{2}
+ checkSameIntSet(t, got, want)
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+}
+
+func TestIntSet_Unique(t *testing.T) {
+ input1 := []int{1, 3, 4, 5, 6}
+ input2 := []int{2, 3, 5}
+ input3 := []int{98, 99, 100}
+
+ // Check Unique(nil) returns a copy of the receiver.
+ setA := NewIntSet(input1...)
+ got := setA.Unique(nil)
+ checkSameIntSet(t, got, input1)
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check Unique returns only elements in A and B not in both A and B.
+ setB := NewIntSet(input2...)
+ got = setA.Unique(setB)
+ want := []int{1, 2, 4, 6}
+ checkSameIntSet(t, got, want)
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+
+ // Check Unique of two disjoint sets is the Union of those sets.
+ setC := NewIntSet(input3...)
+ got = setA.Unique(setC)
+ union := setA.Union(setC)
+
+ if equal := union.Equal(got); !equal {
+ t.Errorf("Union of disjoint Equal(%s, %s) want %v, got %v",
+ union, got, true, equal)
+ }
+
+ // Check Unique is the Union of A - B and B - A.
+ aNotInB := setA.Difference(setB)
+ bNotInA := setB.Difference(setA)
+ union = aNotInB.Union(bNotInA)
+ want = []int{1, 2, 4, 6}
+ checkSameIntSet(t, union, want)
+ got = setA.Unique(setB)
+
+ if equal := union.Equal(got); !equal {
+ t.Errorf("Union of differences Equal(%s, %s) want %v, got %v",
+ union, got, true, equal)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+}
+
+func TestIntSet_Equal(t *testing.T) {
+ input1 := []int{1, 3, 4, 5, 6}
+ input2 := []int{2, 3, 5}
+ input3 := []int{1, 3, 4, 5, 7}
+
+ // Check Equal(nil) returns false.
+ setA := NewIntSet(input1...)
+
+ if equal := setA.Equal(nil); equal {
+ t.Errorf("Equal(%s, %v) want %v, got %v", setA, nil, false, true)
+ }
+
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check Equal returns true for a set and itself.
+ if equal := setA.Equal(setA); !equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false)
+ }
+
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check Equal returns false for sets of non-equal length.
+ setB := NewIntSet(input2...)
+
+ if equal := setA.Equal(setB); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setA, setB, false, true)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+
+ // Check Equal returns false for equal-length sets with different elements.
+ setC := NewIntSet(input3...)
+
+ if equal := setA.Equal(setC); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setA, setC, false, true)
+ }
+
+ if equal := setC.Equal(setA); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setC, setA, false, true)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setC, input3)
+
+ // Check Equal returns true for a set with itself.
+ if equal := setA.Equal(setA); !equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false)
+ }
+
+ // Also check the source is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check Equal returns true for two separate equal sets.
+ anotherA := NewIntSet(input1...)
+
+ if equal := setA.Equal(anotherA); !equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setA, anotherA, true, false)
+ }
+
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, anotherA, input1)
+
+ // Check for equality comparing to nil struct.
+ var nilSet *IntSet
+ if equal := nilSet.Equal(setA); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, setA, false, true)
+ }
+ if equal := setA.Equal(nilSet); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", setA, nilSet, false, true)
+ }
+ if equal := nilSet.Equal(nilSet); !equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, nilSet, true, false)
+ }
+
+ // Edge case: consider the empty set to be different than the nil set.
+ emptySet := NewIntSet()
+ if equal := nilSet.Equal(emptySet); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, emptySet, false, true)
+ }
+ if equal := emptySet.Equal(nilSet); equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, nilSet, false, true)
+ }
+ if equal := emptySet.Equal(emptySet); !equal {
+ t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, emptySet, true, false)
+ }
+}
+
+func TestIntSet_Union(t *testing.T) {
+ input1 := []int{1, 3, 4, 5, 6}
+ input2 := []int{2, 3, 5}
+
+ // Check Union(nil) returns a copy of the receiver.
+ setA := NewIntSet(input1...)
+ got := setA.Union(nil)
+ checkSameIntSet(t, got, input1)
+ // Check that the receiver is unchanged.
+ checkSameIntSet(t, setA, input1)
+
+ // Check Union returns the correct result.
+ setB := NewIntSet(input2...)
+ got = setA.Union(setB)
+ want := []int{1, 2, 3, 4, 5, 6}
+ checkSameIntSet(t, got, want)
+ // Also check the sources are unchanged.
+ checkSameIntSet(t, setA, input1)
+ checkSameIntSet(t, setB, input2)
+
+ // Reverse the inputs and verify Union produces the same results.
+ setA = NewIntSet(input2...)
+ setB = NewIntSet(input1...)
+ got = setA.Union(setB)
+ checkSameIntSet(t, got, want)
+ // Check the sources are again unchanged.
+ checkSameIntSet(t, setA, input2)
+ checkSameIntSet(t, setB, input1)
+}
diff --git a/stringclassifier/internal/sets/sets.go b/stringclassifier/internal/sets/sets.go
new file mode 100644
index 0000000..d06947d
--- /dev/null
+++ b/stringclassifier/internal/sets/sets.go
@@ -0,0 +1,20 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package sets provides sets for storing collections of unique elements.
+package sets
+
+// present is an empty struct used as the "value" in the map[int], since empty
+// structs consume zero bytes (unlike one unnecessary byte per bool).
+type present struct{}