From 85ea7edc49509df714ac09a6166e514b05e30d6d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 9 Apr 2017 20:49:36 -0700 Subject: Initial commit of license classifier. --- internal/sets/sets.go | 20 ++ internal/sets/sets_benchmark_test.go | 213 ++++++++++++++++++ internal/sets/stringset.go | 228 +++++++++++++++++++ internal/sets/stringset_test.go | 425 +++++++++++++++++++++++++++++++++++ 4 files changed, 886 insertions(+) create mode 100644 internal/sets/sets.go create mode 100644 internal/sets/sets_benchmark_test.go create mode 100644 internal/sets/stringset.go create mode 100644 internal/sets/stringset_test.go (limited to 'internal/sets') diff --git a/internal/sets/sets.go b/internal/sets/sets.go new file mode 100644 index 0000000..f34ae5b --- /dev/null +++ b/internal/sets/sets.go @@ -0,0 +1,20 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sets provides sets for storing collections of unique elements. +package sets + +// present is an empty struct used as the "value" in the map[int], since +// empty structs consume zero bytes (unlike 1 unnecessary byte per bool). +type present struct{} diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go new file mode 100644 index 0000000..017dca6 --- /dev/null +++ b/internal/sets/sets_benchmark_test.go @@ -0,0 +1,213 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "strings" + "testing" +) + +const ( + postmodernThesisCollapse = `1. Expressions of collapse + +If one examines postcultural Marxism, one is faced with a choice: either +reject capitalist submodern theory or conclude that the purpose of the reader +is significant form. Bataille uses the term ‘capitalist construction’ to denote +not, in fact, discourse, but prediscourse. + +Therefore, in Stardust, Gaiman analyses postcultural Marxism; in +The Books of Magic, although, he denies capitalist submodern theory. If +capitalist construction holds, we have to choose between capitalist submodern +theory and Baudrillardist simulacra. + +However, conceptualist socialism implies that narrativity may be used to +oppress the proletariat, given that sexuality is distinct from art. The subject +is interpolated into a capitalist construction that includes language as a +paradox. +` + postmodernThesisNarratives = `1. Narratives of failure + +The main theme of the works of Joyce is the defining characteristic, and some +would say the economy, of neocultural class. But Bataille promotes the use of +socialist realism to deconstruct sexual identity. + +The subject is interpolated into a Baudrillardist simulation that includes +consciousness as a whole. Thus, the primary theme of Pickett's[1] model of +socialist realism is the role of the reader as artist. + +The subject is contextualised into a postcapitalist discourse that includes +language as a paradox. It could be said that if Baudrillardist simulation +holds, the works of Gibson are postmodern. The characteristic theme of the +works of Gibson is the common ground between society and narrativity. However, +Sartre uses the term 'postcapitalist discourse' to denote not, in fact, +narrative, but postnarrative. +` +) + +var ( + // Word lists: + stringsA = strings.Fields(postmodernThesisCollapse) + stringsB = strings.Fields(postmodernThesisNarratives) +) + +func BenchmarkStringSets_NewStringSet(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + NewStringSet(stringsA...) + } +} + +func BenchmarkStringSets_Copy(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Copy() + } +} + +func BenchmarkStringSets_Insert(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet() + s.Insert(stringsA...) + s.Insert(stringsB...) + } +} + +func BenchmarkStringSets_Delete(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet(stringsA...) + s.Delete(stringsB...) + } +} + +func BenchmarkStringSets_Intersect(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Intersect(t) + } +} + +func BenchmarkStringSets_Disjoint(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Disjoint(t) + } +} + +func BenchmarkStringSets_Difference(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Difference(t) + } +} + +func BenchmarkStringSets_Unique(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Unique(t) + } +} + +func BenchmarkStringSets_Equal(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Equal(t) + } +} + +func BenchmarkStringSets_Union(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Union(t) + } +} + +func BenchmarkStringSets_Contains(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, w := range stringsB { + s.Contains(w) + } + } +} + +func BenchmarkStringSets_Len(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Len() + } +} + +func BenchmarkStringSets_Empty(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Empty() + t.Empty() + } +} + +func BenchmarkStringSets_Elements(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Elements() + } +} + +func BenchmarkStringSets_Sorted(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Sorted() + } +} + +func BenchmarkStringSets_String(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.String() + } +} diff --git a/internal/sets/stringset.go b/internal/sets/stringset.go new file mode 100644 index 0000000..9f93450 --- /dev/null +++ b/internal/sets/stringset.go @@ -0,0 +1,228 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "fmt" + "sort" + "strings" +) + +// StringSet stores a set of unique string elements. +type StringSet struct { + set map[string]present +} + +// NewStringSet creates a StringSet containing the supplied initial string elements. +func NewStringSet(elements ...string) *StringSet { + s := &StringSet{} + s.set = make(map[string]present) + s.Insert(elements...) + return s +} + +// Copy returns a newly allocated copy of the supplied StringSet. +func (s *StringSet) Copy() *StringSet { + c := NewStringSet() + if s != nil { + for e := range s.set { + c.set[e] = present{} + } + } + return c +} + +// Insert zero or more string elements into the StringSet. +// As expected for a Set, elements already present in the StringSet are +// simply ignored. +func (s *StringSet) Insert(elements ...string) { + for _, e := range elements { + s.set[e] = present{} + } +} + +// Delete zero or more string elements from the StringSet. +// Any elements not present in the StringSet are simply ignored. +func (s *StringSet) Delete(elements ...string) { + for _, e := range elements { + delete(s.set, e) + } +} + +// Intersect returns a new StringSet containing the intersection of the +// receiver and argument StringSets. Returns an empty set if the argument is nil. +func (s *StringSet) Intersect(other *StringSet) *StringSet { + if other == nil { + return NewStringSet() + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Perform the intersection. + intersect := NewStringSet() + for e := range a { + if _, ok := b[e]; ok { + intersect.set[e] = present{} + } + } + return intersect +} + +// Disjoint returns true if the intersection of the receiver and the argument +// StringSets is the empty set. Returns true if the argument is nil or either +// StringSet is the empty set. +func (s *StringSet) Disjoint(other *StringSet) bool { + if other == nil || len(other.set) == 0 || len(s.set) == 0 { + return true + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Check for non-empty intersection. + for e := range a { + if _, ok := b[e]; ok { + return false // Early-exit because intersecting. + } + } + return true +} + +// Difference returns a new StringSet containing the elements in the receiver +// that are not present in the argument StringSet. Returns a copy of the +// receiver if the argument is nil. +func (s *StringSet) Difference(other *StringSet) *StringSet { + if other == nil { + return s.Copy() + } + + // Insert only the elements in the receiver that are not present in the + // argument StringSet. + diff := NewStringSet() + for e := range s.set { + if _, ok := other.set[e]; !ok { + diff.set[e] = present{} + } + } + return diff +} + +// Unique returns a new StringSet containing the elements in the receiver +// that are not present in the argument StringSet *and* the elements in the +// argument StringSet that are not in the receiver (which is the union of two +// disjoint sets). Returns a copy of the +// receiver if the argument is nil. +func (s *StringSet) Unique(other *StringSet) *StringSet { + if other == nil { + return s.Copy() + } + + sNotInOther := s.Difference(other) + otherNotInS := other.Difference(s) + + // Duplicate Union implementation here to avoid extra Copy, since both + // sNotInOther and otherNotInS are already copies. + unique := sNotInOther + for e := range otherNotInS.set { + unique.set[e] = present{} + } + return unique +} + +// Equal returns true if the receiver and the argument StringSet contain +// exactly the same elements. Returns false if the argument is nil. +func (s *StringSet) Equal(other *StringSet) bool { + if s == nil || other == nil { + return s == nil && other == nil + } + + // Two sets of different length cannot have the exact same unique elements. + if len(s.set) != len(other.set) { + return false + } + + // Only one loop is needed. If the two sets are known to be of equal + // length, then the two sets are equal only if exactly all of the elements + // in the first set are found in the second. + for e := range s.set { + if _, ok := other.set[e]; !ok { + return false + } + } + + return true +} + +// Union returns a new StringSet containing the union of the receiver and +// argument StringSets. Returns a copy of the receiver if the argument is nil. +func (s *StringSet) Union(other *StringSet) *StringSet { + union := s.Copy() + if other != nil { + for e := range other.set { + union.set[e] = present{} + } + } + return union +} + +// Contains returns true if element is in the StringSet. +func (s *StringSet) Contains(element string) bool { + _, in := s.set[element] + return in +} + +// Len returns the number of unique elements in the StringSet. +func (s *StringSet) Len() int { + return len(s.set) +} + +// Empty returns true if the receiver is the empty set. +func (s *StringSet) Empty() bool { + return len(s.set) == 0 +} + +// Elements returns a []string of the elements in the StringSet, in no +// particular (or consistent) order. +func (s *StringSet) Elements() []string { + elements := []string{} // Return at least an empty slice rather than nil. + for e := range s.set { + elements = append(elements, e) + } + return elements +} + +// Sorted returns a sorted []string of the elements in the StringSet. +func (s *StringSet) Sorted() []string { + elements := s.Elements() + sort.Strings(elements) + return elements +} + +// String formats the StringSet elements as sorted strings, representing them +// in "array initializer" syntax. +func (s *StringSet) String() string { + elements := s.Sorted() + var quoted []string + for _, e := range elements { + quoted = append(quoted, fmt.Sprintf("%q", e)) + } + return fmt.Sprintf("{%s}", strings.Join(quoted, ", ")) +} diff --git a/internal/sets/stringset_test.go b/internal/sets/stringset_test.go new file mode 100644 index 0000000..e37efaa --- /dev/null +++ b/internal/sets/stringset_test.go @@ -0,0 +1,425 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "sort" + "testing" +) + +func checkSameStringSet(t *testing.T, set *StringSet, unique []string) { + // Check that lengths are the same. + want := len(unique) + got := set.Len() + + if got != want { + t.Errorf("NewStringSet(%v) want length %v, got %v", unique, want, got) + } + + // Check that all strings are present in set. + for _, s := range unique { + want := true + got := set.Contains(s) + + if got != want { + t.Errorf("Contains(%v) want %v, got %v", s, want, got) + } + } + + // Check that all elements are present in strings. + sort.Strings(unique) + + for i, got := range set.Sorted() { + want := unique[i] + + if got != want { + t.Errorf("Sorted(%d) want %v, got %v", i, want, got) + } + } +} + +func TestNewStringSet(t *testing.T) { + empty := NewStringSet() + want := 0 + got := empty.Len() + + if got != want { + t.Errorf("NewStringSet() want length %v, got %v", want, got) + } + + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + checkSameStringSet(t, set, unique) + + // Append an already-present element. + nonUnique := append(unique, unique[0]) + set = NewStringSet(nonUnique...) + + // Non-unique unique should collapse to one. + want = len(unique) + got = set.Len() + + if got != want { + t.Errorf("NewStringSet(%v) want length %v, got %v", nonUnique, want, got) + } +} + +func TestStringSet_Copy(t *testing.T) { + // Check both copies represent the same set. + base := []string{"a", "b", "c"} + orig := NewStringSet(base...) + cpy := orig.Copy() + checkSameStringSet(t, orig, base) + checkSameStringSet(t, cpy, base) + + // Check the two copies are independent. + more := []string{"d"} + orig.Insert(more...) + more = append(base, more...) + checkSameStringSet(t, orig, more) + checkSameStringSet(t, cpy, base) +} + +func TestStringSet_Insert(t *testing.T) { + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + + // Insert existing element, which should basically be a no-op. + set.Insert(unique[0]) + checkSameStringSet(t, set, unique) + + // Actually insert new unique elements. + additional := []string{"d", "e"} + longer := append(unique, additional...) + set.Insert(additional...) + checkSameStringSet(t, set, longer) +} + +func TestStringSet_Delete(t *testing.T) { + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + + // Delete non-existent element, which should basically be a no-op. + set.Delete("z") + checkSameStringSet(t, set, unique) + + // Actually delete existing elements. + set.Delete(unique[1:]...) + checkSameStringSet(t, set, unique[:1]) +} + +func TestStringSet_Intersect(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + + // Check Intersect(nil) returns an empty set. + setA := NewStringSet(input1...) + got := setA.Intersect(nil) + checkSameStringSet(t, got, []string{}) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Intersect returns the correct result. + setB := NewStringSet(input2...) + got = setA.Intersect(setB) + want := []string{"c", "e"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Reverse the inputs and verify Intersect produces the same results. + setA = NewStringSet(input2...) + setB = NewStringSet(input1...) + got = setA.Intersect(setB) + checkSameStringSet(t, got, want) + // Check the sources are again unchanged. + checkSameStringSet(t, setA, input2) + checkSameStringSet(t, setB, input1) +} + +func TestStringSet_Disjoint(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check that sets are always disjoint with the empty set or nil + setA := NewStringSet(input1...) + emptySet := NewStringSet() + + if disjoint := setA.Disjoint(nil); !disjoint { + t.Errorf("Disjoint(%s, %v) want %v, got %v", setA, nil, true, disjoint) + } + + if disjoint := setA.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, emptySet, true, disjoint) + } + + if disjoint := emptySet.Disjoint(setA); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, setA, true, disjoint) + } + + if disjoint := emptySet.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, emptySet, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, emptySet, []string{}) + + // Check two non-empty, non-nil disjoint sets. + setC := NewStringSet(input3...) + + if disjoint := setA.Disjoint(setC); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setC, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check that two intersecting sets are not Disjoint. + setB := NewStringSet(input2...) + + if disjoint := setA.Disjoint(setB); disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setB, false, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Difference(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check Difference(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Difference(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check A - A returns the empty set. + got = setA.Difference(setA) + + if !got.Empty() { + t.Errorf("Difference(%s, %s).Empty() want %v, got %v", + setA, setA, true, false) + } + + checkSameStringSet(t, got, []string{}) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check A - C simply returns elements in A if A and C are disjoint. + setC := NewStringSet(input3...) + got = setA.Difference(setC) + checkSameStringSet(t, got, input1) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check A - B returns elements in A not in B. + setB := NewStringSet(input2...) + got = setA.Difference(setB) + want := []string{"a", "d", "f"} + checkSameStringSet(t, got, want) + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check B - A returns elements in B not in A. + got = setB.Difference(setA) + want = []string{"b"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Unique(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check Unique(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Unique(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Unique returns only elements in A and B not in both A and B. + setB := NewStringSet(input2...) + got = setA.Unique(setB) + want := []string{"a", "b", "d", "f"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check Unique of two disjoint sets is the Union of those sets. + setC := NewStringSet(input3...) + got = setA.Unique(setC) + union := setA.Union(setC) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of disjoint Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Check Unique is the Union of A - B and B - A. + aNotInB := setA.Difference(setB) + bNotInA := setB.Difference(setA) + union = aNotInB.Union(bNotInA) + want = []string{"a", "b", "d", "f"} + checkSameStringSet(t, union, want) + got = setA.Unique(setB) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of differences Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Equal(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"a", "c", "d", "e", "g"} + + // Check Equal(nil) returns false. + setA := NewStringSet(input1...) + + if equal := setA.Equal(nil); equal { + t.Errorf("Equal(%s, %v) want %v, got %v", setA, nil, false, true) + } + + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns true for a set and itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns false for sets of non-equal length. + setB := NewStringSet(input2...) + + if equal := setA.Equal(setB); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setB, false, true) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check Equal returns false for equal-length sets with different elements. + setC := NewStringSet(input3...) + + if equal := setA.Equal(setC); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setC, false, true) + } + + if equal := setC.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setC, setA, false, true) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check Equal returns true for a set with itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Also check the source is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns true for two separate equal sets. + anotherA := NewStringSet(input1...) + + if equal := setA.Equal(anotherA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, anotherA, true, false) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, anotherA, input1) + + // Check for equality comparing to nil struct. + var nilSet *StringSet + if equal := nilSet.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, setA, false, true) + } + if equal := setA.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, nilSet, false, true) + } + if equal := nilSet.Equal(nilSet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, nilSet, true, false) + } + + // Edge case: consider the empty set to be different than the nil set. + emptySet := NewStringSet() + if equal := nilSet.Equal(emptySet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, emptySet, false, true) + } + if equal := emptySet.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, nilSet, false, true) + } + if equal := emptySet.Equal(emptySet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, emptySet, true, false) + } +} + +func TestStringSet_Union(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + + // Check Union(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Union(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Union returns the correct result. + setB := NewStringSet(input2...) + got = setA.Union(setB) + want := []string{"a", "b", "c", "d", "e", "f"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Reverse the inputs and verify Union produces the same results. + setA = NewStringSet(input2...) + setB = NewStringSet(input1...) + got = setA.Union(setB) + checkSameStringSet(t, got, want) + // Check the sources are again unchanged. + checkSameStringSet(t, setA, input2) + checkSameStringSet(t, setB, input1) +} -- cgit v1.2.3 From 40befb6fd604194bd69f12dce0556f736f63925d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 10 Apr 2017 11:02:33 -0700 Subject: Restructure directory to add stringclassifier pkg. --- internal/sets/sets.go | 20 -- internal/sets/sets_benchmark_test.go | 213 ------------------ internal/sets/stringset.go | 228 ------------------- internal/sets/stringset_test.go | 425 ----------------------------------- 4 files changed, 886 deletions(-) delete mode 100644 internal/sets/sets.go delete mode 100644 internal/sets/sets_benchmark_test.go delete mode 100644 internal/sets/stringset.go delete mode 100644 internal/sets/stringset_test.go (limited to 'internal/sets') diff --git a/internal/sets/sets.go b/internal/sets/sets.go deleted file mode 100644 index f34ae5b..0000000 --- a/internal/sets/sets.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2017 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package sets provides sets for storing collections of unique elements. -package sets - -// present is an empty struct used as the "value" in the map[int], since -// empty structs consume zero bytes (unlike 1 unnecessary byte per bool). -type present struct{} diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go deleted file mode 100644 index 017dca6..0000000 --- a/internal/sets/sets_benchmark_test.go +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright 2017 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package sets - -import ( - "strings" - "testing" -) - -const ( - postmodernThesisCollapse = `1. Expressions of collapse - -If one examines postcultural Marxism, one is faced with a choice: either -reject capitalist submodern theory or conclude that the purpose of the reader -is significant form. Bataille uses the term ‘capitalist construction’ to denote -not, in fact, discourse, but prediscourse. - -Therefore, in Stardust, Gaiman analyses postcultural Marxism; in -The Books of Magic, although, he denies capitalist submodern theory. If -capitalist construction holds, we have to choose between capitalist submodern -theory and Baudrillardist simulacra. - -However, conceptualist socialism implies that narrativity may be used to -oppress the proletariat, given that sexuality is distinct from art. The subject -is interpolated into a capitalist construction that includes language as a -paradox. -` - postmodernThesisNarratives = `1. Narratives of failure - -The main theme of the works of Joyce is the defining characteristic, and some -would say the economy, of neocultural class. But Bataille promotes the use of -socialist realism to deconstruct sexual identity. - -The subject is interpolated into a Baudrillardist simulation that includes -consciousness as a whole. Thus, the primary theme of Pickett's[1] model of -socialist realism is the role of the reader as artist. - -The subject is contextualised into a postcapitalist discourse that includes -language as a paradox. It could be said that if Baudrillardist simulation -holds, the works of Gibson are postmodern. The characteristic theme of the -works of Gibson is the common ground between society and narrativity. However, -Sartre uses the term 'postcapitalist discourse' to denote not, in fact, -narrative, but postnarrative. -` -) - -var ( - // Word lists: - stringsA = strings.Fields(postmodernThesisCollapse) - stringsB = strings.Fields(postmodernThesisNarratives) -) - -func BenchmarkStringSets_NewStringSet(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - NewStringSet(stringsA...) - } -} - -func BenchmarkStringSets_Copy(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Copy() - } -} - -func BenchmarkStringSets_Insert(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - s := NewStringSet() - s.Insert(stringsA...) - s.Insert(stringsB...) - } -} - -func BenchmarkStringSets_Delete(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - s := NewStringSet(stringsA...) - s.Delete(stringsB...) - } -} - -func BenchmarkStringSets_Intersect(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Intersect(t) - } -} - -func BenchmarkStringSets_Disjoint(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Disjoint(t) - } -} - -func BenchmarkStringSets_Difference(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Difference(t) - } -} - -func BenchmarkStringSets_Unique(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Unique(t) - } -} - -func BenchmarkStringSets_Equal(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Equal(t) - } -} - -func BenchmarkStringSets_Union(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Union(t) - } -} - -func BenchmarkStringSets_Contains(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - for _, w := range stringsB { - s.Contains(w) - } - } -} - -func BenchmarkStringSets_Len(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Len() - } -} - -func BenchmarkStringSets_Empty(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Empty() - t.Empty() - } -} - -func BenchmarkStringSets_Elements(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Elements() - } -} - -func BenchmarkStringSets_Sorted(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Sorted() - } -} - -func BenchmarkStringSets_String(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.String() - } -} diff --git a/internal/sets/stringset.go b/internal/sets/stringset.go deleted file mode 100644 index 9f93450..0000000 --- a/internal/sets/stringset.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2017 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package sets - -import ( - "fmt" - "sort" - "strings" -) - -// StringSet stores a set of unique string elements. -type StringSet struct { - set map[string]present -} - -// NewStringSet creates a StringSet containing the supplied initial string elements. -func NewStringSet(elements ...string) *StringSet { - s := &StringSet{} - s.set = make(map[string]present) - s.Insert(elements...) - return s -} - -// Copy returns a newly allocated copy of the supplied StringSet. -func (s *StringSet) Copy() *StringSet { - c := NewStringSet() - if s != nil { - for e := range s.set { - c.set[e] = present{} - } - } - return c -} - -// Insert zero or more string elements into the StringSet. -// As expected for a Set, elements already present in the StringSet are -// simply ignored. -func (s *StringSet) Insert(elements ...string) { - for _, e := range elements { - s.set[e] = present{} - } -} - -// Delete zero or more string elements from the StringSet. -// Any elements not present in the StringSet are simply ignored. -func (s *StringSet) Delete(elements ...string) { - for _, e := range elements { - delete(s.set, e) - } -} - -// Intersect returns a new StringSet containing the intersection of the -// receiver and argument StringSets. Returns an empty set if the argument is nil. -func (s *StringSet) Intersect(other *StringSet) *StringSet { - if other == nil { - return NewStringSet() - } - - // Point a and b to the maps, setting a to the smaller of the two. - a, b := s.set, other.set - if len(b) < len(a) { - a, b = b, a - } - - // Perform the intersection. - intersect := NewStringSet() - for e := range a { - if _, ok := b[e]; ok { - intersect.set[e] = present{} - } - } - return intersect -} - -// Disjoint returns true if the intersection of the receiver and the argument -// StringSets is the empty set. Returns true if the argument is nil or either -// StringSet is the empty set. -func (s *StringSet) Disjoint(other *StringSet) bool { - if other == nil || len(other.set) == 0 || len(s.set) == 0 { - return true - } - - // Point a and b to the maps, setting a to the smaller of the two. - a, b := s.set, other.set - if len(b) < len(a) { - a, b = b, a - } - - // Check for non-empty intersection. - for e := range a { - if _, ok := b[e]; ok { - return false // Early-exit because intersecting. - } - } - return true -} - -// Difference returns a new StringSet containing the elements in the receiver -// that are not present in the argument StringSet. Returns a copy of the -// receiver if the argument is nil. -func (s *StringSet) Difference(other *StringSet) *StringSet { - if other == nil { - return s.Copy() - } - - // Insert only the elements in the receiver that are not present in the - // argument StringSet. - diff := NewStringSet() - for e := range s.set { - if _, ok := other.set[e]; !ok { - diff.set[e] = present{} - } - } - return diff -} - -// Unique returns a new StringSet containing the elements in the receiver -// that are not present in the argument StringSet *and* the elements in the -// argument StringSet that are not in the receiver (which is the union of two -// disjoint sets). Returns a copy of the -// receiver if the argument is nil. -func (s *StringSet) Unique(other *StringSet) *StringSet { - if other == nil { - return s.Copy() - } - - sNotInOther := s.Difference(other) - otherNotInS := other.Difference(s) - - // Duplicate Union implementation here to avoid extra Copy, since both - // sNotInOther and otherNotInS are already copies. - unique := sNotInOther - for e := range otherNotInS.set { - unique.set[e] = present{} - } - return unique -} - -// Equal returns true if the receiver and the argument StringSet contain -// exactly the same elements. Returns false if the argument is nil. -func (s *StringSet) Equal(other *StringSet) bool { - if s == nil || other == nil { - return s == nil && other == nil - } - - // Two sets of different length cannot have the exact same unique elements. - if len(s.set) != len(other.set) { - return false - } - - // Only one loop is needed. If the two sets are known to be of equal - // length, then the two sets are equal only if exactly all of the elements - // in the first set are found in the second. - for e := range s.set { - if _, ok := other.set[e]; !ok { - return false - } - } - - return true -} - -// Union returns a new StringSet containing the union of the receiver and -// argument StringSets. Returns a copy of the receiver if the argument is nil. -func (s *StringSet) Union(other *StringSet) *StringSet { - union := s.Copy() - if other != nil { - for e := range other.set { - union.set[e] = present{} - } - } - return union -} - -// Contains returns true if element is in the StringSet. -func (s *StringSet) Contains(element string) bool { - _, in := s.set[element] - return in -} - -// Len returns the number of unique elements in the StringSet. -func (s *StringSet) Len() int { - return len(s.set) -} - -// Empty returns true if the receiver is the empty set. -func (s *StringSet) Empty() bool { - return len(s.set) == 0 -} - -// Elements returns a []string of the elements in the StringSet, in no -// particular (or consistent) order. -func (s *StringSet) Elements() []string { - elements := []string{} // Return at least an empty slice rather than nil. - for e := range s.set { - elements = append(elements, e) - } - return elements -} - -// Sorted returns a sorted []string of the elements in the StringSet. -func (s *StringSet) Sorted() []string { - elements := s.Elements() - sort.Strings(elements) - return elements -} - -// String formats the StringSet elements as sorted strings, representing them -// in "array initializer" syntax. -func (s *StringSet) String() string { - elements := s.Sorted() - var quoted []string - for _, e := range elements { - quoted = append(quoted, fmt.Sprintf("%q", e)) - } - return fmt.Sprintf("{%s}", strings.Join(quoted, ", ")) -} diff --git a/internal/sets/stringset_test.go b/internal/sets/stringset_test.go deleted file mode 100644 index e37efaa..0000000 --- a/internal/sets/stringset_test.go +++ /dev/null @@ -1,425 +0,0 @@ -// Copyright 2017 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package sets - -import ( - "sort" - "testing" -) - -func checkSameStringSet(t *testing.T, set *StringSet, unique []string) { - // Check that lengths are the same. - want := len(unique) - got := set.Len() - - if got != want { - t.Errorf("NewStringSet(%v) want length %v, got %v", unique, want, got) - } - - // Check that all strings are present in set. - for _, s := range unique { - want := true - got := set.Contains(s) - - if got != want { - t.Errorf("Contains(%v) want %v, got %v", s, want, got) - } - } - - // Check that all elements are present in strings. - sort.Strings(unique) - - for i, got := range set.Sorted() { - want := unique[i] - - if got != want { - t.Errorf("Sorted(%d) want %v, got %v", i, want, got) - } - } -} - -func TestNewStringSet(t *testing.T) { - empty := NewStringSet() - want := 0 - got := empty.Len() - - if got != want { - t.Errorf("NewStringSet() want length %v, got %v", want, got) - } - - unique := []string{"a", "b", "c"} - set := NewStringSet(unique...) - checkSameStringSet(t, set, unique) - - // Append an already-present element. - nonUnique := append(unique, unique[0]) - set = NewStringSet(nonUnique...) - - // Non-unique unique should collapse to one. - want = len(unique) - got = set.Len() - - if got != want { - t.Errorf("NewStringSet(%v) want length %v, got %v", nonUnique, want, got) - } -} - -func TestStringSet_Copy(t *testing.T) { - // Check both copies represent the same set. - base := []string{"a", "b", "c"} - orig := NewStringSet(base...) - cpy := orig.Copy() - checkSameStringSet(t, orig, base) - checkSameStringSet(t, cpy, base) - - // Check the two copies are independent. - more := []string{"d"} - orig.Insert(more...) - more = append(base, more...) - checkSameStringSet(t, orig, more) - checkSameStringSet(t, cpy, base) -} - -func TestStringSet_Insert(t *testing.T) { - unique := []string{"a", "b", "c"} - set := NewStringSet(unique...) - - // Insert existing element, which should basically be a no-op. - set.Insert(unique[0]) - checkSameStringSet(t, set, unique) - - // Actually insert new unique elements. - additional := []string{"d", "e"} - longer := append(unique, additional...) - set.Insert(additional...) - checkSameStringSet(t, set, longer) -} - -func TestStringSet_Delete(t *testing.T) { - unique := []string{"a", "b", "c"} - set := NewStringSet(unique...) - - // Delete non-existent element, which should basically be a no-op. - set.Delete("z") - checkSameStringSet(t, set, unique) - - // Actually delete existing elements. - set.Delete(unique[1:]...) - checkSameStringSet(t, set, unique[:1]) -} - -func TestStringSet_Intersect(t *testing.T) { - input1 := []string{"a", "c", "d", "e", "f"} - input2 := []string{"b", "c", "e"} - - // Check Intersect(nil) returns an empty set. - setA := NewStringSet(input1...) - got := setA.Intersect(nil) - checkSameStringSet(t, got, []string{}) - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check Intersect returns the correct result. - setB := NewStringSet(input2...) - got = setA.Intersect(setB) - want := []string{"c", "e"} - checkSameStringSet(t, got, want) - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) - - // Reverse the inputs and verify Intersect produces the same results. - setA = NewStringSet(input2...) - setB = NewStringSet(input1...) - got = setA.Intersect(setB) - checkSameStringSet(t, got, want) - // Check the sources are again unchanged. - checkSameStringSet(t, setA, input2) - checkSameStringSet(t, setB, input1) -} - -func TestStringSet_Disjoint(t *testing.T) { - input1 := []string{"a", "c", "d", "e", "f"} - input2 := []string{"b", "c", "e"} - input3 := []string{"x", "y", "z"} - - // Check that sets are always disjoint with the empty set or nil - setA := NewStringSet(input1...) - emptySet := NewStringSet() - - if disjoint := setA.Disjoint(nil); !disjoint { - t.Errorf("Disjoint(%s, %v) want %v, got %v", setA, nil, true, disjoint) - } - - if disjoint := setA.Disjoint(emptySet); !disjoint { - t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, emptySet, true, disjoint) - } - - if disjoint := emptySet.Disjoint(setA); !disjoint { - t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, setA, true, disjoint) - } - - if disjoint := emptySet.Disjoint(emptySet); !disjoint { - t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, emptySet, true, disjoint) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, emptySet, []string{}) - - // Check two non-empty, non-nil disjoint sets. - setC := NewStringSet(input3...) - - if disjoint := setA.Disjoint(setC); !disjoint { - t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setC, true, disjoint) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setC, input3) - - // Check that two intersecting sets are not Disjoint. - setB := NewStringSet(input2...) - - if disjoint := setA.Disjoint(setB); disjoint { - t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setB, false, disjoint) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) -} - -func TestStringSet_Difference(t *testing.T) { - input1 := []string{"a", "c", "d", "e", "f"} - input2 := []string{"b", "c", "e"} - input3 := []string{"x", "y", "z"} - - // Check Difference(nil) returns a copy of the receiver. - setA := NewStringSet(input1...) - got := setA.Difference(nil) - checkSameStringSet(t, got, input1) - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check A - A returns the empty set. - got = setA.Difference(setA) - - if !got.Empty() { - t.Errorf("Difference(%s, %s).Empty() want %v, got %v", - setA, setA, true, false) - } - - checkSameStringSet(t, got, []string{}) - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check A - C simply returns elements in A if A and C are disjoint. - setC := NewStringSet(input3...) - got = setA.Difference(setC) - checkSameStringSet(t, got, input1) - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setC, input3) - - // Check A - B returns elements in A not in B. - setB := NewStringSet(input2...) - got = setA.Difference(setB) - want := []string{"a", "d", "f"} - checkSameStringSet(t, got, want) - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) - - // Check B - A returns elements in B not in A. - got = setB.Difference(setA) - want = []string{"b"} - checkSameStringSet(t, got, want) - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) -} - -func TestStringSet_Unique(t *testing.T) { - input1 := []string{"a", "c", "d", "e", "f"} - input2 := []string{"b", "c", "e"} - input3 := []string{"x", "y", "z"} - - // Check Unique(nil) returns a copy of the receiver. - setA := NewStringSet(input1...) - got := setA.Unique(nil) - checkSameStringSet(t, got, input1) - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check Unique returns only elements in A and B not in both A and B. - setB := NewStringSet(input2...) - got = setA.Unique(setB) - want := []string{"a", "b", "d", "f"} - checkSameStringSet(t, got, want) - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) - - // Check Unique of two disjoint sets is the Union of those sets. - setC := NewStringSet(input3...) - got = setA.Unique(setC) - union := setA.Union(setC) - - if equal := union.Equal(got); !equal { - t.Errorf("Union of disjoint Equal(%s, %s) want %v, got %v", - union, got, true, equal) - } - - // Check Unique is the Union of A - B and B - A. - aNotInB := setA.Difference(setB) - bNotInA := setB.Difference(setA) - union = aNotInB.Union(bNotInA) - want = []string{"a", "b", "d", "f"} - checkSameStringSet(t, union, want) - got = setA.Unique(setB) - - if equal := union.Equal(got); !equal { - t.Errorf("Union of differences Equal(%s, %s) want %v, got %v", - union, got, true, equal) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) -} - -func TestStringSet_Equal(t *testing.T) { - input1 := []string{"a", "c", "d", "e", "f"} - input2 := []string{"b", "c", "e"} - input3 := []string{"a", "c", "d", "e", "g"} - - // Check Equal(nil) returns false. - setA := NewStringSet(input1...) - - if equal := setA.Equal(nil); equal { - t.Errorf("Equal(%s, %v) want %v, got %v", setA, nil, false, true) - } - - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check Equal returns true for a set and itself. - if equal := setA.Equal(setA); !equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) - } - - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check Equal returns false for sets of non-equal length. - setB := NewStringSet(input2...) - - if equal := setA.Equal(setB); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setA, setB, false, true) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) - - // Check Equal returns false for equal-length sets with different elements. - setC := NewStringSet(input3...) - - if equal := setA.Equal(setC); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setA, setC, false, true) - } - - if equal := setC.Equal(setA); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setC, setA, false, true) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setC, input3) - - // Check Equal returns true for a set with itself. - if equal := setA.Equal(setA); !equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) - } - - // Also check the source is unchanged. - checkSameStringSet(t, setA, input1) - - // Check Equal returns true for two separate equal sets. - anotherA := NewStringSet(input1...) - - if equal := setA.Equal(anotherA); !equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setA, anotherA, true, false) - } - - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, anotherA, input1) - - // Check for equality comparing to nil struct. - var nilSet *StringSet - if equal := nilSet.Equal(setA); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, setA, false, true) - } - if equal := setA.Equal(nilSet); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", setA, nilSet, false, true) - } - if equal := nilSet.Equal(nilSet); !equal { - t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, nilSet, true, false) - } - - // Edge case: consider the empty set to be different than the nil set. - emptySet := NewStringSet() - if equal := nilSet.Equal(emptySet); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, emptySet, false, true) - } - if equal := emptySet.Equal(nilSet); equal { - t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, nilSet, false, true) - } - if equal := emptySet.Equal(emptySet); !equal { - t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, emptySet, true, false) - } -} - -func TestStringSet_Union(t *testing.T) { - input1 := []string{"a", "c", "d", "e", "f"} - input2 := []string{"b", "c", "e"} - - // Check Union(nil) returns a copy of the receiver. - setA := NewStringSet(input1...) - got := setA.Union(nil) - checkSameStringSet(t, got, input1) - // Check that the receiver is unchanged. - checkSameStringSet(t, setA, input1) - - // Check Union returns the correct result. - setB := NewStringSet(input2...) - got = setA.Union(setB) - want := []string{"a", "b", "c", "d", "e", "f"} - checkSameStringSet(t, got, want) - // Also check the sources are unchanged. - checkSameStringSet(t, setA, input1) - checkSameStringSet(t, setB, input2) - - // Reverse the inputs and verify Union produces the same results. - setA = NewStringSet(input2...) - setB = NewStringSet(input1...) - got = setA.Union(setB) - checkSameStringSet(t, got, want) - // Check the sources are again unchanged. - checkSameStringSet(t, setA, input2) - checkSameStringSet(t, setB, input1) -} -- cgit v1.2.3 From 1636da7b553f808512f80a2a89d64100488a32a7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 19 Sep 2017 15:01:07 -0700 Subject: Move files around to make library shallower. --- internal/sets/sets.go | 20 ++ internal/sets/sets_benchmark_test.go | 213 ++++++++++++++++++ internal/sets/stringset.go | 228 +++++++++++++++++++ internal/sets/stringset_test.go | 425 +++++++++++++++++++++++++++++++++++ 4 files changed, 886 insertions(+) create mode 100644 internal/sets/sets.go create mode 100644 internal/sets/sets_benchmark_test.go create mode 100644 internal/sets/stringset.go create mode 100644 internal/sets/stringset_test.go (limited to 'internal/sets') diff --git a/internal/sets/sets.go b/internal/sets/sets.go new file mode 100644 index 0000000..f34ae5b --- /dev/null +++ b/internal/sets/sets.go @@ -0,0 +1,20 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sets provides sets for storing collections of unique elements. +package sets + +// present is an empty struct used as the "value" in the map[int], since +// empty structs consume zero bytes (unlike 1 unnecessary byte per bool). +type present struct{} diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go new file mode 100644 index 0000000..017dca6 --- /dev/null +++ b/internal/sets/sets_benchmark_test.go @@ -0,0 +1,213 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "strings" + "testing" +) + +const ( + postmodernThesisCollapse = `1. Expressions of collapse + +If one examines postcultural Marxism, one is faced with a choice: either +reject capitalist submodern theory or conclude that the purpose of the reader +is significant form. Bataille uses the term ‘capitalist construction’ to denote +not, in fact, discourse, but prediscourse. + +Therefore, in Stardust, Gaiman analyses postcultural Marxism; in +The Books of Magic, although, he denies capitalist submodern theory. If +capitalist construction holds, we have to choose between capitalist submodern +theory and Baudrillardist simulacra. + +However, conceptualist socialism implies that narrativity may be used to +oppress the proletariat, given that sexuality is distinct from art. The subject +is interpolated into a capitalist construction that includes language as a +paradox. +` + postmodernThesisNarratives = `1. Narratives of failure + +The main theme of the works of Joyce is the defining characteristic, and some +would say the economy, of neocultural class. But Bataille promotes the use of +socialist realism to deconstruct sexual identity. + +The subject is interpolated into a Baudrillardist simulation that includes +consciousness as a whole. Thus, the primary theme of Pickett's[1] model of +socialist realism is the role of the reader as artist. + +The subject is contextualised into a postcapitalist discourse that includes +language as a paradox. It could be said that if Baudrillardist simulation +holds, the works of Gibson are postmodern. The characteristic theme of the +works of Gibson is the common ground between society and narrativity. However, +Sartre uses the term 'postcapitalist discourse' to denote not, in fact, +narrative, but postnarrative. +` +) + +var ( + // Word lists: + stringsA = strings.Fields(postmodernThesisCollapse) + stringsB = strings.Fields(postmodernThesisNarratives) +) + +func BenchmarkStringSets_NewStringSet(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + NewStringSet(stringsA...) + } +} + +func BenchmarkStringSets_Copy(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Copy() + } +} + +func BenchmarkStringSets_Insert(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet() + s.Insert(stringsA...) + s.Insert(stringsB...) + } +} + +func BenchmarkStringSets_Delete(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet(stringsA...) + s.Delete(stringsB...) + } +} + +func BenchmarkStringSets_Intersect(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Intersect(t) + } +} + +func BenchmarkStringSets_Disjoint(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Disjoint(t) + } +} + +func BenchmarkStringSets_Difference(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Difference(t) + } +} + +func BenchmarkStringSets_Unique(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Unique(t) + } +} + +func BenchmarkStringSets_Equal(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Equal(t) + } +} + +func BenchmarkStringSets_Union(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Union(t) + } +} + +func BenchmarkStringSets_Contains(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, w := range stringsB { + s.Contains(w) + } + } +} + +func BenchmarkStringSets_Len(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Len() + } +} + +func BenchmarkStringSets_Empty(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Empty() + t.Empty() + } +} + +func BenchmarkStringSets_Elements(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Elements() + } +} + +func BenchmarkStringSets_Sorted(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Sorted() + } +} + +func BenchmarkStringSets_String(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.String() + } +} diff --git a/internal/sets/stringset.go b/internal/sets/stringset.go new file mode 100644 index 0000000..54edbd4 --- /dev/null +++ b/internal/sets/stringset.go @@ -0,0 +1,228 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "fmt" + "sort" + "strings" +) + +// StringSet stores a set of unique string elements. +type StringSet struct { + set map[string]present +} + +// NewStringSet creates a StringSet containing the supplied initial string elements. +func NewStringSet(elements ...string) *StringSet { + s := &StringSet{} + s.set = make(map[string]present) + s.Insert(elements...) + return s +} + +// Copy returns a newly allocated copy of the supplied StringSet. +func (s *StringSet) Copy() *StringSet { + c := NewStringSet() + if s != nil { + for e := range s.set { + c.set[e] = present{} + } + } + return c +} + +// Insert zero or more string elements into the StringSet. +// As expected for a Set, elements already present in the StringSet are +// simply ignored. +func (s *StringSet) Insert(elements ...string) { + for _, e := range elements { + s.set[e] = present{} + } +} + +// Delete zero or more string elements from the StringSet. +// Any elements not present in the StringSet are simply ignored. +func (s *StringSet) Delete(elements ...string) { + for _, e := range elements { + delete(s.set, e) + } +} + +// Intersect returns a new StringSet containing the intersection of the +// receiver and argument StringSets. Returns an empty set if the argument is nil. +func (s *StringSet) Intersect(other *StringSet) *StringSet { + if other == nil { + return NewStringSet() + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Perform the intersection. + intersect := NewStringSet() + for e := range a { + if _, ok := b[e]; ok { + intersect.set[e] = present{} + } + } + return intersect +} + +// Disjoint returns true if the intersection of the receiver and the argument +// StringSets is the empty set. Returns true if the argument is nil or either +// StringSet is the empty set. +func (s *StringSet) Disjoint(other *StringSet) bool { + if other == nil || len(other.set) == 0 || len(s.set) == 0 { + return true + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Check for non-empty intersection. + for e := range a { + if _, ok := b[e]; ok { + return false // Early-exit because intersecting. + } + } + return true +} + +// Difference returns a new StringSet containing the elements in the receiver +// that are not present in the argument StringSet. Returns a copy of the +// receiver if the argument is nil. +func (s *StringSet) Difference(other *StringSet) *StringSet { + if other == nil { + return s.Copy() + } + + // Insert only the elements in the receiver that are not present in the + // argument StringSet. + diff := NewStringSet() + for e := range s.set { + if _, ok := other.set[e]; !ok { + diff.set[e] = present{} + } + } + return diff +} + +// Unique returns a new StringSet containing the elements in the receiver +// that are not present in the argument StringSet *and* the elements in the +// argument StringSet that are not in the receiver (which is the union of two +// disjoint sets). Returns a copy of the +// receiver if the argument is nil. +func (s *StringSet) Unique(other *StringSet) *StringSet { + if other == nil { + return s.Copy() + } + + sNotInOther := s.Difference(other) + otherNotInS := other.Difference(s) + + // Duplicate Union implementation here to avoid extra Copy, since both + // sNotInOther and otherNotInS are already copies. + unique := sNotInOther + for e := range otherNotInS.set { + unique.set[e] = present{} + } + return unique +} + +// Equal returns true if the receiver and the argument StringSet contain +// exactly the same elements. +func (s *StringSet) Equal(other *StringSet) bool { + if s == nil || other == nil { + return s == nil && other == nil + } + + // Two sets of different length cannot have the exact same unique elements. + if len(s.set) != len(other.set) { + return false + } + + // Only one loop is needed. If the two sets are known to be of equal + // length, then the two sets are equal only if exactly all of the elements + // in the first set are found in the second. + for e := range s.set { + if _, ok := other.set[e]; !ok { + return false + } + } + + return true +} + +// Union returns a new StringSet containing the union of the receiver and +// argument StringSets. Returns a copy of the receiver if the argument is nil. +func (s *StringSet) Union(other *StringSet) *StringSet { + union := s.Copy() + if other != nil { + for e := range other.set { + union.set[e] = present{} + } + } + return union +} + +// Contains returns true if element is in the StringSet. +func (s *StringSet) Contains(element string) bool { + _, in := s.set[element] + return in +} + +// Len returns the number of unique elements in the StringSet. +func (s *StringSet) Len() int { + return len(s.set) +} + +// Empty returns true if the receiver is the empty set. +func (s *StringSet) Empty() bool { + return len(s.set) == 0 +} + +// Elements returns a []string of the elements in the StringSet, in no +// particular (or consistent) order. +func (s *StringSet) Elements() []string { + elements := []string{} // Return at least an empty slice rather than nil. + for e := range s.set { + elements = append(elements, e) + } + return elements +} + +// Sorted returns a sorted []string of the elements in the StringSet. +func (s *StringSet) Sorted() []string { + elements := s.Elements() + sort.Strings(elements) + return elements +} + +// String formats the StringSet elements as sorted strings, representing them +// in "array initializer" syntax. +func (s *StringSet) String() string { + elements := s.Sorted() + var quoted []string + for _, e := range elements { + quoted = append(quoted, fmt.Sprintf("%q", e)) + } + return fmt.Sprintf("{%s}", strings.Join(quoted, ", ")) +} diff --git a/internal/sets/stringset_test.go b/internal/sets/stringset_test.go new file mode 100644 index 0000000..e37efaa --- /dev/null +++ b/internal/sets/stringset_test.go @@ -0,0 +1,425 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "sort" + "testing" +) + +func checkSameStringSet(t *testing.T, set *StringSet, unique []string) { + // Check that lengths are the same. + want := len(unique) + got := set.Len() + + if got != want { + t.Errorf("NewStringSet(%v) want length %v, got %v", unique, want, got) + } + + // Check that all strings are present in set. + for _, s := range unique { + want := true + got := set.Contains(s) + + if got != want { + t.Errorf("Contains(%v) want %v, got %v", s, want, got) + } + } + + // Check that all elements are present in strings. + sort.Strings(unique) + + for i, got := range set.Sorted() { + want := unique[i] + + if got != want { + t.Errorf("Sorted(%d) want %v, got %v", i, want, got) + } + } +} + +func TestNewStringSet(t *testing.T) { + empty := NewStringSet() + want := 0 + got := empty.Len() + + if got != want { + t.Errorf("NewStringSet() want length %v, got %v", want, got) + } + + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + checkSameStringSet(t, set, unique) + + // Append an already-present element. + nonUnique := append(unique, unique[0]) + set = NewStringSet(nonUnique...) + + // Non-unique unique should collapse to one. + want = len(unique) + got = set.Len() + + if got != want { + t.Errorf("NewStringSet(%v) want length %v, got %v", nonUnique, want, got) + } +} + +func TestStringSet_Copy(t *testing.T) { + // Check both copies represent the same set. + base := []string{"a", "b", "c"} + orig := NewStringSet(base...) + cpy := orig.Copy() + checkSameStringSet(t, orig, base) + checkSameStringSet(t, cpy, base) + + // Check the two copies are independent. + more := []string{"d"} + orig.Insert(more...) + more = append(base, more...) + checkSameStringSet(t, orig, more) + checkSameStringSet(t, cpy, base) +} + +func TestStringSet_Insert(t *testing.T) { + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + + // Insert existing element, which should basically be a no-op. + set.Insert(unique[0]) + checkSameStringSet(t, set, unique) + + // Actually insert new unique elements. + additional := []string{"d", "e"} + longer := append(unique, additional...) + set.Insert(additional...) + checkSameStringSet(t, set, longer) +} + +func TestStringSet_Delete(t *testing.T) { + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + + // Delete non-existent element, which should basically be a no-op. + set.Delete("z") + checkSameStringSet(t, set, unique) + + // Actually delete existing elements. + set.Delete(unique[1:]...) + checkSameStringSet(t, set, unique[:1]) +} + +func TestStringSet_Intersect(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + + // Check Intersect(nil) returns an empty set. + setA := NewStringSet(input1...) + got := setA.Intersect(nil) + checkSameStringSet(t, got, []string{}) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Intersect returns the correct result. + setB := NewStringSet(input2...) + got = setA.Intersect(setB) + want := []string{"c", "e"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Reverse the inputs and verify Intersect produces the same results. + setA = NewStringSet(input2...) + setB = NewStringSet(input1...) + got = setA.Intersect(setB) + checkSameStringSet(t, got, want) + // Check the sources are again unchanged. + checkSameStringSet(t, setA, input2) + checkSameStringSet(t, setB, input1) +} + +func TestStringSet_Disjoint(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check that sets are always disjoint with the empty set or nil + setA := NewStringSet(input1...) + emptySet := NewStringSet() + + if disjoint := setA.Disjoint(nil); !disjoint { + t.Errorf("Disjoint(%s, %v) want %v, got %v", setA, nil, true, disjoint) + } + + if disjoint := setA.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, emptySet, true, disjoint) + } + + if disjoint := emptySet.Disjoint(setA); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, setA, true, disjoint) + } + + if disjoint := emptySet.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, emptySet, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, emptySet, []string{}) + + // Check two non-empty, non-nil disjoint sets. + setC := NewStringSet(input3...) + + if disjoint := setA.Disjoint(setC); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setC, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check that two intersecting sets are not Disjoint. + setB := NewStringSet(input2...) + + if disjoint := setA.Disjoint(setB); disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setB, false, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Difference(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check Difference(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Difference(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check A - A returns the empty set. + got = setA.Difference(setA) + + if !got.Empty() { + t.Errorf("Difference(%s, %s).Empty() want %v, got %v", + setA, setA, true, false) + } + + checkSameStringSet(t, got, []string{}) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check A - C simply returns elements in A if A and C are disjoint. + setC := NewStringSet(input3...) + got = setA.Difference(setC) + checkSameStringSet(t, got, input1) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check A - B returns elements in A not in B. + setB := NewStringSet(input2...) + got = setA.Difference(setB) + want := []string{"a", "d", "f"} + checkSameStringSet(t, got, want) + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check B - A returns elements in B not in A. + got = setB.Difference(setA) + want = []string{"b"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Unique(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check Unique(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Unique(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Unique returns only elements in A and B not in both A and B. + setB := NewStringSet(input2...) + got = setA.Unique(setB) + want := []string{"a", "b", "d", "f"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check Unique of two disjoint sets is the Union of those sets. + setC := NewStringSet(input3...) + got = setA.Unique(setC) + union := setA.Union(setC) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of disjoint Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Check Unique is the Union of A - B and B - A. + aNotInB := setA.Difference(setB) + bNotInA := setB.Difference(setA) + union = aNotInB.Union(bNotInA) + want = []string{"a", "b", "d", "f"} + checkSameStringSet(t, union, want) + got = setA.Unique(setB) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of differences Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Equal(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"a", "c", "d", "e", "g"} + + // Check Equal(nil) returns false. + setA := NewStringSet(input1...) + + if equal := setA.Equal(nil); equal { + t.Errorf("Equal(%s, %v) want %v, got %v", setA, nil, false, true) + } + + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns true for a set and itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns false for sets of non-equal length. + setB := NewStringSet(input2...) + + if equal := setA.Equal(setB); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setB, false, true) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check Equal returns false for equal-length sets with different elements. + setC := NewStringSet(input3...) + + if equal := setA.Equal(setC); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setC, false, true) + } + + if equal := setC.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setC, setA, false, true) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check Equal returns true for a set with itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Also check the source is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns true for two separate equal sets. + anotherA := NewStringSet(input1...) + + if equal := setA.Equal(anotherA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, anotherA, true, false) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, anotherA, input1) + + // Check for equality comparing to nil struct. + var nilSet *StringSet + if equal := nilSet.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, setA, false, true) + } + if equal := setA.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, nilSet, false, true) + } + if equal := nilSet.Equal(nilSet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, nilSet, true, false) + } + + // Edge case: consider the empty set to be different than the nil set. + emptySet := NewStringSet() + if equal := nilSet.Equal(emptySet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, emptySet, false, true) + } + if equal := emptySet.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, nilSet, false, true) + } + if equal := emptySet.Equal(emptySet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, emptySet, true, false) + } +} + +func TestStringSet_Union(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + + // Check Union(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Union(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Union returns the correct result. + setB := NewStringSet(input2...) + got = setA.Union(setB) + want := []string{"a", "b", "c", "d", "e", "f"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Reverse the inputs and verify Union produces the same results. + setA = NewStringSet(input2...) + setB = NewStringSet(input1...) + got = setA.Union(setB) + checkSameStringSet(t, got, want) + // Check the sources are again unchanged. + checkSameStringSet(t, setA, input2) + checkSameStringSet(t, setB, input1) +} -- cgit v1.2.3 From 1825cc11e3c0ce419de5229f8a1f6d557b5e6b12 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Sep 2017 22:22:56 -0700 Subject: Tell 'go vet' that we are ignoring the return value. --- internal/sets/sets_benchmark_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'internal/sets') diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go index 017dca6..0a96c28 100644 --- a/internal/sets/sets_benchmark_test.go +++ b/internal/sets/sets_benchmark_test.go @@ -208,6 +208,6 @@ func BenchmarkStringSets_String(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - s.String() + _ := s.String() } } -- cgit v1.2.3 From c3beb1f5717a301482f89a411e093bf1bd7f2279 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Sep 2017 22:40:52 -0700 Subject: Remove benchmark tests. --- internal/sets/sets_benchmark_test.go | 213 ----------------------------------- 1 file changed, 213 deletions(-) delete mode 100644 internal/sets/sets_benchmark_test.go (limited to 'internal/sets') diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go deleted file mode 100644 index 0a96c28..0000000 --- a/internal/sets/sets_benchmark_test.go +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright 2017 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package sets - -import ( - "strings" - "testing" -) - -const ( - postmodernThesisCollapse = `1. Expressions of collapse - -If one examines postcultural Marxism, one is faced with a choice: either -reject capitalist submodern theory or conclude that the purpose of the reader -is significant form. Bataille uses the term ‘capitalist construction’ to denote -not, in fact, discourse, but prediscourse. - -Therefore, in Stardust, Gaiman analyses postcultural Marxism; in -The Books of Magic, although, he denies capitalist submodern theory. If -capitalist construction holds, we have to choose between capitalist submodern -theory and Baudrillardist simulacra. - -However, conceptualist socialism implies that narrativity may be used to -oppress the proletariat, given that sexuality is distinct from art. The subject -is interpolated into a capitalist construction that includes language as a -paradox. -` - postmodernThesisNarratives = `1. Narratives of failure - -The main theme of the works of Joyce is the defining characteristic, and some -would say the economy, of neocultural class. But Bataille promotes the use of -socialist realism to deconstruct sexual identity. - -The subject is interpolated into a Baudrillardist simulation that includes -consciousness as a whole. Thus, the primary theme of Pickett's[1] model of -socialist realism is the role of the reader as artist. - -The subject is contextualised into a postcapitalist discourse that includes -language as a paradox. It could be said that if Baudrillardist simulation -holds, the works of Gibson are postmodern. The characteristic theme of the -works of Gibson is the common ground between society and narrativity. However, -Sartre uses the term 'postcapitalist discourse' to denote not, in fact, -narrative, but postnarrative. -` -) - -var ( - // Word lists: - stringsA = strings.Fields(postmodernThesisCollapse) - stringsB = strings.Fields(postmodernThesisNarratives) -) - -func BenchmarkStringSets_NewStringSet(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - NewStringSet(stringsA...) - } -} - -func BenchmarkStringSets_Copy(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Copy() - } -} - -func BenchmarkStringSets_Insert(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - s := NewStringSet() - s.Insert(stringsA...) - s.Insert(stringsB...) - } -} - -func BenchmarkStringSets_Delete(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - s := NewStringSet(stringsA...) - s.Delete(stringsB...) - } -} - -func BenchmarkStringSets_Intersect(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Intersect(t) - } -} - -func BenchmarkStringSets_Disjoint(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Disjoint(t) - } -} - -func BenchmarkStringSets_Difference(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Difference(t) - } -} - -func BenchmarkStringSets_Unique(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Unique(t) - } -} - -func BenchmarkStringSets_Equal(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Equal(t) - } -} - -func BenchmarkStringSets_Union(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet(stringsB...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Union(t) - } -} - -func BenchmarkStringSets_Contains(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - for _, w := range stringsB { - s.Contains(w) - } - } -} - -func BenchmarkStringSets_Len(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Len() - } -} - -func BenchmarkStringSets_Empty(b *testing.B) { - s := NewStringSet(stringsA...) - t := NewStringSet() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Empty() - t.Empty() - } -} - -func BenchmarkStringSets_Elements(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Elements() - } -} - -func BenchmarkStringSets_Sorted(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - s.Sorted() - } -} - -func BenchmarkStringSets_String(b *testing.B) { - s := NewStringSet(stringsA...) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ := s.String() - } -} -- cgit v1.2.3 From 9dfa8d8474eb60a626d9f8439362e49b1c3dca54 Mon Sep 17 00:00:00 2001 From: teeler Date: Wed, 8 Jan 2020 15:10:22 -0800 Subject: Merge with downstream (#16) --- internal/sets/sets_benchmark_test.go | 213 +++++++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 internal/sets/sets_benchmark_test.go (limited to 'internal/sets') diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go new file mode 100644 index 0000000..017dca6 --- /dev/null +++ b/internal/sets/sets_benchmark_test.go @@ -0,0 +1,213 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "strings" + "testing" +) + +const ( + postmodernThesisCollapse = `1. Expressions of collapse + +If one examines postcultural Marxism, one is faced with a choice: either +reject capitalist submodern theory or conclude that the purpose of the reader +is significant form. Bataille uses the term ‘capitalist construction’ to denote +not, in fact, discourse, but prediscourse. + +Therefore, in Stardust, Gaiman analyses postcultural Marxism; in +The Books of Magic, although, he denies capitalist submodern theory. If +capitalist construction holds, we have to choose between capitalist submodern +theory and Baudrillardist simulacra. + +However, conceptualist socialism implies that narrativity may be used to +oppress the proletariat, given that sexuality is distinct from art. The subject +is interpolated into a capitalist construction that includes language as a +paradox. +` + postmodernThesisNarratives = `1. Narratives of failure + +The main theme of the works of Joyce is the defining characteristic, and some +would say the economy, of neocultural class. But Bataille promotes the use of +socialist realism to deconstruct sexual identity. + +The subject is interpolated into a Baudrillardist simulation that includes +consciousness as a whole. Thus, the primary theme of Pickett's[1] model of +socialist realism is the role of the reader as artist. + +The subject is contextualised into a postcapitalist discourse that includes +language as a paradox. It could be said that if Baudrillardist simulation +holds, the works of Gibson are postmodern. The characteristic theme of the +works of Gibson is the common ground between society and narrativity. However, +Sartre uses the term 'postcapitalist discourse' to denote not, in fact, +narrative, but postnarrative. +` +) + +var ( + // Word lists: + stringsA = strings.Fields(postmodernThesisCollapse) + stringsB = strings.Fields(postmodernThesisNarratives) +) + +func BenchmarkStringSets_NewStringSet(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + NewStringSet(stringsA...) + } +} + +func BenchmarkStringSets_Copy(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Copy() + } +} + +func BenchmarkStringSets_Insert(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet() + s.Insert(stringsA...) + s.Insert(stringsB...) + } +} + +func BenchmarkStringSets_Delete(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet(stringsA...) + s.Delete(stringsB...) + } +} + +func BenchmarkStringSets_Intersect(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Intersect(t) + } +} + +func BenchmarkStringSets_Disjoint(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Disjoint(t) + } +} + +func BenchmarkStringSets_Difference(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Difference(t) + } +} + +func BenchmarkStringSets_Unique(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Unique(t) + } +} + +func BenchmarkStringSets_Equal(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Equal(t) + } +} + +func BenchmarkStringSets_Union(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Union(t) + } +} + +func BenchmarkStringSets_Contains(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, w := range stringsB { + s.Contains(w) + } + } +} + +func BenchmarkStringSets_Len(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Len() + } +} + +func BenchmarkStringSets_Empty(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Empty() + t.Empty() + } +} + +func BenchmarkStringSets_Elements(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Elements() + } +} + +func BenchmarkStringSets_Sorted(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Sorted() + } +} + +func BenchmarkStringSets_String(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.String() + } +} -- cgit v1.2.3 From dd64e698b98547386f13e9e683dc1fb3b6029c60 Mon Sep 17 00:00:00 2001 From: Google Open Source Date: Thu, 17 Mar 2022 11:54:49 -0700 Subject: Consolidated non-code updates to licenseclassifier. Removed the old 'licenses' directory since 'assets' replaced it. Whitespace formatting to code. Add new licenses to corpus. --- internal/sets/sets_benchmark_test.go | 2 +- internal/sets/stringset.go | 2 +- internal/sets/stringset_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'internal/sets') diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go index 017dca6..75bc78d 100644 --- a/internal/sets/sets_benchmark_test.go +++ b/internal/sets/sets_benchmark_test.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, diff --git a/internal/sets/stringset.go b/internal/sets/stringset.go index 54edbd4..4a1c7e2 100644 --- a/internal/sets/stringset.go +++ b/internal/sets/stringset.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, diff --git a/internal/sets/stringset_test.go b/internal/sets/stringset_test.go index e37efaa..2ed452c 100644 --- a/internal/sets/stringset_test.go +++ b/internal/sets/stringset_test.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, -- cgit v1.2.3