diff options
author | Bill Wendling <morbo@google.com> | 2017-04-09 20:49:36 -0700 |
---|---|---|
committer | Bill Wendling <morbo@google.com> | 2017-04-09 20:49:36 -0700 |
commit | 85ea7edc49509df714ac09a6166e514b05e30d6d (patch) | |
tree | 2bb8c36b5938ce5af17775a382e2a0e8608e2723 /internal | |
download | licenseclassifier-85ea7edc49509df714ac09a6166e514b05e30d6d.tar.gz |
Initial commit of license classifier.
Diffstat (limited to 'internal')
-rw-r--r-- | internal/commentparser/comment_parser.go | 336 | ||||
-rw-r--r-- | internal/commentparser/comment_parser_test.go | 413 | ||||
-rw-r--r-- | internal/commentparser/language/language.go | 268 | ||||
-rw-r--r-- | internal/sets/sets.go | 20 | ||||
-rw-r--r-- | internal/sets/sets_benchmark_test.go | 213 | ||||
-rw-r--r-- | internal/sets/stringset.go | 228 | ||||
-rw-r--r-- | internal/sets/stringset_test.go | 425 |
7 files changed, 1903 insertions, 0 deletions
diff --git a/internal/commentparser/comment_parser.go b/internal/commentparser/comment_parser.go new file mode 100644 index 0000000..0cc3fe3 --- /dev/null +++ b/internal/commentparser/comment_parser.go @@ -0,0 +1,336 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package commentparser does a basic parse over a source file and returns all +// of the comments from the code. This is useful for when you want to analyze +// text written in comments (like copyright notices) but not in the code +// itself. +package commentparser + +import ( + "bytes" + "log" + "strings" + "unicode/utf8" + + "github.com/google/licenseclassifier/internal/commentparser/language" +) + +const ( + eofInString = "%d:EOF in string" + eofInSingleLineComment = "%d:EOF in single line comment" + eofInMultilineComment = "%d:EOF in multiline comment" +) + +// Parse parses the input data and returns the comments. +func Parse(contents []byte, lang language.Language) Comments { + if len(contents) == 0 { + return nil + } + + c := string(contents) + if !strings.HasSuffix(c, "\n") { + // Force a terminating newline if one isn't present. + c += "\n" + } + i := &input{ + s: c, + lang: lang, + offset: 0, + pos: position{line: 1, lineRune: []int{0}}, + } + i.lex() + return i.comments +} + +// Comment is either a single line or multiline comment in a source code file. +// A single line comment has StartLine equal to EndLine. The lines are 1-based. +type Comment struct { + StartLine int + EndLine int + Text string +} + +// Comments allows us to treat a slice of comments as a unit. +type Comments []*Comment + +// ChunkIterator returns a read-only channel and generates the comments in a +// goroutine, then closes the channel. +func (c Comments) ChunkIterator() <-chan Comments { + ch := make(chan Comments) + go func() { + index := 0 + prevLine := c.StartLine() + + for ; index < len(c); index++ { + var chunk Comments + for ; index < len(c); index++ { + if c[index].StartLine > prevLine+2 { + break + } + chunk = append(chunk, c[index]) + prevLine = c[index].StartLine + } + if len(chunk) == 0 { + break + } + + ch <- chunk + if index >= len(c) { + break + } + + prevLine = c[index].StartLine + index-- + } + + close(ch) + }() + return ch +} + +// StartLine is the line number (1-based) the first part of the comment block +// starts on. +func (c Comments) StartLine() int { + if len(c) == 0 { + return 0 + } + return c[0].StartLine +} + +// String creates a string out of the text of the comments. Comment begin and +// end markers are removed. +func (c Comments) String() string { + var s []string + for _, cmt := range c { + s = append(s, cmt.Text) + } + return strings.Join(s, "\n") +} + +// position records the location of a lexeme. +type position struct { + line int // Line number of input: 1-based + lineRune []int // Rune offset from beginning of line: 0-based +} + +// input holds the current state of the lexer. +type input struct { + s string // Entire input. + lang language.Language // Source code language. + offset int // Offset into input. + pos position // Current position in the input. + comments Comments // Comments in the source file. +} + +// lex is called to obtain the comments. +func (i *input) lex() { + for { + c, ok := i.peekRune() + if !ok { + break + } + + switch c { + case '"', '\'', '`': // String + // Ignore strings because they could contain comment + // start or end sequences which we need to ignore. + if i.lang == language.HTML { + // Quotes in HTML-like files aren't meaningful, + // because it's basically plain text + break + } + + ok, hasEscape := i.lang.QuoteCharacter(c) + if !ok { + break + } + + quote := string(c) + if i.lang == language.Python { + if c == '\'' && i.match("'''") { + quote = "'''" + } else if c == '"' && i.match(`"""`) { + quote = `"""` + } else { + i.readRune() // Eat quote. + } + } else { + i.readRune() // Eat quote. + } + + startLine := i.pos.line + for { + c, ok = i.peekRune() + if !ok { + log.Printf(eofInString, startLine) + return + } + if hasEscape && c == '\\' { + i.readRune() // Eat escape. + } else if i.match(quote) { + break + } else if (i.lang == language.JavaScript || i.lang == language.Perl) && c == '\n' { + // JavaScript and Perl allow you to + // specify regexes without quotes, but + // which contain quotes. So treat the + // newline as terminating the string. + break + } + i.readRune() // Eat character. + if i.eof() { + log.Printf(eofInString, startLine) + return + } + } + default: + startLine := i.pos.line + var comment bytes.Buffer + if i.singleLineComment() { // Single line comment + for { + if i.eof() { + log.Printf(eofInSingleLineComment, i.pos.line) + return + } + c = i.readRune() + if c == '\n' { + i.unreadRune(c) + break + } + comment.WriteRune(c) + } + i.comments = append(i.comments, &Comment{ + StartLine: startLine, + EndLine: i.pos.line, + Text: comment.String(), + }) + } else if ok, start, end := i.multiLineComment(); ok { // Multiline comment + nesting := 0 + startLine := i.pos.line + for { + if i.eof() { + log.Printf(eofInMultilineComment, startLine) + return + } + c := i.readRune() + comment.WriteRune(c) + if i.lang.NestedComments() && i.match(start) { + // Allows nested comments. + comment.WriteString(start) + nesting++ + } + if i.match(end) { + if nesting > 0 { + comment.WriteString(end) + nesting-- + } else { + break + } + } + } + i.comments = append(i.comments, &Comment{ + StartLine: startLine, + EndLine: i.pos.line, + Text: comment.String(), + }) + } + } + + i.readRune() // Ignore non-comments. + } +} + +// singleLineComment returns 'true' if we've run across a single line comment +// in the given language. +func (i *input) singleLineComment() bool { + return i.match(i.lang.SingleLineCommentStart()) +} + +// multiLineComment returns 'true' if we've run across a multiline comment in +// the given language. +func (i *input) multiLineComment() (bool, string, string) { + if s := i.lang.MultilineCommentStart(); i.match(s) { + return true, s, i.lang.MultilineCommentEnd() + } + return false, "", "" +} + +// match returns 'true' if the next tokens in the stream match the given +// string. +func (i *input) match(s string) bool { + if s == "" { + return false + } + saved := s + var read []rune + for len(s) > 0 && !i.eof() { + r, size := utf8.DecodeRuneInString(s) + if c, ok := i.peekRune(); ok && c == r { + read = append(read, c) + } else { + // No match. Push the tokens we read back onto the stack. + for idx := len(read) - 1; idx >= 0; idx-- { + i.unreadRune(read[idx]) + } + return false + } + s = s[size:] + i.readRune() // Eat token. + } + return string(read) == saved +} + +// eof reports whether the input has reached the end of the file. +func (i *input) eof() bool { + return len(i.s) <= i.offset +} + +// peekRune returns the next rune in the input without consuming it. +func (i *input) peekRune() (rune, bool) { + if i.eof() { + return rune(0), false + } + r, _ := utf8.DecodeRuneInString(i.s[i.offset:]) + return r, true +} + +// readRune consumes and returns the next rune in the input. +func (i *input) readRune() rune { + r, size := utf8.DecodeRuneInString(i.s[i.offset:]) + if r == '\n' { + i.pos.line++ + i.pos.lineRune = append(i.pos.lineRune, 0) + } else { + i.pos.lineRune[len(i.pos.lineRune)-1]++ + } + i.offset += size + return r +} + +// unreadRune winds the lexer's state back to before the rune was read. +func (i *input) unreadRune(c rune) { + p := make([]byte, utf8.UTFMax) + size := utf8.EncodeRune(p, c) + i.offset -= size + if c == '\n' { + i.pos.line-- + if len(i.pos.lineRune) > 1 { + i.pos.lineRune = i.pos.lineRune[:len(i.pos.lineRune)-1] + } else { + i.pos.lineRune[len(i.pos.lineRune)-1] = 0 + } + } else { + i.pos.lineRune[len(i.pos.lineRune)-1]-- + } +} diff --git a/internal/commentparser/comment_parser_test.go b/internal/commentparser/comment_parser_test.go new file mode 100644 index 0000000..86dd996 --- /dev/null +++ b/internal/commentparser/comment_parser_test.go @@ -0,0 +1,413 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package commentparser + +import ( + "fmt" + "reflect" + "testing" + + "github.com/google/licenseclassifier/internal/commentparser/language" +) + +const ( + singleLineText = "single line text" + multilineText = `first line of text +second line of text +third line of text +` +) + +func TestCommentParser_Lex(t *testing.T) { + tests := []struct { + description string + lang language.Language + source string + want Comments + }{ + { + description: "BCPL Single Line Comments", + lang: language.Go, + source: fmt.Sprintf("//%s\n", singleLineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 1, + Text: singleLineText, + }, + }, + }, + { + description: "Go Comment With Multiline String", + lang: language.Go, + source: fmt.Sprintf("var a = `A\nmultiline\\x20\nstring`\n//%s\n", singleLineText), + want: []*Comment{ + { + StartLine: 4, + EndLine: 4, + Text: singleLineText, + }, + }, + }, + { + description: "Python Multiline String", + lang: language.Python, + source: fmt.Sprintf("#%s\n'''this is a multiline\nstring'''", singleLineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 1, + Text: singleLineText, + }, + }, + }, + { + description: "TR Command String", + lang: language.Python, + source: fmt.Sprintf(`#%s +AUTH= \ +| tr '"\n' \ +| base64 -w +`, singleLineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 1, + Text: singleLineText, + }, + }, + }, + { + description: "Lisp Single Line Comments", + lang: language.Clojure, + source: fmt.Sprintf(";%s\n", singleLineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 1, + Text: singleLineText, + }, + }, + }, + { + description: "Shell Single Line Comments", + lang: language.Shell, + source: fmt.Sprintf("#%s\n", singleLineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 1, + Text: singleLineText, + }, + }, + }, + { + description: "BCPL Multiline Comments", + lang: language.C, + source: fmt.Sprintf("/*%s*/\n", multilineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 4, + Text: multilineText, + }, + }, + }, + { + description: "BCPL Multiline Comments no terminating newline", + lang: language.C, + source: fmt.Sprintf("/*%s*/", multilineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 4, + Text: multilineText, + }, + }, + }, + { + description: "Nested Multiline Comments", + lang: language.Swift, + source: "/*a /*\n nested\n*/\n comment\n*/\n", + want: []*Comment{ + { + StartLine: 1, + EndLine: 5, + Text: "a /*\n nested\n*/\n comment\n", + }, + }, + }, + { + description: "Ruby Multiline Comments", + lang: language.Ruby, + source: fmt.Sprintf("=begin\n%s=end\n", multilineText), + want: []*Comment{ + { + StartLine: 1, + EndLine: 5, + Text: "\n" + multilineText, + }, + }, + }, + { + description: "Multiple Single Line Comments", + lang: language.Shell, + source: `# First line +# Second line +# Third line +`, + want: []*Comment{ + { + StartLine: 1, + EndLine: 1, + Text: " First line", + }, + { + StartLine: 2, + EndLine: 2, + Text: " Second line", + }, + { + StartLine: 3, + EndLine: 3, + Text: " Third line", + }, + }, + }, + { + description: "Mixed Multiline / Single Line Comments", + lang: language.C, + source: `/* + * The first multiline line. + * The second multiline line. + */ + // The first single line comment. + // The second single line comment. +`, + want: []*Comment{ + { + StartLine: 1, + EndLine: 4, + Text: ` + * The first multiline line. + * The second multiline line. + `, + }, + { + StartLine: 5, + EndLine: 5, + Text: " The first single line comment.", + }, + { + StartLine: 6, + EndLine: 6, + Text: " The second single line comment.", + }, + }, + }, + { + description: "Mixed Multiline / Single Line Comments", + lang: language.C, + source: `/* + * The first multiline line. + * The second multiline line. + */ + // The first single line comment. + // The second single line comment. +`, + want: []*Comment{ + { + StartLine: 1, + EndLine: 4, + Text: ` + * The first multiline line. + * The second multiline line. + `, + }, + { + StartLine: 5, + EndLine: 5, + Text: " The first single line comment.", + }, + { + StartLine: 6, + EndLine: 6, + Text: " The second single line comment.", + }, + }, + }, + { + description: "HTML-like comments and quotes", + lang: language.HTML, + source: `# This is an important topic +I don't want to go on all day here! <-- notice the quote in there! +<!-- Well, maybe I do... --> +`, + want: []*Comment{ + { + StartLine: 3, + EndLine: 3, + Text: " Well, maybe I do... ", + }, + }, + }, + { + description: "JavaScript regex", + lang: language.JavaScript, + source: `var re = /hello"world/; +// the comment +`, + want: []*Comment{ + { + StartLine: 2, + EndLine: 2, + Text: " the comment", + }, + }, + }, + { + description: "Perl regex", + lang: language.Perl, + source: `if (/hello"world/) { + # the comment + print "Yo!" +} +`, + want: []*Comment{ + { + StartLine: 2, + EndLine: 2, + Text: " the comment", + }, + }, + }, + } + + for _, tt := range tests { + got := Parse([]byte(tt.source), tt.lang) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Mismatch(%q) = %+v, want %+v", tt.description, got, tt.want) + } + } +} + +func TestCommentParser_ChunkIterator(t *testing.T) { + tests := []struct { + description string + comments Comments + want []Comments + }{ + { + description: "Empty Comments", + comments: Comments{}, + want: nil, + }, + { + description: "Single Line Comment Chunk", + comments: Comments{ + {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, + {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, + }, + want: []Comments{ + Comments{ + {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, + {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, + }, + }, + }, + { + description: "Multiline Comment Chunk", + comments: Comments{ + {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, + }, + want: []Comments{ + Comments{{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}}, + }, + }, + { + description: "Multiple Single Line Comment Chunks", + comments: Comments{ + {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, + {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, + {StartLine: 5, EndLine: 5, Text: "Block 2 line 1"}, + {StartLine: 6, EndLine: 6, Text: "Block 2 line 2"}, + {StartLine: 10, EndLine: 10, Text: "Block 3 line 1"}, + {StartLine: 11, EndLine: 11, Text: "Block 3 line 2"}, + {StartLine: 13, EndLine: 13, Text: "Block 3 line 3"}, + }, + want: []Comments{ + Comments{ + {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"}, + {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"}, + }, + Comments{ + {StartLine: 5, EndLine: 5, Text: "Block 2 line 1"}, + {StartLine: 6, EndLine: 6, Text: "Block 2 line 2"}, + }, + Comments{ + {StartLine: 10, EndLine: 10, Text: "Block 3 line 1"}, + {StartLine: 11, EndLine: 11, Text: "Block 3 line 2"}, + {StartLine: 13, EndLine: 13, Text: "Block 3 line 3"}, + }, + }, + }, + { + description: "Multiline Comment Chunk", + comments: Comments{ + {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, + {StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}, + }, + want: []Comments{ + Comments{{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}}, + Comments{{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}}, + }, + }, + { + description: "Multiline and Single Line Comment Chunks", + comments: Comments{ + {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, + {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"}, + {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"}, + }, + want: []Comments{ + Comments{ + {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}, + }, + Comments{ + {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"}, + {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"}, + }, + }, + }, + } + + for _, tt := range tests { + i := 0 + for got := range tt.comments.ChunkIterator() { + if i >= len(tt.want) { + t.Errorf("Mismatch(%q) more comment chunks than expected = %v, want %v", + tt.description, i+1, len(tt.want)) + break + } + if !reflect.DeepEqual(got, tt.want[i]) { + t.Errorf("Mismatch(%q) = %+v, want %+v", tt.description, got, tt.want[i]) + } + i++ + } + if i != len(tt.want) { + t.Errorf("Mismatch(%q) not enough comment chunks = %v, want %v", + tt.description, i, len(tt.want)) + } + } +} diff --git a/internal/commentparser/language/language.go b/internal/commentparser/language/language.go new file mode 100644 index 0000000..36e04eb --- /dev/null +++ b/internal/commentparser/language/language.go @@ -0,0 +1,268 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package language contains methods and information about the different +// programming languages the comment parser supports. +package language + +import ( + "path/filepath" + "strings" +) + +// Language is the progamming language we're grabbing the comments from. +type Language int + +// Languages we can retrieve comments from. +const ( + Unknown Language = iota + AppleScript + Assembly + Batch + C + Clif + Clojure + CMake + CSharp + Dart + Fortran + GLSLF // OpenGL Shading Language + Go + Haskell + HTML + Java + JavaScript + Flex + Lisp + NinjaBuild + ObjectiveC + Perl + Python + R + Ruby + Rust + Shader + Shell + SQL + Swift + SWIG + TypeScript + Yacc + Yaml +) + +// style is the comment styles that a language uses. +type style int + +// Comment styles. +const ( + unknown style = iota + applescript // -- ... and (* ... *) + batch // @REM + bcpl // // ... and /* ... */ + cmake // # ... and #[[ ... ]] + fortran // ! ... + haskell // -- ... and {- ... -} + html // <!-- ... --> + lisp // ;; + ruby // # ... and =begin ... =end + shell // # ... +) + +// ClassifyLanguage determines what language the source code was written in. It +// does this by looking at the file's extension. +func ClassifyLanguage(filename string) Language { + ext := strings.ToLower(filepath.Ext(filename)) + if len(ext) == 0 || ext[0] != '.' { + return Unknown + } + + switch ext[1:] { // Skip the '.'. + case "applescript": + return AppleScript + case "bat": + return Batch + case "c", "cc", "cpp", "c++", "h", "hh", "hpp": + return C + case "clif": + return Clif + case "cmake": + return CMake + case "cs": + return CSharp + case "dart": + return Dart + case "f", "f90", "f95": + return Fortran + case "glslf": + return GLSLF + case "go": + return Go + case "hs": + return Haskell + case "html", "htm", "md", "ng", "sgml": + return HTML + case "java": + return Java + case "js": + return JavaScript + case "l": + return Flex + case "lisp", "el", "clj": + return Lisp + case "m", "mm": + return ObjectiveC + case "gn": + return NinjaBuild + case "pl", "pm": + return Perl + case "py", "pi": + return Python + case "r": + return R + case "rb": + return Ruby + case "rs": + return Rust + case "s": + return Assembly + case "sh": + return Shell + case "shader": + return Shader + case "sql": + return SQL + case "swift": + return Swift + case "swig": + return SWIG + case "ts", "tsx": + return TypeScript + case "y": + return Yacc + case "yaml": + return Yaml + } + return Unknown +} + +// commentStyle returns the language's comment style. +func (lang Language) commentStyle() style { + switch lang { + case Assembly, C, CSharp, Dart, GLSLF, Go, Java, JavaScript, Flex, ObjectiveC, Rust, Shader, Swift, SWIG, TypeScript, Yacc: + return bcpl + case Batch: + return batch + case CMake: + return cmake + case Fortran: + return fortran + case Haskell, SQL: + return haskell + case HTML: + return html + case Clojure, Lisp: + return lisp + case Ruby: + return ruby + case Clif, NinjaBuild, Perl, Python, R, Shell, Yaml: + return shell + } + return unknown +} + +// SingleLineCommentStart returns the starting string of a single line comment +// for the given language. There is no equivalent "End" method, because it's +// the end of line. +func (lang Language) SingleLineCommentStart() string { + switch lang.commentStyle() { + case applescript, haskell: + return "--" + case batch: + return "@REM" + case bcpl: + return "//" + case fortran: + return "!" + case lisp: + return ";" + case shell, ruby, cmake: + return "#" + } + return "" +} + +// MultilineCommentStart returns the starting string of a multiline comment for +// the given language. +func (lang Language) MultilineCommentStart() string { + switch lang.commentStyle() { + case applescript: + return "(*" + case bcpl: + if lang != Rust { + return "/*" + } + case cmake: + return "#[[" + case haskell: + return "{-" + case html: + return "<!--" + case ruby: + return "=begin" + } + return "" +} + +// MultilineCommentEnd returns the ending string of a multiline comment for the +// given langauge. +func (lang Language) MultilineCommentEnd() string { + switch lang.commentStyle() { + case applescript: + return "*)" + case bcpl: + if lang != Rust { + return "*/" + } + case cmake: + return "]]" + case haskell: + return "-}" + case html: + return "-->" + case ruby: + return "=end" + } + return "" +} + +// QuoteCharacter returns 'true' if the character is considered the beginning +// of a string in the given language. The second return value is true if the +// string allows for escaping. +func (lang Language) QuoteCharacter(quote rune) (ok bool, escape bool) { + switch quote { + case '"', '\'': + return true, true + case '`': + if lang == Go { + return true, false + } + } + return false, false +} + +// NestedComments returns true if the language allows for nested multiline comments. +func (lang Language) NestedComments() bool { + return lang == Swift +} diff --git a/internal/sets/sets.go b/internal/sets/sets.go new file mode 100644 index 0000000..f34ae5b --- /dev/null +++ b/internal/sets/sets.go @@ -0,0 +1,20 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sets provides sets for storing collections of unique elements. +package sets + +// present is an empty struct used as the "value" in the map[int], since +// empty structs consume zero bytes (unlike 1 unnecessary byte per bool). +type present struct{} diff --git a/internal/sets/sets_benchmark_test.go b/internal/sets/sets_benchmark_test.go new file mode 100644 index 0000000..017dca6 --- /dev/null +++ b/internal/sets/sets_benchmark_test.go @@ -0,0 +1,213 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "strings" + "testing" +) + +const ( + postmodernThesisCollapse = `1. Expressions of collapse + +If one examines postcultural Marxism, one is faced with a choice: either +reject capitalist submodern theory or conclude that the purpose of the reader +is significant form. Bataille uses the term ‘capitalist construction’ to denote +not, in fact, discourse, but prediscourse. + +Therefore, in Stardust, Gaiman analyses postcultural Marxism; in +The Books of Magic, although, he denies capitalist submodern theory. If +capitalist construction holds, we have to choose between capitalist submodern +theory and Baudrillardist simulacra. + +However, conceptualist socialism implies that narrativity may be used to +oppress the proletariat, given that sexuality is distinct from art. The subject +is interpolated into a capitalist construction that includes language as a +paradox. +` + postmodernThesisNarratives = `1. Narratives of failure + +The main theme of the works of Joyce is the defining characteristic, and some +would say the economy, of neocultural class. But Bataille promotes the use of +socialist realism to deconstruct sexual identity. + +The subject is interpolated into a Baudrillardist simulation that includes +consciousness as a whole. Thus, the primary theme of Pickett's[1] model of +socialist realism is the role of the reader as artist. + +The subject is contextualised into a postcapitalist discourse that includes +language as a paradox. It could be said that if Baudrillardist simulation +holds, the works of Gibson are postmodern. The characteristic theme of the +works of Gibson is the common ground between society and narrativity. However, +Sartre uses the term 'postcapitalist discourse' to denote not, in fact, +narrative, but postnarrative. +` +) + +var ( + // Word lists: + stringsA = strings.Fields(postmodernThesisCollapse) + stringsB = strings.Fields(postmodernThesisNarratives) +) + +func BenchmarkStringSets_NewStringSet(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + NewStringSet(stringsA...) + } +} + +func BenchmarkStringSets_Copy(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Copy() + } +} + +func BenchmarkStringSets_Insert(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet() + s.Insert(stringsA...) + s.Insert(stringsB...) + } +} + +func BenchmarkStringSets_Delete(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + s := NewStringSet(stringsA...) + s.Delete(stringsB...) + } +} + +func BenchmarkStringSets_Intersect(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Intersect(t) + } +} + +func BenchmarkStringSets_Disjoint(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Disjoint(t) + } +} + +func BenchmarkStringSets_Difference(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Difference(t) + } +} + +func BenchmarkStringSets_Unique(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Unique(t) + } +} + +func BenchmarkStringSets_Equal(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Equal(t) + } +} + +func BenchmarkStringSets_Union(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet(stringsB...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Union(t) + } +} + +func BenchmarkStringSets_Contains(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, w := range stringsB { + s.Contains(w) + } + } +} + +func BenchmarkStringSets_Len(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Len() + } +} + +func BenchmarkStringSets_Empty(b *testing.B) { + s := NewStringSet(stringsA...) + t := NewStringSet() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Empty() + t.Empty() + } +} + +func BenchmarkStringSets_Elements(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Elements() + } +} + +func BenchmarkStringSets_Sorted(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Sorted() + } +} + +func BenchmarkStringSets_String(b *testing.B) { + s := NewStringSet(stringsA...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.String() + } +} diff --git a/internal/sets/stringset.go b/internal/sets/stringset.go new file mode 100644 index 0000000..9f93450 --- /dev/null +++ b/internal/sets/stringset.go @@ -0,0 +1,228 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "fmt" + "sort" + "strings" +) + +// StringSet stores a set of unique string elements. +type StringSet struct { + set map[string]present +} + +// NewStringSet creates a StringSet containing the supplied initial string elements. +func NewStringSet(elements ...string) *StringSet { + s := &StringSet{} + s.set = make(map[string]present) + s.Insert(elements...) + return s +} + +// Copy returns a newly allocated copy of the supplied StringSet. +func (s *StringSet) Copy() *StringSet { + c := NewStringSet() + if s != nil { + for e := range s.set { + c.set[e] = present{} + } + } + return c +} + +// Insert zero or more string elements into the StringSet. +// As expected for a Set, elements already present in the StringSet are +// simply ignored. +func (s *StringSet) Insert(elements ...string) { + for _, e := range elements { + s.set[e] = present{} + } +} + +// Delete zero or more string elements from the StringSet. +// Any elements not present in the StringSet are simply ignored. +func (s *StringSet) Delete(elements ...string) { + for _, e := range elements { + delete(s.set, e) + } +} + +// Intersect returns a new StringSet containing the intersection of the +// receiver and argument StringSets. Returns an empty set if the argument is nil. +func (s *StringSet) Intersect(other *StringSet) *StringSet { + if other == nil { + return NewStringSet() + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Perform the intersection. + intersect := NewStringSet() + for e := range a { + if _, ok := b[e]; ok { + intersect.set[e] = present{} + } + } + return intersect +} + +// Disjoint returns true if the intersection of the receiver and the argument +// StringSets is the empty set. Returns true if the argument is nil or either +// StringSet is the empty set. +func (s *StringSet) Disjoint(other *StringSet) bool { + if other == nil || len(other.set) == 0 || len(s.set) == 0 { + return true + } + + // Point a and b to the maps, setting a to the smaller of the two. + a, b := s.set, other.set + if len(b) < len(a) { + a, b = b, a + } + + // Check for non-empty intersection. + for e := range a { + if _, ok := b[e]; ok { + return false // Early-exit because intersecting. + } + } + return true +} + +// Difference returns a new StringSet containing the elements in the receiver +// that are not present in the argument StringSet. Returns a copy of the +// receiver if the argument is nil. +func (s *StringSet) Difference(other *StringSet) *StringSet { + if other == nil { + return s.Copy() + } + + // Insert only the elements in the receiver that are not present in the + // argument StringSet. + diff := NewStringSet() + for e := range s.set { + if _, ok := other.set[e]; !ok { + diff.set[e] = present{} + } + } + return diff +} + +// Unique returns a new StringSet containing the elements in the receiver +// that are not present in the argument StringSet *and* the elements in the +// argument StringSet that are not in the receiver (which is the union of two +// disjoint sets). Returns a copy of the +// receiver if the argument is nil. +func (s *StringSet) Unique(other *StringSet) *StringSet { + if other == nil { + return s.Copy() + } + + sNotInOther := s.Difference(other) + otherNotInS := other.Difference(s) + + // Duplicate Union implementation here to avoid extra Copy, since both + // sNotInOther and otherNotInS are already copies. + unique := sNotInOther + for e := range otherNotInS.set { + unique.set[e] = present{} + } + return unique +} + +// Equal returns true if the receiver and the argument StringSet contain +// exactly the same elements. Returns false if the argument is nil. +func (s *StringSet) Equal(other *StringSet) bool { + if s == nil || other == nil { + return s == nil && other == nil + } + + // Two sets of different length cannot have the exact same unique elements. + if len(s.set) != len(other.set) { + return false + } + + // Only one loop is needed. If the two sets are known to be of equal + // length, then the two sets are equal only if exactly all of the elements + // in the first set are found in the second. + for e := range s.set { + if _, ok := other.set[e]; !ok { + return false + } + } + + return true +} + +// Union returns a new StringSet containing the union of the receiver and +// argument StringSets. Returns a copy of the receiver if the argument is nil. +func (s *StringSet) Union(other *StringSet) *StringSet { + union := s.Copy() + if other != nil { + for e := range other.set { + union.set[e] = present{} + } + } + return union +} + +// Contains returns true if element is in the StringSet. +func (s *StringSet) Contains(element string) bool { + _, in := s.set[element] + return in +} + +// Len returns the number of unique elements in the StringSet. +func (s *StringSet) Len() int { + return len(s.set) +} + +// Empty returns true if the receiver is the empty set. +func (s *StringSet) Empty() bool { + return len(s.set) == 0 +} + +// Elements returns a []string of the elements in the StringSet, in no +// particular (or consistent) order. +func (s *StringSet) Elements() []string { + elements := []string{} // Return at least an empty slice rather than nil. + for e := range s.set { + elements = append(elements, e) + } + return elements +} + +// Sorted returns a sorted []string of the elements in the StringSet. +func (s *StringSet) Sorted() []string { + elements := s.Elements() + sort.Strings(elements) + return elements +} + +// String formats the StringSet elements as sorted strings, representing them +// in "array initializer" syntax. +func (s *StringSet) String() string { + elements := s.Sorted() + var quoted []string + for _, e := range elements { + quoted = append(quoted, fmt.Sprintf("%q", e)) + } + return fmt.Sprintf("{%s}", strings.Join(quoted, ", ")) +} diff --git a/internal/sets/stringset_test.go b/internal/sets/stringset_test.go new file mode 100644 index 0000000..e37efaa --- /dev/null +++ b/internal/sets/stringset_test.go @@ -0,0 +1,425 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package sets + +import ( + "sort" + "testing" +) + +func checkSameStringSet(t *testing.T, set *StringSet, unique []string) { + // Check that lengths are the same. + want := len(unique) + got := set.Len() + + if got != want { + t.Errorf("NewStringSet(%v) want length %v, got %v", unique, want, got) + } + + // Check that all strings are present in set. + for _, s := range unique { + want := true + got := set.Contains(s) + + if got != want { + t.Errorf("Contains(%v) want %v, got %v", s, want, got) + } + } + + // Check that all elements are present in strings. + sort.Strings(unique) + + for i, got := range set.Sorted() { + want := unique[i] + + if got != want { + t.Errorf("Sorted(%d) want %v, got %v", i, want, got) + } + } +} + +func TestNewStringSet(t *testing.T) { + empty := NewStringSet() + want := 0 + got := empty.Len() + + if got != want { + t.Errorf("NewStringSet() want length %v, got %v", want, got) + } + + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + checkSameStringSet(t, set, unique) + + // Append an already-present element. + nonUnique := append(unique, unique[0]) + set = NewStringSet(nonUnique...) + + // Non-unique unique should collapse to one. + want = len(unique) + got = set.Len() + + if got != want { + t.Errorf("NewStringSet(%v) want length %v, got %v", nonUnique, want, got) + } +} + +func TestStringSet_Copy(t *testing.T) { + // Check both copies represent the same set. + base := []string{"a", "b", "c"} + orig := NewStringSet(base...) + cpy := orig.Copy() + checkSameStringSet(t, orig, base) + checkSameStringSet(t, cpy, base) + + // Check the two copies are independent. + more := []string{"d"} + orig.Insert(more...) + more = append(base, more...) + checkSameStringSet(t, orig, more) + checkSameStringSet(t, cpy, base) +} + +func TestStringSet_Insert(t *testing.T) { + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + + // Insert existing element, which should basically be a no-op. + set.Insert(unique[0]) + checkSameStringSet(t, set, unique) + + // Actually insert new unique elements. + additional := []string{"d", "e"} + longer := append(unique, additional...) + set.Insert(additional...) + checkSameStringSet(t, set, longer) +} + +func TestStringSet_Delete(t *testing.T) { + unique := []string{"a", "b", "c"} + set := NewStringSet(unique...) + + // Delete non-existent element, which should basically be a no-op. + set.Delete("z") + checkSameStringSet(t, set, unique) + + // Actually delete existing elements. + set.Delete(unique[1:]...) + checkSameStringSet(t, set, unique[:1]) +} + +func TestStringSet_Intersect(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + + // Check Intersect(nil) returns an empty set. + setA := NewStringSet(input1...) + got := setA.Intersect(nil) + checkSameStringSet(t, got, []string{}) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Intersect returns the correct result. + setB := NewStringSet(input2...) + got = setA.Intersect(setB) + want := []string{"c", "e"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Reverse the inputs and verify Intersect produces the same results. + setA = NewStringSet(input2...) + setB = NewStringSet(input1...) + got = setA.Intersect(setB) + checkSameStringSet(t, got, want) + // Check the sources are again unchanged. + checkSameStringSet(t, setA, input2) + checkSameStringSet(t, setB, input1) +} + +func TestStringSet_Disjoint(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check that sets are always disjoint with the empty set or nil + setA := NewStringSet(input1...) + emptySet := NewStringSet() + + if disjoint := setA.Disjoint(nil); !disjoint { + t.Errorf("Disjoint(%s, %v) want %v, got %v", setA, nil, true, disjoint) + } + + if disjoint := setA.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, emptySet, true, disjoint) + } + + if disjoint := emptySet.Disjoint(setA); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, setA, true, disjoint) + } + + if disjoint := emptySet.Disjoint(emptySet); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", emptySet, emptySet, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, emptySet, []string{}) + + // Check two non-empty, non-nil disjoint sets. + setC := NewStringSet(input3...) + + if disjoint := setA.Disjoint(setC); !disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setC, true, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check that two intersecting sets are not Disjoint. + setB := NewStringSet(input2...) + + if disjoint := setA.Disjoint(setB); disjoint { + t.Errorf("Disjoint(%s, %s) want %v, got %v", setA, setB, false, disjoint) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Difference(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check Difference(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Difference(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check A - A returns the empty set. + got = setA.Difference(setA) + + if !got.Empty() { + t.Errorf("Difference(%s, %s).Empty() want %v, got %v", + setA, setA, true, false) + } + + checkSameStringSet(t, got, []string{}) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check A - C simply returns elements in A if A and C are disjoint. + setC := NewStringSet(input3...) + got = setA.Difference(setC) + checkSameStringSet(t, got, input1) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check A - B returns elements in A not in B. + setB := NewStringSet(input2...) + got = setA.Difference(setB) + want := []string{"a", "d", "f"} + checkSameStringSet(t, got, want) + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check B - A returns elements in B not in A. + got = setB.Difference(setA) + want = []string{"b"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Unique(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"x", "y", "z"} + + // Check Unique(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Unique(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Unique returns only elements in A and B not in both A and B. + setB := NewStringSet(input2...) + got = setA.Unique(setB) + want := []string{"a", "b", "d", "f"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check Unique of two disjoint sets is the Union of those sets. + setC := NewStringSet(input3...) + got = setA.Unique(setC) + union := setA.Union(setC) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of disjoint Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Check Unique is the Union of A - B and B - A. + aNotInB := setA.Difference(setB) + bNotInA := setB.Difference(setA) + union = aNotInB.Union(bNotInA) + want = []string{"a", "b", "d", "f"} + checkSameStringSet(t, union, want) + got = setA.Unique(setB) + + if equal := union.Equal(got); !equal { + t.Errorf("Union of differences Equal(%s, %s) want %v, got %v", + union, got, true, equal) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) +} + +func TestStringSet_Equal(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + input3 := []string{"a", "c", "d", "e", "g"} + + // Check Equal(nil) returns false. + setA := NewStringSet(input1...) + + if equal := setA.Equal(nil); equal { + t.Errorf("Equal(%s, %v) want %v, got %v", setA, nil, false, true) + } + + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns true for a set and itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns false for sets of non-equal length. + setB := NewStringSet(input2...) + + if equal := setA.Equal(setB); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setB, false, true) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Check Equal returns false for equal-length sets with different elements. + setC := NewStringSet(input3...) + + if equal := setA.Equal(setC); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setC, false, true) + } + + if equal := setC.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setC, setA, false, true) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setC, input3) + + // Check Equal returns true for a set with itself. + if equal := setA.Equal(setA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, setA, true, false) + } + + // Also check the source is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Equal returns true for two separate equal sets. + anotherA := NewStringSet(input1...) + + if equal := setA.Equal(anotherA); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, anotherA, true, false) + } + + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, anotherA, input1) + + // Check for equality comparing to nil struct. + var nilSet *StringSet + if equal := nilSet.Equal(setA); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, setA, false, true) + } + if equal := setA.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", setA, nilSet, false, true) + } + if equal := nilSet.Equal(nilSet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, nilSet, true, false) + } + + // Edge case: consider the empty set to be different than the nil set. + emptySet := NewStringSet() + if equal := nilSet.Equal(emptySet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", nilSet, emptySet, false, true) + } + if equal := emptySet.Equal(nilSet); equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, nilSet, false, true) + } + if equal := emptySet.Equal(emptySet); !equal { + t.Errorf("Equal(%s, %s) want %v, got %v", emptySet, emptySet, true, false) + } +} + +func TestStringSet_Union(t *testing.T) { + input1 := []string{"a", "c", "d", "e", "f"} + input2 := []string{"b", "c", "e"} + + // Check Union(nil) returns a copy of the receiver. + setA := NewStringSet(input1...) + got := setA.Union(nil) + checkSameStringSet(t, got, input1) + // Check that the receiver is unchanged. + checkSameStringSet(t, setA, input1) + + // Check Union returns the correct result. + setB := NewStringSet(input2...) + got = setA.Union(setB) + want := []string{"a", "b", "c", "d", "e", "f"} + checkSameStringSet(t, got, want) + // Also check the sources are unchanged. + checkSameStringSet(t, setA, input1) + checkSameStringSet(t, setB, input2) + + // Reverse the inputs and verify Union produces the same results. + setA = NewStringSet(input2...) + setB = NewStringSet(input1...) + got = setA.Union(setB) + checkSameStringSet(t, got, want) + // Check the sources are again unchanged. + checkSameStringSet(t, setA, input2) + checkSameStringSet(t, setB, input1) +} |