aboutsummaryrefslogtreecommitdiff
path: root/commentparser/comment_parser_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'commentparser/comment_parser_test.go')
-rw-r--r--commentparser/comment_parser_test.go566
1 files changed, 566 insertions, 0 deletions
diff --git a/commentparser/comment_parser_test.go b/commentparser/comment_parser_test.go
new file mode 100644
index 0000000..23ff0c7
--- /dev/null
+++ b/commentparser/comment_parser_test.go
@@ -0,0 +1,566 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package commentparser
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/licenseclassifier/commentparser/language"
+)
+
+const (
+ singleLineText = "single line text"
+ multilineText = `first line of text
+second line of text
+third line of text
+`
+)
+
+func TestCommentParser_Lex(t *testing.T) {
+ tests := []struct {
+ description string
+ lang language.Language
+ source string
+ want Comments
+ }{
+ {
+ description: "BCPL Single Line Comments",
+ lang: language.Go,
+ source: fmt.Sprintf("//%s\n", singleLineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: singleLineText,
+ },
+ },
+ },
+ {
+ description: "Go Comment With Multiline String",
+ lang: language.Go,
+ source: fmt.Sprintf("var a = `A\nmultiline\\x20\nstring`\n//%s\n", singleLineText),
+ want: []*Comment{
+ {
+ StartLine: 4,
+ EndLine: 4,
+ Text: singleLineText,
+ },
+ },
+ },
+ {
+ description: "Python Multiline String",
+ lang: language.Python,
+ source: fmt.Sprintf("#%s\n\n\n\nx = '''this is a multiline\nstring'''", singleLineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: singleLineText,
+ },
+ },
+ },
+ {
+ description: "Python module-level Docstring #1",
+ lang: language.Python,
+ source: fmt.Sprintf("'''%s'''\nimport foo", multilineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 4,
+ Text: multilineText,
+ },
+ },
+ },
+ {
+ description: "Python module-level Docstring #2",
+ lang: language.Python,
+ source: fmt.Sprintf("#!/usr/bin/python\n'''%s'''\nimport foo", multilineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: "!/usr/bin/python",
+ },
+ {
+ StartLine: 2,
+ EndLine: 5,
+ Text: multilineText,
+ },
+ },
+ },
+ {
+ // Only include docstrings that start at the beginning of a line
+ description: "Python module-level Docstring #3",
+ lang: language.Python,
+ source: "'''zero1'''\n '''one'''\n '''two'''\n'''zero2'''",
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: "zero1",
+ },
+ {
+ StartLine: 4,
+ EndLine: 4,
+ Text: "zero2",
+ },
+ },
+ },
+ {
+ description: "TR Command String",
+ lang: language.Python,
+ source: fmt.Sprintf(`#%s
+AUTH= \
+| tr '"\n' \
+| base64 -w
+`, singleLineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: singleLineText,
+ },
+ },
+ },
+ {
+ description: "Lisp Single Line Comments",
+ lang: language.Clojure,
+ source: fmt.Sprintf(";%s\n", singleLineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: singleLineText,
+ },
+ },
+ },
+ {
+ description: "Shell Single Line Comments",
+ lang: language.Shell,
+ source: fmt.Sprintf("#%s\n", singleLineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: singleLineText,
+ },
+ },
+ },
+ {
+ description: "BCPL Multiline Comments",
+ lang: language.C,
+ source: fmt.Sprintf("/*%s*/\n", multilineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 4,
+ Text: multilineText,
+ },
+ },
+ },
+ {
+ description: "BCPL Multiline Comments no terminating newline",
+ lang: language.C,
+ source: fmt.Sprintf("/*%s*/", multilineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 4,
+ Text: multilineText,
+ },
+ },
+ },
+ {
+ description: "Nested Multiline Comments",
+ lang: language.Swift,
+ source: "/*a /*\n nested\n*/\n comment\n*/\n",
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 5,
+ Text: "a /*\n nested\n*/\n comment\n",
+ },
+ },
+ },
+ {
+ description: "Ruby Multiline Comments",
+ lang: language.Ruby,
+ source: fmt.Sprintf("=begin\n%s=end\n", multilineText),
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 5,
+ Text: "\n" + multilineText,
+ },
+ },
+ },
+ {
+ description: "Multiple Single Line Comments",
+ lang: language.Shell,
+ source: `# First line
+# Second line
+# Third line
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: " First line",
+ },
+ {
+ StartLine: 2,
+ EndLine: 2,
+ Text: " Second line",
+ },
+ {
+ StartLine: 3,
+ EndLine: 3,
+ Text: " Third line",
+ },
+ },
+ },
+ {
+ description: "Mixed Multiline / Single Line Comments",
+ lang: language.C,
+ source: `/*
+ * The first multiline line.
+ * The second multiline line.
+ */
+ // The first single line comment.
+ // The second single line comment.
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 4,
+ Text: `
+ * The first multiline line.
+ * The second multiline line.
+ `,
+ },
+ {
+ StartLine: 5,
+ EndLine: 5,
+ Text: " The first single line comment.",
+ },
+ {
+ StartLine: 6,
+ EndLine: 6,
+ Text: " The second single line comment.",
+ },
+ },
+ },
+ {
+ description: "Mixed Multiline / Single Line Comments",
+ lang: language.C,
+ source: `/*
+ * The first multiline line.
+ * The second multiline line.
+ */
+ // The first single line comment.
+ // The second single line comment.
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 4,
+ Text: `
+ * The first multiline line.
+ * The second multiline line.
+ `,
+ },
+ {
+ StartLine: 5,
+ EndLine: 5,
+ Text: " The first single line comment.",
+ },
+ {
+ StartLine: 6,
+ EndLine: 6,
+ Text: " The second single line comment.",
+ },
+ },
+ },
+ {
+ description: "HTML-like comments and quotes",
+ lang: language.HTML,
+ source: `# This is an important topic
+I don't want to go on all day here! <-- notice the quote in there!
+<!-- Well, maybe I do... -->
+`,
+ want: []*Comment{
+ {
+ StartLine: 3,
+ EndLine: 3,
+ Text: " Well, maybe I do... ",
+ },
+ },
+ },
+ {
+ description: "JavaScript regex",
+ lang: language.JavaScript,
+ source: `var re = /hello"world/;
+// the comment
+`,
+ want: []*Comment{
+ {
+ StartLine: 2,
+ EndLine: 2,
+ Text: " the comment",
+ },
+ },
+ },
+ {
+ description: "Perl regex",
+ lang: language.Perl,
+ source: `if (/hello"world/) {
+ # the comment
+ print "Yo!"
+}
+`,
+ want: []*Comment{
+ {
+ StartLine: 2,
+ EndLine: 2,
+ Text: " the comment",
+ },
+ },
+ },
+ {
+ description: "SQL using MySQL-style comments",
+ lang: language.SQL,
+ source: `/*
+ * The first multiline line.
+ * The second multiline line.
+ */
+ # The first single line comment.
+ # The second single line comment.
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 4,
+ Text: `
+ * The first multiline line.
+ * The second multiline line.
+ `,
+ },
+ {
+ StartLine: 5,
+ EndLine: 5,
+ Text: " The first single line comment.",
+ },
+ {
+ StartLine: 6,
+ EndLine: 6,
+ Text: " The second single line comment.",
+ },
+ },
+ },
+ {
+ description: "SQL using MySQL-style comments",
+ lang: language.SQL,
+ source: `-- The first single line comment.
+/*
+ * The first multiline line.
+ * The second multiline line.
+ */
+ -- The second single line comment.
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: " The first single line comment.",
+ },
+ {
+ StartLine: 2,
+ EndLine: 5,
+ Text: `
+ * The first multiline line.
+ * The second multiline line.
+ `,
+ },
+ {
+ StartLine: 6,
+ EndLine: 6,
+ Text: " The second single line comment.",
+ },
+ },
+ },
+ {
+ description: "Matlab language - Single Line Comments",
+ lang: language.ObjectiveC, // Matlab has same extension as Objective-C.
+ source: `% Copyright 2017 Yoyodyne Inc.
+
+clear;
+close all;
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 1,
+ Text: " Copyright 2017 Yoyodyne Inc.",
+ },
+ },
+ },
+ {
+ description: "Matlab language - Multi-Line Comments",
+ lang: language.ObjectiveC, // Matlab has same extension as Objective-C.
+ source: `%{ Multiline comment start.
+ Second line of multiline comment.
+%}
+
+clear;
+close all;
+`,
+ want: []*Comment{
+ {
+ StartLine: 1,
+ EndLine: 3,
+ Text: ` Multiline comment start.
+ Second line of multiline comment.
+`,
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ got := Parse([]byte(tt.source), tt.lang)
+ if !cmp.Equal(got, tt.want) {
+ t.Errorf("Mismatch(%q) = %+v, want %+v, diff=%v", tt.description, got, tt.want, cmp.Diff(got, tt.want))
+ }
+ }
+}
+
+func TestCommentParser_ChunkIterator(t *testing.T) {
+ tests := []struct {
+ description string
+ comments Comments
+ want []Comments
+ }{
+ {
+ description: "Empty Comments",
+ comments: Comments{},
+ want: nil,
+ },
+ {
+ description: "Single Line Comment Chunk",
+ comments: Comments{
+ {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
+ {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
+ },
+ want: []Comments{{
+ {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
+ {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
+ }},
+ },
+ {
+ description: "Multiline Comment Chunk",
+ comments: Comments{{
+ StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
+ }},
+ want: []Comments{{{
+ StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
+ }}},
+ },
+ {
+ description: "Multiple Single Line Comment Chunks",
+ comments: Comments{
+ {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
+ {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
+ {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
+ {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
+ },
+ want: []Comments{
+ {
+ {StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
+ {StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
+ },
+ {
+ {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
+ {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
+ },
+ },
+ },
+ {
+ description: "Multiline Comment Chunk",
+ comments: Comments{
+ {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
+ {StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"},
+ },
+ want: []Comments{
+ {{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}},
+ {{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}},
+ },
+ },
+ {
+ description: "Multiline and Single Line Comment Chunks",
+ comments: Comments{
+ {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
+ {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
+ {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
+ },
+ want: []Comments{
+ {
+ {StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
+ },
+ {
+ {StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
+ {StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
+ },
+ },
+ },
+ {
+ description: "Mixed Multiline / Single Line Comments",
+ comments: []*Comment{
+ {StartLine: 1, EndLine: 1, Text: " The first single line comment."},
+ {StartLine: 2, EndLine: 2, Text: " The second single line comment."},
+ {StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
+ },
+ want: []Comments{
+ {
+ {StartLine: 1, EndLine: 1, Text: " The first single line comment."},
+ {StartLine: 2, EndLine: 2, Text: " The second single line comment."},
+ },
+ {
+ {StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ i := 0
+ for got := range tt.comments.ChunkIterator() {
+ if i >= len(tt.want) {
+ t.Errorf("Mismatch(%q) more comment chunks than expected = %v, want %v",
+ tt.description, i+1, len(tt.want))
+ break
+ }
+ if !reflect.DeepEqual(got, tt.want[i]) {
+ t.Errorf("Mismatch(%q) = %+v, want %+v", tt.description, got, tt.want[i])
+ }
+ i++
+ }
+ if i != len(tt.want) {
+ t.Errorf("Mismatch(%q) not enough comment chunks = %v, want %v",
+ tt.description, i, len(tt.want))
+ }
+ }
+}