// Copyright 2020 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package classifier import ( "bytes" "errors" "io/ioutil" "log" "os" "path" "path/filepath" "sort" "strings" "testing" "testing/iotest" "github.com/davecgh/go-spew/spew" "github.com/google/go-cmp/cmp" ) type scenario struct { expected []string data []byte } var defaultThreshold = .8 var baseLicenses = "assets" func classifier() (*Classifier, error) { c := NewClassifier(defaultThreshold) return c, c.LoadLicenses(path.Join(baseLicenses)) } func getScenarioFilenames() ([]string, error) { scenarios := "scenarios" var files []string err := filepath.Walk(path.Join(scenarios), func(path string, info os.FileInfo, err error) error { if err != nil { return err } if strings.HasSuffix(path, "md") || info.IsDir() { return nil } files = append(files, path) return nil }) return files, err } func TestMatchScenarios(t *testing.T) { c, err := classifier() if err != nil { t.Fatalf("couldn't instantiate standard test classifier: %v", err) } files, err := getScenarioFilenames() if err != nil { t.Fatalf("encountered error walking scenarios directory: %v", err) } for _, f := range files { s := readScenario(f) m := c.Match(s.data) checkMatches(t, m.Matches, f, s.expected) } } func readScenario(path string) *scenario { var s scenario b, err := ioutil.ReadFile(path) if err != nil { log.Fatalf("Couldn't read scenario %s: %v", path, err) } // A scenario consists of any number of comment lines, which are ignored, then a line of the form // EXPECTED: A,B,C // // or EXPECTED: // where A,B,C is a comma-separated list of expected licenses. lines := strings.SplitN(string(b), "EXPECTED:", 2) // The first part of lines is description, which we ignore. We then split on a linefeed to get the // list of licenses and the rest of the data content. lines = strings.SplitN(lines[1], "\n", 2) if lines[0] != "" { s.expected = strings.Split(lines[0], ",") } else { s.expected = []string{} } s.data = []byte(lines[1]) return &s } func TestContainsAndOverlaps(t *testing.T) { tests := []struct { name string a, b *Match contains bool overlaps bool }{ { name: "no intersection", a: &Match{ StartLine: 1, EndLine: 3, }, b: &Match{ StartLine: 4, EndLine: 5, }, contains: false, overlaps: false, }, { name: "overlap at end", a: &Match{ StartLine: 4, EndLine: 10, }, b: &Match{ StartLine: 1, EndLine: 5, }, contains: false, overlaps: true, }, { name: "overlap at end", a: &Match{ StartLine: 1, EndLine: 10, }, b: &Match{ StartLine: 4, EndLine: 12, }, contains: false, overlaps: true, }, { name: "contains", a: &Match{ StartLine: 1, EndLine: 10, }, b: &Match{ StartLine: 4, EndLine: 7, }, contains: true, overlaps: false, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { if got := contains(test.a, test.b); got != test.contains { t.Errorf("contains: got %v want %v", got, test.contains) } if got := overlaps(test.a, test.b); got != test.overlaps { t.Errorf("overlaps: got %v want %v", got, test.overlaps) } }) } } func TestLicName(t *testing.T) { tests := []struct { name string expected string }{ { // The filename for a license name: "GPL-2.0.txt", expected: "GPL-2.0", }, { // The filename for a header reference to a license name: "GPL-2.0.header.txt", expected: "GPL-2.0", }, { // The filename for a variant header reference to a license name: "GPL-2.0.header_a.txt", expected: "GPL-2.0", }, { // The filename for a variant license body name: "Apache-2.0_no_toc.txt", expected: "Apache-2.0", }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { }) } } func TestMatchFrom(t *testing.T) { tr := iotest.TimeoutReader(strings.NewReader("some data")) c, err := classifier() if err != nil { t.Fatalf("couldn't instantiate standard Google classifier: %v", err) } _, err = c.MatchFrom(tr) if !errors.Is(err, iotest.ErrTimeout) { t.Errorf("got %v want %v", err, iotest.ErrTimeout) } files, err := getScenarioFilenames() if err != nil { t.Fatalf("encountered error walking scenarios directory: %v", err) } for _, f := range files { s := readScenario(f) r := bytes.NewReader(s.data) m, err := c.MatchFrom(r) if err != nil { t.Errorf("unexpected error: %v", err) } checkMatches(t, m.Matches, f, s.expected) } } // checkMatches diffs the resulting matches against the expected content and // sets test results. func checkMatches(t *testing.T, m Matches, f string, e []string) { found := make(map[string]bool) // Uniquify the licenses found for _, l := range m { found[l.Name] = true } var names []string for l := range found { names = append(names, l) } sort.Strings(names) if len(names) != len(e) { t.Errorf("Match(%q) number matches: %v, want %v: %v", f, len(names), len(e), spew.Sdump(m)) return } for i := 0; i < len(names); i++ { w := strings.TrimSpace(e[i]) if got, want := names[i], w; got != want { t.Errorf("Match(%q) = %q, want %q", f, got, want) } } } func TestLicenseName(t *testing.T) { tests := []struct { input string want string }{ { input: "License/example/file.txt", want: "example", }, { input: "License/example/a.txt", want: "example", }, { input: "Header/example/header.txt", want: "example", }, { input: "Header/example/a.txt", want: "example", }, } for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { got := LicenseName(tt.input) if diff := cmp.Diff(tt.want, got); diff != "" { t.Errorf("Unexpected result; diff %v", diff) } }) } } func TestNormalize(t *testing.T) { tests := []struct { input string want string }{ { input: "Words With Extra Spaces are flattened out, preserving case", want: "Words With Extra Spaces are flattened out preserving case", }, { input: "", want: "", }, { input: " License ", want: "License", }, { // This tests that the line breaks in the input text are properly // preserved, which is important for visual diffing. input: `Preserving line breaks is important`, want: `Preserving line breaks is important`, }, { // This tests that soft EOL functionality doesn't affect normalized output input: `This is a sentence looking construct. This is another sentence. What happens?`, want: `This is a sentence looking construct This is another sentence What happens`, }, { input: `header ........................ This is oddly formatted`, want: `header This is oddly formatted`, }, { input: `baseball basket- ball football`, want: "baseball basketball\nfootball", }, } for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { c, err := classifier() if err != nil { t.Fatalf("couldn't instantiate standard Google classifier: %v", err) } got := c.Normalize([]byte(tt.input)) if diff := cmp.Diff(tt.want, string(got)); diff != "" { t.Errorf("Unexpected result; diff %v", diff) } }) } }