aboutsummaryrefslogtreecommitdiff
path: root/idsearcher
diff options
context:
space:
mode:
author: RishabhBhatnagar <bhatnagarrishabh4@gmail.com> 2020-01-09 20:39:55 +0530
committer: RishabhBhatnagar <bhatnagarrishabh4@gmail.com> 2020-01-09 21:04:37 +0530
commit: cd59ee66408a908f7ef94548814514f6bc9fc906 (patch)
tree: 550b146d4de0cc00a4784147f7d8f2a7bc93cffe /idsearcher
parent: f4fef41a45620391fca6481f4700b89de170ab88 (diff)
download: spdx-tools-cd59ee66408a908f7ef94548814514f6bc9fc906.tar.gz
Create Go Module
- Unpack directory v0, moving all of its content to the root directory.
- ./v0/* is converted to ./*
- All the test cases were fixed to use one less directory level when indexing test files.
- Add go.mod.
- Go version 1.13 is used to have a relatively stable versioning system.

Signed-off-by: RishabhBhatnagar <bhatnagarrishabh4@gmail.com>
Diffstat (limited to 'idsearcher')
-rw-r--r--idsearcher/idsearcher.go228
-rw-r--r--idsearcher/idsearcher_test.go603
2 files changed, 831 insertions, 0 deletions
diff --git a/idsearcher/idsearcher.go b/idsearcher/idsearcher.go
new file mode 100644
index 0000000..253bdaa
--- /dev/null
+++ b/idsearcher/idsearcher.go
@@ -0,0 +1,228 @@
+// Package idsearcher is used to search for short-form IDs in files
+// within a directory, and to build an SPDX Document containing those
+// license findings.
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+package idsearcher
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strings"
+
+ "github.com/spdx/tools-golang/builder"
+ "github.com/spdx/tools-golang/spdx"
+ "github.com/spdx/tools-golang/utils"
+)
+
// Config is a collection of configuration settings for idsearcher
// (for version 2.1 SPDX Documents). A few mandatory fields are set here
// so that they can be reused across multiple calls to BuildIDsDocument.
type Config struct {
	// NamespacePrefix should be a URI representing a prefix for the
	// namespace with which the SPDX Document will be associated.
	// BuildIDsDocument passes it unchanged to the builder as its
	// NamespacePrefix setting; the builder is expected to use it for the
	// DocumentNamespace field in the CreationInfo section, followed by the
	// per-Document package name and a random UUID.
	NamespacePrefix string

	// BuilderPathsIgnored lists certain paths to be omitted from the built
	// document. Each string should be a path, relative to the package's
	// dirRoot, to a specific file or (for all files in a directory) ending
	// in a slash. Prefix the string with "**" to omit all instances of that
	// file / directory, regardless of where it is in the file tree.
	BuilderPathsIgnored []string

	// SearcherPathsIgnored lists certain paths that should not be searched
	// by idsearcher, even if those paths have Files present. Such files
	// still appear in the Document, with NOASSERTION license values. It
	// uses the same format as BuilderPathsIgnored.
	SearcherPathsIgnored []string
}
+
+// BuildIDsDocument creates an SPDX Document (version 2.1) and searches for
+// short-form IDs in each file, filling in license fields as appropriate. It
+// returns that document or error if any is encountered. Arguments:
+// - packageName: name of package / directory
+// - dirRoot: path to directory to be analyzed
+// - namespacePrefix: URI representing a prefix for the
+// namespace with which the SPDX Document will be associated
+func BuildIDsDocument(packageName string, dirRoot string, idconfig *Config) (*spdx.Document2_1, error) {
+ // first, build the Document using builder
+ bconfig := &builder.Config2_1{
+ NamespacePrefix: idconfig.NamespacePrefix,
+ CreatorType: "Tool",
+ Creator: "github.com/spdx/tools-golang/idsearcher",
+ PathsIgnored: idconfig.BuilderPathsIgnored,
+ }
+ doc, err := builder.Build2_1(packageName, dirRoot, bconfig)
+ if err != nil {
+ return nil, err
+ }
+ if doc == nil {
+ return nil, fmt.Errorf("builder returned nil Document")
+ }
+ if doc.Packages == nil {
+ return nil, fmt.Errorf("builder returned nil Package")
+ }
+ if len(doc.Packages) != 1 {
+ return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages))
+ }
+
+ // now, walk through each file and find its licenses (if any)
+ pkg := doc.Packages[0]
+ if pkg.Files == nil {
+ return nil, fmt.Errorf("builder returned nil Files in Package")
+ }
+ licsForPackage := map[string]int{}
+ for _, f := range pkg.Files {
+ // start by initializing / clearing values
+ f.LicenseInfoInFile = []string{"NOASSERTION"}
+ f.LicenseConcluded = "NOASSERTION"
+
+ // check whether the searcher should ignore this file
+ if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) {
+ continue
+ }
+
+ fPath := filepath.Join(dirRoot, f.FileName)
+ // FIXME this is not preferable -- ignoring error
+ ids, _ := searchFileIDs(fPath)
+ // FIXME for now, proceed onwards with whatever IDs we obtained.
+ // FIXME instead of ignoring the error, should probably either log it,
+ // FIXME and/or enable the caller to configure what should happen.
+
+ // separate out for this file's licenses
+ licsForFile := map[string]int{}
+ licsParens := []string{}
+ for _, lid := range ids {
+ // get individual elements and add for file and package
+ licElements := getIndividualLicenses(lid)
+ for _, elt := range licElements {
+ licsForFile[elt] = 1
+ licsForPackage[elt] = 1
+ }
+ // parenthesize if needed and add to slice for joining
+ licsParens = append(licsParens, makeElement(lid))
+ }
+
+ // OK -- now we can fill in the file's details, or NOASSERTION if none
+ if len(licsForFile) > 0 {
+ f.LicenseInfoInFile = []string{}
+ for lic := range licsForFile {
+ f.LicenseInfoInFile = append(f.LicenseInfoInFile, lic)
+ }
+ sort.Strings(f.LicenseInfoInFile)
+ // avoid adding parens and joining for single-ID items
+ if len(licsParens) == 1 {
+ f.LicenseConcluded = ids[0]
+ } else {
+ f.LicenseConcluded = strings.Join(licsParens, " AND ")
+ }
+ }
+ }
+
+ // and finally, we can fill in the package's details
+ if len(licsForPackage) == 0 {
+ pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"}
+ } else {
+ pkg.PackageLicenseInfoFromFiles = []string{}
+ for lic := range licsForPackage {
+ pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic)
+ }
+ sort.Strings(pkg.PackageLicenseInfoFromFiles)
+ }
+
+ return doc, nil
+}
+
+// ===== Utility functions =====
// trashChars matches every run of characters that cannot be part of a
// short-form license expression: anything other than word characters,
// whitespace, digits, '.', '-', '+', '(' and ')'. It is compiled once at
// package load instead of on every stripTrash call.
var trashChars = regexp.MustCompile(`[^\w\s\d.\-\+()]+`)

// searchFileIDs scans the file at filePath line by line and returns the
// sorted, de-duplicated short-form license IDs that follow an
// "SPDX-License-Identifier:" tag.
//
// A tag is skipped when more than five characters remain in front of it
// after trash-stripping, since such a line is probably code that handles
// short-form IDs rather than a declaration. Text from a trailing "*/"
// onwards is dropped, and tags followed by no ID are not reported.
//
// An error is returned only if the file cannot be opened; in that case the
// returned slice is nil. Otherwise the slice is non-nil, possibly empty.
func searchFileIDs(filePath string) ([]string, error) {
	const tag = "SPDX-License-Identifier:"

	f, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	found := map[string]struct{}{}
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		idx := strings.Index(line, tag)
		if idx < 0 {
			continue
		}
		before, after := line[:idx], line[idx+len(tag):]

		// if prefixed by more than n characters, it's probably not a
		// short-form ID; it's probably code to detect short-form IDs.
		// Like this function itself, for example =)
		if len(stripTrash(before)) > 5 {
			continue
		}

		// stop before trailing */ if it is present
		if end := strings.Index(after, "*/"); end >= 0 {
			after = after[:end]
		}

		// Strip first and trim second, so that whitespace left dangling by
		// a removed trailing character does not end up in the ID.
		lid := strings.TrimSpace(stripTrash(after))
		if lid == "" {
			// a tag with nothing after it declares no license
			continue
		}
		found[lid] = struct{}{}
	}

	// FIXME for now, ignore scanner errors because we want to return whatever
	// FIXME IDs were in fact found. should probably be changed to either
	// FIXME log the error, and/or be configurable for what should happen.

	// now, convert the set to a sorted slice
	ids := make([]string, 0, len(found))
	for lid := range found {
		ids = append(ids, lid)
	}
	sort.Strings(ids)

	return ids, nil
}

// stripTrash removes from lid every character that cannot appear in a
// short-form license expression (see trashChars) and returns the result.
// Whitespace is kept.
func stripTrash(lid string) string {
	return trashChars.ReplaceAllString(lid, "")
}
+
// makeElement returns lic wrapped in parentheses when it contains an AND or
// OR operator, so that it can be joined with other expressions without
// changing its meaning. Anything else, including a plain "X WITH Y"
// expression, is returned unchanged.
//
// Operators are recognized case-insensitively, matching what
// getIndividualLicenses accepts; the text of lic itself is never altered.
func makeElement(lic string) string {
	upper := strings.ToUpper(lic)
	if strings.Contains(upper, " AND ") || strings.Contains(upper, " OR ") {
		return fmt.Sprintf("(%s)", lic)
	}

	return lic
}
+
// getIndividualLicenses breaks the license expression lic into its
// individual identifiers and returns them sorted. Parentheses and '+' are
// treated as separators, so "EPL-1.0+" yields "EPL-1.0". The operators AND,
// OR and WITH are dropped regardless of case, which means the exception in
// "X WITH Y" is listed as its own element. Duplicates are not removed.
//
// The returned slice is never nil; it is empty when lic contains no
// identifiers.
func getIndividualLicenses(lic string) []string {
	// replace parens and '+' with spaces, in a single pass
	cleaned := strings.NewReplacer("(", " ", ")", " ", "+", " ").Replace(lic)

	// Split on any run of whitespace rather than on single spaces, so that
	// tabs or repeated spaces between tokens do not produce merged or empty
	// elements.
	lics := []string{}
	for _, elt := range strings.Fields(cleaned) {
		// don't add if it is a case-insensitive operator
		if strings.EqualFold(elt, "AND") ||
			strings.EqualFold(elt, "OR") ||
			strings.EqualFold(elt, "WITH") {
			continue
		}
		lics = append(lics, elt)
	}

	// sort before returning
	sort.Strings(lics)
	return lics
}
diff --git a/idsearcher/idsearcher_test.go b/idsearcher/idsearcher_test.go
new file mode 100644
index 0000000..7c70209
--- /dev/null
+++ b/idsearcher/idsearcher_test.go
@@ -0,0 +1,603 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+
+package idsearcher
+
+import (
+ "testing"
+)
+
+// ===== Searcher top-level function tests =====
+func TestSearcherCanFillInIDs(t *testing.T) {
+ packageName := "project2"
+ dirRoot := "../testdata/project2/"
+ config := &Config{
+ NamespacePrefix: "https://github.com/swinslow/spdx-docs/spdx-go/testdata-",
+ }
+
+ doc, err := BuildIDsDocument(packageName, dirRoot, config)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+ if doc == nil {
+ t.Fatalf("expected non-nil Document, got nil")
+ }
+
+ // not checking all contents of doc, see builder tests for those
+
+ // get the package and its files, checking size of each
+ if doc.Packages == nil {
+ t.Fatalf("expected non-nil Packages, got nil")
+ }
+ if len(doc.Packages) != 1 {
+ t.Fatalf("expected Packages len to be 1, got %d", len(doc.Packages))
+ }
+ pkg := doc.Packages[0]
+
+ if pkg.Files == nil {
+ t.Fatalf("expected non-nil Files, got nil")
+ }
+ if len(pkg.Files) != 6 {
+ t.Fatalf("expected Files len to be 6, got %d", len(pkg.Files))
+ }
+
+ fileInFolder := pkg.Files[0]
+ if fileInFolder.LicenseInfoInFile == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFile, got nil")
+ }
+ if len(fileInFolder.LicenseInfoInFile) != 1 {
+ t.Fatalf("expected LicenseInfoInFile len to be 1, got %d", len(fileInFolder.LicenseInfoInFile))
+ }
+ if fileInFolder.LicenseInfoInFile[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileInFolder.LicenseInfoInFile[0])
+ }
+ if fileInFolder.LicenseConcluded != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileInFolder.LicenseConcluded)
+ }
+
+ fileTrailingComment := pkg.Files[1]
+ if fileTrailingComment.LicenseInfoInFile == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFile, got nil")
+ }
+ if len(fileTrailingComment.LicenseInfoInFile) != 1 {
+ t.Fatalf("expected LicenseInfoInFile len to be 1, got %d", len(fileTrailingComment.LicenseInfoInFile))
+ }
+ if fileTrailingComment.LicenseInfoInFile[0] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", fileTrailingComment.LicenseInfoInFile[0])
+ }
+ if fileTrailingComment.LicenseConcluded != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", fileTrailingComment.LicenseConcluded)
+ }
+
+ fileHasDuplicateID := pkg.Files[2]
+ if fileHasDuplicateID.LicenseInfoInFile == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFile, got nil")
+ }
+ if len(fileHasDuplicateID.LicenseInfoInFile) != 1 {
+ t.Fatalf("expected LicenseInfoInFile len to be 1, got %d", len(fileHasDuplicateID.LicenseInfoInFile))
+ }
+ if fileHasDuplicateID.LicenseInfoInFile[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileHasDuplicateID.LicenseInfoInFile[0])
+ }
+ if fileHasDuplicateID.LicenseConcluded != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileHasDuplicateID.LicenseConcluded)
+ }
+
+ fileHasID := pkg.Files[3]
+ if fileHasID.LicenseInfoInFile == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFile, got nil")
+ }
+ if len(fileHasID.LicenseInfoInFile) != 2 {
+ t.Fatalf("expected LicenseInfoInFile len to be 2, got %d", len(fileHasID.LicenseInfoInFile))
+ }
+ if fileHasID.LicenseInfoInFile[0] != "Apache-2.0" {
+ t.Errorf("expected %v, got %v", "Apache-2.0", fileHasID.LicenseInfoInFile[0])
+ }
+ if fileHasID.LicenseInfoInFile[1] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", fileHasID.LicenseInfoInFile[1])
+ }
+ if fileHasID.LicenseConcluded != "Apache-2.0 OR GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "Apache-2.0 OR GPL-2.0-or-later", fileHasID.LicenseConcluded)
+ }
+
+ fileMultipleIDs := pkg.Files[4]
+ if fileMultipleIDs.LicenseInfoInFile == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFile, got nil")
+ }
+ if len(fileMultipleIDs.LicenseInfoInFile) != 5 {
+ t.Fatalf("expected LicenseInfoInFile len to be 5, got %d", len(fileMultipleIDs.LicenseInfoInFile))
+ }
+ if fileMultipleIDs.LicenseInfoInFile[0] != "BSD-2-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-2-Clause", fileMultipleIDs.LicenseInfoInFile[0])
+ }
+ if fileMultipleIDs.LicenseInfoInFile[1] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", fileMultipleIDs.LicenseInfoInFile[1])
+ }
+ // here, DO NOT keep the +
+ if fileMultipleIDs.LicenseInfoInFile[2] != "EPL-1.0" {
+ t.Errorf("expected %v, got %v", "EPL-1.0", fileMultipleIDs.LicenseInfoInFile[2])
+ }
+ if fileMultipleIDs.LicenseInfoInFile[3] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", fileMultipleIDs.LicenseInfoInFile[3])
+ }
+ if fileMultipleIDs.LicenseInfoInFile[4] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileMultipleIDs.LicenseInfoInFile[4])
+ }
+ if fileMultipleIDs.LicenseConcluded != "((MIT AND BSD-3-Clause) OR ISC) AND BSD-2-Clause AND EPL-1.0+" {
+ t.Errorf("expected %v, got %v", "((MIT AND BSD-3-Clause) OR ISC) AND BSD-2-Clause AND EPL-1.0+", fileMultipleIDs.LicenseConcluded)
+ }
+
+ fileNoID := pkg.Files[5]
+ if fileNoID.LicenseInfoInFile == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFile, got nil")
+ }
+ if len(fileNoID.LicenseInfoInFile) != 1 {
+ t.Fatalf("expected LicenseInfoInFile len to be 1, got %d", len(fileNoID.LicenseInfoInFile))
+ }
+ if fileNoID.LicenseInfoInFile[0] != "NOASSERTION" {
+ t.Errorf("expected %v, got %v", "NOASSERTION", fileNoID.LicenseInfoInFile[0])
+ }
+ if fileNoID.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %v, got %v", "NOASSERTION", fileNoID.LicenseConcluded)
+ }
+
+ // and finally, the package should have all of these licenses
+ if pkg.PackageLicenseInfoFromFiles == nil {
+ t.Fatalf("expected non-nil PackageLicenseInfoFromFiles, got nil")
+ }
+ if len(pkg.PackageLicenseInfoFromFiles) != 7 {
+ t.Fatalf("expected PackageLicenseInfoFromFiles len to be 7, got %d", len(pkg.PackageLicenseInfoFromFiles))
+ }
+ if pkg.PackageLicenseInfoFromFiles[0] != "Apache-2.0" {
+ t.Errorf("expected %v, got %v", "Apache-2.0", pkg.PackageLicenseInfoFromFiles[0])
+ }
+ if pkg.PackageLicenseInfoFromFiles[1] != "BSD-2-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-2-Clause", pkg.PackageLicenseInfoFromFiles[1])
+ }
+ if pkg.PackageLicenseInfoFromFiles[2] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", pkg.PackageLicenseInfoFromFiles[2])
+ }
+ // here, DO NOT keep the +
+ if pkg.PackageLicenseInfoFromFiles[3] != "EPL-1.0" {
+ t.Errorf("expected %v, got %v", "EPL-1.0", pkg.PackageLicenseInfoFromFiles[3])
+ }
+ if pkg.PackageLicenseInfoFromFiles[4] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", pkg.PackageLicenseInfoFromFiles[4])
+ }
+ if pkg.PackageLicenseInfoFromFiles[5] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", pkg.PackageLicenseInfoFromFiles[5])
+ }
+ if pkg.PackageLicenseInfoFromFiles[6] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", pkg.PackageLicenseInfoFromFiles[6])
+ }
+
+}
+
+func TestSearcherCanFillInIDsAndIgnorePaths(t *testing.T) {
+ packageName := "project3"
+ dirRoot := "../testdata/project3/"
+ config := &Config{
+ NamespacePrefix: "https://github.com/swinslow/spdx-docs/spdx-go/testdata-",
+ BuilderPathsIgnored: []string{
+ "**/ignoredir/",
+ "/excludedir/",
+ "**/ignorefile.txt",
+ "/alsoEXCLUDEthis.txt",
+ },
+ SearcherPathsIgnored: []string{
+ "**/dontscan.txt",
+ },
+ }
+
+ doc, err := BuildIDsDocument(packageName, dirRoot, config)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+ if doc == nil {
+ t.Fatalf("expected non-nil Document, got nil")
+ }
+
+ // not checking all contents of doc, see builder tests for those
+
+ // get the package and its files, checking licenses for each, and
+ // confirming NOASSERTION for those that are skipped
+ pkg := doc.Packages[0]
+ if len(pkg.Files) != 5 {
+ t.Fatalf("expected len %d, got %d", 5, len(pkg.Files))
+ }
+
+ f := pkg.Files[0]
+ if f.FileName != "/dontscan.txt" {
+ t.Errorf("expected %v, got %v", "/dontscan.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFile) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFile))
+ }
+ if f.LicenseInfoInFile[0] != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseInfoInFile[0])
+ }
+ if f.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[1]
+ if f.FileName != "/keep/keep.txt" {
+ t.Errorf("expected %v, got %v", "/keep/keep.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFile) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFile))
+ }
+ if f.LicenseInfoInFile[0] != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseInfoInFile[0])
+ }
+ if f.LicenseConcluded != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[2]
+ if f.FileName != "/keep.txt" {
+ t.Errorf("expected %v, got %v", "/keep.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFile) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFile))
+ }
+ if f.LicenseInfoInFile[0] != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseInfoInFile[0])
+ }
+ if f.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[3]
+ if f.FileName != "/subdir/keep/dontscan.txt" {
+ t.Errorf("expected %v, got %v", "/subdir/keep/dontscan.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFile) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFile))
+ }
+ if f.LicenseInfoInFile[0] != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseInfoInFile[0])
+ }
+ if f.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[4]
+ if f.FileName != "/subdir/keep/keep.txt" {
+ t.Errorf("expected %v, got %v", "/subdir/keep/keep.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFile) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFile))
+ }
+ if f.LicenseInfoInFile[0] != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseInfoInFile[0])
+ }
+ if f.LicenseConcluded != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseConcluded)
+ }
+}
+
+func TestSearcherFailsWithInvalidPath(t *testing.T) {
+ packageName := "project2"
+ dirRoot := "./oops/invalid"
+ config := &Config{
+ NamespacePrefix: "whatever",
+ }
+
+ _, err := BuildIDsDocument(packageName, dirRoot, config)
+ if err == nil {
+ t.Fatalf("expected non-nil error, got nil")
+ }
+}
+
+// ===== Searcher utility tests =====
+func TestCanFindShortFormIDWhenPresent(t *testing.T) {
+ filePath := "../testdata/project2/has-id.txt"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 1 {
+ t.Fatalf("expected len 1, got %d", len(ids))
+ }
+
+ if ids[0] != "Apache-2.0 OR GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "Apache-2.0 OR GPL-2.0-or-later", ids[0])
+ }
+}
+
+func TestCanFindMultipleShortFormIDsWhenPresent(t *testing.T) {
+ filePath := "../testdata/project2/has-multiple-ids.txt"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 3 {
+ t.Fatalf("expected len 3, got %d", len(ids))
+ }
+
+ if ids[0] != "(MIT AND BSD-3-Clause) OR ISC" {
+ t.Errorf("expected %v, got %v", "(MIT AND BSD-3-Clause) OR ISC", ids[0])
+ }
+ if ids[1] != "BSD-2-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-2-Clause", ids[1])
+ }
+ if ids[2] != "EPL-1.0+" {
+ t.Errorf("expected %v, got %v", "EPL-1.0+", ids[2])
+ }
+}
+
+func TestCanCollapseDuplicateShortFormIDsWhenPresent(t *testing.T) {
+ filePath := "../testdata/project2/has-duplicate-ids.txt"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 1 {
+ t.Fatalf("expected len 1, got %d", len(ids))
+ }
+
+ if ids[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", ids[0])
+ }
+}
+
+func TestCanStripTrailingStarSlash(t *testing.T) {
+ filePath := "../testdata/project2/folder/has-trailing-comment-marker.c"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 1 {
+ t.Fatalf("expected len 1, got %d", len(ids))
+ }
+
+ if ids[0] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", ids[0])
+ }
+}
+
+func TestCanIgnoreShortFormIDWhenTooManyPrefixChars(t *testing.T) {
+ filePath := "../testdata/project4/has-id-to-ignore.txt"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 0 {
+ t.Fatalf("expected len 0, got %d", len(ids))
+ }
+}
+
+func TestCanPickJustTheRightID(t *testing.T) {
+ filePath := "../testdata/project4/has-mix-of-ids.txt"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 1 {
+ t.Fatalf("expected len 1, got %d", len(ids))
+ }
+
+ if ids[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", ids[0])
+ }
+}
+
+func TestCannotFindShortFormIDWhenAbsent(t *testing.T) {
+ filePath := "../testdata/project2/no-id.txt"
+
+ ids, err := searchFileIDs(filePath)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+
+ if len(ids) != 0 {
+ t.Fatalf("expected len 0, got %d", len(ids))
+ }
+}
+
+func TestCanExcludeTrashCharactersFromID(t *testing.T) {
+ lid := "Apac\",he-2.0"
+ want := "Apache-2.0"
+ got := stripTrash(lid)
+ if want != got {
+ t.Errorf("expected %v, got %v", want, got)
+ }
+
+ lid = "Apache-2.0"
+ want = "Apache-2.0"
+ got = stripTrash(lid)
+ if want != got {
+ t.Errorf("expected %v, got %v", want, got)
+ }
+}
+
+func TestSearchFileIDsFailsWithInvalidFilePath(t *testing.T) {
+ filePath := "./oops/nm/invalid"
+
+ _, err := searchFileIDs(filePath)
+ if err == nil {
+ t.Fatalf("expected non-nil error, got nil")
+ }
+}
+
+func TestWillParenthesizeIfNeeded(t *testing.T) {
+ licID := "MIT OR BSD-3-Clause"
+ retval := makeElement(licID)
+ if retval != "(MIT OR BSD-3-Clause)" {
+ t.Errorf("expected %v, got %v", "(MIT OR BSD-3-Clause)", retval)
+ }
+
+ licID = "ISC AND HPND"
+ retval = makeElement(licID)
+ if retval != "(ISC AND HPND)" {
+ t.Errorf("expected %v, got %v", "(ISC AND HPND)", retval)
+ }
+}
+
+func TestWillNotParenthesizeIfNotNeeded(t *testing.T) {
+ lic := "MIT"
+ retval := makeElement(lic)
+ if retval != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", retval)
+ }
+
+ lic = "GPL-2.0-only WITH Classpath-exception-2.0"
+ retval = makeElement(lic)
+ if retval != "GPL-2.0-only WITH Classpath-exception-2.0" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-only WITH Classpath-exception-2.0", retval)
+ }
+}
+
+func TestCanGetIndividualLicenses(t *testing.T) {
+ // single license
+ lic := "MIT"
+ lics := getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ if len(lics) != 1 {
+ t.Fatalf("expected lics to have len 1, got %d", len(lics))
+ }
+ if lics[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", lics[0])
+ }
+
+ // two-license combo
+ lic = "ISC AND BSD-3-Clause"
+ lics = getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ if len(lics) != 2 {
+ t.Fatalf("expected lics to have len 2, got %d", len(lics))
+ }
+ // should be sorted alphabetically
+ if lics[0] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", lics[0])
+ }
+ if lics[1] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", lics[1])
+ }
+
+ // license WITH exception
+ lic = "GPL-2.0-only WITH Classpath-exception-2.0"
+ lics = getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ if len(lics) != 2 {
+ t.Fatalf("expected lics to have len 2, got %d", len(lics))
+ }
+ // exception should be listed separately
+ if lics[0] != "Classpath-exception-2.0" {
+ t.Errorf("expected %v, got %v", "Classpath-exception-2.0", lics[0])
+ }
+ if lics[1] != "GPL-2.0-only" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-only", lics[1])
+ }
+
+ // two-license combo with parens
+ lic = "(JSON OR BSD-2-Clause)"
+ lics = getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ if len(lics) != 2 {
+ t.Fatalf("expected lics to have len 2, got %d", len(lics))
+ }
+ // parens should get dropped
+ if lics[0] != "BSD-2-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-2-Clause", lics[0])
+ }
+ if lics[1] != "JSON" {
+ t.Errorf("expected %v, got %v", "JSON", lics[1])
+ }
+
+ // multi-license combo with nested parens
+ lic = "GPL-2.0-only AND ((EPL-1.0 AND BSD-4-Clause) OR MIT)"
+ lics = getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ if len(lics) != 4 {
+ t.Fatalf("expected lics to have len 4, got %d", len(lics))
+ }
+ if lics[0] != "BSD-4-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-4-Clause", lics[0])
+ }
+ if lics[1] != "EPL-1.0" {
+ t.Errorf("expected %v, got %v", "EPL-1.0", lics[1])
+ }
+ if lics[2] != "GPL-2.0-only" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-only", lics[2])
+ }
+ if lics[3] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", lics[3])
+ }
+}
+
+func TestCanGetIndividualLicensesIgnoringOperatorCase(t *testing.T) {
+ // two-license combo with lowercase 'and'
+ lic := "ISC and BSD-3-Clause"
+ lics := getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ // should be sorted alphabetically; 'and' should not appear
+ if len(lics) != 2 {
+ t.Fatalf("expected lics to have len 2, got %d", len(lics))
+ }
+ if lics[0] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", lics[0])
+ }
+ if lics[1] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", lics[1])
+ }
+
+ // two-license combo with lowercase 'or'
+ lic = "ISC or BSD-3-Clause"
+ lics = getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ // should be sorted alphabetically; 'or' should not appear
+ if len(lics) != 2 {
+ t.Fatalf("expected lics to have len 2, got %d", len(lics))
+ }
+ if lics[0] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", lics[0])
+ }
+ if lics[1] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", lics[1])
+ }
+
+ // two-license combo with lowercase 'with'
+ lic = "GPL-2.0-only with Classpath-exception-2.0"
+ lics = getIndividualLicenses(lic)
+ if lics == nil {
+ t.Fatalf("expected non-nil lics, got nil")
+ }
+ // should be sorted alphabetically; 'with' should not appear
+ if len(lics) != 2 {
+ t.Fatalf("expected lics to have len 2, got %d", len(lics))
+ }
+ if lics[0] != "Classpath-exception-2.0" {
+ t.Errorf("expected %v, got %v", "Classpath-exception-2.0", lics[0])
+ }
+ if lics[1] != "GPL-2.0-only" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-only", lics[1])
+ }
+
+}