aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- classifier.go 46
-rw-r--r-- classifier_test.go 38
-rw-r--r-- commentparser/comment_parser_test.go 2
-rw-r--r-- go.mod 9
-rw-r--r-- go.sum 11
-rw-r--r-- license_type.go 4
-rw-r--r-- serializer/serializer_test.go 2
-rw-r--r-- tools/identify_license/bench_test.go 96
8 files changed, 94 insertions, 114 deletions
diff --git a/classifier.go b/classifier.go
index 8d39caf..0af9c1c 100644
--- a/classifier.go
+++ b/classifier.go
@@ -75,31 +75,49 @@ type License struct {
// Threshold is the lowest confidence percentage acceptable for the
// classifier.
Threshold float64
+
+ // archive is the path to the license archive
+ archive string
+}
+
+// OptionFunc sets options on a License struct.
+type OptionFunc func(l *License) error
+
+// Archive returns an OptionFunc that sets the path of the license archive file to load.
+func Archive(f string) OptionFunc {
+ return func(l *License) error {
+ l.archive = f
+ return nil
+ }
}
// New creates a license classifier and pre-loads it with known open source licenses.
-func New(threshold float64) (*License, error) {
+func New(threshold float64, options ...OptionFunc) (*License, error) {
classifier := &License{
c: stringclassifier.New(threshold, Normalizers...),
Threshold: threshold,
+ archive: LicenseArchive,
}
- if err := classifier.registerLicenses(LicenseArchive); err != nil {
- return nil, fmt.Errorf("cannot register licenses: %v", err)
+
+ for _, o := range options {
+ // Options run in order, so a later option can overwrite an earlier one.
+ if err := o(classifier); err != nil {
+ return nil, fmt.Errorf("error setting option: %v", err)
+ }
+ }
+
+ if err := classifier.registerLicenses(); err != nil {
+ return nil, fmt.Errorf("cannot register licenses from %q: %v", classifier.archive, err)
}
return classifier, nil
}
// NewWithForbiddenLicenses creates a license classifier and pre-loads it with
// known open source licenses which are forbidden.
-func NewWithForbiddenLicenses(threshold float64) (*License, error) {
- classifier := &License{
- c: stringclassifier.New(threshold, Normalizers...),
- Threshold: threshold,
- }
- if err := classifier.registerLicenses(ForbiddenLicenseArchive); err != nil {
- return nil, fmt.Errorf("cannot register licenses: %v", err)
- }
- return classifier, nil
+func NewWithForbiddenLicenses(threshold float64, options ...OptionFunc) (*License, error) {
+ // Put the forbidden archive first; options passed by the caller run after it.
+ forbidden := []OptionFunc{Archive(ForbiddenLicenseArchive)}
+ return New(threshold, append(forbidden, options...)...)
}
// WithinConfidenceThreshold returns true if the confidence value is above or
@@ -178,8 +196,8 @@ type archivedValue struct {
// registerLicenses loads all known licenses and adds them to c as known values
// for comparison. The allocated space after ingesting the 'licenses.db'
// archive is ~167M.
-func (c *License) registerLicenses(archive string) error {
- contents, err := ReadLicenseFile(archive)
+func (c *License) registerLicenses() error {
+ contents, err := ReadLicenseFile(c.archive)
if err != nil {
return err
}
diff --git a/classifier_test.go b/classifier_test.go
index 7ba32e6..b997a35 100644
--- a/classifier_test.go
+++ b/classifier_test.go
@@ -797,3 +797,41 @@ func BenchmarkClassifier(b *testing.B) {
classifier.NearestMatch(contents)
}
}
+
+// TestNew verifies that New records the archive selected through options and
+// returns an error when the archive file does not exist.
+func TestNew(t *testing.T) {
+ tests := []struct {
+ desc string
+ options []OptionFunc
+ wantArchive string
+ wantErr bool
+ }{
+ {
+ desc: "no options, use default",
+ wantArchive: LicenseArchive,
+ },
+ {
+ desc: "specify ForbiddenLicenseArchive",
+ options: []OptionFunc{Archive(ForbiddenLicenseArchive)},
+ wantArchive: ForbiddenLicenseArchive,
+ },
+ {
+ desc: "file doesn't exist results in error",
+ options: []OptionFunc{Archive("doesnotexist")},
+ wantArchive: "doesnotexist",
+ wantErr: true,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.desc, func(t *testing.T) {
+ c, err := New(0.5, tt.options...)
+ if tt.wantErr != (err != nil) {
+ t.Fatalf("New() error = %v, wantErr %v", err, tt.wantErr)
+ }
+ if err == nil && c.archive != tt.wantArchive {
+ t.Errorf("got archive %q, want %q", c.archive, tt.wantArchive)
+ }
+ })
+ }
+}
diff --git a/commentparser/comment_parser_test.go b/commentparser/comment_parser_test.go
index d1e0d1a..6b5429a 100644
--- a/commentparser/comment_parser_test.go
+++ b/commentparser/comment_parser_test.go
@@ -18,7 +18,7 @@ import (
"reflect"
"testing"
- "github.com/google/go-cmp"
+ "github.com/google/go-cmp/cmp"
"github.com/google/licenseclassifier/commentparser/language"
)
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..28205e4
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,9 @@
+module github.com/google/licenseclassifier
+
+go 1.11
+
+require (
+ github.com/google/go-cmp v0.2.0
+ github.com/sergi/go-diff v1.0.0
+ github.com/stretchr/testify v1.3.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..99076ee
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,11 @@
+github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
+github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
diff --git a/license_type.go b/license_type.go
index 43923be..25cb369 100644
--- a/license_type.go
+++ b/license_type.go
@@ -22,9 +22,9 @@ package licenseclassifier
import "github.com/google/licenseclassifier/internal/sets"
// Canonical names of the licenses.
+// The names come from the https://spdx.org/licenses website, and are
+// also the filenames of the licenses in licenseclassifier/licenses.
const (
- // The names come from the https://spdx.org/licenses website, and are
- // also the filenames of the licenses in licenseclassifier/licenses.
AFL11 = "AFL-1.1"
AFL12 = "AFL-1.2"
AFL20 = "AFL-2.0"
diff --git a/serializer/serializer_test.go b/serializer/serializer_test.go
index 1bca727..755c601 100644
--- a/serializer/serializer_test.go
+++ b/serializer/serializer_test.go
@@ -213,7 +213,7 @@ func compareSearchSets(x, y *searchset.SearchSet) error {
return fmt.Errorf("Hash keys differ = %d vs %d", xKeys[i], yKeys[i])
}
if !reflect.DeepEqual(x.Hashes[xKeys[i]], y.Hashes[yKeys[i]]) {
- return fmt.Errorf("Hash values differ = %d vs %d", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]])
+ return fmt.Errorf("Hash values differ = %v vs %v", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]])
}
}
diff --git a/tools/identify_license/bench_test.go b/tools/identify_license/bench_test.go
deleted file mode 100644
index 1320a87..0000000
--- a/tools/identify_license/bench_test.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2017 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// The identify_license program tries to identify the license type of an
-// unknown license. The file containing the license text is specified on the
-// command line. Multiple license files can be analyzed with a single command.
-// The type of the license is returned along with the confidence level of the
-// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
-// exact match and 0.0 indicating a complete mismatch. The results are sorted
-// by confidence level.
-//
-// $ identifylicense LICENSE1 LICENSE2
-// LICENSE2: MIT (confidence: 0.987)
-// LICENSE1: BSD-2-Clause (confidence: 0.833)
-
-package bench_test
-
-import (
- "context"
- "fmt"
- "log"
- "os"
- "path/filepath"
- "sort"
- "testing"
- "time"
-
- "github.com/google/licenseclassifier"
- "github.com/google/licenseclassifier/tools/identify_license/backend"
- "google3/base/go/flag"
-)
-
-var (
- headers = flag.Bool("headers", false, "match license headers")
- forbiddenOnly = flag.Bool("forbidden", false, "identify using forbidden licenses archive")
- threshold = flag.Float64("threshold", licenseclassifier.DefaultConfidenceThreshold, "confidence threshold")
- timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")
-)
-
-func init() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, `Usage: %s <licensefile> ...
-
-Identify an unknown license.
-
-Options:
-`, filepath.Base(os.Args[0]))
- flag.PrintDefaults()
- }
-}
-
-func BenchmarkIdentifyLicense(b *testing.B) {
- be, err := backend.New(*threshold, *forbiddenOnly)
- if err != nil {
- be.Close()
- log.Fatalf("cannot create license classifier: %v", err)
- }
-
- ctx, cancel := context.WithTimeout(context.Background(), *timeout)
- defer func() {
- b.StopTimer()
- cancel()
- }()
- b.StartTimer()
- if errs := be.ClassifyLicensesWithContext(ctx, flag.Args(), *headers); errs != nil {
- be.Close()
- for _, err := range errs {
- log.Printf("classify license failed: %v", err)
- }
- log.Fatal("cannot classify licenses")
- }
-
- results := be.GetResults()
- if len(results) == 0 {
- be.Close()
- log.Fatal("Couldn't classify license(s)")
- }
-
- sort.Sort(results)
- for _, r := range results {
- fmt.Printf("%s: %s (confidence: %v, offset: %v, extent: %v)\n",
- r.Filename, r.Name, r.Confidence, r.Offset, r.Extent)
- }
- be.Close()
-}