aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--classifier.go46
-rw-r--r--classifier_test.go38
-rw-r--r--commentparser/comment_parser_test.go2
-rw-r--r--go.mod9
-rw-r--r--go.sum11
-rw-r--r--license_type.go4
-rw-r--r--serializer/serializer_test.go2
-rw-r--r--tools/identify_license/bench_test.go96
8 files changed, 114 insertions, 94 deletions
diff --git a/classifier.go b/classifier.go
index 0af9c1c..8d39caf 100644
--- a/classifier.go
+++ b/classifier.go
@@ -75,49 +75,31 @@ type License struct {
// Threshold is the lowest confidence percentage acceptable for the
// classifier.
Threshold float64
-
- // archive is the path to the license archive
- archive string
-}
-
-// OptionFunc set options on a License struct.
-type OptionFunc func(l *License) error
-
-// Archive is an OptionFunc to specify the location of the license archive file.
-func Archive(f string) OptionFunc {
- return func(l *License) error {
- l.archive = f
- return nil
- }
}
// New creates a license classifier and pre-loads it with known open source licenses.
-func New(threshold float64, options ...OptionFunc) (*License, error) {
+func New(threshold float64) (*License, error) {
classifier := &License{
c: stringclassifier.New(threshold, Normalizers...),
Threshold: threshold,
- archive: LicenseArchive,
}
-
- for _, o := range options {
- err := o(classifier)
- if err != nil {
- return nil, fmt.Errorf("error setting option %v: %v", o, err)
- }
- }
-
- if err := classifier.registerLicenses(); err != nil {
- return nil, fmt.Errorf("cannot register licenses from %q: %v", classifier.archive, err)
+ if err := classifier.registerLicenses(LicenseArchive); err != nil {
+ return nil, fmt.Errorf("cannot register licenses: %v", err)
}
return classifier, nil
}
// NewWithForbiddenLicenses creates a license classifier and pre-loads it with
// known open source licenses which are forbidden.
-func NewWithForbiddenLicenses(threshold float64, options ...OptionFunc) (*License, error) {
- opts := []OptionFunc{Archive(ForbiddenLicenseArchive)}
- opts = append(opts, options...)
- return New(threshold, opts...)
+func NewWithForbiddenLicenses(threshold float64) (*License, error) {
+ classifier := &License{
+ c: stringclassifier.New(threshold, Normalizers...),
+ Threshold: threshold,
+ }
+ if err := classifier.registerLicenses(ForbiddenLicenseArchive); err != nil {
+ return nil, fmt.Errorf("cannot register licenses: %v", err)
+ }
+ return classifier, nil
}
// WithinConfidenceThreshold returns true if the confidence value is above or
@@ -196,8 +178,8 @@ type archivedValue struct {
// registerLicenses loads all known licenses and adds them to c as known values
// for comparison. The allocated space after ingesting the 'licenses.db'
// archive is ~167M.
-func (c *License) registerLicenses() error {
- contents, err := ReadLicenseFile(c.archive)
+func (c *License) registerLicenses(archive string) error {
+ contents, err := ReadLicenseFile(archive)
if err != nil {
return err
}
diff --git a/classifier_test.go b/classifier_test.go
index b997a35..7ba32e6 100644
--- a/classifier_test.go
+++ b/classifier_test.go
@@ -797,41 +797,3 @@ func BenchmarkClassifier(b *testing.B) {
classifier.NearestMatch(contents)
}
}
-
-func TestNew(t *testing.T) {
- tests := []struct {
- desc string
- options []OptionFunc
- wantArchive string
- wantErr bool
- }{
- {
- desc: "no options, use default",
- options: []OptionFunc{},
- wantArchive: LicenseArchive,
- },
- {
- desc: "specify ForbiddenLicenseArchive",
- options: []OptionFunc{Archive(ForbiddenLicenseArchive)},
- wantArchive: ForbiddenLicenseArchive,
- },
- {
- desc: "file doesn't exist results in error",
- options: []OptionFunc{Archive("doesnotexist")},
- wantArchive: "doesnotexist",
- wantErr: true,
- },
- }
- for _, tt := range tests {
- t.Run(tt.desc, func(t *testing.T) {
- c, err := New(0.5, tt.options...)
- if tt.wantErr != (err != nil) {
- t.Fatalf("unexpected error: %v", err)
- }
- if err == nil && c.archive != tt.wantArchive {
- t.Errorf("got archive %v, want %v", c.archive, tt.wantArchive)
- }
- })
- }
-
-}
diff --git a/commentparser/comment_parser_test.go b/commentparser/comment_parser_test.go
index 6b5429a..d1e0d1a 100644
--- a/commentparser/comment_parser_test.go
+++ b/commentparser/comment_parser_test.go
@@ -18,7 +18,7 @@ import (
"reflect"
"testing"
- "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp"
"github.com/google/licenseclassifier/commentparser/language"
)
diff --git a/go.mod b/go.mod
deleted file mode 100644
index 28205e4..0000000
--- a/go.mod
+++ /dev/null
@@ -1,9 +0,0 @@
-module github.com/google/licenseclassifier
-
-go 1.11
-
-require (
- github.com/google/go-cmp v0.2.0
- github.com/sergi/go-diff v1.0.0
- github.com/stretchr/testify v1.3.0 // indirect
-)
diff --git a/go.sum b/go.sum
deleted file mode 100644
index 99076ee..0000000
--- a/go.sum
+++ /dev/null
@@ -1,11 +0,0 @@
-github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
-github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
-github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
-github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
-github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
-github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
diff --git a/license_type.go b/license_type.go
index 25cb369..43923be 100644
--- a/license_type.go
+++ b/license_type.go
@@ -22,9 +22,9 @@ package licenseclassifier
import "github.com/google/licenseclassifier/internal/sets"
// Canonical names of the licenses.
-// The names come from the https://spdx.org/licenses website, and are
-// also the filenames of the licenses in licenseclassifier/licenses.
const (
+ // The names come from the https://spdx.org/licenses website, and are
+ // also the filenames of the licenses in licenseclassifier/licenses.
AFL11 = "AFL-1.1"
AFL12 = "AFL-1.2"
AFL20 = "AFL-2.0"
diff --git a/serializer/serializer_test.go b/serializer/serializer_test.go
index 755c601..1bca727 100644
--- a/serializer/serializer_test.go
+++ b/serializer/serializer_test.go
@@ -213,7 +213,7 @@ func compareSearchSets(x, y *searchset.SearchSet) error {
return fmt.Errorf("Hash keys differ = %d vs %d", xKeys[i], yKeys[i])
}
if !reflect.DeepEqual(x.Hashes[xKeys[i]], y.Hashes[yKeys[i]]) {
- return fmt.Errorf("Hash values differ = %v vs %v", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]])
+ return fmt.Errorf("Hash values differ = %d vs %d", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]])
}
}
diff --git a/tools/identify_license/bench_test.go b/tools/identify_license/bench_test.go
new file mode 100644
index 0000000..1320a87
--- /dev/null
+++ b/tools/identify_license/bench_test.go
@@ -0,0 +1,96 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The identify_license program tries to identify the license type of an
+// unknown license. The file containing the license text is specified on the
+// command line. Multiple license files can be analyzed with a single command.
+// The type of the license is returned along with the confidence level of the
+// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
+// exact match and 0.0 indicating a complete mismatch. The results are sorted
+// by confidence level.
+//
+// $ identifylicense LICENSE1 LICENSE2
+// LICENSE2: MIT (confidence: 0.987)
+// LICENSE1: BSD-2-Clause (confidence: 0.833)
+
+package bench_test
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "os"
+ "path/filepath"
+ "sort"
+ "testing"
+ "time"
+
+ "github.com/google/licenseclassifier"
+ "github.com/google/licenseclassifier/tools/identify_license/backend"
+ "google3/base/go/flag"
+)
+
+var (
+ headers = flag.Bool("headers", false, "match license headers")
+ forbiddenOnly = flag.Bool("forbidden", false, "identify using forbidden licenses archive")
+ threshold = flag.Float64("threshold", licenseclassifier.DefaultConfidenceThreshold, "confidence threshold")
+ timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")
+)
+
+func init() {
+ flag.Usage = func() {
+ fmt.Fprintf(os.Stderr, `Usage: %s <licensefile> ...
+
+Identify an unknown license.
+
+Options:
+`, filepath.Base(os.Args[0]))
+ flag.PrintDefaults()
+ }
+}
+
+func BenchmarkIdentifyLicense(b *testing.B) {
+ be, err := backend.New(*threshold, *forbiddenOnly)
+ if err != nil {
+ be.Close()
+ log.Fatalf("cannot create license classifier: %v", err)
+ }
+
+ ctx, cancel := context.WithTimeout(context.Background(), *timeout)
+ defer func() {
+ b.StopTimer()
+ cancel()
+ }()
+ b.StartTimer()
+ if errs := be.ClassifyLicensesWithContext(ctx, flag.Args(), *headers); errs != nil {
+ be.Close()
+ for _, err := range errs {
+ log.Printf("classify license failed: %v", err)
+ }
+ log.Fatal("cannot classify licenses")
+ }
+
+ results := be.GetResults()
+ if len(results) == 0 {
+ be.Close()
+ log.Fatal("Couldn't classify license(s)")
+ }
+
+ sort.Sort(results)
+ for _, r := range results {
+ fmt.Printf("%s: %s (confidence: %v, offset: %v, extent: %v)\n",
+ r.Filename, r.Name, r.Confidence, r.Offset, r.Extent)
+ }
+ be.Close()
+}