aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBill Neubauer <wcn@google.com>2022-03-21 12:12:34 -0700
committerBill Neubauer <wcn@google.com>2022-03-21 12:12:34 -0700
commita6f4b9f7a6aafe35aefa57e97567f4de44972f46 (patch)
tree70bd0fd1421edf8576993ab4f6bcae9ee5883a4f
parent0f3a421c4dc74c9c20185eb6063d7cfa59715a10 (diff)
downloadlicenseclassifier-a6f4b9f7a6aafe35aefa57e97567f4de44972f46.tar.gz
Add v2 versions of identify_license packages
This uses Go 1.16 embed features, so it moves the overall support level for the licenseclassifier up from 1.15
-rw-r--r--go.mod6
-rw-r--r--go.sum16
-rw-r--r--tools/identify_license/backend/v2/backend.go166
-rw-r--r--tools/identify_license/backend/v2/go.mod10
-rw-r--r--tools/identify_license/backend/v2/go.sum17
-rw-r--r--tools/identify_license/results/v2/go.mod3
-rw-r--r--tools/identify_license/results/v2/results.go142
-rw-r--r--tools/identify_license/v2/go.mod11
-rw-r--r--tools/identify_license/v2/go.sum17
-rwxr-xr-xtools/identify_license/v2/identify_licensebin0 -> 5310636 bytes
-rw-r--r--tools/identify_license/v2/identify_license.go204
-rw-r--r--v2/assets/embed.go54
-rw-r--r--v2/go.mod2
13 files changed, 644 insertions, 4 deletions
diff --git a/go.mod b/go.mod
index 10d6453..58e427a 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/google/licenseclassifier
go 1.16
require (
- github.com/google/go-cmp v0.2.0
- github.com/sergi/go-diff v1.0.0
- github.com/stretchr/testify v1.3.0 // indirect
+ github.com/google/go-cmp v0.5.2
+ github.com/google/licenseclassifier/v2 v2.0.0-alpha.1 // indirect
+ github.com/sergi/go-diff v1.1.0
)
diff --git a/go.sum b/go.sum
index 99076ee..0e424c5 100644
--- a/go.sum
+++ b/go.sum
@@ -1,11 +1,27 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/licenseclassifier/v2 v2.0.0-alpha.1 h1:E0HY5OuFS3CQoVFAr1dabMFm4PyjNMbIB1zYulfwnRI=
+github.com/google/licenseclassifier/v2 v2.0.0-alpha.1/go.mod h1:YAgBGGTeNDMU+WfIgaFvjZe4rudym4f6nIn8ZH5X+VM=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
+github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
+github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/tools/identify_license/backend/v2/backend.go b/tools/identify_license/backend/v2/backend.go
new file mode 100644
index 0000000..5e11d78
--- /dev/null
+++ b/tools/identify_license/backend/v2/backend.go
@@ -0,0 +1,166 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package backend contains the necessary functions to classify a license.
+package backend
+
+import (
+ "context"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "sync"
+ "time"
+
+ //gc "google3/devtools/compliance/common/licenseclassifier/classifier"
+
+ "github.com/google/licenseclassifier/tools/identify_license/results/v2"
+ classifier "github.com/google/licenseclassifier/v2"
+ "github.com/google/licenseclassifier/v2/assets"
+)
+
+// ClassifierInterface is the interface each backend must implement.
+type ClassifierInterface interface {
+ // Close releases whatever the backend holds; ClassifierBackend's
+ // implementation is empty.
+ Close()
+ // SetTraceConfiguration hands a trace configuration to the backend.
+ SetTraceConfiguration(tc *classifier.TraceConfiguration)
+ // ClassifyLicenses classifies every file in filenames, running at most
+ // numTasks classifications at once, and returns the errors encountered.
+ // headers controls whether "Header" matches are kept.
+ ClassifyLicenses(numTasks int, filenames []string, headers bool) []error
+ // ClassifyLicensesWithContext is ClassifyLicenses bounded by ctx.
+ ClassifyLicensesWithContext(ctx context.Context, numTasks int, filenames []string, headers bool) []error
+ // GetResults returns the matches collected by the classify calls.
+ GetResults() results.LicenseTypes
+}
+
+// ClassifierBackend is an object that handles classifying a license.
+type ClassifierBackend struct {
+ // results accumulates one entry per reported match; classifyLicense
+ // appends to it while holding mu.
+ results results.LicenseTypes
+ // mu guards appends to results made by concurrent classification tasks.
+ mu sync.Mutex
+ // classifier performs the matching of file contents against licenses.
+ classifier *classifier.Classifier
+}
+
+// New builds a ClassifierBackend around the default classifier provided by
+// the assets package. It first lists the license directory through
+// assets.ReadLicenseDir and returns the error if that listing, or the
+// construction of the classifier, fails.
+func New() (*ClassifierBackend, error) {
+	if _, err := assets.ReadLicenseDir(); err != nil {
+		return nil, err
+	}
+
+	c, err := assets.DefaultClassifier()
+	if err != nil {
+		return nil, err
+	}
+
+	be := new(ClassifierBackend)
+	be.classifier = c
+	return be, nil
+}
+
+// Close is a no-op: this backend holds nothing that needs to be released.
+// It exists so that ClassifierBackend provides the Close method listed in
+// ClassifierInterface.
+func (b *ClassifierBackend) Close() {
+}
+
+// SetTraceConfiguration currently ignores tc and has no effect: the call that
+// would forward the trace configuration to the underlying classifier is
+// commented out below.
+func (b *ClassifierBackend) SetTraceConfiguration(tc *classifier.TraceConfiguration) {
+ //b.classifier.SetTraceConfiguration((*gc.TraceConfiguration)(tc))
+}
+
+// ClassifyLicenses runs the license classifier over each of the given files
+// and returns the errors reported for individual files (nil if there were
+// none). Matches are accumulated on the backend; see GetResults.
+//
+// At most numTasks files are processed concurrently, because the OS limits
+// the number of files that can be open at any one time. A numTasks below 1 is
+// treated as 1 so that the call can always make progress. headers controls
+// whether "Header" matches are kept.
+func (b *ClassifierBackend) ClassifyLicenses(numTasks int, filenames []string, headers bool) (errors []error) {
+	if numTasks < 1 {
+		numTasks = 1
+	}
+
+	// sem is a counting semaphore bounding the number of running tasks. It is
+	// never closed, so a task releasing its slot can never hit a closed
+	// channel.
+	sem := make(chan struct{}, numTasks)
+
+	// errs has room for one error per file, so tasks never block on it.
+	errs := make(chan error, len(filenames))
+
+	var wg sync.WaitGroup
+	for _, filename := range filenames {
+		sem <- struct{}{} // wait for a free slot before starting a task
+		wg.Add(1)
+		go func(filename string) {
+			// Deferred calls run last-in first-out: the slot is released
+			// before the task is reported as done.
+			defer wg.Done()
+			defer func() { <-sem }()
+			if err := b.classifyLicense(filename, headers); err != nil {
+				errs <- err
+			}
+		}(filename)
+	}
+	wg.Wait()
+	close(errs)
+
+	for err := range errs {
+		errors = append(errors, err)
+	}
+	return errors
+}
+
+// ClassifyLicensesWithContext runs ClassifyLicenses over the given files but
+// stops waiting as soon as ctx is done. In that case it returns a slice
+// holding only ctx.Err(); otherwise it returns whatever ClassifyLicenses
+// returned.
+//
+// Classification that is already running is not interrupted when ctx is done:
+// it keeps going in the background and may still add results to the backend.
+func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, numTasks int, filenames []string, headers bool) (errors []error) {
+	// The channel is buffered so the worker goroutine can always deliver its
+	// result and exit, even when nobody is listening any more after a timeout.
+	done := make(chan []error, 1)
+	go func() {
+		done <- b.ClassifyLicenses(numTasks, filenames, headers)
+	}()
+
+	select {
+	case <-ctx.Done():
+		return []error{ctx.Err()}
+	case errs := <-done:
+		return errs
+	}
+}
+
+// classifyLicense is called by a Go-function to perform the actual
+// classification of a license.
+func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error {
+ contents, err := ioutil.ReadFile(filename)
+ if err != nil {
+ return fmt.Errorf("unable to read %q: %v", filename, err)
+ }
+
+ matchLoop := func(contents []byte) {
+ for _, m := range b.classifier.Match(contents).Matches {
+ // If not looking for headers, skip them
+ if !headers && m.MatchType == "Header" {
+ continue
+ }
+
+ b.mu.Lock()
+ b.results = append(b.results, &results.LicenseType{
+ Filename: filename,
+ MatchType: m.MatchType,
+ Name: m.Name,
+ Variant: m.Variant,
+ Confidence: m.Confidence,
+ StartLine: m.StartLine,
+ EndLine: m.EndLine,
+ })
+ b.mu.Unlock()
+ }
+ }
+
+ log.Printf("Classifying license(s): %s", filename)
+ start := time.Now()
+ matchLoop(contents)
+ log.Printf("Finished Classifying License %q: %v", filename, time.Since(start))
+ return nil
+}
+
+// GetResults returns the results of the classifications collected so far.
+//
+// The returned slice is a snapshot taken under the backend's lock, so the
+// caller may sort or modify it even if classification tasks are still
+// appending results (for example after ClassifyLicensesWithContext timed
+// out). It is nil when there are no results.
+func (b *ClassifierBackend) GetResults() results.LicenseTypes {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return append(results.LicenseTypes(nil), b.results...)
+}
diff --git a/tools/identify_license/backend/v2/go.mod b/tools/identify_license/backend/v2/go.mod
new file mode 100644
index 0000000..9b73b94
--- /dev/null
+++ b/tools/identify_license/backend/v2/go.mod
@@ -0,0 +1,10 @@
+module github.com/google/licenseclassifier/tools/identify_license/backend/v2
+
+go 1.18
+
+require (
+ github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/google/licenseclassifier/tools/identify_license/results/v2 v2.0.0 // indirect
+ github.com/google/licenseclassifier/v2 v2.0.0 // indirect
+ github.com/sergi/go-diff v1.1.0 // indirect
+)
diff --git a/tools/identify_license/backend/v2/go.sum b/tools/identify_license/backend/v2/go.sum
new file mode 100644
index 0000000..72fe660
--- /dev/null
+++ b/tools/identify_license/backend/v2/go.sum
@@ -0,0 +1,17 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
+github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/tools/identify_license/results/v2/go.mod b/tools/identify_license/results/v2/go.mod
new file mode 100644
index 0000000..962b8e0
--- /dev/null
+++ b/tools/identify_license/results/v2/go.mod
@@ -0,0 +1,3 @@
+module github.com/google/licenseclassifier/tools/identify_license/results/v2
+
+go 1.18
diff --git a/tools/identify_license/results/v2/results.go b/tools/identify_license/results/v2/results.go
new file mode 100644
index 0000000..fd7c598
--- /dev/null
+++ b/tools/identify_license/results/v2/results.go
@@ -0,0 +1,142 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package results contains the result type returned by the classifier backend.
+// Placing the type into a separate module allows us to swap out backends and
+// still use the same datatype.
+package results
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "sort"
+)
+
+// LicenseType describes one match reported for a file of unknown license.
+type LicenseType struct {
+	Filename   string  // file the match was found in
+	Name       string  // name reported for the match
+	MatchType  string  // kind of match, e.g. "License" or "Header"
+	Variant    string  // variant reported for the match
+	Confidence float64 // confidence of the match
+	StartLine  int     // first line of the matched range
+	EndLine    int     // last line of the matched range
+}
+
+// LicenseTypes is a list of LicenseType objects. It implements
+// sort.Interface, ordering by descending confidence, then by ascending
+// filename, then by ascending end line.
+type LicenseTypes []*LicenseType
+
+// Len reports the number of entries.
+func (lt LicenseTypes) Len() int {
+	return len(lt)
+}
+
+// Swap exchanges the entries at positions i and j.
+func (lt LicenseTypes) Swap(i, j int) {
+	lt[j], lt[i] = lt[i], lt[j]
+}
+
+// Less reports whether the entry at i sorts before the entry at j.
+func (lt LicenseTypes) Less(i, j int) bool {
+	a, b := lt[i], lt[j]
+	switch {
+	case a.Confidence > b.Confidence:
+		return true
+	case a.Confidence < b.Confidence:
+		return false
+	case a.Filename < b.Filename:
+		return true
+	case a.Filename > b.Filename:
+		return false
+	}
+	return a.EndLine < b.EndLine
+}
+
+// Classification is the license classification for a segment of a file.
+type Classification struct {
+ // Name, Confidence, StartLine and EndLine are copied from the
+ // corresponding LicenseType fields by NewJSONResult.
+ Name string
+ Confidence float64
+ StartLine int
+ EndLine int
+ // Text holds the file's lines StartLine through EndLine when
+ // NewJSONResult is asked to include text; it is omitted from the JSON
+ // output when empty.
+ Text string `json:",omitempty"`
+}
+
+// Classifications contains all license classifications for a file.
+type Classifications []*Classification
+
+// FileClassifications contains the license classifications for a particular file.
+type FileClassifications struct {
+ // Filepath is the name of the file the classifications belong to.
+ Filepath string
+ // Classifications lists every classification recorded for that file.
+ Classifications Classifications
+}
+
+// JSONResult is the top-level value written to the JSON output file: one
+// FileClassifications entry per file. It implements sort.Interface, ordering
+// entries by ascending Filepath.
+type JSONResult []*FileClassifications
+
+func (jr JSONResult) Len() int { return len(jr) }
+func (jr JSONResult) Swap(i, j int) { jr[i], jr[j] = jr[j], jr[i] }
+func (jr JSONResult) Less(i, j int) bool { return jr[i].Filepath < jr[j].Filepath }
+
+// readFileLines will read a specified range of lines of a file
+func readFileLines(filename string, startLine, endLine int) (string, error) {
+ f, err := os.Open(filename)
+ if err != nil {
+ return "", err
+ }
+ defer f.Close()
+
+ scanner := bufio.NewScanner(f)
+ lines := ""
+ i := 0
+ for scanner.Scan() {
+ i++ // lines are 1-indexed
+ if i < startLine {
+ continue
+ } else if i > endLine {
+ break
+ }
+ lines += scanner.Text() + "\n"
+ }
+ if i < endLine {
+ return "", fmt.Errorf(
+ "line %d was the last line read from file %s, but endLine was set to %d", i, filename, endLine)
+ }
+ return lines, nil
+}
+
+// NewJSONResult groups the given license matches by file and returns the
+// groups sorted by file path. When includeText is true, each classification
+// also carries the text of the matched line range, read from the file; a
+// failure to read that text aborts the conversion and is returned.
+func NewJSONResult(licenses LicenseTypes, includeText bool) (JSONResult, error) {
+	byFile := make(map[string]*FileClassifications)
+	for _, lic := range licenses {
+		entry := &Classification{
+			Name:       lic.Name,
+			Confidence: lic.Confidence,
+			StartLine:  lic.StartLine,
+			EndLine:    lic.EndLine,
+		}
+		if includeText {
+			text, err := readFileLines(lic.Filename, lic.StartLine, lic.EndLine)
+			if err != nil {
+				return nil, err
+			}
+			entry.Text = text
+		}
+
+		group, seen := byFile[lic.Filename]
+		if !seen {
+			group = &FileClassifications{Filepath: lic.Filename}
+			byFile[lic.Filename] = group
+		}
+		group.Classifications = append(group.Classifications, entry)
+	}
+
+	// Start from an empty, non-nil slice so that an empty result is still a
+	// JSON array rather than null.
+	out := JSONResult{}
+	for _, group := range byFile {
+		out = append(out, group)
+	}
+	sort.Sort(out)
+	return out, nil
+}
diff --git a/tools/identify_license/v2/go.mod b/tools/identify_license/v2/go.mod
new file mode 100644
index 0000000..a0b8e2e
--- /dev/null
+++ b/tools/identify_license/v2/go.mod
@@ -0,0 +1,11 @@
+module github.com/google/licenseclassifier/tools/identify_license/v2
+
+go 1.16
+
+require (
+ github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/google/licenseclassifier/tools/identify_license/backend/v2 v2.0.0 // indirect
+ github.com/google/licenseclassifier/tools/identify_license/results/v2 v2.0.0 // indirect
+ github.com/google/licenseclassifier/v2 v2.0.0 // indirect
+ github.com/sergi/go-diff v1.1.0 // indirect
+)
diff --git a/tools/identify_license/v2/go.sum b/tools/identify_license/v2/go.sum
new file mode 100644
index 0000000..72fe660
--- /dev/null
+++ b/tools/identify_license/v2/go.sum
@@ -0,0 +1,17 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
+github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/tools/identify_license/v2/identify_license b/tools/identify_license/v2/identify_license
new file mode 100755
index 0000000..c7ad3f7
--- /dev/null
+++ b/tools/identify_license/v2/identify_license
Binary files differ
diff --git a/tools/identify_license/v2/identify_license.go b/tools/identify_license/v2/identify_license.go
new file mode 100644
index 0000000..42d9b94
--- /dev/null
+++ b/tools/identify_license/v2/identify_license.go
@@ -0,0 +1,204 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The identify_license program tries to identify the license type of an
+// unknown license. The file containing the license text is specified on the
+// command line. Multiple license files can be analyzed with a single command.
+// The type of the license is returned along with the confidence level of the
+// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
+// exact match and 0.0 indicating a complete mismatch. The results are sorted
+// by confidence level.
+//
+// $ identify_license <LICENSE_OR_DIRECTORY> <LICENSE_OR_DIRECTORY> ...
+// LICENSE2 MIT (variant: MIT.txt, confidence: 0.987, start: 1, end: 21)
+// LICENSE1 BSD-2-Clause (variant: BSD-2-Clause.txt, confidence: 0.833, start: 1, end: 24)
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "strings"
+
+ //"google3/file/base/go/contrib/walk/walk"
+ //"google3/file/base/go/file"
+ "io/fs"
+ "io/ioutil"
+ "log"
+ "os"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "time"
+
+ "github.com/google/licenseclassifier/tools/identify_license/backend/v2"
+ "github.com/google/licenseclassifier/tools/identify_license/results/v2"
+ classifier "github.com/google/licenseclassifier/v2"
+)
+
+// Command-line flags. tracePhases and traceLicenses are passed to the backend
+// as a trace configuration; ignorePaths is split on commas and each piece is
+// compiled as a regular expression by parseIgnorePaths.
+var (
+ headers = flag.Bool("headers", false, "match license headers")
+ jsonFname = flag.String("json", "", "filename to write JSON output to.")
+ includeText = flag.Bool("include_text", false, "include the license text in the JSON output")
+ numTasks = flag.Int("tasks", 1000, "the number of license scanning tasks running concurrently")
+ timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")
+ tracePhases = flag.String("trace_phases", "", "comma-separated list of phases of the license classifier to trace")
+ traceLicenses = flag.String("trace_licenses", "", "comma-separated list of licenses for the license classifier to trace")
+ ignorePaths = flag.String("ignore_paths_re", "", "comma-separated list of regular expressions that match file paths to ignore")
+)
+
+// expandFiles recursively returns a list of files stored in a list of
+// directories. If an input is not a directory, it is added to the output list.
+//
+// Every input is made absolute before it is walked. Directories whose base
+// name matches one of the ignore patterns are skipped entirely; files whose
+// full path matches one are left out. The walk stops with ctx's error as soon
+// as ctx is done. Any error from parsing the ignore patterns or from walking
+// the tree is returned.
+func expandFiles(ctx context.Context, paths []string) ([]string, error) {
+	ip, err := parseIgnorePaths()
+	if err != nil {
+		return nil, fmt.Errorf("could not parse ignore paths: %w", err)
+	}
+
+	var finalPaths []string
+	for _, p := range paths {
+		root, err := filepath.Abs(p)
+		if err != nil {
+			return nil, err
+		}
+
+		err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+			if err != nil {
+				return err
+			}
+			// Give up promptly on large trees once the caller has cancelled.
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			if info.IsDir() {
+				if shouldIgnore(ip, info.Name()) {
+					return fs.SkipDir
+				}
+				return nil // walk the directory
+			}
+			if !shouldIgnore(ip, path) {
+				finalPaths = append(finalPaths, path)
+			}
+			return nil
+		})
+		if err != nil {
+			return nil, err
+		}
+	}
+	return finalPaths, nil
+}
+
+// shouldIgnore reports whether path is matched in its entirety by at least
+// one of the given patterns, as decided by exactRegexMatch.
+func shouldIgnore(ignorePaths []*regexp.Regexp, path string) bool {
+	for i := range ignorePaths {
+		if exactRegexMatch(ignorePaths[i], path) {
+			return true
+		}
+	}
+	return false
+}
+
+// exactRegexMatch reports whether the first match r finds in s spans the
+// whole of s. Only the match returned by FindStringIndex is examined, so a
+// pattern whose first match is a shorter one is reported as not matching.
+func exactRegexMatch(r *regexp.Regexp, s string) bool {
+	loc := r.FindStringIndex(s)
+	return loc != nil && loc[0] == 0 && loc[1] == len(s)
+}
+
+// parseIgnorePaths compiles the comma-separated patterns of the
+// -ignore_paths_re flag.
+//
+// Each pattern is compiled anchored to the whole input, so that testing for
+// a full-string match behaves as expected even for patterns whose first
+// match would otherwise be a shorter one (such as "a|ab" against "ab").
+// Empty pieces, including the single empty piece produced by the flag's
+// default value, are skipped. The first pattern that fails to compile is
+// reported as an error naming that pattern.
+func parseIgnorePaths() (out []*regexp.Regexp, err error) {
+	for _, p := range strings.Split(*ignorePaths, ",") {
+		if p == "" {
+			continue
+		}
+		// Validate the pattern on its own first, so that wrapping it in a
+		// group cannot hide a syntax error such as unbalanced parentheses.
+		if _, err := regexp.Compile(p); err != nil {
+			return nil, fmt.Errorf("invalid ignore path pattern %q: %w", p, err)
+		}
+		r, err := regexp.Compile(`\A(?:` + p + `)\z`)
+		if err != nil {
+			return nil, fmt.Errorf("invalid ignore path pattern %q: %w", p, err)
+		}
+		out = append(out, r)
+	}
+	return out, nil
+}
+
+// outputJSON converts res into its JSON form, indented for readability, and
+// writes it to the file named by *filename with mode 0644. includeText is
+// passed on to results.NewJSONResult. The first error from the conversion,
+// the encoding or the write is returned.
+func outputJSON(filename *string, res results.LicenseTypes, includeText bool) error {
+	jr, err := results.NewJSONResult(res, includeText)
+	if err != nil {
+		return err
+	}
+
+	encoded, err := json.MarshalIndent(jr, "", " ")
+	if err != nil {
+		return err
+	}
+
+	return ioutil.WriteFile(*filename, encoded, 0644)
+}
+
+// init installs a custom flag.Usage that writes a short description of the
+// program, headed by the base name of the executable, to standard error and
+// then prints the defaults of all defined flags.
+func init() {
+ flag.Usage = func() {
+ fmt.Fprintf(os.Stderr, `Usage: %s <licensefile> ...
+
+Identify an unknown license.
+
+Options:
+`, filepath.Base(os.Args[0]))
+ flag.PrintDefaults()
+ }
+}
+
+// main parses the flags, expands the command-line arguments into a list of
+// files, classifies them within the configured timeout, prints the matches
+// sorted by confidence and, if -json is set, also writes them to a JSON file.
+// Any failure along the way is fatal.
+func main() {
+	flag.Parse()
+
+	be, err := backend.New()
+	if err != nil {
+		log.Fatalf("cannot create license classifier: %v", err)
+	}
+	defer be.Close()
+
+	paths, err := expandFiles(context.Background(), flag.Args())
+	if err != nil {
+		// log.Fatalf exits without running deferred calls, so close first.
+		be.Close()
+		log.Fatalf("cannot expand file paths: %v", err)
+	}
+
+	be.SetTraceConfiguration(
+		&classifier.TraceConfiguration{
+			TracePhases:   *tracePhases,
+			TraceLicenses: *traceLicenses,
+		})
+
+	ctx, cancel := context.WithTimeout(context.Background(), *timeout)
+	defer cancel()
+	if errs := be.ClassifyLicensesWithContext(ctx, *numTasks, paths, *headers); errs != nil {
+		be.Close()
+		for _, err := range errs {
+			log.Printf("classify license failed: %v", err)
+		}
+		log.Fatal("cannot classify licenses")
+	}
+
+	// Named matches rather than results so the imported results package is
+	// not shadowed.
+	matches := be.GetResults()
+	if len(matches) == 0 {
+		log.Fatal("Couldn't classify license(s)")
+	}
+
+	sort.Sort(matches)
+	for _, r := range matches {
+		name := r.Name
+		if r.MatchType != "License" && r.MatchType != "Header" {
+			name = fmt.Sprintf("%s:%s", r.MatchType, r.Name)
+		}
+		fmt.Printf("%s %s (variant: %v, confidence: %v, start: %v, end: %v)\n",
+			r.Filename, name, r.Variant, r.Confidence, r.StartLine, r.EndLine)
+	}
+	if len(*jsonFname) > 0 {
+		err = outputJSON(jsonFname, matches, *includeText)
+		if err != nil {
+			log.Fatalf("Couldn't write JSON output to file %s: %v", *jsonFname, err)
+		}
+	}
+}
diff --git a/v2/assets/embed.go b/v2/assets/embed.go
new file mode 100644
index 0000000..7a93a39
--- /dev/null
+++ b/v2/assets/embed.go
@@ -0,0 +1,54 @@
+package assets
+
+import (
+ "embed"
+ "fmt"
+ "io/fs"
+ "os"
+ "strings"
+
+ classifier "github.com/google/licenseclassifier/v2"
+)
+
+// licenseFS holds the license assets embedded into the binary at build time.
+// The embed pattern below selects entries three path levels beneath this
+// package's directory.
+//
+//go:embed */*/*
+var licenseFS embed.FS
+
+// DefaultClassifier returns a classifier loaded with the contents of the
+// assets directory.
+//
+// Every embedded file is added to the classifier under the category, name and
+// variant given by the first three elements of its path. An error is returned
+// if an embedded file cannot be read or if its path has fewer than three
+// elements.
+func DefaultClassifier() (*classifier.Classifier, error) {
+	c := classifier.NewClassifier(.8)
+
+	err := fs.WalkDir(licenseFS, ".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			return nil
+		}
+
+		b, err := licenseFS.ReadFile(path)
+		if err != nil {
+			return err
+		}
+
+		// Paths inside an embed.FS always use forward slashes, whatever the
+		// host operating system, so they must not be split on the OS path
+		// separator.
+		splits := strings.Split(path, "/")
+		if len(splits) < 3 {
+			return fmt.Errorf("embedded asset %q is not of the form category/name/variant: %w", path, os.ErrInvalid)
+		}
+		category, name, variant := splits[0], splits[1], splits[2]
+		c.AddContent(category, name, variant, b)
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return c, nil
+}
+
+// ReadLicenseFile returns the contents of the named file from the license
+// assets embedded in this package. The name is passed unchanged to the
+// embedded filesystem, so it is resolved there rather than on disk.
+func ReadLicenseFile(filename string) ([]byte, error) {
+ return licenseFS.ReadFile(filename)
+}
+
+// ReadLicenseDir lists the entries at the root of the embedded license
+// filesystem.
+func ReadLicenseDir() ([]fs.DirEntry, error) {
+ return licenseFS.ReadDir(".")
+}
diff --git a/v2/go.mod b/v2/go.mod
index e84f4c1..632bb23 100644
--- a/v2/go.mod
+++ b/v2/go.mod
@@ -1,6 +1,6 @@
module github.com/google/licenseclassifier/v2
-go 1.15
+go 1.16
require (
github.com/davecgh/go-spew v1.1.1