diff options
Diffstat (limited to 'tools/identify_license/backend/backend.go')
-rw-r--r-- | tools/identify_license/backend/backend.go | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/tools/identify_license/backend/backend.go b/tools/identify_license/backend/backend.go new file mode 100644 index 0000000..a9e46ba --- /dev/null +++ b/tools/identify_license/backend/backend.go @@ -0,0 +1,166 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package backend contains the necessary functions to classify a license.
package backend

import (
	"context"
	"fmt"
	"io/ioutil"
	"log"
	"sync"
	"time"

	"github.com/google/licenseclassifier"
	"github.com/google/licenseclassifier/commentparser"
	"github.com/google/licenseclassifier/commentparser/language"
	"github.com/google/licenseclassifier/tools/identify_license/results"
)

// ClassifierInterface is the interface each backend must implement.
//
// ClassifierBackend, defined in this file, is one implementation; the notes on
// the methods below describe the contract as that implementation fulfils it.
type ClassifierInterface interface {
	// Close lets a backend release anything it holds. ClassifierBackend has
	// nothing to release, so its Close does nothing.
	Close()

	// ClassifyLicenses classifies every file named in filenames and returns
	// the errors that occurred along the way. ClassifierBackend implements it
	// by calling ClassifyLicensesWithContext with context.Background(). The
	// headers flag is handed to the underlying classifier unchanged.
	ClassifyLicenses(filenames []string, headers bool) []error

	// ClassifyLicensesWithContext is ClassifyLicenses with a caller-supplied
	// context. In ClassifierBackend, a file that is dequeued after ctx has
	// finished is not classified and is reported in the returned errors.
	ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) []error

	// GetResults returns the matches collected by the classification calls.
	GetResults() results.LicenseTypes
}

// ClassifierBackend is an object that handles classifying a license.
//
// Instances are created with New. The struct contains a sync.Mutex, so it is
// handled through a pointer and must not be copied.
type ClassifierBackend struct {
	// results accumulates one entry per license match found by classifyLicense.
	results results.LicenseTypes
	// mu is held while classifyLicense appends to results, because files are
	// classified by several goroutines at once.
	mu sync.Mutex
	// classifier is the licenseclassifier instance built in New; its
	// MultipleMatch method performs the actual matching.
	classifier *licenseclassifier.License
}

// New creates a new backend working on the local filesystem.
+func New(threshold float64, forbiddenOnly bool) (*ClassifierBackend, error) { + var lc *licenseclassifier.License + var err error + if forbiddenOnly { + lc, err = licenseclassifier.NewWithForbiddenLicenses(threshold) + } else { + lc, err = licenseclassifier.New(threshold) + } + if err != nil { + return nil, err + } + return &ClassifierBackend{classifier: lc}, nil +} + +// Close does nothing here since there's nothing to close. +func (b *ClassifierBackend) Close() { +} + +// ClassifyLicenses runs the license classifier over the given file. +func (b *ClassifierBackend) ClassifyLicenses(filenames []string, headers bool) (errors []error) { + return b.ClassifyLicensesWithContext(context.Background(), filenames, headers) +} + +// ClassifyLicensesWithContext runs the license classifier over the given file; +// ensure that it will respect the timeout and cancelation in the provided context. +func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) (errors []error) { + + files := make(chan string, len(filenames)) + for _, f := range filenames { + files <- f + } + close(files) + errs := make(chan error, len(filenames)) + + var wg sync.WaitGroup + + // Create a pool from which tasks can later be started. We use a pool because the OS limits + // the number of files that can be open at any one time. + const numTasks = 1000 + wg.Add(numTasks) + + for i := 0; i < numTasks; i++ { + go func() { + // Ensure that however this function terminates, the wait group + // is unblocked + defer wg.Done() + + for { + filename := <-files + + // no file? we're done + if filename == "" { + break + } + + // If the context is done, record that the file was not + // classified due to the context's termination. 
+ if err := ctx.Err(); err != nil { + errs <- fmt.Errorf("file %s not classified due to context completion: %v", filename, err) + continue + } + + if err := b.classifyLicense(filename, headers); err != nil { + errs <- err + } + } + }() + } + + wg.Wait() + close(errs) + + for err := range errs { + errors = append(errors, err) + } + return errors +} + +// classifyLicense is called by a Go-function to perform the actual +// classification of a license. +func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error { + contents, err := ioutil.ReadFile(filename) + if err != nil { + return fmt.Errorf("unable to read %q: %v", filename, err) + } + + matchLoop := func(contents string) { + for _, m := range b.classifier.MultipleMatch(contents, headers) { + b.mu.Lock() + b.results = append(b.results, &results.LicenseType{ + Filename: filename, + Name: m.Name, + Confidence: m.Confidence, + Offset: m.Offset, + Extent: m.Extent, + }) + b.mu.Unlock() + } + } + + log.Printf("Classifying license(s): %s", filename) + start := time.Now() + if lang := language.ClassifyLanguage(filename); lang == language.Unknown { + matchLoop(string(contents)) + } else { + log.Printf("detected language: %v", lang) + comments := commentparser.Parse(contents, lang) + for ch := range comments.ChunkIterator() { + matchLoop(ch.String()) + } + } + log.Printf("Finished Classifying License %q: %v", filename, time.Since(start)) + return nil +} + +// GetResults returns the results of the classifications. +func (b *ClassifierBackend) GetResults() results.LicenseTypes { + return b.results +} |