aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bill Neubauer <wcn@google.com> 2022-08-02 10:54:56 -0700
committer Bill Neubauer <bill.neubauer@gmail.com> 2022-09-16 10:06:11 -0700
commit 27441af7cea0e66e29fc9ead35db306e8313dc14 (patch)
tree 2661a2530ede7ecb5b514fbe122f0d1d0132bf60
parent ebfb5e6792acd76483c177dcdb6853b4f5b868c6 (diff)
download licenseclassifier-27441af7cea0e66e29fc9ead35db306e8313dc14.tar.gz
Reduce noisy logging in searchset. The intermediate results are rarely useful
in debugging; only the final set helps to understand why a document didn't survive for further matching rounds.
PiperOrigin-RevId: 464832890
-rw-r--r-- v2/searchset.go 8
1 files changed, 4 insertions, 4 deletions
diff --git a/v2/searchset.go b/v2/searchset.go
index a25b2e2..e0e69ce 100644
--- a/v2/searchset.go
+++ b/v2/searchset.go
@@ -190,6 +190,9 @@ func (c *Classifier) findPotentialMatches(src, target *searchSet, confidence flo
}
}
+ if c.tc.traceSearchset(src.origin) {
+ c.tc.trace("finalized matchedRanges for %s: %d = %s", src.origin, len(src.Tokens), spew.Sdump(matchedRanges))
+ }
return matchedRanges
}
@@ -219,7 +222,7 @@ func (c *Classifier) fuseRanges(origin string, matched matchRanges, confidence f
// For each hit detected, compare it against all other previous hits to see if it can be part of match
// or represents a group that is eligible for matching and having other hits contribute to it.
- for i, m := range matched {
+ for _, m := range matched {
off := m.TargetStart - m.SrcStart
// If the offset is negative, but within error margins, we associate it
@@ -301,9 +304,6 @@ func (c *Classifier) fuseRanges(origin string, matched matchRanges, confidence f
if unclaimed && m.TokensClaimed*10 > matched[0].TokensClaimed {
claimed = append(claimed, m)
}
- if c.tc.traceSearchset(origin) {
- c.tc.trace("after %d ranges, claimed is %s", i, spew.Sdump(claimed))
- }
}
sort.Sort(claimed)
if c.tc.traceSearchset(origin) {