From e1af5d0f485f5d5699a767de29a2a9432404db21 Mon Sep 17 00:00:00 2001 From: Remi NGUYEN VAN Date: Fri, 20 May 2022 11:34:32 +0900 Subject: Speed up jarjar with prefix matches A common use-case for jarjar is to have rule with the format my.package.name.**, or even my.package.name.MyClass with no wildcards. Jarjar will convert that pattern to a regular expression and do regular expression matching on every symbol it scans to determine if the rule matches. This is very inefficient as most symbols will not match, and a simple prefix match can rule out the match in most cases. When parsing rules, extract the plain-text prefix of each rule, which is everything before the first wildcard. Then index all rules by their prefix in a prefix trie. This allows: - Avoiding slow regular expression matches in most cases, as jarjar can immediately observe that there is no matching prefix in the map. - Scaling in O(log(n)) with the number of rules when they have distinct prefixes (instead of O(n), as a matching prefix can be found from the trie instead of looping through all rules. This makes jarjar faster in current builds (running all jarjar rules to build a device back-to-back goes from 295s to 270s). For example: With FrameworksNetTests (not part of device build): real 0m48.580s -> 0m24.507s user 1m10.767s -> 0m39.251s sys 0m12.029s -> 0m12.099s With service-wifi: real 0m12.887s -> 0m7.061s user 0m26.274s -> 0m20.519s sys 0m3.806s -> 0m3.478s With NetworkStack: real 0m2.081s -> 0m1.870s user 0m7.119s -> 0m6.230s sys 0m1.157s -> 0m1.049s All jars jarjared as part of a device build were checked to remain byte-identical after this change. Test: m clean, m, grep out/verbose.log.gz for jarjar commands, run all commands with/without updated jarjar and check output is byte-identical. Bug: 217129444 Bug: 233081774 Change-Id: I3b7c1a7215ea8378b819ab0fe1f74b2b6fe8dcb5 (cherry picked from commit e3db807ac093f42e2bf3d6ad3979a4a500cd4e49) Merged-In: I3b7c1a7215ea8378b819ab0fe1f74b2b6fe8dcb5 --- .../com/tonicsystems/jarjar/PackageRemapper.java | 7 +- .../com/tonicsystems/jarjar/PatternElement.java | 4 +- src/main/com/tonicsystems/jarjar/Wildcard.java | 21 ++++- src/main/com/tonicsystems/jarjar/WildcardTrie.java | 91 ++++++++++++++++++++++ 4 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 src/main/com/tonicsystems/jarjar/WildcardTrie.java diff --git a/src/main/com/tonicsystems/jarjar/PackageRemapper.java b/src/main/com/tonicsystems/jarjar/PackageRemapper.java index 4d102be..e281f24 100644 --- a/src/main/com/tonicsystems/jarjar/PackageRemapper.java +++ b/src/main/com/tonicsystems/jarjar/PackageRemapper.java @@ -16,7 +16,6 @@ package com.tonicsystems.jarjar; -import org.objectweb.asm.*; import org.objectweb.asm.commons.*; import java.util.*; import java.util.regex.Pattern; @@ -28,7 +27,7 @@ class PackageRemapper extends Remapper private static final Pattern ARRAY_FOR_NAME_PATTERN = Pattern.compile("\\[L[\\p{javaJavaIdentifierPart}\\.]+?;"); - private final List wildcards; + private final WildcardTrie wildcards; private final Map typeCache = new HashMap(); private final Map pathCache = new HashMap(); private final Map valueCache = new HashMap(); @@ -36,7 +35,7 @@ class PackageRemapper extends Remapper public PackageRemapper(List ruleList, boolean verbose) { this.verbose = verbose; - wildcards = PatternElement.createWildcards(ruleList); + wildcards = new WildcardTrie(PatternElement.createWildcards(ruleList)); } // also used by KeepProcessor @@ -118,7 +117,7 @@ class PackageRemapper extends Remapper } private String replaceHelper(String value) { - for (Wildcard wildcard : wildcards) { + for (Wildcard wildcard : wildcards.getPossibleMatches(value)) { String test = wildcard.replace(value); if (test != null) return test; diff --git a/src/main/com/tonicsystems/jarjar/PatternElement.java b/src/main/com/tonicsystems/jarjar/PatternElement.java index 6ccd9ea..6b852d7 100644 --- a/src/main/com/tonicsystems/jarjar/PatternElement.java +++ b/src/main/com/tonicsystems/jarjar/PatternElement.java @@ -32,12 +32,14 @@ abstract public class PatternElement static List createWildcards(List patterns) { List wildcards = new ArrayList(); + int ruleIndex = 0; for (PatternElement pattern : patterns) { String result = (pattern instanceof Rule) ? ((Rule)pattern).getResult() : ""; String expr = pattern.getPattern(); if (expr.indexOf('/') >= 0) throw new IllegalArgumentException("Patterns cannot contain slashes"); - wildcards.add(new Wildcard(expr.replace('.', '/'), result)); + wildcards.add(new Wildcard(expr.replace('.', '/'), result, ruleIndex)); + ruleIndex++; } return wildcards; } diff --git a/src/main/com/tonicsystems/jarjar/Wildcard.java b/src/main/com/tonicsystems/jarjar/Wildcard.java index 5cc486f..c92e0fb 100644 --- a/src/main/com/tonicsystems/jarjar/Wildcard.java +++ b/src/main/com/tonicsystems/jarjar/Wildcard.java @@ -27,14 +27,18 @@ class Wildcard private static Pattern star = Pattern.compile("\\*"); private static Pattern estar = Pattern.compile("\\+\\??\\)\\Z"); private static Pattern dollar = Pattern.compile("\\$"); + // Apart from stars and dollar signs, wildcards are plain-text full matches + private static Pattern plainTextPrefixPattern = Pattern.compile("^[^*$]*"); private final Pattern pattern; + private final String plainTextPrefix; + private final int ruleIndex; private final int count; private final ArrayList parts = new ArrayList(16); // kept for debugging private final String[] strings; private final int[] refs; - public Wildcard(String pattern, String result) { + public Wildcard(String pattern, String result, int ruleIndex) { if (pattern.equals("**")) throw new IllegalArgumentException("'**' is not a valid pattern"); if (!checkIdentifierChars(pattern, "/*")) @@ -47,6 +51,13 @@ class Wildcard regex = replaceAllLiteral(star, regex, "([^/]+)"); regex = replaceAllLiteral(estar, regex, "*)"); regex = replaceAllLiteral(dollar, regex, "\\$"); + Matcher prefixMatcher = plainTextPrefixPattern.matcher(pattern); + // prefixMatcher will always match, but may match an empty string + if (!prefixMatcher.find()) { + throw new IllegalArgumentException(plainTextPrefixPattern + " not found in " + pattern); + } + this.plainTextPrefix = prefixMatcher.group(); + this.ruleIndex = ruleIndex; this.pattern = Pattern.compile("\\A" + regex + "\\Z"); this.count = this.pattern.matcher("foo").groupCount(); @@ -95,6 +106,14 @@ class Wildcard // System.err.println(this); } + public String getPlainTextPrefix() { + return plainTextPrefix; + } + + public int getRuleIndex() { + return ruleIndex; + } + public boolean matches(String value) { return getMatcher(value) != null; } diff --git a/src/main/com/tonicsystems/jarjar/WildcardTrie.java b/src/main/com/tonicsystems/jarjar/WildcardTrie.java new file mode 100644 index 0000000..e80dbc9 --- /dev/null +++ b/src/main/com/tonicsystems/jarjar/WildcardTrie.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tonicsystems.jarjar; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.TreeMap; + +/** + * A prefix trie of {@link Wildcard}, where the prefix is obtained from + * {@link Wildcard#getPlainTextPrefix()}. + * + * This allows quick lookup of applicable wildcards in the common case where wildcards have a + * non-empty plain-text prefix. + */ +public class WildcardTrie { + private final TreeMap subTries = new TreeMap<>(); + private final List wildcards = new ArrayList<>(); + private final String prefix; + + public WildcardTrie(List wildcards) { + this(""); + final ArrayList lst = new ArrayList<>(wildcards); + // Sort values to ensure that wildcards that prefix others are added first + lst.sort(Comparator.comparing(Wildcard::getPlainTextPrefix)); + for (Wildcard w : lst) { + final String prefix = w.getPlainTextPrefix(); + final WildcardTrie prefixTrie = findSubTrieWhichPrefixes(prefix, this); + if (prefixTrie.prefix.equals(prefix)) { + prefixTrie.wildcards.add(w); + } else { + final WildcardTrie newTrie = new WildcardTrie(prefix); + newTrie.wildcards.add(w); + prefixTrie.subTries.put(prefix, newTrie); + } + } + } + + private WildcardTrie(String prefix) { + this.prefix = prefix; + } + + private static WildcardTrie findSubTrieWhichPrefixes(String value, WildcardTrie baseTrie) { + final String possiblePrefix = baseTrie.subTries.floorKey(value); + // Because each level of the trie does not contain keys that are prefixes of each other, + // there can be at most one prefix of the value at that level, and that prefix will be the + // highest key ordered before the value (any non-prefix key would have a character + // difference with the prefix and so be ordered before the prefix or after the value). + if (possiblePrefix != null && value.startsWith(possiblePrefix)) { + return findSubTrieWhichPrefixes(value, baseTrie.subTries.get(possiblePrefix)); + } + return baseTrie; + } + + public List getPossibleMatches(String value) { + WildcardTrie baseTrie = this; + List prefixMatches = wildcards.isEmpty() + // If there's no match, don't even allocate a list and use the singleton emptyList + ? Collections.emptyList() : new ArrayList<>(wildcards); + while (true) { + final String possiblePrefix = baseTrie.subTries.floorKey(value); + if (possiblePrefix != null && value.startsWith(possiblePrefix)) { + baseTrie = baseTrie.subTries.get(possiblePrefix); + if (prefixMatches.isEmpty()) { + prefixMatches = new ArrayList<>(baseTrie.wildcards); + } else { + prefixMatches.addAll(baseTrie.wildcards); + } + } else { + prefixMatches.sort(Comparator.comparing(Wildcard::getRuleIndex)); + return prefixMatches; + } + } + } +} -- cgit v1.2.3