summaryrefslogtreecommitdiff
path: root/base/android/java/src/org/chromium/base/PiiElider.java
blob: 7652671873cbb2cb9bebde96e13d03a6d9e6f7cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.base;

import android.text.TextUtils;
import android.util.Patterns;

import org.jni_zero.CalledByNative;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Provides public methods for detecting and eliding sensitive PII.
 *
 * <p>Each {@code elide*} method replaces every occurrence of one kind of PII (emails, URLs, IP
 * addresses, MAC addresses, console messages) in a string with a fixed placeholder.
 */
public class PiiElider {
    private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED";

    private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED";

    // Character-class contents (no surrounding brackets): ASCII alphanumerics plus a set of
    // non-ASCII ranges.
    private static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    // Dotted-quad IPv4 address made of four dot-separated decimal components.
    private static final String IP_ADDRESS =
            "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
            + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
            + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
            + "|[1-9][0-9]|[0-9]))";

    // A single host label: 1 to 63 characters that neither starts nor ends with '-'.
    private static final String IRI =
            "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";

    // Same as GOOD_IRI_CHAR but without digits.
    private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
    private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
    // One or more dot-terminated labels followed by a top-level domain.
    private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;

    private static final String URI_ENCODED_CHAR = "(%[a-fA-F0-9]{2})";

    private static final String URI_CHAR = "([a-zA-Z0-9$_.+!*'(),;?&=-]|" + URI_ENCODED_CHAR + ")";

    private static final String PATH_CHAR =
            // Either a single valid path component character or a URI-encoded character.
            "(([" + GOOD_IRI_CHAR + ";/?:@&=#~.+!*'(),_-])|" + URI_ENCODED_CHAR + ")";

    // Scheme followed by an optional "user[:password]@" part.
    private static final String URI_SCHEME = "((http|https|Http|Https|rtsp|Rtsp)://"
            + "(" + URI_CHAR + "{1,64}(:" + URI_CHAR + "{1,25})?@)?)";

    private static final String DOMAIN_NAME = "(" + HOST_NAME + "|" + IP_ADDRESS + ")";

    private static final String PORT = "(:\\d{1,5})";

    private static final String URL_WITH_OPTIONAL_SCHEME_AND_PORT =
            "(" + URI_SCHEME + "?" + DOMAIN_NAME + PORT + "?)";

    private static final String PATH_COMPONENT = "(" + PATH_CHAR + "+)";

    // Based on: http://www.faqs.org/rfcs/rfc2396.html#:~:text=Scheme%20Component
    private static final String INTENT_SCHEME = "[a-zA-Z][a-zA-Z0-9+.-]+://";

    private static final String INTENT = "(" + INTENT_SCHEME + PATH_COMPONENT + ")";

    private static final String URL_OR_INTENT =
            "(" + URL_WITH_OPTIONAL_SCHEME_AND_PORT + "|" + INTENT + ")";

    private static final Pattern WEB_URL =
            Pattern.compile("(\\b|^)" // Always start on a word boundary or start of string.
                    + "(" + URL_OR_INTENT + ")" // Main URL or Intent scheme/domain/root path.
                    + "(/" + PATH_CHAR + "*)?" // Rest of the URI path.
                    + "(\\b|$)"); // Always end on a word boundary or end of string.

    // Example variant info chromium-TrichromeChromeGoogle6432.aab
    private static final String CHROME_VARIANT_INFO = "chromium-[^\\.]+\\.aab";
    private static final Pattern LIKELY_EXCEPTION_LOG = Pattern.compile("\\sat\\s"
            // These are all package prefixes of classes that are likely to
            // exist on a stacktrace and are very unlikely to be a PII url.
            + "(org\\.chromium|com\\.google|java|android|com\\.android)\\.[^ ]+.|"
            // if a line has what looks like line number info, it's probably an
            // exception log.
            + "\\(" + CHROME_VARIANT_INFO
            + "[^:]+:\\d+\\)|"
            // When a class is not found it can fail to satisfy our isClass
            // check but is still worth noting what it was.
            + "Caused by: java\\.lang\\."
            + "(ClassNotFoundException|NoClassDefFoundError):");

    private static final String IP_ELISION = "1.2.3.4";
    private static final String MAC_ELISION = "01:23:45:67:89:AB";
    private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE";

    // Six hex pairs separated by one or more '-' or ':' characters.
    private static final Pattern MAC_ADDRESS =
            Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}");

    private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*");

    // Prefixes of URL-looking matches that are treated as package names rather than URLs.
    private static final String[] APP_NAMESPACE =
            new String[] {"org.chromium.", "com.google.", "com.chrome."};

    private static final String[] SYSTEM_NAMESPACE =
            new String[] {"android.", "com.android.", "dalvik.", "java.", "javax.", "org.apache.",
                    "org.json.", "org.w3c.dom.", "org.xml.", "org.xmlpull.", "System."};

    // Captions that introduce a nested exception (and its message) inside a printed stacktrace.
    private static final String CAUSED_BY_PREFIX = "Caused by:";
    private static final String SUPPRESSED_PREFIX = "Suppressed:";

    /**
     * Elides any emails in the specified {@link String} with
     * {@link #EMAIL_ELISION}.
     *
     * @param original String potentially containing emails.
     * @return String with elided emails.
     */
    public static String elideEmail(String original) {
        return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION);
    }

    /**
     * Elides any URLs in the specified {@link String} with
     * {@link #URL_ELISION}.
     *
     * <p>The input is returned unchanged when it looks like an exception log line. Matches that
     * start with a known app or system namespace, or that resolve to a loadable class (or a
     * member of one), are left in place.
     *
     * @param original String potentially containing URLs.
     * @return String with elided URLs.
     */
    public static String elideUrl(String original) {
        // Url-matching is fussy. If something looks like an exception message, just return.
        if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original;
        StringBuilder buffer = new StringBuilder(original);
        Matcher matcher = WEB_URL.matcher(buffer);
        int start = 0;
        while (matcher.find(start)) {
            start = matcher.start();
            int end = matcher.end();
            String url = buffer.substring(start, end);
            if (!likelyToBeAppNamespace(url) && !likelyToBeSystemNamespace(url)
                    && !likelyToBeClassOrMethodName(url)) {
                buffer.replace(start, end, URL_ELISION);
                end = start + URL_ELISION.length();
                // The buffer contents changed, so build a fresh matcher over the new text.
                matcher = WEB_URL.matcher(buffer);
            }
            // Resume searching right after the match (or after the inserted placeholder).
            start = end;
        }
        return buffer.toString();
    }

    /**
     * Returns true if the suspected URL names a loadable class, or if the part before its last
     * period does (i.e. it may be a "Class.method" reference).
     */
    private static boolean likelyToBeClassOrMethodName(String url) {
        if (isClassName(url)) return true;

        // Since the suspected URL could actually be a method name, check if the portion preceding
        // the last subdomain is a class name.
        int indexOfLastPeriod = url.lastIndexOf(".");
        if (indexOfLastPeriod == -1) return false;
        return isClassName(url.substring(0, indexOfLastPeriod));
    }

    /**
     * Returns true if a class with the given name can be looked up (without initializing it)
     * through the application context's class loader.
     */
    private static boolean isClassName(String url) {
        try {
            Class.forName(url, false, ContextUtils.getApplicationContext().getClassLoader());
            return true;
        } catch (Throwable ignored) {
            // Deliberately swallowed: any failure means "not a class name".
            // Some examples: ClassNotFoundException, NoClassDefFoundException, VerifyError.
        }
        return false;
    }

    /** Returns true if the suspected URL starts with one of the {@link #APP_NAMESPACE} prefixes. */
    private static boolean likelyToBeAppNamespace(String url) {
        return startsWithAny(url, APP_NAMESPACE);
    }

    /**
     * Returns true if the suspected URL starts with one of the {@link #SYSTEM_NAMESPACE}
     * prefixes.
     */
    private static boolean likelyToBeSystemNamespace(String url) {
        return startsWithAny(url, SYSTEM_NAMESPACE);
    }

    /** Returns true if {@code value} starts with at least one of {@code prefixes}. */
    private static boolean startsWithAny(String value, String[] prefixes) {
        for (String prefix : prefixes) {
            if (value.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Elides any IP addresses in the specified {@link String} with
     * {@link #IP_ELISION}.
     *
     * @param original String potentially containing IPs.
     * @return String with elided IPs.
     */
    public static String elideIp(String original) {
        return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION);
    }

    /**
     * Elides any MAC addresses in the specified {@link String} with
     * {@link #MAC_ELISION}.
     *
     * @param original String potentially containing MACs.
     * @return String with elided MACs.
     */
    public static String elideMac(String original) {
        return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION);
    }

    /**
     * Elides any console messages in the specified {@link String} with
     * {@link #CONSOLE_ELISION}.
     *
     * @param original String potentially containing console messages.
     * @return String with elided console messages.
     */
    public static String elideConsole(String original) {
        return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION);
    }

    /**
     * Elides any URL in the exception messages contained inside a stacktrace with
     * {@link #URL_ELISION}.
     *
     * <p>Only lines that carry an exception message are sanitized: the first line, and every
     * "Caused by:" or "Suppressed:" header line regardless of its indentation. Stack frame lines
     * are left untouched.
     *
     * @param stacktrace Multiline stacktrace as a string.
     * @return Stacktrace with elided URLs, or an empty string if the input is null, empty or
     *         consists only of newlines.
     */
    public static String sanitizeStacktrace(String stacktrace) {
        if (TextUtils.isEmpty(stacktrace)) {
            return "";
        }
        String[] frames = stacktrace.split("\\n");
        // split() drops trailing empty strings, so an input made only of newlines yields an
        // empty array; there is nothing to sanitize and indexing frames[0] would throw.
        if (frames.length == 0) {
            return "";
        }
        // Sanitize first stacktrace line which contains the exception message.
        frames[0] = elideUrl(frames[0]);
        for (int i = 1; i < frames.length; i++) {
            // Nested exceptions should also have their message sanitized.
            if (isNestedExceptionHeader(frames[i])) {
                frames[i] = elideUrl(frames[i]);
            }
        }
        return TextUtils.join("\n", frames);
    }

    /**
     * Returns true if the line, ignoring leading whitespace, starts with "Caused by:" or
     * "Suppressed:". Leading whitespace is skipped because headers of suppressed exceptions (and
     * of their causes) are printed indented.
     */
    private static boolean isNestedExceptionHeader(String line) {
        int offset = 0;
        while (offset < line.length() && Character.isWhitespace(line.charAt(offset))) {
            offset++;
        }
        return line.startsWith(CAUSED_BY_PREFIX, offset)
                || line.startsWith(SUPPRESSED_PREFIX, offset);
    }

    /**
     * Returns a sanitized stacktrace (per {@link #sanitizeStacktrace(String)}) for the given
     * throwable.
     */
    @CalledByNative
    public static String getSanitizedStacktrace(Throwable throwable) {
        return sanitizeStacktrace(Log.getStackTraceString(throwable));
    }
}