summaryrefslogtreecommitdiff
path: root/android_icu4j/src/main/java/android/icu/text/IdentifierInfo.java
blob: 5985d4d338529d2b4efb2e3ea65157ec3673413c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
/* GENERATED SOURCE. DO NOT MODIFY. */
/*
 ***************************************************************************
 * Copyright (C) 2008-2015, Google, International Business Machines Corporation
 * and others. All Rights Reserved.
 ***************************************************************************
 */
package android.icu.text;

import java.util.BitSet;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

import android.icu.lang.UCharacter;
import android.icu.lang.UCharacterCategory;
import android.icu.lang.UScript;
import android.icu.text.SpoofChecker.RestrictionLevel;

/**
 * This class analyzes a possible identifier for script and identifier status. Use it by calling setIdentifierProfile
 * then setIdentifier. Available methods include:
 * <ol>
 * <li>call getScripts for the specific scripts in the identifier. The identifier contains at least one character in
 * each of these.
 * <li>call getAlternates to get cases where a character is not limited to a single script. For example, it could be
 * either Katakana or Hiragana.
 * <li>call getCommonAmongAlternates to find out if any scripts are common to all the alternates.
 * <li>call getNumerics to get a representative character (with value zero) for each of the decimal number systems in
 * the identifier.
 * <li>call getRestrictionLevel to see what the UTS36 restriction level is.
 * </ol>
 * 
 * @author markdavis
 * @deprecated This API is ICU internal only.
 * @hide Only a subset of ICU is exposed in Android
 * @hide draft / provisional / internal are hidden on Android
 */
@Deprecated
public class IdentifierInfo {

    // Code points 0..0x7F, frozen after construction. getRestrictionLevel() uses it to
    // recognize identifiers that consist only of ASCII.
    private static final UnicodeSet ASCII = new UnicodeSet(0, 0x7F).freeze();

    // The string most recently passed to setIdentifier(); null until the first call.
    private String identifier;
    // Union of the script sets of every code point that has exactly one script
    // (after Common and Inherited have been cleared). Filled in by setIdentifier().
    private final BitSet requiredScripts = new BitSet();
    // One BitSet per distinct multi-script ("alternate") code point set that survived
    // the pruning pass in setIdentifier().
    private final Set<BitSet> scriptSetSet = new HashSet<BitSet>();
    // Intersection accumulated over the alternates in setIdentifier(); empty when
    // scriptSetSet is empty.
    private final BitSet commonAmongAlternates = new BitSet();
    // For each decimal digit seen, the code point (cp - numeric value), i.e. the zero digit
    // of that digit's number system.
    private final UnicodeSet numerics = new UnicodeSet();
    // Allowed characters; initially the whole range 0..0x10FFFF. The object itself is never
    // replaced, only updated through setIdentifierProfile().
    private final UnicodeSet identifierProfile = new UnicodeSet(0, 0x10FFFF);

    /**
     * Create an identifier info object. Subsequently, call {@link #setIdentifier(String)}, etc.
     * {@link #setIdentifierProfile(UnicodeSet)}
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public IdentifierInfo() {
        super();
    }

    // Discards every per-identifier result so a new identifier can be analyzed.
    // The identifier string and the identifier profile are deliberately left alone.
    private IdentifierInfo clear() {
        numerics.clear();
        commonAmongAlternates.clear();
        scriptSetSet.clear();
        requiredScripts.clear();
        return this;
    }

    /**
     * Sets the identifier profile, i.e. the characters that an identifier is allowed to contain.
     * The given set is applied to this object's own profile set through
     * {@code UnicodeSet.set(UnicodeSet)}; the profile object held by this instance is not replaced.
     *
     * @param identifierProfile the characters that are to be allowed in the identifier
     * @return self
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public IdentifierInfo setIdentifierProfile(UnicodeSet identifierProfile) {
        final UnicodeSet ownProfile = this.identifierProfile;
        ownProfile.set(identifierProfile);
        return this;
    }

    /**
     * Returns the identifier profile: the characters that are allowed in the identifier.
     * A new {@code UnicodeSet} built from the internal profile is returned on every call.
     *
     * @return The characters that are to be allowed in the identifier.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public UnicodeSet getIdentifierProfile() {
        final UnicodeSet profileCopy = new UnicodeSet(this.identifierProfile);
        return profileCopy;
    }

    /**
     * Set an identifier to analyze. Afterwards, call methods like getScripts(), getAlternates(),
     * getNumerics() or getRestrictionLevel().
     * <p>
     * Results of any previous call are discarded first. Each code point of the identifier is then
     * examined once:
     * <ul>
     * <li>for a decimal digit, the code point (cp - numeric value) is added to the numerics;
     * <li>its script extensions, with Common and Inherited cleared, are recorded as a required
     * script (exactly one script) or as a set of alternates (more than one script).
     * </ul>
     * A final pass removes alternate sets that overlap the required scripts or that contain another
     * alternate set, and accumulates the scripts common among the alternates.
     * 
     * @param identifier the identifier to analyze; must not be null (its length is read)
     * @return self
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated    
    public IdentifierInfo setIdentifier(String identifier) {
        this.identifier = identifier;
        clear();
        // Scratch set for the scripts of the current code point; reused across iterations
        // unless it gets stored in scriptSetSet (see below).
        BitSet scriptsForCP = new BitSet();
        int cp;
        // Iterate by code point, not by char, so supplementary characters are handled as one unit.
        for (int i = 0; i < identifier.length(); i += Character.charCount(cp)) {
            cp = Character.codePointAt(identifier, i);
            // Store a representative character for each kind of decimal digit
            if (UCharacter.getType(cp) == UCharacterCategory.DECIMAL_DIGIT_NUMBER) {
                // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
                numerics.add(cp - UCharacter.getNumericValue(cp));
            }
            UScript.getScriptExtensions(cp, scriptsForCP);
            // Common and Inherited never count as scripts of the identifier.
            scriptsForCP.clear(UScript.COMMON);
            scriptsForCP.clear(UScript.INHERITED);
            //            if (temp.cardinality() == 0) {
            //                // HACK for older version of ICU
            //                requiredScripts.set(UScript.getScript(cp));
            //            } else 
            switch (scriptsForCP.cardinality()) {
            case 0: break; // only Common/Inherited: contributes nothing
            case 1:
                // Single script, record it.
                requiredScripts.or(scriptsForCP);
                break;
            default:
                // Several possible scripts. Only remember the set if none of them is already
                // required at this point; sets made redundant by a required script that shows
                // up later are removed in the final pass below.
                if (!requiredScripts.intersects(scriptsForCP) 
                        && scriptSetSet.add(scriptsForCP)) {
                    // The set now holds this instance, so continue with a fresh scratch BitSet.
                    scriptsForCP = new BitSet();
                }
                break;
            }
        }
        // Now make a final pass through to remove alternates that came before singles.
        // [Kana], [Kana Hira] => [Kana]
        // This is relatively infrequent, so doesn't have to be optimized.
        // We also compute any commonalities among the alternates.
        if (scriptSetSet.size() > 0) {
            // Start from "all scripts" so that successive and() calls yield an intersection.
            commonAmongAlternates.set(0, UScript.CODE_LIMIT);
            for (Iterator<BitSet> it = scriptSetSet.iterator(); it.hasNext();) {
                final BitSet next = it.next();
                // [Kana], [Kana Hira] => [Kana]
                if (requiredScripts.intersects(next)) {
                    it.remove();
                } else {
                    // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
                    commonAmongAlternates.and(next); // get the intersection.
                    // Drop 'next' when it is a superset of some other alternate. Removal goes
                    // through the outer iterator, and the inner loop is left immediately afterwards.
                    for (BitSet other : scriptSetSet) {
                        if (next != other && contains(next, other)) {
                            it.remove();
                            break;
                        }
                    }
                }
            }
        }
        // Without any remaining alternates there is nothing in common to report.
        if (scriptSetSet.size() == 0) {
            commonAmongAlternates.clear();
        }
        return this;
    }

    /**
     * Returns the identifier that was last handed to {@link #setIdentifier(String)}.
     *
     * @return the identifier that was analyzed, or null if none has been set yet.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public String getIdentifier() {
        return this.identifier;
    }

    /**
     * Returns the scripts that the analyzed identifier definitely uses, i.e. the scripts of
     * characters that belong to exactly one script. The result is a clone, so changing it does
     * not affect this object.
     *
     * @return the set of explicit scripts.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public BitSet getScripts() {
        final Object scriptsCopy = requiredScripts.clone();
        return (BitSet) scriptsCopy;
    }

    /**
     * Returns the sets of alternate scripts found in the identifier. When a character can belong
     * to more than one script, the set consisting of those scripts is one element of the result.
     * Both the returned set and the BitSets inside it are copies.
     *
     * @return the set of alternate script sets.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public Set<BitSet> getAlternates() {
        final Set<BitSet> copies = new HashSet<BitSet>();
        final Iterator<BitSet> alternates = scriptSetSet.iterator();
        while (alternates.hasNext()) {
            final BitSet alternate = alternates.next();
            copies.add((BitSet) alternate.clone());
        }
        return copies;
    }

    /**
     * Returns the representative characters (zeros) for the decimal number systems found in the
     * identifier. A new {@code UnicodeSet} is returned on every call.
     *
     * @return the set of zero digits, one per decimal number system used in the identifier.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public UnicodeSet getNumerics() {
        final UnicodeSet zeroDigits = new UnicodeSet(this.numerics);
        return zeroDigits;
    }

    /**
     * Returns the scripts that the alternates have in common. The result is a clone, so changing
     * it does not affect this object.
     *
     * @return the set of scripts that are in common among the alternates.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public BitSet getCommonAmongAlternates() {
        final Object commonCopy = commonAmongAlternates.clone();
        return (BitSet) commonCopy;
    }

    // Script combinations consulted by getRestrictionLevel(). BitSet has no "contains(...)"
    // operation of its own, so the checks go through the contains()/containsWithAlternates()
    // helpers of this class.
    // These sets are private because a BitSet cannot be made immutable in Java; they must never
    // be modified after initialization.

    // Latin + Han + Hiragana + Katakana
    private final static BitSet JAPANESE = set(new BitSet(), UScript.LATIN, UScript.HAN, UScript.HIRAGANA,
            UScript.KATAKANA);
    // Latin + Han + Bopomofo
    private final static BitSet CHINESE = set(new BitSet(), UScript.LATIN, UScript.HAN, UScript.BOPOMOFO);
    // Latin + Han + Hangul
    private final static BitSet KOREAN = set(new BitSet(), UScript.LATIN, UScript.HAN, UScript.HANGUL);
    // Scripts that keep a Latin-plus-one-script identifier from being MODERATELY_RESTRICTIVE.
    private final static BitSet CONFUSABLE_WITH_LATIN = set(new BitSet(), UScript.CYRILLIC, UScript.GREEK,
            UScript.CHEROKEE);

    /**
     * Finds the "tightest" restriction level that the analyzed identifier satisfies.
     * <p>
     * The checks are made in this order:
     * <ol>
     * <li>UNRESTRICTIVE if the identifier has characters outside the profile or digits from more
     * than one decimal number system;
     * <li>ASCII if every character is in the range 0..0x7F;
     * <li>SINGLE_SCRIPT_RESTRICTIVE if fewer than two scripts are counted;
     * <li>HIGHLY_RESTRICTIVE if the scripts fit the Japanese, Chinese or Korean combination;
     * <li>MODERATELY_RESTRICTIVE if exactly two scripts are counted, one is Latin and none is
     * Cyrillic, Greek or Cherokee;
     * <li>MINIMALLY_RESTRICTIVE otherwise.
     * </ol>
     *
     * @return the restriction level.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public RestrictionLevel getRestrictionLevel() {
        if (!identifierProfile.containsAll(identifier) || getNumerics().size() > 1) {
            return RestrictionLevel.UNRESTRICTIVE;
        }
        if (ASCII.containsAll(identifier)) {
            return RestrictionLevel.ASCII;
        }

        // Script count used below: the required scripts (Common and Inherited were already
        // removed in setIdentifier()), plus one if the alternates share at least one script
        // (e.g. [Arab Thaa]; [Arab Syrc]), or plus the number of alternate sets if they share
        // nothing. This is only an approximation, which is fine because the value is only
        // ever compared against 2.
        final int alternateWeight;
        if (commonAmongAlternates.cardinality() == 0) {
            alternateWeight = scriptSetSet.size();
        } else {
            alternateWeight = 1;
        }
        final int scriptTotal = requiredScripts.cardinality() + alternateWeight;

        if (scriptTotal < 2) {
            return RestrictionLevel.SINGLE_SCRIPT_RESTRICTIVE;
        }

        // The CJK combinations are tried one after the other; the first match wins.
        if (containsWithAlternates(JAPANESE, requiredScripts)) {
            return RestrictionLevel.HIGHLY_RESTRICTIVE;
        }
        if (containsWithAlternates(CHINESE, requiredScripts)) {
            return RestrictionLevel.HIGHLY_RESTRICTIVE;
        }
        if (containsWithAlternates(KOREAN, requiredScripts)) {
            return RestrictionLevel.HIGHLY_RESTRICTIVE;
        }

        final boolean latinPlusOneOther = scriptTotal == 2
                && requiredScripts.get(UScript.LATIN)
                && !requiredScripts.intersects(CONFUSABLE_WITH_LATIN);
        return latinPlusOneOther
                ? RestrictionLevel.MODERATELY_RESTRICTIVE
                : RestrictionLevel.MINIMALLY_RESTRICTIVE;
    }

    /**
     * Returns the number of scripts appearing in the identifier.
     * <ul>
     * <li>Common and Inherited are not counted.
     * <li>Characters that can belong to several scripts (alternates) are counted so that the
     * total stays small: if all alternate sets share a script they add one to the count,
     * otherwise each alternate set adds one.
     * </ul>
     * The handling of alternates may not be perfect. The distinction between 0, 1 and more than
     * 1 scripts will be valid, however.
     *
     * @return the number of scripts.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public int getScriptCount() {
        // Common and Inherited were removed by setIdentifier(), so they do not appear in
        // requiredScripts.
        final int alternateContribution;
        if (commonAmongAlternates.cardinality() == 0) {
            alternateContribution = scriptSetSet.size();
        } else {
            alternateContribution = 1;
        }
        return requiredScripts.cardinality() + alternateContribution;
    }

    /**
     * Returns a description made of, in this order and separated by ", ": the identifier, the
     * profile pattern, the restriction level, the required scripts, the alternates and the
     * numerics pattern.
     *
     * @see Object#toString()
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(identifier).append(", ");
        text.append(identifierProfile.toPattern(false)).append(", ");
        text.append(getRestrictionLevel()).append(", ");
        text.append(displayScripts(requiredScripts)).append(", ");
        text.append(displayAlternates(scriptSetSet)).append(", ");
        text.append(numerics.toPattern(false));
        return text.toString();
    }

    // True when 'container' holds every script of 'containee' and, in addition, shares at least
    // one script with each alternate set recorded for the current identifier.
    private boolean containsWithAlternates(BitSet container, BitSet containee) {
        boolean accepted = contains(container, containee);
        if (accepted) {
            final Iterator<BitSet> alternates = scriptSetSet.iterator();
            while (accepted && alternates.hasNext()) {
                accepted = container.intersects(alternates.next());
            }
        }
        return accepted;
    }

    /**
     * Produces a readable string for a set of alternates. The alternates are sorted with
     * {@link #BITSET_COMPARATOR} so that the output is stable, each one is rendered with
     * {@link #displayScripts(BitSet)}, and the pieces are joined with "; ".
     *
     * @param alternates a set of BitSets of script values.
     * @return display form; the empty string if there are no alternates
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static String displayAlternates(Set<BitSet> alternates) {
        if (alternates.isEmpty()) {
            return "";
        }
        // Sort first so that the result does not depend on the iteration order of the input.
        final Set<BitSet> ordered = new TreeSet<BitSet>(BITSET_COMPARATOR);
        ordered.addAll(alternates);

        final StringBuilder text = new StringBuilder();
        final Iterator<BitSet> items = ordered.iterator();
        while (items.hasNext()) {
            final String rendered = displayScripts(items.next());
            // A separator is only written once something has been written before it.
            if (text.length() > 0) {
                text.append("; ");
            }
            text.append(rendered);
        }
        return text.toString();
    }
    
    /**
     * Order BitSets, first by shortest (fewest set bits), then by items: sets of equal
     * cardinality are compared bit by bit in ascending order, and the set whose first differing
     * bit index is lower sorts first. Two sets compare as equal only if they have the same bits.
     *
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static final Comparator<BitSet> BITSET_COMPARATOR = new Comparator<BitSet>() {

        public int compare(BitSet arg0, BitSet arg1) {
            int diff = arg0.cardinality() - arg1.cardinality();
            if (diff != 0) return diff;
            int i0 = arg0.nextSetBit(0);
            int i1 = arg1.nextSetBit(0);
            // Walk both sets in parallel while the bit indices agree. The test must be
            // "i0 >= 0", not "i0 > 0": index 0 is a valid bit, and stopping on it would make
            // e.g. {0,1} and {0,2} compare as equal. nextSetBit() returns -1 once a set is
            // exhausted, which ends the loop; with equal cardinalities both sets run out
            // together, so diff is then 0.
            while ((diff = i0-i1) == 0 && i0 >= 0) {
                i0 = arg0.nextSetBit(i0+1);
                i1 = arg1.nextSetBit(i1+1);
            }
            return diff;
        }
        
    };

    /**
     * Produces a readable string for a set of scripts: the short name of every script in the set,
     * in ascending order of script code, separated by single spaces.
     *
     * @param scripts a BitSet of UScript values
     * @return a readable string of a set of scripts; empty if the set is empty
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static String displayScripts(BitSet scripts) {
        final StringBuilder names = new StringBuilder();
        int script = scripts.nextSetBit(0);
        while (script >= 0) {
            // A separator is only written once something has been written before it.
            if (names.length() > 0) {
                names.append(' ');
            }
            names.append(UScript.getShortName(script));
            script = scripts.nextSetBit(script + 1);
        }
        return names.toString();
    }

    /**
     * Parses a text list of scripts into a BitSet. The trimmed input is split on runs of
     * whitespace that may be preceded by one comma; every non-empty piece is looked up with
     * {@code UScript.getCodeFromName} and the resulting code is set in the result.
     *
     * @param scriptsString the string to be parsed
     * @return BitSet of UScript values.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static BitSet parseScripts(String scriptsString) {
        final BitSet codes = new BitSet();
        final String[] names = scriptsString.trim().split(",?\\s+");
        for (int i = 0; i < names.length; i++) {
            final String name = names[i];
            if (name.length() == 0) {
                continue; // e.g. the single empty piece produced by blank input
            }
            codes.set(UScript.getCodeFromName(name));
        }
        return codes;
    }

    /**
     * Parses a list of alternates into a set of sets of UScript values. The trimmed input is
     * split at semicolons (surrounding whitespace is ignored) and every non-empty piece is
     * handed to {@link #parseScripts(String)}.
     *
     * @param scriptsSetString a list of alternates, separated by ;
     * @return a set of BitSets of UScript values
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static Set<BitSet> parseAlternates(String scriptsSetString) {
        final Set<BitSet> alternates = new HashSet<BitSet>();
        final String[] pieces = scriptsSetString.trim().split("\\s*;\\s*");
        for (int i = 0; i < pieces.length; i++) {
            final String piece = pieces[i];
            if (piece.length() == 0) {
                continue; // nothing between two separators
            }
            alternates.add(parseScripts(piece));
        }
        return alternates;
    }

    /**
     * Tests containment: whether every bit that is set in {@code containee} is also set in
     * {@code container}. An empty {@code containee} is contained in anything. This should
     * really be a method on BitSet.
     *
     * @param container possible container to be tested
     * @param containee possible containee to be tested
     * @return true if container contains containee
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static final boolean contains(BitSet container, BitSet containee) {
        int bit = containee.nextSetBit(0);
        while (bit >= 0) {
            if (!container.get(bit)) {
                // Found a member of containee that container lacks.
                return false;
            }
            bit = containee.nextSetBit(bit + 1);
        }
        return true;
    }

    /**
     * Sets a number of bits at once in the given bitset and returns that same bitset, so calls
     * can be used in initializers. This should really be a method on BitSet.
     *
     * @param bitset bitset to be affected
     * @param values values to be set in the bitset
     * @return modified bitset.
     * @deprecated This API is ICU internal only.
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public static final BitSet set(BitSet bitset, int... values) {
        for (int index = 0; index < values.length; index++) {
            bitset.set(values[index]);
        }
        return bitset;
    }
    
    
    // public static final class FreezableBitSet extends BitSet implements Freezable<FreezableBitSet> {
    // private boolean frozen;
    //
    // public FreezableBitSet() {
    // super();
    // }
    // public FreezableBitSet(int nbits) {
    // super(nbits);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#and(java.util.BitSet)
    // */
    // @Override
    // public void and(BitSet set) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.and(set);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#andNot(java.util.BitSet)
    // */
    // @Override
    // public void andNot(BitSet set) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.andNot(set);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#cardinality()
    // */
    //
    // @Override
    // public void clear() {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.clear();
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#clear(int)
    // */
    // @Override
    // public void clear(int bitIndex) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.clear(bitIndex);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#clear(int, int)
    // */
    // @Override
    // public void clear(int fromIndex, int toIndex) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.clear(fromIndex, toIndex);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#clone()
    // */
    // @Override
    // public Object clone() {
    // return super.clone();
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#equals(java.lang.Object)
    // */
    // @Override
    // public boolean equals(Object obj) {
    // if (obj == null || obj.getClass() != FreezableBitSet.class) {
    // return false;
    // }
    // return super.equals((BitSet)obj);
    // }
    //
    // /* (non-Javadoc)
    // * @see java.util.BitSet#flip(int)
    // */
    // @Override
    // public void flip(int bitIndex) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.flip(bitIndex);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#flip(int, int)
    // */
    // @Override
    // public void flip(int fromIndex, int toIndex) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.flip(fromIndex, toIndex);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#or(java.util.BitSet)
    // */
    // @Override
    // public void or(BitSet set) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.or(set);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#set(int)
    // */
    // @Override
    // public void set(int bitIndex) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.set(bitIndex);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#set(int, boolean)
    // */
    // @Override
    // public void set(int bitIndex, boolean value) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.set(bitIndex, value);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#set(int, int)
    // */
    // @Override
    // public void set(int fromIndex, int toIndex) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.set(fromIndex, toIndex);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#set(int, int, boolean)
    // */
    // @Override
    // public void set(int fromIndex, int toIndex, boolean value) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.set(fromIndex, toIndex, value);
    // }
    // /* (non-Javadoc)
    // * @see java.util.BitSet#xor(java.util.BitSet)
    // */
    // @Override
    // public void xor(BitSet set) {
    // if (frozen) {
    // throw new UnsupportedOperationException();
    // }
    // super.xor(set);
    // }
    // /* (non-Javadoc)
    // * @see android.icu.util.Freezable#isFrozen()
    // */
    // public boolean isFrozen() {
    // return frozen;
    // }
    // /* (non-Javadoc)
    // * @see android.icu.util.Freezable#freeze()
    // */
    // public FreezableBitSet freeze() {
    // frozen = true;
    // return this;
    // }
    // /* (non-Javadoc)
    // * @see android.icu.util.Freezable#cloneAsThawed()
    // */
    // public FreezableBitSet cloneAsThawed() {
    // FreezableBitSet result = new FreezableBitSet(size());
    // result.or(this);
    // return result;
    // }
    // }
}