aboutsummaryrefslogtreecommitdiff
path: root/java/com/ibm/icu/text/SelectFormat.java
blob: c0627449993bcdbfaeba78d1fcf9e619169a23a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
/*
 *******************************************************************************
 * Copyright (C) 2004-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 * Copyright (C) 2009 , Yahoo! Inc.                                            *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.FieldPosition;
import java.text.Format;
import java.text.ParsePosition;

import com.ibm.icu.impl.PatternProps;

/**
 * <p><code>SelectFormat</code> supports the creation of  internationalized
 * messages by selecting phrases based on keywords. The pattern  specifies
 * how to map keywords to phrases and provides a default phrase. The
 * object provided to the format method is a string that's matched
 * against the keywords. If there is a match, the corresponding phrase
 * is selected; otherwise, the default phrase is used.</p>
 *
 * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
 *
 * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
 * with a <code>select</code> argument type,
 * rather than using a stand-alone <code>SelectFormat</code>.</p>
 *
 * <p>The main use case for the select format is gender based  inflection.
 * When names or nouns are inserted into sentences, their gender can  affect pronouns,
 * verb forms, articles, and adjectives. Special care needs to be
 * taken for the case where the gender cannot be determined.
 * The impact varies between languages:</p>
 *
 * <ul>
 * <li>English has three genders, and unknown gender is handled as a  special
 * case. Names use the gender of the named person (if known), nouns  referring
 * to people use natural gender, and inanimate objects are usually  neutral.
 * The gender only affects pronouns: "he", "she", "it", "they".
 *
 * <li>German differs from English in that the gender of nouns is  rather
 * arbitrary, even for nouns referring to people ("M&#u00E4;dchen", girl, is  neutral).
 * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
 * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#u00E4;dchen").
 *
 * <li>French has only two genders; as in German the gender of nouns
 * is rather arbitrary - for sun and moon, the genders
 * are the opposite of those in German. The gender affects
 * pronouns ("il", "elle"), articles ("le", "la"),
 * adjective forms ("bon", "bonne"), and sometimes
 * verb forms ("all&#u00E9;", "all&#u00E9e;").
 *
 * <li>Polish distinguishes five genders (or noun classes),
 * human masculine, animate non-human masculine, inanimate masculine,
 * feminine, and neuter.
 * </ul>
 *
 * <p>Some other languages have noun classes that are not related to  gender,
 * but similar in grammatical use.
 * Some African languages have around 20 noun classes.</p>
 *
 * <p><b>Note:</b>For the gender of a <i>person</i> in a given sentence,
 * we usually need to distinguish only between female, male and other/unknown.</p>
 *
 * <p>To enable localizers to create sentence patterns that take their
 * language's gender dependencies into consideration, software has to  provide
 * information about the gender associated with a noun or name to
 * <code>MessageFormat</code>.
 * Two main cases can be distinguished:</p>
 *
 * <ul>
 * <li>For people, natural gender information should be maintained  for each person.
 * Keywords like "male", "female", "mixed" (for groups of people)
 * and "unknown" could be used.
 *
 * <li>For nouns, grammatical gender information should be maintained  for
 * each noun and per language, e.g., in resource bundles.
 * The keywords "masculine", "feminine", and "neuter" are commonly  used,
 * but some languages may require other keywords.
 * </ul>
 *
 * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
 * parameter separate from the name or noun it's associated with. For  example,
 * to generate a message such as "Jean went to Paris", three separate  arguments
 * would be provided: The name of the person as argument 0, the  gender of
 * the person as argument 1, and the name of the city as argument 2.
 * The sentence pattern for English, where the gender of the person has
 * no impact on this simple sentence, would not refer to argument 1  at all:</p>
 *
 * <pre>{0} went to {2}.</pre>
 *
 * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
 * inside each phrase. Otherwise translators would have to be trained on how to
 * move bits of the sentence in and out of the select argument of a message.
 * (The examples below do not follow this recommendation!)</p>
 * 
 * <p>The sentence pattern for French, where the gender of the person affects
 * the form of the participle, uses a select format based on argument 1:</p>
 *
 * <pre>{0} est {1, select, female {all&#u00E9;e} other {all&#u00E9;}} &#u00E0; {2}.</pre>
 *
 * <p>Patterns can be nested, so that it's possible to handle  interactions of
 * number and gender where necessary. For example, if the above  sentence should
 * allow for the names of several people to be inserted, the  following sentence
 * pattern can be used (with argument 0 the list of people's names,  
 * argument 1 the number of people, argument 2 their combined gender, and  
 * argument 3 the city name):</p>
 *
 * <pre>{0} {1, plural, 
 * one {est {2, select, female {all&#u00E9;e} other  {all&#u00E9;}}}
 * other {sont {2, select, female {all&#u00E9;es} other {all&#u00E9;s}}}
 * }&#u00E0; {3}.</pre>
 *
 * <h4>Patterns and Their Interpretation</h4>
 *
 * <p>The <code>SelectFormat</code> pattern string defines the phrase  output
 * for each user-defined keyword.
 * The pattern is a sequence of (keyword, message) pairs.
 * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
 *
 * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
 *
 * <p>You always have to define a phrase for the default keyword
 * <code>other</code>; this phrase is returned when the keyword  
 * provided to
 * the <code>format</code> method matches no other keyword.
 * If a pattern does not provide a phrase for <code>other</code>, the  method
 * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
 * <br/>
 * Pattern_White_Space between keywords and messages is ignored.
 * Pattern_White_Space within a message is preserved and output.</p>
 *
 * <p><pre>Example:
 * MessageFormat msgFmt = new MessageFormat("{0} est " +
 *     "{1, select, female {all&#u00E9;e} other {all&#u00E9;}} &#u00E0; Paris.",
 *     new ULocale("fr"));
 * Object args[] = {"Kirti","female"};
 * System.out.println(msgFmt.format(args));
 * </pre>
 * <p>
 * Produces the output:<br/>
 * <code>Kirti est all&#u00E9;e &#u00E0; Paris.</code>
 * </p>
 *
 * @stable ICU 4.4
 */

public class SelectFormat extends Format{
    // Generated by serialver from JDK 1.5
    private static final long serialVersionUID = 2993154333257524984L;

    /*
     * The applied pattern string.
     */
    private String pattern = null;

    /**
     * The MessagePattern which contains the parsed structure of the pattern string.
     */
    transient private MessagePattern msgPattern;
    
    /**
     * Creates a new <code>SelectFormat</code> for a given pattern string.
     * @param  pattern the pattern for this <code>SelectFormat</code>.
     * @stable ICU 4.4
     */
    public SelectFormat(String pattern) {
        applyPattern(pattern);
    }

    /*
     * Resets the <code>SelectFormat</code> object.
     */
    private void reset() {
        pattern = null;
        if(msgPattern != null) {
            msgPattern.clear();
        }
    }

    /**
     * Sets the pattern used by this select format.
     * Patterns and their interpretation are specified in the class description.
     *
     * @param pattern the pattern for this select format.
     * @throws IllegalArgumentException when the pattern is not a valid select format pattern.
     * @stable ICU 4.4
     */
    public void applyPattern(String pattern) {
        this.pattern = pattern;
        if (msgPattern == null) {
            msgPattern = new MessagePattern();
        }
        try {
            msgPattern.parseSelectStyle(pattern);
        } catch(RuntimeException e) {
            reset();
            throw e;
        }
    }

    /**
     * Returns the pattern for this <code>SelectFormat</code>
     *
     * @return the pattern string
     * @stable ICU 4.4
     */
    public String toPattern() {
        return pattern;
    }

    /**
     * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
     * @param pattern A MessagePattern.
     * @param partIndex the index of the first SelectFormat argument style part.
     * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
     * @return the sub-message start part index.
     */
    public static int findSubMessage(MessagePattern pattern, int partIndex, String keyword) {
        int count=pattern.countParts();
        int msgStart=0;
        // Iterate over (ARG_SELECTOR, message) pairs until ARG_LIMIT or end of select-only pattern.
        do {
            MessagePattern.Part part=pattern.getPart(partIndex++);
            MessagePattern.Part.Type type=part.getType();
            if(type==MessagePattern.Part.Type.ARG_LIMIT) {
                break;
            }
            assert type==MessagePattern.Part.Type.ARG_SELECTOR;
            // part is an ARG_SELECTOR followed by a message
            if(pattern.partSubstringMatches(part, keyword)) {
                // keyword matches
                return partIndex;
            } else if(msgStart==0 && pattern.partSubstringMatches(part, "other")) {
                msgStart=partIndex;
            }
            partIndex=pattern.getLimitPartIndex(partIndex);
        } while(++partIndex<count);
        return msgStart;
    }

    /**
     * Selects the phrase for the given keyword.
     *
     * @param keyword a phrase selection keyword.
     * @return the string containing the formatted select message.
     * @throws IllegalArgumentException when the given keyword is not a "pattern identifier"
     * @stable ICU 4.4
     */
    public final String format(String keyword) {
        //Check for the validity of the keyword
        if (!PatternProps.isIdentifier(keyword)) {
            throw new IllegalArgumentException("Invalid formatting argument.");
        }
        // If no pattern was applied, throw an exception
        if (msgPattern == null || msgPattern.countParts() == 0) {
            throw new IllegalStateException("Invalid format error.");
        }

        // Get the appropriate sub-message.
        int msgStart = findSubMessage(msgPattern, 0, keyword);
        if (!msgPattern.jdkAposMode()) {
            int msgLimit = msgPattern.getLimitPartIndex(msgStart);
            return msgPattern.getPatternString().substring(msgPattern.getPart(msgStart).getLimit(),
                                                           msgPattern.getPatternIndex(msgLimit));
        }
        // JDK compatibility mode: Remove SKIP_SYNTAX.
        StringBuilder result = null;
        int prevIndex = msgPattern.getPart(msgStart).getLimit();
        for (int i = msgStart;;) {
            MessagePattern.Part part = msgPattern.getPart(++i);
            MessagePattern.Part.Type type = part.getType();
            int index = part.getIndex();
            if (type == MessagePattern.Part.Type.MSG_LIMIT) {
                if (result == null) {
                    return pattern.substring(prevIndex, index);
                } else {
                    return result.append(pattern, prevIndex, index).toString();
                }
            } else if (type == MessagePattern.Part.Type.SKIP_SYNTAX) {
                if (result == null) {
                    result = new StringBuilder();
                }
                result.append(pattern, prevIndex, index);
                prevIndex = part.getLimit();
            } else if (type == MessagePattern.Part.Type.ARG_START) {
                if (result == null) {
                    result = new StringBuilder();
                }
                result.append(pattern, prevIndex, index);
                prevIndex = index;
                i = msgPattern.getLimitPartIndex(i);
                index = msgPattern.getPart(i).getLimit();
                MessagePattern.appendReducedApostrophes(pattern, prevIndex, index, result);
                prevIndex = index;
            }
        }
    }

    /**
     * Selects the phrase for the given keyword.
     * and appends the formatted message to the given <code>StringBuffer</code>.
     * @param keyword a phrase selection keyword.
     * @param toAppendTo the selected phrase will be appended to this
     *        <code>StringBuffer</code>.
     * @param pos will be ignored by this method.
     * @throws IllegalArgumentException when the given keyword is not a String
     *         or not a "pattern identifier"
     * @return the string buffer passed in as toAppendTo, with formatted text
     *         appended.
     * @stable ICU 4.4
     */
    public StringBuffer format(Object keyword, StringBuffer toAppendTo,
            FieldPosition pos) {
        if (keyword instanceof String) {
            toAppendTo.append(format( (String)keyword));
        }else{
            throw new IllegalArgumentException("'" + keyword + "' is not a String");
        }
        return toAppendTo;
    }

    /**
     * This method is not supported by <code>SelectFormat</code>.
     * @param source the string to be parsed.
     * @param pos defines the position where parsing is to begin,
     * and upon return, the position where parsing left off.  If the position
     * has not changed upon return, then parsing failed.
     * @return nothing because this method is not supported.
     * @throws UnsupportedOperationException thrown always.
     * @stable ICU 4.4
     */
    public Object parseObject(String source, ParsePosition pos) {
        throw new UnsupportedOperationException();
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public boolean equals(Object obj) {
        if(this == obj) {
            return true;
        }
        if(obj == null || getClass() != obj.getClass()) {
            return false;
        }
        SelectFormat sf = (SelectFormat) obj;
        return msgPattern == null ? sf.msgPattern == null : msgPattern.equals(sf.msgPattern);
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public int hashCode() {
        if (pattern != null) {
            return pattern.hashCode();
        }
        return 0;
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public String toString() {
        return "pattern='" + pattern + "'";
    }

    private void readObject(ObjectInputStream in)
        throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        if (pattern != null) {
            applyPattern(pattern);
        }
    }
}