diff options
author | jshin@chromium.org <jshin@chromium.org> | 2014-09-17 18:53:08 +0000 |
---|---|---|
committer | jshin@chromium.org <jshin@chromium.org> | 2014-09-17 18:53:08 +0000 |
commit | d2abf6c1e1f986f4a8db0341b8a8c55c55ec1174 (patch) | |
tree | 69dd609650394c53786e9cfd1c4d6d0623f20bcd | |
parent | 89831135d5f76413fcdd3852f8de7f344b3fa40c (diff) | |
download | icu-d2abf6c1e1f986f4a8db0341b8a8c55c55ec1174.tar.gz |
Fix a bug in ICU's bidi handling.
Cherry-pick an upstream patch from
http://bugs.icu-project.org/trac/ticket/11054
BUG=402297
TEST=See comment 7 and comment 27 in the bug (after rolling DEPS with this
CL).
R=ckocagil@chromium.org
TBR=ckocagil@chromium.org
Review URL: https://codereview.chromium.org/566073003
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/icu52@292003 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
-rw-r--r-- | README.chromium | 58 | ||||
-rw-r--r-- | patches/bidi.patch | 95 | ||||
-rw-r--r-- | source/common/ubidi.c | 8 | ||||
-rw-r--r-- | source/common/ubidiimp.h | 2 | ||||
-rw-r--r-- | source/test/cintltst/cbiditst.c | 25 |
5 files changed, 156 insertions, 32 deletions
diff --git a/README.chromium b/README.chromium index 37660de..a9f786c 100644 --- a/README.chromium +++ b/README.chromium @@ -19,11 +19,11 @@ This directory contains the source code of ICU 52.1 for C/C++ - source/layoutex - source/data/xml - patches/configure.patch is applied to get runConfigureICU work in the + patches/configure.patch is applied to get runConfigureICU work in the icudata generation step without layout and layoutex directory by removing the corresponding Makefile's from ac_config variable. -2. Apply the following patch for platform related headers (putilimpl.h and +2. Apply the following patch for platform related headers (putilimpl.h and others). - patches/putil.patch for Android, QNX and newlib(NaCl-newlib). @@ -37,12 +37,12 @@ This directory contains the source code of ICU 52.1 for C/C++ 3. Breakiterator patches - - Apply patches/brkitr.patch - * word.txt + - Apply patches/brkitr.patch + * word.txt a. Move full stops (U+002E, U+FF0E) from MidNumLet to MidNum so that FQDN labels can be split at '.' b. Move fullwidth digits (U+FF10 - U+FF19) from Ideographic to Numeric. - See http://unicode.org/cldr/trac/ticket/6555 + See http://unicode.org/cldr/trac/ticket/6555 * line.txt a. Use Japanese rules for all locales because Japanese tailoring only affects Japanese specific characters. @@ -51,15 +51,15 @@ This directory contains the source code of ICU 52.1 for C/C++ more consistenly. See http://unicode.org/cldr/trac/ticket/6557 c. Fix line breaking for Chinese characters and quotation marks - See http://unicode.org/cldr/trac/ticket/4200 and + See http://unicode.org/cldr/trac/ticket/4200 and http://crbug.com/39779 - + - Add a new file brklocal.mk (copied from brkfiles.mk) with line_ja.txt and word_POSIX.txt dropped from the build list. 
- Apply patches/khmer-dictbe.patch and put in a smaller Khmer dictionary - (source/data/brkitr/khmerdict.txt) obtained from + (source/data/brkitr/khmerdict.txt) obtained from http://bugs.icu-project.org/trac/ticket/9451 - Add several common Chinese words that were dropped previously to @@ -72,9 +72,9 @@ This directory contains the source code of ICU 52.1 for C/C++ Reverts some changes about Chinese/Japanese segmentation rules in patches/brkitr.patch to reduce binary size for Android. -4. Converter changes : +4. Converter changes : - - converters.patch : + - converters.patch : a. revises existing mapping tables b. Remove a lot of unused aliases in the converter alias table (source/data/mappings/convrtrs.txt ) leading to 40kB size reduction. @@ -82,20 +82,20 @@ This directory contains the source code of ICU 52.1 for C/C++ - Add source/data/mappings/ucmlocal.txt : to list only converters we need. - Add three new tables per WHATWG encoding standards for EUC-JP, Shift_JIS and CP866. - They're generated with scripts/{eucjp, sjis, ibm866}_gen.sh. + They're generated with scripts/{eucjp, sjis, ibm866}_gen.sh. - Add three 'fake' tables for ISO-2022-CN(-Ext) : noop-*.ucm. - - uconv.patch + - uconv.patch a. ucnv2022 uses 3 fake tables for ISO-2022-CN(-Ext) instead of two huge tables. b. ISO-2022-JP-[1-4] is dropped. - c. SCSU, BOCU, ISCII, UTF-7 conversion is diabled leading to + c. SCSU, BOCU, ISCII, UTF-7 conversion is diabled leading to the 47kB reduction in the code size. 5. Locale changes - patches/locale1.patch : a. Exemplar character set changes for zh*, ja + 9 Indian locales - b. Minor fixes for Korean, a few Indic (AmPmMarkers) and + b. 
Minor fixes for Korean, a few Indic (AmPmMarkers) and others (datetime format) - Locale build configuration files: To include the full locale data @@ -103,7 +103,7 @@ This directory contains the source code of ICU 52.1 for C/C++ add reslocal.mk or {trns,sprep,rbnf,coll}local.mk files to source/data/{coll,curr,lang.locale,curr,region,translit,zone,rbnf,sprep}. - This along with #8 (data.build.patch), #3 (brkiter) and #4 (converter) + This along with #8 (data.build.patch), #3 (brkiter) and #4 (converter) cuts down the data size by ~ 11MB. - Run scripts/trim_data.sh : About 2.1MB data size reduction. @@ -112,7 +112,7 @@ This directory contains the source code of ICU 52.1 for C/C++ b. Trim the locale data for non-UI languages to the bare minimum : ExemplarCharacters, LocaleScript, layout, and the name of the language for a locale in its native language. - c. Remove the legacy Chinese character set-based collation + c. Remove the legacy Chinese character set-based collation (big5han/gb2312han) that don't make any sense and nobdoy uses. - android/patch_locale.sh (to be run for Android build only): @@ -120,7 +120,7 @@ This directory contains the source code of ICU 52.1 for C/C++ except the language and script names of zh_Hans and zh_Hant. b. Remove exemplar cities in timezone data (data/zone) c. Keep only the minimal calendar data in data/locales - + - Add tg.txt to source/data/locale source/data/lang to add the minimal locale data necessary for the spellchecker. In both directories, add tg.txt to reslocal.mk @@ -154,7 +154,7 @@ This directory contains the source code of ICU 52.1 for C/C++ a. do not use WINDOWS_LOCALE_API in locmap.c b. do not redefine stringpiece::npos c. fix a Windows build failure with U_USING_ICU_NAMESPACE=0 - upstream bug: http://bugs.icu-project.org/trac/ticket/10486 + upstream bug: http://bugs.icu-project.org/trac/ticket/10486 fixed in ICU 53) d. 
Explicitly use Windows 'A' API when argument is an LPSTR in wintz.c upstream bug : http://bugs.icu-project.org/trac/ticket/10870 @@ -165,19 +165,19 @@ This directory contains the source code of ICU 52.1 for C/C++ Windows-only data build patch. - patches/clang_win.patch : - Take care of 3 warnings from clang and MSVC 2013. + Take care of 3 warnings from clang and MSVC 2013. upstream bug : http://bugs.icu-project.org/trac/ticket/11102 9. Pre-built data files are checked in with the following steps on Linux: - a. Make a icu data build directory outside the Chromium source tree + a. Make a icu data build directory outside the Chromium source tree and cd to that directory. - b. Run + b. Run ${CHROME_ICU_TREE_TOP}/source/runConfigureICU Linux --disable-layout c. Run 'make' - d. 'make' will fail in the 1st pass. Copy + d. 'make' will fail in the 1st pass. Copy ${CHROME_ICU_TREE_TOP}/source/data/in/coll/invuca.icu to {BUILD_DIR_ROOT}/data/out/build/icudt52l/coll and re-run 'make' in {BUILD_DIR_ROOT}/data. @@ -199,7 +199,7 @@ This directory contains the source code of ICU 52.1 for C/C++ icudt52l_dat.S, but '52' is dropped while copying. mac/icudtl_dat.S is identical to linux/icudtl_dat.S except for - the header portion. With "linux/icudtl_dat.S" in its place, + the header portion. With "linux/icudtl_dat.S" in its place, run scripts/make_mac_assembly.sh to generate it. - android/icudtl_dat.S : Built on Linux with all the patches above and @@ -211,24 +211,28 @@ This directory contains the source code of ICU 52.1 for C/C++ copied to the above location with '52' dropped in its name. - windows/icudt.dll (by default, we set icu_use_icu_data_flag to 1 - and don't use this file.) + and don't use this file.) a. check out a clean copy of icu52 from the upstream on Windows outside the Chrome tree. $ svn export --native-eol LF http://source.icu-project.org/repos/icu/icu/tags/release-52-1 ${SEPARATE_ICU_ROOT}/icu52 - b. copy ${CHROME_ICU_ROOT}/source/data/in/icudtl.dat to + b. 
copy ${CHROME_ICU_ROOT}/source/data/in/icudtl.dat to ${SEPARATE_ICU_ROOT}/source/data/in/icudt52l.dat c. copy ${CHROME_ICU_ROOT}/source/data/makedata.mak to ${SEPARATE_ICU_ROOT}/source/data/makedata.mak c. In Visual Studio, open source/allinone/allinone.sln solution in ${SEPARATE_ICU_ROOT} - d. Build 'makedata' target + d. Build 'makedata' target e. icudt52.dll will be generated in ${SEPARATE_ICU_ROOT}/bin - f. Copy that icudt52.dll to ${CHROME_ICU_ROOT}/windows/icudt.dll + f. Copy that icudt52.dll to ${CHROME_ICU_ROOT}/windows/icudt.dll and check that in. 10. Change export of U_ICUDATA_ENTRY_POINT from U_IMPORT to U_EXPORT. - patches/declspec.patch + +11. Cherry-pick an upstream patch to fix a bug in bidi. + - patches/bidi.patch + - upstream bug : http://bugs.icu-project.org/trac/ticket/11054 diff --git a/patches/bidi.patch b/patches/bidi.patch new file mode 100644 index 0000000..5a43e4c --- /dev/null +++ b/patches/bidi.patch @@ -0,0 +1,95 @@ +Index: source/test/cintltst/cbiditst.c +=================================================================== +--- source/test/cintltst/cbiditst.c (revision 36303) ++++ source/test/cintltst/cbiditst.c (revision 36304) +@@ -87,6 +87,8 @@ + + static void doTailTest(void); + ++static void testBracketOverflow(void); ++ + /* new BIDI API */ + static void testReorderingMode(void); + static void testReorderRunsOnly(void); +@@ -133,6 +135,7 @@ + addTest(root, testClassOverride, "complex/bidi/TestClassOverride"); + addTest(root, testGetBaseDirection, "complex/bidi/testGetBaseDirection"); + addTest(root, testContext, "complex/bidi/testContext"); ++ addTest(root, testBracketOverflow, "complex/bidi/TestBracketOverflow"); + + addTest(root, doArabicShapingTest, "complex/arabic-shaping/ArabicShapingTest"); + addTest(root, doLamAlefSpecialVLTRArabicShapingTest, "complex/arabic-shaping/lamalef"); +@@ -4896,3 +4899,25 @@ + + log_verbose("\nExiting TestContext \n\n"); + } ++ ++/* Ticket#11054 ubidi_setPara crash with heavily nested brackets */ 
++static void ++testBracketOverflow(void) { ++ static const char* TEXT = "(((((((((((((((((((((((((((((((((((((((((a)(A)))))))))))))))))))))))))))))))))))))))))"; ++ UErrorCode status = U_ZERO_ERROR; ++ UBiDi* bidi; ++ UChar src[100]; ++ UChar dest[100]; ++ int32_t len; ++ ++ bidi = ubidi_open(); ++ len = uprv_strlen(TEXT); ++ pseudoToU16(len, TEXT, src); ++ ubidi_setPara(bidi, src, len, UBIDI_DEFAULT_LTR , NULL, &status); ++ if (U_FAILURE(status)) { ++ log_err("setPara failed with heavily nested brackets - %s", u_errorName(status)); ++ } ++ ++ ubidi_close(bidi); ++} ++ +Index: source/common/ubidiimp.h +=================================================================== +--- source/common/ubidiimp.h (revision 36303) ++++ source/common/ubidiimp.h (revision 36304) +@@ -173,7 +173,7 @@ + /* array of opening entries which should be enough in most cases; no malloc() */ + Opening simpleOpenings[SIMPLE_OPENINGS_SIZE]; + Opening *openings; /* pointer to current array of entries */ +- int32_t openingsSize; /* number of allocated entries */ ++ int32_t openingsCount; /* number of allocated entries */ + int32_t isoRunLast; /* index of last used entry */ + /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL + + 1 for index 0, + 1 for before the first isolated sequence */ +Index: source/common/ubidi.c +=================================================================== +--- source/common/ubidi.c (revision 36303) ++++ source/common/ubidi.c (revision 36304) +@@ -679,10 +679,10 @@ + bd->isoRuns[0].contextPos=0; + if(pBiDi->openingsMemory) { + bd->openings=pBiDi->openingsMemory; +- bd->openingsSize=pBiDi->openingsSize; ++ bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); + } else { + bd->openings=bd->simpleOpenings; +- bd->openingsSize=SIMPLE_OPENINGS_SIZE; ++ bd->openingsCount=SIMPLE_OPENINGS_SIZE; + } + bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || + 
bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; +@@ -743,7 +743,7 @@ + bracketAddOpening(BracketData *bd, UChar match, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening; +- if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */ ++ if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ + UBiDi *pBiDi=bd->pBiDi; + if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) + return FALSE; +@@ -751,7 +751,7 @@ + uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, + SIMPLE_OPENINGS_SIZE * sizeof(Opening)); + bd->openings=pBiDi->openingsMemory; /* may have changed */ +- bd->openingsSize=pBiDi->openingsSize; ++ bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); + } + pOpening=&bd->openings[pLastIsoRun->limit]; + pOpening->position=position; diff --git a/source/common/ubidi.c b/source/common/ubidi.c index 75afd06..edccccf 100644 --- a/source/common/ubidi.c +++ b/source/common/ubidi.c @@ -671,10 +671,10 @@ bracketInit(UBiDi *pBiDi, BracketData *bd) { bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; if(pBiDi->openingsMemory) { bd->openings=pBiDi->openingsMemory; - bd->openingsSize=pBiDi->openingsSize; + bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); } else { bd->openings=bd->simpleOpenings; - bd->openingsSize=SIMPLE_OPENINGS_SIZE; + bd->openingsCount=SIMPLE_OPENINGS_SIZE; } bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; @@ -732,7 +732,7 @@ static UBool /* return TRUE if success */ bracketAddOpening(BracketData *bd, UChar match, int32_t position) { IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; Opening *pOpening; - if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */ + if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ UBiDi *pBiDi=bd->pBiDi; if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) 
return FALSE; @@ -740,7 +740,7 @@ bracketAddOpening(BracketData *bd, UChar match, int32_t position) { uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, SIMPLE_OPENINGS_SIZE * sizeof(Opening)); bd->openings=pBiDi->openingsMemory; /* may have changed */ - bd->openingsSize=pBiDi->openingsSize; + bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); } pOpening=&bd->openings[pLastIsoRun->limit]; pOpening->position=position; diff --git a/source/common/ubidiimp.h b/source/common/ubidiimp.h index 58c86f2..068e156 100644 --- a/source/common/ubidiimp.h +++ b/source/common/ubidiimp.h @@ -183,7 +183,7 @@ typedef struct BracketData { /* array of opening entries which should be enough in most cases; no malloc() */ Opening simpleOpenings[SIMPLE_OPENINGS_SIZE]; Opening *openings; /* pointer to current array of entries */ - int32_t openingsSize; /* number of allocated entries */ + int32_t openingsCount; /* number of allocated entries */ int32_t isoRunLast; /* index of last used entry */ /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL + 1 for index 0, + 1 for before the first isolated sequence */ diff --git a/source/test/cintltst/cbiditst.c b/source/test/cintltst/cbiditst.c index 30bca7a..c896ce3 100644 --- a/source/test/cintltst/cbiditst.c +++ b/source/test/cintltst/cbiditst.c @@ -84,6 +84,8 @@ static void testContext(void); static void doTailTest(void); +static void testBracketOverflow(void); + /* new BIDI API */ static void testReorderingMode(void); static void testReorderRunsOnly(void); @@ -130,6 +132,7 @@ addComplexTest(TestNode** root) { addTest(root, testClassOverride, "complex/bidi/TestClassOverride"); addTest(root, testGetBaseDirection, "complex/bidi/testGetBaseDirection"); addTest(root, testContext, "complex/bidi/testContext"); + addTest(root, testBracketOverflow, "complex/bidi/TestBracketOverflow"); addTest(root, doArabicShapingTest, "complex/arabic-shaping/ArabicShapingTest"); addTest(root, 
doLamAlefSpecialVLTRArabicShapingTest, "complex/arabic-shaping/lamalef"); @@ -4654,3 +4657,25 @@ testContext(void) { log_verbose("\nExiting TestContext \n\n"); } + +/* Ticket#11054 ubidi_setPara crash with heavily nested brackets */ +static void +testBracketOverflow(void) { + static const char* TEXT = "(((((((((((((((((((((((((((((((((((((((((a)(A)))))))))))))))))))))))))))))))))))))))))"; + UErrorCode status = U_ZERO_ERROR; + UBiDi* bidi; + UChar src[100]; + UChar dest[100]; + int32_t len; + + bidi = ubidi_open(); + len = uprv_strlen(TEXT); + pseudoToU16(len, TEXT, src); + ubidi_setPara(bidi, src, len, UBIDI_DEFAULT_LTR , NULL, &status); + if (U_FAILURE(status)) { + log_err("setPara failed with heavily nested brackets - %s", u_errorName(status)); + } + + ubidi_close(bidi); +} + |