diff options
author | Jean-Baptiste Queru <jbq@google.com> | 2009-07-17 17:40:43 -0700 |
---|---|---|
committer | Jean-Baptiste Queru <jbq@google.com> | 2009-07-17 17:40:43 -0700 |
commit | c0f3e2506e4cc62ff8c220fe72849728e9d6cecf (patch) | |
tree | 778334c2c002f3c4b016a8d4de106cdb8dc959d8 /tools | |
parent | 6b13cbaafaffaeeaf0477e95816759728fcdb763 (diff) | |
download | icu4c-c0f3e2506e4cc62ff8c220fe72849728e9d6cecf.tar.gz |
import cl @40073
Diffstat (limited to 'tools')
-rw-r--r-- | tools/makeconv/gencnvex.c | 4 | ||||
-rw-r--r-- | tools/makeconv/genmbcs.c | 212 | ||||
-rw-r--r-- | tools/makeconv/genmbcs.h | 13 | ||||
-rw-r--r-- | tools/makeconv/makeconv.c | 59 | ||||
-rw-r--r-- | tools/makeconv/makeconv.h | 3 | ||||
-rw-r--r-- | tools/toolutil/pkgitems.cpp | 16 |
6 files changed, 234 insertions, 73 deletions
diff --git a/tools/makeconv/gencnvex.c b/tools/makeconv/gencnvex.c index 3b657366..cf09cbe5 100644 --- a/tools/makeconv/gencnvex.c +++ b/tools/makeconv/gencnvex.c @@ -130,7 +130,7 @@ CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, extData->ucm->baseName[length++]=0; } - headerSize=sizeof(header)+length; + headerSize=MBCS_HEADER_V4_LENGTH*4+length; /* fill the header */ header.version[0]=4; @@ -138,7 +138,7 @@ CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY); /* write the header and the base table name */ - udata_writeBlock(pData, &header, sizeof(header)); + udata_writeBlock(pData, &header, MBCS_HEADER_V4_LENGTH*4); udata_writeBlock(pData, extData->ucm->baseName, length); } diff --git a/tools/makeconv/genmbcs.c b/tools/makeconv/genmbcs.c index 139ab010..6757b778 100644 --- a/tools/makeconv/genmbcs.c +++ b/tools/makeconv/genmbcs.c @@ -30,7 +30,7 @@ * Reduce tests for maxCharLength. */ -typedef struct MBCSData { +struct MBCSData { NewConverter newConverter; UCMFile *ucm; @@ -48,10 +48,18 @@ typedef struct MBCSData { uint32_t stage2Top, stage3Top; /* fromUTF8 */ - uint16_t stageUTF8[MBCS_UTF8_STAGE_SIZE]; + uint16_t stageUTF8[0x10000>>MBCS_UTF8_STAGE_SHIFT]; /* allow for utf8Max=0xffff */ + + /* + * Maximum UTF-8-friendly code point. + * 0 if !utf8Friendly, otherwise 0x01ff..0xffff in steps of 0x100. + * If utf8Friendly, utf8Max is normally either MBCS_UTF8_MAX or 0xffff. + */ + uint16_t utf8Max; UBool utf8Friendly; -} MBCSData; + UBool omitFromU; +}; /* prototypes */ static void @@ -115,6 +123,29 @@ printBytes(char *buffer, const uint8_t *bytes, int32_t length) { /* implementation ----------------------------------------------------------- */ +static MBCSData gDummy; + +U_CFUNC const MBCSData * +MBCSGetDummy() { + uprv_memset(&gDummy, 0, sizeof(MBCSData)); + + /* + * Set "pessimistic" values which may sometimes move too many + * mappings to the extension table (but never too few). + * These values cause MBCSOkForBaseFromUnicode() to return FALSE for the + * largest set of mappings. + * Assume maxCharLength>1. + */ + gDummy.utf8Friendly=TRUE; + if(SMALL) { + gDummy.utf8Max=0xffff; + gDummy.omitFromU=TRUE; + } else { + gDummy.utf8Max=MBCS_UTF8_MAX; + } + return &gDummy; +} + static void MBCSInit(MBCSData *mbcsData, UCMFile *ucm) { uprv_memset(mbcsData, 0, sizeof(MBCSData)); @@ -680,7 +711,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData, /* inspect stage 1 */ index=c>>MBCS_STAGE_1_SHIFT; - if(mbcsData->utf8Friendly && c<=MBCS_UTF8_MAX) { + if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) { nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK&~(MBCS_UTF8_STAGE_3_BLOCKS-1); } else { nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK; @@ -716,7 +747,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData, /* inspect stage 2 */ index=mbcsData->stage1[index]+nextOffset; - if(mbcsData->utf8Friendly && c<=MBCS_UTF8_MAX) { + if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) { /* allocate 64-entry blocks for UTF-8-friendly lookup */ blockSize=MBCS_UTF8_STAGE_3_BLOCK_SIZE*maxCharLength; nextOffset=c&MBCS_UTF8_STAGE_3_BLOCK_MASK; @@ -761,12 +792,12 @@ MBCSAddFromUnicode(MBCSData *mbcsData, stage3Index=MBCS_STAGE_3_GRANULARITY*(uint32_t)(uint16_t)mbcsData->stage2[index]; /* Build an alternate, UTF-8-friendly stage table as well. */ - if(mbcsData->utf8Friendly && c<=MBCS_UTF8_MAX) { + if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) { /* Overflow for uint16_t entries in stageUTF8? */ if(stage3Index>0xffff) { /* * This can occur only if the mapping table is nearly perfectly filled and if - * MBCS_UTF8_MAX==0xffff. + * utf8Max==0xffff. * (There is no known charset like this. GB 18030 does not map * surrogate code points and LMBCS does not map 256 PUA code points.) * @@ -776,20 +807,20 @@ MBCSAddFromUnicode(MBCSData *mbcsData, * mappings with 0<=c<MBCS_UTF8_LIMIT, and there is only also * the initial all-unassigned block in stage3. * + * Solution for the overflow: Reduce utf8Max to the next lower value, 0xfeff. + * * (See svn revision 20866 of the markus/ucnvutf8 feature branch for * code that causes MBCSAddTable() to rebuild the table not utf8Friendly * in case of overflow. That code was not tested.) */ - fprintf(stderr, "too many stage 3 entries for UTF-8-friendly format, processing U+%04x<->0x%s\n", - (int)c, printBytes(buffer, bytes, length)); - return FALSE; + mbcsData->utf8Max=0xfeff; + } else { + /* + * The stage 3 block has been assigned for the regular trie. + * Just copy its index into stageUTF8[], without the granularity. + */ + mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index; } - - /* - * The stage 3 block has been assigned for the regular trie. - * Just copy its index into stageUTF8[], without the granularity. - */ - mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index; } /* write the codepage bytes into stage 3 and get the previous bytes */ @@ -856,7 +887,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData, } U_CFUNC UBool -MBCSOkForBaseFromUnicode(UBool utf8Friendly, +MBCSOkForBaseFromUnicode(const MBCSData *mbcsData, const uint8_t *bytes, int32_t length, UChar32 c, int8_t flag) { /* @@ -883,7 +914,16 @@ MBCSOkForBaseFromUnicode(UBool utf8Friendly, * - any mapping to 0x00 (result value 0, indistinguishable from unmappable entry) * - any |1 fallback (no roundtrip flags in the optimized table) */ - if(utf8Friendly && flag<=1 && c<=MBCS_UTF8_MAX && (bytes[0]==0 || flag==1)) { + if(mbcsData->utf8Friendly && flag<=1 && c<=mbcsData->utf8Max && (bytes[0]==0 || flag==1)) { + return FALSE; + } + + /* + * If we omit the fromUnicode data, we can only store roundtrips there + * because only they are recoverable from the toUnicode data. + * Fallbacks must go into the extension table. + */ + if(mbcsData->omitFromU && flag!=0) { return FALSE; } @@ -918,6 +958,18 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati * indicators are used. */ mbcsData->utf8Friendly=utf8Friendly=(UBool)((table->flagsType&UCM_FLAGS_EXPLICIT)!=0); + if(utf8Friendly) { + mbcsData->utf8Max=MBCS_UTF8_MAX; + if(SMALL && maxCharLength>1) { + mbcsData->omitFromU=TRUE; + } + } else { + mbcsData->utf8Max=0; + if(SMALL && maxCharLength>1) { + fprintf(stderr, + "makeconv warning: --small not available for .ucm files without |0 etc.\n"); + } + } if(!MBCSStartMappings(mbcsData)) { return FALSE; @@ -933,6 +985,28 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati c=m->u; f=m->f; + /* + * Small optimization for --small .cnv files: + * + * If there are fromUnicode mappings above MBCS_UTF8_MAX, + * then the file size will be smaller if we make utf8Max larger + * because the size increase in stageUTF8 will be more than balanced by + * how much less of stage2 needs to be stored. + * + * There is no point in doing this incrementally because stageUTF8 + * uses so much less space per block than stage2, + * so we immediately increase utf8Max to 0xffff. + * + * Do not increase utf8Max if it is already at 0xfeff because MBCSAddFromUnicode() + * sets it to that value when stageUTF8 overflows. + */ + if( mbcsData->omitFromU && f<=1 && + mbcsData->utf8Max<c && c<=0xffff && + mbcsData->utf8Max<0xfeff + ) { + mbcsData->utf8Max=0xffff; + } + switch(f) { case -1: /* there was no precision/fallback indicator */ @@ -943,7 +1017,7 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati if(maxCharLength==1) { isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); - } else if(MBCSOkForBaseFromUnicode(utf8Friendly, m->b.bytes, m->bLen, c, f)) { + } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) { isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); } else { m->f|=MBCS_FROM_U_EXT_FLAG; @@ -955,7 +1029,7 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati if(maxCharLength==1) { staticData->hasFromUnicodeFallback=TRUE; isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); - } else if(MBCSOkForBaseFromUnicode(utf8Friendly, m->b.bytes, m->bLen, c, f)) { + } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) { staticData->hasFromUnicodeFallback=TRUE; isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); } else { @@ -965,7 +1039,7 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati break; case 2: /* ignore |2 SUB mappings, except to move <subchar1> mappings to the extension table */ - if(maxCharLength>1 && !MBCSOkForBaseFromUnicode(utf8Friendly, m->b.bytes, m->bLen, c, f)) { + if(maxCharLength>1 && m->bLen==1) { m->f|=MBCS_FROM_U_EXT_FLAG; m->moveFlag=UCM_MOVE_TO_EXT; } @@ -1329,24 +1403,56 @@ static uint32_t MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData, UNewDataMemory *pData, int32_t tableType) { MBCSData *mbcsData=(MBCSData *)cnvData; + uint32_t stage2Start, stage2Length; uint32_t top, stageUTF8Length=0; int32_t i, stage1Top; + uint32_t headerLength; _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 }; - /* adjust stage 1 entries to include the size of stage 1 in the offsets to stage 2 */ - if(mbcsData->ucm->states.maxCharLength==1) { - if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { - stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */ + stage2Length=mbcsData->stage2Top; + if(mbcsData->omitFromU) { + /* find how much of stage2 can be omitted */ + int32_t utf8Limit=(int32_t)mbcsData->utf8Max+1; + uint32_t st2; + + i=utf8Limit>>MBCS_STAGE_1_SHIFT; + if((utf8Limit&((1<<MBCS_STAGE_1_SHIFT)-1))!=0 && (st2=mbcsData->stage1[i])!=0) { + /* utf8Limit is in the middle of an existing stage 2 block */ + stage2Start=st2+((utf8Limit>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK); } else { - stage1Top=0x40; /* 0x40==64 */ + /* find the last stage2 block with mappings before utf8Limit */ + while(i>0 && (st2=mbcsData->stage1[--i])==0) {} + /* stage2 up to the end of this block corresponds to stageUTF8 */ + stage2Start=st2+MBCS_STAGE_2_BLOCK_SIZE; } + header.options|=MBCS_OPT_NO_FROM_U; + header.fullStage2Length=stage2Length; + stage2Length-=stage2Start; + if(VERBOSE) { + printf("+ omitting %lu out of %lu stage2 entries and %lu fromUBytes\n", + stage2Start, mbcsData->stage2Top, mbcsData->stage3Top); + printf("+ total size savings: %lu bytes\n", stage2Start*4+mbcsData->stage3Top); + } + } else { + stage2Start=0; + } + + if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { + stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */ + } else { + stage1Top=0x40; /* 0x40==64 */ + } + + /* adjust stage 1 entries to include the size of stage 1 in the offsets to stage 2 */ + if(mbcsData->ucm->states.maxCharLength==1) { for(i=0; i<stage1Top; ++i) { mbcsData->stage1[i]+=(uint16_t)stage1Top; } - /* stage2Top has counted 16-bit results, now we need to count bytes */ - mbcsData->stage2Top*=2; + /* stage2Top/Length have counted 16-bit results, now we need to count bytes */ + /* also round up to a multiple of 4 bytes */ + stage2Length=(stage2Length*2+1)&~1; /* stage3Top has counted 16-bit results, now we need to count bytes */ mbcsData->stage3Top*=2; @@ -1355,40 +1461,47 @@ MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData, header.version[2]=(uint8_t)(SBCS_UTF8_MAX>>8); /* store 0x1f for max==0x1fff */ } } else { - if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { - stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */ - } else { - stage1Top=0x40; /* 0x40==64 */ - } for(i=0; i<stage1Top; ++i) { mbcsData->stage1[i]+=(uint16_t)stage1Top/2; /* stage 2 contains 32-bit entries, stage 1 16-bit entries */ } - /* stage2Top has counted 32-bit results, now we need to count bytes */ - mbcsData->stage2Top*=4; + /* stage2Top/Length have counted 32-bit results, now we need to count bytes */ + stage2Length*=4; + /* leave stage2Start counting 32-bit units */ if(mbcsData->utf8Friendly) { - stageUTF8Length=MBCS_UTF8_STAGE_SIZE; - header.version[2]=(uint8_t)(MBCS_UTF8_MAX>>8); /* store 0xd7 for max==0xd7ff */ + stageUTF8Length=(mbcsData->utf8Max+1)>>MBCS_UTF8_STAGE_SHIFT; + header.version[2]=(uint8_t)(mbcsData->utf8Max>>8); /* store 0xd7 for max==0xd7ff */ } /* stage3Top has already counted bytes */ } - /* round up stage2Top and stage3Top so that the sizes of all data blocks are multiples of 4 */ - mbcsData->stage2Top=(mbcsData->stage2Top+3)&~3; + /* round up stage3Top so that the sizes of all data blocks are multiples of 4 */ mbcsData->stage3Top=(mbcsData->stage3Top+3)&~3; /* fill the header */ - header.version[0]=4; + if(header.options&MBCS_OPT_INCOMPATIBLE_MASK) { + header.version[0]=5; + if(header.options&MBCS_OPT_NO_FROM_U) { + headerLength=10; /* include fullStage2Length */ + } else { + headerLength=MBCS_HEADER_V5_MIN_LENGTH; /* 9 */ + } + } else { + header.version[0]=4; + headerLength=MBCS_HEADER_V4_LENGTH; /* 8 */ + } header.version[1]=3; /* header.version[2] set above for utf8Friendly data */ + header.options|=(uint32_t)headerLength; + header.countStates=mbcsData->ucm->states.countStates; header.countToUFallbacks=mbcsData->countToUFallbacks; header.offsetToUCodeUnits= - sizeof(_MBCSHeader)+ + headerLength*4+ mbcsData->ucm->states.countStates*1024+ mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback); header.offsetFromUTable= @@ -1397,10 +1510,13 @@ MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData, header.offsetFromUBytes= header.offsetFromUTable+ stage1Top*2+ - mbcsData->stage2Top; + stage2Length; header.fromUBytesLength=mbcsData->stage3Top; - top=header.offsetFromUBytes+header.fromUBytesLength+stageUTF8Length*2; + top=header.offsetFromUBytes+stageUTF8Length*2; + if(!(header.options&MBCS_OPT_NO_FROM_U)) { + top+=header.fromUBytesLength; + } header.flags=(uint8_t)(mbcsData->ucm->states.outputType); @@ -1414,17 +1530,19 @@ MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData, } /* write the MBCS data */ - udata_writeBlock(pData, &header, sizeof(_MBCSHeader)); + udata_writeBlock(pData, &header, headerLength*4); udata_writeBlock(pData, mbcsData->ucm->states.stateTable, header.countStates*1024); udata_writeBlock(pData, mbcsData->toUFallbacks, mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback)); udata_writeBlock(pData, mbcsData->unicodeCodeUnits, mbcsData->ucm->states.countToUCodeUnits*2); udata_writeBlock(pData, mbcsData->stage1, stage1Top*2); if(mbcsData->ucm->states.maxCharLength==1) { - udata_writeBlock(pData, mbcsData->stage2Single, mbcsData->stage2Top); + udata_writeBlock(pData, mbcsData->stage2Single+stage2Start, stage2Length); } else { - udata_writeBlock(pData, mbcsData->stage2, mbcsData->stage2Top); + udata_writeBlock(pData, mbcsData->stage2+stage2Start, stage2Length); + } + if(!(header.options&MBCS_OPT_NO_FROM_U)) { + udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top); } - udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top); if(stageUTF8Length>0) { udata_writeBlock(pData, mbcsData->stageUTF8, stageUTF8Length*2); diff --git a/tools/makeconv/genmbcs.h b/tools/makeconv/genmbcs.h index 60f52e3d..cb0cc5e6 100644 --- a/tools/makeconv/genmbcs.h +++ b/tools/makeconv/genmbcs.h @@ -101,9 +101,20 @@ enum { U_CFUNC NewConverter * MBCSOpen(UCMFile *ucm); +struct MBCSData; +typedef struct MBCSData MBCSData; + +/* + * Get a dummy MBCSData for use with MBCSOkForBaseFromUnicode() + * for creating an extension-only file. + * Assume maxCharLength>1. + */ +U_CFUNC const MBCSData * +MBCSGetDummy(); + /* Test if a 1:1 mapping fits into the MBCS base table's fromUnicode structure. */ U_CFUNC UBool -MBCSOkForBaseFromUnicode(UBool utf8Friendly, +MBCSOkForBaseFromUnicode(const MBCSData *mbcsData, const uint8_t *bytes, int32_t length, UChar32 c, int8_t flag); diff --git a/tools/makeconv/makeconv.c b/tools/makeconv/makeconv.c index d5aeafad..7e62c868 100644 --- a/tools/makeconv/makeconv.c +++ b/tools/makeconv/makeconv.c @@ -34,6 +34,8 @@ #include "makeconv.h" #include "genmbcs.h" +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + #define DEBUG 0 typedef struct ConvData { @@ -76,6 +78,7 @@ extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP * Global - verbosity */ UBool VERBOSE = FALSE; +UBool SMALL = FALSE; static void createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); @@ -163,13 +166,25 @@ writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr } } +enum { + OPT_HELP_H, + OPT_HELP_QUESTION_MARK, + OPT_COPYRIGHT, + OPT_VERSION, + OPT_DESTDIR, + OPT_VERBOSE, + OPT_SMALL, + OPT_COUNT +}; + static UOption options[]={ - UOPTION_HELP_H, /* 0 Numbers for those who*/ - UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */ - UOPTION_COPYRIGHT, /* 2 */ - UOPTION_VERSION, /* 3 */ - UOPTION_DESTDIR, /* 4 */ - UOPTION_VERBOSE, /* 5 */ + UOPTION_HELP_H, + UOPTION_HELP_QUESTION_MARK, + UOPTION_COPYRIGHT, + UOPTION_VERSION, + UOPTION_DESTDIR, + UOPTION_VERBOSE, + { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } }; int main(int argc, char* argv[]) @@ -194,8 +209,8 @@ int main(int argc, char* argv[]) uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); /* preset then read command line options */ - options[4].value=u_getDataDirectory(); - argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); + options[OPT_DESTDIR].value=u_getDataDirectory(); + argc=u_parseArgs(argc, argv, LENGTHOF(options), options); /* error handling, printing usage message */ if(argc<0) { @@ -205,8 +220,9 @@ int main(int argc, char* argv[]) } else if(argc<2) { argc=-1; } - if(argc<0 || options[0].doesOccur || options[1].doesOccur) { - fprintf(stderr, + if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { + FILE *stdfile=argc<0 ? stderr : stdout; + fprintf(stdfile, "usage: %s [-options] files...\n" "\tread .ucm codepage mapping files and write .cnv files\n" "options:\n" @@ -216,20 +232,26 @@ int main(int argc, char* argv[]) "\t-d or --destdir destination directory, followed by the path\n" "\t-v or --verbose Turn on verbose output\n", argv[0]); + fprintf(stdfile, + "\t --small Generate smaller .cnv files. They will be\n" + "\t significantly smaller but may not be compatible with\n" + "\t older versions of ICU and will require heap memory\n" + "\t allocation when loaded.\n"); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } - if(options[3].doesOccur) { - fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", - dataInfo.formatVersion[0], dataInfo.formatVersion[1]); - fprintf(stderr, U_COPYRIGHT_STRING "\n"); + if(options[OPT_VERSION].doesOccur) { + printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", + dataInfo.formatVersion[0], dataInfo.formatVersion[1]); + printf("%s\n", U_COPYRIGHT_STRING); exit(0); } /* get the options values */ - haveCopyright = options[2].doesOccur; - destdir = options[4].value; - VERBOSE = options[5].doesOccur; + haveCopyright = options[OPT_COPYRIGHT].doesOccur; + destdir = options[OPT_DESTDIR].value; + VERBOSE = options[OPT_VERBOSE].doesOccur; + SMALL = options[OPT_SMALL].doesOccur; if (destdir != NULL && *destdir != 0) { uprv_strcpy(outFileName, destdir); @@ -766,12 +788,13 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod * * Do this after ucm_checkBaseExt(). */ + const MBCSData *mbcsData=MBCSGetDummy(); int32_t needsMove=0; for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; m<mLimit; ++m ) { - if(!MBCSOkForBaseFromUnicode(TRUE, m->b.bytes, m->bLen, m->u, m->f)) { + if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { m->f|=MBCS_FROM_U_EXT_FLAG; m->moveFlag=UCM_MOVE_TO_EXT; ++needsMove; diff --git a/tools/makeconv/makeconv.h b/tools/makeconv/makeconv.h index 0fa0fb2d..a3c2d375 100644 --- a/tools/makeconv/makeconv.h +++ b/tools/makeconv/makeconv.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2000-2006, International Business Machines +* Copyright (C) 2000-2007, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -24,6 +24,7 @@ /* exports from makeconv.c */ U_CFUNC UBool VERBOSE; +U_CFUNC UBool SMALL; /* converter table type for writing */ enum { diff --git a/tools/toolutil/pkgitems.cpp b/tools/toolutil/pkgitems.cpp index 6a93769c..2a8f0128 100644 --- a/tools/toolutil/pkgitems.cpp +++ b/tools/toolutil/pkgitems.cpp @@ -497,7 +497,7 @@ ucnv_enumDependencies(const UDataSwapper *ds, /* check for supported conversionType values */ if(inStaticData->conversionType==UCNV_MBCS) { /* MBCS data */ - uint32_t mbcsHeaderFlags; + uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions; int32_t extOffset; inMBCSHeader=(const _MBCSHeader *)inBytes; @@ -508,7 +508,14 @@ ucnv_enumDependencies(const UDataSwapper *ds, *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; } - if(!(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1)) { + if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { + mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; + } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && + ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))& + MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 + ) { + mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK; + } else { udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n", inMBCSHeader->version[0], inMBCSHeader->version[1]); *pErrorCode=U_UNSUPPORTED_ERROR; @@ -536,14 +543,15 @@ ucnv_enumDependencies(const UDataSwapper *ds, } /* swap the base name, between the header and the extension data */ - baseNameLength=(int32_t)strlen((const char *)(inMBCSHeader+1)); + const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4; + baseNameLength=(int32_t)strlen(inBaseName); if(baseNameLength>=(int32_t)sizeof(baseName)) { udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n", itemName, baseNameLength); *pErrorCode=U_UNSUPPORTED_ERROR; return; } - ds->swapInvChars(ds, inMBCSHeader+1, baseNameLength+1, baseName, pErrorCode); + ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode); checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode); } |