aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorclaireho <chinglanho@gmail.com>2010-05-03 15:44:48 -0700
committerclaireho <chinglanho@gmail.com>2010-05-03 15:51:28 -0700
commit50294ead5e5d23f5bbfed76e00e6b510bd41eee1 (patch)
tree50cb58e1ad7008bd1b6497bf3ba4f0e173fcd35a /tools
parent6d5deb12725f146643d443090dfa11b206df528a (diff)
downloadicu4c-50294ead5e5d23f5bbfed76e00e6b510bd41eee1.tar.gz
Update external/icu4c upstream-source from public ICU 4.2.1 to 4.4.1.
Change-Id: Ia4a187a89f07da3a9957fb6b0b281d31cbf67918
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile.in8
-rw-r--r--tools/ctestfw/Makefile.in28
-rw-r--r--tools/ctestfw/ctest.c48
-rw-r--r--tools/ctestfw/ctestfw.vcproj36
-rw-r--r--tools/ctestfw/tstdtmod.cpp60
-rw-r--r--tools/ctestfw/ucln_ct.c17
-rw-r--r--tools/ctestfw/udbgutil.cpp28
-rw-r--r--tools/ctestfw/unicode/ctest.h76
-rw-r--r--tools/ctestfw/unicode/testlog.h18
-rw-r--r--tools/ctestfw/unicode/udbgutil.h5
-rw-r--r--tools/genbidi/genbidi.c636
-rw-r--r--tools/genbidi/genbidi.h48
-rw-r--r--tools/genbidi/genbidi.vcproj422
-rw-r--r--tools/genbidi/store.c500
-rw-r--r--tools/genbrk/genbrk.cpp23
-rw-r--r--tools/gencase/Makefile.in95
-rw-r--r--tools/gencase/gencase.c833
-rw-r--r--tools/gencase/gencase.h128
-rw-r--r--tools/gencase/gencase.vcproj422
-rw-r--r--tools/gencase/store.c1208
-rw-r--r--tools/gencfu/gencfu.cpp28
-rw-r--r--tools/genctd/genctd.cpp23
-rw-r--r--tools/gendraft/Makefile12
-rwxr-xr-xtools/gendraft/genheaders.pl19
-rw-r--r--tools/gennames/Makefile.in97
-rw-r--r--tools/gennames/gennames.c1438
-rw-r--r--tools/gennorm/Makefile.in97
-rw-r--r--tools/gennorm/gennorm.c561
-rw-r--r--tools/gennorm/gennorm.h96
-rw-r--r--tools/gennorm/gennorm.vcproj422
-rw-r--r--tools/gennorm/store.c2181
-rw-r--r--tools/gennorm2/Makefile.in (renamed from tools/genbidi/Makefile.in)35
-rw-r--r--tools/gennorm2/gennorm2.cpp271
-rw-r--r--tools/gennorm2/gennorm2.vcproj (renamed from tools/gennames/gennames.vcproj)147
-rw-r--r--tools/gennorm2/n2builder.cpp1094
-rw-r--r--tools/gennorm2/n2builder.h129
-rw-r--r--tools/genpname/Makefile.in97
-rw-r--r--tools/genpname/SyntheticPropertyAliases.txt52
-rw-r--r--tools/genpname/SyntheticPropertyValueAliases.txt78
-rw-r--r--tools/genpname/data.h2363
-rw-r--r--tools/genpname/genpname.cpp1225
-rwxr-xr-xtools/genpname/gensvpa.pl161
-rwxr-xr-xtools/genpname/preparse.pl1328
-rw-r--r--tools/genprops/Makefile.in97
-rw-r--r--tools/genprops/genprops.c590
-rw-r--r--tools/genprops/genprops.h92
-rw-r--r--tools/genprops/genprops.vcproj426
-rw-r--r--tools/genprops/misc/ucdmerge.c149
-rw-r--r--tools/genprops/misc/ucdstrip.c59
-rwxr-xr-xtools/genprops/misc/ucdstrip.pl7
-rw-r--r--tools/genprops/props2.c813
-rw-r--r--tools/genprops/store.c545
-rw-r--r--tools/genrb/Makefile.in63
-rw-r--r--tools/genrb/derb.c80
-rwxr-xr-xtools/genrb/gendtjar.pl391
-rw-r--r--tools/genrb/genrb.c214
-rw-r--r--tools/genrb/parse.c33
-rw-r--r--tools/genrb/reslist.c1587
-rw-r--r--tools/genrb/reslist.h82
-rw-r--r--tools/genrb/wrtjava.c18
-rw-r--r--tools/genrb/wrtxml.cpp12
-rw-r--r--tools/genren/Makefile4
-rwxr-xr-xtools/genren/genren.pl70
-rw-r--r--tools/gentest/gentest.c49
-rw-r--r--tools/genuca/Makefile.in96
-rw-r--r--tools/genuca/genuca.8.in94
-rw-r--r--tools/genuca/genuca.cpp1215
-rw-r--r--tools/genuca/genuca.h47
-rwxr-xr-xtools/icu-svnprops-check.py195
-rw-r--r--tools/icuinfo/Makefile.in113
-rw-r--r--tools/icuinfo/icuinfo.cpp318
-rw-r--r--tools/icuinfo/icuinfo.vcproj (renamed from tools/genuca/genuca.vcproj)203
-rw-r--r--tools/icuinfo/icuplugins_windows_sample.txt57
-rw-r--r--tools/icuinfo/testplug.c204
-rw-r--r--tools/icuinfo/testplug.vcproj (renamed from tools/genpname/genpname.vcproj)112
-rw-r--r--tools/icupkg/icupkg.cpp2
-rw-r--r--tools/makeconv/genmbcs.c4
-rw-r--r--tools/makeconv/makeconv.c18
-rw-r--r--tools/makeconv/makeconv.h3
-rw-r--r--tools/pkgdata/pkgdata.cpp517
-rw-r--r--tools/toolutil/flagparser.c14
-rw-r--r--tools/toolutil/package.cpp6
-rw-r--r--tools/toolutil/pkg_genc.h2
-rw-r--r--tools/toolutil/pkg_icu.cpp2
-rw-r--r--tools/toolutil/pkg_icu.h4
-rw-r--r--tools/toolutil/pkgitems.cpp581
-rw-r--r--tools/toolutil/swapimpl.cpp16
-rw-r--r--tools/toolutil/toolutil.cpp (renamed from tools/toolutil/toolutil.c)80
-rw-r--r--tools/toolutil/toolutil.h50
-rw-r--r--tools/toolutil/toolutil.vcproj499
-rw-r--r--tools/toolutil/ucm.h4
-rw-r--r--tools/toolutil/ucmstate.c9
-rw-r--r--tools/toolutil/unewdata.c30
-rw-r--r--tools/toolutil/unewdata.h6
-rw-r--r--tools/toolutil/uparse.c98
-rw-r--r--tools/toolutil/uparse.h21
-rw-r--r--tools/toolutil/writesrc.c47
-rw-r--r--tools/toolutil/writesrc.h10
-rw-r--r--tools/toolutil/xmlparser.cpp6
-rw-r--r--tools/tzcode/Makefile.in48
-rw-r--r--tools/tzcode/tz2icu.cpp551
-rw-r--r--tools/tzcode/tz2icu.h5
-rw-r--r--tools/tzcode/zic.c17
103 files changed, 6054 insertions, 21224 deletions
diff --git a/tools/Makefile.in b/tools/Makefile.in
index 4925421d..1cfb2f05 100644
--- a/tools/Makefile.in
+++ b/tools/Makefile.in
@@ -1,5 +1,5 @@
## Makefile.in for ICU tools
-## Copyright (c) 1999-2009, International Business Machines Corporation and
+## Copyright (c) 1999-2010, International Business Machines Corporation and
## others. All Rights Reserved.
## Source directory information
@@ -13,9 +13,9 @@ include $(top_builddir)/icudefs.mk
## Build directory information
subdir = tools
-SUBDIRS = toolutil ctestfw makeconv genrb genuca genbrk genctd \
-gennames genpname gencnval gensprep genccode gencmn icupkg pkgdata \
-gentest genprops gencase genbidi gennorm gencfu
+SUBDIRS = toolutil ctestfw makeconv genrb genbrk genctd \
+gencnval gensprep icuinfo genccode gencmn icupkg pkgdata \
+gentest gennorm2 gencfu
## List of phony targets
.PHONY : all all-local all-recursive install install-local \
diff --git a/tools/ctestfw/Makefile.in b/tools/ctestfw/Makefile.in
index 4651a2fb..4a7971b2 100644
--- a/tools/ctestfw/Makefile.in
+++ b/tools/ctestfw/Makefile.in
@@ -1,6 +1,6 @@
#******************************************************************************
#
-# Copyright (C) 1999-2007, International Business Machines
+# Copyright (C) 1999-2010, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@@ -46,12 +46,12 @@ CXXFLAGS += $(LIBCXXFLAGS)
ifneq ($(top_builddir),$(top_srcdir))
CPPFLAGS += -I$(top_builddir)/common
endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil -I$(srcdir) $(LIBCPPFLAGS)
+CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil -I$(srcdir) $(LIBCPPFLAGS) $(CPPFLAGSCTESTFW)
DEFS += -DT_CTEST_IMPLEMENTATION
LDFLAGS += $(LDFLAGSCTESTFW)
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS)
-OBJECTS = ctest.o tstdtmod.o testdata.o datamap.o uperf.o dbgutil.o udbgutil.o
+OBJECTS = ctest.o tstdtmod.o testdata.o datamap.o uperf.o dbgutil.o udbgutil.o ucln_ct.o
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
@@ -79,6 +79,28 @@ all-local: $(ALL_TARGETS)
install-local: install-library
+# install-library: create $(DESTDIR)$(libdir) and copy the built libraries into it.
+# - $(TARGET) is installed only when ENABLE_STATIC is non-empty.
+# - When ENABLE_SHARED is non-empty, $(FINAL_SO_TARGET) is installed. If it differs
+#   from $(SO_TARGET), that name is recreated as a symlink to it, and in that same
+#   case $(MIDDLE_SO_TARGET) is relinked too when it also differs.
+# - Still under ENABLE_SHARED, a non-empty IMPORT_LIB_EXT additionally installs
+#   $(FINAL_IMPORT_LIB) and symlinks $(IMPORT_LIB) / $(MIDDLE_IMPORT_LIB) to it
+#   when those names differ from the final one.
install-library: all-local
+ $(MKINSTALLDIRS) $(DESTDIR)$(libdir)
+ifneq ($(ENABLE_STATIC),)
+ $(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
+endif
+ifneq ($(ENABLE_SHARED),)
+ $(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
+ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
+ cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
+ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
+ cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
+endif
+endif
+ifneq ($(IMPORT_LIB_EXT),)
+ $(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
+ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
+ cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
+endif
+ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
+ cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
+endif
+endif
+endif
dist-local:
diff --git a/tools/ctestfw/ctest.c b/tools/ctestfw/ctest.c
index 92f5fb9a..825e3429 100644
--- a/tools/ctestfw/ctest.c
+++ b/tools/ctestfw/ctest.c
@@ -1,7 +1,7 @@
/*
********************************************************************************
*
-* Copyright (C) 1996-2009, International Business Machines
+* Copyright (C) 1996-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************************
@@ -830,3 +830,49 @@ static void help ( const char *argv0 )
printf(" eg: to run just the utility tests type: cintltest /tsutil) \n");
}
+/*
+ * Look up the current value of one test option.
+ *
+ * testOption is one of the *_OPTION selectors. The matching option variable
+ * is returned; a selector that matches none of them yields 0.
+ */
+int32_t T_CTEST_EXPORT2
+getTestOption ( int32_t testOption ) {
+    int32_t current = 0;
+
+    if (testOption == VERBOSITY_OPTION) {
+        current = VERBOSITY;
+    } else if (testOption == WARN_ON_MISSING_DATA_OPTION) {
+        current = WARN_ON_MISSING_DATA;
+    } else if (testOption == QUICK_OPTION) {
+        current = QUICK;
+    } else if (testOption == REPEAT_TESTS_OPTION) {
+        current = REPEAT_TESTS;
+    } else if (testOption == ERR_MSG_OPTION) {
+        current = ERR_MSG;
+    } else if (testOption == ICU_TRACE_OPTION) {
+        current = ICU_TRACE;
+    }
+    return current;
+}
+
+/*
+ * Store a new value for one test option.
+ *
+ * testOption selects the option (one of the *_OPTION selectors); a selector
+ * that matches none of them is ignored. Passing DECREMENT_OPTION_VALUE as
+ * value stores the option's current value minus one instead.
+ */
+void T_CTEST_EXPORT2
+setTestOption ( int32_t testOption, int32_t value) {
+    if (value == DECREMENT_OPTION_VALUE) {
+        /* Sentinel: replace it with "current value - 1" before storing. */
+        value = getTestOption(testOption);
+        --value;
+    }
+    switch (testOption) {
+        case VERBOSITY_OPTION:
+            VERBOSITY = value;
+            break;
+        case WARN_ON_MISSING_DATA_OPTION:
+            WARN_ON_MISSING_DATA = value;
+            break;
+        case QUICK_OPTION:
+            QUICK = value;
+            break;
+        case REPEAT_TESTS_OPTION:
+            REPEAT_TESTS = value;
+            break;
+        case ERR_MSG_OPTION:
+            /* getTestOption() reports ERR_MSG, so it must be settable here as well. */
+            ERR_MSG = value;
+            break;
+        case ICU_TRACE_OPTION:
+            ICU_TRACE = value;
+            break;
+        default :
+            break;
+    }
+}
diff --git a/tools/ctestfw/ctestfw.vcproj b/tools/ctestfw/ctestfw.vcproj
index 244a6a7d..ecb80aa7 100644
--- a/tools/ctestfw/ctestfw.vcproj
+++ b/tools/ctestfw/ctestfw.vcproj
@@ -426,6 +426,42 @@
>
</File>
<File
+ RelativePath=".\ucln_ct.c"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ </File>
+ <File
RelativePath=".\udbgutil.cpp"
>
</File>
diff --git a/tools/ctestfw/tstdtmod.cpp b/tools/ctestfw/tstdtmod.cpp
index ebf99a22..27e3cbeb 100644
--- a/tools/ctestfw/tstdtmod.cpp
+++ b/tools/ctestfw/tstdtmod.cpp
@@ -1,16 +1,74 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2002-2009, International Business Machines Corporation and
+ * Copyright (c) 2002-2010, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/* Created by weiv 05/09/2002 */
+#include <stdarg.h>
+
#include "unicode/tstdtmod.h"
#include "cmemory.h"
+#include <stdio.h>
TestLog::~TestLog() {}
+// Reports a failure that is still pending when the object is destroyed.
+IcuTestErrorCode::~IcuTestErrorCode() {
+    if(!isFailure()) {
+        return;
+    }
+    // Calling this from a destructor is safe: our handleFailure() does not throw.
+    handleFailure();
+}
+
+// If the error code is a failure, formats the printf-style arguments and reports
+// "<testName> failure: <errorName> - <formatted text>" through testClass.errln().
+// The error code is reset() in either case.
+// Returns TRUE if there was a failure, FALSE otherwise.
+UBool IcuTestErrorCode::logIfFailureAndReset(const char *fmt, ...) {
+    if(!isFailure()) {
+        reset();
+        return FALSE;
+    }
+    char buffer[4000];
+    va_list ap;
+    va_start(ap, fmt);
+    // Bounded formatting: an over-long message is truncated instead of
+    // overrunning the stack buffer.
+    if(vsnprintf(buffer, sizeof(buffer), fmt, ap) < 0) {
+        buffer[0] = 0;  // formatting error: report without the detail text
+    }
+    va_end(ap);
+    UnicodeString msg(testName, -1, US_INV);
+    msg.append(UNICODE_STRING_SIMPLE(" failure: ")).append(UnicodeString(errorName(), -1, US_INV));
+    msg.append(UNICODE_STRING_SIMPLE(" - ")).append(UnicodeString(buffer, -1, US_INV));
+    testClass.errln(msg);
+    // Reset only after errorName() has been captured in the message.
+    reset();
+    return TRUE;
+}
+
+// If the error code is a failure, formats the printf-style arguments and reports
+// "<testName> failure: <errorName> - <formatted text>" through testClass.dataerrln().
+// The error code is reset() in either case.
+// Returns TRUE if there was a failure, FALSE otherwise.
+UBool IcuTestErrorCode::logDataIfFailureAndReset(const char *fmt, ...) {
+    if(!isFailure()) {
+        reset();
+        return FALSE;
+    }
+    char buffer[4000];
+    va_list ap;
+    va_start(ap, fmt);
+    // Bounded formatting: an over-long message is truncated instead of
+    // overrunning the stack buffer.
+    if(vsnprintf(buffer, sizeof(buffer), fmt, ap) < 0) {
+        buffer[0] = 0;  // formatting error: report without the detail text
+    }
+    va_end(ap);
+    UnicodeString msg(testName, -1, US_INV);
+    msg.append(UNICODE_STRING_SIMPLE(" failure: ")).append(UnicodeString(errorName(), -1, US_INV));
+    msg.append(UNICODE_STRING_SIMPLE(" - ")).append(UnicodeString(buffer, -1, US_INV));
+    testClass.dataerrln(msg);
+    // Reset only after errorName() has been captured in the message.
+    reset();
+    return TRUE;
+}
+
+// Builds "<testName> failure: <errorName>" and reports it through the test class:
+// U_MISSING_RESOURCE_ERROR goes to dataerrln(), every other code to errln().
+void IcuTestErrorCode::handleFailure() const {
+    UnicodeString msg(testName, -1, US_INV);
+    msg.append(UNICODE_STRING_SIMPLE(" failure: "));
+    msg.append(UnicodeString(errorName(), -1, US_INV));
+
+    if (get() != U_MISSING_RESOURCE_ERROR) {
+        testClass.errln(msg);
+        return;
+    }
+    testClass.dataerrln(msg);
+}
+
TestDataModule *TestDataModule::getTestDataModule(const char* name, TestLog& log, UErrorCode &status)
{
if(U_FAILURE(status)) {
diff --git a/tools/ctestfw/ucln_ct.c b/tools/ctestfw/ucln_ct.c
new file mode 100644
index 00000000..1f4bbbed
--- /dev/null
+++ b/tools/ctestfw/ucln_ct.c
@@ -0,0 +1,17 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2007-2009, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+
+/** Auto-client **/
+#define UCLN_TYPE UCLN_CTESTFW
+#include "ucln_imp.h"
+
+/*
+ * Placeholder so this translation unit is not empty; always returns 0.
+ * Declared with (void): in C an empty parameter list would leave the
+ * parameters unspecified instead of declaring a prototype.
+ */
+int dummyFunction(void);
+int dummyFunction(void)
+{
+    /* this is here to prevent the compiler from complaining about an empty file */
+    return 0;
+}
diff --git a/tools/ctestfw/udbgutil.cpp b/tools/ctestfw/udbgutil.cpp
index 49d6e902..85436e9e 100644
--- a/tools/ctestfw/udbgutil.cpp
+++ b/tools/ctestfw/udbgutil.cpp
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2007-2009, International Business Machines Corporation and
+ * Copyright (c) 2007-2010, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -177,6 +177,26 @@ static const Field names_UColAttributeValue[] = {
#endif
+
+#include "unicode/icuplug.h"
+
+/* Name table for the UPlugReason enumerators listed below; count_UPlugReason is UPLUG_REASON_COUNT. */
+/* NOTE(review): FIELD_NAME_STR is defined outside this hunk; the length argument is presumably used to skip the common prefix - confirm. */
+#define LEN_UPLUG_REASON 13 /* length of "UPLUG_REASON_" */
+static const int32_t count_UPlugReason = UPLUG_REASON_COUNT;
+static const Field names_UPlugReason[] = {
+ FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_QUERY ),
+ FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_LOAD ),
+ FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_UNLOAD ),
+};
+
+/* Name table for the UPlugLevel enumerators listed below; count_UPlugLevel is UPLUG_LEVEL_COUNT. */
+/* NOTE(review): FIELD_NAME_STR is defined outside this hunk; the length argument is presumably used to skip the common prefix - confirm. */
+#define LEN_UPLUG_LEVEL 12 /* length of "UPLUG_LEVEL_" */
+static const int32_t count_UPlugLevel = UPLUG_LEVEL_COUNT;
+static const Field names_UPlugLevel[] = {
+ FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_INVALID ),
+ FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_UNKNOWN ),
+ FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_LOW ),
+ FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_HIGH ),
+};
+
#define LEN_UDBG 5 /* "UDBG_" */
static const int32_t count_UDebugEnumType = UDBG_ENUM_COUNT;
static const Field names_UDebugEnumType[] =
@@ -187,6 +207,8 @@ static const Field names_UDebugEnumType[] =
FIELD_NAME_STR( LEN_UDBG, UDBG_UCalendarMonths ),
FIELD_NAME_STR( LEN_UDBG, UDBG_UDateFormatStyle ),
#endif
+ FIELD_NAME_STR( LEN_UDBG, UDBG_UPlugReason ),
+ FIELD_NAME_STR( LEN_UDBG, UDBG_UPlugLevel ),
FIELD_NAME_STR( LEN_UDBG, UDBG_UAcceptResult ),
#if !UCONFIG_NO_COLLATION
FIELD_NAME_STR( LEN_UDBG, UDBG_UColAttributeValue ),
@@ -216,6 +238,8 @@ static int32_t _udbg_enumCount(UDebugEnumType type, UBool actual) {
COUNT_CASE(UCalendarMonths)
COUNT_CASE(UDateFormatStyle)
#endif
+ COUNT_CASE(UPlugReason)
+ COUNT_CASE(UPlugLevel)
COUNT_CASE(UAcceptResult)
#if !UCONFIG_NO_COLLATION
COUNT_CASE(UColAttributeValue)
@@ -234,6 +258,8 @@ static const Field* _udbg_enumFields(UDebugEnumType type) {
FIELD_CASE(UCalendarMonths)
FIELD_CASE(UDateFormatStyle)
#endif
+ FIELD_CASE(UPlugReason)
+ FIELD_CASE(UPlugLevel)
FIELD_CASE(UAcceptResult)
// FIELD_FAIL_CASE(UNonExistentEnum)
#if !UCONFIG_NO_COLLATION
diff --git a/tools/ctestfw/unicode/ctest.h b/tools/ctestfw/unicode/ctest.h
index e292e8a1..535e34c9 100644
--- a/tools/ctestfw/unicode/ctest.h
+++ b/tools/ctestfw/unicode/ctest.h
@@ -1,7 +1,7 @@
/*
********************************************************************************
*
- * Copyright (C) 1996-2009, International Business Machines
+ * Copyright (C) 1996-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************************
@@ -23,53 +23,61 @@ typedef struct TestNode TestNode;
U_CDECL_END
/**
- * Set this to zero to disable log_verbose() messages.
- * Otherwise nonzero to see log_verbose() messages.
+ * This is used to set or get the option value for REPEAT_TESTS.
+ * Use with set/getTestOption().
*
- * @internal Internal APIs for testing purpose only
+ * @internal
*/
-extern T_CTEST_EXPORT_API int REPEAT_TESTS;
+#define REPEAT_TESTS_OPTION 1
/**
- * Set this to zero to disable log_verbose() messages.
+ * This is used to set or get the option value for VERBOSITY.
+ * Set the option to zero to disable log_verbose() messages.
* Otherwise nonzero to see log_verbose() messages.
+ * Use with set/getTestOption().
*
- * @internal Internal APIs for testing purpose only
+ * @internal
*/
-extern T_CTEST_EXPORT_API int VERBOSITY;
+#define VERBOSITY_OPTION 2
/**
- * Set this to zero to disable log_verbose() messages.
- * Otherwise nonzero to see log_verbose() messages.
+ * This is used to set or get the option value for ERR_MSG.
+ * Use with set/getTestOption().
*
- * @internal Internal APIs for testing purpose only
+ * @internal
*/
-extern T_CTEST_EXPORT_API int ERR_MSG;
+#define ERR_MSG_OPTION 3
/**
- * Set this to zero to disable some of the slower tests.
+ * This is used to set or get the option value for QUICK.
+ * When option is zero, disable some of the slower tests.
* Otherwise nonzero to run the slower tests.
+ * Use with set/getTestOption().
*
- * @internal Internal APIs for testing purpose only
+ * @internal
*/
-extern T_CTEST_EXPORT_API int QUICK;
+#define QUICK_OPTION 4
/**
- * Set this to nonzero to warn (not error) on missing data.
- * Otherwise, zero will cause an error to be propagated when data is not available.
+ * This is used to set or get the option value for WARN_ON_MISSING_DATA.
+ * When option is nonzero, warn on missing data.
+ * Otherwise, errors are propagated when data is not available.
* Affects the behavior of log_dataerr.
+ * Use with set/getTestOption().
*
* @see log_data_err
- * @internal Internal APIs for testing purpose only
+ * @internal
*/
-extern T_CTEST_EXPORT_API int WARN_ON_MISSING_DATA;
+#define WARN_ON_MISSING_DATA_OPTION 5
/**
- * ICU tracing level, is set by command line option
+ * This is used to set or get the option value for ICU_TRACE.
+ * The ICU tracing level is set by a command line option.
+ * Use with set/getTestOption().
*
* @internal
*/
-extern T_CTEST_EXPORT_API UTraceLevel ICU_TRACE;
+#define ICU_TRACE_OPTION 6
/**
* Maximum amount of memory uprv_malloc should allocate before returning NULL.
@@ -85,6 +93,32 @@ extern T_CTEST_EXPORT_API size_t MAX_MEMORY_ALLOCATION;
*/
extern T_CTEST_EXPORT_API int32_t ALLOCATION_COUNT;
+/**
+ * Pass to setTestOption as the value argument to decrement the
+ * test option's current value by one instead of storing a new value.
+ * The expansion is parenthesized so the negative constant stays a single
+ * operand wherever the macro is used in an expression.
+ *
+ * @internal
+ */
+#define DECREMENT_OPTION_VALUE (-99)
+
+/**
+ * Gets the test option set on commandline.
+ *
+ * @param testOption macro definition for the individual test option
+ * @return value of test option, zero if option is not set or off
+ * @internal Internal APIs for testing purpose only
+ */
+T_CTEST_API int32_t T_CTEST_EXPORT2
+getTestOption ( int32_t testOption );
+
+/**
+ * Sets the test option with value given on commandline.
+ *
+ * @param testOption macro definition for the individual test option
+ * @param value to set the test option to
+ * @internal Internal APIs for testing purpose only
+ */
+T_CTEST_API void T_CTEST_EXPORT2
+setTestOption ( int32_t testOption, int32_t value);
/**
* Show the names of all nodes.
diff --git a/tools/ctestfw/unicode/testlog.h b/tools/ctestfw/unicode/testlog.h
index f8544423..6a6e0678 100644
--- a/tools/ctestfw/unicode/testlog.h
+++ b/tools/ctestfw/unicode/testlog.h
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2004-2008, International Business Machines Corporation and
+ * Copyright (c) 2004-2010, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -11,6 +11,7 @@
#ifndef U_TESTFW_TESTLOG
#define U_TESTFW_TESTLOG
+#include "unicode/errorcode.h"
#include "unicode/unistr.h"
#include "unicode/testtype.h"
@@ -22,9 +23,24 @@ class T_CTEST_EXPORT_API TestLog {
public:
virtual ~TestLog();
virtual void errln( const UnicodeString &message ) = 0;
+ virtual void logln( const UnicodeString &message ) = 0;
virtual void dataerrln( const UnicodeString &message ) = 0;
virtual const char* getTestDataPath(UErrorCode& err) = 0;
};
+// ErrorCode subclass for tests: failures are reported through a TestLog,
+// prefixed with the name of the calling test.
+class T_CTEST_EXPORT_API IcuTestErrorCode : public ErrorCode {
+public:
+ // Keeps a reference to callingTestClass and the callingTestName pointer itself
+ // (the string is not copied), so both must outlive this object.
+ IcuTestErrorCode(TestLog &callingTestClass, const char *callingTestName) :
+ testClass(callingTestClass), testName(callingTestName) {}
+ virtual ~IcuTestErrorCode();
+ // Returns TRUE if isFailure().
+ // fmt and the following arguments are a printf-style message.
+ UBool logIfFailureAndReset(const char *fmt, ...);
+ // Same contract as logIfFailureAndReset(); the implementation reports
+ // through TestLog::dataerrln() rather than errln().
+ UBool logDataIfFailureAndReset(const char *fmt, ...);
+protected:
+ // Overrides the ErrorCode failure hook.
+ virtual void handleFailure() const;
+private:
+ // Log sink used for every report.
+ TestLog &testClass;
+ // Name placed at the start of each reported message.
+ const char *const testName;
+};
#endif
diff --git a/tools/ctestfw/unicode/udbgutil.h b/tools/ctestfw/unicode/udbgutil.h
index f4b8ea52..b03e46f1 100644
--- a/tools/ctestfw/unicode/udbgutil.h
+++ b/tools/ctestfw/unicode/udbgutil.h
@@ -1,6 +1,6 @@
/*
************************************************************************
-* Copyright (c) 2008-2009, International Business Machines
+* Copyright (c) 2008-2010, International Business Machines
* Corporation and others. All Rights Reserved.
************************************************************************
*/
@@ -21,7 +21,8 @@ enum UDebugEnumType {
UDBG_UCalendarMonths, /* UCalendarMonths. Count= (UCAL_UNDECIMBER+1) */
UDBG_UDateFormatStyle, /* Count = UDAT_SHORT=1 */
#endif
-
+ UDBG_UPlugReason, /* Count = UPLUG_REASON_COUNT */
+ UDBG_UPlugLevel, /* COUNT = UPLUG_LEVEL_COUNT */
UDBG_UAcceptResult, /* Count = ULOC_ACCEPT_FALLBACK+1=3 */
/* All following enums may be discontiguous. */
diff --git a/tools/genbidi/genbidi.c b/tools/genbidi/genbidi.c
deleted file mode 100644
index f773b1cc..00000000
--- a/tools/genbidi/genbidi.c
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: genbidi.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004dec30
-* created by: Markus W. Scherer
-*
-* This program reads several of the Unicode character database text files,
-* parses them, and extracts the bidi/shaping properties for each character.
-* It then writes a binary file containing the properties
-* that is designed to be used directly for random-access to
-* the properties of each Unicode character.
-*/
-
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/putil.h"
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uarrsort.h"
-#include "unewdata.h"
-#include "uoptions.h"
-#include "uparse.h"
-#include "propsvec.h"
-#include "ubidi_props.h"
-#include "genbidi.h"
-
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
-
-/* data --------------------------------------------------------------------- */
-
-UPropsVectors *pv;
-
-UBool beVerbose=FALSE, haveCopyright=TRUE;
-
-/* prototypes --------------------------------------------------------------- */
-
-static UBool
-isToken(const char *token, const char *s);
-
-static void
-parseBidiMirroring(const char *filename, UErrorCode *pErrorCode);
-
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode);
-
-/* miscellaneous ------------------------------------------------------------ */
-
-/* TODO: more common code, move functions to uparse.h|c */
-
-static char *
-trimTerminateField(char *s, char *limit) {
- /* trim leading whitespace */
- s=(char *)u_skipWhitespace(s);
-
- /* trim trailing whitespace */
- while(s<limit && (*(limit-1)==' ' || *(limit-1)=='\t')) {
- --limit;
- }
- *limit=0;
-
- return s;
-}
-
-static void
-parseTwoFieldFile(char *filename, char *basename,
- const char *ucdFile, const char *suffix,
- UParseLineFn *lineFn,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-static void U_CALLCONV
-bidiClassLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode);
-
-/* parse files with single enumerated properties ---------------------------- */
-
-/* TODO: more common code, move functions to uparse.h|c */
-
-struct SingleEnum {
- const char *ucdFile, *propName;
- UProperty prop;
- int32_t vecWord, vecShift;
- uint32_t vecMask;
-};
-typedef struct SingleEnum SingleEnum;
-
-static void
-parseSingleEnumFile(char *filename, char *basename, const char *suffix,
- const SingleEnum *sen,
- UErrorCode *pErrorCode);
-
-static const SingleEnum jtSingleEnum={
- "DerivedJoiningType", "joining type",
- UCHAR_JOINING_TYPE,
- 0, UBIDI_JT_SHIFT, UBIDI_JT_MASK
-};
-
-static const SingleEnum jgSingleEnum={
- "DerivedJoiningGroup", "joining group",
- UCHAR_JOINING_GROUP,
- 1, 0, 0xff /* column 1 bits 7..0 */
-};
-
-static void U_CALLCONV
-singleEnumLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- const SingleEnum *sen;
- char *s;
- uint32_t start, end, uv;
- int32_t value;
-
- sen=(const SingleEnum *)context;
-
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* parse property alias */
- s=trimTerminateField(fields[1][0], fields[1][1]);
- value=u_getPropertyValueEnum(sen->prop, s);
- if(value<0) {
- if(sen->prop==UCHAR_BLOCK) {
- if(isToken("Greek", s)) {
- value=UBLOCK_GREEK; /* Unicode 3.2 renames this to "Greek and Coptic" */
- } else if(isToken("Combining Marks for Symbols", s)) {
- value=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */
- } else if(isToken("Private Use", s)) {
- value=UBLOCK_PRIVATE_USE; /* Unicode 3.2 renames this to "Private Use Area" */
- }
- }
- }
- if(value<0) {
- fprintf(stderr, "genbidi error: unknown %s name in %s.txt field 1 at %s\n",
- sen->propName, sen->ucdFile, s);
- exit(U_PARSE_ERROR);
- }
-
- uv=(uint32_t)(value<<sen->vecShift);
- if((uv&sen->vecMask)!=uv) {
- fprintf(stderr, "genbidi error: %s value overflow (0x%x) at %s\n",
- sen->propName, (int)uv, s);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-
- upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi error: unable to set %s code: %s\n",
- sen->propName, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-static void
-parseSingleEnumFile(char *filename, char *basename, const char *suffix,
- const SingleEnum *sen,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, sen->ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, 2, singleEnumLineFn, (void *)sen, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", sen->ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-/* parse files with multiple binary properties ------------------------------ */
-
-/* TODO: more common code, move functions to uparse.h|c */
-
-/* TODO: similar to genbidi/props2.c but not the same; same as in gencase/gencase.c */
-
-struct Binary {
- const char *propName;
- int32_t vecWord;
- uint32_t vecValue, vecMask;
-};
-typedef struct Binary Binary;
-
-struct Binaries {
- const char *ucdFile;
- const Binary *binaries;
- int32_t binariesCount;
-};
-typedef struct Binaries Binaries;
-
-static const Binary
-propListNames[]={
- { "Bidi_Control", 0, U_MASK(UBIDI_BIDI_CONTROL_SHIFT), U_MASK(UBIDI_BIDI_CONTROL_SHIFT) },
- { "Join_Control", 0, U_MASK(UBIDI_JOIN_CONTROL_SHIFT), U_MASK(UBIDI_JOIN_CONTROL_SHIFT) }
-};
-
-static const Binaries
-propListBinaries={
- "PropList", propListNames, LENGTHOF(propListNames)
-};
-
-static void U_CALLCONV
-binariesLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- const Binaries *bin;
- char *s;
- uint32_t start, end;
- int32_t i;
-
- bin=(const Binaries *)context;
-
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* parse binary property name */
- s=(char *)u_skipWhitespace(fields[1][0]);
- for(i=0;; ++i) {
- if(i==bin->binariesCount) {
- /* ignore unrecognized properties */
- return;
- }
- if(isToken(bin->binaries[i].propName, s)) {
- break;
- }
- }
-
- if(bin->binaries[i].vecMask==0) {
- fprintf(stderr, "genbidi error: mask value %d==0 for %s %s\n",
- (int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-
- upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
- bin->binaries[i].propName, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-static void
-parseBinariesFile(char *filename, char *basename, const char *suffix,
- const Binaries *bin,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, bin->ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-/* -------------------------------------------------------------------------- */
-
-enum {
- HELP_H,
- HELP_QUESTION_MARK,
- VERBOSE,
- COPYRIGHT,
- DESTDIR,
- SOURCEDIR,
- UNICODE_VERSION,
- ICUDATADIR,
- CSOURCE
-};
-
-/* Keep these values in sync with the above enums */
-static UOption options[]={
- UOPTION_HELP_H,
- UOPTION_HELP_QUESTION_MARK,
- UOPTION_VERBOSE,
- UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
- UOPTION_SOURCEDIR,
- UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
- UOPTION_ICUDATADIR,
- UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
-};
-
-extern int
-main(int argc, char* argv[]) {
- char filename[300];
- const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
- char *basename=NULL;
- UErrorCode errorCode=U_ZERO_ERROR;
-
- U_MAIN_INIT_ARGS(argc, argv);
-
- /* preset then read command line options */
- options[DESTDIR].value=u_getDataDirectory();
- options[SOURCEDIR].value="";
- options[UNICODE_VERSION].value="";
- options[ICUDATADIR].value=u_getDataDirectory();
- argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
-
- /* error handling, printing usage message */
- if(argc<0) {
- fprintf(stderr,
- "error in command line argument \"%s\"\n",
- argv[-argc]);
- }
- if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
- /*
- * Broken into chucks because the C89 standard says the minimum
- * required supported string length is 509 bytes.
- */
- fprintf(stderr,
- "Usage: %s [-options] [suffix]\n"
- "\n"
- "read the UnicodeData.txt file and other Unicode properties files and\n"
- "create a binary file " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE " with the bidi/shaping properties\n"
- "\n",
- argv[0]);
- fprintf(stderr,
- "Options:\n"
- "\t-h or -? or --help this usage text\n"
- "\t-v or --verbose verbose output\n"
- "\t-c or --copyright include a copyright notice\n"
- "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
- "\t-C or --csource generate a .c source file rather than the .icu binary\n");
- fprintf(stderr,
- "\t-d or --destdir destination directory, followed by the path\n"
- "\t-s or --sourcedir source directory, followed by the path\n"
- "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
- "\t followed by path, defaults to %s\n"
- "\tsuffix suffix that is to be appended with a '-'\n"
- "\t to the source file basenames before opening;\n"
- "\t 'genbidi new' will read UnicodeData-new.txt etc.\n",
- u_getDataDirectory());
- return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
- }
-
- /* get the options values */
- beVerbose=options[VERBOSE].doesOccur;
- haveCopyright=options[COPYRIGHT].doesOccur;
- srcDir=options[SOURCEDIR].value;
- destDir=options[DESTDIR].value;
-
- if(argc>=2) {
- suffix=argv[1];
- } else {
- suffix=NULL;
- }
-
- if(options[UNICODE_VERSION].doesOccur) {
- setUnicodeVersion(options[UNICODE_VERSION].value);
- }
- /* else use the default dataVersion in store.c */
-
- if (options[ICUDATADIR].doesOccur) {
- u_setDataDirectory(options[ICUDATADIR].value);
- }
-
- /* prepare the filename beginning with the source dir */
- uprv_strcpy(filename, srcDir);
- basename=filename+uprv_strlen(filename);
- if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
- *basename++=U_FILE_SEP_CHAR;
- }
-
- /* initialize */
- pv=upvec_open(2, &errorCode);
-
- /* process BidiMirroring.txt */
- writeUCDFilename(basename, "BidiMirroring", suffix);
- parseBidiMirroring(filename, &errorCode);
-
- /* process additional properties files */
- *basename=0;
-
- parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode);
-
- parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, &errorCode);
-
- parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, &errorCode);
-
- /* process UnicodeData.txt */
- writeUCDFilename(basename, "UnicodeData", suffix);
- parseDB(filename, &errorCode);
-
- /* set proper bidi class for unassigned code points (Cn) */
- parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, &errorCode);
-
- /* process parsed data */
- if(U_SUCCESS(errorCode)) {
- /* write the properties data file */
- generateData(destDir, options[CSOURCE].doesOccur);
- }
-
- u_cleanup();
- return errorCode;
-}
-
-U_CFUNC void
-writeUCDFilename(char *basename, const char *filename, const char *suffix) {
- int32_t length=(int32_t)uprv_strlen(filename);
- uprv_strcpy(basename, filename);
- if(suffix!=NULL) {
- basename[length++]='-';
- uprv_strcpy(basename+length, suffix);
- length+=(int32_t)uprv_strlen(suffix);
- }
- uprv_strcpy(basename+length, ".txt");
-}
-
-/* TODO: move to toolutil */
-static UBool
-isToken(const char *token, const char *s) {
- const char *z;
- int32_t j;
-
- s=u_skipWhitespace(s);
- for(j=0;; ++j) {
- if(token[j]!=0) {
- if(s[j]!=token[j]) {
- break;
- }
- } else {
- z=u_skipWhitespace(s+j);
- if(*z==';' || *z==0) {
- return TRUE;
- } else {
- break;
- }
- }
- }
-
- return FALSE;
-}
-
-/* parser for BidiMirroring.txt --------------------------------------------- */
-
-static void U_CALLCONV
-mirrorLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- char *end;
- UChar32 src, mirror;
-
- src=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
- if(end<=fields[0][0] || end!=fields[0][1]) {
- fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 0 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- mirror=(UChar32)uprv_strtoul(fields[1][0], &end, 16);
- if(end<=fields[1][0] || end!=fields[1][1]) {
- fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 1 at %s\n", fields[1][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- addMirror(src, mirror);
-}
-
-static void
-parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- u_parseDelimitedFile(filename, ';', fields, 2, mirrorLineFn, NULL, pErrorCode);
-}
-
-/* parser for UnicodeData.txt ----------------------------------------------- */
-
-static void U_CALLCONV
-unicodeDataLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- char *end;
- UErrorCode errorCode;
- UChar32 c;
-
- errorCode=U_ZERO_ERROR;
-
- /* get the character code, field 0 */
- c=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
- if(end<=fields[0][0] || end!=fields[0][1]) {
- fprintf(stderr, "genbidi: syntax error in field 0 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* get Mirrored flag, field 9 */
- if(*fields[9][0]=='Y') {
- upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
- (long)c, u_errorName(errorCode));
- exit(errorCode);
- }
- } else if(fields[9][1]-fields[9][0]!=1 || *fields[9][0]!='N') {
- fprintf(stderr, "genbidi: syntax error in field 9 at U+%04lx\n",
- (long)c);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-}
-
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode) {
- /* default Bidi classes for unassigned code points */
- static const UChar32 defaultBidi[][3]={ /* { start, end, class } */
- /* R: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF */
- { 0x0590, 0x05FF, U_RIGHT_TO_LEFT },
- { 0x07C0, 0x08FF, U_RIGHT_TO_LEFT },
- { 0xFB1D, 0xFB4F, U_RIGHT_TO_LEFT },
- { 0x10800, 0x10FFF, U_RIGHT_TO_LEFT },
-
- /* AL: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE */
- { 0x0600, 0x07BF, U_RIGHT_TO_LEFT_ARABIC },
- { 0xFB50, 0xFDCF, U_RIGHT_TO_LEFT_ARABIC },
- { 0xFDF0, 0xFDFF, U_RIGHT_TO_LEFT_ARABIC },
- { 0xFE70, 0xFEFE, U_RIGHT_TO_LEFT_ARABIC }
-
- /* L otherwise */
- };
-
- char *fields[15][2];
- UChar32 start, end;
- int32_t i;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /*
- * Set default Bidi classes for unassigned code points.
- * See the documentation for Bidi_Class in UCD.html in the Unicode data.
- * http://www.unicode.org/Public/
- *
- * Starting with Unicode 5.0, DerivedBidiClass.txt should (re)set
- * the Bidi_Class values for all code points including unassigned ones
- * and including L values for these.
- * This code becomes unnecesary but harmless. Leave it for now in case
- * someone uses genbidi on pre-Unicode 5.0 data.
- */
- for(i=0; i<LENGTHOF(defaultBidi); ++i) {
- start=defaultBidi[i][0];
- end=defaultBidi[i][1];
- upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
- (long)start, (long)end, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
- }
-
- u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-}
-
-/* DerivedBidiClass.txt ----------------------------------------------------- */
-
-static void U_CALLCONV
-bidiClassLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- char *s;
- uint32_t start, end, value;
-
- /* get the code point range */
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* parse bidi class */
- s=trimTerminateField(fields[1][0], fields[1][1]);
- value=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, s);
- if((int32_t)value<0) {
- fprintf(stderr, "genbidi error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s);
- exit(U_PARSE_ERROR);
- }
-
- upvec_setValue(pv, start, end, 0, value, UBIDI_CLASS_MASK, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n",
- (int)start, (int)end, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/genbidi/genbidi.h b/tools/genbidi/genbidi.h
deleted file mode 100644
index ab1ac417..00000000
--- a/tools/genbidi/genbidi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: genbidi.h
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004dec30
-* created by: Markus W. Scherer
-*/
-
-#ifndef __GENBIDI_H__
-#define __GENBIDI_H__
-
-#include "unicode/utypes.h"
-#include "propsvec.h"
-
-U_CDECL_BEGIN
-
-/* genbidi ------------------------------------------------------------------ */
-
-/* global flags */
-extern UBool beVerbose, haveCopyright;
-
-/* properties vectors in genbidi.c */
-extern UPropsVectors *pv;
-
-/* prototypes */
-U_CFUNC void
-writeUCDFilename(char *basename, const char *filename, const char *suffix);
-
-extern void
-setUnicodeVersion(const char *v);
-
-extern void
-addMirror(UChar32 src, UChar32 mirror);
-
-extern void
-generateData(const char *dataDir, UBool csource);
-
-U_CDECL_END
-
-#endif
diff --git a/tools/genbidi/genbidi.vcproj b/tools/genbidi/genbidi.vcproj
deleted file mode 100644
index 56682cbc..00000000
--- a/tools/genbidi/genbidi.vcproj
+++ /dev/null
@@ -1,422 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="9.00"
- Name="genbidi"
- ProjectGUID="{DB312A49-12A9-4E07-9E96-451DC2D8FF62}"
- TargetFrameworkVersion="131072"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- <Platform
- Name="x64"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Release|Win32"
- OutputDirectory=".\x86\Release"
- IntermediateDirectory=".\x86\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/genbidi.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/genbidi.pch"
- AssemblerListingLocation=".\x86\Release/"
- ObjectFile=".\x86\Release/"
- ProgramDataBaseFileName=".\x86\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Release/genbidi.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/genbidi.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory=".\x86\Debug"
- IntermediateDirectory=".\x86\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/genbidi.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/genbidi.pch"
- AssemblerListingLocation=".\x86\Debug/"
- ObjectFile=".\x86\Debug/"
- ProgramDataBaseFileName=".\x86\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="4"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Debug/genbidi.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/genbidi.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|x64"
- OutputDirectory=".\x64\Release"
- IntermediateDirectory=".\x64\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/genbidi.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/genbidi.pch"
- AssemblerListingLocation=".\x64\Release/"
- ObjectFile=".\x64\Release/"
- ProgramDataBaseFileName=".\x64\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Release/genbidi.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/genbidi.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|x64"
- OutputDirectory=".\x64\Debug"
- IntermediateDirectory=".\x64\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/genbidi.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/genbidi.pch"
- AssemblerListingLocation=".\x64\Debug/"
- ObjectFile=".\x64\Debug/"
- ProgramDataBaseFileName=".\x64\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="3"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Debug/genbidi.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/genbidi.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="c;cpp;rc"
- >
- <File
- RelativePath=".\genbidi.c"
- >
- </File>
- <File
- RelativePath=".\store.c"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h"
- >
- <File
- RelativePath=".\genbidi.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/tools/genbidi/store.c b/tools/genbidi/store.c
deleted file mode 100644
index dbbf5e5b..00000000
--- a/tools/genbidi/store.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: store.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004dec30
-* created by: Markus W. Scherer
-*
-* Store Unicode bidi/shaping properties efficiently for
-* random access.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "utrie.h"
-#include "utrie2.h"
-#include "uarrsort.h"
-#include "unicode/udata.h"
-#include "unewdata.h"
-#include "propsvec.h"
-#include "writesrc.h"
-#include "ubidi_props.h"
-#include "genbidi.h"
-
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
-
-/* Unicode bidi/shaping properties file format ---------------------------------
-
-The file format prepared and written here contains several data
-structures that store indexes or data.
-
-Before the data contents described below, there are the headers required by
-the udata API for loading ICU data. Especially, a UDataInfo structure
-precedes the actual data. It contains platform properties values and the
-file format version.
-
-The following is a description of format version 1.0 .
-
-The file contains the following structures:
-
- const int32_t indexes[i0] with values i0, i1, ...:
- (see UBIDI_IX_... constants for names of indexes)
-
- i0 indexLength; -- length of indexes[] (UBIDI_IX_TOP)
- i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
- i2 trieSize; -- size in bytes of the bidi/shaping properties trie
- i3 mirrorLength; -- length in uint32_t of the bidi mirroring array
-
- i4 jgStart; -- first code point with Joining_Group data
- i5 jgLimit; -- limit code point for Joining_Group data
-
- i6..i14 reservedIndexes; -- reserved values; 0 for now
-
- i15 maxValues; -- maximum code values for enumerated properties
- bits 23..16 contain the max value for Joining_Group,
- otherwise the bits are used like enum fields in the trie word
-
- Serialized trie, see utrie.h;
-
- const uint32_t mirrors[mirrorLength];
-
- const uint8_t jgArray[i5-i4]; -- (i5-i4) is always a multiple of 4
-
-Trie data word:
-Bits
-15..13 signed delta to bidi mirroring code point
- (add delta to input code point)
- 0 no such code point (source maps to itself)
- -3..-1, 1..3 delta
- -4 look in mirrors table
- 12 is mirrored
- 11 Bidi_Control
- 10 Join_Control
- 9.. 8 reserved (set to 0)
- 7.. 5 Joining_Type
- 4.. 0 BiDi category
-
-
-Mirrors:
-Stores some of the bidi mirroring data, where each code point maps to
-at most one other.
-Most code points do not have a mirroring code point; most that do have a signed
-delta stored in the trie data value. Only those where the delta does not fit
-into the trie data are stored in this table.
-
-Logically, this is a two-column table with source and mirror code points.
-
-Physically, the table is compressed by taking advantage of the fact that each
-mirror code point is also a source code point
-(each of them is a mirror of the other).
-Therefore, both logical columns contain the same set of code points, which needs
-to be stored only once.
-
-The table stores source code points, and also for each the index of its mirror
-code point in the same table, in a simple array of uint32_t.
-Bits
-31..21 index to mirror code point (unsigned)
-20.. 0 source code point
-
-The table is sorted by source code points.
-
-
-Joining_Group array:
-The Joining_Group values do not fit into the 16-bit trie, but the data is also
-limited to a small range of code points (Arabic and Syriac) and not
-well compressible.
-
-The start and limit code points for the range are stored in the indexes[]
-array, and the jgArray[] stores a byte for each of these code points,
-containing the Joining_Group value.
-
-All code points outside of this range have No_Joining_Group (0).
-
------------------------------------------------------------------------------ */
-
-/* UDataInfo cf. udata.h */
-static UDataInfo dataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- /* dataFormat="BiDi" */
- { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
- { 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
- { 4, 0, 1, 0 } /* dataVersion */
-};
-
-/* exceptions values */
-static uint32_t mirrors[UBIDI_MAX_MIRROR_INDEX+1][2];
-static uint16_t mirrorTop=0;
-
-/* -------------------------------------------------------------------------- */
-
-extern void
-setUnicodeVersion(const char *v) {
- UVersionInfo version;
- u_versionFromString(version, v);
- uprv_memcpy(dataInfo.dataVersion, version, 4);
-}
-
-/* bidi mirroring table ----------------------------------------------------- */
-
-extern void
-addMirror(UChar32 src, UChar32 mirror) {
- UErrorCode errorCode;
- int32_t delta;
-
- delta=mirror-src;
- if(delta==0) {
- return; /* mapping to self=no mapping */
- }
-
- if(delta<UBIDI_MIN_MIRROR_DELTA || UBIDI_MAX_MIRROR_DELTA<delta) {
- /* delta does not fit into the trie properties value, store in the mirrors[] table */
- if(mirrorTop==LENGTHOF(mirrors)) {
- fprintf(stderr, "genbidi error: too many long-distance mirroring mappings\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- /* possible: search the table so far and see if src is already listed */
-
- mirrors[mirrorTop][0]=(uint32_t)src;
- mirrors[mirrorTop][1]=(uint32_t)mirror;
- ++mirrorTop;
-
- /* set an escape marker in src's properties */
- delta=UBIDI_ESC_MIRROR_DELTA;
- }
-
- errorCode=U_ZERO_ERROR;
- upvec_setValue(
- pv, src, src, 0,
- (uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT, (uint32_t)(-1)<<UBIDI_MIRROR_DELTA_SHIFT,
- &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genbidi error: unable to set mirroring delta, code: %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-}
-
-static int32_t U_CALLCONV
-compareMirror(const void *context, const void *left, const void *right) {
- UChar32 l, r;
-
- l=UBIDI_GET_MIRROR_CODE_POINT(((const uint32_t *)left)[0]);
- r=UBIDI_GET_MIRROR_CODE_POINT(((const uint32_t *)right)[0]);
- return l-r;
-}
-
-static void
-makeMirror() {
- uint32_t *reducedMirror;
- UErrorCode errorCode;
- int32_t i, j, start, limit, step;
- uint32_t c;
-
- /* sort the mirroring table by source code points */
- errorCode=U_ZERO_ERROR;
- uprv_sortArray(mirrors, mirrorTop, 8,
- compareMirror, NULL, FALSE, &errorCode);
-
- /*
- * reduce the 2-column table to a single column
- * by putting the index to the mirror entry into the source entry
- *
- * first:
- * find each mirror code point in the source column and set each other's indexes
- *
- * second:
- * reduce the table, combine the source code points with their indexes
- * and store as a simple array of uint32_t
- */
- for(i=0; i<mirrorTop; ++i) {
- c=mirrors[i][1]; /* mirror code point */
- if(c>0x1fffff) {
- continue; /* this entry already has an index */
- }
-
- /* search for the mirror code point in the source column */
- if(c<mirrors[i][0]) {
- /* search before i */
- start=i-1;
- limit=-1;
- step=-1;
- } else {
- start=i+1;
- limit=mirrorTop;
- step=1;
- }
-
- for(j=start;; j+=step) {
- if(j==limit) {
- fprintf(stderr,
- "genbidi error: bidi mirror does not roundtrip - %04lx->%04lx->?\n",
- (long)mirrors[i][0], (long)mirrors[i][1]);
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- if(c==mirrors[j][0]) {
- /*
- * found the mirror code point c in the source column,
- * set both entries' indexes to each other
- */
- if(UBIDI_GET_MIRROR_CODE_POINT(mirrors[i][0])!=UBIDI_GET_MIRROR_CODE_POINT(mirrors[j][1])) {
- /* roundtrip check fails */
- fprintf(stderr,
- "genbidi error: bidi mirrors do not roundtrip - %04lx->%04lx->%04lx\n",
- (long)mirrors[i][0], (long)mirrors[i][1], (long)mirrors[j][1]);
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- } else {
- mirrors[i][1]|=(uint32_t)j<<UBIDI_MIRROR_INDEX_SHIFT;
- mirrors[j][1]|=(uint32_t)i<<UBIDI_MIRROR_INDEX_SHIFT;
- }
- break;
- }
- }
- }
-
- /* now the second step, the actual reduction of the table */
- reducedMirror=mirrors[0];
- for(i=0; i<mirrorTop; ++i) {
- reducedMirror[i]=mirrors[i][0]|(mirrors[i][1]&~0x1fffff);
- }
-
- if(U_FAILURE(errorCode)) {
- exit(errorCode);
- }
-}
-
-/* generate output data ----------------------------------------------------- */
-
-extern void
-generateData(const char *dataDir, UBool csource) {
- static int32_t indexes[UBIDI_IX_TOP]={
- UBIDI_IX_TOP
- };
- static uint8_t trieBlock[40000];
- static uint8_t jgArray[0x300]; /* at most for U+0600..U+08FF */
-
- const uint32_t *row;
- UChar32 start, end, prev, jgStart;
- int32_t i;
-
- UNewDataMemory *pData;
- UNewTrie *pTrie;
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t trieSize;
- long dataLength;
-
- makeMirror();
-
- pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
- if(pTrie==NULL) {
- fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
-
- prev=jgStart=0;
- for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP; ++i) {
- /* store most values from vector column 0 in the trie */
- if(!utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
- fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- /* store Joining_Group values from vector column 1 in a simple byte array */
- if(row[1]!=0) {
- if(start<0x600 || 0x8ff<end) {
- fprintf(stderr, "genbidi error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
- (long)start, (long)end);
- exit(U_ILLEGAL_ARGUMENT_ERROR);
- }
-
- if(prev==0) {
- /* first code point with any value */
- prev=jgStart=start;
- } else {
- /* add No_Joining_Group for code points between prev and start */
- while(prev<start) {
- jgArray[prev++ -jgStart]=0;
- }
- }
-
- /* set Joining_Group value for start..end */
- while(prev<=end) {
- jgArray[prev++ -jgStart]=(uint8_t)row[1];
- }
- }
- }
-
- /* finish jgArray, pad to multiple of 4 */
- while((prev-jgStart)&3) {
- jgArray[prev++ -jgStart]=0;
- }
- indexes[UBIDI_IX_JG_START]=jgStart;
- indexes[UBIDI_IX_JG_LIMIT]=prev;
-
- trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
- exit(errorCode);
- }
-
- indexes[UBIDI_IX_TRIE_SIZE]=trieSize;
- indexes[UBIDI_IX_MIRROR_LENGTH]=mirrorTop;
- indexes[UBIDI_IX_LENGTH]=
- (int32_t)sizeof(indexes)+
- trieSize+
- 4*mirrorTop+
- (prev-jgStart);
-
- if(beVerbose) {
- printf("trie size in bytes: %5d\n", (int)trieSize);
- printf("size in bytes of mirroring table: %5d\n", (int)(4*mirrorTop));
- printf("length of Joining_Group array: %5d (U+%04x..U+%04x)\n", (int)(prev-jgStart), (int)jgStart, (int)(prev-1));
- printf("data size: %5d\n", (int)indexes[UBIDI_IX_LENGTH]);
- }
-
- indexes[UBIDI_MAX_VALUES_INDEX]=
- ((int32_t)U_CHAR_DIRECTION_COUNT-1)|
- (((int32_t)U_JT_COUNT-1)<<UBIDI_JT_SHIFT)|
- (((int32_t)U_JG_COUNT-1)<<UBIDI_MAX_JG_SHIFT);
-
- if(csource) {
- /* write .c file for hardcoded data */
- UTrie trie={ NULL };
- UTrie2 *trie2;
- FILE *f;
-
- utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genbidi error: failed to utrie_unserialize(ubidi.icu trie) - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-
- /* use UTrie2 */
- dataInfo.formatVersion[0]=2;
- dataInfo.formatVersion[2]=0;
- dataInfo.formatVersion[3]=0;
- trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genbidi error: utrie2_fromUTrie() failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- {
- /* delete lead surrogate code unit values */
- UChar lead;
- trie2=utrie2_cloneAsThawed(trie2, &errorCode);
- for(lead=0xd800; lead<0xdc00; ++lead) {
- utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
- }
- utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genbidi error: deleting lead surrogate code unit values failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- }
-
- f=usrc_create(dataDir, "ubidi_props_data.c");
- if(f!=NULL) {
- usrc_writeArray(f,
- "static const UVersionInfo ubidi_props_dataVersion={",
- dataInfo.dataVersion, 8, 4,
- "};\n\n");
- usrc_writeArray(f,
- "static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={",
- indexes, 32, UBIDI_IX_TOP,
- "};\n\n");
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
- trie2,
- "\n};\n\n");
- usrc_writeArray(f,
- "static const uint32_t ubidi_props_mirrors[%ld]={\n",
- mirrors, 32, mirrorTop,
- "\n};\n\n");
- usrc_writeArray(f,
- "static const uint8_t ubidi_props_jgArray[%ld]={\n",
- jgArray, 8, prev-jgStart,
- "\n};\n\n");
- fputs(
- "static const UBiDiProps ubidi_props_singleton={\n"
- " NULL,\n"
- " ubidi_props_indexes,\n"
- " ubidi_props_mirrors,\n"
- " ubidi_props_jgArray,\n",
- f);
- usrc_writeUTrie2Struct(f,
- " {\n",
- trie2, "ubidi_props_trieIndex", NULL,
- " },\n");
- usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
- fputs("};\n", f);
- fclose(f);
- }
- utrie2_close(trie2);
- } else {
- /* write the data */
- pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
- haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
-
- udata_writeBlock(pData, indexes, sizeof(indexes));
- udata_writeBlock(pData, trieBlock, trieSize);
- udata_writeBlock(pData, mirrors, 4*mirrorTop);
- udata_writeBlock(pData, jgArray, prev-jgStart);
-
- /* finish up */
- dataLength=udata_finish(pData, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genbidi: error %d writing the output file\n", errorCode);
- exit(errorCode);
- }
-
- if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
- fprintf(stderr, "genbidi: data length %ld != calculated size %d\n",
- dataLength, (int)indexes[UBIDI_IX_LENGTH]);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- }
-
- utrie_close(pTrie);
- upvec_close(pv);
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/genbrk/genbrk.cpp b/tools/genbrk/genbrk.cpp
index ddbcfc2d..72c6cffe 100644
--- a/tools/genbrk/genbrk.cpp
+++ b/tools/genbrk/genbrk.cpp
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2002-2006, International Business Machines
+* Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -74,7 +74,7 @@ void usageAndDie(int retCode) {
}
-#if UCONFIG_NO_BREAK_ITERATION
+#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
/* dummy UDataInfo cf. udata.h */
static UDataInfo dummyDataInfo = {
@@ -159,13 +159,6 @@ int main(int argc, char **argv) {
u_setDataDirectory(options[5].value);
}
- /* Initialize ICU */
- u_init(&status);
- if (U_FAILURE(status)) {
- fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
- argv[0], u_errorName(status));
- exit(1);
- }
status = U_ZERO_ERROR;
/* Combine the directory with the file name */
@@ -176,13 +169,13 @@ int main(int argc, char **argv) {
copyright = U_COPYRIGHT_STRING;
}
-#if UCONFIG_NO_BREAK_ITERATION
+#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
UNewDataMemory *pData;
char msg[1024];
/* write message with just the name */
- sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION, see uconfig.h", outFileName);
+ sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
fprintf(stderr, "%s\n", msg);
/* write the dummy data file */
@@ -192,6 +185,14 @@ int main(int argc, char **argv) {
return (int)status;
#else
+ /* Initialize ICU */
+ u_init(&status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
+ argv[0], u_errorName(status));
+ exit(1);
+ }
+ status = U_ZERO_ERROR;
//
// Read in the rule source file
diff --git a/tools/gencase/Makefile.in b/tools/gencase/Makefile.in
deleted file mode 100644
index 7ab1d619..00000000
--- a/tools/gencase/Makefile.in
+++ /dev/null
@@ -1,95 +0,0 @@
-## Makefile.in for ICU - tools/gencase
-## Copyright (c) 1999-2005, International Business Machines Corporation and
-## others. All Rights Reserved.
-## Steven R. Loomis
-
-## Source directory information
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-
-top_builddir = ../..
-
-include $(top_builddir)/icudefs.mk
-
-## Build directory information
-subdir = tools/gencase
-
-TARGET_STUB_NAME = gencase
-
-SECTION = 8
-
-#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
-
-
-## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS) $(MAN_FILES)
-
-## Target information
-TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
-
-ifneq ($(top_builddir),$(top_srcdir))
-CPPFLAGS += -I$(top_builddir)/common
-endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
-LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-
-OBJECTS = gencase.o store.o
-
-DEPS = $(OBJECTS:.o=.d)
-
-## List of phony targets
-.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man
-
-## Clear suffix list
-.SUFFIXES :
-
-## List of standard targets
-all: all-local
-install: install-local
-clean: clean-local
-distclean : distclean-local
-dist: dist-local
-check: all check-local
-
-all-local: $(TARGET) $(MAN_FILES)
-
-install-local: all-local install-man
-
-install-man: $(MAN_FILES)
-# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
-# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
-
-dist-local:
-
-clean-local:
- test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(OBJECTS)
-
-distclean-local: clean-local
- $(RMV) Makefile
-
-check-local: all-local
-
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(TARGET) : $(OBJECTS)
- $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
- $(POST_BUILD_STEP)
-
-
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
-ifeq (,$(MAKECMDGOALS))
--include $(DEPS)
-else
-ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
--include $(DEPS)
-endif
-endif
-
diff --git a/tools/gencase/gencase.c b/tools/gencase/gencase.c
deleted file mode 100644
index 0ef684a7..00000000
--- a/tools/gencase/gencase.c
+++ /dev/null
@@ -1,833 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: gencase.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug28
-* created by: Markus W. Scherer
-*
-* This program reads several of the Unicode character database text files,
-* parses them, and the case mapping properties for each character.
-* It then writes a binary file containing the properties
-* that is designed to be used directly for random-access to
-* the properties of each Unicode character.
-*/
-
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/uset.h"
-#include "unicode/putil.h"
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uarrsort.h"
-#include "unewdata.h"
-#include "uoptions.h"
-#include "uparse.h"
-#include "uprops.h"
-#include "propsvec.h"
-#include "gencase.h"
-
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
-
-/* data --------------------------------------------------------------------- */
-
-UPropsVectors *pv;
-
-UBool beVerbose=FALSE, haveCopyright=TRUE;
-
-/*
- * Unicode set collecting the case-sensitive characters;
- * see uchar.h UCHAR_CASE_SENSITIVE.
- * Add code points from case mappings/foldings in
- * the root locale and with default options.
- */
-static USet *caseSensitive;
-
-/* prototypes --------------------------------------------------------------- */
-
-static void
-parseSpecialCasing(const char *filename, UErrorCode *pErrorCode);
-
-static void
-parseCaseFolding(const char *filename, UErrorCode *pErrorCode);
-
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode);
-
-/* parse files with multiple binary properties ------------------------------ */
-
-/* TODO: more common code, move functions to uparse.h|c */
-
-/* TODO: similar to genprops/props2.c but not the same */
-
-struct Binary {
- const char *propName;
- int32_t vecWord;
- uint32_t vecValue, vecMask;
-};
-typedef struct Binary Binary;
-
-struct Binaries {
- const char *ucdFile;
- const Binary *binaries;
- int32_t binariesCount;
-};
-typedef struct Binaries Binaries;
-
-static const Binary
-propListNames[]={
- { "Soft_Dotted", 0, UCASE_SOFT_DOTTED, UCASE_DOT_MASK }
-};
-
-static const Binaries
-propListBinaries={
- "PropList", propListNames, LENGTHOF(propListNames)
-};
-
-static const Binary
-derCorePropsNames[]={
- { "Lowercase", 0, UCASE_LOWER, UCASE_TYPE_MASK },
- { "Uppercase", 0, UCASE_UPPER, UCASE_TYPE_MASK }
-};
-
-static const Binaries
-derCorePropsBinaries={
- "DerivedCoreProperties", derCorePropsNames, LENGTHOF(derCorePropsNames)
-};
-
-/*
- * Treat Word_Break=MidLetter and MidNumLet as a single binary property.
- * We need not distinguish between them because both add to case-ignorable.
- * We ignore all other Word_Break values.
- */
-static const Binary
-wordBreakNames[]={
- { "MidLetter", 1, U_MASK(UGENCASE_IS_MID_LETTER_SHIFT), U_MASK(UGENCASE_IS_MID_LETTER_SHIFT) },
- { "MidNumLet", 1, U_MASK(UGENCASE_IS_MID_LETTER_SHIFT), U_MASK(UGENCASE_IS_MID_LETTER_SHIFT) }
-};
-
-static const Binaries
-wordBreakBinaries={
- "WordBreakProperty", wordBreakNames, LENGTHOF(wordBreakNames)
-};
-
-static void U_CALLCONV
-binariesLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- const Binaries *bin;
- char *s;
- uint32_t start, end;
- int32_t i;
-
- bin=(const Binaries *)context;
-
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gencase: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* parse binary property name */
- s=(char *)u_skipWhitespace(fields[1][0]);
- for(i=0;; ++i) {
- if(i==bin->binariesCount) {
- /* ignore unrecognized properties */
- return;
- }
- if(isToken(bin->binaries[i].propName, s)) {
- break;
- }
- }
-
- if(bin->binaries[i].vecMask==0) {
- fprintf(stderr, "gencase error: mask value %d==0 for %s %s\n",
- (int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-
- upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gencase error: unable to set %s, code: %s\n",
- bin->binaries[i].propName, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-static void
-parseBinariesFile(char *filename, char *basename, const char *suffix,
- const Binaries *bin,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, bin->ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-/* -------------------------------------------------------------------------- */
-
-enum
-{
- HELP_H,
- HELP_QUESTION_MARK,
- VERBOSE,
- COPYRIGHT,
- DESTDIR,
- SOURCEDIR,
- UNICODE_VERSION,
- ICUDATADIR,
- CSOURCE
-};
-
-/* Keep these values in sync with the above enums */
-static UOption options[]={
- UOPTION_HELP_H,
- UOPTION_HELP_QUESTION_MARK,
- UOPTION_VERBOSE,
- UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
- UOPTION_SOURCEDIR,
- UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
- UOPTION_ICUDATADIR,
- UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
-};
-
-extern int
-main(int argc, char* argv[]) {
- char filename[300];
- const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
- char *basename=NULL;
- UErrorCode errorCode=U_ZERO_ERROR;
-
- U_MAIN_INIT_ARGS(argc, argv);
-
- /* preset then read command line options */
- options[DESTDIR].value=u_getDataDirectory();
- options[SOURCEDIR].value="";
- options[UNICODE_VERSION].value="";
- options[ICUDATADIR].value=u_getDataDirectory();
- argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
-
- /* error handling, printing usage message */
- if(argc<0) {
- fprintf(stderr,
- "error in command line argument \"%s\"\n",
- argv[-argc]);
- }
- if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
- /*
- * Broken into chucks because the C89 standard says the minimum
- * required supported string length is 509 bytes.
- */
- fprintf(stderr,
- "Usage: %s [-options] [suffix]\n"
- "\n"
- "read the UnicodeData.txt file and other Unicode properties files and\n"
- "create a binary file " UCASE_DATA_NAME "." UCASE_DATA_TYPE " with the case mapping properties\n"
- "\n",
- argv[0]);
- fprintf(stderr,
- "Options:\n"
- "\t-h or -? or --help this usage text\n"
- "\t-v or --verbose verbose output\n"
- "\t-c or --copyright include a copyright notice\n"
- "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
- "\t-C or --csource generate a .c source file rather than the .icu binary\n");
- fprintf(stderr,
- "\t-d or --destdir destination directory, followed by the path\n"
- "\t-s or --sourcedir source directory, followed by the path\n"
- "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
- "\t followed by path, defaults to %s\n"
- "\tsuffix suffix that is to be appended with a '-'\n"
- "\t to the source file basenames before opening;\n"
- "\t 'gencase new' will read UnicodeData-new.txt etc.\n",
- u_getDataDirectory());
- return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
- }
-
- /* get the options values */
- beVerbose=options[VERBOSE].doesOccur;
- haveCopyright=options[COPYRIGHT].doesOccur;
- srcDir=options[SOURCEDIR].value;
- destDir=options[DESTDIR].value;
-
- if(argc>=2) {
- suffix=argv[1];
- } else {
- suffix=NULL;
- }
-
- if(options[UNICODE_VERSION].doesOccur) {
- setUnicodeVersion(options[UNICODE_VERSION].value);
- }
- /* else use the default dataVersion in store.c */
-
- if (options[ICUDATADIR].doesOccur) {
- u_setDataDirectory(options[ICUDATADIR].value);
- }
-
- /* prepare the filename beginning with the source dir */
- uprv_strcpy(filename, srcDir);
- basename=filename+uprv_strlen(filename);
- if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
- *basename++=U_FILE_SEP_CHAR;
- }
-
- /* initialize */
- pv=upvec_open(2, &errorCode);
- caseSensitive=uset_open(1, 0); /* empty set (start>end) */
-
- /* process SpecialCasing.txt */
- writeUCDFilename(basename, "SpecialCasing", suffix);
- parseSpecialCasing(filename, &errorCode);
-
- /* process CaseFolding.txt */
- writeUCDFilename(basename, "CaseFolding", suffix);
- parseCaseFolding(filename, &errorCode);
-
- /* process additional properties files */
- *basename=0;
-
- parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode);
-
- parseBinariesFile(filename, basename, suffix, &derCorePropsBinaries, &errorCode);
-
- if(ucdVersion>=UNI_4_1) {
- parseBinariesFile(filename, basename, suffix, &wordBreakBinaries, &errorCode);
- }
-
- /* process UnicodeData.txt */
- writeUCDFilename(basename, "UnicodeData", suffix);
- parseDB(filename, &errorCode);
-
- /* process parsed data */
- makeCaseClosure();
-
- makeExceptions();
-
- if(U_SUCCESS(errorCode)) {
- /* write the properties data file */
- generateData(destDir, options[CSOURCE].doesOccur);
- }
-
- u_cleanup();
- return errorCode;
-}
-
-U_CFUNC void
-writeUCDFilename(char *basename, const char *filename, const char *suffix) {
- int32_t length=(int32_t)uprv_strlen(filename);
- uprv_strcpy(basename, filename);
- if(suffix!=NULL) {
- basename[length++]='-';
- uprv_strcpy(basename+length, suffix);
- length+=(int32_t)uprv_strlen(suffix);
- }
- uprv_strcpy(basename+length, ".txt");
-}
-
-/* TODO: move to toolutil */
-U_CFUNC UBool
-isToken(const char *token, const char *s) {
- const char *z;
- int32_t j;
-
- s=u_skipWhitespace(s);
- for(j=0;; ++j) {
- if(token[j]!=0) {
- if(s[j]!=token[j]) {
- break;
- }
- } else {
- z=u_skipWhitespace(s+j);
- if(*z==';' || *z==0) {
- return TRUE;
- } else {
- break;
- }
- }
- }
-
- return FALSE;
-}
-
-static int32_t
-getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
- const char *t, *z;
- int32_t i, j;
-
- s=u_skipWhitespace(s);
- for(i=0; i<countTokens; ++i) {
- t=tokens[i];
- if(t!=NULL) {
- for(j=0;; ++j) {
- if(t[j]!=0) {
- if(s[j]!=t[j]) {
- break;
- }
- } else {
- z=u_skipWhitespace(s+j);
- if(*z==';' || *z==0 || *z=='#' || *z=='\r' || *z=='\n') {
- return i;
- } else {
- break;
- }
- }
- }
- }
- }
- return -1;
-}
-
-static void
-_set_addAll(USet *set, const UChar *s, int32_t length) {
- UChar32 c;
- int32_t i;
-
- /* needs length>=0 */
- for(i=0; i<length; /* U16_NEXT advances i */) {
- U16_NEXT(s, i, length, c);
- uset_add(set, c);
- }
-}
-
-/* parser for SpecialCasing.txt --------------------------------------------- */
-
-#define MAX_SPECIAL_CASING_COUNT 500
-
-static SpecialCasing specialCasings[MAX_SPECIAL_CASING_COUNT];
-static int32_t specialCasingCount=0;
-
-static void U_CALLCONV
-specialCasingLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- char *end;
-
- /* get code point */
- specialCasings[specialCasingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
- end=(char *)u_skipWhitespace(end);
- if(end<=fields[0][0] || end!=fields[0][1]) {
- fprintf(stderr, "gencase: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* is this a complex mapping? */
- if(*(end=(char *)u_skipWhitespace(fields[4][0]))!=0 && *end!=';' && *end!='#') {
- /* there is some condition text in the fifth field */
- specialCasings[specialCasingCount].isComplex=TRUE;
-
- /* do not store any actual mappings for this */
- specialCasings[specialCasingCount].lowerCase[0]=0;
- specialCasings[specialCasingCount].upperCase[0]=0;
- specialCasings[specialCasingCount].titleCase[0]=0;
- } else {
- /* just set the "complex" flag and get the case mappings */
- specialCasings[specialCasingCount].isComplex=FALSE;
- specialCasings[specialCasingCount].lowerCase[0]=
- (UChar)u_parseString(fields[1][0], specialCasings[specialCasingCount].lowerCase+1, 31, NULL, pErrorCode);
- specialCasings[specialCasingCount].upperCase[0]=
- (UChar)u_parseString(fields[3][0], specialCasings[specialCasingCount].upperCase+1, 31, NULL, pErrorCode);
- specialCasings[specialCasingCount].titleCase[0]=
- (UChar)u_parseString(fields[2][0], specialCasings[specialCasingCount].titleCase+1, 31, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gencase: error parsing special casing at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
-
- uset_add(caseSensitive, (UChar32)specialCasings[specialCasingCount].code);
- _set_addAll(caseSensitive, specialCasings[specialCasingCount].lowerCase+1, specialCasings[specialCasingCount].lowerCase[0]);
- _set_addAll(caseSensitive, specialCasings[specialCasingCount].upperCase+1, specialCasings[specialCasingCount].upperCase[0]);
- _set_addAll(caseSensitive, specialCasings[specialCasingCount].titleCase+1, specialCasings[specialCasingCount].titleCase[0]);
- }
-
- if(++specialCasingCount==MAX_SPECIAL_CASING_COUNT) {
- fprintf(stderr, "gencase: too many special casing mappings\n");
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
-}
-
-static int32_t U_CALLCONV
-compareSpecialCasings(const void *context, const void *left, const void *right) {
- return ((const SpecialCasing *)left)->code-((const SpecialCasing *)right)->code;
-}
-
-static void
-parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) {
- char *fields[5][2];
- int32_t i, j;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- u_parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL, pErrorCode);
-
- /* sort the special casing entries by code point */
- if(specialCasingCount>0) {
- uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
- compareSpecialCasings, NULL, FALSE, pErrorCode);
- }
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /* replace multiple entries for any code point by one "complex" one */
- j=0;
- for(i=1; i<specialCasingCount; ++i) {
- if(specialCasings[i-1].code==specialCasings[i].code) {
- /* there is a duplicate code point */
- specialCasings[i-1].code=0x7fffffff; /* remove this entry in the following sorting */
- specialCasings[i].isComplex=TRUE; /* make the following one complex */
- specialCasings[i].lowerCase[0]=0;
- specialCasings[i].upperCase[0]=0;
- specialCasings[i].titleCase[0]=0;
- ++j;
- }
- }
-
- /* if some entries just were removed, then re-sort */
- if(j>0) {
- uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
- compareSpecialCasings, NULL, FALSE, pErrorCode);
- specialCasingCount-=j;
- }
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- /*
- * Add one complex mapping to caseSensitive that was filtered out above:
- * Greek final Sigma has a conditional mapping but not locale-sensitive,
- * and it is taken when lowercasing just U+03A3 alone.
- * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
- */
- uset_add(caseSensitive, 0x3c2);
-}
-
-/* parser for CaseFolding.txt ----------------------------------------------- */
-
-#define MAX_CASE_FOLDING_COUNT 2000
-
-static CaseFolding caseFoldings[MAX_CASE_FOLDING_COUNT];
-static int32_t caseFoldingCount=0;
-
-static void U_CALLCONV
-caseFoldingLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- char *end;
- static UChar32 prevCode=0;
- int32_t count;
- char status;
-
- /* get code point */
- caseFoldings[caseFoldingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
- end=(char *)u_skipWhitespace(end);
- if(end<=fields[0][0] || end!=fields[0][1]) {
- fprintf(stderr, "gencase: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* get the status of this mapping */
- caseFoldings[caseFoldingCount].status=status=*u_skipWhitespace(fields[1][0]);
- if(status!='L' && status!='E' && status!='C' && status!='S' && status!='F' && status!='I' && status!='T') {
- fprintf(stderr, "gencase: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* ignore all case folding mappings that are the same as the UnicodeData.txt lowercase mappings */
- if(status=='L') {
- return;
- }
-
- /* get the mapping */
- count=caseFoldings[caseFoldingCount].full[0]=
- (UChar)u_parseString(fields[2][0], caseFoldings[caseFoldingCount].full+1, 31, (uint32_t *)&caseFoldings[caseFoldingCount].simple, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gencase: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
- if(count==0 || count>2 || (count==2 && UTF_IS_SINGLE(caseFoldings[caseFoldingCount].full[1]))) {
- caseFoldings[caseFoldingCount].simple=0;
- }
-
- /* update the case-sensitive set */
- if(status!='T') {
- uset_add(caseSensitive, (UChar32)caseFoldings[caseFoldingCount].code);
- _set_addAll(caseSensitive, caseFoldings[caseFoldingCount].full+1, caseFoldings[caseFoldingCount].full[0]);
- }
-
- /* check the status */
- if(status=='S') {
- /* check if there was a full mapping for this code point before */
- if( caseFoldingCount>0 &&
- caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
- caseFoldings[caseFoldingCount-1].status=='F'
- ) {
- /* merge the two entries */
- caseFoldings[caseFoldingCount-1].simple=caseFoldings[caseFoldingCount].simple;
- return;
- }
- } else if(status=='F') {
- /* check if there was a simple mapping for this code point before */
- if( caseFoldingCount>0 &&
- caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
- caseFoldings[caseFoldingCount-1].status=='S'
- ) {
- /* merge the two entries */
- uprv_memcpy(caseFoldings[caseFoldingCount-1].full, caseFoldings[caseFoldingCount].full, 32*U_SIZEOF_UCHAR);
- return;
- }
- } else if(status=='I' || status=='T') {
- /* check if there was a default mapping for this code point before (remove it) */
- while(caseFoldingCount>0 &&
- caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code
- ) {
- prevCode=0;
- --caseFoldingCount;
- }
- /* store only a marker for special handling for cases like dotless i */
- caseFoldings[caseFoldingCount].simple=0;
- caseFoldings[caseFoldingCount].full[0]=0;
- }
-
- /* check that the code points (caseFoldings[caseFoldingCount].code) are in ascending order */
- if(caseFoldings[caseFoldingCount].code<=prevCode && caseFoldings[caseFoldingCount].code>0) {
- fprintf(stderr, "gencase: error - CaseFolding entries out of order, U+%04lx after U+%04lx\n",
- (unsigned long)caseFoldings[caseFoldingCount].code,
- (unsigned long)prevCode);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- prevCode=caseFoldings[caseFoldingCount].code;
-
- if(++caseFoldingCount==MAX_CASE_FOLDING_COUNT) {
- fprintf(stderr, "gencase: too many case folding mappings\n");
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
-}
-
-static void
-parseCaseFolding(const char *filename, UErrorCode *pErrorCode) {
- char *fields[3][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- u_parseDelimitedFile(filename, ';', fields, 3, caseFoldingLineFn, NULL, pErrorCode);
-}
-
-/* parser for UnicodeData.txt ----------------------------------------------- */
-
-/* general categories */
-const char *const
-genCategoryNames[U_CHAR_CATEGORY_COUNT]={
- "Cn",
- "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me",
- "Mc", "Nd", "Nl", "No",
- "Zs", "Zl", "Zp",
- "Cc", "Cf", "Co", "Cs",
- "Pd", "Ps", "Pe", "Pc", "Po",
- "Sm", "Sc", "Sk", "So",
- "Pi", "Pf"
-};
-
-static int32_t specialCasingIndex=0, caseFoldingIndex=0;
-
-static void U_CALLCONV
-unicodeDataLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- Props p;
- char *end;
- static UChar32 prevCode=0;
- UChar32 value;
- int32_t i;
-
- /* reset the properties */
- uprv_memset(&p, 0, sizeof(Props));
-
- /* get the character code, field 0 */
- p.code=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
- if(end<=fields[0][0] || end!=fields[0][1]) {
- fprintf(stderr, "gencase: syntax error in field 0 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* get general category, field 2 */
- i=getTokenIndex(genCategoryNames, U_CHAR_CATEGORY_COUNT, fields[2][0]);
- if(i>=0) {
- p.gc=(uint8_t)i;
- } else {
- fprintf(stderr, "gencase: unknown general category \"%s\" at code 0x%lx\n",
- fields[2][0], (unsigned long)p.code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* get canonical combining class, field 3 */
- value=(UChar32)uprv_strtoul(fields[3][0], &end, 10);
- if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) {
- fprintf(stderr, "gencase: syntax error in field 3 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- p.cc=(uint8_t)value;
-
- /* get uppercase mapping, field 12 */
- value=(UChar32)uprv_strtoul(fields[12][0], &end, 16);
- if(end!=fields[12][1]) {
- fprintf(stderr, "gencase: syntax error in field 12 at code 0x%lx\n",
- (unsigned long)p.code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- if(value!=0 && value!=p.code) {
- p.upperCase=value;
- uset_add(caseSensitive, p.code);
- uset_add(caseSensitive, value);
- }
-
- /* get lowercase value, field 13 */
- value=(UChar32)uprv_strtoul(fields[13][0], &end, 16);
- if(end!=fields[13][1]) {
- fprintf(stderr, "gencase: syntax error in field 13 at code 0x%lx\n",
- (unsigned long)p.code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- if(value!=0 && value!=p.code) {
- p.lowerCase=value;
- uset_add(caseSensitive, p.code);
- uset_add(caseSensitive, value);
- }
-
- /* get titlecase value, field 14 */
- value=(UChar32)uprv_strtoul(fields[14][0], &end, 16);
- if(end!=fields[14][1]) {
- fprintf(stderr, "gencase: syntax error in field 14 at code 0x%lx\n",
- (unsigned long)p.code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- if(value!=0 && value!=p.code) {
- p.titleCase=value;
- uset_add(caseSensitive, p.code);
- uset_add(caseSensitive, value);
- }
-
- /* set additional properties from previously parsed files */
- if(specialCasingIndex<specialCasingCount && p.code==specialCasings[specialCasingIndex].code) {
- p.specialCasing=specialCasings+specialCasingIndex++;
- } else {
- p.specialCasing=NULL;
- }
- if(caseFoldingIndex<caseFoldingCount && p.code==caseFoldings[caseFoldingIndex].code) {
- p.caseFolding=caseFoldings+caseFoldingIndex++;
-
- /* ignore "Common" mappings (simple==full) that map to the same code point as the regular lowercase mapping */
- if( p.caseFolding->status=='C' &&
- p.caseFolding->simple==p.lowerCase
- ) {
- p.caseFolding=NULL;
- }
- } else {
- p.caseFolding=NULL;
- }
-
- /* check for non-character code points */
- if((p.code&0xfffe)==0xfffe || (uint32_t)(p.code-0xfdd0)<0x20) {
- fprintf(stderr, "gencase: error - properties for non-character code point U+%04lx\n",
- (unsigned long)p.code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* check that the code points (p.code) are in ascending order */
- if(p.code<=prevCode && p.code>0) {
- fprintf(stderr, "gencase: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
- (unsigned long)p.code, (unsigned long)prevCode);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* properties for a single code point */
- setProps(&p);
-
- prevCode=p.code;
-}
-
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode) {
- char *fields[15][2];
- UChar32 start, end;
- int32_t i;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
-
- /* are all sub-properties consumed? */
- if(specialCasingIndex<specialCasingCount) {
- fprintf(stderr, "gencase: error - some code points in SpecialCasing.txt are missing from UnicodeData.txt\n");
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- if(caseFoldingIndex<caseFoldingCount) {
- fprintf(stderr, "gencase: error - some code points in CaseFolding.txt are missing from UnicodeData.txt\n");
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
-
- for(i=0;
- 0==uset_getItem(caseSensitive, i, &start, &end, NULL, 0, pErrorCode) && U_SUCCESS(*pErrorCode);
- ++i
- ) {
- addCaseSensitive(start, end);
- }
- if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
- *pErrorCode=U_ZERO_ERROR;
- }
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/gencase/gencase.h b/tools/gencase/gencase.h
deleted file mode 100644
index 57d03e81..00000000
--- a/tools/gencase/gencase.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: gencase.h
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug28
-* created by: Markus W. Scherer
-*/
-
-#ifndef __GENCASE_H__
-#define __GENCASE_H__
-
-#include "unicode/utypes.h"
-#include "utrie.h"
-#include "propsvec.h"
-#include "ucase.h"
-
-U_CDECL_BEGIN
-
-/* Unicode versions --------------------------------------------------------- */
-
-enum {
- UNI_1_0,
- UNI_1_1,
- UNI_2_0,
- UNI_3_0,
- UNI_3_1,
- UNI_3_2,
- UNI_4_0,
- UNI_4_0_1,
- UNI_4_1,
- UNI_VER_COUNT
-};
-
-extern int32_t ucdVersion;
-
-/* gencase ------------------------------------------------------------------ */
-
-#define UGENCASE_EXC_SHIFT 16
-#define UGENCASE_EXC_MASK 0xffff0000
-
-/*
- * Values for the ucase.icu unfold[] data array, see store.c.
- * The values are stored in ucase.icu so that the runtime code will work with
- * changing values, but they are hardcoded for gencase for simplicity.
- * They are optimized, that is, provide for minimal table column widths,
- * for the actual Unicode data, so that the table size is minimized.
- * Future versions of Unicode may require increases of some of these values.
- */
-enum {
- UGENCASE_UNFOLD_STRING_WIDTH=3,
- UGENCASE_UNFOLD_CP_WIDTH=2,
- UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH,
- UGENCASE_UNFOLD_MAX_ROWS=250
-};
-
-/* Values for additional data stored in pv column 1 */
-enum {
- UGENCASE_IS_MID_LETTER_SHIFT /* bit 0 WB=MidLetter or WB=MidNumLet */
-};
-
-/* special casing data */
-typedef struct {
- UChar32 code;
- UBool isComplex;
- UChar lowerCase[32], upperCase[32], titleCase[32];
-} SpecialCasing;
-
-/* case folding data */
-typedef struct {
- UChar32 code, simple;
- char status;
- UChar full[32];
-} CaseFolding;
-
-/* case mapping properties */
-typedef struct {
- UChar32 code, lowerCase, upperCase, titleCase;
- UChar32 closure[8];
- SpecialCasing *specialCasing;
- CaseFolding *caseFolding;
- uint8_t gc, cc;
-} Props;
-
-/* global flags */
-extern UBool beVerbose, haveCopyright;
-
-/* properties vectors in gencase.c */
-extern UPropsVectors *pv;
-
-/* prototypes */
-U_CFUNC void
-writeUCDFilename(char *basename, const char *filename, const char *suffix);
-
-U_CFUNC UBool
-isToken(const char *token, const char *s);
-
-extern void
-setUnicodeVersion(const char *v);
-
-extern void
-setProps(Props *p);
-
-U_CFUNC uint32_t U_EXPORT2
-getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset);
-
-extern void
-addCaseSensitive(UChar32 first, UChar32 last);
-
-extern void
-makeCaseClosure(void);
-
-extern void
-makeExceptions(void);
-
-extern void
-generateData(const char *dataDir, UBool csource);
-
-U_CDECL_END
-
-#endif
diff --git a/tools/gencase/gencase.vcproj b/tools/gencase/gencase.vcproj
deleted file mode 100644
index 81c4eb26..00000000
--- a/tools/gencase/gencase.vcproj
+++ /dev/null
@@ -1,422 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="9.00"
- Name="gencase"
- ProjectGUID="{DB312A49-12A9-4E07-9E96-451DC2D8FF61}"
- TargetFrameworkVersion="131072"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- <Platform
- Name="x64"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Release|Win32"
- OutputDirectory=".\x86\Release"
- IntermediateDirectory=".\x86\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/gencase.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/gencase.pch"
- AssemblerListingLocation=".\x86\Release/"
- ObjectFile=".\x86\Release/"
- ProgramDataBaseFileName=".\x86\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Release/gencase.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/gencase.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory=".\x86\Debug"
- IntermediateDirectory=".\x86\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/gencase.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/gencase.pch"
- AssemblerListingLocation=".\x86\Debug/"
- ObjectFile=".\x86\Debug/"
- ProgramDataBaseFileName=".\x86\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="4"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Debug/gencase.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/gencase.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|x64"
- OutputDirectory=".\x64\Release"
- IntermediateDirectory=".\x64\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/gencase.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/gencase.pch"
- AssemblerListingLocation=".\x64\Release/"
- ObjectFile=".\x64\Release/"
- ProgramDataBaseFileName=".\x64\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Release/gencase.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/gencase.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|x64"
- OutputDirectory=".\x64\Debug"
- IntermediateDirectory=".\x64\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/gencase.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/gencase.pch"
- AssemblerListingLocation=".\x64\Debug/"
- ObjectFile=".\x64\Debug/"
- ProgramDataBaseFileName=".\x64\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="3"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Debug/gencase.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/gencase.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="c;cpp;rc"
- >
- <File
- RelativePath=".\gencase.c"
- >
- </File>
- <File
- RelativePath=".\store.c"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h"
- >
- <File
- RelativePath=".\gencase.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/tools/gencase/store.c b/tools/gencase/store.c
deleted file mode 100644
index eaa924fe..00000000
--- a/tools/gencase/store.c
+++ /dev/null
@@ -1,1208 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2004-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: store.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2004aug28
-* created by: Markus W. Scherer
-*
-* Store Unicode case mapping properties efficiently for
-* random access.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/ustring.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "filestrm.h"
-#include "utrie.h"
-#include "utrie2.h"
-#include "uarrsort.h"
-#include "unicode/udata.h"
-#include "unewdata.h"
-#include "propsvec.h"
-#include "writesrc.h"
-#include "gencase.h"
-
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
-
-/* Unicode case mapping properties file format ---------------------------------
-
-The file format prepared and written here contains several data
-structures that store indexes or data.
-
-Before the data contents described below, there are the headers required by
-the udata API for loading ICU data. Especially, a UDataInfo structure
-precedes the actual data. It contains platform properties values and the
-file format version.
-
-The following is a description of format version 1.1 .
-
-Format version 1.1 adds data for case closure.
-
-The file contains the following structures:
-
- const int32_t indexes[i0] with values i0, i1, ...:
- (see UCASE_IX_... constants for names of indexes)
-
- i0 indexLength; -- length of indexes[] (UCASE_IX_TOP)
- i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
- i2 trieSize; -- size in bytes of the case mapping properties trie
- i3 exceptionsLength; -- length in uint16_t of the exceptions array
- i4 unfoldLength; -- length in uint16_t of the reverse-folding array (new in format version 1.1)
-
- i5..i14 reservedIndexes; -- reserved values; 0 for now
-
- i15 maxFullLength; -- maximum length of a full case mapping/folding string
-
-
- Serialized trie, see utrie.h;
-
- const uint16_t exceptions[exceptionsLength];
-
- const UChar unfold[unfoldLength];
-
-
-Trie data word:
-Bits
-if(exception) {
- 15..4 unsigned exception index
-} else {
- if(not uncased) {
- 15..6 signed delta to simple case mapping code point
- (add delta to input code point)
- } else {
- 6 the code point is case-ignorable
- (U+0307 is also case-ignorable but has an exception)
- }
- 5..4 0 normal character with cc=0
- 1 soft-dotted character
- 2 cc=230
- 3 other cc
-}
- 3 exception
- 2 case sensitive
- 1..0 0 uncased
- 1 lowercase
- 2 uppercase
- 3 titlecase
-
-
-Exceptions:
-A sub-array of the exceptions array is indexed by the exception index in a
-trie word.
-The sub-array consists of the following fields:
- uint16_t excWord;
- uint16_t optional values [];
- UTF-16 strings for full (string) mappings for lowercase, case folding, uppercase, titlecase
-
-excWord: (see UCASE_EXC_...)
-Bits
- 15 conditional case folding
- 14 conditional special casing
-13..12 same as non-exception trie data bits 5..4
- moved here because the exception index needs more bits than the delta
- 0 normal character with cc=0
- 1 soft-dotted character
- 2 cc=230
- 3 other cc
-11.. 9 reserved
- 8 if set, then for each optional-value slot there are 2 uint16_t values
- (high and low parts of 32-bit values)
- instead of single ones
- 7.. 0 bits for which optional value is present
-
-Optional-value slots:
-0 lowercase mapping (code point)
-1 case folding (code point)
-2 uppercase mapping (code point)
-3 titlecase mapping (code point)
-4 reserved
-5 reserved
-6 closure mappings (new in format version 1.1)
-7 there is at least one full (string) case mapping
- the length of each is encoded in a nibble of this optional value,
- and the strings follow this optional value in the same order:
- lower/fold/upper/title
-
-The optional closure mappings value is used as follows:
-Bits 0..3 contain the length of a string of code points for case closure.
-The string immediately follows the full case mappings, or the closure value
-slot if there are no full case mappings.
-Bits 4..15 are reserved and could be used in the future to indicate the
-number of strings for case closure.
-Complete case closure for a code point is given by the union of all simple
-and full case mappings and foldings, plus the case closure code points
-(and potentially, in the future, case closure strings).
-
-For space saving, some values are not stored. Lookups are as follows:
-- If special casing is conditional, then no full lower/upper/title mapping
- strings are stored.
-- If case folding is conditional, then no simple or full case foldings are
- stored.
-- Fall back in this order:
- full (string) mapping -- if full mappings are used
- simple (code point) mapping of the same type
- simple fold->simple lower
- simple title->simple upper
- finally, the original code point (no mapping)
-
-This fallback order is strict:
-In particular, the fallback from full case folding is to simple case folding,
-not to full lowercase mapping.
-
-Reverse case folding data ("unfold") array: (new in format version 1.1)
-
-This array stores some miscellaneous values followed by a table. The data maps
-back from multi-character strings to their original code points, for use
-in case closure.
-
-The table contains two columns of strings.
-The string in the first column is the case folding of each of the code points
-in the second column. The strings are terminated with NUL or by the end of the
-column, whichever comes first.
-
-The miscellaneous data takes up one pseudo-row and includes:
-- number of rows
-- number of UChars per row
-- number of UChars in the left (folding string) column
-
-The table is sorted by its first column. Values in the first column are unique.
-
------------------------------------------------------------------------------ */
-
-/* UDataInfo cf. udata.h */
-static UDataInfo dataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- /* dataFormat="cAsE" */
- { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
- { 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
- { 4, 0, 1, 0 } /* dataVersion */
-};
-
-enum {
- /* maximum number of exceptions expected */
- MAX_EXC_COUNT=1000
-};
-
-/* exceptions values */
-static uint16_t exceptions[UCASE_MAX_EXCEPTIONS+100];
-static uint16_t exceptionsTop=0;
-static Props excProps[MAX_EXC_COUNT];
-static uint16_t exceptionsCount=0;
-
-/* becomes indexes[UCASE_IX_MAX_FULL_LENGTH] */
-static int32_t maxFullLength=U16_MAX_LENGTH;
-
-/* reverse case folding ("unfold") data */
-static UChar unfold[UGENCASE_UNFOLD_MAX_ROWS*UGENCASE_UNFOLD_WIDTH]={
- 0, UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH, 0, 0
-};
-static uint16_t unfoldRows=0;
-static uint16_t unfoldTop=UGENCASE_UNFOLD_WIDTH;
-
-/* Unicode versions --------------------------------------------------------- */
-
-static const UVersionInfo
-unicodeVersions[]={
- { 1, 0, 0, 0 },
- { 1, 1, 0, 0 },
- { 2, 0, 0, 0 },
- { 3, 0, 0, 0 },
- { 3, 1, 0, 0 },
- { 3, 2, 0, 0 },
- { 4, 0, 0, 0 },
- { 4, 0, 1, 0 },
- { 4, 1, 0, 0 }
-};
-
-int32_t ucdVersion=UNI_4_1;
-
-static int32_t
-findUnicodeVersion(const UVersionInfo version) {
- int32_t i;
-
- for(i=0; /* while(version>unicodeVersions[i]) {} */
- i<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[i], 4)>0;
- ++i) {}
- if(0<i && i<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[i], 4)<0) {
- --i; /* fix 4.0.2 to land before 4.1, for valid x>=ucdVersion comparisons */
- }
- return i; /* version>=unicodeVersions[i] && version<unicodeVersions[i+1]; possible: i==UNI_VER_COUNT */
-}
-
-extern void
-setUnicodeVersion(const char *v) {
- UVersionInfo version;
- u_versionFromString(version, v);
- uprv_memcpy(dataInfo.dataVersion, version, 4);
- ucdVersion=findUnicodeVersion(version);
-}
-
-/* -------------------------------------------------------------------------- */
-
-static void
-addUnfolding(UChar32 c, const UChar *s, int32_t length) {
- int32_t i;
-
- if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
- fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
- (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- if(unfoldTop >= (LENGTHOF(unfold) - UGENCASE_UNFOLD_STRING_WIDTH)) {
- fprintf(stderr, "gencase error: too many multi-character case foldings\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
- u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
- u_memcpy(unfold+unfoldTop, s, length);
-
- i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
- U16_APPEND_UNSAFE(unfold, i, c);
-
- ++unfoldRows;
- unfoldTop+=UGENCASE_UNFOLD_WIDTH;
-}
-
-/* store a character's properties ------------------------------------------- */
-
-extern void
-setProps(Props *p) {
- UErrorCode errorCode;
- uint32_t value, oldValue;
- int32_t delta;
- UBool isCaseIgnorable;
-
- /* get the non-UnicodeData.txt properties */
- value=oldValue=upvec_getValue(pv, p->code, 0);
-
- /* default: map to self */
- delta=0;
-
- if(p->gc==U_TITLECASE_LETTER) {
- /* the Titlecase property is read late, from UnicodeData.txt */
- value|=UCASE_TITLE;
- }
-
- if(p->upperCase!=0) {
- /* uppercase mapping as delta if the character is lowercase */
- if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
- delta=p->upperCase-p->code;
- } else {
- value|=UCASE_EXCEPTION;
- }
- }
- if(p->lowerCase!=0) {
- /* lowercase mapping as delta if the character is uppercase or titlecase */
- if((value&UCASE_TYPE_MASK)>=UCASE_UPPER) {
- delta=p->lowerCase-p->code;
- } else {
- value|=UCASE_EXCEPTION;
- }
- }
- if(p->upperCase!=p->titleCase) {
- value|=UCASE_EXCEPTION;
- }
- if(p->closure[0]!=0) {
- value|=UCASE_EXCEPTION;
- }
- if(p->specialCasing!=NULL) {
- value|=UCASE_EXCEPTION;
- }
- if(p->caseFolding!=NULL) {
- value|=UCASE_EXCEPTION;
- }
-
- if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
- value|=UCASE_EXCEPTION;
- }
-
- if(p->cc!=0) {
- if(value&UCASE_DOT_MASK) {
- fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- if(p->cc==230) {
- value|=UCASE_ABOVE;
- } else {
- value|=UCASE_OTHER_ACCENT;
- }
- }
-
- /* encode case-ignorable as delta==1 on uncased characters */
- isCaseIgnorable=FALSE;
- if((value&UCASE_TYPE_MASK)==UCASE_NONE) {
- if(ucdVersion>=UNI_4_1) {
- /*
- * Unicode 4.1 and up: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
- * Unicode 5.1 and up: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
- * The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both WB=MidLetter and WB=MidNumLet.
- */
- if(
- (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
- (upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0
- ) {
- isCaseIgnorable=TRUE;
- }
- } else {
- /* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
- if(
- (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
- p->code==0x27 || p->code==0xad || p->code==0x2019
- ) {
- isCaseIgnorable=TRUE;
- }
- }
- }
-
- if(isCaseIgnorable && p->code!=0x307) {
- /*
- * We use one of the delta/exception bits, which works because we only
- * store the case-ignorable flag for uncased characters.
- * There is no delta for uncased characters (see checks above).
- * If there is an exception for an uncased, case-ignorable character
- * (although there should not be any case mappings if it's uncased)
- * then we have a problem.
- * There is one character which is case-ignorable but has an exception:
- * U+0307 is uncased, Mn, has conditional special casing and
- * is therefore handled in code instead.
- */
- if(value&UCASE_EXCEPTION) {
- fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n",
- (unsigned long)p->code);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-
- delta=1;
- }
-
- /* handle exceptions */
- if(value&UCASE_EXCEPTION) {
- /* simply store exceptions for later processing and encoding */
- value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
- uprv_memcpy(excProps+exceptionsCount, p, sizeof(*p));
- if(++exceptionsCount==MAX_EXC_COUNT) {
- fprintf(stderr, "gencase: too many exceptions\n");
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
- } else {
- /* store the simple case mapping delta */
- value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
- }
-
- errorCode=U_ZERO_ERROR;
- if(value!=oldValue) {
- upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- }
-
- /* add the multi-character case folding to the "unfold" data */
- if(p->caseFolding!=NULL) {
- int32_t length=p->caseFolding->full[0];
- if(length>1 && u_strHasMoreChar32Than(p->caseFolding->full+1, length, 1)) {
- addUnfolding(p->code, p->caseFolding->full+1, length);
- }
- }
-}
-
-extern void
-addCaseSensitive(UChar32 first, UChar32 last) {
- UErrorCode errorCode=U_ZERO_ERROR;
- upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-}
-
-/* finalize reverse case folding ("unfold") data ---------------------------- */
-
-static int32_t U_CALLCONV
-compareUnfold(const void *context, const void *left, const void *right) {
- return u_memcmp((const UChar *)left, (const UChar *)right, UGENCASE_UNFOLD_WIDTH);
-}
-
-static void
-makeUnfoldData() {
- static const UChar
- iDot[2]= { 0x69, 0x307 };
-
- UChar *p, *q;
- int32_t i, j, k;
- UErrorCode errorCode;
-
- /*
- * add a case folding that we missed because it's conditional:
- * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
- */
- addUnfolding(0x130, iDot, 2);
-
- /* sort the data */
- errorCode=U_ZERO_ERROR;
- uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
- compareUnfold, NULL, FALSE, &errorCode);
-
- /* make unique-string rows by merging adjacent ones' code point columns */
-
- /* make p point to row i-1 */
- p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;
-
- for(i=1; i<unfoldRows;) {
- if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
- /* concatenate code point columns */
- q=p+UGENCASE_UNFOLD_STRING_WIDTH;
- for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
- for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
- q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
- }
- if(j>UGENCASE_UNFOLD_CP_WIDTH) {
- fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
- (long)j, UGENCASE_UNFOLD_CP_WIDTH);
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- /* move following rows up one */
- --unfoldRows;
- unfoldTop-=UGENCASE_UNFOLD_WIDTH;
- u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
- } else {
- p+=UGENCASE_UNFOLD_WIDTH;
- ++i;
- }
- }
-
- unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;
-
- if(beVerbose) {
- puts("unfold data:");
-
- p=(UChar *)unfold;
- for(i=0; i<unfoldRows; ++i) {
- p+=UGENCASE_UNFOLD_WIDTH;
- printf("[%2d] %04x %04x %04x <- %04x %04x\n",
- (int)i, p[0], p[1], p[2], p[3], p[4]);
- }
- }
-}
-
-/* case closure ------------------------------------------------------------- */
-
-static void
-addClosureMapping(UChar32 src, UChar32 dest) {
- uint32_t value;
-
- if(beVerbose) {
- printf("add closure mapping U+%04lx->U+%04lx\n",
- (unsigned long)src, (unsigned long)dest);
- }
-
- value=upvec_getValue(pv, src, 0);
- if(value&UCASE_EXCEPTION) {
- Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
- int32_t i;
-
- /* append dest to src's closure array */
- for(i=0;; ++i) {
- if(i==LENGTHOF(p->closure)) {
- fprintf(stderr, "closure[] overflow for U+%04lx->U+%04lx\n",
- (unsigned long)src, (unsigned long)dest);
- exit(U_BUFFER_OVERFLOW_ERROR);
- } else if(p->closure[i]==dest) {
- break; /* do not store duplicates */
- } else if(p->closure[i]==0) {
- p->closure[i]=dest;
- break;
- }
- }
- } else {
- Props p2={ 0 };
- UChar32 next;
- UErrorCode errorCode;
-
- /*
- * decode value into p2 (enough for makeException() to work properly),
- * add the closure mapping,
- * and set the new exception for src
- */
- p2.code=src;
- p2.closure[0]=dest;
-
- if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
- /* one simple case mapping, don't care which one */
- next=src+((int16_t)value>>UCASE_DELTA_SHIFT);
- if(next!=src) {
- if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
- p2.upperCase=p2.titleCase=next;
- } else {
- p2.lowerCase=next;
- }
- }
- } else if(value&UCASE_DELTA_MASK) {
- fprintf(stderr, "gencase error: unable to add case closure exception to case-ignorable U+%04lx\n",
- (unsigned long)src);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-
- value&=~(UGENCASE_EXC_MASK|UCASE_DELTA_MASK); /* remove previous simple mapping */
- value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
- value|=UCASE_EXCEPTION;
- uprv_memcpy(excProps+exceptionsCount, &p2, sizeof(p2));
- if(++exceptionsCount==MAX_EXC_COUNT) {
- fprintf(stderr, "gencase: too many exceptions\n");
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
-
- errorCode=U_ZERO_ERROR;
- upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- }
-}
-
-/*
- * Find missing case mapping relationships and add mappings for case closure.
- * This function starts from an "original" code point and recursively
- * finds its case mappings and the case mappings of where it maps to.
- *
- * The recursion depth is capped at 3 nested calls of this function.
- * In each call, the current code point is c, and the function enumerates
- * all of c's simple (single-code point) case mappings.
- * prev is the code point that case-mapped to c.
- * prev2 is the code point that case-mapped to prev.
- *
- * The initial function call has prev2<0, prev<0, and c==orig
- * (marking no code points).
- * It enumerates c's case mappings and recurses without further action.
- *
- * The second-level function call has prev2<0, prev==orig, and c is
- * the destination code point of one of prev's case mappings.
- * The function checks if any of c's case mappings go back to orig
- * and adds a closure mapping if not.
- * In other words, it turns a case mapping relationship of
- * orig->c
- * into
- * orig<->c
- *
- * The third-level function call has prev2==orig, prev>=0, and c is
- * the destination code point of one of prev's case mappings.
- * (And prev is the destination of one of prev2's case mappings.)
- * The function checks if any of c's case mappings go back to orig
- * and adds a closure mapping if not.
- * In other words, it turns case mapping relationships of
- * orig->prev->c or orig->prev<->c
- * into
- * orig->prev->c->orig or orig->prev<->c->orig
- * etc.
- * (Graphically, this closes a triangle.)
- *
- * With repeated application on all code points until no more closure mappings
- * are added, all case equivalence groups get complete mappings.
- * That is, in each group of code points with case relationships
- * each code point will in the end have some mapping to each other
- * code point in the group.
- *
- * @return TRUE if a closure mapping was added
- */
-static UBool
-addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value) {
- UChar32 next;
- UBool someMappingsAdded=FALSE;
-
- if(c!=orig) {
- /* get the properties for c */
- value=upvec_getValue(pv, c, 0);
- }
- /* else if c==orig then c's value was passed in */
-
- if(value&UCASE_EXCEPTION) {
- UChar32 set[32];
- int32_t i, count=0;
-
- Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
-
- /*
- * marker for whether any of c's mappings goes to orig
- * c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
- */
- UBool mapsToOrig=(UBool)(c==orig);
-
- /* collect c's case mapping destinations in set[] */
- if((next=p->upperCase)!=0 && next!=c) {
- set[count++]=next;
- }
- if((next=p->lowerCase)!=0 && next!=c) {
- set[count++]=next;
- }
- if(p->upperCase!=(next=p->titleCase) && next!=c) {
- set[count++]=next;
- }
- if(p->caseFolding!=NULL && (next=p->caseFolding->simple)!=0 && next!=c) {
- set[count++]=next;
- }
-
- /* append c's current closure mappings to set[] */
- for(i=0; i<LENGTHOF(p->closure) && (next=p->closure[i])!=0; ++i) {
- set[count++]=next;
- }
-
- /* process all code points to which c case-maps */
- for(i=0; i<count; ++i) {
- next=set[i]; /* next!=c */
-
- if(next==orig) {
- mapsToOrig=TRUE; /* remember that we map to orig */
- } else if(prev2<0 && next!=prev) {
- /*
- * recurse unless
- * we have reached maximum depth (prev2>=0) or
- * this is a mapping to one of the previous code points (orig, prev, c)
- */
- someMappingsAdded|=addClosure(orig, prev, c, next, 0);
- }
- }
-
- if(!mapsToOrig) {
- addClosureMapping(c, orig);
- return TRUE;
- }
- } else {
- if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
- /* one simple case mapping, don't care which one */
- next=c+((int16_t)value>>UCASE_DELTA_SHIFT);
- if(next!=c) {
- /*
- * recurse unless
- * we have reached maximum depth (prev2>=0) or
- * this is a mapping to one of the previous code points (orig, prev, c)
- */
- if(prev2<0 && next!=orig && next!=prev) {
- someMappingsAdded|=addClosure(orig, prev, c, next, 0);
- }
-
- if(c!=orig && next!=orig) {
- /* c does not map to orig, add a closure mapping c->orig */
- addClosureMapping(c, orig);
- return TRUE;
- }
- }
- }
- }
-
- return someMappingsAdded;
-}
-
-extern void /*
- * Adds case closure mappings in two phases:
- * 1. from the finalized "unfold" data, pairing the first code point of each
- *    unfold row with every further code point in that row;
- * 2. from the rows of pv, repeating full passes until one pass adds no mapping.
- * All mappings are added through addClosure().
- */
-makeCaseClosure() {
- UChar *p;
- uint32_t *row;
- uint32_t value;
- UChar32 start, end, c, c2;
- int32_t i, j;
- UBool someMappingsAdded;
-
- /*
- * finalize the "unfold" data because we need to use it to add closure mappings
- * for situations like FB05->"st"<-FB06
- * where we would otherwise miss the FB05<->FB06 relationship
- */
- makeUnfoldData();
-
- /* use the "unfold" data to add mappings */
-
- /* p always points to the code points; this loop ignores the strings completely */
- p=unfold+UGENCASE_UNFOLD_WIDTH+UGENCASE_UNFOLD_STRING_WIDTH;
-
- for(i=0; i<unfoldRows; p+=UGENCASE_UNFOLD_WIDTH, ++i) {
- j=0;
- U16_NEXT_UNSAFE(p, j, c); /* c = first code point of this row */
- while(j<UGENCASE_UNFOLD_CP_WIDTH && p[j]!=0) { /* stop at the row's code point width or at a 0 unit */
- U16_NEXT_UNSAFE(p, j, c2);
- addClosure(c, U_SENTINEL, c, c2, 0); /* relate the first code point to each further one */
- }
- }
-
- if(beVerbose) {
- puts("---- ---- ---- ---- (done with closures from unfolding)");
- }
-
- /* add further closure mappings from analyzing simple mappings */
- do {
- someMappingsAdded=FALSE;
-
- i=0;
- while((row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP) {
- value=*row; /* first word of the row */
- if(value!=0) {
- while(start<=end) { /* visit each code point of the row's range */
- if(addClosure(start, U_SENTINEL, U_SENTINEL, start, value)) {
- someMappingsAdded=TRUE;
-
- /*
- * stop this loop because pv was changed and row is not valid any more
- * skip all rows below the current start
- */
- while((row=upvec_getRow(pv, i, NULL, &end))!=NULL && start>end) {
- ++i;
- }
- row=NULL; /* signal to continue with outer loop, without further ++i */
- break;
- }
- ++start;
- }
- if(row==NULL) {
- continue; /* see row=NULL above */
- }
- }
- ++i;
- }
-
- if(beVerbose && someMappingsAdded) {
- puts("---- ---- ---- ----");
- }
- } while(someMappingsAdded); /* repeat until a full pass adds nothing */
-}
-
-/* exceptions --------------------------------------------------------------- */
-
-/*
- * Returns the summed U16_LENGTH() of the code points in s.
- * Counting stops at the first 0 code point or after maxLength entries,
- * whichever comes first.
- */
-static int32_t
-getLengthOfCodePoints(const UChar32 *s, int32_t maxLength) {
-    int32_t units=0;
-    int32_t idx=0;
-
-    while(idx<maxLength && s[idx]!=0) {
-        units+=U16_LENGTH(s[idx]);
-        ++idx;
-    }
-    return units;
-}
-
-/*
- * Tests whether a full mapping string is just the simple mapping.
- * s[0] holds the string length and the string itself starts at s[1].
- * A simple value of 0 stands for "maps to the code point c itself".
- * Returns TRUE only if the string consists of exactly one code point
- * and that code point equals the (effective) simple mapping.
- */
-static UBool
-fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) {
-    const UChar *text=s+1;
-    int32_t textLength=s[0];
-    int32_t pos=0;
-    UChar32 first;
-    UChar32 expected;
-
-    /* empty, or too long to be a single code point */
-    if(textLength==0 || textLength>U16_MAX_LENGTH) {
-        return FALSE;
-    }
-    U16_NEXT(text, pos, textLength, first);
-
-    /* UCD has no simple mapping if it's the same as the code point itself */
-    expected= simple!=0 ? simple : c;
-    return (UBool)(pos==textLength && first==expected);
-}
-
-static uint16_t /*
- * Appends one entry for the properties p to the exceptions array and
- * returns the index of its main exceptions word (for storing in the trie word).
- * Written in this order, starting at exceptionsTop:
- *   the main word, the optional slots (single or double width),
- *   the full case mapping strings, and the closure code points.
- * Side effects visible here:
- *   raises maxFullLength to the longest full mapping seen,
- *   sets p->specialCasing / p->caseFolding to NULL when they only signal
- *   conditional mappings, zeroes full-mapping lengths that duplicate the
- *   simple mappings, and advances exceptionsTop.
- * Exits the process if exceptionsTop has already reached UCASE_MAX_EXCEPTIONS.
- */
-makeException(uint32_t value, Props *p) {
- uint32_t slots[8];
- uint32_t slotBits;
- uint16_t excWord, excIndex, excTop, i, count, length, fullLengths;
- UBool doubleSlots;
-
- /* excIndex will be returned for storing in the trie word */
- excIndex=exceptionsTop;
- if(excIndex>=UCASE_MAX_EXCEPTIONS) { /* NOTE(review): only the start index is checked; the writes below are not bounds-checked in this function - confirm the array has headroom */
- fprintf(stderr, "gencase error: too many exceptions words\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */
-
- /* copy and shift the soft-dotted bits */
- excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT;
-
- /* update maxFullLength */
- if(p->specialCasing!=NULL) {
- length=p->specialCasing->lowerCase[0];
- if(length>maxFullLength) {
- maxFullLength=length;
- }
- length=p->specialCasing->upperCase[0];
- if(length>maxFullLength) {
- maxFullLength=length;
- }
- length=p->specialCasing->titleCase[0];
- if(length>maxFullLength) {
- maxFullLength=length;
- }
- }
- if(p->caseFolding!=NULL) {
- length=p->caseFolding->full[0];
- if(length>maxFullLength) {
- maxFullLength=length;
- }
- }
-
- /* set the bits for conditional mappings */
- if(p->specialCasing!=NULL && p->specialCasing->isComplex) {
- excWord|=UCASE_EXC_CONDITIONAL_SPECIAL;
- p->specialCasing=NULL; /* from here on, treated as having no special casing data */
- }
- if(p->caseFolding!=NULL && p->caseFolding->simple==0 && p->caseFolding->full[0]==0) {
- excWord|=UCASE_EXC_CONDITIONAL_FOLD;
- p->caseFolding=NULL; /* from here on, treated as having no case folding data */
- }
-
- /*
- * Note:
- * UCD stores no simple mappings when they are the same as the code point itself.
- * SpecialCasing and CaseFolding do store simple mappings even if they are
- * the same as the code point itself.
- * Comparisons between simple regular mappings and simple special/folding
- * mappings need to compensate for the difference by comparing with the
- * original code point if a simple UCD mapping is missing (0).
- */
-
- /* remove redundant data */
- if(p->specialCasing!=NULL) {
- /* do not store full mappings if they are the same as the simple ones */
- if(fullMappingEqualsSimple(p->specialCasing->lowerCase, p->lowerCase, p->code)) {
- p->specialCasing->lowerCase[0]=0;
- }
- if(fullMappingEqualsSimple(p->specialCasing->upperCase, p->upperCase, p->code)) {
- p->specialCasing->upperCase[0]=0;
- }
- if(fullMappingEqualsSimple(p->specialCasing->titleCase, p->titleCase, p->code)) {
- p->specialCasing->titleCase[0]=0;
- }
- }
- if( p->caseFolding!=NULL &&
- fullMappingEqualsSimple(p->caseFolding->full, p->caseFolding->simple, p->code)
- ) {
- p->caseFolding->full[0]=0;
- }
-
- /* write the optional slots */
- slotBits=0; /* OR of all slot values, used below to pick the slot width */
- count=0;
-
- if(p->lowerCase!=0) {
- slots[count]=(uint32_t)p->lowerCase;
- slotBits|=slots[count];
- ++count;
- excWord|=U_MASK(UCASE_EXC_LOWER);
- }
- if( p->caseFolding!=NULL &&
- p->caseFolding->simple!=0 &&
- (p->lowerCase!=0 ?
- p->caseFolding->simple!=p->lowerCase :
- p->caseFolding->simple!=p->code)
- ) { /* store the simple folding only if it differs from the effective simple lowercase */
- slots[count]=(uint32_t)p->caseFolding->simple;
- slotBits|=slots[count];
- ++count;
- excWord|=U_MASK(UCASE_EXC_FOLD);
- }
- if(p->upperCase!=0) {
- slots[count]=(uint32_t)p->upperCase;
- slotBits|=slots[count];
- ++count;
- excWord|=U_MASK(UCASE_EXC_UPPER);
- }
- if(p->upperCase!=p->titleCase) { /* a title slot is written only when it differs from upper */
- if(p->titleCase!=0) {
- slots[count]=(uint32_t)p->titleCase;
- } else {
- slots[count]=(uint32_t)p->code; /* no title mapping: store the code point itself */
- }
- slotBits|=slots[count];
- ++count;
- excWord|=U_MASK(UCASE_EXC_TITLE);
- }
-
- /* length of case closure */
- if(p->closure[0]!=0) {
- length=getLengthOfCodePoints(p->closure, LENGTHOF(p->closure));
- slots[count]=(uint32_t)length; /* must be 1..UCASE_CLOSURE_MAX_LENGTH */
- slotBits|=slots[count];
- ++count;
- excWord|=U_MASK(UCASE_EXC_CLOSURE);
- }
-
- /* lengths of full case mapping strings, stored in the last slot */
- fullLengths=0; /* layout: lower | fold<<4 | upper<<8 | title<<12 */
- if(p->specialCasing!=NULL) {
- fullLengths=p->specialCasing->lowerCase[0];
- fullLengths|=p->specialCasing->upperCase[0]<<8;
- fullLengths|=p->specialCasing->titleCase[0]<<12;
- }
- if(p->caseFolding!=NULL) {
- fullLengths|=p->caseFolding->full[0]<<4;
- }
- if(fullLengths!=0) {
- slots[count]=fullLengths;
- slotBits|=slots[count];
- ++count;
- excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
- }
-
- /* write slots */
- doubleSlots=(UBool)(slotBits>0xffff); /* some slot value does not fit into 16 bits */
- if(!doubleSlots) {
- for(i=0; i<count; ++i) {
- exceptions[excTop++]=(uint16_t)slots[i];
- }
- } else {
- excWord|=UCASE_EXC_DOUBLE_SLOTS;
- for(i=0; i<count; ++i) {
- exceptions[excTop++]=(uint16_t)(slots[i]>>16); /* upper 16 bits first */
- exceptions[excTop++]=(uint16_t)slots[i];
- }
- }
-
- /* write the full case mapping strings, in the order lower, fold, upper, title */
- if(p->specialCasing!=NULL) {
- length=(uint16_t)p->specialCasing->lowerCase[0];
- u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
- excTop+=length;
- }
- if(p->caseFolding!=NULL) {
- length=(uint16_t)p->caseFolding->full[0];
- u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
- excTop+=length;
- }
- if(p->specialCasing!=NULL) {
- length=(uint16_t)p->specialCasing->upperCase[0];
- u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
- excTop+=length;
-
- length=(uint16_t)p->specialCasing->titleCase[0];
- u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
- excTop+=length;
- }
-
- /* write the closure data */
- if(p->closure[0]!=0) {
- UChar32 c;
-
- for(i=0; i<LENGTHOF(p->closure) && (c=p->closure[i])!=0; ++i) {
- U16_APPEND_UNSAFE((UChar *)exceptions, excTop, c); /* advances excTop past the appended units */
- }
- }
-
- exceptionsTop=excTop;
-
- /* write the main exceptions word */
- exceptions[excIndex]=excWord;
-
- return excIndex;
-}
-
-/*
- * Walks all rows of pv. For each row whose first word has UCASE_EXCEPTION set,
- * builds the exceptions entry via makeException() and replaces the generator's
- * exception bits in that word with the returned exceptions index.
- */
-extern void
-makeExceptions() {
-    uint32_t *pvRow;
-    int32_t rowIndex;
-
-    for(rowIndex=0; (pvRow=upvec_getRow(pv, rowIndex, NULL, NULL))!=NULL; ++rowIndex) {
-        uint32_t word=*pvRow;
-        uint16_t excIndex;
-
-        if((word&UCASE_EXCEPTION)==0) {
-            continue; /* no exception data for this row */
-        }
-        excIndex=makeException(word, excProps+(word>>UGENCASE_EXC_SHIFT));
-        *pvRow=(word&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|(excIndex<<UCASE_EXC_SHIFT);
-    }
-}
-
-/* generate output data ----------------------------------------------------- */
-
-/*
- * Builds the trie from the rows of pv, fills in the indexes[] array and
- * writes the case properties data into dataDir:
- *   csource!=0: as C source text in ucase_props_data.c (using a UTrie2),
- *   csource==0: as a binary data file via the udata_ writer functions.
- * Exits the process on any reported failure.
- *
- * The frozen UTrie2 returned by utrie2_fromUTrie() is closed once its
- * thawed clone exists, and the clone's error code is checked before the
- * clone is dereferenced.
- */
-extern void
-generateData(const char *dataDir, UBool csource) {
-    static int32_t indexes[UCASE_IX_TOP]={
-        UCASE_IX_TOP
-    };
-    static uint8_t trieBlock[40000];
-
-    const uint32_t *row;
-    UChar32 start, end;
-    int32_t i;
-
-    UNewDataMemory *pData;
-    UNewTrie *pTrie;
-    UErrorCode errorCode=U_ZERO_ERROR;
-    int32_t trieSize;
-    long dataLength;
-
-    pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
-    if(pTrie==NULL) {
-        fprintf(stderr, "gencase error: unable to create a UNewTrie\n");
-        exit(U_MEMORY_ALLOCATION_ERROR);
-    }
-
-    /* copy the values for real code points (not the special rows) into the trie */
-    for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL; ++i) {
-        if(start<UPVEC_FIRST_SPECIAL_CP && !utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
-            fprintf(stderr, "gencase error: unable to set trie value (overflow)\n");
-            exit(U_BUFFER_OVERFLOW_ERROR);
-        }
-    }
-
-    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
-        exit(errorCode);
-    }
-
-    indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop;
-    indexes[UCASE_IX_TRIE_SIZE]=trieSize;
-    indexes[UCASE_IX_UNFOLD_LENGTH]=unfoldTop;
-    /* exceptions and unfold are arrays of 16-bit units, hence the factor 2 */
-    indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop+2*unfoldTop;
-
-    indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength;
-
-    if(beVerbose) {
-        printf("trie size in bytes: %5d\n", (int)trieSize);
-        printf("number of code points with exceptions: %5d\n", exceptionsCount);
-        printf("size in bytes of exceptions: %5d\n", 2*exceptionsTop);
-        printf("size in bytes of reverse foldings: %5d\n", 2*unfoldTop);
-        printf("data size: %5d\n", (int)indexes[UCASE_IX_LENGTH]);
-    }
-
-    if(csource) {
-        /* write .c file for hardcoded data */
-        UTrie trie={ NULL };
-        UTrie2 *frozenTrie2;
-        UTrie2 *trie2;
-        FILE *f;
-
-        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
-        if(U_FAILURE(errorCode)) {
-            fprintf(
-                stderr,
-                "gencase error: failed to utrie_unserialize(ucase.icu trie) - %s\n",
-                u_errorName(errorCode));
-            exit(errorCode);
-        }
-
-        /* use UTrie2 */
-        dataInfo.formatVersion[0]=2;
-        dataInfo.formatVersion[2]=0;
-        dataInfo.formatVersion[3]=0;
-        frozenTrie2=utrie2_fromUTrie(&trie, 0, &errorCode);
-        if(U_FAILURE(errorCode)) {
-            fprintf(
-                stderr,
-                "gencase error: utrie2_fromUTrie() failed - %s\n",
-                u_errorName(errorCode));
-            exit(errorCode);
-        }
-        {
-            /* delete lead surrogate code unit values */
-            UChar lead;
-
-            /* the clone is a separate object: release the frozen original right away */
-            trie2=utrie2_cloneAsThawed(frozenTrie2, &errorCode);
-            utrie2_close(frozenTrie2);
-
-            /* only touch trie2 if the clone succeeded */
-            if(U_SUCCESS(errorCode)) {
-                for(lead=0xd800; lead<0xdc00; ++lead) {
-                    utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
-                }
-                utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
-            }
-            if(U_FAILURE(errorCode)) {
-                fprintf(
-                    stderr,
-                    "gencase error: deleting lead surrogate code unit values failed - %s\n",
-                    u_errorName(errorCode));
-                exit(errorCode);
-            }
-        }
-
-        f=usrc_create(dataDir, "ucase_props_data.c");
-        if(f!=NULL) {
-            usrc_writeArray(f,
-                "static const UVersionInfo ucase_props_dataVersion={",
-                dataInfo.dataVersion, 8, 4,
-                "};\n\n");
-            usrc_writeArray(f,
-                "static const int32_t ucase_props_indexes[UCASE_IX_TOP]={",
-                indexes, 32, UCASE_IX_TOP,
-                "};\n\n");
-            usrc_writeUTrie2Arrays(f,
-                "static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL,
-                trie2,
-                "\n};\n\n");
-            usrc_writeArray(f,
-                "static const uint16_t ucase_props_exceptions[%ld]={\n",
-                exceptions, 16, exceptionsTop,
-                "\n};\n\n");
-            usrc_writeArray(f,
-                "static const uint16_t ucase_props_unfold[%ld]={\n",
-                unfold, 16, unfoldTop,
-                "\n};\n\n");
-            fputs(
-                "static const UCaseProps ucase_props_singleton={\n"
-                " NULL,\n"
-                " ucase_props_indexes,\n"
-                " ucase_props_exceptions,\n"
-                " ucase_props_unfold,\n",
-                f);
-            usrc_writeUTrie2Struct(f,
-                " {\n",
-                trie2, "ucase_props_trieIndex", NULL,
-                " },\n");
-            usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
-            fputs("};\n", f);
-            fclose(f);
-        }
-        utrie2_close(trie2);
-    } else {
-        /* write the data */
-        pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
-                           haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
-        if(U_FAILURE(errorCode)) {
-            fprintf(stderr, "gencase: unable to create data memory, %s\n", u_errorName(errorCode));
-            exit(errorCode);
-        }
-
-        udata_writeBlock(pData, indexes, sizeof(indexes));
-        udata_writeBlock(pData, trieBlock, trieSize);
-        udata_writeBlock(pData, exceptions, 2*exceptionsTop);
-        udata_writeBlock(pData, unfold, 2*unfoldTop);
-
-        /* finish up */
-        dataLength=udata_finish(pData, &errorCode);
-        if(U_FAILURE(errorCode)) {
-            fprintf(stderr, "gencase: error %d writing the output file\n", errorCode);
-            exit(errorCode);
-        }
-
-        /* cross-check the written size against the size computed for the indexes */
-        if(dataLength!=indexes[UCASE_IX_LENGTH]) {
-            fprintf(stderr, "gencase: data length %ld != calculated size %d\n",
-                dataLength, (int)indexes[UCASE_IX_LENGTH]);
-            exit(U_INTERNAL_PROGRAM_ERROR);
-        }
-    }
-
-    utrie_close(pTrie);
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/gencfu/gencfu.cpp b/tools/gencfu/gencfu.cpp
index ed3b30cd..b50d6cbb 100644
--- a/tools/gencfu/gencfu.cpp
+++ b/tools/gencfu/gencfu.cpp
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2009, International Business Machines
+* Copyright (C) 2009-2010, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -74,7 +74,7 @@ void usageAndDie(int retCode) {
}
-#if UCONFIG_NO_REGULAR_EXPRESSIONS
+#if UCONFIG_NO_REGULAR_EXPRESSIONS || UCONFIG_NO_NORMALIZATION || UCONFIG_NO_FILE_IO
/* dummy UDataInfo cf. udata.h */
static UDataInfo dummyDataInfo = {
@@ -164,13 +164,6 @@ int main(int argc, char **argv) {
u_setDataDirectory(options[6].value);
}
- /* Initialize ICU */
- u_init(&status);
- if (U_FAILURE(status)) {
- fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
- argv[0], u_errorName(status));
- exit(1);
- }
status = U_ZERO_ERROR;
/* Combine the directory with the file name */
@@ -181,7 +174,7 @@ int main(int argc, char **argv) {
copyright = U_COPYRIGHT_STRING;
}
-#if UCONFIG_NO_REGULAR_EXPRESSIONS
+#if UCONFIG_NO_REGULAR_EXPRESSIONS || UCONFIG_NO_NORMALIZATION || UCONFIG_NO_FILE_IO
// spoof detection data file parsing is dependent on regular expressions.
// TODO: have the tool return an error status. Requires fixing the ICU data build
// so that it doesn't abort entirely on that error.
@@ -190,7 +183,7 @@ int main(int argc, char **argv) {
char msg[1024];
/* write message with just the name */
- sprintf(msg, "gencfu writes dummy %s because of UCONFIG_NO_REGULAR_EXPRESSIONS, see uconfig.h", outFileName);
+ sprintf(msg, "gencfu writes dummy %s because of UCONFIG_NO_REGULAR_EXPRESSIONS and/or UCONFIG_NO_NORMALIZATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
fprintf(stderr, "%s\n", msg);
/* write the dummy data file */
@@ -200,6 +193,14 @@ int main(int argc, char **argv) {
return (int)status;
#else
+ /* Initialize ICU */
+ u_init(&status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
+ argv[0], u_errorName(status));
+ exit(1);
+ }
+ status = U_ZERO_ERROR;
// Read in the confusables source file
@@ -285,7 +286,7 @@ int main(int argc, char **argv) {
}
uspoof_close(sc);
- delete outData;
+ delete [] outData;
delete confusables;
delete wsConfsables;
u_cleanup();
@@ -317,7 +318,8 @@ int main(int argc, char **argv) {
long t = fread(result, 1, fileSize, file);
if (t != fileSize) {
- delete result;
+ delete [] result;
+ fclose(file);
return NULL;
}
result[fileSize]=0;
diff --git a/tools/genctd/genctd.cpp b/tools/genctd/genctd.cpp
index 1b114ec2..e5dccbf7 100644
--- a/tools/genctd/genctd.cpp
+++ b/tools/genctd/genctd.cpp
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2002-2006, International Business Machines
+* Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -74,7 +74,7 @@ void usageAndDie(int retCode) {
}
-#if UCONFIG_NO_BREAK_ITERATION
+#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
/* dummy UDataInfo cf. udata.h */
static UDataInfo dummyDataInfo = {
@@ -157,13 +157,6 @@ int main(int argc, char **argv) {
u_setDataDirectory(options[4].value);
}
- /* Initialize ICU */
- u_init(&status);
- if (U_FAILURE(status)) {
- fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
- argv[0], u_errorName(status));
- exit(1);
- }
status = U_ZERO_ERROR;
/* Combine the directory with the file name */
@@ -174,13 +167,13 @@ int main(int argc, char **argv) {
copyright = U_COPYRIGHT_STRING;
}
-#if UCONFIG_NO_BREAK_ITERATION
+#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
UNewDataMemory *pData;
char msg[1024];
/* write message with just the name */
- sprintf(msg, "genctd writes dummy %s because of UCONFIG_NO_BREAK_ITERATION, see uconfig.h", outFileName);
+ sprintf(msg, "genctd writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
fprintf(stderr, "%s\n", msg);
/* write the dummy data file */
@@ -190,6 +183,14 @@ int main(int argc, char **argv) {
return (int)status;
#else
+ /* Initialize ICU */
+ u_init(&status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
+ argv[0], u_errorName(status));
+ exit(1);
+ }
+ status = U_ZERO_ERROR;
//
// Read in the dictionary source file
diff --git a/tools/gendraft/Makefile b/tools/gendraft/Makefile
index d04eb342..07694928 100644
--- a/tools/gendraft/Makefile
+++ b/tools/gendraft/Makefile
@@ -1,5 +1,5 @@
#*******************************************************************************
-#* Copyright (C) 2008, International Business Machines
+#* Copyright (C) 2008-2010, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
@@ -11,7 +11,7 @@ top_builddir=../..
include $(top_builddir)/icudefs.mk
-LOCALHEADERS= udeprctd.h udraft.h uintrnal.h usystem.h
+LOCALHEADERS= udeprctd.h udraft.h uintrnal.h usystem.h
COMMONHDR= $(top_srcdir)/common/unicode
DOCDIR= $(top_builddir)/doc/html
@@ -21,19 +21,19 @@ all:
@echo Usage: 'make install-headers' to update headers.
@echo 'be sure to verify the headers (in soure/common/unicode) before checkin!'
@exit 1
-
+
clean:
-$(RMV) $(LOCALHEADERS)
-
+
$(DOCDIR):
( cd $(top_builddir) ; $(MAKE) doc )
local-headers: $(LOCALHEADERS)
-install-headers: $(DOCDIR) $(LOCALHEADERS)
+install-headers: $(DOCDIR)
perl ./genheaders.pl --srcdir=$(DOCDIR) --destdir=$(COMMONHDR) --version=$(VERSION) --exclusion-list=$(EXCLUDE)
( cd $(COMMONHDR) ; ls -l $(LOCALHEADERS) )
%.h: $(COMMONHDR)/%.h
cp $< $@
-
+
diff --git a/tools/gendraft/genheaders.pl b/tools/gendraft/genheaders.pl
index 126c0f07..00fae2db 100755
--- a/tools/gendraft/genheaders.pl
+++ b/tools/gendraft/genheaders.pl
@@ -1,7 +1,7 @@
#!/usr/bin/perl
#*
#*******************************************************************************
-#* Copyright (C) 2006-2009, International Business Machines
+#* Copyright (C) 2006-2010, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
#*
@@ -46,6 +46,7 @@ $internalAppend = "INTERNAL_API_DO_NOT_USE";
$internalDefine = "U_HIDE_INTERNAL_API";
$versionAppend="";
+
#run the program
main();
@@ -58,7 +59,8 @@ sub main(){
"--destdir=s" => \$destDir,
"--version=s" => \$version,
"--exclusion-list=s" => \$exclude,
- "--include-types" => \$includeTypes
+ "--include-types" => \$includeTypes,
+ "--verbose" => \$verbose
);
usage() unless defined $srcDir;
usage() unless defined $destDir;
@@ -101,6 +103,7 @@ sub getHeaderDef{
sub writeFile{
($infile,$outfile,$destDir, $symbolAppend, $symbolDef, $exclude) = @_;
+ my $outFileName = $outfile;
$headerDef = getHeaderDef($outfile);
$outfile = $destDir."/".$outfile;
@@ -167,6 +170,10 @@ sub parseWriteFile{
($line =~ /Class/) ){
next;
}
+ if( $line =~ /^\<dt\>File [^\>]*\>([^\<]*)/ ) {
+ print "Skipping file-scope $symbolAppend $1\n";
+ next;
+ }
#<dt>Global <a class="el" href="utrans_8h.html#a21">utrans_unregister</a> </dt>
#<dt>Global <a class="el" href="classUnicodeString.html#w1w0">UnicodeString::kInvariant</a> </dt>
# the below regular expression works for both the above formats.
@@ -180,13 +187,17 @@ sub parseWriteFile{
#print "$value $exclude->{$value}\n";
next;
}
- #print "$value $realSymbol $nonExSymbol\n";
+ print "$value $realSymbol $nonExSymbol :: $line\n" if defined $verbose;
next if(isStringAcceptable($value)==1);
+ if($value =~ /^operator[^a-zA-Z]/) {
+ print "Skipping operator $symbolAppend $value from $line\n";
+ next;
+ }
$realSymbol = $value."_".$versionAppend;
$nonExSymbol = $value."_".$symbolAppend;
$disableRenaming{$value} = $nonExSymbol;
$enableRenaming{$realSymbol} = $nonExSymbol;
- #print "$value $realSymbol $nonExSymbol\n";
+ print "$value $realSymbol $nonExSymbol\n" if defined $verbose;
}
}
diff --git a/tools/gennames/Makefile.in b/tools/gennames/Makefile.in
deleted file mode 100644
index ed2f88f3..00000000
--- a/tools/gennames/Makefile.in
+++ /dev/null
@@ -1,97 +0,0 @@
-## Makefile.in for ICU - tools/gennames
-## Copyright (c) 1999-2005, International Business Machines Corporation and
-## others. All Rights Reserved.
-## Steven R. Loomis
-
-## Source directory information
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-
-top_builddir = ../..
-
-include $(top_builddir)/icudefs.mk
-
-## Build directory information
-subdir = tools/gennames
-
-TARGET_STUB_NAME = gennames
-
-SECTION = 8
-
-#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
-
-
-## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS) $(MAN_FILES)
-
-## Target information
-TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
-
-ifneq ($(top_builddir),$(top_srcdir))
-CPPFLAGS += -I$(top_builddir)/common
-endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
-LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-
-OBJECTS = gennames.o
-
-DEPS = $(OBJECTS:.o=.d)
-
-## List of phony targets
-.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man
-
-## Clear suffix list
-.SUFFIXES :
-
-## List of standard targets
-all: all-local
-install: install-local
-clean: clean-local
-distclean : distclean-local
-dist: dist-local
-check: all check-local
-
-all-local: $(TARGET) $(MAN_FILES)
-
-install-local: all-local install-man
-# $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
-# $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
-
-install-man: $(MAN_FILES)
-# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
-# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
-
-dist-local:
-
-clean-local:
- test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(OBJECTS)
-
-distclean-local: clean-local
- $(RMV) Makefile
-
-check-local: all-local
-
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(TARGET) : $(OBJECTS)
- $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
- $(POST_BUILD_STEP)
-
-
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
-ifeq (,$(MAKECMDGOALS))
--include $(DEPS)
-else
-ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
--include $(DEPS)
-endif
-endif
-
diff --git a/tools/gennames/gennames.c b/tools/gennames/gennames.c
deleted file mode 100644
index e4a91fab..00000000
--- a/tools/gennames/gennames.c
+++ /dev/null
@@ -1,1438 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: gennames.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999sep30
-* created by: Markus W. Scherer
-*
-* This program reads the Unicode character database text file,
-* parses it, and extracts the character code,
-* the "modern" character name, and optionally the
-* Unicode 1.0 character name, and (starting with ICU 2.2) the ISO 10646 comment.
-* It then tokenizes and compresses the names and builds
-* compact binary tables for random-access lookup
-* in a u_charName() API function.
-*
-* unames.icu file format (after UDataInfo header etc. - see udata.c)
-* (all data is static const)
-*
-* UDataInfo fields:
-* dataFormat "unam"
-* formatVersion 1.0
-* dataVersion = Unicode version from -u or --unicode command line option, defaults to 3.0.0
-*
-* -- data-based names
-* uint32_t tokenStringOffset,
-* groupsOffset,
-* groupStringOffset,
-* algNamesOffset;
-*
-* uint16_t tokenCount;
-* uint16_t tokenTable[tokenCount];
-*
-* char tokenStrings[]; -- padded to even count
-*
-* -- strings (groupStrings) are tokenized as follows:
-* for each character c
-* if(c>=tokenCount) write that character c directly
-* else
-* token=tokenTable[c];
-* if(token==0xfffe) -- lead byte of double-byte token
-* token=tokenTable[c<<8|next character];
-* if(token==-1)
-* write c directly
-* else
-* tokenString=tokenStrings+token; (tokenStrings=start of names data + tokenStringOffset;)
-* append zero-terminated tokenString;
-*
-* Different strings for a code point - normal name, 1.0 name, and ISO comment -
-* are separated by ';'.
-*
-* uint16_t groupCount;
-* struct {
-* uint16_t groupMSB; -- for a group of 32 character names stored, this is code point>>5
-* uint16_t offsetHigh; -- group strings are at start of names data + groupStringsOffset + this 32 bit-offset
-* uint16_t offsetLow;
-* } groupTable[groupCount];
-*
-* char groupStrings[]; -- padded to 4-count
-*
-* -- The actual, tokenized group strings are not zero-terminated because
-* that would take up too much space.
-* Instead, they are preceded by their length, written in a variable-length sequence:
-* For each of the 32 group strings, one or two nibbles are stored for its length.
-* Nibbles (4-bit values, half-bytes) are read MSB first.
-* A nibble with a value of 0..11 directly indicates the length of the name string.
-* A nibble n with a value of 12..15 is a lead nibble and forms a value with the following nibble m
-* by (((n-12)<<4)|m)+12, reaching values of 12..75.
-* These lengths are sequentially for each tokenized string, not for the de-tokenized result.
-* For the de-tokenizing, see token description above; the strings immediately follow the
-* 32 lengths.
-*
-* -- algorithmic names
-*
-* typedef struct AlgorithmicRange {
-* uint32_t rangeStart, rangeEnd;
-* uint8_t algorithmType, algorithmVariant;
-* uint16_t rangeSize;
-* } AlgorithmicRange;
-*
-* uint32_t algRangesCount; -- number of data blocks for ranges of
-* algorithmic names (Unicode 3.0.0: 3, hardcoded in gennames)
-*
-* struct {
-* AlgorithmicRange algRange;
-* uint8_t algRangeData[]; -- padded to 4-count except in last range
-* } algRanges[algRangesCount];
-* -- not a real array because each part has a different size
-* of algRange.rangeSize (including AlgorithmicRange)
-*
-* -- algorithmic range types:
-*
-* 0 Names are formed from a string prefix that is stored in
-* the algRangeData (zero-terminated), followed by the Unicode code point
-* of the character in hexadecimal digits;
-* algRange.algorithmVariant digits are written
-*
-* 1 Names are formed by calculating modulo-factors of the code point value as follows:
-* algRange.algorithmVariant is the count of modulo factors
-* algRangeData contains
-* uint16_t factors[algRange.algorithmVariant];
-* char strings[];
-* the first zero-terminated string is written as the prefix; then:
-*
-* The rangeStart is subtracted; with the difference, here "code":
-* for(i=algRange.algorithmVariant-1 to 0 step -1)
-* index[i]=code%factor[i];
-* code/=factor[i];
-*
-* The strings after the prefix are short pieces that are then appended to the result
-* according to index[0..algRange.algorithmVariant-1].
-*/
-
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/uclean.h"
-#include "unicode/udata.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uarrsort.h"
-#include "unewdata.h"
-#include "uoptions.h"
-#include "uparse.h"
-
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
-#define STRING_STORE_SIZE 1000000
-#define GROUP_STORE_SIZE 5000
-
-#define GROUP_SHIFT 5
-#define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
-#define GROUP_MASK (LINES_PER_GROUP-1)
-
-#define MAX_LINE_COUNT 50000
-#define MAX_WORD_COUNT 20000
-#define MAX_GROUP_COUNT 5000
-
-#define DATA_NAME "unames"
-#define DATA_TYPE "icu"
-#define VERSION_STRING "unam"
-#define NAME_SEPARATOR_CHAR ';'
-
-#define ISO_DATA_NAME "ucomment"
-
-/* Unicode versions --------------------------------------------------------- */
-
-enum {
- UNI_1_0,
- UNI_1_1,
- UNI_2_0,
- UNI_3_0,
- UNI_3_1,
- UNI_3_2,
- UNI_4_0,
- UNI_4_0_1,
- UNI_4_1,
- UNI_5_0,
- UNI_5_1,
- UNI_VER_COUNT
-};
-
-static const UVersionInfo
-unicodeVersions[]={
- { 1, 0, 0, 0 },
- { 1, 1, 0, 0 },
- { 2, 0, 0, 0 },
- { 3, 0, 0, 0 },
- { 3, 1, 0, 0 },
- { 3, 2, 0, 0 },
- { 4, 0, 0, 0 },
- { 4, 0, 1, 0 },
- { 4, 1, 0, 0 },
- { 5, 0, 0, 0 },
- { 5, 1, 0, 0 }
-};
-
-static int32_t ucdVersion=UNI_5_1;
-
-/*
- * Maps a version to an index into unicodeVersions[].
- * Result i: version>=unicodeVersions[i] && version<unicodeVersions[i+1].
- * Returns 0 if the version is below the first table entry and
- * UNI_VER_COUNT if it is above the last one.
- */
-static int32_t
-findUnicodeVersion(const UVersionInfo version) {
-    int32_t idx=0;
-
-    /* skip every table entry that is strictly lower than version */
-    while(idx<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[idx], 4)>0) {
-        ++idx;
-    }
-    if(idx==0 || idx==UNI_VER_COUNT) {
-        return idx; /* below the first or above the last entry */
-    }
-    if(uprv_memcmp(version, unicodeVersions[idx], 4)<0) {
-        /* between two entries: fix 4.0.2 to land before 4.1, for valid x>=ucdVersion comparisons */
-        return idx-1;
-    }
-    return idx; /* exact match */
-}
-
-/* generator data ----------------------------------------------------------- */
-
-/* UDataInfo cf. udata.h */
-static UDataInfo dataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- sizeof(UChar),
- 0,
-
- {0x75, 0x6e, 0x61, 0x6d}, /* dataFormat="unam" */
- {1, 0, 0, 0}, /* formatVersion */
- {3, 0, 0, 0} /* dataVersion */
-};
-
-static UBool beVerbose=FALSE, beQuiet=FALSE, haveCopyright=TRUE;
-
-typedef struct Options {
- UBool storeNames;
- UBool store10Names;
- UBool storeISOComments;
-} Options;
-
-static uint8_t stringStore[STRING_STORE_SIZE],
- groupStore[GROUP_STORE_SIZE],
- lineLengths[LINES_PER_GROUP];
-
-static uint32_t lineTop=0, groupBottom, wordBottom=STRING_STORE_SIZE, lineLengthsTop;
-
-typedef struct {
- uint32_t code;
- int16_t length;
- uint8_t *s;
-} Line;
-
-typedef struct {
- int32_t weight; /* -(cost for token) + (number of occurrences) * (length-1) */
- int16_t count;
- int16_t length;
- uint8_t *s;
-} Word;
-
-static Line lines[MAX_LINE_COUNT];
-static Word words[MAX_WORD_COUNT];
-
-static uint32_t lineCount=0, wordCount=0;
-
-static int16_t leadByteCount;
-
-#define LEADBYTE_LIMIT 16
-
-static int16_t tokens[LEADBYTE_LIMIT*256];
-static uint32_t tokenCount;
-
-/* prototypes --------------------------------------------------------------- */
-
-static void
-init(void);
-
-static void
-parseDB(const char *filename, Options *options);
-
-static void
-parseName(char *name, int16_t length);
-
-static int16_t
-skipNoise(char *line, int16_t start, int16_t limit);
-
-static int16_t
-getWord(char *line, int16_t start, int16_t limit);
-
-static void
-compress(void);
-
-static void
-compressLines(void);
-
-static int16_t
-compressLine(uint8_t *s, int16_t length, int16_t *pGroupTop);
-
-static int32_t
-compareWords(const void *context, const void *word1, const void *word2);
-
-static void
-generateData(const char *dataDir, Options *options);
-
-static uint32_t
-generateAlgorithmicData(UNewDataMemory *pData, Options *options);
-
-static int16_t
-findToken(uint8_t *s, int16_t length);
-
-static Word *
-findWord(char *s, int16_t length);
-
-static Word *
-addWord(char *s, int16_t length);
-
-static void
-countWord(Word *word);
-
-static void
-addLine(uint32_t code, char *names[], int16_t lengths[], int16_t count);
-
-static void
-addGroup(uint32_t groupMSB, uint8_t *strings, int16_t length);
-
-static uint32_t
-addToken(uint8_t *s, int16_t length);
-
-static void
-appendLineLength(int16_t length);
-
-static void
-appendLineLengthNibble(uint8_t nibble);
-
-static uint8_t *
-allocLine(int32_t length);
-
-static uint8_t *
-allocWord(uint32_t length);
-
-/* -------------------------------------------------------------------------- */
-
-enum {
- HELP_H,
- HELP_QUESTION_MARK,
- VERBOSE,
- QUIET,
- COPYRIGHT,
- DESTDIR,
- UNICODE,
- UNICODE1_NAMES,
- NO_ISO_COMMENTS,
- ONLY_ISO_COMMENTS
-};
-
-static UOption options[]={
- UOPTION_HELP_H,
- UOPTION_HELP_QUESTION_MARK,
- UOPTION_VERBOSE,
- UOPTION_QUIET,
- UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
- { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
- { "unicode1-names", NULL, NULL, NULL, '1', UOPT_NO_ARG, 0 },
- { "no-iso-comments", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
- { "only-iso-comments", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
-};
-
-extern int
-main(int argc, char* argv[]) {
- UVersionInfo version;
- Options moreOptions={ TRUE, FALSE, TRUE };
- UErrorCode errorCode = U_ZERO_ERROR;
-
- U_MAIN_INIT_ARGS(argc, argv);
-
- /* Initialize ICU */
- u_init(&errorCode);
- if (U_FAILURE(errorCode) && errorCode != U_FILE_ACCESS_ERROR) {
- /* Note: u_init() will try to open ICU property data.
- * failures here are expected when building ICU from scratch.
- * ignore them.
- */
- fprintf(stderr, "%s: can not initialize ICU. errorCode = %s\n",
- argv[0], u_errorName(errorCode));
- exit(1);
- }
-
- /* preset then read command line options */
- options[DESTDIR].value=u_getDataDirectory();
- options[UNICODE].value="4.1";
- argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
-
- /* error handling, printing usage message */
- if(argc<0) {
- fprintf(stderr,
- "error in command line argument \"%s\"\n",
- argv[-argc]);
- } else if(argc<2) {
- argc=-1;
- }
- if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
- /*
- * Broken into chucks because the C89 standard says the minimum
- * required supported string length is 509 bytes.
- */
- fprintf(stderr,
- "Usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] filename\n"
- "\n"
- "Read the UnicodeData.txt file and \n"
- "create a binary file " DATA_NAME "." DATA_TYPE " with the character names\n"
- "\n"
- "\tfilename absolute path/filename for the Unicode database text file\n"
- "\t\t(default: standard input)\n"
- "\n",
- argv[0]);
- fprintf(stderr,
- "Options:\n"
- "\t-h or -? or --help this usage text\n"
- "\t-v or --verbose verbose output\n"
- "\t-q or --quiet no output\n"
- "\t-c or --copyright include a copyright notice\n"
- "\t-d or --destdir destination directory, followed by the path\n"
- "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n");
- fprintf(stderr,
- "\t-1 or --unicode1-names store Unicode 1.0 character names\n"
- "\t --no-iso-comments do not store ISO comments\n"
- "\t --only-iso-comments write ucomment.icu with only ISO comments\n");
- return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
- }
-
- /* get the options values */
- beVerbose=options[VERBOSE].doesOccur;
- beQuiet=options[QUIET].doesOccur;
- haveCopyright=options[COPYRIGHT].doesOccur;
- moreOptions.store10Names=options[UNICODE1_NAMES].doesOccur;
- moreOptions.storeISOComments=!options[NO_ISO_COMMENTS].doesOccur;
- if(options[ONLY_ISO_COMMENTS].doesOccur) {
- moreOptions.storeNames=moreOptions.store10Names=FALSE;
- moreOptions.storeISOComments=TRUE;
- }
-
- /* set the Unicode version */
- u_versionFromString(version, options[UNICODE].value);
- uprv_memcpy(dataInfo.dataVersion, version, 4);
- ucdVersion=findUnicodeVersion(version);
-
- init();
- parseDB(argc>=2 ? argv[1] : "-", &moreOptions);
- compress();
- generateData(options[DESTDIR].value, &moreOptions);
-
- u_cleanup();
- return 0;
-}
-
-static void
-init() {
- int i;
-
- for(i=0; i<256; ++i) {
- tokens[i]=0;
- }
-}
-
-/* parsing ------------------------------------------------------------------ */
-
-/* get a name, strip leading and trailing whitespace */
-static int16_t
-getName(char **pStart, char *limit) {
- /* strip leading whitespace */
- char *start=(char *)u_skipWhitespace(*pStart);
-
- /* strip trailing whitespace */
- while(start<limit && (*(limit-1)==' ' || *(limit-1)=='\t')) {
- --limit;
- }
-
- /* return results */
- *pStart=start;
- return (int16_t)(limit-start);
-}
-
-static void U_CALLCONV
-lineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- Options *storeOptions=(Options *)context;
- char *names[3];
- int16_t lengths[3]={ 0, 0, 0 };
- static uint32_t prevCode=0;
- uint32_t code=0;
-
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- /* get the character code */
- code=uprv_strtoul(fields[0][0], NULL, 16);
-
- /* get the character name */
- if(storeOptions->storeNames) {
- names[0]=fields[1][0];
- lengths[0]=getName(names+0, fields[1][1]);
- if(names[0][0]=='<') {
- /* do not store pseudo-names in <> brackets */
- lengths[0]=0;
- }
- }
-
- /* store 1.0 names */
- /* get the second character name, the one from Unicode 1.0 */
- if(storeOptions->store10Names) {
- names[1]=fields[10][0];
- lengths[1]=getName(names+1, fields[10][1]);
- if(names[1][0]=='<') {
- /* do not store pseudo-names in <> brackets */
- lengths[1]=0;
- }
- }
-
- /* get the ISO 10646 comment */
- if(storeOptions->storeISOComments) {
- names[2]=fields[11][0];
- lengths[2]=getName(names+2, fields[11][1]);
- }
-
- if(lengths[0]+lengths[1]+lengths[2]==0) {
- return;
- }
-
- /* check for non-character code points */
- if(!U_IS_UNICODE_CHAR(code)) {
- fprintf(stderr, "gennames: error - properties for non-character code point U+%04lx\n",
- (unsigned long)code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* check that the code points (code) are in ascending order */
- if(code<=prevCode && code>0) {
- fprintf(stderr, "gennames: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
- (unsigned long)code, (unsigned long)prevCode);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- prevCode=code;
-
- parseName(names[0], lengths[0]);
- parseName(names[1], lengths[1]);
- parseName(names[2], lengths[2]);
-
- /*
- * set the count argument to
- * 1: only store regular names, or only store ISO 10646 comments
- * 2: store regular and 1.0 names
- * 3: store names and ISO 10646 comment
- *
- * addLine() will ignore empty trailing names
- */
- if(storeOptions->storeNames) {
- /* store names and comments as parsed according to storeOptions */
- addLine(code, names, lengths, 3);
- } else {
- /* store only ISO 10646 comments */
- addLine(code, names+2, lengths+2, 1);
- }
-}
-
-static void
-parseDB(const char *filename, Options *storeOptions) {
- char *fields[15][2];
- UErrorCode errorCode=U_ZERO_ERROR;
-
- u_parseDelimitedFile(filename, ';', fields, 15, lineFn, storeOptions, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gennames parse error: %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
-
- if(!beQuiet) {
- printf("size of all names in the database: %lu\n",
- (unsigned long)lineTop);
- printf("number of named Unicode characters: %lu\n",
- (unsigned long)lineCount);
- printf("number of words in the dictionary from these names: %lu\n",
- (unsigned long)wordCount);
- }
-}
-
-static void
-parseName(char *name, int16_t length) {
- int16_t start=0, limit, wordLength/*, prevStart=-1*/;
- Word *word;
-
- while(start<length) {
- /* skip any "noise" characters */
- limit=skipNoise(name, start, length);
- if(start<limit) {
- /*prevStart=-1;*/
- start=limit;
- }
- if(start==length) {
- break;
- }
-
- /* get a word and add it if it is longer than 1 */
- limit=getWord(name, start, length);
- wordLength=(int16_t)(limit-start);
- if(wordLength>1) {
- word=findWord(name+start, wordLength);
- if(word==NULL) {
- word=addWord(name+start, wordLength);
- }
- countWord(word);
- }
-
-#if 0
- /*
- * if there was a word before this
- * (with no noise in between), then add the pair of words, too
- */
- if(prevStart!=-1) {
- wordLength=limit-prevStart;
- word=findWord(name+prevStart, wordLength);
- if(word==NULL) {
- word=addWord(name+prevStart, wordLength);
- }
- countWord(word);
- }
-#endif
-
- /*prevStart=start;*/
- start=limit;
- }
-}
-
-static UBool U_INLINE
-isWordChar(char c) {
- return ('A'<=c && c<='I') || /* EBCDIC-safe check for letters */
- ('J'<=c && c<='R') ||
- ('S'<=c && c<='Z') ||
-
- ('a'<=c && c<='i') || /* lowercase letters for ISO comments */
- ('j'<=c && c<='r') ||
- ('s'<=c && c<='z') ||
-
- ('0'<=c && c<='9');
-}
-
-static int16_t
-skipNoise(char *line, int16_t start, int16_t limit) {
- /* skip anything that is not part of a word in this sense */
- while(start<limit && !isWordChar(line[start])) {
- ++start;
- }
-
- return start;
-}
-
-static int16_t
-getWord(char *line, int16_t start, int16_t limit) {
- char c=0; /* initialize to avoid a compiler warning although the code was safe */
-
- /* a unicode character name word consists of A-Z0-9 */
- while(start<limit && isWordChar(line[start])) {
- ++start;
- }
-
- /* include a following space or dash */
- if(start<limit && ((c=line[start])==' ' || c=='-')) {
- ++start;
- }
-
- return start;
-}
-
-/* compressing -------------------------------------------------------------- */
-
-static void
-compress() {
- uint32_t i, letterCount;
- int16_t wordNumber;
- UErrorCode errorCode;
-
- /* sort the words in reverse order by weight */
- errorCode=U_ZERO_ERROR;
- uprv_sortArray(words, wordCount, sizeof(Word),
- compareWords, NULL, FALSE, &errorCode);
-
- /* remove the words that do not save anything */
- while(wordCount>0 && words[wordCount-1].weight<1) {
- --wordCount;
- }
-
- /* count the letters in the token range */
- letterCount=0;
- for(i=LEADBYTE_LIMIT; i<256; ++i) {
- if(tokens[i]==-1) {
- ++letterCount;
- }
- }
- if(!beQuiet) {
- printf("number of letters used in the names: %d\n", (int)letterCount);
- }
-
- /* do we need double-byte tokens? */
- if(wordCount+letterCount<=256) {
- /* no, single-byte tokens are enough */
- leadByteCount=0;
- for(i=0, wordNumber=0; wordNumber<(int16_t)wordCount; ++i) {
- if(tokens[i]!=-1) {
- tokens[i]=wordNumber;
- if(beVerbose) {
- printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
- (int)i, (long)words[wordNumber].weight,
- words[wordNumber].length, words[wordNumber].s);
- }
- ++wordNumber;
- }
- }
- tokenCount=i;
- } else {
- /*
- * The tokens that need two token bytes
- * get their weight reduced by their count
- * because they save less.
- */
- tokenCount=256-letterCount;
- for(i=tokenCount; i<wordCount; ++i) {
- words[i].weight-=words[i].count;
- }
-
- /* sort these words in reverse order by weight */
- errorCode=U_ZERO_ERROR;
- uprv_sortArray(words+tokenCount, wordCount-tokenCount, sizeof(Word),
- compareWords, NULL, FALSE, &errorCode);
-
- /* remove the words that do not save anything */
- while(wordCount>0 && words[wordCount-1].weight<1) {
- --wordCount;
- }
-
- /* how many tokens and lead bytes do we have now? */
- tokenCount=wordCount+letterCount+(LEADBYTE_LIMIT-1);
- /*
- * adjust upwards to take into account that
- * double-byte tokens must not
- * use NAME_SEPARATOR_CHAR as a second byte
- */
- tokenCount+=(tokenCount-256+254)/255;
-
- leadByteCount=(int16_t)(tokenCount>>8);
- if(leadByteCount<LEADBYTE_LIMIT) {
- /* adjust for the real number of lead bytes */
- tokenCount-=(LEADBYTE_LIMIT-1)-leadByteCount;
- } else {
- /* limit the number of lead bytes */
- leadByteCount=LEADBYTE_LIMIT-1;
- tokenCount=LEADBYTE_LIMIT*256;
- wordCount=tokenCount-letterCount-(LEADBYTE_LIMIT-1);
- /* adjust again to skip double-byte tokens with ';' */
- wordCount-=(tokenCount-256+254)/255;
- }
-
- /* set token 0 to word 0 */
- tokens[0]=0;
- if(beVerbose) {
- printf("tokens[0x000]: word%8ld \"%.*s\"\n",
- (long)words[0].weight,
- words[0].length, words[0].s);
- }
- wordNumber=1;
-
- /* set the lead byte tokens */
- for(i=1; (int16_t)i<=leadByteCount; ++i) {
- tokens[i]=-2;
- }
-
- /* set the tokens */
- for(; i<256; ++i) {
- /* if store10Names then the parser set tokens[NAME_SEPARATOR_CHAR]=-1 */
- if(tokens[i]!=-1) {
- tokens[i]=wordNumber;
- if(beVerbose) {
- printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
- (int)i, (long)words[wordNumber].weight,
- words[wordNumber].length, words[wordNumber].s);
- }
- ++wordNumber;
- }
- }
-
- /* continue above 255 where there are no letters */
- for(; (uint32_t)wordNumber<wordCount; ++i) {
- if((i&0xff)==NAME_SEPARATOR_CHAR) {
- tokens[i]=-1; /* do not use NAME_SEPARATOR_CHAR as a second token byte */
- } else {
- tokens[i]=wordNumber;
- if(beVerbose) {
- printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
- (int)i, (long)words[wordNumber].weight,
- words[wordNumber].length, words[wordNumber].s);
- }
- ++wordNumber;
- }
- }
- tokenCount=i; /* should be already tokenCount={i or i+1} */
- }
-
- if(!beQuiet) {
- printf("number of lead bytes: %d\n", leadByteCount);
- printf("number of single-byte tokens: %lu\n",
- (unsigned long)256-letterCount-leadByteCount);
- printf("number of tokens: %lu\n", (unsigned long)tokenCount);
- }
-
- compressLines();
-}
-
-static void
-compressLines() {
- Line *line=NULL;
- uint32_t i=0, inLine, outLine=0xffffffff /* (uint32_t)(-1) */,
- groupMSB=0xffff, lineCount2;
- int16_t groupTop=0;
-
- /* store the groups like lines, with compressed data after raw strings */
- groupBottom=lineTop;
- lineCount2=lineCount;
- lineCount=0;
-
- /* loop over all lines */
- while(i<lineCount2) {
- line=lines+i++;
- inLine=line->code;
-
- /* segment the lines to groups of 32 */
- if(inLine>>GROUP_SHIFT!=groupMSB) {
- /* finish the current group with empty lines */
- while((++outLine&GROUP_MASK)!=0) {
- appendLineLength(0);
- }
-
- /* store the group like a line */
- if(groupTop>0) {
- if(groupTop>GROUP_STORE_SIZE) {
- fprintf(stderr, "gennames: group store overflow\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
- addGroup(groupMSB, groupStore, groupTop);
- }
-
- /* start the new group */
- lineLengthsTop=0;
- groupTop=0;
- groupMSB=inLine>>GROUP_SHIFT;
- outLine=(inLine&~GROUP_MASK)-1;
- }
-
- /* write empty lines between the previous line in the group and this one */
- while(++outLine<inLine) {
- appendLineLength(0);
- }
-
- /* write characters and tokens for this line */
- appendLineLength(compressLine(line->s, line->length, &groupTop));
- }
-
- /* finish and store the last group */
- if(line && groupMSB!=0xffff) {
- /* finish the current group with empty lines */
- while((++outLine&GROUP_MASK)!=0) {
- appendLineLength(0);
- }
-
- /* store the group like a line */
- if(groupTop>0) {
- if(groupTop>GROUP_STORE_SIZE) {
- fprintf(stderr, "gennames: group store overflow\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
- addGroup(groupMSB, groupStore, groupTop);
- }
- }
-
- if(!beQuiet) {
- printf("number of groups: %lu\n", (unsigned long)lineCount);
- }
-}
-
-static int16_t
-compressLine(uint8_t *s, int16_t length, int16_t *pGroupTop) {
- int16_t start, limit, token, groupTop=*pGroupTop;
-
- start=0;
- do {
- /* write any "noise" characters */
- limit=skipNoise((char *)s, start, length);
- while(start<limit) {
- groupStore[groupTop++]=s[start++];
- }
-
- if(start==length) {
- break;
- }
-
- /* write a word, as token or directly */
- limit=getWord((char *)s, start, length);
- if(limit-start==1) {
- groupStore[groupTop++]=s[start++];
- } else {
- token=findToken(s+start, (int16_t)(limit-start));
- if(token!=-1) {
- if(token>0xff) {
- groupStore[groupTop++]=(uint8_t)(token>>8);
- }
- groupStore[groupTop++]=(uint8_t)token;
- start=limit;
- } else {
- while(start<limit) {
- groupStore[groupTop++]=s[start++];
- }
- }
- }
- } while(start<length);
-
- length=(int16_t)(groupTop-*pGroupTop);
- *pGroupTop=groupTop;
- return length;
-}
-
-static int32_t
-compareWords(const void *context, const void *word1, const void *word2) {
- /* reverse sort by word weight */
- return ((Word *)word2)->weight-((Word *)word1)->weight;
-}
-
-/* generate output data ----------------------------------------------------- */
-
-static void
-generateData(const char *dataDir, Options *storeOptions) {
- UNewDataMemory *pData;
- UErrorCode errorCode=U_ZERO_ERROR;
- uint16_t groupWords[3];
- uint32_t i, groupTop=lineTop, offset, size,
- tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
- long dataLength;
- int16_t token;
-
- pData=udata_create(dataDir,
- DATA_TYPE, storeOptions->storeNames ? DATA_NAME : ISO_DATA_NAME,
- &dataInfo,
- haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gennames: unable to create data memory, error %d\n", errorCode);
- exit(errorCode);
- }
-
- /* first, see how much space we need, and prepare the token strings */
- for(i=0; i<tokenCount; ++i) {
- token=tokens[i];
- if(token!=-1 && token!=-2) {
- tokens[i]=(int16_t)(addToken(words[token].s, words[token].length)-groupTop);
- }
- }
-
- /*
- * Required padding for data swapping:
- * The token table undergoes a permutation during data swapping when the
- * input and output charsets are different.
- * The token table cannot grow during swapping, so we need to make sure that
- * the table is long enough for successful in-place permutation.
- *
- * We simply round up tokenCount to the next multiple of 256 to account for
- * all possible permutations.
- *
- * An optimization is possible if we only ever swap between ASCII and EBCDIC:
- *
- * If tokenCount>256, then a semicolon (NAME_SEPARATOR_CHAR) is used
- * and will be swapped between ASCII and EBCDIC between
- * positions 0x3b (ASCII semicolon) and 0x5e (EBCDIC semicolon).
- * This should be the only -1 entry in tokens[256..511] on which the data
- * swapper bases its trail byte permutation map (trailMap[]).
- *
- * It would be sufficient to increase tokenCount so that its lower 8 bits
- * are at least 0x5e+1 to make room for swapping between the two semicolons.
- * For values higher than 0x5e, the trail byte permutation map (trailMap[])
- * should always be an identity map, where we do not need additional room.
- */
- i=tokenCount;
- tokenCount=(tokenCount+0xff)&~0xff;
- if(!beQuiet && i<tokenCount) {
- printf("number of tokens[] padding entries for data swapping: %lu\n", (unsigned long)(tokenCount-i));
- }
- for(; i<tokenCount; ++i) {
- if((i&0xff)==NAME_SEPARATOR_CHAR) {
- tokens[i]=-1; /* do not use NAME_SEPARATOR_CHAR as a second token byte */
- } else {
- tokens[i]=0; /* unused token for padding */
- }
- }
-
- /*
- * Calculate the total size in bytes of the data including:
- * - the offset to the token strings, uint32_t (4)
- * - the offset to the group table, uint32_t (4)
- * - the offset to the group strings, uint32_t (4)
- * - the offset to the algorithmic names, uint32_t (4)
- *
- * - the number of tokens, uint16_t (2)
- * - the token table, uint16_t[tokenCount] (2*tokenCount)
- *
- * - the token strings, each zero-terminated (tokenSize=(lineTop-groupTop)), 2-padded
- *
- * - the number of groups, uint16_t (2)
- * - the group table, { uint16_t groupMSB, uint16_t offsetHigh, uint16_t offsetLow }[6*groupCount]
- *
- * - the group strings (groupTop-groupBottom), 2-padded
- *
- * - the size of the data for the algorithmic names
- */
- tokenStringOffset=4+4+4+4+2+2*tokenCount;
- groupsOffset=(tokenStringOffset+(lineTop-groupTop)+1)&~1;
- groupStringOffset=groupsOffset+2+6*lineCount;
- algNamesOffset=(groupStringOffset+(groupTop-groupBottom)+3)&~3;
-
- offset=generateAlgorithmicData(NULL, storeOptions);
- size=algNamesOffset+offset;
-
- if(!beQuiet) {
- printf("size of the Unicode Names data:\n"
- "total data length %lu, token strings %lu, compressed strings %lu, algorithmic names %lu\n",
- (unsigned long)size, (unsigned long)(lineTop-groupTop),
- (unsigned long)(groupTop-groupBottom), (unsigned long)offset);
- }
-
- /* write the data to the file */
- /* offsets */
- udata_write32(pData, tokenStringOffset);
- udata_write32(pData, groupsOffset);
- udata_write32(pData, groupStringOffset);
- udata_write32(pData, algNamesOffset);
-
- /* token table */
- udata_write16(pData, (uint16_t)tokenCount);
- udata_writeBlock(pData, tokens, 2*tokenCount);
-
- /* token strings */
- udata_writeBlock(pData, stringStore+groupTop, lineTop-groupTop);
- if((lineTop-groupTop)&1) {
- /* 2-padding */
- udata_writePadding(pData, 1);
- }
-
- /* group table */
- udata_write16(pData, (uint16_t)lineCount);
- for(i=0; i<lineCount; ++i) {
- /* groupMSB */
- groupWords[0]=(uint16_t)lines[i].code;
-
- /* offset */
- offset = (uint32_t)((lines[i].s - stringStore)-groupBottom);
- groupWords[1]=(uint16_t)(offset>>16);
- groupWords[2]=(uint16_t)(offset);
- udata_writeBlock(pData, groupWords, 6);
- }
-
- /* group strings */
- udata_writeBlock(pData, stringStore+groupBottom, groupTop-groupBottom);
-
- /* 4-align the algorithmic names data */
- udata_writePadding(pData, algNamesOffset-(groupStringOffset+(groupTop-groupBottom)));
-
- generateAlgorithmicData(pData, storeOptions);
-
- /* finish up */
- dataLength=udata_finish(pData, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gennames: error %d writing the output file\n", errorCode);
- exit(errorCode);
- }
-
- if(dataLength!=(long)size) {
- fprintf(stderr, "gennames: data length %ld != calculated size %lu\n",
-dataLength, (unsigned long)size);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-}
-
-/* the structure for algorithmic names needs to be 4-aligned */
-typedef struct AlgorithmicRange {
- uint32_t rangeStart, rangeEnd;
- uint8_t algorithmType, algorithmVariant;
- uint16_t rangeSize;
-} AlgorithmicRange;
-
-static uint32_t
-generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
- static char prefix[] = "CJK UNIFIED IDEOGRAPH-";
-# define PREFIX_LENGTH 23
-# define PREFIX_LENGTH_4 24
- uint32_t countAlgRanges;
-
- static AlgorithmicRange cjkExtA={
- 0x3400, 0x4db5,
- 0, 4,
- sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
- };
- static AlgorithmicRange cjk={
- 0x4e00, 0x9fa5,
- 0, 4,
- sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
- };
- static AlgorithmicRange cjkExtB={
- 0x20000, 0x2a6d6,
- 0, 5,
- sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
- };
-
- static char jamo[]=
- "HANGUL SYLLABLE \0"
-
- "G\0GG\0N\0D\0DD\0R\0M\0B\0BB\0"
- "S\0SS\0\0J\0JJ\0C\0K\0T\0P\0H\0"
-
- "A\0AE\0YA\0YAE\0EO\0E\0YEO\0YE\0O\0"
- "WA\0WAE\0OE\0YO\0U\0WEO\0WE\0WI\0"
- "YU\0EU\0YI\0I\0"
-
- "\0G\0GG\0GS\0N\0NJ\0NH\0D\0L\0LG\0LM\0"
- "LB\0LS\0LT\0LP\0LH\0M\0B\0BS\0"
- "S\0SS\0NG\0J\0C\0K\0T\0P\0H"
- ;
-
- static AlgorithmicRange hangul={
- 0xac00, 0xd7a3,
- 1, 3,
- sizeof(AlgorithmicRange)+6+sizeof(jamo)
- };
-
- /* modulo factors, maximum 8 */
- /* 3 factors: 19, 21, 28, most-to-least-significant */
- static uint16_t hangulFactors[3]={
- 19, 21, 28
- };
-
- uint32_t size;
-
- size=0;
-
- if(ucdVersion>=UNI_5_1) {
- /* Unicode 5.1 and up has a longer CJK Unihan range than before */
- cjk.rangeEnd=0x9FC3;
- } else if(ucdVersion>=UNI_4_1) {
- /* Unicode 4.1 and up has a longer CJK Unihan range than before */
- cjk.rangeEnd=0x9FBB;
- }
-
- /* number of ranges of algorithmic names */
- if(!storeOptions->storeNames) {
- countAlgRanges=0;
- } else if(ucdVersion>=UNI_3_1) {
- /* Unicode 3.1 and up has 4 ranges including CJK Extension B */
- countAlgRanges=4;
- } else if(ucdVersion>=UNI_3_0) {
- /* Unicode 3.0 has 3 ranges including CJK Extension A */
- countAlgRanges=3;
- } else {
- /* Unicode 2.0 has 2 ranges including Hangul and CJK Unihan */
- countAlgRanges=2;
- }
-
- if(pData!=NULL) {
- udata_write32(pData, countAlgRanges);
- } else {
- size+=4;
- }
- if(countAlgRanges==0) {
- return size;
- }
-
- /*
- * each range:
- * uint32_t rangeStart
- * uint32_t rangeEnd
- * uint8_t algorithmType
- * uint8_t algorithmVariant
- * uint16_t size of range data
- * uint8_t[size] data
- */
-
- /* range 0: cjk extension a */
- if(countAlgRanges>=3) {
- if(pData!=NULL) {
- udata_writeBlock(pData, &cjkExtA, sizeof(AlgorithmicRange));
- udata_writeString(pData, prefix, PREFIX_LENGTH);
- if(PREFIX_LENGTH<PREFIX_LENGTH_4) {
- udata_writePadding(pData, PREFIX_LENGTH_4-PREFIX_LENGTH);
- }
- } else {
- size+=sizeof(AlgorithmicRange)+PREFIX_LENGTH_4;
- }
- }
-
- /* range 1: cjk */
- if(pData!=NULL) {
- udata_writeBlock(pData, &cjk, sizeof(AlgorithmicRange));
- udata_writeString(pData, prefix, PREFIX_LENGTH);
- if(PREFIX_LENGTH<PREFIX_LENGTH_4) {
- udata_writePadding(pData, PREFIX_LENGTH_4-PREFIX_LENGTH);
- }
- } else {
- size+=sizeof(AlgorithmicRange)+PREFIX_LENGTH_4;
- }
-
- /* range 2: hangul syllables */
- if(pData!=NULL) {
- udata_writeBlock(pData, &hangul, sizeof(AlgorithmicRange));
- udata_writeBlock(pData, hangulFactors, 6);
- udata_writeString(pData, jamo, sizeof(jamo));
- } else {
- size+=sizeof(AlgorithmicRange)+6+sizeof(jamo);
- }
-
- /* range 3: cjk extension b */
- if(countAlgRanges>=4) {
- if(pData!=NULL) {
- udata_writeBlock(pData, &cjkExtB, sizeof(AlgorithmicRange));
- udata_writeString(pData, prefix, PREFIX_LENGTH);
- if(PREFIX_LENGTH<PREFIX_LENGTH_4) {
- udata_writePadding(pData, PREFIX_LENGTH_4-PREFIX_LENGTH);
- }
- } else {
- size+=sizeof(AlgorithmicRange)+PREFIX_LENGTH_4;
- }
- }
-
- return size;
-}
-
-/* helpers ------------------------------------------------------------------ */
-
-static int16_t
-findToken(uint8_t *s, int16_t length) {
- int16_t i, token;
-
- for(i=0; i<(int16_t)tokenCount; ++i) {
- token=tokens[i];
- if(token>=0 && length==words[token].length && 0==uprv_memcmp(s, words[token].s, length)) {
- return i;
- }
- }
-
- return -1;
-}
-
-static Word *
-findWord(char *s, int16_t length) {
- uint32_t i;
-
- for(i=0; i<wordCount; ++i) {
- if(length==words[i].length && 0==uprv_memcmp(s, words[i].s, length)) {
- return words+i;
- }
- }
-
- return NULL;
-}
-
-static Word *
-addWord(char *s, int16_t length) {
- uint8_t *stringStart;
- Word *word;
-
- if(wordCount==MAX_WORD_COUNT) {
- fprintf(stderr, "gennames: too many words\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- stringStart=allocWord(length);
- uprv_memcpy(stringStart, s, length);
-
- word=words+wordCount;
-
- /*
- * Initialize the weight with the costs for this token:
- * a zero-terminated string and a 16-bit offset.
- */
- word->weight=-(length+1+2);
- word->count=0;
- word->length=length;
- word->s=stringStart;
-
- ++wordCount;
-
- return word;
-}
-
-static void
-countWord(Word *word) {
- /* add to the weight the savings: the length of the word minus 1 byte for the token */
- word->weight+=word->length-1;
- ++word->count;
-}
-
-static void
-addLine(uint32_t code, char *names[], int16_t lengths[], int16_t count) {
- uint8_t *stringStart;
- Line *line;
- int16_t i, length;
-
- if(lineCount==MAX_LINE_COUNT) {
- fprintf(stderr, "gennames: too many lines\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- /* find the last non-empty name */
- while(count>0 && lengths[count-1]==0) {
- --count;
- }
- if(count==0) {
- return; /* should not occur: caller should not have called */
- }
-
- /* there will be (count-1) separator characters */
- i=count;
- length=count-1;
-
- /* add lengths of strings */
- while(i>0) {
- length+=lengths[--i];
- }
-
- /* allocate line memory */
- stringStart=allocLine(length);
-
- /* copy all strings into the line memory */
- length=0; /* number of chars copied so far */
- for(i=0; i<count; ++i) {
- if(i>0) {
- stringStart[length++]=NAME_SEPARATOR_CHAR;
- }
- if(lengths[i]>0) {
- uprv_memcpy(stringStart+length, names[i], lengths[i]);
- length+=lengths[i];
- }
- }
-
- line=lines+lineCount;
-
- line->code=code;
- line->length=length;
- line->s=stringStart;
-
- ++lineCount;
-
- /* prevent a character value that is actually in a name from becoming a token */
- while(length>0) {
- tokens[stringStart[--length]]=-1;
- }
-}
-
-static void
-addGroup(uint32_t groupMSB, uint8_t *strings, int16_t length) {
- uint8_t *stringStart;
- Line *line;
-
- if(lineCount==MAX_LINE_COUNT) {
- fprintf(stderr, "gennames: too many groups\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-
- /* store the line lengths first, then the strings */
- lineLengthsTop=(lineLengthsTop+1)/2;
- stringStart=allocLine(lineLengthsTop+length);
- uprv_memcpy(stringStart, lineLengths, lineLengthsTop);
- uprv_memcpy(stringStart+lineLengthsTop, strings, length);
-
- line=lines+lineCount;
-
- line->code=groupMSB;
- line->length=length;
- line->s=stringStart;
-
- ++lineCount;
-}
-
-static uint32_t
-addToken(uint8_t *s, int16_t length) {
- uint8_t *stringStart;
-
- stringStart=allocLine(length+1);
- uprv_memcpy(stringStart, s, length);
- stringStart[length]=0;
-
- return (uint32_t)(stringStart - stringStore);
-}
-
-static void
-appendLineLength(int16_t length) {
- if(length>=76) {
- fprintf(stderr, "gennames: compressed line too long\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
- if(length>=12) {
- length-=12;
- appendLineLengthNibble((uint8_t)((length>>4)|12));
- }
- appendLineLengthNibble((uint8_t)length);
-}
-
-static void
-appendLineLengthNibble(uint8_t nibble) {
- if((lineLengthsTop&1)==0) {
- lineLengths[lineLengthsTop/2]=(uint8_t)(nibble<<4);
- } else {
- lineLengths[lineLengthsTop/2]|=nibble&0xf;
- }
- ++lineLengthsTop;
-}
-
-static uint8_t *
-allocLine(int32_t length) {
- uint32_t top=lineTop+length;
- uint8_t *p;
-
- if(top>wordBottom) {
- fprintf(stderr, "gennames: out of memory\n");
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
- p=stringStore+lineTop;
- lineTop=top;
- return p;
-}
-
-static uint8_t *
-allocWord(uint32_t length) {
- uint32_t bottom=wordBottom-length;
-
- if(lineTop>bottom) {
- fprintf(stderr, "gennames: out of memory\n");
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
- wordBottom=bottom;
- return stringStore+bottom;
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/gennorm/Makefile.in b/tools/gennorm/Makefile.in
deleted file mode 100644
index baec6859..00000000
--- a/tools/gennorm/Makefile.in
+++ /dev/null
@@ -1,97 +0,0 @@
-## Makefile.in for ICU - tools/gennorm
-## Copyright (c) 2001-2005, International Business Machines Corporation and
-## others. All Rights Reserved.
-## Steven R. Loomis/Markus W. Scherer
-
-## Source directory information
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-
-top_builddir = ../..
-
-include $(top_builddir)/icudefs.mk
-
-## Build directory information
-subdir = tools/gennorm
-
-TARGET_STUB_NAME = gennorm
-
-SECTION = 8
-
-#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
-
-
-## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS) $(MAN_FILES)
-
-## Target information
-TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
-
-ifneq ($(top_builddir),$(top_srcdir))
-CPPFLAGS += -I$(top_builddir)/common
-endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
-LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-
-OBJECTS = gennorm.o store.o
-
-DEPS = $(OBJECTS:.o=.d)
-
-## List of phony targets
-.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man
-
-## Clear suffix list
-.SUFFIXES :
-
-## List of standard targets
-all: all-local
-install: install-local
-clean: clean-local
-distclean : distclean-local
-dist: dist-local
-check: all check-local
-
-all-local: $(TARGET) $(MAN_FILES)
-
-install-local: all-local install-man
-# $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
-# $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
-
-install-man: $(MAN_FILES)
-# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
-# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
-
-dist-local:
-
-clean-local:
- test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(OBJECTS)
-
-distclean-local: clean-local
- $(RMV) Makefile
-
-check-local: all-local
-
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(TARGET) : $(OBJECTS)
- $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
- $(POST_BUILD_STEP)
-
-
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
-ifeq (,$(MAKECMDGOALS))
--include $(DEPS)
-else
-ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
--include $(DEPS)
-endif
-endif
-
diff --git a/tools/gennorm/gennorm.c b/tools/gennorm/gennorm.c
deleted file mode 100644
index 69a12e30..00000000
--- a/tools/gennorm/gennorm.c
+++ /dev/null
@@ -1,561 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2001-2005, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: gennorm.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001may25
-* created by: Markus W. Scherer
-*
-* This program reads the Unicode character database text file,
-* parses it, and extracts the data for normalization.
-* It then preprocesses it and writes a binary file for efficient use
-* in various Unicode text normalization processes.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/ustring.h"
-#include "unicode/putil.h"
-#include "unicode/uclean.h"
-#include "unicode/udata.h"
-#include "unicode/uset.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "unewdata.h"
-#include "uoptions.h"
-#include "uparse.h"
-#include "unormimp.h"
-
-U_CDECL_BEGIN
-#include "gennorm.h"
-U_CDECL_END
-
-UBool beVerbose=FALSE, haveCopyright=TRUE; /* both are overwritten in main() from the -v and -c options; declared extern in gennorm.h */
-
-/* prototypes ---------------------------------------------------------------
- *
- * Both parsers are defined near the end of this file, inside
- * #if !UCONFIG_NO_NORMALIZATION. Each returns immediately if *pErrorCode
- * already indicates a failure on entry.
- */
-
-static void
-parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode, UBool reportError);
-
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode);
-
-/* -------------------------------------------------------------------------- */
-
-enum { /* indices into options[] below; both lists must stay in the same order */
- HELP_H,
- HELP_QUESTION_MARK,
- VERBOSE,
- COPYRIGHT,
- DESTDIR,
- SOURCEDIR,
- UNICODE_VERSION, /* -u / --unicode */
- ICUDATADIR,
- CSOURCE, /* -C / --csource */
- STORE_FLAGS /* -p / --prune */
-};
-
-static UOption options[]={ /* command-line options, handed to u_parseArgs() in main(); one entry per enum constant above */
- UOPTION_HELP_H,
- UOPTION_HELP_QUESTION_MARK,
- UOPTION_VERBOSE,
- UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
- UOPTION_SOURCEDIR,
- UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), /* UNICODE_VERSION: main() presets the value to "3.0.0" */
- UOPTION_ICUDATADIR,
- UOPTION_DEF("csource", 'C', UOPT_NO_ARG), /* CSOURCE: passed to generateData() as its csource flag */
- UOPTION_DEF("prune", 'p', UOPT_REQUIRES_ARG) /* STORE_FLAGS: each letter of the value toggles a bit in gStoreFlags */
-};
-
-#if !UCONFIG_NO_NORMALIZATION
-
-/*
- * Write "<stem>.txt" or, if suffix!=NULL, "<stem>-<suffix>.txt" to dest.
- *
- * The position of the '-' is derived from the actual length of stem instead
- * of a hard-coded offset, and the total length is checked against capacity
- * (the number of bytes available at dest, including the terminating NUL).
- * Exits with U_BUFFER_OVERFLOW_ERROR if the name does not fit.
- */
-static void
-setBasename(char *dest, size_t capacity, const char *stem, const char *suffix) {
-    size_t needed=uprv_strlen(stem)+sizeof(".txt"); /* sizeof counts the NUL */
-
-    if(suffix!=NULL) {
-        needed+=1+uprv_strlen(suffix); /* '-' plus the suffix */
-    }
-    if(needed>capacity) {
-        fprintf(stderr, "gennorm error: file name for %s is too long\n", stem);
-        exit(U_BUFFER_OVERFLOW_ERROR);
-    }
-
-    uprv_strcpy(dest, stem);
-    if(suffix!=NULL) {
-        uprv_strcat(dest, "-");
-        uprv_strcat(dest, suffix);
-    }
-    uprv_strcat(dest, ".txt");
-}
-
-#endif
-
-/*
- * Program entry point.
- *
- * Parses the command line, then (unless UCONFIG_NO_NORMALIZATION is set)
- * reads DerivedNormalizationProps[-suffix].txt and UnicodeData[-suffix].txt
- * from the source directory, processes the parsed data and writes the data
- * file via generateData().
- *
- * Returns a UErrorCode value as the process exit status: U_ZERO_ERROR on
- * success, U_ILLEGAL_ARGUMENT_ERROR for a bad command line. Parse errors
- * terminate the process via exit() inside the parser functions.
- */
-extern int
-main(int argc, char* argv[]) {
-#if !UCONFIG_NO_NORMALIZATION
-    char filename[300];
-    size_t basenameCapacity;
-#endif
-    const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
-    char *basename=NULL;
-    UErrorCode errorCode=U_ZERO_ERROR;
-
-    U_MAIN_INIT_ARGS(argc, argv);
-
-    /* preset then read command line options */
-    options[DESTDIR].value=u_getDataDirectory();
-    options[SOURCEDIR].value="";
-    options[UNICODE_VERSION].value="3.0.0";
-    options[ICUDATADIR].value=u_getDataDirectory();
-    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
-
-    /* error handling, printing usage message */
-    if(argc<0) {
-        fprintf(stderr,
-            "error in command line argument \"%s\"\n",
-            argv[-argc]);
-    }
-    if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
-        /*
-         * Broken into chunks because the C89 standard says the minimum
-         * required supported string length is 509 bytes.
-         */
-        fprintf(stderr,
-            "Usage: %s [-options] [suffix]\n"
-            "\n"
-            "Read the UnicodeData.txt file and other Unicode properties files and\n"
-            "create a binary file " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " with the normalization data\n"
-            "\n",
-            argv[0]);
-        fprintf(stderr,
-            "Options:\n"
-            "\t-h or -? or --help this usage text\n"
-            "\t-v or --verbose verbose output\n"
-            "\t-c or --copyright include a copyright notice\n"
-            "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
-            "\t-C or --csource generate a .c source file rather than the .icu binary\n");
-        fprintf(stderr,
-            "\t-p or --prune flags Prune for data modularization:\n"
-            "\t Determine what data is to be stored.\n"
-            "\t 0 (zero) stores minimal data (only for NFD)\n"
-            "\t lowercase letters turn off data, uppercase turn on (use with 0)\n");
-        fprintf(stderr,
-            "\t k: compatibility decompositions (NFKC, NFKD)\n"
-            "\t c: composition data (NFC, NFKC)\n"
-            "\t f: FCD data (will be generated at load time)\n"
-            "\t a: auxiliary data (canonical closure etc.)\n"
-            "\t x: exclusion sets (Unicode 3.2-level normalization)\n");
-        fprintf(stderr,
-            "\t-d or --destdir destination directory, followed by the path\n"
-            "\t-s or --sourcedir source directory, followed by the path\n"
-            "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
-            "\t followed by path, defaults to <%s>\n"
-            "\tsuffix suffix that is to be appended with a '-'\n"
-            "\t to the source file basenames before opening;\n"
-            "\t 'gennorm new' will read UnicodeData-new.txt etc.\n",
-            u_getDataDirectory());
-        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
-    }
-
-    /* get the options values */
-    beVerbose=options[VERBOSE].doesOccur;
-    haveCopyright=options[COPYRIGHT].doesOccur;
-    srcDir=options[SOURCEDIR].value;
-    destDir=options[DESTDIR].value;
-
-    if(argc>=2) {
-        suffix=argv[1];
-    } else {
-        suffix=NULL;
-    }
-
-#if UCONFIG_NO_NORMALIZATION
-
-    fprintf(stderr,
-        "gennorm writes a dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
-        " because UCONFIG_NO_NORMALIZATION is set, \n"
-        "see icu/source/common/unicode/uconfig.h\n");
-    generateData(destDir, options[CSOURCE].doesOccur);
-
-#else
-
-    setUnicodeVersion(options[UNICODE_VERSION].value);
-
-    if (options[ICUDATADIR].doesOccur) {
-        u_setDataDirectory(options[ICUDATADIR].value);
-    }
-
-    if(options[STORE_FLAGS].doesOccur) {
-        const char *s=options[STORE_FLAGS].value;
-        char c;
-
-        while((c=*s++)!=0) {
-            switch(c) {
-            case '0':
-                gStoreFlags=0; /* store minimal data (only for NFD) */
-                break;
-
-            /* lowercase letters: omit data */
-            case 'k':
-                gStoreFlags&=~U_MASK(UGENNORM_STORE_COMPAT);
-                break;
-            case 'c':
-                gStoreFlags&=~U_MASK(UGENNORM_STORE_COMPOSITION);
-                break;
-            case 'f':
-                gStoreFlags&=~U_MASK(UGENNORM_STORE_FCD);
-                break;
-            case 'a':
-                gStoreFlags&=~U_MASK(UGENNORM_STORE_AUX);
-                break;
-            case 'x':
-                gStoreFlags&=~U_MASK(UGENNORM_STORE_EXCLUSIONS);
-                break;
-
-            /* uppercase letters: include data (use with 0) */
-            case 'K':
-                gStoreFlags|=U_MASK(UGENNORM_STORE_COMPAT);
-                break;
-            case 'C':
-                gStoreFlags|=U_MASK(UGENNORM_STORE_COMPOSITION);
-                break;
-            case 'F':
-                gStoreFlags|=U_MASK(UGENNORM_STORE_FCD);
-                break;
-            case 'A':
-                gStoreFlags|=U_MASK(UGENNORM_STORE_AUX);
-                break;
-            case 'X':
-                gStoreFlags|=U_MASK(UGENNORM_STORE_EXCLUSIONS);
-                break;
-
-            default:
-                fprintf(stderr, "ignoring undefined prune flag '%c'\n", c);
-                break;
-            }
-        }
-    }
-
-    /*
-     * Verify that we can work with properties
-     * but don't call u_init() because that needs unorm.icu which we are just
-     * going to build here.
-     */
-    {
-        U_STRING_DECL(ideo, "[:Ideographic:]", 15);
-        USet *set;
-
-        U_STRING_INIT(ideo, "[:Ideographic:]", 15);
-        set=uset_openPattern(ideo, -1, &errorCode);
-        if(U_FAILURE(errorCode) || !uset_contains(set, 0xf900)) {
-            fprintf(stderr, "gennorm is unable to work with properties (uprops.icu): %s\n", u_errorName(errorCode));
-            /*
-             * If only the uset_contains() test failed then errorCode is still
-             * U_ZERO_ERROR; do not report success to the calling build.
-             */
-            exit(U_FAILURE(errorCode) ? errorCode : U_INTERNAL_PROGRAM_ERROR);
-        }
-        uset_close(set);
-    }
-
-    /* prepare the filename beginning with the source dir */
-    if(uprv_strlen(srcDir)+2>sizeof(filename)) { /* +2: file separator and NUL */
-        fprintf(stderr, "gennorm error: source directory path is too long\n");
-        exit(U_BUFFER_OVERFLOW_ERROR);
-    }
-    uprv_strcpy(filename, srcDir);
-    basename=filename+uprv_strlen(filename);
-    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
-        *basename++=U_FILE_SEP_CHAR;
-    }
-    basenameCapacity=sizeof(filename)-(size_t)(basename-filename);
-
-    /* initialize */
-    init();
-
-    /* process DerivedNormalizationProps.txt (name changed for Unicode 3.2, to <=31 characters) */
-    setBasename(basename, basenameCapacity, "DerivedNormalizationProps", suffix);
-    parseDerivedNormalizationProperties(filename, &errorCode, FALSE);
-    if(U_FAILURE(errorCode)) {
-        /* can be only U_FILE_ACCESS_ERROR - try filename from before Unicode 3.2 */
-        setBasename(basename, basenameCapacity, "DerivedNormalizationProperties", suffix);
-
-        /* the parser returns immediately on a pending failure, so clear it for the retry */
-        errorCode=U_ZERO_ERROR;
-        parseDerivedNormalizationProperties(filename, &errorCode, TRUE);
-    }
-
-    /* process UnicodeData.txt */
-    setBasename(basename, basenameCapacity, "UnicodeData", suffix);
-    parseDB(filename, &errorCode);
-
-    /* process parsed data */
-    if(U_SUCCESS(errorCode)) {
-        processData();
-
-        /* write the properties data file */
-        generateData(destDir, options[CSOURCE].doesOccur);
-
-        cleanUpData();
-    }
-
-#endif
-
-    return errorCode;
-}
-
-#if !UCONFIG_NO_NORMALIZATION
-
-/* parser for DerivedNormalizationProperties.txt ----------------------------
- *
- * Line callback passed to u_parseDelimitedFile() by
- * parseDerivedNormalizationProperties() below.
- * fields[0] is the code point or code point range, fields[1] the property;
- * a third ';'-separated value, where present, is found by scanning on from
- * fields[1] because only two fields are requested from the file parser.
- * Lines with unrecognized properties are ignored.
- * On a parse error the function prints a message and calls exit().
- */
-
-static void U_CALLCONV
-derivedNormalizationPropertiesLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- UChar string[32]; /* FNC string: [0] receives u_parseString()'s return value, the text goes to [1..31] */
- char *s;
- uint32_t start, end;
- int32_t count; /* result of u_parseCodePointRange(); not read afterwards */
- uint8_t qcFlags;
-
- /* get code point range */
- count=u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gennorm: error parsing DerivedNormalizationProperties.txt mapping at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* ignore hangul - handle explicitly */
- if(start==0xac00) {
- return;
- }
-
- /* get property - ignore unrecognized ones */
- s=(char *)u_skipWhitespace(fields[1][0]);
- if(*s=='N' && s[1]=='F') {
- /*
-  * quick check flag: start from 0x11, shift left by 1 for NFK*,
-  * shift left by 2 more for *D; the value names below then mask the result
-  */
- qcFlags=0x11;
- s+=2;
- if(*s=='K') {
- qcFlags<<=1;
- ++s;
- }
-
- if(*s=='C' && s[1]=='_') {
- s+=2;
- } else if(*s=='D' && s[1]=='_') {
- qcFlags<<=2;
- s+=2;
- } else {
- return;
- }
-
- if(0==uprv_strncmp(s, "NO", 2)) {
- qcFlags&=0xf; /* "NO" keeps only the low nibble */
- } else if(0==uprv_strncmp(s, "MAYBE", 5)) {
- qcFlags&=0x30; /* "MAYBE" keeps only bits 0x30 */
- } else if(0==uprv_strncmp(s, "QC", 2) && *(s=(char *)u_skipWhitespace(s+2))==';') {
- /*
- * Unicode 4.0.1:
- * changes single field "NFD_NO" -> two fields "NFD_QC; N" etc.
- */
- /* start of the field */
- s=(char *)u_skipWhitespace(s+1);
- if(*s=='N') {
- qcFlags&=0xf;
- } else if(*s=='M') {
- qcFlags&=0x30;
- } else {
- return; /* do nothing for "Yes" because it's the default value */
- }
- } else {
- return; /* do nothing for "Yes" because it's the default value */
- }
-
- /* set this flag for all code points in this range */
- while(start<=end) {
- setQCFlags(start++, qcFlags);
- }
- } else if(0==uprv_memcmp(s, "Comp_Ex", 7) || 0==uprv_memcmp(s, "Full_Composition_Exclusion", 26)) {
- /* full composition exclusion */
- while(start<=end) {
- setCompositionExclusion(start++);
- }
- } else if(
- ((0==uprv_memcmp(s, "FNC", 3) && *(s=(char *)u_skipWhitespace(s+3))==';') ||
- (0==uprv_memcmp(s, "FC_NFKC", 7) && *(s=(char *)u_skipWhitespace(s+7))==';'))
-
- ) {
- /* FC_NFKC_Closure, parse field 2 to get the string; s was advanced to the ';' by the condition above */
- char *t;
-
- /* start of the field */
- s=(char *)u_skipWhitespace(s+1);
-
- /* find the end of the field */
- for(t=s; *t!=';' && *t!='#' && *t!=0 && *t!='\n' && *t!='\r'; ++t) {}
- *t=0; /* terminates the field in place, i.e. modifies the line buffer */
-
- string[0]=(UChar)u_parseString(s, string+1, 31, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gennorm error: illegal FNC string at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
- while(start<=end) {
- setFNC(start++, string);
- }
- }
-}
-
-/*
- * Feed every line of a DerivedNormalizationProperties file to
- * derivedNormalizationPropertiesLineFn().
- *
- * Does nothing if pErrorCode is NULL or already holds a failure.
- * A parse failure prints a message and exits the process, except that a
- * U_FILE_ACCESS_ERROR is left in *pErrorCode for the caller when
- * reportError is FALSE.
- */
-static void
-parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode, UBool reportError) {
-    char *columns[2][2];
-
-    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {
-        u_parseDelimitedFile(filename, ';', columns, 2, derivedNormalizationPropertiesLineFn, NULL, pErrorCode);
-
-        if(U_SUCCESS(*pErrorCode)) {
-            return;
-        }
-        if(!reportError && *pErrorCode==U_FILE_ACCESS_ERROR) {
-            /* tolerated: the caller may retry with another file name */
-            return;
-        }
-
-        fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
-        exit(*pErrorCode);
-    }
-}
-
-/* parser for UnicodeData.txt -----------------------------------------------
- *
- * Line callback passed to u_parseDelimitedFile() by parseDB() below.
- * Reads the code point (field 0), the canonical combining class (field 3)
- * and the decomposition (field 5) of one line and, if the combining class
- * is non-zero or there is a decomposition, passes them to storeNorm().
- * On a syntax error the function prints a message and calls exit().
- */
-
-static void U_CALLCONV
-unicodeDataLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- uint32_t decomp[40]; /* parsed decomposition; norm.nfd/norm.nfkd point here, so it is only valid during this call */
- Norm norm;
- const char *s;
- char *end;
- uint32_t code, value;
- int32_t length;
- UBool isCompat, something=FALSE; /* something: set once there is any value worth storing */
-
- /* ignore First and Last entries for ranges */
- if( *fields[1][0]=='<' &&
- (length=(int32_t)(fields[1][1]-fields[1][0]))>=9 &&
- (0==uprv_memcmp(", First>", fields[1][1]-8, 8) || 0==uprv_memcmp(", Last>", fields[1][1]-7, 7))
- ) {
- return;
- }
-
- /* reset the properties */
- uprv_memset(&norm, 0, sizeof(Norm));
-
- /*
- * The combiningIndex must not be initialized to 0 because 0 is the
- * combiningIndex of the first forward-combining character.
- */
- norm.combiningIndex=0xffff;
-
- /* get the character code, field 0 */
- code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
- if(end<=fields[0][0] || end!=fields[0][1]) { /* the number must be non-empty and fill the whole field */
- fprintf(stderr, "gennorm: syntax error in field 0 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- /* get canonical combining class, field 3 */
- value=(uint32_t)uprv_strtoul(fields[3][0], &end, 10);
- if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) {
- fprintf(stderr, "gennorm: syntax error in field 3 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- if(value>0) {
- norm.udataCC=(uint8_t)value;
- something=TRUE;
- }
-
- /* get the decomposition, field 5 */
- if(fields[5][0]<fields[5][1]) {
- if(*(s=fields[5][0])=='<') { /* a leading "<type>" tag marks a compatibility decomposition */
- ++s;
- isCompat=TRUE;
-
- /* skip and ignore the compatibility type name */
- do {
- if(s==fields[5][1]) {
- /* missing '>' */
- fprintf(stderr, "gennorm: syntax error in field 5 at %s\n", fields[0][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- } while(*s++!='>');
- } else {
- isCompat=FALSE;
- }
-
- /* parse the decomposition string */
- length=u_parseCodePoints(s, decomp, sizeof(decomp)/4, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "gennorm error parsing UnicodeData.txt decomposition of U+%04lx - %s\n",
- (long)code, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-
- /* store the string */
- if(length>0) {
- something=TRUE;
- if(isCompat) {
- norm.lenNFKD=(uint8_t)length;
- norm.nfkd=decomp;
- } else {
- if(length>2) { /* a canonical decomposition in UnicodeData.txt may have at most 2 code points */
- fprintf(stderr, "gennorm: error - length of NFD(U+%04lx) = %ld >2 in UnicodeData - illegal\n",
- (long)code, (long)length);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- norm.lenNFD=(uint8_t)length;
- norm.nfd=decomp;
- }
- }
- }
-
- /* check for non-character code points */
- if((code&0xfffe)==0xfffe || (uint32_t)(code-0xfdd0)<0x20 || code>0x10ffff) { /* U+xxFFFE/U+xxFFFF, U+FDD0..U+FDEF, or beyond U+10FFFF */
- fprintf(stderr, "gennorm: error - properties for non-character code point U+%04lx\n",
- (long)code);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
-
- if(something) {
- /* there are normalization values, so store them */
-#if 0
- if(beVerbose) {
- printf("store values for U+%04lx: cc=%d, lenNFD=%ld, lenNFKD=%ld\n",
- (long)code, norm.udataCC, (long)norm.lenNFD, (long)norm.lenNFKD);
- }
-#endif
- storeNorm(code, &norm);
- }
-}
-
-/*
- * Feed every line of a UnicodeData file (15 ';'-separated fields per line)
- * to unicodeDataLineFn().
- *
- * Does nothing if pErrorCode is NULL or already holds a failure.
- * Any parse failure prints a message and exits the process.
- */
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode) {
-    char *columns[15][2];
-
-    if(pErrorCode==NULL) {
-        return;
-    }
-
-    if(U_SUCCESS(*pErrorCode)) {
-        u_parseDelimitedFile(filename, ';', columns, 15, unicodeDataLineFn, NULL, pErrorCode);
-
-        if(U_FAILURE(*pErrorCode)) {
-            fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
-            exit(*pErrorCode);
-        }
-    }
-}
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/gennorm/gennorm.h b/tools/gennorm/gennorm.h
deleted file mode 100644
index ea33d957..00000000
--- a/tools/gennorm/gennorm.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2005, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: gennorm.h
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001may25
-* created by: Markus W. Scherer
-*/
-
-#ifndef __GENNORM_H__ /* include guard named after this file (gennorm.h), so it cannot collide with another header's __GENPROPS_H__ guard */
-#define __GENNORM_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uset.h"
-
-/*
- * file definitions
- *
- * gennorm.c's usage text names the output file as
- * U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
- */
-#define DATA_NAME "unorm"
-#define DATA_TYPE "icu"
-
-/*
- * data structure that holds the normalization properties for one or more
- * code point(s) at build time
- *
- * gennorm.c fills in udataCC, lenNFD/nfd, lenNFKD/nfkd and combiningIndex
- * for each UnicodeData.txt line and hands the struct to storeNorm().
- * The remaining fields are zeroed there; NOTE(review): they are presumably
- * computed in store.c - confirm.
- */
-typedef struct Norm {
- uint8_t udataCC, lenNFD, lenNFKD; /* combining class from UnicodeData.txt field 3; number of code points in nfd / nfkd */
- uint8_t qcFlags, combiningFlags;
- uint16_t canonBothCCs, compatBothCCs, combiningIndex, specialTag; /* combiningIndex is preset to 0xffff by gennorm.c because 0 is a valid index */
- uint32_t *nfd, *nfkd; /* canonical / compatibility decomposition; gennorm.c points these at a local array, so the pointers are only valid during the storeNorm() call */
- uint32_t value32; /* temporary variable for generating runtime norm32 and fcd values */
- int32_t fncIndex;
- USet *canonStart;
- UBool unsafeStart;
-} Norm;
-
-/*
- * modularization flags
- *
- * Corresponding bits in gStoreFlags control whether certain kinds of data
- * are to be stored in (1) or omitted from (0) the data file.
- * The flags are controlled by a command-line argument, with a letter
- * per flag.
- *
- * The enum constants are bit numbers, not masks; they are turned into
- * masks with U_MASK(). In gennorm.c the -p/--prune option clears a bit for
- * the lowercase letter and sets it for the uppercase letter, and '0'
- * clears all bits.
- */
-enum {
- UGENNORM_STORE_COMPAT, /* (k) compatibility decompositions */
- UGENNORM_STORE_COMPOSITION, /* (c) composition data */
- UGENNORM_STORE_FCD, /* (f) FCD data */
- UGENNORM_STORE_AUX, /* (a) auxiliary trie and associated data */
- UGENNORM_STORE_EXCLUSIONS, /* (x) exclusion sets */
- UGENNORM_STORE_COUNT
-};
-
-extern uint32_t gStoreFlags; /* declared here only; the definition is not in this header */
-
-#define DO_STORE(flag) (0!=(gStoreFlags&U_MASK(flag))) /* true if the bit for flag is set */
-#define DO_NOT_STORE(flag) (0==(gStoreFlags&U_MASK(flag))) /* true if the bit for flag is clear */
-
-/* global flags */
-extern UBool beVerbose, haveCopyright; /* defined in gennorm.c, set there from the -v and -c options */
-
-/*
- * prototypes
- *
- * None of these functions is defined in this header. The notes describe
- * how gennorm.c calls them.
- */
-extern void
-setUnicodeVersion(const char *v); /* called with the value of the -u option (preset "3.0.0") */
-
-extern void
-init(void); /* called once before any input file is parsed */
-
-extern void
-storeNorm(uint32_t code, Norm *norm); /* called per UnicodeData.txt line that has a combining class or decomposition */
-
-extern void
-setQCFlags(uint32_t code, uint8_t qcFlags); /* called for every code point of a quick check "NO"/"MAYBE" range */
-
-extern void
-setCompositionExclusion(uint32_t code); /* called for every code point of a Comp_Ex/Full_Composition_Exclusion range */
-
-U_CFUNC void
-setFNC(uint32_t c, UChar *s); /* s[0] holds the value returned by u_parseString(), the string follows from s[1] */
-
-extern void
-processData(void); /* called after both input files were parsed successfully */
-
-extern void
-generateData(const char *dataDir, UBool csource); /* called with the destination directory and the -C option flag */
-
-extern void
-cleanUpData(void); /* called last, after generateData() */
-
-#endif
-
diff --git a/tools/gennorm/gennorm.vcproj b/tools/gennorm/gennorm.vcproj
deleted file mode 100644
index a57114ba..00000000
--- a/tools/gennorm/gennorm.vcproj
+++ /dev/null
@@ -1,422 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="9.00"
- Name="gennorm"
- ProjectGUID="{F5213103-6CBE-46E6-B4CC-2570B6837D86}"
- TargetFrameworkVersion="131072"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- <Platform
- Name="x64"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Release|Win32"
- OutputDirectory=".\x86\Release"
- IntermediateDirectory=".\x86\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/gennorm.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/gennorm.pch"
- AssemblerListingLocation=".\x86\Release/"
- ObjectFile=".\x86\Release/"
- ProgramDataBaseFileName=".\x86\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Release/gennorm.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/gennorm.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory=".\x86\Debug"
- IntermediateDirectory=".\x86\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/gennorm.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/gennorm.pch"
- AssemblerListingLocation=".\x86\Debug/"
- ObjectFile=".\x86\Debug/"
- ProgramDataBaseFileName=".\x86\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="4"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Debug/gennorm.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/gennorm.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|x64"
- OutputDirectory=".\x64\Release"
- IntermediateDirectory=".\x64\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/gennorm.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/gennorm.pch"
- AssemblerListingLocation=".\x64\Release/"
- ObjectFile=".\x64\Release/"
- ProgramDataBaseFileName=".\x64\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Release/gennorm.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/gennorm.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|x64"
- OutputDirectory=".\x64\Debug"
- IntermediateDirectory=".\x64\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/gennorm.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/gennorm.pch"
- AssemblerListingLocation=".\x64\Debug/"
- ObjectFile=".\x64\Debug/"
- ProgramDataBaseFileName=".\x64\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="3"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Debug/gennorm.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/gennorm.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="c;cpp;rc"
- >
- <File
- RelativePath=".\gennorm.c"
- >
- </File>
- <File
- RelativePath=".\store.c"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h"
- >
- <File
- RelativePath=".\gennorm.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/tools/gennorm/store.c b/tools/gennorm/store.c
deleted file mode 100644
index 581a4473..00000000
--- a/tools/gennorm/store.c
+++ /dev/null
@@ -1,2181 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: store.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2001may25
-* created by: Markus W. Scherer
-*
-* Store Unicode normalization data in a memory-mappable file.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/ustring.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "filestrm.h"
-#include "unicode/udata.h"
-#include "utrie.h"
-#include "utrie2.h"
-#include "unicode/uset.h"
-#include "toolutil.h"
-#include "unewdata.h"
-#include "writesrc.h"
-#include "unormimp.h"
-#include "gennorm.h"
-
-#define DO_DEBUG_OUT 0
-
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
-/*
- * The new implementation of the normalization code loads its data from
- * unorm.icu, which is generated with this gennorm tool.
- * The format of that file is described in unormimp.h .
- */
-
-/* file data ---------------------------------------------------------------- */
-
-#if UCONFIG_NO_NORMALIZATION
-
-/* dummy UDataInfo cf. udata.h */
-static UDataInfo dataInfo = {
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- { 0, 0, 0, 0 }, /* dummy dataFormat */
- { 0, 0, 0, 0 }, /* dummy formatVersion */
- { 0, 0, 0, 0 } /* dummy dataVersion */
-};
-
-#else
-
-/* UDataInfo cf. udata.h */
-static UDataInfo dataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- { 0x4e, 0x6f, 0x72, 0x6d }, /* dataFormat="Norm" */
- { 2, 3, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
- { 3, 2, 0, 0 } /* dataVersion (Unicode version) */
-};
-
-/*
- * Parse the version string v and copy the first four bytes of the result
- * into dataInfo.dataVersion.
- */
-extern void
-setUnicodeVersion(const char *v) {
-    UVersionInfo parsed;
-
-    u_versionFromString(parsed, v);
-    uprv_memcpy(dataInfo.dataVersion, parsed, 4);
-}
-
-static int32_t indexes[_NORM_INDEX_TOP]={ 0 };
-
-/* builder data ------------------------------------------------------------- */
-
-/* modularization flags, see gennorm.h (default to "store everything") */
-uint32_t gStoreFlags=0xffffffff;
-
-typedef void EnumTrieFn(void *context, uint32_t code, Norm *norm);
-
-static UNewTrie
- *normTrie,
- *norm32Trie,
- *fcdTrie,
- *auxTrie;
-
-static UToolMemory *normMem, *utf32Mem, *extraMem, *combiningTriplesMem;
-
-static Norm *norms;
-
-/*
- * set a flag for each code point that was seen in decompositions -
- * avoid decomposing ones that have not been used before
- */
-static uint32_t haveSeenFlags[256];
-
-/* set of characters with NFD_QC=No (i.e., those with canonical decompositions) */
-static USet *nfdQCNoSet;
-
-/* see addCombiningCP() for details */
-static uint32_t combiningCPs[2000];
-
-/*
- * after processCombining() this contains for each code point in combiningCPs[]
- * the runtime combining index
- */
-static uint16_t combiningIndexes[2000];
-
-/* section limits for combiningCPs[], see addCombiningCP() */
-static uint16_t combineFwdTop=0, combineBothTop=0, combineBackTop=0;
-
-/**
- * Structure for a triple of code points, stored in combiningTriplesMem.
- * The lead and trail code points combine into the combined one,
- * i.e., there is a canonical decomposition of combined-> <lead, trail>.
- *
- * Before processCombining() is called, leadIndex and trailIndex are 0.
- * After processCombining(), they contain the indexes of the lead and trail
- * code point in the combiningCPs[] array.
- * They are then sorted by leadIndex, then trailIndex.
- * They are not sorted by code points.
- */
-typedef struct CombiningTriple {
- uint16_t leadIndex, trailIndex;
- uint32_t lead, trail, combined;
-} CombiningTriple;
-
-/* 15b in the combining index -> <=0x8000 uint16_t values in the combining table */
-static uint16_t combiningTable[0x8000];
-static uint16_t combiningTableTop=0;
-
-#define _NORM_MAX_SET_SEARCH_TABLE_LENGTH 0x4000
-static uint16_t canonStartSets[_NORM_MAX_CANON_SETS+2*_NORM_MAX_SET_SEARCH_TABLE_LENGTH
- +10000]; /* +10000 for exclusion sets */
-static int32_t canonStartSetsTop=_NORM_SET_INDEX_TOP;
-static int32_t canonSetsCount=0;
-
-/*
- * Allocate one Norm unit from normMem and mark it as having no combining index.
- * Returns the new unit.
- */
-static Norm *
-allocNorm() {
-    Norm *unit=(Norm *)utm_alloc(normMem);
-
-    /*
-     * 0 cannot serve as the "none" value: it is the combiningIndex
-     * of the first forward-combining character.
-     */
-    unit->combiningIndex=0xffff;
-    return unit;
-}
-
-/* allocate one zero-filled UNewTrie; exit if there is no memory for it */
-static UNewTrie *
-allocZeroedTrie(void) {
-    UNewTrie *trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie));
-
-    if(trie==NULL) {
-        fprintf(stderr, "error: failed to initialize tries\n");
-        exit(U_MEMORY_ALLOCATION_ERROR);
-    }
-    uprv_memset(trie, 0, sizeof(UNewTrie));
-    return trie;
-}
-
-/*
- * Set up the builder state before any normalization data is stored:
- * the trie structures, the memory pools, the "have seen" flags,
- * the NFD_QC=No set, and the initial index values.
- * Exits the program if an allocation fails.
- */
-extern void
-init() {
-    uint16_t *p16;
-
-    normTrie=allocZeroedTrie();
-    norm32Trie=allocZeroedTrie();
-    fcdTrie=allocZeroedTrie();
-    auxTrie=allocZeroedTrie();
-
-    /* open normTrie here; the other three tries are only allocated and zeroed in this function */
-    if(NULL==utrie_open(normTrie, NULL, 30000, 0, 0, FALSE)) {
-        fprintf(stderr, "error: failed to initialize tries\n");
-        exit(U_MEMORY_ALLOCATION_ERROR);
-    }
-
-    /* allocate Norm structures and reset the first one */
-    normMem=utm_open("gennorm normalization structs", 20000, 20000, sizeof(Norm));
-    norms=allocNorm();
-
-    /* allocate UTF-32 string memory */
-    utf32Mem=utm_open("gennorm UTF-32 strings", 30000, 30000, 4);
-
-    /* reset all "have seen" flags */
-    uprv_memset(haveSeenFlags, 0, sizeof(haveSeenFlags));
-
-    /* open an empty set */
-    nfdQCNoSet=uset_open(1, 0);
-    if(nfdQCNoSet==NULL) {
-        fprintf(stderr, "gennorm error: out of memory in uset_open()\n");
-        exit(U_MEMORY_ALLOCATION_ERROR);
-    }
-
-    /* allocate extra data memory for UTF-16 decomposition strings and other values */
-    extraMem=utm_open("gennorm extra 16-bit memory", _NORM_EXTRA_INDEX_TOP, _NORM_EXTRA_INDEX_TOP, 2);
-    /* initialize the extraMem counter for the top of FNC strings */
-    p16=(uint16_t *)utm_alloc(extraMem);
-    *p16=1;
-
-    /* allocate temporary memory for combining triples */
-    combiningTriplesMem=utm_open("gennorm combining triples", 0x4000, 0x4000, sizeof(CombiningTriple));
-
-    /* set the minimum code points for no/maybe quick check values to the end of the BMP */
-    indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]=0xffff;
-    indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]=0xffff;
-    indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]=0xffff;
-    indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]=0xffff;
-
-    /* preset the indexes portion of canonStartSets (2 bytes per uint16_t unit) */
-    uprv_memset(canonStartSets, 0, _NORM_SET_INDEX_TOP*2);
-}
-
-/*
- * Return the Norm unit for code, creating it first if there is none yet.
- * A new unit is registered in normTrie under code;
- * the program exits if the trie cannot take the entry.
- */
-static Norm *
-createNorm(uint32_t code) {
-    uint32_t index=utrie_get32(normTrie, (UChar32)code, NULL);
-    Norm *unit;
-
-    if(index!=0) {
-        /* there is a unit for this code point already */
-        return norms+index;
-    }
-
-    /* none yet: allocate one and store its index in the trie */
-    unit=allocNorm();
-    if(!utrie_set32(normTrie, (UChar32)code, (uint32_t)(unit-norms))) {
-        fprintf(stderr, "error: too many normalization entries\n");
-        exit(U_BUFFER_OVERFLOW_ERROR);
-    }
-    return unit;
-}
-
-/*
- * Look up the Norm unit that was stored for code.
- * Returns NULL if normTrie has the value 0 for code, that is, no unit was created for it.
- */
-static Norm *
-getNorm(uint32_t code) {
-    uint32_t index=utrie_get32(normTrie, (UChar32)code, NULL);
-
-    return index!=0 ? norms+index : NULL;
-}
-
-/*
- * Return the canonical combining class stored for code,
- * or 0 if there is no Norm unit for it.
- */
-static uint8_t
-getCCFromCP(uint32_t code) {
-    const Norm *unit=getNorm(code);
-
-    if(unit!=NULL) {
-        return unit->udataCC;
-    }
-    return 0;
-}
-
-/*
- * Walk all code points U+0000..U+10FFFF and call fn(context, code, norm)
- * for each one that has a Norm unit in normTrie.
- * Returns how many code points fn was called for.
- */
-static uint32_t
-enumTrie(EnumTrieFn *fn, void *context) {
-    uint32_t found=0, index;
-    UChar32 cp=0;
-    UBool inBlockZero;
-
-    while(cp<=0x10ffff) {
-        index=utrie_get32(normTrie, cp, &inBlockZero);
-        if(inBlockZero) {
-            /* nothing was set in this data block: skip all of it */
-            cp+=UTRIE_DATA_BLOCK_LENGTH;
-            continue;
-        }
-        if(index!=0) {
-            fn(context, (uint32_t)cp, norms+index);
-            ++found;
-        }
-        ++cp;
-    }
-    return found;
-}
-
-/*
- * Set the "have seen" flag for each of the length code points in s.
- * Only bits 12..0 of a code point select its flag in haveSeenFlags[]
- * (bits 12..5 pick the word, bits 4..0 pick the bit),
- * so code points that agree in those bits share one flag.
- */
-static void
-setHaveSeenString(const uint32_t *s, int32_t length) {
-    uint32_t c;
-
-    while(length>0) {
-        c=*s++;
-        /* shift an unsigned 1: shifting a signed int 1 left by 31 is undefined behavior */
-        haveSeenFlags[(c>>5)&0xff]|=((uint32_t)1<<(c&0x1f));
-        --length;
-    }
-}
-
-/* test the flag that setHaveSeenString() sets for c; nonzero if it is set */
-#define HAVE_SEEN(c) (haveSeenFlags[((c)>>5)&0xff]&((uint32_t)1<<((c)&0x1f)))
-
-/* handle combining data ---------------------------------------------------- */
-
-/*
- * Insert an entry into combiningCPs[] for the new code point code with its flags.
- * The flags indicate if code combines forward, backward, or both.
- * This function handles flags 1 (forward), 3 (forward and backward) and 2 (backward).
- *
- * Each entry holds the code point in its low 24 bits and the flags above them.
- *
- * combiningCPs[] contains three sections:
- * 1. code points that combine forward
- * 2. code points that combine forward and backward
- * 3. code points that combine backward
- *
- * combineFwdTop, combineBothTop and combineBackTop are the limits of
- * sections 1, 2 and 3; combineBackTop is also the total number of entries.
- *
- * Search for code in the entire array.
- * If it is found and already is in the right section (old flags==new flags)
- * then we are done.
- * If it is found but the flags are different, then remove it,
- * union the old and new flags, and reinsert it into its correct section.
- * If it is not found, then just insert it.
- *
- * Within each section, the code points are not sorted.
- *
- * Exits the program if combiningCPs[] is full.
- */
-static void
-addCombiningCP(uint32_t code, uint8_t flags) {
- uint32_t newEntry;
- uint16_t i;
-
- newEntry=code|((uint32_t)flags<<24);
-
- /* search for this code point */
- for(i=0; i<combineBackTop; ++i) {
- if(code==(combiningCPs[i]&0xffffff)) {
- /* found it */
- if(newEntry==combiningCPs[i]) {
- return; /* no change */
- }
-
- /* combine the flags, remove the old entry from the old place, and insert the new one */
- newEntry|=combiningCPs[i];
- if(i!=--combineBackTop) { /* the total shrinks by one; close the gap unless i was the last entry */
- uprv_memmove(combiningCPs+i, combiningCPs+i+1, (combineBackTop-i)*4);
- }
- if(i<combineBothTop) { /* the removed entry was in section 1 or 2 */
- --combineBothTop;
- }
- if(i<combineFwdTop) { /* the removed entry was in section 1 */
- --combineFwdTop;
- }
- break;
- }
- }
-
- /* not found or modified, insert it */
- if(combineBackTop>=sizeof(combiningCPs)/4) { /* 4 bytes per entry */
- fprintf(stderr, "error: gennorm combining code points - trying to use more than %ld units\n",
- (long)(sizeof(combiningCPs)/4));
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
-
- /* set i to the insertion point: the end of the section that matches the (combined) flags */
- flags=(uint8_t)(newEntry>>24);
- if(flags==1) {
- i=combineFwdTop++;
- ++combineBothTop; /* section 2 moves up by one as well */
- } else if(flags==3) {
- i=combineBothTop++;
- } else /* flags==2 */ {
- i=combineBackTop;
- }
-
- /* move the following code points up one and insert newEntry at i */
- if(i<combineBackTop) {
- uprv_memmove(combiningCPs+i+1, combiningCPs+i, (combineBackTop-i)*4);
- }
- combiningCPs[i]=newEntry;
-
- /* finally increment the total counter */
- ++combineBackTop;
-}
-
-/**
- * Find the index in combiningCPs[] where code point code is stored.
- * A lead code point is searched for in the forward and both-ways sections,
- * a trail code point in the both-ways and backward sections.
- * @param code code point to look for
- * @param isLead is code a forward combining code point?
- * @return index in combiningCPs[] where code is stored, or 0xffff if it is not there
- */
-static uint16_t
-findCombiningCP(uint32_t code, UBool isLead) {
-    uint16_t pos, end;
-
-    if(isLead) {
-        pos=0;
-        end=combineBothTop;
-    } else {
-        pos=combineFwdTop;
-        end=combineBackTop;
-    }
-
-    while(pos<end) {
-        if((combiningCPs[pos]&0xffffff)==code) {
-            return pos;
-        }
-        ++pos;
-    }
-
-    /* code is not in the searched sections */
-    return 0xffff;
-}
-
-/*
- * Record that lead and trail combine into combined:
- * flag both code points, enter them into combiningCPs[],
- * and append a new triple to combiningTriplesMem.
- * Does nothing if composition data is not to be stored.
- */
-static void
-addCombiningTriple(uint32_t lead, uint32_t trail, uint32_t combined) {
-    Norm *unit;
-    CombiningTriple *entry;
-
-    if(DO_NOT_STORE(UGENNORM_STORE_COMPOSITION)) {
-        return;
-    }
-
-    /*
-     * The combiningFlags are set only now, after the decomposition was processed,
-     * so that an earlier getNorm() returned NULL when there was no actual
-     * sub-decomposition data for the initial NFD.
-     */
-    unit=createNorm(lead);
-    unit->combiningFlags|=1; /* combines forward */
-    unit=createNorm(trail);
-    unit->combiningFlags|=2; /* combines backward */
-
-    addCombiningCP(lead, 1);
-    addCombiningCP(trail, 2);
-
-    entry=(CombiningTriple *)utm_alloc(combiningTriplesMem);
-    entry->lead=lead;
-    entry->trail=trail;
-    entry->combined=combined;
-}
-
-/*
- * qsort() comparison function for CombiningTriple items:
- * order by leadIndex first, then by trailIndex.
- */
-static int
-compareTriples(const void *l, const void *r) {
-    const CombiningTriple *left=(const CombiningTriple *)l;
-    const CombiningTriple *right=(const CombiningTriple *)r;
-    int diff=(int)left->leadIndex-(int)right->leadIndex;
-
-    if(diff!=0) {
-        return diff;
-    }
-    return (int)left->trailIndex-(int)right->trailIndex;
-}
-
-/*
- * Build the runtime combining data from the collected combining triples.
- *
- * 1. Look up each triple's lead and trail positions in combiningCPs[] and
- *    sort the triples by leadIndex, then trailIndex.
- * 2. First pass: give each forward/both-ways combining code point the offset of
- *    its sub-table in combiningTable[] as its combining index, and give each
- *    backward-only code point a unique index counting up from the table top.
- * 3. Second pass: write for each lead code point its entries of
- *    (combining index of the trail, combined code point); each entry takes
- *    2 units for combined<=0x1fff and 3 units otherwise.
- *
- * Sets combiningTableTop. Exits the program if the indexes do not fit
- * or if the collected data is inconsistent.
- */
-static void
-processCombining() {
-    CombiningTriple *triples;
-    uint16_t *p, *leadStart;
-    uint32_t combined;
-    uint16_t i, j, count, tableTop, finalIndex, combinesFwd;
-
-    triples=utm_getStart(combiningTriplesMem);
-
-    /* add lead and trail indexes to the triples for sorting */
-    count=(uint16_t)utm_countItems(combiningTriplesMem);
-    for(i=0; i<count; ++i) {
-        triples[i].leadIndex=findCombiningCP(triples[i].lead, TRUE);
-        triples[i].trailIndex=findCombiningCP(triples[i].trail, FALSE);
-
-        /*
-         * findCombiningCP() must always find the code point;
-         * its "not found" value 0xffff would index far beyond combiningIndexes[] below
-         */
-        if(triples[i].leadIndex==0xffff || triples[i].trailIndex==0xffff) {
-            fprintf(stderr, "error: gennorm combining triple U+%04lx U+%04lx is missing from the combining code points\n",
-                    (long)triples[i].lead, (long)triples[i].trail);
-            exit(U_INTERNAL_PROGRAM_ERROR);
-        }
-    }
-
-    /* sort them by leadIndex, trailIndex */
-    qsort(triples, count, sizeof(CombiningTriple), compareTriples);
-
-    /* calculate final combining indexes and store them in the Norm entries */
-    tableTop=0;
-    j=0; /* triples counter */
-
-    /* first, combining indexes of fwd/both characters are indexes into the combiningTable */
-    for(i=0; i<combineBothTop; ++i) {
-        /* start a new table */
-
-        /* assign combining index */
-        createNorm(combiningCPs[i]&0xffffff)->combiningIndex=combiningIndexes[i]=tableTop;
-
-        /* calculate the length of the combining data for this lead code point in the combiningTable */
-        while(j<count && i==triples[j].leadIndex) {
-            /* count 2 to 3 16-bit units per composition entry (back-index, code point) */
-            combined=triples[j++].combined;
-            if(combined<=0x1fff) {
-                tableTop+=2;
-            } else {
-                tableTop+=3;
-            }
-        }
-    }
-
-    /* second, combining indexes of back-only characters are simply incremented from here to be unique */
-    finalIndex=tableTop;
-    for(; i<combineBackTop; ++i) {
-        createNorm(combiningCPs[i]&0xffffff)->combiningIndex=combiningIndexes[i]=finalIndex++;
-    }
-
-    /* it must be finalIndex<=0x8000 because bit 15 is used in combiningTable as an end-for-this-lead marker */
-    if(finalIndex>0x8000) {
-        /* report the value that was tested, and the table size in uint16_t units */
-        fprintf(stderr, "error: gennorm combining table - trying to use %u units, more than the %ld units available\n",
-                (unsigned)finalIndex, (long)LENGTHOF(combiningTable));
-        exit(U_MEMORY_ALLOCATION_ERROR);
-    }
-
-    combiningTableTop=tableTop;
-
-    /* store the combining data in the combiningTable, with the final indexes from above */
-    p=combiningTable;
-    j=0; /* triples counter */
-
-    /*
-     * this is essentially the same loop as above, but
-     * it writes the table data instead of calculating and setting the final indexes;
-     * it is necessary to have two passes so that all the final indexes are known before
-     * they are written into the table
-     */
-    for(i=0; i<combineBothTop; ++i) {
-        /* start a new table */
-        leadStart=p;
-
-        combined=0; /* avoid compiler warning */
-
-        /* store the combining data for this lead code point in the combiningTable */
-        while(j<count && i==triples[j].leadIndex) {
-            Norm *normPtr;
-            finalIndex=combiningIndexes[triples[j].trailIndex];
-            combined=triples[j++].combined;
-            normPtr = getNorm(combined);
-
-            if (normPtr == NULL) {
-                fprintf(stderr, "error: processCombining did not get expected result. combined=%d\n", (int)combined);
-                exit(U_INTERNAL_PROGRAM_ERROR);
-            }
-
-            /* is combined a starter? (i.e., cc==0 && combines forward) */
-            combinesFwd=(uint16_t)((normPtr->combiningFlags&1)<<13);
-
-            *p++=finalIndex;
-            if(combined<=0x1fff) {
-                *p++=(uint16_t)(combinesFwd|combined);
-            } else if(combined<=0xffff) {
-                *p++=(uint16_t)(0x8000|combinesFwd);
-                *p++=(uint16_t)combined;
-            } else {
-                *p++=(uint16_t)(0xc000|combinesFwd|((combined-0x10000)>>10));
-                *p++=(uint16_t)(0xdc00|(combined&0x3ff));
-            }
-        }
-
-        /*
-         * set a marker on the last final trail index in this lead's table;
-         * if nothing was written for this lead then there is no entry to mark,
-         * and p-2 could even point before combiningTable[]
-         */
-        if(p!=leadStart) {
-            if(combined<=0x1fff) {
-                *(p-2)|=0x8000;
-            } else {
-                *(p-3)|=0x8000;
-            }
-        }
-    }
-
-    /* post condition: tableTop==(p-combiningTable) */
-}
-
-/* processing incoming normalization data ----------------------------------- */
-
-/*
- * Compute the decomposition of the Hangul syllable c into two or three Jamos.
- * c must be a Hangul syllable code point.
- * The Jamos are written to hangulBuffer (hangulBuffer[2] is 0 if there are only two),
- * and *pHangulNorm is cleared and then set to point to hangulBuffer
- * as its NFD string and, if compatibility data is stored, as its NFKD string.
- */
-static void
-getHangulDecomposition(uint32_t c, Norm *pHangulNorm, uint32_t hangulBuffer[3]) {
-    uint32_t syllableIndex=c-HANGUL_BASE;
-    uint32_t trailIndex=syllableIndex%JAMO_T_COUNT;
-    uint32_t leadVowelIndex=syllableIndex/JAMO_T_COUNT;
-    uint8_t jamoCount;
-
-    uprv_memset(pHangulNorm, 0, sizeof(Norm));
-
-    hangulBuffer[0]=JAMO_L_BASE+leadVowelIndex/JAMO_V_COUNT;
-    hangulBuffer[1]=JAMO_V_BASE+leadVowelIndex%JAMO_V_COUNT;
-    if(trailIndex>0) {
-        /* there is a trailing consonant */
-        hangulBuffer[2]=JAMO_T_BASE+trailIndex;
-        jamoCount=3;
-    } else {
-        hangulBuffer[2]=0;
-        jamoCount=2;
-    }
-
-    pHangulNorm->nfd=hangulBuffer;
-    pHangulNorm->lenNFD=jamoCount;
-    if(DO_STORE(UGENNORM_STORE_COMPAT)) {
-        pHangulNorm->nfkd=hangulBuffer;
-        pHangulNorm->lenNFKD=jamoCount;
-    }
-}
-
-/*
- * decompose the one decomposition further, may generate two decompositions
- * apply all previous characters' decompositions to this one
- *
- * code is the code point that norm describes.
- * On entry, norm carries the decomposition to expand: norm->nfd is used if
- * norm->lenNFD!=0, otherwise norm->nfkd if norm->lenNFKD!=0;
- * if both lengths are 0 then the function returns without doing anything.
- * Each code point of that string is replaced with its own stored
- * decomposition if it has one; a Hangul syllable without stored data
- * is decomposed algorithmically.
- * New strings are allocated from utf32Mem and set in norm, and their
- * code points are marked via setHaveSeenString().
- *
- * NOTE(review): nfd[] and nfkd[] hold 40 code points each and nothing in this
- * function checks that the expanded strings fit - confirm that the input data
- * guarantees this.
- */
-static void
-decompStoreNewNF(uint32_t code, Norm *norm) {
- uint32_t nfd[40], nfkd[40], hangulBuffer[3];
- Norm hangulNorm;
-
- uint32_t *s32;
- Norm *p;
- uint32_t c;
- int32_t i, length;
- uint8_t lenNFD=0, lenNFKD=0;
- UBool changedNFD=FALSE, changedNFKD=FALSE;
-
- if((length=norm->lenNFD)!=0) {
- /* always allocate the original string */
- changedNFD=TRUE;
- s32=norm->nfd;
- } else if((length=norm->lenNFKD)!=0) {
- /* always allocate the original string */
- changedNFKD=TRUE;
- s32=norm->nfkd;
- } else {
- /* no decomposition here, nothing to do */
- return;
- }
-
- /* decompose each code point */
- for(i=0; i<length; ++i) {
- c=s32[i];
- p=getNorm(c);
- if(p==NULL) {
- if(HANGUL_BASE<=c && c<(HANGUL_BASE+HANGUL_COUNT)) {
- getHangulDecomposition(c, &hangulNorm, hangulBuffer);
- p=&hangulNorm; /* continue below with the temporary Hangul data */
- } else {
- /* no data, no decomposition */
- nfd[lenNFD++]=c;
- nfkd[lenNFKD++]=c;
- continue;
- }
- }
-
- /* canonically decompose c */
- if(changedNFD) { /* only if the input string is a canonical decomposition */
- if(p->lenNFD!=0) {
- uprv_memcpy(nfd+lenNFD, p->nfd, p->lenNFD*4);
- lenNFD+=p->lenNFD;
- } else {
- nfd[lenNFD++]=c;
- }
- }
-
- /* compatibility-decompose c */
- if(p->lenNFKD!=0) {
- uprv_memcpy(nfkd+lenNFKD, p->nfkd, p->lenNFKD*4);
- lenNFKD+=p->lenNFKD;
- changedNFKD=TRUE;
- } else if(p->lenNFD!=0) {
- uprv_memcpy(nfkd+lenNFKD, p->nfd, p->lenNFD*4);
- lenNFKD+=p->lenNFD;
- /*
- * not changedNFKD=TRUE;
- * so that we do not store a new nfkd if there was no nfkd string before
- * and we only see canonical decompositions
- */
- } else {
- nfkd[lenNFKD++]=c;
- }
- }
-
- /*
- * assume that norm->lenNFD==1 or ==2
- * s32 and norm->lenNFD still refer to the original, not further decomposed string here;
- * combiningFlags 0x80 is the flag that setCompositionExclusion() sets
- */
- if(norm->lenNFD==2 && !(norm->combiningFlags&0x80)) {
- addCombiningTriple(s32[0], s32[1], code);
- }
-
- if(changedNFD) {
- if(lenNFD!=0) {
- s32=utm_allocN(utf32Mem, lenNFD);
- uprv_memcpy(s32, nfd, lenNFD*4);
- } else {
- s32=NULL;
- }
- norm->lenNFD=lenNFD;
- norm->nfd=s32;
- setHaveSeenString(nfd, lenNFD);
- }
- if(changedNFKD) {
- if(lenNFKD!=0) {
- s32=utm_allocN(utf32Mem, lenNFKD);
- uprv_memcpy(s32, nfkd, lenNFKD*4);
- } else {
- s32=NULL;
- }
- norm->lenNFKD=lenNFKD;
- norm->nfkd=s32;
- setHaveSeenString(nfkd, lenNFKD);
- }
-}
-
-typedef struct DecompSingle { /* context that storeNorm() passes via enumTrie() to decompWithSingleFn() */
- uint32_t c; /* the code point whose decompositions are to be applied to others */
- Norm *norm; /* the Norm data with the decompositions of c */
-} DecompSingle;
-
-/*
- * apply this one character's decompositions (there is at least one!) to
- * all previous characters' decompositions to decompose them further
- *
- * This is an EnumTrieFn: context points to a DecompSingle with "this one
- * character" and its Norm data; norm is the data of another code point.
- * Each occurrence of the character in norm->nfd and norm->nfkd is replaced
- * with the character's decomposition. If norm has no nfkd but the character
- * has one, then a new norm->nfkd is formed from norm->nfd.
- * A changed string is written over the old one if it is not longer,
- * otherwise it is allocated from utf32Mem.
- * The code parameter is not used.
- *
- * NOTE(review): nfd[] and nfkd[] hold 40 code points each and nothing in this
- * function checks that the expanded strings fit - confirm that the input data
- * guarantees this.
- */
-static void
-decompWithSingleFn(void *context, uint32_t code, Norm *norm) {
- uint32_t nfd[40], nfkd[40];
- uint32_t *s32;
- DecompSingle *me=(DecompSingle *)context;
- uint32_t c, myC;
- int32_t i, length;
- uint8_t lenNFD=0, lenNFKD=0, myLenNFD, myLenNFKD;
- UBool changedNFD=FALSE, changedNFKD=FALSE;
-
- /* get the new character's data */
- myC=me->c;
- myLenNFD=me->norm->lenNFD;
- myLenNFKD=me->norm->lenNFKD;
- /* assume that myC has at least one decomposition */
-
- if((length=norm->lenNFD)!=0 && myLenNFD!=0) {
- /* apply NFD(myC) to norm->nfd */
- s32=norm->nfd;
- for(i=0; i<length; ++i) {
- c=s32[i];
- if(c==myC) {
- uprv_memcpy(nfd+lenNFD, me->norm->nfd, myLenNFD*4);
- lenNFD+=myLenNFD;
- changedNFD=TRUE;
- } else {
- nfd[lenNFD++]=c;
- }
- }
- }
-
- if((length=norm->lenNFKD)!=0) {
- /* apply NFD(myC) and NFKD(myC) to norm->nfkd */
- s32=norm->nfkd;
- for(i=0; i<length; ++i) {
- c=s32[i];
- if(c==myC) {
- if(myLenNFKD!=0) {
- uprv_memcpy(nfkd+lenNFKD, me->norm->nfkd, myLenNFKD*4);
- lenNFKD+=myLenNFKD;
- } else /* assume myLenNFD!=0 */ {
- uprv_memcpy(nfkd+lenNFKD, me->norm->nfd, myLenNFD*4);
- lenNFKD+=myLenNFD;
- }
- changedNFKD=TRUE;
- } else {
- nfkd[lenNFKD++]=c;
- }
- }
- } else if((length=norm->lenNFD)!=0 && myLenNFKD!=0) {
- /* apply NFKD(myC) to norm->nfd, forming a new norm->nfkd */
- s32=norm->nfd;
- for(i=0; i<length; ++i) {
- c=s32[i];
- if(c==myC) {
- uprv_memcpy(nfkd+lenNFKD, me->norm->nfkd, myLenNFKD*4);
- lenNFKD+=myLenNFKD;
- changedNFKD=TRUE;
- } else {
- nfkd[lenNFKD++]=c;
- }
- }
- }
-
- /* set the new decompositions, forget the old ones */
- if(changedNFD) {
- if(lenNFD!=0) {
- if(lenNFD>norm->lenNFD) { /* the old string memory is too short for the new string */
- s32=utm_allocN(utf32Mem, lenNFD);
- } else {
- s32=norm->nfd;
- }
- uprv_memcpy(s32, nfd, lenNFD*4);
- } else {
- s32=NULL;
- }
- norm->lenNFD=lenNFD;
- norm->nfd=s32;
- }
- if(changedNFKD) {
- if(lenNFKD!=0) {
- if(lenNFKD>norm->lenNFKD) { /* the old string memory is too short for the new string */
- s32=utm_allocN(utf32Mem, lenNFKD);
- } else {
- s32=norm->nfkd;
- }
- uprv_memcpy(s32, nfkd, lenNFKD*4);
- } else {
- s32=NULL;
- }
- norm->lenNFKD=lenNFKD;
- norm->nfkd=s32;
- }
-}
-
-/*
- * Process and store the data in *norm for the code point code, which is
- * listed in UnicodeData. UnicodeData itself never maps one code point
- * to both an NFD and an NFKD string.
- *
- * Derived properties that were stored for code before are carried over
- * into *norm, the decomposition is expanded, and *norm is then copied
- * into the stored Norm unit for code.
- */
-extern void
-storeNorm(uint32_t code, Norm *norm) {
-    Norm *stored;
-    DecompSingle single;
-
-    /* drop the compatibility decomposition if it is not to be stored */
-    if(DO_NOT_STORE(UGENNORM_STORE_COMPAT)) {
-        norm->lenNFKD=0;
-    }
-
-    /* keep the derived normalization properties that exist already */
-    stored=createNorm(code);
-    norm->qcFlags=stored->qcFlags;
-    norm->combiningFlags=stored->combiningFlags;
-    norm->fncIndex=stored->fncIndex;
-
-    if(norm->lenNFD!=0 || norm->lenNFKD!=0) {
-        /* there is a decomposition: decompose it further, which may generate two decompositions */
-        decompStoreNewNF(code, norm);
-
-        if(HAVE_SEEN(code)) {
-            /* code may occur in earlier decompositions: decompose those further with this one */
-            single.c=code;
-            single.norm=norm;
-            enumTrie(decompWithSingleFn, &single);
-        }
-    }
-
-    /* finally store the data */
-    uprv_memcpy(stored, norm, sizeof(Norm));
-}
-
-/*
- * Add quick check flags for code to its Norm unit.
- * The flags are first adjusted for the kinds of data that are not stored.
- * Also lowers the stored minimum BMP code points that have each kind of
- * quick check flag, and adds code to nfdQCNoSet if its NFD flag is set.
- */
-extern void
-setQCFlags(uint32_t code, uint8_t qcFlags) {
-    uint16_t bmpCode;
-
-    if(DO_NOT_STORE(UGENNORM_STORE_COMPAT)) {
-        /* no compatibility data: clear the KC/KD flags, then copy the C/D flags into them */
-        qcFlags&=~(_NORM_QC_NFKC|_NORM_QC_NFKD);
-        qcFlags|=qcFlags<<1;
-    }
-    if(DO_NOT_STORE(UGENNORM_STORE_COMPOSITION)) {
-        /* no composition data: clear the C/KC flags, then copy the D/KD flags into them */
-        qcFlags&=~(_NORM_QC_NFC|_NORM_QC_NFKC);
-        qcFlags|=qcFlags>>2;
-    }
-
-    createNorm(code)->qcFlags|=qcFlags;
-
-    if(qcFlags&_NORM_QC_NFD) {
-        uset_add(nfdQCNoSet, (UChar32)code);
-    }
-
-    /* the minimum code points for quick check no/maybe are tracked only below U+ffff */
-    if(code>=0xffff) {
-        return;
-    }
-    bmpCode=(uint16_t)code;
-
-    if((qcFlags&_NORM_QC_NFC) && bmpCode<indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]) {
-        indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]=bmpCode;
-    }
-    if((qcFlags&_NORM_QC_NFKC) && bmpCode<indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]) {
-        indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]=bmpCode;
-    }
-    if((qcFlags&_NORM_QC_NFD) && bmpCode<indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]) {
-        indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]=bmpCode;
-    }
-    if((qcFlags&_NORM_QC_NFKD) && bmpCode<indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]) {
-        indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]=bmpCode;
-    }
-}
-
-/*
- * Mark code as excluded from composition by setting bit 0x80 in its combiningFlags.
- * Does nothing unless composition data is stored.
- */
-extern void
-setCompositionExclusion(uint32_t code) {
-    Norm *unit;
-
-    if(DO_STORE(UGENNORM_STORE_COMPOSITION)) {
-        unit=createNorm(code);
-        unit->combiningFlags|=0x80;
-    }
-}
-
-/*
- * Set the special data for Hangul syllables and for the Jamos
- * that they decompose into.
- *
- * Each Jamo L, V and T gets its special tag and, if composition data is
- * stored, its combining flag; each Jamo L also gets a set with its block of
- * Hangul syllables, and each Jamo V and T is marked as an unsafe starter.
- * All Hangul syllables share one Norm unit.
- * Exits the program if a set cannot be allocated or the trie is full.
- */
-static void
-setHangulJamoSpecials() {
-    Norm *norm;
-    uint32_t c, hangul;
-
-    /*
-     * Hangul syllables are algorithmically decomposed into Jamos,
-     * and Jamos are algorithmically composed into Hangul syllables.
-     * The quick check flags are parsed, except for Hangul.
-     */
-
-    /* set Jamo L specials */
-    hangul=0xac00;
-    for(c=0x1100; c<=0x1112; ++c) {
-        norm=createNorm(c);
-        norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_L;
-        if(DO_STORE(UGENNORM_STORE_COMPOSITION)) {
-            norm->combiningFlags=1;
-        }
-
-        /* for each Jamo L create a set with its associated Hangul block of 21*28 syllables */
-        norm->canonStart=uset_open(hangul, hangul+21*28-1);
-        if(norm->canonStart==NULL) {
-            /* same handling as for the sets that postParseFn() opens */
-            fprintf(stderr, "gennorm error: out of memory in uset_open()\n");
-            exit(U_MEMORY_ALLOCATION_ERROR);
-        }
-        hangul+=21*28;
-    }
-
-    /* set Jamo V specials */
-    for(c=0x1161; c<=0x1175; ++c) {
-        norm=createNorm(c);
-        norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_V;
-        if(DO_STORE(UGENNORM_STORE_COMPOSITION)) {
-            norm->combiningFlags=2;
-        }
-        norm->unsafeStart=TRUE;
-    }
-
-    /* set Jamo T specials */
-    for(c=0x11a8; c<=0x11c2; ++c) {
-        norm=createNorm(c);
-        norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_T;
-        if(DO_STORE(UGENNORM_STORE_COMPOSITION)) {
-            norm->combiningFlags=2;
-        }
-        norm->unsafeStart=TRUE;
-    }
-
-    /* set Hangul specials, precompacted: one Norm unit for the whole range */
-    norm=allocNorm();
-    norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_HANGUL;
-    if(DO_STORE(UGENNORM_STORE_COMPAT)) {
-        norm->qcFlags=_NORM_QC_NFD|_NORM_QC_NFKD;
-    } else {
-        norm->qcFlags=_NORM_QC_NFD;
-    }
-
-    if(!utrie_setRange32(normTrie, 0xac00, 0xd7a4, (uint32_t)(norm-norms), TRUE)) {
-        fprintf(stderr, "error: too many normalization entries (setting Hangul)\n");
-        exit(U_BUFFER_OVERFLOW_ERROR);
-    }
-}
-
-/*
- * set FC-NFKC-Closure string
- * s contains the closure string; s[0]==length, s[1..length] is the actual string
- * may modify s[0]
- *
- * The string is stored in extraMem, and its index there is set as the
- * fncIndex of code point c. The first unit of extraMem holds the top index,
- * that is, the number of units used so far.
- * A string of length 1 whose unit is below 0xff00 is not stored again if
- * the same unit value occurs anywhere in extraMem after the first unit;
- * the index of that unit is used instead.
- * Does nothing unless compatibility, composition and auxiliary data are all stored.
- * Exits the program if there are too many FNC strings.
- */
-U_CFUNC void
-setFNC(uint32_t c, UChar *s) {
- uint16_t *p;
- int32_t length, i, count;
- UChar first;
-
- if( DO_NOT_STORE(UGENNORM_STORE_COMPAT) ||
- DO_NOT_STORE(UGENNORM_STORE_COMPOSITION) ||
- DO_NOT_STORE(UGENNORM_STORE_AUX)
- ) {
- return;
- }
-
- count=utm_countItems(extraMem);
- length=s[0];
- first=s[1];
-
- /* try to overlay single-unit strings with existing ones */
- if(length==1 && first<0xff00) {
- p=utm_getStart(extraMem);
- for(i=1; i<count; ++i) { /* start at 1: skip the top index in the first unit */
- if(first==p[i]) {
- break;
- }
- }
- } else {
- i=count; /* no overlay: the string will be appended at the top */
- }
-
- /* append the new string if it cannot be overlaid with an old one */
- if(i==count) {
- if(count>_NORM_AUX_MAX_FNC) {
- fprintf(stderr, "gennorm error: too many FNC strings\n");
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
-
- /* prepend 0xffxx with xx==length */
- s[0]=(uint16_t)(0xff00+length);
- ++length; /* now includes the prepended unit */
- p=(uint16_t *)utm_allocN(extraMem, length);
- uprv_memcpy(p, s, length*2);
-
- /* update the top index in extraMem[0] */
- count+=length;
- ((uint16_t *)utm_getStart(extraMem))[0]=(uint16_t)count;
- }
-
- /* store the index to the string */
- createNorm(c)->fncIndex=i;
-}
-
-/* build runtime structures ------------------------------------------------- */
-
-/*
- * Canonically reorder the UTF-32 string s of the given length in place:
- * each code point with a non-zero combining class is moved back before all
- * directly preceding code points with a higher combining class.
- * Returns the combining class of the first code point in the upper byte and
- * that of the last code point in the lower byte; returns 0 for length<=0.
- */
-static uint16_t
-reorderString(uint32_t *s, int32_t length) {
-    uint8_t ccs[40];
-    uint32_t cp;
-    int32_t pos, dest;
-    uint8_t cc;
-
-    if(length<=0) {
-        return 0;
-    }
-
-    for(pos=0; pos<length; ++pos) {
-        cp=s[pos];
-        cc=getCCFromCP(cp);
-
-        /* find the destination; only a combining mark (cc!=0) can move back */
-        dest=pos;
-        if(cc!=0) {
-            while(dest>0 && ccs[dest-1]>cc) {
-                /* move the previous code point up by one */
-                s[dest]=s[dest-1];
-                ccs[dest]=ccs[dest-1];
-                --dest;
-            }
-        }
-        s[dest]=cp;
-        ccs[dest]=cc;
-    }
-
-    return (uint16_t)(((uint16_t)ccs[0]<<8)|ccs[length-1]);
-}
-
-#if 0
-static UBool combineAndQC[64]={ 0 };
-#endif
-
-/*
- * canonically reorder the up to two decompositions
- * and store the leading and trailing combining classes accordingly
- *
- * also process canonical decompositions for canonical closure
- *
- * This has the EnumTrieFn signature: code is the code point and norm its data.
- * The context parameter is not used.
- * Inconsistencies between the decompositions, the quick check flags and the
- * combining flags are reported as warnings on stderr.
- * Exits the program if a set cannot be allocated or added to.
- */
-static void
-postParseFn(void *context, uint32_t code, Norm *norm) {
- int32_t length;
-
- /* canonically order the NFD */
- length=norm->lenNFD;
- if(length>0) {
- norm->canonBothCCs=reorderString(norm->nfd, length);
- }
-
- /* canonically reorder the NFKD */
- length=norm->lenNFKD;
- if(length>0) {
- norm->compatBothCCs=reorderString(norm->nfkd, length);
- }
-
- /* verify that code has a decomposition if and only if the quick check flags say "no" on NF(K)D */
- if((norm->lenNFD!=0) != ((norm->qcFlags&_NORM_QC_NFD)!=0)) {
- fprintf(stderr, "gennorm warning: U+%04lx has NFD[%d] but quick check 0x%02x\n", (long)code, norm->lenNFD, norm->qcFlags);
- }
- if(((norm->lenNFD|norm->lenNFKD)!=0) != ((norm->qcFlags&(_NORM_QC_NFD|_NORM_QC_NFKD))!=0)) {
- fprintf(stderr, "gennorm warning: U+%04lx has NFD[%d] NFKD[%d] but quick check 0x%02x\n", (long)code, norm->lenNFD, norm->lenNFKD, norm->qcFlags);
- }
-
- /* see which combinations of combiningFlags and qcFlags are used for NFC/NFKC */
-#if 0
- combineAndQC[(norm->qcFlags&0x33)|((norm->combiningFlags&3)<<2)]=1;
-#endif
-
- if(norm->combiningFlags&1) { /* combines forward */
- if(norm->udataCC!=0) {
- /* illegal - data-derivable composition exclusion */
- fprintf(stderr, "gennorm warning: U+%04lx combines forward but udataCC==%u\n", (long)code, norm->udataCC);
- }
- }
- if(norm->combiningFlags&2) { /* combines backward */
- if((norm->qcFlags&0x11)==0) {
- fprintf(stderr, "gennorm warning: U+%04lx combines backward but qcNF?C==0\n", (long)code);
- }
-#if 0
- /* occurs sometimes, this one is ok (therefore #if 0) - still here for documentation */
- if(norm->udataCC==0) {
- printf("U+%04lx combines backward but udataCC==0\n", (long)code);
- }
-#endif
- }
- if((norm->combiningFlags&3)==3 && beVerbose) {
- printf("U+%04lx combines both ways\n", (long)code);
- }
-
- /*
- * process canonical decompositions for canonical closure
- *
- * in each canonical decomposition:
- * add the current character (code) to the set of canonical starters of its norm->nfd[0]
- * set the "unsafe starter" flag for each norm->nfd[1..]
- */
- length=norm->lenNFD;
- if(length>0) {
- Norm *otherNorm;
- UChar32 c;
- int32_t i;
-
- /* nfd[0].canonStart.add(code) */
- c=norm->nfd[0];
- otherNorm=createNorm(c);
- if(otherNorm->canonStart==NULL) {
- otherNorm->canonStart=uset_open(code, code); /* first entry: open the set with just code in it */
- if(otherNorm->canonStart==NULL) {
- fprintf(stderr, "gennorm error: out of memory in uset_open()\n");
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
- } else {
- uset_add(otherNorm->canonStart, code);
- if(!uset_contains(otherNorm->canonStart, code)) { /* verify that code was actually added */
- fprintf(stderr, "gennorm error: uset_add(setOf(U+%4x), U+%4x)\n", (int)c, (int)code);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- }
-
- /* for(i=1..length-1) nfd[i].unsafeStart=TRUE */
- for(i=1; i<length; ++i) {
- createNorm(norm->nfd[i])->unsafeStart=TRUE;
- }
- }
-}
-
-static uint32_t
-make32BitNorm(Norm *norm) {
- UChar extra[100];
- const Norm *other;
- uint32_t word;
- int32_t i, length, beforeZero=0, count, start;
-
- /*
- * Check for assumptions:
- *
- * Test that if a "true starter" (cc==0 && NF*C_YES) decomposes,
- * then the decomposition also begins with a true starter.
- */
- if(norm->udataCC==0) {
- /* this is a starter */
- if((norm->qcFlags&_NORM_QC_NFC)==0 && norm->lenNFD>0) {
- /* a "true" NFC starter with a canonical decomposition */
- if( norm->canonBothCCs>=0x100 || /* lead cc!=0 or */
- ((other=getNorm(norm->nfd[0]))!=NULL && (other->qcFlags&_NORM_QC_NFC)!=0) /* nfd[0] not NFC_YES */
- ) {
- fprintf(stderr,
- "error: true NFC starter canonical decomposition[%u] does not begin\n"
- " with a true NFC starter: U+%04lx U+%04lx%s\n",
- norm->lenNFD, (long)norm->nfd[0], (long)norm->nfd[1],
- norm->lenNFD<=2 ? "" : " ...");
- exit(U_INVALID_TABLE_FILE);
- }
- }
-
- if((norm->qcFlags&_NORM_QC_NFKC)==0) {
- if(norm->lenNFKD>0) {
- /* a "true" NFKC starter with a compatibility decomposition */
- if( norm->compatBothCCs>=0x100 || /* lead cc!=0 or */
- ((other=getNorm(norm->nfkd[0]))!=NULL && (other->qcFlags&_NORM_QC_NFKC)!=0) /* nfkd[0] not NFKC_YES */
- ) {
- fprintf(stderr,
- "error: true NFKC starter compatibility decomposition[%u] does not begin\n"
- " with a true NFKC starter: U+%04lx U+%04lx%s\n",
- norm->lenNFKD, (long)norm->nfkd[0], (long)norm->nfkd[1],
- norm->lenNFKD<=2 ? "" : " ...");
- exit(U_INVALID_TABLE_FILE);
- }
- } else if(norm->lenNFD>0) {
- /* a "true" NFKC starter with only a canonical decomposition */
- if( norm->canonBothCCs>=0x100 || /* lead cc!=0 or */
- ((other=getNorm(norm->nfd[0]))!=NULL && (other->qcFlags&_NORM_QC_NFKC)!=0) /* nfd[0] not NFKC_YES */
- ) {
- fprintf(stderr,
- "error: true NFKC starter canonical decomposition[%u] does not begin\n"
- " with a true NFKC starter: U+%04lx U+%04lx%s\n",
- norm->lenNFD, (long)norm->nfd[0], (long)norm->nfd[1],
- norm->lenNFD<=2 ? "" : " ...");
- exit(U_INVALID_TABLE_FILE);
- }
- }
- }
- }
-
- /* reset the 32-bit word and set the quick check flags */
- word=norm->qcFlags;
-
- /* set the UnicodeData combining class */
- word|=(uint32_t)norm->udataCC<<_NORM_CC_SHIFT;
-
- /* set the combining flag and index */
- if(norm->combiningFlags&3) {
- word|=(uint32_t)(norm->combiningFlags&3)<<6;
- }
-
- /* set the combining index value into the extra data */
- /* 0xffff: no combining index; 0..0x7fff: combining index */
- if(norm->combiningIndex!=0xffff) {
- extra[0]=norm->combiningIndex;
- beforeZero=1;
- }
-
- count=beforeZero;
-
- /* write the decompositions */
- if((norm->lenNFD|norm->lenNFKD)!=0) {
- extra[count++]=0; /* set the pieces when available, into extra[beforeZero] */
-
- length=norm->lenNFD;
- if(length>0) {
- if(norm->canonBothCCs!=0) {
- extra[beforeZero]|=0x80;
- extra[count++]=norm->canonBothCCs;
- }
- start=count;
- for(i=0; i<length; ++i) {
- UTF_APPEND_CHAR_UNSAFE(extra, count, norm->nfd[i]);
- }
- extra[beforeZero]|=(UChar)(count-start); /* set the decomp length as the number of UTF-16 code units */
- }
-
- length=norm->lenNFKD;
- if(length>0) {
- if(norm->compatBothCCs!=0) {
- extra[beforeZero]|=0x8000;
- extra[count++]=norm->compatBothCCs;
- }
- start=count;
- for(i=0; i<length; ++i) {
- UTF_APPEND_CHAR_UNSAFE(extra, count, norm->nfkd[i]);
- }
- extra[beforeZero]|=(UChar)((count-start)<<8); /* set the decomp length as the number of UTF-16 code units */
- }
- }
-
- /* allocate and copy the extra data */
- if(count!=0) {
- UChar *p;
-
- if(norm->specialTag!=0) {
- fprintf(stderr, "error: gennorm - illegal to have both extra data and a special tag (0x%x)\n", norm->specialTag);
- exit(U_ILLEGAL_ARGUMENT_ERROR);
- }
-
- p=(UChar *)utm_allocN(extraMem, count);
- uprv_memcpy(p, extra, count*2);
-
- /* set the extra index, offset by beforeZero */
- word|=(uint32_t)(beforeZero+(p-(UChar *)utm_getStart(extraMem)))<<_NORM_EXTRA_SHIFT;
- } else if(norm->specialTag!=0) {
- /* set a special tag instead of an extra index */
- word|=(uint32_t)norm->specialTag<<_NORM_EXTRA_SHIFT;
- }
-
- return word;
-}
-
-/* turn all Norm structs into corresponding 32-bit norm values */
-static void
-makeAll32() {
- uint32_t *pNormData;
- uint32_t n;
- int32_t i, normLength, count;
-
- count=(int32_t)utm_countItems(normMem);
- for(i=0; i<count; ++i) {
- norms[i].value32=make32BitNorm(norms+i);
- }
-
- pNormData=utrie_getData(norm32Trie, &normLength);
-
- count=0; /* count is now just used for debugging */
- for(i=0; i<normLength; ++i) {
- n=pNormData[i];
- if(0!=(pNormData[i]=norms[n].value32)) {
- ++count;
- }
- }
-}
-
-/*
- * extract all Norm.canonBothCCs into the FCD table
- * set 32-bit values to use the common fold and compact functions
- */
-static void
-makeFCD() {
- uint32_t *pFCDData;
- uint32_t n;
- int32_t i, count, fcdLength;
- uint16_t bothCCs;
-
- count=utm_countItems(normMem);
- for(i=0; i<count; ++i) {
- bothCCs=norms[i].canonBothCCs;
- if(bothCCs==0) {
- /* if there are no decomposition cc's then use the udataCC twice */
- bothCCs=norms[i].udataCC;
- bothCCs|=bothCCs<<8;
- }
- norms[i].value32=bothCCs;
- }
-
- pFCDData=utrie_getData(fcdTrie, &fcdLength);
-
- for(i=0; i<fcdLength; ++i) {
- n=pFCDData[i];
- pFCDData[i]=norms[n].value32;
- }
-}
-
-/**
- * If the given set contains exactly one character, then return it.
- * Otherwise return -1.
- */
-static int32_t
-usetContainsOne(const USet* set) {
- if(uset_getItemCount(set)==1) {
- /* there is a single item (a single range) */
- UChar32 start, end;
- UErrorCode ec=U_ZERO_ERROR;
- int32_t len=uset_getItem(set, 0, &start, &end, NULL, 0, &ec);
- if (len==0 && start==end) { /* a range (len==0) with a single code point */
- return start;
- }
- }
- return -1;
-}
-
-static void
-makeCanonSetFn(void *context, uint32_t code, Norm *norm) {
- if(norm->canonStart!=NULL && !uset_isEmpty(norm->canonStart)) {
- uint16_t *table;
- int32_t c, tableLength;
- UErrorCode errorCode=U_ZERO_ERROR;
-
- /* does the set contain exactly one code point? */
- c=usetContainsOne(norm->canonStart);
-
- /* add an entry to the BMP or supplementary search table */
- if(code<=0xffff) {
- table=canonStartSets+_NORM_MAX_CANON_SETS;
- tableLength=canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH];
-
- table[tableLength++]=(uint16_t)code;
-
- if(c>=0 && c<=0xffff && (c&_NORM_CANON_SET_BMP_MASK)!=_NORM_CANON_SET_BMP_IS_INDEX) {
- /* single-code point BMP result for BMP code point */
- table[tableLength++]=(uint16_t)c;
- } else {
- table[tableLength++]=(uint16_t)(_NORM_CANON_SET_BMP_IS_INDEX|canonStartSetsTop);
- c=-1;
- }
- canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]=(uint16_t)tableLength;
- } else {
- table=canonStartSets+_NORM_MAX_CANON_SETS+_NORM_MAX_SET_SEARCH_TABLE_LENGTH;
- tableLength=canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH];
-
- table[tableLength++]=(uint16_t)(code>>16);
- table[tableLength++]=(uint16_t)code;
-
- if(c>=0) {
- /* single-code point result for supplementary code point */
- table[tableLength-2]|=(uint16_t)(0x8000|((c>>8)&0x1f00));
- table[tableLength++]=(uint16_t)c;
- } else {
- table[tableLength++]=(uint16_t)canonStartSetsTop;
- }
- canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]=(uint16_t)tableLength;
- }
-
- if(c<0) {
- /* write a USerializedSet */
- ++canonSetsCount;
- canonStartSetsTop+=
- uset_serialize(norm->canonStart,
- canonStartSets+canonStartSetsTop,
- _NORM_MAX_CANON_SETS-canonStartSetsTop,
- &errorCode);
- }
- canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH]=(uint16_t)canonStartSetsTop;
-
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gennorm error: uset_serialize()->%s (canonStartSetsTop=%d)\n", u_errorName(errorCode), (int)canonStartSetsTop);
- exit(errorCode);
- }
- if(tableLength>_NORM_MAX_SET_SEARCH_TABLE_LENGTH) {
- fprintf(stderr, "gennorm error: search table for canonical starter sets too long\n");
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
- }
-}
-
-/* for getSkippableFlags ---------------------------------------------------- */
-
-/* combine the lead and trail code points; return <0 if they do not combine */
-static int32_t
-combine(uint32_t lead, uint32_t trail) {
- CombiningTriple *triples;
- uint32_t i, count;
-
- /* search for all triples with c as lead code point */
- triples=utm_getStart(combiningTriplesMem);
- count=utm_countItems(combiningTriplesMem);
-
- /* triples are not sorted by code point but for each lead CP there is one contiguous block */
- for(i=0; i<count && lead!=triples[i].lead; ++i) {}
-
- /* check each triple for this code point */
- for(; i<count && lead==triples[i].lead; ++i) {
- if(trail==triples[i].trail) {
- return (int32_t)triples[i].combined;
- }
- }
-
- return -1;
-}
-
-/*
- * Starting from the canonical decomposition s[0..length[ of a single code point,
- * is the code point c consumed in an NFC/FCC recomposition?
- *
- * No need to handle discontiguous composition because that would not consume some
- * intermediate character, so would not compose back to the original character.
- * See comments in canChangeWithFollowing().
- *
- * No need to compose beyond where c canonically orders because if it is consumed
- * then the result differs from the original anyway.
- *
- * Possible optimization:
- * - Verify that there are no cases of the same combining mark stacking twice.
- * - return FALSE right away if c inserts after a copy of itself
- * without attempting to recompose; will happen because each mark in
- * the decomposition will be enumerated and passed in as c.
- * More complicated and fragile though than it is already.
- *
- * markus 2002nov04
- */
-static UBool
-doesComposeConsume(const uint32_t *s, int32_t length, uint32_t c, uint8_t cc) {
- int32_t starter, i;
-
- /* ignore trailing characters where cc<prevCC */
- while(length>1 && cc<getCCFromCP(s[length-1])) {
- --length;
- }
-
- /* start consuming/combining from the beginning */
- starter=(int32_t)s[0];
- for(i=1; i<length; ++i) {
- starter=combine((uint32_t)starter, s[i]);
- if(starter<0) {
- fprintf(stderr, "error: unable to consume normal decomposition in doesComposeConsume(<%04x, %04x, ...>[%d], U+%04x, %u)\n",
- (int)s[0], (int)s[1], (int)length, (int)c, cc);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- }
-
- /* try to combine/consume c, return TRUE if it is consumed */
- return combine((uint32_t)starter, c)>=0;
-}
-
-/* does the starter s[0] combine forward with another char that is below trailCC? */
-static UBool
-canChangeWithFollowing(const uint32_t *s, int32_t length, uint8_t trailCC) {
- if(trailCC<=1) {
- /* no character will combine ahead of the trailing char of the decomposition */
- return FALSE;
- }
-
- /*
- * We are only checking skippable condition (f).
- * Therefore, the original character does not have quick check flag NFC_NO (c),
- * i.e., the decomposition recomposes completely back into the original code point.
- * So s[0] must be a true starter with cc==0 and
- * combining with following code points.
- *
- * Similarly, length==1 is not possible because that would be a singleton
- * decomposition which is marked with NFC_NO and does not pass (c).
- *
- * Only a character with cc<trailCC can change the composition.
- * Reason: A char with cc>=trailCC would order after decomposition s[],
- * composition would consume all of the decomposition, and here we know that
- * the original char passed check d), i.e., it does not combine forward,
- * therefore does not combine with anything after the decomposition is consumed.
- *
- * Now see if there is a character that
- * 1. combines backward
- * 2. has cc<trailCC
- * 3. is consumed in recomposition
- *
- * length==2 is simple:
- *
- * Characters that fulfill these conditions are exactly the ones that combine directly
- * with the starter c==s[0] because there is no intervening character after
- * reordering.
- * We can just enumerate all chars with which c combines (they all pass 1. and 3.)
- * and see if one has cc<trailCC (passes 2.).
- *
- * length>2 is a little harder:
- *
- * Since we will get different starters during recomposition, we need to
- * enumerate each backward-combining character (1.)
- * with cc<trailCC (2.) and
- * see if it gets consumed in recomposition. (3.)
- * No need to enumerate both-ways combining characters because they must have cc==0.
- */
- if(length==2) {
- /* enumerate all chars that combine with this one and check their cc */
- CombiningTriple *triples;
- uint32_t c, i, count;
- uint8_t cc;
-
- /* search for all triples with c as lead code point */
- triples=utm_getStart(combiningTriplesMem);
- count=utm_countItems(combiningTriplesMem);
- c=s[0];
-
- /* triples are not sorted by code point but for each lead CP there is one contiguous block */
- for(i=0; i<count && c!=triples[i].lead; ++i) {}
-
- /* check each triple for this code point */
- for(; i<count && c==triples[i].lead; ++i) {
- cc=getCCFromCP(triples[i].trail);
- if(cc>0 && cc<trailCC) {
- /* this trail code point combines with c and has cc<trailCC */
- return TRUE;
- }
- }
- } else {
- /* enumerate all chars that combine backward */
- uint32_t c2;
- uint16_t i;
- uint8_t cc;
-
- for(i=combineBothTop; i<combineBackTop; ++i) {
- c2=combiningCPs[i]&0xffffff;
- cc=getCCFromCP(c2);
- /* pass in length-1 because we already know that c2 will insert before the last character with trailCC */
- if(cc>0 && cc<trailCC && doesComposeConsume(s, length-1, c2, cc)) {
- return TRUE;
- }
- }
- }
-
- /* this decomposition is not modified by any appended character */
- return FALSE;
-}
-
-/* see unormimp.h for details on NF*C Skippable flags */
-static uint32_t
-getSkippableFlags(const Norm *norm) {
- /* ignore NF*D skippable properties because they are covered by norm32, test at runtime */
-
- /* ignore Hangul, test those at runtime (LV Hangul are not skippable) */
- if(norm->specialTag==_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_HANGUL) {
- return 0;
- }
-
- /* ### TODO check other data generation functions whether they should & do ignore Hangul/Jamo specials */
-
- /*
- * Note:
- * This function returns a non-zero flag only if (a)..(e) indicate skippable but (f) does not.
- *
- * This means that (a)..(e) must always be derived from the runtime norm32 value,
- * and (f) be checked from the auxTrie if the character is skippable per (a)..(e),
- * the form is NF*C and there is a canonical decomposition (NFD_NO).
- *
- * (a) unassigned code points get "not skippable"==false because they
- * don't have a Norm struct so they won't get here
- */
-
- /* (b) not skippable if cc!=0 */
- if(norm->udataCC!=0) {
- return 0; /* non-zero flag for (f) only */
- }
-
- /*
- * not NFC_Skippable if
- * (c) quick check flag == NO or
- * (d) combines forward or
- * (e) combines back or
- * (f) can change if another character is added
- *
- * for (f):
- * For NF*C: Get corresponding decomposition, get its last starter (cc==0),
- * check its composition list,
- * see if any of the second code points in the list
- * has cc less than the trailCC of the decomposition.
- *
- * For FCC: Test at runtime if the decomposition has a trailCC>1
- * -> there are characters with cc==1, they would order before the trail char
- * and prevent contiguous combination with the trail char.
- */
- if( (norm->qcFlags&(_NORM_QC_NFC&_NORM_QC_ANY_NO))!=0 ||
- (norm->combiningFlags&3)!=0) {
- return 0; /* non-zero flag for (f) only */
- }
- if(norm->lenNFD!=0 && canChangeWithFollowing(norm->nfd, norm->lenNFD, (uint8_t)norm->canonBothCCs)) {
- return _NORM_AUX_NFC_SKIP_F_MASK;
- }
-
- return 0; /* skippable */
-}
-
-static void
-makeAux() {
- Norm *norm;
- uint32_t *pData;
- int32_t i, length;
-
- pData=utrie_getData(auxTrie, &length);
-
- for(i=0; i<length; ++i) {
- norm=norms+pData[i];
- /*
- * 16-bit auxiliary normalization properties
- * see unormimp.h
- */
- pData[i]=
- ((uint32_t)(norm->combiningFlags&0x80)<<(_NORM_AUX_COMP_EX_SHIFT-7))|
- (uint32_t)norm->fncIndex;
-
- if(norm->unsafeStart || norm->udataCC!=0) {
- pData[i]|=_NORM_AUX_UNSAFE_MASK;
- }
-
- pData[i]|=getSkippableFlags(norm);
- }
-}
-
-/* folding value for normalization: just store the offset (16 bits) if there is any non-0 entry */
-static uint32_t U_CALLCONV
-getFoldedNormValue(UNewTrie *trie, UChar32 start, int32_t offset) {
- uint32_t value, leadNorm32=0;
- UChar32 limit;
- UBool inBlockZero;
-
- limit=start+0x400;
- while(start<limit) {
- value=utrie_get32(trie, start, &inBlockZero);
- if(inBlockZero) {
- start+=UTRIE_DATA_BLOCK_LENGTH;
- } else {
- if(value!=0) {
- leadNorm32|=value;
- }
- ++start;
- }
- }
-
- /* turn multi-bit fields into the worst-case value */
- if(leadNorm32&_NORM_CC_MASK) {
- leadNorm32|=_NORM_CC_MASK;
- }
-
- /* clean up unnecessarily ored bit fields */
- leadNorm32&=~((uint32_t)0xffffffff<<_NORM_EXTRA_SHIFT);
-
- if(leadNorm32==0) {
- /* nothing to do (only composition exclusions?) */
- return 0;
- }
-
- /* add the extra surrogate index, offset by the BMP top, for the new stage 1 location */
- leadNorm32|=(
- (uint32_t)_NORM_EXTRA_INDEX_TOP+
- (uint32_t)((offset-UTRIE_BMP_INDEX_LENGTH)>>UTRIE_SURROGATE_BLOCK_BITS)
- )<<_NORM_EXTRA_SHIFT;
-
- return leadNorm32;
-}
-
-/* folding value for FCD: use default function (just store the offset (16 bits) if there is any non-0 entry) */
-
-/*
- * folding value for auxiliary data:
- * store the non-zero offset in bits 9..0 (FNC bits)
- * if there is any non-0 entry;
- * "or" [verb!] together data bits 15..10 of all of the 1024 supplementary code points
- */
-static uint32_t U_CALLCONV
-getFoldedAuxValue(UNewTrie *trie, UChar32 start, int32_t offset) {
- uint32_t value, oredValues;
- UChar32 limit;
- UBool inBlockZero;
-
- oredValues=0;
- limit=start+0x400;
- while(start<limit) {
- value=utrie_get32(trie, start, &inBlockZero);
- if(inBlockZero) {
- start+=UTRIE_DATA_BLOCK_LENGTH;
- } else {
- oredValues|=value;
- ++start;
- }
- }
-
- if(oredValues!=0) {
- /* move the 10 significant offset bits into bits 9..0 */
- offset>>=UTRIE_SURROGATE_BLOCK_BITS;
- if(offset>_NORM_AUX_FNC_MASK) {
- fprintf(stderr, "gennorm error: folding offset too large (auxTrie)\n");
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
- return (uint32_t)offset|(oredValues&~_NORM_AUX_FNC_MASK);
- } else {
- return 0;
- }
-}
-
-extern void
-processData() {
-#if 0
- uint16_t i;
-#endif
-
- processCombining();
-
- /* canonically reorder decompositions and assign combining classes for decompositions */
- enumTrie(postParseFn, NULL);
-
-#if 0
- for(i=1; i<64; ++i) {
- if(combineAndQC[i]) {
- printf("combiningFlags==0x%02x qcFlags(NF?C)==0x%02x\n", (i&0xc)>>2, i&0x33);
- }
- }
-#endif
-
- /* add hangul/jamo specials */
- setHangulJamoSpecials();
-
- /* set this value; will be updated as makeCanonSetFn() adds sets (if there are any, see gStoreFlags) */
- canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH]=(uint16_t)canonStartSetsTop;
-
- /* store search tables and USerializedSets for canonical starters (after Hangul/Jamo specials!) */
- if(DO_STORE(UGENNORM_STORE_AUX) && DO_STORE(UGENNORM_STORE_COMPOSITION)) {
- enumTrie(makeCanonSetFn, NULL);
- }
-
- /* clone the normalization builder trie to make the final data tries */
- if( NULL==utrie_clone(norm32Trie, normTrie, NULL, 0) ||
- NULL==utrie_clone(fcdTrie, normTrie, NULL, 0) ||
- NULL==utrie_clone(auxTrie, normTrie, NULL, 0)
- ) {
- fprintf(stderr, "error: unable to clone the normalization trie\n");
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
-
- /* --- finalize data for quick checks & normalization --- */
-
- /* turn the Norm structs (stage2, norms) into 32-bit data words */
- makeAll32();
-
- /* --- finalize data for FCD checks --- */
-
- /* FCD data: take Norm.canonBothCCs and store them in the FCD table */
- makeFCD();
-
- /* --- finalize auxiliary normalization data --- */
- makeAux();
-
- if(beVerbose) {
-#if 0
- printf("number of stage 2 entries: %ld\n", stage2Mem->index);
- printf("size of stage 1 (BMP) & 2 (uncompacted) + extra data: %ld bytes\n", _NORM_STAGE_1_BMP_COUNT*2+stage2Mem->index*4+extraMem->index*2);
-#endif
- printf("combining CPs tops: fwd %u both %u back %u\n", combineFwdTop, combineBothTop, combineBackTop);
- printf("combining table count: %u\n", combiningTableTop);
- }
-}
-
-/* is this a norm32 with a special index for a lead surrogate? */
-static U_INLINE UBool
-isNorm32LeadSurrogate(uint32_t norm32) {
- return _NORM_MIN_SPECIAL<=norm32 && norm32<_NORM_SURROGATES_TOP;
-}
-
-/* normTrie: 32-bit trie result may contain a special extraData index with the folding offset */
-static int32_t U_CALLCONV
-getFoldingNormOffset(uint32_t norm32) {
- if(isNorm32LeadSurrogate(norm32)) {
- return
- UTRIE_BMP_INDEX_LENGTH+
- (((int32_t)norm32>>(_NORM_EXTRA_SHIFT-UTRIE_SURROGATE_BLOCK_BITS))&
- (0x3ff<<UTRIE_SURROGATE_BLOCK_BITS));
- } else {
- return 0;
- }
-}
-
-/* auxTrie: the folding offset is in bits 9..0 of the 16-bit trie result */
-static int32_t U_CALLCONV
-getFoldingAuxOffset(uint32_t data) {
- return (int32_t)(data&_NORM_AUX_FNC_MASK)<<UTRIE_SURROGATE_BLOCK_BITS;
-}
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-extern void
-generateData(const char *dataDir, UBool csource) {
- static uint8_t normTrieBlock[100000], fcdTrieBlock[100000], auxTrieBlock[100000];
-
- UNewDataMemory *pData;
- UErrorCode errorCode=U_ZERO_ERROR;
- int32_t size, dataLength;
-
-#if UCONFIG_NO_NORMALIZATION
-
- size=0;
-
-#else
-
- U_STRING_DECL(nxCJKCompatPattern, "[:Ideographic:]", 15);
- U_STRING_DECL(nxUnicode32Pattern, "[:^Age=3.2:]", 12);
- USet *set;
- int32_t normTrieSize, fcdTrieSize, auxTrieSize;
-
- normTrieSize=utrie_serialize(norm32Trie, normTrieBlock, sizeof(normTrieBlock), getFoldedNormValue, FALSE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: utrie_serialize(normalization properties) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
-
- if(DO_STORE(UGENNORM_STORE_FCD)) {
- fcdTrieSize=utrie_serialize(fcdTrie, fcdTrieBlock, sizeof(fcdTrieBlock), NULL, TRUE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: utrie_serialize(FCD data) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- } else {
- fcdTrieSize=0;
- }
-
- if(DO_STORE(UGENNORM_STORE_AUX)) {
- auxTrieSize=utrie_serialize(auxTrie, auxTrieBlock, sizeof(auxTrieBlock), getFoldedAuxValue, TRUE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: utrie_serialize(auxiliary data) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- } else {
- auxTrieSize=0;
- }
-
- /* move the parts of canonStartSets[] together into a contiguous block */
- if( canonStartSetsTop<_NORM_MAX_CANON_SETS &&
- canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]!=0
- ) {
- uprv_memmove(canonStartSets+canonStartSetsTop,
- canonStartSets+_NORM_MAX_CANON_SETS,
- canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]*2);
- }
- canonStartSetsTop+=canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH];
-
- if( canonStartSetsTop<(_NORM_MAX_CANON_SETS+_NORM_MAX_SET_SEARCH_TABLE_LENGTH) &&
- canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]!=0
- ) {
- uprv_memmove(canonStartSets+canonStartSetsTop,
- canonStartSets+_NORM_MAX_CANON_SETS+_NORM_MAX_SET_SEARCH_TABLE_LENGTH,
- canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]*2);
- }
- canonStartSetsTop+=canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH];
-
- /* create the normalization exclusion sets */
- /*
- * nxCJKCompatPattern should be [[:Ideographic:]&[:NFD_QC=No:]]
- * but we cannot use NFD_QC from the pattern because that would require
- * unorm.icu which we are just going to generate.
- * Therefore we have manually collected nfdQCNoSet and intersect Ideographic
- * with that.
- */
- U_STRING_INIT(nxCJKCompatPattern, "[:Ideographic:]", 15);
- U_STRING_INIT(nxUnicode32Pattern, "[:^Age=3.2:]", 12);
-
- canonStartSets[_NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET]=canonStartSetsTop;
- set=uset_openPattern(nxCJKCompatPattern, -1, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: uset_openPattern([:Ideographic:]&[:NFD_QC=No:]) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- uset_retainAll(set, nfdQCNoSet);
- if(DO_NOT_STORE(UGENNORM_STORE_EXCLUSIONS)) {
- uset_clear(set);
- }
- canonStartSetsTop+=uset_serialize(set, canonStartSets+canonStartSetsTop, LENGTHOF(canonStartSets)-canonStartSetsTop, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: uset_serialize([:Ideographic:]&[:NFD_QC=No:]) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- uset_close(set);
-
- canonStartSets[_NORM_SET_INDEX_NX_UNICODE32_OFFSET]=canonStartSetsTop;
- set=uset_openPattern(nxUnicode32Pattern, -1, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: uset_openPattern([:^Age=3.2:]) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- if(DO_NOT_STORE(UGENNORM_STORE_EXCLUSIONS)) {
- uset_clear(set);
- }
- canonStartSetsTop+=uset_serialize(set, canonStartSets+canonStartSetsTop, LENGTHOF(canonStartSets)-canonStartSetsTop, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: uset_serialize([:^Age=3.2:]) failed, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- uset_close(set);
-
- canonStartSets[_NORM_SET_INDEX_NX_RESERVED_OFFSET]=canonStartSetsTop;
-
- /* make sure that the FCD trie is 4-aligned */
- if((utm_countItems(extraMem)+combiningTableTop)&1) {
- combiningTable[combiningTableTop++]=0x1234; /* add one 16-bit word for an even number */
- }
-
- /* pad canonStartSets to 4-alignment, too */
- if(canonStartSetsTop&1) {
- canonStartSets[canonStartSetsTop++]=0x1235;
- }
-
- size=
- _NORM_INDEX_TOP*4+
- normTrieSize+
- utm_countItems(extraMem)*2+
- combiningTableTop*2+
- fcdTrieSize+
- auxTrieSize+
- canonStartSetsTop*2;
-
- if(beVerbose) {
- printf("size of normalization trie %5u bytes\n", (int)normTrieSize);
- printf("size of 16-bit extra memory %5u UChars/uint16_t\n", (int)utm_countItems(extraMem));
- printf(" of that: FC_NFKC_Closure size %5u UChars/uint16_t\n", ((uint16_t *)utm_getStart(extraMem))[0]);
- printf("size of combining table %5u uint16_t\n", combiningTableTop);
- printf("size of FCD trie %5u bytes\n", (int)fcdTrieSize);
- printf("size of auxiliary trie %5u bytes\n", (int)auxTrieSize);
- printf("size of canonStartSets[] %5u uint16_t\n", (int)canonStartSetsTop);
- printf(" number of indexes %5u uint16_t\n", _NORM_SET_INDEX_TOP);
- printf(" size of sets %5u uint16_t\n", canonStartSets[_NORM_SET_INDEX_CANON_SETS_LENGTH]-_NORM_SET_INDEX_TOP);
- printf(" number of sets %5d\n", (int)canonSetsCount);
- printf(" size of BMP search table %5u uint16_t\n", canonStartSets[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]);
- printf(" size of supplementary search table %5u uint16_t\n", canonStartSets[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]);
- printf(" length of exclusion sets %5u uint16_t\n", canonStartSets[_NORM_SET_INDEX_NX_RESERVED_OFFSET]-canonStartSets[_NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET]);
- printf("size of " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " contents: %ld bytes\n", (long)size);
- }
-
- indexes[_NORM_INDEX_TRIE_SIZE]=normTrieSize;
- indexes[_NORM_INDEX_UCHAR_COUNT]=(uint16_t)utm_countItems(extraMem);
-
- indexes[_NORM_INDEX_COMBINE_DATA_COUNT]=combiningTableTop;
- indexes[_NORM_INDEX_COMBINE_FWD_COUNT]=combineFwdTop;
- indexes[_NORM_INDEX_COMBINE_BOTH_COUNT]=(uint16_t)(combineBothTop-combineFwdTop);
- indexes[_NORM_INDEX_COMBINE_BACK_COUNT]=(uint16_t)(combineBackTop-combineBothTop);
-
- /* the quick check minimum code points are already set */
-
- indexes[_NORM_INDEX_FCD_TRIE_SIZE]=fcdTrieSize;
- indexes[_NORM_INDEX_AUX_TRIE_SIZE]=auxTrieSize;
- indexes[_NORM_INDEX_CANON_SET_COUNT]=canonStartSetsTop;
-
-#endif
-
- if(csource) {
-#if UCONFIG_NO_NORMALIZATION
- /* no csource for dummy mode..? */
- fprintf(stderr, "gennorm error: UCONFIG_NO_NORMALIZATION is on in csource mode.\n");
- exit(1);
-#else
- /* write .c file for hardcoded data */
- UTrie normRuntimeTrie={ NULL }, fcdRuntimeTrie={ NULL }, auxRuntimeTrie={ NULL };
- UTrie2 *normRuntimeTrie2, *fcdRuntimeTrie2=NULL, *auxRuntimeTrie2=NULL;
- FILE *f;
-
- utrie_unserialize(&normRuntimeTrie, normTrieBlock, normTrieSize, &errorCode);
- normRuntimeTrie.getFoldingOffset=getFoldingNormOffset;
- if(fcdTrieSize>0) {
- utrie_unserialize(&fcdRuntimeTrie, fcdTrieBlock, fcdTrieSize, &errorCode);
- }
- if(auxTrieSize>0) {
- utrie_unserialize(&auxRuntimeTrie, auxTrieBlock, auxTrieSize, &errorCode);
- auxRuntimeTrie.getFoldingOffset=getFoldingAuxOffset;
- }
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "gennorm error: failed to utrie_unserialize() one of the tries - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-
- /* use UTrie2 */
- dataInfo.formatVersion[0]=3;
- dataInfo.formatVersion[2]=0;
- dataInfo.formatVersion[3]=0;
- normRuntimeTrie2=utrie2_fromUTrie(&normRuntimeTrie, 0, &errorCode);
- if(fcdTrieSize>0) {
- fcdRuntimeTrie2=utrie2_fromUTrie(&fcdRuntimeTrie, 0, &errorCode);
- }
- if(auxTrieSize>0) {
- auxRuntimeTrie2=utrie2_fromUTrie(&auxRuntimeTrie, 0, &errorCode);
- }
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "gennorm error: utrie2_fromUTrie() failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- if(auxTrieSize>0) {
- /* delete lead surrogate code unit values */
- UChar lead;
- auxRuntimeTrie2=utrie2_cloneAsThawed(auxRuntimeTrie2, &errorCode);
- for(lead=0xd800; lead<0xdc00; ++lead) {
- utrie2_set32ForLeadSurrogateCodeUnit(auxRuntimeTrie2, lead, auxRuntimeTrie2->initialValue, &errorCode);
- }
- utrie2_freeze(auxRuntimeTrie2, UTRIE2_16_VALUE_BITS, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "gennorm error: deleting lead surrogate code unit values failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- }
-
- f=usrc_create(dataDir, "unorm_props_data.c");
- if(f!=NULL) {
- usrc_writeArray(f,
- "static const UVersionInfo formatVersion={ ",
- dataInfo.formatVersion, 8, 4,
- " };\n\n");
- usrc_writeArray(f,
- "static const UVersionInfo dataVersion={ ",
- dataInfo.dataVersion, 8, 4,
- " };\n\n");
- usrc_writeArray(f,
- "static const int32_t indexes[_NORM_INDEX_TOP]={\n",
- indexes, 32, _NORM_INDEX_TOP,
- "\n};\n\n");
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t normTrie_index[%ld]={\n",
- "static const uint32_t normTrie_data32[%ld]={\n",
- normRuntimeTrie2,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 normTrie={\n",
- normRuntimeTrie2, "normTrie_index", "normTrie_data32",
- "};\n\n");
- usrc_writeArray(f,
- "static const uint16_t extraData[%ld]={\n",
- utm_getStart(extraMem), 16, utm_countItems(extraMem),
- "\n};\n\n");
- usrc_writeArray(f,
- "static const uint16_t combiningTable[%ld]={\n",
- combiningTable, 16, combiningTableTop,
- "\n};\n\n");
- if(fcdTrieSize>0) {
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t fcdTrie_index[%ld]={\n", NULL,
- fcdRuntimeTrie2,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 fcdTrie={\n",
- fcdRuntimeTrie2, "fcdTrie_index", NULL,
- "};\n\n");
- } else {
- fputs( "static const UTrie2 fcdTrie={ NULL };\n\n", f);
- }
- if(auxTrieSize>0) {
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t auxTrie_index[%ld]={\n", NULL,
- auxRuntimeTrie2,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 auxTrie={\n",
- auxRuntimeTrie2, "auxTrie_index", NULL,
- "};\n\n");
- } else {
- fputs( "static const UTrie2 auxTrie={ NULL };\n\n", f);
- }
- usrc_writeArray(f,
- "static const uint16_t canonStartSets[%ld]={\n",
- canonStartSets, 16, canonStartSetsTop,
- "\n};\n\n");
- fclose(f);
- }
- utrie2_close(normRuntimeTrie2);
- utrie2_close(fcdRuntimeTrie2);
- utrie2_close(auxRuntimeTrie2);
-#endif
- } else {
- /* write the data */
- pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
- haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gennorm: unable to create the output file, error %d\n", errorCode);
- exit(errorCode);
- }
-
-#if !UCONFIG_NO_NORMALIZATION
-
- udata_writeBlock(pData, indexes, sizeof(indexes));
- udata_writeBlock(pData, normTrieBlock, normTrieSize);
- udata_writeBlock(pData, utm_getStart(extraMem), utm_countItems(extraMem)*2);
- udata_writeBlock(pData, combiningTable, combiningTableTop*2);
- udata_writeBlock(pData, fcdTrieBlock, fcdTrieSize);
- udata_writeBlock(pData, auxTrieBlock, auxTrieSize);
- udata_writeBlock(pData, canonStartSets, canonStartSetsTop*2);
-
-#endif
-
- /* finish up */
- dataLength=udata_finish(pData, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "gennorm: error %d writing the output file\n", errorCode);
- exit(errorCode);
- }
-
- if(dataLength!=size) {
- fprintf(stderr, "gennorm error: data length %ld != calculated size %ld\n",
- (long)dataLength, (long)size);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- }
-}
-
-#if !UCONFIG_NO_NORMALIZATION
-
-extern void
-cleanUpData(void) {
- int32_t i, count;
-
- count=utm_countItems(normMem);
- for(i=0; i<count; ++i) {
- uset_close(norms[i].canonStart);
- }
-
- utm_close(normMem);
- utm_close(utf32Mem);
- utm_close(extraMem);
- utm_close(combiningTriplesMem);
- utrie_close(normTrie);
- utrie_close(norm32Trie);
- utrie_close(fcdTrie);
- utrie_close(auxTrie);
-
- uset_close(nfdQCNoSet);
-
- uprv_free(normTrie);
- uprv_free(norm32Trie);
- uprv_free(fcdTrie);
- uprv_free(auxTrie);
-}
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/genbidi/Makefile.in b/tools/gennorm2/Makefile.in
index 2f93e7d6..f46e4664 100644
--- a/tools/genbidi/Makefile.in
+++ b/tools/gennorm2/Makefile.in
@@ -1,7 +1,7 @@
-## Makefile.in for ICU - tools/genbidi
-## Copyright (c) 1999-2005, International Business Machines Corporation and
+## Makefile.in for ICU - tools/gennorm2
+## Copyright (c) 2009-2010, International Business Machines Corporation and
## others. All Rights Reserved.
-## Steven R. Loomis
+## Steven R. Loomis/Markus W. Scherer
## Source directory information
srcdir = @srcdir@
@@ -12,16 +12,12 @@ top_builddir = ../..
include $(top_builddir)/icudefs.mk
## Build directory information
-subdir = tools/genbidi
+subdir = tools/gennorm2
-TARGET_STUB_NAME = genbidi
-
-SECTION = 8
-
-#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
+TARGET_STUB_NAME = gennorm2
## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS) $(MAN_FILES)
+CLEANFILES = *~ $(DEPS)
## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
@@ -32,7 +28,7 @@ endif
CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-OBJECTS = genbidi.o store.o
+OBJECTS = gennorm2.o n2builder.o
DEPS = $(OBJECTS:.o=.d)
@@ -51,14 +47,11 @@ distclean : distclean-local
dist: dist-local
check: all check-local
-all-local: $(TARGET) $(MAN_FILES)
-
-install-local: all-local install-man
-
-install-man: $(MAN_FILES)
-# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
-# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+all-local: $(TARGET)
+install-local: all-local
+ $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
+ $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
dist-local:
@@ -80,11 +73,6 @@ $(TARGET) : $(OBJECTS)
$(POST_BUILD_STEP)
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
@@ -92,4 +80,3 @@ ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif
-
diff --git a/tools/gennorm2/gennorm2.cpp b/tools/gennorm2/gennorm2.cpp
new file mode 100644
index 00000000..5e717f3b
--- /dev/null
+++ b/tools/gennorm2/gennorm2.cpp
@@ -0,0 +1,271 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: gennorm2.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov25
+* created by: Markus W. Scherer
+*
+* This program reads text files that define Unicode normalization,
+* parses them, and builds a binary data file.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/std_string.h" // U_HAVE_STD_STRING, #include <string>
+#include "n2builder.h" // UCONFIG_NO_NORMALIZATION=1 if !U_HAVE_STD_STRING
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "unicode/errorcode.h"
+#include "unicode/localpointer.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/unistr.h"
+#include "normalizer2impl.h"
+#include "toolutil.h"
+#include "uoptions.h"
+#include "uparse.h"
+
+#if UCONFIG_NO_NORMALIZATION
+#include "unewdata.h"
+#endif
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+U_NAMESPACE_BEGIN
+
+UBool beVerbose=FALSE, haveCopyright=TRUE;
+
+U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
+
+#if !UCONFIG_NO_NORMALIZATION
+void parseFile(FILE *f, Normalizer2DataBuilder &builder);
+#endif
+
+/* -------------------------------------------------------------------------- */
+
+enum {
+ HELP_H,
+ HELP_QUESTION_MARK,
+ VERBOSE,
+ COPYRIGHT,
+ SOURCEDIR,
+ OUTPUT_FILENAME,
+ UNICODE_VERSION,
+ OPT_FAST
+};
+
+static UOption options[]={
+ UOPTION_HELP_H,
+ UOPTION_HELP_QUESTION_MARK,
+ UOPTION_VERBOSE,
+ UOPTION_COPYRIGHT,
+ UOPTION_SOURCEDIR,
+ UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
+ UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
+ UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
+};
+
+/*
+ * Tool entry point: parses the command line, then either writes a dummy data
+ * file (UCONFIG_NO_NORMALIZATION) or feeds every remaining argument as an
+ * input file to a Normalizer2DataBuilder and writes the binary output file.
+ * Returns a UErrorCode value as the process exit status.
+ */
+extern "C" int
+main(int argc, char* argv[]) {
+ U_MAIN_INIT_ARGS(argc, argv);
+
+ /* preset then read command line options */
+ options[SOURCEDIR].value="";
+ options[UNICODE_VERSION].value=U_UNICODE_VERSION;
+ argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);
+
+ /* error handling, printing usage message */
+ if(argc<0) {
+ fprintf(stderr,
+ "error in command line argument \"%s\"\n",
+ argv[-argc]);
+ }
+ // The output file name is mandatory: force the usage/error path below.
+ if(!options[OUTPUT_FILENAME].doesOccur) {
+ argc=-1;
+ }
+ // argc<2 also covers "no input files left after option parsing".
+ if( argc<2 ||
+ options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur
+ ) {
+ /*
+ * Broken into chunks because the C89 standard says the minimum
+ * required supported string length is 509 bytes.
+ */
+ fprintf(stderr,
+ "Usage: %s [-options] infiles+ -o outputfilename\n"
+ "\n"
+ "Reads the infiles with normalization data and\n"
+ "creates a binary file (outputfilename) with the data.\n"
+ "\n",
+ argv[0]);
+ fprintf(stderr,
+ "Options:\n"
+ "\t-h or -? or --help this usage text\n"
+ "\t-v or --verbose verbose output\n"
+ "\t-c or --copyright include a copyright notice\n"
+ "\t-u or --unicode Unicode version, followed by the version like 5.2.0\n");
+ fprintf(stderr,
+ "\t-s or --sourcedir source directory, followed by the path\n"
+ "\t-o or --output output filename\n");
+ fprintf(stderr,
+ "\t --fast optimize the .nrm file for fast normalization,\n"
+ "\t which might increase its size (Writes fully decomposed\n"
+ "\t regular mappings instead of delta mappings.\n"
+ "\t You should measure the runtime speed to make sure that\n"
+ "\t this is a good trade-off.)\n");
+ // A negative argc means a bad/missing argument; otherwise help was requested.
+ return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+ }
+
+ beVerbose=options[VERBOSE].doesOccur;
+ haveCopyright=options[COPYRIGHT].doesOccur;
+
+ IcuToolErrorCode errorCode("gennorm2/main()");
+
+#if UCONFIG_NO_NORMALIZATION
+
+ fprintf(stderr,
+ "gennorm2 writes a dummy binary data file "
+ "because UCONFIG_NO_NORMALIZATION is set, \n"
+ "see icu/source/common/unicode/uconfig.h\n");
+ udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
+ return U_UNSUPPORTED_ERROR;
+
+#else
+
+ LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(errorCode));
+ errorCode.assertSuccess();
+
+ builder->setUnicodeVersion(options[UNICODE_VERSION].value);
+
+ if(options[OPT_FAST].doesOccur) {
+ builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
+ }
+
+ // prepare the filename beginning with the source dir
+ U_STD_NSQ string filename(options[SOURCEDIR].value);
+ int32_t pathLength=filename.length();
+ if( pathLength>0 &&
+ filename[pathLength-1]!=U_FILE_SEP_CHAR &&
+ filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
+ ) {
+ filename.push_back(U_FILE_SEP_CHAR);
+ pathLength=filename.length();
+ }
+
+ // Each remaining argument is an input file name relative to the source dir.
+ for(int i=1; i<argc; ++i) {
+ printf("gennorm2: processing %s\n", argv[i]);
+ filename.append(argv[i]);
+ LocalStdioFilePointer f(fopen(filename.c_str(), "r"));
+ if(f==NULL) {
+ fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.c_str());
+ exit(U_FILE_ACCESS_ERROR);
+ }
+ // Called once per file; this also starts a new builder phase.
+ builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);
+ parseFile(f.getAlias(), *builder);
+ // Cut the file name off again, keeping only the directory prefix.
+ filename.erase(pathLength);
+ }
+
+ builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
+
+ return errorCode.get();
+
+#endif
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/*
+ * Reads one normalization data text file line by line and forwards each data
+ * line to the builder. After the code point range, the first non-whitespace
+ * character selects the operation:
+ *   ':' followed by a decimal number  -> setCC() for every code point
+ *   '-' with nothing after it         -> removeMapping() for every code point
+ *   '=' followed by a string          -> setRoundTripMapping(), single code point only
+ *   '>' followed by a string          -> setOneWayMapping() for every code point
+ * Text from '#' on is dropped; empty lines and lines starting with '*' are skipped.
+ * Any parse error prints a message to stderr and terminates the process.
+ */
+void parseFile(FILE *f, Normalizer2DataBuilder &builder) {
+ IcuToolErrorCode errorCode("gennorm2/parseFile()");
+ // NOTE(review): fgets() returns a longer physical line in several pieces,
+ // and each piece is then parsed as a separate line -- confirm that input
+ // lines stay below this buffer size.
+ char line[300];
+ uint32_t startCP, endCP;
+ while(NULL!=fgets(line, (int)sizeof(line), f)) {
+ // Cut off a trailing comment, then trailing whitespace.
+ char *comment=(char *)strchr(line, '#');
+ if(comment!=NULL) {
+ *comment=0;
+ }
+ u_rtrim(line);
+ if(line[0]==0) {
+ continue; // skip empty and comment-only lines
+ }
+ if(line[0]=='*') {
+ continue; // reserved syntax
+ }
+ const char *delimiter;
+ int32_t rangeLength=
+ u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode);
+ if(errorCode.isFailure()) {
+ fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line);
+ exit(errorCode.reset());
+ }
+ delimiter=u_skipWhitespace(delimiter);
+ if(*delimiter==':') {
+ const char *s=u_skipWhitespace(delimiter+1);
+ char *end;
+ unsigned long value=strtoul(s, &end, 10);
+ // Reject: no digits, trailing garbage, or a value of 255 or more.
+ if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) {
+ fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
+ exit(U_PARSE_ERROR);
+ }
+ for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
+ builder.setCC(c, (uint8_t)value);
+ }
+ continue;
+ }
+ if(*delimiter=='-') {
+ // Nothing but whitespace may follow the '-'.
+ if(*u_skipWhitespace(delimiter+1)!=0) {
+ fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line);
+ exit(U_PARSE_ERROR);
+ }
+ for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
+ builder.removeMapping(c);
+ }
+ continue;
+ }
+ if(*delimiter=='=' || *delimiter=='>') {
+ UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];
+ int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode);
+ if(errorCode.isFailure()) {
+ fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line);
+ exit(errorCode.reset());
+ }
+ // Wraps the stack buffer without copying; the builder stores its own copy.
+ UnicodeString mapping(FALSE, uchars, length);
+ if(*delimiter=='=') {
+ if(rangeLength!=1) {
+ fprintf(stderr,
+ "gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
+ line);
+ exit(U_PARSE_ERROR);
+ }
+ builder.setRoundTripMapping((UChar32)startCP, mapping);
+ } else {
+ for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
+ builder.setOneWayMapping(c, mapping);
+ }
+ }
+ continue;
+ }
+ fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);
+ exit(U_PARSE_ERROR);
+ }
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
+
+U_NAMESPACE_END
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/tools/gennames/gennames.vcproj b/tools/gennorm2/gennorm2.vcproj
index a47250af..f061a5ef 100644
--- a/tools/gennames/gennames.vcproj
+++ b/tools/gennorm2/gennorm2.vcproj
@@ -2,9 +2,11 @@
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
- Name="gennames"
- ProjectGUID="{F5281B04-A9E0-4680-BBA8-1D7F7D115458}"
- TargetFrameworkVersion="131072"
+ Name="gennorm2"
+ ProjectGUID="{C7891A65-80AB-4245-912E-5F1E17B0E6C4}"
+ RootNamespace="gennorm2"
+ Keyword="Win32Proj"
+ TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
@@ -22,10 +24,8 @@
OutputDirectory=".\x86\Release"
IntermediateDirectory=".\x86\Release"
ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
@@ -43,22 +43,24 @@
/>
<Tool
Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/gennames.tlb"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+ Optimization="2"
+ EnableIntrinsicFunctions="true"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/gennames.pch"
- AssemblerListingLocation=".\x86\Release/"
- ObjectFile=".\x86\Release/"
- ProgramDataBaseFileName=".\x86\Release/"
+ PrecompiledHeaderFile=".\x86\Release\gennorm2.pch"
+ AssemblerListingLocation=".\x86\Release\"
+ ObjectFile=".\x86\Release\"
+ ProgramDataBaseFileName=".\x86\Release\"
WarningLevel="3"
+ DebugInformationFormat="3"
SuppressStartupBanner="true"
CompileAs="0"
/>
@@ -75,13 +77,17 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x86\Release/gennames.exe"
+ OutputFile=".\x86\Release\gennorm2.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/gennames.pdb"
+ ProgramDatabaseFile=".\x86\Release\gennorm2.pdb"
+ GenerateDebugInformation="true"
SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ TargetMachine="1"
RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
+ DataExecutionPrevention="1"
/>
<Tool
Name="VCALinkTool"
@@ -110,10 +116,7 @@
OutputDirectory=".\x86\Debug"
IntermediateDirectory=".\x86\Debug"
ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
+ CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
@@ -131,22 +134,24 @@
/>
<Tool
Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/gennames.tlb"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+ EnableIntrinsicFunctions="true"
+ MinimalRebuild="true"
BasicRuntimeChecks="3"
+ StringPooling="true"
RuntimeLibrary="3"
BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/gennames.pch"
- AssemblerListingLocation=".\x86\Debug/"
- ObjectFile=".\x86\Debug/"
- ProgramDataBaseFileName=".\x86\Debug/"
+ PrecompiledHeaderFile=".\x86\Debug\gennorm2.pch"
+ AssemblerListingLocation=".\x86\Debug\"
+ ObjectFile=".\x86\Debug\"
+ ProgramDataBaseFileName=".\x86\Debug\"
BrowseInformation="1"
WarningLevel="3"
SuppressStartupBanner="true"
@@ -166,21 +171,21 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x86\Debug/gennames.exe"
+ OutputFile=".\x86\Debug\gennorm2.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/gennames.pdb"
+ ProgramDatabaseFile=".\x86\Debug\gennorm2.pdb"
SubSystem="1"
+ TargetMachine="1"
RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
+ DataExecutionPrevention="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
- UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
@@ -203,10 +208,8 @@
OutputDirectory=".\x64\Release"
IntermediateDirectory=".\x64\Release"
ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
@@ -224,23 +227,24 @@
/>
<Tool
Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/gennames.tlb"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+ Optimization="2"
+ EnableIntrinsicFunctions="true"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/gennames.pch"
- AssemblerListingLocation=".\x64\Release/"
- ObjectFile=".\x64\Release/"
- ProgramDataBaseFileName=".\x64\Release/"
+ PrecompiledHeaderFile=".\x64\Release\gennorm2.pch"
+ AssemblerListingLocation=".\x64\Release\"
+ ObjectFile=".\x64\Release\"
+ ProgramDataBaseFileName=".\x64\Release\"
WarningLevel="3"
+ DebugInformationFormat="3"
SuppressStartupBanner="true"
CompileAs="0"
/>
@@ -257,12 +261,17 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x64\Release/gennames.exe"
+ OutputFile=".\x64\Release\gennorm2.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/gennames.pdb"
+ ProgramDatabaseFile=".\x64\Release\gennorm2.pdb"
+ GenerateDebugInformation="true"
SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
TargetMachine="17"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="1"
/>
<Tool
Name="VCALinkTool"
@@ -283,9 +292,6 @@
Name="VCAppVerifierTool"
/>
<Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
@@ -294,10 +300,7 @@
OutputDirectory=".\x64\Debug"
IntermediateDirectory=".\x64\Debug"
ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
+ CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
@@ -315,27 +318,28 @@
/>
<Tool
Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/gennames.tlb"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+ EnableIntrinsicFunctions="true"
+ MinimalRebuild="true"
BasicRuntimeChecks="3"
+ StringPooling="true"
RuntimeLibrary="3"
BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/gennames.pch"
- AssemblerListingLocation=".\x64\Debug/"
- ObjectFile=".\x64\Debug/"
- ProgramDataBaseFileName=".\x64\Debug/"
+ PrecompiledHeaderFile=".\x64\Debug\gennorm2.pch"
+ AssemblerListingLocation=".\x64\Debug\"
+ ObjectFile=".\x64\Debug\"
+ ProgramDataBaseFileName=".\x64\Debug\"
BrowseInformation="1"
WarningLevel="3"
SuppressStartupBanner="true"
- DebugInformationFormat="3"
+ DebugInformationFormat="4"
CompileAs="0"
/>
<Tool
@@ -351,20 +355,21 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x64\Debug/gennames.exe"
+ OutputFile=".\x64\Debug\gennorm2.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/gennames.pdb"
+ ProgramDatabaseFile=".\x64\Debug\gennorm2.pdb"
SubSystem="1"
TargetMachine="17"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
- UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
@@ -379,9 +384,6 @@
Name="VCAppVerifierTool"
/>
<Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
@@ -389,25 +391,18 @@
<References>
</References>
<Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ <File
+ RelativePath=".\gennorm2.cpp"
>
- <File
- RelativePath=".\gennames.c"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl"
+ </File>
+ <File
+ RelativePath=".\n2builder.cpp"
>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+ </File>
+ <File
+ RelativePath=".\n2builder.h"
>
- </Filter>
+ </File>
</Files>
<Globals>
</Globals>
diff --git a/tools/gennorm2/n2builder.cpp b/tools/gennorm2/n2builder.cpp
new file mode 100644
index 00000000..daf9e1cb
--- /dev/null
+++ b/tools/gennorm2/n2builder.cpp
@@ -0,0 +1,1094 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: n2builder.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov25
+* created by: Markus W. Scherer
+*
+* Builds Normalizer2 data and writes a binary .nrm file.
+* For the file format see source/common/normalizer2impl.h.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/std_string.h" // U_HAVE_STD_STRING, #include <string>
+#include "n2builder.h" // UCONFIG_NO_NORMALIZATION=1 if !U_HAVE_STD_STRING
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if U_HAVE_STD_STRING
+#include <vector>
+#endif
+#include "unicode/errorcode.h"
+#include "unicode/localpointer.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "hash.h"
+#include "normalizer2impl.h"
+#include "toolutil.h"
+#include "unewdata.h"
+#include "utrie2.h"
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/* UDataInfo cf. udata.h */
+static UDataInfo dataInfo={
+ sizeof(UDataInfo),
+ 0,
+
+ U_IS_BIG_ENDIAN,
+ U_CHARSET_FAMILY,
+ U_SIZEOF_UCHAR,
+ 0,
+
+ { 0x4e, 0x72, 0x6d, 0x32 }, /* dataFormat="Nrm2" */
+ { 1, 0, 0, 0 }, /* formatVersion */
+ { 5, 2, 0, 0 } /* dataVersion (Unicode version) */
+};
+
+U_NAMESPACE_BEGIN
+
+// Steps through a fixed table of four code point ranges (Jamo L, Jamo V,
+// Jamo T and Hangul syllables), each paired with a norm16 value.
+class HangulIterator {
+public:
+ // One table entry. In the table below, limit is always base+count,
+ // i.e. one past the last code point of the range.
+ struct Range {
+ UChar32 start, limit;
+ uint16_t norm16;
+ };
+
+ HangulIterator() : rangeIndex(0) {}
+ // Returns the next table entry, or NULL once all entries have been returned.
+ const Range *nextRange() {
+ if(rangeIndex<LENGTHOF(ranges)) {
+ return ranges+rangeIndex++;
+ } else {
+ return NULL;
+ }
+ }
+ // Restarts the iteration at the first table entry.
+ void reset() { rangeIndex=0; }
+private:
+ static const Range ranges[4];
+ int32_t rangeIndex;
+};
+
+// Order: Jamo L, Jamo V, Jamo T, Hangul syllables.
+const HangulIterator::Range HangulIterator::ranges[4]={
+ { Hangul::JAMO_L_BASE, Hangul::JAMO_L_BASE+Hangul::JAMO_L_COUNT, 1 },
+ { Hangul::JAMO_V_BASE, Hangul::JAMO_V_BASE+Hangul::JAMO_V_COUNT, Normalizer2Impl::JAMO_VT },
+ // JAMO_T_BASE+1: not U+11A7
+ { Hangul::JAMO_T_BASE+1, Hangul::JAMO_T_BASE+Hangul::JAMO_T_COUNT, Normalizer2Impl::JAMO_VT },
+ { Hangul::HANGUL_BASE, Hangul::HANGUL_BASE+Hangul::HANGUL_COUNT, 0 }, // will become minYesNo
+};
+
+struct CompositionPair {
+ CompositionPair(UChar32 t, UChar32 c) : trail(t), composite(c) {}
+ UChar32 trail, composite;
+};
+
+// Per-code-point normalization properties collected by the builder.
+// Instances live in the builder's normMem pool and are addressed by index.
+struct Norm {
+ // Enumerator order matters: hasMapping() relies on ROUND_TRIP and ONE_WAY
+ // being the only values greater than REMOVED.
+ enum MappingType { NONE, REMOVED, ROUND_TRIP, ONE_WAY };
+
+ // TRUE for ROUND_TRIP and ONE_WAY, FALSE for NONE and REMOVED.
+ UBool hasMapping() const { return mappingType>REMOVED; }
+
+ // Requires hasMapping() and well-formed mapping.
+ // Sets mappingCP to the mapping's code point if the mapping consists of
+ // exactly one code point, otherwise to U_SENTINEL.
+ void setMappingCP() {
+ UChar32 c;
+ if(!mapping->isEmpty() && mapping->length()==U16_LENGTH(c=mapping->char32At(0))) {
+ mappingCP=c;
+ } else {
+ mappingCP=U_SENTINEL;
+ }
+ }
+
+ UnicodeString *mapping; // heap-allocated; deleted by the builder
+ UChar32 mappingCP; // >=0 if mapping to 1 code point
+ int32_t mappingPhase; // builder phase in which the mapping was last set or removed
+ MappingType mappingType;
+
+ // (trail, composite) pairs for which this code point is the lead; may be NULL.
+ U_STD_NSQ vector<CompositionPair> *compositions;
+ uint8_t cc; // value set via Normalizer2DataBuilder::setCC()
+ UBool combinesBack; // set for the trail character of a round-trip mapping
+ UBool hasNoCompBoundaryAfter;
+
+ enum OffsetType {
+ OFFSET_NONE, OFFSET_MAYBE_YES,
+ OFFSET_YES_YES, OFFSET_YES_NO, OFFSET_NO_NO,
+ OFFSET_DELTA
+ };
+ // OFFSET_MASK covers the low OFFSET_SHIFT bits.
+ // NOTE(review): presumably offset packs an OffsetType into those bits -- confirm at the use sites.
+ enum { OFFSET_SHIFT=4, OFFSET_MASK=(1<<OFFSET_SHIFT)-1 };
+ int32_t offset;
+};
+
+class Normalizer2DBEnumerator {
+public:
+ Normalizer2DBEnumerator(Normalizer2DataBuilder &b) : builder(b) {}
+ virtual ~Normalizer2DBEnumerator() {}
+ virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) = 0;
+ Normalizer2DBEnumerator *ptr() { return this; }
+protected:
+ Normalizer2DataBuilder &builder;
+};
+
+U_CDECL_BEGIN
+
+static UBool U_CALLCONV
+enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
+ return ((Normalizer2DBEnumerator *)context)->rangeHandler(start, end, value);
+}
+
+U_CDECL_END
+
+/**
+ * Creates an empty builder: opens the code point -> Norm index trie and the
+ * Norm struct pool, and reserves Norm index 0 as the placeholder entry.
+ *
+ * @param errorCode Receives the status of opening normTrie. It is not
+ *                  inspected here; the caller is expected to check it.
+ */
+Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
+        phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
+    memset(unicodeVersion, 0, sizeof(unicodeVersion));
+    normTrie=utrie2_open(0, 0, &errorCode);
+    normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
+    norms=allocNorm();  // unused Norm struct at index 0
+    // The destructor unconditionally passes norm16Trie to utrie2_close().
+    // Give it a defined value so that destroying a builder before the trie
+    // has been assigned does not close an indeterminate pointer.
+    norm16Trie=NULL;
+    memset(indexes, 0, sizeof(indexes));
+}
+
+// Releases both tries, the heap objects owned by each Norm, and the Norm pool.
+Normalizer2DataBuilder::~Normalizer2DataBuilder() {
+ utrie2_close(normTrie);
+ int32_t normsLength=utm_countItems(normMem);
+ // Starts at 1: index 0 is the placeholder Norm allocated by the constructor.
+ for(int32_t i=1; i<normsLength; ++i) {
+ delete norms[i].mapping;
+ delete norms[i].compositions;
+ }
+ utm_close(normMem);
+ utrie2_close(norm16Trie);
+}
+
+void
+Normalizer2DataBuilder::setUnicodeVersion(const char *v) {
+ u_versionFromString(unicodeVersion, v);
+}
+
+Norm *Normalizer2DataBuilder::allocNorm() {
+ Norm *p=(Norm *)utm_alloc(normMem);
+ norms=(Norm *)utm_getStart(normMem); // in case it got reallocated
+ return p;
+}
+
+/* get an existing Norm unit */
+Norm *Normalizer2DataBuilder::getNorm(UChar32 c) {
+ uint32_t i=utrie2_get32(normTrie, c);
+ if(i==0) {
+ return NULL;
+ }
+ return norms+i;
+}
+
+const Norm &Normalizer2DataBuilder::getNormRef(UChar32 c) const {
+ return norms[utrie2_get32(normTrie, c)];
+}
+
+/*
+ * get or create a Norm unit;
+ * get or create the intermediate trie entries for it as well
+ */
+Norm *Normalizer2DataBuilder::createNorm(UChar32 c) {
+ uint32_t i=utrie2_get32(normTrie, c);
+ if(i!=0) {
+ return norms+i;
+ } else {
+ /* allocate Norm */
+ Norm *p=allocNorm();
+ IcuToolErrorCode errorCode("gennorm2/createNorm()");
+ utrie2_set32(normTrie, c, (uint32_t)(p-norms), errorCode);
+ return p;
+ }
+}
+
+/*
+ * Prepares p (the Norm for c, may be NULL) for receiving a new mapping or a
+ * removal. If p already carries a mapping or removal, the current override
+ * policy decides whether replacing it is an error (message + exit) or whether
+ * the old mapping string is discarded. Stamps p with the current phase.
+ * Returns p unchanged, including NULL.
+ */
+Norm *Normalizer2DataBuilder::checkNormForMapping(Norm *p, UChar32 c) {
+ if(p!=NULL) {
+ if(p->mappingType!=Norm::NONE) {
+ // OVERRIDE_NONE: never replace. OVERRIDE_PREVIOUS: only data from
+ // an earlier phase may be replaced, not data set in the same phase.
+ if( overrideHandling==OVERRIDE_NONE ||
+ (overrideHandling==OVERRIDE_PREVIOUS && p->mappingPhase==phase)
+ ) {
+ fprintf(stderr,
+ "error in gennorm2 phase %d: "
+ "not permitted to override mapping for U+%04lX from phase %d\n",
+ (int)phase, (long)c, (int)p->mappingPhase);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ // mappingType is left as is; the caller sets the new type.
+ delete p->mapping;
+ p->mapping=NULL;
+ }
+ p->mappingPhase=phase;
+ }
+ return p;
+}
+
+// Sets the override policy used by checkNormForMapping().
+// Side effect: every call also starts a new phase, even if oh is unchanged.
+void Normalizer2DataBuilder::setOverrideHandling(OverrideHandling oh) {
+ overrideHandling=oh;
+ ++phase;
+}
+
+void Normalizer2DataBuilder::setCC(UChar32 c, uint8_t cc) {
+ createNorm(c)->cc=cc;
+}
+
+uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const {
+ return getNormRef(c).cc;
+}
+
+// Returns TRUE if s can be converted from UTF-16 to UTF-8.
+// Uses a preflighting conversion (NULL destination, zero capacity) as the
+// check: success and the buffer-overflow status that preflighting reports are
+// both accepted, any other status is treated as a malformed string.
+static UBool isWellFormed(const UnicodeString &s) {
+ UErrorCode errorCode=U_ZERO_ERROR;
+ u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode);
+ return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR;
+}
+
+// Records a one-way mapping from c to m, creating the Norm for c if needed.
+// Exits with an error if m is malformed or if the override policy forbids
+// replacing existing mapping data for c. Stores its own copy of m.
+void Normalizer2DataBuilder::setOneWayMapping(UChar32 c, const UnicodeString &m) {
+ if(!isWellFormed(m)) {
+ fprintf(stderr,
+ "error in gennorm2 phase %d: "
+ "illegal one-way mapping from U+%04lX to malformed string\n",
+ (int)phase, (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ Norm *p=checkNormForMapping(createNorm(c), c);
+ p->mapping=new UnicodeString(m);
+ p->mappingType=Norm::ONE_WAY;
+ p->setMappingCP();
+}
+
+// Records a round-trip mapping from c to m, creating the Norm for c if needed.
+// Exits with an error if c is a surrogate code point, if m is malformed, if m
+// does not consist of exactly two code points, or if the override policy
+// forbids replacing existing mapping data for c. Stores its own copy of m.
+void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString &m) {
+ if(U_IS_SURROGATE(c)) {
+ fprintf(stderr,
+ "error in gennorm2 phase %d: "
+ "illegal round-trip mapping from surrogate code point U+%04lX\n",
+ (int)phase, (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ if(!isWellFormed(m)) {
+ fprintf(stderr,
+ "error in gennorm2 phase %d: "
+ "illegal round-trip mapping from U+%04lX to malformed string\n",
+ (int)phase, (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ int32_t numCP=u_countChar32(m.getBuffer(), m.length());
+ if(numCP!=2) {
+ fprintf(stderr,
+ "error in gennorm2 phase %d: "
+ "illegal round-trip mapping from U+%04lX to %d!=2 code points\n",
+ (int)phase, (long)c, (int)numCP);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ Norm *p=checkNormForMapping(createNorm(c), c);
+ p->mapping=new UnicodeString(m);
+ p->mappingType=Norm::ROUND_TRIP;
+ // A two-code-point mapping can never be a single code point.
+ p->mappingCP=U_SENTINEL;
+}
+
+// Marks the mapping of c as removed. Uses getNorm(), not createNorm(): if no
+// Norm exists for c yet, nothing is recorded. Subject to the same override
+// policy check as setting a mapping.
+void Normalizer2DataBuilder::removeMapping(UChar32 c) {
+ Norm *p=checkNormForMapping(getNorm(c), c);
+ if(p!=NULL) {
+ p->mappingType=Norm::REMOVED;
+ }
+}
+
+class CompositionBuilder : public Normalizer2DBEnumerator {
+public:
+ CompositionBuilder(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {}
+ virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
+ builder.addComposition(start, end, value);
+ return TRUE;
+ }
+};
+
+/*
+ * Range callback used via CompositionBuilder: start..end is a range of code
+ * points that share the Norm at index value. If that Norm has a round-trip
+ * mapping lead+trail, flags trail as combining back and adds the pair
+ * (trail, start) to the lead character's compositions list, which is kept
+ * sorted by trail. Inconsistent data prints a message and exits.
+ * Norms without a round-trip mapping are ignored.
+ */
+void
+Normalizer2DataBuilder::addComposition(UChar32 start, UChar32 end, uint32_t value) {
+ if(norms[value].mappingType==Norm::ROUND_TRIP) {
+ if(start!=end) {
+ fprintf(stderr,
+ "gennorm2 error: same round-trip mapping for "
+ "more than 1 code point U+%04lX..U+%04lX\n",
+ (long)start, (long)end);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ if(norms[value].cc!=0) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX has a round-trip mapping and ccc!=0, "
+ "not possible in Unicode normalization\n",
+ (long)start);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ // setRoundTripMapping() ensured that there are exactly two code points.
+ const UnicodeString &m=*norms[value].mapping;
+ UChar32 lead=m.char32At(0);
+ UChar32 trail=m.char32At(m.length()-1);
+ if(getCC(lead)!=0) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX's round-trip mapping's starter U+%04lX has ccc!=0, "
+ "not possible in Unicode normalization\n",
+ (long)start, (long)lead);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ // Flag for trailing character.
+ createNorm(trail)->combinesBack=TRUE;
+ // Insert (trail, composite) pair into compositions list for the lead character.
+ CompositionPair pair(trail, start);
+ // Fetched after createNorm(trail): allocNorm() notes that the Norm
+ // array may get reallocated when a new Norm is allocated.
+ Norm *leadNorm=createNorm(lead);
+ U_STD_NSQ vector<CompositionPair> *compositions=leadNorm->compositions;
+ if(compositions==NULL) {
+ compositions=leadNorm->compositions=new U_STD_NSQ vector<CompositionPair>;
+ compositions->push_back(pair);
+ } else {
+ // Insertion sort, and check for duplicate trail characters.
+ U_STD_NSQ vector<CompositionPair>::iterator it;
+ for(it=compositions->begin(); it!=compositions->end(); ++it) {
+ if(trail==it->trail) {
+ fprintf(stderr,
+ "gennorm2 error: same round-trip mapping for "
+ "more than 1 code point (e.g., U+%04lX) to U+%04lX + U+%04lX\n",
+ (long)start, (long)lead, (long)trail);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ if(trail<it->trail) {
+ break;
+ }
+ }
+ // it is the first entry with a larger trail, or end().
+ compositions->insert(it, pair);
+ }
+ }
+}
+
+// Returns TRUE if norm has a composition whose trail character has a cc
+// strictly between lowCC and highCC (both bounds exclusive).
+UBool Normalizer2DataBuilder::combinesWithCCBetween(const Norm &norm,
+ uint8_t lowCC, uint8_t highCC) const {
+ const U_STD_NSQ vector<CompositionPair> *compositions=norm.compositions;
+ // With highCC-lowCC<2 there is no value strictly between the bounds.
+ if(compositions!=NULL && (highCC-lowCC)>=2) {
+ U_STD_NSQ vector<CompositionPair>::const_iterator it;
+ for(it=compositions->begin(); it!=compositions->end(); ++it) {
+ uint8_t trailCC=getCC(it->trail);
+ if(lowCC<trailCC && trailCC<highCC) {
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+// Looks up the composite for the lead character described by norm plus trail.
+// Returns the composite code point, or U_SENTINEL if there is no such pair.
+UChar32 Normalizer2DataBuilder::combine(const Norm &norm, UChar32 trail) const {
+ const U_STD_NSQ vector<CompositionPair> *compositions=norm.compositions;
+ if(compositions!=NULL) {
+ U_STD_NSQ vector<CompositionPair>::const_iterator it;
+ for(it=compositions->begin(); it!=compositions->end(); ++it) {
+ if(trail==it->trail) {
+ return it->composite;
+ }
+ // addComposition() keeps the list sorted by trail, so stop early.
+ if(trail<it->trail) {
+ break;
+ }
+ }
+ }
+ return U_SENTINEL;
+}
+
+class Decomposer : public Normalizer2DBEnumerator {
+public:
+ Decomposer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b), didDecompose(FALSE) {}
+ virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
+ didDecompose|=builder.decompose(start, end, value);
+ return TRUE;
+ }
+ UBool didDecompose;
+};
+
+/*
+ * Range callback used via Decomposer: start..end is a range of code points
+ * that share the Norm at index value. If that Norm has a mapping, each code
+ * point in the mapping that itself has a mapping is replaced by that mapping,
+ * and each Hangul syllable by its Hangul decomposition. Only one level is
+ * expanded per call.
+ * NOTE(review): presumably the caller repeats the enumeration until no call
+ * reports a change (see Decomposer::didDecompose) -- confirm at the call site.
+ * Inconsistent data prints a message and exits.
+ * Returns TRUE if the stored mapping was replaced, otherwise FALSE.
+ */
+UBool
+Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) {
+ if(norms[value].hasMapping()) {
+ const UnicodeString &m=*norms[value].mapping;
+ // Created lazily, only once a code point needs to be replaced.
+ UnicodeString *decomposed=NULL;
+ const UChar *s=m.getBuffer();
+ int32_t length=m.length();
+ int32_t prev, i=0;
+ UChar32 c;
+ while(i<length) {
+ // prev..i delimits the code units of the current code point c.
+ prev=i;
+ U16_NEXT(s, i, length, c);
+ if(start<=c && c<=end) {
+ fprintf(stderr,
+ "gennorm2 error: U+%04lX maps to itself directly or indirectly\n",
+ (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ const Norm &cNorm=getNormRef(c);
+ if(cNorm.hasMapping()) {
+ if(norms[value].mappingType==Norm::ROUND_TRIP) {
+ // In a round-trip mapping only the first code point may
+ // decompose further, and only via another round-trip mapping.
+ if(prev==0) {
+ if(cNorm.mappingType!=Norm::ROUND_TRIP) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX's round-trip mapping's starter "
+ "U+%04lX one-way-decomposes, "
+ "not possible in Unicode normalization\n",
+ (long)start, (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ // i now indexes the code point that follows c in m.
+ uint8_t myTrailCC=getCC(m.char32At(i));
+ UChar32 cTrailChar=cNorm.mapping->char32At(cNorm.mapping->length()-1);
+ uint8_t cTrailCC=getCC(cTrailChar);
+ if(cTrailCC>myTrailCC) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX's round-trip mapping's starter "
+ "U+%04lX decomposes and the "
+ "inner/earlier tccc=%hu > outer/following tccc=%hu, "
+ "not possible in Unicode normalization\n",
+ (long)start, (long)c,
+ (short)cTrailCC, (short)myTrailCC);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ } else {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX's round-trip mapping's non-starter "
+ "U+%04lX decomposes, "
+ "not possible in Unicode normalization\n",
+ (long)start, (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ }
+ if(decomposed==NULL) {
+ // Start the new string with the unchanged prefix of m before c.
+ decomposed=new UnicodeString(m, 0, prev);
+ }
+ decomposed->append(*cNorm.mapping);
+ } else if(Hangul::isHangul(c)) {
+ UChar buffer[3];
+ int32_t hangulLength=Hangul::decompose(c, buffer);
+ if(norms[value].mappingType==Norm::ROUND_TRIP && prev!=0) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX's round-trip mapping's non-starter "
+ "U+%04lX decomposes, "
+ "not possible in Unicode normalization\n",
+ (long)start, (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ if(decomposed==NULL) {
+ decomposed=new UnicodeString(m, 0, prev);
+ }
+ decomposed->append(buffer, hangulLength);
+ } else if(decomposed!=NULL) {
+ // c stays as is; copy it only if a new string is being built.
+ decomposed->append(m, prev, i-prev);
+ }
+ }
+ if(decomposed!=NULL) {
+ delete norms[value].mapping;
+ norms[value].mapping=decomposed;
+ // Not norms[value].setMappingCP(); because the original mapping
+ // is most likely to be encodable as a delta.
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+// Small fixed-capacity buffer of (code point, ccc) entries.
+// Each entry is stored as one int32_t: code point in the upper bits, ccc in the low 8 bits.
+// append() keeps the entries after the last starter (ccc==0) sorted by ccc and
+// remembers whether any entry had to be inserted out of input order.
+class BuilderReorderingBuffer {
+public:
+    BuilderReorderingBuffer() : fLength(0), fLastStarterIndex(-1), fDidReorder(FALSE) {}
+
+    // Back to the empty state: no entries, no starter seen, nothing reordered.
+    void reset() {
+        fDidReorder=FALSE;
+        fLastStarterIndex=-1;
+        fLength=0;
+    }
+
+    int32_t length() const { return fLength; }
+    UBool isEmpty() const { return fLength==0; }
+    // Index of the last entry with ccc==0, or -1 if there is none.
+    int32_t lastStarterIndex() const { return fLastStarterIndex; }
+    UChar32 charAt(int32_t i) const { return fArray[i]>>8; }
+    uint8_t ccAt(int32_t i) const { return (uint8_t)fArray[i]; }
+    UBool didReorder() const { return fDidReorder; }
+
+    // Adds c with combining class cc, inserting it before trailing entries
+    // with a higher ccc (but never before the last starter).
+    void append(UChar32 c, uint8_t cc) {
+        const int32_t entry=(c<<8)|cc;
+        if(cc!=0 && fLength!=0 && ccAt(fLength-1)>cc) {
+            // Out of order: find the insertion position by walking back
+            // over the entries with a higher ccc.
+            int32_t pos=fLength;
+            while((pos-1)>fLastStarterIndex && ccAt(pos-1)>cc) {
+                --pos;
+            }
+            // Shift the tail up by one to open a slot at pos.
+            for(int32_t j=fLength; j>pos; --j) {
+                fArray[j]=fArray[j-1];
+            }
+            fArray[pos]=entry;
+            ++fLength;
+            fDidReorder=TRUE;
+            return;
+        }
+        // In order already: plain append, tracking the most recent starter.
+        if(cc==0) {
+            fLastStarterIndex=fLength;
+        }
+        fArray[fLength++]=entry;
+    }
+
+    // Replaces dest with the buffer's code points in their current order.
+    void toString(UnicodeString &dest) {
+        dest.remove();
+        int32_t i=0;
+        while(i<fLength) {
+            dest.append(charAt(i));
+            ++i;
+        }
+    }
+
+    // Overwrites the last starter with composite (ccc 0) and removes
+    // the entry at combMarkIndex, which contributed to the composite.
+    void setComposite(UChar32 composite, int32_t combMarkIndex) {
+        fArray[fLastStarterIndex]=composite<<8;
+        --fLength;
+        for(int32_t i=combMarkIndex; i<fLength; ++i) {
+            fArray[i]=fArray[i+1];
+        }
+    }
+
+private:
+    int32_t fArray[Normalizer2Impl::MAPPING_LENGTH_MASK];
+    int32_t fLength;
+    int32_t fLastStarterIndex;
+    UBool fDidReorder;
+};
+
+// Feeds the code points of p's mapping into buffer (which orders them by ccc)
+// and, if the buffer had to reorder anything, writes the reordered string
+// back into the mapping.
+// The caller is expected to pass an empty (reset) buffer.
+void
+Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) {
+    UnicodeString &mapping=*p->mapping;
+    const int32_t unitCount=mapping.length();
+    if(unitCount>Normalizer2Impl::MAPPING_LENGTH_MASK) {
+        // Too long for the buffer. writeMapping() will complain about it
+        // and print the code point.
+        return;
+    }
+    const UChar *units=mapping.getBuffer();
+    for(int32_t pos=0; pos<unitCount;) {
+        UChar32 cp;
+        U16_NEXT(units, pos, unitCount, cp);
+        buffer.append(cp, getCC(cp));
+    }
+    if(!buffer.didReorder()) {
+        return;  // already in canonical order, keep the mapping untouched
+    }
+    buffer.toString(mapping);
+}
+
+// Determines whether there is NO composition boundary after the mapping whose
+// code points are held in buffer.
+// Returns TRUE for an empty buffer, for a buffer without a starter, and when the
+// last starter (after composing it with following marks as far as possible)
+// can still combine with something that follows; FALSE otherwise.
+// Side effect: may modify buffer via setComposite() while trying compositions.
+UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer) {
+ if(buffer.isEmpty()) {
+ return TRUE; // maps-to-empty string is no boundary of any kind
+ }
+ int32_t lastStarterIndex=buffer.lastStarterIndex();
+ if(lastStarterIndex<0) {
+ return TRUE; // no starter
+ }
+ UChar32 starter=buffer.charAt(lastStarterIndex);
+ // Hangul Jamo are checked via the Hangul helpers rather than via composition lists.
+ if( Hangul::isJamoL(starter) ||
+ (Hangul::isJamoV(starter) &&
+ 0<lastStarterIndex && Hangul::isJamoL(buffer.charAt(lastStarterIndex-1)))
+ ) {
+ // A Jamo leading consonant or an LV pair combines-forward if it is at the end,
+ // otherwise it is blocked.
+ return lastStarterIndex==buffer.length()-1;
+ }
+ // no Hangul in fully decomposed mapping
+ const Norm *starterNorm=&getNormRef(starter);
+ if(starterNorm->compositions==NULL) {
+ return FALSE; // the last starter does not combine forward
+ }
+ // Compose as far as possible, and see if further compositions are possible.
+ // prevCC is the ccc of the most recent mark that was skipped (not composed); 0 if none.
+ uint8_t prevCC=0;
+ // combMarkIndex is advanced only in the else branch: after setComposite()
+ // the next mark has moved down into the same index.
+ for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndex<buffer.length();) {
+ uint8_t cc=buffer.ccAt(combMarkIndex); // !=0 because after last starter
+ if(combinesWithCCBetween(*starterNorm, prevCC, cc)) {
+ return TRUE;
+ }
+ // combine() yields a negative value when there is no composite (see the >=0 test).
+ if( prevCC<cc &&
+ (starter=combine(*starterNorm, buffer.charAt(combMarkIndex)))>=0
+ ) {
+ buffer.setComposite(starter, combMarkIndex);
+ starterNorm=&getNormRef(starter);
+ if(starterNorm->compositions==NULL) {
+ return FALSE; // the composite does not combine further
+ }
+ } else {
+ prevCC=cc;
+ ++combMarkIndex;
+ }
+ }
+ // TRUE if the final, forward-combining starter is at the end.
+ return prevCC==0;
+}
+
+// Requires p->hasMapping().
+// Appends the encoded mapping of c to dataString:
+// a first unit (mapping length, trail ccc in bits 8 and up, plus flag bits),
+// an optional second unit (ccc of c and lead ccc of the mapping, only if non-zero),
+// and then the mapping string itself.
+// Exits with U_INVALID_FORMAT_ERROR if the mapping is too long or if c is below
+// Normalizer2Impl::MIN_CCC_LCCC_CP and has a non-zero ccc or lead ccc.
+void Normalizer2DataBuilder::writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString) {
+    UnicodeString &mapping=*p->mapping;
+    const int32_t unitCount=mapping.length();
+    if(unitCount>Normalizer2Impl::MAPPING_LENGTH_MASK) {
+        fprintf(stderr,
+                "gennorm2 error: "
+                "mapping for U+%04lX longer than maximum of %d\n",
+                (long)c, Normalizer2Impl::MAPPING_LENGTH_MASK);
+        exit(U_INVALID_FORMAT_ERROR);
+    }
+
+    // ccc of the first and of the last code point of the mapping; both 0 if it is empty.
+    int32_t leadCC=0;
+    int32_t trailCC=0;
+    if(unitCount!=0) {
+        leadCC=getCC(mapping.char32At(0));
+        trailCC=getCC(mapping.char32At(unitCount-1));
+    }
+    if((p->cc!=0 || leadCC!=0) && c<Normalizer2Impl::MIN_CCC_LCCC_CP) {
+        fprintf(stderr,
+                "gennorm2 error: "
+                "U+%04lX below U+0300 has ccc!=0 or lccc!=0, not supported by ICU\n",
+                (long)c);
+        exit(U_INVALID_FORMAT_ERROR);
+    }
+
+    const int32_t cccLcccWord=p->cc|(leadCC<<8);
+    const UBool hasCccLcccWord=(UBool)(cccLcccWord!=0);
+
+    int32_t header=unitCount|(trailCC<<8);
+    if(hasCccLcccWord) {
+        header|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD;
+    }
+    if(p->compositions!=NULL) {
+        header|=Normalizer2Impl::MAPPING_PLUS_COMPOSITION_LIST;
+    }
+    if(p->hasNoCompBoundaryAfter) {
+        header|=Normalizer2Impl::MAPPING_NO_COMP_BOUNDARY_AFTER;
+    }
+
+    dataString.append((UChar)header);
+    if(hasCccLcccWord) {
+        dataString.append((UChar)cccLcccWord);
+    }
+    dataString.append(mapping);
+}
+
+// Requires p->compositions!=NULL.
+// Appends the composition list of c to dataString, one tuple of two or three
+// 16-bit units per (trail, composite) pair, in the order stored in p->compositions.
+// The last tuple is marked with Normalizer2Impl::COMP_1_LAST_TUPLE in its first unit.
+// Exits with U_INVALID_FORMAT_ERROR if c has a non-zero ccc.
+void Normalizer2DataBuilder::writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString) {
+ if(p->cc!=0) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n",
+ (long)c);
+ exit(U_INVALID_FORMAT_ERROR);
+ }
+ int32_t length=p->compositions->size();
+ for(int32_t i=0; i<length; ++i) {
+ CompositionPair &pair=p->compositions->at(i);
+ // 22 bits for the composite character and whether it combines forward.
+ UChar32 compositeAndFwd=pair.composite<<1;
+ if(getNormRef(pair.composite).compositions!=NULL) {
+ compositeAndFwd|=1; // The composite character also combines-forward.
+ }
+ // Encode most pairs in two units and some in three.
+ // thirdUnit stays -1 when only two units are needed.
+ int32_t firstUnit, secondUnit, thirdUnit;
+ if(pair.trail<Normalizer2Impl::COMP_1_TRAIL_LIMIT) {
+ // Small trail: it fits (shifted left by 1) into the first unit.
+ if(compositeAndFwd<=0xffff) {
+ firstUnit=pair.trail<<1;
+ secondUnit=compositeAndFwd;
+ thirdUnit=-1;
+ } else {
+ // compositeAndFwd needs more than 16 bits: split it over units two and three.
+ firstUnit=(pair.trail<<1)|Normalizer2Impl::COMP_1_TRIPLE;
+ secondUnit=compositeAndFwd>>16;
+ thirdUnit=compositeAndFwd;
+ }
+ } else {
+ // Large trail: its upper bits go into the first unit, its lower bits share
+ // the second unit with the upper bits of compositeAndFwd. Always three units.
+ firstUnit=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+
+ (pair.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))|
+ Normalizer2Impl::COMP_1_TRIPLE;
+ secondUnit=(pair.trail<<Normalizer2Impl::COMP_2_TRAIL_SHIFT)|
+ (compositeAndFwd>>16);
+ thirdUnit=compositeAndFwd;
+ }
+ // Set the high bit of the first unit if this is the last composition pair.
+ if(i==(length-1)) {
+ firstUnit|=Normalizer2Impl::COMP_1_LAST_TUPLE;
+ }
+ // The (UChar) casts keep only the low 16 bits of each unit.
+ dataString.append((UChar)firstUnit).append((UChar)secondUnit);
+ if(thirdUnit>=0) {
+ dataString.append((UChar)thirdUnit);
+ }
+ }
+}
+
+// Range enumerator that hands every code point with a non-zero trie value to
+// Normalizer2DataBuilder::writeExtraData() and owns the strings that collect
+// the different kinds of extra data.
+class ExtraDataWriter : public Normalizer2DBEnumerator {
+public:
+    ExtraDataWriter(Normalizer2DataBuilder &b) :
+        Normalizer2DBEnumerator(b),
+        // The first two units are reserved: 0=inert, 1=Jamo L, 2=start of compositions
+        yesYesCompositions(1000, (UChar32)0xffff, 2),
+        // The first unit is reserved: 0=Hangul, 1=start of normal data
+        yesNoData(1000, (UChar32)0, 1) {}
+
+    // Called once per range of code points that share the same value.
+    // Ranges with value 0 are ignored; any other value must belong to
+    // exactly one code point, otherwise the program exits.
+    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
+        if(value==0) {
+            return TRUE;
+        }
+        if(start!=end) {
+            fprintf(stderr,
+                    "gennorm2 error: unexpected shared data for "
+                    "multiple code points U+%04lX..U+%04lX\n",
+                    (long)start, (long)end);
+            exit(U_INTERNAL_PROGRAM_ERROR);
+        }
+        builder.writeExtraData(start, value, *this);
+        return TRUE;
+    }
+
+    UnicodeString maybeYesCompositions;
+    UnicodeString yesYesCompositions;
+    UnicodeString yesNoData;
+    UnicodeString noNoMappings;
+    // Maps an already-written noNo mapping to its offset+1 for de-duplication.
+    Hashtable previousNoNoMappings;  // If constructed in runtime code, pass in UErrorCode.
+};
+
+// Writes the extra data (mapping and/or composition list) for code point c,
+// whose Norm is norms[value], into the matching string of writer, and records
+// in p->offset where the data went and which kind it is (Norm::OFFSET_* type
+// in the low bits, position or delta above Norm::OFFSET_SHIFT).
+//
+// - combines-back: must not have a mapping; compositions go to maybeYesCompositions.
+// - no mapping: compositions (if any) go to yesYesCompositions.
+// - round-trip mapping: mapping plus optional compositions go to yesNoData.
+// - one-way mapping: must not have compositions; encoded as a delta if possible,
+//   otherwise written to noNoMappings with identical mappings shared.
+//
+// Exits with U_INVALID_FORMAT_ERROR for combinations that are rejected above.
+void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer) {
+    Norm *p=norms+value;
+    if(p->combinesBack) {
+        if(p->hasMapping()) {
+            fprintf(stderr,
+                    "gennorm2 error: "
+                    "U+%04lX combines-back and decomposes, not possible in Unicode normalization\n",
+                    (long)c);
+            exit(U_INVALID_FORMAT_ERROR);
+        }
+        if(p->compositions!=NULL) {
+            p->offset=
+                (writer.maybeYesCompositions.length()<<Norm::OFFSET_SHIFT)|
+                Norm::OFFSET_MAYBE_YES;
+            writeCompositions(c, p, writer.maybeYesCompositions);
+        }
+    } else if(!p->hasMapping()) {
+        if(p->compositions!=NULL) {
+            p->offset=
+                (writer.yesYesCompositions.length()<<Norm::OFFSET_SHIFT)|
+                Norm::OFFSET_YES_YES;
+            writeCompositions(c, p, writer.yesYesCompositions);
+        }
+    } else if(p->mappingType==Norm::ROUND_TRIP) {
+        p->offset=
+            (writer.yesNoData.length()<<Norm::OFFSET_SHIFT)|
+            Norm::OFFSET_YES_NO;
+        writeMapping(c, p, writer.yesNoData);
+        if(p->compositions!=NULL) {
+            writeCompositions(c, p, writer.yesNoData);
+        }
+    } else /* one-way */ {
+        if(p->compositions!=NULL) {
+            fprintf(stderr,
+                    "gennorm2 error: "
+                    "U+%04lX combines-forward and has a one-way mapping, "
+                    "not possible in Unicode normalization\n",
+                    (long)c);
+            exit(U_INVALID_FORMAT_ERROR);
+        }
+        if(p->cc==0 && optimization!=OPTIMIZE_FAST) {
+            // Try a compact, algorithmic encoding.
+            // Only for ccc=0, because we can't store additional information.
+            if(p->mappingCP>=0) {
+                int32_t delta=p->mappingCP-c;
+                if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) {
+                    // delta may be negative. Left-shifting a negative value is
+                    // undefined behavior before C++20, so scale by multiplication,
+                    // which yields the same bits on two's-complement machines.
+                    p->offset=(delta*(1<<Norm::OFFSET_SHIFT))|Norm::OFFSET_DELTA;
+                }
+            }
+        }
+        // offset is still 0 if no delta encoding was chosen above.
+        if(p->offset==0) {
+            int32_t oldNoNoLength=writer.noNoMappings.length();
+            writeMapping(c, p, writer.noNoMappings);
+            // The units that writeMapping() just appended, used as the de-duplication key.
+            UnicodeString newMapping=writer.noNoMappings.tempSubString(oldNoNoLength);
+            int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping);
+            if(previousOffset!=0) {
+                // Duplicate, remove the new units and point to the old ones.
+                writer.noNoMappings.truncate(oldNoNoLength);
+                p->offset=
+                    ((previousOffset-1)<<Norm::OFFSET_SHIFT)|
+                    Norm::OFFSET_NO_NO;
+            } else {
+                // Enter this new mapping into the hashtable, avoiding value 0 which is "not found".
+                IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
+                writer.previousNoNoMappings.puti(newMapping, oldNoNoLength+1, errorCode);
+                p->offset=
+                    (oldNoNoLength<<Norm::OFFSET_SHIFT)|
+                    Norm::OFFSET_NO_NO;
+            }
+        }
+    }
+}
+
+// Range enumerator that forwards every range, including ranges with value 0,
+// to Normalizer2DataBuilder::writeNorm16().
+class Norm16Writer : public Normalizer2DBEnumerator {
+public:
+    Norm16Writer(Normalizer2DataBuilder &b)
+        : Normalizer2DBEnumerator(b) {
+    }
+
+    // Always returns TRUE after passing the range on to the builder.
+    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
+        builder.writeNorm16(start, end, value);
+        return TRUE;
+    }
+};
+
+// Computes the 16-bit normalization value for norms[value] from the offset
+// that writeExtraData() stored, sets it in norm16Trie for start..end,
+// and lowers the "minimum code point" indexes if start is below them.
+// Does nothing for value 0.
+void Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, uint32_t value) {
+    if(value==0) {
+        return;
+    }
+    const Norm &norm=norms[value];
+    const int32_t offset=norm.offset>>Norm::OFFSET_SHIFT;
+
+    int32_t norm16=0;
+    UBool isDecompNo=FALSE;     // start may lower IX_MIN_DECOMP_NO_CP
+    UBool isCompNoMaybe=FALSE;  // start may lower IX_MIN_COMP_NO_MAYBE_CP
+    switch(norm.offset&Norm::OFFSET_MASK) {
+    case Norm::OFFSET_NONE:
+        // No mapping, no compositions list.
+        if(norm.combinesBack) {
+            norm16=Normalizer2Impl::MIN_NORMAL_MAYBE_YES+norm.cc;
+            isDecompNo=(UBool)(norm.cc!=0);
+            isCompNoMaybe=TRUE;
+        } else if(norm.cc!=0) {
+            norm16=Normalizer2Impl::MIN_YES_YES_WITH_CC-1+norm.cc;
+            isDecompNo=isCompNoMaybe=TRUE;
+        }
+        break;
+    case Norm::OFFSET_MAYBE_YES:
+        norm16=indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]+offset;
+        isCompNoMaybe=TRUE;
+        break;
+    case Norm::OFFSET_YES_YES:
+        norm16=offset;
+        break;
+    case Norm::OFFSET_YES_NO:
+        norm16=indexes[Normalizer2Impl::IX_MIN_YES_NO]+offset;
+        isDecompNo=TRUE;
+        break;
+    case Norm::OFFSET_NO_NO:
+        norm16=indexes[Normalizer2Impl::IX_MIN_NO_NO]+offset;
+        isDecompNo=isCompNoMaybe=TRUE;
+        break;
+    case Norm::OFFSET_DELTA:
+        norm16=getCenterNoNoDelta()+offset;
+        isDecompNo=isCompNoMaybe=TRUE;
+        break;
+    default:  // Should not occur.
+        exit(U_INTERNAL_PROGRAM_ERROR);
+    }
+
+    IcuToolErrorCode errorCode("gennorm2/writeNorm16()");
+    utrie2_setRange32(norm16Trie, start, end, (uint32_t)norm16, TRUE, errorCode);
+
+    int32_t &minDecompNoCP=indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP];
+    if(isDecompNo && start<minDecompNoCP) {
+        minDecompNoCP=start;
+    }
+    int32_t &minCompNoMaybeCP=indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP];
+    if(isCompNoMaybe && start<minCompNoMaybeCP) {
+        minCompNoMaybeCP=start;
+    }
+}
+
+// Verifies that no code point in the ranges delivered by HangulIterator has a
+// value in norm16Trie yet (exits with U_INVALID_FORMAT_ERROR otherwise), then
+// sets each range to its norm16 value and updates the minimum-code-point indexes.
+void Normalizer2DataBuilder::setHangulData() {
+    HangulIterator hi;
+    const HangulIterator::Range *range;
+
+    // Check that none of the Hangul/Jamo code points have data.
+    for(range=hi.nextRange(); range!=NULL; range=hi.nextRange()) {
+        UChar32 c=range->start;
+        while(c<range->limit) {
+            if(utrie2_get32(norm16Trie, c)!=0) {
+                fprintf(stderr,
+                        "gennorm2 error: "
+                        "illegal mapping/composition/ccc data for Hangul or Jamo U+%04lX\n",
+                        (long)c);
+                exit(U_INVALID_FORMAT_ERROR);
+            }
+            ++c;
+        }
+    }
+
+    // Set data for algorithmic runtime handling.
+    IcuToolErrorCode errorCode("gennorm2/setHangulData()");
+    hi.reset();
+    for(range=hi.nextRange(); range!=NULL; range=hi.nextRange()) {
+        uint16_t norm16=range->norm16;
+        if(norm16!=0) {
+            // Jamo V/T are maybeYes
+            if(range->start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) {
+                indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=range->start;
+            }
+        } else {
+            // Hangul LV/LVT encoded as minYesNo
+            norm16=(uint16_t)indexes[Normalizer2Impl::IX_MIN_YES_NO];
+            if(range->start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) {
+                indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=range->start;
+            }
+        }
+        // limit is exclusive, utrie2_setRange32() takes an inclusive end.
+        utrie2_setRange32(norm16Trie, range->start, range->limit-1, norm16, TRUE, errorCode);
+        errorCode.assertSuccess();
+    }
+}
+
+U_CDECL_BEGIN
+
+// Trie enumeration callback that keeps a running maximum.
+// context points to a uint32_t which is raised to value if value is larger.
+// The start and end of the range are not used. Always returns TRUE.
+static UBool U_CALLCONV
+enumRangeMaxValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) {
+    uint32_t &maxSoFar=*(uint32_t *)context;
+    if(maxSoFar<value) {
+        maxSoFar=value;
+    }
+    return TRUE;
+}
+
+U_CDECL_END
+
+// Turns the collected Norm data (normTrie + norms[]) into the output data:
+// norm16Trie, extraData and the indexes[] that are computed here.
+// The steps below depend on each other and must run in this order.
+// Exits with U_BUFFER_OVERFLOW_ERROR if the extra data does not fit.
+void Normalizer2DataBuilder::processData() {
+ IcuToolErrorCode errorCode("gennorm2/processData()");
+ norm16Trie=utrie2_open(0, 0, errorCode);
+ errorCode.assertSuccess();
+
+ // First pass over all ranges with a CompositionBuilder
+ // (class not shown here; presumably it fills the composition lists -- TODO confirm).
+ utrie2_enum(normTrie, NULL, enumRangeHandler, CompositionBuilder(*this).ptr());
+
+ // Decompose mappings repeatedly until a full pass changes nothing.
+ Decomposer decomposer(*this);
+ do {
+ decomposer.didDecompose=FALSE;
+ utrie2_enum(normTrie, NULL, enumRangeHandler, &decomposer);
+ } while(decomposer.didDecompose);
+
+ // Put each mapping into canonical order and determine its
+ // hasNoCompBoundaryAfter flag. Index 0 is skipped.
+ BuilderReorderingBuffer buffer;
+ int32_t normsLength=utm_countItems(normMem);
+ for(int32_t i=1; i<normsLength; ++i) {
+ if(norms[i].hasMapping()) {
+ buffer.reset();
+ reorder(norms+i, buffer);
+ norms[i].hasNoCompBoundaryAfter=hasNoCompBoundaryAfter(buffer);
+ }
+ }
+
+ // Start the minimum code points above the Unicode range; they are lowered
+ // in writeNorm16() and setHangulData().
+ indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=0x110000;
+ indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=0x110000;
+
+ // Collect mappings and composition lists into the four strings of the writer.
+ ExtraDataWriter extraDataWriter(*this);
+ utrie2_enum(normTrie, NULL, enumRangeHandler, &extraDataWriter);
+
+ // Concatenate in the order maybeYes, yesYes, yesNo, noNo.
+ extraData=extraDataWriter.maybeYesCompositions;
+ extraData.append(extraDataWriter.yesYesCompositions).
+ append(extraDataWriter.yesNoData).
+ append(extraDataWriter.noNoMappings);
+ // Pad to even length for 4-byte alignment of following data.
+ if(extraData.length()&1) {
+ extraData.append((UChar)0);
+ }
+
+ // The thresholds are cumulative lengths of the yesYes, yesNo and noNo strings;
+ // minMaybeYes is placed below MIN_NORMAL_MAYBE_YES by the maybeYes length.
+ indexes[Normalizer2Impl::IX_MIN_YES_NO]=
+ extraDataWriter.yesYesCompositions.length();
+ indexes[Normalizer2Impl::IX_MIN_NO_NO]=
+ indexes[Normalizer2Impl::IX_MIN_YES_NO]+
+ extraDataWriter.yesNoData.length();
+ indexes[Normalizer2Impl::IX_LIMIT_NO_NO]=
+ indexes[Normalizer2Impl::IX_MIN_NO_NO]+
+ extraDataWriter.noNoMappings.length();
+ indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]=
+ Normalizer2Impl::MIN_NORMAL_MAYBE_YES-
+ extraDataWriter.maybeYesCompositions.length();
+
+ // The noNo data must end at or below the lowest delta-encoded value.
+ int32_t minNoNoDelta=getCenterNoNoDelta()-Normalizer2Impl::MAX_DELTA;
+ if(indexes[Normalizer2Impl::IX_LIMIT_NO_NO]>minNoNoDelta) {
+ fprintf(stderr,
+ "gennorm2 error: "
+ "data structure overflow, too much mapping composition data\n");
+ exit(U_BUFFER_OVERFLOW_ERROR);
+ }
+
+ // Now that the indexes are known, write the norm16 values into norm16Trie.
+ utrie2_enum(normTrie, NULL, enumRangeHandler, Norm16Writer(*this).ptr());
+
+ setHangulData();
+
+ // Look for the "worst" norm16 value of any supplementary code point
+ // corresponding to a lead surrogate, and set it as that surrogate's value.
+ // Enables quick check inner loops to look at only code units.
+ //
+ // We could be more sophisticated:
+ // We could collect a bit set for whether there are values in the different
+ // norm16 ranges (yesNo, maybeYes, yesYesWithCC etc.)
+ // and select the best value that only breaks the composition and/or decomposition
+ // inner loops if necessary.
+ // However, that seems like overkill for an optimization for supplementary characters.
+ for(UChar lead=0xd800; lead<0xdc00; ++lead) {
+ // Maximum of the surrogate's own value and the values enumerated for this lead.
+ uint32_t maxValue=utrie2_get32(norm16Trie, lead);
+ utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &maxValue);
+ if( maxValue>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] &&
+ maxValue>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO]
+ ) {
+ // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0.
+ // Otherwise it might end up at something like JAMO_VT which stays in
+ // the inner decomposition quick check loop.
+ maxValue=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1;
+ }
+ utrie2_set32ForLeadSurrogateCodeUnit(norm16Trie, lead, maxValue, errorCode);
+ }
+
+ // Adjust supplementary minimum code points to break quick check loops at their lead surrogates.
+ // For an empty data file, minCP=0x110000 turns into 0xdc00 (first trail surrogate)
+ // which is harmless.
+ // As a result, the minimum code points are always BMP code points.
+ int32_t minCP=indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP];
+ if(minCP>=0x10000) {
+ indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=U16_LEAD(minCP);
+ }
+ minCP=indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP];
+ if(minCP>=0x10000) {
+ indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP);
+ }
+}
+
+// Processes the collected data and writes the binary data file:
+// the indexes[] array, then the serialized norm16 trie, then the 16-bit extra data.
+// filename is passed on to udata_create().
+// Exits the program on any failure, and also if the number of bytes written
+// differs from the size computed here.
+void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
+ processData();
+
+ IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
+ utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode);
+ // Preflight with a NULL buffer to get the serialized length;
+ // U_BUFFER_OVERFLOW_ERROR is the expected outcome, anything else is a failure.
+ int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
+ if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) {
+ fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n",
+ errorCode.errorName());
+ exit(errorCode.reset());
+ }
+ errorCode.reset();
+ LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
+ utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
+ errorCode.assertSuccess();
+
+ // Byte offsets of the parts, counted from the start of indexes[].
+ int32_t offset=(int32_t)sizeof(indexes);
+ indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset;
+ offset+=norm16TrieLength;
+ indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]=offset;
+ // Two bytes per unit of extra data.
+ int32_t totalSize=offset+=extraData.length()*2;
+ // All offsets from IX_RESERVED2_OFFSET through IX_TOTAL_SIZE get the total size.
+ for(int32_t i=Normalizer2Impl::IX_RESERVED2_OFFSET; i<=Normalizer2Impl::IX_TOTAL_SIZE; ++i) {
+ indexes[i]=totalSize;
+ }
+
+ if(beVerbose) {
+ printf("size of normalization trie: %5ld bytes\n", (long)norm16TrieLength);
+ printf("size of 16-bit extra data: %5ld uint16_t\n", (long)extraData.length());
+ printf("size of binary data file contents: %5ld bytes\n", (long)totalSize);
+ printf("minDecompNoCodePoint: U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]);
+ printf("minCompNoMaybeCodePoint: U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]);
+ printf("minYesNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_YES_NO]);
+ printf("minNoNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_NO_NO]);
+ printf("limitNoNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]);
+ printf("minMaybeYes: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]);
+ }
+
+ // The data version of the file is the 4-byte Unicode version.
+ memcpy(dataInfo.dataVersion, unicodeVersion, 4);
+ UNewDataMemory *pData=
+ udata_create(NULL, NULL, filename, &dataInfo,
+ haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode);
+ if(errorCode.isFailure()) {
+ fprintf(stderr, "gennorm2 error: unable to create the output file %s - %s\n",
+ filename, errorCode.errorName());
+ exit(errorCode.reset());
+ }
+ // Write the parts in the same order in which the offsets were computed above.
+ udata_writeBlock(pData, indexes, sizeof(indexes));
+ udata_writeBlock(pData, norm16TrieBytes.getAlias(), norm16TrieLength);
+ udata_writeUString(pData, extraData.getBuffer(), extraData.length());
+
+ int32_t writtenSize=udata_finish(pData, errorCode);
+ if(errorCode.isFailure()) {
+ fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName());
+ exit(errorCode.reset());
+ }
+ // Consistency check of the size bookkeeping above.
+ if(writtenSize!=totalSize) {
+ fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n",
+ (long)writtenSize, (long)totalSize);
+ exit(U_INTERNAL_PROGRAM_ERROR);
+ }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/gennorm2/n2builder.h b/tools/gennorm2/n2builder.h
new file mode 100644
index 00000000..a6eeaae5
--- /dev/null
+++ b/tools/gennorm2/n2builder.h
@@ -0,0 +1,129 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: n2builder.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __N2BUILDER_H__
+#define __N2BUILDER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/std_string.h"
+
+#if !U_HAVE_STD_STRING
+// The gennorm2 implementation uses STL classes like string and vector.
+#undef UCONFIG_NO_NORMALIZATION
+#define UCONFIG_NO_NORMALIZATION 1
+#endif
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/errorcode.h"
+#include "unicode/unistr.h"
+#include "normalizer2impl.h" // for IX_COUNT
+#include "toolutil.h"
+#include "utrie2.h"
+
+U_NAMESPACE_BEGIN
+
+extern UBool beVerbose, haveCopyright;
+
+struct Norm;
+
+class BuilderReorderingBuffer;
+class ExtraDataWriter;
+
+// Collects normalization data (combining classes and mappings) per code point
+// and writes it out as a binary data file via writeBinaryFile().
+class Normalizer2DataBuilder {
+public:
+ Normalizer2DataBuilder(UErrorCode &errorCode);
+ ~Normalizer2DataBuilder();
+
+ // Policy for data that overrides earlier data; the exact rules are
+ // implemented outside this header (see checkNormForMapping()).
+ enum OverrideHandling {
+ OVERRIDE_NONE,
+ OVERRIDE_ANY,
+ OVERRIDE_PREVIOUS
+ };
+
+ void setOverrideHandling(OverrideHandling oh);
+
+ // Selects the optimization setting stored in the optimization member.
+ enum Optimization {
+ OPTIMIZE_NORMAL,
+ OPTIMIZE_FAST
+ };
+
+ void setOptimization(Optimization opt) { optimization=opt; }
+
+ // Setters for the per-code point input data.
+ void setCC(UChar32 c, uint8_t cc);
+ void setOneWayMapping(UChar32 c, const UnicodeString &m);
+ void setRoundTripMapping(UChar32 c, const UnicodeString &m);
+ void removeMapping(UChar32 c);
+
+ void setUnicodeVersion(const char *v);
+
+ // Processes all data and writes the binary file with the given name.
+ void writeBinaryFile(const char *filename);
+
+private:
+ // Helper classes that need access to the private members.
+ friend class CompositionBuilder;
+ friend class Decomposer;
+ friend class ExtraDataWriter;
+ friend class Norm16Writer;
+
+ // No copy constructor nor assignment operator.
+ Normalizer2DataBuilder(const Normalizer2DataBuilder &other);
+ Normalizer2DataBuilder &operator=(const Normalizer2DataBuilder &other);
+
+ // Allocation of and access to the Norm structs.
+ Norm *allocNorm();
+ Norm *getNorm(UChar32 c);
+ Norm *createNorm(UChar32 c);
+ Norm *checkNormForMapping(Norm *p, UChar32 c); // check for permitted overrides
+
+ // Read-only queries.
+ const Norm &getNormRef(UChar32 c) const;
+ uint8_t getCC(UChar32 c) const;
+ UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const;
+ UChar32 combine(const Norm &norm, UChar32 trail) const;
+
+ // Data processing and output steps.
+ void addComposition(UChar32 start, UChar32 end, uint32_t value);
+ UBool decompose(UChar32 start, UChar32 end, uint32_t value);
+ void reorder(Norm *p, BuilderReorderingBuffer &buffer);
+ UBool hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer);
+ void setHangulData();
+ void writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString);
+ void writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString);
+ void writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer);
+ // Requires indexes[IX_MIN_MAYBE_YES] to be set already.
+ int32_t getCenterNoNoDelta() {
+ return indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]-Normalizer2Impl::MAX_DELTA-1;
+ }
+ void writeNorm16(UChar32 start, UChar32 end, uint32_t value);
+ void processData();
+
+ // Input data: the .cpp code uses normTrie values as indexes into norms[]
+ // and counts the Norm items via normMem.
+ UTrie2 *normTrie;
+ UToolMemory *normMem;
+ Norm *norms;
+
+ int32_t phase;
+ OverrideHandling overrideHandling;
+
+ Optimization optimization;
+
+ // Output data for the binary file.
+ int32_t indexes[Normalizer2Impl::IX_COUNT];
+ UTrie2 *norm16Trie;
+ UnicodeString extraData;
+
+ UVersionInfo unicodeVersion;
+};
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_NORMALIZATION
+
+#endif // __N2BUILDER_H__
diff --git a/tools/genpname/Makefile.in b/tools/genpname/Makefile.in
deleted file mode 100644
index 0a592a9f..00000000
--- a/tools/genpname/Makefile.in
+++ /dev/null
@@ -1,97 +0,0 @@
-## Makefile.in for ICU - tools/genpname
-## Copyright (c) 1999-2005, International Business Machines Corporation and
-## others. All Rights Reserved.
-## Steven R. Loomis
-
-## Source directory information
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-
-top_builddir = ../..
-
-include $(top_builddir)/icudefs.mk
-
-## Build directory information
-subdir = tools/genpname
-
-TARGET_STUB_NAME = genpname
-
-SECTION = 8
-
-MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
-
-
-## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS)
-
-## Target information
-TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
-
-ifneq ($(top_builddir),$(top_srcdir))
-CPPFLAGS += -I$(top_builddir)/common
-endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
-LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-
-OBJECTS = genpname.o
-
-DEPS = $(OBJECTS:.o=.d)
-
-## List of phony targets
-.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man
-
-## Clear suffix list
-.SUFFIXES :
-
-## List of standard targets
-all: all-local
-install: install-local
-clean: clean-local
-distclean : distclean-local
-dist: dist-local
-check: all check-local
-
-all-local: $(TARGET)
-
-install-local: all-local
-# $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
-# $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
-
-install-man: $(MAN_FILES)
-# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
-# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
-
-dist-local:
-
-clean-local:
- test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(OBJECTS)
-
-distclean-local: clean-local
- $(RMV) Makefile
-
-check-local: all-local
-
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(TARGET) : $(OBJECTS)
- $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
- $(POST_BUILD_STEP)
-
-
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
-ifeq (,$(MAKECMDGOALS))
--include $(DEPS)
-else
-ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
--include $(DEPS)
-endif
-endif
-
diff --git a/tools/genpname/SyntheticPropertyAliases.txt b/tools/genpname/SyntheticPropertyAliases.txt
deleted file mode 100644
index 77d71f23..00000000
--- a/tools/genpname/SyntheticPropertyAliases.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-######################################################################
-# Copyright (c) 2003-2005, International Business Machines
-# Corporation and others. All Rights Reserved.
-######################################################################
-# Author: Alan Liu
-# Created: February 20 2003
-# Since: ICU 2.6
-######################################################################
-
-# This file follows the format of PropertyAliases.txt
-# It contains synthetic property aliases not present
-# in the UCD. Unlike PropertyAliases.txt, it should
-# NOT contain a version number.
-# ================================================
-
-# ================================================
-# Non-enumerated Properties
-# ================================================
-
-# ================================================
-# Enumerated Non-Binary Properties
-# ================================================
-
-# lccc(c)=ccc(NFD(c)[0])
-# tccc(c)=ccc(NFD(c)[last])
-lccc; Lead_Canonical_Combining_Class
-tccc; Trail_Canonical_Combining_Class
-
-# ================================================
-# Bitmask Properties
-# ================================================
-gcm ; General_Category_Mask
-
-# ================================================
-# Binary Properties
-# ================================================
-Sensitive ; Case_Sensitive
-
-nfdinert; NFD_Inert
-nfkdinert; NFKD_Inert
-nfcinert; NFC_Inert
-nfkcinert; NFKC_Inert
-
-segstart; Segment_Starter
-
-# C/POSIX character classes that do not have Unicode property [value] aliases
-# see uchar.h
-n/a; alnum
-n/a; blank
-n/a; graph
-n/a; print
-n/a; xdigit
diff --git a/tools/genpname/SyntheticPropertyValueAliases.txt b/tools/genpname/SyntheticPropertyValueAliases.txt
deleted file mode 100644
index 63d455a2..00000000
--- a/tools/genpname/SyntheticPropertyValueAliases.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-########################################################################
-# Copyright (c) 2006-2008, International Business Machines
-# Corporation and others. All Rights Reserved.
-########################################################################
-# file name: SyntheticPropertyValueAliases.txt
-# encoding: US-ASCII
-# tab size: 8 (not used)
-# indentation: 4
-# created by: gensvpa.pl
-########################################################################
-
-# This file follows the format of PropertyValueAliases.txt
-# It contains synthetic property value aliases not present
-# in the UCD. Unlike PropertyValueAliases.txt, it should
-# NOT contain a version number.
-
-########################################################################
-# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
-# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-########################################################################
-
-# set the same names as short and long names to fit the syntax without
-# inventing names that we would have to support forever
-
-# Script (sc)
-
-sc ; Batk ; Batk
-sc ; Blis ; Blis
-sc ; Brah ; Brah
-sc ; Cirt ; Cirt
-sc ; Cyrs ; Cyrs
-sc ; Egyd ; Egyd
-sc ; Egyh ; Egyh
-sc ; Egyp ; Egyp
-sc ; Geok ; Geok
-sc ; Hans ; Hans
-sc ; Hant ; Hant
-sc ; Hmng ; Hmng
-sc ; Hung ; Hung
-sc ; Inds ; Inds
-sc ; Java ; Java
-sc ; Jpan ; Jpan
-sc ; Lana ; Lana
-sc ; Latf ; Latf
-sc ; Latg ; Latg
-sc ; Lina ; Lina
-sc ; Mand ; Mand
-sc ; Maya ; Maya
-sc ; Mero ; Mero
-sc ; Moon ; Moon
-sc ; Mtei ; Mtei
-sc ; Orkh ; Orkh
-sc ; Perm ; Perm
-sc ; Plrd ; Plrd
-sc ; Roro ; Roro
-sc ; Sara ; Sara
-sc ; Sgnw ; Sgnw
-sc ; Syre ; Syre
-sc ; Syrj ; Syrj
-sc ; Syrn ; Syrn
-sc ; Teng ; Teng
-sc ; Visp ; Visp
-sc ; Zxxx ; Zxxx
-
-sc ; Armi ; Armi
-sc ; Avst ; Avst
-sc ; Cakm ; Cakm
-sc ; Kore ; Kore
-sc ; Kthi ; Kthi
-sc ; Mani ; Mani
-sc ; Phli ; Phli
-sc ; Phlp ; Phlp
-sc ; Phlv ; Phlv
-sc ; Prti ; Prti
-sc ; Samr ; Samr
-sc ; Tavt ; Tavt
-sc ; Zmth ; Zmth
-sc ; Zsym ; Zsym
diff --git a/tools/genpname/data.h b/tools/genpname/data.h
deleted file mode 100644
index d727118b..00000000
--- a/tools/genpname/data.h
+++ /dev/null
@@ -1,2363 +0,0 @@
-/**
- * Copyright (C) 2002-2008, International Business Machines Corporation and
- * others. All Rights Reserved.
- *
- * MACHINE GENERATED FILE. !!! Do not edit manually !!!
- *
- * Generated from
- * uchar.h
- * uscript.h
- * Blocks.txt
- * PropertyAliases.txt
- * PropertyValueAliases.txt
- *
- * Date: Fri Feb 29 14:11:29 2008
- * Unicode version: 5.1.0
- * Script: preparse.pl
- */
-
-/* Unicode version 5.1.0 */
-const uint8_t VERSION_0 = 5;
-const uint8_t VERSION_1 = 1;
-const uint8_t VERSION_2 = 0;
-const uint8_t VERSION_3 = 0;
-
-const int32_t STRING_COUNT = 859;
-
-/* to be sorted */
-const AliasName STRING_TABLE[] = {
- AliasName("", 0),
- AliasName("A", 1),
- AliasName("AHex", 2),
- AliasName("AI", 3),
- AliasName("AL", 4),
- AliasName("ALetter", 5),
- AliasName("AN", 6),
- AliasName("AR", 7),
- AliasName("ASCII", 8),
- AliasName("ASCII_Hex_Digit", 9),
- AliasName("AT", 10),
- AliasName("ATAR", 11),
- AliasName("ATB", 12),
- AliasName("ATBL", 13),
- AliasName("ATerm", 14),
- AliasName("Above", 15),
- AliasName("Above_Left", 16),
- AliasName("Above_Right", 17),
- AliasName("Aegean_Numbers", 18),
- AliasName("Age", 19),
- AliasName("Ain", 20),
- AliasName("Alaph", 21),
- AliasName("Alef", 22),
- AliasName("Alpha", 23),
- AliasName("Alphabetic", 24),
- AliasName("Alphabetic_Presentation_Forms", 25),
- AliasName("Ambiguous", 26),
- AliasName("Ancient_Greek_Musical_Notation", 27),
- AliasName("Ancient_Greek_Numbers", 28),
- AliasName("Ancient_Symbols", 29),
- AliasName("Arab", 30),
- AliasName("Arabic", 31),
- AliasName("Arabic_Letter", 32),
- AliasName("Arabic_Number", 33),
- AliasName("Arabic_Presentation_Forms-A", 34),
- AliasName("Arabic_Presentation_Forms_A", 35),
- AliasName("Arabic_Presentation_Forms_B", 36),
- AliasName("Arabic_Supplement", 37),
- AliasName("Armenian", 38),
- AliasName("Armi", 39),
- AliasName("Armn", 40),
- AliasName("Arrows", 41),
- AliasName("Attached_Above_Right", 42),
- AliasName("Attached_Below", 43),
- AliasName("Attached_Below_Left", 44),
- AliasName("Avst", 45),
- AliasName("B", 46),
- AliasName("B2", 47),
- AliasName("BA", 48),
- AliasName("BB", 49),
- AliasName("BK", 50),
- AliasName("BL", 51),
- AliasName("BN", 52),
- AliasName("BR", 53),
- AliasName("Bali", 54),
- AliasName("Balinese", 55),
- AliasName("Basic_Latin", 56),
- AliasName("Batk", 57),
- AliasName("Beh", 58),
- AliasName("Below", 59),
- AliasName("Below_Left", 60),
- AliasName("Below_Right", 61),
- AliasName("Beng", 62),
- AliasName("Bengali", 63),
- AliasName("Beth", 64),
- AliasName("Bidi_C", 65),
- AliasName("Bidi_Class", 66),
- AliasName("Bidi_Control", 67),
- AliasName("Bidi_M", 68),
- AliasName("Bidi_Mirrored", 69),
- AliasName("Bidi_Mirroring_Glyph", 70),
- AliasName("Blis", 71),
- AliasName("Block", 72),
- AliasName("Block_Elements", 73),
- AliasName("Bopo", 74),
- AliasName("Bopomofo", 75),
- AliasName("Bopomofo_Extended", 76),
- AliasName("Boundary_Neutral", 77),
- AliasName("Box_Drawing", 78),
- AliasName("Brah", 79),
- AliasName("Brai", 80),
- AliasName("Braille", 81),
- AliasName("Braille_Patterns", 82),
- AliasName("Break_After", 83),
- AliasName("Break_Before", 84),
- AliasName("Break_Both", 85),
- AliasName("Break_Symbols", 86),
- AliasName("Bugi", 87),
- AliasName("Buginese", 88),
- AliasName("Buhd", 89),
- AliasName("Buhid", 90),
- AliasName("Burushaski_Yeh_Barree", 91),
- AliasName("Byzantine_Musical_Symbols", 92),
- AliasName("C", 93),
- AliasName("CB", 94),
- AliasName("CJK_Compatibility", 95),
- AliasName("CJK_Compatibility_Forms", 96),
- AliasName("CJK_Compatibility_Ideographs", 97),
- AliasName("CJK_Compatibility_Ideographs_Supplement", 98),
- AliasName("CJK_Radicals_Supplement", 99),
- AliasName("CJK_Strokes", 100),
- AliasName("CJK_Symbols_And_Punctuation", 101),
- AliasName("CJK_Unified_Ideographs", 102),
- AliasName("CJK_Unified_Ideographs_Extension_A", 103),
- AliasName("CJK_Unified_Ideographs_Extension_B", 104),
- AliasName("CL", 105),
- AliasName("CM", 106),
- AliasName("CN", 107),
- AliasName("CR", 108),
- AliasName("CS", 109),
- AliasName("Cakm", 110),
- AliasName("Can", 111),
- AliasName("Canadian_Aboriginal", 112),
- AliasName("Canadian_Syllabics", 113),
- AliasName("Canonical", 114),
- AliasName("Canonical_Combining_Class", 115),
- AliasName("Cans", 116),
- AliasName("Cari", 117),
- AliasName("Carian", 118),
- AliasName("Carriage_Return", 119),
- AliasName("Case_Folding", 120),
- AliasName("Case_Sensitive", 121),
- AliasName("Cased_Letter", 122),
- AliasName("Cc", 123),
- AliasName("Cf", 124),
- AliasName("Cham", 125),
- AliasName("Cher", 126),
- AliasName("Cherokee", 127),
- AliasName("Circle", 128),
- AliasName("Cirt", 129),
- AliasName("Close", 130),
- AliasName("Close_Punctuation", 131),
- AliasName("Cn", 132),
- AliasName("Co", 133),
- AliasName("Com", 134),
- AliasName("Combining_Diacritical_Marks", 135),
- AliasName("Combining_Diacritical_Marks_For_Symbols", 136),
- AliasName("Combining_Diacritical_Marks_Supplement", 137),
- AliasName("Combining_Half_Marks", 138),
- AliasName("Combining_Mark", 139),
- AliasName("Combining_Marks_For_Symbols", 140),
- AliasName("Common", 141),
- AliasName("Common_Separator", 142),
- AliasName("Comp_Ex", 143),
- AliasName("Compat", 144),
- AliasName("Complex_Context", 145),
- AliasName("Connector_Punctuation", 146),
- AliasName("Contingent_Break", 147),
- AliasName("Control", 148),
- AliasName("Control_Pictures", 149),
- AliasName("Copt", 150),
- AliasName("Coptic", 151),
- AliasName("Counting_Rod_Numerals", 152),
- AliasName("Cprt", 153),
- AliasName("Cs", 154),
- AliasName("Cuneiform", 155),
- AliasName("Cuneiform_Numbers_And_Punctuation", 156),
- AliasName("Currency_Symbol", 157),
- AliasName("Currency_Symbols", 158),
- AliasName("Cypriot", 159),
- AliasName("Cypriot_Syllabary", 160),
- AliasName("Cyrillic", 161),
- AliasName("Cyrillic_Extended_A", 162),
- AliasName("Cyrillic_Extended_B", 163),
- AliasName("Cyrillic_Supplement", 164),
- AliasName("Cyrillic_Supplementary", 165),
- AliasName("Cyrl", 166),
- AliasName("Cyrs", 167),
- AliasName("D", 168),
- AliasName("DA", 169),
- AliasName("DB", 170),
- AliasName("DI", 171),
- AliasName("Dal", 172),
- AliasName("Dalath_Rish", 173),
- AliasName("Dash", 174),
- AliasName("Dash_Punctuation", 175),
- AliasName("De", 176),
- AliasName("Decimal", 177),
- AliasName("Decimal_Number", 178),
- AliasName("Decomposition_Type", 179),
- AliasName("Default_Ignorable_Code_Point", 180),
- AliasName("Dep", 181),
- AliasName("Deprecated", 182),
- AliasName("Deseret", 183),
- AliasName("Deva", 184),
- AliasName("Devanagari", 185),
- AliasName("Di", 186),
- AliasName("Dia", 187),
- AliasName("Diacritic", 188),
- AliasName("Digit", 189),
- AliasName("Dingbats", 190),
- AliasName("Domino_Tiles", 191),
- AliasName("Double_Above", 192),
- AliasName("Double_Below", 193),
- AliasName("Dsrt", 194),
- AliasName("Dual_Joining", 195),
- AliasName("E", 196),
- AliasName("EN", 197),
- AliasName("ES", 198),
- AliasName("ET", 199),
- AliasName("EX", 200),
- AliasName("East_Asian_Width", 201),
- AliasName("Egyd", 202),
- AliasName("Egyh", 203),
- AliasName("Egyp", 204),
- AliasName("Enc", 205),
- AliasName("Enclosed_Alphanumerics", 206),
- AliasName("Enclosed_CJK_Letters_And_Months", 207),
- AliasName("Enclosing_Mark", 208),
- AliasName("Ethi", 209),
- AliasName("Ethiopic", 210),
- AliasName("Ethiopic_Extended", 211),
- AliasName("Ethiopic_Supplement", 212),
- AliasName("European_Number", 213),
- AliasName("European_Separator", 214),
- AliasName("European_Terminator", 215),
- AliasName("Exclamation", 216),
- AliasName("Ext", 217),
- AliasName("Extend", 218),
- AliasName("ExtendNumLet", 219),
- AliasName("Extender", 220),
- AliasName("F", 221),
- AliasName("FO", 222),
- AliasName("False", 223),
- AliasName("Fe", 224),
- AliasName("Feh", 225),
- AliasName("Fin", 226),
- AliasName("Final", 227),
- AliasName("Final_Punctuation", 228),
- AliasName("Final_Semkath", 229),
- AliasName("Font", 230),
- AliasName("Format", 231),
- AliasName("Fra", 232),
- AliasName("Fraction", 233),
- AliasName("Full_Composition_Exclusion", 234),
- AliasName("Fullwidth", 235),
- AliasName("GCB", 236),
- AliasName("GL", 237),
- AliasName("Gaf", 238),
- AliasName("Gamal", 239),
- AliasName("General_Category", 240),
- AliasName("General_Category_Mask", 241),
- AliasName("General_Punctuation", 242),
- AliasName("Geok", 243),
- AliasName("Geometric_Shapes", 244),
- AliasName("Geor", 245),
- AliasName("Georgian", 246),
- AliasName("Georgian_Supplement", 247),
- AliasName("Glag", 248),
- AliasName("Glagolitic", 249),
- AliasName("Glue", 250),
- AliasName("Goth", 251),
- AliasName("Gothic", 252),
- AliasName("Gr_Base", 253),
- AliasName("Gr_Ext", 254),
- AliasName("Gr_Link", 255),
- AliasName("Grapheme_Base", 256),
- AliasName("Grapheme_Cluster_Break", 257),
- AliasName("Grapheme_Extend", 258),
- AliasName("Grapheme_Link", 259),
- AliasName("Greek", 260),
- AliasName("Greek_And_Coptic", 261),
- AliasName("Greek_Extended", 262),
- AliasName("Grek", 263),
- AliasName("Gujarati", 264),
- AliasName("Gujr", 265),
- AliasName("Gurmukhi", 266),
- AliasName("Guru", 267),
- AliasName("H", 268),
- AliasName("H2", 269),
- AliasName("H3", 270),
- AliasName("HY", 271),
- AliasName("Hah", 272),
- AliasName("Halfwidth", 273),
- AliasName("Halfwidth_And_Fullwidth_Forms", 274),
- AliasName("Hamza_On_Heh_Goal", 275),
- AliasName("Han", 276),
- AliasName("Hang", 277),
- AliasName("Hangul", 278),
- AliasName("Hangul_Compatibility_Jamo", 279),
- AliasName("Hangul_Jamo", 280),
- AliasName("Hangul_Syllable_Type", 281),
- AliasName("Hangul_Syllables", 282),
- AliasName("Hani", 283),
- AliasName("Hano", 284),
- AliasName("Hans", 285),
- AliasName("Hant", 286),
- AliasName("Hanunoo", 287),
- AliasName("He", 288),
- AliasName("Hebr", 289),
- AliasName("Hebrew", 290),
- AliasName("Heh", 291),
- AliasName("Heh_Goal", 292),
- AliasName("Heth", 293),
- AliasName("Hex", 294),
- AliasName("Hex_Digit", 295),
- AliasName("High_Private_Use_Surrogates", 296),
- AliasName("High_Surrogates", 297),
- AliasName("Hira", 298),
- AliasName("Hiragana", 299),
- AliasName("Hmng", 300),
- AliasName("Hrkt", 301),
- AliasName("Hung", 302),
- AliasName("Hyphen", 303),
- AliasName("ID", 304),
- AliasName("IDC", 305),
- AliasName("IDS", 306),
- AliasName("IDSB", 307),
- AliasName("IDST", 308),
- AliasName("IDS_Binary_Operator", 309),
- AliasName("IDS_Trinary_Operator", 310),
- AliasName("ID_Continue", 311),
- AliasName("ID_Start", 312),
- AliasName("IN", 313),
- AliasName("IPA_Extensions", 314),
- AliasName("IS", 315),
- AliasName("ISO_Comment", 316),
- AliasName("Ideo", 317),
- AliasName("Ideographic", 318),
- AliasName("Ideographic_Description_Characters", 319),
- AliasName("Inds", 320),
- AliasName("Infix_Numeric", 321),
- AliasName("Inherited", 322),
- AliasName("Init", 323),
- AliasName("Initial", 324),
- AliasName("Initial_Punctuation", 325),
- AliasName("Inseparable", 326),
- AliasName("Inseperable", 327),
- AliasName("Iota_Subscript", 328),
- AliasName("Iso", 329),
- AliasName("Isolated", 330),
- AliasName("Ital", 331),
- AliasName("JL", 332),
- AliasName("JT", 333),
- AliasName("JV", 334),
- AliasName("Java", 335),
- AliasName("Join_C", 336),
- AliasName("Join_Causing", 337),
- AliasName("Join_Control", 338),
- AliasName("Joining_Group", 339),
- AliasName("Joining_Type", 340),
- AliasName("Jpan", 341),
- AliasName("KA", 342),
- AliasName("KV", 343),
- AliasName("Kaf", 344),
- AliasName("Kali", 345),
- AliasName("Kana", 346),
- AliasName("Kana_Voicing", 347),
- AliasName("Kanbun", 348),
- AliasName("Kangxi_Radicals", 349),
- AliasName("Kannada", 350),
- AliasName("Kaph", 351),
- AliasName("Katakana", 352),
- AliasName("Katakana_Or_Hiragana", 353),
- AliasName("Katakana_Phonetic_Extensions", 354),
- AliasName("Kayah_Li", 355),
- AliasName("Khaph", 356),
- AliasName("Khar", 357),
- AliasName("Kharoshthi", 358),
- AliasName("Khmer", 359),
- AliasName("Khmer_Symbols", 360),
- AliasName("Khmr", 361),
- AliasName("Knda", 362),
- AliasName("Knotted_Heh", 363),
- AliasName("Kore", 364),
- AliasName("Kthi", 365),
- AliasName("L", 366),
- AliasName("LC", 367),
- AliasName("LE", 368),
- AliasName("LF", 369),
- AliasName("LO", 370),
- AliasName("LOE", 371),
- AliasName("LRE", 372),
- AliasName("LRO", 373),
- AliasName("LV", 374),
- AliasName("LVT", 375),
- AliasName("LVT_Syllable", 376),
- AliasName("LV_Syllable", 377),
- AliasName("Lam", 378),
- AliasName("Lamadh", 379),
- AliasName("Lana", 380),
- AliasName("Lao", 381),
- AliasName("Laoo", 382),
- AliasName("Latf", 383),
- AliasName("Latg", 384),
- AliasName("Latin", 385),
- AliasName("Latin_1", 386),
- AliasName("Latin_1_Supplement", 387),
- AliasName("Latin_Extended_A", 388),
- AliasName("Latin_Extended_Additional", 389),
- AliasName("Latin_Extended_B", 390),
- AliasName("Latin_Extended_C", 391),
- AliasName("Latin_Extended_D", 392),
- AliasName("Latn", 393),
- AliasName("Lead_Canonical_Combining_Class", 394),
- AliasName("Leading_Jamo", 395),
- AliasName("Left", 396),
- AliasName("Left_Joining", 397),
- AliasName("Left_To_Right", 398),
- AliasName("Left_To_Right_Embedding", 399),
- AliasName("Left_To_Right_Override", 400),
- AliasName("Lepc", 401),
- AliasName("Lepcha", 402),
- AliasName("Letter", 403),
- AliasName("Letter_Number", 404),
- AliasName("Letterlike_Symbols", 405),
- AliasName("Limb", 406),
- AliasName("Limbu", 407),
- AliasName("Lina", 408),
- AliasName("Linb", 409),
- AliasName("Line_Break", 410),
- AliasName("Line_Feed", 411),
- AliasName("Line_Separator", 412),
- AliasName("Linear_B", 413),
- AliasName("Linear_B_Ideograms", 414),
- AliasName("Linear_B_Syllabary", 415),
- AliasName("Ll", 416),
- AliasName("Lm", 417),
- AliasName("Lo", 418),
- AliasName("Logical_Order_Exception", 419),
- AliasName("Low_Surrogates", 420),
- AliasName("Lower", 421),
- AliasName("Lowercase", 422),
- AliasName("Lowercase_Letter", 423),
- AliasName("Lowercase_Mapping", 424),
- AliasName("Lt", 425),
- AliasName("Lu", 426),
- AliasName("Lyci", 427),
- AliasName("Lycian", 428),
- AliasName("Lydi", 429),
- AliasName("Lydian", 430),
- AliasName("M", 431),
- AliasName("MB", 432),
- AliasName("ML", 433),
- AliasName("MN", 434),
- AliasName("Mahjong_Tiles", 435),
- AliasName("Malayalam", 436),
- AliasName("Mand", 437),
- AliasName("Mandatory_Break", 438),
- AliasName("Mani", 439),
- AliasName("Mark", 440),
- AliasName("Math", 441),
- AliasName("Math_Symbol", 442),
- AliasName("Mathematical_Alphanumeric_Symbols", 443),
- AliasName("Mathematical_Operators", 444),
- AliasName("Maya", 445),
- AliasName("Maybe", 446),
- AliasName("Mc", 447),
- AliasName("Me", 448),
- AliasName("Med", 449),
- AliasName("Medial", 450),
- AliasName("Meem", 451),
- AliasName("Mero", 452),
- AliasName("MidLetter", 453),
- AliasName("MidNum", 454),
- AliasName("MidNumLet", 455),
- AliasName("Mim", 456),
- AliasName("Miscellaneous_Mathematical_Symbols_A", 457),
- AliasName("Miscellaneous_Mathematical_Symbols_B", 458),
- AliasName("Miscellaneous_Symbols", 459),
- AliasName("Miscellaneous_Symbols_And_Arrows", 460),
- AliasName("Miscellaneous_Technical", 461),
- AliasName("Mlym", 462),
- AliasName("Mn", 463),
- AliasName("Modifier_Letter", 464),
- AliasName("Modifier_Symbol", 465),
- AliasName("Modifier_Tone_Letters", 466),
- AliasName("Mong", 467),
- AliasName("Mongolian", 468),
- AliasName("Moon", 469),
- AliasName("Mtei", 470),
- AliasName("Musical_Symbols", 471),
- AliasName("Myanmar", 472),
- AliasName("Mymr", 473),
- AliasName("N", 474),
- AliasName("NA", 475),
- AliasName("NChar", 476),
- AliasName("NFC_Inert", 477),
- AliasName("NFC_QC", 478),
- AliasName("NFC_Quick_Check", 479),
- AliasName("NFD_Inert", 480),
- AliasName("NFD_QC", 481),
- AliasName("NFD_Quick_Check", 482),
- AliasName("NFKC_Inert", 483),
- AliasName("NFKC_QC", 484),
- AliasName("NFKC_Quick_Check", 485),
- AliasName("NFKD_Inert", 486),
- AliasName("NFKD_QC", 487),
- AliasName("NFKD_Quick_Check", 488),
- AliasName("NK", 489),
- AliasName("NKo", 490),
- AliasName("NL", 491),
- AliasName("NR", 492),
- AliasName("NS", 493),
- AliasName("NSM", 494),
- AliasName("NU", 495),
- AliasName("Na", 496),
- AliasName("Name", 497),
- AliasName("Nar", 498),
- AliasName("Narrow", 499),
- AliasName("Nb", 500),
- AliasName("Nd", 501),
- AliasName("Neutral", 502),
- AliasName("New_Tai_Lue", 503),
- AliasName("Newline", 504),
- AliasName("Next_Line", 505),
- AliasName("Nko", 506),
- AliasName("Nkoo", 507),
- AliasName("Nl", 508),
- AliasName("No", 509),
- AliasName("No_Block", 510),
- AliasName("No_Joining_Group", 511),
- AliasName("Nobreak", 512),
- AliasName("Non_Joining", 513),
- AliasName("Noncharacter_Code_Point", 514),
- AliasName("None", 515),
- AliasName("Nonspacing_Mark", 516),
- AliasName("Nonstarter", 517),
- AliasName("Noon", 518),
- AliasName("Not_Applicable", 519),
- AliasName("Not_Reordered", 520),
- AliasName("Nu", 521),
- AliasName("Nukta", 522),
- AliasName("Number", 523),
- AliasName("Number_Forms", 524),
- AliasName("Numeric", 525),
- AliasName("Numeric_Type", 526),
- AliasName("Numeric_Value", 527),
- AliasName("Nun", 528),
- AliasName("OLetter", 529),
- AliasName("ON", 530),
- AliasName("OP", 531),
- AliasName("OV", 532),
- AliasName("Ogam", 533),
- AliasName("Ogham", 534),
- AliasName("Ol_Chiki", 535),
- AliasName("Olck", 536),
- AliasName("Old_Italic", 537),
- AliasName("Old_Persian", 538),
- AliasName("Open_Punctuation", 539),
- AliasName("Optical_Character_Recognition", 540),
- AliasName("Oriya", 541),
- AliasName("Orkh", 542),
- AliasName("Orya", 543),
- AliasName("Osma", 544),
- AliasName("Osmanya", 545),
- AliasName("Other", 546),
- AliasName("Other_Letter", 547),
- AliasName("Other_Neutral", 548),
- AliasName("Other_Number", 549),
- AliasName("Other_Punctuation", 550),
- AliasName("Other_Symbol", 551),
- AliasName("Overlay", 552),
- AliasName("P", 553),
- AliasName("PDF", 554),
- AliasName("PO", 555),
- AliasName("PP", 556),
- AliasName("PR", 557),
- AliasName("Paragraph_Separator", 558),
- AliasName("Pat_Syn", 559),
- AliasName("Pat_WS", 560),
- AliasName("Pattern_Syntax", 561),
- AliasName("Pattern_White_Space", 562),
- AliasName("Pc", 563),
- AliasName("Pd", 564),
- AliasName("Pe", 565),
- AliasName("Perm", 566),
- AliasName("Pf", 567),
- AliasName("Phag", 568),
- AliasName("Phags_Pa", 569),
- AliasName("Phaistos_Disc", 570),
- AliasName("Phli", 571),
- AliasName("Phlp", 572),
- AliasName("Phlv", 573),
- AliasName("Phnx", 574),
- AliasName("Phoenician", 575),
- AliasName("Phonetic_Extensions", 576),
- AliasName("Phonetic_Extensions_Supplement", 577),
- AliasName("Pi", 578),
- AliasName("Plrd", 579),
- AliasName("Po", 580),
- AliasName("Pop_Directional_Format", 581),
- AliasName("Postfix_Numeric", 582),
- AliasName("Prefix_Numeric", 583),
- AliasName("Prepend", 584),
- AliasName("Private_Use", 585),
- AliasName("Private_Use_Area", 586),
- AliasName("Prti", 587),
- AliasName("Ps", 588),
- AliasName("Punctuation", 589),
- AliasName("QMark", 590),
- AliasName("QU", 591),
- AliasName("Qaac", 592),
- AliasName("Qaai", 593),
- AliasName("Qaf", 594),
- AliasName("Qaph", 595),
- AliasName("Quotation", 596),
- AliasName("Quotation_Mark", 597),
- AliasName("R", 598),
- AliasName("RLE", 599),
- AliasName("RLO", 600),
- AliasName("Radical", 601),
- AliasName("Reh", 602),
- AliasName("Rejang", 603),
- AliasName("Reversed_Pe", 604),
- AliasName("Right", 605),
- AliasName("Right_Joining", 606),
- AliasName("Right_To_Left", 607),
- AliasName("Right_To_Left_Embedding", 608),
- AliasName("Right_To_Left_Override", 609),
- AliasName("Rjng", 610),
- AliasName("Roro", 611),
- AliasName("Runic", 612),
- AliasName("Runr", 613),
- AliasName("S", 614),
- AliasName("SA", 615),
- AliasName("SB", 616),
- AliasName("SC", 617),
- AliasName("SContinue", 618),
- AliasName("SD", 619),
- AliasName("SE", 620),
- AliasName("SG", 621),
- AliasName("SM", 622),
- AliasName("SP", 623),
- AliasName("ST", 624),
- AliasName("STerm", 625),
- AliasName("SY", 626),
- AliasName("Sad", 627),
- AliasName("Sadhe", 628),
- AliasName("Samr", 629),
- AliasName("Sara", 630),
- AliasName("Saur", 631),
- AliasName("Saurashtra", 632),
- AliasName("Sc", 633),
- AliasName("Script", 634),
- AliasName("Seen", 635),
- AliasName("Segment_Separator", 636),
- AliasName("Segment_Starter", 637),
- AliasName("Semkath", 638),
- AliasName("Sensitive", 639),
- AliasName("Sentence_Break", 640),
- AliasName("Sep", 641),
- AliasName("Separator", 642),
- AliasName("Sgnw", 643),
- AliasName("Shavian", 644),
- AliasName("Shaw", 645),
- AliasName("Shin", 646),
- AliasName("Simple_Case_Folding", 647),
- AliasName("Simple_Lowercase_Mapping", 648),
- AliasName("Simple_Titlecase_Mapping", 649),
- AliasName("Simple_Uppercase_Mapping", 650),
- AliasName("Sinh", 651),
- AliasName("Sinhala", 652),
- AliasName("Sk", 653),
- AliasName("Sm", 654),
- AliasName("Small", 655),
- AliasName("Small_Form_Variants", 656),
- AliasName("Sml", 657),
- AliasName("So", 658),
- AliasName("Soft_Dotted", 659),
- AliasName("Sp", 660),
- AliasName("Space", 661),
- AliasName("Space_Separator", 662),
- AliasName("SpacingMark", 663),
- AliasName("Spacing_Mark", 664),
- AliasName("Spacing_Modifier_Letters", 665),
- AliasName("Specials", 666),
- AliasName("Sqr", 667),
- AliasName("Square", 668),
- AliasName("Sub", 669),
- AliasName("Sund", 670),
- AliasName("Sundanese", 671),
- AliasName("Sup", 672),
- AliasName("Super", 673),
- AliasName("Superscripts_And_Subscripts", 674),
- AliasName("Supplemental_Arrows_A", 675),
- AliasName("Supplemental_Arrows_B", 676),
- AliasName("Supplemental_Mathematical_Operators", 677),
- AliasName("Supplemental_Punctuation", 678),
- AliasName("Supplementary_Private_Use_Area_A", 679),
- AliasName("Supplementary_Private_Use_Area_B", 680),
- AliasName("Surrogate", 681),
- AliasName("Swash_Kaf", 682),
- AliasName("Sylo", 683),
- AliasName("Syloti_Nagri", 684),
- AliasName("Symbol", 685),
- AliasName("Syrc", 686),
- AliasName("Syre", 687),
- AliasName("Syriac", 688),
- AliasName("Syriac_Waw", 689),
- AliasName("Syrj", 690),
- AliasName("Syrn", 691),
- AliasName("T", 692),
- AliasName("Tagalog", 693),
- AliasName("Tagb", 694),
- AliasName("Tagbanwa", 695),
- AliasName("Tags", 696),
- AliasName("Tah", 697),
- AliasName("Tai_Le", 698),
- AliasName("Tai_Xuan_Jing_Symbols", 699),
- AliasName("Tale", 700),
- AliasName("Talu", 701),
- AliasName("Tamil", 702),
- AliasName("Taml", 703),
- AliasName("Tavt", 704),
- AliasName("Taw", 705),
- AliasName("Teh_Marbuta", 706),
- AliasName("Telu", 707),
- AliasName("Telugu", 708),
- AliasName("Teng", 709),
- AliasName("Term", 710),
- AliasName("Terminal_Punctuation", 711),
- AliasName("Teth", 712),
- AliasName("Tfng", 713),
- AliasName("Tglg", 714),
- AliasName("Thaa", 715),
- AliasName("Thaana", 716),
- AliasName("Thai", 717),
- AliasName("Tibetan", 718),
- AliasName("Tibt", 719),
- AliasName("Tifinagh", 720),
- AliasName("Titlecase_Letter", 721),
- AliasName("Titlecase_Mapping", 722),
- AliasName("Trail_Canonical_Combining_Class", 723),
- AliasName("Trailing_Jamo", 724),
- AliasName("Transparent", 725),
- AliasName("True", 726),
- AliasName("U", 727),
- AliasName("UIdeo", 728),
- AliasName("UP", 729),
- AliasName("Ugar", 730),
- AliasName("Ugaritic", 731),
- AliasName("Unassigned", 732),
- AliasName("Unicode_1_Name", 733),
- AliasName("Unified_Canadian_Aboriginal_Syllabics", 734),
- AliasName("Unified_Ideograph", 735),
- AliasName("Unknown", 736),
- AliasName("Upper", 737),
- AliasName("Uppercase", 738),
- AliasName("Uppercase_Letter", 739),
- AliasName("Uppercase_Mapping", 740),
- AliasName("V", 741),
- AliasName("VR", 742),
- AliasName("VS", 743),
- AliasName("Vai", 744),
- AliasName("Vaii", 745),
- AliasName("Variation_Selector", 746),
- AliasName("Variation_Selectors", 747),
- AliasName("Variation_Selectors_Supplement", 748),
- AliasName("Vert", 749),
- AliasName("Vertical", 750),
- AliasName("Vertical_Forms", 751),
- AliasName("Virama", 752),
- AliasName("Visp", 753),
- AliasName("Vowel_Jamo", 754),
- AliasName("W", 755),
- AliasName("WB", 756),
- AliasName("WJ", 757),
- AliasName("WS", 758),
- AliasName("WSpace", 759),
- AliasName("Waw", 760),
- AliasName("White_Space", 761),
- AliasName("Wide", 762),
- AliasName("Word_Break", 763),
- AliasName("Word_Joiner", 764),
- AliasName("XIDC", 765),
- AliasName("XIDS", 766),
- AliasName("XID_Continue", 767),
- AliasName("XID_Start", 768),
- AliasName("XX", 769),
- AliasName("Xpeo", 770),
- AliasName("Xsux", 771),
- AliasName("Y", 772),
- AliasName("Yeh", 773),
- AliasName("Yeh_Barree", 774),
- AliasName("Yeh_With_Tail", 775),
- AliasName("Yes", 776),
- AliasName("Yi", 777),
- AliasName("Yi_Radicals", 778),
- AliasName("Yi_Syllables", 779),
- AliasName("Yiii", 780),
- AliasName("Yijing_Hexagram_Symbols", 781),
- AliasName("Yudh", 782),
- AliasName("Yudh_He", 783),
- AliasName("Z", 784),
- AliasName("ZW", 785),
- AliasName("ZWSpace", 786),
- AliasName("Zain", 787),
- AliasName("Zhain", 788),
- AliasName("Zl", 789),
- AliasName("Zmth", 790),
- AliasName("Zp", 791),
- AliasName("Zs", 792),
- AliasName("Zsym", 793),
- AliasName("Zxxx", 794),
- AliasName("Zyyy", 795),
- AliasName("Zzzz", 796),
- AliasName("age", 797),
- AliasName("alnum", 798),
- AliasName("bc", 799),
- AliasName("blank", 800),
- AliasName("blk", 801),
- AliasName("bmg", 802),
- AliasName("can", 803),
- AliasName("ccc", 804),
- AliasName("cf", 805),
- AliasName("cntrl", 806),
- AliasName("com", 807),
- AliasName("digit", 808),
- AliasName("dt", 809),
- AliasName("ea", 810),
- AliasName("enc", 811),
- AliasName("fin", 812),
- AliasName("font", 813),
- AliasName("fra", 814),
- AliasName("gc", 815),
- AliasName("gcm", 816),
- AliasName("graph", 817),
- AliasName("hst", 818),
- AliasName("init", 819),
- AliasName("isc", 820),
- AliasName("iso", 821),
- AliasName("jg", 822),
- AliasName("jt", 823),
- AliasName("lb", 824),
- AliasName("lc", 825),
- AliasName("lccc", 826),
- AliasName("med", 827),
- AliasName("na", 828),
- AliasName("na1", 829),
- AliasName("nar", 830),
- AliasName("nb", 831),
- AliasName("nfcinert", 832),
- AliasName("nfdinert", 833),
- AliasName("nfkcinert", 834),
- AliasName("nfkdinert", 835),
- AliasName("none", 836),
- AliasName("nt", 837),
- AliasName("nv", 838),
- AliasName("print", 839),
- AliasName("punct", 840),
- AliasName("sc", 841),
- AliasName("scf", 842),
- AliasName("segstart", 843),
- AliasName("sfc", 844),
- AliasName("slc", 845),
- AliasName("sml", 846),
- AliasName("space", 847),
- AliasName("sqr", 848),
- AliasName("stc", 849),
- AliasName("sub", 850),
- AliasName("suc", 851),
- AliasName("sup", 852),
- AliasName("tc", 853),
- AliasName("tccc", 854),
- AliasName("uc", 855),
- AliasName("vert", 856),
- AliasName("wide", 857),
- AliasName("xdigit", 858),
-};
-
-/* to be filled in */
-int32_t REMAP[859];
-
-const int32_t NAME_GROUP_COUNT = 1291;
-
-int32_t NAME_GROUP[] = {
- 107, -148, /* 0: "CN", "Control" */
- 108, -108, /* 2: "CR", "CR" */
- 200, -218, /* 4: "EX", "Extend" */
- 366, -366, /* 6: "L", "L" */
- 369, -369, /* 8: "LF", "LF" */
- 374, -374, /* 10: "LV", "LV" */
- 375, -375, /* 12: "LVT", "LVT" */
- 769, -546, /* 14: "XX", "Other" */
- 556, -584, /* 16: "PP", "Prepend" */
- 622, -663, /* 18: "SM", "SpacingMark" */
- 692, -692, /* 20: "T", "T" */
- 741, -741, /* 22: "V", "V" */
- 431, -446, /* 24: "M", "Maybe" */
- 474, -509, /* 26: "N", "No" */
- 772, -776, /* 28: "Y", "Yes" */
- 10, -14, /* 30: "AT", "ATerm" */
- 105, -130, /* 32: "CL", "Close" */
- 222, -231, /* 34: "FO", "Format" */
- 370, -421, /* 36: "LO", "Lower" */
- 495, -525, /* 38: "NU", "Numeric" */
- 368, -529, /* 40: "LE", "OLetter" */
- 617, -618, /* 42: "SC", "SContinue" */
- 620, -641, /* 44: "SE", "Sep" */
- 623, -660, /* 46: "SP", "Sp" */
- 624, -625, /* 48: "ST", "STerm" */
- 729, -737, /* 50: "UP", "Upper" */
- 368, -5, /* 52: "LE", "ALetter" */
- 218, -218, /* 54: "Extend", "Extend" */
- 200, -219, /* 56: "EX", "ExtendNumLet" */
- 342, -352, /* 58: "KA", "Katakana" */
- 433, -453, /* 60: "ML", "MidLetter" */
- 434, -454, /* 62: "MN", "MidNum" */
- 432, -455, /* 64: "MB", "MidNumLet" */
- 491, -504, /* 66: "NL", "Newline" */
- 23, -24, /* 68: "Alpha", "Alphabetic" */
- 2, -9, /* 70: "AHex", "ASCII_Hex_Digit" */
- 65, -67, /* 72: "Bidi_C", "Bidi_Control" */
- 68, -69, /* 74: "Bidi_M", "Bidi_Mirrored" */
- 639, -121, /* 76: "Sensitive", "Case_Sensitive" */
- 174, -174, /* 78: "Dash", "Dash" */
- 171, -180, /* 80: "DI", "Default_Ignorable_Code_Point" */
- 181, -182, /* 82: "Dep", "Deprecated" */
- 187, -188, /* 84: "Dia", "Diacritic" */
- 217, -220, /* 86: "Ext", "Extender" */
- 143, -234, /* 88: "Comp_Ex", "Full_Composition_Exclusion" */
- 253, -256, /* 90: "Gr_Base", "Grapheme_Base" */
- 254, -258, /* 92: "Gr_Ext", "Grapheme_Extend" */
- 255, -259, /* 94: "Gr_Link", "Grapheme_Link" */
- 294, -295, /* 96: "Hex", "Hex_Digit" */
- 303, -303, /* 98: "Hyphen", "Hyphen" */
- 317, -318, /* 100: "Ideo", "Ideographic" */
- 307, -309, /* 102: "IDSB", "IDS_Binary_Operator" */
- 308, -310, /* 104: "IDST", "IDS_Trinary_Operator" */
- 305, -311, /* 106: "IDC", "ID_Continue" */
- 306, -312, /* 108: "IDS", "ID_Start" */
- 336, -338, /* 110: "Join_C", "Join_Control" */
- 371, -419, /* 112: "LOE", "Logical_Order_Exception" */
- 421, -422, /* 114: "Lower", "Lowercase" */
- 441, -441, /* 116: "Math", "Math" */
- 832, -477, /* 118: "nfcinert", "NFC_Inert" */
- 833, -480, /* 120: "nfdinert", "NFD_Inert" */
- 834, -483, /* 122: "nfkcinert", "NFKC_Inert" */
- 835, -486, /* 124: "nfkdinert", "NFKD_Inert" */
- 476, -514, /* 126: "NChar", "Noncharacter_Code_Point" */
- 559, -561, /* 128: "Pat_Syn", "Pattern_Syntax" */
- 560, -562, /* 130: "Pat_WS", "Pattern_White_Space" */
- 0, -798, /* 132: "", "alnum" */
- 0, -800, /* 134: "", "blank" */
- 0, -817, /* 136: "", "graph" */
- 0, -839, /* 138: "", "print" */
- 0, -858, /* 140: "", "xdigit" */
- 590, -597, /* 142: "QMark", "Quotation_Mark" */
- 601, -601, /* 144: "Radical", "Radical" */
- 843, -637, /* 146: "segstart", "Segment_Starter" */
- 619, -659, /* 148: "SD", "Soft_Dotted" */
- 625, -625, /* 150: "STerm", "STerm" */
- 710, -711, /* 152: "Term", "Terminal_Punctuation" */
- 728, -735, /* 154: "UIdeo", "Unified_Ideograph" */
- 737, -738, /* 156: "Upper", "Uppercase" */
- 743, -746, /* 158: "VS", "Variation_Selector" */
- 759, 761, -847, /* 160: "WSpace", "White_Space", "space" */
- 765, -767, /* 163: "XIDC", "XID_Continue" */
- 766, -768, /* 165: "XIDS", "XID_Start" */
- 838, -527, /* 167: "nv", "Numeric_Value" */
- 799, -66, /* 169: "bc", "Bidi_Class" */
- 801, -72, /* 171: "blk", "Block" */
- 804, -115, /* 173: "ccc", "Canonical_Combining_Class" */
- 809, -179, /* 175: "dt", "Decomposition_Type" */
- 810, -201, /* 177: "ea", "East_Asian_Width" */
- 815, -240, /* 179: "gc", "General_Category" */
- 236, -257, /* 181: "GCB", "Grapheme_Cluster_Break" */
- 818, -281, /* 183: "hst", "Hangul_Syllable_Type" */
- 822, -339, /* 185: "jg", "Joining_Group" */
- 823, -340, /* 187: "jt", "Joining_Type" */
- 826, -394, /* 189: "lccc", "Lead_Canonical_Combining_Class" */
- 824, -410, /* 191: "lb", "Line_Break" */
- 478, -479, /* 193: "NFC_QC", "NFC_Quick_Check" */
- 481, -482, /* 195: "NFD_QC", "NFD_Quick_Check" */
- 484, -485, /* 197: "NFKC_QC", "NFKC_Quick_Check" */
- 487, -488, /* 199: "NFKD_QC", "NFKD_Quick_Check" */
- 837, -526, /* 201: "nt", "Numeric_Type" */
- 841, -634, /* 203: "sc", "Script" */
- 616, -640, /* 205: "SB", "Sentence_Break" */
- 854, -723, /* 207: "tccc", "Trail_Canonical_Combining_Class" */
- 756, -763, /* 209: "WB", "Word_Break" */
- 816, -241, /* 211: "gcm", "General_Category_Mask" */
- 797, -19, /* 213: "age", "Age" */
- 802, -70, /* 215: "bmg", "Bidi_Mirroring_Glyph" */
- 805, -120, /* 217: "cf", "Case_Folding" */
- 820, -316, /* 219: "isc", "ISO_Comment" */
- 825, -424, /* 221: "lc", "Lowercase_Mapping" */
- 828, -497, /* 223: "na", "Name" */
- 842, 647, -844, /* 225: "scf", "Simple_Case_Folding", "sfc" */
- 845, -648, /* 228: "slc", "Simple_Lowercase_Mapping" */
- 849, -649, /* 230: "stc", "Simple_Titlecase_Mapping" */
- 851, -650, /* 232: "suc", "Simple_Uppercase_Mapping" */
- 853, -722, /* 234: "tc", "Titlecase_Mapping" */
- 829, -733, /* 236: "na1", "Unicode_1_Name" */
- 855, -740, /* 238: "uc", "Uppercase_Mapping" */
- 6, -33, /* 240: "AN", "Arabic_Number" */
- 46, -558, /* 242: "B", "Paragraph_Separator" */
- 52, -77, /* 244: "BN", "Boundary_Neutral" */
- 109, -142, /* 246: "CS", "Common_Separator" */
- 494, -516, /* 248: "NSM", "Nonspacing_Mark" */
- 197, -213, /* 250: "EN", "European_Number" */
- 198, -214, /* 252: "ES", "European_Separator" */
- 199, -215, /* 254: "ET", "European_Terminator" */
- 366, -398, /* 256: "L", "Left_To_Right" */
- 372, -399, /* 258: "LRE", "Left_To_Right_Embedding" */
- 373, -400, /* 260: "LRO", "Left_To_Right_Override" */
- 530, -548, /* 262: "ON", "Other_Neutral" */
- 554, -581, /* 264: "PDF", "Pop_Directional_Format" */
- 598, -607, /* 266: "R", "Right_To_Left" */
- 4, -32, /* 268: "AL", "Arabic_Letter" */
- 599, -608, /* 270: "RLE", "Right_To_Left_Embedding" */
- 600, -609, /* 272: "RLO", "Right_To_Left_Override" */
- 614, -636, /* 274: "S", "Segment_Separator" */
- 758, -761, /* 276: "WS", "White_Space" */
- 474, 509, 221, -223, /* 278: "N", "No", "F", "False" */
- 772, 776, 692, -726, /* 282: "Y", "Yes", "T", "True" */
- 0, -18, /* 286: "", "Aegean_Numbers" */
- 0, -25, /* 288: "", "Alphabetic_Presentation_Forms" */
- 0, -27, /* 290: "", "Ancient_Greek_Musical_Notation" */
- 0, -28, /* 292: "", "Ancient_Greek_Numbers" */
- 0, -29, /* 294: "", "Ancient_Symbols" */
- 0, -31, /* 296: "", "Arabic" */
- 0, 35, -34, /* 298: "", "Arabic_Presentation_Forms_A", "Arabic_Presentation_Forms-A" */
- 0, -36, /* 301: "", "Arabic_Presentation_Forms_B" */
- 0, -37, /* 303: "", "Arabic_Supplement" */
- 0, -38, /* 305: "", "Armenian" */
- 0, -41, /* 307: "", "Arrows" */
- 0, -55, /* 309: "", "Balinese" */
- 0, 56, -8, /* 311: "", "Basic_Latin", "ASCII" */
- 0, -63, /* 314: "", "Bengali" */
- 0, -73, /* 316: "", "Block_Elements" */
- 0, -75, /* 318: "", "Bopomofo" */
- 0, -76, /* 320: "", "Bopomofo_Extended" */
- 0, -78, /* 322: "", "Box_Drawing" */
- 0, -82, /* 324: "", "Braille_Patterns" */
- 0, -88, /* 326: "", "Buginese" */
- 0, -90, /* 328: "", "Buhid" */
- 0, -92, /* 330: "", "Byzantine_Musical_Symbols" */
- 0, -118, /* 332: "", "Carian" */
- 0, -125, /* 334: "", "Cham" */
- 0, -127, /* 336: "", "Cherokee" */
- 0, -95, /* 338: "", "CJK_Compatibility" */
- 0, -96, /* 340: "", "CJK_Compatibility_Forms" */
- 0, -97, /* 342: "", "CJK_Compatibility_Ideographs" */
- 0, -98, /* 344: "", "CJK_Compatibility_Ideographs_Supplement" */
- 0, -99, /* 346: "", "CJK_Radicals_Supplement" */
- 0, -100, /* 348: "", "CJK_Strokes" */
- 0, -101, /* 350: "", "CJK_Symbols_And_Punctuation" */
- 0, -102, /* 352: "", "CJK_Unified_Ideographs" */
- 0, -103, /* 354: "", "CJK_Unified_Ideographs_Extension_A" */
- 0, -104, /* 356: "", "CJK_Unified_Ideographs_Extension_B" */
- 0, -135, /* 358: "", "Combining_Diacritical_Marks" */
- 0, -137, /* 360: "", "Combining_Diacritical_Marks_Supplement" */
- 0, -138, /* 362: "", "Combining_Half_Marks" */
- 0, 136, -140, /* 364: "", "Combining_Diacritical_Marks_For_Symbols", "Combining_Marks_For_Symbols" */
- 0, -149, /* 367: "", "Control_Pictures" */
- 0, -151, /* 369: "", "Coptic" */
- 0, -152, /* 371: "", "Counting_Rod_Numerals" */
- 0, -155, /* 373: "", "Cuneiform" */
- 0, -156, /* 375: "", "Cuneiform_Numbers_And_Punctuation" */
- 0, -158, /* 377: "", "Currency_Symbols" */
- 0, -160, /* 379: "", "Cypriot_Syllabary" */
- 0, -161, /* 381: "", "Cyrillic" */
- 0, -162, /* 383: "", "Cyrillic_Extended_A" */
- 0, -163, /* 385: "", "Cyrillic_Extended_B" */
- 0, 164, -165, /* 387: "", "Cyrillic_Supplement", "Cyrillic_Supplementary" */
- 0, -183, /* 390: "", "Deseret" */
- 0, -185, /* 392: "", "Devanagari" */
- 0, -190, /* 394: "", "Dingbats" */
- 0, -191, /* 396: "", "Domino_Tiles" */
- 0, -206, /* 398: "", "Enclosed_Alphanumerics" */
- 0, -207, /* 400: "", "Enclosed_CJK_Letters_And_Months" */
- 0, -210, /* 402: "", "Ethiopic" */
- 0, -211, /* 404: "", "Ethiopic_Extended" */
- 0, -212, /* 406: "", "Ethiopic_Supplement" */
- 0, -242, /* 408: "", "General_Punctuation" */
- 0, -244, /* 410: "", "Geometric_Shapes" */
- 0, -246, /* 412: "", "Georgian" */
- 0, -247, /* 414: "", "Georgian_Supplement" */
- 0, -249, /* 416: "", "Glagolitic" */
- 0, -252, /* 418: "", "Gothic" */
- 0, 261, -260, /* 420: "", "Greek_And_Coptic", "Greek" */
- 0, -262, /* 423: "", "Greek_Extended" */
- 0, -264, /* 425: "", "Gujarati" */
- 0, -266, /* 427: "", "Gurmukhi" */
- 0, -274, /* 429: "", "Halfwidth_And_Fullwidth_Forms" */
- 0, -279, /* 431: "", "Hangul_Compatibility_Jamo" */
- 0, -280, /* 433: "", "Hangul_Jamo" */
- 0, -282, /* 435: "", "Hangul_Syllables" */
- 0, -287, /* 437: "", "Hanunoo" */
- 0, -290, /* 439: "", "Hebrew" */
- 0, -296, /* 441: "", "High_Private_Use_Surrogates" */
- 0, -297, /* 443: "", "High_Surrogates" */
- 0, -299, /* 445: "", "Hiragana" */
- 0, -319, /* 447: "", "Ideographic_Description_Characters" */
- 0, -314, /* 449: "", "IPA_Extensions" */
- 0, -348, /* 451: "", "Kanbun" */
- 0, -349, /* 453: "", "Kangxi_Radicals" */
- 0, -350, /* 455: "", "Kannada" */
- 0, -352, /* 457: "", "Katakana" */
- 0, -354, /* 459: "", "Katakana_Phonetic_Extensions" */
- 0, -355, /* 461: "", "Kayah_Li" */
- 0, -358, /* 463: "", "Kharoshthi" */
- 0, -359, /* 465: "", "Khmer" */
- 0, -360, /* 467: "", "Khmer_Symbols" */
- 0, -381, /* 469: "", "Lao" */
- 0, 387, -386, /* 471: "", "Latin_1_Supplement", "Latin_1" */
- 0, -388, /* 474: "", "Latin_Extended_A" */
- 0, -389, /* 476: "", "Latin_Extended_Additional" */
- 0, -390, /* 478: "", "Latin_Extended_B" */
- 0, -391, /* 480: "", "Latin_Extended_C" */
- 0, -392, /* 482: "", "Latin_Extended_D" */
- 0, -402, /* 484: "", "Lepcha" */
- 0, -405, /* 486: "", "Letterlike_Symbols" */
- 0, -407, /* 488: "", "Limbu" */
- 0, -414, /* 490: "", "Linear_B_Ideograms" */
- 0, -415, /* 492: "", "Linear_B_Syllabary" */
- 0, -420, /* 494: "", "Low_Surrogates" */
- 0, -428, /* 496: "", "Lycian" */
- 0, -430, /* 498: "", "Lydian" */
- 0, -435, /* 500: "", "Mahjong_Tiles" */
- 0, -436, /* 502: "", "Malayalam" */
- 0, -443, /* 504: "", "Mathematical_Alphanumeric_Symbols" */
- 0, -444, /* 506: "", "Mathematical_Operators" */
- 0, -457, /* 508: "", "Miscellaneous_Mathematical_Symbols_A" */
- 0, -458, /* 510: "", "Miscellaneous_Mathematical_Symbols_B" */
- 0, -459, /* 512: "", "Miscellaneous_Symbols" */
- 0, -460, /* 514: "", "Miscellaneous_Symbols_And_Arrows" */
- 0, -461, /* 516: "", "Miscellaneous_Technical" */
- 0, -466, /* 518: "", "Modifier_Tone_Letters" */
- 0, -468, /* 520: "", "Mongolian" */
- 0, -471, /* 522: "", "Musical_Symbols" */
- 0, -472, /* 524: "", "Myanmar" */
- 0, -503, /* 526: "", "New_Tai_Lue" */
- 0, -490, /* 528: "", "NKo" */
- 0, -510, /* 530: "", "No_Block" */
- 0, -524, /* 532: "", "Number_Forms" */
- 0, -534, /* 534: "", "Ogham" */
- 0, -537, /* 536: "", "Old_Italic" */
- 0, -538, /* 538: "", "Old_Persian" */
- 0, -535, /* 540: "", "Ol_Chiki" */
- 0, -540, /* 542: "", "Optical_Character_Recognition" */
- 0, -541, /* 544: "", "Oriya" */
- 0, -545, /* 546: "", "Osmanya" */
- 0, -569, /* 548: "", "Phags_Pa" */
- 0, -570, /* 550: "", "Phaistos_Disc" */
- 0, -575, /* 552: "", "Phoenician" */
- 0, -576, /* 554: "", "Phonetic_Extensions" */
- 0, -577, /* 556: "", "Phonetic_Extensions_Supplement" */
- 0, 586, -585, /* 558: "", "Private_Use_Area", "Private_Use" */
- 0, -603, /* 561: "", "Rejang" */
- 0, -612, /* 563: "", "Runic" */
- 0, -632, /* 565: "", "Saurashtra" */
- 0, -644, /* 567: "", "Shavian" */
- 0, -652, /* 569: "", "Sinhala" */
- 0, -656, /* 571: "", "Small_Form_Variants" */
- 0, -665, /* 573: "", "Spacing_Modifier_Letters" */
- 0, -666, /* 575: "", "Specials" */
- 0, -671, /* 577: "", "Sundanese" */
- 0, -674, /* 579: "", "Superscripts_And_Subscripts" */
- 0, -675, /* 581: "", "Supplemental_Arrows_A" */
- 0, -676, /* 583: "", "Supplemental_Arrows_B" */
- 0, -677, /* 585: "", "Supplemental_Mathematical_Operators" */
- 0, -678, /* 587: "", "Supplemental_Punctuation" */
- 0, -679, /* 589: "", "Supplementary_Private_Use_Area_A" */
- 0, -680, /* 591: "", "Supplementary_Private_Use_Area_B" */
- 0, -684, /* 593: "", "Syloti_Nagri" */
- 0, -688, /* 595: "", "Syriac" */
- 0, -693, /* 597: "", "Tagalog" */
- 0, -695, /* 599: "", "Tagbanwa" */
- 0, -696, /* 601: "", "Tags" */
- 0, -698, /* 603: "", "Tai_Le" */
- 0, -699, /* 605: "", "Tai_Xuan_Jing_Symbols" */
- 0, -702, /* 607: "", "Tamil" */
- 0, -708, /* 609: "", "Telugu" */
- 0, -716, /* 611: "", "Thaana" */
- 0, -717, /* 613: "", "Thai" */
- 0, -718, /* 615: "", "Tibetan" */
- 0, -720, /* 617: "", "Tifinagh" */
- 0, -731, /* 619: "", "Ugaritic" */
- 0, 734, -113, /* 621: "", "Unified_Canadian_Aboriginal_Syllabics", "Canadian_Syllabics" */
- 0, -744, /* 624: "", "Vai" */
- 0, -747, /* 626: "", "Variation_Selectors" */
- 0, -748, /* 628: "", "Variation_Selectors_Supplement" */
- 0, -751, /* 630: "", "Vertical_Forms" */
- 0, -781, /* 632: "", "Yijing_Hexagram_Symbols" */
- 0, -778, /* 634: "", "Yi_Radicals" */
- 0, -779, /* 636: "", "Yi_Syllables" */
- 492, -520, /* 638: "NR", "Not_Reordered" */
- 532, -552, /* 640: "OV", "Overlay" */
- 13, -44, /* 642: "ATBL", "Attached_Below_Left" */
- 12, -43, /* 644: "ATB", "Attached_Below" */
- 11, -42, /* 646: "ATAR", "Attached_Above_Right" */
- 51, -60, /* 648: "BL", "Below_Left" */
- 46, -59, /* 650: "B", "Below" */
- 53, -61, /* 652: "BR", "Below_Right" */
- 366, -396, /* 654: "L", "Left" */
- 598, -605, /* 656: "R", "Right" */
- 4, -16, /* 658: "AL", "Above_Left" */
- 1, -15, /* 660: "A", "Above" */
- 7, -17, /* 662: "AR", "Above_Right" */
- 170, -193, /* 664: "DB", "Double_Below" */
- 169, -192, /* 666: "DA", "Double_Above" */
- 315, -328, /* 668: "IS", "Iota_Subscript" */
- 489, -522, /* 670: "NK", "Nukta" */
- 343, -347, /* 672: "KV", "Kana_Voicing" */
- 742, -752, /* 674: "VR", "Virama" */
- 111, 114, -803, /* 676: "Can", "Canonical", "can" */
- 205, 128, -811, /* 679: "Enc", "Circle", "enc" */
- 134, 144, -807, /* 682: "Com", "Compat", "com" */
- 226, 227, -812, /* 685: "Fin", "Final", "fin" */
- 230, -813, /* 688: "Font", "font" */
- 232, 233, -814, /* 690: "Fra", "Fraction", "fra" */
- 323, 324, -819, /* 693: "Init", "Initial", "init" */
- 329, 330, -821, /* 696: "Iso", "Isolated", "iso" */
- 449, 450, -827, /* 699: "Med", "Medial", "med" */
- 498, 499, -830, /* 702: "Nar", "Narrow", "nar" */
- 500, 512, -831, /* 705: "Nb", "Nobreak", "nb" */
- 515, -836, /* 708: "None", "none" */
- 657, 655, -846, /* 710: "Sml", "Small", "sml" */
- 667, 668, -848, /* 713: "Sqr", "Square", "sqr" */
- 669, -850, /* 716: "Sub", "sub" */
- 672, 673, -852, /* 718: "Sup", "Super", "sup" */
- 749, 750, -856, /* 721: "Vert", "Vertical", "vert" */
- 762, -857, /* 724: "Wide", "wide" */
- 1, -26, /* 726: "A", "Ambiguous" */
- 221, -235, /* 728: "F", "Fullwidth" */
- 268, -273, /* 730: "H", "Halfwidth" */
- 496, -499, /* 732: "Na", "Narrow" */
- 474, -502, /* 734: "N", "Neutral" */
- 755, -762, /* 736: "W", "Wide" */
- 447, -664, /* 738: "Mc", "Spacing_Mark" */
- 563, -146, /* 740: "Pc", "Connector_Punctuation" */
- 123, 148, -806, /* 742: "Cc", "Control", "cntrl" */
- 633, -157, /* 745: "Sc", "Currency_Symbol" */
- 564, -175, /* 747: "Pd", "Dash_Punctuation" */
- 501, 178, -808, /* 749: "Nd", "Decimal_Number", "digit" */
- 448, -208, /* 752: "Me", "Enclosing_Mark" */
- 565, -131, /* 754: "Pe", "Close_Punctuation" */
- 567, -228, /* 756: "Pf", "Final_Punctuation" */
- 124, -231, /* 758: "Cf", "Format" */
- 132, -732, /* 760: "Cn", "Unassigned" */
- 578, -325, /* 762: "Pi", "Initial_Punctuation" */
- 508, -404, /* 764: "Nl", "Letter_Number" */
- 789, -412, /* 766: "Zl", "Line_Separator" */
- 416, -423, /* 768: "Ll", "Lowercase_Letter" */
- 654, -442, /* 770: "Sm", "Math_Symbol" */
- 417, -464, /* 772: "Lm", "Modifier_Letter" */
- 653, -465, /* 774: "Sk", "Modifier_Symbol" */
- 463, -516, /* 776: "Mn", "Nonspacing_Mark" */
- 418, -547, /* 778: "Lo", "Other_Letter" */
- 509, -549, /* 780: "No", "Other_Number" */
- 580, -550, /* 782: "Po", "Other_Punctuation" */
- 658, -551, /* 784: "So", "Other_Symbol" */
- 791, -558, /* 786: "Zp", "Paragraph_Separator" */
- 133, -585, /* 788: "Co", "Private_Use" */
- 792, -662, /* 790: "Zs", "Space_Separator" */
- 588, -539, /* 792: "Ps", "Open_Punctuation" */
- 154, -681, /* 794: "Cs", "Surrogate" */
- 425, -721, /* 796: "Lt", "Titlecase_Letter" */
- 426, -739, /* 798: "Lu", "Uppercase_Letter" */
- 93, -546, /* 800: "C", "Other" */
- 367, -122, /* 802: "LC", "Cased_Letter" */
- 366, -403, /* 804: "L", "Letter" */
- 431, -440, /* 806: "M", "Mark" */
- 474, -523, /* 808: "N", "Number" */
- 553, 589, -840, /* 810: "P", "Punctuation", "punct" */
- 614, -685, /* 813: "S", "Symbol" */
- 784, -642, /* 815: "Z", "Separator" */
- 366, -395, /* 817: "L", "Leading_Jamo" */
- 375, -376, /* 819: "LVT", "LVT_Syllable" */
- 374, -377, /* 821: "LV", "LV_Syllable" */
- 475, -519, /* 823: "NA", "Not_Applicable" */
- 692, -724, /* 825: "T", "Trailing_Jamo" */
- 741, -754, /* 827: "V", "Vowel_Jamo" */
- 0, -20, /* 829: "", "Ain" */
- 0, -21, /* 831: "", "Alaph" */
- 0, -22, /* 833: "", "Alef" */
- 0, -58, /* 835: "", "Beh" */
- 0, -64, /* 837: "", "Beth" */
- 0, -91, /* 839: "", "Burushaski_Yeh_Barree" */
- 0, -172, /* 841: "", "Dal" */
- 0, -173, /* 843: "", "Dalath_Rish" */
- 0, -196, /* 845: "", "E" */
- 0, -224, /* 847: "", "Fe" */
- 0, -225, /* 849: "", "Feh" */
- 0, -229, /* 851: "", "Final_Semkath" */
- 0, -238, /* 853: "", "Gaf" */
- 0, -239, /* 855: "", "Gamal" */
- 0, -272, /* 857: "", "Hah" */
- 0, -275, /* 859: "", "Hamza_On_Heh_Goal" */
- 0, -288, /* 861: "", "He" */
- 0, -291, /* 863: "", "Heh" */
- 0, -292, /* 865: "", "Heh_Goal" */
- 0, -293, /* 867: "", "Heth" */
- 0, -344, /* 869: "", "Kaf" */
- 0, -351, /* 871: "", "Kaph" */
- 0, -356, /* 873: "", "Khaph" */
- 0, -363, /* 875: "", "Knotted_Heh" */
- 0, -378, /* 877: "", "Lam" */
- 0, -379, /* 879: "", "Lamadh" */
- 0, -451, /* 881: "", "Meem" */
- 0, -456, /* 883: "", "Mim" */
- 0, -518, /* 885: "", "Noon" */
- 0, -511, /* 887: "", "No_Joining_Group" */
- 0, -528, /* 889: "", "Nun" */
- 0, -565, /* 891: "", "Pe" */
- 0, -594, /* 893: "", "Qaf" */
- 0, -595, /* 895: "", "Qaph" */
- 0, -602, /* 897: "", "Reh" */
- 0, -604, /* 899: "", "Reversed_Pe" */
- 0, -627, /* 901: "", "Sad" */
- 0, -628, /* 903: "", "Sadhe" */
- 0, -635, /* 905: "", "Seen" */
- 0, -638, /* 907: "", "Semkath" */
- 0, -646, /* 909: "", "Shin" */
- 0, -682, /* 911: "", "Swash_Kaf" */
- 0, -689, /* 913: "", "Syriac_Waw" */
- 0, -697, /* 915: "", "Tah" */
- 0, -705, /* 917: "", "Taw" */
- 0, -706, /* 919: "", "Teh_Marbuta" */
- 0, -712, /* 921: "", "Teth" */
- 0, -760, /* 923: "", "Waw" */
- 0, -773, /* 925: "", "Yeh" */
- 0, -774, /* 927: "", "Yeh_Barree" */
- 0, -775, /* 929: "", "Yeh_With_Tail" */
- 0, -782, /* 931: "", "Yudh" */
- 0, -783, /* 933: "", "Yudh_He" */
- 0, -787, /* 935: "", "Zain" */
- 0, -788, /* 937: "", "Zhain" */
- 168, -195, /* 939: "D", "Dual_Joining" */
- 93, -337, /* 941: "C", "Join_Causing" */
- 366, -397, /* 943: "L", "Left_Joining" */
- 727, -513, /* 945: "U", "Non_Joining" */
- 598, -606, /* 947: "R", "Right_Joining" */
- 692, -725, /* 949: "T", "Transparent" */
- 4, -24, /* 951: "AL", "Alphabetic" */
- 3, -26, /* 953: "AI", "Ambiguous" */
- 48, -83, /* 955: "BA", "Break_After" */
- 49, -84, /* 957: "BB", "Break_Before" */
- 47, -85, /* 959: "B2", "Break_Both" */
- 626, -86, /* 961: "SY", "Break_Symbols" */
- 108, -119, /* 963: "CR", "Carriage_Return" */
- 105, -131, /* 965: "CL", "Close_Punctuation" */
- 106, -139, /* 967: "CM", "Combining_Mark" */
- 615, -145, /* 969: "SA", "Complex_Context" */
- 94, -147, /* 971: "CB", "Contingent_Break" */
- 200, -216, /* 973: "EX", "Exclamation" */
- 237, -250, /* 975: "GL", "Glue" */
- 269, -269, /* 977: "H2", "H2" */
- 270, -270, /* 979: "H3", "H3" */
- 271, -303, /* 981: "HY", "Hyphen" */
- 304, -318, /* 983: "ID", "Ideographic" */
- 315, -321, /* 985: "IS", "Infix_Numeric" */
- 313, 326, -327, /* 987: "IN", "Inseparable", "Inseperable" */
- 332, -332, /* 990: "JL", "JL" */
- 333, -333, /* 992: "JT", "JT" */
- 334, -334, /* 994: "JV", "JV" */
- 369, -411, /* 996: "LF", "Line_Feed" */
- 50, -438, /* 998: "BK", "Mandatory_Break" */
- 491, -505, /* 1000: "NL", "Next_Line" */
- 493, -517, /* 1002: "NS", "Nonstarter" */
- 531, -539, /* 1004: "OP", "Open_Punctuation" */
- 555, -582, /* 1006: "PO", "Postfix_Numeric" */
- 557, -583, /* 1008: "PR", "Prefix_Numeric" */
- 591, -596, /* 1010: "QU", "Quotation" */
- 623, -661, /* 1012: "SP", "Space" */
- 621, -681, /* 1014: "SG", "Surrogate" */
- 769, -736, /* 1016: "XX", "Unknown" */
- 757, -764, /* 1018: "WJ", "Word_Joiner" */
- 785, -786, /* 1020: "ZW", "ZWSpace" */
- 176, -177, /* 1022: "De", "Decimal" */
- 186, -189, /* 1024: "Di", "Digit" */
- 515, -515, /* 1026: "None", "None" */
- 521, -525, /* 1028: "Nu", "Numeric" */
- 30, -31, /* 1030: "Arab", "Arabic" */
- 40, -38, /* 1032: "Armn", "Armenian" */
- 45, -45, /* 1034: "Avst", "Avst" */
- 54, -55, /* 1036: "Bali", "Balinese" */
- 57, -57, /* 1038: "Batk", "Batk" */
- 62, -63, /* 1040: "Beng", "Bengali" */
- 71, -71, /* 1042: "Blis", "Blis" */
- 573, -573, /* 1044: "Phlv", "Phlv" */
- 74, -75, /* 1046: "Bopo", "Bopomofo" */
- 79, -79, /* 1048: "Brah", "Brah" */
- 80, -81, /* 1050: "Brai", "Braille" */
- 87, -88, /* 1052: "Bugi", "Buginese" */
- 89, -90, /* 1054: "Buhd", "Buhid" */
- 116, -112, /* 1056: "Cans", "Canadian_Aboriginal" */
- 117, -118, /* 1058: "Cari", "Carian" */
- 110, -110, /* 1060: "Cakm", "Cakm" */
- 125, -125, /* 1062: "Cham", "Cham" */
- 126, -127, /* 1064: "Cher", "Cherokee" */
- 129, -129, /* 1066: "Cirt", "Cirt" */
- 795, -141, /* 1068: "Zyyy", "Common" */
- 150, 151, -592, /* 1070: "Copt", "Coptic", "Qaac" */
- 771, -155, /* 1073: "Xsux", "Cuneiform" */
- 153, -159, /* 1075: "Cprt", "Cypriot" */
- 166, -161, /* 1077: "Cyrl", "Cyrillic" */
- 202, -202, /* 1079: "Egyd", "Egyd" */
- 194, -183, /* 1081: "Dsrt", "Deseret" */
- 184, -185, /* 1083: "Deva", "Devanagari" */
- 691, -691, /* 1085: "Syrn", "Syrn" */
- 204, -204, /* 1087: "Egyp", "Egyp" */
- 687, -687, /* 1089: "Syre", "Syre" */
- 209, -210, /* 1091: "Ethi", "Ethiopic" */
- 245, -246, /* 1093: "Geor", "Georgian" */
- 248, -249, /* 1095: "Glag", "Glagolitic" */
- 251, -252, /* 1097: "Goth", "Gothic" */
- 263, -260, /* 1099: "Grek", "Greek" */
- 265, -264, /* 1101: "Gujr", "Gujarati" */
- 267, -266, /* 1103: "Guru", "Gurmukhi" */
- 283, -276, /* 1105: "Hani", "Han" */
- 277, -278, /* 1107: "Hang", "Hangul" */
- 284, -287, /* 1109: "Hano", "Hanunoo" */
- 320, -320, /* 1111: "Inds", "Inds" */
- 289, -290, /* 1113: "Hebr", "Hebrew" */
- 203, -203, /* 1115: "Egyh", "Egyh" */
- 298, -299, /* 1117: "Hira", "Hiragana" */
- 39, -39, /* 1119: "Armi", "Armi" */
- 593, -322, /* 1121: "Qaai", "Inherited" */
- 571, -571, /* 1123: "Phli", "Phli" */
- 587, -587, /* 1125: "Prti", "Prti" */
- 341, -341, /* 1127: "Jpan", "Jpan" */
- 335, -335, /* 1129: "Java", "Java" */
- 365, -365, /* 1131: "Kthi", "Kthi" */
- 362, -350, /* 1133: "Knda", "Kannada" */
- 346, -352, /* 1135: "Kana", "Katakana" */
- 301, -353, /* 1137: "Hrkt", "Katakana_Or_Hiragana" */
- 345, -355, /* 1139: "Kali", "Kayah_Li" */
- 357, -358, /* 1141: "Khar", "Kharoshthi" */
- 361, -359, /* 1143: "Khmr", "Khmer" */
- 243, -243, /* 1145: "Geok", "Geok" */
- 364, -364, /* 1147: "Kore", "Kore" */
- 380, -380, /* 1149: "Lana", "Lana" */
- 382, -381, /* 1151: "Laoo", "Lao" */
- 393, -385, /* 1153: "Latn", "Latin" */
- 383, -383, /* 1155: "Latf", "Latf" */
- 384, -384, /* 1157: "Latg", "Latg" */
- 401, -402, /* 1159: "Lepc", "Lepcha" */
- 406, -407, /* 1161: "Limb", "Limbu" */
- 408, -408, /* 1163: "Lina", "Lina" */
- 409, -413, /* 1165: "Linb", "Linear_B" */
- 427, -428, /* 1167: "Lyci", "Lycian" */
- 429, -430, /* 1169: "Lydi", "Lydian" */
- 462, -436, /* 1171: "Mlym", "Malayalam" */
- 437, -437, /* 1173: "Mand", "Mand" */
- 439, -439, /* 1175: "Mani", "Mani" */
- 790, -790, /* 1177: "Zmth", "Zmth" */
- 445, -445, /* 1179: "Maya", "Maya" */
- 470, -470, /* 1181: "Mtei", "Mtei" */
- 452, -452, /* 1183: "Mero", "Mero" */
- 467, -468, /* 1185: "Mong", "Mongolian" */
- 469, -469, /* 1187: "Moon", "Moon" */
- 473, -472, /* 1189: "Mymr", "Myanmar" */
- 701, -503, /* 1191: "Talu", "New_Tai_Lue" */
- 507, -506, /* 1193: "Nkoo", "Nko" */
- 533, -534, /* 1195: "Ogam", "Ogham" */
- 167, -167, /* 1197: "Cyrs", "Cyrs" */
- 302, -302, /* 1199: "Hung", "Hung" */
- 331, -537, /* 1201: "Ital", "Old_Italic" */
- 566, -566, /* 1203: "Perm", "Perm" */
- 770, -538, /* 1205: "Xpeo", "Old_Persian" */
- 536, -535, /* 1207: "Olck", "Ol_Chiki" */
- 543, -541, /* 1209: "Orya", "Oriya" */
- 542, -542, /* 1211: "Orkh", "Orkh" */
- 544, -545, /* 1213: "Osma", "Osmanya" */
- 300, -300, /* 1215: "Hmng", "Hmng" */
- 568, -569, /* 1217: "Phag", "Phags_Pa" */
- 574, -575, /* 1219: "Phnx", "Phoenician" */
- 579, -579, /* 1221: "Plrd", "Plrd" */
- 572, -572, /* 1223: "Phlp", "Phlp" */
- 610, -603, /* 1225: "Rjng", "Rejang" */
- 611, -611, /* 1227: "Roro", "Roro" */
- 613, -612, /* 1229: "Runr", "Runic" */
- 629, -629, /* 1231: "Samr", "Samr" */
- 630, -630, /* 1233: "Sara", "Sara" */
- 631, -632, /* 1235: "Saur", "Saurashtra" */
- 645, -644, /* 1237: "Shaw", "Shavian" */
- 643, -643, /* 1239: "Sgnw", "Sgnw" */
- 285, -285, /* 1241: "Hans", "Hans" */
- 651, -652, /* 1243: "Sinh", "Sinhala" */
- 670, -671, /* 1245: "Sund", "Sundanese" */
- 683, -684, /* 1247: "Sylo", "Syloti_Nagri" */
- 793, -793, /* 1249: "Zsym", "Zsym" */
- 686, -688, /* 1251: "Syrc", "Syriac" */
- 714, -693, /* 1253: "Tglg", "Tagalog" */
- 694, -695, /* 1255: "Tagb", "Tagbanwa" */
- 700, -698, /* 1257: "Tale", "Tai_Le" */
- 704, -704, /* 1259: "Tavt", "Tavt" */
- 703, -702, /* 1261: "Taml", "Tamil" */
- 707, -708, /* 1263: "Telu", "Telugu" */
- 709, -709, /* 1265: "Teng", "Teng" */
- 715, -716, /* 1267: "Thaa", "Thaana" */
- 717, -717, /* 1269: "Thai", "Thai" */
- 719, -718, /* 1271: "Tibt", "Tibetan" */
- 713, -720, /* 1273: "Tfng", "Tifinagh" */
- 286, -286, /* 1275: "Hant", "Hant" */
- 730, -731, /* 1277: "Ugar", "Ugaritic" */
- 796, -736, /* 1279: "Zzzz", "Unknown" */
- 794, -794, /* 1281: "Zxxx", "Zxxx" */
- 745, -744, /* 1283: "Vaii", "Vai" */
- 753, -753, /* 1285: "Visp", "Visp" */
- 690, -690, /* 1287: "Syrj", "Syrj" */
- 780, -777, /* 1289: "Yiii", "Yi" */
-};
-
-#define MAX_NAMES_PER_GROUP 4
-
-const int32_t VALUES_GCB_COUNT = 12;
-
-const Alias VALUES_GCB[] = {
- Alias((int32_t) U_GCB_CONTROL, 0),
- Alias((int32_t) U_GCB_CR, 2),
- Alias((int32_t) U_GCB_EXTEND, 4),
- Alias((int32_t) U_GCB_L, 6),
- Alias((int32_t) U_GCB_LF, 8),
- Alias((int32_t) U_GCB_LV, 10),
- Alias((int32_t) U_GCB_LVT, 12),
- Alias((int32_t) U_GCB_OTHER, 14),
- Alias((int32_t) U_GCB_PREPEND, 16),
- Alias((int32_t) U_GCB_SPACING_MARK, 18),
- Alias((int32_t) U_GCB_T, 20),
- Alias((int32_t) U_GCB_V, 22),
-};
-
-const int32_t VALUES_NFC_QC_COUNT = 3;
-
-const Alias VALUES_NFC_QC[] = {
- Alias((int32_t) UNORM_MAYBE, 24),
- Alias((int32_t) UNORM_NO, 26),
- Alias((int32_t) UNORM_YES, 28),
-};
-
-const int32_t VALUES_NFD_QC_COUNT = 2;
-
-const Alias VALUES_NFD_QC[] = {
- Alias((int32_t) UNORM_NO, 26),
- Alias((int32_t) UNORM_YES, 28),
-};
-
-const int32_t VALUES_NFKC_QC_COUNT = 3;
-
-const Alias VALUES_NFKC_QC[] = {
- Alias((int32_t) UNORM_MAYBE, 24),
- Alias((int32_t) UNORM_NO, 26),
- Alias((int32_t) UNORM_YES, 28),
-};
-
-const int32_t VALUES_NFKD_QC_COUNT = 2;
-
-const Alias VALUES_NFKD_QC[] = {
- Alias((int32_t) UNORM_NO, 26),
- Alias((int32_t) UNORM_YES, 28),
-};
-
-const int32_t VALUES_SB_COUNT = 15;
-
-const Alias VALUES_SB[] = {
- Alias((int32_t) U_SB_ATERM, 30),
- Alias((int32_t) U_SB_CLOSE, 32),
- Alias((int32_t) U_SB_CR, 2),
- Alias((int32_t) U_SB_EXTEND, 4),
- Alias((int32_t) U_SB_FORMAT, 34),
- Alias((int32_t) U_SB_LF, 8),
- Alias((int32_t) U_SB_LOWER, 36),
- Alias((int32_t) U_SB_NUMERIC, 38),
- Alias((int32_t) U_SB_OLETTER, 40),
- Alias((int32_t) U_SB_OTHER, 14),
- Alias((int32_t) U_SB_SCONTINUE, 42),
- Alias((int32_t) U_SB_SEP, 44),
- Alias((int32_t) U_SB_SP, 46),
- Alias((int32_t) U_SB_STERM, 48),
- Alias((int32_t) U_SB_UPPER, 50),
-};
-
-const int32_t VALUES_WB_COUNT = 13;
-
-const Alias VALUES_WB[] = {
- Alias((int32_t) U_WB_ALETTER, 52),
- Alias((int32_t) U_WB_CR, 2),
- Alias((int32_t) U_WB_EXTEND, 54),
- Alias((int32_t) U_WB_EXTENDNUMLET, 56),
- Alias((int32_t) U_WB_FORMAT, 34),
- Alias((int32_t) U_WB_KATAKANA, 58),
- Alias((int32_t) U_WB_LF, 8),
- Alias((int32_t) U_WB_MIDLETTER, 60),
- Alias((int32_t) U_WB_MIDNUM, 62),
- Alias((int32_t) U_WB_MIDNUMLET, 64),
- Alias((int32_t) U_WB_NEWLINE, 66),
- Alias((int32_t) U_WB_NUMERIC, 38),
- Alias((int32_t) U_WB_OTHER, 14),
-};
-
-const int32_t VALUES_bc_COUNT = 19;
-
-const Alias VALUES_bc[] = {
- Alias((int32_t) U_ARABIC_NUMBER, 240),
- Alias((int32_t) U_BLOCK_SEPARATOR, 242),
- Alias((int32_t) U_BOUNDARY_NEUTRAL, 244),
- Alias((int32_t) U_COMMON_NUMBER_SEPARATOR, 246),
- Alias((int32_t) U_DIR_NON_SPACING_MARK, 248),
- Alias((int32_t) U_EUROPEAN_NUMBER, 250),
- Alias((int32_t) U_EUROPEAN_NUMBER_SEPARATOR, 252),
- Alias((int32_t) U_EUROPEAN_NUMBER_TERMINATOR, 254),
- Alias((int32_t) U_LEFT_TO_RIGHT, 256),
- Alias((int32_t) U_LEFT_TO_RIGHT_EMBEDDING, 258),
- Alias((int32_t) U_LEFT_TO_RIGHT_OVERRIDE, 260),
- Alias((int32_t) U_OTHER_NEUTRAL, 262),
- Alias((int32_t) U_POP_DIRECTIONAL_FORMAT, 264),
- Alias((int32_t) U_RIGHT_TO_LEFT, 266),
- Alias((int32_t) U_RIGHT_TO_LEFT_ARABIC, 268),
- Alias((int32_t) U_RIGHT_TO_LEFT_EMBEDDING, 270),
- Alias((int32_t) U_RIGHT_TO_LEFT_OVERRIDE, 272),
- Alias((int32_t) U_SEGMENT_SEPARATOR, 274),
- Alias((int32_t) U_WHITE_SPACE_NEUTRAL, 276),
-};
-
-const int32_t VALUES_binprop_COUNT = 2;
-
-const Alias VALUES_binprop[] = {
- Alias((int32_t) 0, 278),
- Alias((int32_t) 1, 282),
-};
-
-const int32_t VALUES_blk_COUNT = 172;
-
-const Alias VALUES_blk[] = {
- Alias((int32_t) UBLOCK_AEGEAN_NUMBERS, 286),
- Alias((int32_t) UBLOCK_ALPHABETIC_PRESENTATION_FORMS, 288),
- Alias((int32_t) UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION, 290),
- Alias((int32_t) UBLOCK_ANCIENT_GREEK_NUMBERS, 292),
- Alias((int32_t) UBLOCK_ANCIENT_SYMBOLS, 294),
- Alias((int32_t) UBLOCK_ARABIC, 296),
- Alias((int32_t) UBLOCK_ARABIC_PRESENTATION_FORMS_A, 298),
- Alias((int32_t) UBLOCK_ARABIC_PRESENTATION_FORMS_B, 301),
- Alias((int32_t) UBLOCK_ARABIC_SUPPLEMENT, 303),
- Alias((int32_t) UBLOCK_ARMENIAN, 305),
- Alias((int32_t) UBLOCK_ARROWS, 307),
- Alias((int32_t) UBLOCK_BALINESE, 309),
- Alias((int32_t) UBLOCK_BASIC_LATIN, 311),
- Alias((int32_t) UBLOCK_BENGALI, 314),
- Alias((int32_t) UBLOCK_BLOCK_ELEMENTS, 316),
- Alias((int32_t) UBLOCK_BOPOMOFO, 318),
- Alias((int32_t) UBLOCK_BOPOMOFO_EXTENDED, 320),
- Alias((int32_t) UBLOCK_BOX_DRAWING, 322),
- Alias((int32_t) UBLOCK_BRAILLE_PATTERNS, 324),
- Alias((int32_t) UBLOCK_BUGINESE, 326),
- Alias((int32_t) UBLOCK_BUHID, 328),
- Alias((int32_t) UBLOCK_BYZANTINE_MUSICAL_SYMBOLS, 330),
- Alias((int32_t) UBLOCK_CARIAN, 332),
- Alias((int32_t) UBLOCK_CHAM, 334),
- Alias((int32_t) UBLOCK_CHEROKEE, 336),
- Alias((int32_t) UBLOCK_CJK_COMPATIBILITY, 338),
- Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_FORMS, 340),
- Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, 342),
- Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 344),
- Alias((int32_t) UBLOCK_CJK_RADICALS_SUPPLEMENT, 346),
- Alias((int32_t) UBLOCK_CJK_STROKES, 348),
- Alias((int32_t) UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION, 350),
- Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS, 352),
- Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 354),
- Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 356),
- Alias((int32_t) UBLOCK_COMBINING_DIACRITICAL_MARKS, 358),
- Alias((int32_t) UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 360),
- Alias((int32_t) UBLOCK_COMBINING_HALF_MARKS, 362),
- Alias((int32_t) UBLOCK_COMBINING_MARKS_FOR_SYMBOLS, 364),
- Alias((int32_t) UBLOCK_CONTROL_PICTURES, 367),
- Alias((int32_t) UBLOCK_COPTIC, 369),
- Alias((int32_t) UBLOCK_COUNTING_ROD_NUMERALS, 371),
- Alias((int32_t) UBLOCK_CUNEIFORM, 373),
- Alias((int32_t) UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 375),
- Alias((int32_t) UBLOCK_CURRENCY_SYMBOLS, 377),
- Alias((int32_t) UBLOCK_CYPRIOT_SYLLABARY, 379),
- Alias((int32_t) UBLOCK_CYRILLIC, 381),
- Alias((int32_t) UBLOCK_CYRILLIC_EXTENDED_A, 383),
- Alias((int32_t) UBLOCK_CYRILLIC_EXTENDED_B, 385),
- Alias((int32_t) UBLOCK_CYRILLIC_SUPPLEMENT, 387),
- Alias((int32_t) UBLOCK_DESERET, 390),
- Alias((int32_t) UBLOCK_DEVANAGARI, 392),
- Alias((int32_t) UBLOCK_DINGBATS, 394),
- Alias((int32_t) UBLOCK_DOMINO_TILES, 396),
- Alias((int32_t) UBLOCK_ENCLOSED_ALPHANUMERICS, 398),
- Alias((int32_t) UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS, 400),
- Alias((int32_t) UBLOCK_ETHIOPIC, 402),
- Alias((int32_t) UBLOCK_ETHIOPIC_EXTENDED, 404),
- Alias((int32_t) UBLOCK_ETHIOPIC_SUPPLEMENT, 406),
- Alias((int32_t) UBLOCK_GENERAL_PUNCTUATION, 408),
- Alias((int32_t) UBLOCK_GEOMETRIC_SHAPES, 410),
- Alias((int32_t) UBLOCK_GEORGIAN, 412),
- Alias((int32_t) UBLOCK_GEORGIAN_SUPPLEMENT, 414),
- Alias((int32_t) UBLOCK_GLAGOLITIC, 416),
- Alias((int32_t) UBLOCK_GOTHIC, 418),
- Alias((int32_t) UBLOCK_GREEK, 420),
- Alias((int32_t) UBLOCK_GREEK_EXTENDED, 423),
- Alias((int32_t) UBLOCK_GUJARATI, 425),
- Alias((int32_t) UBLOCK_GURMUKHI, 427),
- Alias((int32_t) UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, 429),
- Alias((int32_t) UBLOCK_HANGUL_COMPATIBILITY_JAMO, 431),
- Alias((int32_t) UBLOCK_HANGUL_JAMO, 433),
- Alias((int32_t) UBLOCK_HANGUL_SYLLABLES, 435),
- Alias((int32_t) UBLOCK_HANUNOO, 437),
- Alias((int32_t) UBLOCK_HEBREW, 439),
- Alias((int32_t) UBLOCK_HIGH_PRIVATE_USE_SURROGATES, 441),
- Alias((int32_t) UBLOCK_HIGH_SURROGATES, 443),
- Alias((int32_t) UBLOCK_HIRAGANA, 445),
- Alias((int32_t) UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 447),
- Alias((int32_t) UBLOCK_IPA_EXTENSIONS, 449),
- Alias((int32_t) UBLOCK_KANBUN, 451),
- Alias((int32_t) UBLOCK_KANGXI_RADICALS, 453),
- Alias((int32_t) UBLOCK_KANNADA, 455),
- Alias((int32_t) UBLOCK_KATAKANA, 457),
- Alias((int32_t) UBLOCK_KATAKANA_PHONETIC_EXTENSIONS, 459),
- Alias((int32_t) UBLOCK_KAYAH_LI, 461),
- Alias((int32_t) UBLOCK_KHAROSHTHI, 463),
- Alias((int32_t) UBLOCK_KHMER, 465),
- Alias((int32_t) UBLOCK_KHMER_SYMBOLS, 467),
- Alias((int32_t) UBLOCK_LAO, 469),
- Alias((int32_t) UBLOCK_LATIN_1_SUPPLEMENT, 471),
- Alias((int32_t) UBLOCK_LATIN_EXTENDED_A, 474),
- Alias((int32_t) UBLOCK_LATIN_EXTENDED_ADDITIONAL, 476),
- Alias((int32_t) UBLOCK_LATIN_EXTENDED_B, 478),
- Alias((int32_t) UBLOCK_LATIN_EXTENDED_C, 480),
- Alias((int32_t) UBLOCK_LATIN_EXTENDED_D, 482),
- Alias((int32_t) UBLOCK_LEPCHA, 484),
- Alias((int32_t) UBLOCK_LETTERLIKE_SYMBOLS, 486),
- Alias((int32_t) UBLOCK_LIMBU, 488),
- Alias((int32_t) UBLOCK_LINEAR_B_IDEOGRAMS, 490),
- Alias((int32_t) UBLOCK_LINEAR_B_SYLLABARY, 492),
- Alias((int32_t) UBLOCK_LOW_SURROGATES, 494),
- Alias((int32_t) UBLOCK_LYCIAN, 496),
- Alias((int32_t) UBLOCK_LYDIAN, 498),
- Alias((int32_t) UBLOCK_MAHJONG_TILES, 500),
- Alias((int32_t) UBLOCK_MALAYALAM, 502),
- Alias((int32_t) UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 504),
- Alias((int32_t) UBLOCK_MATHEMATICAL_OPERATORS, 506),
- Alias((int32_t) UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 508),
- Alias((int32_t) UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 510),
- Alias((int32_t) UBLOCK_MISCELLANEOUS_SYMBOLS, 512),
- Alias((int32_t) UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 514),
- Alias((int32_t) UBLOCK_MISCELLANEOUS_TECHNICAL, 516),
- Alias((int32_t) UBLOCK_MODIFIER_TONE_LETTERS, 518),
- Alias((int32_t) UBLOCK_MONGOLIAN, 520),
- Alias((int32_t) UBLOCK_MUSICAL_SYMBOLS, 522),
- Alias((int32_t) UBLOCK_MYANMAR, 524),
- Alias((int32_t) UBLOCK_NEW_TAI_LUE, 526),
- Alias((int32_t) UBLOCK_NKO, 528),
- Alias((int32_t) UBLOCK_NO_BLOCK, 530),
- Alias((int32_t) UBLOCK_NUMBER_FORMS, 532),
- Alias((int32_t) UBLOCK_OGHAM, 534),
- Alias((int32_t) UBLOCK_OLD_ITALIC, 536),
- Alias((int32_t) UBLOCK_OLD_PERSIAN, 538),
- Alias((int32_t) UBLOCK_OL_CHIKI, 540),
- Alias((int32_t) UBLOCK_OPTICAL_CHARACTER_RECOGNITION, 542),
- Alias((int32_t) UBLOCK_ORIYA, 544),
- Alias((int32_t) UBLOCK_OSMANYA, 546),
- Alias((int32_t) UBLOCK_PHAGS_PA, 548),
- Alias((int32_t) UBLOCK_PHAISTOS_DISC, 550),
- Alias((int32_t) UBLOCK_PHOENICIAN, 552),
- Alias((int32_t) UBLOCK_PHONETIC_EXTENSIONS, 554),
- Alias((int32_t) UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT, 556),
- Alias((int32_t) UBLOCK_PRIVATE_USE_AREA, 558),
- Alias((int32_t) UBLOCK_REJANG, 561),
- Alias((int32_t) UBLOCK_RUNIC, 563),
- Alias((int32_t) UBLOCK_SAURASHTRA, 565),
- Alias((int32_t) UBLOCK_SHAVIAN, 567),
- Alias((int32_t) UBLOCK_SINHALA, 569),
- Alias((int32_t) UBLOCK_SMALL_FORM_VARIANTS, 571),
- Alias((int32_t) UBLOCK_SPACING_MODIFIER_LETTERS, 573),
- Alias((int32_t) UBLOCK_SPECIALS, 575),
- Alias((int32_t) UBLOCK_SUNDANESE, 577),
- Alias((int32_t) UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS, 579),
- Alias((int32_t) UBLOCK_SUPPLEMENTAL_ARROWS_A, 581),
- Alias((int32_t) UBLOCK_SUPPLEMENTAL_ARROWS_B, 583),
- Alias((int32_t) UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 585),
- Alias((int32_t) UBLOCK_SUPPLEMENTAL_PUNCTUATION, 587),
- Alias((int32_t) UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A, 589),
- Alias((int32_t) UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B, 591),
- Alias((int32_t) UBLOCK_SYLOTI_NAGRI, 593),
- Alias((int32_t) UBLOCK_SYRIAC, 595),
- Alias((int32_t) UBLOCK_TAGALOG, 597),
- Alias((int32_t) UBLOCK_TAGBANWA, 599),
- Alias((int32_t) UBLOCK_TAGS, 601),
- Alias((int32_t) UBLOCK_TAI_LE, 603),
- Alias((int32_t) UBLOCK_TAI_XUAN_JING_SYMBOLS, 605),
- Alias((int32_t) UBLOCK_TAMIL, 607),
- Alias((int32_t) UBLOCK_TELUGU, 609),
- Alias((int32_t) UBLOCK_THAANA, 611),
- Alias((int32_t) UBLOCK_THAI, 613),
- Alias((int32_t) UBLOCK_TIBETAN, 615),
- Alias((int32_t) UBLOCK_TIFINAGH, 617),
- Alias((int32_t) UBLOCK_UGARITIC, 619),
- Alias((int32_t) UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 621),
- Alias((int32_t) UBLOCK_VAI, 624),
- Alias((int32_t) UBLOCK_VARIATION_SELECTORS, 626),
- Alias((int32_t) UBLOCK_VARIATION_SELECTORS_SUPPLEMENT, 628),
- Alias((int32_t) UBLOCK_VERTICAL_FORMS, 630),
- Alias((int32_t) UBLOCK_YIJING_HEXAGRAM_SYMBOLS, 632),
- Alias((int32_t) UBLOCK_YI_RADICALS, 634),
- Alias((int32_t) UBLOCK_YI_SYLLABLES, 636),
-};
-
-const int32_t VALUES_ccc_COUNT = 19;
-
-const Alias VALUES_ccc[] = {
- Alias((int32_t) 0, 638),
- Alias((int32_t) 1, 640),
- Alias((int32_t) 200, 642),
- Alias((int32_t) 202, 644),
- Alias((int32_t) 216, 646),
- Alias((int32_t) 218, 648),
- Alias((int32_t) 220, 650),
- Alias((int32_t) 222, 652),
- Alias((int32_t) 224, 654),
- Alias((int32_t) 226, 656),
- Alias((int32_t) 228, 658),
- Alias((int32_t) 230, 660),
- Alias((int32_t) 232, 662),
- Alias((int32_t) 233, 664),
- Alias((int32_t) 234, 666),
- Alias((int32_t) 240, 668),
- Alias((int32_t) 7, 670),
- Alias((int32_t) 8, 672),
- Alias((int32_t) 9, 674),
-};
-
-const int32_t VALUES_dt_COUNT = 18;
-
-const Alias VALUES_dt[] = {
- Alias((int32_t) U_DT_CANONICAL, 676),
- Alias((int32_t) U_DT_CIRCLE, 679),
- Alias((int32_t) U_DT_COMPAT, 682),
- Alias((int32_t) U_DT_FINAL, 685),
- Alias((int32_t) U_DT_FONT, 688),
- Alias((int32_t) U_DT_FRACTION, 690),
- Alias((int32_t) U_DT_INITIAL, 693),
- Alias((int32_t) U_DT_ISOLATED, 696),
- Alias((int32_t) U_DT_MEDIAL, 699),
- Alias((int32_t) U_DT_NARROW, 702),
- Alias((int32_t) U_DT_NOBREAK, 705),
- Alias((int32_t) U_DT_NONE, 708),
- Alias((int32_t) U_DT_SMALL, 710),
- Alias((int32_t) U_DT_SQUARE, 713),
- Alias((int32_t) U_DT_SUB, 716),
- Alias((int32_t) U_DT_SUPER, 718),
- Alias((int32_t) U_DT_VERTICAL, 721),
- Alias((int32_t) U_DT_WIDE, 724),
-};
-
-const int32_t VALUES_ea_COUNT = 6;
-
-const Alias VALUES_ea[] = {
- Alias((int32_t) U_EA_AMBIGUOUS, 726),
- Alias((int32_t) U_EA_FULLWIDTH, 728),
- Alias((int32_t) U_EA_HALFWIDTH, 730),
- Alias((int32_t) U_EA_NARROW, 732),
- Alias((int32_t) U_EA_NEUTRAL, 734),
- Alias((int32_t) U_EA_WIDE, 736),
-};
-
-const int32_t VALUES_gc_COUNT = 30;
-
-const Alias VALUES_gc[] = {
- Alias((int32_t) U_COMBINING_SPACING_MARK, 738),
- Alias((int32_t) U_CONNECTOR_PUNCTUATION, 740),
- Alias((int32_t) U_CONTROL_CHAR, 742),
- Alias((int32_t) U_CURRENCY_SYMBOL, 745),
- Alias((int32_t) U_DASH_PUNCTUATION, 747),
- Alias((int32_t) U_DECIMAL_DIGIT_NUMBER, 749),
- Alias((int32_t) U_ENCLOSING_MARK, 752),
- Alias((int32_t) U_END_PUNCTUATION, 754),
- Alias((int32_t) U_FINAL_PUNCTUATION, 756),
- Alias((int32_t) U_FORMAT_CHAR, 758),
- Alias((int32_t) U_GENERAL_OTHER_TYPES, 760),
- Alias((int32_t) U_INITIAL_PUNCTUATION, 762),
- Alias((int32_t) U_LETTER_NUMBER, 764),
- Alias((int32_t) U_LINE_SEPARATOR, 766),
- Alias((int32_t) U_LOWERCASE_LETTER, 768),
- Alias((int32_t) U_MATH_SYMBOL, 770),
- Alias((int32_t) U_MODIFIER_LETTER, 772),
- Alias((int32_t) U_MODIFIER_SYMBOL, 774),
- Alias((int32_t) U_NON_SPACING_MARK, 776),
- Alias((int32_t) U_OTHER_LETTER, 778),
- Alias((int32_t) U_OTHER_NUMBER, 780),
- Alias((int32_t) U_OTHER_PUNCTUATION, 782),
- Alias((int32_t) U_OTHER_SYMBOL, 784),
- Alias((int32_t) U_PARAGRAPH_SEPARATOR, 786),
- Alias((int32_t) U_PRIVATE_USE_CHAR, 788),
- Alias((int32_t) U_SPACE_SEPARATOR, 790),
- Alias((int32_t) U_START_PUNCTUATION, 792),
- Alias((int32_t) U_SURROGATE, 794),
- Alias((int32_t) U_TITLECASE_LETTER, 796),
- Alias((int32_t) U_UPPERCASE_LETTER, 798),
-};
-
-const int32_t VALUES_gcm_COUNT = 38;
-
-const Alias VALUES_gcm[] = {
- Alias((int32_t) U_GC_CC_MASK, 742),
- Alias((int32_t) U_GC_CF_MASK, 758),
- Alias((int32_t) U_GC_CN_MASK, 760),
- Alias((int32_t) U_GC_CO_MASK, 788),
- Alias((int32_t) U_GC_CS_MASK, 794),
- Alias((int32_t) U_GC_C_MASK, 800),
- Alias((int32_t) U_GC_LC_MASK, 802),
- Alias((int32_t) U_GC_LL_MASK, 768),
- Alias((int32_t) U_GC_LM_MASK, 772),
- Alias((int32_t) U_GC_LO_MASK, 778),
- Alias((int32_t) U_GC_LT_MASK, 796),
- Alias((int32_t) U_GC_LU_MASK, 798),
- Alias((int32_t) U_GC_L_MASK, 804),
- Alias((int32_t) U_GC_MC_MASK, 738),
- Alias((int32_t) U_GC_ME_MASK, 752),
- Alias((int32_t) U_GC_MN_MASK, 776),
- Alias((int32_t) U_GC_M_MASK, 806),
- Alias((int32_t) U_GC_ND_MASK, 749),
- Alias((int32_t) U_GC_NL_MASK, 764),
- Alias((int32_t) U_GC_NO_MASK, 780),
- Alias((int32_t) U_GC_N_MASK, 808),
- Alias((int32_t) U_GC_PC_MASK, 740),
- Alias((int32_t) U_GC_PD_MASK, 747),
- Alias((int32_t) U_GC_PE_MASK, 754),
- Alias((int32_t) U_GC_PF_MASK, 756),
- Alias((int32_t) U_GC_PI_MASK, 762),
- Alias((int32_t) U_GC_PO_MASK, 782),
- Alias((int32_t) U_GC_PS_MASK, 792),
- Alias((int32_t) U_GC_P_MASK, 810),
- Alias((int32_t) U_GC_SC_MASK, 745),
- Alias((int32_t) U_GC_SK_MASK, 774),
- Alias((int32_t) U_GC_SM_MASK, 770),
- Alias((int32_t) U_GC_SO_MASK, 784),
- Alias((int32_t) U_GC_S_MASK, 813),
- Alias((int32_t) U_GC_ZL_MASK, 766),
- Alias((int32_t) U_GC_ZP_MASK, 786),
- Alias((int32_t) U_GC_ZS_MASK, 790),
- Alias((int32_t) U_GC_Z_MASK, 815),
-};
-
-const int32_t VALUES_hst_COUNT = 6;
-
-const Alias VALUES_hst[] = {
- Alias((int32_t) U_HST_LEADING_JAMO, 817),
- Alias((int32_t) U_HST_LVT_SYLLABLE, 819),
- Alias((int32_t) U_HST_LV_SYLLABLE, 821),
- Alias((int32_t) U_HST_NOT_APPLICABLE, 823),
- Alias((int32_t) U_HST_TRAILING_JAMO, 825),
- Alias((int32_t) U_HST_VOWEL_JAMO, 827),
-};
-
-const int32_t VALUES_jg_COUNT = 55;
-
-const Alias VALUES_jg[] = {
- Alias((int32_t) U_JG_AIN, 829),
- Alias((int32_t) U_JG_ALAPH, 831),
- Alias((int32_t) U_JG_ALEF, 833),
- Alias((int32_t) U_JG_BEH, 835),
- Alias((int32_t) U_JG_BETH, 837),
- Alias((int32_t) U_JG_BURUSHASKI_YEH_BARREE, 839),
- Alias((int32_t) U_JG_DAL, 841),
- Alias((int32_t) U_JG_DALATH_RISH, 843),
- Alias((int32_t) U_JG_E, 845),
- Alias((int32_t) U_JG_FE, 847),
- Alias((int32_t) U_JG_FEH, 849),
- Alias((int32_t) U_JG_FINAL_SEMKATH, 851),
- Alias((int32_t) U_JG_GAF, 853),
- Alias((int32_t) U_JG_GAMAL, 855),
- Alias((int32_t) U_JG_HAH, 857),
- Alias((int32_t) U_JG_HAMZA_ON_HEH_GOAL, 859),
- Alias((int32_t) U_JG_HE, 861),
- Alias((int32_t) U_JG_HEH, 863),
- Alias((int32_t) U_JG_HEH_GOAL, 865),
- Alias((int32_t) U_JG_HETH, 867),
- Alias((int32_t) U_JG_KAF, 869),
- Alias((int32_t) U_JG_KAPH, 871),
- Alias((int32_t) U_JG_KHAPH, 873),
- Alias((int32_t) U_JG_KNOTTED_HEH, 875),
- Alias((int32_t) U_JG_LAM, 877),
- Alias((int32_t) U_JG_LAMADH, 879),
- Alias((int32_t) U_JG_MEEM, 881),
- Alias((int32_t) U_JG_MIM, 883),
- Alias((int32_t) U_JG_NOON, 885),
- Alias((int32_t) U_JG_NO_JOINING_GROUP, 887),
- Alias((int32_t) U_JG_NUN, 889),
- Alias((int32_t) U_JG_PE, 891),
- Alias((int32_t) U_JG_QAF, 893),
- Alias((int32_t) U_JG_QAPH, 895),
- Alias((int32_t) U_JG_REH, 897),
- Alias((int32_t) U_JG_REVERSED_PE, 899),
- Alias((int32_t) U_JG_SAD, 901),
- Alias((int32_t) U_JG_SADHE, 903),
- Alias((int32_t) U_JG_SEEN, 905),
- Alias((int32_t) U_JG_SEMKATH, 907),
- Alias((int32_t) U_JG_SHIN, 909),
- Alias((int32_t) U_JG_SWASH_KAF, 911),
- Alias((int32_t) U_JG_SYRIAC_WAW, 913),
- Alias((int32_t) U_JG_TAH, 915),
- Alias((int32_t) U_JG_TAW, 917),
- Alias((int32_t) U_JG_TEH_MARBUTA, 919),
- Alias((int32_t) U_JG_TETH, 921),
- Alias((int32_t) U_JG_WAW, 923),
- Alias((int32_t) U_JG_YEH, 925),
- Alias((int32_t) U_JG_YEH_BARREE, 927),
- Alias((int32_t) U_JG_YEH_WITH_TAIL, 929),
- Alias((int32_t) U_JG_YUDH, 931),
- Alias((int32_t) U_JG_YUDH_HE, 933),
- Alias((int32_t) U_JG_ZAIN, 935),
- Alias((int32_t) U_JG_ZHAIN, 937),
-};
-
-const int32_t VALUES_jt_COUNT = 6;
-
-const Alias VALUES_jt[] = {
- Alias((int32_t) U_JT_DUAL_JOINING, 939),
- Alias((int32_t) U_JT_JOIN_CAUSING, 941),
- Alias((int32_t) U_JT_LEFT_JOINING, 943),
- Alias((int32_t) U_JT_NON_JOINING, 945),
- Alias((int32_t) U_JT_RIGHT_JOINING, 947),
- Alias((int32_t) U_JT_TRANSPARENT, 949),
-};
-
-const int32_t VALUES_lb_COUNT = 36;
-
-const Alias VALUES_lb[] = {
- Alias((int32_t) U_LB_ALPHABETIC, 951),
- Alias((int32_t) U_LB_AMBIGUOUS, 953),
- Alias((int32_t) U_LB_BREAK_AFTER, 955),
- Alias((int32_t) U_LB_BREAK_BEFORE, 957),
- Alias((int32_t) U_LB_BREAK_BOTH, 959),
- Alias((int32_t) U_LB_BREAK_SYMBOLS, 961),
- Alias((int32_t) U_LB_CARRIAGE_RETURN, 963),
- Alias((int32_t) U_LB_CLOSE_PUNCTUATION, 965),
- Alias((int32_t) U_LB_COMBINING_MARK, 967),
- Alias((int32_t) U_LB_COMPLEX_CONTEXT, 969),
- Alias((int32_t) U_LB_CONTINGENT_BREAK, 971),
- Alias((int32_t) U_LB_EXCLAMATION, 973),
- Alias((int32_t) U_LB_GLUE, 975),
- Alias((int32_t) U_LB_H2, 977),
- Alias((int32_t) U_LB_H3, 979),
- Alias((int32_t) U_LB_HYPHEN, 981),
- Alias((int32_t) U_LB_IDEOGRAPHIC, 983),
- Alias((int32_t) U_LB_INFIX_NUMERIC, 985),
- Alias((int32_t) U_LB_INSEPARABLE, 987),
- Alias((int32_t) U_LB_JL, 990),
- Alias((int32_t) U_LB_JT, 992),
- Alias((int32_t) U_LB_JV, 994),
- Alias((int32_t) U_LB_LINE_FEED, 996),
- Alias((int32_t) U_LB_MANDATORY_BREAK, 998),
- Alias((int32_t) U_LB_NEXT_LINE, 1000),
- Alias((int32_t) U_LB_NONSTARTER, 1002),
- Alias((int32_t) U_LB_NUMERIC, 38),
- Alias((int32_t) U_LB_OPEN_PUNCTUATION, 1004),
- Alias((int32_t) U_LB_POSTFIX_NUMERIC, 1006),
- Alias((int32_t) U_LB_PREFIX_NUMERIC, 1008),
- Alias((int32_t) U_LB_QUOTATION, 1010),
- Alias((int32_t) U_LB_SPACE, 1012),
- Alias((int32_t) U_LB_SURROGATE, 1014),
- Alias((int32_t) U_LB_UNKNOWN, 1016),
- Alias((int32_t) U_LB_WORD_JOINER, 1018),
- Alias((int32_t) U_LB_ZWSPACE, 1020),
-};
-
-const int32_t VALUES_lccc_COUNT = 19;
-
-const Alias VALUES_lccc[] = {
- Alias((int32_t) 0, 638),
- Alias((int32_t) 1, 640),
- Alias((int32_t) 200, 642),
- Alias((int32_t) 202, 644),
- Alias((int32_t) 216, 646),
- Alias((int32_t) 218, 648),
- Alias((int32_t) 220, 650),
- Alias((int32_t) 222, 652),
- Alias((int32_t) 224, 654),
- Alias((int32_t) 226, 656),
- Alias((int32_t) 228, 658),
- Alias((int32_t) 230, 660),
- Alias((int32_t) 232, 662),
- Alias((int32_t) 233, 664),
- Alias((int32_t) 234, 666),
- Alias((int32_t) 240, 668),
- Alias((int32_t) 7, 670),
- Alias((int32_t) 8, 672),
- Alias((int32_t) 9, 674),
-};
-
-const int32_t VALUES_nt_COUNT = 4;
-
-const Alias VALUES_nt[] = {
- Alias((int32_t) U_NT_DECIMAL, 1022),
- Alias((int32_t) U_NT_DIGIT, 1024),
- Alias((int32_t) U_NT_NONE, 1026),
- Alias((int32_t) U_NT_NUMERIC, 1028),
-};
-
-const int32_t VALUES_sc_COUNT = 130;
-
-const Alias VALUES_sc[] = {
- Alias((int32_t) USCRIPT_ARABIC, 1030),
- Alias((int32_t) USCRIPT_ARMENIAN, 1032),
- Alias((int32_t) USCRIPT_AVESTAN, 1034),
- Alias((int32_t) USCRIPT_BALINESE, 1036),
- Alias((int32_t) USCRIPT_BATAK, 1038),
- Alias((int32_t) USCRIPT_BENGALI, 1040),
- Alias((int32_t) USCRIPT_BLISSYMBOLS, 1042),
- Alias((int32_t) USCRIPT_BOOK_PAHLAVI, 1044),
- Alias((int32_t) USCRIPT_BOPOMOFO, 1046),
- Alias((int32_t) USCRIPT_BRAHMI, 1048),
- Alias((int32_t) USCRIPT_BRAILLE, 1050),
- Alias((int32_t) USCRIPT_BUGINESE, 1052),
- Alias((int32_t) USCRIPT_BUHID, 1054),
- Alias((int32_t) USCRIPT_CANADIAN_ABORIGINAL, 1056),
- Alias((int32_t) USCRIPT_CARIAN, 1058),
- Alias((int32_t) USCRIPT_CHAKMA, 1060),
- Alias((int32_t) USCRIPT_CHAM, 1062),
- Alias((int32_t) USCRIPT_CHEROKEE, 1064),
- Alias((int32_t) USCRIPT_CIRTH, 1066),
- Alias((int32_t) USCRIPT_COMMON, 1068),
- Alias((int32_t) USCRIPT_COPTIC, 1070),
- Alias((int32_t) USCRIPT_CUNEIFORM, 1073),
- Alias((int32_t) USCRIPT_CYPRIOT, 1075),
- Alias((int32_t) USCRIPT_CYRILLIC, 1077),
- Alias((int32_t) USCRIPT_DEMOTIC_EGYPTIAN, 1079),
- Alias((int32_t) USCRIPT_DESERET, 1081),
- Alias((int32_t) USCRIPT_DEVANAGARI, 1083),
- Alias((int32_t) USCRIPT_EASTERN_SYRIAC, 1085),
- Alias((int32_t) USCRIPT_EGYPTIAN_HIEROGLYPHS, 1087),
- Alias((int32_t) USCRIPT_ESTRANGELO_SYRIAC, 1089),
- Alias((int32_t) USCRIPT_ETHIOPIC, 1091),
- Alias((int32_t) USCRIPT_GEORGIAN, 1093),
- Alias((int32_t) USCRIPT_GLAGOLITIC, 1095),
- Alias((int32_t) USCRIPT_GOTHIC, 1097),
- Alias((int32_t) USCRIPT_GREEK, 1099),
- Alias((int32_t) USCRIPT_GUJARATI, 1101),
- Alias((int32_t) USCRIPT_GURMUKHI, 1103),
- Alias((int32_t) USCRIPT_HAN, 1105),
- Alias((int32_t) USCRIPT_HANGUL, 1107),
- Alias((int32_t) USCRIPT_HANUNOO, 1109),
- Alias((int32_t) USCRIPT_HARAPPAN_INDUS, 1111),
- Alias((int32_t) USCRIPT_HEBREW, 1113),
- Alias((int32_t) USCRIPT_HIERATIC_EGYPTIAN, 1115),
- Alias((int32_t) USCRIPT_HIRAGANA, 1117),
- Alias((int32_t) USCRIPT_IMPERIAL_ARAMAIC, 1119),
- Alias((int32_t) USCRIPT_INHERITED, 1121),
- Alias((int32_t) USCRIPT_INSCRIPTIONAL_PAHLAVI, 1123),
- Alias((int32_t) USCRIPT_INSCRIPTIONAL_PARTHIAN, 1125),
- Alias((int32_t) USCRIPT_JAPANESE, 1127),
- Alias((int32_t) USCRIPT_JAVANESE, 1129),
- Alias((int32_t) USCRIPT_KAITHI, 1131),
- Alias((int32_t) USCRIPT_KANNADA, 1133),
- Alias((int32_t) USCRIPT_KATAKANA, 1135),
- Alias((int32_t) USCRIPT_KATAKANA_OR_HIRAGANA, 1137),
- Alias((int32_t) USCRIPT_KAYAH_LI, 1139),
- Alias((int32_t) USCRIPT_KHAROSHTHI, 1141),
- Alias((int32_t) USCRIPT_KHMER, 1143),
- Alias((int32_t) USCRIPT_KHUTSURI, 1145),
- Alias((int32_t) USCRIPT_KOREAN, 1147),
- Alias((int32_t) USCRIPT_LANNA, 1149),
- Alias((int32_t) USCRIPT_LAO, 1151),
- Alias((int32_t) USCRIPT_LATIN, 1153),
- Alias((int32_t) USCRIPT_LATIN_FRAKTUR, 1155),
- Alias((int32_t) USCRIPT_LATIN_GAELIC, 1157),
- Alias((int32_t) USCRIPT_LEPCHA, 1159),
- Alias((int32_t) USCRIPT_LIMBU, 1161),
- Alias((int32_t) USCRIPT_LINEAR_A, 1163),
- Alias((int32_t) USCRIPT_LINEAR_B, 1165),
- Alias((int32_t) USCRIPT_LYCIAN, 1167),
- Alias((int32_t) USCRIPT_LYDIAN, 1169),
- Alias((int32_t) USCRIPT_MALAYALAM, 1171),
- Alias((int32_t) USCRIPT_MANDAEAN, 1173),
- Alias((int32_t) USCRIPT_MANICHAEAN, 1175),
- Alias((int32_t) USCRIPT_MATHEMATICAL_NOTATION, 1177),
- Alias((int32_t) USCRIPT_MAYAN_HIEROGLYPHS, 1179),
- Alias((int32_t) USCRIPT_MEITEI_MAYEK, 1181),
- Alias((int32_t) USCRIPT_MEROITIC, 1183),
- Alias((int32_t) USCRIPT_MONGOLIAN, 1185),
- Alias((int32_t) USCRIPT_MOON, 1187),
- Alias((int32_t) USCRIPT_MYANMAR, 1189),
- Alias((int32_t) USCRIPT_NEW_TAI_LUE, 1191),
- Alias((int32_t) USCRIPT_NKO, 1193),
- Alias((int32_t) USCRIPT_OGHAM, 1195),
- Alias((int32_t) USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC, 1197),
- Alias((int32_t) USCRIPT_OLD_HUNGARIAN, 1199),
- Alias((int32_t) USCRIPT_OLD_ITALIC, 1201),
- Alias((int32_t) USCRIPT_OLD_PERMIC, 1203),
- Alias((int32_t) USCRIPT_OLD_PERSIAN, 1205),
- Alias((int32_t) USCRIPT_OL_CHIKI, 1207),
- Alias((int32_t) USCRIPT_ORIYA, 1209),
- Alias((int32_t) USCRIPT_ORKHON, 1211),
- Alias((int32_t) USCRIPT_OSMANYA, 1213),
- Alias((int32_t) USCRIPT_PAHAWH_HMONG, 1215),
- Alias((int32_t) USCRIPT_PHAGS_PA, 1217),
- Alias((int32_t) USCRIPT_PHOENICIAN, 1219),
- Alias((int32_t) USCRIPT_PHONETIC_POLLARD, 1221),
- Alias((int32_t) USCRIPT_PSALTER_PAHLAVI, 1223),
- Alias((int32_t) USCRIPT_REJANG, 1225),
- Alias((int32_t) USCRIPT_RONGORONGO, 1227),
- Alias((int32_t) USCRIPT_RUNIC, 1229),
- Alias((int32_t) USCRIPT_SAMARITAN, 1231),
- Alias((int32_t) USCRIPT_SARATI, 1233),
- Alias((int32_t) USCRIPT_SAURASHTRA, 1235),
- Alias((int32_t) USCRIPT_SHAVIAN, 1237),
- Alias((int32_t) USCRIPT_SIGN_WRITING, 1239),
- Alias((int32_t) USCRIPT_SIMPLIFIED_HAN, 1241),
- Alias((int32_t) USCRIPT_SINHALA, 1243),
- Alias((int32_t) USCRIPT_SUNDANESE, 1245),
- Alias((int32_t) USCRIPT_SYLOTI_NAGRI, 1247),
- Alias((int32_t) USCRIPT_SYMBOLS, 1249),
- Alias((int32_t) USCRIPT_SYRIAC, 1251),
- Alias((int32_t) USCRIPT_TAGALOG, 1253),
- Alias((int32_t) USCRIPT_TAGBANWA, 1255),
- Alias((int32_t) USCRIPT_TAI_LE, 1257),
- Alias((int32_t) USCRIPT_TAI_VIET, 1259),
- Alias((int32_t) USCRIPT_TAMIL, 1261),
- Alias((int32_t) USCRIPT_TELUGU, 1263),
- Alias((int32_t) USCRIPT_TENGWAR, 1265),
- Alias((int32_t) USCRIPT_THAANA, 1267),
- Alias((int32_t) USCRIPT_THAI, 1269),
- Alias((int32_t) USCRIPT_TIBETAN, 1271),
- Alias((int32_t) USCRIPT_TIFINAGH, 1273),
- Alias((int32_t) USCRIPT_TRADITIONAL_HAN, 1275),
- Alias((int32_t) USCRIPT_UGARITIC, 1277),
- Alias((int32_t) USCRIPT_UNKNOWN, 1279),
- Alias((int32_t) USCRIPT_UNWRITTEN_LANGUAGES, 1281),
- Alias((int32_t) USCRIPT_VAI, 1283),
- Alias((int32_t) USCRIPT_VISIBLE_SPEECH, 1285),
- Alias((int32_t) USCRIPT_WESTERN_SYRIAC, 1287),
- Alias((int32_t) USCRIPT_YI, 1289),
-};
-
-const int32_t VALUES_tccc_COUNT = 19;
-
-const Alias VALUES_tccc[] = {
- Alias((int32_t) 0, 638),
- Alias((int32_t) 1, 640),
- Alias((int32_t) 200, 642),
- Alias((int32_t) 202, 644),
- Alias((int32_t) 216, 646),
- Alias((int32_t) 218, 648),
- Alias((int32_t) 220, 650),
- Alias((int32_t) 222, 652),
- Alias((int32_t) 224, 654),
- Alias((int32_t) 226, 656),
- Alias((int32_t) 228, 658),
- Alias((int32_t) 230, 660),
- Alias((int32_t) 232, 662),
- Alias((int32_t) 233, 664),
- Alias((int32_t) 234, 666),
- Alias((int32_t) 240, 668),
- Alias((int32_t) 7, 670),
- Alias((int32_t) 8, 672),
- Alias((int32_t) 9, 674),
-};
-
-const int32_t PROPERTY_COUNT = 85;
-
-const Property PROPERTY[] = {
- Property((int32_t) UCHAR_ALPHABETIC, 68, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_ASCII_HEX_DIGIT, 70, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_BIDI_CONTROL, 72, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_BIDI_MIRRORED, 74, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_CASE_SENSITIVE, 76, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_DASH, 78, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 80, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_DEPRECATED, 82, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_DIACRITIC, 84, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_EXTENDER, 86, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_FULL_COMPOSITION_EXCLUSION, 88, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_GRAPHEME_BASE, 90, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_GRAPHEME_EXTEND, 92, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_GRAPHEME_LINK, 94, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_HEX_DIGIT, 96, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_HYPHEN, 98, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_IDEOGRAPHIC, 100, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_IDS_BINARY_OPERATOR, 102, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_IDS_TRINARY_OPERATOR, 104, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_ID_CONTINUE, 106, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_ID_START, 108, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_JOIN_CONTROL, 110, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_LOGICAL_ORDER_EXCEPTION, 112, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_LOWERCASE, 114, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_MATH, 116, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_NFC_INERT, 118, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_NFD_INERT, 120, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_NFKC_INERT, 122, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_NFKD_INERT, 124, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_NONCHARACTER_CODE_POINT, 126, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_PATTERN_SYNTAX, 128, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_PATTERN_WHITE_SPACE, 130, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_POSIX_ALNUM, 132, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_POSIX_BLANK, 134, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_POSIX_GRAPH, 136, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_POSIX_PRINT, 138, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_POSIX_XDIGIT, 140, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_QUOTATION_MARK, 142, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_RADICAL, 144, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_SEGMENT_STARTER, 146, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_SOFT_DOTTED, 148, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_S_TERM, 150, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_TERMINAL_PUNCTUATION, 152, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_UNIFIED_IDEOGRAPH, 154, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_UPPERCASE, 156, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_VARIATION_SELECTOR, 158, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_WHITE_SPACE, 160, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_XID_CONTINUE, 163, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_XID_START, 165, VALUES_binprop_COUNT, VALUES_binprop),
- Property((int32_t) UCHAR_BIDI_CLASS, 169, VALUES_bc_COUNT, VALUES_bc),
- Property((int32_t) UCHAR_BLOCK, 171, VALUES_blk_COUNT, VALUES_blk),
- Property((int32_t) UCHAR_CANONICAL_COMBINING_CLASS, 173, VALUES_ccc_COUNT, VALUES_ccc),
- Property((int32_t) UCHAR_DECOMPOSITION_TYPE, 175, VALUES_dt_COUNT, VALUES_dt),
- Property((int32_t) UCHAR_EAST_ASIAN_WIDTH, 177, VALUES_ea_COUNT, VALUES_ea),
- Property((int32_t) UCHAR_GENERAL_CATEGORY, 179, VALUES_gc_COUNT, VALUES_gc),
- Property((int32_t) UCHAR_GRAPHEME_CLUSTER_BREAK, 181, VALUES_GCB_COUNT, VALUES_GCB),
- Property((int32_t) UCHAR_HANGUL_SYLLABLE_TYPE, 183, VALUES_hst_COUNT, VALUES_hst),
- Property((int32_t) UCHAR_JOINING_GROUP, 185, VALUES_jg_COUNT, VALUES_jg),
- Property((int32_t) UCHAR_JOINING_TYPE, 187, VALUES_jt_COUNT, VALUES_jt),
- Property((int32_t) UCHAR_LEAD_CANONICAL_COMBINING_CLASS, 189, VALUES_lccc_COUNT, VALUES_lccc),
- Property((int32_t) UCHAR_LINE_BREAK, 191, VALUES_lb_COUNT, VALUES_lb),
- Property((int32_t) UCHAR_NFC_QUICK_CHECK, 193, VALUES_NFC_QC_COUNT, VALUES_NFC_QC),
- Property((int32_t) UCHAR_NFD_QUICK_CHECK, 195, VALUES_NFD_QC_COUNT, VALUES_NFD_QC),
- Property((int32_t) UCHAR_NFKC_QUICK_CHECK, 197, VALUES_NFKC_QC_COUNT, VALUES_NFKC_QC),
- Property((int32_t) UCHAR_NFKD_QUICK_CHECK, 199, VALUES_NFKD_QC_COUNT, VALUES_NFKD_QC),
- Property((int32_t) UCHAR_NUMERIC_TYPE, 201, VALUES_nt_COUNT, VALUES_nt),
- Property((int32_t) UCHAR_SCRIPT, 203, VALUES_sc_COUNT, VALUES_sc),
- Property((int32_t) UCHAR_SENTENCE_BREAK, 205, VALUES_SB_COUNT, VALUES_SB),
- Property((int32_t) UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, 207, VALUES_tccc_COUNT, VALUES_tccc),
- Property((int32_t) UCHAR_WORD_BREAK, 209, VALUES_WB_COUNT, VALUES_WB),
- Property((int32_t) UCHAR_AGE, 213, 0, NULL),
- Property((int32_t) UCHAR_BIDI_MIRRORING_GLYPH, 215, 0, NULL),
- Property((int32_t) UCHAR_CASE_FOLDING, 217, 0, NULL),
- Property((int32_t) UCHAR_ISO_COMMENT, 219, 0, NULL),
- Property((int32_t) UCHAR_LOWERCASE_MAPPING, 221, 0, NULL),
- Property((int32_t) UCHAR_NAME, 223, 0, NULL),
- Property((int32_t) UCHAR_SIMPLE_CASE_FOLDING, 225, 0, NULL),
- Property((int32_t) UCHAR_SIMPLE_LOWERCASE_MAPPING, 228, 0, NULL),
- Property((int32_t) UCHAR_SIMPLE_TITLECASE_MAPPING, 230, 0, NULL),
- Property((int32_t) UCHAR_SIMPLE_UPPERCASE_MAPPING, 232, 0, NULL),
- Property((int32_t) UCHAR_TITLECASE_MAPPING, 234, 0, NULL),
- Property((int32_t) UCHAR_UNICODE_1_NAME, 236, 0, NULL),
- Property((int32_t) UCHAR_UPPERCASE_MAPPING, 238, 0, NULL),
- Property((int32_t) UCHAR_NUMERIC_VALUE, 167, 0, NULL),
- Property((int32_t) UCHAR_GENERAL_CATEGORY_MASK, 211, VALUES_gcm_COUNT, VALUES_gcm),
-};
-
-/*eof*/
diff --git a/tools/genpname/genpname.cpp b/tools/genpname/genpname.cpp
deleted file mode 100644
index 84294a62..00000000
--- a/tools/genpname/genpname.cpp
+++ /dev/null
@@ -1,1225 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 2002-2006, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 10/11/02 aliu Creation.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "filestrm.h"
-#include "uarrsort.h"
-#include "unewdata.h"
-#include "uoptions.h"
-#include "uprops.h"
-#include "propname.h"
-#include "uassert.h"
-
-#include <stdio.h>
-
-U_NAMESPACE_USE
-
-// TODO: Clean up and comment this code.
-
-//----------------------------------------------------------------------
-// BEGIN DATA
-//
-// This is the raw data to be output. We define the data structure,
-// then include a machine-generated header that contains the actual
-// data.
-
-#include "unicode/uchar.h"
-#include "unicode/uscript.h"
-#include "unicode/unorm.h"
-
-class AliasName {
-public:
- const char* str;
- int32_t index;
-
- AliasName(const char* str, int32_t index);
-
- int compare(const AliasName& other) const;
-
- UBool operator==(const AliasName& other) const {
- return compare(other) == 0;
- }
-
- UBool operator!=(const AliasName& other) const {
- return compare(other) != 0;
- }
-};
-
-AliasName::AliasName(const char* _str,
- int32_t _index) :
- str(_str),
- index(_index)
-{
-}
-
-int AliasName::compare(const AliasName& other) const {
- return uprv_comparePropertyNames(str, other.str);
-}
-
-class Alias {
-public:
- int32_t enumValue;
- int32_t nameGroupIndex;
-
- Alias(int32_t enumValue,
- int32_t nameGroupIndex);
-
- int32_t getUniqueNames(int32_t* nameGroupIndices) const;
-};
-
-Alias::Alias(int32_t anEnumValue,
- int32_t aNameGroupIndex) :
- enumValue(anEnumValue),
- nameGroupIndex(aNameGroupIndex)
-{
-}
-
-class Property : public Alias {
-public:
- int32_t valueCount;
- const Alias* valueList;
-
- Property(int32_t enumValue,
- int32_t nameGroupIndex,
- int32_t valueCount,
- const Alias* valueList);
-};
-
-Property::Property(int32_t _enumValue,
- int32_t _nameGroupIndex,
- int32_t _valueCount,
- const Alias* _valueList) :
- Alias(_enumValue, _nameGroupIndex),
- valueCount(_valueCount),
- valueList(_valueList)
-{
-}
-
-// *** Include the data header ***
-#include "data.h"
-
-/* return a list of unique names, not including "", for this property
- * @param stringIndices array of at least MAX_NAMES_PER_GROUP
- * elements, will be filled with indices into STRING_TABLE
- * @return number of indices, >= 1
- */
-int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
- int32_t count = 0;
- int32_t i = nameGroupIndex;
- UBool done = FALSE;
- while (!done) {
- int32_t j = NAME_GROUP[i++];
- if (j < 0) {
- done = TRUE;
- j = -j;
- }
- if (j == 0) continue; // omit "" entries
- UBool dupe = FALSE;
- for (int32_t k=0; k<count; ++k) {
- if (stringIndices[k] == j) {
- dupe = TRUE;
- break;
- }
- // also do a string check for things like "age|Age"
- if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
- //printf("Found dupe %s|%s\n",
- // STRING_TABLE[stringIndices[k]].str,
- // STRING_TABLE[j].str);
- dupe = TRUE;
- break;
- }
- }
- if (dupe) continue; // omit duplicates
- stringIndices[count++] = j;
- }
- return count;
-}
-
-// END DATA
-//----------------------------------------------------------------------
-
-#define MALLOC(type, count) \
- (type*) uprv_malloc(sizeof(type) * count)
-
-void die(const char* msg) {
- fprintf(stderr, "Error: %s\n", msg);
- exit(1);
-}
-
-//----------------------------------------------------------------------
-
-/**
- * A list of Alias objects.
- */
-class AliasList {
-public:
- virtual ~AliasList();
- virtual const Alias& operator[](int32_t i) const = 0;
- virtual int32_t count() const = 0;
-};
-
-AliasList::~AliasList() {}
-
-/**
- * A single array.
- */
-class AliasArrayList : public AliasList {
- const Alias* a;
- int32_t n;
-public:
- AliasArrayList(const Alias* _a, int32_t _n) {
- a = _a;
- n = _n;
- }
- virtual const Alias& operator[](int32_t i) const {
- return a[i];
- }
- virtual int32_t count() const {
- return n;
- }
-};
-
-/**
- * A single array.
- */
-class PropertyArrayList : public AliasList {
- const Property* a;
- int32_t n;
-public:
- PropertyArrayList(const Property* _a, int32_t _n) {
- a = _a;
- n = _n;
- }
- virtual const Alias& operator[](int32_t i) const {
- return a[i];
- }
- virtual int32_t count() const {
- return n;
- }
-};
-
-//----------------------------------------------------------------------
-
-/**
- * An element in a name index. It maps a name (given by index) into
- * an enum value.
- */
-class NameToEnumEntry {
-public:
- int32_t nameIndex;
- int32_t enumValue;
- NameToEnumEntry(int32_t a, int32_t b) { nameIndex=a; enumValue=b; }
-};
-
-// Sort function for NameToEnumEntry (sort by name)
-U_CFUNC int32_t
-compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
- return
- STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
- compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
-}
-
-//----------------------------------------------------------------------
-
-/**
- * An element in an enum index. It maps an enum into a name group entry
- * (given by index).
- */
-class EnumToNameGroupEntry {
-public:
- int32_t enumValue;
- int32_t nameGroupIndex;
- EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
-
- // are enumValues contiguous for count entries starting with this one?
- // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
- UBool isContiguous(int32_t count) const {
- const EnumToNameGroupEntry* p = this;
- for (int32_t i=1; i<count; ++i) {
- if (p[i].enumValue != (this->enumValue + i)) {
- return FALSE;
- }
- }
- return TRUE;
- }
-};
-
-// Sort function for EnumToNameGroupEntry (sort by name index)
-U_CFUNC int32_t
-compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
- return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
-}
-
-//----------------------------------------------------------------------
-
-/**
- * An element in the map from enumerated property enums to value maps.
- */
-class EnumToValueEntry {
-public:
- int32_t enumValue;
- EnumToNameGroupEntry* enumToName;
- int32_t enumToName_count;
- NameToEnumEntry* nameToEnum;
- int32_t nameToEnum_count;
-
- // are enumValues contiguous for count entries starting with this one?
- // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
- UBool isContiguous(int32_t count) const {
- const EnumToValueEntry* p = this;
- for (int32_t i=1; i<count; ++i) {
- if (p[i].enumValue != (this->enumValue + i)) {
- return FALSE;
- }
- }
- return TRUE;
- }
-};
-
-// Sort function for EnumToValueEntry (sort by enum)
-U_CFUNC int32_t
-compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
- return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
-}
-
-//----------------------------------------------------------------------
-// BEGIN Builder
-
-#define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
-
-class Builder {
- // header:
- PropertyAliases header;
-
- // 0:
- NonContiguousEnumToOffset* enumToName;
- int32_t enumToName_size;
- Offset enumToName_offset;
-
- // 1: (deleted)
-
- // 2:
- NameToEnum* nameToEnum;
- int32_t nameToEnum_size;
- Offset nameToEnum_offset;
-
- // 3:
- NonContiguousEnumToOffset* enumToValue;
- int32_t enumToValue_size;
- Offset enumToValue_offset;
-
- // 4:
- ValueMap* valueMap;
- int32_t valueMap_size;
- int32_t valueMap_count;
- Offset valueMap_offset;
-
- // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
- // NULL and one is not. valueEnumToName_size[i] is the size of
- // the non-NULL one. i=0..valueMapCount-1
- // 5a:
- EnumToOffset** valueEnumToName;
- // 5b:
- NonContiguousEnumToOffset** valueNCEnumToName;
- int32_t* valueEnumToName_size;
- Offset* valueEnumToName_offset;
- // 6:
- // arrays of valueMap_count pointers, sizes, & offsets
- NameToEnum** valueNameToEnum;
- int32_t* valueNameToEnum_size;
- Offset* valueNameToEnum_offset;
-
- // 98:
- Offset* nameGroupPool;
- int32_t nameGroupPool_count;
- int32_t nameGroupPool_size;
- Offset nameGroupPool_offset;
-
- // 99:
- char* stringPool;
- int32_t stringPool_count;
- int32_t stringPool_size;
- Offset stringPool_offset;
- Offset* stringPool_offsetArray; // relative to stringPool
-
- int32_t total_size; // size of everything
-
- int32_t debug;
-
-public:
-
- Builder(int32_t debugLevel);
- ~Builder();
-
- void buildTopLevelProperties(const NameToEnumEntry* propName,
- int32_t propNameCount,
- const EnumToNameGroupEntry* propEnum,
- int32_t propEnumCount);
-
- void buildValues(const EnumToValueEntry* e2v,
- int32_t count);
-
- void buildStringPool(const AliasName* propertyNames,
- int32_t propertyNameCount,
- const int32_t* nameGroupIndices,
- int32_t nameGroupIndicesCount);
-
- void fixup();
-
- int8_t* createData(int32_t& length) const;
-
-private:
-
- static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
- int32_t count,
- int32_t& size);
- static NonContiguousEnumToOffset*
- buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
- int32_t count,
- int32_t& size);
-
- static NonContiguousEnumToOffset*
- buildNCEnumToValue(const EnumToValueEntry* e2v,
- int32_t count,
- int32_t& size);
-
- static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
- int32_t count,
- int32_t& size);
-
- Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
- void fixupNameToEnum(NameToEnum* n);
- void fixupEnumToNameGroup(EnumToOffset* e2ng);
- void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
-
- void computeOffsets();
- void fixupStringPoolOffsets();
- void fixupNameGroupPoolOffsets();
- void fixupMiscellaneousOffsets();
-
- static int32_t align(int32_t a);
- static void erase(void* p, int32_t size);
-};
-
-Builder::Builder(int32_t debugLevel) {
- debug = debugLevel;
- enumToName = 0;
- nameToEnum = 0;
- enumToValue = 0;
- valueMap_count = 0;
- valueMap = 0;
- valueEnumToName = 0;
- valueNCEnumToName = 0;
- valueEnumToName_size = 0;
- valueEnumToName_offset = 0;
- valueNameToEnum = 0;
- valueNameToEnum_size = 0;
- valueNameToEnum_offset = 0;
- nameGroupPool = 0;
- stringPool = 0;
- stringPool_offsetArray = 0;
-}
-
-Builder::~Builder() {
- uprv_free(enumToName);
- uprv_free(nameToEnum);
- uprv_free(enumToValue);
- uprv_free(valueMap);
- for (int32_t i=0; i<valueMap_count; ++i) {
- uprv_free(valueEnumToName[i]);
- uprv_free(valueNCEnumToName[i]);
- uprv_free(valueNameToEnum[i]);
- }
- uprv_free(valueEnumToName);
- uprv_free(valueNCEnumToName);
- uprv_free(valueEnumToName_size);
- uprv_free(valueEnumToName_offset);
- uprv_free(valueNameToEnum);
- uprv_free(valueNameToEnum_size);
- uprv_free(valueNameToEnum_offset);
- uprv_free(nameGroupPool);
- uprv_free(stringPool);
- uprv_free(stringPool_offsetArray);
-}
-
-int32_t Builder::align(int32_t a) {
- U_ASSERT(a >= 0);
- int32_t k = a % sizeof(int32_t);
- if (k == 0) {
- return a;
- }
- a += sizeof(int32_t) - k;
- return a;
-}
-
-void Builder::erase(void* p, int32_t size) {
- U_ASSERT(size >= 0);
- int8_t* q = (int8_t*) p;
- while (size--) {
- *q++ = 0;
- }
-}
-
-EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
- int32_t count,
- int32_t& size) {
- U_ASSERT(e2ng->isContiguous(count));
- size = align(EnumToOffset::getSize(count));
- EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
- erase(result, size);
- result->enumStart = e2ng->enumValue;
- result->enumLimit = e2ng->enumValue + count;
- Offset* p = result->getOffsetArray();
- for (int32_t i=0; i<count; ++i) {
- // set these to NGI index values
- // fix them up to NGI offset values
- U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
- p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
- }
- return result;
-}
-
-NonContiguousEnumToOffset*
-Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
- int32_t count,
- int32_t& size) {
- U_ASSERT(!e2ng->isContiguous(count));
- size = align(NonContiguousEnumToOffset::getSize(count));
- NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
- erase(nc, size);
- nc->count = count;
- EnumValue* e = nc->getEnumArray();
- Offset* p = nc->getOffsetArray();
- for (int32_t i=0; i<count; ++i) {
- // set these to NGI index values
- // fix them up to NGI offset values
- e[i] = e2ng[i].enumValue;
- U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
- p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
- }
- return nc;
-}
-
-NonContiguousEnumToOffset*
-Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
- int32_t count,
- int32_t& size) {
- U_ASSERT(!e2v->isContiguous(count));
- size = align(NonContiguousEnumToOffset::getSize(count));
- NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
- erase(result, size);
- result->count = count;
- EnumValue* e = result->getEnumArray();
- for (int32_t i=0; i<count; ++i) {
- e[i] = e2v[i].enumValue;
- // offset must be set later
- }
- return result;
-}
-
-/**
- * Given an index into the string pool, return an offset. computeOffsets()
- * must have been called already. If allowNegative is true, allow negatives
- * and preserve their sign.
- */
-Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
- // Index 0 is ""; we turn this into an Offset of zero
- if (index == 0) return 0;
- if (index < 0) {
- if (allowNegative) {
- return -Builder::stringIndexToOffset(-index);
- } else {
- die("Negative string pool index");
- }
- } else {
- if (index >= stringPool_count) {
- die("String pool index too large");
- }
- Offset result = stringPool_offset + stringPool_offsetArray[index];
- U_ASSERT(result >= 0 && result < total_size);
- return result;
- }
- return 0; // never executed; make compiler happy
-}
-
-NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
- int32_t count,
- int32_t& size) {
- size = align(NameToEnum::getSize(count));
- NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
- erase(n2e, size);
- n2e->count = count;
- Offset* p = n2e->getNameArray();
- EnumValue* e = n2e->getEnumArray();
- for (int32_t i=0; i<count; ++i) {
- // set these to SP index values
- // fix them up to SP offset values
- U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
- p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
- e[i] = nameToEnum[i].enumValue;
- }
- return n2e;
-}
-
-
-void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
- int32_t propNameCount,
- const EnumToNameGroupEntry* propEnum,
- int32_t propEnumCount) {
- enumToName = buildNCEnumToNameGroup(propEnum,
- propEnumCount,
- enumToName_size);
- nameToEnum = buildNameToEnum(propName,
- propNameCount,
- nameToEnum_size);
-}
-
-void Builder::buildValues(const EnumToValueEntry* e2v,
- int32_t count) {
- int32_t i;
-
- U_ASSERT(!e2v->isContiguous(count));
-
- valueMap_count = count;
-
- enumToValue = buildNCEnumToValue(e2v, count,
- enumToValue_size);
-
- valueMap_size = align(count * sizeof(ValueMap));
- valueMap = (ValueMap*) uprv_malloc(valueMap_size);
- erase(valueMap, valueMap_size);
-
- valueEnumToName = MALLOC(EnumToOffset*, count);
- valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
- valueEnumToName_size = MALLOC(int32_t, count);
- valueEnumToName_offset = MALLOC(Offset, count);
- valueNameToEnum = MALLOC(NameToEnum*, count);
- valueNameToEnum_size = MALLOC(int32_t, count);
- valueNameToEnum_offset = MALLOC(Offset, count);
-
- for (i=0; i<count; ++i) {
- UBool isContiguous =
- e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
- valueEnumToName[i] = 0;
- valueNCEnumToName[i] = 0;
- if (isContiguous) {
- valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
- e2v[i].enumToName_count,
- valueEnumToName_size[i]);
- } else {
- valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
- e2v[i].enumToName_count,
- valueEnumToName_size[i]);
- }
- valueNameToEnum[i] =
- buildNameToEnum(e2v[i].nameToEnum,
- e2v[i].nameToEnum_count,
- valueNameToEnum_size[i]);
- }
-}
-
-void Builder::buildStringPool(const AliasName* propertyNames,
- int32_t propertyNameCount,
- const int32_t* nameGroupIndices,
- int32_t nameGroupIndicesCount) {
- int32_t i;
-
- nameGroupPool_count = nameGroupIndicesCount;
- nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
- nameGroupPool = MALLOC(Offset, nameGroupPool_count);
-
- for (i=0; i<nameGroupPool_count; ++i) {
- // Some indices are negative.
- int32_t a = nameGroupIndices[i];
- if (a < 0) a = -a;
- U_ASSERT(IS_VALID_OFFSET(a));
- nameGroupPool[i] = (Offset) nameGroupIndices[i];
- }
-
- stringPool_count = propertyNameCount;
- stringPool_size = 0;
- // first string must be "" -- we skip it
- U_ASSERT(*propertyNames[0].str == 0);
- for (i=1 /*sic*/; i<propertyNameCount; ++i) {
- stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
- }
- stringPool = MALLOC(char, stringPool_size);
- stringPool_offsetArray = MALLOC(Offset, stringPool_count);
- Offset soFar = 0;
- char* p = stringPool;
- stringPool_offsetArray[0] = -1; // we don't use this entry
- for (i=1 /*sic*/; i<propertyNameCount; ++i) {
- const char* str = propertyNames[i].str;
- int32_t len = (int32_t)uprv_strlen(str);
- uprv_strcpy(p, str);
- p += len;
- *p++ = 0;
- stringPool_offsetArray[i] = soFar;
- soFar += (Offset)(len+1);
- }
- U_ASSERT(soFar == stringPool_size);
- U_ASSERT(p == (stringPool + stringPool_size));
-}
-
-// Confirm that PropertyAliases is a POD (plain old data; see C++
-// std). The following union will _fail to compile_ if
-// PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
-// macro to check this, but that's not quite right, so that test is
-// commented out -- see below.)
-typedef union {
- int32_t i;
- PropertyAliases p;
-} PropertyAliasesPODTest;
-
-void Builder::computeOffsets() {
- int32_t i;
- Offset off = sizeof(header);
-
- if (debug>0) {
- printf("header \t offset=%4d size=%5d\n", 0, off);
- }
-
- // PropertyAliases must have no v-table and must be
- // padded (if necessary) to the next 32-bit boundary.
- //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
- U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
-
- #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
-
- #define COMPUTE_OFFSET2(foo,type) \
- if (debug>0)\
- printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
- foo##_offset = off;\
- U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
- U_ASSERT(foo##_offset % sizeof(type) == 0);\
- off = (Offset) (off + foo##_size);
-
- COMPUTE_OFFSET(enumToName); // 0:
- COMPUTE_OFFSET(nameToEnum); // 2:
- COMPUTE_OFFSET(enumToValue); // 3:
- COMPUTE_OFFSET(valueMap); // 4:
-
- for (i=0; i<valueMap_count; ++i) {
- if (debug>0) {
- printf(" enumToName[%d]\t offset=%4d size=%5d\n",
- (int)i, off, (int)valueEnumToName_size[i]);
- }
-
- valueEnumToName_offset[i] = off; // 5:
- U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
- off = (Offset) (off + valueEnumToName_size[i]);
-
- if (debug>0) {
- printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
- (int)i, off, (int)valueNameToEnum_size[i]);
- }
-
- valueNameToEnum_offset[i] = off; // 6:
- U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
- off = (Offset) (off + valueNameToEnum_size[i]);
- }
-
- // These last two chunks have weaker alignment needs
- COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
- COMPUTE_OFFSET2(stringPool,char); // 99:
-
- total_size = off;
- if (debug>0) printf("total size=%5d\n\n", (int)total_size);
- U_ASSERT(total_size <= (MAX_OFFSET+1));
-}
-
-void Builder::fixupNameToEnum(NameToEnum* n) {
- // Fix the string pool offsets in n
- Offset* p = n->getNameArray();
- for (int32_t i=0; i<n->count; ++i) {
- p[i] = stringIndexToOffset(p[i]);
- }
-}
-
-void Builder::fixupStringPoolOffsets() {
- int32_t i;
-
- // 2:
- fixupNameToEnum(nameToEnum);
-
- // 6:
- for (i=0; i<valueMap_count; ++i) {
- fixupNameToEnum(valueNameToEnum[i]);
- }
-
- // 98:
- for (i=0; i<nameGroupPool_count; ++i) {
- nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
- }
-}
-
-void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
- EnumValue i;
- int32_t j;
- Offset* p = e2ng->getOffsetArray();
- for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
- p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
- }
-}
-
-void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
- int32_t i;
- /*EnumValue* e = e2ng->getEnumArray();*/
- Offset* p = e2ng->getOffsetArray();
- for (i=0; i<e2ng->count; ++i) {
- p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
- }
-}
-
-void Builder::fixupNameGroupPoolOffsets() {
- int32_t i;
-
- // 0:
- fixupNCEnumToNameGroup(enumToName);
-
- // 1: (deleted)
-
- // 5:
- for (i=0; i<valueMap_count; ++i) {
- // 5a:
- if (valueEnumToName[i] != 0) {
- fixupEnumToNameGroup(valueEnumToName[i]);
- }
- // 5b:
- if (valueNCEnumToName[i] != 0) {
- fixupNCEnumToNameGroup(valueNCEnumToName[i]);
- }
- }
-}
-
-void Builder::fixupMiscellaneousOffsets() {
- int32_t i;
-
- // header:
- erase(&header, sizeof(header));
- header.enumToName_offset = enumToName_offset;
- header.nameToEnum_offset = nameToEnum_offset;
- header.enumToValue_offset = enumToValue_offset;
- // header meta-info used by Java:
- U_ASSERT(total_size > 0 && total_size < 0x7FFF);
- header.total_size = (int16_t) total_size;
- header.valueMap_offset = valueMap_offset;
- header.valueMap_count = (int16_t) valueMap_count;
- header.nameGroupPool_offset = nameGroupPool_offset;
- header.nameGroupPool_count = (int16_t) nameGroupPool_count;
- header.stringPool_offset = stringPool_offset;
- header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
-
- U_ASSERT(valueMap_count <= 0x7FFF);
- U_ASSERT(nameGroupPool_count <= 0x7FFF);
- U_ASSERT(stringPool_count <= 0x7FFF);
-
- // 3:
- Offset* p = enumToValue->getOffsetArray();
- /*EnumValue* e = enumToValue->getEnumArray();*/
- U_ASSERT(valueMap_count == enumToValue->count);
- for (i=0; i<valueMap_count; ++i) {
- p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
- }
-
- // 4:
- for (i=0; i<valueMap_count; ++i) {
- ValueMap& v = valueMap[i];
- v.enumToName_offset = v.ncEnumToName_offset = 0;
- if (valueEnumToName[i] != 0) {
- v.enumToName_offset = valueEnumToName_offset[i];
- }
- if (valueNCEnumToName[i] != 0) {
- v.ncEnumToName_offset = valueEnumToName_offset[i];
- }
- v.nameToEnum_offset = valueNameToEnum_offset[i];
- }
-}
-
-void Builder::fixup() {
- computeOffsets();
- fixupStringPoolOffsets();
- fixupNameGroupPoolOffsets();
- fixupMiscellaneousOffsets();
-}
-
-int8_t* Builder::createData(int32_t& length) const {
- length = total_size;
- int8_t* result = MALLOC(int8_t, length);
-
- int8_t* p = result;
- int8_t* limit = result + length;
-
- #define APPEND2(x, size) \
- U_ASSERT((p+size)<=limit); \
- uprv_memcpy(p, x, size); \
- p += size
-
- #define APPEND(x) APPEND2(x, x##_size)
-
- APPEND2(&header, sizeof(header));
- APPEND(enumToName);
- APPEND(nameToEnum);
- APPEND(enumToValue);
- APPEND(valueMap);
-
- for (int32_t i=0; i<valueMap_count; ++i) {
- U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
- (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
- if (valueEnumToName[i] != 0) {
- APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
- }
- if (valueNCEnumToName[i] != 0) {
- APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
- }
- APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
- }
-
- APPEND(nameGroupPool);
- APPEND(stringPool);
-
- if (p != limit) {
- fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
- exit(1);
- }
- return result;
-}
-
-// END Builder
-//----------------------------------------------------------------------
-
-/* UDataInfo cf. udata.h */
-static UDataInfo dataInfo = {
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- sizeof(UChar),
- 0,
-
- {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
- {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
- {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
-};
-
-class genpname {
-
- // command-line options
- UBool useCopyright;
- UBool verbose;
- int32_t debug;
-
-public:
- int MMain(int argc, char *argv[]);
-
-private:
- NameToEnumEntry* createNameIndex(const AliasList& list,
- int32_t& nameIndexCount);
-
- EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
-
- int32_t writeDataFile(const char *destdir, const Builder&);
-};
-
-int main(int argc, char *argv[]) {
- UErrorCode status = U_ZERO_ERROR;
- u_init(&status);
- if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
- // Note: u_init() will try to open ICU property data.
- // failures here are expected when building ICU from scratch.
- // ignore them.
- fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n",
- u_errorName(status));
- exit(1);
- }
-
- genpname app;
- U_MAIN_INIT_ARGS(argc, argv);
- int retVal = app.MMain(argc, argv);
- u_cleanup();
- return retVal;
-}
-
-static UOption options[]={
- UOPTION_HELP_H,
- UOPTION_HELP_QUESTION_MARK,
- UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
- UOPTION_VERBOSE,
- UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
-};
-
-NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
- int32_t& nameIndexCount) {
-
- // Build name => enum map
-
- // This is an n->1 map. There are typically multiple names
- // mapping to one enum. The name index is sorted in order of the name,
- // as defined by the uprv_compareAliasNames() function.
-
- int32_t i, j;
- int32_t count = list.count();
-
- // compute upper limit on number of names in the index
- int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
- NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
-
- nameIndexCount = 0;
- int32_t names[MAX_NAMES_PER_GROUP];
- for (i=0; i<count; ++i) {
- const Alias& p = list[i];
- int32_t n = p.getUniqueNames(names);
- for (j=0; j<n; ++j) {
- U_ASSERT(nameIndexCount < nameIndexCapacity);
- nameIndex[nameIndexCount++] =
- NameToEnumEntry(names[j], p.enumValue);
- }
- }
-
- /*
- * use a stable sort to ensure consistent results between
- * genpname.cpp and the propname.cpp swapping code
- */
- UErrorCode errorCode = U_ZERO_ERROR;
- uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
- compareNameToEnumEntry, NULL, TRUE, &errorCode);
- if (debug>1) {
- printf("Alias names: %d\n", (int)nameIndexCount);
- for (i=0; i<nameIndexCount; ++i) {
- printf("%s => %d\n",
- STRING_TABLE[nameIndex[i].nameIndex].str,
- (int)nameIndex[i].enumValue);
- }
- printf("\n");
- }
- // make sure there are no duplicates. for a sorted list we need
- // only compare adjacent items. Alias.getUniqueNames() has
- // already eliminated duplicate names for a single property, which
- // does occur, so we're checking for duplicate names between two
- // properties, which should never occur.
- UBool ok = TRUE;
- for (i=1; i<nameIndexCount; ++i) {
- if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
- STRING_TABLE[nameIndex[i].nameIndex]) {
- printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
- STRING_TABLE[nameIndex[i-1].nameIndex].str,
- STRING_TABLE[nameIndex[i].nameIndex].str);
- ok = FALSE;
- }
- }
- if (!ok) {
- die("Two or more duplicate names in property list");
- }
-
- return nameIndex;
-}
-
-EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
-
- // Build the enum => name map
-
- // This is a 1->n map. Each enum maps to 1 or more names. To
- // accomplish this the index entry points to an element of the
- // NAME_GROUP array. This is the short name (which may be empty).
- // From there, subsequent elements of NAME_GROUP are alternate
- // names for this enum, up to and including the first one that is
- // negative (negate for actual index).
-
- int32_t i, j, k;
- int32_t count = list.count();
-
- EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
- for (i=0; i<count; ++i) {
- const Alias& p = list[i];
- enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
- }
-
- UErrorCode errorCode = U_ZERO_ERROR;
- uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
- compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
- if (debug>1) {
- printf("Property enums: %d\n", (int)count);
- for (i=0; i<count; ++i) {
- printf("%d => %d: ",
- (int)enumIndex[i].enumValue,
- (int)enumIndex[i].nameGroupIndex);
- UBool done = FALSE;
- for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
- k = NAME_GROUP[j];
- if (k < 0) {
- k = -k;
- done = TRUE;
- }
- printf("\"%s\"", STRING_TABLE[k].str);
- if (!done) printf(", ");
- }
- printf("\n");
- }
- printf("\n");
- }
- return enumIndex;
-}
-
-int genpname::MMain(int argc, char* argv[])
-{
- int32_t i, j;
- UErrorCode status = U_ZERO_ERROR;
-
- u_init(&status);
- if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
- fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
- status = U_ZERO_ERROR;
- }
-
-
- /* preset then read command line options */
- options[3].value=u_getDataDirectory();
- argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
-
- /* error handling, printing usage message */
- if (argc<0) {
- fprintf(stderr,
- "error in command line argument \"%s\"\n",
- argv[-argc]);
- }
-
- debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
-
- if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
- debug < 0 || debug > 9) {
- fprintf(stderr,
- "usage: %s [-options]\n"
- "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
- "options:\n"
- "\t-h or -? or --help this usage text\n"
- "\t-v or --verbose turn on verbose output\n"
- "\t-c or --copyright include a copyright notice\n"
- "\t-d or --destdir destination directory, followed by the path\n"
- "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
- argv[0]);
- return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
- }
-
- /* get the options values */
- useCopyright=options[2].doesOccur;
- verbose = options[4].doesOccur;
-
- // ------------------------------------------------------------
- // Do not sort the string table, instead keep it in data.h order.
- // This simplifies data swapping and testing thereof because the string
- // table itself need not be sorted during swapping.
- // The NameToEnum sorter sorts each such map's string offsets instead.
-
- if (debug>1) {
- printf("String pool: %d\n", (int)STRING_COUNT);
- for (i=0; i<STRING_COUNT; ++i) {
- if (i != 0) {
- printf(", ");
- }
- printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
- }
- printf("\n\n");
- }
-
- // ------------------------------------------------------------
- // Create top-level property indices
-
- PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
- int32_t propNameCount;
- NameToEnumEntry* propName = createNameIndex(props, propNameCount);
- EnumToNameGroupEntry* propEnum = createEnumIndex(props);
-
- // ------------------------------------------------------------
- // Create indices for the value list for each enumerated property
-
- // This will have more entries than we need...
- EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
- int32_t enumToValue_count = 0;
- for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
- if (PROPERTY[i].valueCount == 0) continue;
- AliasArrayList values(PROPERTY[i].valueList,
- PROPERTY[i].valueCount);
- enumToValue[j].enumValue = PROPERTY[i].enumValue;
- enumToValue[j].enumToName = createEnumIndex(values);
- enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
- enumToValue[j].nameToEnum = createNameIndex(values,
- enumToValue[j].nameToEnum_count);
- ++j;
- }
- enumToValue_count = j;
-
- uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
- compareEnumToValueEntry, NULL, FALSE, &status);
-
- // ------------------------------------------------------------
- // Build PropertyAliases layout in memory
-
- Builder builder(debug);
-
- builder.buildTopLevelProperties(propName,
- propNameCount,
- propEnum,
- PROPERTY_COUNT);
-
- builder.buildValues(enumToValue,
- enumToValue_count);
-
- builder.buildStringPool(STRING_TABLE,
- STRING_COUNT,
- NAME_GROUP,
- NAME_GROUP_COUNT);
-
- builder.fixup();
-
- ////////////////////////////////////////////////////////////
- // Write the output file
- ////////////////////////////////////////////////////////////
- int32_t wlen = writeDataFile(options[3].value, builder);
- if (verbose) {
- fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
- U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
- }
-
- return 0; // success
-}
-
-int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
- int32_t length;
- int8_t* data = builder.createData(length);
-
- UNewDataMemory *pdata;
- UErrorCode status = U_ZERO_ERROR;
-
- pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
- useCopyright ? U_COPYRIGHT_STRING : 0, &status);
- if (U_FAILURE(status)) {
- die("Unable to create data memory");
- }
-
- udata_writeBlock(pdata, data, length);
-
- int32_t dataLength = (int32_t) udata_finish(pdata, &status);
- if (U_FAILURE(status)) {
- die("Error writing output file");
- }
- if (dataLength != length) {
- die("Written file doesn't match expected size");
- }
-
- return dataLength;
-}
-
-//eof
diff --git a/tools/genpname/gensvpa.pl b/tools/genpname/gensvpa.pl
deleted file mode 100755
index bcbf1cc4..00000000
--- a/tools/genpname/gensvpa.pl
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/perl
-#*
-#*******************************************************************************
-#* Copyright (C) 2006, International Business Machines
-#* Corporation and others. All Rights Reserved.
-#*******************************************************************************
-#*
-#* file name: genspva.pl
-#* encoding: US-ASCII
-#* tab size: 8 (not used)
-#* indentation:4
-#*
-#* Created by: Ram Viswanadha
-#*
-#* This file filters iso15924-utf8-<date>.txt
-#*
-
-use File::Find;
-use File::Basename;
-use IO::File;
-use Cwd;
-use File::Copy;
-use Getopt::Long;
-use File::Path;
-use File::Copy;
-
-#run the program
-main();
-
-#---------------------------------------------------------------------
-# The main program
-
-sub main(){
- GetOptions(
- "--destdir=s" => \$destdir,
- "--iso15924=s" => \$iso,
- "--prop=s" => \$prop,
- "--code-start=s" => \$code,
- );
- usage() unless defined $destdir;
- usage() unless defined $iso;
- usage() unless defined $prop;
-
- $outfile = "$destdir/SyntheticPropertyValueAliases.txt";
- $propFH = IO::File->new($prop,"r")
- or die "could not open the file $prop for reading: $! \n";
- $isoFH = IO::File->new($iso,"r")
- or die "could not open the file $iso for reading: $! \n";
- $outFH = IO::File->new($outfile,"w")
- or die "could not open the file $outfile for reading: $! \n";
- my @propLines;
- while (<$propFH>) {
- next if(!($_ =~/sc ; /));
- push(@propLines, $_);
- }
- printHeader($outFH);
- if(defined $code){
- print "Please add the following to UScriptCode enum in uscript.h.\n";
- print "#ifndef U_HIDE_DRAFT_API\n";
- }
- while (<$isoFH>) {
- next if($_=~/^#/);#skip if the line starts with a comment char
- ($script, $t, $name, $rest) = split(/;/,$_,4);
- #sc ; Arab
- $outstr = "sc ; $script";
- $encoded = 0; #false
-
- # seach the propLines to make sure that this scipt code is not
- # encoded in Unicode
- foreach $key (@propLines){
- if($key =~ /$outstr/){
- $encoded = 1;
- }
- }
- next if($encoded == 1);
- #ignore private use codes
- next if($script =~ /Qa[ab][a-z]/);
-
- #if($script eq "Qaaa"){
- # $outstr = $outstr." ; Private_Use_Start\n";
- #}elsif($script eq "Qabx"){
- # $outstr = $outstr." ; Private_Use_End\n";
- #}else{
- # $outstr = $outstr." ; $script \n";
- #}
-
- $outstr = $outstr." ; $script \n";
- print $outFH $outstr;
-
- #print to console
- if(defined $code){
- if($name =~ /[(\s,\x80-\xFF]/){
- $name = $script;
- }
- $name =~s/-/_/g;
-
- $scriptcode = "USCRIPT_".uc($name);
- print " $scriptcode = $code, /* $script */\n";
- $code++;
- }
-
- }
- if(defined $code){
- print "#endif /* U_HIDE_DRAFT_API */\n";
- }
- for($i=0; $i<2; $i++){
-
- }
- close($isoFH);
- close($propFH);
- close($outFH);
-}
-#-----------------------------------------------------------------------
-sub printHeader{
- ($outFH) = @_;
- ($DAY, $MONTH, $YEAR) = (localtime)[3,4,5];
- $YEAR += 1900;
- #We will print our copyright here + warnings
-print $outFH <<END_HEADER_COMMENT;
-########################################################################
-# Copyright (c) 2006-$YEAR, International Business Machines
-# Corporation and others. All Rights Reserved.
-########################################################################
-# file name: SyntheticPropertyValueAliases.txt
-# encoding: US-ASCII
-# tab size: 8 (not used)
-# indentation: 4
-# created by: gensvpa.pl
-########################################################################
-
-# This file follows the format of PropertyValueAliases.txt
-# It contains synthetic property value aliases not present
-# in the UCD. Unlike PropertyValueAliases.txt, it should
-# NOT contain a version number.
-
-########################################################################
-# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
-# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-########################################################################
-
-# set the same names as short and long names to fit the syntax without
-# inventing names that we would have to support forever
-
-# Script (sc)
-
-END_HEADER_COMMENT
-}
-#-----------------------------------------------------------------------
-sub usage {
- print << "END";
-Usage:
-gensvpa.pl
-Options:
- --destdir=<directory>
- --iso15924=<file name>
- --prop=<PropertyValueAliases.txt>
- --code-start=s
-e.g.: gensvpa.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata --code-start=60
-END
- exit(0);
-} \ No newline at end of file
diff --git a/tools/genpname/preparse.pl b/tools/genpname/preparse.pl
deleted file mode 100755
index d265bbe4..00000000
--- a/tools/genpname/preparse.pl
+++ /dev/null
@@ -1,1328 +0,0 @@
-#!/bin/perl -w
-#*******************************************************************
-# COPYRIGHT:
-# Copyright (c) 2002-2008, International Business Machines Corporation and
-# others. All Rights Reserved.
-#*******************************************************************
-
-# This script reads in UCD files PropertyAliases.txt and
-# PropertyValueAliases.txt and correlates them with ICU enums
-# defined in uchar.h and uscript.h. It then outputs a header
-# file which contains all names and enums. The header is included
-# by the genpname tool C++ source file, which produces the actual
-# binary data file.
-#
-# See usage note below.
-#
-# TODO: The Property[Value]Alias.txt files state that they can support
-# more than 2 names per property|value. Currently (Unicode 3.2) there
-# are always 1 or 2 names. If more names were supported, presumably
-# the format would be something like:
-# nv ; Numeric_Value
-# nv ; Value_Numerique
-# CURRENTLY, this script assumes that there are 1 or two names. Any
-# duplicates it sees are flagged as an error. If multiple aliases
-# appear in a future version of Unicode, modify this script to support
-# that.
-#
-# NOTE: As of ICU 2.6, this script has been modified to know about the
-# pseudo-property gcm/General_Category_Mask, which corresponds to the
-# uchar.h property UCHAR_GENERAL_CATEGORY_MASK. This property
-# corresponds to General_Category but is a bitmask value. It does not
-# exist in the UCD. Therefore, I special case it in several places
-# (search for General_Category_Mask and gcm).
-#
-# NOTE: As of ICU 2.6, this script reads an auxiliary data file,
-# SyntheticPropertyAliases.txt, containing property aliases not
-# present in the UCD but present in ICU. This file resides in the
-# same directory as this script. Its contents are merged into those
-# of PropertyAliases.txt as if the two files were appended.
-#
-# NOTE: The following names are handled specially. See script below
-# for details.
-#
-# T/True
-# F/False
-# No_Block
-#
-# Author: Alan Liu
-# Created: October 14 2002
-# Since: ICU 2.4
-
-use FileHandle;
-use strict;
-use Dumpvalue;
-
-my $DEBUG = 1;
-my $DUMPER = new Dumpvalue;
-
-my $count = @ARGV;
-my $ICU_DIR = shift() || '';
-my $OUT_FILE = shift() || 'data.h';
-my $HEADER_DIR = "$ICU_DIR/source/common/unicode";
-my $UNIDATA_DIR = "$ICU_DIR/source/data/unidata";
-
-# Get the current year from the system
-my $YEAR = 1900+@{[localtime]}[5]; # Get the current year
-
-# Used to make "n/a" property [value] aliases (Unicode or Synthetic) unique
-my $propNA = 0;
-my $valueNA = 0;
-
-#----------------------------------------------------------------------
-# Top level property keys for binary, enumerated, string, and double props
-my @TOP = qw( _bp _ep _sp _dp _mp );
-
-# This hash governs how top level properties are grouped into output arrays.
-#my %TOP_PROPS = ( "VALUED" => [ '_bp', '_ep' ],
-# "NO_VALUE" => [ '_sp', '_dp' ] );m
-#my %TOP_PROPS = ( "BINARY" => [ '_bp' ],
-# "ENUMERATED" => [ '_ep' ],
-# "STRING" => [ '_sp' ],
-# "DOUBLE" => [ '_dp' ] );
-my %TOP_PROPS = ( "" => [ '_bp', '_ep', '_sp', '_dp', '_mp' ] );
-
-my %PROP_TYPE = (Binary => "_bp",
- String => "_sp",
- Double => "_dp",
- Enumerated => "_ep",
- Bitmask => "_mp");
-#----------------------------------------------------------------------
-
-# Properties that are unsupported in ICU
-my %UNSUPPORTED = (Composition_Exclusion => 1,
- Decomposition_Mapping => 1,
- Expands_On_NFC => 1,
- Expands_On_NFD => 1,
- Expands_On_NFKC => 1,
- Expands_On_NFKD => 1,
- FC_NFKC_Closure => 1,
- ID_Start_Exceptions => 1,
- Special_Case_Condition => 1,
- );
-
-# Short names of properties that weren't seen in uchar.h. If the
-# properties weren't seen, don't complain about the property values
-# missing.
-my %MISSING_FROM_UCHAR;
-
-# Additional property aliases beyond short and long names,
-# like space in addition to WSpace and White_Space in Unicode 4.1.
-# Hashtable, maps long name to alias.
-# For example, maps White_Space->space.
-#
-# If multiple additional aliases are defined,
-# then they are separated in the value string with '|'.
-# For example, White_Space->space|outer_space
-my %additional_property_aliases;
-
-#----------------------------------------------------------------------
-
-# Emitted class names
-my ($STRING_CLASS, $ALIAS_CLASS, $PROPERTY_CLASS) = qw(AliasName Alias Property);
-
-if ($count < 1 || $count > 2 ||
- !-d $HEADER_DIR ||
- !-d $UNIDATA_DIR) {
- my $me = $0;
- $me =~ s|.+[/\\]||;
- my $lm = ' ' x length($me);
- print <<"END";
-
-$me: Reads ICU4C headers and Unicode data files and creates
-$lm a C header file that is included by genpname. The header
-$lm file matches constants defined in the ICU4C headers with
-$lm property|value aliases in the Unicode data files.
-
-Usage: $me <icu_dir> [<out_file>]
-
-<icu_dir> ICU4C root directory, containing
- source/common/unicode/uchar.h
- source/common/unicode/uscript.h
- source/data/unidata/Blocks.txt
- source/data/unidata/PropertyAliases.txt
- source/data/unidata/PropertyValueAliases.txt
-<out_file> File name of header to be written;
- default is 'data.h'.
-
-The Unicode versions of all input files must match.
-END
- exit(1);
-}
-
-my ($h, $version) = readAndMerge($HEADER_DIR, $UNIDATA_DIR);
-
-if ($DEBUG) {
- print "Merged hash:\n";
- for my $key (sort keys %$h) {
- my $hh = $h->{$key};
- for my $subkey (sort keys %$hh) {
- print "$key:$subkey:", $hh->{$subkey}, "\n";
- }
- }
-}
-
-my $out = new FileHandle($OUT_FILE, 'w');
-die "Error: Can't write to $OUT_FILE: $!" unless (defined $out);
-my $save = select($out);
-formatData($h, $version);
-select($save);
-$out->close();
-
-exit(0);
-
-#----------------------------------------------------------------------
-# From PropList.html: "The properties of the form Other_XXX
-# are used to generate properties in DerivedCoreProperties.txt.
-# They are not intended for general use, such as in APIs that
-# return property values.
-# Non_Break is not a valid property as of 3.2.
-sub isIgnoredProperty {
- local $_ = shift;
- /^Other_/i || /^Non_Break$/i;
-}
-
-# 'qc' is a pseudo-property matching any quick-check property
-# see PropertyValueAliases.txt file comments. 'binprop' is
-# a synthetic binary value alias "True"/"False", not present
-# in PropertyValueAliases.txt until Unicode 5.0.
-# Starting with Unicode 5.1, PropertyValueAliases.txt does have
-# explicit values for binary properties.
-sub isPseudoProperty {
- $_[0] eq 'qc' ||
- $_[0] eq 'binprop';
-}
-
-#----------------------------------------------------------------------
-# Emit the combined data from headers and the Unicode database as a
-# C source code header file.
-#
-# @param ref to hash with the data
-# @param Unicode version, as a string
-sub formatData {
- my $h = shift;
- my $version = shift;
-
- my $date = scalar localtime();
- print <<"END";
-/**
- * Copyright (C) 2002-$YEAR, International Business Machines Corporation and
- * others. All Rights Reserved.
- *
- * MACHINE GENERATED FILE. !!! Do not edit manually !!!
- *
- * Generated from
- * uchar.h
- * uscript.h
- * Blocks.txt
- * PropertyAliases.txt
- * PropertyValueAliases.txt
- *
- * Date: $date
- * Unicode version: $version
- * Script: $0
- */
-
-END
-
- #------------------------------------------------------------
- # Emit Unicode version
- print "/* Unicode version $version */\n";
- my @v = split(/\./, $version);
- push @v, '0' while (@v < 4);
- for (my $i=0; $i<@v; ++$i) {
- print "const uint8_t VERSION_$i = $v[$i];\n";
- }
- print "\n";
-
- #------------------------------------------------------------
- # Emit String table
- # [A table of all identifiers, that is, all long or short property
- # or value names. The list need NOT be sorted; it will be sorted
- # by the C program. Strings are referenced by their index into
- # this table. After sorting, a REMAP[] array is used to map the
- # old position indices to the new positions.]
- my %strings;
- for my $prop (sort keys %$h) {
- my $hh = $h->{$prop};
- for my $enum (sort keys %$hh) {
- my @a = split(/\|/, $hh->{$enum});
- for (@a) {
- $strings{$_} = 1 if (length($_));
- }
- }
- }
- my @strings = sort keys %strings;
- unshift @strings, "";
-
- print "const int32_t STRING_COUNT = ", scalar @strings, ";\n\n";
-
- # while printing, create a mapping hash from string table entry to index
- my %stringToID;
- print "/* to be sorted */\n";
- print "const $STRING_CLASS STRING_TABLE[] = {\n";
- for (my $i=0; $i<@strings; ++$i) {
- print " $STRING_CLASS(\"$strings[$i]\", $i),\n";
- $stringToID{$strings[$i]} = $i;
- }
- print "};\n\n";
-
- # placeholder for the remapping index. this is used to map
- # indices that we compute here to indices of the sorted
- # STRING_TABLE. STRING_TABLE will be sorted by the C++ program
- # using the uprv_comparePropertyNames() function. this will
- # reshuffle the order. we then use the indices (passed to the
- # String constructor) to create a REMAP[] array.
- print "/* to be filled in */\n";
- print "int32_t REMAP[", scalar @strings, "];\n\n";
-
- #------------------------------------------------------------
- # Emit the name group table
- # [A table of name groups. A name group is one or more names
- # for a property or property value. The Unicode data files specify
- # that there may be more than 2, although as of Unicode 3.2 there
- # are at most 2. The name group table looks like this:
- #
- # 114, -115, 116, -117, 0, -118, 65, -64, ...
- # [0] [2] [4] [6]
- #
- # The entry at [0] consists of 2 strings, 114 and 115.
- # The entry at [2] consists of 116 and 117. The entry at
- # [4] is one string, 118. There is always at least one
- # string; typically there are two. If there are two, the first
- # is the SHORT name and the second is the LONG. If there is
- # one, then the missing entry (always the short name, in 3.2)
- # is zero, which is by definition the index of "". The
- # 'preferred' name will generally be the LONG name, if there are
- # more than 2 entries. The last entry is negative.
-
- # Build name group list and replace string refs with nameGroup indices
- my @nameGroups;
-
- # Check for duplicate name groups, and reuse them if possible
- my %groupToInt; # Map group strings to ints
- for my $prop (sort keys %$h) {
- my $hh = $h->{$prop};
- for my $enum (sort keys %$hh) {
- my $groupString = $hh->{$enum};
- my $i;
- if (exists $groupToInt{$groupString}) {
- $i = $groupToInt{$groupString};
- } else {
- my @names = split(/\|/, $groupString);
- die "Error: Wrong number of names in " . $groupString if (@names < 1);
- $i = @nameGroups; # index of group we are making
- $groupToInt{$groupString} = $i; # Cache for reuse
- push @nameGroups, map { $stringToID{$_} } @names;
- $nameGroups[$#nameGroups] = -$nameGroups[$#nameGroups]; # mark end
- }
- # now, replace string list with ref to name group
- $hh->{$enum} = $i;
- }
- }
-
- print "const int32_t NAME_GROUP_COUNT = ",
- scalar @nameGroups, ";\n\n";
-
- print "int32_t NAME_GROUP[] = {\n";
- # emit one group per line, with annotations
- my $max_names = 0;
- for (my $i=0; $i<@nameGroups; ) {
- my @a;
- my $line;
- my $start = $i;
- for (;;) {
- my $j = $nameGroups[$i++];
- $line .= "$j, ";
- push @a, abs($j);
- last if ($j < 0);
- }
- print " ",
- $line,
- ' 'x(20-length($line)),
- "/* ", sprintf("%3d", $start),
- ": \"", join("\", \"", map { $strings[$_] } @a), "\" */\n";
- $max_names = @a if(@a > $max_names);
-
- }
- print "};\n\n";
-
- # This is fixed for 3.2 at "2" but should be calculated dynamically
- # when more than 2 names appear in Property[Value]Aliases.txt.
- print "#define MAX_NAMES_PER_GROUP $max_names\n\n";
-
- #------------------------------------------------------------
- # Emit enumerated property values
- for my $prop (sort keys %$h) {
- next if ($prop =~ /^_/);
- my $vh = $h->{$prop};
- my $count = scalar keys %$vh;
-
- print "const int32_t VALUES_${prop}_COUNT = ",
- $count, ";\n\n";
-
- print "const $ALIAS_CLASS VALUES_${prop}\[] = {\n";
- for my $enum (sort keys %$vh) {
- #my @names = split(/\|/, $vh->{$enum});
- #die "Error: Wrong number of names for $prop:$enum in [" . join(",", @names) . "]"
- # if (@names != 2);
- print " $ALIAS_CLASS((int32_t) $enum, ", $vh->{$enum}, "),\n";
- #$stringToID{$names[0]}, ", ",
- #$stringToID{$names[1]}, "),\n";
- # "\"", $names[0], "\", ",
- # "\"", $names[1], "\"),\n";
- }
- print "};\n\n";
- }
-
- #------------------------------------------------------------
- # Emit top-level properties (binary, enumerated, etc.)
- for my $topName (sort keys %TOP_PROPS) {
- my $a = $TOP_PROPS{$topName};
- my $count = 0;
- for my $type (@$a) { # "_bp", "_ep", etc.
- $count += scalar keys %{$h->{$type}};
- }
-
- print "const int32_t ${topName}PROPERTY_COUNT = $count;\n\n";
-
- print "const $PROPERTY_CLASS ${topName}PROPERTY[] = {\n";
-
- for my $type (@$a) { # "_bp", "_ep", etc.
- my $p = $h->{$type};
-
- for my $enum (sort keys %$p) {
- my $name = $strings[$nameGroups[$p->{$enum}]];
-
- my $valueRef = "0, NULL";
- if ($type eq '_bp') {
- $valueRef = "VALUES_binprop_COUNT, VALUES_binprop";
- }
- elsif (exists $h->{$name}) {
- $valueRef = "VALUES_${name}_COUNT, VALUES_$name";
- }
-
- print " $PROPERTY_CLASS((int32_t) $enum, ",
- $p->{$enum}, ", $valueRef),\n";
- }
- }
- print "};\n\n";
- }
-
- print "/*eof*/\n";
-}
-
-#----------------------------------------------------------------------
-# Read in the files uchar.h, uscript.h, Blocks.txt,
-# PropertyAliases.txt, and PropertyValueAliases.txt,
-# and combine them into one hash.
-#
-# @param directory containing headers
-# @param directory containin Unicode data files
-#
-# @return hash ref, Unicode version
-sub readAndMerge {
-
- my ($headerDir, $unidataDir) = @_;
-
- my $h = read_uchar("$headerDir/uchar.h");
- my $s = read_uscript("$headerDir/uscript.h");
- my $b = read_Blocks("$unidataDir/Blocks.txt");
- my $pa = {};
- read_PropertyAliases($pa, "$unidataDir/PropertyAliases.txt");
- read_PropertyAliases($pa, "SyntheticPropertyAliases.txt");
- my $va = {};
- read_PropertyValueAliases($va, "$unidataDir/PropertyValueAliases.txt");
- read_PropertyValueAliases($va, "SyntheticPropertyValueAliases.txt");
-
- # Extract property family hash
- my $fam = $pa->{'_family'};
- delete $pa->{'_family'};
-
- # Note: uscript.h has no version string, so don't check it
- my $version = check_versions([ 'uchar.h', $h ],
- [ 'Blocks.txt', $b ],
- [ 'PropertyAliases.txt', $pa ],
- [ 'PropertyValueAliases.txt', $va ]);
-
- # Do this BEFORE merging; merging modifies the hashes
- check_PropertyValueAliases($pa, $va);
-
- # Dump out the $va hash for debugging
- if ($DEBUG) {
- print "Property values hash:\n";
- for my $key (sort keys %$va) {
- my $hh = $va->{$key};
- for my $subkey (sort keys %$hh) {
- print "$key:$subkey:", $hh->{$subkey}, "\n";
- }
- }
- }
-
- # Dump out the $s hash for debugging
- if ($DEBUG) {
- print "Script hash:\n";
- for my $key (sort keys %$s) {
- print "$key:", $s->{$key}, "\n";
- }
- }
-
- # Link in the script data
- $h->{'sc'} = $s;
-
- merge_Blocks($h, $b);
-
- merge_PropertyAliases($h, $pa, $fam);
-
- merge_PropertyValueAliases($h, $va);
-
- ($h, $version);
-}
-
-#----------------------------------------------------------------------
-# Ensure that the version strings in the given hashes (under the key
-# '_version') are compatible. Currently this means they must be
-# identical, with the exception that "X.Y" will match "X.Y.0".
-# All hashes must define the key '_version'.
-#
-# @param a list of pairs of (file name, hash reference)
-#
-# @return the version of all the hashes. Upon return, the '_version'
-# will be removed from all hashes.
-sub check_versions {
- my $version = '';
- my $msg = '';
- foreach my $a (@_) {
- my $name = $a->[0];
- my $h = $a->[1];
- die "Error: No version found" unless (exists $h->{'_version'});
- my $v = $h->{'_version'};
- delete $h->{'_version'};
-
- # append ".0" if necessary, to standardize to X.Y.Z
- $v .= '.0' unless ($v =~ /\.\d+\./);
- $v .= '.0' unless ($v =~ /\.\d+\./);
- $msg .= "$name = $v\n";
- if ($version) {
- die "Error: Mismatched Unicode versions\n$msg"
- unless ($version eq $v);
- } else {
- $version = $v;
- }
- }
- $version;
-}
-
-#----------------------------------------------------------------------
-# Make sure the property names in PropertyValueAliases.txt match those
-# in PropertyAliases.txt.
-#
-# @param a hash ref from read_PropertyAliases.
-# @param a hash ref from read_PropertyValueAliases.
-sub check_PropertyValueAliases {
- my ($pa, $va) = @_;
-
- # make a reverse hash of short->long
- my %rev;
- for (keys %$pa) { $rev{$pa->{$_}} = $_; }
-
- for my $prop (keys %$va) {
- if (!exists $rev{$prop} && !isPseudoProperty($prop)) {
- print "Warning: Property $prop from PropertyValueAliases not listed in PropertyAliases\n";
- }
- }
-}
-
-#----------------------------------------------------------------------
-# Merge blocks data into uchar.h enum data. In the 'blk' subhash all
-# code point values, as returned from read_uchar, are replaced by
-# block names, as read from Blocks.txt and returned by read_Blocks.
-# The match must be 1-to-1. If there is any failure of 1-to-1
-# mapping, an error is signaled. Upon return, the read_Blocks hash
-# is emptied of all contents, except for those that failed to match.
-#
-# The mapping in the 'blk' subhash, after this function returns, is
-# from uchar.h enum name, e.g. "UBLOCK_BASIC_LATIN", to Blocks.h
-# pseudo-name, e.g. "Basic Latin".
-#
-# @param a hash ref from read_uchar.
-# @param a hash ref from read_Blocks.
-sub merge_Blocks {
- my ($h, $b) = @_;
-
- die "Error: No blocks data in uchar.h"
- unless (exists $h->{'blk'});
- my $blk = $h->{'blk'};
- for my $enum (keys %$blk) {
- my $cp = $blk->{$enum};
- if ($cp && !exists $b->{$cp}) {
- die "Error: No block found at $cp in Blocks.txt";
- }
- # Convert code point to pseudo-name:
- $blk->{$enum} = $b->{$cp};
- delete $b->{$cp};
- }
- my $err = '';
- for my $cp (keys %$b) {
- $err .= "Error: Block " . $b->{$cp} . " not listed in uchar.h\n";
- }
- die $err if ($err);
-}
-
-#----------------------------------------------------------------------
-# Merge property alias names into the uchar.h hash. The subhashes
-# under the keys _* (b(inary, e(numerated, s(tring, d(ouble) are
-# examined and the values of those subhashes are assumed to be long
-# names in PropertyAliases.txt. They are validated and replaced by
-# "<short>|<long>". Upon return, the read_PropertyAliases hash is
-# emptied of all contents, except for those that failed to match.
-# Unmatched names in PropertyAliases are listed as a warning but do
-# NOT cause the script to die.
-#
-# @param a hash ref from read_uchar.
-# @param a hash ref from read_PropertyAliases.
-# @param a hash mapping long names to property family (e.g., 'binary')
-sub merge_PropertyAliases {
- my ($h, $pa, $fam) = @_;
-
- for my $k (@TOP) {
- die "Error: No properties data for $k in uchar.h"
- unless (exists $h->{$k});
- }
-
- for my $subh (map { $h->{$_} } @TOP) {
- for my $enum (keys %$subh) {
- my $long_name = $subh->{$enum};
- if (!exists $pa->{$long_name}) {
- die "Error: Property $long_name not found (or used more than once)";
- }
-
- my $value;
- if($pa->{$long_name} =~ m|^n/a\d*$|) {
- # replace an "n/a" short name with an empty name (nothing before "|");
- # don't remove it (don't remove the "|"): there must always be a long name,
- # and if the short name is removed, then the long name becomes the
- # short name and there is no long name left (unless there is another alias)
- $value = "|" . $long_name;
- } else {
- $value = $pa->{$long_name} . "|" . $long_name;
- }
- if (exists $additional_property_aliases{$long_name}) {
- $value .= "|" . $additional_property_aliases{$long_name};
- }
- $subh->{$enum} = $value;
- delete $pa->{$long_name};
- }
- }
-
- my @err;
- for my $name (keys %$pa) {
- $MISSING_FROM_UCHAR{$pa->{$name}} = 1;
- if (exists $UNSUPPORTED{$name}) {
- push @err, "Info: No enum for " . $fam->{$name} . " property $name in uchar.h";
- } elsif (!isIgnoredProperty($name)) {
- push @err, "Warning: No enum for " . $fam->{$name} . " property $name in uchar.h";
- }
- }
- print join("\n", sort @err), "\n" if (@err);
-}
-
-#----------------------------------------------------------------------
-# Return 1 if two names match ignoring whitespace, '-', and '_'.
-# Used to match names in Blocks.txt with those in PropertyValueAliases.txt
-# as of Unicode 4.0.
-sub matchesLoosely {
- my ($a, $b) = @_;
- $a =~ s/[\s\-_]//g;
- $b =~ s/[\s\-_]//g;
- $a =~ /^$b$/i;
-}
-
-#----------------------------------------------------------------------
-# Merge PropertyValueAliases.txt data into the uchar.h hash. All
-# properties other than blk, _bp, and _ep are analyzed and mapped to
-# the names listed in PropertyValueAliases. They are then replaced
-# with a string of the form "<short>|<long>". The short or long name
-# may be missing.
-#
-# @param a hash ref from read_uchar.
-# @param a hash ref from read_PropertyValueAliases.
-sub merge_PropertyValueAliases {
- my ($h, $va) = @_;
-
- my %gcCount;
- for my $prop (keys %$h) {
- # _bp, _ep handled in merge_PropertyAliases
- next if ($prop =~ /^_/);
-
- # Special case: gcm
- my $prop2 = ($prop eq 'gcm') ? 'gc' : $prop;
-
- # find corresponding PropertyValueAliases data
- die "Error: Can't find $prop in PropertyValueAliases.txt"
- unless (exists $va->{$prop2});
- my $pva = $va->{$prop2};
-
- # match up data
- my $hh = $h->{$prop};
- for my $enum (keys %$hh) {
-
- my $name = $hh->{$enum};
-
- # look up both long and short & ignore case
- my $n;
- if (exists $pva->{$name}) {
- $n = $name;
- } else {
- # iterate (slow)
- for my $a (keys %$pva) {
- # case-insensitive match
- # & case-insensitive reverse match
- if ($a =~ /^$name$/i ||
- $pva->{$a} =~ /^$name$/i) {
- $n = $a;
- last;
- }
- }
- }
-
- # For blocks, do a loose match from Blocks.txt pseudo-name
- # to PropertyValueAliases long name.
- if (!$n && $prop eq 'blk') {
- for my $a (keys %$pva) {
- # The block is only going to match the long name,
- # but we check both for completeness. As of Unicode
- # 4.0, blocks do not have short names.
- if (matchesLoosely($name, $pva->{$a}) ||
- matchesLoosely($name, $a)) {
- $n = $a;
- last;
- }
- }
- }
-
- die "Error: Property value $prop:$name not found" unless ($n);
-
- my $l = $n;
- my $r = $pva->{$n};
- # convert |n/a\d*| to blank
- $l = '' if ($l =~ m|^n/a\d*$|);
- $r = '' if ($r =~ m|^n/a\d*$|);
-
- $hh->{$enum} = "$l|$r";
- # Don't delete the 'gc' properties because we need to share
- # them between 'gc' and 'gcm'. Count each use instead.
- if ($prop2 eq 'gc') {
- ++$gcCount{$n};
- } else {
- delete $pva->{$n};
- }
- }
- }
-
- # Merge the combining class values in manually
- # Add the same values to the synthetic lccc and tccc properties
- die "Error: No ccc data"
- unless exists $va->{'ccc'};
- for my $ccc (keys %{$va->{'ccc'}}) {
- die "Error: Can't overwrite ccc $ccc"
- if (exists $h->{'ccc'}->{$ccc});
- $h->{'lccc'}->{$ccc} =
- $h->{'tccc'}->{$ccc} =
- $h->{'ccc'}->{$ccc} = $va->{'ccc'}->{$ccc};
- }
- delete $va->{'ccc'};
-
- # Merge synthetic binary property values in manually.
- # These are the "True" and "False" value aliases.
- die "Error: No True/False value aliases"
- unless exists $va->{'binprop'};
- for my $bp (keys %{$va->{'binprop'}}) {
- $h->{'binprop'}->{$bp} = $va->{'binprop'}->{$bp};
- }
- delete $va->{'binprop'};
-
- my $err = '';
- for my $prop (sort keys %$va) {
- my $hh = $va->{$prop};
- for my $subkey (sort keys %$hh) {
- # 'gc' props are shared with 'gcm'; make sure they were used
- # once or twice.
- if ($prop eq 'gc') {
- my $n = $gcCount{$subkey};
- next if ($n >= 1 && $n <= 2);
- }
- $err .= "Warning: Enum for value $prop:$subkey not found in uchar.h\n"
- unless exists $MISSING_FROM_UCHAR{$prop};
- }
- }
- print $err if ($err);
-}
-
-#----------------------------------------------------------------------
-# Read the PropertyAliases.txt file. Return a hash that maps the long
-# name to the short name. The special key '_version' will map to the
-# Unicode version of the file. The special key '_family' holds a
-# subhash that maps long names to a family string, for descriptive
-# purposes.
-#
-# @param a filename for PropertyAliases.txt
-# @param reference to hash to receive data. Keys are long names.
-# Values are short names.
-sub read_PropertyAliases {
-
- my $hash = shift; # result
-
- my $filename = shift;
-
- my $fam = {}; # map long names to family string
- $fam = $hash->{'_family'} if (exists $hash->{'_family'});
-
- my $family; # binary, enumerated, etc.
-
- my $in = new FileHandle($filename, 'r');
- die "Error: Cannot open $filename" if (!defined $in);
-
- while (<$in>) {
-
- # Read version (embedded in a comment)
- if (/PropertyAliases-(\d+\.\d+\.\d+)/i) {
- die "Error: Multiple versions in $filename"
- if (exists $hash->{'_version'});
- $hash->{'_version'} = $1;
- }
-
- # Read family heading
- if (/^\s*\#\s*(.+?)\s*Properties\s*$/) {
- $family = $1;
- }
-
- # Ignore comments and blank lines
- s/\#.*//;
- next unless (/\S/);
-
- if (/^\s*(.+?)\s*;/) {
- my $short = $1;
- my @fields = /;\s*([^\s;]+)/g;
- if (@fields < 1 || @fields > 2) {
- my $number = @fields;
- die "Error: Wrong number of fields ($number) in $filename at $_";
- }
-
- # Make "n/a" strings unique
- if ($short eq 'n/a') {
- $short .= sprintf("%03d", $propNA++);
- }
- my $long = $fields[0];
- if ($long eq 'n/a') {
- $long .= sprintf("%03d", $propNA++);
- }
-
- # Add long name->short name to the hash=pa hash table
- if (exists $hash->{$long}) {
- die "Error: Duplicate property $long in $filename"
- }
- $hash->{$long} = $short;
- $fam->{$long} = $family;
-
- # Add the list of further aliases to the additional_property_aliases hash table,
- # using the long property name as the key.
- # For example:
- # White_Space->space|outer_space
- if (@fields > 1) {
- my $value = pop @fields;
- while (@fields > 1) {
- $value .= "|" . pop @fields;
- }
- $additional_property_aliases{$long} = $value;
- }
- } else {
- die "Error: Can't parse $_ in $filename";
- }
- }
-
- $in->close();
-
- $hash->{'_family'} = $fam;
-}
-
-#----------------------------------------------------------------------
-# Read the PropertyValueAliases.txt file. Return a two level hash
-# that maps property_short_name:value_short_name:value_long_name. In
-# the case of the 'ccc' property, the short name is the numeric class
-# and the long name is "<short>|<long>". The special key '_version'
-# will map to the Unicode version of the file.
-#
-# @param a filename for PropertyValueAliases.txt
-#
-# @return a hash reference.
-sub read_PropertyValueAliases {
-
- my $hash = shift; # result
-
- my $filename = shift;
-
- my $in = new FileHandle($filename, 'r');
- die "Error: Cannot open $filename" if (!defined $in);
-
- while (<$in>) {
-
- # Read version (embedded in a comment)
- if (/PropertyValueAliases-(\d+\.\d+\.\d+)/i) {
- die "Error: Multiple versions in $filename"
- if (exists $hash->{'_version'});
- $hash->{'_version'} = $1;
- }
-
- # Ignore comments and blank lines
- s/\#.*//;
- next unless (/\S/);
-
- if (/^\s*(.+?)\s*;/i) {
- my $prop = $1;
- my @fields = /;\s*([^\s;]+)/g;
- die "Error: Wrong number of fields in $filename"
- if (@fields < 2 || @fields > 5);
- # Make "n/a" strings unique
- $fields[0] .= sprintf("%03d", $valueNA++) if ($fields[0] eq 'n/a');
- # Squash extra fields together
- while (@fields > 2) {
- my $f = pop @fields;
- $fields[$#fields] .= '|' . $f;
- }
- addDatum($hash, $prop, @fields);
- }
-
- else {
- die "Error: Can't parse $_ in $filename";
- }
- }
-
- $in->close();
-
- # Script Copt=Qaac (Coptic) is a special case.
- # Before the Copt code was defined, the private-use code Qaac was used.
- # Starting with Unicode 4.1, PropertyValueAliases.txt contains
- # Copt as the short name as well as Qaac as an alias.
- # For use with older Unicode data files, we add here a Qaac->Coptic entry.
- # This should not do anything for 4.1-and-later Unicode data files.
- # See also UAX #24: Script Names http://www.unicode.org/unicode/reports/tr24/
- $hash->{'sc'}->{'Qaac'} = 'Coptic'
- unless (exists $hash->{'sc'}->{'Qaac'} || exists $hash->{'sc'}->{'Copt'});
-
- # Add N|No|T|True and Y|Yes|F|False -- these are values we recognize for
- # binary properties (until Unicode 5.0 NOT from PropertyValueAliases.txt).
- # These are of the same form as the 'ccc' value aliases.
- # Starting with Unicode 5.1, PropertyValueAliases.txt does have values
- # for binary properties.
- if (!exists $hash->{'binprop'}->{'0'}) {
- if (exists $hash->{'Alpha'}->{'N'}) {
- # Unicode 5.1 and later: Make the numeric value the key.
- $hash->{'binprop'}->{'0'} = 'N|' . $hash->{'Alpha'}->{'N'};
- $hash->{'binprop'}->{'1'} = 'Y|' . $hash->{'Alpha'}->{'Y'};
- } elsif (exists $hash->{'Alpha'}) {
- die "Error: Unrecognized short value name for binary property 'Alpha'\n";
- } else {
- # Unicode 5.0 and earlier: Add manually.
- $hash->{'binprop'}->{'0'} = 'N|No|F|False';
- $hash->{'binprop'}->{'1'} = 'Y|Yes|T|True';
- }
- }
-}
-
-#----------------------------------------------------------------------
-# Read the Blocks.txt file. Return a hash that maps the code point
-# range start to the block name. The special key '_version' will map
-# to the Unicode version of the file.
-#
-# As of Unicode 4.0, the names in the Blocks.txt are no longer the
-# proper names. The proper names are now listed in PropertyValueAliases.
-# They are similar but not identical. Furthermore, 4.0 introduces
-# a new block name, No_Block, which is listed only in PropertyValueAliases
-# and not in Blocks.txt. As a result, we handle blocks as follows:
-#
-# 1. Read Blocks.txt to map code point range start to quasi-block name.
-# 2. Add to Blocks.txt a synthetic No Block code point & name:
-# X -> No Block
-# 3. Map quasi-names from Blocks.txt (including No Block) to actual
-# names from PropertyValueAliases. This occurs in
-# merge_PropertyValueAliases.
-#
-# @param a filename for Blocks.txt
-#
-# @return a ref to a hash. Keys are code points, as text, e.g.,
-# "1720". Values are pseudo-block names, e.g., "Hanunoo".
-sub read_Blocks {
-
- my $filename = shift;
-
- my $hash = {}; # result
-
- my $in = new FileHandle($filename, 'r');
- die "Error: Cannot open $filename" if (!defined $in);
-
- while (<$in>) {
-
- # Read version (embedded in a comment)
- if (/Blocks-(\d+\.\d+\.\d+)/i) {
- die "Error: Multiple versions in $filename"
- if (exists $hash->{'_version'});
- $hash->{'_version'} = $1;
- }
-
- # Ignore comments and blank lines
- s/\#.*//;
- next unless (/\S/);
-
- if (/^([0-9a-f]+)\.\.[0-9a-f]+\s*;\s*(.+?)\s*$/i) {
- die "Error: Duplicate range $1 in $filename"
- if (exists $hash->{$1});
- $hash->{$1} = $2;
- }
-
- else {
- die "Error: Can't parse $_ in $filename";
- }
- }
-
- $in->close();
-
- # Add pseudo-name for No Block
- $hash->{'none'} = 'No Block';
-
- $hash;
-}
-
-#----------------------------------------------------------------------
-# Read the uscript.h file and compile a mapping of Unicode symbols to
-# icu4c enum values.
-#
-# Only constants inside "typedef enum UScriptCode" are collected. Lines
-# ending in a backslash are joined with the following line(s) before they
-# are examined. Text between "#ifdef ICU_UCHAR_USE_DEPRECATES" and the
-# next "#endif" is skipped.
-#
-# @param a filename for uscript.h
-#
-# @return a ref to a hash. The keys of the hash are enum symbols from
-# uscript.h (USCRIPT_...), and each value is the first word of the
-# comment that follows the symbol on the same line.
-#
-# Dies if the file cannot be opened, if a symbol is listed twice, or if
-# an #ifdef is found inside the deprecated section.
-sub read_uscript {
-
-    my $filename = shift;
-
-    my $mode = '';  # '' outside enums, else the enum name or 'DEPRECATED'
-
-    my $last = '';  # text collected from backslash-continued lines
-
-    my $hash = {};  # result
-
-    my $in = new FileHandle($filename, 'r');
-    die "Error: Cannot open $filename" if (!defined $in);
-
-    while (<$in>) {
-        # Fold continued lines together. Append rather than assign, so
-        # that a statement continued over three or more lines keeps every
-        # fragment (read_uchar folds lines the same way).
-        if (/^(.*)\\$/) {
-            $last .= $1;
-            next;
-        } elsif ($last ne '') {
-            # compare against '' so that a pending fragment of "0" is kept
-            $_ = $last . $_;
-            $last = '';
-        }
-
-        # Exit all modes here: a line starting with '}' closes the enum
-        if ($mode && $mode ne 'DEPRECATED') {
-            if (/^\s*\}/) {
-                $mode = '';
-                next;
-            }
-        }
-
-        # Handle individual modes
-
-        if ($mode eq 'UScriptCode') {
-            # USCRIPT_xxx ... /* word
-            if (m|^\s*(USCRIPT_\w+).+?/\*\s*(\w+)|) {
-                my ($enum, $code) = ($1, $2);
-                die "Error: Duplicate script $enum"
-                    if (exists $hash->{$enum});
-                $hash->{$enum} = $code;
-            }
-        }
-
-        elsif ($mode eq 'DEPRECATED') {
-            if (/\s*\#ifdef/) {
-                die "Error: Nested #ifdef";
-            }
-            elsif (/\s*\#endif/) {
-                $mode = '';
-            }
-        }
-
-        elsif (!$mode) {
-            # The enum name becomes the mode; enums other than
-            # UScriptCode are entered too but nothing is read from them.
-            if (/^\s*typedef\s+enum\s+(\w+)\s*\{/ ||
-                /^\s*typedef\s+enum\s+(\w+)\s*$/) {
-                $mode = $1;
-                #print "Parsing $mode\n";
-            }
-
-            elsif (/^\s*\#ifdef\s+ICU_UCHAR_USE_DEPRECATES\b/) {
-                $mode = 'DEPRECATED';
-            }
-        }
-    }
-
-    $in->close();
-
-    $hash;
-}
-
-#----------------------------------------------------------------------
-# Read the uchar.h file and compile a mapping of Unicode symbols to
-# icu4c enum values.
-#
-# @param a filename for uchar.h
-#
-# @return a ref to a hash. The keys of the hash are '_bp' for binary
-# properties, '_ep' for enumerated properties, '_dp'/'_sp'/'_mp' for
-# double/string/mask properties, and 'gc', 'gcm', 'bc', 'blk',
-# 'ea', 'dt', 'jt', 'jg', 'lb', or 'nt' for corresponding property
-# value aliases. The values of the hash are subhashes. The subhashes
-# have a key of the uchar.h enum symbol, and a value of the alias
-# string (as listed in PropertyValueAliases.txt). NOTE: The alias
-# string is whatever alias uchar.h lists. This may be either short or
-# long, depending on the specific enum. NOTE: For blocks ('blk'), the
-# value is a hex code point for the start of the associated block.
-# NOTE: The special key _version will map to the Unicode version of
-# the file.
-sub read_uchar {
- # Implementation: a line-oriented state machine. $mode is '' outside of
- # any enum, the enum's name while inside "typedef enum NAME" or
- # "enum NAME", or 'DEPRECATED' inside "#ifdef ICU_UCHAR_USE_DEPRECATES".
- # $submode holds a tag captured from a doc comment until the next enum
- # constant consumes it. All results are stored through addDatum, which
- # dies when the same first-level/second-level key pair is set twice.
-
- my $filename = shift;
-
- my $mode = ''; # state machine mode and submode
- my $submode = '';
-
- my $last = ''; # for line folding
-
- my $hash = {}; # result
- my $key; # first-level key
-
- my $in = new FileHandle($filename, 'r');
- die "Error: Cannot open $filename" if (!defined $in);
-
- while (<$in>) {
- # Fold continued lines together
- # (fragments are appended, so any number of continued lines is joined)
- if (/^(.*)\\$/) {
- $last .= $1;
- next;
- } elsif ($last) {
- $_ = $last . $_;
- $last = '';
- }
-
- # Exit all modes here
- # (a line starting with '}' closes whichever enum is being read)
- if ($mode && $mode ne 'DEPRECATED') {
- if (/^\s*\}/) {
- $mode = '';
- next;
- }
- }
-
- # Handle individual modes
-
- if ($mode eq 'UProperty') {
- # An enum constant: record it only if a preceding doc comment
- # supplied an alias ($submode); otherwise it is ignored.
- if (/^\s*(UCHAR_\w+)\s*[,=]/ || /^\s+(UCHAR_\w+)\s*$/) {
- if ($submode) {
- addDatum($hash, $key, $1, $submode);
- $submode = '';
- } else {
- #print "Warning: Ignoring $1\n";
- }
- }
-
- # A doc comment of the form "/** <type> property <alias>": <type> is
- # mapped through %PROP_TYPE to the first-level key, <alias> is kept
- # for the constant that follows.
- elsif (m|^\s*/\*\*\s*(\w+)\s+property\s+(\w+)|i) {
- die "Error: Unmatched tag $submode" if ($submode);
- die "Error: Unrecognized UProperty comment: $_"
- unless (exists $PROP_TYPE{$1});
- $key = $PROP_TYPE{$1};
- $submode = $2;
- }
- }
-
- elsif ($mode eq 'UCharCategory') {
- # Same comment-then-constant pairing; the tag is a two-letter
- # code (uppercase letter followed by lowercase letter).
- if (/^\s*(U_\w+)\s*=/) {
- if ($submode) {
- addDatum($hash, 'gc', $1, $submode);
- $submode = '';
- } else {
- #print "Warning: Ignoring $1\n";
- }
- }
-
- elsif (m|^\s*/\*\*\s*([A-Z][a-z])\s|) {
- die "Error: Unmatched tag $submode" if ($submode);
- $submode = $1;
- }
- }
-
- elsif ($mode eq 'UCharDirection') {
- # Same pairing again; the tag is an all-uppercase word and the
- # first-level key is always 'bc'.
- if (/^\s*(U_\w+)\s*[,=]/ || /^\s+(U_\w+)\s*$/) {
- if ($submode) {
- addDatum($hash, $key, $1, $submode);
- $submode = '';
- } else {
- #print "Warning: Ignoring $1\n";
- }
- }
-
- elsif (m|/\*\*\s*([A-Z]+)\s|) {
- die "Error: Unmatched tag $submode" if ($submode);
- $key = 'bc';
- $submode = $1;
- }
- }
-
- # The enums below carry the alias on the same line as the constant,
- # in a trailing comment of the form /*[alias]*/. Constants without
- # such a comment are not recorded.
-
- elsif ($mode eq 'UBlockCode') {
- if (m|^\s*(UBLOCK_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'blk', $1, $2);
- }
- }
-
- elsif ($mode eq 'UEastAsianWidth') {
- if (m|^\s*(U_EA_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'ea', $1, $2);
- }
- }
-
- elsif ($mode eq 'UDecompositionType') {
- if (m|^\s*(U_DT_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'dt', $1, $2);
- }
- }
-
- elsif ($mode eq 'UJoiningType') {
- if (m|^\s*(U_JT_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'jt', $1, $2);
- }
- }
-
- elsif ($mode eq 'UJoiningGroup') {
- # No comment needed here: the alias is the part of the symbol
- # after "U_JG_". The U_JG_COUNT entry is skipped.
- if (/^\s*(U_JG_(\w+))/) {
- addDatum($hash, 'jg', $1, $2) unless ($2 eq 'COUNT');
- }
- }
-
- elsif ($mode eq 'UGraphemeClusterBreak') {
- if (m|^\s*(U_GCB_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'GCB', $1, $2);
- }
- }
-
- elsif ($mode eq 'UWordBreakValues') {
- if (m|^\s*(U_WB_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'WB', $1, $2);
- }
- }
-
- elsif ($mode eq 'USentenceBreak') {
- if (m|^\s*(U_SB_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'SB', $1, $2);
- }
- }
-
- elsif ($mode eq 'ULineBreak') {
- if (m|^\s*(U_LB_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'lb', $1, $2);
- }
- }
-
- elsif ($mode eq 'UNumericType') {
- if (m|^\s*(U_NT_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'nt', $1, $2);
- }
- }
-
- elsif ($mode eq 'UHangulSyllableType') {
- if (m|^\s*(U_HST_\w+).+?/\*\[(.+?)\]\*/|) {
- addDatum($hash, 'hst', $1, $2);
- }
- }
-
- elsif ($mode eq 'DEPRECATED') {
- # Skip everything up to the next #endif; an #ifdef inside the
- # deprecated section is not supported and aborts the script.
- if (/\s*\#ifdef/) {
- die "Error: Nested #ifdef";
- }
- elsif (/\s*\#endif/) {
- $mode = '';
- }
- }
-
- elsif (!$mode) {
- # Outside of any enum: pick up interesting #defines and detect
- # the start of the next enum or of the deprecated section.
- if (/^\s*\#define\s+(\w+)\s+(.+)/) {
- # #define $left $right
- my ($left, $right) = ($1, $2);
-
- if ($left eq 'U_UNICODE_VERSION') {
- my $version = $right;
- # strip the surrounding double quotes, if any
- $version = $1 if ($version =~ /^\"(.*)\"/);
- # print "Unicode version: ", $version, "\n";
- die "Error: Multiple versions in $filename"
- if (defined $hash->{'_version'});
- $hash->{'_version'} = $version;
- }
-
- # U_GC_xx_MASK: the alias is the part between U_GC_ and _MASK
- elsif ($left =~ /U_GC_(\w+?)_MASK/) {
- addDatum($hash, 'gcm', $left, $1);
- }
- }
-
- # The enum name becomes the mode; names without a matching
- # branch above are entered and left without recording anything.
- elsif (/^\s*typedef\s+enum\s+(\w+)\s*\{/ ||
- /^\s*typedef\s+enum\s+(\w+)\s*$/) {
- $mode = $1;
- #print "Parsing $mode\n";
- }
-
- elsif (/^\s*enum\s+(\w+)\s*\{/ ||
- /^\s*enum\s+(\w+)\s*$/) {
- $mode = $1;
- #print "Parsing $mode\n";
- }
-
- elsif (/^\s*\#ifdef\s+ICU_UCHAR_USE_DEPRECATES\b/) {
- $mode = 'DEPRECATED';
- }
- }
- }
-
- $in->close();
-
- # hardcode known values for the normalization quick check properties
- # see unorm.h for the UNormalizationCheckResult enum
-
- addDatum($hash, 'NFC_QC', 'UNORM_NO', 'N');
- addDatum($hash, 'NFC_QC', 'UNORM_YES', 'Y');
- addDatum($hash, 'NFC_QC', 'UNORM_MAYBE', 'M');
-
- addDatum($hash, 'NFKC_QC', 'UNORM_NO', 'N');
- addDatum($hash, 'NFKC_QC', 'UNORM_YES', 'Y');
- addDatum($hash, 'NFKC_QC', 'UNORM_MAYBE', 'M');
-
- # no "maybe" values for NF[K]D
-
- addDatum($hash, 'NFD_QC', 'UNORM_NO', 'N');
- addDatum($hash, 'NFD_QC', 'UNORM_YES', 'Y');
-
- addDatum($hash, 'NFKD_QC', 'UNORM_NO', 'N');
- addDatum($hash, 'NFKD_QC', 'UNORM_YES', 'Y');
-
- $hash;
-}
-
-#----------------------------------------------------------------------
-# Add a new value to a two-level hash. That is, given a ref to
-# a hash, two keys, and a value, add $hash->{$key1}->{$key2} = $value.
-sub addDatum {
-    # Store $value at $table->{$outer}->{$inner}; a slot may be set only
-    # once, a second attempt is a fatal error.
-    my ($table, $outer, $inner, $value) = @_;
-    die "Error: $outer:$inner already set to $table->{$outer}->{$inner}, cannot set to $value"
-        if exists $table->{$outer}->{$inner};
-    $table->{$outer}->{$inner} = $value;
-}
-
-#eof
diff --git a/tools/genprops/Makefile.in b/tools/genprops/Makefile.in
deleted file mode 100644
index 2856edf6..00000000
--- a/tools/genprops/Makefile.in
+++ /dev/null
@@ -1,97 +0,0 @@
-## Makefile.in for ICU - tools/genprops
-## Copyright (c) 1999-2005, International Business Machines Corporation and
-## others. All Rights Reserved.
-## Steven R. Loomis
-
-## Source directory information
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-
-top_builddir = ../..
-
-include $(top_builddir)/icudefs.mk
-
-## Build directory information
-subdir = tools/genprops
-
-TARGET_STUB_NAME = genprops
-
-SECTION = 8
-
-#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
-
-
-## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS) $(MAN_FILES)
-
-## Target information
-TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
-
-ifneq ($(top_builddir),$(top_srcdir))
-CPPFLAGS += -I$(top_builddir)/common
-endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
-LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-
-OBJECTS = genprops.o props2.o store.o
-
-DEPS = $(OBJECTS:.o=.d)
-
-## List of phony targets
-.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man
-
-## Clear suffix list
-.SUFFIXES :
-
-## List of standard targets
-all: all-local
-install: install-local
-clean: clean-local
-distclean : distclean-local
-dist: dist-local
-check: all check-local
-
-all-local: $(TARGET) $(MAN_FILES)
-
-install-local: all-local install-man
-# $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
-# $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
-
-install-man: $(MAN_FILES)
-# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
-# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
-
-dist-local:
-
-clean-local:
- test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(OBJECTS)
-
-distclean-local: clean-local
- $(RMV) Makefile
-
-check-local: all-local
-
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(TARGET) : $(OBJECTS)
- $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
- $(POST_BUILD_STEP)
-
-
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
-# Pull in the generated dependency files ($(DEPS)) when make is run without
-# an explicit goal, or when at least one goal does not end in "clean"
-# (the patsubst blanks out every *clean goal; anything left over means a
-# non-clean goal was requested). The leading '-' on include makes a missing
-# dependency file a non-error.
-ifeq (,$(MAKECMDGOALS))
--include $(DEPS)
-else
-ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
--include $(DEPS)
-endif
-endif
-
diff --git a/tools/genprops/genprops.c b/tools/genprops/genprops.c
deleted file mode 100644
index 119ed006..00000000
--- a/tools/genprops/genprops.c
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: genprops.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999dec08
-* created by: Markus W. Scherer
-*
-* This program reads several of the Unicode character database text files,
-* parses them, and extracts most of the properties for each character.
-* It then writes a binary file containing the properties
-* that is designed to be used directly for random-access to
-* the properties of each Unicode character.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/putil.h"
-#include "unicode/uclean.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "unewdata.h"
-#include "uoptions.h"
-#include "uparse.h"
-#include "uprops.h"
-#include "propsvec.h"
-
-U_CDECL_BEGIN
-#include "genprops.h"
-U_CDECL_END
-
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
-
-UBool beVerbose=FALSE, haveCopyright=TRUE;
-
-/* prototypes --------------------------------------------------------------- */
-
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode);
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Indexes into options[] below. main() addresses the parsed command line
- * options by these names (options[VERBOSE], options[DESTDIR], ...), so the
- * order of the enum constants must match the order of the table entries.
- */
-enum
-{
- HELP_H,
- HELP_QUESTION_MARK,
- VERBOSE,
- COPYRIGHT,
- DESTDIR,
- SOURCEDIR,
- UNICODE_VERSION,
- ICUDATADIR,
- CSOURCE
-};
-
-/* Keep these values in sync with the above enums */
-/* Command line option table handed to u_parseArgs() in main(). */
-static UOption options[]={
- UOPTION_HELP_H,
- UOPTION_HELP_QUESTION_MARK,
- UOPTION_VERBOSE,
- UOPTION_COPYRIGHT,
- UOPTION_DESTDIR,
- UOPTION_SOURCEDIR,
- UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
- UOPTION_ICUDATADIR,
- UOPTION_DEF("csource", 'C', UOPT_NO_ARG)
-};
-
-/*
- * Entry point of genprops.
- *
- * Parses the command line, builds the path prefix for the Unicode data files
- * from the source directory, parses UnicodeData.txt and the additional
- * properties files, and - if all of that succeeded - writes the properties
- * data via generateData().
- *
- * Returns a UErrorCode value as the process exit code: U_ZERO_ERROR on
- * success (and after printing the help text), U_ILLEGAL_ARGUMENT_ERROR for a
- * bad command line, U_BUFFER_OVERFLOW_ERROR if the source directory and suffix
- * do not fit into the internal path buffer, otherwise the error reported
- * by the parsing steps.
- */
-extern int
-main(int argc, char* argv[]) {
-    /*
-     * Space that must stay free in filename[] behind the source directory
-     * and the suffix: path separator, file base name, '-', ".txt" and the
-     * terminating NUL. The base names used by generateAdditionalProperties()
-     * are not visible in this file - TODO confirm none needs more than this.
-     */
-    enum { FILENAME_HEADROOM=64 };
-
-    char filename[300];
-    const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
-    char *basename=NULL;
-    UErrorCode errorCode=U_ZERO_ERROR;
-
-    U_MAIN_INIT_ARGS(argc, argv);
-
-    /* preset then read command line options */
-    options[DESTDIR].value=u_getDataDirectory();
-    options[SOURCEDIR].value="";
-    options[UNICODE_VERSION].value="";
-    options[ICUDATADIR].value=u_getDataDirectory();
-    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
-
-    /* error handling, printing usage message */
-    if(argc<0) {
-        fprintf(stderr,
-            "error in command line argument \"%s\"\n",
-            argv[-argc]);
-    }
-    if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
-        /*
-         * Broken into chunks because the C89 standard says the minimum
-         * required supported string length is 509 bytes.
-         */
-        fprintf(stderr,
-            "Usage: %s [-options] [suffix]\n"
-            "\n"
-            "read the UnicodeData.txt file and other Unicode properties files and\n"
-            "create a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n"
-            "\n",
-            argv[0]);
-        fprintf(stderr,
-            "Options:\n"
-            "\t-h or -? or --help this usage text\n"
-            "\t-v or --verbose verbose output\n"
-            "\t-c or --copyright include a copyright notice\n"
-            "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"
-            "\t-C or --csource generate a .c source file rather than the .icu binary\n");
-        fprintf(stderr,
-            "\t-d or --destdir destination directory, followed by the path\n"
-            "\t-s or --sourcedir source directory, followed by the path\n"
-            "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
-            "\t followed by path, defaults to %s\n"
-            "\tsuffix suffix that is to be appended with a '-'\n"
-            "\t to the source file basenames before opening;\n"
-            "\t 'genprops new' will read UnicodeData-new.txt etc.\n",
-            u_getDataDirectory());
-        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
-    }
-
-    /* get the options values */
-    beVerbose=options[VERBOSE].doesOccur;
-    haveCopyright=options[COPYRIGHT].doesOccur;
-    srcDir=options[SOURCEDIR].value;
-    destDir=options[DESTDIR].value;
-
-    if(argc>=2) {
-        suffix=argv[1];
-    } else {
-        suffix=NULL;
-    }
-
-    if(options[UNICODE_VERSION].doesOccur) {
-        setUnicodeVersion(options[UNICODE_VERSION].value);
-    }
-    /* else use the default dataVersion in store.c */
-
-    if (options[ICUDATADIR].doesOccur) {
-        u_setDataDirectory(options[ICUDATADIR].value);
-    }
-
-    /*
-     * All copies into filename[] below (and in writeUCDFilename()) are
-     * unchecked, so refuse command line values that cannot fit.
-     */
-    if(uprv_strlen(srcDir)+(suffix!=NULL ? uprv_strlen(suffix) : 0)+FILENAME_HEADROOM>sizeof(filename)) {
-        fprintf(stderr, "genprops error: source directory and suffix are too long\n");
-        return U_BUFFER_OVERFLOW_ERROR;
-    }
-
-    /* prepare the filename beginning with the source dir */
-    uprv_strcpy(filename, srcDir);
-    basename=filename+uprv_strlen(filename);
-    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
-        *basename++=U_FILE_SEP_CHAR;
-    }
-
-    /* initialize */
-    initStore();
-
-    /* process UnicodeData.txt */
-    writeUCDFilename(basename, "UnicodeData", suffix);
-    parseDB(filename, &errorCode);
-
-    /* process additional properties files */
-    *basename=0;    /* cut filename[] back to the directory prefix */
-    generateAdditionalProperties(filename, suffix, &errorCode);
-
-    /* process parsed data */
-    if(U_SUCCESS(errorCode)) {
-        /* write the properties data file */
-        generateData(destDir, options[CSOURCE].doesOccur);
-    }
-
-    exitStore();
-    u_cleanup();
-    return errorCode;
-}
-
-/*
- * Write "<filename>[-<suffix>].txt" to basename. The "-<suffix>" part is
- * left out when suffix is NULL. basename must have room for the result;
- * nothing is checked here.
- */
-U_CFUNC void
-writeUCDFilename(char *basename, const char *filename, const char *suffix) {
-    size_t nameLength=uprv_strlen(filename);
-    char *dest=basename;
-
-    uprv_strcpy(dest, filename);
-    dest+=nameLength;
-
-    if(suffix!=NULL) {
-        *dest++='-';
-        uprv_strcpy(dest, suffix);
-        dest+=uprv_strlen(suffix);
-    }
-
-    uprv_strcpy(dest, ".txt");
-}
-
-/*
- * Return TRUE if s, after u_skipWhitespace(), starts with token and the
- * token is followed - again after u_skipWhitespace() - by ';' or by the
- * end of the string; FALSE otherwise.
- */
-U_CFUNC UBool
-isToken(const char *token, const char *s) {
-    const char *rest;
-
-    s=u_skipWhitespace(s);
-
-    /* the whole token must match character by character */
-    while(*token!=0) {
-        if(*s!=*token) {
-            return FALSE;
-        }
-        ++s;
-        ++token;
-    }
-
-    /* only a field separator or the end of the string may follow */
-    rest=u_skipWhitespace(s);
-    if(*rest==';' || *rest==0) {
-        return TRUE;
-    }
-    return FALSE;
-}
-
-/*
- * Find which of the countTokens entries of tokens[] s starts with.
- *
- * s is first passed through u_skipWhitespace(). An entry matches if all of
- * its characters are found at the start of s and are followed - after
- * u_skipWhitespace() - by ';', '#', '\r', '\n' or the end of the string.
- * NULL entries are skipped. Returns the index of the first matching entry,
- * or -1 if there is none.
- */
-U_CFUNC int32_t
-getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
-    const char *candidate, *p, *rest;
-    int32_t i;
-
-    s=u_skipWhitespace(s);
-    for(i=0; i<countTokens; ++i) {
-        candidate=tokens[i];
-        if(candidate==NULL) {
-            continue;
-        }
-
-        /* walk both strings while they agree */
-        p=s;
-        while(*candidate!=0 && *p==*candidate) {
-            ++p;
-            ++candidate;
-        }
-        if(*candidate!=0) {
-            continue; /* mismatch before the end of this entry */
-        }
-
-        rest=u_skipWhitespace(p);
-        if(*rest==';' || *rest==0 || *rest=='#' || *rest=='\r' || *rest=='\n') {
-            return i;
-        }
-    }
-    return -1;
-}
-
-/* parser for UnicodeData.txt ----------------------------------------------- */
-
-/*
- * General category abbreviations as they appear in field 2 of
- * UnicodeData.txt. unicodeDataLineFn() looks a field up here with
- * getTokenIndex() and stores the resulting array index as the general
- * category value, so the order of the entries is significant
- * (presumably the order of the UCharCategory enum - confirm in uchar.h).
- */
-const char *const
-genCategoryNames[U_CHAR_CATEGORY_COUNT]={
- "Cn",
- "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me",
- "Mc", "Nd", "Nl", "No",
- "Zs", "Zl", "Zp",
- "Cc", "Cf", "Co", "Cs",
- "Pd", "Ps", "Pe", "Pc", "Po",
- "Sm", "Sc", "Sk", "So",
- "Pi", "Pf"
-};
-
-/*
- * Compatibility decomposition tags (the text between '<' and '>' in field 5
- * of UnicodeData.txt); the array index is stored as the decomposition type.
- * The first two entries are NULL, which getTokenIndex() skips: those values
- * have no tag in the file (unicodeDataLineFn() uses U_DT_CANONICAL for a
- * decomposition without a '<tag>').
- */
-const char *const
-decompositionTypeNames[U_DT_COUNT]={
- NULL,
- NULL,
- "compat",
- "circle",
- "final",
- "font",
- "fraction",
- "initial",
- "isolated",
- "medial",
- "narrow",
- "noBreak",
- "small",
- "square",
- "sub",
- "super",
- "vertical",
- "wide"
-};
-
-/*
- * Code point ranges ("areas") that UnicodeData.txt lists only by a
- * "<name, First>" / "<name, Last>" pair of lines: first and last code point,
- * the properties word computed for them, and the area name.
- * Entries [0..unicodeAreaIndex-1] are complete. The entry at
- * unicodeAreaIndex is the one being filled; its first is 0xffffffff while
- * no "First" line is pending.
- */
-static struct {
- uint32_t first, last, props;
- char name[80];
-} unicodeAreas[32];
-
-/* number of completed areas == index of the entry currently being filled */
-static int32_t unicodeAreaIndex=0;
-
-/*
- * Line callback passed to u_parseDelimitedFile() by parseDB(): processes one
- * line of UnicodeData.txt.
- *
- * fields[i][0] and fields[i][1] are used as the start and the limit of field i.
- * The function reads the code point (field 0), the general category (field 2),
- * the decomposition type (field 5) and the numeric values (fields 6..8),
- * packs them with makeProps() and stores the result with addProps().
- * Lines whose name (field 1) has the form "<..., First>" or "<..., Last>"
- * are not stored directly; they open/close an entry in unicodeAreas[] which
- * repeatAreaProps() expands later.
- *
- * context and fieldCount are not used.
- * Every error is reported on stderr, sets *pErrorCode and terminates the
- * program with exit().
- */
-static void U_CALLCONV
-unicodeDataLineFn(void *context,
-                  char *fields[][2], int32_t fieldCount,
-                  UErrorCode *pErrorCode) {
-    Props p;
-    char *end;
-    static uint32_t prevCode=0; /* code point of the previously stored line */
-    uint32_t value;
-    int32_t i;
-
-    /* reset the properties */
-    uprv_memset(&p, 0, sizeof(Props));
-
-    /* get the character code, field 0 */
-    p.code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
-    if(end<=fields[0][0] || end!=fields[0][1]) {
-        fprintf(stderr, "genprops: syntax error in field 0 at %s\n", fields[0][0]);
-        *pErrorCode=U_PARSE_ERROR;
-        exit(U_PARSE_ERROR);
-    }
-
-    /* get general category, field 2 */
-    i=getTokenIndex(genCategoryNames, U_CHAR_CATEGORY_COUNT, fields[2][0]);
-    if(i>=0) {
-        p.generalCategory=(uint8_t)i;
-    } else {
-        fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n",
-            fields[2][0], (unsigned long)p.code);
-        *pErrorCode=U_PARSE_ERROR;
-        exit(U_PARSE_ERROR);
-    }
-
-    /* get decomposition type, field 5 */
-    if(fields[5][0]<fields[5][1]) {
-        /* there is some decomposition */
-        if(*fields[5][0]!='<') {
-            /* canonical */
-            i=U_DT_CANONICAL;
-        } else {
-            /* get compatibility type */
-            end=fields[5][0]+1;
-            while(end<fields[5][1] && *end!='>') {
-                ++end;
-            }
-            /* replace the '>' (or the field limit) with a character that getTokenIndex() accepts as a terminator */
-            *end='#';
-            i=getTokenIndex(decompositionTypeNames, U_DT_COUNT, fields[5][0]+1);
-            if(i<0) {
-                fprintf(stderr, "genprops: unknown decomposition type \"%s\" at code 0x%lx\n",
-                    fields[5][0], (unsigned long)p.code);
-                *pErrorCode=U_PARSE_ERROR;
-                exit(U_PARSE_ERROR);
-            }
-        }
-        upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(*pErrorCode));
-            exit(*pErrorCode);
-        }
-    }
-
-    /* decimal digit value, field 6 */
-    if(fields[6][0]<fields[6][1]) {
-        value=(uint32_t)uprv_strtoul(fields[6][0], &end, 10);
-        if(end!=fields[6][1] || value>0x7fff) {
-            fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n",
-                (unsigned long)p.code);
-            *pErrorCode=U_PARSE_ERROR;
-            exit(U_PARSE_ERROR);
-        }
-        p.numericValue=(int32_t)value;
-        p.numericType=1;
-    }
-
-    /* digit value, field 7 */
-    if(fields[7][0]<fields[7][1]) {
-        value=(uint32_t)uprv_strtoul(fields[7][0], &end, 10);
-        if(end!=fields[7][1] || value>0x7fff) {
-            fprintf(stderr, "genprops: syntax error in field 7 at code 0x%lx\n",
-                (unsigned long)p.code);
-            *pErrorCode=U_PARSE_ERROR;
-            exit(U_PARSE_ERROR);
-        }
-        if(p.numericType==0) {
-            p.numericValue=(int32_t)value;
-            p.numericType=2;
-        } else if((int32_t)value!=p.numericValue) {
-            /* field 6 already set a value; field 7 must agree with it */
-            fprintf(stderr, "genprops error: numeric values in fields 6 & 7 different at code 0x%lx\n",
-                (unsigned long)p.code);
-            *pErrorCode=U_PARSE_ERROR;
-            exit(U_PARSE_ERROR);
-        }
-    }
-
-    /* numeric value, field 8 */
-    if(fields[8][0]<fields[8][1]) {
-        char *s=fields[8][0];
-        UBool isNegative;
-
-        /* get a possible minus sign */
-        if(*s=='-') {
-            isNegative=TRUE;
-            ++s;
-        } else {
-            isNegative=FALSE;
-        }
-
-        value=(uint32_t)uprv_strtoul(s, &end, 10);
-        if(value>0 && *end=='/') {
-            /* field 8 may contain a fractional value, get the denominator */
-            if(p.numericType>0) {
-                fprintf(stderr, "genprops error: numeric values in fields 6..8 different at code 0x%lx\n",
-                    (unsigned long)p.code);
-                *pErrorCode=U_PARSE_ERROR;
-                exit(U_PARSE_ERROR);
-            }
-
-            p.denominator=(uint32_t)uprv_strtoul(end+1, &end, 10);
-            if(p.denominator==0) {
-                fprintf(stderr, "genprops: denominator is 0 in field 8 at code 0x%lx\n",
-                    (unsigned long)p.code);
-                *pErrorCode=U_PARSE_ERROR;
-                exit(U_PARSE_ERROR);
-            }
-        }
-        if(end!=fields[8][1] || value>0x7fffffff) {
-            fprintf(stderr, "genprops: syntax error in field 8 at code 0x%lx\n",
-                (unsigned long)p.code);
-            *pErrorCode=U_PARSE_ERROR;
-            exit(U_PARSE_ERROR);
-        }
-
-        if(p.numericType==0) {
-            if(isNegative) {
-                p.numericValue=-(int32_t)value;
-            } else {
-                p.numericValue=(int32_t)value;
-            }
-            p.numericType=3;
-        } else if((int32_t)value!=p.numericValue) {
-            fprintf(stderr, "genprops error: numeric values in fields 6..8 different at code 0x%lx\n",
-                (unsigned long)p.code);
-            *pErrorCode=U_PARSE_ERROR;
-            exit(U_PARSE_ERROR);
-        }
-    }
-
-    value=makeProps(&p);
-
-    if(*fields[1][0]=='<') {
-        /* first or last entry of a Unicode area */
-        size_t length=fields[1][1]-fields[1][0];
-
-        if(length<9) {
-            /* name too short for an area name */
-        } else if(0==uprv_memcmp(", First>", fields[1][1]-8, 8)) {
-            /* set the current area */
-            if(unicodeAreas[unicodeAreaIndex].first==0xffffffff) {
-                /* length of the name without the leading '<' and the trailing ", First>" */
-                length-=9;
-                if(length>=sizeof(unicodeAreas[unicodeAreaIndex].name)) {
-                    /* the name plus its NUL terminator would not fit into name[] */
-                    fprintf(stderr, "genprops: error - area name too long at code 0x%lx\n",
-                        (unsigned long)p.code);
-                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-                    exit(U_BUFFER_OVERFLOW_ERROR);
-                }
-                unicodeAreas[unicodeAreaIndex].first=p.code;
-                unicodeAreas[unicodeAreaIndex].props=value;
-                uprv_memcpy(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length);
-                unicodeAreas[unicodeAreaIndex].name[length]=0;
-            } else {
-                /* error: a previous area is incomplete */
-                fprintf(stderr, "genprops: error - area \"%s\" is incomplete\n", unicodeAreas[unicodeAreaIndex].name);
-                *pErrorCode=U_PARSE_ERROR;
-                exit(U_PARSE_ERROR);
-            }
-            return;
-        } else if(0==uprv_memcmp(", Last>", fields[1][1]-7, 7)) {
-            /* check that the current area matches, and complete it with the last code point */
-            /* length of the name without the leading '<' and the trailing ", Last>" */
-            length-=8;
-            if( length<sizeof(unicodeAreas[unicodeAreaIndex].name) && /* keeps the comparisons below inside name[] */
-                unicodeAreas[unicodeAreaIndex].props==value &&
-                0==uprv_memcmp(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length) &&
-                unicodeAreas[unicodeAreaIndex].name[length]==0 &&
-                unicodeAreas[unicodeAreaIndex].first<p.code
-            ) {
-                unicodeAreas[unicodeAreaIndex].last=p.code;
-                if(beVerbose) {
-                    printf("Unicode area U+%04lx..U+%04lx \"%s\"\n",
-                        (unsigned long)unicodeAreas[unicodeAreaIndex].first,
-                        (unsigned long)unicodeAreas[unicodeAreaIndex].last,
-                        unicodeAreas[unicodeAreaIndex].name);
-                }
-                /*
-                 * The entry after the completed one is marked as unused below,
-                 * so one slot of the table is always needed beyond the areas.
-                 */
-                if(unicodeAreaIndex+1>=(int32_t)LENGTHOF(unicodeAreas)) {
-                    fprintf(stderr, "genprops: error - too many areas, at most %d are supported\n",
-                        (int)LENGTHOF(unicodeAreas)-1);
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
-                }
-                unicodeAreas[++unicodeAreaIndex].first=0xffffffff;
-            } else {
-                /* error: different properties between first & last, different area name, first>=last */
-                fprintf(stderr, "genprops: error - Last of area \"%s\" is incorrect\n", unicodeAreas[unicodeAreaIndex].name);
-                *pErrorCode=U_PARSE_ERROR;
-                exit(U_PARSE_ERROR);
-            }
-            return;
-        } else {
-            /* not an area name */
-        }
-    }
-
-    /* check for non-character code points */
-    if((p.code&0xfffe)==0xfffe || (uint32_t)(p.code-0xfdd0)<0x20) {
-        fprintf(stderr, "genprops: error - properties for non-character code point U+%04lx\n",
-            (unsigned long)p.code);
-        *pErrorCode=U_PARSE_ERROR;
-        exit(U_PARSE_ERROR);
-    }
-
-    /* check that the code points (p.code) are in ascending order */
-    if(p.code<=prevCode && p.code>0) {
-        fprintf(stderr, "genprops: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
-            (unsigned long)p.code, (unsigned long)prevCode);
-        *pErrorCode=U_PARSE_ERROR;
-        exit(U_PARSE_ERROR);
-    }
-    prevCode=p.code;
-
-    /* properties for a single code point */
-    addProps(p.code, value);
-}
-
-/* set repeated properties for the areas */
-/*
- * Apply the properties of every completed entry of unicodeAreas[] to its
- * whole code point range, then fill in two things that are not taken from
- * the file: the plane 15/16 private use areas (if the file did not list
- * them) and the decomposition type of the Hangul syllables.
- * Terminates the program if the decomposition type cannot be set.
- */
-static void
-repeatAreaProps() {
-    uint32_t bmpPuaProps=0;
-    UBool sawPlane15PUA=FALSE, sawPlane16PUA=FALSE;
-    UErrorCode errorCode=U_ZERO_ERROR;
-    int32_t i;
-
-    for(i=0; i<unicodeAreaIndex; ++i) {
-        repeatProps(unicodeAreas[i].first,
-                    unicodeAreas[i].last,
-                    unicodeAreas[i].props);
-
-        /* remember what is known about the private use areas */
-        switch(unicodeAreas[i].first) {
-        case 0xe000:
-            bmpPuaProps=unicodeAreas[i].props;
-            break;
-        case 0xf0000:
-            sawPlane15PUA=TRUE;
-            break;
-        case 0x100000:
-            sawPlane16PUA=TRUE;
-            break;
-        default:
-            break;
-        }
-    }
-
-    /*
-     * UnicodeData.txt before 3.0.1 did not contain the PUAs on
-     * planes 15 and 16.
-     * If that is the case, then we add them here, using the properties
-     * from the BMP PUA.
-     */
-    if(bmpPuaProps!=0 && !sawPlane15PUA) {
-        repeatProps(0xf0000, 0xffffd, bmpPuaProps);
-    }
-    if(bmpPuaProps!=0 && !sawPlane16PUA) {
-        repeatProps(0x100000, 0x10fffd, bmpPuaProps);
-    }
-
-    /* Hangul have canonical decompositions */
-    upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode);
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(errorCode));
-        exit(errorCode);
-    }
-}
-
-/*
- * Parse UnicodeData.txt.
- *
- * filename is the full path of the file; every line is handed to
- * unicodeDataLineFn(). After a successful parse the collected areas are
- * checked for completeness and expanded with repeatAreaProps().
- *
- * Does nothing if pErrorCode is NULL or already indicates a failure.
- * If the parser reports a failure, a message is printed and the function
- * returns with that failure in *pErrorCode without doing the
- * post-processing. An area that is still open at the end of the file
- * terminates the program.
- */
-static void
-parseDB(const char *filename, UErrorCode *pErrorCode) {
-    char *fields[15][2];
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return;
-    }
-
-    /* while unicodeAreas[unicodeAreaIndex] is unused, set its first to a bogus value */
-    unicodeAreas[0].first=0xffffffff;
-
-    u_parseDelimitedFile(filename, ';', fields, LENGTHOF(fields), unicodeDataLineFn, NULL, pErrorCode);
-
-    /* do not post-process data from a file that could not be parsed */
-    if(U_FAILURE(*pErrorCode)) {
-        fprintf(stderr, "genprops error: unable to parse \"%s\": %s\n",
-            filename, u_errorName(*pErrorCode));
-        return;
-    }
-
-    if(unicodeAreas[unicodeAreaIndex].first!=0xffffffff) {
-        fprintf(stderr, "genprops: error - the last area \"%s\" from U+%04lx is incomplete\n",
-            unicodeAreas[unicodeAreaIndex].name,
-            (unsigned long)unicodeAreas[unicodeAreaIndex].first);
-        *pErrorCode=U_PARSE_ERROR;
-        exit(U_PARSE_ERROR);
-    }
-
-    repeatAreaProps();
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/genprops/genprops.h b/tools/genprops/genprops.h
deleted file mode 100644
index b50a1037..00000000
--- a/tools/genprops/genprops.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: genprops.h
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999dec13
-* created by: Markus W. Scherer
-*/
-
-#ifndef __GENPROPS_H__
-#define __GENPROPS_H__
-
-#include "unicode/utypes.h"
-#include "utrie.h"
-#include "propsvec.h"
-
-/* file definitions */
-#define DATA_NAME "uprops"
-#define DATA_TYPE "icu"
-
-/* character properties */
-/*
- * Properties collected for one UnicodeData.txt line before they are packed
- * by makeProps(). genprops.c fills the struct as follows:
- * code from field 0, generalCategory as an index into genCategoryNames[],
- * numericType 1/2/3 depending on whether the value came from field 6
- * (decimal digit), field 7 (digit) or field 8 (numeric), 0 if there is none.
- * exponent is not set by genprops.c (it stays 0 from the memset there).
- */
-typedef struct {
- uint32_t code;
- int32_t numericValue; /* see numericType */
- uint32_t denominator; /* 0: no value */
- uint8_t generalCategory, numericType, exponent;
-} Props;
-
-/* global flags */
-extern UBool beVerbose, haveCopyright;
-
-extern const char *const
-genCategoryNames[];
-
-/* properties vectors in props2.c */
-extern UPropsVectors *pv;
-
-/* prototypes */
-/*
- * Write "<filename>[-<suffix>].txt" to basename; suffix may be NULL.
- * The caller must provide enough room, no length is checked.
- */
-U_CFUNC void
-writeUCDFilename(char *basename, const char *filename, const char *suffix);
-
-/*
- * TRUE if s (after leading whitespace) starts with token, followed by
- * optional whitespace and then ';' or the end of the string.
- */
-U_CFUNC UBool
-isToken(const char *token, const char *s);
-
-/*
- * Index of the first non-NULL entry of tokens[] that s (after leading
- * whitespace) starts with, followed by optional whitespace and then
- * ';', '#', '\r', '\n' or the end of the string; -1 if there is none.
- */
-U_CFUNC int32_t
-getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s);
-
-extern void
-setUnicodeVersion(const char *v);
-
-extern void
-initStore(void);
-
-extern void
-exitStore(void);
-
-extern uint32_t
-makeProps(Props *p);
-
-extern void
-addProps(uint32_t c, uint32_t props);
-
-extern uint32_t
-getProps(uint32_t c);
-
-extern void
-repeatProps(uint32_t first, uint32_t last, uint32_t props);
-
-extern void
-generateData(const char *dataDir, UBool csource);
-
-/* props2.c */
-U_CFUNC void
-initAdditionalProperties(void);
-
-U_CFUNC void
-exitAdditionalProperties(void);
-
-U_CFUNC void
-generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode);
-
-U_CFUNC int32_t
-writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[16]);
-
-#endif
diff --git a/tools/genprops/genprops.vcproj b/tools/genprops/genprops.vcproj
deleted file mode 100644
index 871abf35..00000000
--- a/tools/genprops/genprops.vcproj
+++ /dev/null
@@ -1,426 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="9.00"
- Name="genprops"
- ProjectGUID="{6F744648-D15F-478A-90C6-58E353B5DDB3}"
- TargetFrameworkVersion="131072"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- <Platform
- Name="x64"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Release|Win32"
- OutputDirectory=".\x86\Release"
- IntermediateDirectory=".\x86\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/genprops.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/genprops.pch"
- AssemblerListingLocation=".\x86\Release/"
- ObjectFile=".\x86\Release/"
- ProgramDataBaseFileName=".\x86\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Release/genprops.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/genprops.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory=".\x86\Debug"
- IntermediateDirectory=".\x86\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/genprops.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/genprops.pch"
- AssemblerListingLocation=".\x86\Debug/"
- ObjectFile=".\x86\Debug/"
- ProgramDataBaseFileName=".\x86\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="4"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x86\Debug/genprops.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/genprops.pdb"
- SubSystem="1"
- RandomizedBaseAddress="1"
- DataExecutionPrevention="0"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|x64"
- OutputDirectory=".\x64\Release"
- IntermediateDirectory=".\x64\Release"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/genprops.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/genprops.pch"
- AssemblerListingLocation=".\x64\Release/"
- ObjectFile=".\x64\Release/"
- ProgramDataBaseFileName=".\x64\Release/"
- WarningLevel="3"
- SuppressStartupBanner="true"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Release/genprops.exe"
- LinkIncremental="1"
- SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/genprops.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Debug|x64"
- OutputDirectory=".\x64\Debug"
- IntermediateDirectory=".\x64\Debug"
- ConfigurationType="1"
- InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
- UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="false"
- CharacterSet="2"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/genprops.tlb"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
- DisableLanguageExtensions="true"
- TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/genprops.pch"
- AssemblerListingLocation=".\x64\Debug/"
- ObjectFile=".\x64\Debug/"
- ProgramDataBaseFileName=".\x64\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
- SuppressStartupBanner="true"
- DebugInformationFormat="3"
- CompileAs="0"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- OutputFile=".\x64\Debug/genprops.exe"
- LinkIncremental="2"
- SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/genprops.pdb"
- SubSystem="1"
- TargetMachine="17"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- UseFAT32Workaround="true"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="c;cpp;rc"
- >
- <File
- RelativePath=".\genprops.c"
- >
- </File>
- <File
- RelativePath=".\props2.c"
- >
- </File>
- <File
- RelativePath=".\store.c"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h"
- >
- <File
- RelativePath=".\genprops.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/tools/genprops/misc/ucdmerge.c b/tools/genprops/misc/ucdmerge.c
deleted file mode 100644
index 35f6850c..00000000
--- a/tools/genprops/misc/ucdmerge.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucdmerge.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003feb20
-* created by: Markus W. Scherer
-*
-* Simple tool for Unicode Character Database files with semicolon-delimited fields.
-* Merges adjacent, identical per-code point data lines into one line with range syntax.
-*
-* To compile, just call a C compiler/linker with this source file.
-* On Windows: cl ucdmerge.c
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-static const char *
-skipWhitespace(const char *s) {
- while(*s==' ' || *s=='\t') {
- ++s;
- }
- return s;
-}
-
-/* return the first character position after the end of the data */
-static char *
-endOfData(const char *l) {
- char *end;
- char c;
-
- end=strchr(l, '#');
- if(end!=NULL) {
- /* ignore whitespace before the comment */
- while(l!=end && ((c=*(end-1))==' ' || c=='\t')) {
- --end;
- }
- } else {
- end=strchr(l, 0);
- }
- return end;
-}
-
-static int
-sameData(const char *l1, const char *l2) {
- char *end1, *end2;
- int length;
-
- /* find the first semicolon in each line - there must be one */
- l1=strchr(l1, ';')+1;
- l2=strchr(l2, ';')+1;
-
- /* find the end of data: end of string or start of comment */
- end1=endOfData(l1);
- end2=endOfData(l2);
-
- /* compare the line data portions */
- length=end1-l1;
- return length==(end2-l2) && 0==memcmp(l1, l2, length);
-}
-
-extern int
-main(int argc, const char *argv[]) {
- static char line[2000], firstLine[2000], lastLine[2000];
- char *end;
- long first, last, c;
- int finished;
-
- first=last=-1;
- finished=0;
-
- for(;;) {
- if(gets(line)!=NULL) {
- /* parse the initial code point, if any */
- c=strtol(line, &end, 16);
- if(end!=line && *skipWhitespace(end)==';') {
- /* single code point followed by semicolon and data, keep c */
- } else {
- c=-1;
- }
- } else {
- line[0]=0;
- c=-1;
- finished=1;
- }
-
- if(last>=0 && (c!=(last+1) || !sameData(firstLine, line))) {
- /* output the current range */
- if(first==last) {
- /* there was no range, just output the one line we found */
- puts(firstLine);
- } else {
- /* there was a real range, merge their lines */
- end=strchr(lastLine, '#');
- if(end==NULL) {
- /* no comment in second line */
- printf("%04lX..%04lX%s\n",
- first, last, /* code point range */
- strchr(firstLine, ';'));/* first line starting from the first ; */
- } else if(strchr(firstLine, '#')==NULL) {
- /* no comment in first line */
- printf("%04lX..%04lX%s%s\n",
- first, last, /* code point range */
- strchr(firstLine, ';'), /* first line starting from the first ; */
- end); /* comment from second line */
- } else {
- /* merge comments from both lines */
- printf("%04lX..%04lX%s..%s\n",
- first, last, /* code point range */
- strchr(firstLine, ';'), /* first line starting from the first ; */
- skipWhitespace(end+1)); /* comment from second line, after # and spaces */
- }
- }
- first=last=-1;
- }
-
- if(c<0) {
- if(finished) {
- break;
- }
-
- /* no data on this line, output as is */
- puts(line);
- } else {
- /* data on this line, store for possible range compaction */
- if(last<0) {
- /* set as the first line in a possible range */
- first=last=c;
- strcpy(firstLine, line);
- lastLine[0]=0;
- } else /* must be c==(last+1) && sameData() because of previous conditions */ {
- /* continue with the current range */
- last=c;
- strcpy(lastLine, line);
- }
- }
- }
-
- return 0;
-}
diff --git a/tools/genprops/misc/ucdstrip.c b/tools/genprops/misc/ucdstrip.c
deleted file mode 100644
index 33ef53f1..00000000
--- a/tools/genprops/misc/ucdstrip.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2003, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: ucdstrip.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2003feb20
-* created by: Markus W. Scherer
-*
-* Simple tool for Unicode Character Database files with semicolon-delimited fields.
-* Removes comments behind data lines but not in others.
-*
-* To compile, just call a C compiler/linker with this source file.
-* On Windows: cl ucdstrip.c
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-/* return the first character position after the end of the data */
-static char *
-endOfData(const char *l) {
- char *end;
- char c;
-
- end=strchr(l, '#');
- if(end!=NULL) {
- /* ignore whitespace before the comment */
- while(l!=end && ((c=*(end-1))==' ' || c=='\t')) {
- --end;
- }
- } else {
- end=strchr(l, 0);
- }
- return end;
-}
-
-extern int
-main(int argc, const char *argv[]) {
- static char line[2000];
- char *end;
-
- while(gets(line)!=NULL) {
- if(strtol(line, &end, 16)>=0 && end!=line) {
- /* code point or range followed by semicolon and data, remove comment */
- *endOfData(line)=0;
- }
- puts(line);
- }
-
- return 0;
-}
diff --git a/tools/genprops/misc/ucdstrip.pl b/tools/genprops/misc/ucdstrip.pl
deleted file mode 100755
index 6109770b..00000000
--- a/tools/genprops/misc/ucdstrip.pl
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/lib/perl -p
-# Copyright (c) 2001-2003 International Business Machines
-# Corporation and others. All Rights Reserved.
-# Simple tool for Unicode Character Database files with semicolon-delimited fields.
-# Removes comments behind data lines but not in others.
-# The Perl option -p above runs a while(<>) loop and prints the expression output.
-s/^([0-9a-fA-F]+.+?) *#.*/\1/;
diff --git a/tools/genprops/props2.c b/tools/genprops/props2.c
deleted file mode 100644
index 21b049bf..00000000
--- a/tools/genprops/props2.c
+++ /dev/null
@@ -1,813 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2002-2009, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: props2.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002feb24
-* created by: Markus W. Scherer
-*
-* Parse more Unicode Character Database files and store
-* additional Unicode character properties in bit set vectors.
-*/
-
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "unicode/uscript.h"
-#include "cstring.h"
-#include "cmemory.h"
-#include "utrie.h"
-#include "uprops.h"
-#include "propsvec.h"
-#include "uparse.h"
-#include "writesrc.h"
-#include "genprops.h"
-
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
-/* data --------------------------------------------------------------------- */
-
-static UNewTrie *newTrie;
-UPropsVectors *pv;
-
-/* miscellaneous ------------------------------------------------------------ */
-
-static char *
-trimTerminateField(char *s, char *limit) {
- /* trim leading whitespace */
- s=(char *)u_skipWhitespace(s);
-
- /* trim trailing whitespace */
- while(s<limit && (*(limit-1)==' ' || *(limit-1)=='\t')) {
- --limit;
- }
- *limit=0;
-
- return s;
-}
-
-static void
-parseTwoFieldFile(char *filename, char *basename,
- const char *ucdFile, const char *suffix,
- UParseLineFn *lineFn,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-static void U_CALLCONV
-ageLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode);
-
-static void
-parseMultiFieldFile(char *filename, char *basename,
- const char *ucdFile, const char *suffix,
- int32_t fieldCount,
- UParseLineFn *lineFn,
- UErrorCode *pErrorCode) {
- char *fields[20][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, fieldCount, lineFn, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-static void U_CALLCONV
-numericLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode);
-
-/* parse files with single enumerated properties ---------------------------- */
-
-struct SingleEnum {
- const char *ucdFile, *propName;
- UProperty prop;
- int32_t vecWord, vecShift;
- uint32_t vecMask;
-};
-typedef struct SingleEnum SingleEnum;
-
-static void
-parseSingleEnumFile(char *filename, char *basename, const char *suffix,
- const SingleEnum *sen,
- UErrorCode *pErrorCode);
-
-static const SingleEnum scriptSingleEnum={
- "Scripts", "script",
- UCHAR_SCRIPT,
- 0, 0, UPROPS_SCRIPT_MASK
-};
-
-static const SingleEnum blockSingleEnum={
- "Blocks", "block",
- UCHAR_BLOCK,
- 0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK
-};
-
-static const SingleEnum graphemeClusterBreakSingleEnum={
- "GraphemeBreakProperty", "Grapheme_Cluster_Break",
- UCHAR_GRAPHEME_CLUSTER_BREAK,
- 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK
-};
-
-static const SingleEnum wordBreakSingleEnum={
- "WordBreakProperty", "Word_Break",
- UCHAR_WORD_BREAK,
- 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK
-};
-
-static const SingleEnum sentenceBreakSingleEnum={
- "SentenceBreakProperty", "Sentence_Break",
- UCHAR_SENTENCE_BREAK,
- 2, UPROPS_SB_SHIFT, UPROPS_SB_MASK
-};
-
-static const SingleEnum lineBreakSingleEnum={
- "LineBreak", "line break",
- UCHAR_LINE_BREAK,
- UPROPS_LB_VWORD, UPROPS_LB_SHIFT, UPROPS_LB_MASK
-};
-
-static const SingleEnum eawSingleEnum={
- "EastAsianWidth", "east asian width",
- UCHAR_EAST_ASIAN_WIDTH,
- 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK
-};
-
-static void U_CALLCONV
-singleEnumLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- const SingleEnum *sen;
- char *s;
- uint32_t start, end, uv;
- int32_t value;
-
- sen=(const SingleEnum *)context;
-
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* parse property alias */
- s=trimTerminateField(fields[1][0], fields[1][1]);
- value=u_getPropertyValueEnum(sen->prop, s);
- if(value<0) {
- if(sen->prop==UCHAR_BLOCK) {
- if(isToken("Greek", s)) {
- value=UBLOCK_GREEK; /* Unicode 3.2 renames this to "Greek and Coptic" */
- } else if(isToken("Combining Marks for Symbols", s)) {
- value=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */
- } else if(isToken("Private Use", s)) {
- value=UBLOCK_PRIVATE_USE; /* Unicode 3.2 renames this to "Private Use Area" */
- }
- }
- }
- if(value<0) {
- fprintf(stderr, "genprops error: unknown %s name in %s.txt field 1 at %s\n",
- sen->propName, sen->ucdFile, s);
- exit(U_PARSE_ERROR);
- }
-
- uv=(uint32_t)(value<<sen->vecShift);
- if((uv&sen->vecMask)!=uv) {
- fprintf(stderr, "genprops error: %s value overflow (0x%x) at %s\n",
- sen->propName, (int)uv, s);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
-
- if(start==0 && end==0x10ffff) {
- /* Also set bits for initialValue and errorValue. */
- end=UPVEC_MAX_CP;
- }
- upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops error: unable to set %s code: %s\n",
- sen->propName, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-static void
-parseSingleEnumFile(char *filename, char *basename, const char *suffix,
- const SingleEnum *sen,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, sen->ucdFile, suffix);
-
- u_parseDelimitedFile(filename, ';', fields, 2, singleEnumLineFn, (void *)sen, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", sen->ucdFile, u_errorName(*pErrorCode));
- }
-}
-
-/* parse files with multiple binary properties ------------------------------ */
-
-struct Binary {
- const char *propName;
- int32_t vecWord, vecShift;
-};
-typedef struct Binary Binary;
-
-struct Binaries {
- const char *ucdFile;
- const Binary *binaries;
- int32_t binariesCount;
-};
-typedef struct Binaries Binaries;
-
-static const Binary
-propListNames[]={
- { "White_Space", 1, UPROPS_WHITE_SPACE },
- { "Dash", 1, UPROPS_DASH },
- { "Hyphen", 1, UPROPS_HYPHEN },
- { "Quotation_Mark", 1, UPROPS_QUOTATION_MARK },
- { "Terminal_Punctuation", 1, UPROPS_TERMINAL_PUNCTUATION },
- { "Hex_Digit", 1, UPROPS_HEX_DIGIT },
- { "ASCII_Hex_Digit", 1, UPROPS_ASCII_HEX_DIGIT },
- { "Ideographic", 1, UPROPS_IDEOGRAPHIC },
- { "Diacritic", 1, UPROPS_DIACRITIC },
- { "Extender", 1, UPROPS_EXTENDER },
- { "Noncharacter_Code_Point", 1, UPROPS_NONCHARACTER_CODE_POINT },
- { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK },
- { "IDS_Binary_Operator", 1, UPROPS_IDS_BINARY_OPERATOR },
- { "IDS_Trinary_Operator", 1, UPROPS_IDS_TRINARY_OPERATOR },
- { "Radical", 1, UPROPS_RADICAL },
- { "Unified_Ideograph", 1, UPROPS_UNIFIED_IDEOGRAPH },
- { "Deprecated", 1, UPROPS_DEPRECATED },
- { "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION },
-
- /* new properties in Unicode 4.0.1 */
- { "STerm", 1, UPROPS_S_TERM },
- { "Variation_Selector", 1, UPROPS_VARIATION_SELECTOR },
-
- /* new properties in Unicode 4.1 */
- { "Pattern_Syntax", 1, UPROPS_PATTERN_SYNTAX },
- { "Pattern_White_Space", 1, UPROPS_PATTERN_WHITE_SPACE }
-};
-
-static const Binaries
-propListBinaries={
- "PropList", propListNames, LENGTHOF(propListNames)
-};
-
-static const Binary
-derCorePropsNames[]={
- { "XID_Start", 1, UPROPS_XID_START },
- { "XID_Continue", 1, UPROPS_XID_CONTINUE },
-
- /* before Unicode 4/ICU 2.6/format version 3.2, these used to be Other_XYZ from PropList.txt */
- { "Math", 1, UPROPS_MATH },
- { "Alphabetic", 1, UPROPS_ALPHABETIC },
- { "Grapheme_Extend", 1, UPROPS_GRAPHEME_EXTEND },
- { "Default_Ignorable_Code_Point", 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT },
-
- /* new properties bits in ICU 2.6/format version 3.2 */
- { "ID_Start", 1, UPROPS_ID_START },
- { "ID_Continue", 1, UPROPS_ID_CONTINUE },
- { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE },
-
- /*
- * Unicode 5/ICU 3.6 moves Grapheme_Link from PropList.txt
- * to DerivedCoreProperties.txt and deprecates it.
- */
- { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK }
-};
-
-static const Binaries
-derCorePropsBinaries={
- "DerivedCoreProperties", derCorePropsNames, LENGTHOF(derCorePropsNames)
-};
-
-static char ignoredProps[100][64];
-static int32_t ignoredPropsCount;
-
-static void
-addIgnoredProp(char *s, char *limit) {
- int32_t i;
-
- s=trimTerminateField(s, limit);
- for(i=0; i<ignoredPropsCount; ++i) {
- if(0==uprv_strcmp(ignoredProps[i], s)) {
- return;
- }
- }
- uprv_strcpy(ignoredProps[ignoredPropsCount++], s);
-}
-
-static void U_CALLCONV
-binariesLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- const Binaries *bin;
- char *s;
- uint32_t start, end, uv;
- int32_t i;
-
- bin=(const Binaries *)context;
-
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* parse binary property name */
- s=(char *)u_skipWhitespace(fields[1][0]);
- for(i=0;; ++i) {
- if(i==bin->binariesCount) {
- /* ignore unrecognized properties */
- if(beVerbose) {
- addIgnoredProp(s, fields[1][1]);
- }
- return;
- }
- if(isToken(bin->binaries[i].propName, s)) {
- break;
- }
- }
-
- if(bin->binaries[i].vecShift>=32) {
- fprintf(stderr, "genprops error: shift value %d>=32 for %s %s\n",
- (int)bin->binaries[i].vecShift, bin->ucdFile, bin->binaries[i].propName);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- uv=U_MASK(bin->binaries[i].vecShift);
-
- if(start==0 && end==0x10ffff) {
- /* Also set bits for initialValue and errorValue. */
- end=UPVEC_MAX_CP;
- }
- upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops error: unable to set %s code: %s\n",
- bin->binaries[i].propName, u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-static void
-parseBinariesFile(char *filename, char *basename, const char *suffix,
- const Binaries *bin,
- UErrorCode *pErrorCode) {
- char *fields[2][2];
- int32_t i;
-
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
-
- writeUCDFilename(basename, bin->ucdFile, suffix);
-
- ignoredPropsCount=0;
-
- u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
- }
-
- if(beVerbose) {
- for(i=0; i<ignoredPropsCount; ++i) {
- printf("genprops: ignoring property %s in %s.txt\n", ignoredProps[i], bin->ucdFile);
- }
- }
-}
-
-/* -------------------------------------------------------------------------- */
-
-U_CFUNC void
-initAdditionalProperties() {
- UErrorCode errorCode=U_ZERO_ERROR;
- pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: upvec_open() failed - %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
-}
-
-U_CFUNC void
-exitAdditionalProperties() {
- utrie_close(newTrie);
- upvec_close(pv);
-}
-
-U_CFUNC void
-generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
- char *basename;
-
- basename=filename+uprv_strlen(filename);
-
- /* process various UCD .txt files */
-
- /* add Han numeric types & values */
- parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode);
-
- parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);
-
- /*
- * UTR 24 says:
- * Section 2:
- * "Common - For characters that may be used
- * within multiple scripts,
- * or any unassigned code points."
- *
- * Section 4:
- * "The value COMMON is the default value,
- * given to all code points that are not
- * explicitly mentioned in the data file."
- *
- * COMMON==USCRIPT_COMMON==0 - nothing to do
- */
- parseSingleEnumFile(filename, basename, suffix, &scriptSingleEnum, pErrorCode);
-
- parseSingleEnumFile(filename, basename, suffix, &blockSingleEnum, pErrorCode);
-
- parseBinariesFile(filename, basename, suffix, &propListBinaries, pErrorCode);
-
- parseBinariesFile(filename, basename, suffix, &derCorePropsBinaries, pErrorCode);
-
- parseSingleEnumFile(filename, basename, suffix, &graphemeClusterBreakSingleEnum, pErrorCode);
-
- parseSingleEnumFile(filename, basename, suffix, &wordBreakSingleEnum, pErrorCode);
-
- parseSingleEnumFile(filename, basename, suffix, &sentenceBreakSingleEnum, pErrorCode);
-
- /*
- * LineBreak-4.0.0.txt:
- * - All code points, assigned and unassigned, that are not listed
- * explicitly are given the value "XX".
- *
- * XX==U_LB_UNKNOWN==0 - nothing to do
- */
- parseSingleEnumFile(filename, basename, suffix, &lineBreakSingleEnum, pErrorCode);
-
- /*
- * Preset East Asian Width defaults:
- *
- * http://www.unicode.org/reports/tr11/#Unassigned
- * 7.1 Unassigned and Private Use characters
- *
- * All unassigned characters are by default classified as non-East Asian neutral,
- * except for the range U+20000 to U+2FFFD,
- * since all code positions from U+20000 to U+2FFFD are intended for CJK ideographs (W).
- * All Private use characters are by default classified as ambiguous,
- * since their definition depends on context.
- *
- * N for all ==0 - nothing to do
- * A for Private Use
- * W for plane 2
- */
- *pErrorCode=U_ZERO_ERROR;
- upvec_setValue(pv, 0xe000, 0xf8ff, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
- upvec_setValue(pv, 0xf0000, 0xffffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
- upvec_setValue(pv, 0x100000, 0x10fffd, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
- upvec_setValue(pv, 0x20000, 0x2fffd, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-
- /* parse EastAsianWidth.txt */
- parseSingleEnumFile(filename, basename, suffix, &eawSingleEnum, pErrorCode);
-
- {
- UPVecToUTrieContext toUTrie={ NULL, 50000 /* capacity */, 0, TRUE /* latin1Linear */ };
- upvec_compact(pv, upvec_compactToUTrieHandler, &toUTrie, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
- u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
- newTrie=toUTrie.newTrie;
- }
-}
-
-/* DerivedAge.txt ----------------------------------------------------------- */
-
-static void U_CALLCONV
-ageLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- char *s, *numberLimit;
- uint32_t value, start, end, version;
-
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 0 at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
-
- /* ignore "unassigned" (the default is already set to 0.0) */
- s=(char *)u_skipWhitespace(fields[1][0]);
- if(0==uprv_strncmp(s, "unassigned", 10)) {
- return;
- }
-
- /* parse version number */
- value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
- if(s==numberLimit || value==0 || value>15 || (*numberLimit!='.' && *numberLimit!=' ' && *numberLimit!='\t' && *numberLimit!=0)) {
- fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- version=value<<4;
-
- /* parse minor version number */
- if(*numberLimit=='.') {
- s=(char *)u_skipWhitespace(numberLimit+1);
- value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
- if(s==numberLimit || value>15 || (*numberLimit!=' ' && *numberLimit!='\t' && *numberLimit!=0)) {
- fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
- *pErrorCode=U_PARSE_ERROR;
- exit(U_PARSE_ERROR);
- }
- version|=value;
- }
-
- if(start==0 && end==0x10ffff) {
- /* Also set bits for initialValue and errorValue. */
- end=UPVEC_MAX_CP;
- }
- upvec_setValue(pv, start, end, 0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode));
- exit(*pErrorCode);
- }
-}
-
-/* DerivedNumericValues.txt ------------------------------------------------- */
-
-static void U_CALLCONV
-numericLineFn(void *context,
- char *fields[][2], int32_t fieldCount,
- UErrorCode *pErrorCode) {
- Props newProps={ 0 };
- char *s, *numberLimit;
- uint32_t start, end, value, oldProps32;
- int32_t oldType;
- char c;
- UBool isFraction;
-
- /* get the code point range */
- u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields[0][0]);
- exit(*pErrorCode);
- }
-
- /*
- * Ignore the
- * # @missing: 0000..10FFFF; NaN
- * line from Unicode 5.1's DerivedNumericValues.txt:
- * The following code cannot parse "NaN", and we don't want to overwrite
- * the numeric values for all characters after reading most
- * from UnicodeData.txt already.
- */
- if(start==0 && end==0x10ffff) {
- return;
- }
-
- /* check if the numeric value is a fraction (this code does not handle any) */
- isFraction=FALSE;
- s=uprv_strchr(fields[1][0], '.');
- if(s!=NULL) {
- numberLimit=s+1;
- while('0'<=(c=*numberLimit++) && c<='9') {
- if(c!='0') {
- isFraction=TRUE;
- break;
- }
- }
- }
-
- if(isFraction) {
- value=0;
- } else {
- /* parse numeric value */
- s=(char *)u_skipWhitespace(fields[1][0]);
-
- /* try large powers of 10 first, may otherwise overflow strtoul() */
- if(0==uprv_strncmp(s, "10000000000", 11)) {
- /* large powers of 10 are encoded in a special way, see store.c */
- uint8_t exp=0;
-
- numberLimit=s;
- while(*(++numberLimit)=='0') {
- ++exp;
- }
- value=1;
- newProps.exponent=exp;
- } else {
- /* normal number parsing */
- value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
- }
- if(numberLimit<=s || (*numberLimit!='.' && u_skipWhitespace(numberLimit)!=fields[1][1]) || value>=0x80000000) {
- fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
- exit(U_PARSE_ERROR);
- }
- }
-
- /*
- * Unicode 4.0.1 removes the third column that used to list the numeric type.
- * Assume that either the data is the same as in UnicodeData.txt,
- * or else that the numeric type is "numeric".
- * This should work because we only expect to add numeric values for
- * Han characters; for those, UnicodeData.txt lists only ranges without
- * specific properties for single characters.
- */
-
- /* set the new numeric type and value */
- newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */
- newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
- /* the exponent may have been set above */
- value=makeProps(&newProps);
-
- for(; start<=end; ++start) {
- oldProps32=getProps(start);
- oldType=(int32_t)GET_NUMERIC_TYPE(oldProps32);
-
- if(isFraction) {
- if(oldType!=0) {
- /* this code point was already listed with its numeric value in UnicodeData.txt */
- continue;
- } else {
- fprintf(stderr, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields[1][0]);
- exit(U_PARSE_ERROR);
- }
- }
-
- /*
- * For simplicity, and because we only expect to set numeric values for Han characters,
- * for now we only allow to set these values for Lo characters.
- */
- if(oldType==0 && GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
- fprintf(stderr, "genprops error: new numeric value for a character other than Lo in DerivedNumericValues.txt at %s\n", fields[0][0]);
- exit(U_PARSE_ERROR);
- }
-
- /* verify that we do not change an existing value (fractions were excluded above) */
- if(oldType!=0) {
- /* the code point already has a value stored */
- if((oldProps32&0xff00)!=(value&0xff00)) {
- fprintf(stderr, "genprops error: new numeric value differs from old one for U+%04lx\n", (long)start);
- exit(U_PARSE_ERROR);
- }
- /* same value, continue */
- } else {
- /* the code point is getting a new numeric value */
- if(beVerbose) {
- printf("adding U+%04x numeric type %d value 0x%04x from %s\n", (int)start, U_NT_NUMERIC, (int)value, fields[0][0]);
- }
-
- addProps(start, value|GET_CATEGORY(oldProps32));
- }
- }
-}
-
-/* data serialization ------------------------------------------------------- */
-
-U_CFUNC int32_t
-writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
- const uint32_t *pvArray;
- int32_t pvRows, pvCount;
- int32_t length;
- UErrorCode errorCode;
-
- pvArray=upvec_getArray(pv, &pvRows, NULL);
- pvCount=pvRows*UPROPS_VECTOR_WORDS;
-
- errorCode=U_ZERO_ERROR;
- length=utrie_serialize(newTrie, p, capacity, NULL, TRUE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops error: unable to serialize trie for additional properties: %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
- if(p!=NULL) {
- if(beVerbose) {
- printf("size in bytes of additional props trie:%5u\n", (int)length);
- }
- if(f!=NULL) {
- UTrie trie={ NULL };
- UTrie2 *trie2;
-
- utrie_unserialize(&trie, p, length, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genprops error: failed to utrie_unserialize(trie for additional properties) - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-
- /* use UTrie2 */
- trie2=utrie2_fromUTrie(&trie, trie.initialValue, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genprops error: utrie2_fromUTrie() failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- {
- /* delete lead surrogate code unit values */
- UChar lead;
- trie2=utrie2_cloneAsThawed(trie2, &errorCode);
- for(lead=0xd800; lead<0xdc00; ++lead) {
- utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
- }
- utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genbidi error: deleting lead surrogate code unit values failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- }
-
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
- trie2,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 propsVectorsTrie={\n",
- trie2, "propsVectorsTrie_index", NULL,
- "};\n\n");
-
- utrie2_close(trie2);
- }
-
- p+=length;
- capacity-=length;
-
- /* set indexes */
- indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
- indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+length/4;
- indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
- indexes[UPROPS_RESERVED_INDEX]=
- indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
-
- indexes[UPROPS_MAX_VALUES_INDEX]=
- (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
- (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
- (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
- indexes[UPROPS_MAX_VALUES_2_INDEX]=
- (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
- (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
- (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
- (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
- ((int32_t)U_DT_COUNT-1);
- }
-
- if(p!=NULL && (pvCount*4)<=capacity) {
- if(f!=NULL) {
- usrc_writeArray(f,
- "static const uint32_t propsVectors[%ld]={\n",
- pvArray, 32, pvCount,
- "};\n\n");
- fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
- fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
- } else {
- uprv_memcpy(p, pvArray, pvCount*4);
- }
- if(beVerbose) {
- printf("number of additional props vectors: %5u\n", (int)pvRows);
- printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS);
- }
- }
- length+=pvCount*4;
-
- return length;
-}
diff --git a/tools/genprops/store.c b/tools/genprops/store.c
deleted file mode 100644
index 80464952..00000000
--- a/tools/genprops/store.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: store.c
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999dec11
-* created by: Markus W. Scherer
-*
-* Store Unicode character properties efficiently for
-* random access.
-*/
-
-#include <stdio.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "utrie.h"
-#include "unicode/udata.h"
-#include "unewdata.h"
-#include "writesrc.h"
-#include "uprops.h"
-#include "genprops.h"
-
-#define DO_DEBUG_OUT 0
-
-/* Unicode character properties file format ------------------------------------
-
-The file format prepared and written here contains several data
-structures that store indexes or data.
-
-Before the data contents described below, there are the headers required by
-the udata API for loading ICU data. Especially, a UDataInfo structure
-precedes the actual data. It contains platform properties values and the
-file format version.
-
-The following is a description of format version 5 .
-
-The format changes between version 3 and 4 because the properties related to
-case mappings and bidi/shaping are pulled out into separate files
-for modularization.
-In order to reduce the need for code changes, some of the previous data
-structures are omitted, rather than rearranging everything.
-
-For details see "Changes in format version 4" below.
-
-Format version 5 became necessary because the bit field for script codes
-overflowed. Several bit fields got rearranged, and three (Script, Block,
-Word_Break) got widened by one bit each.
-
-Data contents:
-
-The contents is a parsed, binary form of several Unicode character
-database files, most prominently UnicodeData.txt.
-
-Any Unicode code point from 0 to 0x10ffff can be looked up to get
-the properties, if any, for that code point. This means that the input
-to the lookup are 21-bit unsigned integers, with not all of the
-21-bit range used.
-
-It is assumed that client code keeps a uint32_t pointer
-to the beginning of the data:
-
- const uint32_t *p32;
-
-Formally, the file contains the following structures:
-
- const int32_t indexes[16] with values i0..i15:
-
- i0 indicates the length of the main trie.
- i0..i3 all have the same value in format version 4.0;
- the related props32[] and exceptions[] and uchars[] were used in format version 3
-
- i0 propsIndex; -- 32-bit unit index to the table of 32-bit properties words
- i1 exceptionsIndex; -- 32-bit unit index to the table of 32-bit exception words
- i2 exceptionsTopIndex; -- 32-bit unit index to the array of UChars for special mappings
-
- i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
- i4 additionalVectorsIndex; -- 32-bit unit index to the table of properties vectors
- i5 additionalVectorsColumns; -- number of 32-bit words per properties vector
-
- i6 reservedItemIndex; -- 32-bit unit index to the top of the properties vectors table
- i7..i9 reservedIndexes; -- reserved values; 0 for now
-
- i10 maxValues; -- maximum code values for vector word 0, see uprops.h (new in format version 3.1+)
- i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (new in format version 3.2)
- i12..i15 reservedIndexes; -- reserved values; 0 for now
-
- PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
-
- P, E, and U are not used (empty) in format version 4
-
- P const uint32_t props32[i1-i0];
- E const uint32_t exceptions[i2-i1];
- U const UChar uchars[2*(i3-i2)];
-
- AT serialized trie for additional properties (byte size: 4*(i4-i3))
- PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
-
-Trie lookup and properties:
-
-In order to condense the data for the 21-bit code space, several properties of
-the Unicode code assignment are exploited:
-- The code space is sparse.
-- There are several 10k of consecutive codes with the same properties.
-- Characters and scripts are allocated in groups of 16 code points.
-- Inside blocks for scripts the properties are often repetitive.
-- The 21-bit space is not fully used for Unicode.
-
-The lookup of properties for a given code point is done with a trie lookup,
-using the UTrie implementation.
-The trie lookup result is a 16-bit properties word.
-
-With a given Unicode code point
-
- UChar32 c;
-
-and 0<=c<0x110000, the lookup is done like this:
-
- uint16_t props;
- UTRIE_GET16(trie, c, props);
-
-Each 16-bit properties word contains:
-
- 0.. 4 general category
- 5.. 7 numeric type
- non-digit numbers are stored with multiple types and pseudo-types
- in order to facilitate compact encoding:
- 0 no numeric value (0)
- 1 decimal digit value (0..9)
- 2 digit value (0..9)
- 3 (U_NT_NUMERIC) normal non-digit numeric value 0..0xff
- 4 (internal type UPROPS_NT_FRACTION) fraction
- 5 (internal type UPROPS_NT_LARGE) large number >0xff
- 6..7 reserved
-
- when returning the numeric type from a public API,
- internal types must be turned into U_NT_NUMERIC
-
- 8..15 numeric value
- encoding of fractions and large numbers see below
-
-Fractions:
- // n is the 8-bit numeric value from bits 8..15 of the trie word (shifted down)
- int32_t num, den;
- num=n>>3; // num=0..31
- den=(n&7)+2; // den=2..9
- if(num==0) {
- num=-1; // num=-1 or 1..31
- }
- double result=(double)num/(double)den;
-
-Large numbers:
- // n is the 8-bit numeric value from bits 8..15 of the trie word (shifted down)
- int32_t m, e;
- m=n>>4; // m=0..15
- e=(n&0xf);
- if(m==0) {
- m=1; // for large powers of 10
- e+=18; // e=18..33
- } else {
- e+=2; // e=2..17
- } // m==10..15 are reserved
- double result=(double)m*10^e;
-
---- Additional properties (new in format version 2.1) ---
-
-The second trie for additional properties (AT) is also a UTrie with 16-bit data.
-The data words consist of 32-bit unit indexes (not row indexes!) into the
-table of unique properties vectors (PV).
-Each vector contains a set of properties.
-The width of a vector (number of uint32_t per row) may change
-with the formatVersion, it is stored in i5.
-
-Current properties: see icu/source/common/uprops.h
-
---- Changes in format version 3.1 ---
-
-See i10 maxValues above, contains only UBLOCK_COUNT and USCRIPT_CODE_LIMIT.
-
---- Changes in format version 3.2 ---
-
-- The tries use linear Latin-1 ranges.
-- The additional properties bits store full properties XYZ instead
- of partial Other_XYZ, so that changes in the derivation formulas
- need not be tracked in runtime library code.
-- Joining Type and Line Break are also stored completely, so that uprops.c
- needs no runtime formulas for enumerated properties either.
-- Store the case-sensitive flag in the main properties word.
-- i10 also contains U_LB_COUNT and U_EA_COUNT.
-- i11 contains maxValues2 for vector word 2.
-
---- Changes in format version 4 ---
-
-The format changes between version 3 and 4 because the properties related to
-case mappings and bidi/shaping are pulled out into separate files
-for modularization.
-In order to reduce the need for code changes, some of the previous data
-structures are omitted, rather than rearranging everything.
-
-(The change to format version 4 is for ICU 3.4. The last CVS revision of
-genprops/store.c for format version 3.2 is 1.48.)
-
-The main trie's data is significantly simplified:
-- The trie's 16-bit data word is used directly instead of as an index
- into props32[].
-- The trie uses the default trie folding functions instead of custom ones.
-- Numeric values are stored directly in the trie data word, with special
- encodings.
-- No more exception data (the data that needed it was pulled out, or, in the
- case of numeric values, encoded differently).
-- No more string data (pulled out - was for case mappings).
-
-Also, some of the previously used properties vector bits are reserved again.
-
-The indexes[] values for the omitted structures are still filled in
-(indicating zero-length arrays) so that the swapper code remains unchanged.
-
---- Changes in format version 5 ---
-
-Rearranged bit fields in the second trie (AT) because the script code field
-overflowed. Old code would have seen nonsensically low values for new, higher
-script codes.
-Modified bit fields in icu/source/common/uprops.h
-
------------------------------------------------------------------------------ */
-
-/* UDataInfo cf. udata.h */
-static UDataInfo dataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- { 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
- { 5, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
- { 5, 1, 0, 0 } /* dataVersion */
-};
-
-static UNewTrie *pTrie=NULL;
-
-/* -------------------------------------------------------------------------- */
-
-extern void
-setUnicodeVersion(const char *v) {
- UVersionInfo version;
- u_versionFromString(version, v);
- uprv_memcpy(dataInfo.dataVersion, version, 4);
-}
-
-extern void
-initStore() {
- pTrie=utrie_open(NULL, NULL, 40000, 0, 0, TRUE);
- if(pTrie==NULL) {
- fprintf(stderr, "error: unable to create a UNewTrie\n");
- exit(U_MEMORY_ALLOCATION_ERROR);
- }
-
- initAdditionalProperties();
-}
-
-extern void
-exitStore() {
- utrie_close(pTrie);
- exitAdditionalProperties();
-}
-
-static uint32_t printNumericTypeValueError(Props *p) {
- fprintf(stderr, "genprops error: unable to encode numeric type & value %d %ld/%lu E%d\n",
- (int)p->numericType, (long)p->numericValue, (unsigned long)p->denominator, p->exponent);
- exit(U_ILLEGAL_ARGUMENT_ERROR);
- return 0;
-}
-
-/* store a character's properties ------------------------------------------- */
-
-extern uint32_t
-makeProps(Props *p) {
- uint32_t den;
- int32_t type, value, exp;
-
- /* encode numeric type & value */
- type=p->numericType;
- value=p->numericValue;
- den=p->denominator;
- exp=p->exponent;
-
- if(den!=0) {
- /* fraction */
- if( type!=U_NT_NUMERIC ||
- value<-1 || value==0 || value>UPROPS_FRACTION_MAX_NUM ||
- den<UPROPS_FRACTION_MIN_DEN || UPROPS_FRACTION_MAX_DEN<den ||
- exp!=0
- ) {
- return printNumericTypeValueError(p);
- }
- type=UPROPS_NT_FRACTION;
-
- if(value==-1) {
- value=0;
- }
- den-=UPROPS_FRACTION_DEN_OFFSET;
- value=(value<<UPROPS_FRACTION_NUM_SHIFT)|den;
- } else if(exp!=0) {
- /* very large value */
- if( type!=U_NT_NUMERIC ||
- value<1 || 9<value ||
- exp<UPROPS_LARGE_MIN_EXP || UPROPS_LARGE_MAX_EXP_EXTRA<exp
- ) {
- return printNumericTypeValueError(p);
- }
- type=UPROPS_NT_LARGE;
-
- if(exp<=UPROPS_LARGE_MAX_EXP) {
- /* 1..9 * 10^(2..17) */
- exp-=UPROPS_LARGE_EXP_OFFSET;
- } else {
- /* 1 * 10^(18..33) */
- if(value!=1) {
- return printNumericTypeValueError(p);
- }
- value=0;
- exp-=UPROPS_LARGE_EXP_OFFSET_EXTRA;
- }
- value=(value<<UPROPS_LARGE_MANT_SHIFT)|exp;
- } else if(value>UPROPS_MAX_SMALL_NUMBER) {
- /* large value */
- if(type!=U_NT_NUMERIC) {
- return printNumericTypeValueError(p);
- }
- type=UPROPS_NT_LARGE;
-
- /* split the value into mantissa and exponent, base 10 */
- while((value%10)==0) {
- value/=10;
- ++exp;
- }
- if(value>9) {
- return printNumericTypeValueError(p);
- }
-
- exp-=UPROPS_LARGE_EXP_OFFSET;
- value=(value<<UPROPS_LARGE_MANT_SHIFT)|exp;
- } else if(value<0) {
- /* unable to encode negative values, other than fractions -1/x */
- return printNumericTypeValueError(p);
-
- /* } else normal value=0..0xff { */
- }
-
- /* encode the properties */
- return
- (uint32_t)p->generalCategory |
- ((uint32_t)type<<UPROPS_NUMERIC_TYPE_SHIFT) |
- ((uint32_t)value<<UPROPS_NUMERIC_VALUE_SHIFT);
-}
-
-extern void
-addProps(uint32_t c, uint32_t x) {
- if(!utrie_set32(pTrie, (UChar32)c, x)) {
- fprintf(stderr, "error: too many entries for the properties trie\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-}
-
-extern uint32_t
-getProps(uint32_t c) {
- return utrie_get32(pTrie, (UChar32)c, NULL);
-}
-
-/* areas of same properties ------------------------------------------------- */
-
-extern void
-repeatProps(uint32_t first, uint32_t last, uint32_t x) {
- if(!utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE)) {
- fprintf(stderr, "error: too many entries for the properties trie\n");
- exit(U_BUFFER_OVERFLOW_ERROR);
- }
-}
-
-/* generate output data ----------------------------------------------------- */
-
-extern void
-generateData(const char *dataDir, UBool csource) {
- static int32_t indexes[UPROPS_INDEX_COUNT]={
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
- };
- static uint8_t trieBlock[40000];
- static uint8_t additionalProps[120000];
-
- UNewDataMemory *pData;
- UErrorCode errorCode=U_ZERO_ERROR;
- uint32_t size = 0;
- int32_t trieSize, additionalPropsSize, offset;
- long dataLength;
-
- trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
- exit(errorCode);
- }
-
- offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */
-
- /* round up trie size to 4-alignment */
- trieSize=(trieSize+3)&~3;
- offset+=trieSize>>2;
- indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */
- indexes[UPROPS_EXCEPTIONS_INDEX]= /* structures from the old format version 3 */
- indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */
- indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;
-
- if(beVerbose) {
- printf("trie size in bytes: %5u\n", (int)trieSize);
- }
-
- if(csource) {
- /* write .c file for hardcoded data */
- UTrie trie={ NULL };
- UTrie2 *trie2;
- FILE *f;
-
- utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
-
- /* use UTrie2 */
- dataInfo.formatVersion[0]=6;
- dataInfo.formatVersion[2]=0;
- dataInfo.formatVersion[3]=0;
- trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genprops error: utrie2_fromUTrie() failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- {
- /* delete lead surrogate code unit values */
- UChar lead;
- trie2=utrie2_cloneAsThawed(trie2, &errorCode);
- for(lead=0xd800; lead<0xdc00; ++lead) {
- utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
- }
- utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(
- stderr,
- "genprops error: deleting lead surrogate code unit values failed - %s\n",
- u_errorName(errorCode));
- exit(errorCode);
- }
- }
-
- f=usrc_create(dataDir, "uchar_props_data.c");
- if(f!=NULL) {
- usrc_writeArray(f,
- "static const UVersionInfo formatVersion={",
- dataInfo.formatVersion, 8, 4,
- "};\n\n");
- usrc_writeArray(f,
- "static const UVersionInfo dataVersion={",
- dataInfo.dataVersion, 8, 4,
- "};\n\n");
- usrc_writeUTrie2Arrays(f,
- "static const uint16_t propsTrie_index[%ld]={\n", NULL,
- trie2,
- "\n};\n\n");
- usrc_writeUTrie2Struct(f,
- "static const UTrie2 propsTrie={\n",
- trie2, "propsTrie_index", NULL,
- "};\n\n");
-
- additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
- size=4*offset+additionalPropsSize; /* total size of data */
-
- usrc_writeArray(f,
- "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
- indexes, 32, UPROPS_INDEX_COUNT,
- "};\n\n");
- fclose(f);
- }
- utrie2_close(trie2);
- } else {
- /* write the data */
- pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
- haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode));
- exit(errorCode);
- }
-
- additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
- size=4*offset+additionalPropsSize; /* total size of data */
-
- udata_writeBlock(pData, indexes, sizeof(indexes));
- udata_writeBlock(pData, trieBlock, trieSize);
- udata_writeBlock(pData, additionalProps, additionalPropsSize);
-
- /* finish up */
- dataLength=udata_finish(pData, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "genprops: error %d writing the output file\n", errorCode);
- exit(errorCode);
- }
-
- if(dataLength!=(long)size) {
- fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
- dataLength, (unsigned long)size);
- exit(U_INTERNAL_PROGRAM_ERROR);
- }
- }
-
- if(beVerbose) {
- printf("data size: %6lu\n", (unsigned long)size);
- }
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/genrb/Makefile.in b/tools/genrb/Makefile.in
index 8c3b07eb..83a67db8 100644
--- a/tools/genrb/Makefile.in
+++ b/tools/genrb/Makefile.in
@@ -1,15 +1,8 @@
#################################################################################
-## Makefile.in for ICU - tools/genrb #
-## Copyright (c) 1999-2009, International Business Machines Corporation and #
-## others. All Rights Reserved. #
+## Makefile.in for ICU - tools/genrb #
+## Copyright (c) 1999-2009, International Business Machines Corporation and #
+## others. All Rights Reserved. #
#################################################################################
-#
-#
-# For Help with this Makefile and the procedures for generating ICU4J data, please see
-# "icu4j-readme.txt" in $icu4c_root/source/data
-#
-#
-
## Source directory information
srcdir = @srcdir@
@@ -50,39 +43,11 @@ DERB_OBJ = derb.o
DEPS = $(OBJECTS:.o=.d)
DERB_DEPS = $(DERB_OBJ:.o=.d)
-# build-icu4j variables
-GENDTJAR_ICUHOME=$(shell pwd)/../../..
-GENDTJAR_OPTS=--icu-root="$(GENDTJAR_ICUHOME)" --jar="$(GENDTJAR_JARHOME)" --icu4j-root="$(ICU4J_HOME)" --verbose
-GENDTJAR_TEMP=./temp/
-GENDTJAR=$(srcdir)/gendtjar.pl
-
-include Makefile.local
-ifeq (,$(ICU4J_HOME))
-ICU4J_HOME=$(shell pwd)/icu4j
-endif
-
-##Added by Brian Rower 6/25/08
-##if JAVA_HOME was not set, try to set it by finding jar
-ifeq (,$(JAVA_HOME))
-JAVA_HOME=$(shell dirname `which jar`)/..
-ifeq (,$(GENDTJAR_JARHOME))
-GENDTJAR_JARHOME=$(shell dirname `which jar`)
-endif
-endif
-
-##if GENDTJAR_JARHOME (the directory that "jar" lives in) is not set
-##set it equal to $JAVA_HOME/bin
-ifeq (,$(GENDTJAR_JARHOME))
-GENDTJAR_JARHOME=""$(JAVA_HOME)"/bin"
-endif
-
-
-
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man \
-build-icu4j
+distclean distclean-local dist dist-local check check-local install-man
## Clear suffix list
.SUFFIXES :
@@ -110,7 +75,7 @@ dist-local:
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(DERB) $(OBJECTS) $(DERB_OBJ) $(GENDTJAR_TEMP)
+ $(RMV) $(TARGET) $(DERB) $(OBJECTS) $(DERB_OBJ)
distclean-local: clean-local
$(RMV) Makefile
@@ -129,23 +94,6 @@ $(DERB) : $(DERB_OBJ)
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
$(POST_BUILD_STEP)
-#$(GENDTJAR_JARHOME)/jar:
-# @echo $@ not found - make sure GENDTJAR_JARHOME is set. See Makefile.in
-# exit 1
-
-$(ICU4J_HOME)/build.xml:
- @echo warning: $@ not found - make sure ICU4J_HOME is set.
-
-
-#Removed GENDTJAR_JARHOME dependancy. If the file path happens to contain a space in it,
-#having it as a target will cause it to error out when it thinks that there are TWO targets. - Brian Rower 6/25/08
-build-icu4j: $(GENDTJAR) $(ICU4J_HOME)/build.xml #$(GENDTJAR_JARHOME)/jar
- # clean up old temp files
- -$(RMV) $(GENDTJAR_TEMP)
- perl $(GENDTJAR) $(GENDTJAR_OPTS)
- @echo Finished building to $(ICU4J_HOME)
-
-
# This line is needed to serialize builds when the gmake -j option is used.
$(TARGET_STUB_NAME).$(SECTION): $(DERB_STUB_NAME).$(SECTION)
@@ -162,4 +110,3 @@ ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif
-
diff --git a/tools/genrb/derb.c b/tools/genrb/derb.c
index 7231d9ad..55c3316a 100644
--- a/tools/genrb/derb.c
+++ b/tools/genrb/derb.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2008, International Business Machines
+* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -467,58 +467,11 @@ static void printHex(FILE *out, UConverter *converter, uint8_t what) {
printString(out, converter, hex, (int32_t)(sizeof(hex)/sizeof(*hex)));
}
-static const UChar *
-derb_getString(const ResourceData *pResData, const Resource res, int32_t *pLength) {
- if(res!=RES_BOGUS) {
- int32_t *p=(int32_t *)RES_GET_POINTER(pResData->pRoot, res);
- if (pLength) {
- *pLength=*p;
- }
- return (UChar *)++p;
- } else {
- if (pLength) {
- *pLength=0;
- }
- return NULL;
- }
-}
-
-static const char *
-derb_getTableKey(const Resource *pRoot, const Resource res, uint16_t indexS) {
- uint16_t *p=(uint16_t *)RES_GET_POINTER(pRoot, res);
- if(indexS<*p) {
- return ((const char *)(pRoot)+(p[indexS+1])); /*RES_GET_KEY(pRoot, p[indexS+1]);*/
- } else {
- return NULL; /* indexS>itemCount */
- }
-}
-
-static Resource
-derb_getArrayItem(Resource *pRoot, Resource res, int32_t indexR) {
- int32_t *p=(int32_t *)RES_GET_POINTER(pRoot, res);
- if(indexR<*p) {
- return ((Resource *)(p))[1+indexR];
- } else {
- return RES_BOGUS; /* indexR>itemCount */
- }
-}
-
-static Resource
-derb_getTableItem(const Resource *pRoot, const Resource res, uint16_t indexR) {
- uint16_t *p=(uint16_t *)RES_GET_POINTER(pRoot, res);
- uint16_t count=*p;
- if(indexR<count) {
- return ((Resource *)(p+1+count+(~count&1)))[indexR];
- } else {
- return RES_BOGUS; /* indexR>itemCount */
- }
-}
-
static void printOutAlias(FILE *out, UConverter *converter, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) {
static const UChar cr[] = { '\n' };
int32_t len = 0;
- const UChar* thestr = derb_getString(&(parent->fResData), r, &len);
+ const UChar* thestr = res_getAlias(&(parent->fResData), r, &len);
UChar *string = quotedString(thestr);
if(trunc && len > truncsize) {
char msg[128];
@@ -557,7 +510,7 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
const char *key = ures_getKey(resource);
switch(ures_getType(resource)) {
- case RES_STRING :
+ case URES_STRING :
{
int32_t len=0;
const UChar* thestr = ures_getString(resource, &len, status);
@@ -598,7 +551,7 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
}
break;
- case RES_INT :
+ case URES_INT :
{
static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */
static const UChar closeStr[] = { 0x0020, 0x007D }; /* " }" */
@@ -619,7 +572,7 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
printString(out, converter, cr, (int32_t)(sizeof(cr) / sizeof(*cr)));
break;
}
- case RES_BINARY :
+ case URES_BINARY :
{
int32_t len = 0;
const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status);
@@ -652,7 +605,7 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
}
}
break;
- case RES_INT_VECTOR :
+ case URES_INT_VECTOR :
{
int32_t len = 0;
const int32_t *data = ures_getIntVector(resource, &len, status);
@@ -687,8 +640,8 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
}
}
break;
- case RES_TABLE :
- case RES_ARRAY :
+ case URES_TABLE :
+ case URES_ARRAY :
{
static const UChar openStr[] = { 0x007B }; /* "{" */
static const UChar closeStr[] = { 0x007D, '\n' }; /* "}\n" */
@@ -701,7 +654,7 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
}
printString(out, converter, openStr, (int32_t)(sizeof(openStr) / sizeof(*openStr)));
if(verbose) {
- if(ures_getType(resource) == RES_TABLE) {
+ if(ures_getType(resource) == URES_TABLE) {
printCString(out, converter, "// TABLE", -1);
} else {
printCString(out, converter, "// ARRAY", -1);
@@ -720,17 +673,18 @@ static void printOutBundle(FILE *out, UConverter *converter, UResourceBundle *re
}
}
} else { /* we have to use low level access to do this */
- Resource r = RES_BOGUS;
- for(i = 0; i < ures_getSize(resource); i++) {
+ Resource r;
+ int32_t resSize = ures_getSize(resource);
+ UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE);
+ for(i = 0; i < resSize; i++) {
/* need to know if it's an alias */
- if(ures_getType(resource) == RES_TABLE) {
- r = derb_getTableItem(resource->fResData.pRoot, resource->fRes, (int16_t)i);
- key = derb_getTableKey(resource->fResData.pRoot, resource->fRes, (int16_t)i);
+ if(isTable) {
+ r = res_getTableItemByIndex(&resource->fResData, resource->fRes, i, &key);
} else {
- r = derb_getArrayItem(resource->fResData.pRoot, resource->fRes, i);
+ r = res_getArrayItem(&resource->fResData, resource->fRes, i);
}
if(U_SUCCESS(*status)) {
- if(RES_GET_TYPE(r) == RES_ALIAS) {
+ if(res_getPublicType(r) == URES_ALIAS) {
printOutAlias(out, converter, resource, r, key, indent+indentsize, pname, status);
} else {
t = ures_getByIndex(resource, i, t, status);
diff --git a/tools/genrb/gendtjar.pl b/tools/genrb/gendtjar.pl
deleted file mode 100755
index 741e1f67..00000000
--- a/tools/genrb/gendtjar.pl
+++ /dev/null
@@ -1,391 +0,0 @@
-#!/usr/bin/perl
-# ********************************************************************
-# * COPYRIGHT:
-# * Copyright (c) 2002-2008, International Business Machines Corporation and
-# * others. All Rights Reserved.
-# ********************************************************************
-
-# Script to generate the icudata.jar and testdata.jar files. This file is
-# part of icu4j. It is checked into CVS. It is generated from
-# locale data in the icu4c project. See usage() notes (below)
-# for more information.
-
-# This script requires perl. For Win32, I recommend www.activestate.com.
-
-# Ram Viswanadha
-# copied heavily from genrbjar.pl
-#
-# 6/25/08 - Modified to better handle cygwin paths - Brian Rower
-#
-use File::Find;
-use File::Basename;
-use IO::File;
-use Cwd;
-use File::Copy;
-use Getopt::Long;
-use File::Path;
-use File::Copy;
-use Cwd;
-use Cwd 'abs_path';
-
-main();
-
-#------------------------------------------------------------------
-sub main(){
-
- GetOptions(
- "--icu-root=s" => \$icuRootDir,
- "--jar=s" => \$jarDir,
- "--icu4j-root=s" => \$icu4jDir,
- "--version=s" => \$version,
- "--verbose" => \$verbose,
- "--help" => \$help
- );
- $cwd = abs_path(getcwd);
-
- if($help){
- usage();
- }
- unless (defined $icuRootDir){
- $icuRootDir =abs_path($cwd."/../../..");
- }
- unless (defined $icu4jDir){
- $icu4jDir =abs_path($icuRootDir."/../icu4j");
- }
- unless (defined $jarDir){
- if(defined $ENV{'JAVA_HOME'}){
- $jarDir=$ENV{'JAVA_HOME'}."/bin";
- }else{
- print("ERROR: JAVA_HOME enviroment variable undefined and --jar argument not specifed.\n");
- usage();
- }
- }
-
- $platform = getPlatform();
- $icuBinDir = $icuRootDir;
-
- $path=$ENV{'PATH'};
-
- if(($platform eq "cygwin") || ($platform eq "linux")){
- $icuBinDir .= "/source/bin";
- $icuLibDir = abs_path($icuBinDir."/../lib");
- $path .=":$icuBinDir:$icuLibDir";
-
- $libpath = $ENV{'LD_LIBRARY_PATH'}.":$icuLibDir";
- $ENV{'LD_LIBRARY_PATH'} = $libpath;
-
- #print ("##### LD_LIBRARY_PATH = $ENV{'LD_LIBRARY_PATH'}\n");
-
- }elsif($platform eq "aix"){
-
- $icuBinDir .= "/source/bin";
- $icuLibDir = abs_path($icuBinDir."/../lib");
- $path .=":$icuBinDir:$icuLibDir";
-
- $libpath = $ENV{'LIBPATH'}.":$icuLibDir";
- $ENV{'LIBPATH'} = $libpath;
- #print ("##### LIBPATH = $ENV{'LIBPATH'}\n");
- }elsif($platform eq "darwin"){
- $icuBinDir .= "/source/bin";
- $icuLibDir = abs_path($icuBinDir."/../lib");
- $path .=":$icuBinDir:$icuLibDir";
-
- $libpath = $ENV{'DYLD_LIBRARY_PATH'}.":$icuLibDir";
- $ENV{'DYLD_LIBRARY_PATH'} = $libpath;
-
- }elsif($platform eq "MSWin32"){
- $icuBinDir =$icuRootDir."/bin";
- $path .=$icuBinDir;
-
- }
- $ENV{'PATH'} = $path;
- #print ("##### PATH = $ENV{'PATH'}\n");
- # TODO add more platforms and test on Linux and Unix
-
- $icuBuildDir =$icuRootDir."/source/data/out/build";
- $icuTestDataSrcDir =$icuRootDir."/source/test/testdata/";
- $icuTestDataDir =$icuRootDir."/source/test/testdata/out/build/";
-
- # now build ICU
- buildICU($platform, $icuRootDir, $icuTestDataDir, $verbose);
-
- #figure out the version and endianess
- unless (defined $version){
- ($version, $endian) = getVersion();
- #print "#################### $version, $endian ######\n";
- }
-
- $icupkg = $icuBinDir."/icupkg -tb";
- $tempDir = $cwd."/temp";
- $version =~ s/\.//;
- $icu4jImpl = "com/ibm/icu/impl/data/";
- $icu4jDataDir = $icu4jImpl."icudt".$version."b";
- $icu4jDevDataDir = "com/ibm/icu/dev/data/";
- $icu4jTestDataDir = "$icu4jDevDataDir/testdata";
-
- $icuDataDir =$icuBuildDir."/icudt".$version.checkPlatformEndianess();
-
- #remove the stale directories
- unlink($tempDir);
-
- convertData($icuDataDir, $icupkg, $tempDir, $icu4jDataDir, $verbose);
- #convertData($icuDataDir."/coll/", $icupkg, $tempDir, $icu4jDataDir."/coll");
- createJar("\"$jarDir/jar\"", "icudata.jar", $tempDir, $icu4jDataDir, $verbose);
-
- convertTestData($icuTestDataDir, $icupkg, $tempDir, $icu4jTestDataDir, $verbose);
- createJar("\"$jarDir/jar\"", "testdata.jar", $tempDir, $icu4jTestDataDir, $verbose);
- copyData($icu4jDir, $icu4jImpl, $icu4jDevDataDir, $tempDir, $verbose);
-}
-
-#-----------------------------------------------------------------------
-sub buildICU{
- local($platform, $icuRootDir, $icuTestDataDir, $verbose) = @_;
- $icuSrcDir = $icuRootDir."/source";
- $icuSrcDataDir = $icuSrcDir."/data";
-
- chdir($icuSrcDir);
- # clean the data directories
- unlink($icuBuildDir."../");
- unlink($icuTestDataDir."../");
-
- if(($platform eq "cygwin")||($platform eq "darwin")||($platform eq "linux")){
-
- # make all in ICU
- cmd("make all", $verbose);
- chdir($icuSrcDataDir);
- cmd("make uni-core-data", $verbose);
- if(chdir($icuTestDataSrcDir)){
- print("Invoking make in directory $icuTestDataSrcDir\n");
- cmd("make JAVA_OUT_DIR=\"$icu4jDir/src/com/ibm/icu/dev/test/util/\" all java-output", $verbose);
- }else{
- die "Could not cd to $icuTestDataSrcDir\n";
- }
- }elsif($platform eq "aix"){
- # make all in ICU
- cmd("gmake all", $verbose);
- chdir($icuSrcDataDir);
- cmd("gmake uni-core-data", $verbose);
- chdir($icuTestDataDir."../../");
- cmd("gmake JAVA_OUT_DIR=\"$icu4jDir/src/com/ibm/icu/dev/test/util/\" all java-output", $verbose);
- }elsif($platform eq "MSWin32"){
- #devenv.com $projectFileName \/build $configurationName > \"$cLogFile\" 2>&1
- cmd("devenv.com allinone/allinone.sln /useenv /build Debug", $verbose);
- # build required data. this is required coz building icu will not build all the data
- chdir($icuSrcDataDir);
- cmd("NMAKE /f makedata.mak ICUMAKE=\"$icuSrcDataDir\" CFG=debug uni-core-data", $verbose);
- print "WARNING: Don't know how to build java-output on $platform. \n";
- }else{
- print "ERROR: Could not build ICU unknown platform $platform. \n";
- exit(-1);
- }
-
- chdir($cwd);
-}
-#-----------------------------------------------------------------------
-sub getVersion{
- my @list;
- opendir(DIR,$icuBuildDir);
-
- @list = readdir(DIR);
- closedir(DIR);
-
- if(scalar(@list)>3){
- print("ERROR: More than 1 directory in build. Can't decide the version");
- exit(-1);
- }
- foreach $item (@list){
- next if($item eq "." || $item eq "..");
- my ($ver, $end) =$item =~ m/icudt(.*)(l|b|e)$/;
- return $ver,$end;
- }
-}
-
-#-----------------------------------------------------------------------
-sub getPlatform{
- $platform = $^O;
- return $platform;
-}
-#-----------------------------------------------------------------------
-sub createJar{
- local($jar, $jarFile, $tempDir, $dirToJar, $verbose) = @_;
- chdir($tempDir);
- $command="";
- print "INFO: Creating $jarFile\n";
- if($platform eq "cygwin") {
- #make sure the given path is a cygwin path not a windows path
- $jar = `cygpath -au $jar`;
- chop($jar);
-
- #added by Brian Rower 6/25/08
- #The following code deals with spaces in the path
- if(index($jar, "/ ") > 0)
- {
- $jar =~ s/[\/]\s/\\ /g;
- }
- elsif(index($jar, " ") > 0)
- {
- $jar =~ s/\s/\\ /g;
- }
- $tempDir = `cygpath -aw $tempDir`;
- chop($tempDir);
- $tempDir =~ s/\\/\\\\/g;
- }
- if(defined $verbose){
- $command = "$jar cvf $jarFile -C $tempDir $dirToJar";
- }else{
- $command = "$jar cf $jarFile -C $tempDir $dirToJar";
- }
- cmd($command, $verbose);
-}
-#-----------------------------------------------------------------------
-sub checkPlatformEndianess {
- my $is_big_endian = unpack("h*", pack("s", 1)) =~ /01/;
- if ($is_big_endian) {
- return "b";
- }else{
- return "l";
- }
-}
-#-----------------------------------------------------------------------
-sub copyData{
- local($icu4jDir, $icu4jImpl, $icu4jDevDataDir, $tempDir) =@_;
- print("INFO: Copying $tempDir/icudata.jar to $icu4jDir/src/$icu4jImpl\n");
- mkpath("$icu4jDir/src/$icu4jImpl");
- copy("$tempDir/icudata.jar", "$icu4jDir/src/$icu4jImpl");
- print("INFO: Copying $tempDir/testdata.jar $icu4jDir/src/$icu4jDevDataDir\n");
- mkpath("$icu4jDir/src/$icu4jDevDataDir");
- copy("$tempDir/testdata.jar","$icu4jDir/src/$icu4jDevDataDir");
-}
-#-----------------------------------------------------------------------
-sub convertData{
- local($icuDataDir, $icupkg, $tempDir, $icu4jDataDir) =@_;
- my $dir = $tempDir."/".$icu4jDataDir;
- # create the temp directory
- mkpath($dir) ;
- # cd to the temp directory
- chdir($tempDir);
- my $endian = checkPlatformEndianess();
- my @list;
- opendir(DIR,$icuDataDir);
- #print $icuDataDir;
- @list = readdir(DIR);
- closedir(DIR);
- my $op = $icupkg;
- #print "####### $endian ############\n";
- if($endian eq "l"){
- print "INFO: {Command: $op $icuDataDir/*.*}\n";
- }else{
- print "INFO: {Command: copy($icuDataDir/*.*, $tempDir/$icu4jDataDir/*)}\n";
- }
-
- $i=0;
- # now convert
- foreach $item (@list){
- next if($item eq "." || $item eq "..");
- # next if($item =~ /^t_.*$\.res/ ||$item =~ /^translit_.*$\.res/ ||
- # $item=~/$\.crs/ || $item=~ /$\.txt/ ||
- # $item=~/icudata\.res/ || $item=~/$\.exp/ || $item=~/$\.lib/ ||
- # $item=~/$\.obj/ || $item=~/$\.lst/);
- next if($item =~ /^t_.*$\.res/ ||$item =~ /^translit_.*$\.res/ ||
- $item=~/$\.crs/ || $item=~ /$\.txt/ ||
- $item=~/icudata\.res/ || $item=~/$\.exp/ || $item=~/$\.lib/ || $item=~/$\.obj/ ||
- $item=~/$\.lst/);
- if(-d "$icuDataDir/$item"){
- convertData("$icuDataDir/$item/", $icupkg, $tempDir, "$icu4jDataDir/$item/");
- next;
- }
- if($endian eq "l"){
- $command = $icupkg." $icuDataDir/$item $tempDir/$icu4jDataDir/$item";
- cmd($command, $verbose);
- }else{
- $rc = copy("$icuDataDir/$item", "$tempDir/$icu4jDataDir/$item");
- if($rc==1){
- #die "ERROR: Could not copy $icuDataDir/$item to $tempDir/$icu4jDataDir/$item, $!";
- }
- }
-
- }
- chdir("..");
- print "INFO: DONE\n";
-}
-#-----------------------------------------------------------------------
-sub convertTestData{
- local($icuDataDir, $icupkg, $tempDir, $icu4jDataDir) =@_;
- my $dir = $tempDir."/".$icu4jDataDir;
- # create the temp directory
- mkpath($dir);
- # cd to the temp directory
- chdir($tempDir);
- my $op = $icupkg;
- print "INFO: {Command: $op $icuDataDir/*.*}\n";
- my @list;
- opendir(DIR,$icuDataDir) or die "ERROR: Could not open the $icuDataDir directory for reading $!";
- #print $icuDataDir;
- @list = readdir(DIR);
- closedir(DIR);
- my $endian = checkPlatformEndianess();
- $i=0;
- # now convert
- foreach $item (@list){
- next if($item eq "." || $item eq "..");
- next if( item=~/$\.crs/ || $item=~ /$\.txt/ ||
- $item=~/$\.exp/ || $item=~/$\.lib/ || $item=~/$\.obj/ ||
- $item=~/$\.mak/ || $item=~/test\.icu/ || $item=~/$\.lst/);
- $file = $item;
- $file =~ s/testdata_//g;
- if($endian eq "l"){
- $command = "$icupkg $icuDataDir/$item $tempDir/$icu4jDataDir/$file";
- cmd($command, $verbose);
- }else{
- #print("Copying $icuDataDir/$item $tempDir/$icu4jDataDir/$file\n");
- copy("$icuDataDir/$item", "$tempDir/$icu4jDataDir/$file");
- }
-
-
- }
- chdir("..");
- print "INFO: DONE\n";
-}
-#------------------------------------------------------------------------------------------------
-sub cmd {
- my $cmd = shift;
- my $verbose = shift;
- my $prompt = shift;
-
- $prompt = "Command: $cmd.." unless ($prompt);
- if(defined $verbose){
- print $prompt."\n";
- }
- system($cmd);
- my $exit_value = $? >> 8;
- #my $signal_num = $? & 127;
- #my $dumped_core = $? & 128;
- if ($exit_value == 0) {
- if(defined $verbose){
- print "ok\n";
- }
- } else {
- ++$errCount;
- print "ERROR: Execution of $prompt returned ($exit_value)\n";
- exit(1);
- }
-}
-#-----------------------------------------------------------------------
-sub usage {
- print << "END";
-Usage:
-gendtjar.pl
-Options:
- --icu-root=<directory where icu4c lives>
- --jar=<directory where jar.exe lives>
- --icu4j-root=<directory>
- --version=<ICU4C version>
- --verbose
- --help
-e.g:
-gendtjar.pl --icu-root=\\work\\icu --jar=\\jdk1.4.1\\bin --icu4j-root=\\work\\icu4j --version=3.0
-END
- exit(0);
-}
-
-
diff --git a/tools/genrb/genrb.c b/tools/genrb/genrb.c
index 2cdba01f..16857598 100644
--- a/tools/genrb/genrb.c
+++ b/tools/genrb/genrb.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1998-2008, International Business Machines
+* Copyright (C) 1998-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -19,6 +19,8 @@
#include "genrb.h"
#include "unicode/uclean.h"
+#include "ucmndata.h" /* TODO: for reading the pool bundle */
+
/* Protos */
static void processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir, const char *packageName, UErrorCode *status);
static char *make_res_filename(const char *filename, const char *outputDir,
@@ -54,7 +56,10 @@ enum
NO_BINARY_COLLATION,
/*added by Jing*/
LANGUAGE,
- NO_COLLATION_RULES
+ NO_COLLATION_RULES,
+ FORMAT_VERSION,
+ WRITE_POOL_BUNDLE,
+ USE_POOL_BUNDLE
};
UOption options[]={
@@ -71,11 +76,14 @@ UOption options[]={
UOPTION_COPYRIGHT,
/* UOPTION_PACKAGE_NAME, This option is deprecated and should not be used ever. */
UOPTION_BUNDLE_NAME,
- UOPTION_DEF( "write-xliff", 'x', UOPT_OPTIONAL_ARG),
- UOPTION_DEF( "strict", 'k', UOPT_NO_ARG), /* 14 */
- UOPTION_DEF( "noBinaryCollation", 'C', UOPT_NO_ARG),/* 15 */
- UOPTION_DEF( "language", 'l', UOPT_REQUIRES_ARG), /* 16 */
- UOPTION_DEF( "omitCollationRules", 'R', UOPT_NO_ARG),/* 17 */
+ UOPTION_DEF("write-xliff", 'x', UOPT_OPTIONAL_ARG),
+ UOPTION_DEF("strict", 'k', UOPT_NO_ARG), /* 14 */
+ UOPTION_DEF("noBinaryCollation", 'C', UOPT_NO_ARG),/* 15 */
+ UOPTION_DEF("language", 'l', UOPT_REQUIRES_ARG), /* 16 */
+ UOPTION_DEF("omitCollationRules", 'R', UOPT_NO_ARG),/* 17 */
+ UOPTION_DEF("formatVersion", '\x01', UOPT_REQUIRES_ARG),/* 18 */
+ UOPTION_DEF("writePoolBundle", '\x01', UOPT_NO_ARG),/* 19 */
+ UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */
};
static UBool write_java = FALSE;
@@ -83,6 +91,20 @@ static UBool write_xliff = FALSE;
static const char* outputEnc ="";
static const char* gPackageName=NULL;
static const char* bundleName=NULL;
+static struct SRBRoot *newPoolBundle = NULL;
+
+/* TODO: separate header file for ResFile? */
+typedef struct ResFile {
+ uint8_t *fBytes;
+ const int32_t *fIndexes;
+ const char *fKeys;
+ int32_t fKeysLength;
+ int32_t fKeysCount;
+ int32_t fChecksum;
+} ResFile;
+
+static ResFile poolBundle = { NULL };
+
/*added by Jing*/
static const char* language = NULL;
static const char* xliffOutputFileName = NULL;
@@ -107,6 +129,24 @@ main(int argc,
} else if(argc<2) {
argc = -1;
}
+ if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) {
+ fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]);
+ argc = -1;
+ }
+ if(options[FORMAT_VERSION].doesOccur) {
+ const char *s = options[FORMAT_VERSION].value;
+ if(uprv_strlen(s) != 1 || (s[0] != '1' && s[0] != '2')) {
+ fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
+ argc = -1;
+ } else if(s[0] == '1' &&
+ (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur)
+ ) {
+ fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]);
+ argc = -1;
+ } else {
+ setFormatVersion(s[0] - '0');
+ }
+ }
if(options[VERSION].doesOccur) {
fprintf(stderr,
@@ -118,7 +158,7 @@ main(int argc,
if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
/*
- * Broken into chucks because the C89 standard says the minimum
+ * Broken into chunks because the C89 standard says the minimum
* required supported string length is 509 bytes.
*/
fprintf(stderr,
@@ -161,6 +201,14 @@ main(int argc,
"\t-R or --omitCollationRules do not include collation (tailoring) rules;\n"
"\t makes .res file smaller and maintains collator instantiation speed\n"
"\t but tailoring rules will not be available (they are rarely used)\n");
+ fprintf(stderr,
+ "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n"
+ "\t for example, --formatVersion 1\n");
+ fprintf(stderr,
+ "\t --writePoolBundle write a pool.res file with all of the keys of all input bundles\n"
+ "\t --usePoolBundle [path-to-pool.res] point to keys from the pool.res keys pool bundle if they are available there;\n"
+ "\t makes .res files smaller but dependent on the pool bundle\n"
+ "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n");
return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
@@ -241,6 +289,118 @@ main(int argc,
language = options[LANGUAGE].value;
}
+ if(options[WRITE_POOL_BUNDLE].doesOccur) {
+ newPoolBundle = bundle_open(NULL, TRUE, &status);
+ if(U_FAILURE(status)) {
+ fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status));
+ return status;
+ } else {
+ const char *poolResName = "pool.res";
+ char *nameWithoutSuffix = uprv_malloc(uprv_strlen(poolResName) + 1);
+ if (nameWithoutSuffix == NULL) {
+ fprintf(stderr, "out of memory error\n");
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+ uprv_strcpy(nameWithoutSuffix, poolResName);
+ *uprv_strrchr(nameWithoutSuffix, '.') = 0;
+ newPoolBundle->fLocale = nameWithoutSuffix;
+ }
+ }
+
+ if(options[USE_POOL_BUNDLE].doesOccur) {
+ const char *poolResName = "pool.res";
+ FileStream *poolFile;
+ int32_t poolFileSize;
+ int32_t indexLength;
+ /*
+ * TODO: Consolidate inputDir/filename handling from main() and processFile()
+ * into a common function, and use it here as well.
+ * Try to create toolutil functions for dealing with dir/filenames and
+ * loading ICU data files without udata_open().
+ * Share code with icupkg?
+ * Also, make_res_filename() seems to be unused. Review and remove.
+ */
+ if (options[USE_POOL_BUNDLE].value!=NULL) {
+ uprv_strcpy(theCurrentFileName, options[USE_POOL_BUNDLE].value);
+ uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
+ } else if (inputDir) {
+ uprv_strcpy(theCurrentFileName, inputDir);
+ uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
+ } else {
+ *theCurrentFileName = 0;
+ }
+ uprv_strcat(theCurrentFileName, poolResName);
+ poolFile = T_FileStream_open(theCurrentFileName, "rb");
+ if (poolFile == NULL) {
+ fprintf(stderr, "unable to open pool bundle file %s\n", theCurrentFileName);
+ return 1;
+ }
+ poolFileSize = T_FileStream_size(poolFile);
+ if (poolFileSize < 32) {
+ fprintf(stderr, "the pool bundle file %s is too small\n", theCurrentFileName);
+ return 1;
+ }
+ poolBundle.fBytes = (uint8_t *)uprv_malloc((poolFileSize + 15) & ~15);
+ if (poolFileSize > 0 && poolBundle.fBytes == NULL) {
+ fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", theCurrentFileName);
+ return U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ UDataSwapper *ds;
+ const DataHeader *header;
+ int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
+ int32_t keysBottom;
+ if (bytesRead != poolFileSize) {
+ fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName);
+ return 1;
+ }
+ /*
+ * Swap the pool bundle so that a single checked-in file can be used.
+ * The swapper functions also test that the data looks like
+ * a well-formed .res file.
+ */
+ ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
+ U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
+ theCurrentFileName, u_errorName(status));
+ return status;
+ }
+ ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
+ udata_closeSwapper(ds);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
+ theCurrentFileName, u_errorName(status));
+ return status;
+ }
+ header = (const DataHeader *)poolBundle.fBytes;
+ if (header->info.formatVersion[0]!=2) {
+ fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName);
+ return U_INVALID_FORMAT_ERROR;
+ }
+ poolBundle.fKeys = (const char *)header + header->dataHeader.headerSize;
+ poolBundle.fIndexes = (const int32_t *)poolBundle.fKeys + 1;
+ indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
+ if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
+ fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName);
+ return U_INVALID_FORMAT_ERROR;
+ }
+ keysBottom = (1 + indexLength) * 4;
+ poolBundle.fKeys += keysBottom;
+ poolBundle.fKeysLength = (poolBundle.fIndexes[URES_INDEX_KEYS_TOP] * 4) - keysBottom;
+ poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
+ }
+ for (i = 0; i < poolBundle.fKeysLength; ++i) {
+ if (poolBundle.fKeys[i] == 0) {
+ ++poolBundle.fKeysCount;
+ }
+ }
+ T_FileStream_close(poolFile);
+ setUsePoolBundle(TRUE);
+ }
+
+ if((argc-1)!=1) {
+ printf("genrb number of files: %d\n", argc - 1);
+ }
/* generate the binary files */
for(i = 1; i < argc; ++i) {
status = U_ZERO_ERROR;
@@ -260,8 +420,19 @@ main(int argc,
processFile(arg, encoding, inputDir, outputDir, gPackageName, &status);
}
+ uprv_free(poolBundle.fBytes);
+
+ if(options[WRITE_POOL_BUNDLE].doesOccur) {
+ char outputFileName[256];
+ bundle_write(newPoolBundle, outputDir, NULL, outputFileName, sizeof(outputFileName), &status);
+ bundle_close(newPoolBundle, &status);
+ if(U_FAILURE(status)) {
+ fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status));
+ }
+ }
+
/* Dont return warnings as a failure */
- if (! U_FAILURE(status)) {
+ if (U_SUCCESS(status)) {
return 0;
}
@@ -383,6 +554,31 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename,u_errorName(*status));
goto finish;
}
+ if(options[WRITE_POOL_BUNDLE].doesOccur) {
+ int32_t newKeysLength;
+ const char *newKeys, *newKeysLimit;
+ bundle_compactKeys(data, status);
+ newKeys = bundle_getKeyBytes(data, &newKeysLength);
+ bundle_addKeyBytes(newPoolBundle, newKeys, newKeysLength, status);
+ if(U_FAILURE(*status)) {
+ fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n",
+ filename, u_errorName(*status));
+ goto finish;
+ }
+ /* count the number of just-added key strings */
+ for(newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) {
+ if(*newKeys == 0) {
+ ++newPoolBundle->fKeysCount;
+ }
+ }
+ }
+
+ if(options[USE_POOL_BUNDLE].doesOccur) {
+ data->fPoolBundleKeys = poolBundle.fKeys;
+ data->fPoolBundleKeysLength = poolBundle.fKeysLength;
+ data->fPoolBundleKeysCount = poolBundle.fKeysCount;
+ data->fPoolChecksum = poolBundle.fChecksum;
+ }
/* Determine the target rb filename */
rbname = make_res_filename(filename, outputDir, packageName, status);
diff --git a/tools/genrb/parse.c b/tools/genrb/parse.c
index 136f56eb..b1b5eaa6 100644
--- a/tools/genrb/parse.c
+++ b/tools/genrb/parse.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1998-2008, International Business Machines
+* Copyright (C) 1998-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -712,15 +712,9 @@ addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
}
else if (uprv_strcmp(subtag, "Sequence") == 0)
{
-#if UCONFIG_NO_COLLATION
- warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
+#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
+ warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
#else
- /* in order to achieve smaller data files, we can direct genrb */
- /* to omit collation rules */
- if(!gOmitCollationRules) {
- /* first we add the "Sequence", so that we always have rules */
- table_add(result, member, line, status);
- }
if(gMakeBinaryCollation) {
UErrorCode intStatus = U_ZERO_ERROR;
@@ -729,8 +723,6 @@ addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
uint8_t *data = NULL;
UCollator *coll = NULL;
UParseError parseError;
- /* add sequence */
- /*table_add(result, member, line, status);*/
coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
@@ -754,9 +746,8 @@ addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
if (U_SUCCESS(intStatus) && data != NULL)
{
- member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
- /*table_add(bundle->fRoot, member, line, status);*/
- table_add(result, member, line, status);
+ struct SResource *collationBin = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
+ table_add(result, collationBin, line, status);
uprv_free(data);
}
else
@@ -772,6 +763,11 @@ addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
}
else
{
+ if(intStatus == U_FILE_ACCESS_ERROR) {
+ error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
+ *status = intStatus;
+ return NULL;
+ }
warning(line, "%%Collation could not be constructed from CollationElements - check context!");
if(isStrict()){
*status = intStatus;
@@ -784,6 +780,13 @@ addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
}
}
#endif
+ /* in order to achieve smaller data files, we can direct genrb */
+ /* to omit collation rules */
+ if(gOmitCollationRules) {
+ bundle_closeString(bundle, member);
+ } else {
+ table_add(result, member, line, status);
+ }
}
/*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
@@ -1838,7 +1841,7 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *st
ustr_init(&comment);
expect(TOK_STRING, &tokenValue, &comment, NULL, status);
- bundle = bundle_open(&comment, status);
+ bundle = bundle_open(&comment, FALSE, status);
if (bundle == NULL || U_FAILURE(*status))
{
diff --git a/tools/genrb/reslist.c b/tools/genrb/reslist.c
index b389ef18..b23b41b3 100644
--- a/tools/genrb/reslist.c
+++ b/tools/genrb/reslist.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2008, International Business Machines
+* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -23,21 +23,39 @@
#include "unicode/putil.h"
#include "errmsg.h"
+#include "uarrsort.h"
+#include "uinvchar.h"
+
+/*
+ * Align binary data at a 16-byte offset from the start of the resource bundle,
+ * to be safe for any data type it may contain.
+ */
#define BIN_ALIGNMENT 16
static UBool gIncludeCopyright = FALSE;
+static UBool gUsePoolBundle = FALSE;
+static int32_t gFormatVersion = 2;
+
+static UChar gEmptyString = 0;
+
+/* How do we store string values? */
+enum {
+ STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
+ STRINGS_UTF16_V2 /* formatVersion 2: optional length in 1..3 UChars + UChars + NUL */
+};
+
+enum {
+ MAX_IMPLICIT_STRING_LENGTH = 40 /* do not store the length explicitly for such strings */
+};
/*
* res_none() returns the address of kNoResource,
* for use in non-error cases when no resource is to be added to the bundle.
* (NULL is used in error cases.)
*/
-static struct SResource kNoResource = { RES_NONE };
-
-uint32_t res_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status);
+static const struct SResource kNoResource = { URES_NONE };
-static const UDataInfo dataInfo= {
+static UDataInfo dataInfo= {
sizeof(UDataInfo),
0,
@@ -46,11 +64,17 @@ static const UDataInfo dataInfo= {
sizeof(UChar),
0,
- {0x52, 0x65, 0x73, 0x42}, /* dataFormat="resb" */
- {1, 2, 0, 0}, /* formatVersion */
+ {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */
+ {1, 3, 0, 0}, /* formatVersion */
{1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/
};
+static const UVersionInfo gFormatVersions[3] = { /* indexed by a major-formatVersion integer */
+ { 0, 0, 0, 0 },
+ { 1, 3, 0, 0 },
+ { 2, 0, 0, 0 }
+};
+
static uint8_t calcPadding(uint32_t size) {
/* returns space we need to pad */
return (uint8_t) ((size % sizeof(uint32_t)) ? (sizeof(uint32_t) - (size % sizeof(uint32_t))) : 0);
@@ -65,267 +89,620 @@ UBool getIncludeCopyright(void){
return gIncludeCopyright;
}
-/* Writing Functions */
-static uint32_t string_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
- udata_write32(mem, res->u.fString.fLength);
- udata_writeUString(mem, res->u.fString.fChars, res->u.fString.fLength + 1);
- udata_writePadding(mem, calcPadding(res->fSize));
+void setFormatVersion(int32_t formatVersion) {
+ gFormatVersion = formatVersion;
+}
- return usedOffset;
+void setUsePoolBundle(UBool use) {
+ gUsePoolBundle = use;
}
+static void
+bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status);
+
/* Writing Functions */
-static uint32_t alias_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
- udata_write32(mem, res->u.fString.fLength);
- udata_writeUString(mem, res->u.fString.fChars, res->u.fString.fLength + 1);
- udata_writePadding(mem, calcPadding(res->fSize));
- return usedOffset;
-}
+/*
+ * type_write16() functions write resource values into f16BitUnits
+ * and determine the resource item word, if possible.
+ */
+static void
+res_write16(struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status);
-static uint32_t array_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
- uint32_t *resources = NULL;
- uint32_t i = 0;
+/*
+ * type_preWrite() functions calculate ("preflight") and advance the *byteOffset
+ * by the size of their data in the binary file and
+ * determine the resource item word.
+ * Most type_preWrite() functions may add any number of bytes, but res_preWrite()
+ * will always pad it to a multiple of 4.
+ * The resource item type may be a related subtype of the fType.
+ *
+ * The type_preWrite() and type_write() functions start and end at the same
+ * byteOffset values.
+ * Prewriting allows bundle_write() to determine the root resource item word,
+ * before actually writing the bundle contents to the file,
+ * which is necessary because the root item is stored at the beginning.
+ */
+static void
+res_preWrite(uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status);
- struct SResource *current = NULL;
+/*
+ * type_write() functions write their data to mem and update the byteOffset
+ * in parallel.
+ * (A kingdom for C++ and polymorphism...)
+ */
+static void
+res_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status);
+static uint16_t *
+reserve16BitUnits(struct SRBRoot *bundle, int32_t length, UErrorCode *status) {
if (U_FAILURE(*status)) {
- return 0;
+ return NULL;
+ }
+ if ((bundle->f16BitUnitsLength + length) > bundle->f16BitUnitsCapacity) {
+ uint16_t *newUnits;
+ int32_t capacity = 2 * bundle->f16BitUnitsCapacity + length + 1024;
+ capacity &= ~1; /* ensures padding fits if f16BitUnitsLength needs it */
+ newUnits = (uint16_t *)uprv_malloc(capacity * 2);
+ if (newUnits == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ if (bundle->f16BitUnitsLength > 0) {
+ uprv_memcpy(newUnits, bundle->f16BitUnits, bundle->f16BitUnitsLength * 2);
+ } else {
+ newUnits[0] = 0;
+ bundle->f16BitUnitsLength = 1;
+ }
+ uprv_free(bundle->f16BitUnits);
+ bundle->f16BitUnits = newUnits;
+ bundle->f16BitUnitsCapacity = capacity;
}
+ return bundle->f16BitUnits + bundle->f16BitUnitsLength;
+}
- if (res->u.fArray.fCount > 0) {
- resources = (uint32_t *) uprv_malloc(sizeof(uint32_t) * res->u.fArray.fCount);
+static int32_t
+makeRes16(uint32_t resWord) {
+ uint32_t type, offset;
+ if (resWord == 0) {
+ return 0; /* empty string */
+ }
+ type = RES_GET_TYPE(resWord);
+ offset = RES_GET_OFFSET(resWord);
+ if (type == URES_STRING_V2 && offset <= 0xffff) {
+ return (int32_t)offset;
+ }
+ return -1;
+}
- if (resources == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
+static int32_t
+mapKey(struct SRBRoot *bundle, int32_t oldpos) {
+ const KeyMapEntry *map = bundle->fKeyMap;
+ int32_t i, start, limit;
+
+ /* do a binary search for the old, pre-bundle_compactKeys() key offset */
+ start = bundle->fPoolBundleKeysCount;
+ limit = start + bundle->fKeysCount;
+ while (start < limit - 1) {
+ i = (start + limit) / 2;
+ if (oldpos < map[i].oldpos) {
+ limit = i;
+ } else {
+ start = i;
}
+ }
+ assert(oldpos == map[start].oldpos);
+ return map[start].newpos;
+}
- current = res->u.fArray.fFirst;
- i = 0;
+static uint16_t
+makeKey16(struct SRBRoot *bundle, int32_t key) {
+ if (key >= 0) {
+ return (uint16_t)key;
+ } else {
+ return (uint16_t)(key + bundle->fLocalKeyLimit); /* offset in the pool bundle */
+ }
+}
- while (current != NULL) {
- if (current->fType == URES_INT) {
- resources[i] = (current->fType << 28) | (current->u.fIntValue.fValue & 0xFFFFFFF);
- } else if (current->fType == URES_BINARY) {
- uint32_t uo = usedOffset;
+/*
+ * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
+ * For unique UTF-16 v2 strings, res_write16() sees fRes != RES_BOGUS
+ * and exits early.
+ */
+static void
+string_write16(struct SRBRoot *bundle, struct SResource *res, UErrorCode *status) {
+ struct SResource *same;
+ if ((same = res->u.fString.fSame) != NULL) {
+ /* This is a duplicate. */
+ if (same->fRes == RES_BOGUS) {
+ /* The original has not been visited yet. */
+ string_write16(bundle, same, status);
+ }
+ res->fRes = same->fRes;
+ res->fWritten = same->fWritten;
+ }
+}
- usedOffset = res_write(mem, current, usedOffset, status);
- resources[i] = (current->fType << 28) | (usedOffset >> 2);
- usedOffset += current->fSize + calcPadding(current->fSize) - (usedOffset - uo);
- } else {
- usedOffset = res_write(mem, current, usedOffset, status);
- resources[i] = (current->fType << 28) | (usedOffset >> 2);
- usedOffset += current->fSize + calcPadding(current->fSize);
- }
+static void
+array_write16(struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ struct SResource *current;
+ int32_t res16 = 0;
- i++;
- current = current->fNext;
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ if (res->u.fArray.fCount == 0 && gFormatVersion > 1) {
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
+ res->fWritten = TRUE;
+ return;
+ }
+ for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
+ res_write16(bundle, current, status);
+ res16 |= makeRes16(current->fRes);
+ }
+ if (U_SUCCESS(*status) && res->u.fArray.fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
+ uint16_t *p16 = reserve16BitUnits(bundle, 1 + res->u.fArray.fCount, status);
+ if (U_SUCCESS(*status)) {
+ res->fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnitsLength);
+ *p16++ = (uint16_t)res->u.fArray.fCount;
+ for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
+ *p16++ = (uint16_t)makeRes16(current->fRes);
+ }
+ bundle->f16BitUnitsLength += 1 + res->u.fArray.fCount;
+ res->fWritten = TRUE;
}
+ }
+}
- /* usedOffset += res->fSize + pad; */
+static void
+table_write16(struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ struct SResource *current;
+ int32_t maxKey = 0, maxPoolKey = 0x80000000;
+ int32_t res16 = 0;
+ UBool hasLocalKeys = FALSE, hasPoolKeys = FALSE;
- udata_write32(mem, res->u.fArray.fCount);
- udata_writeBlock(mem, resources, sizeof(uint32_t) * res->u.fArray.fCount);
- uprv_free(resources);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ if (res->u.fTable.fCount == 0 && gFormatVersion > 1) {
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
+ res->fWritten = TRUE;
+ return;
+ }
+ /* Find the smallest table type that fits the data. */
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ int32_t key;
+ res_write16(bundle, current, status);
+ if (bundle->fKeyMap == NULL) {
+ key = current->fKey;
+ } else {
+ key = current->fKey = mapKey(bundle, current->fKey);
+ }
+ if (key >= 0) {
+ hasLocalKeys = TRUE;
+ if (key > maxKey) {
+ maxKey = key;
+ }
+ } else {
+ hasPoolKeys = TRUE;
+ if (key > maxPoolKey) {
+ maxPoolKey = key;
+ }
+ }
+ res16 |= makeRes16(current->fRes);
+ }
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ if(res->u.fTable.fCount > (uint32_t)bundle->fMaxTableLength) {
+ bundle->fMaxTableLength = res->u.fTable.fCount;
+ }
+ maxPoolKey &= 0x7fffffff;
+ if (res->u.fTable.fCount <= 0xffff &&
+ (!hasLocalKeys || maxKey < bundle->fLocalKeyLimit) &&
+ (!hasPoolKeys || maxPoolKey < (0x10000 - bundle->fLocalKeyLimit))
+ ) {
+ if (res16 >= 0 && gFormatVersion > 1) {
+ uint16_t *p16 = reserve16BitUnits(bundle, 1 + res->u.fTable.fCount * 2, status);
+ if (U_SUCCESS(*status)) {
+ /* 16-bit count, key offsets and values */
+ res->fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnitsLength);
+ *p16++ = (uint16_t)res->u.fTable.fCount;
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ *p16++ = makeKey16(bundle, current->fKey);
+ }
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ *p16++ = (uint16_t)makeRes16(current->fRes);
+ }
+ bundle->f16BitUnitsLength += 1 + res->u.fTable.fCount * 2;
+ res->fWritten = TRUE;
+ }
+ } else {
+ /* 16-bit count, 16-bit key offsets, 32-bit values */
+ res->u.fTable.fType = URES_TABLE;
+ }
} else {
- /* array is empty */
- udata_write32(mem, 0);
+ /* 32-bit count, key offsets and values */
+ res->u.fTable.fType = URES_TABLE32;
}
-
- return usedOffset;
}
-static uint32_t intvector_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
- uint32_t i = 0;
- udata_write32(mem, res->u.fIntVector.fCount);
- for(i = 0; i<res->u.fIntVector.fCount; i++) {
- udata_write32(mem, res->u.fIntVector.fArray[i]);
+static void
+res_write16(struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ if (U_FAILURE(*status) || res == NULL) {
+ return;
+ }
+ if (res->fRes != RES_BOGUS) {
+ /*
+ * The resource item word was already precomputed, which means
+ * no further data needs to be written.
+ * This might be an integer, or an empty or UTF-16 v2 string,
+ * an empty binary, etc.
+ */
+ return;
}
+ switch (res->fType) {
+ case URES_STRING:
+ string_write16(bundle, res, status);
+ break;
+ case URES_ARRAY:
+ array_write16(bundle, res, status);
+ break;
+ case URES_TABLE:
+ table_write16(bundle, res, status);
+ break;
+ default:
+ /* Only a few resource types write 16-bit units. */
+ break;
+ }
+}
- return usedOffset;
+/*
+ * Only called for UTF-16 v1 strings.
+ * For UTF-16 v2 strings, res_preWrite() sees fRes != RES_BOGUS
+ * and exits early.
+ */
+static void
+string_preWrite(uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ /* Write the UTF-16 v1 string. */
+ res->fRes = URES_MAKE_RESOURCE(URES_STRING, *byteOffset >> 2);
+ *byteOffset += 4 + (res->u.fString.fLength + 1) * U_SIZEOF_UCHAR;
}
-static uint32_t bin_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
+static void
+bin_preWrite(uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
uint32_t pad = 0;
- uint32_t extrapad = calcPadding(res->fSize);
- uint32_t dataStart = usedOffset + sizeof(res->u.fBinaryValue.fLength);
+ uint32_t dataStart = *byteOffset + sizeof(res->u.fBinaryValue.fLength);
if (dataStart % BIN_ALIGNMENT) {
pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
- udata_writePadding(mem, pad);
- usedOffset += pad;
- }
-
- udata_write32(mem, res->u.fBinaryValue.fLength);
- if (res->u.fBinaryValue.fLength > 0) {
- udata_writeBlock(mem, res->u.fBinaryValue.fData, res->u.fBinaryValue.fLength);
+ *byteOffset += pad; /* pad == 4 or 8 or 12 */
}
- udata_writePadding(mem, (BIN_ALIGNMENT - pad + extrapad));
-
- return usedOffset;
+ res->fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
+ *byteOffset += 4 + res->u.fBinaryValue.fLength;
}
-static uint32_t int_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
- return usedOffset;
-}
+static void
+array_preWrite(uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ struct SResource *current;
-static uint32_t table_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
- uint8_t pad = 0;
- uint32_t i = 0;
- uint16_t *keys16 = NULL;
- int32_t *keys32 = NULL;
- uint32_t *resources = NULL;
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
+ res_preWrite(byteOffset, bundle, current, status);
+ }
+ res->fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
+ *byteOffset += (1 + res->u.fArray.fCount) * 4;
+}
- struct SResource *current = NULL;
+static void
+table_preWrite(uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ struct SResource *current;
if (U_FAILURE(*status)) {
- return 0;
+ return;
}
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ res_preWrite(byteOffset, bundle, current, status);
+ }
+ if (res->u.fTable.fType == URES_TABLE) {
+ /* 16-bit count, 16-bit key offsets, 32-bit values */
+ res->fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
+ *byteOffset += 2 + res->u.fTable.fCount * 6;
+ } else {
+ /* 32-bit count, key offsets and values */
+ res->fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
+ *byteOffset += 4 + res->u.fTable.fCount * 8;
+ }
+}
- pad = calcPadding(res->fSize);
-
- if (res->u.fTable.fCount > 0) {
- if(res->fType == URES_TABLE) {
- keys16 = (uint16_t *) uprv_malloc(sizeof(uint16_t) * res->u.fTable.fCount);
- if (keys16 == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
+static void
+res_preWrite(uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ if (U_FAILURE(*status) || res == NULL) {
+ return;
+ }
+ if (res->fRes != RES_BOGUS) {
+ /*
+ * The resource item word was already precomputed, which means
+ * no further data needs to be written.
+ * This might be an integer, or an empty or UTF-16 v2 string,
+ * an empty binary, etc.
+ */
+ return;
+ }
+ switch (res->fType) {
+ case URES_STRING:
+ string_preWrite(byteOffset, bundle, res, status);
+ break;
+ case URES_ALIAS:
+ res->fRes = URES_MAKE_RESOURCE(URES_ALIAS, *byteOffset >> 2);
+ *byteOffset += 4 + (res->u.fString.fLength + 1) * U_SIZEOF_UCHAR;
+ break;
+ case URES_INT_VECTOR:
+ if (res->u.fIntVector.fCount == 0 && gFormatVersion > 1) {
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
+ res->fWritten = TRUE;
} else {
- keys32 = (int32_t *) uprv_malloc(sizeof(int32_t) * res->u.fTable.fCount);
- if (keys32 == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
+ res->fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
+ *byteOffset += (1 + res->u.fIntVector.fCount) * 4;
}
+ break;
+ case URES_BINARY:
+ bin_preWrite(byteOffset, bundle, res, status);
+ break;
+ case URES_INT:
+ break;
+ case URES_ARRAY:
+ array_preWrite(byteOffset, bundle, res, status);
+ break;
+ case URES_TABLE:
+ table_preWrite(byteOffset, bundle, res, status);
+ break;
+ default:
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ break;
+ }
+ *byteOffset += calcPadding(*byteOffset);
+}
- resources = (uint32_t *) uprv_malloc(sizeof(uint32_t) * res->u.fTable.fCount);
-
- if (resources == NULL) {
- uprv_free(keys16);
- uprv_free(keys32);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- current = res->u.fTable.fFirst;
- i = 0;
-
- while (current != NULL) {
- assert(i < res->u.fTable.fCount);
-
- /* where the key is */
- if(res->fType == URES_TABLE) {
- keys16[i] = (uint16_t) current->fKey;
- } else {
- keys32[i] = current->fKey;
- }
+/*
+ * Only called for UTF-16 v1 strings. For UTF-16 v2 strings,
+ * res_write() sees fWritten and exits early.
+ */
+static void string_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ /* Write the UTF-16 v1 string. */
+ int32_t length = res->u.fString.fLength;
+ udata_write32(mem, length);
+ udata_writeUString(mem, res->u.fString.fChars, length + 1);
+ *byteOffset += 4 + (length + 1) * U_SIZEOF_UCHAR;
+ res->fWritten = TRUE;
+}
- if (current->fType == URES_INT) {
- resources[i] = (current->fType << 28) | (current->u.fIntValue.fValue & 0xFFFFFFF);
- } else if (current->fType == URES_BINARY) {
- uint32_t uo = usedOffset;
+static void alias_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ int32_t length = res->u.fString.fLength;
+ udata_write32(mem, length);
+ udata_writeUString(mem, res->u.fString.fChars, length + 1);
+ *byteOffset += 4 + (length + 1) * U_SIZEOF_UCHAR;
+}
- usedOffset = res_write(mem, current, usedOffset, status);
- resources[i] = (current->fType << 28) | (usedOffset >> 2);
- usedOffset += current->fSize + calcPadding(current->fSize) - (usedOffset - uo);
- } else {
- usedOffset = res_write(mem, current, usedOffset, status);
- resources[i] = (current->fType << 28) | (usedOffset >> 2);
- usedOffset += current->fSize + calcPadding(current->fSize);
- }
+static void array_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ uint32_t i;
- i++;
- current = current->fNext;
- }
+ struct SResource *current = NULL;
- if(res->fType == URES_TABLE) {
- udata_write16(mem, (uint16_t)res->u.fTable.fCount);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ for (i = 0, current = res->u.fArray.fFirst; current != NULL; ++i, current = current->fNext) {
+ res_write(mem, byteOffset, bundle, current, status);
+ }
+ assert(i == res->u.fArray.fCount);
- udata_writeBlock(mem, keys16, sizeof(uint16_t) * res->u.fTable.fCount);
- udata_writePadding(mem, pad);
- } else {
- udata_write32(mem, res->u.fTable.fCount);
+ udata_write32(mem, res->u.fArray.fCount);
+ for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
+ udata_write32(mem, current->fRes);
+ }
+ *byteOffset += (1 + res->u.fArray.fCount) * 4;
+}
- udata_writeBlock(mem, keys32, sizeof(int32_t) * res->u.fTable.fCount);
- }
+static void intvector_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ uint32_t i = 0;
+ udata_write32(mem, res->u.fIntVector.fCount);
+ for(i = 0; i<res->u.fIntVector.fCount; i++) {
+ udata_write32(mem, res->u.fIntVector.fArray[i]);
+ }
+ *byteOffset += (1 + res->u.fIntVector.fCount) * 4;
+}
- udata_writeBlock(mem, resources, sizeof(uint32_t) * res->u.fTable.fCount);
+static void bin_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ uint32_t pad = 0;
+ uint32_t dataStart = *byteOffset + sizeof(res->u.fBinaryValue.fLength);
- uprv_free(keys16);
- uprv_free(keys32);
- uprv_free(resources);
- } else {
- /* table is empty */
- if(res->fType == URES_TABLE) {
- udata_write16(mem, 0);
- udata_writePadding(mem, pad);
- } else {
- udata_write32(mem, 0);
- }
+ if (dataStart % BIN_ALIGNMENT) {
+ pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
+ udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */
+ *byteOffset += pad;
}
- return usedOffset;
+ udata_write32(mem, res->u.fBinaryValue.fLength);
+ if (res->u.fBinaryValue.fLength > 0) {
+ udata_writeBlock(mem, res->u.fBinaryValue.fData, res->u.fBinaryValue.fLength);
+ }
+ *byteOffset += 4 + res->u.fBinaryValue.fLength;
}
-uint32_t res_write(UNewDataMemory *mem, struct SResource *res,
- uint32_t usedOffset, UErrorCode *status) {
+static void table_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ struct SResource *current;
+ uint32_t i;
+
if (U_FAILURE(*status)) {
- return 0;
+ return;
}
+ for (i = 0, current = res->u.fTable.fFirst; current != NULL; ++i, current = current->fNext) {
+ assert(i < res->u.fTable.fCount);
+ res_write(mem, byteOffset, bundle, current, status);
+ }
+ assert(i == res->u.fTable.fCount);
- if (res != NULL) {
- switch (res->fType) {
- case URES_STRING:
- return string_write (mem, res, usedOffset, status);
- case URES_ALIAS:
- return alias_write (mem, res, usedOffset, status);
- case URES_INT_VECTOR:
- return intvector_write (mem, res, usedOffset, status);
- case URES_BINARY:
- return bin_write (mem, res, usedOffset, status);
- case URES_INT:
- return int_write (mem, res, usedOffset, status);
- case URES_ARRAY:
- return array_write (mem, res, usedOffset, status);
- case URES_TABLE:
- case URES_TABLE32:
- return table_write (mem, res, usedOffset, status);
-
- default:
- break;
+ if(res->u.fTable.fType == URES_TABLE) {
+ udata_write16(mem, (uint16_t)res->u.fTable.fCount);
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ udata_write16(mem, makeKey16(bundle, current->fKey));
+ }
+ *byteOffset += (1 + res->u.fTable.fCount)* 2;
+ if ((res->u.fTable.fCount & 1) == 0) {
+ /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
+ udata_writePadding(mem, 2);
+ *byteOffset += 2;
}
+ } else /* URES_TABLE32 */ {
+ udata_write32(mem, res->u.fTable.fCount);
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ udata_write32(mem, (uint32_t)current->fKey);
+ }
+ *byteOffset += (1 + res->u.fTable.fCount)* 4;
+ }
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ udata_write32(mem, current->fRes);
}
+ *byteOffset += res->u.fTable.fCount * 4;
+}
- *status = U_INTERNAL_PROGRAM_ERROR;
- return 0;
+void res_write(UNewDataMemory *mem, uint32_t *byteOffset,
+ struct SRBRoot *bundle, struct SResource *res,
+ UErrorCode *status) {
+ uint8_t paddingSize;
+
+ if (U_FAILURE(*status) || res == NULL) {
+ return;
+ }
+ if (res->fWritten) {
+ assert(res->fRes != RES_BOGUS);
+ return;
+ }
+ switch (res->fType) {
+ case URES_STRING:
+ string_write (mem, byteOffset, bundle, res, status);
+ break;
+ case URES_ALIAS:
+ alias_write (mem, byteOffset, bundle, res, status);
+ break;
+ case URES_INT_VECTOR:
+ intvector_write (mem, byteOffset, bundle, res, status);
+ break;
+ case URES_BINARY:
+ bin_write (mem, byteOffset, bundle, res, status);
+ break;
+ case URES_INT:
+ break; /* fRes was set by int_open() */
+ case URES_ARRAY:
+ array_write (mem, byteOffset, bundle, res, status);
+ break;
+ case URES_TABLE:
+ table_write (mem, byteOffset, bundle, res, status);
+ break;
+ default:
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ break;
+ }
+ paddingSize = calcPadding(*byteOffset);
+ if (paddingSize > 0) {
+ udata_writePadding(mem, paddingSize);
+ *byteOffset += paddingSize;
+ }
+ res->fWritten = TRUE;
}
-void bundle_write(struct SRBRoot *bundle, const char *outputDir, const char *outputPkg, char *writtenFilename, int writtenFilenameLen, UErrorCode *status) {
+void bundle_write(struct SRBRoot *bundle,
+ const char *outputDir, const char *outputPkg,
+ char *writtenFilename, int writtenFilenameLen,
+ UErrorCode *status) {
UNewDataMemory *mem = NULL;
- uint8_t pad = 0;
- uint32_t root = 0;
- uint32_t usedOffset = 0;
+ uint32_t byteOffset = 0;
uint32_t top, size;
char dataName[1024];
int32_t indexes[URES_INDEX_TOP];
- if (writtenFilename && writtenFilenameLen) {
- *writtenFilename = 0;
+ bundle_compactKeys(bundle, status);
+ /*
+ * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
+ * Safe because the capacity is a multiple of 4.
+ */
+ while (bundle->fKeysTop & 3) {
+ bundle->fKeys[bundle->fKeysTop++] = (char)0xaa;
+ }
+ /*
+ * In URES_TABLE, use all local key offsets that fit into 16 bits,
+ * and use the remaining 16-bit offsets for pool key offsets
+ * if there are any.
+ * If there are no local keys, then use the whole 16-bit space
+ * for pool key offsets.
+ * Note: This cannot be changed without changing the major formatVersion.
+ */
+ if (bundle->fKeysBottom < bundle->fKeysTop) {
+ if (bundle->fKeysTop <= 0x10000) {
+ bundle->fLocalKeyLimit = bundle->fKeysTop;
+ } else {
+ bundle->fLocalKeyLimit = 0x10000;
+ }
+ } else {
+ bundle->fLocalKeyLimit = 0;
}
+ bundle_compactStrings(bundle, status);
+ res_write16(bundle, bundle->fRoot, status);
+ if (bundle->f16BitUnitsLength & 1) {
+ bundle->f16BitUnits[bundle->f16BitUnitsLength++] = 0xaaaa; /* pad to multiple of 4 bytes */
+ }
+ /* all keys have been mapped */
+ uprv_free(bundle->fKeyMap);
+ bundle->fKeyMap = NULL;
+
+ byteOffset = bundle->fKeysTop + bundle->f16BitUnitsLength * 2;
+ res_preWrite(&byteOffset, bundle, bundle->fRoot, status);
+
+ /* total size including the root item */
+ top = byteOffset;
+
if (U_FAILURE(*status)) {
return;
}
+ if (writtenFilename && writtenFilenameLen) {
+ *writtenFilename = 0;
+ }
+
if (writtenFilename) {
int32_t off = 0, len = 0;
if (outputDir) {
@@ -376,24 +753,15 @@ void bundle_write(struct SRBRoot *bundle, const char *outputDir, const char *out
uprv_strcpy(dataName, bundle->fLocale);
}
+ uprv_memcpy(dataInfo.formatVersion, gFormatVersions + gFormatVersion, sizeof(UVersionInfo));
+
mem = udata_create(outputDir, "res", dataName, &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, status);
if(U_FAILURE(*status)){
return;
}
- pad = calcPadding(bundle->fKeyPoint);
-
- usedOffset = bundle->fKeyPoint + pad ; /* top of the strings */
-
- /* we're gonna put the main table at the end */
- top = usedOffset + bundle->fRoot->u.fTable.fChildrenSize;
- root = (top) >> 2 | (bundle->fRoot->fType << 28);
/* write the root item */
- udata_write32(mem, root);
-
- /* add to top the size of the root item */
- top += bundle->fRoot->fSize;
- top += calcPadding(top);
+ udata_write32(mem, bundle->fRoot->fRes);
/*
* formatVersion 1.1 (ICU 2.8):
@@ -401,8 +769,8 @@ void bundle_write(struct SRBRoot *bundle, const char *outputDir, const char *out
* to make it easier to parse resource bundles in icuswap or from Java etc.
*/
uprv_memset(indexes, 0, sizeof(indexes));
- indexes[URES_INDEX_LENGTH]= URES_INDEX_TOP;
- indexes[URES_INDEX_STRINGS_TOP]= (int32_t)(usedOffset>>2);
+ indexes[URES_INDEX_LENGTH]= bundle->fIndexLength;
+ indexes[URES_INDEX_KEYS_TOP]= bundle->fKeysTop>>2;
indexes[URES_INDEX_RESOURCES_TOP]= (int32_t)(top>>2);
indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP];
indexes[URES_INDEX_MAX_TABLE_LENGTH]= bundle->fMaxTableLength;
@@ -412,22 +780,43 @@ void bundle_write(struct SRBRoot *bundle, const char *outputDir, const char *out
* write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
* the memset() above initialized all indexes[] to 0
*/
- if(bundle->noFallback) {
+ if (bundle->noFallback) {
indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
}
+ /*
+ * formatVersion 2.0 (ICU 4.4):
+ * more compact string value storage, optional pool bundle
+ */
+ if (URES_INDEX_16BIT_TOP < bundle->fIndexLength) {
+ indexes[URES_INDEX_16BIT_TOP] = (bundle->fKeysTop>>2) + (bundle->f16BitUnitsLength>>1);
+ }
+ if (URES_INDEX_POOL_CHECKSUM < bundle->fIndexLength) {
+ if (bundle->fIsPoolBundle) {
+ indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
+ indexes[URES_INDEX_POOL_CHECKSUM] =
+ (int32_t)computeCRC((char *)(bundle->fKeys + bundle->fKeysBottom),
+ (uint32_t)(bundle->fKeysTop - bundle->fKeysBottom),
+ 0);
+ } else if (gUsePoolBundle) {
+ indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
+ indexes[URES_INDEX_POOL_CHECKSUM] = bundle->fPoolChecksum;
+ }
+ }
/* write the indexes[] */
- udata_writeBlock(mem, indexes, sizeof(indexes));
+ udata_writeBlock(mem, indexes, bundle->fIndexLength*4);
/* write the table key strings */
- udata_writeBlock(mem, bundle->fKeys+URES_STRINGS_BOTTOM,
- bundle->fKeyPoint-URES_STRINGS_BOTTOM);
+ udata_writeBlock(mem, bundle->fKeys+bundle->fKeysBottom,
+ bundle->fKeysTop-bundle->fKeysBottom);
- /* write the padding bytes after the table key strings */
- udata_writePadding(mem, pad);
+ /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
+ udata_writeBlock(mem, bundle->f16BitUnits, bundle->f16BitUnitsLength*2);
/* write all of the bundle contents: the root item and its children */
- usedOffset = res_write(mem, bundle->fRoot, usedOffset, status);
+ byteOffset = bundle->fKeysTop + bundle->f16BitUnitsLength * 2;
+ res_write(mem, &byteOffset, bundle, bundle->fRoot, status);
+ assert(byteOffset == top);
size = udata_finish(mem, status);
if(top != size) {
@@ -438,222 +827,205 @@ void bundle_write(struct SRBRoot *bundle, const char *outputDir, const char *out
}
/* Opening Functions */
-struct SResource* res_open(const struct UString* comment, UErrorCode* status){
- struct SResource *res;
+/* gcc 4.2 complained "no previous prototype for res_open" without this prototype... */
+struct SResource* res_open(struct SRBRoot *bundle, const char *tag,
+ const struct UString* comment, UErrorCode* status);
+
+struct SResource* res_open(struct SRBRoot *bundle, const char *tag,
+ const struct UString* comment, UErrorCode* status){
+ struct SResource *res;
+ int32_t key = bundle_addtag(bundle, tag, status);
if (U_FAILURE(*status)) {
return NULL;
}
res = (struct SResource *) uprv_malloc(sizeof(struct SResource));
-
if (res == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
uprv_memset(res, 0, sizeof(struct SResource));
+ res->fKey = key;
+ res->fRes = RES_BOGUS;
ustr_init(&res->fComment);
if(comment != NULL){
ustr_cpy(&res->fComment, comment, status);
+ if (U_FAILURE(*status)) {
+ res_close(res);
+ return NULL;
+ }
}
return res;
-
}
struct SResource* res_none() {
- return &kNoResource;
+ return (struct SResource*)&kNoResource;
}
-struct SResource* table_open(struct SRBRoot *bundle, char *tag, const struct UString* comment, UErrorCode *status) {
-
- struct SResource *res = res_open(comment, status);
-
- res->fKey = bundle_addtag(bundle, tag, status);
-
+struct SResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
- res_close(res);
return NULL;
}
-
- res->fNext = NULL;
-
- /*
- * always open a table not a table32 in case it remains empty -
- * try to use table32 only when necessary
- */
res->fType = URES_TABLE;
- res->fSize = sizeof(uint16_t);
-
- res->u.fTable.fCount = 0;
- res->u.fTable.fChildrenSize = 0;
- res->u.fTable.fFirst = NULL;
- res->u.fTable.fRoot = bundle;
-
+ res->u.fTable.fRoot = bundle;
return res;
}
struct SResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
-
- struct SResource *res = res_open(comment, status);
-
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
-
res->fType = URES_ARRAY;
- res->fKey = bundle_addtag(bundle, tag, status);
-
- if (U_FAILURE(*status)) {
- uprv_free(res);
- return NULL;
- }
-
- res->fNext = NULL;
- res->fSize = sizeof(int32_t);
+ return res;
+}
- res->u.fArray.fCount = 0;
- res->u.fArray.fChildrenSize = 0;
- res->u.fArray.fFirst = NULL;
- res->u.fArray.fLast = NULL;
+static int32_t U_CALLCONV
+string_hash(const UHashTok key) {
+ const struct SResource *res = (struct SResource *)key.pointer;
+ return uhash_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
+}
- return res;
+static UBool U_CALLCONV
+string_comp(const UHashTok key1, const UHashTok key2) {
+ const struct SResource *res1 = (struct SResource *)key1.pointer;
+ const struct SResource *res2 = (struct SResource *)key2.pointer;
+ return 0 == u_strCompare(res1->u.fString.fChars, res1->u.fString.fLength,
+ res2->u.fString.fChars, res2->u.fString.fLength,
+ FALSE);
}
struct SResource *string_open(struct SRBRoot *bundle, char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
- struct SResource *res = res_open(comment, status);
-
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
-
res->fType = URES_STRING;
- res->fKey = bundle_addtag(bundle, tag, status);
- if (U_FAILURE(*status)) {
- uprv_free(res);
- return NULL;
+ if (len == 0 && gFormatVersion > 1) {
+ res->u.fString.fChars = &gEmptyString;
+ res->fRes = 0;
+ res->fWritten = TRUE;
+ return res;
}
- res->fNext = NULL;
-
res->u.fString.fLength = len;
- res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
- if (res->u.fString.fChars == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(res);
- return NULL;
+ if (gFormatVersion > 1) {
+ /* check for duplicates */
+ res->u.fString.fChars = (UChar *)value;
+ if (bundle->fStringSet == NULL) {
+ UErrorCode localStatus = U_ZERO_ERROR; /* if failure: just don't detect dups */
+ bundle->fStringSet = uhash_open(string_hash, string_comp, string_comp, &localStatus);
+ } else {
+ res->u.fString.fSame = uhash_get(bundle->fStringSet, res);
+ }
}
+ if (res->u.fString.fSame == NULL) {
+ /* this is a new string */
+ res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
- uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * (len + 1));
- res->fSize = sizeof(int32_t) + sizeof(UChar) * (len+1);
+ if (res->u.fString.fChars == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ uprv_free(res);
+ return NULL;
+ }
+ uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
+ res->u.fString.fChars[len] = 0;
+ if (bundle->fStringSet != NULL) {
+ /* put it into the set for finding duplicates */
+ uhash_put(bundle->fStringSet, res, res, status);
+ }
+
+ if (bundle->fStringsForm != STRINGS_UTF16_V1) {
+ if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(value[0]) && len == u_strlen(value)) {
+ /*
+ * This string will be stored without an explicit length.
+ * Runtime will detect !U16_IS_TRAIL(value[0]) and call u_strlen().
+ */
+ res->u.fString.fNumCharsForLength = 0;
+ } else if (len <= 0x3ee) {
+ res->u.fString.fNumCharsForLength = 1;
+ } else if (len <= 0xfffff) {
+ res->u.fString.fNumCharsForLength = 2;
+ } else {
+ res->u.fString.fNumCharsForLength = 3;
+ }
+ bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* +1 for the NUL */
+ }
+ } else {
+ /* this is a duplicate of fSame */
+ struct SResource *same = res->u.fString.fSame;
+ res->u.fString.fChars = same->u.fString.fChars;
+ }
return res;
}
/* TODO: make alias_open and string_open use the same code */
struct SResource *alias_open(struct SRBRoot *bundle, char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
- struct SResource *res = res_open(comment, status);
-
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
-
res->fType = URES_ALIAS;
- res->fKey = bundle_addtag(bundle, tag, status);
-
- if (U_FAILURE(*status)) {
- uprv_free(res);
- return NULL;
+ if (len == 0 && gFormatVersion > 1) {
+ res->u.fString.fChars = &gEmptyString;
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_ALIAS);
+ res->fWritten = TRUE;
+ return res;
}
- res->fNext = NULL;
-
res->u.fString.fLength = len;
res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
-
if (res->u.fString.fChars == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(res);
return NULL;
}
-
uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * (len + 1));
- res->fSize = sizeof(int32_t) + sizeof(UChar) * (len + 1);
-
return res;
}
struct SResource* intvector_open(struct SRBRoot *bundle, char *tag, const struct UString* comment, UErrorCode *status) {
- struct SResource *res = res_open(comment, status);
-
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
-
res->fType = URES_INT_VECTOR;
- res->fKey = bundle_addtag(bundle, tag, status);
-
- if (U_FAILURE(*status)) {
- uprv_free(res);
- return NULL;
- }
-
- res->fNext = NULL;
- res->fSize = sizeof(int32_t);
res->u.fIntVector.fCount = 0;
res->u.fIntVector.fArray = (uint32_t *) uprv_malloc(sizeof(uint32_t) * RESLIST_MAX_INT_VECTOR);
-
if (res->u.fIntVector.fArray == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(res);
return NULL;
}
-
return res;
}
struct SResource *int_open(struct SRBRoot *bundle, char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
- struct SResource *res = res_open(comment, status);
-
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
-
res->fType = URES_INT;
- res->fKey = bundle_addtag(bundle, tag, status);
-
- if (U_FAILURE(*status)) {
- uprv_free(res);
- return NULL;
- }
-
- res->fSize = 0;
- res->fNext = NULL;
res->u.fIntValue.fValue = value;
-
+ res->fRes = URES_MAKE_RESOURCE(URES_INT, value & 0x0FFFFFFF);
+ res->fWritten = TRUE;
return res;
}
struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
- struct SResource *res = res_open(comment, status);
-
+ struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
-
res->fType = URES_BINARY;
- res->fKey = bundle_addtag(bundle, tag, status);
-
- if (U_FAILURE(*status)) {
- uprv_free(res);
- return NULL;
- }
-
- res->fNext = NULL;
res->u.fBinaryValue.fLength = length;
res->u.fBinaryValue.fFileName = NULL;
@@ -674,61 +1046,60 @@ struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t len
}
else {
res->u.fBinaryValue.fData = NULL;
+ if (gFormatVersion > 1) {
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
+ res->fWritten = TRUE;
+ }
}
- res->fSize = sizeof(int32_t) + sizeof(uint8_t) * length + BIN_ALIGNMENT;
-
return res;
}
-struct SRBRoot *bundle_open(const struct UString* comment, UErrorCode *status) {
- struct SRBRoot *bundle = NULL;
+struct SRBRoot *bundle_open(const struct UString* comment, UBool isPoolBundle, UErrorCode *status) {
+ struct SRBRoot *bundle;
if (U_FAILURE(*status)) {
return NULL;
}
bundle = (struct SRBRoot *) uprv_malloc(sizeof(struct SRBRoot));
-
if (bundle == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
uprv_memset(bundle, 0, sizeof(struct SRBRoot));
- bundle->fLocale = NULL;
-
- bundle->fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
- bundle->fKeysCapacity = KEY_SPACE_SIZE;
-
- if(comment != NULL){
-
- }
-
- if (bundle->fKeys == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(bundle);
- return NULL;
- }
-
- /* formatVersion 1.1: start fKeyPoint after the root item and indexes[] */
- bundle->fKeyPoint = URES_STRINGS_BOTTOM;
- uprv_memset(bundle->fKeys, 0, URES_STRINGS_BOTTOM);
-
- bundle->fCount = 0;
- bundle->fRoot = table_open(bundle, NULL, comment, status);
-
- if (bundle->fRoot == NULL || U_FAILURE(*status)) {
+ bundle->fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
+ bundle->fRoot = table_open(bundle, NULL, comment, status);
+ if (bundle->fKeys == NULL || bundle->fRoot == NULL || U_FAILURE(*status)) {
if (U_SUCCESS(*status)) {
*status = U_MEMORY_ALLOCATION_ERROR;
}
-
- uprv_free(bundle->fKeys);
- uprv_free(bundle);
-
+ bundle_close(bundle, status);
return NULL;
}
+ bundle->fLocale = NULL;
+ bundle->fKeysCapacity = KEY_SPACE_SIZE;
+ /* formatVersion 1.1: start fKeysTop after the root item and indexes[] */
+ bundle->fIsPoolBundle = isPoolBundle;
+ if (gUsePoolBundle || isPoolBundle) {
+ bundle->fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
+ } else if (gFormatVersion >= 2) {
+ bundle->fIndexLength = URES_INDEX_16BIT_TOP + 1;
+ } else /* formatVersion 1 */ {
+ bundle->fIndexLength = URES_INDEX_ATTRIBUTES + 1;
+ }
+ bundle->fKeysBottom = (1 /* root */ + bundle->fIndexLength) * 4;
+ uprv_memset(bundle->fKeys, 0, bundle->fKeysBottom);
+ bundle->fKeysTop = bundle->fKeysBottom;
+
+ if (gFormatVersion == 1) {
+ bundle->fStringsForm = STRINGS_UTF16_V1;
+ } else {
+ bundle->fStringsForm = STRINGS_UTF16_V2;
+ }
+
return bundle;
}
@@ -768,7 +1139,10 @@ static void array_close(struct SResource *array) {
}
static void string_close(struct SResource *string) {
- if (string->u.fString.fChars != NULL) {
+ if (string->u.fString.fChars != NULL &&
+ string->u.fString.fChars != &gEmptyString &&
+ string->u.fString.fSame == NULL
+ ) {
uprv_free(string->u.fString.fChars);
string->u.fString.fChars =NULL;
}
@@ -821,7 +1195,6 @@ void res_close(struct SResource *res) {
array_close(res);
break;
case URES_TABLE:
- case URES_TABLE32:
table_close(res);
break;
default:
@@ -835,19 +1208,20 @@ void res_close(struct SResource *res) {
}
void bundle_close(struct SRBRoot *bundle, UErrorCode *status) {
- if (bundle->fRoot != NULL) {
- res_close(bundle->fRoot);
- }
-
- if (bundle->fLocale != NULL) {
- uprv_free(bundle->fLocale);
- }
+ res_close(bundle->fRoot);
+ uprv_free(bundle->fLocale);
+ uprv_free(bundle->fKeys);
+ uprv_free(bundle->fKeyMap);
+ uhash_close(bundle->fStringSet);
+ uprv_free(bundle->f16BitUnits);
+ uprv_free(bundle);
+}
- if (bundle->fKeys != NULL) {
- uprv_free(bundle->fKeys);
+void bundle_closeString(struct SRBRoot *bundle, struct SResource *string) {
+ if (bundle->fStringSet != NULL) {
+ uhash_remove(bundle->fStringSet, string);
}
-
- uprv_free(bundle);
+ string_close(string);
}
/* Adding Functions */
@@ -855,6 +1229,7 @@ void table_add(struct SResource *table, struct SResource *res, int linenumber, U
struct SResource *current = NULL;
struct SResource *prev = NULL;
struct SResTable *list;
+ const char *resKeyString;
if (U_FAILURE(*status)) {
return;
@@ -868,36 +1243,7 @@ void table_add(struct SResource *table, struct SResource *res, int linenumber, U
/* here we need to traverse the list */
list = &(table->u.fTable);
-
- if(table->fType == URES_TABLE && res->fKey > 0xffff) {
- /* this table straddles the 64k strings boundary, update to a table32 */
- table->fType = URES_TABLE32;
-
- /*
- * increase the size because count and each string offset
- * increase from uint16_t to int32_t
- */
- table->fSize += (1 + list->fCount) * 2;
- }
-
++(list->fCount);
- if(list->fCount > (uint32_t)list->fRoot->fMaxTableLength) {
- list->fRoot->fMaxTableLength = list->fCount;
- }
-
- /*
- * URES_TABLE: 6 bytes = 1 uint16_t key string offset + 1 uint32_t Resource
- * URES_TABLE32: 8 bytes = 1 int32_t key string offset + 1 uint32_t Resource
- */
- table->fSize += table->fType == URES_TABLE ? 6 : 8;
-
- table->u.fTable.fChildrenSize += res->fSize + calcPadding(res->fSize);
-
- if (res->fType == URES_TABLE || res->fType == URES_TABLE32) {
- table->u.fTable.fChildrenSize += res->u.fTable.fChildrenSize;
- } else if (res->fType == URES_ARRAY) {
- table->u.fTable.fChildrenSize += res->u.fArray.fChildrenSize;
- }
/* is list still empty? */
if (list->fFirst == NULL) {
@@ -906,13 +1252,26 @@ void table_add(struct SResource *table, struct SResource *res, int linenumber, U
return;
}
+ resKeyString = list->fRoot->fKeys + res->fKey;
+
current = list->fFirst;
while (current != NULL) {
- if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), ((list->fRoot->fKeys) + (res->fKey))) < 0) {
+ const char *currentKeyString = list->fRoot->fKeys + current->fKey;
+ int diff;
+ /*
+ * formatVersion 1: compare key strings in native-charset order
+ * formatVersion 2 and up: compare key strings in ASCII order
+ */
+ if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
+ diff = uprv_strcmp(currentKeyString, resKeyString);
+ } else {
+ diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
+ }
+ if (diff < 0) {
prev = current;
current = current->fNext;
- } else if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), ((list->fRoot->fKeys) + (res->fKey))) > 0) {
+ } else if (diff > 0) {
/* we're either in front of list, or in middle */
if (prev == NULL) {
/* front of the list */
@@ -926,7 +1285,7 @@ void table_add(struct SResource *table, struct SResource *res, int linenumber, U
return;
} else {
/* Key already exists! ERROR! */
- error(linenumber, "duplicate key '%s' in table, first appeared at line %d", list->fRoot->fKeys + current->fKey, current->line);
+ error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
*status = U_UNSUPPORTED_ERROR;
return;
}
@@ -951,15 +1310,6 @@ void array_add(struct SResource *array, struct SResource *res, UErrorCode *statu
}
(array->u.fArray.fCount)++;
-
- array->fSize += sizeof(uint32_t);
- array->u.fArray.fChildrenSize += res->fSize + calcPadding(res->fSize);
-
- if (res->fType == URES_TABLE || res->fType == URES_TABLE32) {
- array->u.fArray.fChildrenSize += res->u.fTable.fChildrenSize;
- } else if (res->fType == URES_ARRAY) {
- array->u.fArray.fChildrenSize += res->u.fArray.fChildrenSize;
- }
}
void intvector_add(struct SResource *intvector, int32_t value, UErrorCode *status) {
@@ -969,8 +1319,6 @@ void intvector_add(struct SResource *intvector, int32_t value, UErrorCode *statu
*(intvector->u.fIntVector.fArray + intvector->u.fIntVector.fCount) = value;
intvector->u.fIntVector.fCount++;
-
- intvector->fSize += sizeof(uint32_t);
}
/* Misc Functions */
@@ -997,25 +1345,47 @@ void bundle_setlocale(struct SRBRoot *bundle, UChar *locale, UErrorCode *status)
}
+/*
+ * Resolve a key offset to its key string.
+ * A non-negative offset indexes bundle->fKeys; a negative offset has its
+ * sign bit masked off and then indexes bundle->fPoolBundleKeys.
+ */
+static const char *
+getKeyString(const struct SRBRoot *bundle, int32_t key) {
+    if (key >= 0) {
+        return bundle->fKeys + key;
+    }
+    return bundle->fPoolBundleKeys + (key & 0x7fffffff);
+}
+
+/*
+ * Return the key string of a resource, or NULL if the resource has no key
+ * (res->fKey == -1). The temp buffer is not used by this implementation.
+ */
+const char *
+res_getKeyString(const struct SRBRoot *bundle, const struct SResource *res, char temp[8]) {
+    return res->fKey == -1 ? NULL : getKeyString(bundle, res->fKey);
+}
+
+/*
+ * Return a pointer to this bundle's own key bytes, which occupy
+ * fKeys[fKeysBottom..fKeysTop), and store their byte count in *pLength.
+ */
+const char *
+bundle_getKeyBytes(struct SRBRoot *bundle, int32_t *pLength) {
+    const char *firstKey = bundle->fKeys + bundle->fKeysBottom;
+    *pLength = bundle->fKeysTop - bundle->fKeysBottom;
+    return firstKey;
+}
int32_t
-bundle_addtag(struct SRBRoot *bundle, const char *tag, UErrorCode *status) {
- int32_t keypos, length;
+bundle_addKeyBytes(struct SRBRoot *bundle, const char *keyBytes, int32_t length, UErrorCode *status) {
+ int32_t keypos;
if (U_FAILURE(*status)) {
return -1;
}
-
- if (tag == NULL) {
- /* do not set an error: the root table has a NULL tag */
+ if (length < 0 || (keyBytes == NULL && length != 0)) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
return -1;
}
+ if (length == 0) {
+ return bundle->fKeysTop;
+ }
- keypos = bundle->fKeyPoint;
-
- bundle->fKeyPoint += length = (int32_t) (uprv_strlen(tag) + 1);
-
- if (bundle->fKeyPoint >= bundle->fKeysCapacity) {
+ keypos = bundle->fKeysTop;
+ bundle->fKeysTop += length;
+ if (bundle->fKeysTop >= bundle->fKeysCapacity) {
/* overflow - resize the keys buffer */
bundle->fKeysCapacity += KEY_SPACE_SIZE;
bundle->fKeys = uprv_realloc(bundle->fKeys, bundle->fKeysCapacity);
@@ -1025,7 +1395,372 @@ bundle_addtag(struct SRBRoot *bundle, const char *tag, UErrorCode *status) {
}
}
- uprv_memcpy(bundle->fKeys + keypos, tag, length);
+ uprv_memcpy(bundle->fKeys + keypos, keyBytes, length);
+
+ return keypos;
+}
+
+/*
+ * Append a NUL-terminated key (tag) to the bundle's key bytes and count it.
+ * Returns the key's offset as reported by bundle_addKeyBytes(), or -1 if
+ * *status is already a failure or tag is NULL.
+ * A NULL tag is not an error: the root table and array items have no keys.
+ * fKeysCount is incremented only when the key bytes were added successfully.
+ */
+int32_t
+bundle_addtag(struct SRBRoot *bundle, const char *tag, UErrorCode *status) {
+    int32_t keypos;
+    int32_t tagLength;
+
+    if (U_FAILURE(*status) || tag == NULL) {
+        return -1;
+    }
+
+    /* the stored key includes its terminating NUL */
+    tagLength = (int32_t)(uprv_strlen(tag) + 1);
+    keypos = bundle_addKeyBytes(bundle, tag, tagLength, status);
+    if (U_SUCCESS(*status)) {
+        bundle->fKeysCount++;
+    }
return keypos;
}
+
+/*
+ * Three-way comparison of two possibly-negative key offsets:
+ * returns -1, 0 or 1 for lPos <, == or > rPos.
+ * Built from the two comparisons rather than lPos - rPos, because the
+ * subtraction can overflow for operands of opposite sign.
+ */
+static int32_t
+compareInt32(int32_t lPos, int32_t rPos) {
+    return (lPos > rPos) - (lPos < rPos);
+}
+
+/*
+ * uprv_sortArray() comparator for KeyMapEntry items; context is the SRBRoot.
+ * Orders keys by their bytes read from the last character backwards, so that
+ * a key is followed by the keys that are suffixes of it.
+ * Ties are broken by descending key length, then by oldpos (negative pool
+ * bundle offsets first, otherwise parsing order).
+ */
+static int32_t U_CALLCONV
+compareKeySuffixes(const void *context, const void *l, const void *r) {
+    const struct SRBRoot *bundle = (const struct SRBRoot *)context;
+    const int32_t leftPos = ((const KeyMapEntry *)l)->oldpos;
+    const int32_t rightPos = ((const KeyMapEntry *)r)->oldpos;
+    const char *leftKey = getKeyString(bundle, leftPos);
+    const char *rightKey = getKeyString(bundle, rightPos);
+    const char *leftEnd = leftKey;
+    const char *rightEnd = rightKey;
+    int32_t diff;
+
+    /* find the terminating NUL of each key */
+    for (; *leftEnd != 0; ++leftEnd) {}
+    for (; *rightEnd != 0; ++rightEnd) {}
+
+    /* compare the keys back to front, as unsigned byte values */
+    while (leftKey < leftEnd && rightKey < rightEnd) {
+        --leftEnd;
+        --rightEnd;
+        diff = (int32_t)(uint8_t)*leftEnd - (int32_t)(uint8_t)*rightEnd;
+        if (diff != 0) {
+            return diff;
+        }
+    }
+
+    /* one key ends with the other: the longer key sorts first */
+    diff = (int32_t)(rightEnd - rightKey) - (int32_t)(leftEnd - leftKey);
+    if (diff != 0) {
+        return diff;
+    }
+
+    /* identical keys: pool bundle keys (negative oldpos) first, then parsing order */
+    return compareInt32(leftPos, rightPos);
+}
+
+/* uprv_sortArray() comparator: orders KeyMapEntry items by ascending newpos; context is unused. */
+static int32_t U_CALLCONV
+compareKeyNewpos(const void *context, const void *l, const void *r) {
+    const KeyMapEntry *left = (const KeyMapEntry *)l;
+    const KeyMapEntry *right = (const KeyMapEntry *)r;
+    return compareInt32(left->newpos, right->newpos);
+}
+
+/* uprv_sortArray() comparator: orders KeyMapEntry items by ascending oldpos; context is unused. */
+static int32_t U_CALLCONV
+compareKeyOldpos(const void *context, const void *l, const void *r) {
+    const KeyMapEntry *left = (const KeyMapEntry *)l;
+    const KeyMapEntry *right = (const KeyMapEntry *)r;
+    return compareInt32(left->oldpos, right->oldpos);
+}
+
+/*
+ * Compact this bundle's key bytes in place: a key that is a suffix of
+ * another key is redirected into that longer key, and the bytes it used
+ * are squeezed out of bundle->fKeys.
+ *
+ * Returns without doing anything if *status is already a failure, if the
+ * bundle has no keys of its own (fKeysCount == 0), or if fKeyMap is
+ * already set.
+ * Sets U_MEMORY_ALLOCATION_ERROR if the temporary map cannot be allocated.
+ * After the squeeze pass, fKeysTop is the new end of the key bytes.
+ * If the final sort also succeeds, bundle->fKeyMap takes ownership of the
+ * oldpos/newpos map, sorted by oldpos; otherwise the map is freed.
+ */
+void
+bundle_compactKeys(struct SRBRoot *bundle, UErrorCode *status) {
+ KeyMapEntry *map;
+ char *keys;
+ int32_t i;
+ int32_t keysCount = bundle->fPoolBundleKeysCount + bundle->fKeysCount;
+ if (U_FAILURE(*status) || bundle->fKeysCount == 0 || bundle->fKeyMap != NULL) {
+ return;
+ }
+ map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
+ if (map == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ /*
+ * First map entries: one per pool bundle key, found by walking the
+ * NUL-terminated strings in fPoolBundleKeys. Setting the top bit makes
+ * oldpos negative, which getKeyString() resolves against fPoolBundleKeys.
+ */
+ keys = (char *)bundle->fPoolBundleKeys;
+ for (i = 0; i < bundle->fPoolBundleKeysCount; ++i) {
+ map[i].oldpos =
+ (int32_t)(keys - bundle->fPoolBundleKeys) | 0x80000000; /* negative oldpos */
+ map[i].newpos = 0;
+ while (*keys != 0) { ++keys; } /* skip the key */
+ ++keys; /* skip the NUL */
+ }
+ /* Remaining entries: this bundle's own keys, as offsets into fKeys starting at fKeysBottom. */
+ keys = bundle->fKeys + bundle->fKeysBottom;
+ for (; i < keysCount; ++i) {
+ map[i].oldpos = (int32_t)(keys - bundle->fKeys);
+ map[i].newpos = 0;
+ while (*keys != 0) { ++keys; } /* skip the key */
+ ++keys; /* skip the NUL */
+ }
+ /* Sort the keys so that each one is immediately followed by all of its suffixes. */
+ uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
+ compareKeySuffixes, bundle, FALSE, status);
+ /*
+ * Make suffixes point into earlier, longer strings that contain them
+ * and mark the old, now unused suffix bytes as deleted.
+ */
+ if (U_SUCCESS(*status)) {
+ /* from here on, keys is the base of this bundle's own key bytes */
+ keys = bundle->fKeys;
+ for (i = 0; i < keysCount;) {
+ /*
+ * This key is not a suffix of the previous one;
+ * keep this one and delete the following ones that are
+ * suffixes of this one.
+ */
+ const char *key;
+ const char *keyLimit;
+ int32_t j = i + 1;
+ map[i].newpos = map[i].oldpos;
+ if (j < keysCount && map[j].oldpos < 0) {
+ /* Key string from the pool bundle, do not delete. */
+ i = j;
+ continue;
+ }
+ key = getKeyString(bundle, map[i].oldpos);
+ for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
+ /* only this bundle's own keys (oldpos >= 0) are candidates for removal */
+ for (; j < keysCount && map[j].oldpos >= 0; ++j) {
+ const char *k;
+ char *suffix;
+ const char *suffixLimit;
+ int32_t offset;
+ suffix = keys + map[j].oldpos;
+ for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
+ /* offset of the candidate suffix within the kept key, if it is one */
+ offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix);
+ if (offset < 0) {
+ break; /* suffix cannot be longer than the original */
+ }
+ /* Is it a suffix of the earlier, longer key? */
+ for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
+ if (suffix == suffixLimit && *k == *suffixLimit) {
+ map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */
+ /* mark the suffix as deleted */
+ /* byte value 1 (including over the NUL) is what the squeeze loop below skips */
+ while (*suffix != 0) { *suffix++ = 1; }
+ *suffix = 1;
+ } else {
+ break; /* not a suffix, restart from here */
+ }
+ }
+ i = j;
+ }
+ /*
+ * Re-sort by newpos, then modify the key characters array in-place
+ * to squeeze out unused bytes, and readjust the newpos offsets.
+ */
+ uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
+ compareKeyNewpos, NULL, FALSE, status);
+ if (U_SUCCESS(*status)) {
+ int32_t oldpos, newpos, limit;
+ oldpos = newpos = bundle->fKeysBottom;
+ limit = bundle->fKeysTop;
+ /* skip key offsets that point into the pool bundle rather than this new bundle */
+ for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
+ if (i < keysCount) {
+ while (oldpos < limit) {
+ if (keys[oldpos] == 1) {
+ ++oldpos; /* skip unused bytes */
+ } else {
+ /* adjust the new offsets for keys starting here */
+ while (i < keysCount && map[i].newpos == oldpos) {
+ map[i++].newpos = newpos;
+ }
+ /* move the key characters to their new position */
+ keys[newpos++] = keys[oldpos++];
+ }
+ }
+ assert(i == keysCount);
+ }
+ bundle->fKeysTop = newpos;
+ /* Re-sort once more, by old offsets for binary searching. */
+ uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
+ compareKeyOldpos, NULL, FALSE, status);
+ if (U_SUCCESS(*status)) {
+ /* key size reduction by limit - newpos */
+ bundle->fKeyMap = map;
+ map = NULL;
+ }
+ }
+ }
+ /* map is NULL here if ownership moved to bundle->fKeyMap (presumably a no-op free then) */
+ uprv_free(map);
+}
+
+/*
+ * uprv_sortArray() comparator for an array of string SResource pointers;
+ * context is unused.
+ * Orders strings by their UChars read from the last one backwards, so that
+ * a string is followed by the strings that are suffixes of it; when one
+ * string ends with the other, the longer one sorts first.
+ */
+static int32_t U_CALLCONV
+compareStringSuffixes(const void *context, const void *l, const void *r) {
+    struct SResource *lhs = *((struct SResource **)l);
+    struct SResource *rhs = *((struct SResource **)r);
+    const UChar *lhsStart = lhs->u.fString.fChars;
+    const UChar *rhsStart = rhs->u.fString.fChars;
+    const UChar *lhsEnd = lhsStart + lhs->u.fString.fLength;
+    const UChar *rhsEnd = rhsStart + rhs->u.fString.fLength;
+
+    /* compare the strings back to front */
+    while (lhsEnd > lhsStart && rhsEnd > rhsStart) {
+        int32_t diff;
+        --lhsEnd;
+        --rhsEnd;
+        diff = (int32_t)*lhsEnd - (int32_t)*rhsEnd;
+        if (diff != 0) {
+            return diff;
+        }
+    }
+
+    /* equal suffixes: descending string length */
+    return rhs->u.fString.fLength - lhs->u.fString.fLength;
+}
+
+/*
+ * uprv_sortArray() comparator for an array of string SResource pointers;
+ * context is unused.
+ * Strings whose fSame is set (marked as a suffix of another string) sort
+ * after those whose fSame is NULL; within each group, shorter strings
+ * sort first.
+ */
+static int32_t U_CALLCONV
+compareStringLengths(const void *context, const void *l, const void *r) {
+    struct SResource *lhs = *((struct SResource **)l);
+    struct SResource *rhs = *((struct SResource **)r);
+    int32_t lhsIsSuffix = (int)(lhs->u.fString.fSame != NULL);
+    int32_t rhsIsSuffix = (int)(rhs->u.fString.fSame != NULL);
+
+    if (lhsIsSuffix != rhsIsSuffix) {
+        /* a suffix compares greater than a non-suffix */
+        return lhsIsSuffix - rhsIsSuffix;
+    }
+    /* same group: ascending string length */
+    return lhs->u.fString.fLength - rhs->u.fString.fLength;
+}
+
+/*
+ * Write one string into bundle->f16BitUnits starting at index utf16Length.
+ *
+ * Sets res->fRes to a URES_STRING_V2 resource word holding that start index
+ * and marks the resource as written. Depending on fNumCharsForLength, the
+ * characters are preceded by 0, 1, 2 or 3 units that encode the length:
+ *   1: 0xdc00 + length
+ *   2: 0xdfef + (length >> 16), then the low 16 bits of length
+ *   3: 0xdfff, then the high and the low 16 bits of length
+ * The characters are copied together with their terminating NUL.
+ * Returns the index just past the NUL.
+ */
+static int32_t
+string_writeUTF16v2(struct SRBRoot *bundle, struct SResource *res, int32_t utf16Length) {
+    int32_t length = res->u.fString.fLength;
+    int32_t numLengthUnits = res->u.fString.fNumCharsForLength;
+    int32_t writeIndex = utf16Length;
+
+    res->fRes = URES_MAKE_RESOURCE(URES_STRING_V2, utf16Length);
+    res->fWritten = TRUE;
+
+    if (numLengthUnits == 1) {
+        bundle->f16BitUnits[writeIndex++] = (uint16_t)(0xdc00 + length);
+    } else if (numLengthUnits == 2) {
+        bundle->f16BitUnits[writeIndex++] = (uint16_t)(0xdfef + (length >> 16));
+        bundle->f16BitUnits[writeIndex++] = (uint16_t)length;
+    } else if (numLengthUnits == 3) {
+        bundle->f16BitUnits[writeIndex++] = 0xdfff;
+        bundle->f16BitUnits[writeIndex++] = (uint16_t)(length >> 16);
+        bundle->f16BitUnits[writeIndex++] = (uint16_t)length;
+    }
+    /* any other value (0 in particular): no explicit length units */
+
+    u_memcpy(bundle->f16BitUnits + writeIndex, res->u.fString.fChars, length + 1);
+    return writeIndex + length + 1;
+}
+
+/*
+ * Write all distinct strings of the bundle into one array of 16-bit units
+ * (bundle->f16BitUnits), letting a string share the storage of a longer
+ * string that ends with it.
+ *
+ * Only acts when bundle->fStringsForm is STRINGS_UTF16_V2 and
+ * bundle->f16BitUnitsLength > 0; returns at once if *status is a failure.
+ *
+ * Steps:
+ *  1. Collect the string resources from bundle->fStringSet.
+ *  2. Sort them so that each string is followed by its suffixes.
+ *  3. Mark each suffix that needs no explicit length
+ *     (fNumCharsForLength == 0) with fSame/fSuffixOffset.
+ *  4. Re-sort by ascending length, marked suffixes last.
+ *  5. Write the unmarked strings, then derive fRes for the marked ones
+ *     from the string they point to, and reset their fSame to NULL.
+ *
+ * On return, f16BitUnitsLength is the number of units actually used and
+ * f16BitUnitsCapacity the number allocated.
+ *
+ * Sets U_MEMORY_ALLOCATION_ERROR if an allocation fails, or if the string
+ * set is missing although strings were counted: string_open() tolerates a
+ * failed uhash_open(), and without the set the strings cannot be
+ * enumerated here.
+ */
+static void
+bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    switch(bundle->fStringsForm) {
+    case STRINGS_UTF16_V2:
+        if (bundle->f16BitUnitsLength > 0) {
+            struct SResource **array;
+            int32_t count;
+            int32_t i, pos;
+            int32_t utf16Length;
+            if (bundle->fStringSet == NULL) {
+                /* do not pass a NULL table to uhash_count()/uhash_nextElement() */
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            count = uhash_count(bundle->fStringSet);
+            /*
+             * Allocate enough space for the initial NUL and the UTF-16 v2 strings,
+             * and some extra for URES_TABLE16 and URES_ARRAY16 values.
+             * Round down to an even number.
+             */
+            utf16Length = (bundle->f16BitUnitsLength + 20000) & ~1;
+            bundle->f16BitUnits = (UChar *)uprv_malloc(utf16Length * U_SIZEOF_UCHAR);
+            array = (struct SResource **)uprv_malloc(count * sizeof(*array));
+            if (bundle->f16BitUnits == NULL || array == NULL) {
+                uprv_free(bundle->f16BitUnits);
+                bundle->f16BitUnits = NULL;
+                uprv_free(array);
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            bundle->f16BitUnitsCapacity = utf16Length;
+            /* insert the initial NUL */
+            bundle->f16BitUnits[0] = 0;
+            utf16Length = 1;
+            ++bundle->f16BitUnitsLength;
+            /* copy the set's keys (the string resources) into a sortable array */
+            for (pos = -1, i = 0; i < count; ++i) {
+                array[i] = (struct SResource *)uhash_nextElement(bundle->fStringSet, &pos)->key.pointer;
+            }
+            /* Sort the strings so that each one is immediately followed by all of its suffixes. */
+            uprv_sortArray(array, count, (int32_t)sizeof(*array),
+                           compareStringSuffixes, NULL, FALSE, status);
+            /*
+             * Make suffixes point into earlier, longer strings that contain them.
+             * Temporarily use fSame and fSuffixOffset for suffix strings to
+             * refer to the remaining ones.
+             */
+            if (U_SUCCESS(*status)) {
+                for (i = 0; i < count;) {
+                    /*
+                     * This string is not a suffix of the previous one;
+                     * write this one and subsume the following ones that are
+                     * suffixes of this one.
+                     */
+                    struct SResource *res = array[i];
+                    const UChar *strLimit = res->u.fString.fChars + res->u.fString.fLength;
+                    int32_t j;
+                    for (j = i + 1; j < count; ++j) {
+                        struct SResource *suffixRes = array[j];
+                        const UChar *s;
+                        const UChar *suffix = suffixRes->u.fString.fChars;
+                        const UChar *suffixLimit = suffix + suffixRes->u.fString.fLength;
+                        int32_t offset = res->u.fString.fLength - suffixRes->u.fString.fLength;
+                        if (offset < 0) {
+                            break;  /* suffix cannot be longer than the original */
+                        }
+                        /* Is it a suffix of the earlier, longer string? */
+                        for (s = strLimit; suffix < suffixLimit && *--s == *--suffixLimit;) {}
+                        if (suffix == suffixLimit && *s == *suffixLimit) {
+                            if (suffixRes->u.fString.fNumCharsForLength == 0) {
+                                /* yes, point to the earlier string */
+                                suffixRes->u.fString.fSame = res;
+                                suffixRes->u.fString.fSuffixOffset = offset;
+                            } else {
+                                /* write the suffix by itself if we need explicit length */
+                            }
+                        } else {
+                            break;  /* not a suffix, restart from here */
+                        }
+                    }
+                    i = j;
+                }
+            }
+            /*
+             * Re-sort the strings by ascending length (except suffixes last)
+             * to optimize for URES_TABLE16 and URES_ARRAY16:
+             * Keep as many as possible within reach of 16-bit offsets.
+             */
+            uprv_sortArray(array, count, (int32_t)sizeof(*array),
+                           compareStringLengths, NULL, FALSE, status);
+            if (U_SUCCESS(*status)) {
+                /* Write the non-suffix strings. */
+                for (i = 0; i < count && array[i]->u.fString.fSame == NULL; ++i) {
+                    utf16Length = string_writeUTF16v2(bundle, array[i], utf16Length);
+                }
+                /* Write the suffix strings. Make each point to the real string. */
+                for (; i < count; ++i) {
+                    struct SResource *res = array[i];
+                    struct SResource *same = res->u.fString.fSame;
+                    /* skip the containing string's length units, then the prefix characters */
+                    res->fRes = same->fRes + same->u.fString.fNumCharsForLength + res->u.fString.fSuffixOffset;
+                    res->u.fString.fSame = NULL;
+                    res->fWritten = TRUE;
+                }
+            }
+            assert(utf16Length <= bundle->f16BitUnitsLength);
+            bundle->f16BitUnitsLength = utf16Length;
+            uprv_free(array);
+        }
+        break;
+    default:
+        break;
+    }
+}
diff --git a/tools/genrb/reslist.h b/tools/genrb/reslist.h
index f5f533da..52737afd 100644
--- a/tools/genrb/reslist.h
+++ b/tools/genrb/reslist.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2008, International Business Machines
+* Copyright (C) 2000-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -29,22 +29,43 @@
#include "cstring.h"
#include "unewdata.h"
#include "ustr.h"
+#include "uhash.h"
U_CDECL_BEGIN
+/*
+ * One entry of SRBRoot.fKeyMap, filled in by bundle_compactKeys():
+ * the offset of a key before compaction and its offset afterwards.
+ */
+typedef struct KeyMapEntry {
+    int32_t oldpos;  /* original key offset; negative for pool bundle keys */
+    int32_t newpos;  /* key offset after compaction */
+} KeyMapEntry;
+
/* Resource bundle root table */
struct SRBRoot {
- char *fLocale;
- int32_t fKeyPoint;
- char *fKeys;
- int32_t fKeysCapacity;
- int32_t fCount;
struct SResource *fRoot;
+ char *fLocale;
+ int32_t fIndexLength;
int32_t fMaxTableLength;
UBool noFallback; /* see URES_ATT_NO_FALLBACK */
+ int8_t fStringsForm; /* default STRINGS_UTF16_V1 */
+ UBool fIsPoolBundle;
+
+ char *fKeys;
+ KeyMapEntry *fKeyMap;
+ int32_t fKeysBottom, fKeysTop;
+ int32_t fKeysCapacity;
+ int32_t fKeysCount;
+ int32_t fLocalKeyLimit; /* key offset < limit fits into URES_TABLE */
+
+ UHashtable *fStringSet;
+ uint16_t *f16BitUnits;
+ int32_t f16BitUnitsCapacity;
+ int32_t f16BitUnitsLength;
+
+ const char *fPoolBundleKeys;
+ int32_t fPoolBundleKeysLength;
+ int32_t fPoolBundleKeysCount;
+ int32_t fPoolChecksum;
};
-struct SRBRoot *bundle_open(const struct UString* comment, UErrorCode *status);
+struct SRBRoot *bundle_open(const struct UString* comment, UBool isPoolBundle, UErrorCode *status);
void bundle_write(struct SRBRoot *bundle, const char *outputDir, const char *outputPkg, char *writtenFilename, int writtenFilenameLen, UErrorCode *status);
/* write a java resource file */
@@ -64,8 +85,16 @@ void bundle_close(struct SRBRoot *bundle, UErrorCode *status);
void bundle_setlocale(struct SRBRoot *bundle, UChar *locale, UErrorCode *status);
int32_t bundle_addtag(struct SRBRoot *bundle, const char *tag, UErrorCode *status);
+const char *
+bundle_getKeyBytes(struct SRBRoot *bundle, int32_t *pLength);
+
+int32_t
+bundle_addKeyBytes(struct SRBRoot *bundle, const char *keyBytes, int32_t length, UErrorCode *status);
+
+void
+bundle_compactKeys(struct SRBRoot *bundle, UErrorCode *status);
+
/* Various resource types */
-struct SResource* res_open(const struct UString* comment, UErrorCode* status);
/*
* Return a unique pointer to a dummy object,
@@ -76,17 +105,16 @@ struct SResource* res_none(void);
struct SResTable {
uint32_t fCount;
- uint32_t fChildrenSize;
+ int8_t fType; /* determined by table_write16() for table_preWrite() & table_write() */
struct SResource *fFirst;
struct SRBRoot *fRoot;
};
-struct SResource* table_open(struct SRBRoot *bundle, char *tag, const struct UString* comment, UErrorCode *status);
+struct SResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status);
void table_add(struct SResource *table, struct SResource *res, int linenumber, UErrorCode *status);
struct SResArray {
uint32_t fCount;
- uint32_t fChildrenSize;
struct SResource *fFirst;
struct SResource *fLast;
};
@@ -95,12 +123,22 @@ struct SResource* array_open(struct SRBRoot *bundle, const char *tag, const stru
void array_add(struct SResource *array, struct SResource *res, UErrorCode *status);
struct SResString {
- uint32_t fLength;
+ struct SResource *fSame; /* used for duplicates */
UChar *fChars;
+ int32_t fLength;
+ int32_t fSuffixOffset; /* this string is a suffix of fSame at this offset */
+ int8_t fNumCharsForLength;
};
struct SResource *string_open(struct SRBRoot *bundle, char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
+/**
+ * Remove a string from a bundle and close (delete) it.
+ * The string must not have been added to a table or array yet.
+ * This function only undoes what string_open() did.
+ */
+void bundle_closeString(struct SRBRoot *bundle, struct SResource *string);
+
struct SResource *alias_open(struct SRBRoot *bundle, char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
struct SResIntVector {
@@ -128,10 +166,11 @@ struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t len
/* Resource place holder */
struct SResource {
- UResType fType;
- int32_t fKey;
- uint32_t fSize; /* Size in bytes outside the header part */
- int line; /* used internally to report duplicate keys in tables */
+ int8_t fType; /* nominal type: fRes (when != 0xffffffff) may use subtype */
+ UBool fWritten; /* res_write() can exit early */
+ uint32_t fRes; /* resource item word; 0xffffffff if not known yet */
+ int32_t fKey; /* Index into bundle->fKeys; -1 if no key. */
+ int line; /* used internally to report duplicate keys in tables */
struct SResource *fNext; /*This is for internal chaining while building*/
struct UString fComment;
union {
@@ -144,9 +183,20 @@ struct SResource {
} u;
};
+const char *
+res_getKeyString(const struct SRBRoot *bundle, const struct SResource *res, char temp[8]);
+
void res_close(struct SResource *res);
+
void setIncludeCopyright(UBool val);
UBool getIncludeCopyright(void);
+void setFormatVersion(int32_t formatVersion);
+
+void setUsePoolBundle(UBool use);
+
+/* in wrtxml.cpp */
+uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc);
+
U_CDECL_END
#endif /* #ifndef RESLIST_H */
diff --git a/tools/genrb/wrtjava.c b/tools/genrb/wrtjava.c
index 0ce4e61a..948fc8dd 100644
--- a/tools/genrb/wrtjava.c
+++ b/tools/genrb/wrtjava.c
@@ -306,10 +306,12 @@ str_write_java( uint16_t* src, int32_t srcLen, UBool printEndLine, UErrorCode *s
/* Writing Functions */
static void
string_write_java(struct SResource *res,UErrorCode *status) {
+ char resKeyBuffer[8];
+ const char *resname = res_getKeyString(srBundle, res, resKeyBuffer);
str_write_java(res->u.fString.fChars,res->u.fString.fLength,TRUE,status);
- if(res->fKey > 0 && uprv_strcmp(srBundle->fKeys+res->fKey,"Rule")==0)
+ if(resname != NULL && uprv_strcmp(resname,"Rule")==0)
{
UChar* buf = (UChar*) uprv_malloc(sizeof(UChar)*res->u.fString.fLength);
uprv_memcpy(buf,res->u.fString.fChars,res->u.fString.fLength);
@@ -388,12 +390,14 @@ intvector_write_java( struct SResource *res, UErrorCode *status) {
const char* intArr = "new int[] {\n";
/* const char* intC = "new Integer("; */
const char* stringArr = "new String[]{\n";
+ char resKeyBuffer[8];
+ const char *resname = res_getKeyString(srBundle, res, resKeyBuffer);
char buf[100];
int len =0;
buf[0]=0;
write_tabs(out);
- if(res->fKey > 0 && uprv_strcmp(srBundle->fKeys+res->fKey,"DateTimeElements")==0){
+ if(resname != NULL && uprv_strcmp(resname,"DateTimeElements")==0){
T_FileStream_write(out, stringArr, (int32_t)uprv_strlen(stringArr));
tabCount++;
for(i = 0; i<res->u.fIntVector.fCount; i++) {
@@ -528,6 +532,9 @@ table_write_java(struct SResource *res, UErrorCode *status) {
while (current != NULL) {
+ char currentKeyBuffer[8];
+ const char *currentKeyString = res_getKeyString(srBundle, current, currentKeyBuffer);
+
assert(i < res->u.fTable.fCount);
write_tabs(out);
@@ -538,10 +545,10 @@ table_write_java(struct SResource *res, UErrorCode *status) {
allStrings=FALSE;
write_tabs(out);
- if(current->fKey > 0){
+ if(currentKeyString != NULL) {
T_FileStream_write(out, "\"", 1);
- T_FileStream_write(out, srBundle->fKeys+current->fKey,
- (int32_t)uprv_strlen(srBundle->fKeys+current->fKey));
+ T_FileStream_write(out, currentKeyString,
+ (int32_t)uprv_strlen(currentKeyString));
T_FileStream_write(out, "\",\n", 2);
T_FileStream_write(out, "\n", 1);
@@ -602,7 +609,6 @@ res_write_java(struct SResource *res,UErrorCode *status) {
array_write_java (res, status);
return;
case URES_TABLE:
- case URES_TABLE32:
table_write_java (res, status);
return;
default:
diff --git a/tools/genrb/wrtxml.cpp b/tools/genrb/wrtxml.cpp
index 0956f0d2..52c04ea6 100644
--- a/tools/genrb/wrtxml.cpp
+++ b/tools/genrb/wrtxml.cpp
@@ -90,7 +90,7 @@ static void write_tabs(FileStream* os){
}
/*get ID for each element. ID is globally unique.*/
-static char* getID(const char* id, char* curKey, char* result) {
+static char* getID(const char* id, const char* curKey, char* result) {
if(curKey == NULL) {
result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
@@ -124,7 +124,7 @@ static char* getID(const char* id, char* curKey, char* result) {
* conversion is not portable across platforms with different endianess.
*/
-static uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc){
+uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc){
int32_t crc;
uint32_t temp1;
uint32_t temp2;
@@ -533,13 +533,14 @@ printComments(struct UString *src, const char *resName, UBool printTranslate, UE
*/
static char *printContainer(struct SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
{
- char *resname = NULL;
+ char resKeyBuffer[8];
+ const char *resname = NULL;
char *sid = NULL;
write_tabs(out);
- if (res->fKey >= 0 && uprv_strcmp(srBundle->fKeys + res->fKey, "") != 0) {
- resname = srBundle->fKeys + res->fKey;
+ resname = res_getKeyString(srBundle, res, resKeyBuffer);
+ if (resname != NULL && *resname != 0) {
sid = getID(id, resname, sid);
} else {
sid = getID(id, NULL, sid);
@@ -979,7 +980,6 @@ res_write_xml(struct SResource *res, const char* id, const char* language, UBoo
return;
case URES_TABLE:
- case URES_TABLE32:
table_write_xml (res, id, language, isTopLevel, status);
return;
diff --git a/tools/genren/Makefile b/tools/genren/Makefile
index 49b9a30d..a2c33182 100644
--- a/tools/genren/Makefile
+++ b/tools/genren/Makefile
@@ -1,6 +1,6 @@
#******************************************************************************
#
-# Copyright (C) 2002-2008, International Business Machines
+# Copyright (C) 2002-2009, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@@ -11,7 +11,7 @@ top_srcdir = $(TOP)
top_builddir = $(TOP)
--include $(TOP)/icudefs.mk
+include $(TOP)/icudefs.mk
ICUDIR=ICUunrenamed
#SO=so
diff --git a/tools/genren/genren.pl b/tools/genren/genren.pl
index 028668a1..e52473ca 100755
--- a/tools/genren/genren.pl
+++ b/tools/genren/genren.pl
@@ -1,7 +1,7 @@
#!/usr/bin/perl
#*
#*******************************************************************************
-#* Copyright (C) 2001-2009, International Business Machines
+#* Copyright (C) 2001-2010, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
#*
@@ -25,7 +25,7 @@ $path = substr($0, 0, rindex($0, "/")+1)."../../common/unicode/uversion.h";
$nmopts = '-Cg -f s';
$post = '';
-$mode = 'LINUX';
+$mode = 'POSIX';
(-e $path) || die "Cannot find uversion.h";
@@ -68,7 +68,6 @@ $HEADERDEF =~ s/\./_/;
#We will print our copyright here + warnings
-
$YEAR = strftime "%Y",localtime;
print HEADER <<"EndOfHeaderComment";
@@ -100,9 +99,33 @@ print HEADER <<"EndOfHeaderComment";
/* #define U_DISABLE_RENAMING 1 */
#if !U_DISABLE_RENAMING
+
+/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
+ the platform a chance to define it first.
+ Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined.
+ */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/umachine.h"
+#endif
+
+/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/uvernum.h"
+#endif
+
+/* Error out before the following defines cause very strange and unexpected code breakage */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used.
+#endif
+
EndOfHeaderComment
+$fileCount = 0;
+$itemCount = 0;
+$symbolCount = 0;
+
for(;@ARGV; shift(@ARGV)) {
+ $fileCount++;
@NMRESULT = `nm $nmopts $ARGV[0] $post`;
if($?) {
warn "Couldn't do 'nm' for $ARGV[0], continuing...\n";
@@ -114,6 +137,7 @@ for(;@ARGV; shift(@ARGV)) {
# splice @NMRESULT, 0, 10;
}
foreach (@NMRESULT) { # Process every line of result and stuff it in $_
+ $itemCount++;
if($mode =~ /POSIX/) {
($_, $address, $type) = split(/\|/);
} elsif ($mode =~ /Mach-O/) {
@@ -129,8 +153,9 @@ for(;@ARGV; shift(@ARGV)) {
if(!($type =~ /[UAwW?]/)) {
if(/@@/) { # These would be imports
&verbose( "Import: $_ \"$type\"\n");
- } elsif (/::/) { # C++ methods, stuff class name in associative array
&verbose( "C++ method: $_\n");
+ } elsif (/^[^\(]*::/) { # C++ methods, stuff class name in associative array
+ ## DON'T match ... ( foo::bar ... want :: to be to the left of paren
## icu_2_0::CharString::~CharString(void) -> CharString
@CppName = split(/::/); ## remove scope stuff
if(@CppName>1) {
@@ -144,22 +169,33 @@ for(;@ARGV; shift(@ARGV)) {
} elsif($CppName[0] =~ /^~/) {
&verbose ("Skipping C++ destructor: $_\n");
} else {
+ &verbose( " Class: '$CppName[0]': $_ \n");
$CppClasses{$CppName[0]}++;
+ $symbolCount++;
}
+ } elsif ( my ($cfn) = m/^([A-Za-z0-9_]*)\(.*/ ) {
+ &verbose ( "$ARGV[0]: got global C++ function $cfn with '$_'\n" );
+ $CFuncs{$cfn}++;
+ $symbolCount++;
} elsif ( /\(/) { # These are strange functions
- print STDERR "$_\n";
+ print STDERR "$ARGV[0]: Not sure what to do with '$_'\n";
+ } elsif ( /^_init/ ) {
+ &verbose( "$ARGV[0]: Skipped initializer $_\n" );
+ } elsif ( /^_fini/ ) {
+ &verbose( "$ARGV[0]: Skipped finilizer $_\n" );
} elsif ( /icu_/) {
- print STDERR "Skipped strange mangled function $_\n";
+ print STDERR "$ARGV[0]: Skipped strange mangled function $_\n";
} elsif ( /^vtable for /) {
- print STDERR "Skipped vtable $_\n";
+ print STDERR "$ARGV[0]: Skipped vtable $_\n";
} elsif ( /^typeinfo for /) {
- print STDERR "Skipped typeinfo $_\n";
+ print STDERR "$ARGV[0]: Skipped typeinfo $_\n";
} elsif ( /operator\+/ ) {
- print STDERR "Skipped ignored function $_\n";
+ print STDERR "$ARGV[0]: Skipped ignored function $_\n";
} else { # This is regular C function
&verbose( "C func: $_\n");
@funcname = split(/[\(\s+]/);
$CFuncs{$funcname[0]}++;
+ $symbolCount++;
}
} else {
&verbose( "Skipped: $_ $1\n");
@@ -167,16 +203,28 @@ for(;@ARGV; shift(@ARGV)) {
}
}
+if( $fileCount == 0 ) {
+ die "Error: $itemCount lines from $fileCount files processed, but $symbolCount symbols were found.\n";
+}
+
+if( $symbolCount == 0 ) {
+ die "Error: $itemCount lines from $fileCount files processed, but $symbolCount symbols were found.\n";
+}
+
+print " Loaded $symbolCount symbols from $itemCount lines in $fileCount files.\n";
+
print HEADER "\n/* C exports renaming data */\n\n";
foreach(sort keys(%CFuncs)) {
- print HEADER "#define $_ $_$U_ICU_VERSION_SUFFIX\n";
+ print HEADER "#define $_ U_ICU_ENTRY_POINT_RENAME($_)\n";
+# print HEADER "#define $_ $_$U_ICU_VERSION_SUFFIX\n";
}
+print HEADER "\n\n";
print HEADER "/* C++ class names renaming defines */\n\n";
print HEADER "#ifdef XP_CPLUSPLUS\n";
print HEADER "#if !U_HAVE_NAMESPACE\n\n";
foreach(sort keys(%CppClasses)) {
- print HEADER "#define $_ $_$U_ICU_VERSION_SUFFIX\n";
+ print HEADER "#define $_ U_ICU_ENTRY_POINT_RENAME($_)\n";
}
print HEADER "\n#endif\n";
print HEADER "#endif\n";
diff --git a/tools/gentest/gentest.c b/tools/gentest/gentest.c
index 4be795f2..cdb4ea60 100644
--- a/tools/gentest/gentest.c
+++ b/tools/gentest/gentest.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2009, International Business Machines
+* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -160,8 +160,10 @@ outputJavaStuff(const char* progname, const char *outputDir) {
return 1;
}
- fprintf(out, "/** Copyright (C) %d, International Business Machines Corporation and Others. All Rights Reserved. **/\n\n", year);
- fprintf(out, "/* NOTE: this file is AUTOMATICALLY GENERATED by gentest. */\n\n");
+ fprintf(out, "/** Copyright (C) 2007-%d, International Business Machines Corporation and Others. All Rights Reserved. **/\n\n", year);
+ fprintf(out, "/* NOTE: this file is AUTOMATICALLY GENERATED by gentest.\n"
+ " * See: {ICU4C}/source/data/icu4j-readme.txt for more information. \n"
+ " **/\n\n");
fprintf(out, "package com.ibm.icu.dev.test.util;\n\n");
fprintf(out, "public class DebugUtilitiesData extends Object {\n");
fprintf(out, " public static final String ICU4C_VERSION=\"%s\";\n", U_ICU_VERSION);
@@ -196,14 +198,17 @@ outputJavaStuff(const char* progname, const char *outputDir) {
fprintf(out,
" ");
switch(t) {
+#if !UCONFIG_NO_FORMATTING
case UDBG_UCalendarDateFields:
case UDBG_UCalendarMonths:
- /* Temporary workaround for IS_LEAP_MOTH #6051 */
+ /* Temporary workaround for IS_LEAP_MONTH #6051 */
if (t == UDBG_UCalendarDateFields && i == 22) {
- fprintf(out, "com.ibm.icu.util.ChineseCalendar.%s, /* %d */", udbg_enumName((UDebugEnumType)t,i), i);
- } else
- fprintf(out, "com.ibm.icu.util.Calendar.%s, /* %d */", udbg_enumName((UDebugEnumType)t,i), i);
+ fprintf(out, "com.ibm.icu.util.ChineseCalendar.%s, /* %d */", udbg_enumName((UDebugEnumType)t,i), i);
+ } else {
+ fprintf(out, "com.ibm.icu.util.Calendar.%s, /* %d */", udbg_enumName((UDebugEnumType)t,i), i);
+ }
break;
+#endif
case UDBG_UDebugEnumType:
default:
fprintf(out,"%d, /* %s */", i, udbg_enumName((UDebugEnumType)t,i));
@@ -219,34 +224,4 @@ outputJavaStuff(const char* progname, const char *outputDir) {
return 0;
-#if 0
- int32_t count = udbg_enumCount((UDebugEnumType)t);
- if(count == -1) {
- fprintf(stderr,"%s: enumCount(%d) returned -1\n", progname, count);
- return 1;
- }
- for(t=0;t<=UDBG_ENUM_COUNT;t++) {
- int32_t count = udbg_enumCount((UDebugEnumType)t);
- if(count == -1) {
- fprintf(stderr,"%s: enumCount(%d) returned -1\n", progname, count);
- return 1;
- }
- for(i=0;i<=count;i++) {
- if(i<count) {
- if( i!=udbg_enumArrayValue((UDebugEnumType)t, i)) {
- fprintf(stderr, "%s: FAIL: udbg_enumArrayValue(%d,%d) returned %d, expected %d\n", progname, t, i, udbg_enumArrayValue((UDebugEnumType)t,i), i);
- return 1;
- }
- }
- fprintf(stderr, "%s: udbg_enumArrayValue(%d,%d) = %s, returned %d\n", progname, t, i,
- udbg_enumName((UDebugEnumType)t,i), udbg_enumArrayValue((UDebugEnumType)t,i));
- }
- if(udbg_enumExpectedCount((UDebugEnumType)t) != count) {
- fprintf(stderr, "%s: FAIL: udbg_enumExpectedCount(%d): %d, != UCAL_FIELD_COUNT=%d \n", progname, t, udbg_enumExpectedCount((UDebugEnumType)t), count);
- return 1;
- } else {
- fprintf(stderr, "%s: udbg_ucal_fieldCount: %d, UCAL_FIELD_COUNT=udbg_enumCount %d ", progname, udbg_enumExpectedCount((UDebugEnumType)t), count);
- }
- }
-#endif
}
diff --git a/tools/genuca/Makefile.in b/tools/genuca/Makefile.in
deleted file mode 100644
index 2f138e25..00000000
--- a/tools/genuca/Makefile.in
+++ /dev/null
@@ -1,96 +0,0 @@
-## Makefile.in for ICU - tools/genuca
-## Copyright (c) 1999-2008, International Business Machines Corporation and
-## others. All Rights Reserved.
-
-## Source directory information
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-
-top_builddir = ../..
-
-include $(top_builddir)/icudefs.mk
-
-## Build directory information
-subdir = tools/genuca
-
-TARGET_STUB_NAME = genuca
-
-SECTION = 8
-
-MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
-
-
-## Extra files to remove for 'make clean'
-CLEANFILES = *~ $(DEPS) $(MAN_FILES)
-
-## Target information
-TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
-
-ifneq ($(top_builddir),$(top_srcdir))
-CPPFLAGS += -I$(top_builddir)/common
-endif
-CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil
-LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
-
-OBJECTS = genuca.o
-
-DEPS = $(OBJECTS:.o=.d)
-
-## List of phony targets
-.PHONY : all all-local install install-local clean clean-local \
-distclean distclean-local dist dist-local check check-local install-man
-
-## Clear suffix list
-.SUFFIXES :
-
-## List of standard targets
-all: all-local
-install: install-local
-clean: clean-local
-distclean : distclean-local
-dist: dist-local
-check: all check-local
-
-all-local: $(TARGET) $(MAN_FILES)
-
-install-local: all-local install-man
- $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
- $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
-
-install-man: $(MAN_FILES)
- $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
- $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
-
-dist-local:
-
-clean-local:
- test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) $(TARGET) $(OBJECTS)
-
-distclean-local: clean-local
- $(RMV) Makefile
-
-check-local: all-local
-
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(TARGET) : $(OBJECTS)
- $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
- $(POST_BUILD_STEP)
-
-
-%.$(SECTION): $(srcdir)/%.$(SECTION).in
- cd $(top_builddir) \
- && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-
-ifeq (,$(MAKECMDGOALS))
--include $(DEPS)
-else
-ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
--include $(DEPS)
-endif
-endif
-
diff --git a/tools/genuca/genuca.8.in b/tools/genuca/genuca.8.in
deleted file mode 100644
index e8ab27d0..00000000
--- a/tools/genuca/genuca.8.in
+++ /dev/null
@@ -1,94 +0,0 @@
-.\" Hey, Emacs! This is -*-nroff-*- you know...
-.\"
-.\" genuca.8: manual page for the genuca utility
-.\"
-.\" Copyright (C) 2000-2001 IBM, Inc. and others.
-.\"
-.TH GENUCA 8 "22 February 2001" "ICU MANPAGE" "ICU @VERSION@ Manual"
-.SH NAME
-.B genuca
-\- create the UCA data table
-.SH SYNOPSIS
-.B genuca
-[
-.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
-]
-[
-.BR "\-V\fP, \fB\-\-version"
-]
-[
-.BR "\-v\fP, \fB\-\-verbose"
-]
-[
-.BI "\-c\fP, \fB\-\-copyright"
-]
-[
-.BI "\-s\fP, \fB\-\-sourcedir" " source"
-]
-[
-.BI "\-d\fP, \fB\-\-destdir" " destination"
-]
-[
-.IR file
-]
-.SH DESCRIPTION
-.B genuca
-compiles the Unicode Collation Algorithm (UCA) data from
-.I file
-(or from
-.B FractionalUCA.txt
-if
-.I file
-is omitted) into its binary form, the files
-.B ucadata.dat
-and
-.BR invuca.dat .
-These binary files can then be read directly by ICU, or used by
-.BR pkgdata (8)
-for incorporation into a larger archive or library.
-.SH OPTIONS
-.TP
-.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
-Print help about usage and exit.
-.TP
-.BR "\-V\fP, \fB\-\-version"
-Print the version of
-.B genuca
-and exit.
-.TP
-.BR "\-v\fP, \fB\-\-verbose"
-Display extra informative messages during execution.
-.TP
-.BI "\-c\fP, \fB\-\-copyright"
-Include a copyright notice into the binary data.
-.TP
-.BI "\-s\fP, \fB\-\-sourcedir" " source"
-Set the source directory to
-.IR source .
-The default source directory is specified by the environment variable
-.BR ICU_DATA .
-.TP
-.BI "\-d\fP, \fB\-\-destdir" " destination"
-Set the destination directory to
-.IR destination .
-The default destination directory is specified by the environment variable
-.BR ICU_DATA .
-.SH ENVIRONMENT
-.TP 10
-.B ICU_DATA
-Specifies the directory containing ICU data. Defaults to
-.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
-Some tools in ICU depend on the presence of the trailing slash. It is thus
-important to make sure that it is present if
-.B ICU_DATA
-is set.
-.SH FILES
-.TP 15
-.B FractionalUCA.txt
-Machine-readable file containing data for the Unicode collation algorithm.
-.SH VERSION
-@VERSION@
-.SH COPYRIGHT
-Copyright (C) 2001 IBM, Inc. and others.
-.SH SEE ALSO
-.BR pkgdata (8)
diff --git a/tools/genuca/genuca.cpp b/tools/genuca/genuca.cpp
deleted file mode 100644
index f5c6ab56..00000000
--- a/tools/genuca/genuca.cpp
+++ /dev/null
@@ -1,1215 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2000-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: genuca.cpp
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created at the end of XX century
-* created by: Vladimir Weinstein
-*
-* This program reads the Franctional UCA table and generates
-* internal format for UCA table as well as inverse UCA table.
-* It then writes binary files containing the data: ucadata.dat
-* & invuca.dat
-* Change history:
-* 02/23/2001 grhoten Made it into a tool
-* 02/23/2001 weiv Moved element & table handling code to i18n
-* 05/09/2001 weiv Case bits are now in the CEs, not in front
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/udata.h"
-#include "unicode/uclean.h"
-#include "ucol_imp.h"
-#include "genuca.h"
-#include "uoptions.h"
-#include "toolutil.h"
-#include "unewdata.h"
-#include "cstring.h"
-#include "cmemory.h"
-
-#include <stdio.h>
-
-/*
- * Global - verbosity
- */
-UBool VERBOSE = FALSE;
-
-static UVersionInfo UCAVersion;
-
-#if UCONFIG_NO_COLLATION
-
-/* dummy UDataInfo cf. udata.h */
-static UDataInfo dummyDataInfo = {
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- U_SIZEOF_UCHAR,
- 0,
-
- { 0, 0, 0, 0 }, /* dummy dataFormat */
- { 0, 0, 0, 0 }, /* dummy formatVersion */
- { 0, 0, 0, 0 } /* dummy dataVersion */
-};
-
-#else
-
-static const UDataInfo ucaDataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- sizeof(UChar),
- 0,
-
- {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3}, /* dataFormat="UCol" */
- /* 03/26/2002 bumped up version since format has changed */
- /* 09/16/2002 bumped up version since we went from UColAttributeValue */
- /* to int32_t in UColOptionSet */
- /* 05/13/2003 This one also updated since we added UCA and UCD versions */
- /* to header */
- /* 09/11/2003 Adding information required by data swapper */
- {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3}, /* formatVersion */
- {0, 0, 0, 0} /* dataVersion = Unicode Version*/
-};
-
-static const UDataInfo invUcaDataInfo={
- sizeof(UDataInfo),
- 0,
-
- U_IS_BIG_ENDIAN,
- U_CHARSET_FAMILY,
- sizeof(UChar),
- 0,
-
- {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3}, /* dataFormat="InvC" */
- /* 03/26/2002 bumped up version since format has changed */
- /* 04/29/2003 2.1 format - we have added UCA version to header */
- {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3}, /* formatVersion */
- {0, 0, 0, 0} /* dataVersion = Unicode Version*/
-};
-
-UCAElements le;
-
-int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return 0;
- }
- char buffer[1024];
- int32_t i = 0;
- while(**from != separator) {
- if(**from != ' ') {
- *(buffer+i++) = **from;
- }
- (*from)++;
- }
- (*from)++;
- *(buffer + i) = 0;
- //*to = (char *)malloc(strlen(buffer)+1);
- strcpy(to, buffer);
- return i/2;
-}
-
-
-uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return 0;
- }
- uint32_t value = 0;
- char primsave = '\0';
- char secsave = '\0';
- char tersave = '\0';
- char *primend = primary+4;
- if(strlen(primary) > 4) {
- primsave = *primend;
- *primend = '\0';
- }
- char *secend = secondary+2;
- if(strlen(secondary) > 2) {
- secsave = *secend;
- *secend = '\0';
- }
- char *terend = tertiary+2;
- if(strlen(tertiary) > 2) {
- tersave = *terend;
- *terend = '\0';
- }
- uint32_t primvalue = (uint32_t)((*primary!='\0')?strtoul(primary, &primend, 16):0);
- uint32_t secvalue = (uint32_t)((*secondary!='\0')?strtoul(secondary, &secend, 16):0);
- uint32_t tervalue = (uint32_t)((*tertiary!='\0')?strtoul(tertiary, &terend, 16):0);
- if(primvalue <= 0xFF) {
- primvalue <<= 8;
- }
-
- value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)|
- ((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
- (tervalue&UCOL_TERTIARYORDERMASK);
-
- if(primsave!='\0') {
- *primend = primsave;
- }
- if(secsave!='\0') {
- *secend = secsave;
- }
- if(tersave!='\0') {
- *terend = tersave;
- }
- return value;
-}
-
-static uint32_t inverseTable[0xFFFF][3];
-static uint32_t inversePos = 0;
-static UChar stringContinue[0xFFFF];
-static uint32_t sContPos = 0;
-
-static void addNewInverse(UCAElements *element, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return;
- }
- if(VERBOSE && isContinuation(element->CEs[1])) {
- //fprintf(stdout, "+");
- }
- inversePos++;
- inverseTable[inversePos][0] = element->CEs[0];
- if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
- inverseTable[inversePos][1] = element->CEs[1];
- } else {
- inverseTable[inversePos][1] = 0;
- }
- if(element->cSize < 2) {
- inverseTable[inversePos][2] = element->cPoints[0];
- } else { /* add a new store of cruft */
- inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
- memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
- sContPos += element->cSize+1;
- }
-}
-
-static void insertInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return;
- }
-
- if(VERBOSE && isContinuation(element->CEs[1])) {
- //fprintf(stdout, "+");
- }
- if(position <= inversePos) {
- /*move stuff around */
- uint32_t amountToMove = (inversePos - position+1)*sizeof(inverseTable[0]);
- uprv_memmove(inverseTable[position+1], inverseTable[position], amountToMove);
- }
- inverseTable[position][0] = element->CEs[0];
- if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
- inverseTable[position][1] = element->CEs[1];
- } else {
- inverseTable[position][1] = 0;
- }
- if(element->cSize < 2) {
- inverseTable[position][2] = element->cPoints[0];
- } else { /* add a new store of cruft */
- inverseTable[position][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
- memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
- sContPos += element->cSize+1;
- }
- inversePos++;
-}
-
-static void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
-
- if(U_FAILURE(*status)) {
- return;
- }
-
- if((inverseTable[position][2] & UCOL_INV_SIZEMASK) == 0) { /* single element, have to make new extension place and put both guys there */
- stringContinue[sContPos] = (UChar)inverseTable[position][2];
- inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | sContPos;
- sContPos++;
- stringContinue[sContPos++] = 0xFFFF;
- memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
- sContPos += element->cSize;
- stringContinue[sContPos++] = 0xFFFE;
- } else { /* adding to the already existing continuing table */
- uint32_t contIndex = inverseTable[position][2] & UCOL_INV_OFFSETMASK;
- uint32_t contSize = (inverseTable[position][2] & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;
-
- if(contIndex+contSize < sContPos) {
- /*fprintf(stderr, ".", sContPos, contIndex+contSize);*/
- memcpy(stringContinue+contIndex+contSize+element->cSize+1, stringContinue+contIndex+contSize, (element->cSize+1)*sizeof(UChar));
- }
-
- stringContinue[contIndex+contSize-1] = 0xFFFF;
- memcpy(stringContinue+contIndex+contSize, element->cPoints, element->cSize*sizeof(UChar));
- sContPos += element->cSize+1;
- stringContinue[contIndex+contSize+element->cSize] = 0xFFFE;
-
- inverseTable[position][2] = ((contSize+element->cSize+1) << UCOL_INV_SHIFTVALUE) | contIndex;
- }
-}
-
-/*
- * Takes two CEs (lead and continuation) and
- * compares them as CEs should be compared:
- * primary vs. primary, secondary vs. secondary
- * tertiary vs. tertiary
- */
-static int32_t compareCEs(uint32_t *source, uint32_t *target) {
- uint32_t s1 = source[0], s2, t1 = target[0], t2;
- if(isContinuation(source[1])) {
- s2 = source[1];
- } else {
- s2 = 0;
- }
- if(isContinuation(target[1])) {
- t2 = target[1];
- } else {
- t2 = 0;
- }
-
- uint32_t s = 0, t = 0;
- if(s1 == t1 && s2 == t2) {
- return 0;
- }
- s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
- t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
- if(s < t) {
- return -1;
- } else if(s > t) {
- return 1;
- } else {
- s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
- t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
- if(s < t) {
- return -1;
- } else if(s > t) {
- return 1;
- } else {
- s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
- t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
- if(s < t) {
- return -1;
- } else {
- return 1;
- }
- }
- }
-}
-
-static uint32_t addToInverse(UCAElements *element, UErrorCode *status) {
- uint32_t position = inversePos;
- uint32_t saveElement = element->CEs[0];
- int32_t compResult = 0;
- element->CEs[0] &= 0xFFFFFF3F;
- if(element->noOfCEs == 1) {
- element->CEs[1] = 0;
- }
- if(inversePos == 0) {
- inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0;
- addNewInverse(element, status);
- } else if(compareCEs(inverseTable[inversePos], element->CEs) > 0) {
- while((compResult = compareCEs(inverseTable[--position], element->CEs)) > 0);
- if(VERBOSE) { fprintf(stdout, "p:%u ", (int)position); }
- if(compResult == 0) {
- addToExistingInverse(element, position, status);
- } else {
- insertInverse(element, position+1, status);
- }
- } else if(compareCEs(inverseTable[inversePos], element->CEs) == 0) {
- addToExistingInverse(element, inversePos, status);
- } else {
- addNewInverse(element, status);
- }
- element->CEs[0] = saveElement;
- if(VERBOSE) { fprintf(stdout, "+"); }
- return inversePos;
-}
-
-static InverseUCATableHeader *assembleInverseTable(UErrorCode *status)
-{
- InverseUCATableHeader *result = NULL;
- uint32_t headerByteSize = paddedsize(sizeof(InverseUCATableHeader));
- uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3;
- uint32_t contsByteSize = sContPos * sizeof(UChar);
- uint32_t i = 0;
-
- result = (InverseUCATableHeader *)uprv_malloc(headerByteSize + inverseTableByteSize + contsByteSize);
- uprv_memset(result, 0, headerByteSize + inverseTableByteSize + contsByteSize);
- if(result != NULL) {
- result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize;
-
- inversePos++;
- inverseTable[inversePos][0] = 0xFFFFFFFF;
- inverseTable[inversePos][1] = 0xFFFFFFFF;
- inverseTable[inversePos][2] = 0x0000FFFF;
- inversePos++;
-
- for(i = 2; i<inversePos; i++) {
- if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) {
- fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]);
- } else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) {
- fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]);
- }
- }
-
- result->tableSize = inversePos;
- result->contsSize = sContPos;
-
- result->table = headerByteSize;
- result->conts = headerByteSize + inverseTableByteSize;
-
- memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize);
- memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize);
-
- } else {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- return result;
-}
-
-
-static void writeOutInverseData(InverseUCATableHeader *data,
- const char *outputDir,
- const char *copyright,
- UErrorCode *status)
-{
- UNewDataMemory *pData;
-
- long dataLength;
-
- UDataInfo invUcaInfo;
- uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo));
- u_getUnicodeVersion(invUcaInfo.dataVersion);
-
- pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo,
- copyright, status);
-
- if(U_FAILURE(*status)) {
- fprintf(stderr, "Error: unable to create %s"INVC_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
- return;
- }
-
- /* write the data to the file */
- if (VERBOSE) {
- fprintf(stdout, "Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR,
- INVC_DATA_NAME,
- INVC_DATA_TYPE);
- }
- udata_writeBlock(pData, data, data->byteSize);
-
- /* finish up */
- dataLength=udata_finish(pData, status);
- if(U_FAILURE(*status)) {
- fprintf(stderr, "Error: error %d writing the output file\n", *status);
- return;
- }
-}
-
-
-
-static int32_t hex2num(char hex) {
- if(hex>='0' && hex <='9') {
- return hex-'0';
- } else if(hex>='a' && hex<='f') {
- return hex-'a'+10;
- } else if(hex>='A' && hex<='F') {
- return hex-'A'+10;
- } else {
- return 0;
- }
-}
-
-UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) {
- char buffer[2048], primary[100], secondary[100], tertiary[100];
- UBool detectedContraction;
- int32_t i = 0;
- unsigned int theValue;
- char *pointer = NULL;
- char *commentStart = NULL;
- char *startCodePoint = NULL;
- char *endCodePoint = NULL;
- char *spacePointer = NULL;
- char *dashPointer = NULL;
- char *result = fgets(buffer, 2048, data);
- int32_t buflen = (int32_t)uprv_strlen(buffer);
- if(U_FAILURE(*status)) {
- return 0;
- }
- *primary = *secondary = *tertiary = '\0';
- if(result == NULL) {
- if(feof(data)) {
- return NULL;
- } else {
- fprintf(stderr, "empty line but no EOF!\n");
- *status = U_INVALID_FORMAT_ERROR;
- return NULL;
- }
- }
- while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) {
- buffer[--buflen] = 0;
- }
-
- if(buffer[0] == 0 || buffer[0] == '#') {
- return NULL; // just a comment, skip whole line
- }
-
- UCAElements *element = &le; //(UCAElements *)malloc(sizeof(UCAElements));
-
- enum ActionType {
- READCE,
- READHEX,
- READUCAVERSION
- };
-
- // Directives.
- if(buffer[0] == '[') {
- uint32_t cnt = 0;
- static const struct {
- char name[128];
- uint32_t *what;
- ActionType what_to_do;
- } vt[] = { {"[first tertiary ignorable", consts->UCA_FIRST_TERTIARY_IGNORABLE, READCE},
- {"[last tertiary ignorable", consts->UCA_LAST_TERTIARY_IGNORABLE, READCE},
- {"[first secondary ignorable", consts->UCA_FIRST_SECONDARY_IGNORABLE, READCE},
- {"[last secondary ignorable", consts->UCA_LAST_SECONDARY_IGNORABLE, READCE},
- {"[first primary ignorable", consts->UCA_FIRST_PRIMARY_IGNORABLE, READCE},
- {"[last primary ignorable", consts->UCA_LAST_PRIMARY_IGNORABLE, READCE},
- {"[first variable", consts->UCA_FIRST_VARIABLE, READCE},
- {"[last variable", consts->UCA_LAST_VARIABLE, READCE},
- {"[first regular", consts->UCA_FIRST_NON_VARIABLE, READCE},
- {"[last regular", consts->UCA_LAST_NON_VARIABLE, READCE},
- {"[first implicit", consts->UCA_FIRST_IMPLICIT, READCE},
- {"[last implicit", consts->UCA_LAST_IMPLICIT, READCE},
- {"[first trailing", consts->UCA_FIRST_TRAILING, READCE},
- {"[last trailing", consts->UCA_LAST_TRAILING, READCE},
-
- {"[fixed top", &consts->UCA_PRIMARY_TOP_MIN, READHEX},
- {"[fixed first implicit byte", &consts->UCA_PRIMARY_IMPLICIT_MIN, READHEX},
- {"[fixed last implicit byte", &consts->UCA_PRIMARY_IMPLICIT_MAX, READHEX},
- {"[fixed first trail byte", &consts->UCA_PRIMARY_TRAILING_MIN, READHEX},
- {"[fixed last trail byte", &consts->UCA_PRIMARY_TRAILING_MAX, READHEX},
- {"[fixed first special byte", &consts->UCA_PRIMARY_SPECIAL_MIN, READHEX},
- {"[fixed last special byte", &consts->UCA_PRIMARY_SPECIAL_MAX, READHEX},
- {"[variable top = ", &t->options->variableTopValue, READHEX},
- {"[UCA version = ", NULL, READUCAVERSION}
- };
- for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) {
- uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name);
- if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) {
- element->variableTop = TRUE;
- if(vt[cnt].what_to_do == READHEX) {
- if(sscanf(buffer+vtLen, "%4x", &theValue) != 1) /* read first code point */
- {
- fprintf(stderr, " scanf(hex) failed on !\n ");
- }
- *(vt[cnt].what) = (UChar)theValue;
- //if(cnt == 1) { // first implicit
- // we need to set the value for top next
- //uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base
- //consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303;
- //}
- } else if (vt[cnt].what_to_do == READCE) { /* vt[cnt].what_to_do == READCE */
- pointer = strchr(buffer+vtLen, '[');
- if(pointer) {
- pointer++;
- element->sizePrim[0]=readElement(&pointer, primary, ',', status);
- element->sizeSec[0]=readElement(&pointer, secondary, ',', status);
- element->sizeTer[0]=readElement(&pointer, tertiary, ']', status);
-
- vt[cnt].what[0] = getSingleCEValue(primary, secondary, tertiary, status);
- if(element->sizePrim[0] > 2 || element->sizeSec[0] > 1 || element->sizeTer[0] > 1) {
- uint32_t CEi = 1;
- uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
- if(2*CEi<element->sizePrim[i]) {
- value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
- value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
- }
-
- if(2*CEi+1<element->sizePrim[i]) {
- value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
- value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
- }
-
- if(CEi<element->sizeSec[i]) {
- value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
- value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
- }
-
- if(CEi<element->sizeTer[i]) {
- value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
- value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
- }
-
- CEi++;
-
- vt[cnt].what[1] = value;
- //element->CEs[CEindex++] = value;
- } else {
- vt[cnt].what[1] = 0;
- }
- } else {
- fprintf(stderr, "Failed to read a CE from line %s\n", buffer);
- }
- } else { //vt[cnt].what_to_do == READUCAVERSION
- u_versionFromString(UCAVersion, buffer+vtLen);
- if(VERBOSE) {
- fprintf(stdout, "UCA version [%hu.%hu.%hu.%hu]\n", UCAVersion[0], UCAVersion[1], UCAVersion[2], UCAVersion[3]);
- }
- }
- //element->cPoints[0] = (UChar)theValue;
- //return element;
- return NULL;
- }
- }
- fprintf(stderr, "Warning: unrecognized option: %s\n", buffer);
- //*status = U_INVALID_FORMAT_ERROR;
- return NULL;
- }
- element->variableTop = FALSE;
-
- startCodePoint = buffer;
- endCodePoint = strchr(startCodePoint, ';');
-
- if(endCodePoint == 0) {
- fprintf(stderr, "error - line with no code point!\n");
- *status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
- return NULL;
- } else {
- *(endCodePoint) = 0;
- }
-
- memset(element, 0, sizeof(*element));
-
- element->cPoints = element->uchars;
-
- spacePointer = strchr(buffer, ' ');
- if(sscanf(buffer, "%4x", &theValue) != 1) /* read first code point */
- {
- fprintf(stderr, " scanf(hex) failed!\n ");
- }
- element->cPoints[0] = (UChar)theValue;
-
- if(spacePointer == 0) {
- detectedContraction = FALSE;
- element->cSize = 1;
- } else {
- dashPointer = strchr(buffer, '|');
- if (dashPointer != NULL) {
- // prefix characters
- element->prefixChars[0] = (UChar)theValue;
- element->prefixSize = 1;
- element->prefix = element->prefixChars;
- sscanf(dashPointer+1, "%4x", &theValue);
- element->cPoints[0] = (UChar)theValue;
- element->cSize = 1;
- }
- else {
- // Contractions or surrogate characters.
- i = 1;
- detectedContraction = TRUE;
- while(spacePointer != NULL) {
- sscanf(spacePointer+1, "%4x", &theValue);
- element->cPoints[i++] = (UChar)theValue;
- spacePointer = strchr(spacePointer+1, ' ');
- }
- element->cSize = i;
- }
-
-
- //fprintf(stderr, "Number of codepoints in contraction: %i\n", i);
- }
-
- startCodePoint = endCodePoint+1;
-
- commentStart = strchr(startCodePoint, '#');
- if(commentStart == NULL) {
- commentStart = strlen(startCodePoint) + startCodePoint;
- }
-
- i = 0;
- uint32_t CEindex = 0;
- element->noOfCEs = 0;
- for(;;) {
- endCodePoint = strchr(startCodePoint, ']');
- if(endCodePoint == NULL || endCodePoint >= commentStart) {
- break;
- }
- pointer = strchr(startCodePoint, '[');
- pointer++;
-
- element->sizePrim[i]=readElement(&pointer, primary, ',', status);
- element->sizeSec[i]=readElement(&pointer, secondary, ',', status);
- element->sizeTer[i]=readElement(&pointer, tertiary, ']', status);
-
-
- /* I want to get the CEs entered right here, including continuation */
- element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, status);
-
- uint32_t CEi = 1;
- while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
- uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
- if(2*CEi<element->sizePrim[i]) {
- value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
- value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
- }
-
- if(2*CEi+1<element->sizePrim[i]) {
- value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
- value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
- }
-
- if(CEi<element->sizeSec[i]) {
- value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
- value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
- }
-
- if(CEi<element->sizeTer[i]) {
- value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
- value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
- }
-
- CEi++;
-
- element->CEs[CEindex++] = value;
- }
-
- startCodePoint = endCodePoint+1;
- i++;
- }
- element->noOfCEs = CEindex;
-#if 0
- element->isThai = UCOL_ISTHAIPREVOWEL(element->cPoints[0]);
-#endif
- // we don't want any strange stuff after useful data!
- if (pointer == NULL) {
- /* huh? Did we get ']' without the '['? Pair your brackets! */
- *status=U_INVALID_FORMAT_ERROR;
- }
- else {
- while(pointer < commentStart) {
- if(*pointer != ' ' && *pointer != '\t')
- {
- *status=U_INVALID_FORMAT_ERROR;
- break;
- }
- pointer++;
- }
- }
-
- if(U_FAILURE(*status)) {
- fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
- *status = U_INTERNAL_PROGRAM_ERROR;
- return NULL;
- }
-
- return element;
-}
-
-
-void writeOutData(UCATableHeader *data,
- UCAConstants *consts,
- UChar contractions[][3],
- uint32_t noOfcontractions,
- const char *outputDir,
- const char *copyright,
- UErrorCode *status)
-{
- if(U_FAILURE(*status)) {
- return;
- }
-
- uint32_t size = data->size;
-
- data->UCAConsts = data->size;
- data->size += paddedsize(sizeof(UCAConstants));
-
- if(noOfcontractions != 0) {
- contractions[noOfcontractions][0] = 0;
- contractions[noOfcontractions][1] = 0;
- contractions[noOfcontractions][2] = 0;
- noOfcontractions++;
-
-
- data->contractionUCACombos = data->size;
- data->contractionUCACombosWidth = 3;
- data->contractionUCACombosSize = noOfcontractions;
- data->size += paddedsize((noOfcontractions*3*sizeof(UChar)));
- }
-
- UNewDataMemory *pData;
-
- long dataLength;
- UDataInfo ucaInfo;
- uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo));
- u_getUnicodeVersion(ucaInfo.dataVersion);
-
- pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo,
- copyright, status);
-
- if(U_FAILURE(*status)) {
- fprintf(stderr, "Error: unable to create %s"UCA_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
- return;
- }
-
- /* write the data to the file */
- if (VERBOSE) {
- fprintf(stdout, "Writing out UCA table: %s%c%s.%s\n", outputDir,
- U_FILE_SEP_CHAR,
- U_ICUDATA_NAME "_" UCA_DATA_NAME,
- UCA_DATA_TYPE);
- }
- udata_writeBlock(pData, data, size);
-
- // output the constants here
- udata_writeBlock(pData, consts, sizeof(UCAConstants));
-
- if(noOfcontractions != 0) {
- udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(UChar));
- udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(UChar))) - noOfcontractions*3*sizeof(uint16_t));
- }
-
- /* finish up */
- dataLength=udata_finish(pData, status);
- if(U_FAILURE(*status)) {
- fprintf(stderr, "Error: error %d writing the output file\n", *status);
- return;
- }
-}
-
-static int32_t
-write_uca_table(const char *filename,
- const char *outputDir,
- const char *copyright,
- UErrorCode *status)
-{
- FILE *data = fopen(filename, "r");
- if(data == NULL) {
- fprintf(stderr, "Couldn't open file: %s\n", filename);
- return -1;
- }
- uint32_t line = 0;
- UCAElements *element = NULL;
- UChar variableTopValue = 0;
- UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
- /* test for NULL */
- if(myD == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- fclose(data);
- return 0;
- }
- uprv_memset(myD, 0, sizeof(UCATableHeader));
- UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
- /* test for NULL */
- if(opts == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(myD);
- fclose(data);
- return 0;
- }
- uprv_memset(opts, 0, sizeof(UColOptionSet));
- UChar contractionCEs[512][3];
- uprv_memset(contractionCEs, 0, 512*3*sizeof(UChar));
- uint32_t noOfContractions = 0;
- UCAConstants consts;
- uprv_memset(&consts, 0, sizeof(consts));
-#if 0
- UCAConstants consts = {
- UCOL_RESET_TOP_VALUE,
- UCOL_FIRST_PRIMARY_IGNORABLE,
- UCOL_LAST_PRIMARY_IGNORABLE,
- UCOL_LAST_PRIMARY_IGNORABLE_CONT,
- UCOL_FIRST_SECONDARY_IGNORABLE,
- UCOL_LAST_SECONDARY_IGNORABLE,
- UCOL_FIRST_TERTIARY_IGNORABLE,
- UCOL_LAST_TERTIARY_IGNORABLE,
- UCOL_FIRST_VARIABLE,
- UCOL_LAST_VARIABLE,
- UCOL_FIRST_NON_VARIABLE,
- UCOL_LAST_NON_VARIABLE,
-
- UCOL_NEXT_TOP_VALUE,
-/*
- UCOL_NEXT_FIRST_PRIMARY_IGNORABLE,
- UCOL_NEXT_LAST_PRIMARY_IGNORABLE,
- UCOL_NEXT_FIRST_SECONDARY_IGNORABLE,
- UCOL_NEXT_LAST_SECONDARY_IGNORABLE,
- UCOL_NEXT_FIRST_TERTIARY_IGNORABLE,
- UCOL_NEXT_LAST_TERTIARY_IGNORABLE,
- UCOL_NEXT_FIRST_VARIABLE,
- UCOL_NEXT_LAST_VARIABLE,
-*/
-
- PRIMARY_IMPLICIT_MIN,
- PRIMARY_IMPLICIT_MAX
- };
-#endif
-
-
- uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
-
- opts->variableTopValue = variableTopValue;
- opts->strength = UCOL_TERTIARY;
- opts->frenchCollation = UCOL_OFF;
- opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/
- opts->caseFirst = UCOL_OFF; /* who goes first, lower case or uppercase */
- opts->caseLevel = UCOL_OFF; /* do we have an extra case level */
- opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
- opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */
- opts->numericCollation = UCOL_OFF;
- myD->jamoSpecial = FALSE;
-
- tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status);
- if(U_FAILURE(*status))
- {
- fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status));
- uprv_free(opts);
- uprv_free(myD);
- fclose(data);
- return -1;
- }
-
-#if 0
- IMPLICIT_TAG = 9,
-/*
- *****************************************************************************************
- * NON_CHARACTER FDD0 - FDEF, FFFE, FFFF, 1FFFE, 1FFFF, 2FFFE, 2FFFF,...e.g. **FFFE, **FFFF
- ******************************************************************************************
- */
-#endif
-
-// * set to zero
-struct {
- UChar32 start;
- UChar32 end;
- int32_t value;
- } ranges[] =
- {
-#if 0
- {0xAC00, 0xD7AF, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
- {0xD800, 0xDBFF, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
- {0xDC00, 0xDFFF, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF
- {0x3400, 0x4DB5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
- {0x4E00, 0x9FA5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/
- {0xF900, 0xFA2D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/
- {0x20000, 0x2A6D6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/
- {0x2F800, 0x2FA1D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/
-#endif
- {0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
- //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
- {0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF
- // Now directly handled in the collation code by the swapCJK function.
- //{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
- //{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/
- //{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/
- //{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/
- //{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/
- };
- uint32_t i = 0;
-
- for(i = 0; i<sizeof(ranges)/sizeof(ranges[0]); i++) {
- /*ucmpe32_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value); */
- utrie_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value, TRUE);
- }
-
-
- int32_t surrogateCount = 0;
- while(!feof(data)) {
- if(U_FAILURE(*status)) {
- fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
- *status, u_errorName(*status), (int)line, filename);
- exit(*status);
- }
-
- element = readAnElement(data, t, &consts, status);
- line++;
- if(VERBOSE) {
- fprintf(stdout, "%u ", (int)line);
- }
- if(element != NULL) {
- // we have read the line, now do something sensible with the read data!
-
- // Below stuff was taken care of in readAnElement
- //if(element->variableTop == TRUE && variableTopValue == 0) {
- // t->options->variableTopValue = element->cPoints[0];
- //}
-
- // if element is a contraction, we want to add it to contractions
- if(element->cSize > 1 && element->cPoints[0] != 0xFDD0) { // this is a contraction
- if(UTF_IS_LEAD(element->cPoints[0]) && UTF_IS_TRAIL(element->cPoints[1]) && element->cSize == 2) {
- surrogateCount++;
- } else {
- contractionCEs[noOfContractions][0] = element->cPoints[0];
- contractionCEs[noOfContractions][1] = element->cPoints[1];
- if(element->cSize > 2) { // the third one
- contractionCEs[noOfContractions][2] = element->cPoints[2];
- } else {
- contractionCEs[noOfContractions][2] = 0;
- }
- noOfContractions++;
- }
- }
- else {
- // TODO (claireho): does this work? Need more tests
- // The following code is to handle the UCA pre-context rules
- // for L/l with middle dot. We share the structures for contractionCombos.
- // The format for pre-context character is
- // contractionCEs[0]: codepoint in element->cPoints[0]
- // contractionCEs[1]: '\0' to differentiate with contractions.
- // contractionCEs[2]: prefix char
- if (element->prefixSize>0) {
- contractionCEs[noOfContractions][0]=element->cPoints[0];
- contractionCEs[noOfContractions][1]='\0';
- contractionCEs[noOfContractions][2]=element->prefixChars[0];
- noOfContractions++;
- }
-
- }
-
- /* we're first adding to inverse, because addAnElement will reverse the order */
- /* of code points and stuff... we don't want that to happen */
- addToInverse(element, status);
- if(!(element->cSize > 1 && element->cPoints[0] == 0xFDD0)) {
- uprv_uca_addAnElement(t, element, status);
- }
- }
- }
-
- if(UCAVersion[0] == 0 && UCAVersion[1] == 0 && UCAVersion[2] == 0 && UCAVersion[3] == 0) {
- fprintf(stderr, "UCA version not specified. Cannot create data file!\n");
- uprv_uca_closeTempTable(t);
- uprv_free(opts);
- uprv_free(myD);
- fclose(data);
- return -1;
- }
-/* {
- uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL);
- }*/
-
- if (VERBOSE) {
- fprintf(stdout, "\nLines read: %u\n", (int)line);
- fprintf(stdout, "Surrogate count: %i\n", (int)surrogateCount);
- fprintf(stdout, "Raw data breakdown:\n");
- /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
- fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
- fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
- fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
- }
-
-
- /* produce canonical closure for table */
- /* first set up constants for implicit calculation */
- uprv_uca_initImplicitConstants(status);
- /* do the closure */
- int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status);
- if(noOfClosures != 0) {
- fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
- }
-
- /* test */
- UCATableHeader *myData = uprv_uca_assembleTable(t, status);
-
- if (VERBOSE) {
- fprintf(stdout, "Compacted data breakdown:\n");
- /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
- fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
- fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
- fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
- }
-
- if(U_FAILURE(*status)) {
- fprintf(stderr, "Error creating table: %s\n", u_errorName(*status));
- uprv_uca_closeTempTable(t);
- uprv_free(opts);
- uprv_free(myD);
- fclose(data);
- return -1;
- }
-
- /* populate the version info struct with version info*/
- myData->version[0] = UCOL_BUILDER_VERSION;
- myData->version[1] = UCAVersion[0];
- myData->version[2] = UCAVersion[1];
- myData->version[3] = UCAVersion[2];
- /*TODO:The fractional rules version should be taken from FractionalUCA.txt*/
- // Removed this macro. Instead, we use the fields below
- //myD->version[1] = UCOL_FRACTIONAL_UCA_VERSION;
- //myD->UCAVersion = UCAVersion; // out of FractionalUCA.txt
- uprv_memcpy(myData->UCAVersion, UCAVersion, sizeof(UVersionInfo));
- u_getUnicodeVersion(myData->UCDVersion);
-
- writeOutData(myData, &consts, contractionCEs, noOfContractions, outputDir, copyright, status);
-
- InverseUCATableHeader *inverse = assembleInverseTable(status);
- uprv_memcpy(inverse->UCAVersion, UCAVersion, sizeof(UVersionInfo));
- writeOutInverseData(inverse, outputDir, copyright, status);
-
- uprv_uca_closeTempTable(t);
- uprv_free(myD);
- uprv_free(opts);
-
-
- uprv_free(myData);
- uprv_free(inverse);
- fclose(data);
-
- return 0;
-}
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-static UOption options[]={
- UOPTION_HELP_H, /* 0 Numbers for those who*/
- UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
- UOPTION_COPYRIGHT, /* 2 */
- UOPTION_VERSION, /* 3 */
- UOPTION_DESTDIR, /* 4 */
- UOPTION_SOURCEDIR, /* 5 */
- UOPTION_VERBOSE, /* 6 */
- UOPTION_ICUDATADIR /* 7 */
- /* weiv can't count :))))) */
-};
-
-int main(int argc, char* argv[]) {
- UErrorCode status = U_ZERO_ERROR;
- const char* destdir = NULL;
- const char* srcDir = NULL;
- char filename[300];
- char *basename = NULL;
- const char *copyright = NULL;
- uprv_memset(&UCAVersion, 0, 4);
-
- U_MAIN_INIT_ARGS(argc, argv);
-
- /* preset then read command line options */
- options[4].value=u_getDataDirectory();
- options[5].value="";
- argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
-
- /* error handling, printing usage message */
- if(argc<0) {
- fprintf(stderr,
- "error in command line argument \"%s\"\n",
- argv[-argc]);
- } else if(argc<2) {
- argc=-1;
- }
- if(options[0].doesOccur || options[1].doesOccur) {
- fprintf(stderr,
- "usage: %s [-options] file\n"
- "\tRead in UCA collation text data and write out the binary collation data\n"
- "options:\n"
- "\t-h or -? or --help this usage text\n"
- "\t-V or --version show a version message\n"
- "\t-c or --copyright include a copyright notice\n"
- "\t-d or --destdir destination directory, followed by the path\n"
- "\t-s or --sourcedir source directory, followed by the path\n"
- "\t-v or --verbose turn on verbose output\n"
- "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
- "\t followed by path, defaults to %s\n",
- argv[0], u_getDataDirectory());
- return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
- }
- if(options[3].doesOccur) {
- fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
-#if UCONFIG_NO_COLLATION
- 0, 0
-#else
- UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1
-#endif
- );
- fprintf(stdout, U_COPYRIGHT_STRING"\n");
- exit(0);
- }
-
- /* get the options values */
- destdir = options[4].value;
- srcDir = options[5].value;
- VERBOSE = options[6].doesOccur;
-
- if (options[2].doesOccur) {
- copyright = U_COPYRIGHT_STRING;
- }
-
- if (options[7].doesOccur) {
- u_setDataDirectory(options[7].value);
- }
- /* Initialize ICU */
- u_init(&status);
- if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
- fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
- argv[0], u_errorName(status));
- exit(1);
- }
- status = U_ZERO_ERROR;
-
-
- /* prepare the filename beginning with the source dir */
- uprv_strcpy(filename, srcDir);
- basename=filename+uprv_strlen(filename);
-
- if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
- *basename++ = U_FILE_SEP_CHAR;
- }
-
- if(argc < 0) {
- uprv_strcpy(basename, "FractionalUCA.txt");
- } else {
- argv++;
- uprv_strcpy(basename, getLongPathname(*argv));
- }
-
-#if 0
- if(u_getCombiningClass(0x0053) == 0)
- {
- fprintf(stderr, "SEVERE ERROR: Normalization data is not functioning! Bailing out. Was not able to load unorm.dat.\n");
- exit(1);
- }
-#endif
-
-#if UCONFIG_NO_COLLATION
-
- UNewDataMemory *pData;
- const char *msg;
-
- msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
- fprintf(stderr, "%s\n", msg);
- pData = udata_create(destdir, UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo,
- NULL, &status);
- udata_writeBlock(pData, msg, strlen(msg));
- udata_finish(pData, &status);
-
- msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
- fprintf(stderr, "%s\n", msg);
- pData = udata_create(destdir, INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo,
- NULL, &status);
- udata_writeBlock(pData, msg, strlen(msg));
- udata_finish(pData, &status);
-
- return (int)status;
-
-#else
-
- return write_uca_table(filename, destdir, copyright, &status);
-
-#endif
-}
-
-/*
- * Hey, Emacs, please set the following:
- *
- * Local Variables:
- * indent-tabs-mode: nil
- * End:
- *
- */
diff --git a/tools/genuca/genuca.h b/tools/genuca/genuca.h
deleted file mode 100644
index a98e6699..00000000
--- a/tools/genuca/genuca.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-*******************************************************************************
-*
-* Copyright (C) 2000-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: genuca.h
-* encoding: US-ASCII
-* tab size: 8 (not used)
-* indentation:4
-*
-* created at the end of XX century
-* created by: Vladimir Weinstein
-*
-* This program reads the Franctional UCA table and generates
-* internal format for UCA table as well as inverse UCA table.
-* It then writes binary files containing the data: ucadata.dat
-* & invuca.dat
-*/
-
-#ifndef UCADATA_H
-#define UCADATA_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-#include "ucol_elm.h"
-#include <stdio.h>
-#include <string.h>
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "ucol_imp.h"
-#include "uhash.h"
-#include "unewdata.h"
-
-
-void deleteElement(void *element);
-int32_t readElement(char **from, char *to, char separator, UErrorCode *status);
-uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UBool caseBit, UErrorCode *status);
-void printOutTable(UCATableHeader *myData, UErrorCode *status);
-UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status);
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif
diff --git a/tools/icu-svnprops-check.py b/tools/icu-svnprops-check.py
new file mode 100755
index 00000000..7ea63e1a
--- /dev/null
+++ b/tools/icu-svnprops-check.py
@@ -0,0 +1,195 @@
+#! /usr/bin/python
+
+# Copyright (C) 2009, International Business Machines Corporation, Google and Others.
+# All rights reserved.
+
+#
+# Script to check and fix svn property settings for ICU source files.
+# Also check for the correct line endings on files with svn:eol-style = native
+#
+# THIS SCRIPT DOES NOT WORK ON WINDOWS
+# It only works correctly on platforms where the native line ending is a plain \n
+#
+# usage:
+# icu-svnprops-check.py [options]
+#
+# options:
+# -f | --fix Fix any problems that are found
+# -h | --help Print a usage line and exit.
+#
+# The tool operates recursively on the directory from which it is run.
+# Only files from the svn repository are checked.
+# No changes are made to the repository; only the working copy will be altered.
+
+import sys
+import os
+import os.path
+import re
+import getopt
+
+#
+# svn autoprops definitions.
+# Copy and paste here the ICU recommended auto-props from
+# http://icu-project.org/docs/subversion_howto/index.html
+#
+# This program will parse this autoprops string, and verify that files in
+# the repository have the recommended properties set.
+#
+svn_auto_props = """
+### Section for configuring automatic properties.
+[auto-props]
+### The format of the entries is:
+### file-name-pattern = propname[=value][;propname[=value]...]
+### The file-name-pattern can contain wildcards (such as '*' and
+### '?'). All entries which match will be applied to the file.
+### Note that auto-props functionality must be enabled, which
+### is typically done by setting the 'enable-auto-props' option.
+*.c = svn:eol-style=native
+*.cc = svn:eol-style=native
+*.cpp = svn:eol-style=native
+*.h = svn:eol-style=native
+*.rc = svn:eol-style=native
+*.dsp = svn:eol-style=native
+*.dsw = svn:eol-style=native
+*.sln = svn:eol-style=native
+*.vcproj = svn:eol-style=native
+configure = svn:eol-style=native;svn:executable
+*.sh = svn:eol-style=native;svn:executable
+*.pl = svn:eol-style=native;svn:executable
+*.py = svn:eol-style=native;svn:executable
+*.txt = svn:mime-type=text/plain;svn:eol-style=native
+*.java = svn:eol-style=native
+*.ucm = svn:eol-style=native
+*.html = svn:eol-style=native;svn:mime-type=text/html
+*.htm = svn:eol-style=native;svn:mime-type=text/html
+*.xml = svn:eol-style=native
+Makefile = svn:eol-style=native
+*.in = svn:eol-style=native
+*.mak = svn:eol-style=native
+*.mk = svn:eol-style=native
+*.png = svn:mime-type=image/png
+*.jpeg = svn:mime-type=image/jpeg
+*.jpg = svn:mime-type=image/jpeg
+*.bin = svn:mime-type=application/octet-stream
+*.brk = svn:mime-type=application/octet-stream
+*.cnv = svn:mime-type=application/octet-stream
+*.dat = svn:mime-type=application/octet-stream
+*.icu = svn:mime-type=application/octet-stream
+*.res = svn:mime-type=application/octet-stream
+*.spp = svn:mime-type=application/octet-stream
+# new additions 2007-dec-5 srl (property name corrected here to svn:mime-type, as in the entries above)
+*.rtf = svn:mime-type=text/rtf
+*.pdf = svn:mime-type=application/pdf
+# changed 2008-04-08: modified .txt, above, adding mime-type
+"""
+
+
+# file_types: The parsed form of the svn auto-props specification.
+# A list of file types - .cc, .cpp, .txt, etc.
+# each element is a [type, proplist]
+# "type" is a regular expression string that will match a file name
+# prop list is another list, one element per property.
+# Each property item is a two element list, [prop name, prop value]
+file_types = list()
+
+def parse_auto_props():
+    """Parse svn_auto_props and append one (file name regex, [(prop name, prop value), ...]) entry per line to file_types."""
+    aprops = svn_auto_props.splitlines()
+    for propline in aprops:
+        if re.match("\s*(#.*)?$", propline):         # Match comment and blank lines
+            continue
+        if re.match("\s*\[auto-props\]", propline):  # Match the [auto-props] line.
+            continue
+        if not re.match("\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
+            print "Bad line from autoprops definitions: " + propline
+            continue
+        file_type, string_proplist = propline.split("=", 1)
+
+        # Transform the autoprops wildcard pattern into a regular expression, e.g. "*.cpp" ==> ".*\.cpp$".
+        # Literal dots are escaped first so that the dots introduced for "*" and "?" stay wildcards.
+        file_type = file_type.strip()
+        file_type = file_type.replace(".", "\.")
+        file_type = file_type.replace("*", ".*").replace("?", ".")
+        file_type = file_type + "$"
+
+        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
+        string_proplist = string_proplist.split(";")
+        proplist = list()
+        for prop in string_proplist:
+            if prop.find("=") >= 0:
+                prop_name, prop_val = prop.split("=", 1)   # first "=" only; the value may itself contain "="
+            else:
+                # properties with no explicit value, e.g. svn:executable
+                prop_name, prop_val = prop, ""
+            prop_name = prop_name.strip()
+            prop_val = prop_val.strip()
+            proplist.append((prop_name, prop_val))
+
+        file_types.append((file_type, proplist))
+
+
+def runCommand(cmd):
+    """Run cmd through the shell and return its stdout; print a message to stderr and exit if it fails."""
+    output_file = os.popen(cmd)
+    output_text = output_file.read()
+    exit_status = output_file.close()   # None on success, otherwise the wait()-encoded status
+    if exit_status:
+        print >>sys.stderr, '"', cmd, '" failed. Exiting.'
+        sys.exit((exit_status >> 8) or 1)   # exit code is the high byte; the raw status can wrap to 0
+    return output_text
+
+def usage():
+    """Print the one-line command synopsis to stdout."""
+    print "usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]"
+
+def main(argv):
+    """Check (and, with -f/--fix, repair) svn properties and line endings of every file listed by 'svn ls -R'."""
+    fix_problems = False
+    try:
+        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
+    except getopt.GetoptError, err:
+        print err.msg   # names the offending option; argv[0] need not be the one that failed
+        usage()
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt in ("-h", "--help"):
+            usage()
+            sys.exit()
+        if opt in ("-f", "--fix"):
+            fix_problems = True
+    if args:
+        print "unexpected command line argument"
+        usage()
+        sys.exit(2)   # a usage error, same status as the bad-option case above
+
+    parse_auto_props()
+    output = runCommand("svn ls -R ")
+    file_list = output.splitlines()
+
+    for f in file_list:
+        if os.path.isdir(f):
+            continue
+        if not os.path.isfile(f):
+            print "Repository file not in working copy: " + f
+            continue
+
+        for file_pattern, props in file_types:
+            if re.match(file_pattern, os.path.basename(f)):   # auto-props patterns apply to the file name, not the path
+                for propname, propval in props:
+                    actual_propval = runCommand("svn propget --strict " + propname + " " + f)
+                    if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
+                        # A value-less property (e.g. svn:executable) still needs a value argument for propset.
+                        fix_cmd = "svn propset %s %s %s" % (propname, propval or "'*'", f)
+                        print fix_cmd
+                        if fix_problems:
+                            os.system(fix_cmd)
+                    if propname == "svn:eol-style" and propval == "native":
+                        if os.system("grep -q \r " + f) == 0:   # status 0: at least one line contains a CR
+                            if fix_problems:
+                                print f + ": Removing DOS CR characters."
+                                os.system("sed -i s/\r// " + f)
+                            else:
+                                print f + " contains DOS CR characters."
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
diff --git a/tools/icuinfo/Makefile.in b/tools/icuinfo/Makefile.in
new file mode 100644
index 00000000..e17945fb
--- /dev/null
+++ b/tools/icuinfo/Makefile.in
@@ -0,0 +1,113 @@
+## Makefile.in for ICU - tools/icuinfo
+## Copyright (c) 1999-2010, International Business Machines Corporation and
+## others. All Rights Reserved.
+## Madhu Katragadda
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/icuinfo
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS) $(PLUGIN_OBJECTS) $(PLUGINFILE) $(PLUGIN)
+
+## Target information
+TARGET = icuinfo$(EXEEXT)
+
+ifneq ($(top_builddir),$(top_srcdir))
+CPPFLAGS += -I$(top_builddir)/common
+endif
+CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil -I$(top_srcdir)/tools/ctestfw
+CPPFLAGS+= -I$(top_srcdir)/i18n
+LIBS = $(LIBCTESTFW) $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = icuinfo.o
+PLUGIN_OBJECTS = testplug.o
+
+DEPS = $(OBJECTS:.o=.d)
+
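+# Options for the 'check' run of icuinfo, and build details (platform, build, host, C and C++ compilers) passed to the compiler as string defines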
+
+ICUINFO_OPTS=-i ../../data/out/build/$(ICUDATA_PLATFORM_NAME)
+
+CPPFLAGS+= -DU_PLATFORM=\"@platform@\" -DU_BUILD=\"@build@\" -DU_HOST=\"@host@\" -DU_CC=\"@CC@\" -DU_CXX=\"@CXX@\"
+# -DENABLE_RELEASE=@ENABLE_RELEASE@ -DENABLE_DEBUG=@ENABLE_DEBUG@ "
+
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local plugin-check
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local: all-local
+ $(MKINSTALLDIRS) $(DESTDIR)$(bindir)
+ $(INSTALL) $(TARGET) $(DESTDIR)$(bindir)
+
+dist-local:
+
+clean-local:
+ test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+ $(RMV) $(TARGET) $(OBJECTS)
+
+distclean-local: clean-local
+ $(RMV) Makefile
+
+check-local: $(TARGET)
+ $(INVOKE) ./$(TARGET) $(ICUINFO_OPTS)
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+-include Makefile.local
+
+$(TARGET) : $(OBJECTS)
+ $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
+ $(POST_BUILD_STEP)
+
+# Shared library built from $(PLUGIN_OBJECTS)
+PLUGIN=$(LIBPREFIX)plugin.$(SO)
+SO_TARGET=$(PLUGIN)
+
+# Directory in which make is running
+PLUGINDIR=$(shell pwd)
+
+# Plugin control file, placed in $(PLUGINDIR)
+PLUGINFILE=$(PLUGINDIR)/icuplugins$(SO_TARGET_VERSION_MAJOR).txt
+
+# Write a control file with a single entry:
+# library path, entry point "myPlugin", configuration string "x=4"
+$(PLUGINFILE): Makefile
+ echo "$(CURR_FULL_DIR)/$(PLUGIN) myPlugin x=4" > $@
+
+CFLAGS+=$(SHAREDLIBCFLAGS)
+
+# Link the plugin objects into the shared library
+$(PLUGIN): $(PLUGIN_OBJECTS)
+ $(SHLIB.cc) $(SHAREDLIBCFLAGS) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
+
+# Convenience target: build only the plugin library
+plugin: $(PLUGIN)
+
+# Run the tool with ICU_PLUGINS pointing at $(CURR_FULL_DIR), using the -v and -L options
+plugin-check: $(PLUGIN) $(PLUGINFILE)
+ $(INVOKE) ICU_PLUGINS="$(CURR_FULL_DIR)" ./$(TARGET) -v -L
+
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+
diff --git a/tools/icuinfo/icuinfo.cpp b/tools/icuinfo/icuinfo.cpp
new file mode 100644
index 00000000..9e52a26c
--- /dev/null
+++ b/tools/icuinfo/icuinfo.cpp
@@ -0,0 +1,318 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: icuinfo.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009-2010
+* created by: Steven R. Loomis
+*
+* This program shows some basic info about the current ICU.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uclean.h"
+#include "unicode/udbgutil.h"
+#include "unewdata.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uoptions.h"
+#include "toolutil.h"
+#include "icuplugimp.h"
+#include <unicode/uloc.h>
+#include <unicode/ucnv.h>
+#include "unicode/ucal.h"
+#include <unicode/ulocdata.h>
+#include "putilimp.h"
+#include "unicode/uchar.h"
+
+/*
+ * Command line options understood by this tool.
+ * The index comments matter: the rest of the file refers to each entry by
+ * its position in this array (options[0] .. options[6]).
+ * The long name of option 5 is spelled "millisecond-time" so that it matches
+ * the spelling shown in the help text.
+ */
+static UOption options[]={
+    /*0*/ UOPTION_HELP_H,
+    /*1*/ UOPTION_HELP_QUESTION_MARK,
+    /*2*/ UOPTION_ICUDATADIR,
+    /*3*/ UOPTION_VERBOSE,
+    /*4*/ UOPTION_DEF("list-plugins", 'L', UOPT_NO_ARG),
+    /*5*/ UOPTION_DEF("millisecond-time", 'm', UOPT_NO_ARG),
+    /*6*/ UOPTION_DEF("cleanup", 'K', UOPT_NO_ARG),
+};
+
+static UErrorCode initStatus = U_ZERO_ERROR;
+static UBool icuInitted = FALSE;
+
+/*
+ * Call u_init() the first time this is invoked and remember that it was done.
+ * The outcome of u_init() is left in initStatus; later calls do nothing.
+ */
+static void do_init() {
+    if(icuInitted) {
+        return;
+    }
+    u_init(&initStatus);
+    icuInitted = TRUE;
+}
+
+/**
+ * Return a name for the platform this tool was compiled for.
+ * U_PLATFORM (when defined) takes precedence, followed by the
+ * U_WINDOWS, U_PALMOS and _PLATFORM_H checks; "unknown" is the fallback.
+ * @return a constant string; never NULL
+ */
+static const char *getPlatform()
+{
+#if defined(U_PLATFORM)
+    return U_PLATFORM;
+#elif defined(U_WINDOWS)
+    return "Windows";
+#elif defined(U_PALMOS)
+    return "PalmOS";
+#elif defined(_PLATFORM_H)
+    return "Other (POSIX-like)";
+#else
+    return "unknown";
+#endif
+}
+
+/**
+ * Print the value of uprv_getUTCtime(), labelled as milliseconds since the
+ * epoch, with no fractional digits.
+ */
+void cmd_millis()
+{
+    const double nowMillis = uprv_getUTCtime();
+
+    printf("Milliseconds since Epoch: %.0f\n", nowMillis);
+}
+
+/**
+ * Print version and configuration information about ICU between
+ * <ICUINFO> and </ICUINFO> marker lines: compiled and runtime versions,
+ * platform and compiler details, data directory, default locale,
+ * CLDR version, default converter, default time zone, tz data version
+ * and the plugin control file.
+ * This calls do_init(), so ICU is initialized as a side effect.
+ *
+ * @param noLoad if not FALSE, the default converter name is not printed
+ */
+void cmd_version(UBool noLoad)
+{
+    UVersionInfo icu;
+    char str[200];
+
+    printf("<ICUINFO>\n");
+    printf("International Components for Unicode for C/C++\n");
+    printf("%s\n", U_COPYRIGHT_STRING);
+    printf("Compiled-Version: %s\n", U_ICU_VERSION);
+    u_getVersion(icu);
+    u_versionToString(icu, str);
+    printf("Runtime-Version: %s\n", str);
+    printf("Compiled-Unicode-Version: %s\n", U_UNICODE_VERSION);
+    u_getUnicodeVersion(icu);
+    u_versionToString(icu, str);
+    /* Report the version obtained at runtime, not the compile-time macro. */
+    printf("Runtime-Unicode-Version: %s\n", str);
+    printf("Platform: %s\n", getPlatform());
+#if defined(U_BUILD)
+    printf("Build: %s\n", U_BUILD);
+#if defined(U_HOST)
+    /* The host is only interesting when it differs from the build system. */
+    if(strcmp(U_BUILD,U_HOST)) {
+        printf("Host: %s\n", U_HOST);
+    }
+#endif
+#endif
+#if defined(U_CC)
+    printf("C compiler: %s\n", U_CC);
+#endif
+#if defined(U_CXX)
+    printf("C++ compiler: %s\n", U_CXX);
+#endif
+#if defined(CYGWINMSVC)
+    printf("Cygwin: CYGWINMSVC\n");
+#endif
+    printf("ICUDATA: %s\n", U_ICUDATA_NAME);
+    do_init();
+    printf("Data Directory: %s\n", u_getDataDirectory());
+    printf("ICU Initialization returned: %s\n", u_errorName(initStatus));
+    printf( "Default locale: %s\n", uloc_getDefault());
+    {
+        UErrorCode subStatus = U_ZERO_ERROR;
+        ulocdata_getCLDRVersion(icu, &subStatus);
+        if(U_SUCCESS(subStatus)) {
+            u_versionToString(icu, str);
+            printf("CLDR-Version: %s\n", str);
+        } else {
+            printf("CLDR-Version: %s\n", u_errorName(subStatus));
+        }
+    }
+
+#if !UCONFIG_NO_CONVERSION
+    if(noLoad == FALSE)
+    {
+        printf("Default converter: %s\n", ucnv_getDefaultName());
+    }
+#endif
+#if !UCONFIG_NO_FORMATTING
+    {
+        UChar buf[100];
+        char buf2[100];
+        UErrorCode subsubStatus= U_ZERO_ERROR;
+        int32_t len;
+
+        len = ucal_getDefaultTimeZone(buf, 100, &subsubStatus);
+        /*
+         * len+1 units are converted (the ID plus its terminator), so the ID
+         * must be strictly shorter than the buffers to stay within bounds.
+         */
+        if(U_SUCCESS(subsubStatus) && len>0 && len<(int32_t)sizeof(buf2)) {
+            u_UCharsToChars(buf, buf2, len+1);
+            printf("Default TZ: %s\n", buf2);
+        } else {
+            printf("Default TZ: %s\n", u_errorName(subsubStatus));
+        }
+    }
+    {
+        UErrorCode subStatus = U_ZERO_ERROR;
+        const char *tzVer = ucal_getTZDataVersion(&subStatus);
+        if(U_FAILURE(subStatus)) {
+            /* Show the error name in place of the version. */
+            tzVer = u_errorName(subStatus);
+        }
+        printf("TZ data version: %s\n", tzVer);
+    }
+#endif
+
+#if U_ENABLE_DYLOAD
+    const char *pluginFile = uplug_getPluginFile();
+    printf("Plugin file is: %s\n", (pluginFile&&*pluginFile)?pluginFile:"(not set. try setting ICU_PLUGINS to a directory.)");
+#else
+    fprintf(stderr, "Dynamic Loading: is disabled. No plugins will be loaded at start-up.\n");
+#endif
+    printf("</ICUINFO>\n\n");
+}
+
+/**
+ * Call u_cleanup(), then note on stderr that it was called.
+ */
+void cmd_cleanup()
+{
+    u_cleanup();
+    fputs("ICU u_cleanup() called.\n", stderr);
+}
+
+
+/**
+ * Initialize ICU (via do_init()) and print a report of every plugin returned
+ * by uplug_getPlugInternal(): level, name, library, symbol, configuration
+ * string and, when the load status is not U_ZERO_ERROR, the status together
+ * with an explanation. Prints "No plugins loaded." when there are none.
+ */
+void cmd_listplugins() {
+    int32_t count = 0;
+    UPlugData *plug;
+
+    do_init();
+    printf("ICU Initialized: u_init() returned %s\n", u_errorName(initStatus));
+
+    /* Legend describing the layout of the per-plugin entries below. */
+    printf("Plugins: \n");
+    printf( "# %6s %s \n", "Level", "Name" );
+    printf( " %10s:%-10s\n", "Library", "Symbol" );
+    printf( " config| (configuration string)\n");
+    printf( " >>> Error | Explanation \n");
+    printf( "-----------------------------------\n");
+
+    while((plug=uplug_getPlugInternal(count))!=NULL) {
+        UErrorCode libStatus = U_ZERO_ERROR;
+        const char *name = uplug_getPlugName(plug);
+        const char *sym = uplug_getSymbolName(plug);
+        const char *lib = uplug_getLibraryName(plug, &libStatus);
+        const char *config = uplug_getConfiguration(plug);
+        UErrorCode loadStatus = uplug_getPlugLoadStatus(plug);
+        const char *shownName;
+        const char *shownLib;
+        const char *shownSym;
+        const char *message;
+
+        /* Substitute readable placeholders for missing or empty values. */
+        if(name==NULL) {
+            shownName = "(null)";
+        } else if(*name) {
+            shownName = name;
+        } else {
+            shownName = "this plugin did not call uplug_setPlugName()";
+        }
+
+        if(U_FAILURE(libStatus)) {
+            shownLib = u_errorName(libStatus);
+        } else if(lib==NULL) {
+            shownLib = "(null)";
+        } else {
+            shownLib = lib;
+        }
+
+        shownSym = (sym==NULL) ? "(null)" : sym;
+
+        printf("\n#%d %-6s %s \n",
+            count+1,
+            udbg_enumName(UDBG_UPlugLevel,(int32_t)uplug_getPlugLevel(plug)),
+            shownName
+        );
+        printf(" plugin| %10s:%-10s\n", shownLib, shownSym);
+
+        if(config!=NULL&&*config) {
+            printf(" config| %s\n", config);
+        }
+
+        /* Choose the explanation that goes with the load status, if any. */
+        if(loadStatus==U_PLUGIN_CHANGED_LEVEL_WARNING) {
+            message = "Note: This plugin changed the system level (by allocating memory or calling something which does). Later plugins may not load.";
+        } else if(loadStatus==U_PLUGIN_DIDNT_SET_LEVEL) {
+            message = "Error: This plugin did not call uplug_setPlugLevel during QUERY.";
+        } else if(loadStatus==U_PLUGIN_TOO_HIGH) {
+            message = "Error: This plugin couldn't load because the system level was too high. Try loading this plugin earlier.";
+        } else if(loadStatus==U_ZERO_ERROR) {
+            message = NULL; /* no message */
+        } else if(U_FAILURE(loadStatus)) {
+            message = "error loading:";
+        } else {
+            message = "warning during load:";
+        }
+
+        if(message!=NULL) {
+            printf("\\\\\\ status| %s\n"
+                "/// %s\n", u_errorName(loadStatus), message);
+        }
+
+        count++;
+    }
+
+    if(count==0) {
+        printf("No plugins loaded.\n");
+    }
+
+}
+
+
+
+/**
+ * Program entry point: parse the command line, then run the requested
+ * commands in a fixed order (millisecond time, plugin list, version info,
+ * cleanup). With no command option, the version information is printed.
+ *
+ * @return 0 on success; U_ILLEGAL_ARGUMENT_ERROR when the command line
+ *         could not be parsed (the usage text is printed in that case)
+ */
+extern int
+main(int argc, char* argv[]) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UBool didSomething = FALSE;
+
+    /* preset then read command line options */
+    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
+
+    /* error handling, printing usage message */
+    if(argc<0) {
+        fprintf(stderr,
+            "error in command line argument \"%s\"\n",
+            argv[-argc]);
+    }
+    /*
+     * A command line that failed to parse gets the same usage text as an
+     * explicit help request, and stops here with a failure exit code
+     * instead of carrying on as if nothing had gone wrong.
+     */
+    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
+        fprintf(stderr, "%s: Output information about the current ICU\n", argv[0]);
+        fprintf(stderr, "Options:\n"
+            " -h or --help - Print this help message.\n"
+            " -m or --millisecond-time - Print the current UTC time in milliseconds.\n"
+            " -d <dir> or --icudatadir <dir> - Set the ICU Data Directory\n"
+            " -v - Print version and configuration information about ICU\n"
+            " -L or --list-plugins - List and diagnose issues with ICU Plugins\n"
+            " -K or --cleanup - Call u_cleanup() before exitting (will attempt to unload plugins)\n"
+            "\n"
+            "If no arguments are given, the tool will print ICU version and configuration information.\n"
+            );
+        fprintf(stderr, "International Components for Unicode %s\n%s\n", U_ICU_VERSION, U_COPYRIGHT_STRING );
+        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+    }
+
+    /* The data directory is set before any command runs. */
+    if(options[2].doesOccur) {
+        u_setDataDirectory(options[2].value);
+    }
+
+    if(options[5].doesOccur) {
+        cmd_millis();
+        didSomething=TRUE;
+    }
+    if(options[4].doesOccur) {
+        cmd_listplugins();
+        didSomething = TRUE;
+    }
+
+    if(options[3].doesOccur) {
+        cmd_version(FALSE);
+        didSomething = TRUE;
+    }
+
+    if(options[6].doesOccur) { /* 2nd part of version: cleanup */
+        cmd_cleanup();
+        didSomething = TRUE;
+    }
+
+    if(!didSomething) {
+        cmd_version(FALSE); /* at least print the version # */
+    }
+
+    return U_FAILURE(errorCode);
+}
diff --git a/tools/genuca/genuca.vcproj b/tools/icuinfo/icuinfo.vcproj
index 0056316e..d5d7fb73 100644
--- a/tools/genuca/genuca.vcproj
+++ b/tools/icuinfo/icuinfo.vcproj
@@ -2,8 +2,10 @@
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
- Name="genuca"
- ProjectGUID="{86829694-A375-4C58-B4EA-96EF514E3225}"
+ Name="icuinfo"
+ ProjectGUID="{E7611F49-F088-4175-9446-6111444E72C8}"
+ RootNamespace="icuinfo"
+ Keyword="Win32Proj"
TargetFrameworkVersion="131072"
>
<Platforms>
@@ -18,9 +20,9 @@
</ToolFiles>
<Configurations>
<Configuration
- Name="Debug|Win32"
- OutputDirectory=".\x86\Debug"
- IntermediateDirectory=".\x86\Debug"
+ Name="Release|Win32"
+ OutputDirectory="x86\Release"
+ IntermediateDirectory="x86\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
@@ -43,26 +45,22 @@
/>
<Tool
Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/genuca.tlb"
/>
<Tool
Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
+ AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+ StringPooling="true"
+ RuntimeLibrary="2"
+ EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/genuca.pch"
- AssemblerListingLocation=".\x86\Debug/"
- ObjectFile=".\x86\Debug/"
- ProgramDataBaseFileName=".\x86\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
+ PrecompiledHeaderFile="x86\Release/icuinfo.pch"
+ AssemblerListingLocation="x86\Release/"
+ ObjectFile="x86\Release/"
+ ProgramDataBaseFileName="x86\Release/"
+ WarningLevel="4"
SuppressStartupBanner="true"
- DebugInformationFormat="4"
CompileAs="0"
/>
<Tool
@@ -70,19 +68,17 @@
/>
<Tool
Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
+ PreprocessorDefinitions="NDEBUG"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x86\Debug/genuca.exe"
- LinkIncremental="2"
+ OutputFile="$(OutDir)/icuinfo.exe"
+ LinkIncremental="1"
SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/genuca.pdb"
+ ProgramDatabaseFile="x86\Release/icuinfo.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
@@ -92,7 +88,6 @@
/>
<Tool
Name="VCManifestTool"
- UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
@@ -111,9 +106,9 @@
/>
</Configuration>
<Configuration
- Name="Release|Win32"
- OutputDirectory=".\x86\Release"
- IntermediateDirectory=".\x86\Release"
+ Name="Debug|Win32"
+ OutputDirectory="x86\Debug"
+ IntermediateDirectory="x86\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
@@ -136,23 +131,25 @@
/>
<Tool
Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/genuca.tlb"
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
+ Optimization="0"
+ AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/genuca.pch"
- AssemblerListingLocation=".\x86\Release/"
- ObjectFile=".\x86\Release/"
- ProgramDataBaseFileName=".\x86\Release/"
- WarningLevel="3"
+ PrecompiledHeaderFile="Debug/icuinfo.pch"
+ AssemblerListingLocation="Debug/"
+ ObjectFile="Debug/"
+ ProgramDataBaseFileName="Debug/"
+ BrowseInformation="1"
+ WarningLevel="4"
SuppressStartupBanner="true"
+ DebugInformationFormat="4"
CompileAs="0"
/>
<Tool
@@ -160,18 +157,18 @@
/>
<Tool
Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
+ PreprocessorDefinitions="_DEBUG"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x86\Release/genuca.exe"
- LinkIncremental="1"
+ OutputFile="$(OutDir)/icuinfo.exe"
+ LinkIncremental="2"
SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/genuca.pdb"
+ GenerateDebugInformation="true"
+ ProgramDatabaseFile="$(OutDir)/icuinfo.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
@@ -181,6 +178,7 @@
/>
<Tool
Name="VCManifestTool"
+ UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
@@ -199,9 +197,9 @@
/>
</Configuration>
<Configuration
- Name="Debug|x64"
- OutputDirectory=".\x64\Debug"
- IntermediateDirectory=".\x64\Debug"
+ Name="Release|x64"
+ OutputDirectory=".\x64\Release"
+ IntermediateDirectory=".\x64\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
@@ -225,26 +223,22 @@
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/genuca.tlb"
/>
<Tool
Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- BufferSecurityCheck="true"
+ AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+ StringPooling="true"
+ RuntimeLibrary="2"
+ EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/genuca.pch"
- AssemblerListingLocation=".\x64\Debug/"
- ObjectFile=".\x64\Debug/"
- ProgramDataBaseFileName=".\x64\Debug/"
- BrowseInformation="1"
- WarningLevel="3"
+ PrecompiledHeaderFile="x64\Release/icuinfo.pch"
+ AssemblerListingLocation="x64\Release/"
+ ObjectFile="x64\Release/"
+ ProgramDataBaseFileName="x64\Release/"
+ WarningLevel="4"
SuppressStartupBanner="true"
- DebugInformationFormat="3"
CompileAs="0"
/>
<Tool
@@ -252,19 +246,17 @@
/>
<Tool
Name="VCResourceCompilerTool"
- PreprocessorDefinitions="_DEBUG"
- Culture="1033"
+ PreprocessorDefinitions="NDEBUG"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x64\Debug/genuca.exe"
- LinkIncremental="2"
+ OutputFile="$(OutDir)/icuinfo.exe"
+ LinkIncremental="1"
SuppressStartupBanner="true"
- GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/genuca.pdb"
+ ProgramDatabaseFile="x64\Release/icuinfo.pdb"
SubSystem="1"
TargetMachine="17"
/>
@@ -273,7 +265,6 @@
/>
<Tool
Name="VCManifestTool"
- UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
@@ -288,16 +279,13 @@
Name="VCAppVerifierTool"
/>
<Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
- Name="Release|x64"
- OutputDirectory=".\x64\Release"
- IntermediateDirectory=".\x64\Release"
+ Name="Debug|x64"
+ OutputDirectory=".\x64\Debug"
+ IntermediateDirectory=".\x64\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
@@ -321,23 +309,25 @@
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/genuca.tlb"
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
- StringPooling="true"
- RuntimeLibrary="2"
- EnableFunctionLevelLinking="true"
+ Optimization="0"
+ AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="3"
+ BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/genuca.pch"
- AssemblerListingLocation=".\x64\Release/"
- ObjectFile=".\x64\Release/"
- ProgramDataBaseFileName=".\x64\Release/"
- WarningLevel="3"
+ PrecompiledHeaderFile="Debug/icuinfo.pch"
+ AssemblerListingLocation="Debug/"
+ ObjectFile="Debug/"
+ ProgramDataBaseFileName="Debug/"
+ BrowseInformation="1"
+ WarningLevel="4"
SuppressStartupBanner="true"
+ DebugInformationFormat="3"
CompileAs="0"
/>
<Tool
@@ -345,18 +335,18 @@
/>
<Tool
Name="VCResourceCompilerTool"
- PreprocessorDefinitions="NDEBUG"
- Culture="1033"
+ PreprocessorDefinitions="_DEBUG"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x64\Release/genuca.exe"
- LinkIncremental="1"
+ OutputFile="$(OutDir)/icuinfo.exe"
+ LinkIncremental="2"
SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/genuca.pdb"
+ GenerateDebugInformation="true"
+ ProgramDatabaseFile="$(OutDir)/icuinfo.pdb"
SubSystem="1"
TargetMachine="17"
/>
@@ -365,6 +355,7 @@
/>
<Tool
Name="VCManifestTool"
+ UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
@@ -379,9 +370,6 @@
Name="VCAppVerifierTool"
/>
<Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
@@ -389,29 +377,14 @@
<References>
</References>
<Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
- >
- <File
- RelativePath=".\genuca.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl"
+ <File
+ RelativePath=".\icuinfo.cpp"
>
- <File
- RelativePath=".\genuca.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+ </File>
+ <File
+ RelativePath=".\icuplugins_windows_sample.txt"
>
- </Filter>
+ </File>
</Files>
<Globals>
</Globals>
diff --git a/tools/icuinfo/icuplugins_windows_sample.txt b/tools/icuinfo/icuplugins_windows_sample.txt
new file mode 100644
index 00000000..598de012
--- /dev/null
+++ b/tools/icuinfo/icuplugins_windows_sample.txt
@@ -0,0 +1,57 @@
+# Copyright (C) 2009-2010 IBM Corporation and Others. All Rights Reserved.
+#
+# This is a sample ICU Plugins control file for Windows.
+# It's also an example control file for any platform.
+#
+# This file can be copied to, for example, C:\SOMEDIRECTORY\icuplugins##.txt
+# where ## is the major and minor ICU versions (e.g. just 96 for version 9.6.3)
+# and C:\SOMEDIRECTORY is any directory.
+#
+# Then, set the variable ICU_PLUGINS to C:\SOMEDIRECTORY
+#
+# Then, ICU will load the test plugin from either the debug or non-debug
+# plugin DLL (depending on whether ICU is in debug or non-debug state).
+#
+# To see the results, run the command "icuinfo -v -L"
+#
+# The format of this file is pretty simple.
+# These lines are comments.
+#
+# Non-comment lines have two or three elements in them, and look like this:
+#
+# LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]
+#
+# Tabs or spaces separate the three items.
+#
+# LIBRARYNAME is the name of a shared library, either a short name if it is on the PATH,
+# or a full pathname.
+#
+# ENTRYPOINT is the short (undecorated) symbol name of the plugin's entrypoint.
+# see unicode/icuplug.h for information.
+#
+# CONFIGURATION is the entire rest of the line. It's passed as-is to the plugin.
+#
+#
+# This sample file tries to load 'myPlugin'.
+# It is in the testplug project. (You will need to rebuild either the debug or release version of this DLL.)
+# The configuration string isn't used, but is just an example
+
+## A high level test plugin that does nothing.
+testplug.dll myPlugin hello=world
+
+## A "bad" plugin that is low level but performs a malloc.
+## Sometimes this is desired, but, note that it may cause
+## later plugins to fail to load.
+#testplug.dll myPluginBad hello=world
+
+## A "high-level" plugin that does nothing.
+## It will be loaded after the low level plugins.
+#testplug.dll myPluginHigh
+
+## A "low-level" plugin that does nothing.
+## It will be loaded before the high level plugins.
+#testplug.dll myPluginLow
+
+## A low level plugin that just prints a message when uprv_malloc and related functions are called
+## Note, it cannot be unloaded.
+#testplug.dll debugMemoryPlugin
diff --git a/tools/icuinfo/testplug.c b/tools/icuinfo/testplug.c
new file mode 100644
index 00000000..e97bb69e
--- /dev/null
+++ b/tools/icuinfo/testplug.c
@@ -0,0 +1,204 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : testplug.c
+*
+* Date Name Description
+* 10/29/2009 srl New.
+******************************************************************************
+*
+*
+* This file implements a number of example ICU plugins.
+*
+*/
+
+#include "unicode/icuplug.h"
+#include <stdio.h> /* for fprintf */
+#include <stdlib.h> /* for malloc */
+#include "unicode/udbgutil.h"
+#include "unicode/uclean.h"
+#include "cmemory.h"
+
+/**
+ * Prototypes.
+ * DECLARE_PLUGIN(x) expands to the declaration of a plugin entrypoint named x
+ * that takes (UPlugData *data, UPlugReason reason, UErrorCode *status) and
+ * returns a UPlugTokenReturn. One declaration follows for each plugin
+ * listed here.
+ */
+#define DECLARE_PLUGIN(x) U_CAPI UPlugTokenReturn U_EXPORT2 x (UPlugData *data, UPlugReason reason, UErrorCode *status)
+
+DECLARE_PLUGIN(myPlugin);
+DECLARE_PLUGIN(myPluginLow);
+DECLARE_PLUGIN(myPluginFailQuery);
+DECLARE_PLUGIN(myPluginFailToken);
+DECLARE_PLUGIN(myPluginBad);
+DECLARE_PLUGIN(myPluginHigh);
+DECLARE_PLUGIN(debugMemoryPlugin);
+
+/**
+ * A minimal example plugin. It traces each call to stderr and, when
+ * queried, registers its name and declares itself a high-level plugin.
+ */
+
+U_CAPI
+UPlugTokenReturn U_EXPORT2 myPlugin (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    const char *reasonName = udbg_enumName(UDBG_UPlugReason,(int32_t)reason);
+    const char *statusName = u_errorName(*status);
+
+    /* Trace the call; purely a debugging aid. */
+    fprintf(stderr,"MyPlugin: data=%p, reason=%s, status=%s\n", (void*)data, reasonName, statusName);
+
+    if(reason!=UPLUG_REASON_QUERY) {
+        /* The token is returned for every reason, to show that this entrypoint really is a plugin. */
+        return UPLUG_TOKEN;
+    }
+
+    /* Setting a name when queried is optional, but a good idea. */
+    uplug_setPlugName(data, "Just a Test High-Level Plugin");
+    /* Setting the level when queried is mandatory. */
+    uplug_setPlugLevel(data, UPLUG_LEVEL_HIGH);
+
+    return UPLUG_TOKEN;
+}
+
+
+/**
+ * Example low-level plugin: traces each call to stderr and, when queried,
+ * registers the name "Low Plugin" at UPLUG_LEVEL_LOW.
+ */
+U_CAPI
+UPlugTokenReturn U_EXPORT2 myPluginLow (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    const char *reasonName = udbg_enumName(UDBG_UPlugReason,(int32_t)reason);
+    const char *statusName = u_errorName(*status);
+
+    fprintf(stderr,"MyPluginLow: data=%p, reason=%s, status=%s\n", (void*)data, reasonName, statusName);
+
+    if(reason!=UPLUG_REASON_QUERY) {
+        return UPLUG_TOKEN;
+    }
+
+    uplug_setPlugName(data, "Low Plugin");
+    uplug_setPlugLevel(data, UPLUG_LEVEL_LOW);
+    return UPLUG_TOKEN;
+}
+
+/**
+ * Deliberately faulty example: it never answers UPLUG_REASON_QUERY
+ * (no name and no level are set), it only traces the call.
+ */
+U_CAPI
+UPlugTokenReturn U_EXPORT2 myPluginFailQuery (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    const char *reasonName = udbg_enumName(UDBG_UPlugReason,(int32_t)reason);
+    const char *statusName = u_errorName(*status);
+
+    fprintf(stderr,"MyPluginFailQuery: data=%p, reason=%s, status=%s\n", (void*)data, reasonName, statusName);
+
+    /* A well-behaved plugin would respond to UPLUG_REASON_QUERY at this point. */
+
+    return UPLUG_TOKEN;
+}
+
+/**
+ * Deliberately faulty example: it answers the query correctly but returns 0
+ * rather than UPLUG_TOKEN.
+ */
+U_CAPI
+UPlugTokenReturn U_EXPORT2 myPluginFailToken (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    const char *reasonName = udbg_enumName(UDBG_UPlugReason,(int32_t)reason);
+    const char *statusName = u_errorName(*status);
+
+    fprintf(stderr,"MyPluginFailToken: data=%p, reason=%s, status=%s\n", (void*)data, reasonName, statusName);
+
+    switch(reason) {
+    case UPLUG_REASON_QUERY:
+        uplug_setPlugName(data, "myPluginFailToken Plugin");
+        uplug_setPlugLevel(data, UPLUG_LEVEL_LOW);
+        break;
+    default:
+        break;
+    }
+
+    return 0; /* Wrong on purpose: this is not the plugin token. */
+}
+
+
+
+/**
+ * Says it's low, but isn't: the plugin registers at UPLUG_LEVEL_LOW and then
+ * allocates memory with uprv_malloc() while loading.
+ * The allocation is stored as the plugin context on load and released again
+ * on unload.
+ */
+U_CAPI
+UPlugTokenReturn U_EXPORT2 myPluginBad (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    /* Trace under this plugin's own name so its output is not mistaken for myPluginLow's. */
+    fprintf(stderr,"MyPluginBad: data=%p, reason=%s, status=%s\n", (void*)data, udbg_enumName(UDBG_UPlugReason,(int32_t)reason), u_errorName(*status));
+
+    if(reason==UPLUG_REASON_QUERY) {
+        uplug_setPlugName(data, "Bad Plugin");
+        uplug_setPlugLevel(data, UPLUG_LEVEL_LOW);
+    } else if(reason == UPLUG_REASON_LOAD) {
+        /* The deliberate misbehaviour: allocating during load. */
+        void *ctx = uprv_malloc(12345);
+
+        uplug_setContext(data, ctx);
+        fprintf(stderr,"I'm %p and I did a bad thing and malloced %p\n", (void*)data, (void*)ctx);
+    } else if(reason == UPLUG_REASON_UNLOAD) {
+        /* Release the block that was stored as the context during load. */
+        void * ctx = uplug_getContext(data);
+
+        uprv_free(ctx);
+    }
+
+
+    return UPLUG_TOKEN;
+}
+
+/**
+ * Example high-level plugin: traces each call to stderr and, when queried,
+ * registers the name "High Plugin" at UPLUG_LEVEL_HIGH.
+ */
+U_CAPI
+UPlugTokenReturn U_EXPORT2 myPluginHigh (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    const char *reasonName = udbg_enumName(UDBG_UPlugReason,(int32_t)reason);
+    const char *statusName = u_errorName(*status);
+
+    fprintf(stderr,"MyPluginHigh: data=%p, reason=%s, status=%s\n", (void*)data, reasonName, statusName);
+
+    if(reason!=UPLUG_REASON_QUERY) {
+        return UPLUG_TOKEN;
+    }
+
+    uplug_setPlugName(data, "High Plugin");
+    uplug_setPlugLevel(data, UPLUG_LEVEL_HIGH);
+    return UPLUG_TOKEN;
+}
+
+
+/* Debug Memory Plugin (see hpmufn.c) */
+/* Allocation hook: forwards to malloc() and logs the request and the result on stderr. */
+static void * U_CALLCONV myMemAlloc(const void *context, size_t size) {
+    void *block;
+    (void)context; /* unused */
+
+    block = malloc(size);
+    fprintf(stderr, "MEM: malloc(%d) = %p\n", (int32_t)size, block);
+    return block;
+}
+
+/*
+ * Release hook: logs the pointer on stderr and forwards to free().
+ * The message is written first, because the value of a pointer is
+ * indeterminate once the block it refers to has been freed.
+ */
+static void U_CALLCONV myMemFree(const void *context, void *mem) {
+    (void)context; /* unused */
+
+    fprintf(stderr, "MEM: free(%p)\n", mem);
+    free(mem);
+}
+
+/*
+ * Reallocation hook: forwards to realloc() and logs the request and the
+ * result on stderr.
+ * A NULL mem is passed straight through: realloc(NULL, size) allocates a new
+ * block, so such a request is no longer reported as a failed allocation.
+ */
+static void * U_CALLCONV myMemRealloc(const void *context, void *mem, size_t size) {
+    void *retPtr;
+    (void)context; /* unused */
+
+    retPtr = realloc(mem, size);
+    /*
+     * Only the numeric value of the old pointer is logged.
+     * NOTE(review): after a successful realloc the old pointer value is
+     * formally indeterminate; this mirrors the original logging.
+     */
+    fprintf(stderr, "MEM: realloc(%p, %d) = %p\n", mem, (int32_t)size, retPtr);
+    return retPtr;
+}
+
+/**
+ * Low-level plugin that installs the logging memory hooks defined in this
+ * file (myMemAlloc, myMemRealloc, myMemFree) when it is loaded.
+ * On unload it marks itself as not unloadable.
+ */
+U_CAPI
+UPlugTokenReturn U_EXPORT2 debugMemoryPlugin (
+    UPlugData *data,
+    UPlugReason reason,
+    UErrorCode *status) {
+    const char *reasonName = udbg_enumName(UDBG_UPlugReason,(int32_t)reason);
+    const char *statusName = u_errorName(*status);
+
+    fprintf(stderr,"debugMemoryPlugin: data=%p, reason=%s, status=%s\n", (void*)data, reasonName, statusName);
+
+    switch(reason) {
+    case UPLUG_REASON_QUERY:
+        uplug_setPlugLevel(data, UPLUG_LEVEL_LOW);
+        uplug_setPlugName(data, "Memory Plugin");
+        break;
+
+    case UPLUG_REASON_LOAD:
+        /* Install the hooks, passing the plugin context through as their context argument. */
+        u_setMemoryFunctions(uplug_getContext(data), &myMemAlloc, &myMemRealloc, &myMemFree, status);
+        fprintf(stderr, "MEM: status now %s\n", u_errorName(*status));
+        break;
+
+    case UPLUG_REASON_UNLOAD:
+        fprintf(stderr, "MEM: not possible to unload this plugin (no way to reset memory functions)...\n");
+        uplug_setPlugNoUnload(data, TRUE);
+        break;
+
+    default:
+        break;
+    }
+
+    return UPLUG_TOKEN;
+}
+
diff --git a/tools/genpname/genpname.vcproj b/tools/icuinfo/testplug.vcproj
index c7439fde..9bf9c951 100644
--- a/tools/genpname/genpname.vcproj
+++ b/tools/icuinfo/testplug.vcproj
@@ -2,8 +2,8 @@
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
- Name="genpname"
- ProjectGUID="{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}"
+ Name="testplug"
+ ProjectGUID="{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}"
TargetFrameworkVersion="131072"
>
<Platforms>
@@ -19,9 +19,9 @@
<Configurations>
<Configuration
Name="Release|Win32"
- OutputDirectory=".\x86\Release"
+ OutputDirectory=".\..\..\..\lib"
IntermediateDirectory=".\x86\Release"
- ConfigurationType="1"
+ ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
@@ -32,8 +32,6 @@
/>
<Tool
Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
@@ -43,18 +41,23 @@
/>
<Tool
Name="VCMIDLTool"
- TypeLibraryName=".\x86\Release/genpname.tlb"
+ PreprocessorDefinitions="NDEBUG"
+ MkTypLibCompatible="true"
+ SuppressStartupBanner="true"
+ TargetEnvironment="1"
+ TypeLibraryName=".\..\..\..\lib\testplug.tlb"
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+ WholeProgramOptimization="true"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\common;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;T_CTEST_IMPLEMENTATION"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Release/genpname.pch"
+ PrecompiledHeaderFile=".\x86\Release/testplug.pch"
AssemblerListingLocation=".\x86\Release/"
ObjectFile=".\x86\Release/"
ProgramDataBaseFileName=".\x86\Release/"
@@ -75,13 +78,15 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x86\Release/genpname.exe"
+ OutputFile="..\..\..\bin\testplug.dll"
LinkIncremental="1"
SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x86\Release/genpname.pdb"
- SubSystem="1"
+ ProgramDatabaseFile=".\..\..\..\lib\testplug.pdb"
+ EnableCOMDATFolding="2"
+ LinkTimeCodeGeneration="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
+ ImportLibrary=".\..\..\..\lib\testplug.lib"
/>
<Tool
Name="VCALinkTool"
@@ -107,9 +112,9 @@
</Configuration>
<Configuration
Name="Debug|Win32"
- OutputDirectory=".\x86\Debug"
+ OutputDirectory=".\..\..\..\lib"
IntermediateDirectory=".\x86\Debug"
- ConfigurationType="1"
+ ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
@@ -120,8 +125,6 @@
/>
<Tool
Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
- Outputs="..\..\..\bin\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
@@ -131,19 +134,23 @@
/>
<Tool
Name="VCMIDLTool"
- TypeLibraryName=".\x86\Debug/genpname.tlb"
+ PreprocessorDefinitions="_DEBUG"
+ MkTypLibCompatible="true"
+ SuppressStartupBanner="true"
+ TargetEnvironment="1"
+ TypeLibraryName=".\..\..\..\lib\testplugd.tlb"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\common;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;T_CTEST_IMPLEMENTATION"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x86\Debug/genpname.pch"
+ PrecompiledHeaderFile=".\x86\Debug/testplug.pch"
AssemblerListingLocation=".\x86\Debug/"
ObjectFile=".\x86\Debug/"
ProgramDataBaseFileName=".\x86\Debug/"
@@ -166,14 +173,14 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x86\Debug/genpname.exe"
+ OutputFile="..\..\..\bin\testplug.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x86\Debug/genpname.pdb"
- SubSystem="1"
+ ProgramDatabaseFile=".\..\..\..\lib\testplugd.pdb"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
+ ImportLibrary=".\..\..\..\lib\testplugd.lib"
/>
<Tool
Name="VCALinkTool"
@@ -202,7 +209,7 @@
Name="Release|x64"
OutputDirectory=".\x64\Release"
IntermediateDirectory=".\x64\Release"
- ConfigurationType="1"
+ ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
@@ -213,8 +220,6 @@
/>
<Tool
Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
@@ -224,19 +229,23 @@
/>
<Tool
Name="VCMIDLTool"
+ PreprocessorDefinitions="NDEBUG"
+ MkTypLibCompatible="true"
+ SuppressStartupBanner="true"
TargetEnvironment="3"
- TypeLibraryName=".\x64\Release/genpname.tlb"
+ TypeLibraryName=".\..\..\..\lib64\testplug.tlb"
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+ WholeProgramOptimization="true"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\common;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;T_CTEST_IMPLEMENTATION"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Release/genpname.pch"
+ PrecompiledHeaderFile=".\x64\Release/testplug.pch"
AssemblerListingLocation=".\x64\Release/"
ObjectFile=".\x64\Release/"
ProgramDataBaseFileName=".\x64\Release/"
@@ -257,11 +266,13 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x64\Release/genpname.exe"
+ OutputFile="..\..\..\bin64\testplug.dll"
LinkIncremental="1"
SuppressStartupBanner="true"
- ProgramDatabaseFile=".\x64\Release/genpname.pdb"
- SubSystem="1"
+ ProgramDatabaseFile=".\..\..\..\lib64\testplug.pdb"
+ EnableCOMDATFolding="2"
+ LinkTimeCodeGeneration="1"
+ ImportLibrary=".\..\..\..\lib64\testplug.lib"
TargetMachine="17"
/>
<Tool
@@ -283,9 +294,6 @@
Name="VCAppVerifierTool"
/>
<Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
@@ -293,7 +301,7 @@
Name="Debug|x64"
OutputDirectory=".\x64\Debug"
IntermediateDirectory=".\x64\Debug"
- ConfigurationType="1"
+ ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
@@ -304,8 +312,6 @@
/>
<Tool
Name="VCCustomBuildTool"
- CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
- Outputs="..\..\..\bin64\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
@@ -315,20 +321,23 @@
/>
<Tool
Name="VCMIDLTool"
+ PreprocessorDefinitions="_DEBUG"
+ MkTypLibCompatible="true"
+ SuppressStartupBanner="true"
TargetEnvironment="3"
- TypeLibraryName=".\x64\Debug/genpname.tlb"
+ TypeLibraryName=".\..\..\..\lib64\testplugd.tlb"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
- AdditionalIncludeDirectories="..\..\common;..\toolutil"
- PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\common;..\toolutil;..\ctestfw"
+ PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;T_CTEST_IMPLEMENTATION"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
- PrecompiledHeaderFile=".\x64\Debug/genpname.pch"
+ PrecompiledHeaderFile=".\x64\Debug/testplug.pch"
AssemblerListingLocation=".\x64\Debug/"
ObjectFile=".\x64\Debug/"
ProgramDataBaseFileName=".\x64\Debug/"
@@ -351,12 +360,12 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile=".\x64\Debug/genpname.exe"
+ OutputFile="..\..\..\bin64\testplug.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
- ProgramDatabaseFile=".\x64\Debug/genpname.pdb"
- SubSystem="1"
+ ProgramDatabaseFile=".\..\..\..\lib64\testplugd.pdb"
+ ImportLibrary=".\..\..\..\lib64\testplugd.lib"
TargetMachine="17"
/>
<Tool
@@ -379,9 +388,6 @@
Name="VCAppVerifierTool"
/>
<Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
@@ -394,7 +400,7 @@
Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
>
<File
- RelativePath=".\genpname.cpp"
+ RelativePath=".\testplug.c"
>
</File>
</Filter>
@@ -402,10 +408,6 @@
Name="Header Files"
Filter="h;hpp;hxx;hm;inl"
>
- <File
- RelativePath=".\data.h"
- >
- </File>
</Filter>
<Filter
Name="Resource Files"
diff --git a/tools/icupkg/icupkg.cpp b/tools/icupkg/icupkg.cpp
index 65ca35be..b6ca5ecd 100644
--- a/tools/icupkg/icupkg.cpp
+++ b/tools/icupkg/icupkg.cpp
@@ -315,7 +315,7 @@ main(int argc, char *argv[]) {
* If we swap a single file, just assume that we are modifying it.
* The Package class does not give us access to the item and its type.
*/
- isModified=(UBool)(!isPackage || outType!=pkg->getInType());
+ isModified|=(UBool)(!isPackage || outType!=pkg->getInType());
} else if(isPackage) {
outType=pkg->getInType(); // default to input type
} else /* !isPackage: swap single file */ {
diff --git a/tools/makeconv/genmbcs.c b/tools/makeconv/genmbcs.c
index 696ce4fe..84043833 100644
--- a/tools/makeconv/genmbcs.c
+++ b/tools/makeconv/genmbcs.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2009, International Business Machines
+* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -687,7 +687,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
maxCharLength=mbcsData->ucm->states.maxCharLength;
if( mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO &&
- (*bytes==0xe || *bytes==0xf)
+ (!IGNORE_SISO_CHECK && (*bytes==0xe || *bytes==0xf))
) {
fprintf(stderr, "error: illegal mapping to SI or SO for SI/SO codepage: U+%04x<->0x%s\n",
(int)c, printBytes(buffer, bytes, length));
diff --git a/tools/makeconv/makeconv.c b/tools/makeconv/makeconv.c
index a2815e85..ea1e442f 100644
--- a/tools/makeconv/makeconv.c
+++ b/tools/makeconv/makeconv.c
@@ -1,7 +1,7 @@
/*
********************************************************************************
*
- * Copyright (C) 1998-2008, International Business Machines
+ * Copyright (C) 1998-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************************
@@ -79,6 +79,7 @@ extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP
*/
UBool VERBOSE = FALSE;
UBool SMALL = FALSE;
+UBool IGNORE_SISO_CHECK = FALSE;
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
@@ -174,6 +175,7 @@ enum {
OPT_DESTDIR,
OPT_VERBOSE,
OPT_SMALL,
+ OPT_IGNORE_SISO_CHECK,
OPT_COUNT
};
@@ -184,7 +186,8 @@ static UOption options[]={
UOPTION_VERSION,
UOPTION_DESTDIR,
UOPTION_VERBOSE,
- { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
+ { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
+ { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
int main(int argc, char* argv[])
@@ -236,7 +239,8 @@ int main(int argc, char* argv[])
"\t --small Generate smaller .cnv files. They will be\n"
"\t significantly smaller but may not be compatible with\n"
"\t older versions of ICU and will require heap memory\n"
- "\t allocation when loaded.\n");
+ "\t allocation when loaded.\n"
+ "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
@@ -253,6 +257,10 @@ int main(int argc, char* argv[])
VERBOSE = options[OPT_VERBOSE].doesOccur;
SMALL = options[OPT_SMALL].doesOccur;
+ if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
+ IGNORE_SISO_CHECK = TRUE;
+ }
+
if (destdir != NULL && *destdir != 0) {
uprv_strcpy(outFileName, destdir);
destdirlen = uprv_strlen(destdir);
@@ -579,7 +587,7 @@ readFile(ConvData *data, const char* converterName,
if(data->ucm->baseName[0]==0) {
dataIsBase=TRUE;
baseStates=&data->ucm->states;
- ucm_processStates(baseStates);
+ ucm_processStates(baseStates, IGNORE_SISO_CHECK);
} else {
dataIsBase=FALSE;
baseStates=NULL;
@@ -782,7 +790,7 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod
fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
- } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
+ } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
diff --git a/tools/makeconv/makeconv.h b/tools/makeconv/makeconv.h
index a3c2d375..3cb50706 100644
--- a/tools/makeconv/makeconv.h
+++ b/tools/makeconv/makeconv.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2007, International Business Machines
+* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -25,6 +25,7 @@
/* exports from makeconv.c */
U_CFUNC UBool VERBOSE;
U_CFUNC UBool SMALL;
+U_CFUNC UBool IGNORE_SISO_CHECK;
/* converter table type for writing */
enum {
diff --git a/tools/pkgdata/pkgdata.cpp b/tools/pkgdata/pkgdata.cpp
index 435361d2..2f85e7a5 100644
--- a/tools/pkgdata/pkgdata.cpp
+++ b/tools/pkgdata/pkgdata.cpp
@@ -1,5 +1,5 @@
/******************************************************************************
- * Copyright (C) 2000-2009, International Business Machines
+ * Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: pkgdata.c
@@ -76,6 +76,9 @@ U_CDECL_END
#if defined(WINDOWS_WITH_MSVC) || defined(U_LINUX)
#define CAN_WRITE_OBJ_CODE
#endif
+#if defined(U_CYGWIN) || defined(CYGWINMSVC)
+#define USING_CYGWIN
+#endif
/*
* When building the data library without assembly,
@@ -88,7 +91,7 @@ U_CDECL_END
#endif
/* Need to fix the file separator character when using MinGW. */
-#ifdef WINDOWS_WITH_GNUC
+#if defined(WINDOWS_WITH_GNUC) || defined(USING_CYGWIN)
#define PKGDATA_FILE_SEP_STRING "/"
#else
#define PKGDATA_FILE_SEP_STRING U_FILE_SEP_STRING
@@ -104,7 +107,7 @@ static int32_t pkg_executeOptions(UPKGOptions *o);
#ifdef WINDOWS_WITH_MSVC
static int32_t pkg_createWindowsDLL(const char mode, const char *gencFilePath, UPKGOptions *o);
#endif
-static int32_t pkg_createSymLinks(const char *targetDir);
+static int32_t pkg_createSymLinks(const char *targetDir, UBool specialHandling=FALSE);
static int32_t pkg_installLibrary(const char *installDir, const char *dir);
static int32_t pkg_installFileMode(const char *installDir, const char *srcDir, const char *fileListName);
@@ -115,9 +118,10 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
static int32_t pkg_createWithAssemblyCode(const char *targetDir, const char mode, const char *gencFilePath);
static int32_t pkg_generateLibraryFile(const char *targetDir, const char mode, const char *objectFile, char *command = NULL);
static int32_t pkg_archiveLibrary(const char *targetDir, const char *version, UBool reverseExt);
-static void createFileNames(const char *version_major, const char *version, const char *libName, const UBool reverseExt);
+static void createFileNames(UPKGOptions *o, const char mode, const char *version_major, const char *version, const char *libName, const UBool reverseExt);
-static int32_t pkg_getOptionsFromICUConfig(UOption *option);
+static int32_t pkg_getOptionsFromICUConfig(UBool verbose, UOption *option);
+static int runCommand(const char* command, UBool specialHandling=FALSE);
enum {
NAME,
@@ -209,12 +213,13 @@ enum {
LIB_FILE_VERSION_TMP,
#ifdef U_CYGWIN
LIB_FILE_CYGWIN,
+ LIB_FILE_CYGWIN_VERSION,
#endif
LIB_FILENAMES_SIZE
};
static char libFileNames[LIB_FILENAMES_SIZE][256];
-static void pkg_checkFlag(UPKGOptions *o);
+static UPKGOptions *pkg_checkFlag(UPKGOptions *o);
const char options_help[][320]={
"Set the data name",
@@ -235,7 +240,7 @@ const char options_help[][320]={
"Install the data (specify target)",
"Specify a custom source directory",
"Specify a custom entrypoint name (default: short name)",
- "Specify a version when packaging in DLL or static mode",
+ "Specify a version when packaging in dll or static mode",
"Add package to all file names if not present",
"Library name to build (if different than package name)",
"Quite mode. (e.g. Do not output a readme file for static libraries)"
@@ -282,10 +287,10 @@ main(int argc, char* argv[]) {
}
-#ifndef WINDOWS_WITH_MSVC
- if(!options[BLDOPT].doesOccur) {
- if (pkg_getOptionsFromICUConfig(&options[BLDOPT]) != 0) {
- fprintf(stderr, " required parameter is missing: -O is required \n");
+#if !defined(WINDOWS_WITH_MSVC) || defined(USING_CYGWIN)
+ if(!options[BLDOPT].doesOccur && uprv_strcmp(options[MODE].value, "common") != 0) {
+ if (pkg_getOptionsFromICUConfig(options[VERBOSE].doesOccur, &options[BLDOPT]) != 0) {
+ fprintf(stderr, " required parameter is missing: -O is required for static and shared builds.\n");
fprintf(stderr, "Run '%s --help' for help.\n", progname);
return 1;
}
@@ -380,8 +385,13 @@ main(int argc, char* argv[]) {
o.verbose = options[VERBOSE].doesOccur;
-#ifndef WINDOWS_WITH_MSVC /* on UNIX, we'll just include the file... */
- o.options = options[BLDOPT].value;
+
+#if !defined(WINDOWS_WITH_MSVC) || defined(USING_CYGWIN) /* on UNIX, we'll just include the file... */
+ if (options[BLDOPT].doesOccur) {
+ o.options = options[BLDOPT].value;
+ } else {
+ o.options = NULL;
+ }
#endif
if(options[COPYRIGHT].doesOccur) {
o.comment = U_COPYRIGHT_STRING;
@@ -461,6 +471,50 @@ main(int argc, char* argv[]) {
return result;
}
+/**
+ * Echo a shell command to stdout and execute it through system().
+ *
+ * When USING_CYGWIN or OS400 is defined and specialHandling is FALSE, the
+ * command is first wrapped in the platform shell invocation
+ * (bash -c "..." or QSH CMD('...')). In every other case the command is
+ * handed to system() unchanged.
+ *
+ * @param command          Command line to run; an empty string is a no-op.
+ * @param specialHandling  TRUE to bypass the platform wrapper and run the
+ *                         command exactly as given.
+ * @return 0 for an empty command, -1 if the buffer for the wrapped command
+ *         cannot be allocated, otherwise the value returned by system().
+ */
+static int runCommand(const char* command, UBool specialHandling) {
+    /* Default: run the caller's string as-is (never written through). */
+    char *cmd = (char *)command;
+    char cmdBuffer[SMALL_BUFFER_MAX_SIZE];
+    int32_t len = (int32_t)strlen(command);
+
+    if (len == 0) {
+        return 0;
+    }
+
+#if defined(USING_CYGWIN) || defined(OS400)
+/* Room for the wrapper text around the command plus the terminating NUL. */
+#define CMD_PADDING_SIZE 20
+    if (!specialHandling) {
+        if ((len + CMD_PADDING_SIZE) >= SMALL_BUFFER_MAX_SIZE) {
+            /* Wrapped command does not fit on the stack; use the heap. */
+            cmd = (char *)uprv_malloc(len + CMD_PADDING_SIZE);
+            if (cmd == NULL) {
+                /* Without this check sprintf() below would write through NULL. */
+                fprintf(stderr, "pkgdata: unable to allocate memory to run command.\n");
+                return -1;
+            }
+        } else {
+            cmd = cmdBuffer;
+        }
+#ifdef USING_CYGWIN
+        sprintf(cmd, "bash -c \"%s\"", command);
+#elif defined(OS400)
+        sprintf(cmd, "QSH CMD('%s')", command);
+#endif
+    }
+#endif
+
+    printf("pkgdata: %s\n", cmd);
+    int result = system(cmd);
+    if (result != 0) {
+        printf("-- return status = %d\n", result);
+    }
+
+    /* Only a heap-allocated wrapper buffer is owned by this function. */
+    if (cmd != cmdBuffer && cmd != command) {
+        uprv_free(cmd);
+    }
+
+    return result;
+}
+
+
#define LN_CMD "ln -s"
#define RM_CMD "rm -f"
@@ -480,30 +534,6 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
char datFileNamePath[LARGE_BUFFER_MAX_SIZE] = "";
char checkLibFile[LARGE_BUFFER_MAX_SIZE] = "";
- /* Initialize pkgdataFlags */
- pkgDataFlags = (char**)uprv_malloc(sizeof(char*) * PKGDATA_FLAGS_SIZE);
- if (pkgDataFlags != NULL) {
- for (int32_t i = 0; i < PKGDATA_FLAGS_SIZE; i++) {
- pkgDataFlags[i] = (char*)uprv_malloc(sizeof(char) * SMALL_BUFFER_MAX_SIZE);
- if (pkgDataFlags[i] != NULL) {
- pkgDataFlags[i][0] = 0;
- } else {
- fprintf(stderr,"Error allocating memory for pkgDataFlags.\n");
- return -1;
- }
- }
- } else {
- fprintf(stderr,"Error allocating memory for pkgDataFlags.\n");
- return -1;
- }
-#ifndef WINDOWS_WITH_MSVC
- /* Read in options file. */
- parseFlagsFile(o->options, pkgDataFlags, SMALL_BUFFER_MAX_SIZE, (int32_t)PKGDATA_FLAGS_SIZE, &status);
- if (U_FAILURE(status)) {
- fprintf(stderr,"Unable to open or read \"%s\" option file.\n", o->options);
- return -1;
- }
-#endif
if (mode == MODE_FILES) {
/* Copy the raw data to the installation directory. */
if (o->install != NULL) {
@@ -512,6 +542,10 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
uprv_strcat(targetDir, PKGDATA_FILE_SEP_STRING);
uprv_strcat(targetDir, o->shortName);
}
+
+ if(o->verbose) {
+ fprintf(stdout, "# Install: Files mode, copying files to %s..\n", targetDir);
+ }
result = pkg_installFileMode(targetDir, o->srcDir, o->fileListFiles->str);
}
return result;
@@ -529,7 +563,10 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
uprv_strcat(datFileNamePath, datFileName);
- result = writePackageDatFile(datFileNamePath, o->comment, o->srcDir, o->fileListFiles->str, NULL, U_IS_BIG_ENDIAN ? 'b' : 'l');
+ if(o->verbose) {
+ fprintf(stdout, "# Writing package file %s ..\n", datFileNamePath);
+ }
+ result = writePackageDatFile(datFileNamePath, o->comment, o->srcDir, o->fileListFiles->str, NULL, U_CHARSET_FAMILY ? 'e' : U_IS_BIG_ENDIAN ? 'b' : 'l');
if (result != 0) {
fprintf(stderr,"Error writing package dat file.\n");
return result;
@@ -550,6 +587,10 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
/* Move the dat file created to the target directory. */
result = rename(datFileNamePath, targetFileNamePath);
+
+ if(o->verbose) {
+ fprintf(stdout, "# Moving package file to %s ..\n", targetFileNamePath);
+ }
if (result != 0) {
fprintf(stderr, "Unable to move dat file (%s) to target location (%s).\n", datFileNamePath, targetFileNamePath);
}
@@ -560,7 +601,42 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
char version_major[10] = "";
UBool reverseExt = FALSE;
-#ifndef WINDOWS_WITH_MSVC
+ /* Initialize pkgdataFlags */
+ pkgDataFlags = (char**)uprv_malloc(sizeof(char*) * PKGDATA_FLAGS_SIZE);
+ if (pkgDataFlags != NULL) {
+ for (int32_t i = 0; i < PKGDATA_FLAGS_SIZE; i++) {
+ pkgDataFlags[i] = (char*)uprv_malloc(sizeof(char) * SMALL_BUFFER_MAX_SIZE);
+ if (pkgDataFlags[i] != NULL) {
+ pkgDataFlags[i][0] = 0;
+ } else {
+ fprintf(stderr,"Error allocating memory for pkgDataFlags.\n");
+ return -1;
+ }
+ }
+ } else {
+ fprintf(stderr,"Error allocating memory for pkgDataFlags.\n");
+ return -1;
+ }
+
+ if(o->verbose) {
+ fprintf(stdout, "# pkgDataFlags=");
+ for(int32_t i=0;i<PKGDATA_FLAGS_SIZE && pkgDataFlags[i][0];i++) {
+ fprintf(stdout, "%c \"%s\"", (i>0)?',':' ',pkgDataFlags[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+
+#if !defined(WINDOWS_WITH_MSVC) || defined(USING_CYGWIN)
+ /* Read in options file. */
+ if(o->verbose) {
+ fprintf(stdout, "# Reading options file %s\n", o->options);
+ }
+ parseFlagsFile(o->options, pkgDataFlags, SMALL_BUFFER_MAX_SIZE, (int32_t)PKGDATA_FLAGS_SIZE, &status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr,"Unable to open or read \"%s\" option file. status = %s\n", o->options, u_errorName(status));
+ return -1;
+ }
+
/* Get the version major number. */
if (o->version != NULL) {
for (uint32_t i = 0;i < sizeof(version_major);i++) {
@@ -581,19 +657,29 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
}
#endif
/* Using the base libName and version number, generate the library file names. */
- createFileNames(version_major, o->version, o->libName, reverseExt);
+ createFileNames(o, mode, version_major, o->version, o->libName, reverseExt);
- if (o->version != 0 && o->rebuild == FALSE) {
+ if ((o->version!=NULL || (mode==MODE_STATIC)) && o->rebuild == FALSE) {
/* Check to see if a previous built data library file exists and check if it is the latest. */
- sprintf(checkLibFile, "%s%s", targetDir, libFileNames[LIB_FILE_VERSION_TMP]);
+ sprintf(checkLibFile, "%s%s", targetDir, libFileNames[LIB_FILE_VERSION]);
if (T_FileStream_file_exists(checkLibFile)) {
if (isFileModTimeLater(checkLibFile, o->srcDir, TRUE) && isFileModTimeLater(checkLibFile, o->options)) {
if (o->install != NULL) {
- uprv_strcpy(libFileNames[LIB_FILE_VERSION], libFileNames[LIB_FILE_VERSION_TMP]);
- result = pkg_installLibrary(o->install, targetDir);
+ if(o->verbose) {
+ fprintf(stdout, "# Installing already-built library into %s\n", o->install);
+ }
+ result = pkg_installLibrary(o->install, targetDir);
+ } else {
+ if(o->verbose) {
+ printf("# Not rebuilding %s - up to date.\n", checkLibFile);
+ }
}
return result;
+ } else if (o->verbose && (o->install!=NULL)) {
+ fprintf(stdout, "# Not installing up-to-date library %s into %s\n", checkLibFile, o->install);
}
+ } else if(o->verbose && (o->install!=NULL)) {
+ fprintf(stdout, "# Not installing missing %s into %s\n", checkLibFile, o->install);
}
}
@@ -603,8 +689,14 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
if (pkgDataFlags[GENCCODE_ASSEMBLY_TYPE][0] != 0) {
const char* genccodeAssembly = pkgDataFlags[GENCCODE_ASSEMBLY_TYPE];
+ if(o->verbose) {
+ fprintf(stdout, "# Generating assembly code %s of type %s ..\n", gencFilePath, genccodeAssembly);
+ }
+
/* Offset genccodeAssembly by 3 because "-a " */
- if (checkAssemblyHeaderName(genccodeAssembly+3)) {
+ if (genccodeAssembly &&
+ (uprv_strlen(genccodeAssembly)>3) &&
+ checkAssemblyHeaderName(genccodeAssembly+3)) {
writeAssemblyCode(datFileNamePath, o->tmpDir, o->entryName, NULL, gencFilePath);
result = pkg_createWithAssemblyCode(targetDir, mode, gencFilePath);
@@ -612,19 +704,28 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
fprintf(stderr, "Error generating assembly code for data.\n");
return result;
} else if (mode == MODE_STATIC) {
- return result;
+ if(o->install != NULL) {
+ if(o->verbose) {
+ fprintf(stdout, "# Installing static library into %s\n", o->install);
+ }
+ result = pkg_installLibrary(o->install, targetDir);
+ }
+ return result;
}
} else {
fprintf(stderr,"Assembly type \"%s\" is unknown.\n", genccodeAssembly);
return -1;
}
} else {
+ if(o->verbose) {
+ fprintf(stdout, "# Writing object code to %s ..\n", gencFilePath);
+ }
#ifdef CAN_WRITE_OBJ_CODE
writeObjectCode(datFileNamePath, o->tmpDir, o->entryName, NULL, NULL, gencFilePath);
#ifdef U_LINUX
result = pkg_generateLibraryFile(targetDir, mode, gencFilePath);
#elif defined(WINDOWS_WITH_MSVC)
- return pkg_createWindowsDLL(mode, gencFilePath, o);
+ result = pkg_createWindowsDLL(mode, gencFilePath, o);
#endif
#elif defined(BUILD_DATA_WITHOUT_ASSEMBLY)
result = pkg_createWithoutAssemblyCode(o, targetDir, mode);
@@ -635,22 +736,33 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
}
}
#ifndef U_WINDOWS
- /* Certain platforms uses archive library. (e.g. AIX) */
- result = pkg_archiveLibrary(targetDir, o->version, reverseExt);
- if (result != 0) {
- fprintf(stderr, "Error creating data archive library file.\n");
- return result;
- }
+ if(mode != MODE_STATIC) {
+ /* Certain platforms uses archive library. (e.g. AIX) */
+ if(o->verbose) {
+ fprintf(stdout, "# Creating data archive library file ..\n");
+ }
+ result = pkg_archiveLibrary(targetDir, o->version, reverseExt);
+ if (result != 0) {
+ fprintf(stderr, "Error creating data archive library file.\n");
+ return result;
+ }
#ifndef OS400
- /* Create symbolic links for the final library file. */
- result = pkg_createSymLinks(targetDir);
- if (result != 0) {
- fprintf(stderr, "Error creating symbolic links of the data library file.\n");
- return result;
- }
+ /* Create symbolic links for the final library file. */
+ result = pkg_createSymLinks(targetDir);
+ if (result != 0) {
+ fprintf(stderr, "Error creating symbolic links of the data library file.\n");
+ return result;
+ }
#endif
+ } /* !MODE_STATIC */
+#endif
+
+#if !defined(U_WINDOWS) || defined(USING_CYGWIN)
/* Install the libraries if option was set. */
if (o->install != NULL) {
+ if(o->verbose) {
+ fprintf(stdout, "# Installing library file to %s ..\n", o->install);
+ }
result = pkg_installLibrary(o->install, targetDir);
if (result != 0) {
fprintf(stderr, "Error installing the data library.\n");
@@ -666,21 +778,29 @@ static int32_t pkg_executeOptions(UPKGOptions *o) {
* Given the base libName and version numbers, generate the library file names and store them in libFileNames.
* Depending on the configuration, the library name may either end with version number or shared object suffix.
*/
-static void createFileNames(const char *version_major, const char *version, const char *libName, UBool reverseExt) {
+static void createFileNames(UPKGOptions *o, const char mode, const char *version_major, const char *version, const char *libName, UBool reverseExt) {
sprintf(libFileNames[LIB_FILE], "%s%s",
pkgDataFlags[LIBPREFIX],
libName);
+
+ if(o->verbose) {
+ fprintf(stdout, "# libFileName[LIB_FILE] = %s\n", libFileNames[LIB_FILE]);
+ }
+
if (version != NULL) {
#ifdef U_CYGWIN
- sprintf(libFileNames[LIB_FILE_CYGWIN], "cyg%s%s.%s",
+ sprintf(libFileNames[LIB_FILE_CYGWIN], "cyg%s.%s",
+ libName,
+ pkgDataFlags[SO_EXT]);
+ sprintf(libFileNames[LIB_FILE_CYGWIN_VERSION], "cyg%s%s.%s",
libName,
version_major,
pkgDataFlags[SO_EXT]);
- sprintf(pkgDataFlags[SO_EXT], "%s.%s",
- pkgDataFlags[SO_EXT],
- pkgDataFlags[A_EXT]);
-#elif defined(OS400)
+ uprv_strcat(pkgDataFlags[SO_EXT], ".");
+ uprv_strcat(pkgDataFlags[SO_EXT], pkgDataFlags[A_EXT]);
+
+#elif defined(OS400) || defined(_AIX)
sprintf(libFileNames[LIB_FILE_VERSION_TMP], "%s.%s",
libFileNames[LIB_FILE],
pkgDataFlags[SOBJ_EXT]);
@@ -697,21 +817,38 @@ static void createFileNames(const char *version_major, const char *version, cons
reverseExt ? version_major : pkgDataFlags[SO_EXT],
reverseExt ? pkgDataFlags[SO_EXT] : version_major);
- libFileNames[LIB_FILE_VERSION][0] = 0;
+ sprintf(libFileNames[LIB_FILE_VERSION], "%s%s%s.%s",
+ libFileNames[LIB_FILE],
+ pkgDataFlags[LIB_EXT_ORDER][0] == '.' ? "." : "",
+ reverseExt ? version : pkgDataFlags[SO_EXT],
+ reverseExt ? pkgDataFlags[SO_EXT] : version);
+
+ if(o->verbose) {
+ fprintf(stdout, "# libFileName[LIB_FILE_VERSION] = %s\n", libFileNames[LIB_FILE_VERSION]);
+ }
#ifdef U_CYGWIN
/* Cygwin only deals with the version major number. */
uprv_strcpy(libFileNames[LIB_FILE_VERSION_TMP], libFileNames[LIB_FILE_VERSION_MAJOR]);
#endif
}
+ if(mode == MODE_STATIC) {
+ sprintf(libFileNames[LIB_FILE_VERSION], "%s.%s", libFileNames[LIB_FILE], pkgDataFlags[A_EXT]);
+ libFileNames[LIB_FILE_VERSION_MAJOR][0]=0;
+ if(o->verbose) {
+ fprintf(stdout, "# libFileName[LIB_FILE_VERSION] = %s (static)\n", libFileNames[LIB_FILE_VERSION]);
+ }
+ }
}
/* Create the symbolic links for the final library file. */
-static int32_t pkg_createSymLinks(const char *targetDir) {
+static int32_t pkg_createSymLinks(const char *targetDir, UBool specialHandling) {
int32_t result = 0;
char cmd[LARGE_BUFFER_MAX_SIZE];
+ char name1[SMALL_BUFFER_MAX_SIZE]; /* symlink file name */
+ char name2[SMALL_BUFFER_MAX_SIZE]; /* file name to symlink */
-#ifndef U_CYGWIN
+#ifndef USING_CYGWIN
/* No symbolic link to make. */
if (uprv_strlen(libFileNames[LIB_FILE_VERSION]) == 0 || uprv_strlen(libFileNames[LIB_FILE_VERSION_MAJOR]) == 0) {
return result;
@@ -724,20 +861,34 @@ static int32_t pkg_createSymLinks(const char *targetDir) {
LN_CMD,
libFileNames[LIB_FILE_VERSION],
libFileNames[LIB_FILE_VERSION_MAJOR]);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
return result;
}
#endif
- sprintf(cmd, "cd %s && %s %s.%s && %s %s %s.%s",
+
+ if (specialHandling) {
+#ifdef U_CYGWIN
+ sprintf(name1, "%s", libFileNames[LIB_FILE_CYGWIN]);
+ sprintf(name2, "%s", libFileNames[LIB_FILE_CYGWIN_VERSION]);
+#else
+ goto normal_symlink_mode;
+#endif
+ } else {
+normal_symlink_mode:
+ sprintf(name1, "%s.%s", libFileNames[LIB_FILE], pkgDataFlags[SO_EXT]);
+ sprintf(name2, "%s", libFileNames[LIB_FILE_VERSION]);
+ }
+
+ sprintf(cmd, "cd %s && %s %s && %s %s %s",
targetDir,
RM_CMD,
- libFileNames[LIB_FILE], pkgDataFlags[SO_EXT],
+ name1,
LN_CMD,
- libFileNames[LIB_FILE_VERSION],
- libFileNames[LIB_FILE], pkgDataFlags[SO_EXT]);
+ name2,
+ name1);
- result = system(cmd);
+ result = runCommand(cmd);
return result;
}
@@ -753,13 +904,39 @@ static int32_t pkg_installLibrary(const char *installDir, const char *targetDir)
installDir, PKGDATA_FILE_SEP_STRING, libFileNames[LIB_FILE_VERSION]
);
- result = system(cmd);
+ result = runCommand(cmd);
+
+ if (result != 0) {
+ return result;
+ }
+
+#ifdef CYGWINMSVC
+ sprintf(cmd, "cd %s && %s %s.lib %s",
+ targetDir,
+ pkgDataFlags[INSTALL_CMD],
+ libFileNames[LIB_FILE],
+ installDir
+ );
+ result = runCommand(cmd);
+
+ if (result != 0) {
+ return result;
+ }
+#elif defined (U_CYGWIN)
+ sprintf(cmd, "cd %s && %s %s %s",
+ targetDir,
+ pkgDataFlags[INSTALL_CMD],
+ libFileNames[LIB_FILE_CYGWIN_VERSION],
+ installDir
+ );
+ result = runCommand(cmd);
if (result != 0) {
return result;
}
+#endif
- return pkg_createSymLinks(installDir);
+ return pkg_createSymLinks(installDir, TRUE);
}
#ifdef U_WINDOWS_MSVC
@@ -795,7 +972,7 @@ static int32_t pkg_installFileMode(const char *installDir, const char *srcDir, c
srcDir, PKGDATA_FILE_SEP_STRING, buffer,
installDir, PKGDATA_FILE_SEP_STRING, buffer);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
fprintf(stderr, "Failed to install data file with command: %s\n", cmd);
break;
@@ -815,7 +992,7 @@ static int32_t pkg_installFileMode(const char *installDir, const char *srcDir, c
}
#else
sprintf(cmd, "%s %s %s %s", WIN_INSTALL_CMD, srcDir, installDir, WIN_INSTALL_CMD_FLAGS);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
fprintf(stderr, "Failed to install data file with command: %s\n", cmd);
}
@@ -831,7 +1008,7 @@ static int32_t pkg_archiveLibrary(const char *targetDir, const char *version, UB
int32_t result = 0;
char cmd[LARGE_BUFFER_MAX_SIZE];
- /* If the shard object suffix and the final object suffix is different and the final object suffix and the
+ /* If the shared object suffix and the final object suffix is different and the final object suffix and the
* archive file suffix is the same, then the final library needs to be archived.
*/
if (uprv_strcmp(pkgDataFlags[SOBJ_EXT], pkgDataFlags[SO_EXT]) != 0 && uprv_strcmp(pkgDataFlags[A_EXT], pkgDataFlags[SO_EXT]) == 0) {
@@ -849,7 +1026,17 @@ static int32_t pkg_archiveLibrary(const char *targetDir, const char *version, UB
targetDir,
libFileNames[LIB_FILE_VERSION_TMP]);
- result = system(cmd);
+ result = runCommand(cmd);
+ if (result != 0) {
+ return result;
+ }
+
+ sprintf(cmd, "%s %s%s",
+ pkgDataFlags[RANLIB],
+ targetDir,
+ libFileNames[LIB_FILE_VERSION]);
+
+ result = runCommand(cmd);
if (result != 0) {
return result;
}
@@ -860,7 +1047,7 @@ static int32_t pkg_archiveLibrary(const char *targetDir, const char *version, UB
targetDir,
libFileNames[LIB_FILE_VERSION_TMP]);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
return result;
}
@@ -896,19 +1083,22 @@ static int32_t pkg_generateLibraryFile(const char *targetDir, const char mode, c
}
if (mode == MODE_STATIC) {
-#ifdef OS400
- sprintf(cmd, "QSH CMD('%s %s %s%s.%s %s')",
-#else
- sprintf(cmd, "%s %s %s%s.%s %s",
-#endif
+ sprintf(cmd, "%s %s %s%s %s",
pkgDataFlags[AR],
pkgDataFlags[ARFLAGS],
targetDir,
- libFileNames[LIB_FILE],
- pkgDataFlags[A_EXT],
+ libFileNames[LIB_FILE_VERSION],
objectFile);
- result = system(cmd);
+ result = runCommand(cmd);
+ if (result == 0) {
+ sprintf(cmd, "%s %s%s",
+ pkgDataFlags[RANLIB],
+ targetDir,
+ libFileNames[LIB_FILE_VERSION]);
+
+ result = runCommand(cmd);
+ }
} else /* if (mode == MODE_DLL) */ {
#ifdef U_CYGWIN
sprintf(cmd, "%s%s%s %s -o %s%s %s %s%s %s %s",
@@ -916,13 +1106,9 @@ static int32_t pkg_generateLibraryFile(const char *targetDir, const char mode, c
targetDir,
libFileNames[LIB_FILE_VERSION_TMP],
pkgDataFlags[LDICUDTFLAGS],
- targetDir, libFileNames[LIB_FILE_CYGWIN],
-#else
-#ifdef OS400
- sprintf(cmd, "QSH CMD('%s %s -o %s%s %s %s%s %s %s')",
+ targetDir, libFileNames[LIB_FILE_CYGWIN_VERSION],
#else
sprintf(cmd, "%s %s -o %s%s %s %s%s %s %s",
-#endif
pkgDataFlags[GENLIB],
pkgDataFlags[LDICUDTFLAGS],
targetDir,
@@ -935,7 +1121,7 @@ static int32_t pkg_generateLibraryFile(const char *targetDir, const char mode, c
pkgDataFlags[BIR_FLAGS]);
/* Generate the library file. */
- result = system(cmd);
+ result = runCommand(cmd);
}
if (freeCmd) {
@@ -961,7 +1147,7 @@ static int32_t pkg_createWithAssemblyCode(const char *targetDir, const char mode
tempObjectFile,
gencFilePath);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
return result;
}
@@ -973,20 +1159,31 @@ static int32_t pkg_createWithAssemblyCode(const char *targetDir, const char mode
/*
* Generation of the data library without assembly code needs to compile each data file
* individually and then link it all together.
+ * Note: Any update to the directory structure of the data needs to be reflected here.
*/
enum {
DATA_PREFIX_BRKITR,
DATA_PREFIX_COLL,
+ DATA_PREFIX_CURR,
+ DATA_PREFIX_LANG,
DATA_PREFIX_RBNF,
+ DATA_PREFIX_REGION,
DATA_PREFIX_TRANSLIT,
+ DATA_PREFIX_ZONE,
DATA_PREFIX_LENGTH
};
+
const static char DATA_PREFIX[DATA_PREFIX_LENGTH][10] = {
"brkitr",
"coll",
+ "curr",
+ "lang",
"rbnf",
- "translit"
+ "region",
+ "translit",
+ "zone"
};
+
static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetDir, const char mode) {
int32_t result = 0;
CharList *list = o->filePaths;
@@ -1003,6 +1200,11 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
o->tmpDir,
PKGDATA_FILE_SEP_STRING,
libFileNames[LIB_FILE]);
+ /* Remove previous icudtall.c file. */
+ if (T_FileStream_file_exists(icudtAll) && (result = remove(icudtAll)) != 0) {
+ fprintf(stderr, "Unable to remove old icudtall file: %s\n", icudtAll);
+ return result;
+ }
#endif
if (list == NULL || listNames == NULL) {
@@ -1033,17 +1235,13 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
uprv_strcpy(tempObjectFile, gencmnFile);
tempObjectFile[uprv_strlen(tempObjectFile) - 1] = 'o';
-#ifdef OS400
- sprintf(cmd, "QSH CMD('%s %s -o %s %s')",
-#else
sprintf(cmd, "%s %s -o %s %s",
-#endif
pkgDataFlags[COMPILER],
pkgDataFlags[LIBFLAGS],
tempObjectFile,
gencmnFile);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
break;
}
@@ -1053,17 +1251,20 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
} else {
char newName[SMALL_BUFFER_MAX_SIZE];
char dataName[SMALL_BUFFER_MAX_SIZE];
+ char dataDirName[SMALL_BUFFER_MAX_SIZE];
const char *pSubstring;
file = list->str;
name = listNames->str;
newName[0] = dataName[0] = 0;
for (int32_t n = 0; n < DATA_PREFIX_LENGTH; n++) {
- /* If the name contains a prefix, alter the new name accordingly. */
- pSubstring = uprv_strstr(name, DATA_PREFIX[n]);
+ dataDirName[0] = 0;
+ sprintf(dataDirName, "%s%s", DATA_PREFIX[n], PKGDATA_FILE_SEP_STRING);
+ /* If the name contains a prefix (indicating directory), alter the new name accordingly. */
+ pSubstring = uprv_strstr(name, dataDirName);
if (pSubstring != NULL) {
char newNameTmp[SMALL_BUFFER_MAX_SIZE] = "";
- const char *p = name + uprv_strlen(DATA_PREFIX[n]) + 1;
+ const char *p = name + uprv_strlen(dataDirName);
for (int32_t i = 0;;i++) {
if (p[i] == '.') {
newNameTmp[i] = '_';
@@ -1088,15 +1289,17 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
writeCCode(file, o->tmpDir, dataName[0] != 0 ? dataName : o->shortName, newName[0] != 0 ? newName : NULL, gencmnFile);
#ifdef USE_SINGLE_CCODE_FILE
-#ifdef OS400
- sprintf(cmd, "QSH CMD('cat %s >> %s')", gencmnFile, icudtAll);
-#else
sprintf(cmd, "cat %s >> %s", gencmnFile, icudtAll);
-#endif
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
break;
+ } else {
+ /* Remove the c code file after concatenating it to icudtall.c file. */
+ if ((result = remove(gencmnFile)) != 0) {
+ fprintf(stderr, "Unable to remove c code file: %s\n", gencmnFile);
+ return result;
+ }
}
#endif
}
@@ -1110,14 +1313,14 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
pkgDataFlags[LIBFLAGS],
tempObjectFile,
gencmnFile);
- result = system(cmd);
+ result = runCommand(cmd);
if (result != 0) {
break;
}
- sprintf(buffer, "%s %s",
- buffer,
- tempObjectFile);
+ uprv_strcat(buffer, " ");
+ uprv_strcat(buffer, tempObjectFile);
+
#endif
if (i > 0) {
@@ -1129,21 +1332,17 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
#ifdef USE_SINGLE_CCODE_FILE
uprv_strcpy(tempObjectFile, icudtAll);
tempObjectFile[uprv_strlen(tempObjectFile) - 1] = 'o';
-#ifdef OS400
- sprintf(cmd, "QSH CMD('%s %s -o %s %s')",
-#else
+
sprintf(cmd, "%s %s -o %s %s",
-#endif
pkgDataFlags[COMPILER],
pkgDataFlags[LIBFLAGS],
tempObjectFile,
icudtAll);
- result = system(cmd);
+ result = runCommand(cmd);
if (result == 0) {
- sprintf(buffer, "%s %s",
- buffer,
- tempObjectFile);
+ uprv_strcat(buffer, " ");
+ uprv_strcat(buffer, tempObjectFile);
}
#endif
@@ -1151,6 +1350,7 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD
/* Generate the library file. */
result = pkg_generateLibraryFile(targetDir, mode, buffer, cmd);
}
+
uprv_free(buffer);
uprv_free(cmd);
@@ -1194,12 +1394,21 @@ static int32_t pkg_createWindowsDLL(const char mode, const char *gencFilePath, U
uprv_strcat(dllFilePath, PKGDATA_FILE_SEP_STRING);
uprv_strcpy(libFilePath, dllFilePath);
- uprv_strcpy(resFilePath, o->tmpDir);
- uprv_strcat(resFilePath, PKGDATA_FILE_SEP_STRING);
-
+#ifdef CYGWINMSVC
+ uprv_strcat(libFilePath, o->libName);
+ uprv_strcat(libFilePath, ".lib");
+
+ uprv_strcat(dllFilePath, o->libName);
+ uprv_strcat(dllFilePath, o->version);
+#else
uprv_strcat(dllFilePath, o->entryName);
- uprv_strcat(dllFilePath, DLL_EXT);
+
uprv_strcat(libFilePath, LIB_FILE);
+#endif
+ uprv_strcat(dllFilePath, DLL_EXT);
+
+ uprv_strcpy(resFilePath, o->tmpDir);
+ uprv_strcat(resFilePath, PKGDATA_FILE_SEP_STRING);
uprv_strcat(resFilePath, ICUDATA_RES_FILE);
if (!T_FileStream_file_exists(resFilePath)) {
@@ -1209,7 +1418,10 @@ static int32_t pkg_createWindowsDLL(const char mode, const char *gencFilePath, U
/* Check if dll file and lib file exists and that it is not newer than genc file. */
if (!o->rebuild && (T_FileStream_file_exists(dllFilePath) && isFileModTimeLater(dllFilePath, gencFilePath)) &&
(T_FileStream_file_exists(libFilePath) && isFileModTimeLater(libFilePath, gencFilePath))) {
- return 0;
+ if(o->verbose) {
+ printf("# Not rebuilding %s - up to date.\n", gencFilePath);
+ }
+ return 0;
}
sprintf(cmd, "%s\"%s\" %s\"%s\" \"%s\" \"%s\"",
@@ -1222,11 +1434,11 @@ static int32_t pkg_createWindowsDLL(const char mode, const char *gencFilePath, U
);
}
- return system(cmd);
+ return runCommand(cmd, TRUE);
}
#endif
-static void pkg_checkFlag(UPKGOptions *o) {
+static UPKGOptions *pkg_checkFlag(UPKGOptions *o) {
#ifdef U_AIX
/* AIX needs a map file. */
char *flag = NULL;
@@ -1284,14 +1496,13 @@ static void pkg_checkFlag(UPKGOptions *o) {
f = T_FileStream_open(mapFile, "w");
if (f == NULL) {
fprintf(stderr,"Unable to create map file: %s.\n", mapFile);
- return;
+ } else {
+ sprintf(tmpbuffer, "%s%s ", o->entryName, UDATA_CMN_INTERMEDIATE_SUFFIX);
+
+ T_FileStream_writeLine(f, tmpbuffer);
+
+ T_FileStream_close(f);
}
-
- sprintf(tmpbuffer, "%s%s ", o->entryName, UDATA_CMN_INTERMEDIATE_SUFFIX);
-
- T_FileStream_writeLine(f, tmpbuffer);
-
- T_FileStream_close(f);
}
#elif defined(U_CYGWIN)
/* Cygwin needs to change flag options. */
@@ -1327,6 +1538,9 @@ static void pkg_checkFlag(UPKGOptions *o) {
}
}
#endif
+ // Don't really need a return value, just need to stop compiler warnings about
+ // the unused parameter 'o' on platforms where it is not otherwise used.
+ return o;
}
static void loadLists(UPKGOptions *o, UErrorCode *status)
@@ -1342,7 +1556,7 @@ static void loadLists(UPKGOptions *o, UErrorCode *status)
for(l = o->fileListFiles; l; l = l->next) {
if(o->verbose) {
- fprintf(stdout, "# Reading %s..\n", l->str);
+ fprintf(stdout, "# pkgdata: Reading %s..\n", l->str);
}
/* TODO: stdin */
in = T_FileStream_open(l->str, "r"); /* open files list */
@@ -1433,14 +1647,33 @@ static void loadLists(UPKGOptions *o, UErrorCode *status)
}
/* Try calling icu-config directly to get the option file. */
-static int32_t pkg_getOptionsFromICUConfig(UOption *option) {
+ static int32_t pkg_getOptionsFromICUConfig(UBool verbose, UOption *option) {
#if U_HAVE_POPEN
- FILE *p;
+ FILE *p = NULL;
size_t n;
static char buf[512] = "";
+ char cmdBuf[1024];
+ UErrorCode status = U_ZERO_ERROR;
const char cmd[] = "icu-config --incpkgdatafile";
- p = popen(cmd, "r");
+ /* #1 try the same path where pkgdata was called from. */
+ findDirname(progname, cmdBuf, 1024, &status);
+ if(U_SUCCESS(status)) {
+ uprv_strncat(cmdBuf, U_FILE_SEP_STRING, 1024);
+ uprv_strncat(cmdBuf, cmd, 1024);
+
+ if(verbose) {
+ fprintf(stdout, "# Calling icu-config: %s\n", cmdBuf);
+ }
+ p = popen(cmdBuf, "r");
+ }
+
+ if(p == NULL) {
+ if(verbose) {
+ fprintf(stdout, "# Calling icu-config: %s\n", cmd);
+ }
+ p = popen(cmd, "r");
+ }
if(p == NULL)
{
@@ -1477,6 +1710,10 @@ static int32_t pkg_getOptionsFromICUConfig(UOption *option) {
return -1;
}
+ if(verbose) {
+ fprintf(stdout, "# icu-config said: %s\n", buf);
+ }
+
option->value = buf;
option->doesOccur = TRUE;
diff --git a/tools/toolutil/flagparser.c b/tools/toolutil/flagparser.c
index 548d1b85..a09de57e 100644
--- a/tools/toolutil/flagparser.c
+++ b/tools/toolutil/flagparser.c
@@ -9,7 +9,7 @@
#define LARGE_BUFFER_MAX_SIZE 2048
-static void extractFlag(char* buffer, int32_t bufferSize, char* flag);
+static void extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, UErrorCode *status);
static int32_t getFlagOffset(const char *buffer, int32_t bufferSize);
/*
@@ -23,6 +23,7 @@ parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize,
FileStream *f = T_FileStream_open(fileName, "r");
if (f == NULL) {
*status = U_FILE_ACCESS_ERROR;
+ return;
}
for (i = 0; i < numOfFlags; i++) {
@@ -31,7 +32,10 @@ parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize,
break;
}
- extractFlag(buffer, LARGE_BUFFER_MAX_SIZE, flagBuffer[i]);
+ extractFlag(buffer, LARGE_BUFFER_MAX_SIZE, flagBuffer[i], flagBufferSize, status);
+ if (U_FAILURE(*status)) {
+ break;
+ }
}
T_FileStream_close(f);
@@ -41,7 +45,7 @@ parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize,
/*
* Extract the setting after the '=' and store it in flag excluding the newline character.
*/
-static void extractFlag(char* buffer, int32_t bufferSize, char* flag) {
+static void extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, UErrorCode *status) {
int32_t i;
char *pBuffer;
int32_t offset;
@@ -52,6 +56,10 @@ static void extractFlag(char* buffer, int32_t bufferSize, char* flag) {
offset = getFlagOffset(buffer, bufferSize);
pBuffer = buffer+offset;
for(i = 0;;i++) {
+ if (i >= flagSize) {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
if (pBuffer[i+1] == 0) {
/* Indicates a new line character. End here. */
flag[i] = 0;
diff --git a/tools/toolutil/package.cpp b/tools/toolutil/package.cpp
index cffedfbe..7b35bbc7 100644
--- a/tools/toolutil/package.cpp
+++ b/tools/toolutil/package.cpp
@@ -352,7 +352,12 @@ readFile(const char *path, const char *name, int32_t &length, char &type) {
if(typeEnum<0 || U_FAILURE(errorCode)) {
fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
free(data);
+#if !UCONFIG_NO_LEGACY_CONVERSION
exit(U_INVALID_FORMAT_ERROR);
+#else
+ fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
+ exit(0);
+#endif
}
type=makeTypeLetter(typeEnum);
@@ -1096,6 +1101,7 @@ Package::extractItem(const char *filesPath, const char *outName, int32_t idx, ch
exit(errorCode);
}
udata_closeSwapper(ds);
+ pItem->type=outType;
}
// create the file and write its contents
diff --git a/tools/toolutil/pkg_genc.h b/tools/toolutil/pkg_genc.h
index 3c23d3d8..750b363a 100644
--- a/tools/toolutil/pkg_genc.h
+++ b/tools/toolutil/pkg_genc.h
@@ -10,7 +10,7 @@
#include "unicode/utypes.h"
U_INTERNAL void U_EXPORT2
-printAssemblyHeadersToStdErr();
+printAssemblyHeadersToStdErr(void);
U_INTERNAL UBool U_EXPORT2
checkAssemblyHeaderName(const char* optAssembly);
diff --git a/tools/toolutil/pkg_icu.cpp b/tools/toolutil/pkg_icu.cpp
index b3d6a3c5..7af5b1c1 100644
--- a/tools/toolutil/pkg_icu.cpp
+++ b/tools/toolutil/pkg_icu.cpp
@@ -21,6 +21,8 @@
// read a file list -------------------------------------------------------- ***
+U_NAMESPACE_USE
+
static const struct {
const char *suffix;
int32_t length;
diff --git a/tools/toolutil/pkg_icu.h b/tools/toolutil/pkg_icu.h
index cbdd1efa..77798e80 100644
--- a/tools/toolutil/pkg_icu.h
+++ b/tools/toolutil/pkg_icu.h
@@ -13,10 +13,10 @@
U_CAPI int U_EXPORT2
writePackageDatFile(const char *outFilename, const char *outComment,
- const char *sourcePath, const char *addList, Package *pkg,
+ const char *sourcePath, const char *addList, U_NAMESPACE_QUALIFIER Package *pkg,
char outType);
-U_CAPI Package * U_EXPORT2
+U_CAPI U_NAMESPACE_QUALIFIER Package * U_EXPORT2
readList(const char *filesPath, const char *listname, UBool readContents);
#endif
diff --git a/tools/toolutil/pkgitems.cpp b/tools/toolutil/pkgitems.cpp
index a374133a..03fd0930 100644
--- a/tools/toolutil/pkgitems.cpp
+++ b/tools/toolutil/pkgitems.cpp
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2003-2008, International Business Machines
+* Copyright (C) 2003-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -15,7 +15,7 @@
*
* Companion file to package.cpp. Deals with details of ICU data item formats.
* Used for item dependencies.
-* Contains adapted code from uresdata.c and ucnv_bld.c (swapper code from 2003).
+* Contains adapted code from ucnv_bld.c (swapper code from 2003).
*/
#include "unicode/utypes.h"
@@ -23,6 +23,7 @@
#include "unicode/putil.h"
#include "unicode/udata.h"
#include "cstring.h"
+#include "uinvchar.h"
#include "ucmndata.h"
#include "udataswp.h"
#include "swapimpl.h"
@@ -53,6 +54,74 @@ printError(void *context, const char *fmt, va_list args) {
U_CDECL_END
+// a data item in native-platform form ------------------------------------- ***
+
+U_NAMESPACE_BEGIN
+
+class NativeItem {
+public:
+ NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {}
+ NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) {
+ setItem(item, swap);
+ }
+ ~NativeItem() {
+ delete [] swapped;
+ }
+ const UDataInfo *getDataInfo() const {
+ return pInfo;
+ }
+ const uint8_t *getBytes() const {
+ return bytes;
+ }
+ int32_t getLength() const {
+ return length;
+ }
+
+ void setItem(const Item *item, UDataSwapFn *swap) {
+ pItem=item;
+ int32_t infoLength, itemHeaderLength;
+ UErrorCode errorCode=U_ZERO_ERROR;
+ pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ exit(errorCode); // should succeed because readFile() checks headers
+ }
+ length=pItem->length-itemHeaderLength;
+
+ if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) {
+ bytes=pItem->data+itemHeaderLength;
+ } else {
+ UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
+ pItem->name, u_errorName(errorCode));
+ exit(errorCode);
+ }
+
+ ds->printError=printError;
+ ds->printErrorContext=stderr;
+
+ swapped=new uint8_t[pItem->length];
+ if(swapped==NULL) {
+ fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name);
+ exit(U_MEMORY_ALLOCATION_ERROR);
+ }
+ swap(ds, pItem->data, pItem->length, swapped, &errorCode);
+ pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode);
+ bytes=swapped+itemHeaderLength;
+ udata_closeSwapper(ds);
+ }
+ }
+
+private:
+ const Item *pItem;
+ const UDataInfo *pInfo;
+ const uint8_t *bytes;
+ uint8_t *swapped;
+ int32_t length;
+};
+
+U_NAMESPACE_END
+
// check a dependency ------------------------------------------------------ ***
/*
@@ -60,10 +129,9 @@ U_CDECL_END
* and a suffix
*/
static void
-checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
- CheckDependency check, void *context,
- UErrorCode *pErrorCode) {
- char target[200];
+makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix,
+ char *target, int32_t capacity,
+ UErrorCode *pErrorCode) {
const char *itemID;
int32_t treeLength, suffixLength, targetLength;
@@ -82,8 +150,8 @@ checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char
}
suffixLength=(int32_t)strlen(suffix);
targetLength=treeLength+idLength+suffixLength;
- if(targetLength>=(int32_t)sizeof(target)) {
- fprintf(stderr, "icupkg/checkIDSuffix(%s) alias target item name length %ld too long\n",
+ if(targetLength>=capacity) {
+ fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
itemName, (long)targetLength);
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
return;
@@ -92,8 +160,17 @@ checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char
memcpy(target, itemName, treeLength);
memcpy(target+treeLength, id, idLength);
memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
+}
- check(context, itemName, target);
+static void
+checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
+ CheckDependency check, void *context,
+ UErrorCode *pErrorCode) {
+ char target[200];
+ makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode);
+ if(U_SUCCESS(*pErrorCode)) {
+ check(context, itemName, target);
+ }
}
/* assemble the target item name from the item's parent item name */
@@ -139,235 +216,123 @@ checkParent(const char *itemName, CheckDependency check, void *context,
// get dependencies from resource bundles ---------------------------------- ***
-static const char gAliasKey[]="%%ALIAS";
-static const char gDependencyKey[]="%%DEPENDENCY";
-enum { gAliasKeyLength=7, gDependencyKeyLength=12 };
+static const UChar SLASH=0x2f;
/*
- * Enumerate one resource item and its children and extract dependencies from
- * aliases.
- * Code adapted from ures_preflightResource() and ures_swapResource().
+ * Check for the alias from the string or alias resource res.
*/
static void
-ures_enumDependencies(const UDataSwapper *ds,
- const char *itemName,
- const Resource *inBundle, int32_t length,
- Resource res, const char *inKey, const char *parentKey, int32_t depth,
- CheckDependency check, void *context,
- UErrorCode *pErrorCode) {
- const Resource *p;
- int32_t offset;
- UBool useResSuffix = TRUE;
+checkAlias(const char *itemName,
+ Resource res, const UChar *alias, int32_t length, UBool useResSuffix,
+ CheckDependency check, void *context, UErrorCode *pErrorCode) {
+ int32_t i;
- if(res==0 || RES_GET_TYPE(res)==URES_INT) {
- /* empty string or integer, nothing to do */
+ if(!uprv_isInvariantUString(alias, length)) {
+ fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
+ itemName, res);
+ *pErrorCode=U_INVALID_CHAR_FOUND;
return;
}
- /* all other types use an offset to point to their data */
- offset=(int32_t)RES_GET_OFFSET(res);
- if(0<=length && length<=offset) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n",
- itemName, res, length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- p=inBundle+offset;
+ // extract the locale ID from alias strings like
+ // locale_ID/key1/key2/key3
+ // locale_ID
- switch(RES_GET_TYPE(res)) {
- /* strings and aliases have physically the same value layout */
- case URES_STRING:
- // Check for %%ALIAS
- if(depth==1 && inKey!=NULL) {
- char key[gAliasKeyLength+1];
- int32_t keyLength;
+ // search for the first slash
+ for(i=0; i<length && alias[i]!=SLASH; ++i) {}
- keyLength=(int32_t)strlen(inKey);
- if(keyLength!=gAliasKeyLength) {
- break;
- }
- ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n",
- itemName, res);
- return;
- }
- if(0!=strcmp(key, gAliasKey)) {
- break;
- }
+ if(res_getPublicType(res)==URES_ALIAS) {
+ // ignore aliases with an initial slash:
+ // /ICUDATA/... and /pkgname/... go to a different package
+ // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
+ if(i==0) {
+ return; // initial slash ('/')
}
- // Check for %%DEPENDENCY
- else if(depth==2 && parentKey!=NULL) {
- char key[gDependencyKeyLength+1];
- int32_t keyLength;
- keyLength=(int32_t)strlen(parentKey);
- if(keyLength!=gDependencyKeyLength) {
- break;
- }
- ds->swapInvChars(ds, parentKey, gDependencyKeyLength+1, key, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n",
- itemName, res);
- return;
- }
- if(0!=strcmp(key, gDependencyKey)) {
- break;
- }
- useResSuffix = FALSE;
- } else {
- // we ignore all other strings
- break;
+ // ignore the intra-bundle path starting from the first slash ('/')
+ length=i;
+ } else /* URES_STRING */ {
+ // the whole string should only consist of a locale ID
+ if(i!=length) {
+ fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
+ itemName, res);
+ *pErrorCode=U_UNSUPPORTED_ERROR;
+ return;
}
- // for the top-level %%ALIAS or %%DEPENDENCY string fall through to URES_ALIAS
- case URES_ALIAS:
- {
- char localeID[32];
- const uint16_t *p16;
- int32_t i, stringLength;
- uint16_t u16, ored16;
-
- stringLength=udata_readInt32(ds, (int32_t)*p);
-
- /* top=offset+1+(string length +1)/2 rounded up */
- offset+=1+((stringLength+1)+1)/2;
- if(offset>length) {
- break; // the resource does not fit into the bundle, print error below
- }
-
- // extract the locale ID from alias strings like
- // locale_ID/key1/key2/key3
- // locale_ID
- if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
- u16=0x2f; // slash in local endianness
- } else {
- u16=0x2f00; // slash in opposite endianness
- }
- p16=(const uint16_t *)(p+1); // Unicode string contents
+ }
- // search for the first slash
- for(i=0; i<stringLength && p16[i]!=u16; ++i) {}
+ // convert the Unicode string to char *
+ char localeID[32];
+ if(length>=(int32_t)sizeof(localeID)) {
+ fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
+ itemName, res, (long)length);
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+ u_UCharsToChars(alias, localeID, length);
+ localeID[length]=0;
- if(RES_GET_TYPE(res)==URES_ALIAS) {
- // ignore aliases with an initial slash:
- // /ICUDATA/... and /pkgname/... go to a different package
- // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
- if(i==0) {
- break; // initial slash ('/')
- }
+ checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
+}
- // ignore the intra-bundle path starting from the first slash ('/')
- stringLength=i;
- } else /* URES_STRING */ {
- // the whole string should only consist of a locale ID
- if(i!=stringLength) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
- itemName, res);
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return;
+/*
+ * Enumerate one resource item and its children and extract dependencies from
+ * aliases.
+ */
+static void
+ures_enumDependencies(const char *itemName,
+ const ResourceData *pResData,
+ Resource res, const char *inKey, const char *parentKey, int32_t depth,
+ CheckDependency check, void *context,
+ UErrorCode *pErrorCode) {
+ switch(res_getPublicType(res)) {
+ case URES_STRING:
+ {
+ UBool useResSuffix = TRUE;
+ // Check for %%ALIAS
+ if(depth==1 && inKey!=NULL) {
+ if(0!=strcmp(inKey, "%%ALIAS")) {
+ break;
}
}
-
- // convert the Unicode string to char * and
- // check that it has a bundle path but no package
- if(stringLength>=(int32_t)sizeof(localeID)) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
- itemName, res, stringLength);
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
- // convert the alias Unicode string to US-ASCII
- ored16=0;
- if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
- for(i=0; i<stringLength; ++i) {
- u16=p16[i];
- ored16|=u16;
- localeID[i]=(char)u16;
+ // Check for %%DEPENDENCY
+ else if(depth==2 && parentKey!=NULL) {
+ if(0!=strcmp(parentKey, "%%DEPENDENCY")) {
+ break;
}
+ useResSuffix = FALSE;
} else {
- for(i=0; i<stringLength; ++i) {
- u16=p16[i];
- ored16|=u16;
- localeID[i]=(char)(u16>>8);
- }
- ored16=(uint16_t)((ored16<<8)|(ored16>>8));
- }
- localeID[stringLength]=0;
- if(ored16>0x7f) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n",
- itemName, res);
- *pErrorCode=U_INVALID_CHAR_FOUND;
- return;
- }
-
-#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY)
- // swap to EBCDIC
- // our swapper is probably not the right one, but
- // the function uses it only for printing errors
- uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
+ // we ignore all other strings
+ break;
}
-#endif
-#if U_CHARSET_FAMILY!=U_ASCII_FAMILY && U_CHARSET_FAMILY!=U_EBCDIC_FAMILY
-# error Unknown U_CHARSET_FAMILY value!
-#endif
-
- checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
+ int32_t length;
+ const UChar *alias=res_getString(pResData, res, &length);
+ checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode);
+ }
+ break;
+ case URES_ALIAS:
+ {
+ int32_t length;
+ const UChar *alias=res_getAlias(pResData, res, &length);
+ checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode);
}
break;
case URES_TABLE:
- case URES_TABLE32:
{
- const uint16_t *pKey16;
- const int32_t *pKey32;
-
- Resource item;
- int32_t i, count;
-
- if(RES_GET_TYPE(res)==URES_TABLE) {
- /* get table item count */
- pKey16=(const uint16_t *)p;
- count=ds->readUInt16(*pKey16++);
-
- pKey32=NULL;
-
- /* top=((1+ table item count)/2 rounded up)+(table item count) */
- offset+=((1+count)+1)/2;
- } else {
- /* get table item count */
- pKey32=(const int32_t *)p;
- count=udata_readInt32(ds, *pKey32++);
-
- pKey16=NULL;
-
- /* top=(1+ table item count)+(table item count) */
- offset+=1+count;
- }
-
- p=inBundle+offset; /* pointer to table resources */
- offset+=count;
-
- if(offset>length) {
- break; // the resource does not fit into the bundle, print error below
- }
-
/* recurse */
- for(i=0; i<count; ++i) {
- item=ds->readUInt32(*p++);
+ int32_t count=res_countArrayItems(pResData, res);
+ for(int32_t i=0; i<count; ++i) {
+ const char *itemKey;
+ Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey);
ures_enumDependencies(
- ds, itemName, inBundle, length, item,
- ((const char *)inBundle)+
- (pKey16!=NULL ?
- ds->readUInt16(pKey16[i]) :
- udata_readInt32(ds, pKey32[i])),
+ itemName, pResData,
+ item, itemKey,
inKey, depth+1,
check, context,
pErrorCode);
if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n",
- itemName, res, i, item);
+ fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
+ itemName, res, i, itemKey, item);
break;
}
}
@@ -375,28 +340,19 @@ ures_enumDependencies(const UDataSwapper *ds,
break;
case URES_ARRAY:
{
- Resource item;
- int32_t i, count;
-
- /* top=offset+1+(array length) */
- count=udata_readInt32(ds, (int32_t)*p++);
- offset+=1+count;
-
- if(offset>length) {
- break; // the resource does not fit into the bundle, print error below
- }
-
/* recurse */
- for(i=0; i<count; ++i) {
- item=ds->readUInt32(*p++);
+ int32_t count=res_countArrayItems(pResData, res);
+ for(int32_t i=0; i<count; ++i) {
+ Resource item=res_getArrayItem(pResData, res, i);
ures_enumDependencies(
- ds, itemName, inBundle, length,
- item, NULL, inKey, depth+1,
+ itemName, pResData,
+ item, NULL,
+ inKey, depth+1,
check, context,
pErrorCode);
if(U_FAILURE(*pErrorCode)) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
- itemName, res, i, item);
+ fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
+ itemName, res, i, item);
break;
}
}
@@ -405,75 +361,79 @@ ures_enumDependencies(const UDataSwapper *ds,
default:
break;
}
-
- if(U_FAILURE(*pErrorCode)) {
- /* nothing to do */
- } else if(0<=length && length<offset) {
- udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n",
- itemName, res, length);
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- }
}
-/* code adapted from ures_swap() */
static void
-ures_enumDependencies(const UDataSwapper *ds,
- const char *itemName, const UDataInfo *pInfo,
+ures_enumDependencies(const char *itemName, const UDataInfo *pInfo,
const uint8_t *inBytes, int32_t length,
CheckDependency check, void *context,
UErrorCode *pErrorCode) {
- const Resource *inBundle;
- Resource rootRes;
-
- /* the following integers count Resource item offsets (4 bytes each), not bytes */
- int32_t bundleLength;
+ ResourceData resData;
- /* check format version */
- if(pInfo->formatVersion[0]!=1) {
- fprintf(stderr, "icupkg: .res format version %02x not supported\n",
- pInfo->formatVersion[0]);
+ res_read(&resData, pInfo, inBytes, length, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
+ pInfo->formatVersion[0], pInfo->formatVersion[1]);
exit(U_UNSUPPORTED_ERROR);
}
- /* a resource bundle must contain at least one resource item */
- bundleLength=length/4;
-
- /* formatVersion 1.1 must have a root item and at least 5 indexes */
- if( bundleLength<
- (pInfo->formatVersion[1]==0 ? 1 : 1+5)
- ) {
- fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n",
- length);
- exit(U_INDEX_OUTOFBOUNDS_ERROR);
- }
-
- inBundle=(const Resource *)inBytes;
- rootRes=ds->readUInt32(*inBundle);
-
- ures_enumDependencies(
- ds, itemName, inBundle, bundleLength,
- rootRes, NULL, NULL, 0,
- check, context,
- pErrorCode);
-
/*
* if the bundle attributes are present and the nofallback flag is not set,
* then add the parent bundle as a dependency
*/
- if(pInfo->formatVersion[1]>=1) {
- int32_t indexes[URES_INDEX_TOP];
- const int32_t *inIndexes;
-
- inIndexes=(const int32_t *)inBundle+1;
- indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]);
- if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) {
- indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]);
- if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) {
- /* this bundle participates in locale fallback */
- checkParent(itemName, check, context, pErrorCode);
- }
+ if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) {
+ if(!resData.noFallback) {
+ /* this bundle participates in locale fallback */
+ checkParent(itemName, check, context, pErrorCode);
}
}
+
+ U_NAMESPACE_QUALIFIER NativeItem nativePool;
+
+ if(resData.usesPoolBundle) {
+ char poolName[200];
+ makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+ check(context, itemName, poolName);
+ // TODO: The Package should be passed in.
+ // Since the context is always a Package, we could just redeclare it.
+ U_NAMESPACE_QUALIFIER Package *pkg=(U_NAMESPACE_QUALIFIER Package *)context;
+ int32_t index=pkg->findItem(poolName);
+ if(index<0) {
+ // We cannot work with a bundle if its pool resource is missing.
+ // check() already printed a complaint.
+ return;
+ }
+ // TODO: Cache the native version in the Item itself.
+ nativePool.setItem(pkg->getItem(index), ures_swap);
+ const UDataInfo *poolInfo=nativePool.getDataInfo();
+ if(poolInfo->formatVersion[0]<=1) {
+ fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
+ return;
+ }
+ const int32_t *poolIndexes=(const int32_t *)nativePool.getBytes()+1;
+ int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff;
+ if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM &&
+ (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE))
+ ) {
+ fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
+ return;
+ }
+ if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
+ resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength);
+ } else {
+ fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName);
+ return;
+ }
+ }
+
+ ures_enumDependencies(
+ itemName, &resData,
+ resData.rootRes, NULL, NULL, 0,
+ check, context,
+ pErrorCode);
}
// get dependencies from conversion tables --------------------------------- ***
@@ -616,52 +576,59 @@ U_NAMESPACE_BEGIN
void
Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
- const UDataInfo *pInfo;
- const uint8_t *inBytes;
- int32_t format, length, infoLength, itemHeaderLength;
- UErrorCode errorCode;
-
- errorCode=U_ZERO_ERROR;
- pInfo=getDataInfo(pItem->data,pItem->length, infoLength, itemHeaderLength, &errorCode);
+ int32_t infoLength, itemHeaderLength;
+ UErrorCode errorCode=U_ZERO_ERROR;
+ const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
if(U_FAILURE(errorCode)) {
return; // should not occur because readFile() checks headers
}
// find the data format and call the corresponding function, if any
- format=getDataFormat(pInfo->dataFormat);
+ int32_t format=getDataFormat(pInfo->dataFormat);
if(format>=0) {
- UDataSwapper *ds;
-
- // TODO: share/cache swappers
- ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
- pItem->name, u_errorName(errorCode));
- exit(errorCode);
- }
-
- ds->printError=printError;
- ds->printErrorContext=stderr;
-
- inBytes=pItem->data+itemHeaderLength;
- length=pItem->length-itemHeaderLength;
-
switch(format) {
case FMT_RES:
- ures_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
- break;
+ {
+ /*
+ * Swap the resource bundle (if necessary) so that we can use
+ * the normal runtime uresdata.c code to read it.
+ * We do not want to duplicate that code, especially not together with on-the-fly swapping.
+ */
+ NativeItem nrb(pItem, ures_swap);
+ ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, &errorCode);
+ break;
+ }
case FMT_CNV:
- ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
- break;
+ {
+ // TODO: share/cache swappers
+ UDataSwapper *ds=udata_openSwapper(
+ (UBool)pInfo->isBigEndian, pInfo->charsetFamily,
+ U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
+ &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
+ pItem->name, u_errorName(errorCode));
+ exit(errorCode);
+ }
+
+ ds->printError=printError;
+ ds->printErrorContext=stderr;
+
+ const uint8_t *inBytes=pItem->data+itemHeaderLength;
+ int32_t length=pItem->length-itemHeaderLength;
+
+ ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
+ udata_closeSwapper(ds);
+ break;
+ }
default:
break;
}
- udata_closeSwapper(ds);
-
if(U_FAILURE(errorCode)) {
exit(errorCode);
}
}
}
+
U_NAMESPACE_END
diff --git a/tools/toolutil/swapimpl.cpp b/tools/toolutil/swapimpl.cpp
index ceb7eb09..011cae5e 100644
--- a/tools/toolutil/swapimpl.cpp
+++ b/tools/toolutil/swapimpl.cpp
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2005-2009, International Business Machines
+* Copyright (C) 2005-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -50,6 +50,7 @@
#include "ucol_swp.h"
#include "ucnv_bld.h"
#include "unormimp.h"
+#include "normalizer2impl.h"
#include "sprpimpl.h"
#include "propname.h"
#include "rbbidata.h"
@@ -91,7 +92,7 @@ uprops_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x50 &&
pInfo->dataFormat[2]==0x72 &&
pInfo->dataFormat[3]==0x6f &&
- (pInfo->formatVersion[0]==3 || pInfo->formatVersion[0]==4 || pInfo->formatVersion[0]==5) &&
+ (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=6) &&
pInfo->formatVersion[2]==UTRIE_SHIFT &&
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
)) {
@@ -541,10 +542,7 @@ test_swap(const UDataSwapper *ds,
const uint8_t *inBytes;
uint8_t *outBytes;
- const int32_t *inIndexes;
- int32_t indexes[32];
-
- int32_t i, offset, count;
+ int32_t offset;
/* udata_swapDataHeader checks the arguments */
headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
@@ -594,6 +592,7 @@ test_swap(const UDataSwapper *ds,
return headerSize+size;
}
+
/* swap any data (except a .dat package) ------------------------------------ */
static const struct {
@@ -621,6 +620,7 @@ static const struct {
#if !UCONFIG_NO_NORMALIZATION
{ { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
+ { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */
#endif
#if !UCONFIG_NO_COLLATION
{ { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
@@ -632,9 +632,9 @@ static const struct {
#endif
{ { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
{ { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */
-
+#if !UCONFIG_NO_NORMALIZATION
{ { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */
-
+#endif
{ { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */
};
diff --git a/tools/toolutil/toolutil.c b/tools/toolutil/toolutil.cpp
index d91ce647..a866ece7 100644
--- a/tools/toolutil/toolutil.c
+++ b/tools/toolutil/toolutil.cpp
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2009, International Business Machines
+* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -21,11 +21,6 @@
#include <stdio.h>
#include <sys/stat.h>
#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "toolutil.h"
-#include "unicode/ucal.h"
#ifdef U_WINDOWS
# define VC_EXTRALEAN
@@ -42,6 +37,27 @@
#endif
#include <errno.h>
+#include "unicode/errorcode.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "toolutil.h"
+#include "unicode/ucal.h"
+
+U_NAMESPACE_BEGIN
+
+IcuToolErrorCode::~IcuToolErrorCode() {
+ // Safe because our handleFailure() does not throw exceptions.
+ if(isFailure()) { handleFailure(); }
+}
+
+void IcuToolErrorCode::handleFailure() const {
+ fprintf(stderr, "error at %s: %s\n", location, errorName());
+ exit(errorCode);
+}
+
+U_NAMESPACE_END
+
static int32_t currentYear = -1;
U_CAPI int32_t U_EXPORT2 getCurrentYear() {
@@ -55,10 +71,10 @@ U_CAPI int32_t U_EXPORT2 getCurrentYear() {
currentYear = ucal_get(cal, UCAL_YEAR, &status);
ucal_close(cal);
}
- return currentYear;
#else
- return 2008;
+ /* No formatting- no way to set the current year. */
#endif
+ return currentYear;
}
@@ -86,6 +102,41 @@ getLongPathname(const char *pathname) {
}
U_CAPI const char * U_EXPORT2
+findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status) {
+ if(U_FAILURE(*status)) return NULL;
+ const char *resultPtr = NULL;
+ int32_t resultLen = 0;
+
+ const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
+#if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR
+ const char *basenameAlt=uprv_strrchr(path, U_FILE_ALT_SEP_CHAR);
+ if(basenameAlt && (!basename || basename<basenameAlt)) {
+ basename = basenameAlt;
+ }
+#endif
+ if(!basename) {
+ /* no basename - return '.'. */
+ resultPtr = ".";
+ resultLen = 1;
+ } else {
+ resultPtr = path;
+ resultLen = basename - path;
+ if(resultLen<1) {
+ resultLen = 1; /* '/' or '/a' -> '/' */
+ }
+ }
+
+ if((resultLen+1) <= bufLen) {
+ uprv_strncpy(buffer, resultPtr, resultLen);
+ buffer[resultLen]=0;
+ return buffer;
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return NULL;
+ }
+}
+
+U_CAPI const char * U_EXPORT2
findBasename(const char *filename) {
const char *basename=uprv_strrchr(filename, U_FILE_SEP_CHAR);
@@ -235,6 +286,7 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) {
fprintf(stderr, "error: %s - out of memory\n", mem->name);
exit(U_MEMORY_ALLOCATION_ERROR);
}
+ mem->capacity=newCapacity;
}
return TRUE;
@@ -242,9 +294,11 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) {
U_CAPI void * U_EXPORT2
utm_alloc(UToolMemory *mem) {
- char *p=(char *)mem->array+mem->idx*mem->size;
- int32_t newIndex=mem->idx+1;
+ char *p=NULL;
+ int32_t oldIndex=mem->idx;
+ int32_t newIndex=oldIndex+1;
if(utm_hasCapacity(mem, newIndex)) {
+ p=(char *)mem->array+oldIndex*mem->size;
mem->idx=newIndex;
uprv_memset(p, 0, mem->size);
}
@@ -253,9 +307,11 @@ utm_alloc(UToolMemory *mem) {
U_CAPI void * U_EXPORT2
utm_allocN(UToolMemory *mem, int32_t n) {
- char *p=(char *)mem->array+mem->idx*mem->size;
- int32_t newIndex=mem->idx+n;
+ char *p=NULL;
+ int32_t oldIndex=mem->idx;
+ int32_t newIndex=oldIndex+n;
if(utm_hasCapacity(mem, newIndex)) {
+ p=(char *)mem->array+oldIndex*mem->size;
mem->idx=newIndex;
uprv_memset(p, 0, n*mem->size);
}
diff --git a/tools/toolutil/toolutil.h b/tools/toolutil/toolutil.h
index 1817d2f6..be32942c 100644
--- a/tools/toolutil/toolutil.h
+++ b/tools/toolutil/toolutil.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2009, International Business Machines
+* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -21,6 +21,33 @@
#include "unicode/utypes.h"
+#ifdef XP_CPLUSPLUS
+
+#include "unicode/errorcode.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * ErrorCode subclass for use in ICU command-line tools.
+ * The destructor calls handleFailure() which calls exit(errorCode) when isFailure().
+ */
+class U_TOOLUTIL_API IcuToolErrorCode : public ErrorCode {
+public:
+ /**
+ * @param loc A short string describing where the IcuToolErrorCode is used.
+ */
+ IcuToolErrorCode(const char *loc) : location(loc) {}
+ virtual ~IcuToolErrorCode();
+protected:
+ virtual void handleFailure() const;
+private:
+ const char *location;
+};
+
+U_NAMESPACE_END
+
+#endif
+
/*
* For Windows, a path/filename may be the short (8.3) version
* of the "real", long one. In this case, the short one
@@ -39,16 +66,33 @@
U_CAPI const char * U_EXPORT2
getLongPathname(const char *pathname);
-/*
+/**
* Find the basename at the end of a pathname, i.e., the part
* after the last file separator, and return a pointer
* to this part of the pathname.
* If the pathname only contains a basename and no file separator,
* then the pathname pointer itself is returned.
- */
+ **/
U_CAPI const char * U_EXPORT2
findBasename(const char *filename);
+/**
+ * Find the directory name of a pathname, that is, everything
+ * up to but not including the last file separator.
+ *
+ * If successful, copies the directory name into the output buffer along with
+ * a terminating NULL.
+ *
+ * If there isn't a directory name in the path, it returns the current directory string ('.').
+ * @param path the full pathname to inspect.
+ * @param buffer the output buffer
+ * @param bufLen the output buffer length
+ * @param status error code- may return U_BUFFER_OVERFLOW_ERROR if bufLen is too small.
+ * @return If successful, a pointer to the output buffer. If failure or bufLen is too small, NULL.
+ **/
+U_CAPI const char * U_EXPORT2
+findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status);
+
/*
* Return the current year in the Gregorian calendar. Used for copyright generation.
*/
diff --git a/tools/toolutil/toolutil.vcproj b/tools/toolutil/toolutil.vcproj
index 40238a5b..9a42486a 100644
--- a/tools/toolutil/toolutil.vcproj
+++ b/tools/toolutil/toolutil.vcproj
@@ -78,7 +78,7 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile="..\..\..\bin\icutu41.dll"
+ OutputFile="..\..\..\bin\icutu45.dll"
LinkIncremental="1"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\..\..\lib"
@@ -174,7 +174,7 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile="..\..\..\bin\icutu41d.dll"
+ OutputFile="..\..\..\bin\icutu45d.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\..\..\lib"
@@ -269,7 +269,7 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile="..\..\..\bin64\icutu41.dll"
+ OutputFile="..\..\..\bin64\icutu45.dll"
LinkIncremental="1"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\..\..\lib64"
@@ -367,7 +367,7 @@
/>
<Tool
Name="VCLinkerTool"
- OutputFile="..\..\..\bin64\icutu41d.dll"
+ OutputFile="..\..\..\bin64\icutu45d.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\..\..\lib64"
@@ -407,261 +407,246 @@
<References>
</References>
<Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ <File
+ RelativePath=".\filestrm.c"
>
- <File
- RelativePath=".\filestrm.c"
- >
- </File>
- <File
- RelativePath=".\filetools.cpp"
- >
- </File>
- <File
- RelativePath=".\flagparser.c"
- >
- </File>
- <File
- RelativePath=".\package.cpp"
- >
- </File>
- <File
- RelativePath=".\pkg_genc.c"
- >
- <FileConfiguration
- Name="Release|Win32"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|Win32"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release|x64"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|x64"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\pkg_gencmn.c"
- >
- <FileConfiguration
- Name="Release|Win32"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|Win32"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release|x64"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|x64"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\pkg_icu.cpp"
- >
- </File>
- <File
- RelativePath=".\pkgitems.cpp"
- >
- </File>
- <File
- RelativePath=".\swapimpl.cpp"
- >
- </File>
- <File
- RelativePath=".\toolutil.c"
- >
- <FileConfiguration
- Name="Release|Win32"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|Win32"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release|x64"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|x64"
- >
- <Tool
- Name="VCCLCompilerTool"
- DisableLanguageExtensions="false"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\ucbuf.c"
- >
- </File>
- <File
- RelativePath=".\ucm.c"
- >
- </File>
- <File
- RelativePath=".\ucmstate.c"
- >
- </File>
- <File
- RelativePath=".\unewdata.c"
- >
- </File>
- <File
- RelativePath=".\uoptions.c"
- >
- </File>
- <File
- RelativePath=".\uparse.c"
- >
- </File>
- <File
- RelativePath=".\writesrc.c"
- >
- </File>
- <File
- RelativePath=".\xmlparser.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl"
- >
- <File
- RelativePath=".\filestrm.h"
- >
- </File>
- <File
- RelativePath=".\filetools.h"
- >
- </File>
- <File
- RelativePath=".\flagparser.h"
- >
- </File>
- <File
- RelativePath=".\package.h"
- >
- </File>
- <File
- RelativePath=".\pkg_genc.h"
- >
- </File>
- <File
- RelativePath=".\pkg_gencmn.h"
- >
- </File>
- <File
- RelativePath=".\pkg_icu.h"
- >
- </File>
- <File
- RelativePath=".\pkg_imp.h"
- >
- </File>
- <File
- RelativePath=".\platform_xopen_source_extended.h"
- >
- </File>
- <File
- RelativePath=".\swapimpl.h"
- >
- </File>
- <File
- RelativePath=".\toolutil.h"
- >
- </File>
- <File
- RelativePath=".\ucbuf.h"
- >
- </File>
- <File
- RelativePath=".\ucm.h"
- >
- </File>
- <File
- RelativePath=".\unewdata.h"
- >
- </File>
- <File
- RelativePath=".\uoptions.h"
- >
- </File>
- <File
- RelativePath=".\uparse.h"
- >
- </File>
- <File
- RelativePath=".\writesrc.h"
- >
- </File>
- <File
- RelativePath=".\xmlparser.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+ </File>
+ <File
+ RelativePath=".\filestrm.h"
+ >
+ </File>
+ <File
+ RelativePath=".\filetools.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\filetools.h"
+ >
+ </File>
+ <File
+ RelativePath=".\flagparser.c"
+ >
+ </File>
+ <File
+ RelativePath=".\flagparser.h"
+ >
+ </File>
+ <File
+ RelativePath=".\package.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\package.h"
+ >
+ </File>
+ <File
+ RelativePath=".\pkg_genc.c"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\pkg_genc.h"
+ >
+ </File>
+ <File
+ RelativePath=".\pkg_gencmn.c"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\pkg_gencmn.h"
+ >
+ </File>
+ <File
+ RelativePath=".\pkg_icu.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\pkg_icu.h"
+ >
+ </File>
+ <File
+ RelativePath=".\pkg_imp.h"
+ >
+ </File>
+ <File
+ RelativePath=".\pkgitems.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\platform_xopen_source_extended.h"
+ >
+ </File>
+ <File
+ RelativePath=".\swapimpl.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\swapimpl.h"
+ >
+ </File>
+ <File
+ RelativePath=".\toolutil.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ DisableLanguageExtensions="false"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\toolutil.h"
+ >
+ </File>
+ <File
+ RelativePath=".\ucbuf.c"
+ >
+ </File>
+ <File
+ RelativePath=".\ucbuf.h"
+ >
+ </File>
+ <File
+ RelativePath=".\ucm.c"
+ >
+ </File>
+ <File
+ RelativePath=".\ucm.h"
+ >
+ </File>
+ <File
+ RelativePath=".\ucmstate.c"
+ >
+ </File>
+ <File
+ RelativePath=".\unewdata.c"
+ >
+ </File>
+ <File
+ RelativePath=".\unewdata.h"
+ >
+ </File>
+ <File
+ RelativePath=".\uoptions.c"
+ >
+ </File>
+ <File
+ RelativePath=".\uoptions.h"
+ >
+ </File>
+ <File
+ RelativePath=".\uparse.c"
+ >
+ </File>
+ <File
+ RelativePath=".\uparse.h"
+ >
+ </File>
+ <File
+ RelativePath=".\writesrc.c"
+ >
+ </File>
+ <File
+ RelativePath=".\writesrc.h"
+ >
+ </File>
+ <File
+ RelativePath=".\xmlparser.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\xmlparser.h"
>
- </Filter>
+ </File>
</Files>
<Globals>
</Globals>
diff --git a/tools/toolutil/ucm.h b/tools/toolutil/ucm.h
index 6ee13215..20324f54 100644
--- a/tools/toolutil/ucm.h
+++ b/tools/toolutil/ucm.h
@@ -1,6 +1,6 @@
/*
*******************************************************************************
- * Copyright (C) 2003-2009, International Business Machines
+ * Copyright (C) 2003-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ucm.h
@@ -237,7 +237,7 @@ U_CAPI void U_EXPORT2
ucm_addState(UCMStates *states, const char *s);
U_CAPI void U_EXPORT2
-ucm_processStates(UCMStates *states);
+ucm_processStates(UCMStates *states, UBool ignoreSISOCheck);
U_CAPI int32_t U_EXPORT2
ucm_countChars(UCMStates *states,
diff --git a/tools/toolutil/ucmstate.c b/tools/toolutil/ucmstate.c
index 393d18b3..e1adb974 100644
--- a/tools/toolutil/ucmstate.c
+++ b/tools/toolutil/ucmstate.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2003-2005, International Business Machines
+* Copyright (C) 2003-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -421,7 +421,7 @@ sumUpStates(UCMStates *states) {
}
U_CAPI void U_EXPORT2
-ucm_processStates(UCMStates *states) {
+ucm_processStates(UCMStates *states, UBool ignoreSISOCheck) {
int32_t entry, state, cell, count;
if(states->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
@@ -557,10 +557,11 @@ ucm_processStates(UCMStates *states) {
exit(U_INVALID_TABLE_FORMAT);
}
/* are the SI/SO all in the right places? */
- if( states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
+ if( ignoreSISOCheck ||
+ (states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
states->stateTable[0][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0) &&
states->stateTable[1][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
- states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0)
+ states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0))
) {
states->outputType=MBCS_OUTPUT_2_SISO;
} else {
diff --git a/tools/toolutil/unewdata.c b/tools/toolutil/unewdata.c
index b483a194..9ea60d56 100644
--- a/tools/toolutil/unewdata.c
+++ b/tools/toolutil/unewdata.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999,2008, International Business Machines
+* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -14,6 +14,7 @@
* created by: Markus W. Scherer
*/
+#include <stdio.h>
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
@@ -162,6 +163,33 @@ udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) {
return fileLength;
}
+/* dummy UDataInfo cf. udata.h */
+static const UDataInfo dummyDataInfo = {
+ sizeof(UDataInfo),
+ 0,
+
+ U_IS_BIG_ENDIAN,
+ U_CHARSET_FAMILY,
+ U_SIZEOF_UCHAR,
+ 0,
+
+ { 0, 0, 0, 0 }, /* dummy dataFormat */
+ { 0, 0, 0, 0 }, /* dummy formatVersion */
+ { 0, 0, 0, 0 } /* dummy dataVersion */
+};
+
+U_CAPI void U_EXPORT2
+udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) {
+ if(U_SUCCESS(*pErrorCode)) {
+ udata_finish(udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode), pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n",
+ u_errorName(*pErrorCode), dir, name, type);
+ exit(*pErrorCode);
+ }
+ }
+}
+
U_CAPI void U_EXPORT2
udata_write8(UNewDataMemory *pData, uint8_t byte) {
if(pData!=NULL && pData->file!=NULL) {
diff --git a/tools/toolutil/unewdata.h b/tools/toolutil/unewdata.h
index fb190e6f..d25b8e9f 100644
--- a/tools/toolutil/unewdata.h
+++ b/tools/toolutil/unewdata.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2000, International Business Machines
+* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -66,6 +66,10 @@ udata_create(const char *dir, const char *type, const char *name,
U_CAPI uint32_t U_EXPORT2
udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode);
+/** @memo Write a dummy data file. */
+U_CAPI void U_EXPORT2
+udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode);
+
/** @memo Write an 8-bit byte to the file. */
U_CAPI void U_EXPORT2
udata_write8(UNewDataMemory *pData, uint8_t byte);
diff --git a/tools/toolutil/uparse.c b/tools/toolutil/uparse.c
index ed2f1e90..c988e3da 100644
--- a/tools/toolutil/uparse.c
+++ b/tools/toolutil/uparse.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2007, International Business Machines
+* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -28,14 +28,26 @@
#include <stdio.h>
+/* Is c a whitespace character? */
+#define IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
+
U_CAPI const char * U_EXPORT2
u_skipWhitespace(const char *s) {
- while(*s==' ' || *s=='\t') {
+ while(IS_INV_WHITESPACE(*s)) {
++s;
}
return s;
}
+U_CAPI char * U_EXPORT2
+u_rtrim(char *s) {
+ char *end=uprv_strchr(s, 0);
+ while(s<end && IS_INV_WHITESPACE(*(end-1))) {
+ *--end = 0;
+ }
+ return end;
+}
+
/*
* If the string starts with # @missing: then return the pointer to the
* following non-whitespace character.
@@ -69,7 +81,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
char *start, *limit;
int32_t i, length;
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ if(U_FAILURE(*pErrorCode)) {
return;
}
@@ -90,12 +102,8 @@ u_parseDelimitedFile(const char *filename, char delimiter,
}
while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) {
- length=(int32_t)uprv_strlen(line);
-
/* remove trailing newline characters */
- while(length>0 && (line[length-1]=='\r' || line[length-1]=='\n')) {
- line[--length]=0;
- }
+ length=(int32_t)(u_rtrim(line)-line);
/*
* detect a line with # @missing:
@@ -118,7 +126,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
limit=uprv_strchr(start, '#');
if(limit!=NULL) {
/* get white space before the pound sign */
- while(limit>start && (*(limit-1)==' ' || *(limit-1)=='\t')) {
+ while(limit>start && IS_INV_WHITESPACE(*(limit-1))) {
--limit;
}
@@ -185,7 +193,7 @@ u_parseCodePoints(const char *s,
uint32_t value;
int32_t count;
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ if(U_FAILURE(*pErrorCode)) {
return 0;
}
if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
@@ -202,7 +210,7 @@ u_parseCodePoints(const char *s,
/* read one code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
- if(end<=s || (*end!=' ' && *end!='\t' && *end!=';' && *end!=0) || value>=0x110000) {
+ if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
@@ -234,7 +242,7 @@ u_parseString(const char *s,
uint32_t value;
int32_t destLength;
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ if(U_FAILURE(*pErrorCode)) {
return 0;
}
if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
@@ -261,21 +269,22 @@ u_parseString(const char *s,
/* read one code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
- if(end<=s || (*end!=' ' && *end!='\t' && *end!=';' && *end!=0) || value>=0x110000) {
+ if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
/* store the first code point */
- if(destLength==0 && pFirst!=NULL) {
+ if(pFirst!=NULL) {
*pFirst=value;
+ pFirst=NULL;
}
/* append it to the destination array */
- if((destLength+UTF_CHAR_LENGTH(value))<=destCapacity) {
- UTF_APPEND_CHAR_UNSAFE(dest, destLength, value);
+ if((destLength+U16_LENGTH(value))<=destCapacity) {
+ U16_APPEND_UNSAFE(dest, destLength, value);
} else {
- destLength+=UTF_CHAR_LENGTH(value);
+ destLength+=U16_LENGTH(value);
}
/* go to the following characters */
@@ -285,13 +294,14 @@ u_parseString(const char *s,
/* read a range like start or start..end */
U_CAPI int32_t U_EXPORT2
-u_parseCodePointRange(const char *s,
- uint32_t *pStart, uint32_t *pEnd,
- UErrorCode *pErrorCode) {
+u_parseCodePointRangeAnyTerminator(const char *s,
+ uint32_t *pStart, uint32_t *pEnd,
+ const char **terminator,
+ UErrorCode *pErrorCode) {
char *end;
uint32_t value;
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ if(U_FAILURE(*pErrorCode)) {
return 0;
}
if(s==NULL || pStart==NULL || pEnd==NULL) {
@@ -299,15 +309,10 @@ u_parseCodePointRange(const char *s,
return 0;
}
- s=u_skipWhitespace(s);
- if(*s==';' || *s==0) {
- *pErrorCode=U_PARSE_ERROR;
- return 0;
- }
-
/* read the start code point */
+ s=u_skipWhitespace(s);
value=(uint32_t)uprv_strtoul(s, &end, 16);
- if(end<=s || (*end!=' ' && *end!='\t' && *end!='.' && *end!=';') || value>=0x110000) {
+ if(end<=s || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
@@ -315,19 +320,15 @@ u_parseCodePointRange(const char *s,
/* is there a "..end"? */
s=u_skipWhitespace(end);
- if(*s==';' || *s==0) {
- return 1;
- }
-
if(*s!='.' || s[1]!='.') {
- *pErrorCode=U_PARSE_ERROR;
- return 0;
+ *terminator=end;
+ return 1;
}
- s+=2;
+ s=u_skipWhitespace(s+2);
/* read the end code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
- if(end<=s || (*end!=' ' && *end!='\t' && *end!=';') || value>=0x110000) {
+ if(end<=s || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
@@ -339,14 +340,25 @@ u_parseCodePointRange(const char *s,
return 0;
}
- /* no garbage after that? */
- s=u_skipWhitespace(end);
- if(*s==';' || *s==0) {
- return value-*pStart+1;
- } else {
- *pErrorCode=U_PARSE_ERROR;
- return 0;
+ *terminator=end;
+ return value-*pStart+1;
+}
+
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRange(const char *s,
+                      uint32_t *pStart, uint32_t *pEnd,
+                      UErrorCode *pErrorCode) {
+    /*
+     * Parse "start" or "start..end" with the any-terminator variant, then
+     * insist that only whitespace separates the range from ';' or the
+     * end of the string.
+     */
+    const char *rest;
+    int32_t count=
+        u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &rest, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* rest was not set; hand back the helper's result unchanged */
+        return count;
+    }
+    rest=u_skipWhitespace(rest);
+    if(*rest==';' || *rest==0) {
+        return count;
+    }
+    /* garbage after the range */
+    *pErrorCode=U_PARSE_ERROR;
+    return 0;
}
U_CAPI int32_t U_EXPORT2
diff --git a/tools/toolutil/uparse.h b/tools/toolutil/uparse.h
index dc3e0b8c..96bd1ff2 100644
--- a/tools/toolutil/uparse.h
+++ b/tools/toolutil/uparse.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2004, International Business Machines
+* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -34,6 +34,15 @@ U_CDECL_BEGIN
U_CAPI const char * U_EXPORT2
u_skipWhitespace(const char *s);
+/**
+ * Trim whitespace (including line endings) from the end of the string.
+ * The string is modified in place: every trailing whitespace character
+ * is overwritten with a NUL terminator.
+ *
+ * @param s Pointer to the NUL-terminated string; it is not checked for NULL.
+ * @return Pointer to the new end of the string (its terminating NUL).
+ */
+U_CAPI char * U_EXPORT2
+u_rtrim(char *s);
+
/** Function type for u_parseDelimitedFile(). */
typedef void U_CALLCONV
UParseLineFn(void *context,
@@ -117,6 +126,16 @@ u_parseCodePointRange(const char *s,
uint32_t *pStart, uint32_t *pEnd,
UErrorCode *pErrorCode);
+/**
+ * Same as u_parseCodePointRange() but the range may be terminated by
+ * any character. The position of the terminating character is returned via
+ * the *terminator output parameter.
+ *
+ * The input is either a single hexadecimal code point ("start") or a
+ * range ("start..end"). Whitespace is skipped before the start value,
+ * before the ".." and before the end value.
+ *
+ * @param s          Input string. A NULL s, pStart or pEnd is rejected
+ *                   and 0 is returned.
+ * @param pStart     Output: first code point of the range.
+ * @param pEnd       Output: last code point of the range.
+ * @param terminator Output: pointer to the first character after the last
+ *                   parsed number (before any following whitespace).
+ *                   Written only on success, and without a NULL check.
+ * @param pErrorCode In/out error code; dereferenced without a NULL check.
+ *                   If it already indicates a failure, 0 is returned
+ *                   immediately. Set to U_PARSE_ERROR when a number is
+ *                   missing or is not below 0x110000.
+ * @return Number of code points in the range (1 for a single code point),
+ *         or 0 on failure.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRangeAnyTerminator(const char *s,
+ uint32_t *pStart, uint32_t *pEnd,
+ const char **terminator,
+ UErrorCode *pErrorCode);
U_CAPI int32_t U_EXPORT2
u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
diff --git a/tools/toolutil/writesrc.c b/tools/toolutil/writesrc.c
index 631c3ccf..59d6e057 100644
--- a/tools/toolutil/writesrc.c
+++ b/tools/toolutil/writesrc.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2005-2008, International Business Machines
+* Copyright (C) 2005-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -24,8 +24,8 @@
#include "cstring.h"
#include "writesrc.h"
-U_CAPI FILE * U_EXPORT2
-usrc_create(const char *path, const char *filename) {
+static FILE *
+usrc_createWithHeader(const char *path, const char *filename, const char *header) {
char buffer[1024];
const char *p;
char *q;
@@ -55,19 +55,7 @@ usrc_create(const char *path, const char *filename) {
lt=localtime(&t);
strftime(year, sizeof(year), "%Y", lt);
strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
- fprintf(
- f,
- "/*\n"
- " * Copyright (C) 1999-%s, International Business Machines\n"
- " * Corporation and others. All Rights Reserved.\n"
- " *\n"
- " * file name: %s\n"
- " *\n"
- " * machine-generated on: %s\n"
- " */\n\n",
- year,
- filename,
- buffer);
+ fprintf(f, header, year, filename, buffer);
} else {
fprintf(
stderr,
@@ -77,6 +65,33 @@ usrc_create(const char *path, const char *filename) {
return f;
}
+U_CAPI FILE * U_EXPORT2
+usrc_create(const char *path, const char *filename) {
+    /*
+     * C/Java-style block comment used as a printf format; the three %s
+     * fields are filled with the year, the file name and the date.
+     */
+    static const char cStyleHeader[]=
+        "/*\n"
+        " * Copyright (C) 1999-%s, International Business Machines\n"
+        " * Corporation and others. All Rights Reserved.\n"
+        " *\n"
+        " * file name: %s\n"
+        " *\n"
+        " * machine-generated on: %s\n"
+        " */\n\n";
+
+    return usrc_createWithHeader(path, filename, cStyleHeader);
+}
+
+U_CAPI FILE * U_EXPORT2
+usrc_createTextData(const char *path, const char *filename) {
+    /*
+     * '#'-prefixed comment lines used as a printf format; the three %s
+     * fields are filled with the year, the file name and the date.
+     */
+    static const char hashStyleHeader[]=
+        "# Copyright (C) 1999-%s, International Business Machines\n"
+        "# Corporation and others. All Rights Reserved.\n"
+        "#\n"
+        "# file name: %s\n"
+        "#\n"
+        "# machine-generated on: %s\n"
+        "#\n\n";
+
+    return usrc_createWithHeader(path, filename, hashStyleHeader);
+}
+
U_CAPI void U_EXPORT2
usrc_writeArray(FILE *f,
const char *prefix,
diff --git a/tools/toolutil/writesrc.h b/tools/toolutil/writesrc.h
index 3636dcae..4519cf18 100644
--- a/tools/toolutil/writesrc.h
+++ b/tools/toolutil/writesrc.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2005-2008, International Business Machines
+* Copyright (C) 2005-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -25,11 +25,19 @@
/**
* Create a source text file and write a header comment with the ICU copyright.
+ * Writes a C/Java-style comment.
*/
U_CAPI FILE * U_EXPORT2
usrc_create(const char *path, const char *filename);
/**
+ * Create a source text file and write a header comment with the ICU copyright.
+ * Writes the comment with # lines, as used in scripts and text data.
+ */
+U_CAPI FILE * U_EXPORT2
+usrc_createTextData(const char *path, const char *filename);
+
+/**
* Write the contents of an array of 8/16/32-bit words.
* The prefix and postfix are optional (can be NULL) and are written first/last.
* The prefix may contain a %ld or similar field for the array length.
diff --git a/tools/toolutil/xmlparser.cpp b/tools/toolutil/xmlparser.cpp
index 55688320..c00e1dbf 100644
--- a/tools/toolutil/xmlparser.cpp
+++ b/tools/toolutil/xmlparser.cpp
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2004-2008, International Business Machines
+* Copyright (C) 2004-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -563,7 +563,7 @@ UnicodeString
UXMLParser::scanContent(UErrorCode &status) {
UnicodeString result;
if (mXMLCharData.lookingAt(fPos, status)) {
- result = mXMLCharData.group(0, status);
+ result = mXMLCharData.group((int32_t)0, status);
// Normalize the new-lines. (Before char ref substitution)
mNewLineNormalizer.reset(result);
result = mNewLineNormalizer.replaceAll(fOneLF, status);
@@ -622,7 +622,7 @@ UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) {
// An unrecognized &entity; Leave it alone.
// TODO: check that it really looks like an entity, and is not some
// random & in the text.
- replacement = mAmps.group(0, status);
+ replacement = mAmps.group((int32_t)0, status);
}
mAmps.appendReplacement(result, replacement, status);
}
diff --git a/tools/tzcode/Makefile.in b/tools/tzcode/Makefile.in
index ce7c1a50..a21df33a 100644
--- a/tools/tzcode/Makefile.in
+++ b/tools/tzcode/Makefile.in
@@ -1,4 +1,4 @@
-# Some Portions Copyright (c) 2006-2007 IBM and others. All Rights Reserved.
+# Some Portions Copyright (c) 2006-2010 IBM and others. All Rights Reserved.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
@@ -9,9 +9,12 @@ subdir = tools/tzcode
include $(top_builddir)/icudefs.mk
-OBJECTS= zic.o localtime.o asctime.o scheck.o ialloc.o
+ifeq ($(TZDATA),)
TZDATA = $(firstword $(wildcard ./tzdata*.tar.gz) $(wildcard $(srcdir)/tzdata*.tar.gz))
+endif
+ifeq ($(TZCODE),)
TZCODE = $(firstword $(wildcard ./tzcode*.tar.gz) $(wildcard $(srcdir)/tzcode*.tar.gz))
+endif
TZORIG=./tzorig
TZORIG_TZDIR=./tzorig/tzdir
@@ -27,6 +30,15 @@ ICUZDUMPOUT=$(shell pwd)/icuzdumpout
ZDUMP_OPTS= -v -a -d $(ZDUMPOUT) -c 1902,2038 -i
ICUZDUMP_OPTS= -a -d $(ICUZDUMPOUT)
+# Executables & objects
+OBJECTS= zic.o localtime.o asctime.o scheck.o ialloc.o
+ZICTARG=$(BINDIR)/zic$(EXEEXT)
+ZICEXEC=$(TOOLBINDIR)/zic$(TOOLEXEEXT)
+TZ2ICUTARG=$(BINDIR)/tz2icu$(EXEEXT)
+TZ2ICUEXEC=$(TOOLBINDIR)/tz2icu$(TOOLEXEEXT)
+ICUZDUMPTARG=$(BINDIR)/icuzdump$(EXEEXT)
+ICUZDUMPEXEC=$(TOOLBINDIR)/icuzdump$(TOOLEXEEXT)
+
ifeq ($(TZDATA),)
all:
@echo ERROR "tzdata*.tar.gz" can\'t be found.
@@ -74,10 +86,10 @@ check-dump: dump-out
endif
endif
-$(ICUZDUMPOUT): icuzdump$(EXEEXT)
+$(ICUZDUMPOUT): $(ICUZDUMPEXEC)
-$(RMV) $(ICUZDUMPOUT)
-mkdir $(ICUZDUMPOUT)
- $(INVOKE) ./icuzdump $(ICUZDUMP_OPTS)
+ $(INVOKE) $(ICUZDUMPEXEC) $(ICUZDUMP_OPTS)
#
@@ -93,7 +105,6 @@ SDATA= solar87 solar88 solar89
TDATA= $(YDATA) $(NDATA) $(SDATA)
YEARISTYPE= ./yearistype
-ZIC = ./zic
TZDIR=zoneinfo
CFLAGS+=-D_POSIX_C_SOURCE
@@ -101,42 +112,45 @@ CPPFLAGS+= -DTZDIR=\"$(TZDIR)\"
# more data
XDATA=zone.tab yearistype.sh leapseconds iso3166.tab
-ICUDATA=ZoneMetaData.java icu_zone.txt tz2icu zoneinfo.txt
+ICUDATA=ZoneMetaData.java icu_zone.txt tz2icu zoneinfo64.txt zoneinfo.txt
-zic: $(OBJECTS) yearistype $(srcdir)/tz2icu.h
+$(ZICTARG): $(OBJECTS) $(TDATA) yearistype $(srcdir)/tz2icu.h
$(CC) $(CFLAGS) $(TZORIG_EXTRA_CFLAGS) $(LFLAGS) -I$(srcdir) $(OBJECTS) $(LDLIBS) -o $@
-tz2icu: $(srcdir)/tz2icu.cpp $(srcdir)/tz2icu.h
+$(TZ2ICUTARG): $(srcdir)/tz2icu.cpp $(srcdir)/tz2icu.h
$(CXX) -W -Wall -I$(srcdir) -I$(top_srcdir)/common -pedantic $(srcdir)/tz2icu.cpp -o $@
-icuzdump${EXEEXT}: $(srcdir)/icuzdump.cpp
+$(ICUZDUMPTARG): $(srcdir)/icuzdump.cpp
$(LINK.cc) -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/io -pedantic $(srcdir)/icuzdump.cpp $(LIBICUUC) $(LIBICUDT) $(LIBICUI18N) $(LIBICUIO) $(LIBICUTOOLUTIL) -o $@
-# $(CXX) -W -Wall -I$(srcdir) -I$(top_srcdir)/common -pedantic $(srcdir)/icuzdump.cpp -o $@
+$(TDATA): tdatamarker
-yearistype.sh: $(TZDATA)
+tdatamarker: $(TZDATA)
gunzip -d < $(TZDATA) | tar xf -
+ touch $@
yearistype: yearistype.sh
cp yearistype.sh yearistype
chmod +x yearistype
-posix_only: zic $(TDATA) $(srcdir)/icuzones
- $(ZIC) -y $(YEARISTYPE) -d $(TZDIR) -L /dev/null $(TDATA) $(srcdir)/icuzones
+posix_only: $(ZICEXEC) $(TDATA) $(srcdir)/icuzones
+ $(ZICEXEC) -y $(YEARISTYPE) -d $(TZDIR) -L /dev/null $(TDATA) $(srcdir)/icuzones
-icu_data: tz2icu posix_only
- ./tz2icu $(TZDIR) zone.tab `echo $(TZDATA) | sed -e "s/.*\/tzdata//;s/\.tar\.gz$$//"`
+icu_data: $(TZ2ICUEXEC) posix_only
+ $(TZ2ICUEXEC) $(TZDIR) zone.tab `echo $(TZDATA) | sed -e "s/.*\/tzdata//;s/\.tar\.gz$$//"`
+ $(TZ2ICUEXEC) $(TZDIR) zone.tab `echo $(TZDATA) | sed -e "s/.*\/tzdata//;s/\.tar\.gz$$//"` --old
clean:
- -rm -f core *.o *.out zdump${EXEEXT} zic${EXEEXT} yearistype date tz2icu${EXEEXT}
+ -rm -f core *.o *.out zdump${EXEEXT} $(ZICTARG) yearistype date $(TZ2ICUTARG)
@echo ICU specific cleanup:
-rm -f $(ICUDATA)
-rm -rf $(TZDIR)
- -$(RMV) icuzdump${EXEEXT} tzorig ./zdumpout/ ./icuzdumpout/
+ -$(RMV) $(ICUZDUMPTARG) tzorig ./zdumpout/ ./icuzdumpout/
ifneq ($(TZDATA),)
-rm -rf `gunzip -d < $(TZDATA) | tar tf - | grep -o '[^ ]*$$' | tr '\n' ' '`
+ -rm tdatamarker
endif
checkclean:
diff --git a/tools/tzcode/tz2icu.cpp b/tools/tzcode/tz2icu.cpp
index 9cb9f699..238e8656 100644
--- a/tools/tzcode/tz2icu.cpp
+++ b/tools/tzcode/tz2icu.cpp
@@ -1,7 +1,7 @@
/*
**********************************************************************
-* Copyright (c) 2003-2008, International Business Machines
+* Copyright (c) 2003-2010, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@@ -48,10 +48,11 @@
#include "tz2icu.h"
#include "unicode/uversion.h"
-#define USE64BITDATA
-
using namespace std;
+bool ICU44PLUS = TRUE;
+string TZ_RESOURCE_NAME = ICU_TZ_RESOURCE;
+
//--------------------------------------------------------------------
// Time utilities
//--------------------------------------------------------------------
@@ -287,7 +288,7 @@ bool readbool(ifstream& file) {
* Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo
* @param file an already-open file stream
*/
-void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData=false) {
+void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData) {
int32_t i;
// Check for TZ_ICU_MAGIC signature at file start. If we get a
@@ -360,7 +361,7 @@ void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData=false) {
// Build transitions vector out of corresponding times and types.
bool insertInitial = false;
- if (is64bitData) {
+ if (is64bitData && !ICU44PLUS) {
if (timecnt > 0) {
int32_t minidx = -1;
for (i=0; i<timecnt; ++i) {
@@ -436,7 +437,7 @@ void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData=false) {
}
}
} else {
- initialTypeIdx = 0;
+ initialTypeIdx = 0;
}
assert(initialTypeIdx >= 0);
// Add the initial type associated with the lowest int32 time
@@ -545,15 +546,16 @@ void handleFile(string path, string id) {
throw invalid_argument("can't open file");
}
+ // eat 32bit data part
ZoneInfo info;
- readzoneinfo(file, info);
+ readzoneinfo(file, info, false);
// Check for errors
if (!file) {
throw invalid_argument("read error");
}
-#ifdef USE64BITDATA
+ // we only use 64bit part
ZoneInfo info64;
readzoneinfo(file, info64, true);
@@ -590,27 +592,6 @@ void handleFile(string path, string id) {
}
ZONEINFO[id] = info64;
-
-#else
- // Check eof-relative pos (there may be a cleaner way to do this)
- int64_t eofPos = (int64_t) file.tellg();
- char buf[32];
- file.read(buf, 4);
- file.seekg(0, ios::end);
- eofPos = eofPos - (int64_t) file.tellg();
- if (eofPos) {
- // 2006c merged 32 and 64 bit versions in a fat binary
- // 64 version starts at the end of 32 bit version.
- // Therefore, if the file is *not* consumed, check
- // if it is maybe being restarted.
- if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) {
- ostringstream os;
- os << (-eofPos) << " unprocessed bytes at end";
- throw invalid_argument(os.str());
- }
- }
- ZONEINFO[id] = info;
-#endif
}
/**
@@ -1034,7 +1015,6 @@ void readFinalZonesAndRules(istream& in) {
void ZoneInfo::print(ostream& os, const string& id) const {
// Implement compressed format #2:
-
os << " /* " << id << " */ ";
if (aliasTo >= 0) {
@@ -1043,22 +1023,75 @@ void ZoneInfo::print(ostream& os, const string& id) const {
return;
}
- os << ":array {" << endl;
+ if (ICU44PLUS) {
+ os << ":table {" << endl;
+ } else {
+ os << ":array {" << endl;
+ }
vector<Transition>::const_iterator trn;
vector<ZoneType>::const_iterator typ;
- bool first=true;
- os << " :intvector { ";
- for (trn = transitions.begin(); trn != transitions.end(); ++trn) {
- if (!first) os << ", ";
- first = false;
- os << trn->time;
+ bool first;
+
+ if (ICU44PLUS) {
+ trn = transitions.begin();
+
+ // pre 32bit transitions
+ if (trn != transitions.end() && trn->time < LOWEST_TIME32) {
+ os << " transPre32:intvector { ";
+ for (first = true; trn != transitions.end() && trn->time < LOWEST_TIME32; ++trn) {
+ if (!first) {
+ os<< ", ";
+ }
+ first = false;
+ os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff);
+ }
+ os << " }" << endl;
+ }
+
+ // 32bit transitions
+ if (trn != transitions.end() && trn->time < HIGHEST_TIME32) {
+ os << " trans:intvector { ";
+ for (first = true; trn != transitions.end() && trn->time < HIGHEST_TIME32; ++trn) {
+ if (!first) {
+ os << ", ";
+ }
+ first = false;
+ os << trn->time;
+ }
+ os << " }" << endl;
+ }
+
+ // post 32bit transitions
+ if (trn != transitions.end()) {
+ os << " transPost32:intvector { ";
+ for (first = true; trn != transitions.end(); ++trn) {
+ if (!first) {
+ os<< ", ";
+ }
+ first = false;
+ os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff);
+ }
+ os << " }" << endl;
+ }
+ } else {
+ os << " :intvector { ";
+ for (trn = transitions.begin(), first = true; trn != transitions.end(); ++trn) {
+ if (!first) os << ", ";
+ first = false;
+ os << trn->time;
+ }
+ os << " }" << endl;
}
- os << " }" << endl;
+
first=true;
- os << " :intvector { ";
+ if (ICU44PLUS) {
+ os << " typeOffsets:intvector { ";
+ } else {
+ os << " :intvector { ";
+ }
for (typ = types.begin(); typ != types.end(); ++typ) {
if (!first) os << ", ";
first = false;
@@ -1066,23 +1099,43 @@ void ZoneInfo::print(ostream& os, const string& id) const {
}
os << " }" << endl;
- os << " :bin { \"" << hex << setfill('0');
- for (trn = transitions.begin(); trn != transitions.end(); ++trn) {
- os << setw(2) << trn->type;
+ if (ICU44PLUS) {
+ if (transitions.size() != 0) {
+ os << " typeMap:bin { \"" << hex << setfill('0');
+ for (trn = transitions.begin(); trn != transitions.end(); ++trn) {
+ os << setw(2) << trn->type;
+ }
+ os << dec << "\" }" << endl;
+ }
+ } else {
+ os << " :bin { \"" << hex << setfill('0');
+ for (trn = transitions.begin(); trn != transitions.end(); ++trn) {
+ os << setw(2) << trn->type;
+ }
+ os << dec << "\" }" << endl;
}
- os << dec << "\" }" << endl;
// Final zone info, if any
if (finalYear != -1) {
- os << " \"" << finalRuleID << "\"" << endl;
- os << " :intvector { " << finalOffset << ", "
- << finalYear << " }" << endl;
+ if (ICU44PLUS) {
+ os << " finalRule { \"" << finalRuleID << "\" }" << endl;
+ os << " finalRaw:int { " << finalOffset << " }" << endl;
+ os << " finalYear:int { " << finalYear << " }" << endl;
+ } else {
+ os << " \"" << finalRuleID << "\"" << endl;
+ os << " :intvector { " << finalOffset << ", "
+ << finalYear << " }" << endl;
+ }
}
// Alias list, if any
if (aliases.size() != 0) {
first = true;
- os << " :intvector { ";
+ if (ICU44PLUS) {
+ os << " links:intvector { ";
+ } else {
+ os << " :intvector { ";
+ }
for (set<int32_t>::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) {
if (!first) os << ", ";
first = false;
@@ -1100,7 +1153,7 @@ operator<<(ostream& os, const ZoneMap& zoneinfo) {
for (ZoneMapIter it = zoneinfo.begin();
it != zoneinfo.end();
++it) {
- if(c) os << ",";
+ if(c && !ICU44PLUS) os << ",";
it->second.print(os, it->first);
os << "//Z#" << c++ << endl;
}
@@ -1189,42 +1242,117 @@ void ZoneInfo::optimizeTypeList() {
if (aliasTo >= 0) return; // Nothing to do for aliases
- // If there are zero transitions and one type, then leave that as-is.
- if (transitions.size() == 0) {
- if (types.size() != 1) {
- cerr << "Error: transition count = 0, type count = " << types.size() << endl;
+ if (!ICU44PLUS) {
+ // This is the old logic which has a bug, which occasionally removes
+ // the type before the first transition. The problem was fixed
+ // by inserting the dummy transition indirectly.
+
+ // If there are zero transitions and one type, then leave that as-is.
+ if (transitions.size() == 0) {
+ if (types.size() != 1) {
+ cerr << "Error: transition count = 0, type count = " << types.size() << endl;
+ }
+ return;
}
- return;
- }
- set<SimplifiedZoneType> simpleset;
- for (vector<Transition>::const_iterator i=transitions.begin();
- i!=transitions.end(); ++i) {
- assert(i->type < (int32_t)types.size());
- simpleset.insert(types[i->type]);
- }
+ set<SimplifiedZoneType> simpleset;
+ for (vector<Transition>::const_iterator i=transitions.begin();
+ i!=transitions.end(); ++i) {
+ assert(i->type < (int32_t)types.size());
+ simpleset.insert(types[i->type]);
+ }
- // Map types to integer indices
- map<SimplifiedZoneType,int32_t> simplemap;
- int32_t n=0;
- for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin();
- i!=simpleset.end(); ++i) {
- simplemap[*i] = n++;
- }
+ // Map types to integer indices
+ map<SimplifiedZoneType,int32_t> simplemap;
+ int32_t n=0;
+ for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin();
+ i!=simpleset.end(); ++i) {
+ simplemap[*i] = n++;
+ }
+
+ // Remap transitions
+ for (vector<Transition>::iterator i=transitions.begin();
+ i!=transitions.end(); ++i) {
+ assert(i->type < (int32_t)types.size());
+ ZoneType oldtype = types[i->type];
+ SimplifiedZoneType newtype(oldtype);
+ assert(simplemap.find(newtype) != simplemap.end());
+ i->type = simplemap[newtype];
+ }
+
+ // Replace type list
+ types.clear();
+ copy(simpleset.begin(), simpleset.end(), back_inserter(types));
+
+ } else {
+ if (types.size() > 1) {
+ // Note: localtime uses the very first non-dst type as initial offsets.
+ // If all types are DSTs, the very first type is treated as the initial offsets.
+
+ // Decide a type used as the initial offsets. ICU put the type at index 0.
+ ZoneType initialType = types[0];
+ for (vector<ZoneType>::const_iterator i=types.begin(); i!=types.end(); ++i) {
+ if (i->dstoffset == 0) {
+ initialType = *i;
+ break;
+ }
+ }
+
+ SimplifiedZoneType initialSimplifiedType(initialType);
+
+ // create a set of unique types, but ignoring fields which we're not interested in
+ set<SimplifiedZoneType> simpleset;
+ simpleset.insert(initialSimplifiedType);
+ for (vector<Transition>::const_iterator i=transitions.begin(); i!=transitions.end(); ++i) {
+ assert(i->type < (int32_t)types.size());
+ simpleset.insert(types[i->type]);
+ }
+
+ // Map types to integer indices, however, keeping the first type at offset 0
+ map<SimplifiedZoneType,int32_t> simplemap;
+ simplemap[initialSimplifiedType] = 0;
+ int32_t n = 1;
+ for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) {
+ if (*i < initialSimplifiedType || initialSimplifiedType < *i) {
+ simplemap[*i] = n++;
+ }
+ }
+
+ // Remap transitions
+ for (vector<Transition>::iterator i=transitions.begin();
+ i!=transitions.end(); ++i) {
+ assert(i->type < (int32_t)types.size());
+ ZoneType oldtype = types[i->type];
+ SimplifiedZoneType newtype(oldtype);
+ assert(simplemap.find(newtype) != simplemap.end());
+ i->type = simplemap[newtype];
+ }
+
+ // Replace type list
+ types.clear();
+ types.push_back(initialSimplifiedType);
+ for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) {
+ if (*i < initialSimplifiedType || initialSimplifiedType < *i) {
+ types.push_back(*i);
+ }
+ }
- // Remap transitions
- for (vector<Transition>::iterator i=transitions.begin();
- i!=transitions.end(); ++i) {
- assert(i->type < (int32_t)types.size());
- ZoneType oldtype = types[i->type];
- SimplifiedZoneType newtype(oldtype);
- assert(simplemap.find(newtype) != simplemap.end());
- i->type = simplemap[newtype];
+            // Walk the transitions once more and drop every entry that does
+            // not actually change the raw/dst offsets, i.e. whose type index
+            // equals that of the preceding entry (the initial type is index 0).
+            int32_t prevTypeIdx = 0;
+            for (vector<Transition>::iterator i=transitions.begin(); i!=transitions.end();) {
+                if (i->type == prevTypeIdx) {
+                    // this is not a time transition, probably just name change
+                    // e.g. America/Resolute after 2006 in 2010b
+                    // vector::erase() invalidates i, so continue from the
+                    // iterator it returns (the element after the erased one).
+                    i = transitions.erase(i);
+                } else {
+                    prevTypeIdx = i->type;
+                    ++i;
+                }
+            }
+ }
}
- // Replace type list
- types.clear();
- copy(simpleset.begin(), simpleset.end(), back_inserter(types));
}
/**
@@ -1233,6 +1361,17 @@ void ZoneInfo::optimizeTypeList() {
void ZoneInfo::mergeFinalData(const FinalZone& fz) {
int32_t year = fz.year;
int64_t seconds = yearToSeconds(year);
+
+ if (!ICU44PLUS) {
+ if (seconds > HIGHEST_TIME32) {
+ // Avoid transitions beyond signed 32bit max second.
+ // This may result in incorrect offset computation around
+ // HIGHEST_TIME32. This is a limitation of ICU
+ // before 4.4.
+ seconds = HIGHEST_TIME32;
+ }
+ }
+
vector<Transition>::iterator it =
find_if(transitions.begin(), transitions.end(),
bind2nd(ptr_fun(isAfter), seconds));
@@ -1292,22 +1431,35 @@ void FinalRule::print(ostream& os) const {
int main(int argc, char *argv[]) {
string rootpath, zonetab, version;
+ bool validArgs = FALSE;
- if (argc != 4) {
- cout << "Usage: tz2icu <dir> <cmap> <vers>" << endl
- << " <dir> path to zoneinfo file tree generated by" << endl
- << " ICU-patched version of zic" << endl
- << " <cmap> country map, from tzdata archive," << endl
- << " typically named \"zone.tab\"" << endl
- << " <vers> version string, such as \"2003e\"" << endl;
- exit(1);
- } else {
+ if (argc == 4 || argc == 5) {
+ validArgs = TRUE;
rootpath = argv[1];
zonetab = argv[2];
version = argv[3];
+ if (argc == 5) {
+ if (strcmp(argv[4], "--old") == 0) {
+ ICU44PLUS = FALSE;
+ TZ_RESOURCE_NAME = ICU_TZ_RESOURCE_OLD;
+ } else {
+ validArgs = FALSE;
+ }
+ }
+ }
+ if (!validArgs) {
+ cout << "Usage: tz2icu <dir> <cmap> <tzver> [--old]" << endl
+ << " <dir> path to zoneinfo file tree generated by" << endl
+ << " ICU-patched version of zic" << endl
+ << " <cmap> country map, from tzdata archive," << endl
+ << " typically named \"zone.tab\"" << endl
+ << " <tzver> version string, such as \"2003e\"" << endl
+ << " --old generating resource format before ICU4.4" << endl;
+ exit(1);
}
cout << "Olson data version: " << version << endl;
+ cout << "ICU 4.4+ format: " << (ICU44PLUS ? "Yes" : "No") << endl;
try {
ifstream finals(ICU_ZONE_FILE);
@@ -1326,70 +1478,6 @@ int main(int argc, char *argv[]) {
return 1;
}
-//############################################################################
-//# Note: We no longer use tz.alias to define alias for legacy ICU time zones.
-//# The contents of tz.alias were migrated into zic source format and
-//# processed by zic as 'Link'.
-//############################################################################
-#if 0
- // Read the legacy alias list and process it. Treat the legacy mappings
- // like links, but also record them in the "legacy" hash.
- try {
- ifstream aliases(ICU_TZ_ALIAS);
- if (!aliases) {
- cerr << "Error: Unable to open " ICU_TZ_ALIAS << endl;
- return 1;
- }
- int32_t n = 0;
- string line;
- while (getline(aliases, line)) {
- string::size_type lb = line.find('#');
- if (lb != string::npos) {
- line.resize(lb); // trim comments
- }
- vector<string> a;
- istringstream is(line);
- copy(istream_iterator<string>(is),istream_iterator<string>(),
- back_inserter(a));
- if (a.size() == 0) continue; // blank line
- if (a.size() != 2) {
- cerr << "Error: Can't parse \"" << line << "\" in "
- ICU_TZ_ALIAS << endl;
- exit(1);
- }
- ++n;
-
- string alias(a[0]), olson(a[1]);
- if (links.find(alias) != links.end()) {
- cerr << "Error: Alias \"" << alias
- << "\" is an Olson zone in "
- ICU_TZ_ALIAS << endl;
- return 1;
- }
- if (reverseLinks.find(alias) != reverseLinks.end()) {
- cerr << "Error: Alias \"" << alias
- << "\" is an Olson link to \"" << reverseLinks[olson]
- << "\" in " << ICU_TZ_ALIAS << endl;
- return 1;
- }
-
- // Record source for error reporting
- if (linkSource.find(olson) == linkSource.end()) {
- linkSource[olson] = "ICU alias";
- }
- assert(linkSource.find(alias) == linkSource.end());
- linkSource[alias] = "ICU alias";
-
- links[olson].insert(alias);
- reverseLinks[alias] = olson;
- }
- cout << "Finished reading " << n
- << " aliases from " ICU_TZ_ALIAS << endl;
- } catch (const exception& error) {
- cerr << "Error: While reading " ICU_TZ_ALIAS ": " << error.what() << endl;
- return 1;
- }
-#endif
try {
// Recursively scan all files below the given path, accumulating
// their data into ZONEINFO. All files must be TZif files. Any
@@ -1570,9 +1658,10 @@ int main(int argc, char *argv[]) {
struct tm* now = localtime(&sec);
int32_t thisYear = now->tm_year + 1900;
+ string filename = TZ_RESOURCE_NAME + ".txt";
// Write out a resource-bundle source file containing data for
// all zones.
- ofstream file(ICU_TZ_RESOURCE ".txt");
+ ofstream file(filename.c_str());
if (file) {
file << "//---------------------------------------------------------" << endl
<< "// Copyright (C) 2003";
@@ -1592,7 +1681,7 @@ int main(int argc, char *argv[]) {
<< "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
<< "//---------------------------------------------------------" << endl
<< endl
- << ICU_TZ_RESOURCE ":table(nofallback) {" << endl
+ << TZ_RESOURCE_NAME << ":table(nofallback) {" << endl
<< " TZVersion { \"" << version << "\" }" << endl
<< " Zones:array { " << endl
<< ZONEINFO // Zones (the actual data)
@@ -1615,35 +1704,47 @@ int main(int argc, char *argv[]) {
}
file << " }" << endl;
- // Emit country (region) map. Emitting the string zone IDs results
- // in a 188 kb binary resource; emitting the zone index numbers
- // trims this to 171 kb. More work for the runtime code, but
- // a smaller data footprint.
- file << " Regions { " << endl;
- int32_t rc = 0;
- for (map<string, set<string> >::const_iterator i=countryMap.begin();
- i != countryMap.end(); ++i) {
- string country = i->first;
- const set<string>& zones(i->second);
- file << " ";
- if(country[0]==0) {
- file << "Default";
+ // Emit country (region) map.
+ if (ICU44PLUS) {
+ file << " Regions:array {" << endl;
+ int32_t zn = 0;
+ for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
+ map<string, string>::iterator cit = reverseCountryMap.find(i->first);
+ if (cit == reverseCountryMap.end()) {
+ file << " \"001\",";
+ } else {
+ file << " \"" << cit->second << "\", ";
+ }
+ file << "//Z#" << zn++ << " " << i->first << endl;
}
- file << country << ":intvector { ";
- bool first = true;
- for (set<string>::const_iterator j=zones.begin();
- j != zones.end(); ++j) {
- if (!first) file << ", ";
- first = false;
- if (zoneIDs.find(*j) == zoneIDs.end()) {
- cerr << "Error: Nonexistent zone in country map: " << *j << endl;
- return 1;
+ file << " }" << endl;
+ } else {
+ file << " Regions { " << endl;
+ int32_t rc = 0;
+ for (map<string, set<string> >::const_iterator i=countryMap.begin();
+ i != countryMap.end(); ++i) {
+ string country = i->first;
+ const set<string>& zones(i->second);
+ file << " ";
+ if(country[0]==0) {
+ file << "Default";
+ }
+ file << country << ":intvector { ";
+ bool first = true;
+ for (set<string>::const_iterator j=zones.begin();
+ j != zones.end(); ++j) {
+ if (!first) file << ", ";
+ first = false;
+ if (zoneIDs.find(*j) == zoneIDs.end()) {
+ cerr << "Error: Nonexistent zone in country map: " << *j << endl;
+ return 1;
+ }
+ file << zoneIDs[*j]; // emit the zone's index number
}
- file << zoneIDs[*j]; // emit the zone's index number
+ file << " } //R#" << rc++ << endl;
}
- file << " } //R#" << rc++ << endl;
+ file << " }" << endl;
}
- file << " }" << endl;
file << "}" << endl;
}
@@ -1651,100 +1752,10 @@ int main(int argc, char *argv[]) {
file.close();
if (file) { // recheck error bit
- cout << "Finished writing " ICU_TZ_RESOURCE ".txt" << endl;
+ cout << "Finished writing " << TZ_RESOURCE_NAME << ".txt" << endl;
} else {
- cerr << "Error: Unable to open/write to " ICU_TZ_RESOURCE ".txt" << endl;
+ cerr << "Error: Unable to open/write to " << TZ_RESOURCE_NAME << ".txt" << endl;
return 1;
}
-
-#define ICU4J_TZ_CLASS "ZoneMetaData"
-
- // Write out a Java source file containing only a few pieces of
- // meta-data missing from the core JDK: the equivalency lists and
- // the country map.
- ofstream java(ICU4J_TZ_CLASS ".java");
- if (java) {
- java << "//---------------------------------------------------------" << endl
- << "// Copyright (C) 2003";
- if (thisYear > 2003) {
- java << "-" << thisYear;
- }
- java << ", International Business Machines" << endl
- << "// Corporation and others. All Rights Reserved." << endl
- << "//---------------------------------------------------------" << endl
- << "// Build tool: tz2icu" << endl
- << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */
- << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl
- << "// Olson version: " << version << endl
- << "// ICU version: " << U_ICU_VERSION << endl
- << "//---------------------------------------------------------" << endl
- << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl
- << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl
- << "//---------------------------------------------------------" << endl
- << endl
- << "package com.ibm.icu.impl;" << endl
- << endl
- << "public final class " ICU4J_TZ_CLASS " {" << endl;
-
- // Emit equivalency lists
- bool first1 = true;
- java << " public static final String VERSION = \"" + version + "\";" << endl;
- java << " public static final String[][] EQUIV = {" << endl;
- for (ZoneMap::const_iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
- if (i->second.isAlias() || i->second.getAliases().size() == 0) {
- continue;
- }
- if (!first1) java << "," << endl;
- first1 = false;
- // The ID of this zone (the canonical zone, to which the
- // aliases point) will be sorted into the list, so it
- // won't be at position 0. If we want to know which is
- // the canonical zone, we should move it to position 0.
- java << " { ";
- bool first2 = true;
- const set<int32_t>& s = i->second.getAliases();
- for (set<int32_t>::const_iterator j=s.begin(); j!=s.end(); ++j) {
- if (!first2) java << ", ";
- java << '"' << zoneIDlist[*j] << '"';
- first2 = false;
- }
- java << " }";
- }
- java << endl
- << " };" << endl;
-
- // Emit country map.
- first1 = true;
- java << " public static final String[][] COUNTRY = {" << endl;
- for (map<string, set<string> >::const_iterator i=countryMap.begin();
- i != countryMap.end(); ++i) {
- if (!first1) java << "," << endl;
- first1 = false;
- string country = i->first;
- const set<string>& zones(i->second);
- java << " { \"" << country << '"';
- for (set<string>::const_iterator j=zones.begin();
- j != zones.end(); ++j) {
- java << ", \"" << *j << '"';
- }
- java << " }";
- }
- java << endl
- << " };" << endl;
-
- java << "}" << endl;
- }
-
- java.close();
-
- if (java) { // recheck error bit
- cout << "Finished writing " ICU4J_TZ_CLASS ".java" << endl;
- } else {
- cerr << "Error: Unable to open/write to " ICU4J_TZ_CLASS ".java" << endl;
- return 1;
- }
-
- return 0;
}
-
//eof
diff --git a/tools/tzcode/tz2icu.h b/tools/tzcode/tz2icu.h
index d3cfa641..a488ea6d 100644
--- a/tools/tzcode/tz2icu.h
+++ b/tools/tzcode/tz2icu.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (c) 2003-2004, International Business Machines
+* Copyright (c) 2003-2010, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@@ -38,6 +38,7 @@ typedef unsigned char ICUZoneinfoVersion;
* resource name within the file. That is, the output will be to the
* file ICU_TZ_RESOURCE ".txt" and the resource within it will be
* ICU_TZ_RESOURCE. */
-#define ICU_TZ_RESOURCE "zoneinfo"
+#define ICU_TZ_RESOURCE_OLD "zoneinfo"
+#define ICU_TZ_RESOURCE "zoneinfo64"
#endif
diff --git a/tools/tzcode/zic.c b/tools/tzcode/zic.c
index fcc3c823..5dc9db01 100644
--- a/tools/tzcode/zic.c
+++ b/tools/tzcode/zic.c
@@ -2371,22 +2371,11 @@ wp = ecpyalloc(_("no POSIX environment variable for zone"));
* Rule Brazil is impacted by this limitation, because
* the final set of rules are starting in 2038. Although
* this code put the first couple of transitions populated
- * by the final rules, they will be dropped off when
- * collecting transition times. So, we need to keep
- * the start year of the final rule in 2038, not 2039.
- * Fortunately, the Brazil rules in 2038 and beyond use
- * the same base offset/dst saving amount. Thus, even
- * we skip the first couple of transitions, the final
- * rule set for 2038 works properly. So for now,
- * we do not increment the final rule start year only when
- * it falls into year 2038. We need to revisit this code
- * in future to fix the root cause of this problem (ICU
- * resource type limitation - signed int32).
- * Oct 7, 2008 - Yoshito */
- int finalStartYear = (year == 2038) ? year : year + 1;
+ * by the final rules, they might be dropped when tz2icu
+ * collects the transition times. */
emit_icu_zone(icuFile,
zpfirst->z_name, zp->z_gmtoff,
- rp, finalRuleIndex, finalStartYear);
+ rp, finalRuleIndex, year + 1);
/* only emit this for the first year */
finalRule1 = NULL;
}