diff options
author | Elliott Hughes <enh@google.com> | 2010-10-29 11:44:15 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2010-10-29 12:05:59 -0700 |
commit | 7173c2a35d19fbdcad4e894b17842fdc2b6dfa81 (patch) | |
tree | 8953ed8e75a9bd198f16aa02fa4548d3b1274f6c | |
parent | 89f309f809ddaba6a3ba4a31a8ee74cfa5e85be3 (diff) | |
download | icu4c-7173c2a35d19fbdcad4e894b17842fdc2b6dfa81.tar.gz |
More icu_dat_generator.py improvements.
Find the input files and work out the ICU version for ourselves, rather than
having a hard-coded list for the former and requiring the user to supply the
latter.
Also fix most lint warnings.
Bug: 3139942
Change-Id: Ibbf2d6b74765fb9019dea524d40275643fd08d6a
-rwxr-xr-x | stubdata/icu_dat_generator.py | 175 | ||||
-rw-r--r-- | stubdata/readme.txt | 17 |
2 files changed, 91 insertions, 101 deletions
diff --git a/stubdata/icu_dat_generator.py b/stubdata/icu_dat_generator.py index 11edf5f0..27a0ce4a 100755 --- a/stubdata/icu_dat_generator.py +++ b/stubdata/icu_dat_generator.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python # # Copyright (C) 2010 The Android Open Source Project # @@ -17,23 +17,24 @@ # Generate ICU dat files for locale relevant resources. # # Usage: -# icu_dat_generator.py [-v] [-h] ICU-VERSION +# icu_dat_generator.py [-v] [-h] # # Sample usage: -# $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose 4.4 +# $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose # # Add new dat file: -# 1. Add icudtxxl-<datname>.txt to $ANDROID_BUILD_TOP/external/icu4c/stubdata. -# Check the example file under -# $ANDROID_BUILD_TOP/external/icu4c/stubdata/icudt42l-us.txt -# 2. Add an entry to main() --> datlist[] -# 3. Run this script to generate dat files. +# 1. Add icudtxxl-<datname>.txt to $ANDROID_BUILD_TOP/external/icu4c/stubdata. +# Check the example file under +# $ANDROID_BUILD_TOP/external/icu4c/stubdata/icudt42l-us.txt +# 2. Add an entry to main() --> datlist[] +# 3. Run this script to generate dat files. # # For ICU upgrade # We cannot get CLDR version from dat file unless calling ICU function. # If there is a CLDR version change, please modify "global CLDR_VERSION". import getopt +import glob import os.path import re import shutil @@ -43,55 +44,46 @@ import sys def PrintHelpAndExit(): print "Usage:" - print " icu_dat_generator.py [-v|--verbose] [-h|--help] ICU-VERSION" + print " icu_dat_generator.py [-v|--verbose] [-h|--help]" print "Example:" - print " $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py 4.4" + print " $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py" sys.exit(1) def InvokeIcuTool(tool, working_dir, args): - command_list = [os.path.join(GetIcuPrebuiltDir(), tool)] + command_list = [os.path.join(ICU_PREBUILT_DIR, tool)] command_list.extend(args) if VERBOSE: command = "[%s] %s" % (working_dir, " ".join(command_list)) print command - ret = subprocess.call(command_list, cwd = working_dir) + ret = subprocess.call(command_list, cwd=working_dir) if ret != 0: sys.exit(command_list[0:]) -def GetIcuPrebuiltDir(): - return os.path.join(os.environ.get("ANDROID_EABI_TOOLCHAIN"), "..", "..", - "..", "icu-" + ICU_VERSION) - - def ExtractAllResourceToTempDir(): # copy icudtxxl-all.dat to icudtxxl.dat - source_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA + "-all.dat") - dest_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA_DAT) - shutil.copyfile(source_dat, dest_dat) - InvokeIcuTool("icupkg", None, [dest_dat, "-x", "*", "-d", TMP_DAT_PATH]) - - -def MakeDat(icu_dat_path, dat_name): - # Get the resource list. e.g. icudt42l-us.txt, icudt42l-default.txt. - dat_list_file_path = os.path.join(icu_dat_path, ICUDATA + "-" + dat_name + ".txt") - print "------ Processing '%s'..." % (dat_list_file_path) - if not os.path.isfile(dat_list_file_path): - print "%s not present for resource list." % dat_list_file_path - return - GenResIndex(dat_list_file_path) - CopyAndroidCnvFiles(icu_dat_path) - os.chdir(TMP_DAT_PATH) - # Run command such as "icupkg -tl -s icudt42l -a icudt42l-us.txt new icudt42l.dat". - args = ["-tl", "-s", TMP_DAT_PATH, "-a", dat_list_file_path, "new", ICUDATA_DAT] - InvokeIcuTool("icupkg", None, args) - - -def WriteIndex(path, list, cldr_version = None): - res_index = "res_index.txt" + src_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA + "-all.dat") + dst_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA + ".dat") + shutil.copyfile(src_dat, dst_dat) + InvokeIcuTool("icupkg", None, [dst_dat, "-x", "*", "-d", TMP_DAT_PATH]) + + +def MakeDat(input_file, stubdata_dir): + print "------ Processing '%s'..." % (input_file) + if not os.path.isfile(input_file): + print "%s not a file!" % input_file + sys.exit(1) + GenResIndex(input_file) + CopyAndroidCnvFiles(stubdata_dir) + # Run "icupkg -tl -s icudt42l -a icudt42l-us.txt new icudt42l.dat". + args = ["-tl", "-s", TMP_DAT_PATH, "-a", input_file, "new", ICUDATA + ".dat"] + InvokeIcuTool("icupkg", TMP_DAT_PATH, args) + + +def WriteIndex(path, locales, cldr_version=None): empty_value = " {\"\"}\n" # key-value pair for all locale entries f = open(path, "w") @@ -99,8 +91,8 @@ def WriteIndex(path, list, cldr_version = None): if cldr_version: f.write(" CLDRVersion { %s }\n" % cldr_version) f.write(" InstalledLocales {\n") - for item in list: - f.write(item + empty_value) + for locale in locales: + f.write(locale + empty_value) f.write(" }\n") f.write("}\n") @@ -112,16 +104,16 @@ def AddResFile(collection, path): if end > 0: collection.add(path[path.find("/")+1:end]) else: - # TODO: this is a bug, right? we really just wanted to strip the extension, - # and don't care whether it was .res or not? - print "warning: ignoring '%s'; not a .res file" % (path.rstrip()) + # TODO(enh): this is a bug, right? we really just wanted to strip the + # extension, and don't care whether it was .res or not? + print "warning: ignoring non-.res file %s" % path.rstrip() return # Open input file (such as icudt42l-us.txt). # Go through the list and generate res_index.txt for locales, brkitr, # coll, et cetera. -def GenResIndex(dat_list_file_path): +def GenResIndex(input_file): res_index = "res_index.txt" brkitrs = set() @@ -132,9 +124,9 @@ def GenResIndex(dat_list_file_path): regions = set() zones = set() - for line in open(dat_list_file_path, "r"): + for line in open(input_file, "r"): if "root." in line or "res_index" in line or "_.res" in line: - continue; + continue if "brkitr/" in line: AddResFile(brkitrs, line) elif "coll/" in line: @@ -155,13 +147,13 @@ def GenResIndex(dat_list_file_path): locales.add(line[:end]) kind_to_locales = { - "brkitr": brkitrs, - "coll": colls, - "curr": currs, - "lang": langs, - "locales": locales, - "region": regions, - "zone": zones + "brkitr": brkitrs, + "coll": colls, + "curr": currs, + "lang": langs, + "locales": locales, + "region": regions, + "zone": zones } # Find every locale we've mentioned, for whatever reason. @@ -184,7 +176,8 @@ def GenResIndex(dat_list_file_path): # Warn about the missing files. for missing_file in sorted(missing_files): - print "warning: %s exists but isn't included in %s" % (missing_file, dat_list_file_path) + relative_path = "/".join(missing_file.split("/")[-2:]) + print "warning: missing data for supported locale: %s" % relative_path # Write the genrb input files. WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales, CLDR_VERSION) @@ -201,7 +194,7 @@ def GenResIndex(dat_list_file_path): InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index]) -def CopyAndroidCnvFiles(icu_dat_path): +def CopyAndroidCnvFiles(stubdata_dir): android_specific_cnv = ["gsm-03.38-2000.cnv", "iso-8859_16-2001.cnv", "docomo-shift_jis-2007.cnv", @@ -210,59 +203,63 @@ def CopyAndroidCnvFiles(icu_dat_path): "softbank-jisx-208-2007.cnv", "softbank-shift_jis-2007.cnv"] for cnv_file in android_specific_cnv: - source_path = os.path.join(icu_dat_path, "cnv", cnv_file) - dest_path = os.path.join(TMP_DAT_PATH, cnv_file) - shutil.copyfile(source_path, dest_path) + src_path = os.path.join(stubdata_dir, "cnv", cnv_file) + dst_path = os.path.join(TMP_DAT_PATH, cnv_file) + shutil.copyfile(src_path, dst_path) if VERBOSE: - print "copy " + source_path + " " + dest_path + print "copy " + src_path + " " + dst_path def main(): global ANDROID_BUILD_TOP # $ANDROID_BUILD_TOP - global ICU4C_DIR # $ANDROID_BUILD_TOP/external/icu4c - global ICU_VERSION # ICU version number + global ICU4C_DIR # $ANDROID_BUILD_TOP/external/icu4c + global ICU_PREBUILT_DIR # Directory containing pre-built ICU tools. global ICUDATA # e.g. "icudt42l" - global ICUDATA_DAT # e.g. "icudt42l.dat" - global CLDR_VERSION # CLDR version. The value can be vary upon ICU release. + global CLDR_VERSION # CLDR version. The value varies between ICU releases. global TMP_DAT_PATH # temp directory to store all resource files and # intermediate dat files. global VERBOSE + CLDR_VERSION = "1.8" VERBOSE = False show_help = False try: - opts, args = getopt.getopt(sys.argv[1:], 'hv', ['help', 'verbose']) + opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose"]) except getopt.error: PrintHelpAndExit() - for opt, arg in opts: - if opt in ('-h', '--help'): + for opt, _ in opts: + if opt in ("-h", "--help"): show_help = True - elif opt in ('-v', '--verbose'): + elif opt in ("-v", "--verbose"): VERBOSE = True - if len(args) < 1: + if args: show_help = True if show_help: PrintHelpAndExit() - # TODO: is there any advantage to requiring this as an argument? couldn't we just glob it from - # the file system, looking for "icudt\d+l.*\.txt"? - ICU_VERSION = args[0] - if re.search(r'[0-9]+\.[0-9]+', ICU_VERSION) == None: - print "'%s' is not a valid icu version number!" % (ICU_VERSION) - sys.exit(1) - ICUDATA = "icudt" + re.sub(r'([^0-9])', "", ICU_VERSION) + "l" - CLDR_VERSION = "1.8" ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") if not ANDROID_BUILD_TOP: print "$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'." sys.exit(1) ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c") - - ICUDATA_DAT = ICUDATA + ".dat" - # Check for required source files. stubdata_dir = os.path.join(ICU4C_DIR, "stubdata") + + # Find all the input files. + input_files = glob.glob(os.path.join(stubdata_dir, "icudt[0-9][0-9]l-*.txt")) + + # Work out the ICU version from the input filenames, so we can find the + # appropriate pre-built ICU tools. + icu_version = re.sub(r"([^0-9])", "", os.path.basename(input_files[0])) + ICU_PREBUILT_DIR = os.path.join(os.environ.get("ANDROID_EABI_TOOLCHAIN"), + "..", "..", "..", "icu-%s.%s" % (icu_version[0], icu_version[1])) + if not os.path.exists(ICU_PREBUILT_DIR): + print "%s does not exist!" % ICU_PREBUILT_DIR + + ICUDATA = "icudt" + icu_version + "l" + + # Check that -all.dat exists (since we build the other .dat files from that). full_data_filename = os.path.join(stubdata_dir, ICUDATA + "-all.dat") if not os.path.isfile(full_data_filename): print "%s not present." % full_data_filename @@ -277,18 +274,16 @@ def main(): # Extract resource files from icudtxxl-all.dat to TMP_DAT_PATH. ExtractAllResourceToTempDir() - # TODO: is there any advantage to hard-coding this? couldn't we just glob it from the file system? - datlist = ["us", "us-euro", "default", "us-japan", "zh", "medium", "large"] - for dat_subtag in datlist: - output_filename = os.path.join(stubdata_dir, ICUDATA + "-" + dat_subtag + ".dat") - MakeDat(stubdata_dir, dat_subtag) - # Copy icudtxxl.dat to stubdata directory with corresponding subtag. - shutil.copyfile(os.path.join(TMP_DAT_PATH, ICUDATA_DAT), output_filename) - print "Generated ICU data: %s" % (output_filename) + # Process each input file in turn. + for input_file in sorted(input_files): + output_file = input_file[:-3] + "dat" + MakeDat(input_file, stubdata_dir) + shutil.copyfile(os.path.join(TMP_DAT_PATH, ICUDATA + ".dat"), output_file) + print "Generated ICU data: %s" % output_file # Cleanup temporary working directory and icudtxxl.dat shutil.rmtree(TMP_DAT_PATH) - os.remove(os.path.join(stubdata_dir, ICUDATA_DAT)) + os.remove(os.path.join(stubdata_dir, ICUDATA + ".dat")) if __name__ == "__main__": main() diff --git a/stubdata/readme.txt b/stubdata/readme.txt index 2b6c259b..5d51bbe8 100644 --- a/stubdata/readme.txt +++ b/stubdata/readme.txt @@ -2,25 +2,20 @@ This directory is used for building our Android ICU data file. 1. To generate ICU data files:run the icu_dat_generator.py script. The command is: - ./icu_dat_generator.py <icu version> [-v] [-h] - For example: - ./icu_dat_generator.py 4.4 + ./icu_dat_generator.py [-v] [-h] 2. To add a resource to Android ICU data file: insert an entry to icudtxx-<tag name>.txt under external/icu4c/stubdata directory then run the icu_dat_generator.py. For example, to add French sort to icudt44l-us.dat, you need to a. Add an entry, "coll/fr.res", into external/icu4c/stubdata/icudt44l-us.txt - b. run "./icu_dat_generator.py 4.4". + b. run "./icu_dat_generator.py". -3. To add a new Adnroid ICU data file: add the <tag name> to datlist[] in - icu_dat_generator.py and add corresponding resource list to - icudtxxl<tag name>.txt. Then run the script icu_dat_generator.py to generate - dat files. +3. To add a new Android ICU data file: create the .txt file and run the script + to generate the .dat file. For example, to add icudt44l-latin.dat, you need to - a. Modify icu_dat_generator.py by adding "latin" into datlist. - b. Make a new file icudt44l-latin.txt to include the resource list. - c. run "./icu_dat_generator.py 4.4". + a. Make a new file icudt44l-latin.txt to include the resource list. + b. run "./icu_dat_generator.py". 4. Add a new resource or modify existing ICU resource definition: Note: This is a rare case. You should talk to ICU team first if it is a bug |