about | summary | refs | log | tree | commit | diff
path: root/stubdata
diff options
context:
space:
mode:
author: Elliott Hughes <enh@google.com>, 2010-10-28 17:46:38 -0700
committer: Elliott Hughes <enh@google.com>, 2010-10-29 10:15:41 -0700
commit: 89f309f809ddaba6a3ba4a31a8ee74cfa5e85be3 (patch)
tree: 5ccf6d0e9efa651602e25b164a894c209c8274f0 /stubdata
parent: c3d84b2d6cb1f8ae329866aca53255ecdeb01630 (diff)
download: icu4c-89f309f809ddaba6a3ba4a31a8ee74cfa5e85be3.tar.gz
Better checking for missing data.
This patch warns about files that aren't .res files (which we reference in our .txt files but don't actually compile into the .dat files), and warns about locales for which we have some but not all of the locale data, which is probably a mistake. (It might make more sense if the .txt file just said which locales we want and the script went out and collected all the locale data.)

This outputs a *lot* of warnings, but I'll address those separately. The generated files are unchanged.

I've also removed support for RBNF data, since we don't use it anywhere and so it would probably be a wasteful mistake to reintroduce the data.

Bug: 3139942
Change-Id: Iac2619d1c8d2cc10bc9b87ecc317acb86e410877
Diffstat (limited to 'stubdata')
-rwxr-xr-x stubdata/icu_dat_generator.py 251
1 files changed, 118 insertions, 133 deletions
diff --git a/stubdata/icu_dat_generator.py b/stubdata/icu_dat_generator.py
index 158ee81c..11edf5f0 100755
--- a/stubdata/icu_dat_generator.py
+++ b/stubdata/icu_dat_generator.py
@@ -17,15 +17,15 @@
# Generate ICU dat files for locale relevant resources.
#
# Usage:
-# icu_dat_generator.py icu-version [-v] [-h]
+# icu_dat_generator.py [-v] [-h] ICU-VERSION
#
# Sample usage:
-# $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py 4.4 --verbose
+# $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose 4.4
#
# Add new dat file:
# 1. Add icudtxxl-<datname>.txt to $ANDROID_BUILD_TOP/external/icu4c/stubdata.
-# Check the exemplar file under
-# $ANDROID_BUILD_TOP/external/icu4c/stubdata/icudt42l-us.dat.
+# Check the example file under
+# $ANDROID_BUILD_TOP/external/icu4c/stubdata/icudt42l-us.txt
# 2. Add an entry to main() --> datlist[]
# 3. Run this script to generate dat files.
#
@@ -35,27 +35,18 @@
import getopt
import os.path
+import re
import shutil
import subprocess
import sys
-# Return 0 if the version_string contains non-digit characters.
-def GetIcuVersion(version_string):
- list = version_string.split(".")
- version = ""
- for number in list:
- if (number.isdigit()):
- version += number
- else:
- return -1
- return version
-
-def PrintHelp():
+def PrintHelpAndExit():
print "Usage:"
- print "icu_dat_generator.py icu-version [-v|--verbose] [-h|--help]"
+ print " icu_dat_generator.py [-v|--verbose] [-h|--help] ICU-VERSION"
print "Example:"
- print "$ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py 4.4"
+ print " $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py 4.4"
+ sys.exit(1)
def InvokeIcuTool(tool, working_dir, args):
@@ -65,7 +56,7 @@ def InvokeIcuTool(tool, working_dir, args):
if VERBOSE:
command = "[%s] %s" % (working_dir, " ".join(command_list))
print command
-
+
ret = subprocess.call(command_list, cwd = working_dir)
if ret != 0:
sys.exit(command_list[0:])
@@ -78,28 +69,25 @@ def GetIcuPrebuiltDir():
def ExtractAllResourceToTempDir():
# copy icudtxxl-all.dat to icudtxxl.dat
- source_dat = os.path.join(ANDROID_ROOT, "external", "icu4c", "stubdata",
- ICUDATA + "-all.dat")
- dest_dat = os.path.join(ANDROID_ROOT, "external", "icu4c", "stubdata",
- ICUDATA_DAT)
+ source_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA + "-all.dat")
+ dest_dat = os.path.join(ICU4C_DIR, "stubdata", ICUDATA_DAT)
shutil.copyfile(source_dat, dest_dat)
InvokeIcuTool("icupkg", None, [dest_dat, "-x", "*", "-d", TMP_DAT_PATH])
def MakeDat(icu_dat_path, dat_name):
# Get the resource list. e.g. icudt42l-us.txt, icudt42l-default.txt.
- dat_list_file_path = os.path.join(icu_dat_path, ICUDATA + "-" + dat_name +
- ".txt")
+ dat_list_file_path = os.path.join(icu_dat_path, ICUDATA + "-" + dat_name + ".txt")
+ print "------ Processing '%s'..." % (dat_list_file_path)
if not os.path.isfile(dat_list_file_path):
print "%s not present for resource list." % dat_list_file_path
return
GenResIndex(dat_list_file_path)
CopyAndroidCnvFiles(icu_dat_path)
os.chdir(TMP_DAT_PATH)
- # Run command such as "icupkg -tl -s icudt42l -a icudt42l-us.txt
- # new icudt42l.dat"
- InvokeIcuTool("icupkg", None, ["-tl", "-s", TMP_DAT_PATH, "-a", dat_list_file_path, "new",
- ICUDATA_DAT])
+ # Run command such as "icupkg -tl -s icudt42l -a icudt42l-us.txt new icudt42l.dat".
+ args = ["-tl", "-s", TMP_DAT_PATH, "-a", dat_list_file_path, "new", ICUDATA_DAT]
+ InvokeIcuTool("icupkg", None, args)
def WriteIndex(path, list, cldr_version = None):
@@ -119,101 +107,98 @@ def WriteIndex(path, list, cldr_version = None):
f.close()
-def ShowMissing(whats, locales, data_dir_name, dat_file):
- if not len(whats):
- return
- dat_file = os.path.basename(dat_file)
- for missing in locales.difference(whats):
- p = os.path.join(ANDROID_ROOT, "external", "icu4c", "data", data_dir_name,
- missing + ".txt")
- if os.path.exists(p):
- print "warning: %s exists but isn't included in %s" % (p, dat_file)
+def AddResFile(collection, path):
+ end = path.find(".res")
+ if end > 0:
+ collection.add(path[path.find("/")+1:end])
+ else:
+ # TODO: this is a bug, right? we really just wanted to strip the extension,
+ # and don't care whether it was .res or not?
+ print "warning: ignoring '%s'; not a .res file" % (path.rstrip())
+ return
-# Open dat file such as icudt42l-us.txt.
+# Open input file (such as icudt42l-us.txt).
# Go through the list and generate res_index.txt for locales, brkitr,
-# coll and rbnf.
+# coll, et cetera.
def GenResIndex(dat_list_file_path):
res_index = "res_index.txt"
- locales = set()
brkitrs = set()
colls = set()
currs = set()
langs = set()
+ locales = set()
regions = set()
zones = set()
- rbnfs = set()
for line in open(dat_list_file_path, "r"):
- if line.find("root.") >= 0:
- continue
- if line.find("res_index") >= 0:
- continue
- if line.find("_.res") >= 0:
+ if "root." in line or "res_index" in line or "_.res" in line:
continue;
- if line.find("brkitr/") >= 0:
- end = line.find(".res")
- if end > 0:
- brkitrs.add(line[line.find("/")+1:end])
- elif line.find("coll/") >= 0:
- end = line.find(".res")
- if end > 0:
- colls.add(line[line.find("/")+1:end])
- elif line.find("curr/") >= 0:
- end = line.find(".res")
- if end > 0:
- currs.add(line[line.find("/")+1:end])
- elif line.find("lang/") >= 0:
- end = line.find(".res")
- if end > 0:
- langs.add(line[line.find("/")+1:end])
- elif line.find("region/") >= 0:
- end = line.find(".res")
- if end > 0:
- regions.add(line[line.find("/")+1:end])
- elif line.find("zone/") >= 0:
- end = line.find(".res")
- if end > 0:
- zones.add(line[line.find("/")+1:end])
- elif line.find("rbnf/") >= 0:
- end = line.find(".res")
- if end > 0:
- rbnfs.add(line[line.find("/")+1:end])
- elif line.find(".res") >= 0:
+ if "brkitr/" in line:
+ AddResFile(brkitrs, line)
+ elif "coll/" in line:
+ AddResFile(colls, line)
+ elif "curr/" in line:
+ AddResFile(currs, line)
+ elif "lang/" in line:
+ AddResFile(langs, line)
+ elif "region/" in line:
+ AddResFile(regions, line)
+ elif "zone/" in line:
+ AddResFile(zones, line)
+ elif ".res" in line:
# We need to determine the resource is locale resource or misc resource.
# To determine the locale resource, we assume max script length is 3.
end = line.find(".res")
if end <= 3 or (line.find("_") <= 3 and line.find("_") > 0):
locales.add(line[:end])
- ShowMissing(brkitrs, locales, "brkitr", dat_list_file_path)
- ShowMissing(colls, locales, "coll", dat_list_file_path)
- ShowMissing(currs, locales, "curr", dat_list_file_path)
- ShowMissing(langs, locales, "lang", dat_list_file_path)
- ShowMissing(regions, locales, "region", dat_list_file_path)
- ShowMissing(zones, locales, "zone", dat_list_file_path)
- ShowMissing(rbnfs, locales, "rbnf", dat_list_file_path)
+ kind_to_locales = {
+ "brkitr": brkitrs,
+ "coll": colls,
+ "curr": currs,
+ "lang": langs,
+ "locales": locales,
+ "region": regions,
+ "zone": zones
+ }
+
+ # Find every locale we've mentioned, for whatever reason.
+ every_locale = set()
+ for locales in kind_to_locales.itervalues():
+ every_locale = every_locale.union(locales)
+ if VERBOSE:
+ for kind, locales in kind_to_locales.items():
+ print "%s=%s" % (kind, sorted(locales))
+ print "every_locale=" % sorted(every_locale)
+
+ # Find cases where we've included only part of a locale's data.
+ missing_files = []
+ for locale in every_locale:
+ for kind, locales in kind_to_locales.items():
+ p = os.path.join(ICU4C_DIR, "data", kind, locale + ".txt")
+ if not locale in locales and os.path.exists(p):
+ missing_files.append(p)
+
+ # Warn about the missing files.
+ for missing_file in sorted(missing_files):
+ print "warning: %s exists but isn't included in %s" % (missing_file, dat_list_file_path)
+
+ # Write the genrb input files.
WriteIndex(os.path.join(TMP_DAT_PATH, res_index), locales, CLDR_VERSION)
- WriteIndex(os.path.join(TMP_DAT_PATH, "brkitr", res_index), brkitrs)
- WriteIndex(os.path.join(TMP_DAT_PATH, "coll", res_index), colls)
- WriteIndex(os.path.join(TMP_DAT_PATH, "curr", res_index), currs)
- WriteIndex(os.path.join(TMP_DAT_PATH, "lang", res_index), langs)
- WriteIndex(os.path.join(TMP_DAT_PATH, "region", res_index), regions)
- WriteIndex(os.path.join(TMP_DAT_PATH, "zone", res_index), zones)
- WriteIndex(os.path.join(TMP_DAT_PATH, "rbnf", res_index), rbnfs)
+ for kind, locales in kind_to_locales.items():
+ if kind == "locales":
+ continue
+ WriteIndex(os.path.join(TMP_DAT_PATH, kind, res_index), locales)
# Call genrb to generate new res_index.res.
InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "brkitr"), [res_index])
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "coll"), [res_index])
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "curr"), [res_index])
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "lang"), [res_index])
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "region"), [res_index])
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "zone"), [res_index])
- if len(rbnfs):
- InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, "rbnf"), [res_index])
+ for kind, locales in kind_to_locales.items():
+ if kind == "locales":
+ continue
+ InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])
def CopyAndroidCnvFiles(icu_dat_path):
@@ -233,59 +218,58 @@ def CopyAndroidCnvFiles(icu_dat_path):
def main():
- global ANDROID_ROOT # Android project home directory
+ global ANDROID_BUILD_TOP # $ANDROID_BUILD_TOP
+ global ICU4C_DIR # $ANDROID_BUILD_TOP/external/icu4c
global ICU_VERSION # ICU version number
global ICUDATA # e.g. "icudt42l"
global ICUDATA_DAT # e.g. "icudt42l.dat"
global CLDR_VERSION # CLDR version. The value can be vary upon ICU release.
global TMP_DAT_PATH # temp directory to store all resource files and
# intermediate dat files.
- global HELP
global VERBOSE
- argc = len(sys.argv)
- if argc < 2:
- print "You must provide icu version number."
- print "Example: ./icu_dat_generator.py 4.4"
- sys.exit(1)
- ICU_VERSION = sys.argv[1]
- version = GetIcuVersion(ICU_VERSION)
- if (version == -1):
- print sys.argv[1] + " is not a valid icu version number!"
- sys.exit(1)
- ICUDATA = "icudt" + version + "l"
- CLDR_VERSION = "1.8"
- ANDROID_ROOT = os.environ.get("ANDROID_BUILD_TOP")
- if not ANDROID_ROOT:
- print "$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'."
- sys.exit(1)
- ICUDATA_DAT = ICUDATA + ".dat"
- HELP = False
VERBOSE = False
+ show_help = False
try:
- opts, args = getopt.getopt(sys.argv[2:], 'hv', ['help', 'verbose'])
+ opts, args = getopt.getopt(sys.argv[1:], 'hv', ['help', 'verbose'])
except getopt.error:
- print "Invalid option"
- PrintHelp()
- sys.exit(1)
+ PrintHelpAndExit()
for opt, arg in opts:
if opt in ('-h', '--help'):
- PrintHelp()
- sys.exit(1)
+ show_help = True
elif opt in ('-v', '--verbose'):
VERBOSE = True
+ if len(args) < 1:
+ show_help = True
+ if show_help:
+ PrintHelpAndExit()
- # Check for requiered source files.
- icu_dat_path = os.path.join(ANDROID_ROOT, "external", "icu4c", "stubdata")
- full_data_filename = os.path.join(icu_dat_path, ICUDATA + "-all.dat")
+ # TODO: is there any advantage to requiring this as an argument? couldn't we just glob it from
+ # the file system, looking for "icudt\d+l.*\.txt"?
+ ICU_VERSION = args[0]
+ if re.search(r'[0-9]+\.[0-9]+', ICU_VERSION) == None:
+ print "'%s' is not a valid icu version number!" % (ICU_VERSION)
+ sys.exit(1)
+ ICUDATA = "icudt" + re.sub(r'([^0-9])', "", ICU_VERSION) + "l"
+ CLDR_VERSION = "1.8"
+ ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
+ if not ANDROID_BUILD_TOP:
+ print "$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'."
+ sys.exit(1)
+ ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
+
+ ICUDATA_DAT = ICUDATA + ".dat"
+ # Check for required source files.
+ stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")
+ full_data_filename = os.path.join(stubdata_dir, ICUDATA + "-all.dat")
if not os.path.isfile(full_data_filename):
print "%s not present." % full_data_filename
sys.exit(1)
# Create a temporary working directory.
- TMP_DAT_PATH = os.path.join(ANDROID_ROOT, "external", "icu4c", "tmp")
+ TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
if os.path.exists(TMP_DAT_PATH):
shutil.rmtree(TMP_DAT_PATH)
os.mkdir(TMP_DAT_PATH)
@@ -293,17 +277,18 @@ def main():
# Extract resource files from icudtxxl-all.dat to TMP_DAT_PATH.
ExtractAllResourceToTempDir()
+ # TODO: is there any advantage to hard-coding this? couldn't we just glob it from the file system?
datlist = ["us", "us-euro", "default", "us-japan", "zh", "medium", "large"]
for dat_subtag in datlist:
- MakeDat(icu_dat_path, dat_subtag)
+ output_filename = os.path.join(stubdata_dir, ICUDATA + "-" + dat_subtag + ".dat")
+ MakeDat(stubdata_dir, dat_subtag)
# Copy icudtxxl.dat to stubdata directory with corresponding subtag.
- shutil.copyfile(os.path.join(TMP_DAT_PATH, ICUDATA_DAT),
- os.path.join(icu_dat_path, ICUDATA + "-" + dat_subtag + ".dat"))
- print "Generate ICU data:" + os.path.join(icu_dat_path, ICUDATA + "-" + dat_subtag + ".dat")
+ shutil.copyfile(os.path.join(TMP_DAT_PATH, ICUDATA_DAT), output_filename)
+ print "Generated ICU data: %s" % (output_filename)
# Cleanup temporary working directory and icudtxxl.dat
shutil.rmtree(TMP_DAT_PATH)
- os.remove(os.path.join(icu_dat_path, ICUDATA_DAT))
+ os.remove(os.path.join(stubdata_dir, ICUDATA_DAT))
if __name__ == "__main__":
main()