summaryrefslogtreecommitdiff
path: root/tools/updatecldrdata.py
blob: 6697d7273b2900685adf121b89929be9aa29725d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/python -B

"""Regenerates (just) ICU data source files used to build ICU data files."""

from __future__ import print_function

import os
import shutil
import subprocess
import sys

import i18nutil
import icuutil


# Run with no arguments from any directory, with no special setup required.
# See icu4c/source/data/cldr-icu-readme.txt for the upstream ICU instructions.
def main():
  """Regenerates the ICU data source files from CLDR and copies them into the ICU tree.

  The work is done in a fresh temporary build directory:
    1. Copy the CLDR, ICU4C, ICU4J and ICU tools sources into the build directory.
    2. Export the environment variables read by the ant builds.
    3. Run the ant targets that install the CLDR libraries and build the ICU data.
    4. Copy the generated data and test data back into the ICU checkout.
    5. Build langInfo.txt with the ICU4J LocaleDistanceBuilder tool and copy it
       into icu4c/source/data/misc.

  Exits the process with status 1 if `mvn` or `ant` cannot be found, and with
  status 0 on success. A failing build step raises
  subprocess.CalledProcessError (from subprocess.check_call).
  """
  # `which` returns non-zero when the tool is not on PATH.
  if subprocess.call(["which", "mvn"]) != 0 or subprocess.call(["which", "ant"]) != 0:
    print("Can't find the required tools. Run `sudo apt-get install maven ant` to install")
    # sys.exit, not the bare exit() helper: exit() is injected by the site
    # module for interactive use and is not guaranteed to be defined.
    sys.exit(1)

  cldr_dir = icuutil.cldrDir()
  print('Found cldr in %s ...' % cldr_dir)
  icu_dir = icuutil.icuDir()
  print('Found icu in %s ...' % icu_dir)

  # Ant doesn't have any mechanism for using a build directory separate from the
  # source directory so this build script creates a temporary directory and then
  # copies all necessary ICU4J and CLDR source code to here before building it:
  i18nutil.SwitchToNewTemporaryDirectory()
  build_dir = os.getcwd()
  cldr_build_dir = os.path.join(build_dir, 'cldr')
  icu4c_build_dir = os.path.join(build_dir, 'icu4c')
  icu4j_build_dir = os.path.join(build_dir, 'icu4j')
  icu_tools_build_dir = os.path.join(build_dir, 'icu_tools')

  print('Copying CLDR source code ...')
  shutil.copytree(cldr_dir, cldr_build_dir, symlinks=True)
  print('Copying ICU4C source code ...')
  shutil.copytree(os.path.join(icu_dir, 'icu4c'), icu4c_build_dir, symlinks=True)
  print('Copying ICU4J source code ...')
  shutil.copytree(os.path.join(icu_dir, 'icu4j'), icu4j_build_dir, symlinks=True)
  print('Copying ICU tools source code ...')
  shutil.copytree(os.path.join(icu_dir, 'tools'), icu_tools_build_dir, symlinks=True)

  # Set up the environment variables inherited by every subprocess below.
  os.environ['ANT_OPTS'] = '-Xmx4096m'

  # CLDR_DIR points at the temporary copy of the CLDR source tree made above
  # (not at the original checkout); the data files are read from there.
  os.environ['CLDR_DIR'] = cldr_build_dir

  os.environ['ICU4C_ROOT'] = icu4c_build_dir
  os.environ['ICU4J_ROOT'] = icu4j_build_dir
  os.environ['TOOLS_ROOT'] = icu_tools_build_dir
  cldr_tmp_dir = os.path.join(build_dir, 'cldr-staging')
  os.environ['CLDR_TMP_DIR'] = cldr_tmp_dir

  # The ant invocations below rely on the current working directory to locate
  # their build files, hence the os.chdir() before each group of targets.
  icu_tools_cldr_dir = os.path.join(icu_tools_build_dir, 'cldr')
  print('Installing CLDR tools ...')
  os.chdir(icu_tools_cldr_dir)
  subprocess.check_call(['ant', 'install-cldr-libs'])

  print('Building ICU data...')
  icu4c_data_build_dir = os.path.join(icu4c_build_dir, 'source/data')
  os.chdir(icu4c_data_build_dir)
  subprocess.check_call(['ant', 'cleanprod'])
  subprocess.check_call(['ant', 'setup'])
  subprocess.check_call(['ant', 'proddata'])

  # Finally we "compile" CLDR-data to a "production" form and place it in ICU
  os.chdir(os.path.join(icu_tools_build_dir, 'cldr', 'cldr-to-icu'))
  cldr_production_tmp_dir = os.path.join(cldr_tmp_dir, 'production')
  subprocess.check_call([
    'ant',
    '-f',
    'build-icu-data.xml',
    '-DcldrDataDir=' + cldr_production_tmp_dir,
    '-DincludePseudoLocales=true'
  ])

  os.chdir(icu_tools_cldr_dir)
  subprocess.check_call([
    'ant',
    'copy-cldr-testdata',
  ])

  # Copy the generated data files from the temporary directory into AOSP.
  icu4c_data_source_dir = os.path.join(icu_dir, 'icu4c/source/data')
  rmAndCopyTree(icu4c_data_build_dir, icu4c_data_source_dir)
  rmAndCopyTree(
    os.path.join(icu4c_build_dir, 'source/test/testdata/cldr'),
    os.path.join(icu_dir, 'icu4c/source/test/testdata/cldr'))
  rmAndCopyTree(
    os.path.join(icu4j_build_dir, 'main/tests/core/src/com/ibm/icu/dev/data/cldr'),
    os.path.join(icu_dir, 'icu4j/main/tests/core/src/com/ibm/icu/dev/data/cldr'))

  # The same canonicalization test file is needed by both the ICU4C and the
  # ICU4J test suites.
  localeCanonicalization_src = os.path.join(
    cldr_production_tmp_dir, 'common/testData/localeIdentifiers/localeCanonicalization.txt')
  shutil.copy(localeCanonicalization_src, os.path.join(
    icu_dir, 'icu4c/source/test/testdata/localeCanonicalization.txt'))
  shutil.copy(localeCanonicalization_src, os.path.join(
    icu_dir, 'icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt'))

  # Generate icu4c/source/data/misc/langInfo.txt with an ICU4J tool
  langInfo_dst_path = os.path.join(icu4c_data_source_dir, 'misc', 'langInfo.txt')
  print('Building %s' % langInfo_dst_path)
  langInfo_out_path = '/tmp/langInfo.txt'  # path hardcoded in the LocaleDistanceBuilder tool
  # Remove any stale output first so that an old file is never copied below.
  if os.path.exists(langInfo_out_path):
    os.remove(langInfo_out_path)

  os.chdir(icu4j_build_dir)
  subprocess.check_call(['ant', 'icu4jJar'])
  # The classpath passed to java below is relative to tools/misc.
  os.chdir(os.path.join(icu4j_build_dir, 'tools', 'misc'))
  subprocess.check_call(['ant', 'jar'])
  subprocess.check_call([
    'java',
    '-cp',
    'out/lib/icu4j-tools.jar:../../icu4j.jar',
    'com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder',
  ])
  print('Copying {src} to {dst}'.format(src=langInfo_out_path, dst=langInfo_dst_path))
  shutil.copyfile(langInfo_out_path, langInfo_dst_path)

  print('Look in %s for new data source files' % icu4c_data_source_dir)
  sys.exit(0)


def rmAndCopyTree(src, dst):
  """Replaces the directory tree at dst with a copy of the tree at src.

  Any existing tree at dst is deleted first, so files that exist only in the
  old dst do not survive the copy.
  """
  dst_already_there = os.path.exists(dst)
  if dst_already_there:
    # shutil.copytree refuses to copy onto an existing directory.
    shutil.rmtree(dst)
  shutil.copytree(src, dst)


# Run the update only when executed as a script, not when imported.
if __name__ == '__main__':
  main()