aboutsummaryrefslogtreecommitdiff
path: root/tools/generate_constants_json.py
blob: 3fe8684897ab452d767558593f0a9299780f16c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper tool to generate cross-compiled syscall and constant tables to JSON.

This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
generates the `constants.json` file with that. LLVM IR files are moderately
architecture-neutral (at least for this case).
"""

import argparse
import collections
import json
import re
import sys

# Matches the definition of a NUL-terminated string constant. Group 1 is the
# global symbol (e.g. `@.str.5`) and group 2 is the string contents without the
# trailing `\00`. Contents that include a `"` or a backslash escape are not
# matched.
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
# Matches one `%struct.constant_entry { ... }` or `%struct.syscall_entry { ... }`
# initializer and captures the text between the braces.
_TABLE_ENTRY_RE = re.compile(
    r'%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}')
# Matches the contents of a single table entry, which look something like
#
#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# for arm-v7a. What we are interested in are the @.str.x symbol (group 1, or
# `null` for an entry without a name) and the very last number (group 2, which
# may be negative).
_TABLE_ENTRY_CONTENTS = re.compile(r'.*?(null|@[a-zA-Z0-9.]+).* (-?\d+)')

# When testing clang-r458909, we found a new constant_entry pattern:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, current clang-r458507 generates:
#   %struct.constant_entry { i8* getelementptr inbounds
#    ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#    i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# and FS_KEY_DESC_PREFIX is defined as a char* "fscrypt:"
# Current output for that constant in constants.json is:
#   "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern to ignore such pointer constants. It is only
# consulted for entries that _TABLE_ENTRY_CONTENTS failed to match.
_IGNORED_ENTRY_CONTENTS = re.compile(r'.*? ptrto.* \(.*\)')

# Result of parsing one LLVM IR file: `table_name` is 'syscalls', 'constants',
# or '' when no table was found; `table_entries` maps each entry name to its
# integer value, in the order the entries were read.
ParseResults = collections.namedtuple('ParseResults', ['table_name',
                                                       'table_entries'])

# Passed to argparse as the epilog of the --help output.
HELP_EPILOG = """Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""


def parse_llvm_ir(ir):
    """Extracts the syscall or constant table from one LLVM IR file.

    String constant definitions are collected as they are encountered, so a
    table entry can only refer to a string symbol defined on an earlier line.

    Args:
        ir: An iterable of text lines (e.g. an open file) holding LLVM IR.

    Returns:
        A ParseResults whose `table_name` is 'syscalls' or 'constants' (or ''
        if no table line was seen) and whose `table_entries` maps each entry
        name to its integer value.

    Raises:
        ValueError: If a table entry cannot be parsed and is not one of the
            deliberately ignored pointer-valued entries.
        KeyError: If a table entry refers to a string symbol that has not
            been seen.
    """
    strings_by_symbol = collections.OrderedDict()
    entries = collections.OrderedDict()
    table_kind = ''

    for text in ir:
        string_match = _STRING_CONSTANT_RE.match(text)
        if string_match is not None:
            symbol, contents = string_match.groups()
            strings_by_symbol[symbol] = contents
            continue

        is_syscall_table = '@syscall_table' in text
        if not is_syscall_table and '@constant_table' not in text:
            # Neither a string constant nor a table definition.
            continue
        table_kind = 'syscalls' if is_syscall_table else 'constants'

        for raw_entry in _TABLE_ENTRY_RE.findall(text):
            parsed = _TABLE_ENTRY_CONTENTS.match(raw_entry)
            if parsed is None:
                # Pointer-valued entries carry no usable number; skip them,
                # but reject anything else that does not look like an entry.
                if _IGNORED_ENTRY_CONTENTS.match(raw_entry) is None:
                    raise ValueError('Failed to parse table entry %r' %
                                     raw_entry)
                continue
            symbol, number = parsed.groups()
            if symbol == 'null':
                # A nameless entry marks the end of the table.
                break
            entries[strings_by_symbol[symbol]] = int(number)

    return ParseResults(table_name=table_kind, table_entries=entries)


def _close_unless_std(stream):
    """Closes `stream` unless it is one of the process's standard streams."""
    # argparse.FileType maps '-' to sys.stdin / sys.stdout, which are not ours
    # to close.
    if stream is sys.stdin or stream is sys.stdout:
        return
    stream.close()


def main(argv=None):
    """Main entrypoint.

    Parses every LLVM IR file given on the command line, collects the tables
    into a single dictionary keyed by table name, adds the top-level
    `arch_nr`, `bits` and `arch_name` fields, and writes the result as JSON to
    the file given by `--output`.

    Args:
        argv: The command-line arguments, without the program name. Defaults
            to `sys.argv[1:]`.

    Returns:
        0 on success.

    Raises:
        ValueError: If an input file contains neither a syscall nor a constant
            table, or if MINIJAIL_ARCH_NR is not a known architecture.
        KeyError: If no constants table was provided, or it lacks
            MINIJAIL_ARCH_NR or MINIJAIL_ARCH_BITS.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument('--output',
                        help='The path of the generated constants.json file.',
                        type=argparse.FileType('w'),
                        required=True)
    parser.add_argument(
        'llvm_ir_files',
        help='An LLVM IR file with one of the {constants,syscall} table.',
        metavar='llvm_ir_file',
        nargs='+',
        type=argparse.FileType('r'))
    opts = parser.parse_args(argv)

    # It is a bit more complicated to generate the arch_name, since the
    # constants can only output numeric values. Use a hardcoded mapping instead.
    arch_names = {
        0xC000003E: 'x86_64',
        0x40000003: 'x86',
        0xC00000B7: 'arm64',
        0x40000028: 'arm',
    }

    # argparse has already opened every file; make sure they all get closed
    # (and the output flushed) whether or not generation succeeds.
    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            if not parse_results.table_name:
                # Without this check the file would be emitted under an empty
                # key in the generated JSON.
                raise ValueError('No syscall or constant table found in %s' %
                                 getattr(ir, 'name', '<input>'))
            constants_json[
                parse_results.table_name] = parse_results.table_entries

        # Populate the top-level fields.
        constants = constants_json['constants']
        arch_nr = constants['MINIJAIL_ARCH_NR']
        constants_json['arch_nr'] = arch_nr
        constants_json['bits'] = constants['MINIJAIL_ARCH_BITS']

        arch_name = arch_names.get(arch_nr)
        if arch_name is None:
            raise ValueError('Unknown architecture: 0x%08X' % arch_nr)
        constants_json['arch_name'] = arch_name

        json.dump(constants_json, opts.output, indent='  ')
    finally:
        for stream in opts.llvm_ir_files + [opts.output]:
            _close_unless_std(stream)
    return 0


if __name__ == '__main__':
    # main() falls back to sys.argv[1:] when it is called without arguments.
    sys.exit(main())