aboutsummaryrefslogtreecommitdiff
path: root/asn1crypto/parser.py
blob: 0a19135e2b9a9b201c664354c1ec50d201b535aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# coding: utf-8

"""
Functions for parsing and dumping using the ASN.1 DER encoding. Exports the
following items:

 - emit()
 - parse()
 - peek()

Other functions defined in this module are private helpers for the functions
listed above.
"""

from __future__ import unicode_literals, division, absolute_import, print_function

import sys

from ._types import byte_cls, chr_cls, type_name
from .util import int_from_bytes, int_to_bytes

# True when running under Python 2, where indexing a byte string yields a
# one-character string that must be passed through ord() to get an integer
_PY2 = sys.version_info <= (3,)
# Template for truncated-data errors - filled in with a pair of values: the
# number of bytes requested and the number of bytes available
_INSUFFICIENT_DATA_MESSAGE = 'Insufficient data - %s bytes requested but only %s available'


def emit(class_, method, tag, contents):
    """
    Constructs a byte string of an ASN.1 DER-encoded value

    This is typically not useful. Instead, use one of the standard classes from
    asn1crypto.core, or construct a new class with specific fields, and call the
    .dump() method.

    :param class_:
        An integer ASN.1 class value: 0 (universal), 1 (application),
        2 (context), 3 (private)

    :param method:
        An integer ASN.1 method value: 0 (primitive), 1 (constructed)

    :param tag:
        An integer ASN.1 tag value - zero or greater

    :param contents:
        A byte string of the encoded byte contents

    :raises:
        TypeError - when class_, method or tag is not an integer, or contents is not a byte string
        ValueError - when class_, method or tag is outside of its valid range

    :return:
        A byte string of the ASN.1 DER value (header and contents)
    """

    if not isinstance(class_, int):
        raise TypeError('class_ must be an integer, not %s' % type_name(class_))

    if class_ < 0 or class_ > 3:
        raise ValueError('class_ must be one of 0, 1, 2 or 3, not %s' % class_)

    if not isinstance(method, int):
        raise TypeError('method must be an integer, not %s' % type_name(method))

    if method < 0 or method > 1:
        raise ValueError('method must be 0 or 1, not %s' % method)

    if not isinstance(tag, int):
        raise TypeError('tag must be an integer, not %s' % type_name(tag))

    if tag < 0:
        # Zero is an accepted tag number, so only negative values are rejected
        raise ValueError('tag must be greater than or equal to zero, not %s' % tag)

    if not isinstance(contents, byte_cls):
        raise TypeError('contents must be a byte string, not %s' % type_name(contents))

    return _dump_header(class_, method, tag, contents) + contents


def parse(contents, strict=False):
    """
    Splits the first ASN.1 BER/DER-encoded value found in a byte string into
    its component parts.

    Most code should not call this directly - prefer the classes in
    asn1crypto.core (or a custom class built from them) and their .load()
    class method.

    :param contents:
        A byte string of BER/DER-encoded data

    :param strict:
        A boolean - when True, any bytes following the first encoded value
        cause a ValueError to be raised

    :raises:
        ValueError - when the contents do not contain an ASN.1 header or are truncated in some way
        TypeError - when contents is not a byte string

    :return:
        A 6-element tuple:
         - 0: integer class (0 to 3)
         - 1: integer method
         - 2: integer tag
         - 3: byte string header
         - 4: byte string content
         - 5: byte string trailer
    """

    if not isinstance(contents, byte_cls):
        raise TypeError('contents must be a byte string, not %s' % type_name(contents))

    total = len(contents)
    parsed, used = _parse(contents, total)

    # Leftover bytes are only an error when the caller asked for strictness
    if not strict or used == total:
        return parsed

    leftover = total - used
    raise ValueError('Extra data - %d bytes of trailing data were provided' % leftover)


def peek(contents):
    """
    Determines how many bytes the first ASN.1 BER/DER-encoded value in a byte
    string occupies.

    Handy when a byte string is a concatenation of several encoded values and
    the caller needs to know where the first one ends.

    :param contents:
        A byte string of BER/DER-encoded data

    :raises:
        ValueError - when the contents do not contain an ASN.1 header or are truncated in some way
        TypeError - when contents is not a byte string

    :return:
        An integer with the number of bytes occupied by the ASN.1 value
    """

    if not isinstance(contents, byte_cls):
        raise TypeError('contents must be a byte string, not %s' % type_name(contents))

    # The second element of the result is the count of bytes consumed; the
    # parsed components themselves are not needed here
    return _parse(contents, len(contents))[1]


def _parse(encoded_data, data_len, pointer=0, lengths_only=False):
    """
    Parses a byte string into component parts

    :param encoded_data:
        A byte string that contains BER-encoded data

    :param data_len:
        The integer length of the encoded data

    :param pointer:
        The index in the byte string to parse from

    :param lengths_only:
        A boolean to cause the call to return a 2-element tuple of the integer
        index at which the header ends and the integer index at which the
        value ends (including the end-of-contents marker for indefinite-length
        values). For definite-length values the end index is not checked
        against data_len in this mode. Internal use only.

    :raises:
        ValueError - when the header is truncated, when the contents extend
        past data_len, or when an indefinite-length value is missing its
        end-of-contents marker

    :return:
        A 2-element tuple:
         - 0: A tuple of (class_, method, tag, header, content, trailer)
         - 1: An integer indicating how many bytes were consumed
    """

    if data_len < pointer + 2:
        raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (2, data_len - pointer))

    start = pointer
    first_octet = ord(encoded_data[pointer]) if _PY2 else encoded_data[pointer]
    pointer += 1

    tag = first_octet & 31
    # High tag number form: base 128 digits using the 8th bit as a
    # continuation indicator
    if tag == 31:
        tag = 0
        while True:
            # The up-front check only guarantees two bytes, so each extra
            # identifier octet must be bounds-checked to report truncation
            # as a ValueError instead of an IndexError
            if pointer >= data_len:
                raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (1, data_len - pointer))
            num = ord(encoded_data[pointer]) if _PY2 else encoded_data[pointer]
            pointer += 1
            tag *= 128
            tag += num & 127
            if num >> 7 == 0:
                break

    # A multi-byte tag may have used up all of the available data
    if pointer >= data_len:
        raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (1, data_len - pointer))
    length_octet = ord(encoded_data[pointer]) if _PY2 else encoded_data[pointer]
    pointer += 1

    if length_octet >> 7 == 0:
        # Short form - the length is held in the low 7 bits
        contents_end = pointer + (length_octet & 127)
        if lengths_only:
            return (pointer, contents_end)

    else:
        length_octets = length_octet & 127
        if length_octets:
            # Long form - the low 7 bits give the number of length octets
            if data_len < pointer + length_octets:
                raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (length_octets, data_len - pointer))
            pointer += length_octets
            contents_end = pointer + int_from_bytes(encoded_data[pointer - length_octets:pointer], signed=False)
            if lengths_only:
                return (pointer, contents_end)

        else:
            # To properly parse indefinite length values, we need to scan forward
            # parsing headers until we find a value with a length of zero. If we
            # just scanned looking for \x00\x00, nested indefinite length values
            # would not work.
            contents_end = pointer
            # Unfortunately we need to understand the contents of the data to
            # properly scan forward, which bleeds some representation info into
            # the parser. This condition handles the unused bits byte in
            # constructed bit strings.
            # NOTE(review): only the tag number is tested, not the class bits,
            # so this also applies to non-universal values with tag 3 - confirm
            # that is intended.
            if tag == 3:
                contents_end += 1
            terminated = False
            while contents_end < data_len:
                sub_header_end, contents_end = _parse(encoded_data, data_len, contents_end, lengths_only=True)
                if contents_end == sub_header_end and encoded_data[contents_end - 2:contents_end] == b'\x00\x00':
                    terminated = True
                    break
            if contents_end > data_len:
                raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (contents_end, data_len))
            if not terminated:
                # The data ran out before the two end-of-contents octets were
                # found. Without this check the last two bytes of content
                # would be silently dropped as if they were the marker.
                raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (contents_end + 2, data_len))
            if lengths_only:
                return (pointer, contents_end)
            return (
                (
                    first_octet >> 6,
                    (first_octet >> 5) & 1,
                    tag,
                    encoded_data[start:pointer],
                    # The end-of-contents marker is reported as the trailer
                    # rather than as part of the content
                    encoded_data[pointer:contents_end - 2],
                    b'\x00\x00'
                ),
                contents_end
            )

    if contents_end > data_len:
        raise ValueError(_INSUFFICIENT_DATA_MESSAGE % (contents_end, data_len))
    return (
        (
            first_octet >> 6,
            (first_octet >> 5) & 1,
            tag,
            encoded_data[start:pointer],
            encoded_data[pointer:contents_end],
            b''
        ),
        contents_end
    )


def _dump_header(class_, method, tag, contents):
    """
    Builds the identifier and length octets that precede the contents of an
    ASN.1 value

    :param class_:
        An integer ASN.1 class value: 0 (universal), 1 (application),
        2 (context), 3 (private)

    :param method:
        An integer ASN.1 method value: 0 (primitive), 1 (constructed)

    :param tag:
        An integer ASN.1 tag value

    :param contents:
        A byte string of the encoded byte contents - only its length is used

    :return:
        A byte string of the ASN.1 DER header
    """

    # The class occupies the top two bits and the method the third bit of
    # the first identifier octet
    leading = (class_ << 6) | (method << 5)

    if tag < 31:
        # Low tag number form - the tag fits in the low 5 bits
        identifier = chr_cls(leading | tag)
    else:
        # High tag number form - the low 5 bits are all set, and the tag
        # follows as base 128 digits. Digits are produced least-significant
        # first, so each is prepended; every digit but the last one emitted
        # in the output carries the continuation bit.
        digits = b''
        high_bit = 0
        remaining = tag
        while remaining > 0:
            digits = chr_cls(high_bit | (remaining & 0x7f)) + digits
            high_bit = 0x80
            remaining >>= 7
        identifier = chr_cls(leading | 31) + digits

    size = len(contents)
    if size > 127:
        # Long form - a count of length octets followed by the length itself
        size_octets = int_to_bytes(size)
        length_part = chr_cls(0x80 | len(size_octets)) + size_octets
    else:
        # Short form - the length fits in a single octet
        length_part = chr_cls(size)

    return identifier + length_part