aboutsummaryrefslogtreecommitdiff
path: root/projects/bs4/bs4_fuzzer.py
blob: b5125121a45fdc5898e77dbd6f3960463c0e8194 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import atheris

with atheris.instrument_imports():
  import logging
  import warnings
  from bs4 import BeautifulSoup


try:
  import HTMLParser
  HTMLParseError = HTMLParser.HTMLParseError
except ImportError:
  # HTMLParseError is removed in Python 3.5. Since it can never be
  # thrown in 3.5, we can just define our own class as a placeholder.

  class HTMLParseError(Exception):
    pass


@atheris.instrument_func
def TestOneInput(data):
  """TestOneInput gets random data from the fuzzer, and throws it at bs4."""
  if len(data) < 1:
    return

  parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
  try:
    idx = int(data[0]) % len(parsers)
  except ValueError:
    return

  try:
    soup = BeautifulSoup(data[1:], features=parsers[idx])
  except HTMLParseError:
    return
  except ValueError:
    return

  list(soup.find_all(True))
  soup.prettify()


def main():
  logging.disable(logging.CRITICAL)
  warnings.filterwarnings('ignore')
  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()


if __name__ == "__main__":
  main()