Coverage for /pythoncovmergedfiles/medio/medio/src/bs4_fuzzer.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

56 statements  

###### Coverage stub
import atexit
import coverage

# Start measuring as early as possible so that everything imported below is
# covered; cover_pylib=True also measures the standard library.
cov = coverage.coverage(data_file='.coverage', cover_pylib=True)
cov.start()


# Register an exit handler that stops measurement and saves the coverage data.
def exit_handler():
    """Stop coverage measurement and save the data to the '.coverage' file."""
    cov.stop()
    cov.save()


atexit.register(exit_handler)
####### End of coverage stub

12#!/usr/bin/python3 

13 

14# Copyright 2020 Google LLC 

15# 

16# Licensed under the Apache License, Version 2.0 (the "License"); 

17# you may not use this file except in compliance with the License. 

18# You may obtain a copy of the License at 

19# 

20# http://www.apache.org/licenses/LICENSE-2.0 

21# 

22# Unless required by applicable law or agreed to in writing, software 

23# distributed under the License is distributed on an "AS IS" BASIS, 

24# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

25# See the License for the specific language governing permissions and 

26# limitations under the License. 

27 

28import sys 

29import atheris 

30 

31with atheris.instrument_imports(): 

32 import logging 

33 import warnings 

34 from bs4 import BeautifulSoup, ParserRejectedMarkup 

35 import soupsieve 

36 from soupsieve.util import SelectorSyntaxError 

37 

38 

try:
    # Python 2 only: the module is named ``HTMLParser`` there and still
    # exposes ``HTMLParseError``.
    import HTMLParser
    HTMLParseError = HTMLParser.HTMLParseError
except ImportError:
    # HTMLParseError is removed in Python 3.5. Since it can never be
    # thrown in 3.5, we can just define our own class as a placeholder.

    class HTMLParseError(Exception):
        """Placeholder so ``except HTMLParseError`` stays valid everywhere."""
        pass

48 

49 

@atheris.instrument_func
def TestOneInput(data):
    """Feed one fuzzer-generated input to bs4.

    The input is split as follows:
      * byte 0 selects which parser backend to use,
      * bytes 1-9 are used as a CSS selector,
      * bytes from index 11 onward are the markup to parse.

    Inputs shorter than 12 bytes are ignored. Exceptions that signal
    rejected markup or an unsupported/invalid selector end the run quietly;
    anything else propagates to the fuzzer.
    """
    if len(data) < 12:
        return

    parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
    try:
        idx = int(data[0]) % len(parsers)
    except ValueError:
        # Only reachable when indexing yields something int() cannot convert
        # (e.g. a one-character string rather than an int).
        return

    css_selector = data[1:10]
    # NOTE(review): the byte at index 10 is never used (the harness slices
    # data[10:] and then skips one more byte). Kept as-is so inputs are
    # interpreted exactly as before.
    markup = data[11:]

    try:
        soup = BeautifulSoup(markup, features=parsers[idx])
    except (HTMLParseError, ValueError, ParserRejectedMarkup):
        return

    # Walk the whole tree.
    list(soup.find_all(True))
    if soup.css:
        try:
            soup.css.select(css_selector.decode('utf-8', 'replace'))
        except (SelectorSyntaxError, NotImplementedError):
            return
    soup.prettify()

82 

83 

def main():
    """Silence logging and warnings, then hand control to the Atheris fuzzer."""
    # Disable all log records up to and including CRITICAL, and ignore all
    # warnings, while fuzzing.
    logging.disable(logging.CRITICAL)
    warnings.filterwarnings('ignore')
    atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
    atheris.Fuzz()


if __name__ == "__main__":
    main()