Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scrapy/utils/curl.py: 32%

66 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-07 06:38 +0000

1import argparse 

2import warnings 

3from http.cookies import SimpleCookie 

4from shlex import split 

5from urllib.parse import urlparse 

6 

7from w3lib.http import basic_auth_header 

8 

9 

class DataAction(argparse.Action):
    """Argparse action that stores the option value minus one leading ``$``.

    The value is converted with ``str()``; if the resulting text starts with
    ``$`` that single character is removed before the text is stored on the
    namespace under ``self.dest``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        text = str(values)
        # Only the first "$" is dropped; any later "$" characters are kept.
        # NOTE(review): presumably this undoes shell ``$'...'`` quoting - confirm.
        stored = text[1:] if text.startswith("$") else text
        setattr(namespace, self.dest, stored)

16 

17 

class CurlParser(argparse.ArgumentParser):
    """Argument parser that reports parsing failures as ``ValueError``."""

    def error(self, message):
        """Raise ``ValueError`` carrying *message*.

        Overrides ``ArgumentParser.error``, whose default implementation
        prints a usage message and exits the process.
        """
        raise ValueError(f"There was an error parsing the curl command: {message}")

22 

23 

# Module-level parser covering the subset of curl options handled in this file.
curl_parser = CurlParser()
curl_parser.add_argument("url")
# Repeated -H options accumulate into a list.
curl_parser.add_argument("-H", "--header", action="append", dest="headers")
curl_parser.add_argument("-X", "--request", dest="method")
curl_parser.add_argument(
    "-d",
    "--data",
    "--data-raw",
    action=DataAction,
    dest="data",
)
curl_parser.add_argument("-u", "--user", dest="auth")

# Options that are recognised (so they are not reported as unknown) but whose
# values are not read anywhere in this file.
safe_to_ignore_arguments = [
    # `--compressed` is not really safe to ignore, but it is listed here
    # because the `HttpCompressionMiddleware` is enabled by default
    ["--compressed"],
    ["-s", "--silent"],
    ["-v", "--verbose"],
    ["-#", "--progress-bar"],
]

# Register every ignorable option as a plain boolean flag.
for argument in safe_to_ignore_arguments:
    curl_parser.add_argument(*argument, action="store_true")

43 

44 

45def _parse_headers_and_cookies(parsed_args): 

46 headers = [] 

47 cookies = {} 

48 for header in parsed_args.headers or (): 

49 name, val = header.split(":", 1) 

50 name = name.strip() 

51 val = val.strip() 

52 if name.title() == "Cookie": 

53 for name, morsel in SimpleCookie(val).items(): 

54 cookies[name] = morsel.value 

55 else: 

56 headers.append((name, val)) 

57 

58 if parsed_args.auth: 

59 user, password = parsed_args.auth.split(":", 1) 

60 headers.append(("Authorization", basic_auth_header(user, password))) 

61 

62 return headers, cookies 

63 

64 

def curl_to_request_kwargs(
    curl_command: str, ignore_unknown_options: bool = True
) -> dict:
    """Convert a cURL command syntax to Request kwargs.

    :param str curl_command: string containing the curl command
    :param bool ignore_unknown_options: If true, only a warning is emitted when
                                        cURL options are unknown. Otherwise
                                        raises an error. (default: True)
    :return: dictionary of Request kwargs (``method`` and ``url`` always;
             ``headers``, ``cookies`` and ``body`` only when present)
    :raises ValueError: if the command is empty, does not start with
                        ``curl``, cannot be parsed, or contains unknown
                        options while ``ignore_unknown_options`` is false
    """

    curl_args = split(curl_command)

    # An empty or blank command produces no tokens; report it like any other
    # non-curl input instead of failing with IndexError.
    if not curl_args or curl_args[0] != "curl":
        raise ValueError('A curl command must start with "curl"')

    parsed_args, argv = curl_parser.parse_known_args(curl_args[1:])

    if argv:
        msg = f'Unrecognized options: {", ".join(argv)}'
        if ignore_unknown_options:
            warnings.warn(msg)
        else:
            raise ValueError(msg)

    url = parsed_args.url

    # curl automatically prepends 'http' if the scheme is missing, but Request
    # needs the scheme to work. urlparse() reports the host of a
    # ``host:port/path`` URL as its scheme, so a scheme only counts when the
    # text after the first ":" starts with "/".
    has_scheme = bool(urlparse(url).scheme) and url.partition(":")[2].startswith("/")
    if not has_scheme:
        url = "http://" + url

    method = parsed_args.method or "GET"

    result = {"method": method.upper(), "url": url}

    headers, cookies = _parse_headers_and_cookies(parsed_args)

    if headers:
        result["headers"] = headers
    if cookies:
        result["cookies"] = cookies
    # Compare against None: ``-d ''`` is an explicit (empty) body and must
    # still switch the default method to POST.
    if parsed_args.data is not None:
        result["body"] = parsed_args.data
        if not parsed_args.method:
            # if the "data" is specified but the "method" is not specified,
            # the default method is 'POST'
            result["method"] = "POST"

    return result