Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scrapy/utils/curl.py: 32%
66 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-07 06:38 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-07 06:38 +0000
1import argparse
2import warnings
3from http.cookies import SimpleCookie
4from shlex import split
5from urllib.parse import urlparse
7from w3lib.http import basic_auth_header
class DataAction(argparse.Action):
    """Argparse action that stores a data payload on the namespace.

    The incoming value is coerced to ``str``. If the result begins with
    ``$``, that single leading character is removed before it is stored
    (presumably a leftover of shell ``$'...'`` quoting -- TODO confirm).
    """

    def __call__(self, parser, namespace, values, option_string=None):
        payload = str(values)
        # Drop exactly one leading "$"; any further "$" characters are kept.
        payload = payload[1:] if payload.startswith("$") else payload
        setattr(namespace, self.dest, payload)
class CurlParser(argparse.ArgumentParser):
    """Argument parser that reports parse failures by raising ``ValueError``."""

    def error(self, message):
        # Overrides the argparse hook that is called on a parse failure.
        raise ValueError(
            f"There was an error parsing the curl command: {message}"
        )
# Module-level parser for the curl options that are converted into request
# keyword arguments. Options are registered in the order listed below.
curl_parser = CurlParser()
curl_parser.add_argument("url")
for _flags, _options in (
    (("-H", "--header"), {"dest": "headers", "action": "append"}),
    (("-X", "--request"), {"dest": "method"}),
    (("-d", "--data", "--data-raw"), {"dest": "data", "action": DataAction}),
    (("-u", "--user"), {"dest": "auth"}),
):
    curl_parser.add_argument(*_flags, **_options)
# Do not leave the loop helpers behind in the module namespace.
del _flags, _options
# Options that are registered as plain boolean flags so the parser accepts
# them instead of reporting them as unrecognized.
safe_to_ignore_arguments = [
    # `--compressed` argument is not safe to ignore, but it's included here
    # because the `HttpCompressionMiddleware` is enabled by default
    ["--compressed"],
    ["-s", "--silent"],
    ["-v", "--verbose"],
    ["-#", "--progress-bar"],
]

# Register every alias group above as a single store_true option.
for argument in safe_to_ignore_arguments:
    curl_parser.add_argument(action="store_true", *argument)
45def _parse_headers_and_cookies(parsed_args):
46 headers = []
47 cookies = {}
48 for header in parsed_args.headers or ():
49 name, val = header.split(":", 1)
50 name = name.strip()
51 val = val.strip()
52 if name.title() == "Cookie":
53 for name, morsel in SimpleCookie(val).items():
54 cookies[name] = morsel.value
55 else:
56 headers.append((name, val))
58 if parsed_args.auth:
59 user, password = parsed_args.auth.split(":", 1)
60 headers.append(("Authorization", basic_auth_header(user, password)))
62 return headers, cookies
def curl_to_request_kwargs(
    curl_command: str, ignore_unknown_options: bool = True
) -> dict:
    """Convert a cURL command syntax to Request kwargs.

    :param str curl_command: string containing the curl command
    :param bool ignore_unknown_options: If true, only a warning is emitted when
                                        cURL options are unknown. Otherwise
                                        raises an error. (default: True)
    :return: dictionary of Request kwargs; always has ``method`` and ``url``,
             and has ``headers``, ``cookies`` and ``body`` only when the
             command supplies them
    :raises ValueError: if the command is empty or does not start with
                        ``curl``, if its arguments cannot be parsed, or if
                        unknown options are present and
                        ``ignore_unknown_options`` is false
    """

    curl_args = split(curl_command)

    # An empty or whitespace-only command splits to an empty list, so check
    # for that before indexing to keep the failure a ValueError.
    if not curl_args or curl_args[0] != "curl":
        raise ValueError('A curl command must start with "curl"')

    parsed_args, argv = curl_parser.parse_known_args(curl_args[1:])

    if argv:
        msg = f'Unrecognized options: {", ".join(argv)}'
        if ignore_unknown_options:
            warnings.warn(msg)
        else:
            raise ValueError(msg)

    url = parsed_args.url

    # curl automatically prepends 'http' if the scheme is missing, but Request
    # needs the scheme to work
    parsed_url = urlparse(url)
    if not parsed_url.scheme:
        url = "http://" + url

    method = parsed_args.method or "GET"

    result = {"method": method.upper(), "url": url}

    headers, cookies = _parse_headers_and_cookies(parsed_args)

    if headers:
        result["headers"] = headers
    if cookies:
        result["cookies"] = cookies
    # Compare against None rather than truthiness so that an explicitly empty
    # payload (e.g. `-d ''`) still counts as data being specified.
    if parsed_args.data is not None:
        result["body"] = parsed_args.data
        if not parsed_args.method:
            # if the "data" is specified but the "method" is not specified,
            # the default method is 'POST'
            result["method"] = "POST"

    return result