Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scrapy/http/request/__init__.py: 37%

90 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-07 06:38 +0000

1""" 

2This module implements the Request class which is used to represent HTTP 

3requests in Scrapy. 

4 

5See documentation in docs/topics/request-response.rst 

6""" 

7import inspect 

8from typing import ( 

9 Any, 

10 AnyStr, 

11 Callable, 

12 Dict, 

13 Iterable, 

14 List, 

15 Mapping, 

16 NoReturn, 

17 Optional, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22 cast, 

23) 

24 

25from w3lib.url import safe_url_string 

26 

27import scrapy 

28from scrapy.http.headers import Headers 

29from scrapy.utils.curl import curl_to_request_kwargs 

30from scrapy.utils.python import to_bytes 

31from scrapy.utils.trackref import object_ref 

32from scrapy.utils.url import escape_ajax 

33 

# Generic request type variable: lets classmethods such as ``from_curl``
# declare that they return the subclass they were invoked on, not the
# base ``Request``.
RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")

35 

36 

def NO_CALLBACK(*args: Any, **kwargs: Any) -> NoReturn:
    """Sentinel callback: assign it to the ``callback`` parameter of
    :class:`~scrapy.http.Request` to state that the request is not meant
    to have a spider callback at all.

    For example:

    .. code-block:: python

        Request("https://example.com", callback=NO_CALLBACK)

    This value should be used by :ref:`components <topics-components>` that
    create and handle their own requests, e.g. through
    :meth:`scrapy.core.engine.ExecutionEngine.download`, so that downloader
    middlewares handling such requests can treat them differently from requests
    intended for the :meth:`~scrapy.Spider.parse` callback.

    Calling it is always an error, hence the unconditional exception below.
    """
    message = (
        "The NO_CALLBACK callback has been called. This is a special callback "
        "value intended for requests whose callback is never meant to be "
        "called."
    )
    raise RuntimeError(message)

59 

60 

class Request(object_ref):
    """An HTTP request, typically created in a Spider and executed by the
    Downloader, which turns it into a :class:`Response`.
    """

    attributes: Tuple[str, ...] = (
        "url",
        "callback",
        "method",
        "headers",
        "body",
        "cookies",
        "meta",
        "encoding",
        "priority",
        "dont_filter",
        "errback",
        "flags",
        "cb_kwargs",
    )
    """A tuple of :class:`str` objects containing the name of all public
    attributes of the class that are also keyword parameters of the
    ``__init__`` method.

    Currently used by :meth:`Request.replace`, :meth:`Request.to_dict` and
    :func:`~scrapy.utils.request.request_from_dict`.
    """

    def __init__(
        self,
        url: str,
        callback: Optional[Callable] = None,
        method: str = "GET",
        headers: Union[Mapping[AnyStr, Any], Iterable[Tuple[AnyStr, Any]], None] = None,
        body: Optional[Union[bytes, str]] = None,
        cookies: Optional[Union[dict, List[dict]]] = None,
        meta: Optional[Dict[str, Any]] = None,
        encoding: str = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Optional[Callable] = None,
        flags: Optional[List[str]] = None,
        cb_kwargs: Optional[Dict[str, Any]] = None,
    ) -> None:
        # Encoding must be set before anything else: URL and body
        # normalization below both read ``self.encoding``.
        self._encoding: str = encoding
        self.method: str = str(method).upper()
        self._set_url(url)
        self._set_body(body)
        if not isinstance(priority, int):
            raise TypeError(f"Request priority not an integer: {priority!r}")
        self.priority: int = priority

        # Reject non-callable callbacks/errbacks up front (None is allowed).
        if callback is not None and not callable(callback):
            raise TypeError(
                f"callback must be a callable, got {type(callback).__name__}"
            )
        if errback is not None and not callable(errback):
            raise TypeError(f"errback must be a callable, got {type(errback).__name__}")
        self.callback: Optional[Callable] = callback
        self.errback: Optional[Callable] = errback

        self.cookies: Union[dict, List[dict]] = cookies or {}
        self.headers: Headers = Headers(headers or {}, encoding=encoding)
        self.dont_filter: bool = dont_filter

        # ``meta`` and ``cb_kwargs`` are stored as None until needed; the
        # matching properties create the dicts lazily on first access.
        self._meta: Optional[Dict[str, Any]] = dict(meta) if meta else None
        self._cb_kwargs: Optional[Dict[str, Any]] = dict(cb_kwargs) if cb_kwargs else None
        self.flags: List[str] = list(flags) if flags is not None else []

    @property
    def cb_kwargs(self) -> Dict[str, Any]:
        # Lazily materialize the keyword-arguments dict for the callback.
        if self._cb_kwargs is None:
            self._cb_kwargs = {}
        return self._cb_kwargs

    @property
    def meta(self) -> Dict[str, Any]:
        # Lazily materialize the per-request metadata dict.
        if self._meta is None:
            self._meta = {}
        return self._meta

    @property
    def url(self) -> str:
        return self._url

    def _set_url(self, url: str) -> None:
        """Normalize and validate *url*, storing the result in ``self._url``."""
        if not isinstance(url, str):
            raise TypeError(f"Request url must be str, got {type(url).__name__}")

        safe = safe_url_string(url, self.encoding)
        self._url = escape_ajax(safe)

        # ``about:`` and ``data:`` URLs legitimately lack "://".
        if "://" not in self._url and not self._url.startswith(("about:", "data:")):
            raise ValueError(f"Missing scheme in request url: {self._url}")

    @property
    def body(self) -> bytes:
        return self._body

    def _set_body(self, body: Optional[Union[str, bytes]]) -> None:
        """Store *body* as bytes, encoding text with ``self.encoding``."""
        self._body = b"" if body is None else to_bytes(body, self.encoding)

    @property
    def encoding(self) -> str:
        return self._encoding

    def __repr__(self) -> str:
        return f"<{self.method} {self.url}>"

    def copy(self) -> "Request":
        """Return a new Request that is a copy of this one."""
        return self.replace()

    def replace(self, *args: Any, **kwargs: Any) -> "Request":
        """Create a new Request with the same attributes except for those given new values"""
        for attr in self.attributes:
            kwargs.setdefault(attr, getattr(self, attr))
        # ``cls`` allows replacing into a different Request subclass.
        cls = kwargs.pop("cls", self.__class__)
        return cast(Request, cls(*args, **kwargs))

    @classmethod
    def from_curl(
        cls: Type[RequestTypeVar],
        curl_command: str,
        ignore_unknown_options: bool = True,
        **kwargs: Any,
    ) -> RequestTypeVar:
        """Create a Request object from a string containing a `cURL
        <https://curl.haxx.se/>`_ command. It populates the HTTP method, the
        URL, the headers, the cookies and the body. It accepts the same
        arguments as the :class:`Request` class, taking preference and
        overriding the values of the same arguments contained in the cURL
        command.

        Unrecognized options are ignored by default. To raise an error when
        finding unknown options call this method by passing
        ``ignore_unknown_options=False``.

        .. caution:: Using :meth:`from_curl` from :class:`~scrapy.http.Request`
                     subclasses, such as :class:`~scrapy.http.JsonRequest`, or
                     :class:`~scrapy.http.XmlRpcRequest`, as well as having
                     :ref:`downloader middlewares <topics-downloader-middleware>`
                     and
                     :ref:`spider middlewares <topics-spider-middleware>`
                     enabled, such as
                     :class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`,
                     :class:`~scrapy.downloadermiddlewares.useragent.UserAgentMiddleware`,
                     or
                     :class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
                     may modify the :class:`~scrapy.http.Request` object.

        To translate a cURL command into a Scrapy request,
        you may use `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_.
        """
        parsed = curl_to_request_kwargs(curl_command, ignore_unknown_options)
        # Explicit keyword arguments win over values parsed from the command.
        parsed.update(kwargs)
        return cls(**parsed)

    def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]:
        """Return a dictionary containing the Request's data.

        Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.

        If a spider is given, this method will try to find out the name of the spider methods used as callback
        and errback and include them in the output dict, raising an exception if they cannot be found.
        """

        def serialize_callable(func: Optional[Callable]) -> Any:
            # Bound spider methods are replaced by their name so the dict
            # is picklable; non-callables (e.g. None) pass through untouched.
            return _find_method(spider, func) if callable(func) else func

        d: Dict[str, Any] = {
            "url": self.url,  # urls are safe (safe_string_url)
            "callback": serialize_callable(self.callback),
            "errback": serialize_callable(self.errback),
            "headers": dict(self.headers),
        }
        for attr in self.attributes:
            d.setdefault(attr, getattr(self, attr))
        if type(self) is not Request:  # pylint: disable=unidiomatic-typecheck
            # Record the subclass so deserialization can rebuild the same type.
            d["_class"] = f"{self.__module__}.{self.__class__.__name__}"
        return d

247 

248 

249def _find_method(obj: Any, func: Callable) -> str: 

250 """Helper function for Request.to_dict""" 

251 # Only instance methods contain ``__func__`` 

252 if obj and hasattr(func, "__func__"): 

253 members = inspect.getmembers(obj, predicate=inspect.ismethod) 

254 for name, obj_func in members: 

255 # We need to use __func__ to access the original function object because instance 

256 # method objects are generated each time attribute is retrieved from instance. 

257 # 

258 # Reference: The standard type hierarchy 

259 # https://docs.python.org/3/reference/datamodel.html 

260 if obj_func.__func__ is func.__func__: 

261 return name 

262 raise ValueError(f"Function {func} is not an instance method in: {obj}")