Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scrapy/http/request/__init__.py: 37%
90 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-07 06:38 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-07 06:38 +0000
1"""
2This module implements the Request class which is used to represent HTTP
3requests in Scrapy.
5See documentation in docs/topics/request-response.rst
6"""
7import inspect
8from typing import (
9 Any,
10 AnyStr,
11 Callable,
12 Dict,
13 Iterable,
14 List,
15 Mapping,
16 NoReturn,
17 Optional,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22 cast,
23)
25from w3lib.url import safe_url_string
27import scrapy
28from scrapy.http.headers import Headers
29from scrapy.utils.curl import curl_to_request_kwargs
30from scrapy.utils.python import to_bytes
31from scrapy.utils.trackref import object_ref
32from scrapy.utils.url import escape_ajax
# Type variable bound to ``Request``. ``Request.from_curl`` annotates ``cls``
# as ``Type[RequestTypeVar]`` and its return value as ``RequestTypeVar``, so
# the classmethod is typed as returning the class it is called on.
RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")
def NO_CALLBACK(*args: Any, **kwargs: Any) -> NoReturn:
    """Sentinel for the ``callback`` parameter of
    :class:`~scrapy.http.Request` meaning that the request is not supposed to
    have any spider callback.

    Usage example:

    .. code-block:: python

        Request("https://example.com", callback=NO_CALLBACK)

    :ref:`Components <topics-components>` that build and process their own
    requests (for instance through
    :meth:`scrapy.core.engine.ExecutionEngine.download`) should pass this
    value, which lets downloader middlewares tell such requests apart from
    the ones meant for the :meth:`~scrapy.Spider.parse` callback.

    Calling it is always an error: any invocation, whatever the arguments,
    raises :exc:`RuntimeError`.
    """
    message = (
        "The NO_CALLBACK callback has been called. "
        "This is a special callback value intended for requests "
        "whose callback is never meant to be called."
    )
    raise RuntimeError(message)
class Request(object_ref):
    """An HTTP request, normally created in a Spider and carried out by the
    Downloader, which in turn produces a :class:`Response`.
    """

    attributes: Tuple[str, ...] = (
        "url",
        "callback",
        "method",
        "headers",
        "body",
        "cookies",
        "meta",
        "encoding",
        "priority",
        "dont_filter",
        "errback",
        "flags",
        "cb_kwargs",
    )
    """Names (as :class:`str` objects) of every public attribute of the class
    that is also accepted as a keyword parameter by the ``__init__`` method.

    :meth:`Request.replace`, :meth:`Request.to_dict` and
    :func:`~scrapy.utils.request.request_from_dict` currently rely on it.
    """

    def __init__(
        self,
        url: str,
        callback: Optional[Callable] = None,
        method: str = "GET",
        headers: Union[Mapping[AnyStr, Any], Iterable[Tuple[AnyStr, Any]], None] = None,
        body: Optional[Union[bytes, str]] = None,
        cookies: Optional[Union[dict, List[dict]]] = None,
        meta: Optional[Dict[str, Any]] = None,
        encoding: str = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Optional[Callable] = None,
        flags: Optional[List[str]] = None,
        cb_kwargs: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Validate the arguments and store them on the new request.

        A :exc:`TypeError` is raised when ``url`` is not a ``str``, when
        ``priority`` is not an ``int``, or when ``callback`` / ``errback`` is
        neither ``None`` nor a callable. A :exc:`ValueError` is raised when
        the URL has no scheme.
        """
        # The encoding goes first: the URL and body setters below read it.
        self._encoding: str = encoding
        self.method: str = str(method).upper()
        self._set_url(url)
        self._set_body(body)

        if not isinstance(priority, int):
            raise TypeError(f"Request priority not an integer: {priority!r}")
        self.priority: int = priority

        if callback is not None and not callable(callback):
            raise TypeError(
                f"callback must be a callable, got {type(callback).__name__}"
            )
        if errback is not None and not callable(errback):
            raise TypeError(f"errback must be a callable, got {type(errback).__name__}")
        self.callback: Optional[Callable] = callback
        self.errback: Optional[Callable] = errback

        self.cookies: Union[dict, List[dict]] = cookies or {}
        self.headers: Headers = Headers(headers or {}, encoding=encoding)
        self.dont_filter: bool = dont_filter

        # ``meta`` and ``cb_kwargs`` are shallow-copied when non-empty and
        # otherwise left as ``None``; the matching properties create the
        # dict on first access.
        self._meta: Optional[Dict[str, Any]] = None if not meta else dict(meta)
        self._cb_kwargs: Optional[Dict[str, Any]] = (
            None if not cb_kwargs else dict(cb_kwargs)
        )
        self.flags: List[str] = list(flags) if flags is not None else []

    @property
    def cb_kwargs(self) -> Dict[str, Any]:
        """Keyword arguments dict, created empty on first access if unset."""
        stored = self._cb_kwargs
        if stored is None:
            stored = self._cb_kwargs = {}
        return stored

    @property
    def meta(self) -> Dict[str, Any]:
        """Metadata dict, created empty on first access if unset."""
        stored = self._meta
        if stored is None:
            stored = self._meta = {}
        return stored

    @property
    def url(self) -> str:
        """The URL stored by :meth:`_set_url` (read-only)."""
        return self._url

    def _set_url(self, url: str) -> None:
        """Check, escape and store ``url``.

        Raises :exc:`TypeError` if ``url`` is not a ``str`` and
        :exc:`ValueError` if the processed URL neither contains ``://`` nor
        starts with ``about:`` or ``data:``.
        """
        if not isinstance(url, str):
            raise TypeError(f"Request url must be str, got {type(url).__name__}")

        escaped = escape_ajax(safe_url_string(url, self.encoding))
        # The attribute is assigned before the scheme check on purpose, so it
        # is already set when the error below is raised.
        self._url = escaped

        has_scheme = "://" in escaped or escaped.startswith(("about:", "data:"))
        if not has_scheme:
            raise ValueError(f"Missing scheme in request url: {self._url}")

    @property
    def body(self) -> bytes:
        """The body stored by :meth:`_set_body` (read-only)."""
        return self._body

    def _set_body(self, body: Optional[Union[str, bytes]]) -> None:
        """Store ``body`` converted with ``to_bytes`` and the request
        encoding; ``None`` is stored as ``b""``.
        """
        if body is None:
            self._body = b""
        else:
            self._body = to_bytes(body, self.encoding)

    @property
    def encoding(self) -> str:
        """The encoding passed to ``__init__`` (read-only)."""
        return self._encoding

    def __repr__(self) -> str:
        method, url = self.method, self.url
        return f"<{method} {url}>"

    def copy(self) -> "Request":
        """Return a new request built by :meth:`replace` with no overrides."""
        return self.replace()

    def replace(self, *args: Any, **kwargs: Any) -> "Request":
        """Build a new request in which every name listed in ``attributes``
        keeps this request's value unless a replacement is passed as a
        keyword argument.

        The optional ``cls`` keyword selects the class to instantiate; it
        defaults to the class of this request.
        """
        for name in self.attributes:
            # Read the current value unconditionally, as ``setdefault`` does.
            current = getattr(self, name)
            kwargs.setdefault(name, current)
        target_cls = kwargs.pop("cls", self.__class__)
        new_request = target_cls(*args, **kwargs)
        return cast(Request, new_request)

    @classmethod
    def from_curl(
        cls: Type[RequestTypeVar],
        curl_command: str,
        ignore_unknown_options: bool = True,
        **kwargs: Any,
    ) -> RequestTypeVar:
        """Build a Request object out of a string holding a `cURL
        <https://curl.haxx.se/>`_ command. The HTTP method, the URL, the
        headers, the cookies and the body are populated from it. The same
        arguments as for the :class:`Request` class are accepted; they take
        preference over, and override, the values of the same arguments
        found in the cURL command.

        By default, unrecognized options are ignored. Pass
        ``ignore_unknown_options=False`` to get an error when unknown options
        are found.

        .. caution:: Using :meth:`from_curl` from :class:`~scrapy.http.Request`
                     subclasses, such as :class:`~scrapy.http.JsonRequest`, or
                     :class:`~scrapy.http.XmlRpcRequest`, as well as having
                     :ref:`downloader middlewares <topics-downloader-middleware>`
                     and
                     :ref:`spider middlewares <topics-spider-middleware>`
                     enabled, such as
                     :class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`,
                     :class:`~scrapy.downloadermiddlewares.useragent.UserAgentMiddleware`,
                     or
                     :class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
                     may modify the :class:`~scrapy.http.Request` object.

        To translate a cURL command into a Scrapy request,
        you may use `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_.
        """
        init_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
        # Explicit keyword arguments win over what was parsed from the command.
        init_kwargs.update(kwargs)
        return cls(**init_kwargs)

    def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]:
        """Return the Request's data as a dictionary.

        Use :func:`~scrapy.utils.request.request_from_dict` to turn the result
        back into a :class:`~scrapy.Request` object.

        A callable callback or errback is replaced with the name returned by
        ``_find_method(spider, ...)``, which raises :exc:`ValueError` when no
        matching spider method is found. For subclasses of :class:`Request`
        an extra ``_class`` key holds the module and class name joined by a
        dot.
        """
        serialized: Dict[str, Any] = {
            "url": self.url,  # urls are safe (safe_url_string)
        }

        callback_ref: Any = self.callback
        if callable(callback_ref):
            callback_ref = _find_method(spider, callback_ref)
        serialized["callback"] = callback_ref

        errback_ref: Any = self.errback
        if callable(errback_ref):
            errback_ref = _find_method(spider, errback_ref)
        serialized["errback"] = errback_ref

        serialized["headers"] = dict(self.headers)

        # Fill in the remaining attributes without overwriting the entries
        # that were converted above.
        for name in self.attributes:
            serialized.setdefault(name, getattr(self, name))

        if type(self) is not Request:  # pylint: disable=unidiomatic-typecheck
            serialized["_class"] = f"{self.__module__}.{self.__class__.__name__}"
        return serialized
249def _find_method(obj: Any, func: Callable) -> str:
250 """Helper function for Request.to_dict"""
251 # Only instance methods contain ``__func__``
252 if obj and hasattr(func, "__func__"):
253 members = inspect.getmembers(obj, predicate=inspect.ismethod)
254 for name, obj_func in members:
255 # We need to use __func__ to access the original function object because instance
256 # method objects are generated each time attribute is retrieved from instance.
257 #
258 # Reference: The standard type hierarchy
259 # https://docs.python.org/3/reference/datamodel.html
260 if obj_func.__func__ is func.__func__:
261 return name
262 raise ValueError(f"Function {func} is not an instance method in: {obj}")