Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/http/client.py: 17%
738 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-01-17 06:13 +0000
1r"""HTTP/1.1 client library
3<intro stuff goes here>
4<other stuff, too>
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
10 (null)
11 |
12 | HTTPConnection()
13 v
14 Idle
15 |
16 | putrequest()
17 v
18 Request-started
19 |
20 | ( putheader() )* endheaders()
21 v
22 Request-sent
23 |\_____________________________
24 | | getresponse() raises
25 | response = getresponse() | ConnectionError
26 v v
27 Unread-response Idle
28 [Response-headers-read]
29 |\____________________
30 | |
31 | response.read() | putrequest()
32 v v
33 Idle Req-started-unread-response
34 ______/|
35 / |
36 response.read() | | ( putheader() )* endheaders()
37 v v
38 Request-started Req-sent-unread-response
39 |
40 | response.read()
41 v
42 Request-sent
44This diagram presents the following rules:
45 -- a second request may not be started until {response-headers-read}
46 -- a response [object] cannot be retrieved until {request-sent}
47 -- there is no differentiation between an unread response body and a
48 partially read response body
50Note: this enforcement is applied by the HTTPConnection class. The
51 HTTPResponse class does not enforce this state machine, which
52 implies sophisticated clients may accelerate the request/response
53 pipeline. Caution should be taken, though: accelerating the states
54 beyond the above pattern may imply knowledge of the server's
55 connection-close behavior for certain requests. For example, it
56 is impossible to tell whether the server will close the connection
57 UNTIL the response headers have been read; this means that further
58 requests cannot be placed into the pipeline until it is known that
59 the server will NOT be closing the connection.
61Logical State __state __response
62------------- ------- ----------
63Idle _CS_IDLE None
64Request-started _CS_REQ_STARTED None
65Request-sent _CS_REQ_SENT None
66Unread-response _CS_IDLE <response_class>
67Req-started-unread-response _CS_REQ_STARTED <response_class>
68Req-sent-unread-response _CS_REQ_SENT <response_class>
69"""
71import email.parser
72import email.message
73import http
74import io
75import re
76import socket
77import collections.abc
78from urllib.parse import urlsplit
80# HTTPMessage, parse_headers(), and the HTTP status code constants are
81# intentionally omitted for simplicity
82__all__ = ["HTTPResponse", "HTTPConnection",
83 "HTTPException", "NotConnected", "UnknownProtocol",
84 "UnknownTransferEncoding", "UnimplementedFileMode",
85 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
86 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
87 "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
88 "responses"]
90HTTP_PORT = 80
91HTTPS_PORT = 443
93_UNKNOWN = 'UNKNOWN'
95# connection states
96_CS_IDLE = 'Idle'
97_CS_REQ_STARTED = 'Request-started'
98_CS_REQ_SENT = 'Request-sent'
101# hack to maintain backwards compatibility
102globals().update(http.HTTPStatus.__members__)
104# another hack to maintain backwards compatibility
105# Mapping status codes to official W3C names
106responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
108# maximal line length when calling readline().
109_MAXLINE = 65536
110_MAXHEADERS = 100
112# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
113#
114# VCHAR = %x21-7E
115# obs-text = %x80-FF
116# header-field = field-name ":" OWS field-value OWS
117# field-name = token
118# field-value = *( field-content / obs-fold )
119# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
120# field-vchar = VCHAR / obs-text
121#
122# obs-fold = CRLF 1*( SP / HTAB )
123# ; obsolete line folding
124# ; see Section 3.2.4
126# token = 1*tchar
127#
128# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
129# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
130# / DIGIT / ALPHA
131# ; any VCHAR, except delimiters
132#
133# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
135# the patterns for both name and value are more lenient than RFC
136# definitions to allow for backwards compatibility
137_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
138_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
140# These characters are not allowed within HTTP URL paths.
141# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
142# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
143# Prevents CVE-2019-9740. Includes control characters such as \r\n.
144# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
145_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
146# Arguably only these _should_ allowed:
147# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
148# We are more lenient for assumed real world compatibility purposes.
150# We always set the Content-Length header for these methods because some
151# servers will otherwise respond with a 411
152_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
155def _encode(data, name='data'):
156 """Call data.encode("latin-1") but show a better error message."""
157 try:
158 return data.encode("latin-1")
159 except UnicodeEncodeError as err:
160 raise UnicodeEncodeError(
161 err.encoding,
162 err.object,
163 err.start,
164 err.end,
165 "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
166 "if you want to send it encoded in UTF-8." %
167 (name.title(), data[err.start:err.end], name)) from None
class HTTPMessage(email.message.Message):
    # XXX The only usage of getallmatchingheaders() is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a
        given header name (and their continuation lines).  A list of the
        lines is returned, without interpretation.  If the header does
        not occur, an empty list is returned.  If the header occurs
        multiple times, all occurrences are returned.  Case is not
        important in the header name.
        """
        # NOTE(review): self.keys() appears to yield bare header names,
        # while `wanted` includes a trailing ':' -- confirm whether the
        # prefix comparison can ever match in practice.
        wanted = name.lower() + ':'
        wlen = len(wanted)
        matched = []
        in_match = False
        for key in self.keys():
            if key[:wlen].lower() == wanted:
                in_match = True
            elif not key[:1].isspace():
                in_match = False
            if in_match:
                matched.append(key)
        return matched
def parse_headers(fp, _class=HTTPMessage):
    """Parses only RFC2822 headers from a file pointer.

    email Parser wants to see strings rather than bytes.
    But a TextIOWrapper around self.rfile would buffer too many bytes
    from the stream, bytes which we later need to read as bytes.
    So we read the correct bytes here, as bytes, for email Parser
    to parse.
    """
    raw_lines = []
    while True:
        line = fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(line)
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        # A blank line (or EOF) terminates the header section.
        if line in (b'\r\n', b'\n', b''):
            break
    header_text = b''.join(raw_lines).decode('iso-8859-1')
    return email.parser.Parser(_class=_class).parsestr(header_text)
class HTTPResponse(io.BufferedIOBase):
    """File-like object representing one HTTP response read from a socket.

    Construction only wraps the socket; begin() reads and parses the
    status line and headers and computes how the body is framed
    (chunked, fixed Content-Length, or read-to-EOF).
    """

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        # If the response includes a content-length header, we need to
        # make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  This will
        # happen if a self.fp.read() is done (without a size) whether
        # self.fp is buffered or not.  So, no self.fp.read() by
        # clients unless they know what they are doing.
        self.fp = sock.makefile("rb")
        self.debuglevel = debuglevel
        self._method = method
        # NOTE(review): the *url* parameter is accepted but not stored here;
        # geturl() returns self.url, presumably set externally (e.g. by
        # urllib) -- confirm against callers.

        # The HTTPResponse object is returned via urllib.  The clients
        # of http and urllib expect different attributes for the
        # headers.  headers is used here and supports urllib.  msg is
        # provided as a backwards compatibility layer for http
        # clients.

        self.headers = self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN   # HTTP-Version
        self.status = _UNKNOWN    # Status-Code
        self.reason = _UNKNOWN    # Reason-Phrase

        self.chunked = _UNKNOWN        # is "chunked" being used?
        self.chunk_left = _UNKNOWN     # bytes left to read in current chunk
        self.length = _UNKNOWN         # number of bytes left in response
        self.will_close = _UNKNOWN     # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line; return (version, status, reason).

        Raises RemoteDisconnected on immediate EOF, LineTooLong when the
        line exceeds _MAXLINE, and BadStatusLine on malformed input.
        """
        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        if len(line) > _MAXLINE:
            raise LineTooLong("status line")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise RemoteDisconnected("Remote end closed connection without"
                                     " response")
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            try:
                # Status line without a reason phrase.
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/"):
            self._close_conn()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read status line and headers; set up body-framing state.

        Idempotent: returns immediately if the headers were already read.
        """
        if self.headers is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        # (CONTINUE etc. come from http.HTTPStatus members injected into
        # module globals at import time)
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        # `code` is kept as an alias of `status` for compatibility.
        self.code = self.status = status
        self.reason = reason.strip()
        if version in ("HTTP/1.0", "HTTP/0.9"):
            # Some servers might still return "0.9", treat it as 1.0 anyway
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.headers = self.msg = parse_headers(self.fp)

        if self.debuglevel > 0:
            for hdr, val in self.headers.items():
                print("header:", hdr + ":", val)

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.headers.get("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = True
            self.chunk_left = None
        else:
            self.chunked = False

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.headers.get("content-length")

        # are we using the chunked-style of transfer encoding?
        # (redundant re-read of the header; result is not used below)
        tr_enc = self.headers.get("transfer-encoding")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = True

    def _check_close(self):
        """Return True if the server will close the connection after this
        response, based on the protocol version and connection headers."""
        conn = self.headers.get("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.headers.get("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.headers.get("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def _close_conn(self):
        """Close the underlying file object and mark this response closed
        (self.fp becomes None; see isclosed())."""
        fp = self.fp
        self.fp = None
        fp.close()

    def close(self):
        """Close the response; safe to call more than once."""
        try:
            super().close()  # set "closed" flag
        finally:
            if self.fp:
                self._close_conn()

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    def flush(self):
        """Flush the underlying stream, if still open."""
        super().flush()
        if self.fp:
            self.fp.flush()

    def readable(self):
        """Always returns True"""
        return True

    # End of "raw stream" methods

    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return the response body, or up to *amt* bytes of it.

        Returns b"" once the response is closed or for HEAD requests.
        """
        if self.fp is None:
            return b""

        if self._method == "HEAD":
            # HEAD responses never carry a body.
            self._close_conn()
            return b""

        if amt is not None:
            # Amount is given, implement using readinto
            b = bytearray(amt)
            n = self.readinto(b)
            return memoryview(b)[:n].tobytes()
        else:
            # Amount is not given (unbounded read) so we must check self.length
            # and self.chunked

            if self.chunked:
                return self._readall_chunked()

            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self._close_conn()
                    raise
                self.length = 0
            self._close_conn()        # we read everything
            return s

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b and return the number
        of bytes read.
        """
        if self.fp is None:
            return 0

        if self._method == "HEAD":
            self._close_conn()
            return 0

        if self.chunked:
            return self._readinto_chunked(b)

        if self.length is not None:
            if len(b) > self.length:
                # clip the read to the "end of response"
                b = memoryview(b)[0:self.length]

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        n = self.fp.readinto(b)
        if not n and b:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self._close_conn()
        elif self.length is not None:
            self.length -= n
            if not self.length:
                self._close_conn()
        return n

    def _read_next_chunk_size(self):
        """Read a chunk-size line and return the size as an int.

        Closes the connection and re-raises ValueError if the size line is
        not valid hexadecimal (protocol synchronisation is lost).
        """
        # Read the next chunk size from the file
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("chunk size")
        i = line.find(b";")
        if i >= 0:
            line = line[:i]  # strip chunk-extensions
        try:
            return int(line, 16)
        except ValueError:
            # close the connection as protocol synchronisation is
            # probably lost
            self._close_conn()
            raise

    def _read_and_discard_trailer(self):
        """Consume trailer lines after the last chunk, up to the blank
        line (or EOF) that terminates them."""
        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

    def _get_chunk_left(self):
        """Return the number of bytes left in the current chunk, advancing
        to the next chunk (and consuming its size line) when needed."""
        # return self.chunk_left, reading a new chunk if necessary.
        # chunk_left == 0: at the end of the current chunk, need to close it
        # chunk_left == None: No current chunk, should read next.
        # This function returns non-zero or None if the last chunk has
        # been read.
        chunk_left = self.chunk_left
        if not chunk_left:  # Can be 0 or None
            if chunk_left is not None:
                # We are at the end of chunk, discard chunk end
                self._safe_read(2)  # toss the CRLF at the end of the chunk
            try:
                chunk_left = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(b'')
            if chunk_left == 0:
                # last chunk: 1*("0") [ chunk-extension ] CRLF
                self._read_and_discard_trailer()
                # we read everything; close the "file"
                self._close_conn()
                chunk_left = None
            self.chunk_left = chunk_left
        return chunk_left

    def _readall_chunked(self):
        """Read the entire remaining chunked body and return it as bytes."""
        assert self.chunked != _UNKNOWN
        value = []
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    break
                value.append(self._safe_read(chunk_left))
                self.chunk_left = 0
            return b''.join(value)
        except IncompleteRead:
            # Re-raise with whatever was successfully read so far.
            raise IncompleteRead(b''.join(value))

    def _readinto_chunked(self, b):
        """readinto() implementation for chunked bodies; returns the
        number of bytes stored into *b*."""
        assert self.chunked != _UNKNOWN
        total_bytes = 0
        mvb = memoryview(b)
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    return total_bytes

                if len(mvb) <= chunk_left:
                    # Buffer is smaller than the current chunk: fill it
                    # and remember how much of the chunk remains.
                    n = self._safe_readinto(mvb)
                    self.chunk_left = chunk_left - n
                    return total_bytes + n

                # Consume the whole chunk into a slice of the buffer.
                temp_mvb = mvb[:chunk_left]
                n = self._safe_readinto(temp_mvb)
                mvb = mvb[n:]
                total_bytes += n
                self.chunk_left = 0

        except IncompleteRead:
            raise IncompleteRead(bytes(b[0:total_bytes]))

    def _safe_read(self, amt):
        """Read the number of bytes requested.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        data = self.fp.read(amt)
        if len(data) < amt:
            raise IncompleteRead(data, amt-len(data))
        return data

    def _safe_readinto(self, b):
        """Same as _safe_read, but for reading into a buffer."""
        amt = len(b)
        n = self.fp.readinto(b)
        if n < amt:
            raise IncompleteRead(bytes(b[:n]), amt-n)
        return n

    def read1(self, n=-1):
        """Read with at most one underlying system call.  If at least one
        byte is buffered, return that instead.
        """
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._read1_chunked(n)
        if self.length is not None and (n < 0 or n > self.length):
            n = self.length
        result = self.fp.read1(n)
        if not result and n:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
        return result

    def peek(self, n=-1):
        """Return buffered bytes without consuming them."""
        # Having this enables IOBase.readline() to read more than one
        # byte at a time
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._peek_chunked(n)
        return self.fp.peek(n)

    def readline(self, limit=-1):
        """Read and return one line from the body, up to *limit* bytes."""
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            # Fallback to IOBase readline which uses peek() and read()
            return super().readline(limit)
        if self.length is not None and (limit < 0 or limit > self.length):
            limit = self.length
        result = self.fp.readline(limit)
        if not result and limit:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
        return result

    def _read1_chunked(self, n):
        """read1() implementation for chunked bodies."""
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        chunk_left = self._get_chunk_left()
        if chunk_left is None or n == 0:
            return b''
        if not (0 <= n <= chunk_left):
            n = chunk_left  # if n is negative or larger than chunk_left
        read = self.fp.read1(n)
        self.chunk_left -= len(read)
        if not read:
            raise IncompleteRead(b"")
        return read

    def _peek_chunked(self, n):
        """peek() implementation for chunked bodies; never raises on
        protocol errors (returns b'' instead)."""
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        try:
            chunk_left = self._get_chunk_left()
        except IncompleteRead:
            return b''  # peek doesn't worry about protocol
        if chunk_left is None:
            return b''  # eof
        # peek is allowed to return more than requested.  Just request the
        # entire chunk, and truncate what we get.
        return self.fp.peek(chunk_left)[:chunk_left]

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        return self.fp.fileno()

    def getheader(self, name, default=None):
        '''Returns the value of the header matching *name*.

        If there are multiple matching headers, the values are
        combined into a single string separated by commas and spaces.

        If no matching header is found, returns *default* or None if
        the *default* is not specified.

        If the headers are unknown, raises http.client.ResponseNotReady.

        '''
        if self.headers is None:
            raise ResponseNotReady()
        headers = self.headers.get_all(name) or default
        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
            # A plain string (or a non-iterable default) is returned as-is.
            return headers
        else:
            return ', '.join(headers)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.headers is None:
            raise ResponseNotReady()
        return list(self.headers.items())

    # We override IOBase.__iter__ so that it doesn't check for closed-ness

    def __iter__(self):
        return self

    # For compatibility with old-style urllib responses.

    def info(self):
        '''Returns an instance of the class mimetools.Message containing
        meta-information associated with the URL.

        When the method is HTTP, these headers are those returned by
        the server at the head of the retrieved HTML page (including
        Content-Length and Content-Type).

        When the method is FTP, a Content-Length header will be
        present if (as is now usual) the server passed back a file
        length in response to the FTP retrieval request. A
        Content-Type header will be present if the MIME type can be
        guessed.

        When the method is local-file, returned headers will include
        a Date representing the file's last-modified time, a
        Content-Length giving file size, and a Content-Type
        containing a guess at the file's type. See also the
        description of the mimetools module.

        '''
        return self.headers

    def geturl(self):
        '''Return the real URL of the page.

        In some cases, the HTTP server redirects a client to another
        URL. The urlopen() function handles this transparently, but in
        some cases the caller needs to know which URL the client was
        redirected to. The geturl() method can be used to get at this
        redirected URL.

        '''
        return self.url

    def getcode(self):
        '''Return the HTTP status code that was sent with the response,
        or None if the URL is not an HTTP URL.

        '''
        return self.status
767class HTTPConnection:
769 _http_vsn = 11
770 _http_vsn_str = 'HTTP/1.1'
772 response_class = HTTPResponse
773 default_port = HTTP_PORT
774 auto_open = 1
775 debuglevel = 0
777 @staticmethod
778 def _is_textIO(stream):
779 """Test whether a file-like object is a text or a binary stream.
780 """
781 return isinstance(stream, io.TextIOBase)
783 @staticmethod
784 def _get_content_length(body, method):
785 """Get the content-length based on the body.
787 If the body is None, we set Content-Length: 0 for methods that expect
788 a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
789 any method if the body is a str or bytes-like object and not a file.
790 """
791 if body is None:
792 # do an explicit check for not None here to distinguish
793 # between unset and set but empty
794 if method.upper() in _METHODS_EXPECTING_BODY:
795 return 0
796 else:
797 return None
799 if hasattr(body, 'read'):
800 # file-like object.
801 return None
803 try:
804 # does it implement the buffer protocol (bytes, bytearray, array)?
805 mv = memoryview(body)
806 return mv.nbytes
807 except TypeError:
808 pass
810 if isinstance(body, str):
811 return len(body)
813 return None
815 def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
816 source_address=None, blocksize=8192):
817 self.timeout = timeout
818 self.source_address = source_address
819 self.blocksize = blocksize
820 self.sock = None
821 self._buffer = []
822 self.__response = None
823 self.__state = _CS_IDLE
824 self._method = None
825 self._tunnel_host = None
826 self._tunnel_port = None
827 self._tunnel_headers = {}
829 (self.host, self.port) = self._get_hostport(host, port)
831 self._validate_host(self.host)
833 # This is stored as an instance variable to allow unit
834 # tests to replace it with a suitable mockup
835 self._create_connection = socket.create_connection
837 def set_tunnel(self, host, port=None, headers=None):
838 """Set up host and port for HTTP CONNECT tunnelling.
840 In a connection that uses HTTP CONNECT tunneling, the host passed to the
841 constructor is used as a proxy server that relays all communication to
842 the endpoint passed to `set_tunnel`. This done by sending an HTTP
843 CONNECT request to the proxy server when the connection is established.
845 This method must be called before the HTML connection has been
846 established.
848 The headers argument should be a mapping of extra HTTP headers to send
849 with the CONNECT request.
850 """
852 if self.sock:
853 raise RuntimeError("Can't set up tunnel for established connection")
855 self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
856 if headers:
857 self._tunnel_headers = headers
858 else:
859 self._tunnel_headers.clear()
861 def _get_hostport(self, host, port):
862 if port is None:
863 i = host.rfind(':')
864 j = host.rfind(']') # ipv6 addresses have [...]
865 if i > j:
866 try:
867 port = int(host[i+1:])
868 except ValueError:
869 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
870 port = self.default_port
871 else:
872 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
873 host = host[:i]
874 else:
875 port = self.default_port
876 if host and host[0] == '[' and host[-1] == ']':
877 host = host[1:-1]
879 return (host, port)
881 def set_debuglevel(self, level):
882 self.debuglevel = level
    def _tunnel(self):
        """Issue the CONNECT request for a proxy tunnel and validate the
        proxy's reply, discarding its response headers.

        Raises OSError when the proxy answers with anything but 200 OK.
        """
        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
            self._tunnel_port)
        connect_bytes = connect_str.encode("ascii")
        self.send(connect_bytes)
        # Extra headers supplied via set_tunnel() follow the CONNECT line.
        for header, value in self._tunnel_headers.items():
            header_str = "%s: %s\r\n" % (header, value)
            header_bytes = header_str.encode("latin-1")
            self.send(header_bytes)
        # Blank line terminates the request.
        self.send(b'\r\n')

        # Parse the proxy's status line with a throwaway response object.
        response = self.response_class(self.sock, method=self._method)
        (version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError("Tunnel connection failed: %d %s" % (code,
                                                               message.strip()))
        # Drain (and optionally print) the proxy's response headers.
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
915 def connect(self):
916 """Connect to the host and port specified in __init__."""
917 self.sock = self._create_connection(
918 (self.host,self.port), self.timeout, self.source_address)
919 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
921 if self._tunnel_host:
922 self._tunnel()
924 def close(self):
925 """Close the connection to the HTTP server."""
926 self.__state = _CS_IDLE
927 try:
928 sock = self.sock
929 if sock:
930 self.sock = None
931 sock.close() # close it manually... there may be other refs
932 finally:
933 response = self.__response
934 if response:
935 self.__response = None
936 response.close()
    def send(self, data):
        """Send `data' to the server.

        ``data`` can be a string object, a bytes object, an array object, a
        file-like object that supports a .read() method, or an iterable
        object.
        """
        # Auto-connect on first use when auto_open is set; otherwise the
        # caller must have connected explicitly.
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        if self.debuglevel > 0:
            print("send:", repr(data))
        if hasattr(data, "read"):
            # File-like object: stream it out in self.blocksize chunks.
            if self.debuglevel > 0:
                print("sendIng a read()able")
            # Text streams must be encoded before hitting the socket.
            encode = self._is_textIO(data)
            if encode and self.debuglevel > 0:
                print("encoding file using iso-8859-1")
            while 1:
                datablock = data.read(self.blocksize)
                if not datablock:
                    break
                if encode:
                    datablock = datablock.encode("iso-8859-1")
                self.sock.sendall(datablock)
            return
        try:
            self.sock.sendall(data)
        except TypeError:
            # sendall() rejects non-buffer types; fall back to sending
            # each element of an iterable individually.
            if isinstance(data, collections.abc.Iterable):
                for d in data:
                    self.sock.sendall(d)
            else:
                raise TypeError("data should be a bytes-like object "
                                "or an iterable, got %r" % type(data))
976 def _output(self, s):
977 """Add a line of output to the current request buffer.
979 Assumes that the line does *not* end with \\r\\n.
980 """
981 self._buffer.append(s)
983 def _read_readable(self, readable):
984 if self.debuglevel > 0:
985 print("sendIng a read()able")
986 encode = self._is_textIO(readable)
987 if encode and self.debuglevel > 0:
988 print("encoding file using iso-8859-1")
989 while True:
990 datablock = readable.read(self.blocksize)
991 if not datablock:
992 break
993 if encode:
994 datablock = datablock.encode("iso-8859-1")
995 yield datablock
    def _send_output(self, message_body=None, encode_chunked=False):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        """
        # The two empty strings produce the blank line that terminates the
        # header section once joined with CRLF.
        self._buffer.extend((b"", b""))
        msg = b"\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

        if message_body is not None:

            # create a consistent interface to message_body
            if hasattr(message_body, 'read'):
                # Let file-like take precedence over byte-like. This
                # is needed to allow the current position of mmap'ed
                # files to be taken into account.
                chunks = self._read_readable(message_body)
            else:
                try:
                    # this is solely to check to see if message_body
                    # implements the buffer API. it /would/ be easier
                    # to capture if PyObject_CheckBuffer was exposed
                    # to Python.
                    memoryview(message_body)
                except TypeError:
                    try:
                        chunks = iter(message_body)
                    except TypeError:
                        raise TypeError("message_body should be a bytes-like "
                                        "object or an iterable, got %r"
                                        % type(message_body))
                else:
                    # the object implements the buffer interface and
                    # can be passed directly into socket methods
                    chunks = (message_body,)

            for chunk in chunks:
                if not chunk:
                    if self.debuglevel > 0:
                        print('Zero length chunk ignored')
                    continue

                if encode_chunked and self._http_vsn == 11:
                    # chunked encoding: hex size line, payload, CRLF
                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
                        + b'\r\n'
                self.send(chunk)

            if encode_chunked and self._http_vsn == 11:
                # end chunked transfer with the terminating zero-size chunk
                self.send(b'0\r\n\r\n')
    def putrequest(self, method, url, skip_host=False,
                   skip_accept_encoding=False):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest if the connection is not idle, and
        InvalidURL (via _validate_path) if the url contains control
        characters.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request. (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true. (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest(self.__state)

        # Save the method for use later in the response phase
        self._method = method

        url = url or '/'
        # Reject control characters in the path (CVE-2019-9740).
        self._validate_path(url)

        request = '%s %s %s' % (method, url, self._http_vsn_str)

        # The request line is ASCII-encoded (also CVE-2019-9740 hardening).
        self._output(self._encode_request(request))

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port, include it in the
                # header. If the request is going through a proxy,
                # use the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    nil, netloc, nil, nil, nil = urlsplit(url)

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        # non-ASCII hostnames are IDNA-encoded
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    # No host in the URL: use the tunnel target when
                    # CONNECT-tunnelling, else the connection's own host.
                    if self._tunnel_host:
                        host = self._tunnel_host
                        port = self._tunnel_port
                    else:
                        host = self.host
                        port = self.port

                    try:
                        host_enc = host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = host.encode("idna")

                    # As per RFC 2732, an IPv6 address literal should be
                    # wrapped with [] when used as a Host header.

                    if host.find(':') >= 0:
                        host_enc = b'[' + host_enc + b']'

                    if port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        host_enc = host_enc.decode("ascii")
                        self.putheader('Host', "%s:%s" % (host_enc, port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            if not skip_accept_encoding:
                self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass
1176 def _encode_request(self, request):
1177 # ASCII also helps prevent CVE-2019-9740.
1178 return request.encode('ascii')
1180 def _validate_path(self, url):
1181 """Validate a url for putrequest."""
1182 # Prevent CVE-2019-9740.
1183 match = _contains_disallowed_url_pchar_re.search(url)
1184 if match:
1185 raise InvalidURL(f"URL can't contain control characters. {url!r} "
1186 f"(found at least {match.group()!r})")
1188 def _validate_host(self, host):
1189 """Validate a host so it doesn't contain control characters."""
1190 # Prevent CVE-2019-18348.
1191 match = _contains_disallowed_url_pchar_re.search(host)
1192 if match:
1193 raise InvalidURL(f"URL can't contain control characters. {host!r} "
1194 f"(found at least {match.group()!r})")
1196 def putheader(self, header, *values):
1197 """Send a request header line to the server.
1199 For example: h.putheader('Accept', 'text/html')
1200 """
1201 if self.__state != _CS_REQ_STARTED:
1202 raise CannotSendHeader()
1204 if hasattr(header, 'encode'):
1205 header = header.encode('ascii')
1207 if not _is_legal_header_name(header):
1208 raise ValueError('Invalid header name %r' % (header,))
1210 values = list(values)
1211 for i, one_value in enumerate(values):
1212 if hasattr(one_value, 'encode'):
1213 values[i] = one_value.encode('latin-1')
1214 elif isinstance(one_value, int):
1215 values[i] = str(one_value).encode('ascii')
1217 if _is_illegal_header_value(values[i]):
1218 raise ValueError('Invalid header value %r' % (values[i],))
1220 value = b'\r\n\t'.join(values)
1221 header = header + b': ' + value
1222 self._output(header)
1224 def endheaders(self, message_body=None, *, encode_chunked=False):
1225 """Indicate that the last header line has been sent to the server.
1227 This method sends the request to the server. The optional message_body
1228 argument can be used to pass a message body associated with the
1229 request.
1230 """
1231 if self.__state == _CS_REQ_STARTED:
1232 self.__state = _CS_REQ_SENT
1233 else:
1234 raise CannotSendHeader()
1235 self._send_output(message_body, encode_chunked=encode_chunked)
1237 def request(self, method, url, body=None, headers={}, *,
1238 encode_chunked=False):
1239 """Send a complete request to the server."""
1240 self._send_request(method, url, body, headers, encode_chunked)
1242 def _send_request(self, method, url, body, headers, encode_chunked):
1243 # Honor explicitly requested Host: and Accept-Encoding: headers.
1244 header_names = frozenset(k.lower() for k in headers)
1245 skips = {}
1246 if 'host' in header_names:
1247 skips['skip_host'] = 1
1248 if 'accept-encoding' in header_names:
1249 skips['skip_accept_encoding'] = 1
1251 self.putrequest(method, url, **skips)
1253 # chunked encoding will happen if HTTP/1.1 is used and either
1254 # the caller passes encode_chunked=True or the following
1255 # conditions hold:
1256 # 1. content-length has not been explicitly set
1257 # 2. the body is a file or iterable, but not a str or bytes-like
1258 # 3. Transfer-Encoding has NOT been explicitly set by the caller
1260 if 'content-length' not in header_names:
1261 # only chunk body if not explicitly set for backwards
1262 # compatibility, assuming the client code is already handling the
1263 # chunking
1264 if 'transfer-encoding' not in header_names:
1265 # if content-length cannot be automatically determined, fall
1266 # back to chunked encoding
1267 encode_chunked = False
1268 content_length = self._get_content_length(body, method)
1269 if content_length is None:
1270 if body is not None:
1271 if self.debuglevel > 0:
1272 print('Unable to determine size of %r' % body)
1273 encode_chunked = True
1274 self.putheader('Transfer-Encoding', 'chunked')
1275 else:
1276 self.putheader('Content-Length', str(content_length))
1277 else:
1278 encode_chunked = False
1280 for hdr, value in headers.items():
1281 self.putheader(hdr, value)
1282 if isinstance(body, str):
1283 # RFC 2616 Section 3.7.1 says that text default has a
1284 # default charset of iso-8859-1.
1285 body = _encode(body, 'body')
1286 self.endheaders(body, encode_chunked=encode_chunked)
    def getresponse(self):
        """Get the response from the server.

        If the HTTPConnection is in the correct state, returns an
        instance of HTTPResponse or of whatever object is returned by
        the response_class variable.

        If a request has not been sent or if a previous response has
        not be handled, ResponseNotReady is raised.  If the HTTP
        response indicates that the connection should be closed, then
        it will be closed before the response is returned.  When the
        connection is closed, the underlying socket is closed.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady(self.__state)

        # Only pass debuglevel through when debugging is on, so the
        # default response constructor signature stays simple.
        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel,
                                           method=self._method)
        else:
            response = self.response_class(self.sock, method=self._method)

        try:
            try:
                response.begin()
            except ConnectionError:
                # The transport failed mid-response; drop the connection.
                self.close()
                raise
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # On any failure, make sure the response releases its
            # resources before propagating the error.
            response.close()
            raise
try:
    import ssl
except ImportError:
    # ssl support is optional; without it HTTPSConnection is simply
    # not defined and not exported.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            # key_file/cert_file/check_hostname are legacy parameters;
            # passing any of them triggers a DeprecationWarning below.
            super(HTTPSConnection, self).__init__(host, port, timeout,
                                                  source_address,
                                                  blocksize=blocksize)
            if (key_file is not None or cert_file is not None or
                    check_hostname is not None):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file
            if context is None:
                context = ssl._create_default_https_context()
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            will_verify = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                # inherit the hostname-checking policy from the context
                check_hostname = context.check_hostname
            if check_hostname and not will_verify:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            "Connect to a host on a given (SSL) port."

            # Establish the plain TCP connection (and any CONNECT
            # tunnel) first, then wrap the socket with TLS.
            super().connect()

            if self._tunnel_host:
                # When tunnelling, SNI/verification must use the
                # tunnel target, not the proxy host.
                server_hostname = self._tunnel_host
            else:
                server_hostname = self.host

            self.sock = self._context.wrap_socket(self.sock,
                                                  server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
class HTTPException(Exception):
    """Base class of all exceptions raised by this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args; otherwise str() will fail.
    """
class NotConnected(HTTPException):
    """Raised to signal that no connection to the server exists."""
class InvalidURL(HTTPException):
    """Raised when a URL or host contains disallowed characters.

    See HTTPConnection._validate_path / _validate_host, which raise
    this for control characters (CVE-2019-9740 / CVE-2019-18348).
    """
class UnknownProtocol(HTTPException):
    """Signals an unrecognized protocol version string.

    The offending version is kept in ``self.version``.
    """

    def __init__(self, version):
        self.version = version
        self.args = version,
class UnknownTransferEncoding(HTTPException):
    """Raised for a Transfer-Encoding this client cannot handle."""
class UnimplementedFileMode(HTTPException):
    """Raised for a file mode this library does not support."""
class IncompleteRead(HTTPException):
    """Raised when fewer bytes were received than anticipated.

    ``partial`` holds the bytes read so far; ``expected``, when not
    None, is the number of additional bytes that were expected.
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = partial,

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return '%s(%i bytes read%s)' % (self.__class__.__name__,
                                        len(self.partial), suffix)

    # Use object's default __str__ rather than Exception's rendering
    # of self.args (which would print the whole partial payload).
    __str__ = object.__str__
class ImproperConnectionState(HTTPException):
    """Base class for calling a method in the wrong connection state."""
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request is in progress."""
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent, or a
    prior response is still unread."""
class BadStatusLine(HTTPException):
    """Carries a malformed status line in ``self.line``.

    Falsy lines (e.g. the empty string) are stored as their repr so
    the message is never blank.
    """

    def __init__(self, line):
        self.line = line if line else repr(line)
        self.args = self.line,
class LineTooLong(HTTPException):
    """Raised when a protocol line exceeds the _MAXLINE limit."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """A ConnectionResetError that also behaves as a BadStatusLine
    carrying an empty line."""

    def __init__(self, *pos, **kw):
        # Initialize both bases explicitly; BadStatusLine gets an
        # empty line, ConnectionResetError the caller's arguments.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
# for backwards compatibility: keep ``error`` as an alias of
# HTTPException so older code referencing it keeps working.
error = HTTPException