Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/http/client.py: 18%

1r"""HTTP/1.1 client library

3<intro stuff goes here>

4<other stuff, too>

6HTTPConnection goes through a number of "states", which define when a client

7may legally make another request or fetch the response for a particular

8request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

44This diagram presents the following rules:

45 -- a second request may not be started until {response-headers-read}

46 -- a response [object] cannot be retrieved until {request-sent}

47 -- there is no differentiation between an unread response body and a

48 partially read response body

50Note: this enforcement is applied by the HTTPConnection class. The

51 HTTPResponse class does not enforce this state machine, which

52 implies sophisticated clients may accelerate the request/response

53 pipeline. Caution should be taken, though: accelerating the states

54 beyond the above pattern may imply knowledge of the server's

55 connection-close behavior for certain requests. For example, it

56 is impossible to tell whether the server will close the connection

57 UNTIL the response headers have been read; this means that further

58 requests cannot be placed into the pipeline until it is known that

59 the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>

69"""

71import email.parser

72import email.message

73import http

74import io

75import re

76import socket

77import collections.abc

78from urllib.parse import urlsplit

# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Default TCP ports for plain HTTP and for HTTPS.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until the response has
# been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# (injects every http.HTTPStatus member, e.g. OK, CONTINUE, NO_CONTENT,
# into this module's namespace as a bare name)
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of header lines accepted by _read_headers().
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these should be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}

157

158

159def _encode(data, name='data'):

160 """Call data.encode("latin-1") but show a better error message."""

161 try:

162 return data.encode("latin-1")

163 except UnicodeEncodeError as err:

164 raise UnicodeEncodeError(

165 err.encoding,

166 err.object,

167 err.start,

168 err.end,

169 "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "

170 "if you want to send it encoded in UTF-8." %

171 (name.title(), data[err.start:err.end], name)) from None

172

173

class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Return a list with one ``"Name: value"`` string per occurrence of
        the header *name*, in the order the headers are stored.  The
        header name is compared case-insensitively.  If the header does
        not occur, an empty list is returned.

        The value is emitted exactly as stored on the message, so any
        folded continuation text stays embedded in that entry.

        The previous implementation compared ``name + ':'`` against the
        entries of ``self.keys()``, which are bare header names without a
        colon, so it could never match a real header and always returned
        an empty list.
        """
        wanted = name.lower()
        return [
            f"{key}: {value}"
            for key, value in self.items()
            if key.lower() == wanted
        ]

203

def _read_headers(fp):
    """Collect raw header lines from *fp* up to and including the terminator.

    Lines are read with ``fp.readline`` and returned as a list.  Reading
    stops after a blank line (``b'\\r\\n'`` or ``b'\\n'``) or at EOF
    (``b''``); that final line is part of the returned list.

    Raises LineTooLong if a line exceeds _MAXLINE bytes and HTTPException
    if more than _MAXHEADERS lines are read.
    """
    terminators = (b'\r\n', b'\n', b'')
    lines = []
    while True:
        # Ask for one byte more than allowed so an oversized line is
        # detectable without reading it completely.
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        lines.append(raw)
        if len(lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in terminators:
            return lines

221

def parse_headers(fp, _class=HTTPMessage):
    """Parse RFC 2822 style headers from the binary file object *fp*.

    The email parser works on str, but wrapping *fp* in a text layer
    would buffer bytes beyond the headers that must later be read as
    bytes.  So exactly the header lines are read as bytes here, decoded
    as ISO-8859-1, and handed to the parser.

    Returns a message object of type *_class*.
    """
    raw_lines = _read_headers(fp)
    header_text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(header_text)

235

236

class HTTPResponse(io.BufferedIOBase):
    """File-like object for one HTTP response read from a socket.

    begin() parses the status line and headers; the read methods then
    return the body, honouring Content-Length and chunked transfer
    encoding.  Once the body has been consumed the underlying file object
    is closed and isclosed() returns True.
    """

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        """Wrap *sock* for reading a response.

        *method* is the request method (a "HEAD" response has no body).
        *url* is accepted for signature compatibility but is not stored
        by this constructor.
        """
        # If the response includes a content-length header, we need to
        # make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  This will
        # happen if a self.fp.read() is done (without a size) whether
        # self.fp is buffered or not.  So, no self.fp.read() by
        # clients unless they know what they are doing.
        self.fp = sock.makefile("rb")
        self.debuglevel = debuglevel
        self._method = method

        # The HTTPResponse object is returned via urllib.  The clients
        # of http and urllib expect different attributes for the
        # headers.  headers is used here and supports urllib.  msg is
        # provided as a backwards compatibility layer for http
        # clients.

        self.headers = self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN  # HTTP-Version
        self.status = _UNKNOWN   # Status-Code
        self.reason = _UNKNOWN   # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line.

        Returns ``(version, status, reason)`` with *status* as an int.
        Raises LineTooLong, RemoteDisconnected (empty read) or
        BadStatusLine (malformed line or status outside 100..999).
        """
        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        if len(line) > _MAXLINE:
            raise LineTooLong("status line")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise RemoteDisconnected("Remote end closed connection without"
                                     " response")
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            try:
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/"):
            self._close_conn()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body-reading state.

        Populates status/code, reason, version, headers/msg, chunked,
        chunk_left, will_close and length.  Does nothing if the headers
        have already been read.  Raises UnknownProtocol for a version
        that is not HTTP/0.9 or HTTP/1.x.
        """
        if self.headers is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        # (CONTINUE and the other bare status names are injected into the
        # module namespace from http.HTTPStatus)
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            skipped_headers = _read_headers(self.fp)
            if self.debuglevel > 0:
                print("headers:", skipped_headers)
            del skipped_headers

        self.code = self.status = status
        self.reason = reason.strip()
        if version in ("HTTP/1.0", "HTTP/0.9"):
            # Some servers might still return "0.9", treat it as 1.0 anyway
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.headers = self.msg = parse_headers(self.fp)

        if self.debuglevel > 0:
            for hdr, val in self.headers.items():
                print("header:", hdr + ":", val)

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.headers.get("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = True
            self.chunk_left = None
        else:
            self.chunked = False

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.headers.get("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = True

    def _check_close(self):
        """Return True if the headers say the connection closes afterwards."""
        conn = self.headers.get("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.headers.get("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.headers.get("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def _close_conn(self):
        """Close the underlying file object and mark the response finished."""
        fp = self.fp
        self.fp = None
        fp.close()

    def close(self):
        """Close the response, and the underlying file if still open."""
        try:
            super().close()  # set "closed" flag
        finally:
            if self.fp:
                self._close_conn()

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    def flush(self):
        """Flush this object and the underlying file, if it is still open."""
        super().flush()
        if self.fp:
            self.fp.flush()

    def readable(self):
        """Always returns True"""
        return True

    # End of "raw stream" methods

    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return up to *amt* bytes of the response body.

        With *amt* omitted, None or negative, the whole remaining body is
        returned (a negative size previously raised ValueError from
        ``bytearray``).  Returns b"" once the response is finished or for
        a HEAD request.  Raises IncompleteRead if an unbounded read gets
        fewer bytes than Content-Length promised.
        """
        if self.fp is None:
            return b""

        if self._method == "HEAD":
            self._close_conn()
            return b""

        if amt is not None and amt >= 0:
            # Amount is given, implement using readinto
            if (not self.chunked and self.length is not None
                    and amt > self.length):
                # readinto() clips to the remaining length anyway; clip
                # first so the scratch buffer is no larger than the data
                # that can actually arrive.
                amt = self.length
            b = bytearray(amt)
            n = self.readinto(b)
            return memoryview(b)[:n].tobytes()
        else:
            # Amount is not given (unbounded read) so we must check self.length
            # and self.chunked

            if self.chunked:
                return self._readall_chunked()

            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self._close_conn()
                    raise
                self.length = 0
            self._close_conn()        # we read everything
            return s

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b and return the number
        of bytes read.
        """

        if self.fp is None:
            return 0

        if self._method == "HEAD":
            self._close_conn()
            return 0

        if self.chunked:
            return self._readinto_chunked(b)

        if self.length is not None:
            if len(b) > self.length:
                # clip the read to the "end of response"
                b = memoryview(b)[0:self.length]

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        n = self.fp.readinto(b)
        if not n and b:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self._close_conn()
        elif self.length is not None:
            self.length -= n
            if not self.length:
                self._close_conn()
        return n

    def _read_next_chunk_size(self):
        """Read a chunk-size line and return the size as an int.

        Raises LineTooLong, or ValueError (after closing the connection)
        if the size is not valid hexadecimal.
        """
        # Read the next chunk size from the file
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("chunk size")
        i = line.find(b";")
        if i >= 0:
            line = line[:i]  # strip chunk-extensions
        try:
            return int(line, 16)
        except ValueError:
            # close the connection as protocol synchronisation is
            # probably lost
            self._close_conn()
            raise

    def _read_and_discard_trailer(self):
        """Consume trailer lines up to the blank line or EOF."""
        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

    def _get_chunk_left(self):
        """Return bytes left in the current chunk, starting a new one if needed.

        Returns None once the terminating zero-size chunk has been read
        (the connection is closed at that point).
        """
        # return self.chunk_left, reading a new chunk if necessary.
        # chunk_left == 0: at the end of the current chunk, need to close it
        # chunk_left == None: No current chunk, should read next.
        # This function returns non-zero or None if the last chunk has
        # been read.
        chunk_left = self.chunk_left
        if not chunk_left:  # Can be 0 or None
            if chunk_left is not None:
                # We are at the end of chunk, discard chunk end
                self._safe_read(2)  # toss the CRLF at the end of the chunk
            try:
                chunk_left = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(b'')
            if chunk_left == 0:
                # last chunk: 1*("0") [ chunk-extension ] CRLF
                self._read_and_discard_trailer()
                # we read everything; close the "file"
                self._close_conn()
                chunk_left = None
            self.chunk_left = chunk_left
        return chunk_left

    def _readall_chunked(self):
        """Read and return the whole remaining chunked body.

        Raises IncompleteRead carrying the data read so far if the stream
        ends early.
        """
        assert self.chunked != _UNKNOWN
        value = []
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    break
                value.append(self._safe_read(chunk_left))
                self.chunk_left = 0
            return b''.join(value)
        except IncompleteRead:
            raise IncompleteRead(b''.join(value))

    def _readinto_chunked(self, b):
        """Fill *b* from the chunked body; return the number of bytes stored."""
        assert self.chunked != _UNKNOWN
        total_bytes = 0
        mvb = memoryview(b)
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    return total_bytes

                if len(mvb) <= chunk_left:
                    # the rest of the buffer fits inside the current chunk
                    n = self._safe_readinto(mvb)
                    self.chunk_left = chunk_left - n
                    return total_bytes + n

                # drain the current chunk, then loop for the next one
                temp_mvb = mvb[:chunk_left]
                n = self._safe_readinto(temp_mvb)
                mvb = mvb[n:]
                total_bytes += n
                self.chunk_left = 0

        except IncompleteRead:
            raise IncompleteRead(bytes(b[0:total_bytes]))

    def _safe_read(self, amt):
        """Read the number of bytes requested.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        data = self.fp.read(amt)
        if len(data) < amt:
            raise IncompleteRead(data, amt-len(data))
        return data

    def _safe_readinto(self, b):
        """Same as _safe_read, but for reading into a buffer."""
        amt = len(b)
        n = self.fp.readinto(b)
        if n < amt:
            raise IncompleteRead(bytes(b[:n]), amt-n)
        return n

    def read1(self, n=-1):
        """Read with at most one underlying system call.  If at least one
        byte is buffered, return that instead.

        When a Content-Length is known and this read consumes the last
        byte, the response is closed, matching readinto().
        """
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._read1_chunked(n)
        if self.length is not None and (n < 0 or n > self.length):
            n = self.length
        result = self.fp.read1(n)
        if not result and n:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
            if not self.length:
                # Body fully consumed: close so isclosed() reports it.
                self._close_conn()
        return result

    def peek(self, n=-1):
        """Return buffered body bytes without consuming them."""
        # Having this enables IOBase.readline() to read more than one
        # byte at a time
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._peek_chunked(n)
        return self.fp.peek(n)

    def readline(self, limit=-1):
        """Read and return one line of the body, at most *limit* bytes.

        When a Content-Length is known and this read consumes the last
        byte, the response is closed, matching readinto().
        """
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            # Fallback to IOBase readline which uses peek() and read()
            return super().readline(limit)
        if self.length is not None and (limit < 0 or limit > self.length):
            limit = self.length
        result = self.fp.readline(limit)
        if not result and limit:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
            if not self.length:
                # Body fully consumed: close so isclosed() reports it.
                self._close_conn()
        return result

    def _read1_chunked(self, n):
        """read1() for chunked bodies: read at most *n* bytes of one chunk."""
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        chunk_left = self._get_chunk_left()
        if chunk_left is None or n == 0:
            return b''
        if not (0 <= n <= chunk_left):
            n = chunk_left  # if n is negative or larger than chunk_left
        read = self.fp.read1(n)
        self.chunk_left -= len(read)
        if not read:
            raise IncompleteRead(b"")
        return read

    def _peek_chunked(self, n):
        """peek() for chunked bodies: never returns data past the chunk."""
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        try:
            chunk_left = self._get_chunk_left()
        except IncompleteRead:
            return b''  # peek doesn't worry about protocol
        if chunk_left is None:
            return b''  # eof
        # peek is allowed to return more than requested.  Just request the
        # entire chunk, and truncate what we get.
        return self.fp.peek(chunk_left)[:chunk_left]

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        return self.fp.fileno()

    def getheader(self, name, default=None):
        '''Returns the value of the header matching *name*.

        If there are multiple matching headers, the values are
        combined into a single string separated by commas and spaces.

        If no matching header is found, returns *default* or None if
        the *default* is not specified.

        If the headers are unknown, raises http.client.ResponseNotReady.

        '''
        if self.headers is None:
            raise ResponseNotReady()
        headers = self.headers.get_all(name) or default
        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
            return headers
        else:
            return ', '.join(headers)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.headers is None:
            raise ResponseNotReady()
        return list(self.headers.items())

    # We override IOBase.__iter__ so that it doesn't check for closed-ness

    def __iter__(self):
        return self

    # For compatibility with old-style urllib responses.

    def info(self):
        '''Return the parsed response headers (``self.headers``).

        This is None until begin() has read the headers.

        '''
        return self.headers

    def geturl(self):
        '''Return ``self.url``.

        NOTE(review): __init__ does not assign ``self.url``, so the
        attribute must be set by whoever hands out the response
        (presumably urllib -- confirm); otherwise this raises
        AttributeError.

        '''
        return self.url

    def getcode(self):
        '''Return the HTTP status code that was sent with the response
        (``self.status``).

        '''
        return self.status

771

772class HTTPConnection:

773

774 _http_vsn = 11

775 _http_vsn_str = 'HTTP/1.1'

776

777 response_class = HTTPResponse

778 default_port = HTTP_PORT

779 auto_open = 1

780 debuglevel = 0

781

782 @staticmethod

783 def _is_textIO(stream):

784 """Test whether a file-like object is a text or a binary stream.

785 """

786 return isinstance(stream, io.TextIOBase)

787

788 @staticmethod

789 def _get_content_length(body, method):

790 """Get the content-length based on the body.

791

792 If the body is None, we set Content-Length: 0 for methods that expect

793 a body (RFC 7230, Section 3.3.2). We also set the Content-Length for

794 any method if the body is a str or bytes-like object and not a file.

795 """

796 if body is None:

797 # do an explicit check for not None here to distinguish

798 # between unset and set but empty

799 if method.upper() in _METHODS_EXPECTING_BODY:

800 return 0

801 else:

802 return None

803

804 if hasattr(body, 'read'):

805 # file-like object.

806 return None

807

808 try:

809 # does it implement the buffer protocol (bytes, bytearray, array)?

810 mv = memoryview(body)

811 return mv.nbytes

812 except TypeError:

813 pass

814

815 if isinstance(body, str):

816 return len(body)

817

818 return None

819

def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             source_address=None, blocksize=8192):
    """Record connection settings; no socket is opened here.

    *host* may carry an embedded ":port" when *port* is None; it is
    split by _get_hostport() and the host part is then checked by
    _validate_host().
    """
    # Settings supplied by the caller.
    self.timeout = timeout
    self.source_address = source_address
    self.blocksize = blocksize

    # Connection / request state.
    self.sock = None
    self._buffer = []
    self.__response = None
    self.__state = _CS_IDLE
    self._method = None

    # CONNECT tunnel configuration (see set_tunnel()).
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    parsed_host, parsed_port = self._get_hostport(host, port)
    self.host = parsed_host
    self.port = parsed_port

    self._validate_host(self.host)

    # This is stored as an instance variable to allow unit
    # tests to replace it with a suitable mockup
    self._create_connection = socket.create_connection

841

842 def set_tunnel(self, host, port=None, headers=None):

843 """Set up host and port for HTTP CONNECT tunnelling.

844

845 In a connection that uses HTTP CONNECT tunneling, the host passed to the

846 constructor is used as a proxy server that relays all communication to

847 the endpoint passed to `set_tunnel`. This done by sending an HTTP

848 CONNECT request to the proxy server when the connection is established.

849

850 This method must be called before the HTTP connection has been

851 established.

852

853 The headers argument should be a mapping of extra HTTP headers to send

854 with the CONNECT request.

855 """

856

857 if self.sock:

858 raise RuntimeError("Can't set up tunnel for established connection")

859

860 self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)

861 if headers:

862 self._tunnel_headers = headers

863 else:

864 self._tunnel_headers.clear()

865

866 def _get_hostport(self, host, port):

867 if port is None:

868 i = host.rfind(':')

869 j = host.rfind(']') # ipv6 addresses have [...]

870 if i > j:

871 try:

872 port = int(host[i+1:])

873 except ValueError:

874 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/

875 port = self.default_port

876 else:

877 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])

878 host = host[:i]

879 else:

880 port = self.default_port

881 if host and host[0] == '[' and host[-1] == ']':

882 host = host[1:-1]

883

884 return (host, port)

885

886 def set_debuglevel(self, level):

887 self.debuglevel = level

888

def _tunnel(self):
    """Send the CONNECT request to the proxy and consume its reply.

    Sends "CONNECT host:port HTTP/1.0" plus any tunnel headers in one
    send() call, reads the status line, and discards the reply headers.

    Raises OSError (after closing the connection) if the proxy does not
    answer with 200 OK, and LineTooLong if a reply header line exceeds
    _MAXLINE.

    The temporary response object is closed on every exit path so its
    makefile() reader is released; previously it was never closed.
    """
    connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
        self._tunnel_host.encode("ascii"), self._tunnel_port)
    headers = [connect]
    for header, value in self._tunnel_headers.items():
        headers.append(f"{header}: {value}\r\n".encode("latin-1"))
    headers.append(b"\r\n")
    # Making a single send() call instead of one per line encourages
    # the host OS to use a more optimal packet size instead of
    # potentially emitting a series of small packets.
    self.send(b"".join(headers))
    del headers

    response = self.response_class(self.sock, method=self._method)
    try:
        (version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError(
                f"Tunnel connection failed: {code} {message.strip()}")
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
    finally:
        # Only the reader wrapping the socket is released here; the
        # connection's own socket is closed solely by self.close().
        response.close()

920

def connect(self):
    """Open the socket to self.host/self.port.

    Enables TCP_NODELAY on the new socket and, if a tunnel host has been
    configured, performs the CONNECT handshake via _tunnel().
    """
    address = (self.host, self.port)
    self.sock = self._create_connection(
        address, self.timeout, self.source_address)
    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

    if self._tunnel_host:
        self._tunnel()

929

def close(self):
    """Close the connection to the HTTP server.

    Resets the state to idle, closes the socket if there is one, and
    closes any response still attached -- the latter even if closing
    the socket raised.
    """
    self.__state = _CS_IDLE
    try:
        active_sock = self.sock
        if active_sock:
            # Drop our reference first, then close explicitly: other
            # references to the socket may still exist.
            self.sock = None
            active_sock.close()
    finally:
        pending = self.__response
        if pending:
            self.__response = None
            pending.close()

943

944 def send(self, data):

945 """Send `data' to the server.

946 ``data`` can be a string object, a bytes object, an array object, a

947 file-like object that supports a .read() method, or an iterable object.

948 """

949

950 if self.sock is None:

951 if self.auto_open:

952 self.connect()

953 else:

954 raise NotConnected()

955

956 if self.debuglevel > 0:

957 print("send:", repr(data))

958 if hasattr(data, "read") :

959 if self.debuglevel > 0:

960 print("sendIng a read()able")

961 encode = self._is_textIO(data)

962 if encode and self.debuglevel > 0:

963 print("encoding file using iso-8859-1")

964 while 1:

965 datablock = data.read(self.blocksize)

966 if not datablock:

967 break

968 if encode:

969 datablock = datablock.encode("iso-8859-1")

970 self.sock.sendall(datablock)

971 return

972 try:

973 self.sock.sendall(data)

974 except TypeError:

975 if isinstance(data, collections.abc.Iterable):

976 for d in data:

977 self.sock.sendall(d)

978 else:

979 raise TypeError("data should be a bytes-like object "

980 "or an iterable, got %r" % type(data))

981

982 def _output(self, s):

983 """Add a line of output to the current request buffer.

984

985 Assumes that the line does *not* end with \\r\\n.

986 """

987 self._buffer.append(s)

988

989 def _read_readable(self, readable):

990 if self.debuglevel > 0:

991 print("sendIng a read()able")

992 encode = self._is_textIO(readable)

993 if encode and self.debuglevel > 0:

994 print("encoding file using iso-8859-1")

995 while True:

996 datablock = readable.read(self.blocksize)

997 if not datablock:

998 break

999 if encode:

1000 datablock = datablock.encode("iso-8859-1")

1001 yield datablock

1002

1003 def _send_output(self, message_body=None, encode_chunked=False):

1004 """Send the currently buffered request and clear the buffer.

1005

1006 Appends an extra \\r\\n to the buffer.

1007 A message_body may be specified, to be appended to the request.

1008 """

1009 self._buffer.extend((b"", b""))

1010 msg = b"\r\n".join(self._buffer)

1011 del self._buffer[:]

1012 self.send(msg)

1013

1014 if message_body is not None:

1015

1016 # create a consistent interface to message_body

1017 if hasattr(message_body, 'read'):

1018 # Let file-like take precedence over byte-like. This

1019 # is needed to allow the current position of mmap'ed

1020 # files to be taken into account.

1021 chunks = self._read_readable(message_body)

1022 else:

1023 try:

1024 # this is solely to check to see if message_body

1025 # implements the buffer API. it /would/ be easier

1026 # to capture if PyObject_CheckBuffer was exposed

1027 # to Python.

1028 memoryview(message_body)

1029 except TypeError:

1030 try:

1031 chunks = iter(message_body)

1032 except TypeError:

1033 raise TypeError("message_body should be a bytes-like "

1034 "object or an iterable, got %r"

1035 % type(message_body))

1036 else:

1037 # the object implements the buffer interface and

1038 # can be passed directly into socket methods

1039 chunks = (message_body,)

1040

1041 for chunk in chunks:

1042 if not chunk:

1043 if self.debuglevel > 0:

1044 print('Zero length chunk ignored')

1045 continue

1046

1047 if encode_chunked and self._http_vsn == 11:

1048 # chunked encoding

1049 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \

1050 + b'\r\n'

1051 self.send(chunk)

1052

1053 if encode_chunked and self._http_vsn == 11:

1054 # end chunked transfer

1055 self.send(b'0\r\n\r\n')

1056

1057 def putrequest(self, method, url, skip_host=False,

1058 skip_accept_encoding=False):

1059 """Send a request to the server.

1060

1061 `method' specifies an HTTP request method, e.g. 'GET'.

1062 `url' specifies the object being requested, e.g. '/index.html'.

1063 `skip_host' if True, does not automatically add a 'Host:' header

1064 `skip_accept_encoding' if True, does not automatically add an

1065 'Accept-Encoding:' header

1066 """

1067

1068 # if a prior response has been completed, then forget about it.

1069 if self.__response and self.__response.isclosed():

1070 self.__response = None

1071

1072

1073 # in certain cases, we cannot issue another request on this connection.

1074 # this occurs when:

1075 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)

1076 # 2) a response to a previous request has signalled that it is going

1077 # to close the connection upon completion.

1078 # 3) the headers for the previous response have not been read, thus

1079 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)

1080 #

1081 # if there is no prior response, then we can request at will.

1082 #

1083 # if point (2) is true, then we will have passed the socket to the

1084 # response (effectively meaning, "there is no prior response"), and

1085 # will open a new one when a new request is made.

1086 #

1087 # Note: if a prior response exists, then we *can* start a new request.

1088 # We are not allowed to begin fetching the response to this new

1089 # request, however, until that prior response is complete.

1090 #

1091 if self.__state == _CS_IDLE:

1092 self.__state = _CS_REQ_STARTED

1093 else:

1094 raise CannotSendRequest(self.__state)

1095

1096 self._validate_method(method)

1097

1098 # Save the method for use later in the response phase

1099 self._method = method

1100

1101 url = url or '/'

1102 self._validate_path(url)

1103

1104 request = '%s %s %s' % (method, url, self._http_vsn_str)

1105

1106 self._output(self._encode_request(request))

1107

1108 if self._http_vsn == 11:

1109 # Issue some standard headers for better HTTP/1.1 compliance

1110

1111 if not skip_host:

1112 # this header is issued *only* for HTTP/1.1

1113 # connections. more specifically, this means it is

1114 # only issued when the client uses the new

1115 # HTTPConnection() class. backwards-compat clients

1116 # will be using HTTP/1.0 and those clients may be

1117 # issuing this header themselves. we should NOT issue

1118 # it twice; some web servers (such as Apache) barf

1119 # when they see two Host: headers

1120

1121 # If we need a non-standard port, include it in the

1122 # header. If the request is going through a proxy,

1123 # use the host of the actual URL, not the host of the

1124 # proxy.

1125

1126 netloc = ''

1127 if url.startswith('http'):

1128 nil, netloc, nil, nil, nil = urlsplit(url)

1129

1130 if netloc:

1131 try:

1132 netloc_enc = netloc.encode("ascii")

1133 except UnicodeEncodeError:

1134 netloc_enc = netloc.encode("idna")

1135 self.putheader('Host', netloc_enc)

1136 else:

1137 if self._tunnel_host:

1138 host = self._tunnel_host

1139 port = self._tunnel_port

1140 else:

1141 host = self.host

1142 port = self.port

1143

1144 try:

1145 host_enc = host.encode("ascii")

1146 except UnicodeEncodeError:

1147 host_enc = host.encode("idna")

1148

1149 # As per RFC 2732, an IPv6 address should be wrapped with []

1150 # when used as a Host header

1151

1152 if host.find(':') >= 0:

1153 host_enc = b'[' + host_enc + b']'

1154

1155 if port == self.default_port:

1156 self.putheader('Host', host_enc)

1157 else:

1158 host_enc = host_enc.decode("ascii")

1159 self.putheader('Host', "%s:%s" % (host_enc, port))

1160

1161 # note: we are assuming that clients will not attempt to set these

1162 # headers since *this* library must deal with the

1163 # consequences. this also means that when the supporting

1164 # libraries are updated to recognize other forms, then this

1165 # code should be changed (removed or updated).

1166

1167 # we only want a Content-Encoding of "identity" since we don't

1168 # support encodings such as x-gzip or x-deflate.

1169 if not skip_accept_encoding:

1170 self.putheader('Accept-Encoding', 'identity')

1171

1172 # we can accept "chunked" Transfer-Encodings, but no others

1173 # NOTE: no TE header implies *only* "chunked"

1174 #self.putheader('TE', 'chunked')

1175

1176 # if TE is supplied in the header, then it must appear in a

1177 # Connection header.

1178 #self.putheader('Connection', 'TE')

1179

1180 else:

1181 # For HTTP/1.0, the server will assume "not chunked"

1182 pass

1183

1184 def _encode_request(self, request):

1185 # ASCII also helps prevent CVE-2019-9740.

1186 return request.encode('ascii')

1187

1188 def _validate_method(self, method):

1189 """Validate a method name for putrequest."""

1190 # prevent http header injection

1191 match = _contains_disallowed_method_pchar_re.search(method)

1192 if match:

1193 raise ValueError(

1194 f"method can't contain control characters. {method!r} "

1195 f"(found at least {match.group()!r})")

1196

1197 def _validate_path(self, url):

1198 """Validate a url for putrequest."""

1199 # Prevent CVE-2019-9740.

1200 match = _contains_disallowed_url_pchar_re.search(url)

1201 if match:

1202 raise InvalidURL(f"URL can't contain control characters. {url!r} "

1203 f"(found at least {match.group()!r})")

1204

1205 def _validate_host(self, host):

1206 """Validate a host so it doesn't contain control characters."""

1207 # Prevent CVE-2019-18348.

1208 match = _contains_disallowed_url_pchar_re.search(host)

1209 if match:

1210 raise InvalidURL(f"URL can't contain control characters. {host!r} "

1211 f"(found at least {match.group()!r})")

1212

1213 def putheader(self, header, *values):

1214 """Send a request header line to the server.

1215

1216 For example: h.putheader('Accept', 'text/html')

1217 """

1218 if self.__state != _CS_REQ_STARTED:

1219 raise CannotSendHeader()

1220

1221 if hasattr(header, 'encode'):

1222 header = header.encode('ascii')

1223

1224 if not _is_legal_header_name(header):

1225 raise ValueError('Invalid header name %r' % (header,))

1226

1227 values = list(values)

1228 for i, one_value in enumerate(values):

1229 if hasattr(one_value, 'encode'):

1230 values[i] = one_value.encode('latin-1')

1231 elif isinstance(one_value, int):

1232 values[i] = str(one_value).encode('ascii')

1233

1234 if _is_illegal_header_value(values[i]):

1235 raise ValueError('Invalid header value %r' % (values[i],))

1236

1237 value = b'\r\n\t'.join(values)

1238 header = header + b': ' + value

1239 self._output(header)

1240

1241 def endheaders(self, message_body=None, *, encode_chunked=False):

1242 """Indicate that the last header line has been sent to the server.

1243

1244 This method sends the request to the server. The optional message_body

1245 argument can be used to pass a message body associated with the

1246 request.

1247 """

1248 if self.__state == _CS_REQ_STARTED:

1249 self.__state = _CS_REQ_SENT

1250 else:

1251 raise CannotSendHeader()

1252 self._send_output(message_body, encode_chunked=encode_chunked)

1253

1254 def request(self, method, url, body=None, headers={}, *,

1255 encode_chunked=False):

1256 """Send a complete request to the server."""

1257 self._send_request(method, url, body, headers, encode_chunked)

1258

1259 def _send_request(self, method, url, body, headers, encode_chunked):

1260 # Honor explicitly requested Host: and Accept-Encoding: headers.

1261 header_names = frozenset(k.lower() for k in headers)

1262 skips = {}

1263 if 'host' in header_names:

1264 skips['skip_host'] = 1

1265 if 'accept-encoding' in header_names:

1266 skips['skip_accept_encoding'] = 1

1267

1268 self.putrequest(method, url, **skips)

1269

1270 # chunked encoding will happen if HTTP/1.1 is used and either

1271 # the caller passes encode_chunked=True or the following

1272 # conditions hold:

1273 # 1. content-length has not been explicitly set

1274 # 2. the body is a file or iterable, but not a str or bytes-like

1275 # 3. Transfer-Encoding has NOT been explicitly set by the caller

1276

1277 if 'content-length' not in header_names:

1278 # only chunk body if not explicitly set for backwards

1279 # compatibility, assuming the client code is already handling the

1280 # chunking

1281 if 'transfer-encoding' not in header_names:

1282 # if content-length cannot be automatically determined, fall

1283 # back to chunked encoding

1284 encode_chunked = False

1285 content_length = self._get_content_length(body, method)

1286 if content_length is None:

1287 if body is not None:

1288 if self.debuglevel > 0:

1289 print('Unable to determine size of %r' % body)

1290 encode_chunked = True

1291 self.putheader('Transfer-Encoding', 'chunked')

1292 else:

1293 self.putheader('Content-Length', str(content_length))

1294 else:

1295 encode_chunked = False

1296

1297 for hdr, value in headers.items():

1298 self.putheader(hdr, value)

1299 if isinstance(body, str):

1300 # RFC 2616 Section 3.7.1 says that text media types have a

1301 # default charset of iso-8859-1.

1302 body = _encode(body, 'body')

1303 self.endheaders(body, encode_chunked=encode_chunked)

1304

1305 def getresponse(self):

1306 """Get the response from the server.

1307

1308 If the HTTPConnection is in the correct state, returns an

1309 instance of HTTPResponse or of whatever object is returned by

1310 the response_class variable.

1311

1312 If a request has not been sent or if a previous response has

1313 not been handled, ResponseNotReady is raised. If the HTTP

1314 response indicates that the connection should be closed, then

1315 it will be closed before the response is returned. When the

1316 connection is closed, the underlying socket is closed.

1317 """

1318

1319 # if a prior response has been completed, then forget about it.

1320 if self.__response and self.__response.isclosed():

1321 self.__response = None

1322

1323 # if a prior response exists, then it must be completed (otherwise, we

1324 # cannot read this response's header to determine the connection-close

1325 # behavior)

1326 #

1327 # note: if a prior response existed, but was connection-close, then the

1328 # socket and response were made independent of this HTTPConnection

1329 # object since a new request requires that we open a whole new

1330 # connection

1331 #

1332 # this means the prior response had one of two states:

1333 # 1) will_close: this connection was reset and the prior socket and

1334 # response operate independently

1335 # 2) persistent: the response was retained and we await its

1336 # isclosed() status to become true.

1337 #

1338 if self.__state != _CS_REQ_SENT or self.__response:

1339 raise ResponseNotReady(self.__state)

1340

1341 if self.debuglevel > 0:

1342 response = self.response_class(self.sock, self.debuglevel,

1343 method=self._method)

1344 else:

1345 response = self.response_class(self.sock, method=self._method)

1346

1347 try:

1348 try:

1349 response.begin()

1350 except ConnectionError:

1351 self.close()

1352 raise

1353 assert response.will_close != _UNKNOWN

1354 self.__state = _CS_IDLE

1355

1356 if response.will_close:

1357 # this effectively passes the connection to the response

1358 self.close()

1359 else:

1360 # remember this, so we can tell when it is complete

1361 self.__response = response

1362

1363 return response

1364 except:

1365 response.close()

1366 raise

1367

1368try:

1369 import ssl

1370except ImportError:

1371 pass

1372else:

1373 class HTTPSConnection(HTTPConnection):

1374 "This class allows communication via SSL."

1375

1376 default_port = HTTPS_PORT

1377

1378 # XXX Should key_file and cert_file be deprecated in favour of context?

1379

1380 def __init__(self, host, port=None, key_file=None, cert_file=None,

1381 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,

1382 source_address=None, *, context=None,

1383 check_hostname=None, blocksize=8192):

1384 super(HTTPSConnection, self).__init__(host, port, timeout,

1385 source_address,

1386 blocksize=blocksize)

1387 if (key_file is not None or cert_file is not None or

1388 check_hostname is not None):

1389 import warnings

1390 warnings.warn("key_file, cert_file and check_hostname are "

1391 "deprecated, use a custom context instead.",

1392 DeprecationWarning, 2)

1393 self.key_file = key_file

1394 self.cert_file = cert_file

1395 if context is None:

1396 context = ssl._create_default_https_context()

1397 # enable PHA for TLS 1.3 connections if available

1398 if context.post_handshake_auth is not None:

1399 context.post_handshake_auth = True

1400 will_verify = context.verify_mode != ssl.CERT_NONE

1401 if check_hostname is None:

1402 check_hostname = context.check_hostname

1403 if check_hostname and not will_verify:

1404 raise ValueError("check_hostname needs a SSL context with "

1405 "either CERT_OPTIONAL or CERT_REQUIRED")

1406 if key_file or cert_file:

1407 context.load_cert_chain(cert_file, key_file)

1408 # cert and key file means the user wants to authenticate.

1409 # enable TLS 1.3 PHA implicitly even for custom contexts.

1410 if context.post_handshake_auth is not None:

1411 context.post_handshake_auth = True

1412 self._context = context

1413 if check_hostname is not None:

1414 self._context.check_hostname = check_hostname

1415

1416 def connect(self):

1417 "Connect to a host on a given (SSL) port."

1418

1419 super().connect()

1420

1421 if self._tunnel_host:

1422 server_hostname = self._tunnel_host

1423 else:

1424 server_hostname = self.host

1425

1426 self.sock = self._context.wrap_socket(self.sock,

1427 server_hostname=server_hostname)

1428

1429 __all__.append("HTTPSConnection")

1430

1431class HTTPException(Exception):

1432 # Subclasses that define an __init__ must call Exception.__init__

1433 # or define self.args. Otherwise, str() will fail.

1434 pass

1435

1436class NotConnected(HTTPException):

1437 pass

1438

1439class InvalidURL(HTTPException):

1440 pass

1441

1442class UnknownProtocol(HTTPException):

1443 def __init__(self, version):

1444 self.args = version,

1445 self.version = version

1446

1447class UnknownTransferEncoding(HTTPException):

1448 pass

1449

1450class UnimplementedFileMode(HTTPException):

1451 pass

1452

1453class IncompleteRead(HTTPException):

1454 def __init__(self, partial, expected=None):

1455 self.args = partial,

1456 self.partial = partial

1457 self.expected = expected

1458 def __repr__(self):

1459 if self.expected is not None:

1460 e = ', %i more expected' % self.expected

1461 else:

1462 e = ''

1463 return '%s(%i bytes read%s)' % (self.__class__.__name__,

1464 len(self.partial), e)

1465 __str__ = object.__str__

1466

1467class ImproperConnectionState(HTTPException):

1468 pass

1469

1470class CannotSendRequest(ImproperConnectionState):

1471 pass

1472

1473class CannotSendHeader(ImproperConnectionState):

1474 pass

1475

1476class ResponseNotReady(ImproperConnectionState):

1477 pass

1478

1479class BadStatusLine(HTTPException):

1480 def __init__(self, line):

1481 if not line:

1482 line = repr(line)

1483 self.args = line,

1484 self.line = line

1485

1486class LineTooLong(HTTPException):

1487 def __init__(self, line_type):

1488 HTTPException.__init__(self, "got more than %d bytes when reading %s"

1489 % (_MAXLINE, line_type))

1490

1491class RemoteDisconnected(ConnectionResetError, BadStatusLine):

1492 def __init__(self, *pos, **kw):

1493 BadStatusLine.__init__(self, "")

1494 ConnectionResetError.__init__(self, *pos, **kw)

1495

1496# for backwards compatibility

1497error = HTTPException