Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/http/server.py: 14%

1"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (including POST).

7It does, however, optionally implement HTTP/1.1 persistent connections,

8as of version 0.3.

10Notes on CGIHTTPRequestHandler

11------------------------------

13This class implements GET and POST requests to cgi-bin scripts.

15If the os.fork() function is not present (e.g. on Windows),

16subprocess.Popen() is used as a fallback, with slightly altered semantics.

18In all cases, the implementation is intentionally naive -- all

19requests are executed synchronously.

21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL

22-- it may execute arbitrary Python code or external programs.

24Note that status code 200 is sent prior to execution of a CGI script, so

25scripts cannot send other status codes such as 302 (redirect).

27XXX To do:

29- log requests even later (to capture byte count)

30- log user-agent header and other interesting goodies

31- send error log to separate file

32"""

35# See also:

36#

37# HTTP Working Group T. Berners-Lee

38# INTERNET-DRAFT R. T. Fielding

39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen

40# Expires September 8, 1995 March 8, 1995

41#

42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt

43#

44# and

45#

46# Network Working Group R. Fielding

47# Request for Comments: 2616 et al

48# Obsoletes: 2068 June 1999

49# Category: Standards Track

50#

51# URL: http://www.faqs.org/rfcs/rfc2616.html

53# Log files

54# ---------

55#

56# Here's a quote from the NCSA httpd docs about log file format.

57#

58# | The logfile format is as follows. Each line consists of:

59# |

60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb

61# |

62# | host: Either the DNS name or the IP number of the remote client

63# | rfc931: Any information returned by identd for this person,

64# | - otherwise.

65# | authuser: If user sent a userid for authentication, the user name,

66# | - otherwise.

67# | DD: Day

68# | Mon: Month (calendar name)

69# | YYYY: Year

70# | hh: hour (24-hour format, the machine's timezone)

71# | mm: minutes

72# | ss: seconds

73# | request: The first line of the HTTP request as sent by the client.

74# | ddd: the status code returned by the server, - if not available.

75# | bbbb: the total number of bytes sent,

76# | *not including the HTTP/1.0 header*, - if not available

77# |

78# | You can determine the name of the file accessed through request.

79#

80# (Actually, the latter is only true if you know the server configuration

81# at the time the request was made!)

# Version of this module.  The request handler classes append it to their
# ``server_version`` strings (e.g. "BaseHTTP/0.6", "SimpleHTTP/0.6").
__version__ = "0.6"

# Public names of this module.
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]

90import copy

91import datetime

92import email.utils

93import html

94import http.client

95import io

96import mimetypes

97import os

98import posixpath

99import select

100import shutil

101import socket # For gethostbyaddr()

102import socketserver

103import sys

104import time

105import urllib.parse

106import contextlib

107from functools import partial

108

109from http import HTTPStatus

110

111

# Default error message template.
# BaseHTTPRequestHandler.send_error() fills it in with a mapping that
# provides the keys ``code``, ``message`` and ``explain``; the message and
# explanation are HTML-escaped before substitution.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
 "http://www.w3.org/TR/html4/strict.dtd">
<html>
 <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
 <title>Error response</title>
 </head>
 <body>
 <h1>Error response</h1>
 <p>Error code: %(code)d</p>
 <p>Message: %(message)s.</p>
 <p>Error code explanation: %(code)s - %(explain)s.</p>
 </body>
</html>
"""

# Content-Type header value sent along with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"

131

class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port when it binds."""

    # Seems to make sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store ``server_name`` and ``server_port``.

        The name is the fully qualified domain name of the bound host and
        the port is taken from the bound address.
        """
        super().server_bind()
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port

142

143

class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Request threads are created as daemon threads
    # (see socketserver.ThreadingMixIn for the exact semantics).
    daemon_threads = True

146

147

class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone also accepts signs, surrounding whitespace and
                # "_" separators (e.g. "+1" or "1_0"); only plain digit runs
                # of a sane length are valid version components.
                if any(not component.isdigit()
                       for component in version_number):
                    raise ValueError("non digit in http version")
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = (int(version_number[0]),
                                  int(version_number[1]))
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if (version_number >= (1, 1)
                    and self.protocol_version >= "HTTP/1.1"):
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # No version given: HTTP/0.9, which only knows GET.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # Collapse a leading run of slashes to a single one.  HTTP clients
        # treat "//host/path" as a scheme-relative absolute URI rather than
        # a path, so echoing such a path back in a redirect would allow an
        # open redirect to another host.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one of exactly the maximum size.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                # EOF: the client closed the connection.
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
            code not in (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        Nothing is buffered for HTTP/0.9 requests, whose replies carry
        no status line.  When message is None the standard reason phrase
        for code is used (or an empty string for unknown codes).
        """
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer.

        A "Connection" header also updates self.close_connection, whether
        or not the header itself is buffered.
        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, as send_header() and
            # send_response_only() do, so that calling end_headers()
            # before anything was buffered does not raise AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header lines to wfile and reset the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table used by log_message(): C0 and C1 control characters
    # (0x00-0x1f and 0x7f-0x9f) become "\xNN" escapes and the backslash is
    # doubled so the escaping stays unambiguous.
    _control_char_table = str.maketrans(
        {c: fr'\x{c:02x}' for c in [*range(0x20), *range(0x7f, 0xa0)]})
    _control_char_table[ord('\\')] = r'\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are replaced by
        hex escapes: the message usually embeds the client-supplied
        request line, which must not be able to forge log lines or
        send terminal control sequences to whoever reads the log.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, *_ = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }

626

627

class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the directory given at construction time
    (the current directory by default) and any of its subdirectories.
    The MIME type for files is determined by calling the .guess_type()
    method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__
    # Extensions consulted by guess_type() before the mimetypes module.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }

    def __init__(self, *args, directory=None, **kwargs):
        """Remember the directory to serve, then set up the base handler.

        directory may be any path-like object; None means the current
        working directory.  It is stored before calling the base class
        initializer so it is already set while the request is handled.
        """
        if directory is None:
            directory = os.getcwd()
        self.directory = os.fspath(directory)
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                # The redirect has no body; say so explicitly, otherwise a
                # client on a persistent connection waits for one.
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                # Only a regular file can act as the index page; a
                # directory that happens to be named index.html cannot.
                if os.path.isfile(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailingSlash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parsing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            if ("If-Modified-Since" in self.headers
                    and "If-None-Match" not in self.headers):
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc)
                        # remove microseconds, like in If-Modified-Since
                        last_modif = last_modif.replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except:
            # Deliberately bare: close the file on any failure (including
            # KeyboardInterrupt) and let the exception propagate.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            # Fall back to the default error handling, still on the
            # request path: showing the translated local filesystem path
            # here would reveal where the served directory lives.
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                    % (urllib.parse.quote(linkname,
                                          errors='surrogatepass'),
                       html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is always located under self.directory; a trailing
        slash in the request path is preserved.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = self.directory
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map (first as given, then
        lowercased), then asks the mimetypes module, and finally
        falls back to application/octet-stream; however it would be
        permissible (if slow) to look inside the data to make a
        better guess.

        """
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        guess, _ = mimetypes.guess_type(path)
        if guess:
            return guess
        return 'application/octet-stream'

885

886

887# Utilities for CGIHTTPRequestHandler

888

889def _url_collapse_path(path):

890 """

891 Given a URL path, remove extra '/'s and '.' path elements and collapse

892 any '..' references and returns a collapsed path.

893

894 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.

895 The utility of this function is limited to is_cgi method and helps

896 preventing some security attacks.

897

898 Returns: The reconstituted URL, which will always start with a '/'.

899

900 Raises: IndexError if too many '..' occur within the path.

901

902 """

903 # Query component should not be involved.

904 path, _, query = path.partition('?')

905 path = urllib.parse.unquote(path)

906

907 # Similar to os.path.split(os.path.normpath(path)) but specific to URL

908 # path semantics rather than local operating system semantics.

909 path_parts = path.split('/')

910 head_parts = []

911 for part in path_parts[:-1]:

912 if part == '..':

913 head_parts.pop() # IndexError if more '..' than prior parts

914 elif part and part != '.':

915 head_parts.append( part )

916 if path_parts:

917 tail_part = path_parts.pop()

918 if tail_part:

919 if tail_part == '..':

920 head_parts.pop()

921 tail_part = ''

922 elif tail_part == '.':

923 tail_part = ''

924 else:

925 tail_part = ''

926

927 if query:

928 tail_part = '?'.join((tail_part, query))

929

930 splitpath = ('/' + '/'.join(head_parts), tail_part)

931 collapsed_path = "/".join(splitpath)

932

933 return collapsed_path

# Cached result of nobody_uid(); None until it has been computed.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is computed once and cached in the module-level ``nobody``.
    Returns -1 (without caching) when the pwd module is unavailable.  When
    there is no 'nobody' account, one more than the highest uid in the
    password database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody

953

954

def executable(path):
    """Return whether os.access() reports execute permission for path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute

958

959

class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL path prefixes whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target gets
        a 501 (Not Implemented) error response.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts."""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try every '/'-terminated prefix until one is a CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    @staticmethod
    def _basic_auth_user(credentials):
        """Extract the user-id from base64 HTTP Basic *credentials*.

        Returns the text before the first ':' of the decoded
        "user-id:password" pair, or None when the credentials cannot be
        decoded as ASCII base64 or contain no ':' at all.  Only the first
        colon separates the two fields, so a password may itself contain
        colons.
        """
        import base64
        import binascii
        try:
            decoded = base64.decodebytes(
                credentials.encode('ascii')).decode('ascii')
        except (binascii.Error, UnicodeError):
            return None
        user, colon, _password = decoded.partition(':')
        return user if colon else None

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi) to locate the script, sends an
        error response if it is missing, not a plain file or not
        executable, otherwise builds the CGI environment, sends a 200
        status line and runs the script -- via fork/execve when os.fork
        exists, via subprocess.Popen otherwise.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Descend through nested directories below the CGI directory so
        # that 'dir' ends up as the deepest existing directory.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter and
        # therefore need no execute permission of their own.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                scheme, credentials = authorization
                env['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    user = self._basic_auth_user(credentials)
                    if user is not None:
                        env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is handed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: if the uid cannot be changed, the
                    # script runs under the current uid.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except BaseException:
                # Catch everything: the child must never fall back into
                # the server's request loop, so report and exit.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")

1221

1222

def _get_best_family(*address):
    """Return (family, sockaddr) of the first getaddrinfo() result.

    *address* is forwarded positionally to socket.getaddrinfo(), which is
    asked for passive (AI_PASSIVE) stream-socket (SOCK_STREAM) addresses.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    # Each entry is (family, type, proto, canonname, sockaddr); only the
    # first and last fields of the first entry are needed.
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr

1231

1232

def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Run an HTTP server with the given handler class until interrupted.

    The server listens on port 8000 (or the port argument) at the
    address chosen by _get_best_family(bind, port).  Note that
    ServerClass.address_family and HandlerClass.protocol_version are
    assigned on the objects passed in.  A KeyboardInterrupt ends the
    process via sys.exit(0).

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, not the one requested.
        host, port = httpd.socket.getsockname()[:2]
        # Hosts containing ':' are bracketed so the printed URL is valid.
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)

1256

if __name__ == '__main__':
    # Command-line entry point: parse options and start a server via test().
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                             '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()

    # Pass the handler class itself rather than a functools.partial
    # wrapper: test() assigns protocol_version on HandlerClass, and that
    # assignment must land on the real class.  The directory is supplied
    # per request in DualStackServer.finish_request below, so --directory
    # is honoured with and without --cgi.
    if args.cgi:
        handler_class = CGIHTTPRequestHandler
    else:
        handler_class = SimpleHTTPRequestHandler

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Instantiate the handler with the directory chosen on the
            # command line.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )

1292 port=args.port,

1293 bind=args.bind,

1294 )