Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/http/server.py: 15%
591 statements
« prev ^ index » next coverage.py v7.0.1, created at 2022-12-25 06:11 +0000
« prev ^ index » next coverage.py v7.0.1, created at 2022-12-25 06:11 +0000
1"""HTTP server classes.
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
10Notes on CGIHTTPRequestHandler
11------------------------------
13This class implements GET and POST requests to cgi-bin scripts.
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
27XXX To do:
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
35# See also:
36#
37# HTTP Working Group T. Berners-Lee
38# INTERNET-DRAFT R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40# Expires September 8, 1995 March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group R. Fielding
47# Request for Comments: 2616 et al
48# Obsoletes: 2068 June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# | host: Either the DNS name or the IP number of the remote client
63# | rfc931: Any information returned by identd for this person,
64# | - otherwise.
65# | authuser: If user sent a userid for authentication, the user name,
66# | - otherwise.
67# | DD: Day
68# | Mon: Month (calendar name)
69# | YYYY: Year
70# | hh: hour (24-hour format, the machine's timezone)
71# | mm: minutes
72# | ss: seconds
73# | request: The first line of the HTTP request as sent by the client.
74# | ddd: the status code returned by the server, - if not available.
75# | bbbb: the total number of bytes sent,
76# | *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
83__version__ = "0.6"
85__all__ = [
86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88]
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
109from http import HTTPStatus
112# Default error message template
113DEFAULT_ERROR_MESSAGE = """\
114<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
115 "http://www.w3.org/TR/html4/strict.dtd">
116<html>
117 <head>
118 <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
119 <title>Error response</title>
120 </head>
121 <body>
122 <h1>Error response</h1>
123 <p>Error code: %(code)d</p>
124 <p>Message: %(message)s.</p>
125 <p>Error code explanation: %(code)s - %(explain)s.</p>
126 </body>
127</html>
128"""
130DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its host name and port once it is bound."""

    # Allow the listening address to be reused (convenient when testing).
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        After the base class has bound the socket, ``server_name`` is set to
        the fully qualified name of the bound host and ``server_port`` to the
        bound port number.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Option consumed by socketserver.ThreadingMixIn: request threads are
    # created as daemon threads.
    daemon_threads = True
148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
150 """HTTP request handler base class.
152 The following explanation of HTTP serves to guide you through the
153 code as well as to expose any misunderstandings I may have about
154 HTTP (so you don't need to read the code to figure out I'm wrong
155 :-).
157 HTTP (HyperText Transfer Protocol) is an extensible protocol on
158 top of a reliable stream transport (e.g. TCP/IP). The protocol
159 recognizes three parts to a request:
161 1. One line identifying the request type and path
162 2. An optional set of RFC-822-style headers
163 3. An optional data part
165 The headers and data are separated by a blank line.
167 The first line of the request has the form
169 <command> <path> <version>
171 where <command> is a (case-sensitive) keyword such as GET or POST,
172 <path> is a string containing path information for the request,
173 and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
174 <path> is encoded using the URL encoding scheme (using %xx to signify
175 the ASCII character with hex code xx).
177 The specification specifies that lines are separated by CRLF but
178 for compatibility with the widest range of clients recommends
179 servers also handle LF. Similarly, whitespace in the request line
180 is treated sensibly (allowing multiple spaces between components
181 and allowing trailing whitespace).
183 Similarly, for output, lines ought to be separated by CRLF pairs
184 but most clients grok LF characters just fine.
186 If the first line of the request has the form
188 <command> <path>
190 (i.e. <version> is left out) then this is assumed to be an HTTP
191 0.9 request; this form has no optional headers and data part and
192 the reply consists of just the data.
194 The reply form of the HTTP 1.x protocol again has three parts:
196 1. One line giving the response code
197 2. An optional set of RFC-822-style headers
198 3. The data
200 Again, the headers and data are separated by a blank line.
202 The response code line has the form
204 <version> <responsecode> <responsestring>
206 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
207 <responsecode> is a 3-digit response code indicating success or
208 failure of the request, and <responsestring> is an optional
209 human-readable string explaining what the response code means.
211 This server parses the request and the headers, and then calls a
212 function specific to the request type (<command>). Specifically,
213 a request SPAM will be handled by a method do_SPAM(). If no
214 such method exists the server sends an error response to the
215 client. If it exists, it is called with no arguments:
217 do_SPAM()
219 Note that the request name is case sensitive (i.e. SPAM and spam
220 are different requests).
222 The various request details are stored in instance variables:
224 - client_address is the client IP address in the form (host,
225 port);
227 - command, path and version are the broken-down request line;
229 - headers is an instance of email.message.Message (or a derived
230 class) containing the header information;
232 - rfile is a file object open for reading positioned at the
233 start of the optional input data part;
235 - wfile is a file object open for writing.
237 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
239 The first thing to be written must be the response line. Then
240 follow 0 or more header lines, then a blank line, and then the
241 actual data (if any). The meaning of the header lines depends on
242 the command executed by the server; in most cases, when data is
243 returned, there should be at least one header line of the form
245 Content-type: <type>/<subtype>
247 where <type> and <subtype> should be registered MIME types,
248 e.g. "text/html" or "text/plain".
250 """
252 # The Python system version, truncated to its first component.
253 sys_version = "Python/" + sys.version.split()[0]
255 # The server software version. You may want to override this.
256 # The format is multiple whitespace-separated strings,
257 # where each string is of the form name[/version].
258 server_version = "BaseHTTP/" + __version__
260 error_message_format = DEFAULT_ERROR_MESSAGE
261 error_content_type = DEFAULT_ERROR_CONTENT_TYPE
263 # The default request version. This only affects responses up until
264 # the point where the request line is parsed, so it mainly decides what
265 # the client gets back when sending a malformed request line.
266 # Most web servers default to HTTP 0.9, i.e. don't send a status line.
267 default_request_version = "HTTP/0.9"
def parse_request(self):
    """Parse a request (internal).

    The request should be stored in self.raw_requestline; the results
    are in self.command, self.path, self.request_version and
    self.headers.

    Return True for success, False for failure; on failure, any relevant
    error response has already been sent back.

    Version components must consist of decimal digits only, and a path
    that starts with more than one '/' is collapsed to a single leading
    '/' so it can never be echoed back as a scheme-relative URL.
    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = True
    requestline = str(self.raw_requestline, 'iso-8859-1')
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    if not words:
        return False

    if len(words) >= 3:  # Enough to determine protocol version
        version = words[-1]
        try:
            if not version.startswith('HTTP/'):
                raise ValueError
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            # int() alone would accept signs, underscores and surrounding
            # whitespace ("+1", "1_0"); only plain digits are legal here.
            if any(not component.isdigit() for component in version_number):
                raise ValueError("non digit in http version")
            if any(len(component) > 10 for component in version_number):
                raise ValueError("unreasonable length http version")
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request version (%r)" % version)
            return False
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = False
        if version_number >= (2, 0):
            self.send_error(
                HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                "Invalid HTTP version (%s)" % base_version_number)
            return False
        self.request_version = version

    if not 2 <= len(words) <= 3:
        self.send_error(
            HTTPStatus.BAD_REQUEST,
            "Bad request syntax (%r)" % requestline)
        return False
    command, path = words[:2]
    if len(words) == 2:
        # No version on the request line: treat as HTTP/0.9.
        self.close_connection = True
        if command != 'GET':
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad HTTP/0.9 request type (%r)" % command)
            return False
    self.command, self.path = command, path

    # A path beginning with '//' would be parsed by urlsplit() as having a
    # network location; if it is later reflected in a Location header the
    # client treats it as a redirect to another host.  Reduce any run of
    # leading slashes to a single one.
    if self.path.startswith('//'):
        self.path = '/' + self.path.lstrip('/')

    # Examine the headers and look for a Connection directive.
    try:
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)
    except http.client.LineTooLong as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Line too long",
            str(err))
        return False
    except http.client.HTTPException as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Too many headers",
            str(err)
        )
        return False

    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.close_connection = True
    elif (conntype.lower() == 'keep-alive' and
          self.protocol_version >= "HTTP/1.1"):
        self.close_connection = False
    # Examine the headers and look for an Expect directive
    expect = self.headers.get('Expect', "")
    if (expect.lower() == "100-continue" and
            self.protocol_version >= "HTTP/1.1" and
            self.request_version >= "HTTP/1.1"):
        if not self.handle_expect_100():
            return False
    return True
def handle_expect_100(self):
    """Answer a request that carried "Expect: 100-continue".

    A client that sent this header waits for either a 100 Continue or a
    final response before transmitting the request body.  This default
    implementation always replies 100 Continue.  Override it to behave
    differently (for instance to reject unauthorized requests).

    Implementations must either return True (possibly after sending a
    100 Continue response) or send an error response and return False.
    """
    interim_status = HTTPStatus.CONTINUE
    self.send_response_only(interim_status)
    self.end_headers()
    return True
def handle_one_request(self):
    """Read, parse and dispatch exactly one HTTP request.

    Overriding this is rarely necessary; implement ``do_<COMMAND>``
    methods (for example ``do_GET``) to handle specific HTTP commands.
    """
    try:
        # Read one byte more than the limit so an over-long line is
        # detectable.
        line = self.rfile.readline(65537)
        self.raw_requestline = line
        if len(line) > 65536:
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
            return
        if not line:
            # Nothing was read: stop serving this connection.
            self.close_connection = True
            return
        if not self.parse_request():
            # parse_request() already sent the error response.
            return
        handler_name = 'do_' + self.command
        if not hasattr(self, handler_name):
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Unsupported method (%r)" % self.command)
            return
        getattr(self, handler_name)()
        # Actually send the response if not already done.
        self.wfile.flush()
    except socket.timeout as e:
        # A read or a write timed out.  Discard this connection.
        self.log_error("Request timed out: %r", e)
        self.close_connection = True
        return
def handle(self):
    """Serve requests on this connection until it should be closed."""
    self.close_connection = True

    # At least one request is always handled; the loop continues for as
    # long as close_connection is false after a request.
    while True:
        self.handle_one_request()
        if self.close_connection:
            break
def send_error(self, code, message=None, explain=None):
    """Send and log an error reply.

    Arguments are
    * code:    an HTTP error code
               3 digits
    * message: a simple optional 1 line reason phrase.
               *( HTAB / SP / VCHAR / %x80-FF )
               defaults to short entry matching the response code
    * explain: a detailed message defaults to the long entry
               matching the response code.

    The status line and headers are sent first (so no other output may
    have been produced yet), the error is logged, and an HTML page that
    describes the error is written as the body where a body is allowed.
    """
    try:
        shortmsg, longmsg = self.responses[code]
    except KeyError:
        shortmsg, longmsg = '???', '???'
    message = shortmsg if message is None else message
    explain = longmsg if explain is None else explain

    self.log_error("code %d, message %s", code, message)
    self.send_response(code, message)
    self.send_header('Connection', 'close')

    # Message body is omitted for cases described in:
    #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    #  - RFC7231: 6.3.6. 205(Reset Content)
    bodyless_statuses = (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)
    body = None
    if code >= 200 and code not in bodyless_statuses:
        # HTML encode to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        fields = {
            'code': code,
            'message': html.escape(message, quote=False),
            'explain': html.escape(explain, quote=False),
        }
        content = self.error_message_format % fields
        body = content.encode('UTF-8', 'replace')
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Content-Length', str(len(body)))
    self.end_headers()

    # A HEAD request gets the headers only.
    if self.command != 'HEAD' and body:
        self.wfile.write(body)
def send_response(self, code, message=None):
    """Log the response code and buffer the status line.

    Two standard headers are buffered as well: ``Server`` with the
    server software version and ``Date`` with the current date.
    """
    self.log_request(code)
    self.send_response_only(code, message)

    software = self.version_string()
    self.send_header('Server', software)
    now = self.date_time_string()
    self.send_header('Date', now)
def send_response_only(self, code, message=None):
    """Buffer the status line only (no extra headers, no logging).

    Nothing is buffered for HTTP/0.9 requests.  When *message* is None
    the short phrase registered for *code* in ``self.responses`` is
    used, or an empty string when the code is unknown.
    """
    if self.request_version == 'HTTP/0.9':
        return
    if message is None:
        message = self.responses[code][0] if code in self.responses else ''
    if not hasattr(self, '_headers_buffer'):
        self._headers_buffer = []
    status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
    self._headers_buffer.append(status_line.encode('latin-1', 'strict'))
def send_header(self, keyword, value):
    """Buffer one MIME header line.

    Nothing is buffered for HTTP/0.9 requests.  A ``Connection`` header
    (matched case-insensitively) also updates ``close_connection``:
    ``close`` sets it to True and ``keep-alive`` sets it to False.
    """
    if self.request_version != 'HTTP/0.9':
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        header_line = "%s: %s\r\n" % (keyword, value)
        self._headers_buffer.append(header_line.encode('latin-1', 'strict'))

    if keyword.lower() != 'connection':
        return
    token = value.lower()
    if token == 'close':
        self.close_connection = True
    elif token == 'keep-alive':
        self.close_connection = False
def end_headers(self):
    """Buffer the blank line that ends the MIME headers and flush them.

    Does nothing for HTTP/0.9 requests.
    """
    if self.request_version == 'HTTP/0.9':
        return
    self._headers_buffer.append(b"\r\n")
    self.flush_headers()
def flush_headers(self):
    """Write any buffered header bytes to ``wfile`` and reset the buffer."""
    if not hasattr(self, '_headers_buffer'):
        return
    pending = b"".join(self._headers_buffer)
    self.wfile.write(pending)
    self._headers_buffer = []
def log_request(self, code='-', size='-'):
    """Log an accepted request.

    Called by send_response().  An ``HTTPStatus`` member is logged by
    its numeric value.
    """
    status = code.value if isinstance(code, HTTPStatus) else code
    self.log_message('"%s" %s %s', self.requestline, str(status), str(size))
def log_error(self, format, *args):
    """Log an error.

    Used when a request cannot be fulfilled.  The default
    implementation simply forwards to log_message() and takes the same
    arguments.

    XXX This should go to the separate error log.
    """
    forwarded = args
    self.log_message(format, *forwarded)
def log_message(self, format, *args):
    """Log an arbitrary message.

    This is used by all other logging functions.  Override
    it if you have specific logging wishes.

    The first argument, FORMAT, is a format string for the
    message to be logged.  If the format string contains
    any % escapes requiring parameters, they should be
    specified as subsequent arguments (it's just like
    printf!).

    The client ip and current date/time are prefixed to
    every message.

    Control characters in the formatted message (which may contain
    client-supplied text such as the request line) are written as
    ``\\xNN`` escapes and a literal backslash is doubled, so a request
    cannot inject extra log lines or terminal escape sequences.
    """
    # C0 controls (incl. CR/LF/ESC), DEL and the C1 range.
    escapes = {c: '\\x%02x' % c
               for c in list(range(0x20)) + list(range(0x7f, 0xa0))}
    # Keep the escaping unambiguous.
    escapes[ord('\\')] = '\\\\'

    message = (format % args).translate(escapes)
    sys.stderr.write("%s - - [%s] %s\n" %
                     (self.address_string(),
                      self.log_date_time_string(),
                      message))
def version_string(self):
    """Return ``server_version`` and ``sys_version`` joined by a space."""
    software = self.server_version
    runtime = self.sys_version
    return software + ' ' + runtime
def date_time_string(self, timestamp=None):
    """Format *timestamp* (default: now) for use in a message header.

    The result is the ``email.utils.formatdate`` rendering with
    ``usegmt=True``.
    """
    moment = time.time() if timestamp is None else timestamp
    return email.utils.formatdate(moment, usegmt=True)
def log_date_time_string(self):
    """Return the current local time as ``DD/Mon/YYYY hh:mm:ss``.

    The month abbreviation is looked up in ``self.monthname``.
    """
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)
601 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
603 monthname = [None,
604 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
605 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def address_string(self):
    """Return the host part (first element) of ``client_address``."""
    client_host = self.client_address[0]
    return client_host
612 # Essentially static class variables
614 # The version of the HTTP protocol we support.
615 # Set this to HTTP/1.1 to enable automatic keepalive
616 protocol_version = "HTTP/1.0"
618 # MessageClass used to parse headers
619 MessageClass = http.client.HTTPMessage
621 # hack to maintain backwards compatibility
622 responses = {
623 v: (v.phrase, v.description)
624 for v in HTTPStatus.__members__.values()
625 }
628class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
630 """Simple HTTP request handler with GET and HEAD commands.
632 This serves files from the current directory and any of its
633 subdirectories. The MIME type for files is determined by
634 calling the .guess_type() method.
636 The GET and HEAD requests are identical except that the HEAD
637 request omits the actual contents of the file.
639 """
641 server_version = "SimpleHTTP/" + __version__
def __init__(self, *args, directory=None, **kwargs):
    """Remember the directory to serve, then run the base initializer.

    *directory* defaults to the current working directory at the time
    the handler is created.  It is stored before the base ``__init__``
    is called.
    """
    self.directory = os.getcwd() if directory is None else directory
    super().__init__(*args, **kwargs)
def do_GET(self):
    """Serve a GET request: send the headers, then the file contents."""
    source = self.send_head()
    if not source:
        # send_head() already produced the complete response.
        return
    try:
        self.copyfile(source, self.wfile)
    finally:
        source.close()
def do_HEAD(self):
    """Serve a HEAD request: send the headers and discard the file."""
    opened = self.send_head()
    if opened:
        opened.close()
def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.

    A request for a directory without a trailing slash is answered with
    a 301 redirect that carries ``Content-Length: 0`` so that clients on
    a persistent connection do not wait for a body.
    """
    path = self.translate_path(self.path)
    f = None
    if os.path.isdir(path):
        parts = urllib.parse.urlsplit(self.path)
        if not parts.path.endswith('/'):
            # redirect browser - doing basically what apache does
            self.send_response(HTTPStatus.MOVED_PERMANENTLY)
            new_parts = (parts[0], parts[1], parts[2] + '/',
                         parts[3], parts[4])
            new_url = urllib.parse.urlunsplit(new_parts)
            self.send_header("Location", new_url)
            # The redirect has no body; say so explicitly.
            self.send_header("Content-Length", "0")
            self.end_headers()
            return None
        for index in "index.html", "index.htm":
            index = os.path.join(path, index)
            if os.path.exists(index):
                path = index
                break
        else:
            return self.list_directory(path)
    ctype = self.guess_type(path)
    # check for trailing "/" which should return 404. See Issue17324
    # The test for this was added in test_httpserver.py
    # However, some OS platforms accept a trailingSlash as a filename
    # See discussion on python-dev and Issue34711 regarding
    # parsing and rejection of filenames with a trailing slash
    if path.endswith("/"):
        self.send_error(HTTPStatus.NOT_FOUND, "File not found")
        return None
    try:
        f = open(path, 'rb')
    except OSError:
        self.send_error(HTTPStatus.NOT_FOUND, "File not found")
        return None

    try:
        fs = os.fstat(f.fileno())
        # Use browser cache if possible
        if ("If-Modified-Since" in self.headers
                and "If-None-Match" not in self.headers):
            # compare If-Modified-Since and time of last file modification
            try:
                ims = email.utils.parsedate_to_datetime(
                    self.headers["If-Modified-Since"])
            except (TypeError, IndexError, OverflowError, ValueError):
                # ignore ill-formed values
                pass
            else:
                if ims.tzinfo is None:
                    # obsolete format with no timezone, cf.
                    # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                    ims = ims.replace(tzinfo=datetime.timezone.utc)
                if ims.tzinfo is datetime.timezone.utc:
                    # compare to UTC datetime of last modification
                    last_modif = datetime.datetime.fromtimestamp(
                        fs.st_mtime, datetime.timezone.utc)
                    # remove microseconds, like in If-Modified-Since
                    last_modif = last_modif.replace(microsecond=0)

                    if last_modif <= ims:
                        self.send_response(HTTPStatus.NOT_MODIFIED)
                        self.end_headers()
                        f.close()
                        return None

        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", ctype)
        self.send_header("Content-Length", str(fs[6]))
        self.send_header("Last-Modified",
                         self.date_time_string(fs.st_mtime))
        self.end_headers()
        return f
    except:
        # Deliberately bare: the file must be closed on any failure
        # before the exception is re-raised unchanged.
        f.close()
        raise
def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().
    """
    try:
        entries = os.listdir(path)
    except OSError:
        self.send_error(
            HTTPStatus.NOT_FOUND,
            "No permission to list directory")
        return None
    entries.sort(key=lambda entry: entry.lower())

    try:
        displaypath = urllib.parse.unquote(self.path,
                                           errors='surrogatepass')
    except UnicodeDecodeError:
        displaypath = urllib.parse.unquote(path)
    displaypath = html.escape(displaypath, quote=False)
    enc = sys.getfilesystemencoding()
    title = 'Directory listing for %s' % displaypath

    page = [
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
        '"http://www.w3.org/TR/html4/strict.dtd">',
        '<html>\n<head>',
        '<meta http-equiv="Content-Type" '
        'content="text/html; charset=%s">' % enc,
        '<title>%s</title>\n</head>' % title,
        '<body>\n<h1>%s</h1>' % title,
        '<hr>\n<ul>',
    ]
    for name in entries:
        fullname = os.path.join(path, name)
        # Append / for directories or @ for symbolic links
        linkname = name + "/" if os.path.isdir(fullname) else name
        displayname = linkname
        if os.path.islink(fullname):
            # Note: a link to a directory displays with @ and links with /
            displayname = name + "@"
        href = urllib.parse.quote(linkname, errors='surrogatepass')
        label = html.escape(displayname, quote=False)
        page.append('<li><a href="%s">%s</a></li>' % (href, label))
    page.append('</ul>\n<hr>\n</body>\n</html>\n')

    encoded = '\n'.join(page).encode(enc, 'surrogateescape')
    f = io.BytesIO(encoded)
    self.send_response(HTTPStatus.OK)
    self.send_header("Content-type", "text/html; charset=%s" % enc)
    self.send_header("Content-Length", str(len(encoded)))
    self.end_headers()
    return f
def translate_path(self, path):
    """Translate a /-separated PATH to the local filename syntax.

    The result is rooted at ``self.directory``.  Components that mean
    special things to the local file system (e.g. drive or directory
    names) are ignored.  (XXX They should probably be diagnosed.)
    """
    # Drop the query string, then the fragment.
    path = path.partition('?')[0]
    path = path.partition('#')[0]
    # Don't forget explicit trailing slash when normalizing. Issue17324
    trailing_slash = path.rstrip().endswith('/')
    try:
        path = urllib.parse.unquote(path, errors='surrogatepass')
    except UnicodeDecodeError:
        path = urllib.parse.unquote(path)
    normalized = posixpath.normpath(path)

    local_path = self.directory
    for word in normalized.split('/'):
        if not word:
            continue
        if os.path.dirname(word) or word in (os.curdir, os.pardir):
            # Ignore components that are not a simple file/directory name
            continue
        local_path = os.path.join(local_path, word)
    if trailing_slash:
        local_path += '/'
    return local_path
def copyfile(self, source, outputfile):
    """Copy all data between two file objects.

    SOURCE is a file object open for reading (or anything with a
    read() method); OUTPUTFILE is a file object open for writing (or
    anything with a write() method).

    The only reason for overriding this would be to change the block
    size or perhaps to replace newlines by CRLF -- note however that
    the default server uses this to copy binary data as well.
    """
    reader, writer = source, outputfile
    shutil.copyfileobj(reader, writer)
def guess_type(self, path):
    """Guess the type of a file.

    Argument is a PATH (a filename).

    Return value is a string of the form type/subtype,
    usable for a MIME Content-type header.

    The extension is looked up in self.extensions_map, first exactly as
    written and then lower-cased; the entry stored under '' is the
    fallback when neither is present.
    """
    ext = posixpath.splitext(path)[1]
    for candidate in (ext, ext.lower()):
        if candidate in self.extensions_map:
            return self.extensions_map[candidate]
    return self.extensions_map['']
879 if not mimetypes.inited:
880 mimetypes.init() # try to read system mime.types
881 extensions_map = mimetypes.types_map.copy()
882 extensions_map.update({
883 '': 'application/octet-stream', # Default
884 '.py': 'text/plain',
885 '.c': 'text/plain',
886 '.h': 'text/plain',
887 })
890# Utilities for CGIHTTPRequestHandler
892def _url_collapse_path(path):
893 """
894 Given a URL path, remove extra '/'s and '.' path elements and collapse
895 any '..' references and returns a collapsed path.
897 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
898 The utility of this function is limited to is_cgi method and helps
899 preventing some security attacks.
901 Returns: The reconstituted URL, which will always start with a '/'.
903 Raises: IndexError if too many '..' occur within the path.
905 """
906 # Query component should not be involved.
907 path, _, query = path.partition('?')
908 path = urllib.parse.unquote(path)
910 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
911 # path semantics rather than local operating system semantics.
912 path_parts = path.split('/')
913 head_parts = []
914 for part in path_parts[:-1]:
915 if part == '..':
916 head_parts.pop() # IndexError if more '..' than prior parts
917 elif part and part != '.':
918 head_parts.append( part )
919 if path_parts:
920 tail_part = path_parts.pop()
921 if tail_part:
922 if tail_part == '..':
923 head_parts.pop()
924 tail_part = ''
925 elif tail_part == '.':
926 tail_part = ''
927 else:
928 tail_part = ''
930 if query:
931 tail_part = '?'.join((tail_part, query))
933 splitpath = ('/' + '/'.join(head_parts), tail_part)
934 collapsed_path = "/".join(splitpath)
936 return collapsed_path
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is set.  Returns -1 when the
    ``pwd`` module cannot be imported.  When no 'nobody' account exists,
    one more than the largest uid in the password database is used.
    """
    global nobody
    if nobody:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        uid = pwd.getpwnam('nobody')[2]
    except KeyError:
        uid = 1 + max(entry[2] for entry in pwd.getpwall())
    nobody = uid
    return uid
def executable(path):
    """Return whether os.access() reports execute permission for *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
963class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
965 """Complete HTTP server with GET, HEAD and POST commands.
967 GET and HEAD also support running CGI scripts.
969 The POST command is *only* implemented for CGI scripts.
971 """
973 # Determine platform specifics
974 have_fork = hasattr(os, 'fork')
976 # Make rfile unbuffered -- we need to read one line and then pass
977 # the rest to a subprocess, so we can't use buffered input.
978 rbufsize = 0
def do_POST(self):
    """Serve a POST request.

    POST is supported for CGI scripts only; any other target receives
    a 501 Not Implemented error.
    """
    if not self.is_cgi():
        self.send_error(
            HTTPStatus.NOT_IMPLEMENTED,
            "Can only POST to CGI scripts")
        return
    self.run_cgi()
def send_head(self):
    """Version of send_head that supports CGI scripts.

    CGI requests are handed to run_cgi(); everything else falls back
    to SimpleHTTPRequestHandler.send_head().
    """
    if self.is_cgi():
        return self.run_cgi()
    return SimpleHTTPRequestHandler.send_head(self)
def is_cgi(self):
    """Test whether self.path corresponds to a CGI script.

    Returns True and updates the cgi_info attribute to the tuple
    (dir, rest) if self.path requires running a CGI script.
    Returns False otherwise.

    If any exception is raised, the caller should assume that
    self.path was rejected as invalid and act accordingly.

    The default implementation tests whether the normalized url
    path begins with one of the strings in self.cgi_directories
    (and the next character is a '/' or the end of the string).

    """
    collapsed = _url_collapse_path(self.path)
    # Split at the first '/' after the leading one.
    split_at = collapsed.find('/', 1)
    head = collapsed[:split_at]
    tail = collapsed[split_at+1:]
    if head not in self.cgi_directories:
        return False
    self.cgi_info = head, tail
    return True
1025 cgi_directories = ['/cgi-bin', '/htbin']
1027 def is_executable(self, path):
1028 """Test whether argument path is an executable file."""
1029 return executable(path)
1031 def is_python(self, path):
1032 """Test whether argument path is a Python script."""
1033 head, tail = os.path.splitext(path)
1034 return tail.lower() in (".py", ".pyw")
1036 def run_cgi(self):
1037 """Execute a CGI script."""
1038 dir, rest = self.cgi_info
1039 path = dir + '/' + rest
1040 i = path.find('/', len(dir)+1)
1041 while i >= 0:
1042 nextdir = path[:i]
1043 nextrest = path[i+1:]
1045 scriptdir = self.translate_path(nextdir)
1046 if os.path.isdir(scriptdir):
1047 dir, rest = nextdir, nextrest
1048 i = path.find('/', len(dir)+1)
1049 else:
1050 break
1052 # find an explicit query string, if present.
1053 rest, _, query = rest.partition('?')
1055 # dissect the part after the directory name into a script name &
1056 # a possible additional path, to be stored in PATH_INFO.
1057 i = rest.find('/')
1058 if i >= 0:
1059 script, rest = rest[:i], rest[i:]
1060 else:
1061 script, rest = rest, ''
1063 scriptname = dir + '/' + script
1064 scriptfile = self.translate_path(scriptname)
1065 if not os.path.exists(scriptfile):
1066 self.send_error(
1067 HTTPStatus.NOT_FOUND,
1068 "No such CGI script (%r)" % scriptname)
1069 return
1070 if not os.path.isfile(scriptfile):
1071 self.send_error(
1072 HTTPStatus.FORBIDDEN,
1073 "CGI script is not a plain file (%r)" % scriptname)
1074 return
1075 ispy = self.is_python(scriptname)
1076 if self.have_fork or not ispy:
1077 if not self.is_executable(scriptfile):
1078 self.send_error(
1079 HTTPStatus.FORBIDDEN,
1080 "CGI script is not executable (%r)" % scriptname)
1081 return
1083 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
1084 # XXX Much of the following could be prepared ahead of time!
1085 env = copy.deepcopy(os.environ)
1086 env['SERVER_SOFTWARE'] = self.version_string()
1087 env['SERVER_NAME'] = self.server.server_name
1088 env['GATEWAY_INTERFACE'] = 'CGI/1.1'
1089 env['SERVER_PROTOCOL'] = self.protocol_version
1090 env['SERVER_PORT'] = str(self.server.server_port)
1091 env['REQUEST_METHOD'] = self.command
1092 uqrest = urllib.parse.unquote(rest)
1093 env['PATH_INFO'] = uqrest
1094 env['PATH_TRANSLATED'] = self.translate_path(uqrest)
1095 env['SCRIPT_NAME'] = scriptname
1096 if query:
1097 env['QUERY_STRING'] = query
1098 env['REMOTE_ADDR'] = self.client_address[0]
1099 authorization = self.headers.get("authorization")
1100 if authorization:
1101 authorization = authorization.split()
1102 if len(authorization) == 2:
1103 import base64, binascii
1104 env['AUTH_TYPE'] = authorization[0]
1105 if authorization[0].lower() == "basic":
1106 try:
1107 authorization = authorization[1].encode('ascii')
1108 authorization = base64.decodebytes(authorization).\
1109 decode('ascii')
1110 except (binascii.Error, UnicodeError):
1111 pass
1112 else:
1113 authorization = authorization.split(':')
1114 if len(authorization) == 2:
1115 env['REMOTE_USER'] = authorization[0]
1116 # XXX REMOTE_IDENT
1117 if self.headers.get('content-type') is None:
1118 env['CONTENT_TYPE'] = self.headers.get_content_type()
1119 else:
1120 env['CONTENT_TYPE'] = self.headers['content-type']
1121 length = self.headers.get('content-length')
1122 if length:
1123 env['CONTENT_LENGTH'] = length
1124 referer = self.headers.get('referer')
1125 if referer:
1126 env['HTTP_REFERER'] = referer
1127 accept = []
1128 for line in self.headers.getallmatchingheaders('accept'):
1129 if line[:1] in "\t\n\r ":
1130 accept.append(line.strip())
1131 else:
1132 accept = accept + line[7:].split(',')
1133 env['HTTP_ACCEPT'] = ','.join(accept)
1134 ua = self.headers.get('user-agent')
1135 if ua:
1136 env['HTTP_USER_AGENT'] = ua
1137 co = filter(None, self.headers.get_all('cookie', []))
1138 cookie_str = ', '.join(co)
1139 if cookie_str:
1140 env['HTTP_COOKIE'] = cookie_str
1141 # XXX Other HTTP_* headers
1142 # Since we're setting the env in the parent, provide empty
1143 # values to override previously set values
1144 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
1145 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
1146 env.setdefault(k, "")
1148 self.send_response(HTTPStatus.OK, "Script output follows")
1149 self.flush_headers()
1151 decoded_query = query.replace('+', ' ')
1153 if self.have_fork:
1154 # Unix -- fork as we should
1155 args = [script]
1156 if '=' not in decoded_query:
1157 args.append(decoded_query)
1158 nobody = nobody_uid()
1159 self.wfile.flush() # Always flush before forking
1160 pid = os.fork()
1161 if pid != 0:
1162 # Parent
1163 pid, sts = os.waitpid(pid, 0)
1164 # throw away additional data [see bug #427345]
1165 while select.select([self.rfile], [], [], 0)[0]:
1166 if not self.rfile.read(1):
1167 break
1168 if sts:
1169 self.log_error("CGI script exit status %#x", sts)
1170 return
1171 # Child
1172 try:
1173 try:
1174 os.setuid(nobody)
1175 except OSError:
1176 pass
1177 os.dup2(self.rfile.fileno(), 0)
1178 os.dup2(self.wfile.fileno(), 1)
1179 os.execve(scriptfile, args, env)
1180 except:
1181 self.server.handle_error(self.request, self.client_address)
1182 os._exit(127)
1184 else:
1185 # Non-Unix -- use subprocess
1186 import subprocess
1187 cmdline = [scriptfile]
1188 if self.is_python(scriptfile):
1189 interp = sys.executable
1190 if interp.lower().endswith("w.exe"):
1191 # On Windows, use python.exe, not pythonw.exe
1192 interp = interp[:-5] + interp[-4:]
1193 cmdline = [interp, '-u'] + cmdline
1194 if '=' not in query:
1195 cmdline.append(query)
1196 self.log_message("command: %s", subprocess.list2cmdline(cmdline))
1197 try:
1198 nbytes = int(length)
1199 except (TypeError, ValueError):
1200 nbytes = 0
1201 p = subprocess.Popen(cmdline,
1202 stdin=subprocess.PIPE,
1203 stdout=subprocess.PIPE,
1204 stderr=subprocess.PIPE,
1205 env = env
1206 )
1207 if self.command.lower() == "post" and nbytes > 0:
1208 data = self.rfile.read(nbytes)
1209 else:
1210 data = None
1211 # throw away additional data [see bug #427345]
1212 while select.select([self.rfile._sock], [], [], 0)[0]:
1213 if not self.rfile._sock.recv(1):
1214 break
1215 stdout, stderr = p.communicate(data)
1216 self.wfile.write(stdout)
1217 if stderr:
1218 self.log_error('%s', stderr)
1219 p.stderr.close()
1220 p.stdout.close()
1221 status = p.returncode
1222 if status:
1223 self.log_error("CGI script exit status %#x", status)
1224 else:
1225 self.log_message("CGI script exited OK")
def _get_best_family(*address):
    """Return (family, sockaddr) of the first getaddrinfo() result.

    *address* is forwarded positionally to socket.getaddrinfo(), which is
    asked for stream sockets with the AI_PASSIVE flag set.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound
    to the address chosen by _get_best_family(bind, port), and serves
    until interrupted from the keyboard.

    Note that address_family is assigned on ServerClass and
    protocol_version on HandlerClass before the server is created.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        host, port = httpd.socket.getsockname()[:2]
        # A host containing ':' is bracketed when shown inside a URL.
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
if __name__ == '__main__':
    import argparse

    # Command-line interface: [--cgi] [--bind ADDRESS] [--directory DIR] [port]
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--cgi',
        action='store_true',
        help='Run as CGI Server',
    )
    cli.add_argument(
        '--bind', '-b',
        metavar='ADDRESS',
        help='Specify alternate bind address [default: all interfaces]',
    )
    cli.add_argument(
        '--directory', '-d',
        default=os.getcwd(),
        help='Specify alternative directory [default:current directory]',
    )
    cli.add_argument(
        'port',
        action='store',
        default=8000,
        type=int,
        nargs='?',
        help='Specify alternate port [default: 8000]',
    )
    options = cli.parse_args()

    # --directory is only passed on to the plain file-serving handler.
    handler_class = (
        CGIHTTPRequestHandler
        if options.cgi
        else partial(SimpleHTTPRequestHandler, directory=options.directory)
    )

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=options.port,
        bind=options.bind,
    )