python1 |
1 | This diagram presents the following rules: -- a second request may not be started until {response-headers-read} -- a response [object] cannot be retrieved until {request-sent} -- there is no differentiation between an unread response body and a partially read response body Note: this enforcement is applied by the HTTPConnection class. The HTTPResponse class does not enforce this state machine, which implies sophisticated clients may accelerate the request/response pipeline. |
2 | Caution should be taken, though: accelerating the states beyond the above pattern may imply knowledge of the server's connection-close behavior for certain requests. For example, it is impossible to tell whether the server will close the connection UNTIL the response headers have been read; this means that further requests cannot be placed into the pipeline until it is known that the server will NOT be closing the connection. |
import email.parser
import email.message
import http
import io
import re
import socket
import collections.abc
from urllib.parse import urlsplit

# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Default TCP ports for plain and TLS-wrapped HTTP.
HTTP_PORT = 80
HTTPS_PORT = 443

_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# (copies every http.HTTPStatus member into this module's namespace)
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4
#
# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
#
# A name may not start with ':' or whitespace and may not contain ':',
# CR or LF anywhere.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# A value is illegal when it contains a line break that is not an obs-fold:
# an LF not followed by SP/HTAB, or a CR not followed by SP/HTAB/LF.
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.
# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}


def _encode(data, name='data'):
    """Call data.encode("latin-1") but show a better error message.

    data is the text to encode; name is the label used for it in the error
    message.  Returns the Latin-1 encoded bytes.  Raises UnicodeEncodeError
    (with the original context suppressed) when data contains a character
    outside Latin-1.
    """
    try:
        return data.encode("latin-1")
    except UnicodeEncodeError as err:
        # Re-raise with the same position information but a reason that
        # names the offending field and suggests the UTF-8 alternative.
        raise UnicodeEncodeError(
            err.encoding,
            err.object,
            err.start,
            err.end,
            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
            "if you want to send it encoded in UTF-8." %
            (name.title(), data[err.start:err.end], name)) from None


class HTTPMessage(email.message.Message):
    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.

        """
        # NOTE(review): the loop iterates self.keys() and compares each
        # entry's prefix against "<name>:"; if keys() yields bare field
        # names (no trailing ':'), this can never match and the result is
        # always [] -- confirm before relying on this method.
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.keys():
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                lst.append(line)
        return lst


def _read_headers(fp):
    """Reads potential header lines into a list from a file pointer.

    Length of line is limited by _MAXLINE, and number of headers is limited
    by _MAXHEADERS.

    Returns the raw lines as read from fp, including the terminating blank
    line (or the empty bytes object read at EOF).  Raises LineTooLong when a
    line exceeds _MAXLINE and HTTPException when more than _MAXHEADERS lines
    are read.
    """
    headers = []
    while True:
        # Ask for one byte more than the limit so an over-long line can be
        # told apart from one that is exactly _MAXLINE bytes.
        line = fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        headers.append(line)
        if len(headers) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        # A blank line (CRLF or bare LF) or EOF ends the header section.
        if line in (b'\r\n', b'\n', b''):
            break
    return headers


def parse_headers(fp, _class=HTTPMessage):
    """Parses only RFC2822 headers from a file pointer.

    email Parser wants to see strings rather than bytes.
    But a TextIOWrapper around self.rfile would buffer too many bytes
    from the stream, bytes which we later need to read as bytes.
    So we read the correct bytes here, as bytes, for email Parser
    to parse.

    Returns the message object built by email.parser.Parser using _class
    as the message class.
    """
    headers = _read_headers(fp)
    hstring = b''.join(headers).decode('iso-8859-1')
    return email.parser.Parser(_class=_class).parsestr(hstring)


class HTTPResponse(io.BufferedIOBase):

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    ...  # the remainder of this class is elided in this excerpt
Комментарии