Package wtf :: Package impl :: Package http :: Module _request
[hide private]
[frames] | no frames]

Source Code for Module wtf.impl.http._request

  1  # -*- coding: ascii -*- 
  2  # 
  3  # Copyright 2006-2012 
  4  # Andr\xe9 Malo or his licensors, as applicable 
  5  # 
  6  # Licensed under the Apache License, Version 2.0 (the "License"); 
  7  # you may not use this file except in compliance with the License. 
  8  # You may obtain a copy of the License at 
  9  # 
 10  #     http://www.apache.org/licenses/LICENSE-2.0 
 11  # 
 12  # Unless required by applicable law or agreed to in writing, software 
 13  # distributed under the License is distributed on an "AS IS" BASIS, 
 14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 15  # See the License for the specific language governing permissions and 
 16  # limitations under the License. 
 17  r""" 
 18  HTTP Request State Machine 
 19  ========================== 
 20   
 21  This is a simple state pattern implementing the request flow. 
 22   
 23  :Variables: 
 24   - `CRLF`: ASCII CRLF sequence (\r\n) 
 25   
 26  :Types: 
 27   - `CRLF`: ``str`` 
 28  """ 
 29  __author__ = u"Andr\xe9 Malo" 
 30  __docformat__ = "restructuredtext en" 
 31   
 32  import re as _re 
 33  import socket as _socket 
 34   
 35  from wtf import Error 
 36  from wtf import stream as _stream 
 37  from wtf.impl import _util as _impl_util 
 38  from wtf.impl.http import _util as _http_util 
 39   
 40  CRLF = _http_util.CRLF 
 41   
 42  ParseError = _http_util.ParseError 
 43  BadRequest = _http_util.BadRequest 
 44   
class RequestTimeout(ParseError):
    """ The request did not arrive in time (HTTP status 408) """
    status = "408 Request Timeout"
48
class ExpectationFailed(ParseError):
    """ An ``Expect`` request header could not be met (HTTP status 417) """
    status = "417 Expectation Failed"
52
class UnsupportedHTTPVersion(ParseError):
    """ The requested HTTP version is not supported (HTTP status 505) """
    status = "505 HTTP Version Not Supported"
56
class UnImplemented(ParseError):
    """ The request needs a feature which is not implemented (status 501) """
    status = "501 Unimplemented"
60
class InvalidRequestLine(BadRequest):
    """ The request line could not be parsed """
63
class InvalidContentLength(BadRequest):
    """ The ``Content-Length`` request header carries an invalid value """
66
class InvalidTransferEncoding(BadRequest):
    """ The ``Transfer-Encoding`` request header is not acceptable """
69
class MissingHostHeader(BadRequest):
    """ A HTTP/1.1 request was sent without the mandatory Host header """
72 73
class StateError(Error):
    """ An operation was requested which the current state forbids """
76 77
class BaseState(object):
    """
    Common ancestor of all request states

    Every public state operation defined here refuses to run by raising a
    `StateError`. Concrete states override exactly those operations which
    are legal while they are active.

    :IVariables:
     - `_request`: The request instance this state belongs to
     - `response_started`: Was the response already started?

    :Types:
     - `_request`: `HTTPRequest`
     - `response_started`: ``bool``
    """
    response_started = None

    def __init__(self, request):
        """
        Initialization

        :Parameters:
         - `request`: The request instance to attach to

        :Types:
         - `request`: `HTTPRequest`
        """
        self._request = request

    def _set_state(self, state):
        """
        Replace the state of the request

        A fresh instance of `state` is created for the request and stored
        in its ``state`` attribute.

        :Parameters:
         - `state`: The class of the state to enter

        :Types:
         - `state`: `BaseState`
        """
        request = self._request
        request.state = state(request)

    def read_request(self):
        """
        Read the request line and parse method, url and protocol version

        This base implementation always raises.

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        raise StateError()

    def read_headers(self):
        """
        Read and parse the request headers

        This base implementation always raises.

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        raise StateError()

    def request_body_stream(self):
        """
        Return a stream for the request body

        This base implementation always raises.

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        raise StateError()

    def send_continue(self):
        """
        Send the 100 Continue intermediate response

        This base implementation always raises.

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        raise StateError()

    def _send_continue(self):
        """
        Emit the 100 Continue response, regardless of the current state

        The response is written to the connection writer and flushed.
        Afterwards the request is flagged with ``sent_100``.
        """
        request = self._request
        assert request.protocol >= (1, 1), "Protocol < 1.1"
        out = request.connection.writer
        out.write("".join(("HTTP/1.1 100 Continue", CRLF, CRLF)))
        out.flush()
        request.sent_100 = True

    def send_status(self, status):
        """
        Send the response status line

        This base implementation always raises.

        :Parameters:
         - `status`: The status line (3 digit code, space, reason)

        :Types:
         - `status`: ``str``

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        # pylint: disable = W0613

        raise StateError()

    def send_headers(self, headers):
        """
        Send (or rather: collect) response headers

        This base implementation always raises.

        :Parameters:
         - `headers`: List of headers (``[('name', 'value'), ...]``)

        :Types:
         - `headers`: ``iterable``

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        # pylint: disable = W0613

        raise StateError()

    def finish_headers(self):
        """
        Finish the header phase and prepare the response for the body

        This base implementation always raises.

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        raise StateError()

    def response_body_stream(self):
        """
        Retrieve the response body stream

        This base implementation always raises.

        :Exceptions:
         - `StateError`: The operation is not allowed in this state
        """
        raise StateError()
212 213
class RequestInitialState(BaseState):
    """
    First state of a request; nothing has been read yet

    States to go from here:

    - `RequestLineReadyState`

    :CVariables:
     - `_LINE_MATCH`: Regex match callable, which succeeds if the line
       starts with a non-whitespace character
     - `_VER_MATCH`: Regex match callable, which parses the HTTP version
       token

    :Types:
     - `_LINE_MATCH`: ``callable``
     - `_VER_MATCH`: ``callable``
    """
    response_started = False
    _LINE_MATCH = _re.compile(r'\S').match
    _VER_MATCH = _re.compile(r'HTTP/(?P<major>\d+)\.(?P<minor>\d+)$').match

    def read_request(self):
        """
        Read and parse the request line

        Method, url and protocol version are stored on the request. A
        two-token line is taken as HTTP/0.9 (``GET`` only), a three-token
        line has to carry a ``HTTP/x.y`` version of at least 1.0.

        :Exceptions:
         - `InvalidRequestLine`: The request line was invalid
        """
        request = self._request
        line = request.connection.reader.readline()
        if self._LINE_MATCH(line) is None:
            raise InvalidRequestLine("Invalid request line format")

        tokens = line.split()
        if len(tokens) not in (2, 3):
            raise InvalidRequestLine("Request line format not recognized")

        if len(tokens) == 2:
            # no version token at all -> HTTP/0.9 simple request
            method, url = tokens
            if method != 'GET':
                raise InvalidRequestLine("Invalid method on HTTP/0.9 request")
            protocol = (0, 9)
        else:
            method, url, version = tokens
            found = self._VER_MATCH(version)
            if found is None:
                raise InvalidRequestLine("Invalid protocol string")
            protocol = (int(found.group('major')), int(found.group('minor')))
            if protocol < (1, 0):
                raise InvalidRequestLine("Invalid protocol version")

        request.method = method
        request.url = url
        request.protocol = protocol
        self._set_state(RequestLineReadyState)
262 263
class RequestLineReadyState(BaseState):
    """
    The request line is parsed, the headers are next

    States to go from here:

    - `RequestHeadersReadyState`
    """
    response_started = False

    def read_headers(self):
        """
        Read and parse the request headers

        Requests below HTTP/1.0 get an empty header dict without reading
        anything. The result is stored in ``request.headers``.

        :Exceptions:
         - `MissingHostHeader`: HTTP/1.1 (or later) request without Host
        """
        request = self._request
        headers = {}
        if request.protocol >= (1, 0):
            headers = _http_util.read_headers(request.connection.reader)
            if request.protocol >= (1, 1) and 'host' not in headers:
                raise MissingHostHeader(
                    "HTTP/1.1 requests MUST supply a Host header"
                )
        request.headers = headers
        self._set_state(RequestHeadersReadyState)
287 288
class RequestHeadersReadyState(BaseState):
    """
    The body can be read now and/or the response can be started

    States to go from here:

    - `ResponseContinueWaitState`
    - `ResponseStatusWaitState`

    :CVariables:
     - `_DECODERS`: Decoder mapping accessor
     - `_CLEN_MATCH`: Regex match callable, which accepts a valid
       Content-Length value (ASCII digits only)

    :IVariables:
     - `_next_state`: The state to enter as soon as the body stream is
       requested or the status line is sent

    :Types:
     - `_DECODERS`: ``callable``
     - `_CLEN_MATCH`: ``callable``
     - `_next_state`: `BaseState`
    """
    response_started = False
    _DECODERS = {
        'chunked': _http_util.ChunkedReader,
    }.get
    _CLEN_MATCH = _re.compile(r'[0-9]+\Z').match

    def __init__(self, request):
        """
        Initialization

        The request body stream is determined right away and stored on
        the request; the follow-up state is remembered.

        :Parameters:
         - `request`: Request instance

        :Types:
         - `request`: `HTTPRequest`
        """
        super(RequestHeadersReadyState, self).__init__(request)
        # pylint: disable = W0212
        self._request._request_body_stream, self._next_state = \
            self._setup_request_body_stream()

    def _setup_request_body_stream(self):
        """
        Determine the request body stream and the follow-up state

        Transfer-Encoding (HTTP/1.1 and later only) wins over
        Content-Length. If neither describes a body, there is no stream.
        An ``Expect: 100-continue`` header is only evaluated if there is a
        body.

        :return: The body stream (or ``None``) and the next state class
        :rtype: ``tuple``

        :Exceptions:
         - `InvalidTransferEncoding`: The final transfer coding is not
           ``chunked``
         - `UnImplemented`: A transfer coding is not implemented
         - `InvalidContentLength`: Content-Length is not a plain
           non-negative decimal number
         - `ExpectationFailed`: An expectation other than
           ``100-continue`` was supplied
        """
        # pylint: disable = R0912

        next_state, request = ResponseStatusWaitState, self._request
        stream = oldstream = request.connection.reader

        # First look out for Transfer-Encoding
        if request.protocol >= (1, 1):
            raw = request.headers.get('transfer-encoding', '')
            # reversed: decoders are stacked beginning with the coding,
            # which was applied last
            codings = [
                item for item in (
                    part.strip().lower() for part in raw.split(',')
                ) if item and item != 'identity'
            ][::-1]
            if codings:
                if codings[0] != 'chunked':
                    raise InvalidTransferEncoding(
                        "Last transfer encoding MUST be chunked"
                    )
                for coding in codings:
                    decoder = self._DECODERS(coding)
                    if decoder is None:
                        raise UnImplemented(
                            "Transfer-Encoding: %s is not implemented" %
                            coding
                        )
                    stream = decoder(stream)

        # Content-Length is second choice
        if stream is oldstream and 'content-length' in request.headers:
            try:
                value = request.headers['content-length'].strip()
                # int() alone would also accept "+5", "-0" and the like;
                # the header grammar allows nothing but digits
                if not self._CLEN_MATCH(value):
                    raise ValueError(value)
                clen = int(value)
            except (AttributeError, TypeError, ValueError):
                raise InvalidContentLength(
                    "Provide a valid Content-Length, please."
                )
            else:
                stream = _impl_util.ContentLengthReader(stream, clen)

        # No body at all
        if stream is oldstream:
            stream = None

        # Expect handling
        elif request.protocol >= (1, 1) and 'expect' in request.headers:
            # the lowering is only partially correct (RFC 2616),
            # but we're checking for the only known (insensitive) token
            # anyway, so it doesn't hurt.
            expectations = set([item.strip().lower() for item in
                request.headers['expect'].split(',')])
            if '100-continue' in expectations and len(expectations) == 1:
                stream = _http_util.ExpectationReader(stream, request)
            elif expectations:
                raise ExpectationFailed("Unrecognized expectation")
            next_state = ResponseContinueWaitState
            request.expects_100 = True

        # at this point every stream left is a wrapped one
        if stream is not None:
            stream = _stream.GenericStream(stream, read_exact=True)

        return stream, next_state

    def request_body_stream(self):
        """
        Enter the follow-up state and return the request body stream

        :return: The stream determined at initialization (maybe ``None``)
        :rtype: `wtf.stream.GenericStream`
        """
        self._set_state(self._next_state)
        return self._request._request_body_stream # pylint: disable = W0212

    def send_status(self, status):
        """
        Enter the follow-up state and pass the status line on to it

        :Parameters:
         - `status`: The status line (3 digit code, space, reason)

        :Types:
         - `status`: ``str``

        :return: Whatever the follow-up state's ``send_status`` returns
        :rtype: any
        """
        self._set_state(self._next_state)
        return self._request.send_status(status)
387 388
class ResponseContinueWaitState(BaseState):
    """
    Either a 100 Continue is emitted next or the final status line

    States to go from here:

    - `ResponseStatusWaitState`
    """
    response_started = False

    def send_continue(self):
        """ Emit the 100 Continue response, then wait for the status """
        self._send_continue()
        self._set_state(ResponseStatusWaitState)

    def send_status(self, status):
        """
        Skip the 100 Continue and pass the status line to the next state

        :Parameters:
         - `status`: The status line (3 digit code, space, reason)

        :Types:
         - `status`: ``str``

        :return: Whatever the next state's ``send_status`` returns
        :rtype: any
        """
        self._set_state(ResponseStatusWaitState)
        request = self._request
        return request.send_status(status)
408 409
class ResponseStatusWaitState(BaseState):
    """
    The status line is expected next

    States to go from here:

    - `ResponseHeadersWaitState`
    """
    response_started = False

    def send_status(self, status):
        """
        Record the status line on the request

        Nothing is written here; the line and its numeric code (the
        first three characters) are stored on the request only.

        :Parameters:
         - `status`: The status line (3 digit code, space, reason)

        :Types:
         - `status`: ``str``
        """
        request = self._request
        request._response_status_line = status # pylint: disable = W0212
        request.response_status = int(status[:3])
        self._set_state(ResponseHeadersWaitState)
425 426
class ResponseHeadersWaitState(BaseState):
    """
    We're waiting for headers to be set and sent

    States to go from here:

    - `ResponseBodyWaitState`

    :IVariables:
     - `_headers`: Ordered list of header names
     - `_hdict`: Dict of header names -> values (``{'name': ['value', ...]}``)

    :Types:
     - `_headers`: ``list``
     - `_hdict`: ``dict``
    """
    response_started = False

    def __init__(self, request):
        """
        Initialization

        ``server`` and ``date`` are pre-registered, so they lead the
        emitted header list.

        :Parameters:
         - `request`: Request instance

        :Types:
         - `request`: `HTTPRequest`
        """
        super(ResponseHeadersWaitState, self).__init__(request)
        self._headers = ['server', 'date']
        self._hdict = {'server': [], 'date': []}

    def send_headers(self, headers):
        """
        Collect headers; nothing is written before `finish_headers`

        Names are lower cased. Values of repeated names are accumulated
        in order of arrival.

        :Parameters:
         - `headers`: List of headers (``[('name', 'value'), ...]``)

        :Types:
         - `headers`: ``iterable``
        """
        for name, value in headers:
            name = name.lower()
            if name not in self._hdict:
                self._headers.append(name)
                self._hdict[name] = []
            self._hdict[name].append(value)

    def finish_headers(self):
        """
        Finalize the headers and write status line and header block

        The state is switched to `ResponseBodyWaitState` first. Nothing
        is written for requests below HTTP/1.0. If the connection
        persists (or a 100 Continue was already sent), the request body
        is read to its end before anything is written.
        """
        self._set_state(ResponseBodyWaitState)
        request = self._request
        if request.protocol >= (1, 0):
            out, hdict, writer = [], self._hdict, request.connection.writer
            request.response_headers = hdict
            request.connection.compute_status()
            if request.connection.persist or request.sent_100:
                # suck the whole request body, before sending a response
                # (avoiding dead locks)

                if request.expects_100 and not request.sent_100:
                    # The state was switched already, so the regular
                    # send_continue would raise. Shadow it on the instance
                    # while the body is drained.
                    request.send_continue = self._send_continue
                try:
                    # pylint: disable = W0212
                    stream = request._request_body_stream
                    if stream is not None:
                        # NOTE(review): read(0) presumably means "read
                        # some chunk" in wtf.stream - confirm
                        chunk, read = True, stream.read
                        while chunk:
                            chunk = read(0)
                finally:
                    # drop the shadow even if draining failed, so the
                    # request never keeps a method bound to a stale state
                    if request.send_continue == self._send_continue:
                        del request.send_continue

            # server and date always override application supplied values
            hdict.update({
                'server': ["WTF"], 'date': [_http_util.make_date()]
            })
            for key, value in request.connection.headers.iteritems():
                if key not in hdict:
                    self._headers.append(key)
                    hdict[key] = [value]

            for name in self._headers:
                if name in hdict:
                    cname = name.title()
                    if name == 'set-cookie':
                        # cookies must not be comma folded
                        out.extend([(cname, val) for val in hdict[name]])
                    else:
                        out.append((cname, ", ".join(hdict[name])))

            writer.write(
                # pylint: disable = W0212
                "HTTP/%d.%d " % request.http_version +
                request._response_status_line + CRLF
            )
            writer.writelines([
                "%s: %s%s" % (name, value, CRLF) for name, value in out
            ])
            writer.write(CRLF)
508 509
class ResponseBodyWaitState(BaseState):
    """
    The headers are done, the response body is expected next

    States to go from here:

    - `ResponseDoneState`
    """
    response_started = True

    def response_body_stream(self):
        """
        Pick the stream the response body has to be written to

        ``HEAD`` requests get the null device. Everything else goes to
        the connection writer, wrapped into a chunked writer if a
        ``transfer-encoding`` response header is present. The stream is
        stored on the request as well.

        :return: The response body stream
        :rtype: any
        """
        self._set_state(ResponseDoneState)
        request = self._request
        if request.method == 'HEAD':
            body = _stream.dev_null
        else:
            body = request.connection.writer
            headers = request.response_headers
            if headers and 'transfer-encoding' in headers:
                chunked = _http_util.ChunkedWriter(body)
                body = _stream.GenericStream(chunked)
        request._response_body_stream = body # pylint: disable = W0212
        return body
535 536
class ResponseDoneState(BaseState):
    """ Final state; every state operation raises from now on """
    response_started = True
540 541
class HTTPRequest(object):
    """
    HTTP Request abstraction

    :IVariables:
     - `_request_body_stream`: Stream for accessing the request body (or
       ``None``). Transfer encodings and the Expect/Continue mechanism
       are dealt with transparently. Just read it.
     - `_response_body_stream`: Stream for writing the response body (or
       ``None``)
     - `_response_status_line`: Response status line as handed to
       ``send_status`` (or ``None``)
     - `_server`: HTTP server instance
     - `state`: Current state object. Additional methods and properties are
       looked up there (see `BaseState` for documentation)
     - `headers`: Request header dictionary
     - `response_status`: Response status code sent to the client
     - `response_headers`: Response headers sent to the client
     - `method`: Request method used
     - `url`: Request URL
     - `protocol`: Request protocol version
     - `connection`: HTTP connection abstraction
     - `http_version`: Maximum supported HTTP version
     - `keep_alive`: Keep-alive setting, copied from the server
     - `flags`: Worker flags
     - `expects_100`: Did the client ask for a 100 Continue response?
     - `sent_100`: Was a 100 Continue response sent?

    :Types:
     - `_request_body_stream`: `wtf.stream.GenericStream`
     - `_response_body_stream`: `wtf.stream.GenericStream`
     - `_response_status_line`: ``str``
     - `_server`: `http.HTTPServer`
     - `state`: `BaseState`
     - `headers`: ``dict``
     - `response_status`: ``int``
     - `response_headers`: ``dict``
     - `method`: ``str``
     - `url`: ``str``
     - `protocol`: ``tuple``
     - `connection`: `HTTPConnection`
     - `http_version`: ``tuple``
     - `flags`: `wtf.impl.FlagsInterface`
     - `expects_100`: ``bool``
     - `sent_100`: ``bool``
    """
    _request_body_stream, _response_body_stream = None, None
    expects_100, sent_100, _response_status_line = False, False, None
    headers, response_status, response_headers = None, None, None
    method, url, protocol = 'GET', '*', (0, 9)
    connection = None

    def __init__(self, server, connection, flags):
        """
        Initialization

        :Parameters:
         - `server`: Server instance
         - `connection`: Connection, this request is served on
         - `flags`: Worker flags

        :Types:
         - `server`: `HTTPServer`
         - `connection`: `Connection`
         - `flags`: `FlagsInterface`
        """
        self._server = server
        self.http_version = server.http_version
        self.keep_alive = server.keep_alive
        self.flags = flags
        self.connection = _http_util.HTTPConnection(self, connection)
        self.state = RequestInitialState(self)

    def close(self):
        """
        Close all streams

        The request enters `ResponseDoneState`, the response body stream
        (if any) is closed and finally the connection. The connection is
        detached from the request, so a second call won't close it again.
        """
        # _set_state is resolved on the current state via __getattr__
        self._set_state(ResponseDoneState)
        if self._response_body_stream is not None:
            self._response_body_stream.close() # flush all pending stuff
        connection, self.connection = self.connection, None
        if connection is not None:
            connection.close()

    def __getattr__(self, name):
        """
        Delegate call to the current state implementation

        :Parameters:
         - `name`: The symbol to fetch

        :Types:
         - `name`: ``str``

        :return: The resolved symbol depending on the state (should be a
                 callable)
        :rtype: any

        :Exceptions:
         - `AttributeError`: The symbol was not found (or there's no state
           to ask yet)
        """
        if name == 'state':
            # We only get here if ``state`` itself is missing (__init__
            # assigns it last; copying or unpickling skips __init__).
            # Asking self.state again would recurse without bound.
            raise AttributeError(name)
        return getattr(self.state, name)

    def parse(self):
        """
        Parse the request line and the headers

        After the request line is read, the connection timeout is set to
        the server's general timeout. The results are stored on the
        request by the state implementations; nothing is returned.

        :Exceptions:
         - `UnsupportedHTTPVersion`: The request protocol version is
           greater than `http_version`
         - `RequestTimeout`: The socket timed out while reading
        """
        try:
            self.read_request()
            self.connection.settimeout(self._server.timeouts.general)
            if self.protocol > self.http_version:
                raise UnsupportedHTTPVersion("Sorry.")
            self.read_headers()
        except _socket.timeout:
            raise RequestTimeout("Try typing a little faster")

    def error(self, status, message):
        """
        Emit a simple error

        The error is written to the connection writer directly, without
        consulting the state machine.

        :Parameters:
         - `status`: The status line to emit, it will be repeated in the body
           (which is labeled text/plain for >= HTTP/1.0 or wrapped into HTML
           for HTTP/0.9)
         - `message`: The message to emit

        :Types:
         - `status`: ``str``
         - `message`: ``str``
        """
        protocol, write = self.protocol, self.connection.writer.write
        if protocol >= (1, 0):
            out = status + CRLF + message + CRLF
            write("HTTP/%d.%d " % self.http_version + status + CRLF)
            write("Date: %s%s" % (_http_util.make_date(), CRLF))
            write("Content-Type: text/plain" + CRLF)
            write("Content-Length: %s%s" % (len(out), CRLF))
            if protocol >= (1, 1):
                write("Connection: close" + CRLF)
            write(CRLF)
            write(out)
        else:
            # HTTP/0.9 knows no headers, hence the HTML wrapper; both
            # values are escaped for use in element content
            out = """
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html>
<head><title>%(status)s</title></head>
<body><h1>%(status)s</h1><p>%(message)s</p></body>
</html>
            """.strip() % {
                'status': status.replace('&', '&amp;').replace('<', '&lt;'),
                'message': message.replace('&', '&amp;').replace('<', '&lt;'),
            }
            write(out + CRLF)
685