Package wtf :: Module webutil
[hide private]
[frames | no frames]

Source Code for Module wtf.webutil

  1  # -*- coding: ascii -*- 
  2  # 
  3  # Copyright 2006-2012 
  4  # Andr\xe9 Malo or his licensors, as applicable 
  5  # 
  6  # Licensed under the Apache License, Version 2.0 (the "License"); 
  7  # you may not use this file except in compliance with the License. 
  8  # You may obtain a copy of the License at 
  9  # 
 10  #     http://www.apache.org/licenses/LICENSE-2.0 
 11  # 
 12  # Unless required by applicable law or agreed to in writing, software 
 13  # distributed under the License is distributed on an "AS IS" BASIS, 
 14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 15  # See the License for the specific language governing permissions and 
 16  # limitations under the License. 
 17  """ 
 18  Common Utilities 
 19  ================ 
 20   
 21  Assorted utilities that make life easier. 
 22   
 23  :Variables: 
 24   - `PIXEL`: Transparent 1x1 pixel GIF. Can be used for delivering webbugs etc. 
 25     Usage: ``response.content_type('image/gif'); return [PIXEL]`` 
 26   
 27  :Types: 
 28   - `PIXEL`: ``str`` 
 29  """ 
 30  __author__ = u"Andr\xe9 Malo" 
 31  __docformat__ = "restructuredtext en" 
 32   
 33  import re as _re 
 34  import urlparse as _urlparse 
 35   
 36  PIXEL = 'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!' \ 
 37          '\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01' \ 
 38          '\x00\x00\x02\x01D\x00;' 
def escape_html(toescape, quotes=True):
    """
    Escape a string for HTML output

    ``&``, ``<`` and ``>`` are always replaced by their entities; ``"`` is
    replaced by ``&quot;`` only if `quotes` is true. Unicode input yields
    unicode output, everything else is passed through ``str()`` first.

    :Parameters:
     - `toescape`: The string to escape
     - `quotes`: Escape double quotes, too?

    :Types:
     - `toescape`: ``basestring``
     - `quotes`: ``bool``

    :return: The escaped string
    :rtype: ``basestring``
    """
    if isinstance(toescape, unicode):
        text = toescape
        table = ((u'&', u'&amp;'), (u'<', u'&lt;'), (u'>', u'&gt;'))
        quote_pair = (u'"', u'&quot;')
    else:
        text = str(toescape)
        table = (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'))
        quote_pair = ('"', '&quot;')

    if quotes:
        table += (quote_pair,)

    # The ampersand has to go first, otherwise the entities produced by the
    # later replacements would be escaped again.
    for needle, entity in table:
        text = text.replace(needle, entity)
    return text
65
# Characters which have a symbolic escape sequence in JS string literals.
# The slash is escaped in order to never emit a literal ``</script>``.
_JS_ESCAPE_MAP = {
    u'\\': '\\\\',
    u"'": "\\'",
    u'"': '\\"',
    u'/': '\\/',
    u'\n': '\\n',
    u'\r': '\\r',
    u'\t': '\\t',
}

# Matches every character that must not appear verbatim: backslash, both
# quote characters, the slash and anything outside of printable ASCII.
_JS_ESCAPE_SUB = _re.compile(u'[\\\\\'"/]|[^ -~]').sub


def _escape_js_char(match):
    """ Return the JS escape sequence for the single matched character """
    char = match.group(0)
    if char in _JS_ESCAPE_MAP:
        return _JS_ESCAPE_MAP[char]
    code = ord(char)
    if code > 0xFFFF:
        # JS has no \UXXXXXXXX escape, emit a UTF-16 surrogate pair instead
        code -= 0x10000
        return '\\u%04x\\u%04x' % (
            0xD800 | (code >> 10), 0xDC00 | (code & 0x3FF)
        )
    if code > 0xFF:
        return '\\u%04x' % code
    return '\\x%02x' % code


def escape_js(toescape):
    """
    Escape a string for JS output (to be inserted into a JS string)

    The output is always of type ``str`` and consists of printable ASCII
    characters only.

    Every character is escaped exactly once: a backslash becomes ``\\\\``,
    quotes become ``\\'`` and ``\\"``, the slash becomes ``\\/``. Other
    characters outside of printable ASCII are emitted as ``\\n``, ``\\r``,
    ``\\t``, ``\\xNN`` or ``\\uNNNN``. Characters beyond the BMP are emitted
    as surrogate pairs, because JS does not know ``\\UNNNNNNNN``.

    Byte strings are treated octet-wise, i.e. a high byte ``0xNN`` is
    emitted as ``\\xNN``.

    :Parameters:
      `toescape` : ``basestring``
        The string to escape

    :Return: The escaped string
    :Rtype: ``str``
    """
    if not isinstance(toescape, unicode):
        # latin-1 maps each octet to the code point of the same value, so
        # byte strings can share the unicode code path below.
        toescape = str(toescape).decode('latin-1')
    return _JS_ESCAPE_SUB(_escape_js_char, toescape).encode('ascii')
85
def decode_simple(value):
    """
    Return unicode version of value

    Simple heuristics: Try UTF-8 first, cp1252 then. cp1252 leaves a few
    octets undefined (``0x81``, ``0x8d``, ``0x8f``, ``0x90``, ``0x9d``), so
    as a last resort the value is decoded as latin-1, which accepts every
    octet. This way arbitrary octet strings never raise a decoding error.

    :Parameters:
     - `value`: The value to decode

    :Types:
     - `value`: ``str``

    :return: The decoded value
    :rtype: ``unicode``
    """
    try:
        return value.decode('utf-8')
    except UnicodeError:
        pass
    try:
        return value.decode('cp1252')
    except UnicodeError:
        # Input contains octets undefined in cp1252; latin-1 cannot fail.
        return value.decode('latin-1')
106
class URL(object):
    """
    URL abstraction (RFC 1738)

    A URL is split into its parts using ``urlparse.urlparse``. In unicode
    mode (the default) the parts are stored unescaped and decoded; `str()`
    and `unicode()` reassemble and re-escape them.

    :CVariables:
     - `_PARTS`: ordered list of known URL parts (available via instance
       attributes)
     - `_PATH_SAFE`: characters which are not escaped when the path and the
       path parameters are quoted again

    :IVariables:
     - `scheme`: The URL scheme
     - `netloc`: The net location if available (or ``''``)
     - `path`: The unescaped path if available, for non-path-based schemes
       this contains the unescaped non-path ;-) (or ``''``)
     - `params`: optional unescaped path parameters (or ``''``)
     - `query`: query object
     - `fragment`: optional fragment. Strictly spoken this isn't part of URLs
       but of URL references. But who cares. (or ``''``)
     - `_unicode`: Are the parts stored as unicode (vs. plain octets)?

    :Types:
     - `_PARTS`: ``tuple``
     - `_PATH_SAFE`: ``str``
     - `scheme`: ``str``
     - `netloc`: ``unicode``
     - `path`: ``unicode``
     - `params`: ``unicode``
     - `query`: `Query`
     - `fragment`: ``unicode``
     - `_unicode`: ``bool``
    """
    scheme, netloc, path, params, query, fragment = [''] * 6
    _PARTS = ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')
    _PATH_SAFE = '/()=~'
    _unicode = False

    def __init__(self, url, decode=None):
        """
        Initialization

        :Parameters:
         - `url`: The url to parse. If it's an instance of this class, the
           parameters will be copied
         - `decode`: Decoder of parsed octet data. If omitted or ``None``,
           `decode_simple` is used. A false value (other than ``None``)
           turns off decoding; the parts are kept as octets then.

        :Types:
         - `url`: ``basestring`` or `URL`
         - `decode`: ``callable``
        """
        if isinstance(url, URL):
            for key in self._PARTS:
                setattr(self, key, getattr(url, key))
            # the query object is mutable, so the copy gets its own one
            self.query = Query(url.query)
            self._unicode = url._unicode # pylint: disable = W0212
        else:
            if decode is None:
                decode = decode_simple
            if decode:
                self._unicode = True
                if not isinstance(url, unicode):
                    url = decode(url)
            if self._unicode:
                # urlparse and unquote work on octets
                url = url.encode('utf-8')
            for key, value in zip(self._PARTS, _urlparse.urlparse(url)):
                setattr(self, key, value)
            if not isinstance(self.netloc, unicode):
                self.netloc = decode_simple(self.netloc)
            # round trip through the idna codec (see below)
            self.netloc = self.netloc.encode('idna')
            if self._unicode:
                self.netloc = self.netloc.decode('idna')
                self.path = decode(unquote(self.path))
                self.params = decode(unquote(self.params))
                self.fragment = decode(self.fragment)
            self.query = Query(self.query, decode=decode)

    def __str__(self):
        """
        String representation, hostname idna encoded

        :return: The string representation
        :rtype: ``str``
        """
        if self._unicode:
            encode = lambda x, enc = 'utf-8': x.encode(enc)
        else:
            # parts are octets already, pass them through untouched
            encode = lambda x, enc = 'utf-8': x

        return _urlparse.urlunparse((
            self.scheme,
            encode(self.netloc, 'idna'),
            quote(encode(self.path), self._PATH_SAFE),
            quote(encode(self.params), self._PATH_SAFE),
            str(self.query),
            encode(self.fragment),
        ))

    def __repr__(self):
        """
        Debug representation

        :return: The debug representation
        :rtype: ``str``
        """
        return "%s(%r)" % (self.__class__.__name__, str(self))

    def __unicode__(self):
        """
        Unicode representation, hostname as unicode (vs. idna)

        :return: The unicode representation
        :rtype: ``unicode``
        """
        if self._unicode:
            encode = lambda x, enc = 'utf-8': x.encode(enc)
            decode = lambda x: x.decode('utf-8')
        else:
            encode = lambda x, enc = 'utf-8': x
            decode = decode_simple

        return decode(_urlparse.urlunparse((
            self.scheme,
            encode(self.netloc),
            quote(encode(self.path), self._PATH_SAFE),
            quote(encode(self.params), self._PATH_SAFE),
            str(self.query),
            encode(self.fragment),
        )))

    @classmethod
    def fromcomponents(cls, path, scheme=None, netloc=None, query=None):
        """
        Create URL object from **unescaped** path

        For convenience you can optionally add query, scheme and netloc.

        :Parameters:
         - `path`: The path to create the URL from
         - `scheme`: Optional URL scheme (like ``http``)
         - `netloc`: Optional net location (like ``example.com``)
         - `query`: Optional query string (encoded) or `Query` object

        :Types:
         - `path`: ``basestring``
         - `scheme`: ``str``
         - `netloc`: ``basestring``
         - `query`: ``str`` or `Query`

        :return: New URL object
        :rtype: `URL`
        """
        if not isinstance(path, unicode):
            path = decode_simple(path)
        path = path.encode('utf-8')
        self = cls(quote(path, cls._PATH_SAFE))
        if scheme is not None:
            self.scheme = str(scheme)
        if netloc is not None:
            if not isinstance(netloc, unicode):
                netloc = decode_simple(netloc)
            # Store the netloc the same way __init__ does: round tripped
            # through the idna codec and, in unicode mode, decoded again.
            # Keeping the idna encoded form here made unicode(url) show the
            # punycode host name instead of the unicode one.
            netloc = netloc.encode('idna')
            if self._unicode:
                netloc = netloc.decode('idna')
            self.netloc = netloc
        if query is not None:
            self.query = Query(query)
        return self

    def copy(self):
        """
        Copy the URL

        :return: a new `URL` instance
        :rtype: `URL`
        """
        return self.__class__(self)
276
class Query(object):
    """
    Class for query string parsing and modification
    (stolen from svnmailer)

    The parameters are kept as a mapping of names to value lists plus the
    sequence of names in their original order, so the query string can be
    rebuilt with the parameters in the order they were given.

    :CVariables:
     - `_QUERYRE`: Regex for splitting a query string
       on possible delimiters (``&`` and ``;``)

    :IVariables:
     - `_query_dict`: Dictionary of key->valuelist pairs
       (``{'key': ['val1', 'val2'], ...}``)
     - `_keyorder`: Original order of the keys (``['key', ...]``)
     - `_delim`: The delimiter to use for reconstructing the query string
     - `_unicode`: Are keys and values stored as unicode?

    :Types:
     - `_QUERYRE`: ``_sre.SRE_Pattern``
     - `_query_dict`: ``dict``
     - `_keyorder`: ``list``
     - `_delim`: ``unicode``
     - `_unicode`: ``bool``
    """
    _QUERYRE = _re.compile(r'[&;]')
    _unicode = False

    def __init__(self, query=u'', delim='&', decode=None):
        """
        Initialization

        :Parameters:
         - `query`: The query string to store. If it's a `Query` instance,
           its parameters are copied.
         - `delim`: The delimiter for reconstructing the query
         - `decode`: Parameter decoder. If omitted or ``None``,
           `decode_simple` is used. A false value (other than ``None``)
           turns off decoding.

        :Types:
         - `query`: ``unicode`` or `Query`
         - `delim`: ``unicode``
         - `decode`: ``callable``
        """
        values, order = {}, []

        if not query:
            # nothing to parse, only the mode needs to be determined
            if decode is None or decode:
                self._unicode = True
        elif isinstance(query, Query):
            # pylint: disable = E1103, W0212
            for name, items in query._query_dict.items():
                values[name] = list(items)
            order.extend(query._keyorder)
            self._unicode = query._unicode
        else:
            if decode is None:
                decode = decode_simple
            if decode:
                self._unicode = True
                if not isinstance(query, unicode):
                    query = decode(query)
                # unquoting works on octets
                query = query.encode('utf-8')
            else:
                decode = lambda x: x

            for pair in self._QUERYRE.split(query):
                parts = pair.split('=', 1)
                name = decode(unquote_plus(parts[0]))
                if len(parts) == 1:
                    # parameter without "=", marked with None
                    value = None
                else:
                    value = decode(unquote_plus(parts[1]))
                values.setdefault(name, []).append(value)
                order.append(name)

        self._keyorder = order
        self._query_dict = values
        self._delim = delim

    def __str__(self):
        """
        Returns the query as string again

        :return: The query as string (type depends on the input)
        :rtype: ``str``
        """
        chunks = []
        consumed = {}  # name -> number of values already emitted
        for name in self._keyorder:
            index = consumed.get(name, 0)
            consumed[name] = index + 1
            value = self._query_dict[name][index]

            if self._unicode:
                name = name.encode('utf-8')
            name = quote_plus(name)
            if value is None:
                chunks.append(name)
                continue

            if isinstance(value, unicode):
                value = value.encode('utf-8')
            chunks.append("%s=%s" % (name, quote_plus(value)))

        return self._delim.join(chunks)

    def __unicode__(self):
        """ Unicode representation (the decoded str() value) """
        return decode_simple(str(self))

    def __contains__(self, key):
        """
        Returns whether `key` occurs in the query as parameter name

        :Parameters:
         - `key`: The key to lookup

        :Types:
         - `key`: ``unicode``

        :return: Does `key` occur?
        :rtype: ``bool``
        """
        lookup = key
        if self._unicode:
            lookup = unicode(lookup)
        return lookup in self._query_dict

    def __getitem__(self, key):
        """
        Returns the values for parameter named `key`

        The values are returned as a tuple, i.e. as a snapshot. Use the
        modification methods in order to change the query.

        :Parameters:
         - `key`: The key to lookup

        :Types:
         - `key`: ``unicode``

        :return: The values (``('val1', 'val2', ...)``)
        :rtype: ``tuple``

        :exception KeyError: The key does not exist
        """
        lookup = key
        if self._unicode:
            lookup = unicode(lookup)
        return tuple(self._query_dict[lookup])

    def __setitem__(self, key, value):
        """
        Replace all occurrences of `key` with the new one

        :Parameters:
         - `key`: key to replace
         - `value`: value to set

        :Types:
         - `key`: ``unicode``
         - `value`: ``unicode``
        """
        self.remove((key,))
        self.add(((key, value),))

    def replace(self, **kwargs):
        """
        Conveniently replace multiple key value pairs at once

        :Parameters:
         - `kwargs`: key value pairs (unicode/unicode)

        :Types:
         - `kwargs`: ``dict``
        """
        self.remove(kwargs.keys())
        self.add(kwargs.items())

    def remove(self, keys):
        """
        Removes certain parameters from the query if present

        Non-present parameters are silently ignored

        :Parameters:
         - `keys`: The names of the parameters to remove

        :Types:
         - `keys`: sequence
        """
        if self._unicode:
            keys = [unicode(key) for key in keys]

        dropped = set()
        for key in keys:
            if key in self._query_dict:
                del self._query_dict[key]
                dropped.add(key)

        # The key order is only rebuilt if something was actually removed
        if dropped:
            self._keyorder = [
                name for name in self._keyorder if name not in dropped
            ]

    def add(self, toadd):
        """
        Adds certain key value pairs to the query

        A value of ``None`` adds a parameter without value. In unicode mode
        values which cannot be converted to unicode are stored as they are.

        :Parameters:
         - `toadd`: A sequence of key-value-pairs
           (``((u'key', u'value'), ...)``)

        :Types:
         - `toadd`: ``iterable``
        """
        to_text = self._unicode
        for name, value in toadd:
            if to_text:
                name = unicode(name)
                if value is not None:
                    try:
                        value = unicode(value)
                    except ValueError:
                        pass
            self._query_dict.setdefault(name, []).append(value)
            self._keyorder.append(name)

    def modify(self, remove=None, add=None, replace=None):
        """
        Summarizes certain query modification methods

        `replace` is a convenience parameter, it's actually a combination
        of `remove` and `add`. The order of processing is:

        1. append the `replace` parameters to `remove` and `add`
        2. apply `remove`
        3. apply `add`

        :Parameters:
         - `remove`: parameters to remove (see `Query.remove`
           method)
         - `add`: parameters to add (see `Query.add` method)
         - `replace`: parameters to override (see `Query.add` for the
           format)

        :Types:
         - `remove`: sequence
         - `add`: sequence
         - `replace`: sequence
        """
        doomed = list(remove or ())
        fresh = list(add or ())
        replacements = list(replace or ())

        # replacing means: drop the old values, then add the new ones
        doomed += [pair[0] for pair in replacements]
        fresh += replacements

        self.remove(doomed)
        self.add(fresh)
from wtf import c_override
cimpl = c_override('_wtf_cutil')
if cimpl is None:
    # No C implementation available, fall back to urllib based versions
    import urllib as _urllib

    def quote(s, safe='/', encoding='utf-8', errors='strict',
              _orig=_urllib.quote):
        """
        Replacement for ``urllib.quote``, which also handles unicode.

        Unicode input is encoded first, everything else is passed through
        ``str()``. The result is handed over to ``urllib.quote``.

        :Parameters:
         - `s`: The string to quote
         - `safe`: safe characters (not quoted)
         - `encoding`: Encoding to apply in case `s` is unicode
         - `errors`: Error handling in case `s` is unicode
         - `_orig`: The wrapped function (private, bound at definition time)

        :Types:
         - `s`: ``basestring``
         - `safe`: ``str``
         - `encoding`: ``str``
         - `errors`: ``str``
         - `_orig`: ``callable``

        :return: The quoted string
        :rtype: ``str``

        :Exceptions:
         - `UnicodeError`: Encoding error
        """
        # pylint: disable = C0103

        if not isinstance(s, unicode):
            return _orig(str(s), safe)
        return _orig(s.encode(encoding, errors), safe)


    def quote_plus(s, safe='/', encoding='utf-8', errors='strict',
                   _orig=_urllib.quote_plus):
        """
        Replacement for ``urllib.quote_plus``, which also handles unicode.

        Unicode input is encoded first, everything else is passed through
        ``str()``. The result is handed over to ``urllib.quote_plus``.

        :Parameters:
         - `s`: The string to quote
         - `safe`: safe characters (not quoted)
         - `encoding`: Encoding to apply in case `s` is unicode
         - `errors`: Error handling in case `s` is unicode
         - `_orig`: The wrapped function (private, bound at definition time)

        :Types:
         - `s`: ``basestring``
         - `safe`: ``str``
         - `encoding`: ``str``
         - `errors`: ``str``
         - `_orig`: ``callable``

        :return: The quoted string
        :rtype: ``str``

        :Exceptions:
         - `UnicodeError`: Encoding error
        """
        # pylint: disable = C0103

        if not isinstance(s, unicode):
            return _orig(str(s), safe)
        return _orig(s.encode(encoding, errors), safe)

    unquote = _urllib.unquote
    unquote_plus = _urllib.unquote_plus
else:
    # Use the functions provided by the C override module
    # pylint: disable = E1103
    quote = cimpl.quote
    quote_plus = cimpl.quote_plus
    unquote = cimpl.unquote
    unquote_plus = cimpl.unquote_plus

# helper names are not part of the module's interface
del c_override, cimpl