1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """
18 Common Utilities
19 ================
20
Certain utilities to make life easier.
22
23 :Variables:
24 - `PIXEL`: Transparent 1x1 pixel GIF. Can be used for delivering webbugs etc.
25 Usage: ``response.content_type('image/gif'); return [PIXEL]``
26
27 :Types:
28 - `PIXEL`: ``str``
29 """
30 __author__ = u"Andr\xe9 Malo"
31 __docformat__ = "restructuredtext en"
32
33 import re as _re
34 import urlparse as _urlparse
35
# Transparent 1x1 pixel GIF, kept as a literal so it can be returned directly
# as a response body (e.g. for web bugs).
PIXEL = (
    'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!'
    '\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01'
    '\x00\x00\x02\x01D\x00;'
)
42 """
43 Escape a string for HTML output
44
45 :Parameters:
46 - `toescape`: The string to escape
47 - `quotes`: Escape quotes, too?
48
49 :Types:
50 - `toescape`: ``basestring``
51 - `quotes`: ``bool``
52
53 :return: The escaped string
54 :rtype: ``basestring``
55 """
56 if isinstance(toescape, unicode):
57 xquote, result = (u'"', u'"'), toescape.replace(u'&', u'&'
58 ).replace(u'<', u'<').replace(u'>', u'>')
59 else:
60 xquote, result = ('"', '"'), str(toescape).replace('&', '&'
61 ).replace('<', '<').replace('>', '>')
62 if quotes:
63 result = result.replace(*xquote)
64 return result
65
68 """
69 Escape a string for JS output (to be inserted into a JS string)
70
71 The output is always of type ``str``.
72
73 :Parameters:
74 `toescape` : ``basestring``
75 The string to escape
76
77 :Return: The escaped string
78 :Rtype: ``str``
79 """
80 if isinstance(toescape, unicode):
81 result = toescape.replace(u'\\', u'\\\\').encode('unicode_escape')
82 else:
83 result = str(toescape).replace('\\', '\\\\').encode('string_escape')
84 return result.replace("'", "\\'").replace('"', '\\"').replace('/', '\\/')
85
88 """
89 Return unicode version of value
90
91 Simple heuristics: Try UTF-8 first, cp1252 then
92
93 :Parameters:
94 - `value`: The value to decode
95
96 :Types:
97 - `value`: ``str``
98
99 :return: The decoded value
100 :rtype: ``unicode``
101 """
102 try:
103 return value.decode('utf-8')
104 except UnicodeError:
105 return value.decode('cp1252')
106
107
108 -class URL(object):
109 """
110 URL abstraction (RFC 1738)
111
112 :CVariables:
113 - `_PARTS`: ordered list of known URL parts (available via instance
114 attributes)
115
116 :IVariables:
117 - `scheme`: The URL scheme
118 - `netloc`: The net location if available (or ``''``)
119 - `path`: The unescaped path if available, for non-path-based schemes
120 this contains the unescaped non-path ;-) (or ``''``)
121 - `params`: optional unescaped path parameters (or ``''``)
122 - `query`: query object
123 - `fragment`: optional fragment. Strictly spoken this isn't part of URLs
124 but of URL references. But who cares. (or ``''``)
125
126 :Types:
127 - `_PARTS`: ``tuple``
128 - `scheme`: ``str``
129 - `netloc`: ``unicode``
130 - `path`: ``unicode``
131 - `params`: ``unicode``
132 - `query`: `Query`
133 - `fragment`: ``unicode``
134 """
135 scheme, netloc, path, params, query, fragment = [''] * 6
136 _PARTS = ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')
137 _PATH_SAFE = '/()=~'
138 _unicode = False
139
141 """
142 Initialization
143
144 :Parameters:
145 - `url`: The url to parse. If it's an instance of this class, the
146 parameters will be copied
147 - `decode`: Decoder of parsed octet data
148
149 :Types:
150 - `url`: ``basestring`` or `URL`
151 - `decode`: ``callable``
152 """
153 if isinstance(url, URL):
154 for key in self._PARTS:
155 setattr(self, key, getattr(url, key))
156 self.query = Query(url.query)
157 self._unicode = url._unicode
158 else:
159 if decode is None:
160 decode = decode_simple
161 if decode:
162 self._unicode = True
163 if not isinstance(url, unicode):
164 url = decode(url)
165 if self._unicode:
166 url = url.encode('utf-8')
167 for key, value in zip(self._PARTS, _urlparse.urlparse(url)):
168 setattr(self, key, value)
169 if not isinstance(self.netloc, unicode):
170 self.netloc = decode_simple(self.netloc)
171 self.netloc = self.netloc.encode('idna')
172 if self._unicode:
173 self.netloc = self.netloc.decode('idna')
174 self.path = decode(unquote(self.path))
175 self.params = decode(unquote(self.params))
176 self.fragment = decode(self.fragment)
177 self.query = Query(self.query, decode=decode)
178
180 """
181 String representation, hostname idna encoded
182
183 :return: The string representation
184 :rtype: ``str``
185 """
186 if self._unicode:
187 encode = lambda x, enc = 'utf-8': x.encode(enc)
188 else:
189 encode = lambda x, enc = 'utf-8': x
190
191 return _urlparse.urlunparse((
192 self.scheme,
193 encode(self.netloc, 'idna'),
194 quote(encode(self.path), self._PATH_SAFE),
195 quote(encode(self.params), self._PATH_SAFE),
196 str(self.query),
197 encode(self.fragment),
198 ))
199
201 """
202 Debug representation
203
204 :return: The debug representation
205 :rtype: ``str``
206 """
207 return "%s(%r)" % (self.__class__.__name__, str(self))
208
210 """
211 Unicode representation, hostname as unicode (vs. idna)
212
213 :return: The unicode representation
214 :rtype: ``unicode``
215 """
216 if self._unicode:
217 encode = lambda x, enc = 'utf-8': x.encode(enc)
218 decode = lambda x: x.decode('utf-8')
219 else:
220 encode = lambda x, enc = 'utf-8': x
221 decode = decode_simple
222
223 return decode(_urlparse.urlunparse((
224 self.scheme,
225 encode(self.netloc),
226 quote(encode(self.path), self._PATH_SAFE),
227 quote(encode(self.params), self._PATH_SAFE),
228 str(self.query),
229 encode(self.fragment),
230 )))
231
232 @classmethod
234 """
235 Create URL object from **unescaped** path
236
237 For convenience you can optionally add query, scheme and netloc.
238
239 :Parameters:
240 - `path`: The path to create the URL from
241 - `scheme`: Optional URL scheme (like ``http``)
242 - `netloc`: Optional net location (like ``example.com``)
243 - `query`: Optional query string (encoded) or `Query` object
244
245 :Types:
246 - `path`: ``basestring``
247 - `scheme`: ``str``
248 - `netloc`: ``basestring``
249 - `query`: ``str``
250
251 :return: New URL object
252 :rtype: `URL`
253 """
254 if not isinstance(path, unicode):
255 path = decode_simple(path)
256 path = path.encode('utf-8')
257 self = cls(quote(path, cls._PATH_SAFE))
258 if scheme is not None:
259 self.scheme = str(scheme)
260 if netloc is not None:
261 if not isinstance(netloc, unicode):
262 netloc = decode_simple(netloc)
263 self.netloc = netloc.encode('idna')
264 if query is not None:
265 self.query = Query(query)
266 return self
267
269 """
270 Copy the URL
271
272 :return: a new `URL` instance
273 :rtype: `URL`
274 """
275 return self.__class__(self)
276
279 """
280 Class for query string parsing and modification
281 (stolen from svnmailer)
282
283 :CVariables:
284 - `_QUERYRE`: Regex for splitting a query string
285 on possible delimiters (``&`` and ``;``)
286
287 :Ivariables:
288 - `_query_dict`: Dictionary of key->valuelist pairs
289 (``{'key': ['val1', 'val2'], ...}``)
290 - `_keyorder`: Original order of the keys (``['key', ...]``)
291 - `_delim`: The delimiter to use for reconstructing the query string
292
293 :Types:
294 - `_QUERYRE`: ``_sre.SRE_Pattern``
295 - `_query_dict`: ``dict``
296 - `_keyorder`: ``list``
297 - `_delim`: ``unicode``
298 """
299 _QUERYRE = _re.compile(r'[&;]')
300 _unicode = False
301
302 - def __init__(self, query=u'', delim='&', decode=None):
303 """
304 Initialization
305
306 :Parameters:
307 - `query`: The query string to store
308 - `delim`: The delimiter for reconstructing the query
309 - `decode`: Parameter decoder
310
311 :Types:
312 - `query`: ``unicode`` or `Query`
313 - `delim`: ``unicode``
314 - `decode`: ``callable``
315 """
316 if not query:
317 if decode is None or decode:
318 self._unicode = True
319 query_dict = {}
320 keyorder = []
321 elif isinstance(query, Query):
322
323 query_dict = dict([(key, list(val))
324 for key, val in query._query_dict.items()
325 ])
326 keyorder = list(query._keyorder)
327 self._unicode = query._unicode
328 else:
329 query_dict = {}
330 keyorder = []
331 if decode is None:
332 decode = decode_simple
333 if decode:
334 self._unicode = True
335 if not isinstance(query, unicode):
336 query = decode(query)
337 query = query.encode('utf-8')
338 if not decode:
339 decode = lambda x: x
340 for tup in [pair.split('=', 1)
341 for pair in self._QUERYRE.split(query)]:
342 if len(tup) == 1:
343 key, val = decode(unquote_plus(tup[0])), None
344 else:
345 key, val = map(decode, map(unquote_plus, tup))
346 query_dict.setdefault(key, []).append(val)
347 keyorder.append(key)
348
349 self._keyorder = keyorder
350 self._query_dict = query_dict
351 self._delim = delim
352
354 """
355 Returns the query as string again
356
357 :return: The query as string (type depends on the input)
358 :rtype: ``str``
359 """
360 result = []
361 qdict = dict((key, list(reversed(val)))
362 for key, val in self._query_dict.iteritems())
363 for key in self._keyorder:
364 val = qdict[key].pop()
365 if self._unicode:
366 key = key.encode('utf-8')
367 key = quote_plus(key)
368 if val is None:
369 result.append(key)
370 else:
371 if isinstance(val, unicode):
372 val = val.encode('utf-8')
373 val = quote_plus(val)
374 result.append("%s=%s" % (key, val))
375
376 return self._delim.join(result)
377
379 """ Unicode representation (just ascii decoded str() value) """
380 return decode_simple(str(self))
381
383 """
384 Returns whether `key` occurs in the query as parameter name
385
386 :Parameters:
387 - `key`: The key to lookup
388
389 :Types:
390 - `key`: ``unicode``
391
392 :return: Does `key` occur?
393 :rtype: ``bool``
394 """
395 if self._unicode:
396 key = unicode(key)
397 return key in self._query_dict
398
400 """
401 Returns the value list for parameter named `key`
402
403 Don't modify the returned list without adjusting `_keyorder`,
404 too. At best don't modify it directly at all :)
405
406 :Parameters:
407 - `key`: The key to lookup
408
409 :Types:
410 - `key`: ``unicode``
411
412 :return: The value list (``['val1', 'val2', ...]``)
413 :rtype: ``list``
414
415 :exception KeyError: The key does not exist
416 """
417 if self._unicode:
418 key = unicode(key)
419 return tuple(self._query_dict[key])
420
422 """
423 Replace all occurences of `key` with the new one
424
425 :Parameters:
426 - `key`: key to replace
427 - `value`: value to set
428
429 :Types:
430 - `key`: ``unicode``
431 - `value`: ``unicode``
432 """
433 self.remove([key])
434 self.add([(key, value)])
435
437 """
438 Conveniently replace multiple key value pairs at once
439
440 :Parameters:
441 - `kwargs`: key value pairs (unicode/unicode)
442
443 :Types:
444 - `kwargs`: ``dict``
445 """
446 self.remove(kwargs.iterkeys())
447 self.add(kwargs.iteritems())
448
450 """
451 Removes certain parameters from the query if present
452
453 Non-present parameters are silently ignored
454
455 :Parameters:
456 - `keys`: The names of the parameters to remove
457
458 :Types:
459 - `keys`: sequence
460 """
461 if self._unicode:
462 keys = map(unicode, keys)
463 for key in keys:
464 if key in self._query_dict:
465 del self._query_dict[key]
466 self._keyorder = [
467 nkey for nkey in self._keyorder if nkey != key
468 ]
469
470 - def add(self, toadd):
471 """
472 Adds certain key value pairs to the query
473
474 :Parameters:
475 - `toadd`: A sequence of key-value-pairs
476 (``((u'key', u'value), ...)``)
477
478 :Types:
479 - `toadd`: ``iterable``
480 """
481 for key, val in toadd:
482 if self._unicode:
483 key = unicode(key)
484 if val is not None:
485 if self._unicode:
486 try:
487 val = unicode(val)
488 except ValueError:
489 pass
490 self._query_dict.setdefault(key, []).append(val)
491 self._keyorder.append(key)
492
493 - def modify(self, remove=None, add=None, replace=None):
494 """
495 Summarizes certain query modification methods
496
497 `replace` is a convenience parameter, it's actually a combination
498 of `remove` and `add`. The order of processing is:
499
500 1. append the `replace` parameters to `remove` and `add`
501 2. apply `remove`
502 3. apply `add`
503
504 :Parameters:
505 - `remove`: parameters to remove (see `Query.remove`
506 method)
507 - `add`: parameters to add (see `Query.add` method)
508 - `replace`: parameters to override (see `Query.add` for the
509 format)
510
511 :Types:
512 - `remove`: sequence
513 - `add`: sequence
514 - `replace`: sequence
515 """
516 remove = list(remove or [])
517 add = list(add or [])
518 replace = list(replace or [])
519
520
521 remove.extend([tup[0] for tup in replace])
522 add.extend(replace)
523
524 self.remove(remove)
525 self.add(add)
526
527
528 from wtf import c_override
529 cimpl = c_override('_wtf_cutil')
530 if cimpl is not None:
531
532 quote = cimpl.quote
533 quote_plus = cimpl.quote_plus
534 unquote = cimpl.unquote
535 unquote_plus = cimpl.unquote_plus
536 else:
537 import urllib as _urllib
538
539 - def quote(s, safe='/', encoding='utf-8', errors='strict',
540 _orig=_urllib.quote):
541 """
542 Replacement for ``urllib.quote``, which also handles unicode.
543
544 :Parameters:
545 - `s`: The string to quote
546 - `safe`: safe characters (not quoted)
547 - `encoding`: Encoding to apply in case `s` is unicode
548 - `errors`: Error handling in case `s` is unicode
549
550 :Types:
551 - `s`: ``basestring``
552 - `safe`: ``str``
553 - `encoding`: ``str``
554 - `errors`: ``str``
555
556 :return: The quoted string
557 :rtype: ``str``
558
559 :Exceptions:
560 - `UnicodeError`: Encoding error
561 """
562
563
564 if isinstance(s, unicode):
565 s = s.encode(encoding, errors)
566 else:
567 s = str(s)
568 return _orig(s, safe)
569
573 """
574 Replacement for ``urllib.quote_plus``, which also handles unicode.
575
576 :Parameters:
577 - `s`: The string to quote
578 - `safe`: safe characters (not quoted)
579 - `encoding`: Encoding to apply in case `s` is unicode
580 - `errors`: Error handling in case `s` is unicode
581
582 :Types:
583 - `s`: ``basestring``
584 - `safe`: ``str``
585 - `encoding`: ``str``
586 - `errors`: ``str``
587
588 :return: The quoted string
589 :rtype: ``str``
590
591 :Exceptions:
592 - `UnicodeError`: Encoding error
593 """
594
595
596 if isinstance(s, unicode):
597 s = s.encode(encoding, errors)
598 else:
599 s = str(s)
600 return _orig(s, safe)
601
602 unquote = _urllib.unquote
603 unquote_plus = _urllib.unquote_plus
604
605 del c_override, cimpl
606