1
2
3 r"""
4 =====================
5 Javascript Minifier
6 =====================
7
8 rJSmin is a javascript minifier written in python.
9
10 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.
11
12 :Copyright:
13
14 Copyright 2011 - 2015
15 Andr\xe9 Malo or his licensors, as applicable
16
17 :License:
18
19 Licensed under the Apache License, Version 2.0 (the "License");
20 you may not use this file except in compliance with the License.
21 You may obtain a copy of the License at
22
23 http://www.apache.org/licenses/LICENSE-2.0
24
25 Unless required by applicable law or agreed to in writing, software
26 distributed under the License is distributed on an "AS IS" BASIS,
27 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 See the License for the specific language governing permissions and
29 limitations under the License.
30
31 The module is a re-implementation aiming for speed, so it can be used at
32 runtime (rather than during a preprocessing step). Usually it produces the
33 same results as the original ``jsmin.c``. It differs in the following ways:
34
35 - there is no error detection: unterminated string, regex and comment
36 literals are treated as regular javascript code and minified as such.
37 - Control characters inside string and regex literals are left untouched; they
38 are not converted to spaces (nor to \\n)
39 - Newline characters are not allowed inside string and regex literals, except
40 for line continuations in string literals (ECMA-5).
41 - "return /regex/" is recognized correctly.
42 - Line terminators after regex literals are handled more sensibly
43 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
44 - Newlines before ! operators are removed more sensibly
45 - Comments starting with an exclamation mark (``!``) can be kept optionally
46 - rJSmin does not handle streams, but only complete strings. (However, the
47 module provides a "streamy" interface).
48
49 Since most parts of the logic are handled by the regex engine it's way faster
50 than the original python port of ``jsmin.c`` by Baruch Even. The speed factor
51 varies between about 6 and 55 depending on input and python version (it gets
52 faster the more compressed the input already is). Compared to the
53 speed-refactored python port by Dave St.Germain the performance gain is less
54 dramatic but still between 3 and 50 (for huge inputs). See the docs/BENCHMARKS
55 file for details.
56
57 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
58
59 Both python 2 and python 3 are supported.
60
61 .. _jsmin.c by Douglas Crockford:
62 http://www.crockford.com/javascript/jsmin.c
63 """
if __doc__:
    # The module docstring is a raw ASCII string containing backslash escapes
    # (e.g. ``\xe9``); decode them so the published text holds the real
    # characters. The guard skips this when no docstring is available.
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
# Same escape trick as above, keeps the source file pure ASCII.
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.12'
__all__ = ['jsmin']
72
73 import re as _re
74
75
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    Unless `python_only` is set, the C implementation is asked for first. If
    the loader module cannot be imported (for instance because this module
    has no parent package) or the loader returns ``None``, the pure python
    minifier is built instead.

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded. (tdi.c._tdi_rjsmin)

    :Return: Minifier
    :Rtype: ``callable``
    """
    if not python_only:
        # Only the import itself is guarded; errors raised by the loader call
        # still propagate. ValueError is what older interpreters raise for a
        # relative import outside of a package.
        try:
            from .. import c
        except (ImportError, ValueError):
            pass
        else:
            rjsmin = c.load('rjsmin')
            if rjsmin is not None:
                return rjsmin.jsmin

    # NOTE: the pattern fragments below are picked up *by name* through
    # ``% locals()`` when the big expressions are assembled. Renaming any of
    # them requires touching the format strings as well.
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        # Move the first dash to the very end, where it cannot be mistaken
        # for a range operator.
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, runs = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    runs.append((first, last))
                    first = last = char
            if last is not None:
                runs.append((first, last))
            # Runs of two are written as the two characters; only longer
            # runs get the dash notation.
            return ''.join(['%s%s%s' % (
                chr(first),
                '-' if last > first + 1 else '',
                chr(last) if last != first else '',
            ) for first, last in runs])

        # Escape class meta characters first, then turn control characters,
        # space and the single quote into octal escapes.
        return _re.sub(
            r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)), (
                sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
    post_regex_off = id_literal_(r'[^\000-\040}\])?:|,;.&=+-]')

    dull = r'[^\047"/\000-\040]'

    space_sub_simple = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s)'
            r'(%(space)s*(?:%(newline)s%(space)s*)+'
                r'(?=%(post_regex_off)s))?'
        r'|(?<=%(preregex2)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)*'
            r'(%(regex)s)'
            r'(%(space)s*(?:%(newline)s%(space)s*)+'
                r'(?=%(post_regex_off)s))?'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % locals()).sub

    def space_subber_simple(match):
        """
        Substitution callback

        Group indexes follow the alternatives of ``space_sub_simple``:
        0 = dull run, 1 = string literal, 2/3 = regex literal after an
        operator (plus trailing newline marker), 4/5/6 = regex literal after
        ``return`` (leading newline, literal, trailing newline marker),
        7 = newline between two statements, 8-10 = space that has to stay.
        """
        groups = match.groups()
        if groups[0]:
            return groups[0]
        elif groups[1]:
            return groups[1]
        elif groups[2]:
            if groups[3]:
                return groups[2] + '\n'
            return groups[2]
        elif groups[5]:
            return "%s%s%s" % (
                '\n' if groups[4] else '',
                groups[5],
                '\n' if groups[6] else '',
            )
        elif groups[7]:
            return '\n'
        elif groups[8] or groups[9] or groups[10]:
            return ' '
        else:
            return ''

    space_sub_banged = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'(%(space)s*(?:%(newline)s%(space)s*)*)'
            r'(%(regex)s)'
            r'(%(space)s*(?:%(newline)s%(space)s*)+'
                r'(?=%(post_regex_off)s))?'
        r'|(?<=%(preregex2)s)'
            r'(%(space)s*(?:(%(newline)s)%(space)s*)*)'
            r'(%(regex)s)'
            r'(%(space)s*(?:%(newline)s%(space)s*)+'
                r'(?=%(post_regex_off)s))?'
        r'|(?<=%(id_literal_close)s)'
            r'(%(space)s*(?:%(newline)s%(space)s*)+)'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s+)(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s+)(?=\+)'
        r'|(?<=-)(%(space)s+)(?=-)'
        r'|(%(space)s+)'
        r'|((?:%(newline)s%(space)s*)+)'
    ) % locals()).sub

    # Strips everything from a whitespace/comment run except bang comments.
    keep = _re.compile((
        r'%(space_chars)s+|%(space_comment_nobang)s+|%(newline)s+'
        r'|(%(bang_comment)s+)'
    ) % locals()).sub

    def keeper(match):
        """ Replacement callback for `keep`: emit bang comments only """
        return match.groups()[0] or ''

    def space_subber_banged(match):
        """
        Substitution callback (bang comments are kept)

        Same decisions as `space_subber_simple`, but every removed
        whitespace run is passed through `keep` first so that ``/*!...*/``
        comments inside it survive.
        """
        groups = match.groups()
        if groups[0]:
            return groups[0]
        elif groups[1]:
            return groups[1]
        elif groups[3]:
            return "%s%s%s%s" % (
                keep(keeper, groups[2]),
                groups[3],
                keep(keeper, groups[4] or ''),
                '\n' if groups[4] else '',
            )
        elif groups[7]:
            return "%s%s%s%s%s" % (
                keep(keeper, groups[5]),
                '\n' if groups[6] else '',
                groups[7],
                keep(keeper, groups[8] or ''),
                '\n' if groups[8] else '',
            )
        elif groups[9]:
            return keep(keeper, groups[9]) + '\n'
        elif groups[10] or groups[11] or groups[12]:
            return keep(keeper, groups[10] or groups[11] or groups[12]) or ' '
        else:
            return keep(keeper, groups[13] or groups[14])

    def jsmin(script, keep_bang_comments=False):
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
           http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped in newlines so the lookbehind based
        # alternatives also work at the very start; they are stripped again.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin

jsmin = _make_jsmin()
344
345
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((?:[\000-\011\013\014'
            r'\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r'
            r'\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:'
            r'[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;=?\]|}-]))?|(?<=[\00'
            r'0-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]'
            r'))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*'
            r'\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\['
            r'[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))(('
            r'?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
            r'*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;'
            r'=?\]|}-]))?|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000-\011\01'
            r'3\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?:'
            r'//[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]'
            r'*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./:-@\\-^'
            r'`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./'
            r':-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*['
            r'^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:['
            r'\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
            r')+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """
            Substitution callback

            Returns the text to keep for one match: literals verbatim, a
            newline or a single space where it is significant, otherwise
            the empty string.
            """
            groups = match.groups()
            if groups[0]:
                return groups[0]
            if groups[1]:
                return groups[1]
            if groups[2]:
                # regex literal after an operator; groups[3] flags a
                # trailing newline that has to stay
                return groups[2] + '\n' if groups[3] else groups[2]
            if groups[5]:
                # regex literal after "return"
                return "%s%s%s" % (
                    '\n' if groups[4] else '',
                    groups[5],
                    '\n' if groups[6] else '',
                )
            if groups[7]:
                return '\n'
            if groups[8] or groups[9] or groups[10]:
                return ' '
            return ''
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/'
            r'*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*)((?:/(?!'
            r'[\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^'
            r'\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((?:[\000-\011\013\01'
            r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^'
            r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+('
            r'?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;=?\]|}-]))?|(?<=['
            r'\000-#%-,./:-@\[-^`{-~-]return)((?:[\000-\011\013\014\016-\040'
            r']|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?['
            r'\r\n]))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*]['
            r'^*]*\*+)*/))*)*)((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|'
            r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*'
            r'/))((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\01'
            r'6-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)'
            r'+,.:;=?\]|}-]))?|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:'
            r'(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/'
            r'\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+)(?=[^\000-\040"#%-\047)*,./'
            r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\01'
            r'3\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=[^\000'
            r'-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|'
            r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=\+)|(?<=-)((?:[\000-\0'
            r'11\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=-'
            r')|((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*'
            r'\*+)*/))+)|((?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014'
            r'\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+)'
        )

        # Strips whitespace, line comments and ordinary block comments from
        # a run; only bang comments (the sole capturing group) survive.
        keep = _re.compile((
            r'[\000-\011\013\014\016-\040]+|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*'
            r'\*+)*/)+|(?:(?://[^\r\n]*)?[\r\n])+|((?:/\*![^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)+)'
        )).sub

        def keeper(match):
            """ Replacement callback for `keep`: emit bang comments only """
            return match.groups()[0] or ''

        def subber(match):
            """
            Substitution callback (bang comments are kept)

            Every whitespace run that gets dropped is filtered through
            `keep`, so ``/*!...*/`` comments inside it are preserved.
            """
            groups = match.groups()
            if groups[0]:
                return groups[0]
            if groups[1]:
                return groups[1]
            if groups[3]:
                # regex literal after an operator
                return "%s%s%s%s" % (
                    keep(keeper, groups[2]),
                    groups[3],
                    keep(keeper, groups[4] or ''),
                    '\n' if groups[4] else '',
                )
            if groups[7]:
                # regex literal after "return"
                return "%s%s%s%s%s" % (
                    keep(keeper, groups[5]),
                    '\n' if groups[6] else '',
                    groups[7],
                    keep(keeper, groups[8] or ''),
                    '\n' if groups[8] else '',
                )
            if groups[9]:
                # Significant newline: it is appended *after* filtering,
                # otherwise `keep` would strip it again.
                return keep(keeper, groups[9]) + '\n'
            if groups[10] or groups[11] or groups[12]:
                # A separator is required here; fall back to one space when
                # the run holds no bang comment.
                return (
                    keep(keeper, groups[10] or groups[11] or groups[12])
                    or ' '
                )
            # Plain whitespace / newline run: nothing but bang comments stay.
            return keep(keeper, groups[13] or groups[14])

    # Wrapped in newlines so the lookbehind alternatives also apply at the
    # very beginning of the script; stripped again afterwards.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
494
495
if __name__ == '__main__':
    def main():
        """
        Main

        Reads a script from stdin and writes the minified result to stdout.
        Command line flags: ``-b`` keeps bang comments, ``-p`` forces the
        pure python minifier; ``-bp`` / ``-pb`` enable both.
        """
        import sys as _sys

        argv = _sys.argv[1:]
        keep_bang_comments = '-b' in argv or '-bp' in argv or '-pb' in argv
        if '-p' in argv or '-bp' in argv or '-pb' in argv:
            xjsmin = _make_jsmin(python_only=True)
        else:
            xjsmin = jsmin

        _sys.stdout.write(xjsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))

    main()
513