forked from Ensembl/ensembl-webcode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPack.pm
467 lines (421 loc) · 18.1 KB
/
Pack.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
#Pack (July 2005)
# Based on "Pack.js" by Dean Edwards <http://dean.edwards.name/>
# Ported to Perl by Rob Seiler, ELR Software Pty Ltd <http://www.elr.com.au>
# Copyright 2005. License <http://creativecommons.org/licenses/LGPL/2.1/>
package Pack;
use strict;
use Data::Dumper;
use ParseMaster;
# Package wide variable declarations
use vars qw/$VERSION $PM_VERSION
$_X_encodePrivate $_JSunpack $_JSdecode %baseLookup
$_X_encode10 $_X_encode36 $_X_encode62 $_X_encode95
$_JSencode10 $_JSencode36 $_JSencode62 $_JSencode95
@_X_parsers
$_X_script $_X_encoding $_X_fastDecode $_X_specialChars
/;
$VERSION = '024';
$PM_VERSION = $ParseMaster::VERSION;
# Package wide constants
my $X_IGNORE = q{$1};
my $X_ENCODE = q/\x24encode\(\x24count\)/; # NB: requires g modifier
my $PERL = 'perl'; # Flag to indicate whether we need to use one of our "internal" Perl encoding functions
my $JSCRIPT = 'jscript'; # or embed a pre-build JScript encoding function
########################################
##################
sub pack($$$$) { # require 4 arguments
##################
#print Dumper(@_);
($_X_script, $_X_encoding, $_X_fastDecode, $_X_specialChars) = @_;
# validate parameters (sort of!)
$_X_script .= "\n";
$_X_encoding = ($_X_encoding > 95) ? 95 : $_X_encoding;
@_X_parsers = (); # Reset parsers
####################
sub _X_pack($) { # require 1 argument
####################
# Run the script through every registered parsing routine, in registration
# order, handing each routine the output of the one before it.
# Argument: the script text.  Returns: the fully processed text.
    my ($text) = @_;
    # Deliberately index-driven: the bound is re-read on every pass, exactly
    # like a C-style loop, so changes made to @_X_parsers while a routine is
    # running are taken into account.
    my $idx = 0;
    while ($idx < scalar(@_X_parsers)) {
        my $stage = $_X_parsers[$idx];
        $text = $stage->($text);
        $idx++;
    }
    return $text;
};
######################
sub _X_addParser { #
######################
# Queue one parsing routine (a code reference) at the end of @_X_parsers;
# the queued routines are all run later, in one go, by _X_pack.
# Returns whatever push returns (the new length of the list).
    my ($routine) = @_;
    return push(@_X_parsers, $routine);
}
#############################
sub _X_basicCompression { #
#############################
# zero encoding - just removal of white space and comments
# Argument: the script text.  Returns: the result of running the rule set
# below over that text with ParseMaster.
    my ($source) = @_;
    # Each entry is the exact argument list for one ParseMaster->add call.
    # The rules are registered strictly in the order listed here.
    my @rules = (
        # protect strings
        [q/'[^'\n\r]*'/, $X_IGNORE],
        [q/"[^"\n\r]*"/, $X_IGNORE],
        # remove comments
        [q/\/\/[^\n\r]*[\n\r]/],
        [q/\/\*[^*]*\*+([^\/][^*]*\*+)*\//],
        # protect regular expressions
        [q/\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)/, q{$2}], # IGNORE
        [q/[^\w\x24\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?/, $X_IGNORE],
        # remove: ;;; doSomething(); (only when special characters are enabled)
        ($_X_specialChars ? ([q/;;[^\n\r]+[\n\r]/]) : ()),
        # remove redundant semi-colons
        [q/;+\s*([};])/, q{$2}],
        # remove white-space
        [q/(\b|\x24)\s+(\b|\x24)/, q{$2 $3}],
        [q/([+\-])\s+([+\-])/, q{$2 $3}],
        [q/\s+/, ''],
    );
    my $compressor = ParseMaster->new();
    # make safe
    $compressor->escapeChar("\\");
    foreach my $rule (@rules) {
        $compressor->add(@{$rule});
    }
    # done
    return $compressor->exec($source);
}
###############################
sub _X_encodeSpecialChars { #
###############################
# Shorten the specially marked identifiers in the script:
#   * names introduced by one or more "$" signs are truncated,
#   * names starting with a single underscore are replaced by "_<n>".
# Argument: the script text.  Returns: the text after both replacements.
    my ($source) = @_;
    my $shortener = ParseMaster->new();
    # replace: $name -> n, $$name -> na
    $shortener->add(q/((\x24+)([a-zA-Z\x24_]+))(\d*)/,
        sub {
            # the argument list is the match array followed by the offset
            my @groups = @_;
            my $base = pop @groups;
            my $dollars = length($groups[$base+2]);
            my $letters = length($groups[$base+3]);
            # start at whichever is smaller: the "$" count or the name length
            my $from = ($dollars > $letters) ? $letters : $dollars;
            return substr($groups[$base+1], $from, $dollars) . $groups[$base+4];
        });
    # replace: _name -> _0, double-underscore (__name) is ignored
    my $private_re = q/\b_[A-Za-z\d]\w*/;
    # build the word list
    my %wordlist = _X_analyze($source, $private_re, $_X_encodePrivate);
    # quick ref, eg _private1 => '_0', _private2 => '_1'
    my $code_for = $wordlist{X_encoded};
    $shortener->add($private_re,
        sub {
            my @groups = @_;
            my $base = pop @groups;
            return $code_for->{$groups[$base]};
        });
    return $shortener->exec($source);
};
###########################
sub _X_encodeKeywords { #
###########################
# Replace every word of the script by its short code and wrap the result
# in the decoding boot function built by _X_bootStrap.
# Argument: the script text.  Returns: the boot-strapped script, or the
# (false) input itself when there is nothing to encode.
    my ($source) = @_;
    # escape high-ascii values already in the script (i.e. in strings)
    $source = _X_escape95($source) if ($_X_encoding > 62);
    # create the parser
    my $word_parser = ParseMaster->new();
    my $to_code = _X_getEncoder($_X_encoding, $PERL);
    # for high-ascii, don't encode single character low-ascii
    my $word_re = q/\w+/;
    $word_re = q/\w\w+/ if ($_X_encoding > 62);
    # build the word list
    my %wordlist = _X_analyze($source, $word_re, $to_code);
    # eg alert => 2, function => 10 etc
    my $code_for = $wordlist{X_encoded};
    # encode
    $word_parser->add($word_re,
        sub {
            my @groups = @_;
            my $base = pop @groups;
            return $code_for->{$groups[$base]};
        });
    # nothing to wrap when the script is false
    return $source unless $source;
    # if encoded, wrap the script in a decoding function
    return _X_bootStrap(\$word_parser->exec($source), \%wordlist);
}
####################
sub _X_analyze { #
####################
# Build the word list for a script.
# Arguments: the script text, a regular expression (as a string) that
# matches one word, and a code reference turning an index into its code.
# Returns a flat key/value list with three hash references:
#   X_sorted    index -> word
#   X_encoded   word  -> code
#   X_protected index -> 1 for every "protected" slot
    my ($source, $pattern, $encoder) = @_;
    # retrieve all words in the script (list context keeps every match)
    my @words = $source =~ m/$pattern/g;
    my %word_at = ();      # index -> word
    my %code_for = ();     # word -> code
    my %is_protected = (); # index -> true for protected slots
    if (@words) {
        my @uniq = ();          # distinct words ('$' prefixed), in first-seen order
        my %slot_of_code = ();  # '$' . code -> index that produces this code
        my %code_at = ();       # index -> code
        my %tally = ();         # word -> [usage count, array position]
        # count the occurrences, walking the matches from last to first
        for my $pos (reverse(0 .. $#words)) {
            my $key = '$' . $words[$pos];
            if (!exists($tally{$key})) {
                my $slot = scalar(@uniq);
                # position is kept as a secondary sort key
                $tally{$key} = [0, $pos];
                push(@uniq, $key);
                # remember which code each index yields; a word that looks
                # like one of these codes could be mistaken for an encoding
                $code_at{$slot} = $encoder->($slot);
                $slot_of_code{'$' . $code_at{$slot}} = $slot;
            }
            $tally{$key}[0]++;
        }
        # a word that is also used as a code is "protected": it is pinned to
        # the slot whose code equals the word itself (e.g. "do" stays "do"),
        # and its count is zeroed so that it sorts behind the real words
        for my $key (reverse(@uniq)) {
            next unless exists($slot_of_code{$key});
            my $slot = $slot_of_code{$key};
            $word_at{$slot} = substr($key, 1);
            $is_protected{$slot} = 1;
            $tally{$key}[0] = 0;
        }
        # most frequent first; ties are broken by array position, highest first
        my @ranked = sort {
            ($tally{$b}[0] - $tally{$a}[0]) or ($tally{$b}[1] <=> $tally{$a}[1])
        } @uniq;
        # fill the free slots around the protected ones with the ranked words
        my $next = 0;
        for my $slot (0 .. $#ranked) {
            if (!exists($word_at{$slot})) {
                $word_at{$slot} = substr($ranked[$next++], 1);
            }
            $code_for{$word_at{$slot}} = $code_at{$slot};
        }
    }
    return (X_sorted => \%word_at, X_encoded => \%code_for, X_protected => \%is_protected);
}
######################
sub _X_bootStrap { #
######################
# build the boot function used for loading and decoding
# Arguments (both references):
#   $X_packed   - reference to the packed script text
#   $X_keywords - reference to the word-list hash; the X_sorted and
#                 X_protected entries are read here
# Returns a string of the form "eval(<unpack function>(<parameters>))\n".
# The substitutions below are applied to copies of the JScript literals in a
# fixed order; each later step relies on the text produced by the earlier ones.
my ($X_packed, $X_keywords) = @_; # Reference arguments!
#print Dumper ($X_keywords) . "\n";
# $packed: the packed script - dereference, escape and wrap in single quotes
$X_packed = "'" . &_X_escape($$X_packed) ."'";
my %sorted = %{$$X_keywords{X_sorted}}; # Dereference to local variables
my %protected = %{$$X_keywords{X_protected}}; # for simplicity
my @sorted = ();
foreach my $key (keys %sorted) {$sorted[$key] = $sorted{$key}}; # Convert hash (index->word) to a standard list
# ascii: base for encoding - the smaller of the word count and the requested encoding, but never 0
my $X_ascii = ((scalar(@sorted) > $_X_encoding) ? $_X_encoding : scalar(@sorted)) || 1;
# count: number of (unique {RS}) words contained in the script
my $X_count = scalar(@sorted); # Passed to the boot function and checked for the zero-keyword case below
# keywords: list of words contained in the script
foreach my $i (keys %protected) {$sorted[$i] = ''}; # Blank out protected words
#print Dumper(@sorted) . "\n";
# convert from a string to an array - prepare keywords as a JScript string->array {RS}
$X_keywords = "'" . join('|',@sorted) . "'.split('|')";
# encode: encoding function (used for decoding the script)
my $X_encode = $_X_encoding > 62 ? $_JSencode95 : &_X_getEncoder($X_ascii,$JSCRIPT); # This is a JScript function (as a string)
# rename the placeholders inside the JScript encoder text: _encoding -> $ascii, arguments.callee -> $encode
$X_encode =~ s/_encoding/\x24ascii/g; $X_encode =~ s/arguments\.callee/\x24encode/g;
# inline form of the encoder: "$count", or "$count.toString($ascii)" for bases above 10
my $X_inline = '$count' . ($X_ascii > 10 ? '.toString($ascii)' : '');
# decode: code snippet to speed up decoding
my $X_decode = '';
if ($_X_fastDecode) {
# create the decoder
$X_decode = &_X_getFunctionBody($_JSdecode); # ie from the Javascript literal function
# high-ascii: the decoder's generic \\w match is swapped for the high-ascii character class
if ($_X_encoding > 62) {$X_decode =~ s/\\\\w/[\\xa1-\\xff]/g}
# perform the encoding inline for lower ascii values
elsif ($X_ascii < 36) {$X_decode =~ s/$X_ENCODE/$X_inline/g}
# special case: when $X_count==0 there are no keywords. i want to keep
# the basic shape of the unpacking function so i'll frig the code...
if (!$X_count) {$X_decode =~ s/(\x24count)\s*=\s*1/$1=0/}
}
# boot function
my $X_unpack = $_JSunpack;
if ($_X_fastDecode) {
# insert the decoder directly after the first "{" of the boot function
$X_unpack =~ s/\{/\{$X_decode;/;
}
# use single quotes throughout the boot function text
$X_unpack =~ s/"/'/g;
if ($_X_encoding > 62) { # high-ascii
# get rid of the word-boundaries for regexp matches
$X_unpack =~ s/'\\\\b'\s*\+|\+\s*'\\\\b'//g; # Not checked! {RS}
}
if ($X_ascii > 36 || $_X_encoding > 62 || $_X_fastDecode) {
# insert the encode function
$X_unpack =~ s/\{/\{\$encode=$X_encode;/;
} else {
# perform the encoding inline (first occurrence only - no g modifier here)
$X_unpack =~ s/$X_ENCODE/$X_inline/;
}
# arguments {RS} Do this before using &pack because &pack changes the pack parameters (eg $fastDecode) in Perl!!
my $X_params = "$X_packed,$X_ascii,$X_count,$X_keywords"; # Interpolate to comma separated string
if ($_X_fastDecode) {
# insert placeholders for the decoder
$X_params .= ',0,{}';
}
# pack the boot function too (encoding 0, no fast decode, special characters on)
# NOTE: this recursive call overwrites the package-wide pack parameters and resets the parser list
$X_unpack = &pack($X_unpack,0,0,1);
# the whole thing
return "eval(" . $X_unpack . "(" . $X_params . "))\n";
};
#######################
sub _X_getEncoder { #
#######################
# Pick the encoder that matches a base.
# Arguments: the base, and the language flag.  When the flag equals $JSCRIPT
# the JScript encoder (held in a $_JSencodeNN variable) is returned,
# otherwise the Perl one (held in a $_X_encodeNN variable).
# Thresholds: above 62 -> 95, above 36 -> 62, above 10 -> 36, else 10.
    my ($X_ascii, $language) = @_;
    my $want_jscript = ($language eq $JSCRIPT);
    if ($X_ascii > 62) {
        return $want_jscript ? $_JSencode95 : $_X_encode95;
    }
    if ($X_ascii > 36) {
        return $want_jscript ? $_JSencode62 : $_X_encode62;
    }
    if ($X_ascii > 10) {
        return $want_jscript ? $_JSencode36 : $_X_encode36;
    }
    return $want_jscript ? $_JSencode10 : $_X_encode10;
};
#############################
# Perl versions of encoders #
#############################
# Every Perl encoder takes one number and hands it to _encodeBase together
# with its own fixed base.
# base10 zero encoding - characters: 0123456789
$_X_encode10 = sub { my ($number) = @_; return _encodeBase($number, 10); };
# base36 - characters: 0123456789abcdefghijklmnopqrstuvwxyz
$_X_encode36 = sub { my ($number) = @_; return _encodeBase($number, 36); };
# base62 - characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
$_X_encode62 = sub { my ($number) = @_; return _encodeBase($number, 62); };
# high-ascii values - characters: the 95 characters chr(161) through chr(255)
$_X_encode95 = sub { my ($number) = @_; return _encodeBase($number, 95); };
# Lookup character sets for baseN encoding (digit value -> character)
$baseLookup{10} = [0 .. 9];                             # base 10
$baseLookup{36} = [0 .. 9, 'a' .. 'z'];                 # base 36
$baseLookup{62} = [0 .. 9, 'a' .. 'z', 'A' .. 'Z'];     # base 62
$baseLookup{95} = [map { chr($_ + 161) } (0 .. 94)];    # base95 (high ascii)
#print Dumper(%baseLookup);
#####################
sub _encodeBase { #
#####################
# Generic base conversion using the %baseLookup digit tables (perl version only)
# Arguments: the number to convert and the base.
# Returns the number written with the digits of that base; a base without a
# lookup table returns the number unchanged, as a string.
    my ($value, $radix) = @_;
    # Do we know this encoding?  If not, hand the number straight back
    return "$value" unless exists($baseLookup{$radix});
    # zero never enters the digit loop, so it maps to the first digit here
    return $baseLookup{$radix}[0] if ($value == 0);
    my $digits = '';
    # peel off the lowest digit each round and prepend it
    for (; $value > 0; $value = int($value / $radix)) {
        $digits = $baseLookup{$radix}[$value % $radix] . $digits;
    }
    return $digits;
};
#############################
$_X_encodePrivate = sub { #
#############################
# special _chars: the code for index N is an underscore followed by N
    my ($index) = @_;
    return "_" . $index;
};
############################
sub _X_escape($) { # require 1 argument
############################
# protect characters used by the parser
# Puts a backslash in front of every backslash and every single quote.
# Argument: the text to escape.  Returns: the escaped copy.
# NB: the declaration used to read "($script)".  Perl treats that as a
# prototype, not as a named parameter, and it is not a valid one: the sub
# could then only be called through the "&" form.  "($)" is the valid
# one-argument prototype and leaves "&_X_escape(...)" calls untouched.
my $X_script = shift;
$X_script =~ s/([\\'])/\\$1/g;
return $X_script;
};
#####################
sub _X_escape95 { #
#####################
# protect high-ascii characters already in the script
# Every character from \xa1 to \xff is rewritten as a backslash, an "x" and
# the character's code in hexadecimal.
# Argument: the script text.  Returns: the rewritten copy.
    my ($text) = @_;
    $text =~ s{([\xa1-\xff])}{sprintf("\\x%1x", ord($1))}eg;
    return $text;
};
############################
sub _X_getFunctionBody { #
############################
# extract the body of a function (ie between opening/closing {}) - consistent with Dean Edwards approach
# Argument: the function source text.
# Returns: everything between the first "{" and the last "}", or an empty
# string when the text has no such pair (no "{", no "}", or the last "}"
# sits before the first "{").
    my ($X_function) = @_;
    my $start = index($X_function, '{');
    my $end = rindex($X_function, '}');
    # without this guard a -1 from index/rindex, or a negative length, would
    # make substr return an unrelated slice of the text
    return '' if ($start < 0 || $end < $start);
    return substr($X_function, $start + 1, $end - $start - 1);
};
######################
sub _X_globalize { #
######################
# set the global flag on a RegExp (you have to create a new one) !!! Unused in perl version
# Stub only: any arguments are ignored and nothing is returned.
    return;
};
# build the parsing routine
&_X_addParser(\&_X_basicCompression);
&_X_addParser(\&_X_encodeSpecialChars) if ($_X_specialChars);
&_X_addParser(\&_X_encodeKeywords) if ($_X_encoding);
# go!
return &_X_pack($_X_script);
}
########################
# Javascript Literals #
########################
# JScript function "_unpack" - from DeanEdwards pack.js (NB: No ";" after final "}")
# Held as plain text: the here-document is single-quoted, so nothing inside it
# is interpolated by Perl and names such as $packed are JScript identifiers.
# _X_bootStrap works on a copy of this text when it builds the boot function.
($_JSunpack) = <<'END_JSCRIPT_UNPACK';
/* unpacking function - this is the boot strap function */
/* data extracted from this packing routine is passed to */
/* this function when decoded in the target */
function($packed, $ascii, $count, $keywords, $encode, $decode) {
while ($count--)
if ($keywords[$count])
$packed = $packed.replace(new RegExp('\\b' + $encode($count) + '\\b', 'g'), $keywords[$count]);
/* RS_Debug = $packed; */ /* {RS} !!!!!!!!! */
return $packed;
}
END_JSCRIPT_UNPACK
# JScript function "_decode" - from DeanEdwards pack.js
# Held as plain text (single-quoted here-document, no Perl interpolation).
# When fast decoding is requested, _X_bootStrap takes the body of this
# function via _X_getFunctionBody and inserts it into the boot function.
($_JSdecode) = <<'END_JSCRIPT_DECODE';
/* code-snippet inserted into the unpacker to speed up decoding */
function() {
/* does the browser support String.replace where the */
/* replacement value is a function? */
if (!''.replace(/^/, String)) {
/* decode all the values we need */
while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
/* global replacement function */
$keywords = [function($encoded){return $decode[$encoded]}];
/* generic match */
$encode = function(){return'\\w+'};
/* reset the loop counter - we are now doing a global replace */
$count = 1;
}
};
END_JSCRIPT_DECODE
# JScript versions of encoders
# Each one is held as plain text in a single-quoted here-document (no Perl
# interpolation); _X_getEncoder returns them when asked for the JScript flavour.
# Base 10: the number is returned as it is.
($_JSencode10) = <<'END_JSCRIPT_ENCODE10';
/* zero encoding */
/* characters: 0123456789 */
function($charCode) {
return $charCode;
};
END_JSCRIPT_ENCODE10
# JScript base 36 encoder, held as plain text (single-quoted here-document).
($_JSencode36) = <<'END_JSCRIPT_ENCODE36';
/* inherent base36 support */
/* characters: 0123456789abcdefghijklmnopqrstuvwxyz */
function($charCode) {
return $charCode.toString(36);
};
END_JSCRIPT_ENCODE36
# JScript base 62 encoder, held as plain text (single-quoted here-document).
# The _encoding and arguments.callee placeholders in the text are renamed by
# _X_bootStrap (to $ascii and $encode) before the function is embedded.
($_JSencode62) = <<'END_JSCRIPT_ENCODE62';
/* hitch a ride on base36 and add the upper case alpha characters */
/* characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ */
function($charCode) {
return ($charCode < _encoding ? '' : arguments.callee(parseInt($charCode / _encoding))) +
(($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
};
END_JSCRIPT_ENCODE62
# JScript high-ascii (base 95) encoder, held as plain text (single-quoted
# here-document).  The _encoding and arguments.callee placeholders in the text
# are renamed by _X_bootStrap (to $ascii and $encode) before it is embedded.
# NOTE(review): the "characters:" comment inside the literal looks mis-encoded;
# it is part of the string, so it is left exactly as found.
($_JSencode95) = <<'END_JSCRIPT_ENCODE95';
/* use high-ascii values */
/* characters: 、�ウЖ��⑭�渦慨偽係杭纂従神疎団兎波品北洋椀冫嘖孛忤掣桀毳烙痰邃繙艾蜉謖邇關髓齡��巐鄕� */
function($charCode) {
return ($charCode < _encoding ? '' : arguments.callee($charCode / _encoding)) +
String.fromCharCode($charCode % _encoding + 161);
};
END_JSCRIPT_ENCODE95
###########
# END #
###########
1; # Pack #
###########