-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcharconv
executable file
·96 lines (82 loc) · 2.89 KB
/
charconv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env perl
#=======================================================================
# charconv
# File ID: 6e32b188-5d37-11df-98d6-90e6ba3022ac
# Converts from cp865 to Unicode (UTF-8). An alternative mapping file is
# specified with the "-i" option; otherwise the default file name is
# used. -p wraps the output in <pre>...</pre>, and -P wraps it in a
# minimal complete HTML document.
# License: GNU GPL v2 or later.
#=======================================================================
use strict;
use warnings;
require 'getopts.pl';
# Default code page -> Unicode mapping file; overridden with -i.
my $map_file = "$ENV{HOME}/src/svn/div/trunk/ibm2ent/cp865.txt";

# Maps an input byte value (0..255) to the string emitted for it.
my %Table;

# Unbuffered output.
$| = 1;

# Getopts() is provided by the getopts.pl library required above and
# stores each option in $main::opt_<letter>.
# NOTE(review): getopts.pl appears to have been dropped from core Perl in
# 5.16; consider moving to Getopt::Std -- confirm before changing.
# NOTE(review): -h is accepted, but nothing in this file acts on it.
Getopts('hi:pP');
$map_file = $main::opt_i if defined($main::opt_i);

# Start from an identity mapping so bytes missing from the map file are
# still converted (byte value N becomes code point N).
for my $byte (0 .. 255) {
    $Table{$byte} = widechar($byte);
}

# Read the mapping file. Expected line format: "0xHH<whitespace>0xHHHH",
# optionally followed by anything; comment (#) and blank lines are skipped.
# Three-argument open keeps a user-supplied file name from being
# interpreted as an open mode.
open(my $map_fh, '<', $map_file)
    or die("$map_file: Can’t open map file: $!");
while (my $line = <$map_fh>) {
    next if $line =~ /^#/ || $line =~ /^\s*$/;
    if ($line =~ /^0x([0-9A-Fa-f]{2})\s+0x([0-9A-Fa-f]{4})/) {
        my ($Cp, $Uni) = (hex($1), hex($2));
        # Entries that map a byte to itself are already covered by the
        # identity table above.
        if ($Cp != $Uni) {
            $Table{$Cp} = widechar($Uni);
        }
    } else {
        # Strip the line ending so the quoted text stays on one line.
        chomp(my $shown = $line);
        print STDERR "Line $.: Invalid line: \"$shown\"\n";
    }
}
close($map_fh);

# Optional HTML prologue: -P takes precedence over -p.
if ($main::opt_P) {
    print("<html><head></head><body><pre>\n");
} elsif ($main::opt_p) {
    print("<pre>\n");
}

# Convert the input (files named on the command line, or stdin) one
# character at a time. "." does not match the newline, which is therefore
# passed through unchanged.
while (my $input = <>) {
    $input =~ s/(.)/$Table{ord($1)}/g;
    print $input;
}

# Matching HTML epilogue.
if ($main::opt_P) {
    print("</pre></body></html>\n");
} elsif ($main::opt_p) {
    print("</pre>\n");
}
# Taken from h2u,v 1.5 (r386) 2002/11/20 00:09:40
#
# widechar($Val [, $allow_invalid])
#
# Encodes the numeric code point $Val as a UTF-8 byte sequence and returns
# it as a string of one to six characters, each with an ordinal below 256.
#
#   $Val            Code point to encode (a number).
#   $allow_invalid  Optional, defaults to false. When false, surrogates
#                   (0xD800-0xDFFF), 0xFFFE and 0xFFFF are replaced by
#                   U+FFFD before encoding. When true they are encoded
#                   as given.
#
# Negative values and values of 0x80000000 or above cannot be encoded and
# yield the encoding of U+FFFD instead.
#
# The five- and six-byte forms follow the original 31-bit UTF-8 scheme and
# are kept for compatibility with the code this routine was taken from.
sub widechar {
    my ($Val, $allow_invalid) = @_;

    # A negative number is not a code point.
    return widechar(0xFFFD) if $Val < 0;

    if ($Val < 0x80) {
        return sprintf("%c", $Val);
    } elsif ($Val < 0x800) {
        return sprintf("%c%c", 0xC0 | ($Val >> 6),
                               0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x10000) {
        # Numeric comparison throughout: $Val is a number, not a string.
        if (!$allow_invalid
            && (($Val >= 0xD800 && $Val <= 0xDFFF)
                || $Val == 0xFFFE
                || $Val == 0xFFFF)) {
            $Val = 0xFFFD;
        }
        return sprintf("%c%c%c", 0xE0 | ($Val >> 12),
                                 0x80 | (($Val >> 6) & 0x3F),
                                 0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x200000) {
        return sprintf("%c%c%c%c", 0xF0 | ($Val >> 18),
                                   0x80 | (($Val >> 12) & 0x3F),
                                   0x80 | (($Val >> 6) & 0x3F),
                                   0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x4000000) {
        return sprintf("%c%c%c%c%c", 0xF8 | ($Val >> 24),
                                     0x80 | (($Val >> 18) & 0x3F),
                                     0x80 | (($Val >> 12) & 0x3F),
                                     0x80 | (($Val >> 6) & 0x3F),
                                     0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x80000000) {
        return sprintf("%c%c%c%c%c%c", 0xFC | ($Val >> 30),
                                       0x80 | (($Val >> 24) & 0x3F),
                                       0x80 | (($Val >> 18) & 0x3F),
                                       0x80 | (($Val >> 12) & 0x3F),
                                       0x80 | (($Val >> 6) & 0x3F),
                                       0x80 | ($Val & 0x3F));
    } else {
        # Too large for any of the forms above.
        return widechar(0xFFFD);
    }
} # widechar()
__END__
#### End of file charconv ####