-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhistogram
executable file
·105 lines (101 loc) · 1.54 KB
/
histogram
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#! /usr/bin/awk -f
# Set-up rule: prints a "#"-prefixed header describing the run and resolves
# the two user options, which are passed on the command line with -v:
#   col   index of the field to histogram (defaults to 1)
#   bins  number of histogram bins        (defaults to 100)
# It also initialises the value counter "c" and the one-shot warning flag
# "warn" used while reading records.
BEGIN{
    # ARGC counts the command name plus every operand, so a value above 2
    # means more than one operand was given; their data end up in a single
    # histogram, and only the first operand is named in the header below.
    if (ARGC>2)
    {
        print "#WARNING: Average over multiple files!!";
    }
    # No first operand: report the input as standard input.
    if (ARGV[1]=="")
    {
        print "#FILE: STDIN";
    }
    else
    {
        print "#FILE:",ARGV[1];
    }
    # An unset option compares equal to the empty string, which selects
    # the default and advertises how to override it.
    if (col=="")
    {
        col=1;
        print "#Setting column to",col,"(use option \"-v col=X\" to change)";
    }
    else
    {
        print "#Using column",col;
    }
    if (bins=="")
    {
        bins=100;
        print "#Setting bins to",bins,"(use option \"-v bins=X\" to change)";
    }
    else
    {
        print "#Using bins",bins;
    }
    c=0;     # number of values stored so far
    warn=0;  # becomes non-zero once the "not enough data" note was printed
}
# Ignore every record whose first character is "@" or "#": such lines are
# dropped before any later rule sees them.
$0 ~ /^[@#]/ {
    next
}
# Per-record rule: store field number "col" of the current record in
# data[] and keep the running minimum and maximum up to date. Records with
# fewer than "col" fields are not stored; only the first such record is
# reported.
{
    if (NF < col)
    {
        # The flag keeps this note from being repeated for later lines.
        if (warn == 0)
        {
            print "# Not enough data in line",NR;
            warn++;
        }
    }
    else
    {
        data[c++]=$col;
        # While min/max still compare equal to "" they have no value yet
        # and simply take the current field.
        if (min == "" || min > $col)
        {
            min=$col;
        }
        if (max == "" || max < $col)
        {
            max=$col;
        }
    }
}
# Final rule: sort the collected values into "bins" equally wide bins
# spanning [min, max] and print the histogram.
# Reads the globals data[0..c-1], c, min, max and bins.
# Output: a "#values= ..." summary line, a "#Output format" line, then one
# line per bin holding
#   bin centre, count, count divided by the number of values,
#   lower border, upper border.
# Terminates with exit status 1 (after a "#ERROR" line) when there is
# nothing to bin or when bins is not positive.
END{
    # Without any value the normalisation below would divide by zero.
    if (c == 0)
    {
        print "#ERROR: no data read, nothing to histogram";
        exit 1;
    }
    # A zero bin count would divide by zero when computing bin_size and a
    # negative one would silently print no bins at all.
    if (bins+0 <= 0)
    {
        print "#ERROR: bins must be a positive number, got",bins;
        exit 1;
    }
    bin_size=(max-min)/bins;
    for (i=0;i<bins;i++)
    {
        histogramm[i]=0;
    }
    for (i=0;i<c;i++)
    {
        # The maximum goes into the last bin instead of opening an extra
        # one. This also covers min == max, where bin_size is 0 and every
        # value takes this branch, so the division below is never reached.
        if (data[i]==max)
        {
            histo_nr=bins-1;
        }
        else
        {
            # A value lying exactly on a border counts to the higher bin.
            histo_nr=int((data[i]-min)/bin_size);
            # Floating-point rounding can lift a value just below max to
            # index "bins", which is never printed; keep it in the last bin.
            if (histo_nr >= bins)
            {
                histo_nr=bins-1;
            }
        }
        histogramm[histo_nr]++;
    }
    print "#values=",c,"min=",min,"max=",max,"bin_size=",bin_size;
    print "#Output format: rounded_value entree norm_ent low_border upper_border";
    for (i=0;i<bins;i++)
    {
        norm_ent=histogramm[i]/c;
        low_border=min+(bin_size*i);
        upper_border=min+(bin_size*(i+1));
        # The reported value of a bin is the midpoint of its two borders.
        histo_value=(upper_border+low_border)/2.0;
        print histo_value,histogramm[i],norm_ent,low_border,upper_border;
    }
}
#fs md5sum 962376de599988fff0358f8c13164806