-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmodeling_L4_preprocess.pl
20 lines (20 loc) · 3.34 KB
/
modeling_L4_preprocess.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/perl
# Level-4 preprocessing, part 1: append an EC class label to every row of
# ../Level1/ssf_ps_pfam_enz2.arff, producing ssf_ps_pfam_enz3_L4.arff in the
# current directory. The rest of the script reads that file.
use strict;
use warnings;

# Both inputs are consumed by the shell pipelines below. Inside a pipeline a
# missing file only makes the first command fail, and the shell reports the
# status of the LAST command, so a missing input would otherwise go unnoticed
# and yield empty intermediate files. Check up front instead.
for my $required_input ('../Level1/enz_L1', '../Level1/ssf_ps_pfam_enz2.arff') {
    -r $required_input
        or die "Required input file '$required_input' is missing or unreadable\n";
}

# Build the label column: take tab-separated field 2 of enz_L1, keep at most
# its first four dot-separated components, and prefix each value with ",EC"
# (e.g. "1.1.1.1" becomes ",EC1.1.1.1").
system("cat ../Level1/enz_L1|cut -f2|cut -d\".\" -f1-4|awk '{print \",EC\"\$0}'>ssf_ps_pfam_enz_addECclass_L4") == 0
    or die "Building ssf_ps_pfam_enz_addECclass_L4 failed (wait status $?)\n";
#Add labels
# awk stores line N of the label file in a[N] and appends it as a new last
# field to line N of the .arff file. Assigning a field makes awk re-join the
# record with single spaces, so the trailing sed (whose oddly quoted script is
# just 's/ //g' after shell quote removal) strips every space again, gluing
# the ",EC..." label directly onto the end of the row.
system("awk 'NR==FNR{a[NR]=\$0; next} {\$(NF+1)=a[FNR]}1' ssf_ps_pfam_enz_addECclass_L4 ../Level1/ssf_ps_pfam_enz2.arff|sed 's/'' ''/''''/g'>ssf_ps_pfam_enz3_L4.arff") == 0
    or die "Building ssf_ps_pfam_enz3_L4.arff failed (wait status $?)\n";
# For 61->211
# The list below holds 211 [i,j,k] triples, one per third-level EC class
# "i.j.k" to process.
# NOTE(review): the "61" presumably refers to the number of second-level
# classes these were expanded from -- confirm against the Level-3 stage.
my @array = (
[1,10,2],[1,10,3],[1,10,99],[1,1,1],[1,11,1],[1,11,2],[1,1,2],[1,12,1],[1,12,2],[1,12,5],[1,12,7],[1,12,98],[1,12,99],[1,1,3],[1,13,11],[1,13,12],[1,13,99],[1,14,11],[1,14,12],[1,14,13],[1,14,14],[1,14,15],[1,14,16],[1,14,17],[1,14,18],[1,14,19],[1,14,20],[1,14,21],[1,14,99],[1,1,5],[1,15,1],[1,16,1],[1,16,3],[1,16,8],[1,17,1],[1,17,3],[1,17,4],[1,17,7],[1,17,99],[1,18,1],[1,18,6],[1,1,98],[1,1,99],[1,20,1],[1,20,4],[1,20,98],[1,2,1],[1,21,3],[1,21,4],[1,22,1],[1,2,3],[1,2,4],[1,2,5],[1,2,7],[1,2,99],[1,3,1],[1,3,2],[1,3,3],[1,3,5],[1,3,7],[1,3,99],[1,4,1],[1,4,3],[1,4,4],[1,4,7],[1,4,99],[1,5,1],[1,5,3],[1,5,5],[1,5,8],[1,5,99],[1,6,1],[1,6,2],[1,6,3],[1,6,5],[1,6,99],[1,7,1],[1,7,2],[1,7,3],[1,7,7],[1,7,99],[1,8,1],[1,8,3],[1,8,4],[1,8,7],[1,8,98],[1,8,99],[1,9,3],[1,97,1],
[2,1,1],[2,1,2],[2,1,3],[2,2,1],[2,3,1],[2,3,2],[2,3,3],[2,4,1],[2,4,2],[2,4,99],[2,5,1],[2,6,1],[2,6,99],[2,7,1],[2,7,10],[2,7,11],[2,7,12],[2,7,13],[2,7,2],[2,7,3],[2,7,4],[2,7,6],[2,7,7],[2,7,8],[2,7,9],[2,8,1],[2,8,2],[2,8,3],[2,8,4],[2,9,1],
[3,10,1],[3,1,1],[3,1,11],[3,11,1],[3,1,13],[3,1,2],[3,1,21],[3,1,22],[3,1,25],[3,1,26],[3,1,27],[3,1,3],[3,1,30],[3,1,31],[3,13,1],[3,1,4],[3,1,5],[3,1,6],[3,1,7],[3,1,8],[3,2,1],[3,2,2],[3,3,1],[3,3,2],[3,4,11],[3,4,13],[3,4,14],[3,4,15],[3,4,16],[3,4,17],[3,4,18],[3,4,19],[3,4,21],[3,4,22],[3,4,23],[3,4,24],[3,4,25],[3,5,1],[3,5,2],[3,5,3],[3,5,4],[3,5,5],[3,5,99],[3,6,1],[3,6,3],[3,6,4],[3,6,5],[3,7,1],[3,8,1],
[4,1,1],[4,1,2],[4,1,3],[4,1,99],[4,2,1],[4,2,2],[4,2,3],[4,2,99],[4,3,1],[4,3,2],[4,3,3],[4,4,1],[4,5,1],[4,6,1],[4,99,1],
[5,1,1],[5,1,2],[5,1,3],[5,1,99],[5,2,1],[5,3,1],[5,3,2],[5,3,3],[5,3,4],[5,3,99],[5,4,1],[5,4,2],[5,4,3],[5,4,4],[5,4,99],[5,5,1],[5,99,1],
[6,1,1],[6,1,2],[6,2,1],[6,3,1],[6,3,2],[6,3,3],[6,3,4],[6,3,5],[6,4,1],[6,5,1],[6,6,1],
);

# The loop only ever APPENDS to these two report files. Remove leftovers from
# a previous run first; otherwise every line is duplicated on a rerun and the
# later "occurs exactly once" summary over SSFPSPFENZ_L4_EC_class comes out
# empty. A missing file is not an error for unlink.
unlink 'SSFPSPFENZ_L4_EC_instance_Usable', 'SSFPSPFENZ_L4_EC_class';

# Run one shell command and report (without aborting) when the shell signals
# failure. For a pipeline this is the status of its last command only.
my $run_step = sub {
    my ($command) = @_;
    system($command) == 0
        or warn "Command failed (wait status $?): $command\n";
    return;
};

for my $triple (@array) {
    my ($i, $j, $k) = @{$triple};

    my $ids_file    = "SSFPSPFENZ_L4_all_EC${i}.${j}_sparse_atleast2";   # temporary
    my $level2_arff = "SSFPSPFENZI_L4_EC${i}.${j}.arff";
    my $level3_arff = "SSFPSPFENZI_L4_EC${i}.${j}.${k}.arff";

    # 1. Keep only the part before the first ':' of each line of the Level-3
    #    list for class i.j; these values are the row keys looked up below.
    $run_step->("cat ../Level3/SSFPSPFENZ_L3_top3_EC${i}.${j}_sparse_atleast2|awk -F':' '{print \$1}'>$ids_file");

    # 2. Index the labelled .arff rows by their first comma-separated field and
    #    print, in key-list order, the row for every key that has one.
    $run_step->("awk -F\",\" 'FILENAME==\"ssf_ps_pfam_enz3_L4.arff\"{a[\$1]=\$0} FILENAME==\"$ids_file\"{if(a[\$1]){print a[\$1]}}' ssf_ps_pfam_enz3_L4.arff $ids_file>$level2_arff");

    # 3. Keep the rows labelled with class i.j.k. The dots are escaped and the
    #    third component must be followed by '.' or end of line: an unanchored
    #    /,EC3.1.1/ would also select rows of EC3.1.11 and EC3.1.13 (likewise
    #    2.7.1 vs 2.7.10-13, 3.1.2 vs 3.1.21-27, 3.1.3 vs 3.1.30/31).
    my $label_re = ",EC${i}\\.${j}\\.${k}(\\.|\$)";
    $run_step->("awk '/$label_re/{print \$0}' $level2_arff>$level3_arff");

    # 4. Record the class as usable when it has at least 15 rows.
    $run_step->("cat $level3_arff|awk 'END {if (NR>=15) print \"[$i,$j,$k]\"}'>>SSFPSPFENZ_L4_EC_instance_Usable");

    # 5. For every distinct (up to four-part) label in the class file, append
    #    one "[a,b,c]" line built from the label's first three components;
    #    substr(...,3) drops the leading "EC".
    $run_step->("cat $level3_arff|awk -F',' '{print \$NF}'|cut -d'.' -f1-4|sort|uniq|tr '.' ','|awk -F',' '{print \$1 \",\" \$2 \",\" \$3}'|awk '{print \"[\"substr(\$0,3)\"]\"}'>>SSFPSPFENZ_L4_EC_class");

    # 6. The key list was only needed for step 2.
    unlink $ids_file
        or warn "Could not remove $ids_file: $!\n";
}
# Closing steps, executed in order through the shell:
#   1. Write to SSFPSPFENZ_L4_EC_singleClassLabel every entry that occurs
#      exactly once in SSFPSPFENZ_L4_EC_class (uniq -c count equal to 1).
#   2. Join the lines of SSFPSPFENZ_L4_EC_instance_Usable into one
#      comma-separated line. Perl expands the \n to a real newline before the
#      shell sees it, and tr turns every newline, including the final one,
#      into a comma.
#   3. Delete the two intermediate files created at the top of the script.
my @closing_commands = (
    "cat SSFPSPFENZ_L4_EC_class|sort|uniq -c|awk -F' ' '{if(\$1==1) print \$2}'>SSFPSPFENZ_L4_EC_singleClassLabel",
    "cat SSFPSPFENZ_L4_EC_instance_Usable|tr '\n' ','> SSFPSPFENZ_L4_EC_label_Usable",
    "rm ssf_ps_pfam_enz_addECclass_L4 ssf_ps_pfam_enz3_L4.arff",
);
foreach my $closing_command (@closing_commands) {
    system($closing_command);
}