-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathwiener.m
134 lines (100 loc) · 2.95 KB
/
wiener.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
function Y = wiener(X, SR)
% Y = wiener(X, SR)
% Apply wiener filter enhancement, attempting to duplicate
% ICSI's "nr" process.
% 2014-05-15 Dan Ellis [email protected]
% preemphasize
emphfilt = [1 -0.96];
X = filter(emphfilt, 1, X);
% STFT
targetwinsec = 0.025;
nfft = 2^round(log(targetwinsec*SR)/log(2));
nhop = nfft/4;
fftframesec = nhop/SR;
XS = stft(X, nfft, nfft, nhop);
% Magnitude
XMag = abs(XS);
% Mel domain
nmel = 40;
WW = fft2melmx(nfft, SR, nmel, 1, 0, SR/2, 0, 1);
XMel = WW * XMag;
% Figure voice activity (simple smoothed energy threshold)
VAD = yet_another_vad(XMel, SR/nhop);
% "Modified Wiener filter" per icslp02-aurora.pdf
% "Qualcomm-ICSI-OGI features for ASR", Adami et al, Interspeech 2002.
% Noise spectrum estimate - simple (dB) average over all non-VAD frames
% (log domain to match auroralib.c)
Noise = idB(mean(dB(XMel(:, find(VAD))),2));
% Duplicate for all time to give noise estimate W_hat
W_hat = repmat(Noise, 1, size(XMel,2));
% Online estimate of noise floor
%pole_r = 0.98;
%W_hat = filter_by_row((1 - pole_r), [1 -pole_r], XMel, pole_r*Noise);
X2 = XMel.^2;
W_hat2 = W_hat.^2;
% Wiener filter
%SNRapost = 10*log10(max(1e-2, (sum(X2) - sum(W_hat2))./sum(W_hat2)));
% SNRapost is actually (signal+noise)/(noise estimate) (noiscomp.c:138)
SNRapost = 10*log10(max(1e-2, (sum(X2))./sum(W_hat2)));
% Mapping SNR to overmasking factor (eqn (2) from paper)
gamma_k = max(1.25, min(3.125, -1.875/20*SNRapost + 3.125));
% Estimating masking filter as overmasked SNR (eqn (1) from paper)
beta = 0.01;
Hinst2 = max(beta, (X2 - repmat(gamma_k, nmel, 1) .* W_hat2)./X2);
% Smooth in time and frequency
twinlen = 21;
t_kern = hanning(twinlen)'/sum(hanning(twinlen));
fwinlen = 5;
f_kern = hanning(fwinlen)'/sum(hanning(fwinlen));
% nr.c actually uses a one-pole smoothing along time with
% alpha = 0.1, and a two-frame advance to account for lag.
% I think it uses 16 ms hop.
% (noisecomp.c:164; nr.c:251;
% freqfilt looks to be boxcar smoothing over 21 FFT bins
% (noisecomp.c:238 et seq.)
H2 = conv2(f_kern, t_kern, Hinst2, 'same');
alpha = 0.001;
Shat = sqrt(max(alpha*W_hat2, X2 .* H2));
% Back into FFT domain
% Reconstruct
XMask = WW'*(Shat./XMel);
Y = istft(XS.*XMask, nfft, nfft, nhop);
% deemphasize
Y = filter(1, emphfilt, Y);
% Y is my final answer
% Plotting all at end to preserve flow
do_plot = 0;
if do_plot
% plot
%specgram(X, nfft, SR);
ax = [-30 30];
subplot(411)
imgsc(dB(XMel));
caxis(ax);
title('X')
subplot(412)
imgsc(dB(W_hat));
caxis(ax);
title('W\_hat')
subplot(413)
%plot(SNRapost);
%title('SNRapost');
imgsc(dB(Hinst2)/2)
caxis(ax);
title('Hinst');
subplot(414)
%imgsc(dB(Shat));
%caxis(ax);
%title('Shat');
imgsc(dB(H2)/2)
caxis(ax);
title('H');
linkaxes
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Y = filter_by_row(B, A, X, Z)
% Apply a single filter to every row of X; use initial state
nr = size(X,1);
for i = 1:nr
Y(i,:) = filter(B, A, X(i,:), Z(i,:));
end