-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVAR_gLasso.h
549 lines (464 loc) · 28.6 KB
/
VAR_gLasso.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
#ifndef VAR_GLASSO_H_
#define VAR_GLASSO_H_
//#include <boost/numeric/conversion/cast.hpp>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_sf_expint.h>
#include <gsl/gsl_bspline.h>
#include <nmsimplex.h>
#include <asserts.h>
using namespace std;
class VAR_gLASSO {
public:
VAR_gLASSO () { }; //default constructor
~VAR_gLASSO () { }; //default destructor
//calculate the least-squares estimates as an initial estimate used for the Block Coordinate Descent Algorithm. INPUT: INPUT: a T0 by 1 matrix of data on the response (Y),
//a N*L*m by T0 matrix of all B-spline basis functions evaluated at all past data points on the X's (BS). OUTPUT: a N*L*m by 1 matrix of least-squares estimates (beta).
static void calcul_LS (Matrix &beta, const Matrix &Y, const Matrix &BS);
//calculate the gradient vector of the least-squares criterion function for each additive component (j,ell). INPUT: a T0 by 1 matrix of centered data on the response (Y),
//a N*L*m by T0 matrix of all centered B-spline basis functions evaluated at all past data points on the X's (BS), a N*L*m by 1 matrix of B-spline coefficients (beta),
//the number of covariates (N), and a specific additive component j = 1,...,N; ell = 1,...L. OUTPUT; a m by 1 matrix of gradients.
static void calcul_Gradients (Matrix &rho_jl, const Matrix &Y, const Matrix &BS, const Matrix &beta, const int N, const int j, const int ell);
//calculate the sum of squared residuals (SSR) for the additive autoregression model. INPUT: a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all
//centered B-spline basis functions, evaluated at all past data points on the X's (BS), a N*L*m by 1 matrix of B-spline coefficients (beta), the number of covariates (N), and
//the maximum number of lags (L). OUTPUT: the SSR.
static double calcul_SSR (const Matrix &Y, const Matrix &BS, const Matrix &beta, const int N, const int L_T);
//calculate the group LASSO objective function at a specific block, say the (j,ell) block while keeping all the other blocks fixed.
//INPUT: a m by 1 matrix of slope coefficients for the (j,ell)-th block (beta_jl), a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all centered B-spline
//basis functions evaluated at all past data points on the X's (BS), a N*L*m by 1 matrix of B-spline coefficients (beta), the number of covariates (N), a tuning coefficient (lambda_T),
//and a specific block j = 1,...,N; ell = 1,...L. OUTPUT: a function of specific block coordinates, say beta_jl.
static double obj_Func (const Matrix &beta_jl, const Matrix &Y, const Matrix &BS, const Matrix &beta, const int N, const double lambda_T, const int j, const int ell);
//rewrite VAR_gLASSO::obj_Func (defined above) in the GSL functional format, which can then be used as an argument to the GSL minimization routine
//employing the Simplex algorithm of Nelder and Mead.
static double obj_Func_gsl (const gsl_vector *beta_jl0, void *parms);
//calculate the group LASSO estimates for the B-spline coefficients by using the least-squares estimates as initial estimates. This routine employs the Block Coordinate Descent Algorithm
//(see p. 69 in Buhlmann & van de Geer (2011).
//INPUT: an N*L*m by 1 matrix of initial parameter vetor (beta_init), a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all centered B-spline basis
//functions evaluated at all past data points on the X's (BS), the number of covariates (N), the number of time lags (L_T), a tuning parameter (lambda_T), a maximum number of
//iterations (max_iter), a minimum tolerance level for numerical convergence (min_tol), and an output stream (glasso_out).
//OUTPUT: a N*L*m by 1 matrix of the group LASSO estimates for the B-spline coefficients (beta_init).
static void calcul_gLasso (Matrix &beta_init, const Matrix &Y, const Matrix &BS, const int N, const int L_T, const double lambda_T, const int max_iter,const double min_tol,
ofstream &glasso_out);
//select the optimal tuning parameters for the group LASSO by employing the BIC and the EBIC. INPUT: an N*L*m by 1 matrix of initial parameters (beta_init0), a T0 by 1 matrix of
//centered data on the response (Y), a N*L*m by T0 matrix of all centered B-spline basis functions evaluated at all past data points on the X's (BS), a divergence rate for the tuning
//parameter (lambda_glasso_rate), lower and upper bounds (lb and ub) of the scaling factor C0 in the tuning parameter (lambda = C0*lambda_glasso_rate), a number of grid points
//(ngrid) in the interval [lb, ub], a number of the number of covariates (N), the number of time lags (L_T), a maximum number of iterations (max_iter), a minimum tolerance level
//for numerical convergence (min_tol), output streams (ic_out and glasso_out).
//OUTPUT: optimal tuning parameters using the BIC and EBIC; optimal values of the BIC and the EBIC (opt_BIC and opt_EBIC).
static Matrix calcul_IC (double & opt_BIC, double & opt_EBIC, const Matrix &beta_init0, const Matrix &Y, const Matrix &BS, const double lambda_glasso_rate,
const double lb, const double ub, const int ngrid, const int N, const int L_T, const int max_iter, const double min_tol, ofstream &ic_out,
ofstream &glasso_out);
//calculate the adaptive group LASSO estimates from datasets for nonparametric additive ARDL models with each component being approximated by B-splines. INPUT: a T by 1 matrix of data
//on the response Y (Y0), a T by N matrix of data on covariates (X0), a maximum number of autoregressive lags (L_T), an order of B-spline polynomials (poly_degree, set it to 4 to yield cubic B-splines),
//a number of break points in [0,1] (nbreak), tuning parameters for group LASSO and adaptive group LASSO (lambda1 and lambda2), a maximum number of iterations (max_iter) for the Block
//Coordinate Descent Algorithm, a minimum level of tolerance to evaluate numerical convergence of this algorithm (min_tol), a output file stream (glasso_out), and an initial estimates (beta_init).
//OUTPUT: a N*L*m by 1 matrix of the adaptive group LASSO estimates for the B-spline coefficients (beta_init).
static void calcul_adap_gLasso (Matrix &beta_init, const Matrix &Y0, const Matrix &X0, const int L_T, const int poly_degree, const int nbreak, const double lambda1,
const double lambda2, const int max_iter, const double min_tol, ofstream &glasso_out);
};
//calculate the least-squares estimates as an initial estimate used for the Block Coordinate Descent Algorithm. INPUT: INPUT: a T0 by 1 matrix of data on the response (Y),
//a N*L*m by T0 matrix of all B-spline basis functions evaluated at all past data points on the X's (BS). OUTPUT: a N*L*m by 1 matrix of least-squares estimates (beta).
void VAR_gLASSO::calcul_LS (Matrix &beta, const Matrix &Y, const Matrix &BS) {
auto T0 = 0;
T0 = Y.nRow();
auto M = BS.nRow();
ASSERT ((T0 == BS.nCol()) && (M == beta.nRow()));
Matrix denom(M,M), denom1(M,M), num(M,1);
denom = BS * Tr(BS);
num = BS * Y;
inverse (denom1, denom);
beta = denom1 * num;
}
//calculate the gradient vector of the least-squares criterion function for each additive component (j,ell). INPUT: a T0 by 1 matrix of centered data on the response (Y),
//a N*L*m by T0 matrix of all centered B-spline basis functions evaluated at all past data points on the X's (BS), a N*L*m by 1 matrix of B-spline coefficients (beta),
//the number of covariates (N), and a specific additive component j = 1,...,N; ell = 1,...L. OUTPUT; a m by 1 matrix of gradients.
void VAR_gLASSO::calcul_Gradients (Matrix &rho_jl, const Matrix &Y, const Matrix &BS, const Matrix &beta, const int N, const int j, const int ell) {
auto T0 = 0, L_T = 0, m_T = 0;
T0 = BS.nCol();
ASSERT (T0 == Y.nRow() && BS.nRow() == beta.nRow());
m_T = rho_jl.nRow();
L_T = BS.nRow() / (N*m_T);
Matrix sum0(T0,1);
sum0.set(0.);
for (auto t = 1; t <= T0; ++t) {
for (auto j0 = 1; j0 <= N; ++j0) {
for (auto ell0 = 1; ell0 <= L_T; ++ell0) {
for (auto k = 1; k <= m_T; ++k) {
sum0(t) += beta((j0-1)*m_T*L_T+(ell0-1)*m_T+k) * BS((j0-1)*m_T*L_T+(ell0-1)*m_T+k, t);
}
}
}
}
rho_jl.set(0.);
for (auto k = 1; k <= m_T; ++k) {
for (auto t = 1; t <= T0; ++t) {
rho_jl(k) += 2*BS((j-1)*m_T*L_T+(ell-1)*m_T+k, t) * (Y(t) - sum0(t));
}
}
}
//calculate the sum of squared residuals (SSR) for the additive autoregression model. INPUT: a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all centered
//B-spline basis functions, evaluated at all past data points on the X's (BS), a N*L*m by 1 matrix of B-spline coefficients (beta), the number of covariates (N), and the maximum number
//of lags (L). OUTPUT: the SSR.
double VAR_gLASSO::calcul_SSR (const Matrix &Y, const Matrix &BS, const Matrix &beta, const int N, const int L_T) {
auto T0 = 0, m_T = 0;
T0 = Y.nRow();
m_T = BS.nRow() / (N*L_T);
ASSERT(T0 == BS.nCol());
double SSR = 0., sum0 = 0.;
for (auto t = 1; t <= T0; ++t) {
sum0 = 0.;
for (auto j0 = 1; j0 <= N; ++j0) {
for (auto ell0 = 1; ell0 <= L_T; ++ell0) {
for (auto k = 1; k <= m_T; ++k) {
sum0 += beta((j0-1)*m_T*L_T+(ell0-1)*m_T+k) * BS((j0-1)*m_T*L_T+(ell0-1)*m_T+k, t);
}
}
}
SSR += pow(Y(t) - sum0, 2.);
}
return SSR;
}
//calculate the group LASSO objective function at a specific block, say the (j,ell) block while keeping all the other blocks fixed.
//INPUT: a m by 1 matrix of slope coefficients for the (j,ell)-th block (beta_jl), a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all centered B-spline
//basis functions evaluated at all past data points on the X's (BS), a N*L*m by 1 matrix of B-spline coefficients (beta), the number of covariates (N), a tuning coefficient (lambda_T),
//and a specific block j = 1,...,N; ell = 1,...L. OUTPUT: a function of specific block coordinates, say beta_jl.
double VAR_gLASSO::obj_Func (const Matrix &beta_jl, const Matrix &Y, const Matrix &BS, const Matrix &beta_, const int N, const double lambda_T, const int j, const int ell) {
auto T0 = 0, m_T = 0, L_T = 0;
T0 = Y.nRow();
m_T = beta_jl.nRow();
L_T = BS.nRow() / (N*m_T);
ASSERT(T0 == BS.nCol());
Matrix beta = beta_; //copy matrices beta_ to beta
for (auto k = 1; k <= m_T; ++k) {
beta((j-1)*m_T*L_T+(ell-1)*m_T+k) = beta_jl(k); //copy beta_jl to beta at the (j,ell)-th position
}
auto res = 0., enorm_sq = 0.;
res = VAR_gLASSO::calcul_SSR (Y, BS, beta, N, L_T); //calculate the SSR
for (auto j0 = 1; j0 <= N; ++j0) {
for (auto ell0 = 1; ell0 <= L_T; ++ell0) {
enorm_sq = 0.; //re-set the initial value to zero
for (auto k = 1; k <= m_T; ++k) {
enorm_sq += pow(beta((j0-1)*m_T*L_T+(ell0-1)*m_T+k), 2.);
}
res += lambda_T * std::sqrt(enorm_sq);
}
}
return res;
}
//rewrite VAR_gLASSO::obj_Func (defined above) in the GSL functional format, which can then be used as an argument to the GSL minimization routine
//employing the Simplex algorithm of Nelder and Mead.
double VAR_gLASSO::obj_Func_gsl (const gsl_vector *beta_jl0, void *parms) {
double *p = (double *) parms;
int m_T = beta_jl0->size, N = p[0], L_T = p[1], T0 = p[2], j = p[4], ell = p[5], BS_nr = N*L_T*m_T;
auto lambda_T = p[3]; //tuning parameter
Matrix Y(T0,1), BS(BS_nr,T0), beta(BS_nr,1), beta_jl(m_T,1);
for (auto t = 1; t <= T0; ++t) {
Y(t) = p[5+t]; //reconstruct the data matrix on the response
}
for (auto j0 = 1; j0 <= N; ++j0) {
for (auto ell0 = 1; ell0 <= L_T; ++ell0) {
for (auto k = 1; k <= m_T; ++k ) {
for (auto t = 1; t <= T0; ++t) {
BS((j0-1)*m_T*L_T+(ell0-1)*m_T+k, t) = p[5+T0+T0*((j0-1)*m_T*L_T+(ell0-1)*m_T+k-1)+t]; //reconstruct the N*L*m by T0 matrix of all B-spline basis functions evaluated at all past data points on the X's (BS)
}
beta((j0-1)*m_T*L_T+(ell0-1)*m_T+k) = p[5+T0+T0*N*L_T*m_T+(j0-1)*m_T*L_T+(ell0-1)*m_T+k]; //reconstruct the N*L*m by 1 matrix of B-spline coefficients
}
}
}
for (auto k = 1; k <= m_T; ++k) {
beta_jl(k) = gsl_vector_get(beta_jl0, k-1); //copy beta_jl0 to beta_jl
}
return VAR_gLASSO::obj_Func (beta_jl, Y, BS, beta, N, lambda_T, j, ell);
}
//calculate the group LASSO estimates for the B-spline coefficients by using the least-squares estimates as initial estimates. This routine employs the Block Coordinate Descent Algorithm
//(see p. 69 in Buhlmann & van de Geer (2011).
//INPUT: an N*L*m by 1 matrix of initial parameters (beta_init), a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all centered B-spline basis functions
//evaluated at all past data points on the X's (BS), the number of covariates (N), the number of time lags (L_T), a tuning parameter (lambda_T), a maximum number of iterations (max_iter),
//a minimum tolerance level for numerical convergence (min_tol), and an output stream (glasso_out). OUTPUT: a N*L*m by 1 matrix of the group LASSO estimates for the B-spline
//coefficients (beta_init).
void VAR_gLASSO::calcul_gLasso (Matrix &beta_init, const Matrix &Y, const Matrix &BS, const int N, const int L_T, const double lambda_T, const int max_iter,
const double min_tol, ofstream &glasso_out) {
ASSERT (Y.nRow() == BS.nCol() && beta_init.nRow() == BS.nRow());
auto iter = 1, nr_beta = 0, m_T = 0, T0 = 0;
nr_beta = beta_init.nRow();
m_T = nr_beta / (N*L_T);
T0 = Y.nRow();
Matrix beta0(nr_beta,1), beta1(nr_beta,1), rho_jl(m_T,1), beta_jl1(m_T,1);
gsl_vector *beta_jl0 = gsl_vector_alloc(m_T); //define a vector of starting points used in the the Simplex minimization algorithm of Nelder and Mead
//VAR_gLASSO::calcul_LS (beta_init, Y, BS); //use the LS estimates as an initial estimate
glasso_out << "The initial estimates for the B-spline coefficients are: " << endl;
for (auto j = 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= m_T; ++k) {
glasso_out << "j = " << j << ", ell = " << ell << ", k = " << k << ": " << beta_init((j-1)*m_T*L_T+(ell-1)*m_T+k) << endl;
}
}
}
std::vector<int> set_j;
std::vector<int> set_ell;
for (auto j = 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= m_T; ++k) {
beta_jl1(k) = beta_init((j-1)*m_T*L_T+(ell-1)*m_T+k);
}
if (ENorm(beta_jl1) != 0.) { //include only non-zero groups
set_j.push_back(j); //copy group indices to vectors of type integer
set_ell.push_back(ell);
}
}
}
std::sort(set_j.begin(), set_j.end()); // sort set_j
std::sort(set_ell.begin(), set_ell.end()); // sort set_ell
auto last_j = std::unique(set_j.begin(), set_j.end()); //override the duplicate elements of set_j
auto last_ell = std::unique(set_ell.begin(), set_ell.end()); //override the duplicate elements of set_ell
set_j.erase(last_j, set_j.end()); //remove extra elements at the end from the range of set_j
set_ell.erase(last_ell, set_ell.end()); //remove extra elements at the end from the range of set_ell
//cout << "size of set_j = " << set_j.size() << " and size of set_ell = " << set_ell.size() << endl;
//main loop
do {
//cycling through all the non-zero block coordinates {1, ... , m_T}
for (auto j : set_j) {
for (auto ell : set_ell) {
//cout << "(j, ell) = " << "(" << j << ", " << ell << ")" << endl;
beta0 = beta_init; //re-set beta0 for each (j,ell)-th group
for (auto k = 1; k <= m_T; ++k) {
//copy the (j,ell)-th block to beta_jl0, which is then used as the starting values to call the Simplex minimization algorithm right below
gsl_vector_set (beta_jl0, k-1, beta0((j-1)*m_T*L_T+(ell-1)*m_T+k));
beta0((j-1)*m_T*L_T+(ell-1)*m_T+k) = 0.; //assign zeros to the (j,ell)-th block in the vector beta0
}
VAR_gLASSO::calcul_Gradients (rho_jl, Y, BS, beta0, N, j, ell); //calculate the gradients of the SSE for the (j,ell)-th block at beta0
//cout << "ENorm(rho_jl) = " << ENorm(rho_jl) << endl;
if (ENorm(rho_jl) <= lambda_T) {
for (auto k = 1; k <= m_T; ++k) {
beta1((j-1)*m_T*L_T+(ell-1)*m_T+k) = 0.;
}
}
else {
double *par = new double[6+T0+(T0+1)*N*L_T*m_T]; //define an array
//assign the parameters for the GSL objective function to be minimized
par[0] = N;
par[1] = L_T;
par[2] = T0;
par[3] = lambda_T;
par[4] = j;
par[5] = ell;
for (auto t = 1; t <= T0; ++t) {
par[5+t] = Y(t);
}
for (auto j0 = 1; j0 <= N; ++j0) {
for (auto ell0 = 1; ell0 <= L_T; ++ell0) {
for (auto k = 1; k <= m_T; ++k ) {
for (auto t = 1; t <= T0; ++t) {
par[5+T0+T0*((j0-1)*m_T*L_T+(ell0-1)*m_T+k-1)+t] = BS((j0-1)*m_T*L_T+(ell0-1)*m_T+k, t);
}
par[5+T0+T0*N*L_T*m_T+(j0-1)*m_T*L_T+(ell0-1)*m_T+k] = beta0((j0-1)*m_T*L_T+(ell0-1)*m_T+k);
}
}
}
//cerr << "Starting the NM Simplex algorithm. . ." << endl;
minimization<VAR_gLASSO::obj_Func_gsl> (beta_jl1, beta_jl0, par); //call the Simplex minimization algorithm of Nelder and Mead initiazed at beta_jl0
for (auto k = 1; k <= m_T; ++k) {
beta1((j-1)*m_T*L_T+(ell-1)*m_T+k) = beta_jl1(k); //copy the minimum points from beta_jl1 to beta1
}
delete [] par; //delete par
}
}
}
//cout << "stopcdn(beta1, beta_init) = " << stopcdn(beta1, beta_init) << endl;
if (stopcdn(beta1, beta_init) <= min_tol) {
beta_init = beta1;
break;
}
else {
beta_init = beta1;
iter += 1;
}
//cout << "VAR_gLASSO::calcul_gLasso: Implementing the BCDA -- loop # " << iter << ". " << endl;
} while (iter <= max_iter);
if (iter <= max_iter) {
glasso_out << "VAR_gLASSO::calcul_gLasso: The block coordinate descent algorithm converges at the " << iter << "-th iteration. . ."<< endl;
cout << "VAR_gLASSO::calcul_gLasso: The block coordinate descent algorithm converges at the " << iter << "-th iteration. . ."<< endl;
}
else {
cerr << "VAR_gLASSO::calcul_gLasso: The block coordinate descent algorithm does not converge when the maximum number of iterations is set to " << max_iter << ". . ." << endl;
glasso_out << "VAR_gLASSO::calcul_gLasso: The block coordinate descent algorithm does not converge when the maximum number of iterations is set to " << max_iter << ". . ." << endl;
}
glasso_out << "The group LASSO estimates for the B-spline coefficients are: " << endl;
for (auto j = 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= m_T; ++k) {
glasso_out << "j = " << j << ", ell = " << ell << ", k = " << k << ": " << beta_init((j-1)*m_T*L_T+(ell-1)*m_T+k) << endl;
}
}
}
gsl_vector_free (beta_jl0); //free memory
}
//calculate the adaptive group LASSO estimates from datasets for nonparametric additive ARDL models with each component being approximated by B-splines. INPUT: a T by 1 matrix of data
//on the response Y (Y0), a T by N matrix of data on covariates (X0), a maximum number of autoregressive lags (L_T), an order of B-spline polynomials (poly_degree, set it to 4 to yield cubic B-splines),
//a number of break points in [0,1] (nbreak), tuning parameters for group LASSO and adaptive group LASSO (lambda1 and lambda2), a maximum number of iterations (max_iter) for the Block
//Coordinate Descent Algorithm, a minimum level of tolerance to evaluate numerical convergence of this algorithm (min_tol), a output file stream (glasso_out), and an initial estimates (beta_init).
//OUTPUT: a N*L*m by 1 matrix of the adaptive group LASSO estimates for the B-spline coefficients (beta_init).
void VAR_gLASSO::calcul_adap_gLasso (Matrix &beta_init, const Matrix &Y0, const Matrix &X0, const int L_T, const int poly_degree, const int nbreak, const double lambda1,
const double lambda2, const int max_iter, const double min_tol, ofstream &glasso_out) {
auto T = Y0.nRow(), N = X0.nCol(), beta_nR = beta_init.nRow();
ASSERT (T == X0.nRow());
int ncoeffs = nbreak + poly_degree - 2;
int T0 = T- L_T;
Matrix Y(T0,1), X(T,N);
do_Norm(X, X0); //normalize data to the 0-1 range
for (auto t = L_T+1; t <= T; ++t ) {
Y(t-L_T) = Y0(t);
}
double mean_Y = mean_u(Y);
for (auto t = 1; t <= T0; ++t) {
Y(t) = Y(t) - mean_Y; //re-center data on the response
}
gsl_bspline_workspace *bw; //define workspace for B-splines
bw = gsl_bspline_alloc(poly_degree, nbreak);
//use uniform breakpoints on [0, 1]
gsl_bspline_knots_uniform(0.0, 1.0, bw);
//calculate the knots
//for (auto k = 1; k <= ncoeffs+poly_degree; ++k )
//cout << "knot = " << k << ": " << gsl_vector_get(bw->knots, k-1) << endl;
Matrix BS(beta_nR,T0), ave_BS(beta_nR,1), beta_init_jl(ncoeffs,1);
gsl_vector *B = gsl_vector_alloc (ncoeffs); //define a GSL array pointer
for (auto j= 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto t = L_T+1; t <= T; ++t) {
gsl_bspline_eval(X(t-ell,j), B, bw); //evaluate all B-spline basis functions at X(t-ell,j)
for (auto k = 1; k <= ncoeffs; ++k) {
BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k, t-L_T) = poly_degree * gsl_vector_get(B, k-1) / (nbreak-1); //normalize all the B-splines basis functions
//cout << "j = " << j << ", ell = " << ell << ", k = " << k << ", t = " << t << ": " << BS((j-1)*ncoeffs*L+(ell-1)*ncoeffs+k, t-L) << endl;
}
}
}
}
gsl_bspline_free (bw); //free the B-spline worksapce
gsl_vector_free (B); //free memory
ave_BS.set(0.);
for (auto j= 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= ncoeffs; ++k) {
for (auto t = 1; t <= T0; ++t) {
ave_BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) += BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k, t) / T0; //calculate the temporal averages for all B-spline basis functions
}
}
}
}
for (auto j= 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto t = 1; t <= T0; ++t) {
for (auto k = 1; k <= ncoeffs; ++k) {
//calculate the centered B-spline basis functions
BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k, t) = BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k, t) - ave_BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k);
//cout << "j = " << j << ", ell = " << ell << ", k = " << k << ", t = " << t << ": " << BS((j-1)*ncoeffs*L+(ell-1)*ncoeffs+k, t-L) << endl;
}
}
}
}
glasso_out << "Do the group LASSO: " << endl;
cout << "Doing the group LASSO. . ." << endl;
VAR_gLASSO::calcul_gLasso (beta_init, Y, BS, N, L_T, lambda1, max_iter, min_tol, glasso_out); //do the group LASSO
cout << "The group LASSO estimates for the B-spline coefficients are: " << endl;
glasso_out << "The group LASSO estimates for the B-spline coefficients are: " << endl;
for (auto j = 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= ncoeffs; ++k) {
cout << "j = " << j << ", ell = " << ell << ", k = " << k << ": " << beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) << endl;
glasso_out << "j = " << j << ", ell = " << ell << ", k = " << k << ": " << beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) << endl;
}
}
}
Matrix norm_beta(N,L_T);
for (auto j= 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= ncoeffs; ++k)
beta_init_jl(k) = beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k);
norm_beta(j,ell) = ENorm(beta_init_jl);
for (auto t = 1; t <= T0; ++t) {
for (auto k = 1; k <= ncoeffs; ++k) {
//multiply B-spline basis functions with the Euclidean norms of the group LASSO estimates in each (j,ell) block
BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k, t) = BS((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k, t) * norm_beta(j,ell);
//cout << "j = " << j << ", ell = " << ell << ", k = " << k << ", t = " << t << ": " << BS((j-1)*ncoeffs*L+(ell-1)*ncoeffs+k, t-L) << endl;
}
}
}
}
glasso_out << "Do the adaptive group LASSO: " << endl;
cout << "Doing the adaptive group LASSO. . ." << endl;
VAR_gLASSO::calcul_gLasso (beta_init, Y, BS, N, L_T, lambda2, max_iter, min_tol, glasso_out); //do the adaptive group LASSO
cout << "The adaptive group LASSO estimates for the B-spline coefficients are: " << endl;
glasso_out << "The adaptive group LASSO estimates for the B-spline coefficients are: " << endl;
for (auto j = 1; j <= N; ++j) {
for (auto ell = 1; ell <= L_T; ++ell) {
for (auto k = 1; k <= ncoeffs; ++k) {
//multiply the obtained B-spline coefficients with the Euclidean norms of the group LASSO estimates in each (j,ell) block to yield
//the estimated slope coefficients of the original B-spline basis functions
beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) = beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) * norm_beta(j,ell);
cout << "j = " << j << ", ell = " << ell << ", k = " << k << ": " << beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) << endl;
glasso_out << "j = " << j << ", ell = " << ell << ", k = " << k << ": " << beta_init((j-1)*ncoeffs*L_T+(ell-1)*ncoeffs+k) << endl;
}
}
}
}
//select the optimal tuning parameters for the group LASSO by employing the BIC and the EBIC as in Huang, Horowitz & Fei (2009). INPUT: an N*L*m by 1 matrix of initial parameters (beta_init0),
//a T0 by 1 matrix of centered data on the response (Y), a N*L*m by T0 matrix of all centered B-spline basis functions evaluated at all past data points on the X's (BS), a divergence rate
//for the tuning parameter (lambda_glasso_rate), lower and upper bounds (lb and ub) of the scaling factor C0 in the tuning parameter (lambda = C0*lambda_glasso_rate), a number of grid
//points (ngrid) in the interval [lb, ub], a number of the number of covariates (N), the number of time lags (L_T), a maximum number of iterations (max_iter), a minimum tolerance level
//for numerical convergence (min_tol), and output streams (ic_out and glasso_out).
//OUTPUT: optimal tuning parameters using the BIC and EBIC; optimal values of the BIC and the EBIC (opt_BIC and opt_EBIC).
Matrix VAR_gLASSO::calcul_IC (double & opt_BIC, double & opt_EBIC, const Matrix &beta_init0, const Matrix &Y, const Matrix &BS, const double lambda_glasso_rate,
const double lb, const double ub, const int ngrid, const int N, const int L_T, const int max_iter, const double min_tol, ofstream &ic_out,
ofstream &glasso_out) {
auto i = 0, T0 = Y.nRow(), beta_nR = beta_init0.nRow(), df = 0;
ASSERT ((lb < ub) && (Y.nRow() == BS.nCol()) && (beta_init0.nRow() == BS.nRow()));
Matrix beta_init(beta_nR,1), BIC(ngrid,1), EBIC(ngrid,1), lambda(ngrid,1), min0(2,1), res(2,1);
double ssr = 0.;
ic_out << "Calculating BIC and EBIC. . ." << endl;
cout << "Calculating BIC and EBIC. . ." << endl;
ic_out << "lambda" << " , " << "BIC" << " , " << "EBIC" << endl;
cout << "lambda" << " , " << "BIC" << " , " << "EBIC" << endl;
#pragma omp parallel for default(shared) schedule(dynamic,CHUNK) private(i) firstprivate(beta_init,df,ssr)
for (i = 0; i < ngrid; ++i) {
lambda(i+1) = (lb + i*(ub-lb)/(ngrid-1)) * lambda_glasso_rate;
//cout << "lambda(i+1) = " << lambda(i+1) << endl;
beta_init = beta_init0; //copy beta_init0 to beta_init to initialize the group LASSO
VAR_gLASSO::calcul_gLasso (beta_init, Y, BS, N, L_T, lambda(i+1), max_iter, min_tol, glasso_out); //call the group LASSO routine with this lambda
df = nonzero(beta_init); //the number of degrees of freedom for this lambda
if (df == 0) {
ic_out << "All the B-spline coefficients are shrunk to zeros when the penalty parameter (lambda_T) = " << lambda(i+1) << endl;
cout << "All the B-spline coefficients are shrunk to zeros when the penalty parameter (lambda_T) = " << lambda(i+1) << endl;
BIC(i+1) = 1000;
EBIC(i+1) = 1000;
}
else {
ssr = VAR_gLASSO::calcul_SSR (Y, BS, beta_init, N, L_T); //calculate SSR for this lambda
BIC(i+1) = std::log(ssr) + df*std::log(T0)/T0;
EBIC(i+1) = std::log(ssr) + df*std::log(T0)/T0 + 0.5*df*std::log(N*L_T)/T0;
ic_out << lambda(i+1) << " , " << BIC(i+1) << " , " << EBIC(i+1) << endl;
cout << lambda(i+1) << " , " << BIC(i+1) << " , " << EBIC(i+1) << endl;
}
}
min0 = minn(BIC); //find the minimum BIC
opt_BIC = min0(2);
int opt_BIC_ind = ((int) min0(1));
res(1) = lambda(opt_BIC_ind);
ic_out << "Optimal value for the tuning parameter using BIC is " << res(1) << " with the optimal BIC = " << opt_BIC << endl;
cout << "Optimal value for the tuning parameter using BIC is " << res(1) << " with the optimal BIC = " << opt_BIC << endl;
min0 = minn(EBIC); //find the minimum EBIC
opt_EBIC = min0(2);
int opt_EBIC_ind = ((int) min0(1));
res(2) = lambda(opt_EBIC_ind);
ic_out << "Optimal value for the tuning parameter using EBIC is " << res(2) << " with the optimal BIC = " << opt_EBIC << endl;
cout << "Optimal value for the tuning parameter using EBIC is " << res(2) << " with the optimal EBIC = " << opt_EBIC << endl;
return res;
}
#endif