Multiple updates:

- replace dgamma with kappa_ratio - add max_grad to avoid coefficients diverging - revert convergence check back onto coefficients
wenjie2wang · Dec 12, 2023 · 415f242 · 415f242
1 parent 67556f2
commit 415f242
Show file tree

Hide file tree

Showing 12 changed files with 167 additions and 148 deletions.
diff --git a/R/abclass.R b/R/abclass.R
@@ -112,8 +112,9 @@ abclass <- function(x, y,
 ##'     penalty.
 ##' @param offset An optional numeric matrix for offsets of the decision
 ##'     functions.
-##' @param dgamma A positive number specifying the increment to the minimal
-##'     gamma parameter for group SCAD or group MCP.
+##' @param kappa_ratio A positive number within (0, 1) specifying the ratio of
+##'     reciprocal gamma parameter for group SCAD or group MCP.  A close-to-zero
+##'     \code{kappa_ratio} would give a solution close to lasso solution.
 ##' @param lum_a A positive number greater than one representing the parameter
 ##'     \emph{a} in LUM, which will be used only if \code{loss = "lum"}.  The
 ##'     default value is \code{1.0}.
@@ -126,6 +127,8 @@ abclass <- function(x, y,
 ##'     The default value is \code{10^5}.
 ##' @param epsilon A positive number specifying the relative tolerance that
 ##'     determines convergence.  The default value is \code{1e-4}.
+##' @param max_grad The maximum value of gradients of loss function to avoid
+##'     coefficients diverging.
 ##' @param standardize A logical value indicating if each column of the design
 ##'     matrix should be standardized internally to have mean zero and standard
 ##'     deviation equal to the sample size.  The default value is \code{TRUE}.
@@ -148,12 +151,13 @@ abclass.control <- function(lambda = NULL,
                             group_weight = NULL,
                             group_penalty = c("lasso", "scad", "mcp"),
                             offset = NULL,
-                            dgamma = 1.0,
+                            kappa_ratio = 0.9,
                             lum_a = 1.0,
                             lum_c = 1.0,
                             boost_umin = - 5.0,
                             maxit = 1e5L,
                             epsilon = 1e-4,
+                            max_grad = -1e-5,
                             standardize = TRUE,
                             varying_active_set = TRUE,
                             verbose = 0L,
@@ -180,11 +184,12 @@ abclass.control <- function(lambda = NULL,
         standardize = standardize,
         maxit = as.integer(maxit),
         epsilon = epsilon,
+        max_grad = max_grad,
         varying_active_set = varying_active_set,
         verbose = as.integer(verbose),
         boost_umin = boost_umin,
         lum_a = lum_a,
         lum_c = lum_c,
-        dgamma = dgamma
+        kappa_ratio = kappa_ratio
     ), class = "abclass.control")
 }
diff --git a/R/abclass_engine.R b/R/abclass_engine.R
@@ -95,7 +95,7 @@
             if (control$group_penalty == "lasso") {
                 res$regularization[common_pars]
             } else {
-                res$regularization[c(common_pars, "dgamma", "gamma")]
+                res$regularization[c(common_pars, "kappa_ratio", "gamma")]
             }
         } else {
             res$regularization[return_lambda]

diff --git a/R/moml_engine.R b/R/moml_engine.R
@@ -102,7 +102,7 @@
             if (control$group_penalty == "lasso") {
                 res$regularization[common_pars]
             } else {
-                res$regularization[c(common_pars, "dgamma", "gamma")]
+                res$regularization[c(common_pars, "kappa_ratio", "gamma")]
             }
         } else {
             res$regularization[return_lambda]

diff --git a/inst/include/abclass/AbclassGroup.h b/inst/include/abclass/AbclassGroup.h
@@ -41,6 +41,9 @@ namespace abclass
                                         const unsigned int j) const
         {
             arma::vec inner_grad { loss_derivative(inner) };
+            // avoid coefficients diverging
+            inner_grad.elem(arma::find(inner_grad >
+                                       control_.max_grad_)).zeros();
             arma::vec tmp_vec {
                 x_.col(j) % control_.obs_weight_ % inner_grad
             };

diff --git a/inst/include/abclass/AbclassGroupLasso.h b/inst/include/abclass/AbclassGroupLasso.h
@@ -176,12 +176,6 @@ namespace abclass
             }
             const size_t j1 { j + inter_ };
             const double mj { mm_lowerbound_(j) };
-            // early exit for zero mj from constant columns
-            if (isAlmostEqual(mj, 0.0)) {
-                beta.row(j1).zeros();
-                is_active(j) = 0;
-                continue;
-            }
             const arma::rowvec old_beta_j { beta.row(j1) };
             const arma::rowvec uj {
                 - mm_gradient(inner, j) + mj * old_beta_j
@@ -240,8 +234,8 @@ namespace abclass
         )
     {
         size_t i {0}, num_iter {0};
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         // use active-set if p > n ("helps when p >> n")
         if (varying_active_set) {
             arma::uvec is_active_strong { is_active },
@@ -259,15 +253,15 @@ namespace abclass
                     num_iter = ii + 1;
                     run_one_active_cycle(beta, inner, is_active_varying,
                                          lambda, ridge, true, verbose);
-                    // if (rel_diff(beta0, beta) < epsilon) {
-                    //     break;
-                    // }
-                    // beta0 = beta;
-                    loss1 = objective0(inner);
-                    if (std::abs(loss1 - loss0) < epsilon) {
+                    if (rel_diff(beta0, beta) < epsilon) {
                         break;
                     }
-                    loss0 = loss1;
+                    beta0 = beta;
+                    // loss1 = objective0(inner);
+                    // if (std::abs(loss1 - loss0) < epsilon) {
+                    //     break;
+                    // }
+                    // loss0 = loss1;
                     ++ii;
                 }
                 // run a full cycle over the converged beta
@@ -308,15 +302,15 @@ namespace abclass
                 ++num_iter;
                 run_one_active_cycle(beta, inner, is_active,
                                      lambda, ridge, false, verbose);
-                // if (rel_diff(beta0, beta) < epsilon) {
-                //     break;
-                // }
-                // beta0 = beta;
-                loss1 = objective0(inner);
-                if (std::abs(loss1 - loss0) < epsilon) {
+                if (rel_diff(beta0, beta) < epsilon) {
                     break;
                 }
-                loss0 = loss1;
+                beta0 = beta;
+                // loss1 = objective0(inner);
+                // if (std::abs(loss1 - loss0) < epsilon) {
+                //     break;
+                // }
+                // loss0 = loss1;
                 ++i;
             }
             if (verbose > 0) {
@@ -341,7 +335,6 @@ namespace abclass
         // set group weight from the control_
         set_group_weight();
         arma::uvec penalty_group { arma::find(control_.group_weight_ > 0.0) };
-        arma::uvec penalty_free { arma::find(control_.group_weight_ == 0.0) };
         // initialize
         arma::vec one_inner;
         if (control_.has_offset_) {
@@ -387,12 +380,9 @@ namespace abclass
 
         double one_strong_rhs { 0.0 };
         // get the solution (intercepts) of l1_lambda_max for a warm start
-        arma::uvec is_active_strong { arma::zeros<arma::uvec>(p0_) };
+        arma::uvec is_active_strong { arma::ones<arma::uvec>(p0_) };
         // only need to estimate beta not in the penalty group
-        for (arma::uvec::iterator it { penalty_free.begin() };
-             it != penalty_free.end(); ++it) {
-            is_active_strong(*it) = 1;
-        }
+        is_active_strong.elem(penalty_group).zeros();
         double l1_lambda { control_.alpha_ * lambda_max_ };
         double l2_lambda { (1 - control_.alpha_) * lambda_max_ };
         run_gmd_active_cycle(one_beta,
@@ -404,6 +394,9 @@ namespace abclass
                              control_.max_iter_,
                              control_.epsilon_,
                              control_.verbose_);
+        // exclude constant covariates from penalty group
+        // so that they will not be considered as active by strong rule at all
+        penalty_group = penalty_group.elem(arma::find(mm_lowerbound_ > 0.0));
         double old_lambda { l1_lambda }; // for strong rule
         // main loop: for each lambda
         for (size_t li { 0 }; li < control_.lambda_.n_elem; ++li) {

diff --git a/inst/include/abclass/AbclassGroupMCP.h b/inst/include/abclass/AbclassGroupMCP.h
@@ -143,16 +143,20 @@ namespace abclass
         // for a sequence of lambda's
         inline void fit() override;
 
-        inline void set_gamma(const double dgamma = 0.01)
+        inline void set_gamma(const double kappa_ratio = 0.99)
         {
+            // kappa must be in (0, 1)
+            if (is_le(kappa_ratio, 0.0) || is_ge(kappa_ratio, 1.0)) {
+                throw std::range_error("The 'kappa_ratio' must be in (0, 1).");
+            }
             if (mm_lowerbound_.empty()) {
                 set_mm_lowerbound();
             }
             // exclude zeros lowerbounds from constant columns
             const double min_mg {
-                mm_lowerbound_(arma::find(mm_lowerbound_ > 0.0)).min()
+                mm_lowerbound_.elem(arma::find(mm_lowerbound_ > 0.0)).min()
             };
-            control_.gamma_ = dgamma + 1.0 / min_mg;
+            control_.gamma_ = 1.0 / min_mg / kappa_ratio;
         }
     };
 
@@ -198,12 +202,6 @@ namespace abclass
             }
             const size_t j1 { j + inter_ };
             const double mj { mm_lowerbound_(j) }; // m_g
-            // early exit for zero mj from constant columns
-            if (isAlmostEqual(mj, 0.0)) {
-                beta.row(j1).zeros();
-                is_active(j) = 0;
-                continue;
-            }
             const arma::rowvec old_beta_j { beta.row(j1) };
             const arma::rowvec zj {
                 - mm_gradient(inner, j) / mj + old_beta_j
@@ -215,7 +213,7 @@ namespace abclass
             if (zj2 < gamma * lambda_j * mj_ratio) {
                 double tmp { 1 - lambda_j / mj / zj2 };
                 if (tmp > 0.0) {
-                    double igamma_j { 1.0 / (gamma * mj) };
+                    double igamma_j { 1.0 / gamma / mj };
                     beta.row(j1) = tmp * zj / (mj_ratio - igamma_j);
                 } else {
                     beta.row(j1).zeros();
@@ -269,8 +267,8 @@ namespace abclass
         )
     {
         size_t i {0}, num_iter {0};
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         // use active-set if p > n ("helps when p >> n")
         if (varying_active_set) {
             arma::uvec is_active_strong { is_active },
@@ -288,15 +286,15 @@ namespace abclass
                     Rcpp::checkUserInterrupt();
                     run_one_active_cycle(beta, inner, is_active_varying,
                                          lambda, gamma, ridge, true, verbose);
-                    // if (rel_diff(beta0, beta) < epsilon) {
-                    //     break;
-                    // }
-                    // beta0 = beta;
-                    loss1 = objective0(inner);
-                    if (std::abs(loss1 - loss0) < epsilon) {
+                    if (rel_diff(beta0, beta) < epsilon) {
                         break;
                     }
-                    loss0 = loss1;
+                    beta0 = beta;
+                    // loss1 = objective0(inner);
+                    // if (std::abs(loss1 - loss0) < epsilon) {
+                    //     break;
+                    // }
+                    // loss0 = loss1;
                     ++ii;
                 }
                 // run a full cycle over the converged beta
@@ -340,15 +338,15 @@ namespace abclass
                 ++num_iter;
                 run_one_active_cycle(beta, inner, is_active,
                                      lambda, gamma, ridge, false, verbose);
-                // if (rel_diff(beta0, beta) < epsilon) {
-                //     break;
-                // }
-                // beta0 = beta;
-                loss1 = objective0(inner);
-                if (std::abs(loss1 - loss0) < epsilon) {
+                if (rel_diff(beta0, beta) < epsilon) {
                     break;
                 }
-                loss0 = loss1;
+                beta0 = beta;
+                // loss1 = objective0(inner);
+                // if (std::abs(loss1 - loss0) < epsilon) {
+                //     break;
+                // }
+                // loss0 = loss1;
                 ++i;
             }
             if (verbose > 0) {
@@ -373,9 +371,8 @@ namespace abclass
         // set group weight
         set_group_weight();
         // set gamma
-        set_gamma(control_.dgamma_);
+        set_gamma(control_.kappa_ratio_);
         arma::uvec penalty_group { arma::find(control_.group_weight_ > 0.0) };
-        arma::uvec penalty_free { arma::find(control_.group_weight_ == 0.0) };
         // initialize
         arma::vec one_inner;
         if (control_.has_offset_) {
@@ -421,12 +418,9 @@ namespace abclass
 
         double one_strong_rhs { 0.0 };
         // get the solution (intercepts) of l1_lambda_max for a warm start
-        arma::uvec is_active_strong { arma::zeros<arma::uvec>(p0_) };
+        arma::uvec is_active_strong { arma::ones<arma::uvec>(p0_) };
         // only need to estimate beta not in the penalty group
-        for (arma::uvec::iterator it { penalty_free.begin() };
-             it != penalty_free.end(); ++it) {
-            is_active_strong(*it) = 1;
-        }
+        is_active_strong.elem(penalty_group).zeros();
         double l1_lambda { control_.alpha_ * lambda_max_ };
         double l2_lambda { (1 - control_.alpha_) * lambda_max_ };
         run_gmd_active_cycle(one_beta,
@@ -439,6 +433,9 @@ namespace abclass
                              control_.max_iter_,
                              control_.epsilon_,
                              control_.verbose_);
+        // exclude constant covariates from penalty group
+        // so that they will not be considered as active by strong rule at all
+        penalty_group = penalty_group.elem(arma::find(mm_lowerbound_ > 0.0));
         // optim with varying active set when p > n
         double old_lambda { l1_lambda }; // for strong rule
         // main loop: for each lambda
@@ -448,7 +445,7 @@ namespace abclass
             l2_lambda = (1 - control_.alpha_) * lambda_li;
             // early exit for lambda greater than lambda_max_
             // note that lambda is sorted
-            if (lambda_li >= lambda_max_) {
+            if (is_ge(lambda_li, lambda_max_)) {
                 coef_.slice(li) = rescale_coef(one_beta);
                 this->loss_wo_penalty_(li) = objective0(one_inner);
                 this->penalty_(li) = regularization(