From 415f242bde18d0a495e8c6b0a57cac3ff51e4112 Mon Sep 17 00:00:00 2001
From: Wenjie Wang <wang@wwenjie.org>
Date: Mon, 11 Dec 2023 23:03:03 -0500
Subject: [PATCH] Multiple updates:

- replace dgamma with kappa_ratio
- add max_grad to avoid coefficients diverging
- revert convergence check back onto coefficients
---
 R/abclass.R                              | 13 +++--
 R/abclass_engine.R                       |  2 +-
 R/moml_engine.R                          |  2 +-
 inst/include/abclass/AbclassGroup.h      |  3 ++
 inst/include/abclass/AbclassGroupLasso.h | 49 ++++++++----------
 inst/include/abclass/AbclassGroupMCP.h   | 65 +++++++++++------------
 inst/include/abclass/AbclassGroupSCAD.h  | 66 +++++++++++-------------
 inst/include/abclass/AbclassNet.h        | 53 ++++++++++---------
 inst/include/abclass/Control.h           | 19 ++++---
 inst/include/abclass/Logistic.h          | 19 +++++--
 man/abclass.Rd                           | 11 ++--
 src/template_helpers.h                   | 13 +++--
 12 files changed, 167 insertions(+), 148 deletions(-)

diff --git a/R/abclass.R b/R/abclass.R
index 3a548ab..fdc1169 100644
--- a/R/abclass.R
+++ b/R/abclass.R
@@ -112,8 +112,9 @@ abclass <- function(x, y,
 ##'     penalty.
 ##' @param offset An optional numeric matrix for offsets of the decision
 ##'     functions.
-##' @param dgamma A positive number specifying the increment to the minimal
-##'     gamma parameter for group SCAD or group MCP.
+##' @param kappa_ratio A positive number within (0, 1) specifying the ratio of
+##'     reciprocal gamma parameter for group SCAD or group MCP.  A close-to-zero
+##'     \code{kappa_ratio} would give a solution close to lasso solution.
 ##' @param lum_a A positive number greater than one representing the parameter
 ##'     \emph{a} in LUM, which will be used only if \code{loss = "lum"}.  The
 ##'     default value is \code{1.0}.
@@ -126,6 +127,8 @@ abclass <- function(x, y,
 ##'     The default value is \code{10^5}.
 ##' @param epsilon A positive number specifying the relative tolerance that
 ##'     determines convergence.  The default value is \code{1e-4}.
+##' @param max_grad The maximum value of gradients of loss function to avoid
+##'     coefficients diverging.
 ##' @param standardize A logical value indicating if each column of the design
 ##'     matrix should be standardized internally to have mean zero and standard
 ##'     deviation equal to the sample size.  The default value is \code{TRUE}.
@@ -148,12 +151,13 @@ abclass.control <- function(lambda = NULL,
                             group_weight = NULL,
                             group_penalty = c("lasso", "scad", "mcp"),
                             offset = NULL,
-                            dgamma = 1.0,
+                            kappa_ratio = 0.9,
                             lum_a = 1.0,
                             lum_c = 1.0,
                             boost_umin = - 5.0,
                             maxit = 1e5L,
                             epsilon = 1e-4,
+                            max_grad = -1e-5,
                             standardize = TRUE,
                             varying_active_set = TRUE,
                             verbose = 0L,
@@ -180,11 +184,12 @@ abclass.control <- function(lambda = NULL,
         standardize = standardize,
         maxit = as.integer(maxit),
         epsilon = epsilon,
+        max_grad = max_grad,
         varying_active_set = varying_active_set,
         verbose = as.integer(verbose),
         boost_umin = boost_umin,
         lum_a = lum_a,
         lum_c = lum_c,
-        dgamma = dgamma
+        kappa_ratio = kappa_ratio
     ), class = "abclass.control")
 }
diff --git a/R/abclass_engine.R b/R/abclass_engine.R
index 37b748d..466ab8c 100644
--- a/R/abclass_engine.R
+++ b/R/abclass_engine.R
@@ -95,7 +95,7 @@
             if (control$group_penalty == "lasso") {
                 res$regularization[common_pars]
             } else {
-                res$regularization[c(common_pars, "dgamma", "gamma")]
+                res$regularization[c(common_pars, "kappa_ratio", "gamma")]
             }
         } else {
             res$regularization[return_lambda]
diff --git a/R/moml_engine.R b/R/moml_engine.R
index fb2112e..2b4ffb7 100644
--- a/R/moml_engine.R
+++ b/R/moml_engine.R
@@ -102,7 +102,7 @@
             if (control$group_penalty == "lasso") {
                 res$regularization[common_pars]
             } else {
-                res$regularization[c(common_pars, "dgamma", "gamma")]
+                res$regularization[c(common_pars, "kappa_ratio", "gamma")]
             }
         } else {
             res$regularization[return_lambda]
diff --git a/inst/include/abclass/AbclassGroup.h b/inst/include/abclass/AbclassGroup.h
index a79decd..841885f 100644
--- a/inst/include/abclass/AbclassGroup.h
+++ b/inst/include/abclass/AbclassGroup.h
@@ -41,6 +41,9 @@ namespace abclass
                                         const unsigned int j) const
         {
             arma::vec inner_grad { loss_derivative(inner) };
+            // avoid coefficients diverging
+            inner_grad.elem(arma::find(inner_grad >
+                                       control_.max_grad_)).zeros();
             arma::vec tmp_vec {
                 x_.col(j) % control_.obs_weight_ % inner_grad
             };
diff --git a/inst/include/abclass/AbclassGroupLasso.h b/inst/include/abclass/AbclassGroupLasso.h
index 285f284..14146fe 100644
--- a/inst/include/abclass/AbclassGroupLasso.h
+++ b/inst/include/abclass/AbclassGroupLasso.h
@@ -176,12 +176,6 @@ namespace abclass
             }
             const size_t j1 { j + inter_ };
             const double mj { mm_lowerbound_(j) };
-            // early exit for zero mj from constant columns
-            if (isAlmostEqual(mj, 0.0)) {
-                beta.row(j1).zeros();
-                is_active(j) = 0;
-                continue;
-            }
             const arma::rowvec old_beta_j { beta.row(j1) };
             const arma::rowvec uj {
                 - mm_gradient(inner, j) + mj * old_beta_j
@@ -240,8 +234,8 @@ namespace abclass
         )
     {
         size_t i {0}, num_iter {0};
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         // use active-set if p > n ("helps when p >> n")
         if (varying_active_set) {
             arma::uvec is_active_strong { is_active },
@@ -259,15 +253,15 @@ namespace abclass
                     num_iter = ii + 1;
                     run_one_active_cycle(beta, inner, is_active_varying,
                                          lambda, ridge, true, verbose);
-                    // if (rel_diff(beta0, beta) < epsilon) {
-                    //     break;
-                    // }
-                    // beta0 = beta;
-                    loss1 = objective0(inner);
-                    if (std::abs(loss1 - loss0) < epsilon) {
+                    if (rel_diff(beta0, beta) < epsilon) {
                         break;
                     }
-                    loss0 = loss1;
+                    beta0 = beta;
+                    // loss1 = objective0(inner);
+                    // if (std::abs(loss1 - loss0) < epsilon) {
+                    //     break;
+                    // }
+                    // loss0 = loss1;
                     ++ii;
                 }
                 // run a full cycle over the converged beta
@@ -308,15 +302,15 @@ namespace abclass
                 ++num_iter;
                 run_one_active_cycle(beta, inner, is_active,
                                      lambda, ridge, false, verbose);
-                // if (rel_diff(beta0, beta) < epsilon) {
-                //     break;
-                // }
-                // beta0 = beta;
-                loss1 = objective0(inner);
-                if (std::abs(loss1 - loss0) < epsilon) {
+                if (rel_diff(beta0, beta) < epsilon) {
                     break;
                 }
-                loss0 = loss1;
+                beta0 = beta;
+                // loss1 = objective0(inner);
+                // if (std::abs(loss1 - loss0) < epsilon) {
+                //     break;
+                // }
+                // loss0 = loss1;
                 ++i;
             }
             if (verbose > 0) {
@@ -341,7 +335,6 @@ namespace abclass
         // set group weight from the control_
         set_group_weight();
         arma::uvec penalty_group { arma::find(control_.group_weight_ > 0.0) };
-        arma::uvec penalty_free { arma::find(control_.group_weight_ == 0.0) };
         // initialize
         arma::vec one_inner;
         if (control_.has_offset_) {
@@ -387,12 +380,9 @@ namespace abclass
 
         double one_strong_rhs { 0.0 };
         // get the solution (intercepts) of l1_lambda_max for a warm start
-        arma::uvec is_active_strong { arma::zeros<arma::uvec>(p0_) };
+        arma::uvec is_active_strong { arma::ones<arma::uvec>(p0_) };
         // only need to estimate beta not in the penalty group
-        for (arma::uvec::iterator it { penalty_free.begin() };
-             it != penalty_free.end(); ++it) {
-            is_active_strong(*it) = 1;
-        }
+        is_active_strong.elem(penalty_group).zeros();
         double l1_lambda { control_.alpha_ * lambda_max_ };
         double l2_lambda { (1 - control_.alpha_) * lambda_max_ };
         run_gmd_active_cycle(one_beta,
@@ -404,6 +394,9 @@ namespace abclass
                              control_.max_iter_,
                              control_.epsilon_,
                              control_.verbose_);
+        // exclude constant covariates from penalty group
+        // so that they will not be considered as active by strong rule at all
+        penalty_group = penalty_group.elem(arma::find(mm_lowerbound_ > 0.0));
         double old_lambda { l1_lambda }; // for strong rule
         // main loop: for each lambda
         for (size_t li { 0 }; li < control_.lambda_.n_elem; ++li) {
diff --git a/inst/include/abclass/AbclassGroupMCP.h b/inst/include/abclass/AbclassGroupMCP.h
index e1b9531..68609de 100644
--- a/inst/include/abclass/AbclassGroupMCP.h
+++ b/inst/include/abclass/AbclassGroupMCP.h
@@ -143,16 +143,20 @@ namespace abclass
         // for a sequence of lambda's
         inline void fit() override;
 
-        inline void set_gamma(const double dgamma = 0.01)
+        inline void set_gamma(const double kappa_ratio = 0.99)
         {
+            // kappa must be in (0, 1)
+            if (is_le(kappa_ratio, 0.0) || is_ge(kappa_ratio, 1.0)) {
+                throw std::range_error("The 'kappa_ratio' must be in (0, 1).");
+            }
             if (mm_lowerbound_.empty()) {
                 set_mm_lowerbound();
             }
             // exclude zeros lowerbounds from constant columns
             const double min_mg {
-                mm_lowerbound_(arma::find(mm_lowerbound_ > 0.0)).min()
+                mm_lowerbound_.elem(arma::find(mm_lowerbound_ > 0.0)).min()
             };
-            control_.gamma_ = dgamma + 1.0 / min_mg;
+            control_.gamma_ = 1.0 / min_mg / kappa_ratio;
         }
     };
 
@@ -198,12 +202,6 @@ namespace abclass
             }
             const size_t j1 { j + inter_ };
             const double mj { mm_lowerbound_(j) }; // m_g
-            // early exit for zero mj from constant columns
-            if (isAlmostEqual(mj, 0.0)) {
-                beta.row(j1).zeros();
-                is_active(j) = 0;
-                continue;
-            }
             const arma::rowvec old_beta_j { beta.row(j1) };
             const arma::rowvec zj {
                 - mm_gradient(inner, j) / mj + old_beta_j
@@ -215,7 +213,7 @@ namespace abclass
             if (zj2 < gamma * lambda_j * mj_ratio) {
                 double tmp { 1 - lambda_j / mj / zj2 };
                 if (tmp > 0.0) {
-                    double igamma_j { 1.0 / (gamma * mj) };
+                    double igamma_j { 1.0 / gamma / mj };
                     beta.row(j1) = tmp * zj / (mj_ratio - igamma_j);
                 } else {
                     beta.row(j1).zeros();
@@ -269,8 +267,8 @@ namespace abclass
         )
     {
         size_t i {0}, num_iter {0};
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         // use active-set if p > n ("helps when p >> n")
         if (varying_active_set) {
             arma::uvec is_active_strong { is_active },
@@ -288,15 +286,15 @@ namespace abclass
                     Rcpp::checkUserInterrupt();
                     run_one_active_cycle(beta, inner, is_active_varying,
                                          lambda, gamma, ridge, true, verbose);
-                    // if (rel_diff(beta0, beta) < epsilon) {
-                    //     break;
-                    // }
-                    // beta0 = beta;
-                    loss1 = objective0(inner);
-                    if (std::abs(loss1 - loss0) < epsilon) {
+                    if (rel_diff(beta0, beta) < epsilon) {
                         break;
                     }
-                    loss0 = loss1;
+                    beta0 = beta;
+                    // loss1 = objective0(inner);
+                    // if (std::abs(loss1 - loss0) < epsilon) {
+                    //     break;
+                    // }
+                    // loss0 = loss1;
                     ++ii;
                 }
                 // run a full cycle over the converged beta
@@ -340,15 +338,15 @@ namespace abclass
                 ++num_iter;
                 run_one_active_cycle(beta, inner, is_active,
                                      lambda, gamma, ridge, false, verbose);
-                // if (rel_diff(beta0, beta) < epsilon) {
-                //     break;
-                // }
-                // beta0 = beta;
-                loss1 = objective0(inner);
-                if (std::abs(loss1 - loss0) < epsilon) {
+                if (rel_diff(beta0, beta) < epsilon) {
                     break;
                 }
-                loss0 = loss1;
+                beta0 = beta;
+                // loss1 = objective0(inner);
+                // if (std::abs(loss1 - loss0) < epsilon) {
+                //     break;
+                // }
+                // loss0 = loss1;
                 ++i;
             }
             if (verbose > 0) {
@@ -373,9 +371,8 @@ namespace abclass
         // set group weight
         set_group_weight();
         // set gamma
-        set_gamma(control_.dgamma_);
+        set_gamma(control_.kappa_ratio_);
         arma::uvec penalty_group { arma::find(control_.group_weight_ > 0.0) };
-        arma::uvec penalty_free { arma::find(control_.group_weight_ == 0.0) };
         // initialize
         arma::vec one_inner;
         if (control_.has_offset_) {
@@ -421,12 +418,9 @@ namespace abclass
 
         double one_strong_rhs { 0.0 };
         // get the solution (intercepts) of l1_lambda_max for a warm start
-        arma::uvec is_active_strong { arma::zeros<arma::uvec>(p0_) };
+        arma::uvec is_active_strong { arma::ones<arma::uvec>(p0_) };
         // only need to estimate beta not in the penalty group
-        for (arma::uvec::iterator it { penalty_free.begin() };
-             it != penalty_free.end(); ++it) {
-            is_active_strong(*it) = 1;
-        }
+        is_active_strong.elem(penalty_group).zeros();
         double l1_lambda { control_.alpha_ * lambda_max_ };
         double l2_lambda { (1 - control_.alpha_) * lambda_max_ };
         run_gmd_active_cycle(one_beta,
@@ -439,6 +433,9 @@ namespace abclass
                              control_.max_iter_,
                              control_.epsilon_,
                              control_.verbose_);
+        // exclude constant covariates from penalty group
+        // so that they will not be considered as active by strong rule at all
+        penalty_group = penalty_group.elem(arma::find(mm_lowerbound_ > 0.0));
         // optim with varying active set when p > n
         double old_lambda { l1_lambda }; // for strong rule
         // main loop: for each lambda
@@ -448,7 +445,7 @@ namespace abclass
             l2_lambda = (1 - control_.alpha_) * lambda_li;
             // early exit for lambda greater than lambda_max_
             // note that lambda is sorted
-            if (lambda_li >= lambda_max_) {
+            if (is_ge(lambda_li, lambda_max_)) {
                 coef_.slice(li) = rescale_coef(one_beta);
                 this->loss_wo_penalty_(li) = objective0(one_inner);
                 this->penalty_(li) = regularization(
diff --git a/inst/include/abclass/AbclassGroupSCAD.h b/inst/include/abclass/AbclassGroupSCAD.h
index cea65b4..cbc1017 100644
--- a/inst/include/abclass/AbclassGroupSCAD.h
+++ b/inst/include/abclass/AbclassGroupSCAD.h
@@ -149,21 +149,22 @@ namespace abclass
         inline void fit() override;
 
         // setter
-        inline void set_gamma(const double dgamma = 0.01)
+        inline void set_gamma(const double kappa_ratio = 0.99)
         {
+            // kappa must be in (0, 1)
+            if (is_le(kappa_ratio, 0.0) || is_ge(kappa_ratio, 1.0)) {
+                throw std::range_error("The 'kappa_ratio' must be in (0, 1).");
+            }
             if (mm_lowerbound_.empty()) {
                 set_mm_lowerbound();
             }
             // exclude zeros lowerbounds from constant columns
             const double min_mg {
-                std::min(mm_lowerbound_(arma::find(mm_lowerbound_ > 0.0)).min(),
+                std::min(mm_lowerbound_.elem(
+                             arma::find(mm_lowerbound_ > 0.0)).min(),
                          mm_lowerbound0_)
             };
-            if (dgamma > 0.0) {
-                control_.gamma_ = dgamma + 1.0 + 1.0 / min_mg;
-                return;
-            }
-            throw std::range_error("The 'dgamma' must be positive.");
+            control_.gamma_ = (1.0 + 1.0 / min_mg) / kappa_ratio;
         }
 
     };
@@ -210,12 +211,6 @@ namespace abclass
             }
             const size_t j1 { j + inter_ };
             const double mj { mm_lowerbound_(j) }; // m_g
-            // early exit for zero mj from constant columns
-            if (isAlmostEqual(mj, 0.0)) {
-                beta.row(j1).zeros();
-                is_active(j) = 0;
-                continue;
-            }
             const arma::rowvec old_beta_j { beta.row(j1) };
             const arma::rowvec zj {
                 - mm_gradient(inner, j) / mj + old_beta_j
@@ -286,8 +281,8 @@ namespace abclass
         )
     {
         size_t i {0}, num_iter {0};
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         // use active-set if p > n ("helps when p >> n")
         if (varying_active_set) {
             arma::uvec is_active_strong { is_active },
@@ -305,15 +300,15 @@ namespace abclass
                     Rcpp::checkUserInterrupt();
                     run_one_active_cycle(beta, inner, is_active_varying,
                                          lambda, gamma, ridge, true, verbose);
-                    // if (rel_diff(beta0, beta) < epsilon) {
-                    //     break;
-                    // }
-                    // beta0 = beta;
-                    loss1 = objective0(inner);
-                    if (std::abs(loss1 - loss0) < epsilon) {
+                    if (rel_diff(beta0, beta) < epsilon) {
                         break;
                     }
-                    loss0 = loss1;
+                    beta0 = beta;
+                    // loss1 = objective0(inner);
+                    // if (std::abs(loss1 - loss0) < epsilon) {
+                    //     break;
+                    // }
+                    // loss0 = loss1;
                     ++ii;
                 }
                 // run a full cycle over the converged beta
@@ -357,15 +352,15 @@ namespace abclass
                 ++num_iter;
                 run_one_active_cycle(beta, inner, is_active,
                                      lambda, gamma, ridge, false, verbose);
-                // if (rel_diff(beta0, beta) < epsilon) {
-                //     break;
-                // }
-                // beta0 = beta;
-                loss1 = objective0(inner);
-                if (std::abs(loss1 - loss0) < epsilon) {
+                if (rel_diff(beta0, beta) < epsilon) {
                     break;
                 }
-                loss0 = loss1;
+                beta0 = beta;
+                // loss1 = objective0(inner);
+                // if (std::abs(loss1 - loss0) < epsilon) {
+                //     break;
+                // }
+                // loss0 = loss1;
                 ++i;
             }
             if (verbose > 0) {
@@ -390,9 +385,8 @@ namespace abclass
         // set group weight
         set_group_weight();
         // set gamma
-        set_gamma(control_.dgamma_);
+        set_gamma(control_.kappa_ratio_);
         arma::uvec penalty_group { arma::find(control_.group_weight_ > 0.0) };
-        arma::uvec penalty_free { arma::find(control_.group_weight_ == 0.0) };
         // initialize
         arma::vec one_inner;
         if (control_.has_offset_) {
@@ -438,12 +432,9 @@ namespace abclass
 
         double one_strong_rhs { 0.0 };
         // get the solution (intercepts) of l1_lambda_max for a warm start
-        arma::uvec is_active_strong { arma::zeros<arma::uvec>(p0_) };
+        arma::uvec is_active_strong { arma::ones<arma::uvec>(p0_) };
         // only need to estimate beta not in the penalty group
-        for (arma::uvec::iterator it { penalty_free.begin() };
-             it != penalty_free.end(); ++it) {
-            is_active_strong(*it) = 1;
-        }
+        is_active_strong.elem(penalty_group).zeros();
         double l1_lambda { control_.alpha_ * lambda_max_ };
         double l2_lambda { (1 - control_.alpha_) * lambda_max_ };
         run_gmd_active_cycle(one_beta,
@@ -456,6 +447,9 @@ namespace abclass
                              control_.max_iter_,
                              control_.epsilon_,
                              control_.verbose_);
+        // exclude constant covariates from penalty group
+        // so that they will not be considered as active by strong rule at all
+        penalty_group = penalty_group.elem(arma::find(mm_lowerbound_ > 0.0));
         // optim with varying active set when p > n
         double old_lambda { lambda_max_ }; // for strong rule
         // main loop: for each lambda
diff --git a/inst/include/abclass/AbclassNet.h b/inst/include/abclass/AbclassNet.h
index b77aab3..7629946 100644
--- a/inst/include/abclass/AbclassNet.h
+++ b/inst/include/abclass/AbclassNet.h
@@ -74,6 +74,9 @@ namespace abclass
                                   const arma::vec& vj_xl) const
         {
             arma::vec inner_grad { loss_derivative(inner) };
+            // avoid coefficients diverging
+            inner_grad.elem(arma::find(inner_grad >
+                                       control_.max_grad_)).zeros();
             return arma::mean(control_.obs_weight_ % vj_xl % inner_grad);
         }
 
@@ -248,8 +251,8 @@ namespace abclass
         )
     {
         size_t i {0};
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         // use active-set if p > n ("helps when p >> n")
         if (varying_active_set) {
             arma::umat is_active_strong { is_active },
@@ -267,15 +270,15 @@ namespace abclass
                     Rcpp::checkUserInterrupt();
                     run_one_active_cycle(beta, inner, is_active_varying,
                                          l1_lambda, l2_lambda, true, verbose);
-                    // if (rel_diff(beta0, beta) < epsilon) {
-                    //     break;
-                    // }
-                    // beta0 = beta;
-                    loss1 = objective0(inner);
-                    if (std::abs(loss1 - loss0) < epsilon) {
+                    if (rel_diff(beta0, beta) < epsilon) {
                         break;
                     }
-                    loss0 = loss1;
+                    beta0 = beta;
+                    // loss1 = objective0(inner);
+                    // if (std::abs(loss1 - loss0) < epsilon) {
+                    //     break;
+                    // }
+                    // loss0 = loss1;
                     ii++;
                 }
                 // run a full cycle over the converged beta
@@ -319,15 +322,15 @@ namespace abclass
                 num_iter_ = i + 1;
                 run_one_active_cycle(beta, inner, is_active,
                                      l1_lambda, l2_lambda, false, verbose);
-                // if (rel_diff(beta0, beta) < epsilon) {
-                //     break;
-                // }
-                // beta0 = beta;
-                loss1 = objective0(inner);
-                if (std::abs(loss1 - loss0) < epsilon) {
+                if (rel_diff(beta0, beta) < epsilon) {
                     break;
                 }
-                loss0 = loss1;
+                beta0 = beta;
+                // loss1 = objective0(inner);
+                // if (std::abs(loss1 - loss0) < epsilon) {
+                //     break;
+                // }
+                // loss0 = loss1;
                 i++;
             }
             if (verbose > 0) {
@@ -415,21 +418,21 @@ namespace abclass
         const unsigned int verbose
         )
     {
-        // arma::mat beta0 { beta };
-        double loss0 { objective0(inner) }, loss1 { loss0 };
+        arma::mat beta0 { beta };
+        // double loss0 { objective0(inner) }, loss1 { loss0 };
         for (size_t i {0}; i < max_iter; ++i) {
             Rcpp::checkUserInterrupt();
             num_iter_ = i + 1;
             run_one_full_cycle(beta, inner, l1_lambda, l2_lambda, verbose);
-            // if (rel_diff(beta0, beta) < epsilon) {
-            //     break;
-            // }
-            // beta0 = beta;
-            loss1 = objective0(inner);
-            if (std::abs(loss1 - loss0) < epsilon) {
+            if (rel_diff(beta0, beta) < epsilon) {
                 break;
             }
-            loss0 = loss1;
+            beta0 = beta;
+            // loss1 = objective0(inner);
+            // if (std::abs(loss1 - loss0) < epsilon) {
+            //     break;
+            // }
+            // loss0 = loss1;
         }
         if (verbose > 0) {
             if (num_iter_ < max_iter) {
diff --git a/inst/include/abclass/Control.h b/inst/include/abclass/Control.h
index f3d73d2..6626fe8 100644
--- a/inst/include/abclass/Control.h
+++ b/inst/include/abclass/Control.h
@@ -45,8 +45,8 @@ namespace abclass
         //   group {lasso,scad,mcp}
         arma::vec group_weight_ { arma::vec() }; // adaptive group weights
         //   group {scad,mcp}
-        double dgamma_ { 0.01 }; // delta gamma
-        double gamma_ { - 1.0 }; // gamma for group non-convex penalty
+        double kappa_ratio_ { 0.99 }; // parameter to set gamma
+        double gamma_ { - 1.0 };      // gamma for group non-convex penalty
 
         // tuning
         //   cross-validation
@@ -59,6 +59,7 @@ namespace abclass
         // optimization
         unsigned int max_iter_ { 100000 }; // maximum number of iterations
         double epsilon_ { 1e-3 };          // tolerance to check convergence
+        double max_grad_ { - 1e-5 };       // maximum first gradient of loss
         bool varying_active_set_ { true }; // if active set should be adaptive
         bool standardize_ { true };        // is x_ standardized (column-wise)
         unsigned int verbose_ { 0 };
@@ -74,6 +75,7 @@ namespace abclass
 
         Control(const unsigned int max_iter,
                 const double epsilon,
+                const double max_grad,
                 const bool standardize = true,
                 const unsigned int verbose = 0)
         {
@@ -82,6 +84,7 @@ namespace abclass
             }
             max_iter_ = max_iter;
             epsilon_ = epsilon;
+            max_grad_ = max_grad,
             standardize_ = standardize;
             verbose_ = verbose;
         }
@@ -145,14 +148,14 @@ namespace abclass
             return this;
         }
         inline Control* reg_group(const arma::vec& group_weight,
-                                  const double dgamma = 1.0)
+                                  const double kappa_ratio = 0.99)
         {
-            group_weight_ = group_weight;
-            if (dgamma > 0.0) {
-                dgamma_ = dgamma;
-            } else {
-                throw std::range_error("The 'dgamma' must be positive.");
+            // kappa must be in (0, 1)
+            if (is_le(kappa_ratio, 0.0) || is_ge(kappa_ratio, 1.0)) {
+                throw std::range_error("The 'kappa_ratio' must be in (0, 1).");
             }
+            group_weight_ = group_weight;
+            kappa_ratio_ = kappa_ratio;
             return this;
         }
         // tuning
diff --git a/inst/include/abclass/Logistic.h b/inst/include/abclass/Logistic.h
index 92d1a14..910616a 100644
--- a/inst/include/abclass/Logistic.h
+++ b/inst/include/abclass/Logistic.h
@@ -27,21 +27,34 @@ namespace abclass
     class Logistic
     {
     public:
-        Logistic() {};
+        Logistic(){}
 
         // loss function
+        inline double loss(const double u) const
+        {
+            return std::log(1.0 + std::exp(- u));
+        }
         inline double loss(const arma::vec& u,
                            const arma::vec& obs_weight) const
         {
-            return arma::mean(obs_weight % arma::log(1.0 + arma::exp(- u)));
+            double res { 0.0 };
+            for (size_t i {0}; i < u.n_elem; ++i) {
+                res += obs_weight(i) * loss(u[i]);
+            }
+            return res / static_cast<double>(u.n_elem);
+            // return arma::mean(obs_weight % arma::log(1.0 + arma::exp(- u)));
         }
 
         // the first derivative of the loss function
+        inline double dloss(const double u) const
+        {
+            return - 1.0 / (1.0 + std::exp(u));
+        }
         inline arma::vec dloss(const arma::vec& u) const
         {
             arma::vec out { arma::zeros(u.n_elem) };
             for (size_t i {0}; i < out.n_elem; ++i) {
-                out[i] = - 1.0 / (1.0 + std::exp(u[i]));
+                out[i] = dloss(u[i]);
             }
             return out;
             // return - 1.0 / (1.0 + arma::exp(u));
diff --git a/man/abclass.Rd b/man/abclass.Rd
index 2233624..1245b05 100644
--- a/man/abclass.Rd
+++ b/man/abclass.Rd
@@ -24,12 +24,13 @@ abclass.control(
   group_weight = NULL,
   group_penalty = c("lasso", "scad", "mcp"),
   offset = NULL,
-  dgamma = 1,
+  kappa_ratio = 0.9,
   lum_a = 1,
   lum_c = 1,
   boost_umin = -5,
   maxit = 100000L,
   epsilon = 1e-04,
+  max_grad = -1e-05,
   standardize = TRUE,
   varying_active_set = TRUE,
   verbose = 0L,
@@ -96,8 +97,9 @@ penalty.}
 \item{offset}{An optional numeric matrix for offsets of the decision
 functions.}
 
-\item{dgamma}{A positive number specifying the increment to the minimal
-gamma parameter for group SCAD or group MCP.}
+\item{kappa_ratio}{A positive number within (0, 1) specifying the ratio of
+reciprocal gamma parameter for group SCAD or group MCP.  A close-to-zero
+\code{kappa_ratio} would give a solution close to lasso solution.}
 
 \item{lum_a}{A positive number greater than one representing the parameter
 \emph{a} in LUM, which will be used only if \code{loss = "lum"}.  The
@@ -116,6 +118,9 @@ The default value is \code{10^5}.}
 \item{epsilon}{A positive number specifying the relative tolerance that
 determines convergence.  The default value is \code{1e-4}.}
 
+\item{max_grad}{The maximum value of gradients of loss function to avoid
+coefficients diverging.}
+
 \item{standardize}{A logical value indicating if each column of the design
 matrix should be standardized internally to have mean zero and standard
 deviation equal to the sample size.  The default value is \code{TRUE}.
diff --git a/src/template_helpers.h b/src/template_helpers.h
index 1bf7c03..1eb0585 100644
--- a/src/template_helpers.h
+++ b/src/template_helpers.h
@@ -36,7 +36,7 @@ inline Rcpp::List template_fit(T& object)
                 Rcpp::Named("alpha") = object.control_.alpha_,
                 Rcpp::Named("group_weight") =
                 abclass::arma2rvec(object.control_.group_weight_),
-                Rcpp::Named("dgamma") = object.control_.dgamma_,
+                Rcpp::Named("kappa_ratio") = object.control_.kappa_ratio_,
                 Rcpp::Named("gamma") = object.control_.gamma_
                 )
             );
@@ -71,7 +71,7 @@ inline Rcpp::List template_fit(T& object)
             Rcpp::Named("alpha") = object.control_.alpha_,
             Rcpp::Named("group_weight") =
             abclass::arma2rvec(object.control_.group_weight_),
-            Rcpp::Named("dgamma") = object.control_.dgamma_,
+            Rcpp::Named("kappa_ratio") = object.control_.kappa_ratio_,
             Rcpp::Named("gamma") = object.control_.gamma_
             ),
         Rcpp::Named("loss_wo_penalty") = abclass::arma2rvec(
@@ -84,8 +84,11 @@ inline Rcpp::List template_fit(T& object)
 inline abclass::Control abclass_control(const Rcpp::List& control)
 {
     abclass::Control ctrl {
-        control["maxit"], control["epsilon"],
-        control["standardize"], control["verbose"]
+        control["maxit"],
+        control["epsilon"],
+        control["max_grad"],
+        control["standardize"],
+        control["verbose"]
     };
     ctrl.set_intercept(control["intercept"])->
         set_weight(control["weight"])->
@@ -96,7 +99,7 @@ inline abclass::Control abclass_control(const Rcpp::List& control)
         reg_path(control["lambda"])->
         reg_net(control["alpha"])->
         reg_group(control["group_weight"],
-                  control["dgamma"])->
+                  control["kappa_ratio"])->
         tune_cv(control["nfolds"],
                 control["stratified"],
                 control["alignment"])->