"""Coordinate descent routines for regularized linear regression (module ``ds385.coordinatedescent``)."""

import numpy as np


def soft_threshold(b, l):
    """Apply the soft-threshold operator used in the lasso coordinate update.

    Defined as

    .. math::

        f(b, \\lambda) = \\text{sign}(b) \\max{(|b| - \\lambda, 0)}

    i.e. shrink ``b`` toward zero by ``l``, clamping at zero once the
    magnitude is exhausted. Works elementwise on arrays.
    """
    # Shrink the magnitude by l, floor it at zero, then restore the sign.
    shrunk = np.clip(np.abs(b) - l, 0, None)
    return np.sign(b) * shrunk
def cd(
    X,
    y,
    initbeta=None,
    tol=1e-8,
    maxiters=5000,
    l=0,
    l1=False,
    l2=False,
):
    """Coordinate descent for regularized linear regression models.

    Minimizes

    .. math::

        l_{\\lambda}(\\beta) = \\sum_{n=1}^N (y_n - \\beta_0
            - \\sum_{j=1}^J x_{nj} \\beta_j)^2
            + \\lambda \\sum_{j=1}^J \\beta_j^q

    where :math:`q \\in \\{1, 2\\}`: the lasso penalty (:math:`q = 1`)
    when `l1` is true, the ridge penalty (:math:`q = 2`) when `l2` is
    true, and no penalty when `l == 0` (or neither flag is set).  The
    intercept — column 0 of `X` — is never penalized.

    The sweeps run on the centered response; the intercept is restored
    on the original scale after convergence.  Iteration stops when the
    Euclidean norm of the change in beta falls below `tol`, or after
    `maxiters` full sweeps.

    Parameters
    ----------
    X : 2d np.array
        Model matrix with N observations in the rows and K columns;
        column 0 is assumed to be the intercept column.
    y : 1d np.array
        Response vector of length N.
    initbeta : 1d np.array, optional
        Initial coefficient values.  When None, values are drawn
        uniformly from (-2, 2); note this makes the run
        non-reproducible unless the NumPy global seed is set.
    tol : float
        Convergence tolerance on ``norm(beta - beta_old)`` between
        sweeps.  Defaults to 1e-8.
    maxiters : int
        Maximum number of full coordinate sweeps.  Defaults to 5000.
    l : float
        Penalty parameter :math:`\\lambda \\ge 0`.  Defaults to 0
        (unregularized regression).
    l1, l2 : bool
        Select the lasso (`l1`) or ridge (`l2`) penalty; mutually
        exclusive.

    Returns
    -------
    np.array
        Estimated coefficient vector beta of length K (float dtype).

    Raises
    ------
    ValueError
        If `l` is negative or both `l1` and `l2` are requested.
    """
    # Validate with explicit exceptions rather than assert, which is
    # silently stripped when Python runs with -O.
    if l < 0:
        raise ValueError("penalty parameter l must be nonnegative")
    if l1 and l2:
        raise ValueError("l1 and l2 are mutually exclusive")

    N, K = np.shape(X)
    if initbeta is None:
        initbeta = np.random.uniform(low=-2, high=2, size=K)

    # Center the response; the intercept is recovered after the sweeps.
    yc = y - np.mean(y)

    # Copy as float: an integer initbeta would otherwise make the
    # in-place `beta[k] /= d` below truncate toward zero.
    beta = np.array(initbeta, dtype=float)

    # Column sums of squares are loop-invariant — compute them once
    # instead of once per coordinate per sweep.
    col_ss = np.sum(X * X, axis=0)

    iters = 0
    while True:
        iters += 1
        beta_old = np.copy(beta)
        for k in range(K):
            # Partial residual: remove every coordinate's contribution
            # except the k-th, then project onto column k.
            yhat = np.matmul(X, beta)
            Xbk = X[:, k] * beta[k]
            beta[k] = np.dot(yc - yhat + Xbk, X[:, k])
            d = col_ss[k]
            # The intercept (k == 0) is never penalized.
            if k != 0 and l1:
                beta[k] = soft_threshold(beta[k], l)
            if k != 0 and l2:
                d += l
            beta[k] /= d
        if np.linalg.norm(beta - beta_old) < tol:
            break
        # `>=` (not `>`): the old test allowed maxiters + 1 sweeps.
        if iters >= maxiters:
            print(f"Maximum iterations reached {iters}")
            break

    # Recover the intercept on the original (uncentered) scale.
    beta[0] = np.sum(y - np.matmul(X[:, 1:K], beta[1:K])) / N
    return beta