# Source code for ds385.coordinatedescent
import numpy as np
def soft_threshold(b, l):
    """Soft threshold operator used within coordinate descent for lasso.

    The soft threshold operator is defined as

    .. math::
        f(b, \\lambda) = \\text{sign}(b) \\max{(|b| - \\lambda, 0)}

    Works elementwise on scalars or numpy arrays.
    """
    # Shrink the magnitude by l, flooring at zero, then restore the sign.
    magnitude = np.clip(np.abs(b) - l, 0, None)
    return np.sign(b) * magnitude
def cd(
    X,
    y,
    initbeta=None,
    tol=1e-8,
    maxiters=5000,
    l=0,
    l1=False,
    l2=False,
):
    """Coordinate descent for regularized linear regression models

    .. math::
        l_{\\lambda}(\\beta) = \\sum_{n=1}^N(y_n - \\beta_0 - \\sum_{j=1}^J
        x_{ij} \\beta_j)^2 + \\lambda \\sum_{j=1}^J \\beta_j^q

    where :math:`q \\in \\{1, 2\\}`.

    The loss function :math:`l(\\beta)` is minimized, for a fixed and user
    supplied penalty parameter :math:`\\lambda > 0` using coordinate descent and
    starting from the initial values given in `initbeta`. If no initial values
    are supplied, random numbers uniformly distributed in :math:`(-2, 2)` are
    used.

    The routine terminates when either the parameters :math:`\\beta_j` for
    :math:`j \\in 1:J` stop changing to within the specified tolerance `tol`,
    which defaults to :math:`1e-8`, or when the maximum number of iterations is
    reached, `maxiters = 5_000`.

    The arguments `l, l1,` and `l2` control the penalty parameter. The argument
    `l` specifies the value of the penalty parameter :math:`\\lambda` and
    defaults to :math:`0`. When `l` is equal to :math:`0` the model fit with
    coordinate descent is an unregularized linear regression model. If `l` is
    positive, then either the lasso or the ridge linear regression model is fit,
    depending on which of `l1` (lasso) or `l2` (ridge) is true.

    The intercept (column :math:`0` of `X`, assumed to be a column of ones) is
    never penalized, and is recomputed from the uncentered response after the
    coordinate sweeps converge.

    Parameters
    ----------
    X: 2d np.array
        A 2 dimensional numpy array with :math:`N` observations in the rows and
        :math:`K` predictors. This form is known in the statistics community as
        a model matrix.
    y: 1d np.array
        A 1 dimensional numpy array with :math:`N` observations in the rows.
        This is known as a response vector in the statistics community.
    initbeta: 1d np.array
        A 1 dimensional numpy array of initial values for the coefficient
        vector :math:`\\beta`; defaults to draws uniform on :math:`(-2, 2)`.

    Returns
    -------
    np.array
        Minimized values of :math:`\\beta` for the supplied values of :math:`X,
        y, \\lambda`.

    Raises
    ------
    ValueError
        If `l` is negative, or if both `l1` and `l2` are true.
    """
    # Validate inputs with explicit exceptions: `assert` is stripped under
    # `python -O` and would silently accept invalid arguments.
    if l < 0:
        raise ValueError("penalty parameter l must be non-negative")
    if l1 and l2:
        raise ValueError("at most one of l1 and l2 may be true")
    N, K = np.shape(X)
    if initbeta is None:
        initbeta = np.random.uniform(low=-2, high=2, size=K)
    # Center the response; the intercept is restored from uncentered y below.
    yc = y - np.mean(y)
    beta = np.copy(initbeta)
    # Per-column sums of squares are invariant across sweeps: hoist them out
    # of the loop instead of recomputing np.dot(X[:, k], X[:, k]) each pass.
    colsq = np.sum(X * X, axis=0)
    # Exactly `maxiters` coordinate sweeps at most (the original ran one
    # extra sweep before noticing it had exceeded the limit).
    for _ in range(maxiters):
        beta_old = np.copy(beta)
        for k in range(K):
            yhat = np.matmul(X, beta)
            # Partial residual: add back column k's own contribution so the
            # update solves the 1-d problem in coordinate k alone.
            r = yc - yhat + X[:, k] * beta[k]
            beta[k] = np.dot(r, X[:, k])
            d = colsq[k]
            # Column 0 is the (unpenalized) intercept.
            if k != 0 and l1:
                beta[k] = soft_threshold(beta[k], l)
            if k != 0 and l2:
                d += l
            beta[k] /= d
        if np.linalg.norm(beta - beta_old) < tol:
            break
    else:
        print(f"Maximum iterations reached {maxiters}")
    # Recompute the intercept against the uncentered response.
    beta[0] = np.sum(y - np.matmul(X[:, 1:K], beta[1:K])) / N
    return beta