# Source code for ds385.coordinatedescent
import numpy as np
def soft_threshold(b, l):
    """Soft threshold operator used within coordinate descent for lasso.

    The soft threshold operator is defined as

    .. math::
        f(b, \\lambda) = \\text{sign}(b) \\max{(|b| - \\lambda, 0)}

    Works elementwise on scalars or numpy arrays.
    """
    # Shrink the magnitude by l, flooring at zero, then restore the sign.
    magnitude = np.clip(np.abs(b) - l, 0, None)
    return np.sign(b) * magnitude
def cd(
    X,
    y,
    initbeta=None,
    tol=1e-8,
    maxiters=5000,
    l=0,
    l1=False,
    l2=False,
):
    """Coordinate descent for regularized linear regression models

    .. math::
        l_{\\lambda}(\\beta) = \\sum_{n=1}^N(y_n - \\beta_0 - \\sum_{j=1}^J
        x_{ij} \\beta_j)^2 + \\lambda \\sum_{j=1}^J \\beta_j^q

    where :math:`q \\in \\{1, 2\\}`.

    The loss function :math:`l(\\beta)` is minimized, for a fixed and user
    supplied penalty parameter :math:`\\lambda > 0` using coordinate descent and
    starting from the initial values given in `initbeta`. If no initial values
    are supplied, random numbers uniformly distributed in :math:`(-2, 2)` are
    used.

    The routine terminates when either the parameters :math:`\\beta_j` for
    :math:`j \\in 1:J` stop changing to within the specified tolerance `tol`,
    which defaults to :math:`1e-8`, or when the maximum number of iterations is
    reached, `maxiters = 5_000`.

    The arguments `l, l1,` and `l2` control the penalty parameter. The argument
    `l` specifies the value of the penalty parameter :math:`\\lambda` and
    defaults to :math:`0`. When `l` is equal to :math:`0` the model fit with
    coordinate descent is an unregularized linear regression model. If `l` is
    positive, then either the lasso or the ridge linear regression model is fit,
    depending on which of `l1` (lasso) or `l2` (ridge) is true.

    The intercept (column :math:`0` of `X`, assumed to be a column of ones) is
    never penalized, and is recomputed from the uncentered response after the
    coordinate sweeps converge.

    Parameters
    ----------
    X: 2d np.array
        A 2 dimensional numpy array with :math:`N` observations in the rows and
        :math:`K` predictors. This form is known in the statistics community as
        a model matrix.
    y: 1d np.array
        A 1 dimensional numpy array with :math:`N` observations in the rows.
        This is known as a response vector in the statistics community.
    initbeta: 1d np.array
        A 1 dimensional numpy array of initial values for the coefficient
        vector :math:`\\beta`; defaults to draws uniform on :math:`(-2, 2)`.

    Returns
    -------
    np.array
        Minimized values of :math:`\\beta` for the supplied values of :math:`X,
        y, \\lambda`.

    Raises
    ------
    ValueError
        If `l` is negative, or if both `l1` and `l2` are true.
    """
    # Validate inputs with explicit exceptions: `assert` is stripped under
    # `python -O` and would silently accept invalid arguments.
    if l < 0:
        raise ValueError("penalty parameter l must be non-negative")
    if l1 and l2:
        raise ValueError("at most one of l1 and l2 may be true")
    N, K = np.shape(X)
    if initbeta is None:
        initbeta = np.random.uniform(low=-2, high=2, size=K)
    # Center the response; the intercept is restored from uncentered y below.
    yc = y - np.mean(y)
    beta = np.copy(initbeta)
    # Per-column sums of squares are invariant across sweeps: hoist them out
    # of the loop instead of recomputing np.dot(X[:, k], X[:, k]) each pass.
    colsq = np.sum(X * X, axis=0)
    # Exactly `maxiters` coordinate sweeps at most (the original ran one
    # extra sweep before noticing it had exceeded the limit).
    for _ in range(maxiters):
        beta_old = np.copy(beta)
        for k in range(K):
            yhat = np.matmul(X, beta)
            # Partial residual: add back column k's own contribution so the
            # update solves the 1-d problem in coordinate k alone.
            r = yc - yhat + X[:, k] * beta[k]
            beta[k] = np.dot(r, X[:, k])
            d = colsq[k]
            # Column 0 is the (unpenalized) intercept.
            if k != 0 and l1:
                beta[k] = soft_threshold(beta[k], l)
            if k != 0 and l2:
                d += l
            beta[k] /= d
        if np.linalg.norm(beta - beta_old) < tol:
            break
    else:
        print(f"Maximum iterations reached {maxiters}")
    # Recompute the intercept against the uncentered response.
    beta[0] = np.sum(y - np.matmul(X[:, 1:K], beta[1:K])) / N
    return beta