Linear Regression

[2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Use a larger default font for every figure in this notebook
plt.rcParams['font.size'] = 16
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
[3]:
# Load the penguins data and drop rows missing either of the two columns
# used below (bill length as predictor, body mass as response).
penguins_url = "https://raw.githubusercontent.com/roualdes/data/master/penguins.csv"
df = pd.read_csv(penguins_url).dropna(subset=["bill_length_mm", "body_mass_g"])
[4]:
# Label the axes so the figure can be read on its own; the scatter call stays
# last so the cell's displayed output is unchanged.
plt.xlabel("Bill length (mm)")
plt.ylabel("Body mass (g)")
plt.scatter(df["bill_length_mm"], df["body_mass_g"])
[4]:
<matplotlib.collections.PathCollection at 0x12229f580>
../_images/notebooks_linear_regression_3_1.png
[5]:
# Number of rows (observations) in df
N = len(df.index)
# Design matrix: np.c_ turns the 1-D bill-length column into a 2-D,
# single-column array
X = np.c_[df.bill_length_mm]
\[\mathrm{loss}(\beta_0, \beta_1 \mid x, y) = \sum_{n=1}^N (y_n - \beta_0 - \beta_1 x_n)^2\]
[6]:
# Bundle the predictor matrix and the response so both can be handed to the
# loss function as a single extra argument.
data = dict(X=X, y=df["body_mass_g"])
def loss(theta, data):
    """Sum of squared residuals of a linear model.

    Parameters
    ----------
    theta : sequence of numbers
        ``theta[0]`` is the intercept; ``theta[1:]`` are the slope
        coefficients multiplied against ``data["X"]``.
    data : mapping
        Holds the predictors under key ``"X"`` and the responses under
        key ``"y"``.

    Returns
    -------
    The sum over observations of ``(y - theta[0] - X @ theta[1:]) ** 2``.
    """
    intercept = theta[0]
    slopes = theta[1:]
    linear_part = data["X"] @ slopes
    # Subtract in the same order as the formula: y - beta_0 - X beta
    residuals = data["y"] - intercept - linear_part
    return np.sum(residuals ** 2)
[13]:
# Seed the random starting point so a fresh Restart & Run All starts the
# optimizer from the same place every time.
rng = np.random.default_rng(seed=0)
theta_init = rng.normal(size=2)  # one intercept and one slope
minimize(loss, theta_init,
         args=(data,),  # trailing comma makes a 1-tuple; `(data)` is just `data`
         method="L-BFGS-B")
[13]:
      fun: 141638625.62092987
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([0., 0.])
  message: 'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 63
      nit: 15
     njev: 21
   status: 0
  success: True
        x: array([362.24698551,  87.41661658])
[8]:
# Fit the same bill-length -> body-mass regression with scikit-learn and
# display the fitted intercept.
fit = LinearRegression()
fit.fit(X, df["body_mass_g"])
fit.intercept_
[8]:
362.30672165392207
[9]:
# Coefficient array from the scikit-learn fit (one entry per column of X)
fit.coef_
[9]:
array([87.41527705])