Linear Regression
[2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 16})
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
[3]:
# Load the penguins data and keep only the rows where both bill length and
# body mass are present. Chaining (rather than `inplace=True`) builds `df` in
# a single step from the raw file, so re-running this cell always yields the
# same frame.
df = (
    pd.read_csv("https://raw.githubusercontent.com/roualdes/data/master/penguins.csv")
    .dropna(subset=["bill_length_mm", "body_mass_g"])
)
[4]:
# Body mass against bill length, drawn on an explicit Axes so the figure can be
# labelled and stands on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.set_xlabel("bill_length_mm")
ax.set_ylabel("body_mass_g")
# Kept as the last expression so the cell still displays the PathCollection.
ax.scatter(df["bill_length_mm"], df["body_mass_g"])
[4]:
<matplotlib.collections.PathCollection at 0x12229f580>
[5]:
# Number of observations (rows remaining in df).
N = len(df)
# Design matrix without an intercept column: selecting with a list keeps the
# result two-dimensional, giving an (N, 1) array holding bill length.
X = df[["bill_length_mm"]].to_numpy()
\[\text{loss}(\beta_0, \beta_1 \mid x, y) = \sum_{n=1}^N (y_n - \beta_0 - \beta_1 x_n)^2\]
[6]:
# Bundle the predictor matrix and the response into one object so both can be
# passed to the loss function as a single extra argument.
data = dict(X=X, y=df["body_mass_g"])
def loss(theta, data):
    """Sum of squared residuals of a linear model with an intercept.

    Computes ``sum((y - theta[0] - X @ theta[1:]) ** 2)``.

    Parameters
    ----------
    theta : array-like, shape (1 + p,)
        ``theta[0]`` is the intercept and ``theta[1:]`` are the slopes, one
        per column of ``X``.
    data : dict
        ``data["X"]`` is the predictor matrix of shape (N, p); a 1-D sequence
        of length N is accepted and treated as a single predictor.
        ``data["y"]`` holds the N responses (1-D, an (N, 1) column, or a
        pandas Series).

    Returns
    -------
    float
        The residual sum of squares.
    """
    theta = np.asarray(theta, dtype=float)

    X = np.asarray(data["X"], dtype=float)
    if X.ndim == 1:
        # A flat vector of predictor values is one column, not one row.
        X = X[:, np.newaxis]

    # Flatten the response: subtracting an (N,) prediction from an (N, 1)
    # column would silently broadcast to an (N, N) matrix and inflate the sum.
    # Working on a plain ndarray also means NaNs propagate instead of being
    # skipped by a pandas reduction.
    y = np.asarray(data["y"], dtype=float).ravel()

    residuals = y - theta[0] - X @ theta[1:]
    return np.sum(residuals ** 2)
[13]:
# Seeded generator so the random starting point, and therefore the optimiser
# trace printed below, is the same on every run.
rng = np.random.default_rng(0)
theta_init = rng.normal(size=2)  # starting values for [intercept, slope]

minimize(
    loss,
    theta_init,
    args=(data,),  # one-element tuple; `(data)` without the comma is just `data`
    method="L-BFGS-B",
)
[13]:
fun: 141638625.62092987
hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
jac: array([0., 0.])
message: 'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
nfev: 63
nit: 15
njev: 21
status: 0
success: True
x: array([362.24698551, 87.41661658])
[8]:
# Fit the same one-predictor model with scikit-learn to check the optimiser's
# estimates against a reference implementation.
fit = LinearRegression()
fit.fit(X, y=df["body_mass_g"])
# Fitted intercept.
fit.intercept_
[8]:
362.30672165392207
[9]:
# Slope coefficient(s) of the scikit-learn fit, one per column of X.
fit.coef_
[9]:
array([87.41527705])