import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
# Baseline: fit scikit-learn's SGDRegressor on two columns of the iris
# measurement matrix (the class target returned by load_iris is not used).
iris = load_iris().data

# Column index 2 is the single predictor (kept 2-D via reshape); the last
# column is the regression target.
feature = iris[:, 2].reshape(-1, 1)
labels = iris[:, -1]

# Hold out 30% of the rows; no random_state is given, so the split differs
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    labels,
    test_size=.3,
)

# Model without an intercept term, i.e. a line through the origin.
sgd = SGDRegressor(eta0=0.01, fit_intercept=False)
sgd.fit(X_train, y_train)

# Bare expressions kept from the notebook: they only display values there.
sgd.score(X_train, y_train)
sgd.coef_, sgd.intercept_

# Evaluate the fitted line on a grid reaching one unit past the data range.
max_ = feature.max() + 1
min_ = feature.min() - 1
x = np.linspace(min_, max_, num=100)
y = x * sgd.coef_

# Scatter of the raw points (markers only, no connecting line) plus the line.
plt.plot(feature, labels, marker='o', lw=0)
plt.plot(x, y)
python实现梯度下降线性回归
- 1.截距(偏置)项:fit_intercept,决定是否拟合截距
- 2.学习率(迭代步长):eta0
- 3.最大迭代次数:max_iter,迭代次数过多容易过拟合,过少容易欠拟合
- 4.增量式机器学习
class SGDLinearRegression(object):
    """Linear regression fitted by gradient descent.

    Despite the ``SGD`` prefix, every iteration computes the gradient over
    the whole training set passed to :meth:`fit` (batch gradient descent);
    no sampling or shuffling takes place.

    Parameters
    ----------
    fit_intercept : bool, default True
        Whether to learn an intercept. When False the intercept stays at 0.
    eta0 : float, default 0.01
        Constant learning rate applied to every update.
    max_iter : int, default 100
        Number of gradient steps; there is no early stopping.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Learned weights (available after :meth:`fit`).
    intercept_ : float
        Learned intercept, or 0 when ``fit_intercept`` is False.
    save_loss_ : ndarray of shape (max_iter,)
        RMSE on the training data measured before each update step.
    """

    def __init__(self, fit_intercept=True, eta0=0.01, max_iter=100):
        self.fit_intercept = fit_intercept
        self.eta0 = eta0
        self.max_iter = max_iter

    def fit(self, X, y):
        """Fit the model to ``X`` and ``y`` and return ``self``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``y`` does not hold exactly
            one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features); got %d-D input"
                % X.ndim
            )
        sample_sum, feature_nums = X.shape
        if sample_sum == 0:
            raise ValueError("X must contain at least one sample")
        # Work with a column vector so that h - y never broadcasts to (n, n).
        y = y.reshape(-1, 1)
        if y.shape[0] != sample_sum:
            raise ValueError(
                "X and y disagree on the number of samples: %d vs %d"
                % (sample_sum, y.shape[0])
            )

        self.coef_ = np.zeros(shape=(feature_nums, 1))
        self.intercept_ = 0.0
        self.save_loss_ = np.zeros(shape=self.max_iter)
        for i in range(self.max_iter):
            h = X.dot(self.coef_) + self.intercept_
            grad_w, grad_b, loss_ = self.loss(X, y, h, sample_sum)
            # Record the loss history; previously it was computed and dropped.
            self.save_loss_[i] = loss_
            self.coef_ -= self.eta0 * grad_w
            if self.fit_intercept:
                self.intercept_ -= self.eta0 * grad_b
        # Expose the weights as a flat vector, as predict() expects.
        self.coef_ = self.coef_.ravel()
        return self

    def loss(self, X, y, h, sample_sum):
        """Return ``(grad_w, grad_b, loss)`` for predictions ``h``.

        ``grad_w`` and ``grad_b`` are the gradients of the half mean squared
        error ``sum((h - y) ** 2) / (2 * sample_sum)`` with respect to the
        weights and the intercept. ``loss`` is the root mean squared error
        and is only used for monitoring.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        y : ndarray of shape (n_samples,) or (n_samples, 1)
        h : ndarray of shape (n_samples, 1)
            Current predictions.
        sample_sum : int
            Number of samples used as the averaging denominator.
        """
        if y.ndim < 2:
            y = y.reshape(-1, 1)
        residual = h - y
        loss = np.sqrt(np.sum(np.square(residual)) / sample_sum)
        grad_w = X.T.dot(residual) / sample_sum
        # Same scaling as grad_w: the stray 0.5 factor halved the intercept
        # gradient relative to the weight gradient.
        grad_b = np.sum(residual) / sample_sum
        return grad_w, grad_b, loss

    def predict(self, X):
        """Return ``X @ coef_ + intercept_`` for array-like ``X``."""
        X = np.asarray(X)
        return X.dot(self.coef_) + self.intercept_
# Train the hand-written regressor on the training split, again without an
# intercept term.
sgd_ = SGDLinearRegression(max_iter=100, eta0=0.01, fit_intercept=False)
sgd_.fit(X_train, y_train)

# Bare expression kept from the notebook: it only displays the parameters.
sgd_.coef_, sgd_.intercept_

# Evaluate the fitted line on a grid reaching one unit past the data range.
max_ = feature.max() + 1
min_ = feature.min() - 1
x = np.linspace(min_, max_, num=100)
y = x * sgd_.coef_

# Scatter of the raw points (markers only, no connecting line) plus the line.
plt.plot(feature, labels, marker='o', lw=0)
plt.plot(x, y)