Hard Voting
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Hard voting: each classifier predicts a class label, and the
# majority label becomes the ensemble's prediction.
voting_clf = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC()),
    ('dt_clf', DecisionTreeClassifier(random_state=666))
], voting='hard')
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)
Output accuracy: 0.896
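For reference, each member can be scored on its own, so the ensemble's 0.896 can be compared against the individual learners. A minimal sketch, reusing the split and imports above:
for name, clf in [('log_clf', LogisticRegression()),
                  ('svm_clf', SVC()),
                  ('dt_clf', DecisionTreeClassifier(random_state=666))]:
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))  # individual test accuracy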
Soft Voting
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Soft voting averages the members' predicted class probabilities, so
# every estimator must expose predict_proba; SVC only does so when
# constructed with probability=True.
voting_clf2 = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC(probability=True)),
    ('dt_clf', DecisionTreeClassifier(random_state=666))
], voting='soft')
voting_clf2.fit(X_train, y_train)
voting_clf2.score(X_test, y_test)
Output accuracy: 0.912
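The averaged probabilities behind a soft vote can be inspected directly on the fitted ensemble. A minimal sketch:
voting_clf2.predict_proba(X_test[:3])  # mean per-class probabilities of the members
voting_clf2.predict(X_test[:3])        # argmax of those averaged probabilities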
Bagging (sampling with replacement)
n_estimators=500: train 500 base learners.
max_samples=100: train each base learner on 100 sampled rows.
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# bootstrap=True: each learner's 100 rows are drawn with replacement.
bagging_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                n_estimators=500, max_samples=100,
                                bootstrap=True)
bagging_clf.fit(X_train, y_train)
bagging_clf.score(X_test, y_test)
Output accuracy: 0.916
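Since every learner sees only 100 of the training rows, each leaves most rows out-of-bag, and those rows can validate the ensemble without a separate test split (the pattern the following sections rely on). A minimal sketch; bagging_oob_clf is an illustrative name:
bagging_oob_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                    n_estimators=500, max_samples=100,
                                    bootstrap=True, oob_score=True)
bagging_oob_clf.fit(X, y)   # fit on all data; out-of-bag rows act as validation
bagging_oob_clf.oob_score_  # out-of-bag accuracy estimate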
Random Subspaces (random sampling of features)
max_features: the number of features to draw from X to train each base estimator.
import warnings
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# max_samples=1.0 keeps every row; the randomness comes from sampling
# features instead (max_features=1, bootstrap_features=True).
random_subspaces_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                         n_estimators=500, max_samples=1.0,
                                         bootstrap=True, oob_score=True,
                                         max_features=1, bootstrap_features=True)
random_subspaces_clf.fit(X, y)
random_subspaces_clf.oob_score_
Output accuracy (OOB): 0.824
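Which column each tree received is recorded on the fitted ensemble; since the moons data has only two features, max_features=1 hands every tree a single coordinate. A quick check:
random_subspaces_clf.estimators_features_[:5]  # feature index drawn for each of the first five trees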
Random Patches (random sampling of both samples and features)
import warnings
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Random Patches = Random Subspaces plus row subsampling: both
# max_samples=100 and max_features=1 are in play.
random_patches_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                       n_estimators=500, max_samples=100,
                                       bootstrap=True, oob_score=True,
                                       max_features=1, bootstrap_features=True)
random_patches_clf.fit(X, y)
random_patches_clf.oob_score_
Output accuracy (OOB): 0.86
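Both draws, the rows and the features given to each learner, are stored on the fitted ensemble (as index arrays in recent scikit-learn versions). A minimal sketch:
random_patches_clf.estimators_samples_[0]   # row indices drawn for the first tree
random_patches_clf.estimators_features_[0]  # the single feature it was given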
Random Forest
import warnings
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# A random forest bags decision trees and, at every split, considers
# only a random subset of features.
rf_clf = RandomForestClassifier(n_estimators=500, random_state=666, oob_score=True)
rf_clf.fit(X, y)
rf_clf.oob_score_
Output accuracy (OOB): 0.896
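A fitted forest also exposes impurity-based feature importances averaged over its trees, one practical advantage over a hand-assembled bagging ensemble. A quick look:
rf_clf.feature_importances_  # one weight per input feature, summing to 1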
Extra-Trees (Extremely Randomized Trees)
import warnings
from sklearn import datasets
from sklearn.ensemble import ExtraTreesClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Extra-Trees draws split thresholds at random instead of searching for
# the best one, trading a little bias for lower variance and faster fits.
# bootstrap=True is set explicitly (the default is False) so that an
# out-of-bag score is available.
et_clf = ExtraTreesClassifier(n_estimators=500, bootstrap=True,
                              oob_score=True, random_state=666)
et_clf.fit(X, y)
et_clf.oob_score_
Output accuracy (OOB): 0.892
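With bootstrap left at its default of False, every tree sees the full dataset and no out-of-bag set exists; cross-validation is the usual substitute. A minimal sketch:
from sklearn.model_selection import cross_val_score
cross_val_score(ExtraTreesClassifier(n_estimators=500, random_state=666),
                X, y).mean()  # mean accuracy over the default folds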
AdaBoost
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# AdaBoost fits weak learners sequentially, reweighting the training
# samples so that each new learner focuses on the previous mistakes.
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                             n_estimators=500, random_state=666)
ada_clf.fit(X_train, y_train)
ada_clf.score(X_test, y_test)
Output accuracy: 0.872
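Because boosting is sequential, the accuracy after every stage can be read off the fitted model without refitting, which helps in choosing n_estimators. A minimal sketch:
stage_scores = list(ada_clf.staged_score(X_test, y_test))
max(stage_scores)  # best test accuracy reached at any of the 500 stages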
Gradient Boosting Decision Tree (GBDT)
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# GBDT always boosts regression trees on the gradient of the loss, so
# no base estimator is passed; max_depth configures the trees directly.
gb_clf = GradientBoostingClassifier(max_depth=2, n_estimators=30)
gb_clf.fit(X_train, y_train)
gb_clf.score(X_test, y_test)
Output accuracy: 0.912
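GradientBoostingClassifier supports staged evaluation as well; pairing staged_predict with accuracy_score shows how test accuracy evolves as trees are added. A minimal sketch:
from sklearn.metrics import accuracy_score
stage_acc = [accuracy_score(y_test, y_pred)
             for y_pred in gb_clf.staged_predict(X_test)]
max(stage_acc)  # best accuracy across the 30 boosting stages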