随机森林n_estimators 学习曲线
随机森林
单棵树与随机森林的得分对比
# 导入包
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# Load the wine classification dataset bundled with scikit-learn.
wine = load_wine()
# Hold out 30% of the samples for testing. The split is seeded so that the
# single-tree vs. forest comparison is reproducible; with an unseeded split
# the two models are scored on a different random partition on every run.
x_train, x_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=0
)
# Seed both estimators as well so the fitted models are deterministic.
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)
# Fit both models on the same training partition.
clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)
RandomForestClassifier(random_state=0)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=0)
# Score each fitted model on the test partition (tree first, then forest).
clf_score, rfc_score = (model.score(x_test, y_test) for model in (clf, rfc))
# Print both scores, one model per line.
print("sinle tree: {0}\nrandom tree: {1}".format(clf_score, rfc_score))
sinle tree: 0.9074074074074074
random tree: 0.9629629629629629
单棵树与随机森林在交叉验证下的对比图
# 导入交叉验证和画图工具
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
# Fresh, unseeded estimators; the forest is built from 25 trees.
clf, rfc = DecisionTreeClassifier(), RandomForestClassifier(n_estimators=25)
# Evaluate each estimator with 10-fold cross-validation on the full dataset.
clf_corss = cross_val_score(estimator=clf, X=wine.data, y=wine.target, cv=10)
rfc_corss = cross_val_score(estimator=rfc, X=wine.data, y=wine.target, cv=10)
# Print the mean of the per-fold scores for each model.
print(
    "single tree mean socre: {}\nrandom tree mean socre {}".format(
        clf_corss.mean(), rfc_corss.mean()
    )
)
single tree mean socre: 0.8705882352941178
random tree mean socre 0.9722222222222221
# Plot the per-fold cross-validation scores of both models on shared axes.
# The x positions are derived from the score array rather than hard-coded,
# so the plot stays aligned if the number of folds is changed.
fold_ids = range(1, len(clf_corss) + 1)
plt.plot(fold_ids, clf_corss, label="single tree")
plt.plot(fold_ids, rfc_corss, label="random tree")
plt.xticks(fold_ids)
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f4815d50>
?
?
# Re-run 10-fold cross-validation for the decision tree and display the
# per-fold scores.
clf_corss = cross_val_score(estimator=clf, X=wine.data, y=wine.target, cv=10)
clf_corss
array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333,
0.83333333, 1. , 0.94444444, 0.94117647, 0.76470588])
# Re-run 10-fold cross-validation for the random forest and display the
# per-fold scores.
rfc_corss = cross_val_score(estimator=rfc, X=wine.data, y=wine.target, cv=10)
rfc_corss
array([1. , 1. , 0.94444444, 0.94444444, 0.88888889,
1. , 1. , 1. , 1. , 1. ])
十次交叉验证下决策树和随机森林的对比
# Mean 10-fold cross-validation score of each model, one entry per repetition.
clf_list = []
rfc_list = []
# Number of times the whole cross-validation is repeated; also drives the
# x axis of the plot below so the two cannot drift apart.
n_rounds = 10
for _round in range(n_rounds):
    # The estimators are re-created unseeded on every repetition, so the
    # scores can differ from one round to the next.
    clf = DecisionTreeClassifier()
    rfc = RandomForestClassifier(n_estimators=25)
    clf_corss_mean = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    rfc_corss_mean = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    clf_list.append(clf_corss_mean)
    rfc_list.append(rfc_corss_mean)
# Plot the mean score of each repetition for both models on shared axes.
rounds = range(1, n_rounds + 1)
plt.plot(rounds, clf_list, label="single tree")
plt.plot(rounds, rfc_list, label="random tree")
plt.xticks(rounds)
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f490f670>
n_estimators 学习曲线
# Learning curve over n_estimators = 1..200: for each forest size, record the
# mean 10-fold cross-validation score.
superpa = []
for i in range(200):
    # i is 0-based, so the forest built in this iteration has i + 1 trees.
    rfc = RandomForestClassifier(n_estimators=i + 1, n_jobs=-1)
    rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_cross)
# Report the best mean score and the number of trees that produced it.
# list.index() returns a 0-based position (the first one on ties), so add 1
# to convert it back to the n_estimators value used for that entry.
best_score = max(superpa)
print(best_score, superpa.index(best_score) + 1)
# Plot the mean score against the number of trees.
plt.figure(figsize=(20, 8))
plt.plot(range(1, 201), superpa, label="rfc_cross_mean")
plt.legend()
0.9888888888888889 20
<matplotlib.legend.Legend at 0x7ff6f540f100>
?
?