# -*- coding: utf-8 -*-
"""
Created on Mon Jul  2 16:02:56 2018

@author: GY
"""

# Supervised learning
#----------------------------------------------------------------------------------------------------------------------#
import mglearn
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
import numpy as np


#---------------------------------------------------------------------------------------------------------------
# Binary classification dataset ("forge").
# make_forge() is unpacked into the data X and the labels y.
X, y = mglearn.datasets.make_forge()
# First feature on the x-axis, second feature on the y-axis; y is passed as
# the third argument (presumably used to distinguish the classes -- see the
# mglearn.discrete_scatter documentation).
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.legend(["Class 0", "Class 1"], loc=4)  # loc=4 is matplotlib's "lower right"
plt.xlabel('First feature')  # fixed typo: was 'Frist feature'
plt.ylabel('Second feature')
print(X.shape)



#-------------------------------------------------------------------------------------------------------------
# Regression dataset ("wave").
# NOTE: this rebinds the module-level X and y used by the previous section.
X, y = mglearn.datasets.make_wave(n_samples=40)
# Open a fresh figure: pyplot draws on the current axes, so without this the
# points below (and the y-limits) would be applied to whatever plot is
# already active when the file is run as a plain script.
plt.figure()
plt.plot(X, y, 'o')  # 'o' -> circle markers only, no connecting line
plt.ylim(-3, 3)
plt.xlabel('Feature')
plt.ylabel('Target')



#---------------------------------------------------------------------------------------------------------------------
# Real-world dataset: scikit-learn's breast cancer data.
cancer = load_breast_cancer()
# Exploration notes recorded by the original author:
#   cancer.keys()
#       dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])
#   cancer.data.shape
#       (569, 30)

# Number of samples per class, as a dict keyed by class name.
# np.bincount counts how often each integer label occurs in cancer.target;
# zip pairs those counts with the entries of cancer.target_names.
# The original built this dict as a bare expression, so the result was
# discarded when running as a script; bind it to a name and print it instead.
# str()/int() convert NumPy scalars to builtins so the printout stays readable.
class_counts = {
    str(name): int(count)
    for name, count in zip(cancer.target_names, np.bincount(cancer.target))
}
print(class_counts)