# -*- coding: utf-8 -*-
"""
RandomForestClassifier demo: compare 9 sklearn classifiers on 3 toy datasets.

1. Concepts covered: sklearn synthetic classification datasets, standardization,
   contour (decision-boundary) plots, train/test splitting.
2. Result: on three 2-D binary-classification problems (two non-linear, one
   linear), the random forest achieves good results on all of them.
"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# NOTE: fixed from the removed `sklearn.cross_validation` module.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
# make_classification: random continuous features with categorical labels.
# make_moons: 2-D features + labels arranged in two interleaving half-moons.
# make_circles: 2-D features + labels arranged in two concentric circles.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

h = .02  # step size of the mesh used for the decision-boundary plot

names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
         "Random Forest", "AdaBoost", "Naive Bayes", "LDA", "QDA"]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    LDA(),
    QDA()]

# Linearly separable dataset: 2 informative features, 2 classes.
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1, n_classes=2)
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
# Fixed-seed RNG so the jitter added below is reproducible across runs.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)  # add uniform noise to make it less trivial
linearly_separable = (X, y)

datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable
            ]

figure = plt.figure(figsize=(27, 9))
i = 1  # running subplot index: rows = datasets, cols = raw data + classifiers
# iterate over datasets
for ds in datasets:
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)  # zero-mean / unit-variance scaling
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    # xx, yy are the coordinate matrices of the evaluation grid.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # just plot the dataset first
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())  # hide axis tick labels
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max] x [y_min, y_max].
        # np.c_ stacks the raveled grids column-wise, giving one (x, y) sample
        # per mesh point.
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(name)
        # Annotate the test accuracy in the lower-right corner.
        # lstrip('0') (was a no-op lstrip('')) drops the leading zero, e.g.
        # "0.95" -> ".95", matching the upstream sklearn example.
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

figure.subplots_adjust(left=.02, right=.98)  # trim margins between subplots
plt.show()

Original source URL

Random forest learning -2-sklearn More articles about

  1. Random forest learning -sklearn

    Random forest Python implementation (RandomForestClassifier) # -*- coding: utf-8 -*- """ RandomForestClassif ...

  2. [Machine Learning & Algorithm] Random forests (Random Forest)

    1 What is a random forest? As an emerging, highly flexible machine learning algorithm, the random forest (Random Forest, abbreviated RF) has broad application prospects — from marketing to health insurance, it can be used to model marketing simulations, count customers ...

  3. Random forests (Random Forest)

    Read the directory 1 What is random forest ? 2 The characteristics of random forest 3 Basic knowledge of random forest 4 Random forest production 5 Out of bag error rate (oob error) 6 A simple example of how random forests work 7 Random forest Pyth ...

  4. Random forests (Random Forest), Decision tree ,bagging, boosting(Adaptive Boosting,GBDT)

    http://www.cnblogs.com/maybe2030/p/4585705.html Read the directory 1 What is random forest ? 2 The characteristics of random forest 3 Basic knowledge of random forest 4 Random forest production 5 ...

  5. [Machine Learning & Algorithm] Random forests (Random Forest)- Reprint

    author :Poll The notes   Blog source :http://www.cnblogs.com/maybe2030/  Read the directory 1 What is random forest ? 2 The characteristics of random forest 3 Basic knowledge of random forest 4 Random forest production ...

  6. Random forests (Random Forest, abbreviation RF)

    Read the directory 1 What is random forest ? 2 The characteristics of random forest 3 Basic knowledge of random forest 4 Random forest production 5 Out of bag error rate (oob error) 6 A simple example of how random forests work 7 Random forest Pyth ...

  7. Random forests random forest And python Realization

    The introduction wants to obtain the main characteristics of data through random forest 1. The theory is based on how individual learners are generated , The current ensemble learning methods can be roughly divided into two categories , In other words, there is a strong dependence between individual learners , Serialization methods that must be generated serially , And there is no strong dependence between individual learners ...

  8. Random forests (Random Forest) Detailed explanation ( turn )

    source : Poll The notes cnblogs.com/maybe2030/p/4585705.html 1 What is random forest ?   As an emerging . A highly flexible machine learning algorithm , Random forests (Random Fores ...

  9. Random forest classifier (Random Forest)

    Read the directory 1 What is random forest ? 2 The characteristics of random forest 3 Basic knowledge of random forest 4 Random forest production 5 Out of bag error rate (oob error) 6 A simple example of how random forests work 7 Random forest Pyth ...

Random recommendation

  1. BZOJ Tree chain partition topic summary

    1036,2157,2243,4034,4196;2325,2908,3083,3159,3531,3626,3999; You don't have to cut the tree :1146;2819,2843,4448,4530.

  2. NPOI Reading and writing Excel

    Example function Overview : 1. Support Excel2003 as well as 2007 2. Support Excel Read DataTable(TableToExcel) 3. Support DataTable Export to Excel(TableToExcel) ...

  3. How to remove SDL TRADOS Self developed plug-ins in

    I learned to use it last year SDL We compiled an example called SimpleText Plug in for , Every time open TRADOS 2014 Prompt to load the plug-in three times , It's very annoying . But when I want to unload it , But I can't do it , I don't know what to do . This question has been bothering me for a long time , This morning, the heart and nature compare ...

  4. Team development 《 Speed snail 》NABC analysis

    One . brief introduction Project name : Speed snail characteristic : It's easy to operate , Vision and hearing cooperate , Let users have the most perfect experience . Two .NABC analysis N(need): Today, people can't live without mobile phones all the time , It's hard to avoid boredom , At the moment, it is easy to operate and can make people use their brains ...

  5. DD_belatedPNG, solve IE6 I won't support it PNG-24 The perfect solution

    png24 stay ie We support transparency in the future . Finally, we found the following feasible way : We know IE6 Transparency is not supported PNG-24 Of , This undoubtedly limits the space of web design . But the whole Internet is dealing with this IE6 Transparency of PNG-24 Side of ...

  6. 【 Reprint 】Android Push scheme analysis (MQTT/XMPP/GCM)

    http://m.oschina.net/blog/82059 The main idea of this article is , For the present Android This paper analyzes and compares several mainstream message push schemes on the platform , It objectively reflects the advantages and disadvantages of these push schemes , Help you choose the most suitable one ...

  7. Linux Kernel Integer overflow vulnerability

    Vulnerability name : Linux Kernel Integer overflow vulnerability CNNVD Number : CNNVD-201311-062 Release time : 2013-11-07 Update time : 2013-11-07 Hazard rating :    Hole type : ...

  8. Use HTML5 Design aids

    Use HTML5 Design aids Rajesh Lal Download the code sample If you are really interested in the audience , You will need to design accessibility for your website .  Accessibility makes web pages easier to access . Easier to use , It's available for everyone to browse .  Usually , Use the latest ...

  9. mysqldump: Couldn't execute 'show events': Cannot proceed because system tables used by Event Schedu

    Recently, the old version of mysql Examples pour in percona 5.5.30, It's all on line , The result will be mysql The table under the library is also poured in , This is a tragedy , Backup error . Can't , take mysql The data in the library is poured out , Empty , Pour in p ...

  10. Dynamic add test options button radioButton( One )

    Recently doing WebView The function of loading test questions , But if you put the option button WebView in , It's slow to click . So we use the original RadioButton, And the questions and answers are in WebView in . But the number of options is uncertain , So we need ...