Example of feature extraction operation implemented in Python

This article demonstrates feature extraction (feature selection) operations implemented in Python. It is shared here for your reference; the details are as follows:

# -*- coding: utf-8 -*-
"""
Created on Mon Aug 21 10:57:29 2017

@author: Floating heart
"""
# Filter-style feature selection.
# Select by variance: the smaller a feature's variance, the weaker its
# ability to tell samples apart, so such features can be eliminated.
# NOTE: the bare expressions below only display a value when the script is
# run statement by statement in an interactive session.
from sklearn.feature_selection import VarianceThreshold

x = [[100, 1, 2, 3], [100, 4, 5, 6], [100, 7, 8, 9], [101, 11, 12, 13]]
selector = VarianceThreshold(1)  # variance threshold value
selector.fit(x)
selector.variances_  # variance of each feature
selector.transform(x)  # perform the feature selection
selector.get_support(True)  # indices, in the original data, of the selected features
# Restore the selected data to the original feature layout;
# features that were removed are shown as 0.
selector.inverse_transform(selector.transform(x))
# Univariate feature selection
from sklearn.feature_selection import SelectKBest, f_classif

x = [
    [1, 2, 3, 4, 5],
    [5, 4, 3, 2, 1],
    [3, 3, 3, 3, 3],
    [1, 1, 1, 1, 1],
]
y = [0, 1, 0, 1]
# Keep 3 features; the scoring function is the ANOVA F-value (f_classif).
selector = SelectKBest(score_func=f_classif, k=3)
selector.fit(x, y)
selector.scores_  # score of each feature
selector.pvalues_
# With True, get_support returns the indices of the selected features;
# with False, it returns a boolean array marking the selected features.
selector.get_support(True)
selector.transform(x)
# Wrapper-style feature selection
from sklearn.feature_selection import RFE
from sklearn.svm import LinearSVC  # SVM serves as the evaluating estimator
from sklearn.datasets import load_iris  # data set loader

iris = load_iris()
x, y = iris.data, iris.target
estimator = LinearSVC()
# Recursive feature elimination down to 2 features.
selector = RFE(estimator=estimator, n_features_to_select=2)
selector.fit(x, y)
selector.n_features_  # number of selected features
selector.support_  # mask of the selected features
selector.ranking_  # feature ranking; selected features are ranked 1
# Note: feature selection does not necessarily improve prediction
# performance. The comparison below trains one classifier on the original
# features and one on the selected features.
from sklearn.feature_selection import RFE
from sklearn.svm import LinearSVC
# The sklearn.cross_validation module no longer exists in scikit-learn;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load the data
iris = load_iris()
X = iris.data
y = iris.target
# Feature selection (note: fitted on the full data set, before the split)
estimator = LinearSVC()
selector = RFE(estimator=estimator, n_features_to_select=2)
X_t = selector.fit_transform(X, y)
# Split into training and test sets. Both calls use identical split
# arguments (test_size, random_state, stratify).
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y)
x_train_t, x_test_t, y_train_t, y_test_t = train_test_split(
    X_t, y, test_size=0.25, random_state=0, stratify=y)
clf = LinearSVC()
clf_t = LinearSVC()
clf.fit(x_train, y_train)
clf_t.fit(x_train_t, y_train_t)
print('origin dataset test score:', clf.score(x_test, y_test))
# Output reported by the article: origin dataset test score:0.973684210526
print('selected Dataset:test score:', clf_t.score(x_test_t, y_test_t))
# Output reported by the article: selected Dataset:test score:0.947368421053
import numpy as np
# Recursive feature elimination with cross-validation (RFECV)
from sklearn.feature_selection import RFECV
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris

iris = load_iris()
x = iris.data
y = iris.target
estimator = LinearSVC()
selector = RFECV(estimator=estimator, cv=3)  # 3-fold cross-validation
selector.fit(x, y)
selector.n_features_
selector.support_
selector.ranking_
# RFECV.grid_scores_ was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer cv_results_ and fall back to the legacy attribute on old releases.
cv_scores = (
    selector.cv_results_['mean_test_score']
    if hasattr(selector, 'cv_results_')
    else selector.grid_scores_
)
cv_scores
# Embedded feature selection
import numpy as np
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits

digits = load_digits()
x, y = digits.data, digits.target
# L1-penalised linear SVM supplies the feature importances.
estimator = LinearSVC(penalty='l1', dual=False)
selector = SelectFromModel(estimator=estimator, threshold='mean')
selector.fit(x, y)
selector.transform(x)
selector.threshold_
selector.get_support(indices=True)
# scikit-learn provides Pipeline to chain multiple learners into a pipeline, usually of the form: data standardization
# --> feature extraction learner --> learner that performs the prediction. Every learner except the last one
# must provide a transform method, which is used for data transformation (such as normalization, regularization,
# and feature extraction).
# Learner pipeline (Pipeline)
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectFromModel
# The sklearn.cross_validation module no longer exists in scikit-learn;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


def test_Pipeline(data):
    """Fit a two-step pipeline and print its steps and its test score.

    Parameters
    ----------
    data : sequence of four items
        ``(x_train, x_test, y_train, y_test)``, in the order returned by
        ``train_test_split``.

    Returns
    -------
    None
        The named steps and the test-set score are printed.
    """
    x_train, x_test, y_train, y_test = data
    steps = [
        # Intermediate pipeline steps must implement ``transform``. A bare
        # LinearSVC does not, so it is wrapped in SelectFromModel, which
        # selects features from the fitted L1-penalised model.
        ('linear_svm',
         SelectFromModel(LinearSVC(C=1, penalty='l1', dual=False))),
        ('logisticregression', LogisticRegression(C=1)),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(x_train, y_train)
    print('named steps', pipeline.named_steps)
    print('pipeline score', pipeline.score(x_test, y_test))


if __name__ == '__main__':
    data = load_digits()
    x = data.data
    y = data.target
    test_Pipeline(train_test_split(
        x, y, test_size=0.25, random_state=0, stratify=y))

Readers who are interested in more Python-related content can check the following topics on this site: "Python data structure and algorithm tutorial", "Python coding operation skills summary", "Python function usage skills summary", "Python string operation skills summary" and "Python Getting Started and Advanced Classic Tutorial".

I hope this article helps you with Python programming.

Articles you may be interested in:

Detailed explanation of image processing and feature extraction in python
Python Summary of methods for data extraction
Use the NLTK library in Python to realize the extraction of stemming tutorial
Python-opencv extracts the contour instance of an image when there is noise
Explain the number extraction method in String in Python3
Python realizes the method of extracting Baidu search results
Python extracts the url list in the page
Python extracts the specified content instance details according to regular expression
Two methods for python to read video stream and extract video frame