python 特征选择 绘图 + mine
demo代码:
# _*_coding:UTF-8_*_
import numpy as np
import sys
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
import sys
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier
import os
from minepy import MINE def iterbrowse(path):
for home, dirs, files in os.walk(path):
for filename in files:
yield os.path.join(home, filename) def get_data(filename):
white_verify = []
with open(filename) as f:
lines = f.readlines()
data = {}
for line in lines:
a = line.split("\t")
if len(a) != 78:
print(line)
raise Exception("fuck")
white_verify.append([float(n) for n in a[3:]])
return white_verify if __name__ == '__main__':
# pdb.set_trace()
neg_file = "cc_data/black_all.txt"
pos_file = "cc_data/white_all.txt"
X = []
y = []
if os.path.isfile(pos_file):
if pos_file.endswith('.txt'):
pos_set = np.genfromtxt(pos_file)
elif pos_file.endswith('.npy'):
pos_set = np.load(pos_file)
X.extend(pos_set)
y += [0] * len(pos_set)
if os.path.isfile(neg_file):
if neg_file.endswith('.txt'):
neg_set = np.genfromtxt(neg_file)
elif neg_file.endswith('.npy'):
neg_set = np.load(neg_file) '''
X.extend(list(neg_set) * 5)
y += [1] * (5 * len(neg_set))
'''
X.extend(neg_set)
y += [1] * len(neg_set) print("len of X:", len(X))
print("X sample:", X[:3])
print("len of y:", len(y))
print("y sample:", y[:3])
X = [x[3:] for x in X]
print("filtered X sample:", X[:3]) cols = [str(i + 6) for i in range(len(X[0]))]
clf = ExtraTreesClassifier()
clf.fit(X, y)
print (clf.feature_importances_)
print "Features sorted by their score:"
print sorted(zip(clf.feature_importances_, cols), reverse=True) black_verify = []
for f in iterbrowse("todo/top"):
print(f)
black_verify += get_data(f)
# ValueError: operands could not be broadcast together with shapes (1,74) (75,) (1,74)
print(black_verify)
black_verify_labels = [3] * len(black_verify) white_verify = get_data("todo/white_verify.txt")
print(white_verify)
white_verify_labels = [2] * len(white_verify) unknown_verify = get_data("todo/pek_feature74.txt")
print(unknown_verify) # extend data
X = np.concatenate((X, black_verify))
y += black_verify_labels
X = np.concatenate((X, white_verify))
y += white_verify_labels #################################### plot ####################################
data_train = pd.DataFrame(X)
# cols = [str(i) for i in range(6, 81)]
data_train.columns = cols # add label column
# data_train = data_train.assign(label=pd.Series(y))
data_train["label"] = pd.Series(y) print(data_train.info())
print(data_train.columns) import matplotlib.pyplot as plt for col in cols:
fig = plt.figure(figsize=(20, 16), dpi=8)
fig.set(alpha=0.2)
plt.figure()
data_train[data_train.label == 0.0][col].plot()
data_train[data_train.label == 1.0][col].plot()
data_train[data_train.label == 2.0][col].plot()
data_train[data_train.label == 3.0][col].plot()
plt.xlabel(u"sample data id")
plt.ylabel(u"value")
plt.title(col)
plt.legend((u'white', u'black', u"white-todo", u"black-todo"), loc='best')
plt.show() print "calculate MINE mic value:"
for col in cols:
print col,
mine = MINE(alpha=0.6, c=15,
est="mic_approx") # http://minepy.readthedocs.io/en/latest/python.html#second-example
mine.compute_score(data_train[col], y)
print "MIC=", mine.mic() sys.exit(-1)
extend data 表示待预测的数据
关于mic:
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
from minepy import MINE rs = np.random.RandomState(seed=0) def mysubplot(x, y, numRows, numCols, plotNum,
xlim=(-4, 4), ylim=(-4, 4)): r = np.around(np.corrcoef(x, y)[0, 1], 1)
mine = MINE(alpha=0.6, c=15, est="mic_approx")
mine.compute_score(x, y)
mic = np.around(mine.mic(), 1)
ax = plt.subplot(numRows, numCols, plotNum,
xlim=xlim, ylim=ylim)
ax.set_title('Pearson r=%.1f\nMIC=%.1f' % (r, mic),fontsize=10)
ax.set_frame_on(False)
ax.axes.get_xaxis().set_visible(False)
ax.axes.get_yaxis().set_visible(False)
ax.plot(x, y, ',')
ax.set_xticks([])
ax.set_yticks([])
return ax def rotation(xy, t):
return np.dot(xy, [[np.cos(t), -np.sin(t)], [np.sin(t), np.cos(t)]]) def mvnormal(n=1000):
cors = [1.0, 0.8, 0.4, 0.0, -0.4, -0.8, -1.0]
for i, cor in enumerate(cors):
cov = [[1, cor],[cor, 1]]
xy = rs.multivariate_normal([0, 0], cov, n)
mysubplot(xy[:, 0], xy[:, 1], 3, 7, i+1) def rotnormal(n=1000):
ts = [0, np.pi/12, np.pi/6, np.pi/4, np.pi/2-np.pi/6,
np.pi/2-np.pi/12, np.pi/2]
cov = [[1, 1],[1, 1]]
xy = rs.multivariate_normal([0, 0], cov, n)
for i, t in enumerate(ts):
xy_r = rotation(xy, t)
mysubplot(xy_r[:, 0], xy_r[:, 1], 3, 7, i+8) def others(n=1000):
x = rs.uniform(-1, 1, n)
y = 4*(x**2-0.5)**2 + rs.uniform(-1, 1, n)/3
mysubplot(x, y, 3, 7, 15, (-1, 1), (-1/3, 1+1/3)) y = rs.uniform(-1, 1, n)
xy = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1)), axis=1)
xy = rotation(xy, -np.pi/8)
lim = np.sqrt(2+np.sqrt(2)) / np.sqrt(2)
mysubplot(xy[:, 0], xy[:, 1], 3, 7, 16, (-lim, lim), (-lim, lim)) xy = rotation(xy, -np.pi/8)
lim = np.sqrt(2)
mysubplot(xy[:, 0], xy[:, 1], 3, 7, 17, (-lim, lim), (-lim, lim)) y = 2*x**2 + rs.uniform(-1, 1, n)
mysubplot(x, y, 3, 7, 18, (-1, 1), (-1, 3)) y = (x**2 + rs.uniform(0, 0.5, n)) * \
np.array([-1, 1])[rs.random_integers(0, 1, size=n)]
mysubplot(x, y, 3, 7, 19, (-1.5, 1.5), (-1.5, 1.5)) y = np.cos(x * np.pi) + rs.uniform(0, 1/8, n)
x = np.sin(x * np.pi) + rs.uniform(0, 1/8, n)
mysubplot(x, y, 3, 7, 20, (-1.5, 1.5), (-1.5, 1.5)) xy1 = np.random.multivariate_normal([3, 3], [[1, 0], [0, 1]], int(n/4))
xy2 = np.random.multivariate_normal([-3, 3], [[1, 0], [0, 1]], int(n/4))
xy3 = np.random.multivariate_normal([-3, -3], [[1, 0], [0, 1]], int(n/4))
xy4 = np.random.multivariate_normal([3, -3], [[1, 0], [0, 1]], int(n/4))
xy = np.concatenate((xy1, xy2, xy3, xy4), axis=0)
mysubplot(xy[:, 0], xy[:, 1], 3, 7, 21, (-7, 7), (-7, 7)) plt.figure(facecolor='white')
mvnormal(n=800)
rotnormal(n=200)
others(n=800)
plt.tight_layout()
plt.show()

python 特征选择 绘图 + mine的更多相关文章
- python常用绘图软件包记录
在没有使用python之前,觉得matlab的绘图功能还算可以~但现在发现python的绘图包真的好强大,绘制出的图像非常专业漂亮,但具体使用还有待学习,这里记录学习过程中遇到的python绘图包,以 ...
- Python之绘图和可视化
Python之绘图和可视化 1. 启用matplotlib 最常用的Pylab模式的IPython(IPython --pylab) 2. matplotlib的图像都位于Figure对象中. 可以使 ...
- 10分钟轻松学会python turtle绘图
 1. 画布(canvas) 1.1 相关函数: 2. 画笔 2.1 画笔的状态 2.2 画笔的属性 2.3 绘图命令 3. 命令详解 4. 绘图举例 4.1 太阳花 4.2 绘制小蟒蛇 4.3 绘 ...
- python matplotlib 绘图基础
在利用Python做数据分析时,探索数据以及结果展现上图表的应用是不可或缺的. 在Python中通常情况下都是用matplotlib模块进行图表制作. 先理下,matplotlib的结构原理: mat ...
- 10分钟轻松学会 Python turtle 绘图
python2.6版本中后引入的一个简单的绘图工具,叫做海龟绘图(Turtle Graphics),turtle库是python的内部库,使用导入即可 import turtle 先说明一下turtl ...
- Python函数绘图
最近看数学,发现有时候画个图还真管用,对理解和展示效果都不错.尤其是三维空间和一些复杂函数,相当直观,也有助于解题.本来想用mathlab,下载安装都太费事,杀鸡不用牛刀,Python基本就能实现.下 ...
- 【Matplotlib】利用Python进行绘图
[Matplotlib] 教程:https://morvanzhou.github.io/tutorials/data-manipulation/plt/ 官方文档:https://matplotli ...
- python海龟绘图
最近学了python,看了几本书之后,才明白python的强大,python是一种解释型的语言,即每写一行程序就执行一行. 而且在科学计算方面,处理的能力特别的方便. 比如python中的字典dict ...
- 【震惊】手把手教你用python做绘图工具(一)
在这篇博客里将为你介绍如何通过numpy和cv2进行结和去创建画布,包括空白画布.白色画布和彩色画布.创建画布是制作绘图工具的前提,有了画布我们就可以在画布上尽情的挥洒自己的艺术细胞. 还在为如何去绘 ...
随机推荐
- 题目3 : Fibonacci
时间限制:10000ms 单点时限:1000ms 内存限制:256MB 描述 Given a sequence {an}, how many non-empty sub-sequence of it ...
- 线性判别函数-Fisher 线性判别
这是我在上模式识别课程时的内容,也有参考这里. 线性判别函数的基本概念 判别函数为线性的情况的一般表达式 式中x是d 维特征向量,又称样本向量, 称为权向量, 分别表示为 是个常数,称为阈值权. 设样 ...
- php 判断数组中是否有重复的值
$input = array(4, "4", "3", 4, 3, "3"); $result = array_unique($input) ...
- 简单的积雪shader
// Upgrade NOTE: replaced '_World2Object' with 'unity_WorldToObject' Shader "Custom/CoverSnow&q ...
- PHP中foreach用法详细讲解
1.foreach是什么? foreach是PHP的一种语法结构,其实就是一个工具,(工具:就是工作的时候用到的器具),那么在程序开发过程中,为了达到程序效果,就用到了foreach. 2.如何用? ...
- JS中setInterval、setTimeout不能传递带参数的函数的解决办法
在JS中无论是setTimeout还是setInterval,在使用函数名作为调用句柄时都不能带参数,而在许多场合必须要带参数,这就需要想方法解决. 一.采用字符串形式:——(缺陷)参数不能被周期性改 ...
- struts2一个实例中遇到的问题
今天实现了一个登录功能的Struts2小程序. 期间遇到了许多问题,记忆犹新的是 (1)新版本的tomcat9和eclipse Neon Release (4.6.0) 发生了冲突,启动服务器的时候老 ...
- 【BZOJ4930】棋盘 拆边费用流
[BZOJ4930]棋盘 Description 给定一个n×n的棋盘,棋盘上每个位置要么为空要么为障碍.定义棋盘上两个位置(x,y),(u,v)能互相攻击当前仅 当满足以下两个条件: 1:x=u或y ...
- python脚本分析nginx访问日志
日志格式如下: 223.74.135.248 [11/May/2017:11:19:47 +0800] "POST /login/getValidateCode HTTP/1.1" ...
- docker 存储定义成direct-lvm 模式
配置direct-lvm模式 1. 停止Docker systemctl stop docker 2. 安装依赖包 device-mapper-persistent-data,lvm2, and ...