吴裕雄 python 机器学习——K均值聚类KMeans模型

import numpy as np

import matplotlib.pyplot as plt

from sklearn import  cluster

from sklearn.metrics import adjusted_rand_score

from sklearn.datasets.samples_generator import make_blobs

def create_data(centers,num=100,std=0.7):
    '''
    Draw a synthetic clustering data set through ``make_blobs``.

    :param centers: forwarded to ``make_blobs`` as ``centers``
    :param num: forwarded to ``make_blobs`` as ``n_samples``
    :param std: forwarded to ``make_blobs`` as ``cluster_std``
    :return: the two values produced by ``make_blobs``, as a
        ``(samples, true_labels)`` tuple
    '''
    blob_options = dict(n_samples=num, centers=centers, cluster_std=std)
    samples, true_labels = make_blobs(**blob_options)
    return samples, true_labels

# Center points passed to create_data to generate the clusters

centers=[[1,1],[2,2],[1,2],[10,20]]

# Generate the data set used for clustering (1000 samples, std 0.5)

X,labels_true=create_data(centers,1000,0.5)

# K-Means clustering model

def test_Kmeans(*data):
    '''
    Fit a default-configured KMeans on the data and print two figures.

    :param data: exactly two positional values: the sample matrix followed
        by the ground-truth labels
    :return: None; the ARI of the predicted labels against the ground truth
        and the fitted model's ``inertia_`` are printed
    '''
    samples, true_labels = data
    # fit() hands back the estimator, so construction and fitting chain.
    model = cluster.KMeans().fit(samples)
    assigned_labels = model.predict(samples)
    ari = adjusted_rand_score(true_labels, assigned_labels)
    print("ARI:%s" % ari)
    print("Sum center distance %s" % model.inertia_)

# Center points passed to create_data to generate the clusters
# (rebinds the module-level name `centers`)

centers=[[1,1],[2,2],[1,2],[10,20]]

# Generate the data set used for clustering (rebinds X and labels_true)

X,labels_true=create_data(centers,1000,0.5)

# Call test_Kmeans on the generated data

test_Kmeans(X,labels_true)

def test_Kmeans_nclusters(*data):
    '''
    Show how the KMeans clustering result varies with ``n_clusters``.

    For every ``n_clusters`` value in ``range(1, 50)`` a KMeans model is
    fitted on the samples; the ARI of its predictions against the true
    labels and its ``inertia_`` are recorded, then drawn in two
    side-by-side panels.

    :param data: exactly two positional values: the sample matrix followed
        by the ground-truth labels
    :return: None; a matplotlib figure is shown
    '''
    samples, true_labels = data
    cluster_counts = range(1, 50)
    ari_scores = []
    inertias = []
    for k in cluster_counts:
        model = cluster.KMeans(n_clusters=k).fit(samples)
        assigned_labels = model.predict(samples)
        ari_scores.append(adjusted_rand_score(true_labels, assigned_labels))
        inertias.append(model.inertia_)

    # One entry per panel: (values to plot, marker, y-axis label).
    panels = (
        (ari_scores, "+", "ARI"),
        (inertias, "o", "inertia_"),
    )
    figure = plt.figure()
    for position, (values, marker, y_label) in enumerate(panels, start=1):
        axis = figure.add_subplot(1, 2, position)
        axis.plot(cluster_counts, values, marker=marker)
        axis.set_xlabel("n_clusters")
        axis.set_ylabel(y_label)
    figure.suptitle("KMeans")
    plt.show()

test_Kmeans_nclusters(X,labels_true) # Call test_Kmeans_nclusters on the generated data

def test_Kmeans_n_init(*data):
    '''
    Show how the KMeans clustering result varies with ``n_init`` and ``init``.

    For every ``n_init`` value in ``range(1, 50)`` two models are fitted on
    the samples, first with ``init='k-means++'`` and then with
    ``init='random'``. The ARI of each model's predictions against the true
    labels and each model's ``inertia_`` are recorded and drawn in two
    side-by-side panels, one curve per init strategy.

    :param data: exactly two positional values: the sample matrix followed
        by the ground-truth labels
    :return: None; a matplotlib figure is shown
    '''
    samples, true_labels = data
    init_counts = range(1, 50)
    figure = plt.figure()

    # Tuple order fixes the fitting order within each n_init step.
    strategies = ("k-means++", "random")
    ari_by_strategy = {name: [] for name in strategies}
    inertia_by_strategy = {name: [] for name in strategies}
    for n_init in init_counts:
        for name in strategies:
            model = cluster.KMeans(n_init=n_init, init=name).fit(samples)
            assigned_labels = model.predict(samples)
            ari_by_strategy[name].append(
                adjusted_rand_score(true_labels, assigned_labels))
            inertia_by_strategy[name].append(model.inertia_)

    # Left panel: ARI per n_init, y-axis pinned to [0, 1].
    ari_axis = figure.add_subplot(1, 2, 1)
    for name in strategies:
        ari_axis.plot(init_counts, ari_by_strategy[name], marker="+", label=name)
    ari_axis.set_xlabel("n_init")
    ari_axis.set_ylabel("ARI")
    ari_axis.set_ylim(0, 1)
    ari_axis.legend(loc='best')

    # Right panel: inertia_ per n_init.
    inertia_axis = figure.add_subplot(1, 2, 2)
    for name in strategies:
        inertia_axis.plot(init_counts, inertia_by_strategy[name], marker='o', label=name)
    inertia_axis.set_xlabel("n_init")
    inertia_axis.set_ylabel("inertia_")
    inertia_axis.legend(loc='best')

    figure.suptitle("KMeans")
    plt.show()

test_Kmeans_n_init(X,labels_true) # Call test_Kmeans_n_init on the generated data

吴裕雄 python 机器学习——K均值聚类KMeans模型的更多相关文章

吴裕雄 python 机器学习——混合高斯聚类GMM模型
import numpy as np import matplotlib.pyplot as plt from sklearn import mixture from sklearn.metrics ...
吴裕雄 python 机器学习——超大规模数据集降维IncrementalPCA模型
# -*- coding: utf-8 -*- import numpy as np import matplotlib.pyplot as plt from sklearn import datas ...
吴裕雄 python 机器学习——数据预处理正则化Normalizer模型
from sklearn.preprocessing import Normalizer #数据预处理正则化Normalizer模型 def test_Normalizer(): X=[[1,2,3, ...
吴裕雄 python 机器学习——数据预处理标准化MaxAbsScaler模型
from sklearn.preprocessing import MaxAbsScaler #数据预处理标准化MaxAbsScaler模型 def test_MaxAbsScaler(): X=[[ ...
吴裕雄 python 机器学习——数据预处理标准化StandardScaler模型
from sklearn.preprocessing import StandardScaler #数据预处理标准化StandardScaler模型 def test_StandardScaler() ...
吴裕雄 python 机器学习——数据预处理标准化MinMaxScaler模型
from sklearn.preprocessing import MinMaxScaler #数据预处理标准化MinMaxScaler模型 def test_MinMaxScaler(): X=[[ ...
吴裕雄 python 机器学习——支持向量机线性分类LinearSVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
吴裕雄 python 机器学习——数据预处理字典学习模型
from sklearn.decomposition import DictionaryLearning #数据预处理字典学习DictionaryLearning模型 def test_Diction ...
吴裕雄 python 机器学习——数据预处理流水线Pipeline模型
from sklearn.svm import LinearSVC from sklearn.pipeline import Pipeline from sklearn import neighbor ...

随机推荐

avalonjs 笔记
1>复选卡框和单选框复选卡框监控已选框的数组,即通过属性监控来判断是否全选 <div ms-controller="test"> <ul> < ...
Linux下Maven的安装与使用
Apache Maven,是一个软件(特别是Java软件)项目管理及自动构建工具,由Apache软件基金会所提供.基于项目对象模型(POM)概念,Maven利用一个中央信息片断能管理一个项目的构建.报 ...
Setuptool+pip安装
https://pypi.python.org/pypi/setuptools 1. 下载ez_setup.py文件,cmd进入安装目录: 2. python setup.py install htt ...
java中super的用法
在Java中,super关键字有2个用法,一个是访问父类的函数,一个是访问父类的变量,总体来说,就是一个功能,访问父类的成员. 代码如下: class Person { String name ; i ...
npm使用【转】
NPM是一个Node包管理和分发工具,已经成为了非官方的发布Node模块(包)的标准.有了NPM,可以很快的找到特定服务要使用的包,进行下载.安装以及管理已经安装的包.在安装nodeJS 安装包的时候 ...
gulp-usemin 插件使用
关于什么是gulp,它和grunt有什么区别等问题,这里不做任何介绍.本文主要介绍如何使用gulp-usemin这款插件,同时也会简单介绍本文中用到的一些插件. 什么是gulp-usemin 用来将H ...
Android开发环境包下载地址
Android SDK Android NDK Android Studio 官方下载地址 (网上转来的) 如果下载速度很慢或者无法下载,有三种解决方法 1.忍耐. 2.使用P2SP下载工具,比如 ...
Android-原生对话框
package liudeli.ui.all; import android.app.Activity; import android.app.AlertDialog; import android. ...
day 21 01 序列化模块和模块的导入的复习以及包的初识
day 21 01 序列化和模块的导入的复习以及包的初识 1.序列化模块什么是序列化模块:数据类型转化成字符串的过程就是序列化为什么要使用序列化模块:为了方便存储和网络传输三种序列化模块: (1 ...
使用web API和NPOI导出Excel
使用MVC controller输出excel的例子,自不待言,例子满天飞. 由于本项目使用的是Asp.net MVC API,因此在本项目使用API,实现了文件下载功能.代码的原理很简单,基本上是老 ...

吴裕雄 python 机器学习——K均值聚类KMeans模型

吴裕雄 python 机器学习——K均值聚类KMeans模型的更多相关文章

随机推荐

热门专题