吴裕雄 python 机器学习——K均值聚类KMeans模型

import numpy as np

import matplotlib.pyplot as plt

from sklearn import  cluster

from sklearn.metrics import adjusted_rand_score

from sklearn.datasets.samples_generator import make_blobs

def create_data(centers, num=100, std=0.7):
    """Generate a labelled data set by delegating to ``make_blobs``.

    :param centers: forwarded as the ``centers`` argument of ``make_blobs``.
    :param num: forwarded as ``n_samples`` (default 100).
    :param std: forwarded as ``cluster_std`` (default 0.7).
    :return: the ``(samples, labels)`` pair returned by ``make_blobs``.
    """
    blob_options = dict(n_samples=num, centers=centers, cluster_std=std)
    samples, true_labels = make_blobs(**blob_options)
    return samples, true_labels

# Center points used to generate the clusters
centers = [
    [1, 1],
    [2, 2],
    [1, 2],
    [10, 20],
]

# Generate the data set used for clustering
X, labels_true = create_data(centers, num=1000, std=0.5)

# K-Means clustering model
def test_Kmeans(*data):
    """Fit a ``KMeans`` model with default parameters and print its scores.

    Prints the adjusted Rand index of the predicted labels against the
    reference labels, followed by the fitted model's ``inertia_``.

    :param data: exactly two positional values: the samples ``X`` and the
        reference labels ``labels_true``.
    """
    samples, true_labels = data
    model = cluster.KMeans()
    model.fit(samples)
    assigned_labels = model.predict(samples)
    ari = adjusted_rand_score(true_labels, assigned_labels)
    print("ARI:%s"% ari)
    print("Sum center distance %s"%model.inertia_)

# Center points used to generate the clusters
centers = [
    [1, 1],
    [2, 2],
    [1, 2],
    [10, 20],
]

# Generate the data set used for clustering
X, labels_true = create_data(centers, num=1000, std=0.5)

# Call the test_Kmeans function
test_Kmeans(X, labels_true)

def test_Kmeans_nclusters(*data):
    """Plot how the KMeans result varies with the ``n_clusters`` parameter.

    For each ``n_clusters`` value in ``range(1, 50)`` a ``KMeans`` model is
    fitted on the samples. The adjusted Rand index against the reference
    labels and the model's ``inertia_`` are collected and plotted in two
    side-by-side subplots of a single figure.

    :param data: exactly two positional values: the samples ``X`` and the
        reference labels ``labels_true``.
    """
    samples, true_labels = data
    cluster_counts = range(1, 50)

    def _fit_and_score(k):
        # Fit one model with k clusters and return (ARI, inertia).
        model = cluster.KMeans(n_clusters=k)
        model.fit(samples)
        assigned_labels = model.predict(samples)
        return adjusted_rand_score(true_labels, assigned_labels), model.inertia_

    scores = [_fit_and_score(k) for k in cluster_counts]
    ari_values = [ari for ari, _ in scores]
    inertia_values = [inertia for _, inertia in scores]

    # Plotting: (subplot position, y values, marker, y-axis label)
    figure = plt.figure()
    panels = (
        (1, ari_values, "+", "ARI"),
        (2, inertia_values, "o", "inertia_"),
    )
    for position, values, marker, y_label in panels:
        axis = figure.add_subplot(1, 2, position)
        axis.plot(cluster_counts, values, marker=marker)
        axis.set_xlabel("n_clusters")
        axis.set_ylabel(y_label)
    figure.suptitle("KMeans")
    plt.show()

test_Kmeans_nclusters(X,labels_true) #  Call the test_Kmeans_nclusters function

def test_Kmeans_n_init(*data):
    """Plot how the KMeans result varies with ``n_init`` and ``init``.

    For each ``n_init`` value in ``range(1, 50)`` two ``KMeans`` models are
    fitted on the samples, first with ``init='k-means++'`` and then with
    ``init='random'``. The adjusted Rand index against the reference labels
    and each model's ``inertia_`` are plotted in two side-by-side subplots,
    one curve per initialisation method.

    :param data: exactly two positional values: the samples ``X`` and the
        reference labels ``labels_true``.
    """
    samples, true_labels = data
    init_counts = range(1, 50)

    # Plotting target
    figure = plt.figure()

    def _fit_and_score(n_init, init):
        # Fit one model and return (ARI, inertia).
        model = cluster.KMeans(n_init=n_init, init=init)
        model.fit(samples)
        assigned_labels = model.predict(samples)
        return adjusted_rand_score(true_labels, assigned_labels), model.inertia_

    # The init names double as the legend labels of the curves.
    methods = ("k-means++", "random")
    ari_by_method = {method: [] for method in methods}
    inertia_by_method = {method: [] for method in methods}
    for n_init in init_counts:
        # Per n_init value: fit k-means++ first, then random.
        for method in methods:
            ari, inertia = _fit_and_score(n_init, method)
            ari_by_method[method].append(ari)
            inertia_by_method[method].append(inertia)

    ari_axis = figure.add_subplot(1, 2, 1)
    for method in methods:
        ari_axis.plot(init_counts, ari_by_method[method], marker="+", label=method)
    ari_axis.set_xlabel("n_init")
    ari_axis.set_ylabel("ARI")
    ari_axis.set_ylim(0, 1)
    ari_axis.legend(loc='best')

    inertia_axis = figure.add_subplot(1, 2, 2)
    for method in methods:
        inertia_axis.plot(init_counts, inertia_by_method[method], marker='o', label=method)
    inertia_axis.set_xlabel("n_init")
    inertia_axis.set_ylabel("inertia_")
    inertia_axis.legend(loc='best')

    figure.suptitle("KMeans")
    plt.show()

test_Kmeans_n_init(X,labels_true) #  Call the test_Kmeans_n_init function

吴裕雄 python 机器学习——K均值聚类KMeans模型的更多相关文章

吴裕雄 python 机器学习——混合高斯聚类GMM模型
import numpy as np import matplotlib.pyplot as plt from sklearn import mixture from sklearn.metrics ...
吴裕雄 python 机器学习——超大规模数据集降维IncrementalPCA模型
# -*- coding: utf-8 -*- import numpy as np import matplotlib.pyplot as plt from sklearn import datas ...
吴裕雄 python 机器学习——数据预处理正则化Normalizer模型
from sklearn.preprocessing import Normalizer #数据预处理正则化Normalizer模型 def test_Normalizer(): X=[[1,2,3, ...
吴裕雄 python 机器学习——数据预处理标准化MaxAbsScaler模型
from sklearn.preprocessing import MaxAbsScaler #数据预处理标准化MaxAbsScaler模型 def test_MaxAbsScaler(): X=[[ ...
吴裕雄 python 机器学习——数据预处理标准化StandardScaler模型
from sklearn.preprocessing import StandardScaler #数据预处理标准化StandardScaler模型 def test_StandardScaler() ...
吴裕雄 python 机器学习——数据预处理标准化MinMaxScaler模型
from sklearn.preprocessing import MinMaxScaler #数据预处理标准化MinMaxScaler模型 def test_MinMaxScaler(): X=[[ ...
吴裕雄 python 机器学习——支持向量机线性分类LinearSVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
吴裕雄 python 机器学习——数据预处理字典学习模型
from sklearn.decomposition import DictionaryLearning #数据预处理字典学习DictionaryLearning模型 def test_Diction ...
吴裕雄 python 机器学习——数据预处理流水线Pipeline模型
from sklearn.svm import LinearSVC from sklearn.pipeline import Pipeline from sklearn import neighbor ...

随机推荐

p4042 [AHOI2014/JSOI2014]骑士游戏
传送门分析我们发现对于一个怪物要不然用魔法代价使其无需考虑后续点要么用普通攻击使其转移到他所连的所有点上且所有边大于0 所以我们可以先将一个点的最优代价设为魔法攻击的代价之后我们倒着跑spfa求 ...
Java 程序员最喜欢的 11 款免费 IDE 编辑器
Java开发人员需要花费大量的时间埋头于Java代码中,使用各种不同的IDE(Integrated Development Environment)来开发Java代码,所以下面我将为大家介绍11个不 ...
添加字段modify
ALTER TABLE tc_activity_turntable ADD `foot_pic` VARCHAR () NOT NULL DEFAULT '' COMMENT '底部图片';
[SoapUI] 按照 Test Step Type 获取所有满足条件的 Test Step
获取当前测试用例下所有Groovy Script类型的测试步骤 def testStepList = testRunner.testCase.getTestStepsOfType(com.eviwar ...
requests+正则表达式爬取妹子图
做了一个爬取妹子图某张索引页面的爬虫,主要用request和正则表达式. 感谢崔庆才大神的爬虫教学视频和 gitbook: B站:https://www.bilibili.com/video/a ...
Type Hierarchy
Window - Preferences - General - Keys Name: Open Type Hierarchy Description: Open a type hie ...
(自己转)比较ArrayList、LinkedList、Vector
1. List概述 List,就如图名字所示一样,是元素的有序列表.当我们讨论List时,将其与Set作对比是一个很好的办法,Set集合中的元素是无序且唯一的.下图是Collection的类继承图,从 ...
recv函数的用法详解
recv函数 int recv( SOCKET s, char FAR *buf, int len, int flags ); 不论是客户还是服务器应用程序都用rec ...
MySQL中如何为查询的数据添加自增序号、顺序呢？
背景介绍很多时候我们在使用mysql查询数据的时候都会遇到一个问题,就是查询出来了一堆数据,但是查询的数据的表并没有序号,然而部分数据库显示工具是有外带序号显示,但是这种序号不是由sql产生的,而是 ...
[LeetCode 题解]: Add Two Numbers
You are given two linked lists representing two non-negative numbers. The digits are stored in rever ...

吴裕雄 python 机器学习——K均值聚类KMeans模型

吴裕雄 python 机器学习——K均值聚类KMeans模型的更多相关文章

随机推荐

热门专题