吴裕雄 python 机器学习——密度聚类DBSCAN模型

import matplotlib.pyplot as plt
import numpy as np
from sklearn import cluster
from sklearn.metrics import adjusted_rand_score

try:
    # Public import path; the ``samples_generator`` submodule was deprecated
    # in scikit-learn 0.22 and removed in 0.24.
    from sklearn.datasets import make_blobs
except ImportError:
    # Fallback kept for very old scikit-learn installations.
    from sklearn.datasets.samples_generator import make_blobs

def create_data(centers, num=100, std=0.7, random_state=None):
    """Generate a synthetic blob dataset for the clustering experiments.

    Thin wrapper around ``make_blobs``.

    Args:
        centers: Forwarded to ``make_blobs`` as ``centers``.
        num: Forwarded to ``make_blobs`` as ``n_samples``.
        std: Forwarded to ``make_blobs`` as ``cluster_std``.
        random_state: Forwarded to ``make_blobs`` as ``random_state``. The
            default ``None`` keeps the original unseeded behaviour; pass an
            int to make the generated data (and therefore the reported
            ARI / core-sample numbers) reproducible between runs.

    Returns:
        The ``(X, labels_true)`` pair produced by ``make_blobs``.
    """
    X, labels_true = make_blobs(
        n_samples=num,
        centers=centers,
        cluster_std=std,
        random_state=random_state,
    )
    return X, labels_true

# DBSCAN density-based clustering model

def test_DBSCAN(*data):
    """Cluster with a default-configured DBSCAN and print two summary lines.

    Args:
        *data: Exactly two positional values, unpacked in this order: the
            samples to cluster and the ground-truth labels they are scored
            against.

    Prints the adjusted Rand index between the ground-truth labels and the
    predicted labels, followed by the number of entries in the fitted
    estimator's ``core_sample_indices_``.
    """
    samples, truth = data
    model = cluster.DBSCAN()
    assigned = model.fit_predict(samples)

    ari = adjusted_rand_score(truth, assigned)
    print("ARI:%s" % ari)

    core_count = len(model.core_sample_indices_)
    print("Core sample num:%d" % core_count)

# Centers used to generate the clusters.
centers = [
    [1, 1],
    [2, 2],
    [1, 2],
    [10, 20],
]

# Build the dataset used for clustering.
X, labels_true = create_data(centers, num=1000, std=0.5)

# Run the default-parameter DBSCAN check.
test_DBSCAN(X, labels_true)

def test_DBSCAN_epsilon(*data):
    """Plot how the DBSCAN clustering result changes with the ``eps`` parameter.

    Args:
        *data: Exactly two positional values, unpacked in this order: the
            samples to cluster and the ground-truth labels they are scored
            against.

    One DBSCAN model is fitted per value of ``np.logspace(-1, 1.5)``. For each
    fit the adjusted Rand index and the number of core samples are recorded,
    then both series are drawn side by side against a log-scaled ``eps`` axis
    and the figure is shown.
    """
    samples, truth = data
    eps_grid = np.logspace(-1, 1.5)

    ari_scores = []
    core_counts = []
    for eps in eps_grid:
        model = cluster.DBSCAN(eps=eps)
        assigned = model.fit_predict(samples)
        ari_scores.append(adjusted_rand_score(truth, assigned))
        core_counts.append(len(model.core_sample_indices_))

    # Plotting: (subplot position, series, marker, y label, y limits or None).
    panels = (
        (1, ari_scores, '+', 'ARI', (0, 1)),
        (2, core_counts, 'o', 'Core_Nums', None),
    )
    figure = plt.figure()
    for position, series, marker, y_label, y_limits in panels:
        axis = figure.add_subplot(1, 2, position)
        axis.plot(eps_grid, series, marker=marker)
        axis.set_xscale('log')
        axis.set_xlabel(r"$\epsilon$")
        if y_limits is not None:
            axis.set_ylim(*y_limits)
        axis.set_ylabel(y_label)
    figure.suptitle("DBSCAN")
    plt.show()

# Call test_DBSCAN_epsilon on the dataset generated above.

test_DBSCAN_epsilon(X,labels_true)

def test_DBSCAN_min_samples(*data):
    """Plot how the DBSCAN clustering result changes with ``min_samples``.

    Args:
        *data: Exactly two positional values, unpacked in this order: the
            samples to cluster and the ground-truth labels they are scored
            against.

    One DBSCAN model is fitted per value in ``range(1, 100)``. For each fit
    the adjusted Rand index and the number of core samples are recorded, then
    both series are drawn side by side and the figure is shown.
    """
    samples, truth = data
    candidates = range(1, 100)

    ari_scores = []
    core_counts = []
    for threshold in candidates:
        model = cluster.DBSCAN(min_samples=threshold)
        assigned = model.fit_predict(samples)
        ari_scores.append(adjusted_rand_score(truth, assigned))
        core_counts.append(len(model.core_sample_indices_))

    # Plotting: (subplot position, series, marker, y label, y limits or None).
    panels = (
        (1, ari_scores, '+', 'ARI', (0, 1)),
        (2, core_counts, 'o', 'Core_Nums', None),
    )
    figure = plt.figure()
    for position, series, marker, y_label, y_limits in panels:
        axis = figure.add_subplot(1, 2, position)
        axis.plot(candidates, series, marker=marker)
        axis.set_xlabel("min_samples")
        if y_limits is not None:
            axis.set_ylim(*y_limits)
        axis.set_ylabel(y_label)
    figure.suptitle("DBSCAN")
    plt.show()

# Call test_DBSCAN_min_samples on the dataset generated above.

test_DBSCAN_min_samples(X,labels_true)

吴裕雄 python 机器学习——密度聚类DBSCAN模型的更多相关文章

吴裕雄 python 机器学习——层次聚类AgglomerativeClustering模型
import numpy as np import matplotlib.pyplot as plt from sklearn import cluster from sklearn.metrics ...
吴裕雄 python 机器学习——支持向量机非线性回归SVR模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
吴裕雄 python 机器学习——半监督学习LabelSpreading模型
import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...
吴裕雄 python 机器学习——支持向量机线性回归SVR模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
吴裕雄 python 机器学习——混合高斯聚类GMM模型
import numpy as np import matplotlib.pyplot as plt from sklearn import mixture from sklearn.metrics ...
吴裕雄 python 机器学习——K均值聚类KMeans模型
import numpy as np import matplotlib.pyplot as plt from sklearn import cluster from sklearn.metrics ...
吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

随机推荐

Python文件修改和常用方法
为了更好地说明接下来的文件修改操作,我们有必要先来学习下文件操作的常用方法. 一.文件处理中的常用方法 #!/usr/bin/env python3 #-*- coding:utf-8 -*- # w ...
ref & out - C#中的参数传递
[ref & out - C#中的参数传递] ref与out均指定函数参数按引用传递,惟一的不同是,ref传递的参数必须初始化,而out可以不用. ref与out无法作为重载的依据,即ref与 ...
在页面完成读取EXCEL
protected void btnUpload_Click(object sender, EventArgs e) { if (Page.IsValid) { string sFILENAME = ...
566. Reshape the Matrix矩阵重排
［抄题］: In MATLAB, there is a very useful function called 'reshape', which can reshape a matrix into a ...
RocketMq2
javascript总结5:js常见的数据类型
1 Number 数字类型 :包含正数,负数,小数十进制表示: var n1 =23; 十六进制表示法:从0-9,a(A)-f(F)表示数字.以0x开头. var n2 = 0x42 2 字符串数据 ...
LibreOJ 6278 数列分块入门 2(分块)
题解:非常高妙的分块,每个块对应一个桶,桶内元素全部sort过,加值时,对于零散块O(sqrt(n))暴力修改,然后暴力重构桶.对于大块直接整块加.查询时对于非完整块O(sqrt(n))暴力遍历.对 ...
Sharepoint2013搜索学习笔记之自定义结果显示模板(九)
搜索结果通过套用定义好的显示模板来展示结果,显示模板由js和html组成,我们可以通过修改显示模板,然后将修改好的显示模板跟搜索结果绑定起来,来修改搜索结果的显示效果,例子如下图: 修改前修改后第 ...
sqlserver中多条数据合并成一条数据（stuff 与 for xml path 连用）
SQL 列转行,即多行合并成一条需求:按照分组,将多条记录内容合并成一条,效果如下: 数据库示例: CREATE TABLE [t2]([NID] [bigint] NULL,[district ...
.Net Core 项目引用本地类库方式（二）
上篇文章详细介绍了 .Net Core 项目中先打包、再通过 NuGet 引用本地类库的方式,这里再介绍一种引用包的方式

吴裕雄 python 机器学习——密度聚类DBSCAN模型

吴裕雄 python 机器学习——密度聚类DBSCAN模型的更多相关文章

随机推荐

热门专题