using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Drawing;
using System.Drawing.Imaging;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo: generates random 2-D points, clusters them with <see cref="KmeansPlus"/>
    /// and renders the points, their cluster labels and the final centres to a PNG file.
    /// </summary>
    class Program
    {
        // NOTE(review): every numeric literal in the original listing was missing
        // (e.g. "for (int i = ; i < ; i++)"). The values below are reconstructed
        // placeholders chosen so the demo runs - confirm against the intended settings.
        private const int PointCount = 200;
        private const int CanvasWidth = 800;
        private const int CanvasHeight = 600;
        private const int ClusterCount = 5;
        private const int MergeDistance = 50;
        private const float LabelFontSize = 9f;
        private const int PointDiameter = 3;
        private const int MeanDiameter = 10;

        // One colour per cluster index; indices beyond the palette wrap around.
        private static readonly Color[] ClusterColors = new Color[]
        {
            Color.MediumOrchid,
            Color.DeepPink,
            Color.Blue,
            Color.Brown,
            Color.Coral,
            Color.CornflowerBlue,
            Color.DarkCyan,
            Color.DarkGreen,
            Color.DarkMagenta,
            Color.DarkRed,
            Color.DodgerBlue
        };

        /// <summary>
        /// Builds the random data set, runs the clustering and draws the result.
        /// Waits for a line on standard input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The parameterless constructor is already time-seeded; the original
            // Math.Abs((int)Ticks) seed could overflow on int.MinValue.
            Random random = new Random();

            List<Coordinate> rawData = new List<Coordinate>(PointCount);
            for (int i = 0; i < PointCount; i++)
            {
                rawData.Add(new Coordinate { X = random.Next(0, CanvasWidth), Y = random.Next(0, CanvasHeight) });
            }

            KmeansPlus plus = new KmeansPlus();
            plus.Data = rawData;
            plus.K = ClusterCount;
            plus.ShortDistance = MergeDistance;
            plus.Start();

            DrawPoint(rawData, plus.Means, plus.Clustering);
            Console.ReadLine();
        }

        /// <summary>
        /// Renders the clustering to a bitmap and saves it as a PNG file.
        /// Every point is drawn as a black dot and labelled with its cluster index
        /// in the cluster colour; every centre is drawn as a larger filled circle.
        /// </summary>
        /// <param name="vector">The data points.</param>
        /// <param name="mean">The cluster centres; the centre at index j is drawn in colour j.</param>
        /// <param name="clustering">Cluster index of each point, parallel to <paramref name="vector"/>.</param>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        static void DrawPoint(List<Coordinate> vector, List<Coordinate> mean, int[] clustering)
        {
            if (vector == null)
            {
                throw new ArgumentNullException("vector");
            }
            if (mean == null)
            {
                throw new ArgumentNullException("mean");
            }
            if (clustering == null)
            {
                throw new ArgumentNullException("clustering");
            }

            // Brushes are GDI+ handles: create one per palette colour and release them
            // afterwards instead of allocating a fresh, never-disposed brush per point.
            SolidBrush[] clusterBrushes = new SolidBrush[ClusterColors.Length];
            try
            {
                for (int i = 0; i < ClusterColors.Length; i++)
                {
                    clusterBrushes[i] = new SolidBrush(ClusterColors[i]);
                }

                using (Bitmap bit = new Bitmap(CanvasWidth, CanvasHeight))
                using (Graphics g = Graphics.FromImage(bit))
                using (Font f = new Font("宋体", LabelFontSize))
                using (SolidBrush black = new SolidBrush(Color.Black))
                {
                    g.Clear(Color.White);

                    foreach (Coordinate p in vector)
                    {
                        g.FillEllipse(black, Convert.ToInt32(p.X), Convert.ToInt32(p.Y), PointDiameter, PointDiameter);
                    }

                    for (int i = 0; i < vector.Count; i++)
                    {
                        Coordinate p = vector[i];
                        int cluster = clustering[i];
                        // Wrap so more clusters than palette entries cannot index out of range.
                        SolidBrush brush = clusterBrushes[cluster % clusterBrushes.Length];
                        g.DrawString(cluster.ToString(), f, brush, Convert.ToInt32(p.X), Convert.ToInt32(p.Y));
                    }

                    for (int j = 0; j < mean.Count; j++)
                    {
                        Coordinate p = mean[j];
                        g.FillEllipse(clusterBrushes[j % clusterBrushes.Length], Convert.ToInt32(p.X), Convert.ToInt32(p.Y), MeanDiameter, MeanDiameter);
                    }

                    bit.Save(@"E:\Debug\" + DateTime.Now.Millisecond + ".png", ImageFormat.Png);
                }
            }
            finally
            {
                foreach (SolidBrush brush in clusterBrushes)
                {
                    if (brush != null)
                    {
                        brush.Dispose();
                    }
                }
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text; namespace ConsoleApplication1
{
/// <summary>
/// Plain k-means over 2-D <see cref="Coordinate"/> points: repeatedly assigns every
/// point to its nearest centre and moves each centre to the average of its points
/// until the assignment stops changing or an iteration cap is reached.
/// </summary>
public class Kmeans
{
    // NOTE(review): the multiplier was missing from the original listing
    // ("data.Count * ;"); 10 is assumed - confirm.
    private const int MaxIterationsPerPoint = 10;

    /// <summary>
    /// Runs the iteration starting from the supplied centres.
    /// </summary>
    /// <param name="rawData">Points to cluster.</param>
    /// <param name="means">Initial centres; the list itself is not modified.</param>
    /// <param name="endmeans">Receives the final centres.</param>
    /// <returns>
    /// The cluster index of each point, parallel to <paramref name="rawData"/>;
    /// an empty array when <paramref name="rawData"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="rawData"/> or <paramref name="means"/> is null.</exception>
    /// <exception cref="ArgumentException">There are points to cluster but no initial centre.</exception>
    public int[] Cluster(List<Coordinate> rawData, List<Coordinate> means, ref List<Coordinate> endmeans)
    {
        if (rawData == null)
        {
            throw new ArgumentNullException("rawData");
        }
        if (means == null)
        {
            throw new ArgumentNullException("means");
        }
        if (rawData.Count > 0 && means.Count == 0)
        {
            throw new ArgumentException("At least one initial centre is required.", "means");
        }

        int[] clustering = null;                 // null marks "no previous assignment yet"
        bool changed = true;
        int maxCount = rawData.Count * MaxIterationsPerPoint; // sanity cap on iterations
        int ct = 0;

        while (changed && ct < maxCount)
        {
            ++ct;
            int[] nextClustering = null;
            List<Coordinate> nextMeans = null;
            changed = UpdateMeans(rawData, means, clustering, ref nextClustering, ref nextMeans);
            means = nextMeans;
            clustering = nextClustering;
        }

        endmeans = means;
        return clustering ?? new int[0];
    }

    /// <summary>
    /// Performs one iteration: assigns each point to its nearest centre and
    /// recomputes every centre as the average of the points assigned to it.
    /// </summary>
    /// <param name="data">Points to cluster.</param>
    /// <param name="means">Centres from the previous iteration.</param>
    /// <param name="clustering">Assignment from the previous iteration, or null on the first one.</param>
    /// <param name="newclustering">Receives the new assignment.</param>
    /// <param name="newmeans">Receives the new centres.</param>
    /// <returns>True when the assignment differs from <paramref name="clustering"/> (always true on the first iteration).</returns>
    private static bool UpdateMeans(List<Coordinate> data, List<Coordinate> means, int[] clustering, ref int[] newclustering, ref List<Coordinate> newmeans)
    {
        int[] assignment = new int[data.Count];
        int[] counts = new int[means.Count];
        double[] sumX = new double[means.Count];
        double[] sumY = new double[means.Count];

        for (int i = 0; i < data.Count; i++)
        {
            int cluster = MinIndex(data[i], means);
            assignment[i] = cluster;
            counts[cluster]++;
            sumX[cluster] += data[i].X;
            sumY[cluster] += data[i].Y;
        }

        List<Coordinate> centres = new List<Coordinate>(means.Count);
        for (int k = 0; k < means.Count; k++)
        {
            if (counts[k] == 0)
            {
                // An empty cluster would give 0/0 = NaN, and a NaN centre poisons every
                // later distance comparison; keep the previous centre instead.
                centres.Add(new Coordinate { X = means[k].X, Y = means[k].Y });
            }
            else
            {
                centres.Add(new Coordinate { X = sumX[k] / counts[k], Y = sumY[k] / counts[k] });
            }
        }

        newclustering = assignment;
        newmeans = centres;

        if (clustering == null)
        {
            return true;
        }

        for (int i = 0; i < assignment.Length; i++)
        {
            if (assignment[i] != clustering[i])
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Finds the centre closest to a point.
    /// </summary>
    /// <param name="p">The point.</param>
    /// <param name="means">Candidate centres; must not be empty.</param>
    /// <returns>Index of the nearest centre; the lowest index wins ties.</returns>
    /// <exception cref="ArgumentException"><paramref name="means"/> is empty.</exception>
    private static int MinIndex(Coordinate p, List<Coordinate> means)
    {
        if (means.Count == 0)
        {
            throw new ArgumentException("At least one centre is required.", "means");
        }

        int indexOfMin = 0;
        double smallDist = Distance(p, means[0]);
        for (int k = 1; k < means.Count; ++k)
        {
            double dist = Distance(p, means[k]);
            if (dist < smallDist)
            {
                smallDist = dist;
                indexOfMin = k;
            }
        }

        return indexOfMin;
    }

    /// <summary>
    /// Euclidean distance between two points.
    /// </summary>
    /// <param name="tuple">First point.</param>
    /// <param name="meas">Second point.</param>
    /// <returns>The square root of the summed squared X and Y differences.</returns>
    public static double Distance(Coordinate tuple, Coordinate meas)
    {
        double sumSquaredDiffs = 0.0;
        sumSquaredDiffs += Math.Pow(tuple.X - meas.X, 2);
        sumSquaredDiffs += Math.Pow(tuple.Y - meas.Y, 2);
        return Math.Sqrt(sumSquaredDiffs);
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text; namespace ConsoleApplication1
{
/// <summary>
/// Chooses spread-out initial centres by distance-weighted random sampling,
/// merges centres that lie too close together, and then runs <see cref="Kmeans"/>.
/// </summary>
public class KmeansPlus
{
    // NOTE(review): both default values were missing from the original listing
    // ("private int _k = ;"). Neutral defaults are used (a single cluster, no
    // merging); callers are expected to set K and ShortDistance - confirm.
    private int _k = 1;
    private int _shortdistance = 0;

    /// <summary>
    /// Number of initial centres to pick. At least one centre is always chosen.
    /// </summary>
    public int K { get { return this._k; } set { this._k = value; } }

    /// <summary>
    /// Merge threshold: of two initial centres closer than this distance only one is kept.
    /// </summary>
    public int ShortDistance { get { return this._shortdistance; } set { this._shortdistance = value; } }

    /// <summary>
    /// Input points. Must be set to a non-empty list before <see cref="Start"/> is called.
    /// </summary>
    public List<Coordinate> Data { get; set; }

    /// <summary>
    /// Final cluster centres; set by <see cref="Start"/>.
    /// </summary>
    public List<Coordinate> Means { get; set; }

    /// <summary>
    /// Cluster index of each point in <see cref="Data"/>; set by <see cref="Start"/>.
    /// </summary>
    public int[] Clustering { get; set; }

    /// <summary>
    /// Picks the initial centres, merges near-duplicates, runs k-means and stores the
    /// outcome in <see cref="Means"/> and <see cref="Clustering"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException"><see cref="Data"/> is null or empty.</exception>
    public void Start()
    {
        if (Data == null || Data.Count == 0)
        {
            throw new InvalidOperationException("Data must contain at least one point before Start() is called.");
        }

        // One generator for the whole run: re-creating a tick-seeded Random per draw
        // yields identical seeds (and identical draws) when calls happen in quick succession.
        Random random = new Random();

        // Work on a copy so Data itself is never modified.
        List<Coordinate> candidates = new List<Coordinate>(Data);
        List<Coordinate> means = new List<Coordinate>();

        // First centre: uniform over all points. Random.Next's upper bound is exclusive,
        // so passing Count (not Count - 1) keeps the last point selectable.
        int first = random.Next(candidates.Count);
        means.Add(candidates[first]);
        candidates.RemoveAt(first);

        // NOTE(review): the original loop start index was missing, so whether it produced
        // K or K + 1 centres is unknown; this version stops at exactly K - confirm.
        while (means.Count < K && candidates.Count > 0)
        {
            UpdateCompass(candidates, means, random);
        }

        means = MergeMeans(means, ShortDistance);

        List<Coordinate> finalMeans = new List<Coordinate>();
        Clustering = new Kmeans().Cluster(Data, means, ref finalMeans);
        Means = finalMeans;
    }

    /// <summary>
    /// Roulette-wheel selection of the next initial centre: each remaining point is
    /// weighted by its distance to the nearest centre chosen so far. The chosen point
    /// is moved from <paramref name="data"/> to <paramref name="meas"/>.
    /// </summary>
    /// <param name="data">Remaining candidate points; must not be empty. The chosen point is removed.</param>
    /// <param name="meas">Centres chosen so far; must not be empty. The chosen point is appended.</param>
    /// <param name="random">Random source shared across calls.</param>
    private static void UpdateCompass(List<Coordinate> data, List<Coordinate> meas, Random random)
    {
        // NOTE(review): textbook k-means++ weights by squared distance; the original
        // weights by plain distance, which is kept here.
        double[] weights = new double[data.Count];
        double total = 0;
        for (int j = 0; j < data.Count; j++)
        {
            double nearest = double.MaxValue;
            for (int i = 0; i < meas.Count; i++)
            {
                double d = Kmeans.Distance(data[j], meas[i]);
                if (d < nearest)
                {
                    nearest = d;
                }
            }
            weights[j] = nearest;
            total += nearest;
        }

        int picked;
        if (total > 0)
        {
            // Draw on the continuous range [0, total) rather than an integer-rounded one.
            double threshold = random.NextDouble() * total;
            picked = weights.Length - 1; // fallback if rounding leaves the threshold unmet
            for (int j = 0; j < weights.Length; j++)
            {
                if (threshold < weights[j])
                {
                    // Stop at the first slot covering the draw. The original kept looping,
                    // so every later index overwrote the choice and the last point always won.
                    picked = j;
                    break;
                }
                threshold -= weights[j];
            }
        }
        else
        {
            // Every candidate coincides with an existing centre: fall back to a uniform pick.
            picked = random.Next(data.Count);
        }

        meas.Add(data[picked]);
        data.RemoveAt(picked);
    }

    /// <summary>
    /// Removes centres that lie closer than <paramref name="len"/> to an earlier centre.
    /// </summary>
    /// <param name="means">Centres to thin out; modified in place.</param>
    /// <param name="len">Minimum allowed distance between two kept centres.</param>
    /// <returns>The same list instance, after removal.</returns>
    private static List<Coordinate> MergeMeans(List<Coordinate> means, int len)
    {
        for (int i = 0; i < means.Count - 1; i++)
        {
            // Distance is symmetric, so only later centres need checking; scanning
            // backwards lets RemoveAt run without index fix-ups.
            for (int j = means.Count - 1; j > i; j--)
            {
                if (Kmeans.Distance(means[i], means[j]) < len)
                {
                    means.RemoveAt(j);
                }
            }
        }

        return means;
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text; namespace ConsoleApplication1
{
/// <summary>
/// A mutable 2-D point with double-precision components.
/// </summary>
[Serializable]
public class Coordinate
{
/// <summary>The X component of the point.</summary>
public double X { get; set; }
/// <summary>The Y component of the point.</summary>
public double Y { get; set; }
}
}

Kmeans++算法DotNet实现的更多相关文章

  1. 记录近期小改K-Means至MapReduce上的心得

    背景: 在所有聚类算法中KMeans算是表面上最简单的一种,没有过多恼人的古希腊符号公式,没有过分繁杂的公式嵌套.对于一个初学矩阵或者仅有向量概念的非专业人士来说,不可不谓是一把踹门利器.这个世界上 ...

  2. EM 算法(二)-KMeans

    KMeans 算法太过简单,不再赘述 本文尝试用 EM 算法解释 KMeans,而事实上 KMeans 算是 EM 的一个特例 EM 算法是包含隐变量的参数估计模型,那对应到 KMeans 上,隐变量 ...

  3. 【机器学习】聚类算法:层次聚类、K-means聚类

    聚类算法实践(一)--层次聚类.K-means聚类 摘要: 所谓聚类,就是将相似的事物聚集在一 起,而将不相似的事物划分到不同的类别的过程,是数据分析之中十分重要的一种手段.比如古典生物学之中,人们通 ...

  4. Canopy算法计算聚类的簇数

    Kmeans算法是聚类中的经典算法.步骤如下: 选择K个点作为初始质心 repeat 将每一个点指派到最近的质心,形成K个簇 重新计算每一个簇的质心 until 簇不发生变化或达到最大迭代次数 ...

  5. 【原创】数据挖掘案例——ReliefF和K-means算法的医学应用

    数据挖掘方法的提出,让人们有能力最终认识数据的真正价值,即蕴藏在数据中的信息和知识.数据挖掘 (Data Mining),指的是从大型数据库或数据仓库中提取人们感兴趣的知识,这些知识是隐含的.事先未知 ...

  6. 【转】算法杂货铺——k均值聚类(K-means)

    k均值聚类(K-means) 4.1.摘要 在前面的文章中,介绍了三种常见的分类算法.分类作为一种监督学习方法,要求必须事先明确知道各个类别的信息,并且断言所有待分类项都有一个类别与之对应.但是很多时 ...

  7. 二分K-means算法

    二分K-means聚类(bisecting K-means) 算法优缺点: 由于这个是K-means的改进算法,所以优缺点与之相同. 算法思想: 1.要了解这个首先应该了解K-means算法,可以看这 ...

  8. k-means均值聚类算法(转)

    4.1.摘要 在前面的文章中,介绍了三种常见的分类算法.分类作为一种监督学习方法,要求必须事先明确知道各个类别的信息,并且断言所有待分类项都有一个类别与之对应.但是很多时候上述条件得不到满足,尤其是在 ...

  9. 基于ReliefF和K-means算法的医学应用实例

    基于ReliefF和K-means算法的医学应用实例 数据挖掘方法的提出,让人们有能力最终认识数据的真正价值,即蕴藏在数据中的信息和知识.数据挖掘 (Data Mining),指的是从大型数据库或数据 ...

随机推荐

  1. META-INF文件夹是干啥的,META-INF文件夹的作用, META-INF文件夹能删吗

    今天有人问到 META-INF文件夹是干啥的,META-INF文件夹的作用, META-INF文件夹能删吗,还有项目的META-INF下面一般会有个MANIFEST.MF 文件,都是干啥的. 百度搜了 ...

  2. 关于/usr/local/lib/libz.a(zutil.o): relocation R_X86_64_32 against `.rodata.str1.1' can not be used when making a shared object; recompile with -fPIC解决办法

    具体报错截图如下: 解决方法: 题外话,我对makefile cmake也是一窍不通因此本人也是不想去积极地解决这个问题,但是当你求助无缘的时候你才会静心去思考.读到这句话的时候也许你已经发现了问题所 ...

  3. Python从零开始(1)新手常问

    如何清除屏幕 如果是在Windows命令行中,输入 import os os.system('cls') 在IDLE中没有找到完美的清除屏幕的方法 网上提到用新建窗口的方法 如何退出Python提示符 ...

  4. SIGKDD历年Best Papers

    作者:我爱机器学习原文链接:SIGKDD历年Best Papers SIGKDD(Data Mining)(1997-2016) 年份 标题 一作 一作单位 2016 FRAUDAR: Boundin ...

  5. Windows环境下MongoDB的安装与配置

    MongoDB是一种高性能的文档型数据库,现介绍一下在Windows环境下MongoDB的安装与配置 获取MongoDB 打开官方网站 www.mongodb.org,找到页面右上角的Download链 ...

  6. mysql数据类型和列属性

    列属性: 定义一个字段时对该字段设置的额外的信息或约束 1.  关联操作:reference 2.  字段默认值:default value 3.  主索引和唯一索引:primary key 和uni ...

  7. golang中不定参数与数组切片的区别

    package main import "fmt" func main() { myfunc1(, , , ) //传递不定数量的参数 myfunc2([], , , }) //传 ...

  8. Httpclient请求数据(post)

    public static String loginCheck_POST_HttpClient(String name,String pass,String url){ String result = ...

  9. Java 基础知识相关好文章

    1. 使用简单易懂的例子,分析了equals 和 hashCode 两个方法的异同,尤其中自定义类中对他们的重写,对Set等容器类的在插入时的判断是否相等的影响. http://blog.csdn.n ...

  10. linux -a 到 -z 的意义

    shell if判断中常用的也就是绿色部分,尾部部分越看越不懂.从百度文库转载. [ -a FILE ] 如果 FILE 存在则为真. [ -b FILE ] 如果 FILE 存在且是一个块特殊文件则 ...