using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Drawing;
using System.Drawing.Imaging;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo: generates random 2-D points, clusters them with <see cref="KmeansPlus"/>,
    /// and writes a PNG picture of the points, their cluster labels and the cluster centers.
    /// </summary>
    class Program
    {
        // NOTE(review): every numeric literal of this class was missing from the source
        // (e.g. "for (int i = ; i < ; i++)"). The constants below are ASSUMED values that
        // make the demo compile and run; confirm them against the original project.

        /// <summary>Number of random points to generate (assumed value).</summary>
        private const int PointCount = 200;

        /// <summary>Exclusive upper bound of the random X and Y coordinates (assumed value).</summary>
        private const int CoordinateLimit = 500;

        /// <summary>Number of initial centers requested from the seeding step (assumed value).</summary>
        private const int InitialClusterCount = 8;

        /// <summary>Centers closer than this distance are merged (assumed value).</summary>
        private const int MergeDistance = 30;

        /// <summary>Width and height of the output bitmap in pixels (assumed value).</summary>
        private const int CanvasSize = 520;

        /// <summary>Font size used for the cluster labels (assumed value).</summary>
        private const float LabelFontSize = 9f;

        /// <summary>Diameter of the dot drawn for each data point (assumed value).</summary>
        private const int PointDiameter = 4;

        /// <summary>Diameter of the dot drawn for each cluster center (assumed value).</summary>
        private const int CenterDiameter = 10;

        /// <summary>Directory the picture is written to.</summary>
        private const string OutputDirectory = @"E:\Debug\";

        /// <summary>Colors used for cluster labels and centers, indexed by cluster number.</summary>
        private static readonly Color[] Palette =
        {
            Color.MediumOrchid,
            Color.DeepPink,
            Color.Blue,
            Color.Brown,
            Color.Coral,
            Color.CornflowerBlue,
            Color.DarkCyan,
            Color.DarkGreen,
            Color.DarkMagenta,
            Color.DarkRed,
            Color.DodgerBlue
        };

        /// <summary>
        /// Generates the random data set, runs the clustering and draws the result.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // new Random() replaces the Math.Abs(unchecked((int)DateTime.Now.Ticks)) seed,
            // which throws OverflowException when the truncated tick count is int.MinValue.
            Random random = new Random();

            List<Coordinate> rawData = new List<Coordinate>(PointCount);
            for (int i = 0; i < PointCount; i++)
            {
                rawData.Add(new Coordinate
                {
                    X = random.Next(0, CoordinateLimit),
                    Y = random.Next(0, CoordinateLimit)
                });
            }

            KmeansPlus plus = new KmeansPlus();
            plus.Data = rawData;
            plus.K = InitialClusterCount;
            plus.ShortDistance = MergeDistance;
            plus.Start();

            DrawPoint(rawData, plus.Means, plus.Clustering);
            Console.ReadLine();
        }

        /// <summary>
        /// Draws the data points, their cluster labels and the cluster centers into a bitmap
        /// and saves it as a PNG file.
        /// </summary>
        /// <param name="vector">The data points.</param>
        /// <param name="mean">The cluster centers.</param>
        /// <param name="clustering">Cluster index of each point; <c>clustering[i]</c> belongs to <c>vector[i]</c>.</param>
        static void DrawPoint(List<Coordinate> vector, List<Coordinate> mean, int[] clustering)
        {
            // All GDI+ objects are disposed deterministically; the original leaked the bitmap,
            // the graphics context, the font and one brush per drawn label/center.
            using (Bitmap bit = new Bitmap(CanvasSize, CanvasSize))
            using (Graphics g = Graphics.FromImage(bit))
            using (Font f = new Font("宋体", LabelFontSize))
            using (SolidBrush black = new SolidBrush(Color.Black))
            {
                g.Clear(Color.White);

                // Pass 1: every point as a small black dot.
                foreach (Coordinate p in vector)
                {
                    g.FillEllipse(black, Convert.ToInt32(p.X), Convert.ToInt32(p.Y), PointDiameter, PointDiameter);
                }

                // Pass 2: the cluster index of every point, in that cluster's color.
                for (int i = 0; i < vector.Count; i++)
                {
                    Coordinate p = vector[i];
                    int cluster = clustering[i];
                    using (SolidBrush brush = new SolidBrush(PaletteColor(cluster)))
                    {
                        g.DrawString(cluster.ToString(), f, brush, Convert.ToInt32(p.X), Convert.ToInt32(p.Y));
                    }
                }

                // Pass 3: every cluster center as a larger dot in its cluster's color.
                for (int j = 0; j < mean.Count; j++)
                {
                    Coordinate center = mean[j];
                    using (SolidBrush brush = new SolidBrush(PaletteColor(j)))
                    {
                        g.FillEllipse(brush, Convert.ToInt32(center.X), Convert.ToInt32(center.Y), CenterDiameter, CenterDiameter);
                    }
                }

                // Bitmap.Save fails when the target directory does not exist, so create it first.
                System.IO.Directory.CreateDirectory(OutputDirectory);
                bit.Save(OutputDirectory + DateTime.Now.Millisecond + ".png", ImageFormat.Png);
            }
        }

        /// <summary>
        /// Returns the palette color for a cluster index, wrapping around when there are more
        /// clusters than palette entries (the original indexed the list directly and threw).
        /// </summary>
        /// <param name="clusterIndex">Zero-based cluster index.</param>
        /// <returns>The color assigned to that cluster.</returns>
        private static Color PaletteColor(int clusterIndex)
        {
            return Palette[clusterIndex % Palette.Length];
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text; namespace ConsoleApplication1
{
/// <summary>
/// Plain k-means clustering of 2-D points: points are repeatedly assigned to the nearest
/// center and the centers are recomputed until no assignment changes.
/// </summary>
public class Kmeans
{
    // NOTE(review): the multiplier literal was missing from the source ("data.Count * ;").
    // 10 is an assumed value; confirm against the original project.
    private const int MaxIterationsPerPoint = 10;

    /// <summary>
    /// Clusters the points starting from the given initial centers.
    /// </summary>
    /// <param name="rawData">The points to cluster.</param>
    /// <param name="means">The initial cluster centers; this list is not modified.</param>
    /// <param name="endmeans">Receives the final cluster centers.</param>
    /// <returns>
    /// The cluster index of each point (same order as <paramref name="rawData"/>);
    /// an empty array when <paramref name="rawData"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="rawData"/> or <paramref name="means"/> is null.</exception>
    /// <exception cref="ArgumentException">There are points to cluster but no initial centers.</exception>
    public int[] Cluster(List<Coordinate> rawData, List<Coordinate> means, ref List<Coordinate> endmeans)
    {
        if (rawData == null)
        {
            throw new ArgumentNullException("rawData");
        }
        if (means == null)
        {
            throw new ArgumentNullException("means");
        }
        if (rawData.Count == 0)
        {
            endmeans = means;
            return new int[0];
        }
        if (means.Count == 0)
        {
            throw new ArgumentException("At least one initial center is required.", "means");
        }

        // Sanity cap on the number of passes; long arithmetic avoids int overflow on huge inputs.
        long maxIterations = (long)rawData.Count * MaxIterationsPerPoint;
        long iteration = 0;
        bool changed = true;
        int[] clustering = null;

        while (changed && iteration < maxIterations)
        {
            iteration++;
            int[] nextClustering = null;
            List<Coordinate> nextMeans = null;
            changed = UpdateMeans(rawData, means, clustering, ref nextClustering, ref nextMeans);
            means = nextMeans;
            clustering = nextClustering;
        }

        endmeans = means;
        return clustering;
    }

    /// <summary>
    /// Performs one pass: assigns every point to its nearest center and recomputes each
    /// center as the average of the points assigned to it.
    /// </summary>
    /// <param name="data">The points to cluster.</param>
    /// <param name="means">The centers from the previous pass.</param>
    /// <param name="clustering">The assignment from the previous pass, or null on the first pass.</param>
    /// <param name="newclustering">Receives the new assignment.</param>
    /// <param name="newmeans">Receives the new centers.</param>
    /// <returns>True when this is the first pass or any point changed cluster; otherwise false.</returns>
    private static bool UpdateMeans(List<Coordinate> data, List<Coordinate> means, int[] clustering, ref int[] newclustering, ref List<Coordinate> newmeans)
    {
        int clusterCount = means.Count;
        int[] clusterCounts = new int[clusterCount];
        double[] sumX = new double[clusterCount];
        double[] sumY = new double[clusterCount];

        newclustering = new int[data.Count];
        for (int i = 0; i < data.Count; i++)
        {
            int nearest = MinIndex(data[i], means);
            newclustering[i] = nearest;
            clusterCounts[nearest]++;
            sumX[nearest] += data[i].X;
            sumY[nearest] += data[i].Y;
        }

        newmeans = new List<Coordinate>(clusterCount);
        for (int k = 0; k < clusterCount; k++)
        {
            if (clusterCounts[k] == 0)
            {
                // An empty cluster would give 0.0 / 0 = NaN, and a NaN center makes every
                // later "<" distance comparison false. Keep the previous center instead.
                newmeans.Add(new Coordinate { X = means[k].X, Y = means[k].Y });
            }
            else
            {
                newmeans.Add(new Coordinate { X = sumX[k] / clusterCounts[k], Y = sumY[k] / clusterCounts[k] });
            }
        }

        if (clustering == null)
        {
            return true;
        }

        for (int i = 0; i < newclustering.Length; i++)
        {
            if (newclustering[i] != clustering[i])
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Finds the index of the center nearest to a point; the lowest index wins on ties.
    /// </summary>
    /// <param name="p">The point.</param>
    /// <param name="means">The candidate centers; must contain at least one element.</param>
    /// <returns>The index into <paramref name="means"/> of the nearest center.</returns>
    private static int MinIndex(Coordinate p, List<Coordinate> means)
    {
        int indexOfMin = 0;
        double smallDist = Distance(p, means[0]);
        for (int k = 1; k < means.Count; ++k)
        {
            double candidate = Distance(p, means[k]);
            if (candidate < smallDist)
            {
                smallDist = candidate;
                indexOfMin = k;
            }
        }

        return indexOfMin;
    }

    /// <summary>
    /// Computes the Euclidean distance between two points.
    /// </summary>
    /// <param name="tuple">The first point.</param>
    /// <param name="meas">The second point.</param>
    /// <returns>The square root of the summed squared X and Y differences.</returns>
    public static double Distance(Coordinate tuple, Coordinate meas)
    {
        double dx = tuple.X - meas.X;
        double dy = tuple.Y - meas.Y;
        return Math.Sqrt(dx * dx + dy * dy);
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text; namespace ConsoleApplication1
{
/// <summary>
/// K-means with distance-weighted random seeding: picks the initial centers at random with a
/// preference for points far from the centers already chosen, merges centers that are too
/// close together, then runs <see cref="Kmeans"/> from those centers.
/// </summary>
public class KmeansPlus
{
    // NOTE(review): both default literals were missing from the source ("private int _k = ;").
    // The values below are assumptions; 0 for the merge distance means "never merge".
    private int _k = 10;
    private int _shortdistance = 0;

    // One generator per instance. The original created a tick-seeded Random on every call,
    // so calls made within the same tick produced identical "random" picks.
    private readonly Random _random = new Random();

    /// <summary>
    /// Number of initial centers to seed.
    /// </summary>
    public int K { get { return this._k; } set { this._k = value; } }

    /// <summary>
    /// Centers closer to each other than this distance are merged before clustering.
    /// </summary>
    public int ShortDistance { get { return this._shortdistance; } set { this._shortdistance = value; } }

    /// <summary>
    /// The points to cluster; must be assigned before calling <see cref="Start"/>.
    /// </summary>
    public List<Coordinate> Data { get; set; }

    /// <summary>
    /// The final cluster centers, set by <see cref="Start"/>.
    /// </summary>
    public List<Coordinate> Means { get; set; }

    /// <summary>
    /// The cluster index of each point in <see cref="Data"/>, set by <see cref="Start"/>.
    /// </summary>
    public int[] Clustering { get; set; }

    /// <summary>
    /// Seeds the initial centers, merges near-duplicates, runs k-means and stores the result
    /// in <see cref="Means"/> and <see cref="Clustering"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException"><see cref="Data"/> has not been assigned.</exception>
    public void Start()
    {
        if (Data == null)
        {
            throw new InvalidOperationException("Data must be assigned before calling Start().");
        }
        if (Data.Count == 0)
        {
            Means = new List<Coordinate>();
            Clustering = new int[0];
            return;
        }

        // Work on a copy so that removing chosen seeds never alters Data.
        List<Coordinate> candidates = new List<Coordinate>(Data);

        // Random.Next(max) excludes max, so every index 0..Count-1 is eligible. The original
        // passed Count - 1 as the exclusive bound and could never pick the last point.
        int firstIndex = _random.Next(candidates.Count);
        List<Coordinate> means = new List<Coordinate>();
        means.Add(candidates[firstIndex]);
        candidates.RemoveAt(firstIndex);

        // NOTE(review): the loop's start literal was missing from the source; this version
        // assumes K is the TOTAL number of seeds (including the first one) — confirm.
        while (means.Count < K && candidates.Count > 0)
        {
            List<Coordinate> remaining = null;
            means = UpdateCompass(candidates, means, ref remaining, _random);
            candidates = remaining;
        }

        means = MergeMeans(means, ShortDistance);

        List<Coordinate> finalMeans = new List<Coordinate>();
        Clustering = new Kmeans().Cluster(Data, means, ref finalMeans);
        Means = finalMeans;
    }

    /// <summary>
    /// Roulette-wheel ("compass") selection of the next center: each candidate is chosen with
    /// probability proportional to its distance to the nearest existing center.
    /// </summary>
    /// <param name="data">Candidate points; the chosen point is removed from this list.</param>
    /// <param name="meas">Centers chosen so far; the chosen point is appended to this list.</param>
    /// <param name="result">Receives the remaining candidates (the same list as <paramref name="data"/>).</param>
    /// <param name="random">Random number generator used for the draw.</param>
    /// <returns>The center list <paramref name="meas"/> including the new center.</returns>
    private static List<Coordinate> UpdateCompass(List<Coordinate> data, List<Coordinate> meas, ref List<Coordinate> result, Random random)
    {
        result = data;
        if (data.Count == 0)
        {
            return meas;
        }

        // Distance from every candidate to its nearest existing center, and their total.
        double[] nearest = new double[data.Count];
        double total = 0;
        for (int j = 0; j < data.Count; j++)
        {
            double min = double.MaxValue;
            for (int i = 0; i < meas.Count; i++)
            {
                double d = Kmeans.Distance(data[j], meas[i]);
                if (d < min)
                {
                    min = d;
                }
            }

            nearest[j] = min;
            total += min;
        }

        // Draw a point on the wheel. NextDouble keeps the full range; the original truncated
        // the total to an int before drawing.
        double remaining = random.NextDouble() * total;

        // Fallback covers floating-point round-off and the case where every distance is 0.
        int chosen = data.Count - 1;
        for (int j = 0; j < nearest.Length; j++)
        {
            if (remaining < nearest[j])
            {
                chosen = j;
                // Stop at the first hit. Without this break every later index also matched
                // and the last candidate was always selected.
                break;
            }

            remaining -= nearest[j];
        }

        meas.Add(data[chosen]);
        data.RemoveAt(chosen);
        return meas;
    }

    /// <summary>
    /// Removes centers that lie closer than <paramref name="len"/> to an earlier center.
    /// </summary>
    /// <param name="means">The centers; modified in place.</param>
    /// <param name="len">Minimum distance two centers must keep to both survive.</param>
    /// <returns>The same list <paramref name="means"/> after merging.</returns>
    private static List<Coordinate> MergeMeans(List<Coordinate> means, int len)
    {
        for (int i = 0; i < means.Count - 1; i++)
        {
            // Walk backwards so RemoveAt never shifts an index that is still to be visited.
            // Only j > i is needed: distance is symmetric and earlier passes have already
            // removed everything too close to the centers before i.
            for (int j = means.Count - 1; j > i; j--)
            {
                if (Kmeans.Distance(means[i], means[j]) < len)
                {
                    means.RemoveAt(j);
                }
            }
        }

        return means;
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text; namespace ConsoleApplication1
{
/// <summary>
/// A point described by two double-precision components, X and Y.
/// Marked <c>[Serializable]</c>; both components are readable and writable.
/// </summary>
[Serializable]
public class Coordinate
{
/// <summary>The X component of the point.</summary>
public double X { get; set; }
/// <summary>The Y component of the point.</summary>
public double Y { get; set; }
}
}

Kmeans++算法DotNet实现的更多相关文章

  1. 记录近期小改K-Means至MapReduce上的心得

    背景: 在所有聚类算法中KMeans算是表面上最简单的一种,没有过多恼人的古希腊符号公式,没有过分繁杂的公式嵌套.对于一个初学矩阵或者仅有向量概念的非专业人士来说,不可不谓是一把踹门利器.这个世界上 ...

  2. EM 算法(二)-KMeans

    KMeans 算法太过简单,不再赘述 本文尝试用 EM 算法解释 KMeans,而事实上 KMeans 算是 EM 的一个特例 EM 算法是包含隐变量的参数估计模型,那对应到 KMeans 上,隐变量 ...

  3. 【机器学习】聚类算法:层次聚类、K-means聚类

    聚类算法实践(一)--层次聚类.K-means聚类 摘要: 所谓聚类,就是将相似的事物聚集在一 起,而将不相似的事物划分到不同的类别的过程,是数据分析之中十分重要的一种手段.比如古典生物学之中,人们通 ...

  4. Canopy算法计算聚类的簇数

    Kmeans算法是聚类中的经典算法.步骤如下: 选择K个点作为初始质心 repeat 将每一个点指派到最近的质心,形成K个簇 重新计算每一个簇的质心 until 簇不发生变化或达到最大迭代次数 ...

  5. 【原创】数据挖掘案例——ReliefF和K-means算法的医学应用

    数据挖掘方法的提出,让人们有能力最终认识数据的真正价值,即蕴藏在数据中的信息和知识.数据挖掘 (Data Mining),指的是从大型数据库或数据仓库中提取人们感兴趣的知识,这些知识是隐含的.事先未知 ...

  6. 【转】算法杂货铺——k均值聚类(K-means)

    k均值聚类(K-means) 4.1.摘要 在前面的文章中,介绍了三种常见的分类算法.分类作为一种监督学习方法,要求必须事先明确知道各个类别的信息,并且断言所有待分类项都有一个类别与之对应.但是很多时 ...

  7. 二分K-means算法

    二分K-means聚类(bisecting K-means) 算法优缺点: 由于这个是K-means的改进算法,所以优缺点与之相同. 算法思想: 1.要了解这个首先应该了解K-means算法,可以看这 ...

  8. k-means均值聚类算法(转)

    4.1.摘要 在前面的文章中,介绍了三种常见的分类算法.分类作为一种监督学习方法,要求必须事先明确知道各个类别的信息,并且断言所有待分类项都有一个类别与之对应.但是很多时候上述条件得不到满足,尤其是在 ...

  9. 基于ReliefF和K-means算法的医学应用实例

    基于ReliefF和K-means算法的医学应用实例 数据挖掘方法的提出,让人们有能力最终认识数据的真正价值,即蕴藏在数据中的信息和知识.数据挖掘 (Data Mining),指的是从大型数据库或数据 ...

随机推荐

  1. Android开发--ListView的应用

    1.简介 ListView用于以列表的形式展示数据.它在装载数据时,不能使用ListView类的add()等相关方法添加,而要借助Adapter对象进行添加.另外,由于 系统提供的Adapter往往不 ...

  2. Oracle中查看所有表和字段以及表注释.字段注释

    获取表: select table_name from user_tables; //当前用户拥有的表 select table_name from all_tables; //所有用户的表 sele ...

  3. Rigid motion segmentation

    In computer vision, rigid motion segmentation is the process of separating regions, features, or tra ...

  4. PHP分页代码

       }            <a href="fenye.php?page=<?php echo  <?php  }    <a href="fenye ...

  5. CALayer 4 详解 -----转自李明杰

    CALayer4-自定义层   本文目录 一.自定义层的方法1 二.自定义层的方法2 三.其他 自定义层,其实就是在层上绘图,一共有2种方法,下面详细介绍一下. 回到顶部 一.自定义层的方法1 方法描 ...

  6. php foreach 语法的遍历来源数组如果不是一个有效数组php会出现错误警告 Invalid argument supplied for foreach()

    在php中,foreach语法的遍历来源数组如果不是一个有效数组,php会出现错误警告 Invalid argument supplied for foreach() ,但是很多时候这个数组是取自某些 ...

  7. Git 仓库和记录操作到仓库

    Git 配置好了,来 clone 个或者新建个仓库来试试, $ git clone git@github.com:git/git.git 把 Git 的源码克隆下来,克隆会自动创建本地仓库,并创建本地 ...

  8. jQuery判断网页中的id是否有重复的

    From:http://blog.csdn.net/china_skag/article/details/6915323判断网页中的ID是否有重复的:指定ID判断 $(function(){ $(&q ...

  9. Spring(4)

    Spring的Bean的配置形式 1.基于XML的形式(无需讲解) 2.基于注解的形式(需要引入AOP的jar包,此jar包实现了AOP的注解) 当在Spring配置文件中引入类扫描注解命名空间并且指 ...

  10. 在Django中进行注册用户的邮件确认

    之前利用Flask写博客时(http://hbnnlove.sinaapp.com),我对注册模块的逻辑设计很简单,就是用户填写注册表单,然后提交,数据库会更新User表中的数据,字段主要有用户名,哈 ...