先贴源码地址

https://github.com/dotnet/corefx/tree/master/src/System.Linq/src

.NET CORE很大一个好处就是代码的开源,你可以详细的查看你使用类的源代码,并学习微软的写法和实现思路。我们这个系列熟悉基本类库是一个目的,另一个目的就是学习微软的实现思路和编程方法。

今天我们单独讨论的问题是:LINQ 中的 Distinct 方法是如何实现的。最后还会介绍我们在实际编程中对 Distinct 方法所做的扩展。

System.Linq

linq中Distinct方法在Enumerable类中

Enumerable

public static partial class Enumerable

内部去重方法实现有2个重载

1

/// <summary>
/// Returns the distinct elements of <paramref name="source"/> by forwarding to the
/// two-argument overload with a <c>null</c> comparer.
/// </summary>
/// <typeparam name="TSource">The type of the elements of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to remove duplicate elements from.</param>
/// <returns>The result of the comparer-taking overload called with a <c>null</c> comparer.</returns>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
{
    return Distinct(source, null);
}

2

/// <summary>
/// Returns an iterator over the distinct elements of <paramref name="source"/>,
/// using <paramref name="comparer"/> to decide which elements are duplicates.
/// </summary>
/// <typeparam name="TSource">The type of the elements of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to remove duplicate elements from.</param>
/// <param name="comparer">The comparer passed through to the iterator; may be <c>null</c>.</param>
/// <returns>A <c>DistinctIterator</c> wrapping <paramref name="source"/>.</returns>
/// <remarks>
/// Throws the exception built by <c>Error.ArgumentNull</c> when <paramref name="source"/> is <c>null</c>.
/// </remarks>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
    if (source != null)
    {
        return new DistinctIterator<TSource>(source, comparer);
    }

    throw Error.ArgumentNull(nameof(source));
}

去重迭代器DistinctIterator

private sealed class DistinctIterator

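去重迭代器在逐个枚举(MoveNext)时,把每个元素通过 Add 加入 Set<TSource> _set;只有 Add 返回 true(即之前没出现过)的元素才会被输出。只有 ToArray、ToList、GetCount 这几个一次性取结果的方法,才会通过 FillSet 调用 set 的 UnionWith 批量去重。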

这里的set是内部实现的一个轻量级的hash set 具体代码下一部分介绍

/// <summary>
/// An iterator that yields the distinct values in an <see cref="IEnumerable{TSource}"/>.
/// </summary>
/// <remarks>
/// During lazy enumeration every element is passed to <c>Set.Add</c> and is yielded only
/// when <c>Add</c> returns <c>true</c>. The eager paths (<see cref="ToArray"/>,
/// <see cref="ToList"/>, <see cref="GetCount"/>) build a fresh set with <c>UnionWith</c> instead.
/// </remarks>
/// <typeparam name="TSource">The type of the source enumerable.</typeparam>
private sealed class DistinctIterator<TSource> : Iterator<TSource>, IIListProvider<TSource>
{
    private readonly IEnumerable<TSource> _source;
    private readonly IEqualityComparer<TSource> _comparer;

    // Items already yielded; created on the first MoveNext and released in Dispose.
    private Set<TSource> _set;

    // Enumerator over _source; created on the first MoveNext and released in Dispose.
    private IEnumerator<TSource> _enumerator;

    /// <summary>
    /// Creates an iterator over the distinct elements of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The sequence to de-duplicate; must not be <c>null</c>.</param>
    /// <param name="comparer">The comparer handed to the set; may be <c>null</c>.</param>
    public DistinctIterator(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
    {
        Debug.Assert(source != null);
        _source = source;
        _comparer = comparer;
    }

    /// <summary>Creates a new iterator over the same source and comparer.</summary>
    public override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer);

    /// <summary>
    /// Advances to the next element that has not been seen before.
    /// </summary>
    /// <returns><c>true</c> if a new distinct element is available; otherwise, <c>false</c>.</returns>
    public override bool MoveNext()
    {
        // NOTE(review): the state numbers (1 = first call, 2 = iterating) are restored to match
        // the upstream corefx source; the base Iterator<TSource> that assigns the initial state
        // is not visible here - confirm against it.
        switch (_state)
        {
            case 1:
                _enumerator = _source.GetEnumerator();
                if (!_enumerator.MoveNext())
                {
                    // Empty source: nothing to yield.
                    Dispose();
                    return false;
                }

                // The first element is always distinct, so the set is created only
                // once the source is known to be non-empty.
                TSource element = _enumerator.Current;
                _set = new Set<TSource>(_comparer);
                _set.Add(element);
                _current = element;
                _state = 2;
                return true;
            case 2:
                while (_enumerator.MoveNext())
                {
                    element = _enumerator.Current;

                    // Add returns false for an item that is already in the set.
                    if (_set.Add(element))
                    {
                        _current = element;
                        return true;
                    }
                }

                break;
        }

        Dispose();
        return false;
    }

    /// <summary>
    /// Disposes the source enumerator (if one was created), drops the set, and then
    /// calls the base implementation.
    /// </summary>
    public override void Dispose()
    {
        if (_enumerator != null)
        {
            _enumerator.Dispose();
            _enumerator = null;
            _set = null;
        }

        base.Dispose();
    }

    /// <summary>Builds a new set containing every element of the source.</summary>
    private Set<TSource> FillSet()
    {
        Set<TSource> set = new Set<TSource>(_comparer);
        set.UnionWith(_source);
        return set;
    }

    /// <summary>Enumerates the whole source and returns the distinct elements as an array.</summary>
    public TSource[] ToArray() => FillSet().ToArray();

    /// <summary>Enumerates the whole source and returns the distinct elements as a list.</summary>
    public List<TSource> ToList() => FillSet().ToList();

    /// <summary>
    /// Returns the number of distinct elements, or -1 when <paramref name="onlyIfCheap"/> is
    /// <c>true</c>, because counting requires enumerating the whole source.
    /// </summary>
    public int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : FillSet().Count;
}

内部密封的哈希Set

这部分其实是distinct实现的重点,所以内容较多。

在添加元素(Add)的时候会对数据进行过滤:如果集合中已经存在相等的元素,返回 false;否则把元素写入槽位数组、挂到对应哈希桶的链上,并返回 true。

/// <summary>
/// A lightweight hash set.
/// </summary>
/// <remarks>
/// Items are stored in insertion order in <c>_slots</c>; <c>_buckets</c> maps a hash code to the
/// head of a chain of slots. <see cref="Remove"/> unlinks a slot but does not decrement the
/// item count and leaves the slot's hash code at -1, so <see cref="Count"/>,
/// <see cref="ToArray"/> and <see cref="ToList"/> still include removed slots, and a later
/// resize would compute a negative bucket index for such a slot. Do not add items after
/// removing one.
/// </remarks>
/// <typeparam name="TElement">The type of the set's items.</typeparam>
internal sealed class Set<TElement>
{
    // ---- Fields ----

    /// <summary>
    /// The comparer used to hash and compare items in the set.
    /// </summary>
    private readonly IEqualityComparer<TElement> _comparer;

    /// <summary>
    /// The hash buckets, which are used to index into the slots.
    /// Each entry holds (slot index + 1), so the array default of 0 means "empty bucket".
    /// </summary>
    private int[] _buckets;

    /// <summary>
    /// The slots, each of which store an item and its hash code.
    /// </summary>
    private Slot[] _slots;

    /// <summary>
    /// An entry in the hash set.
    /// </summary>
    private struct Slot
    {
        /// <summary>
        /// The hash code of the item.
        /// </summary>
        internal int _hashCode;

        /// <summary>
        /// In the case of a hash collision, the index of the next slot to probe
        /// (-1 when this is the last slot in the chain).
        /// </summary>
        internal int _next;

        /// <summary>
        /// The item held by this slot.
        /// </summary>
        internal TElement _value;
    }

    /// <summary>
    /// The number of items in this set.
    /// </summary>
    private int _count;

    // ---- Constructor ----

    /// <summary>
    /// Constructs a set that compares items with the specified comparer.
    /// </summary>
    /// <param name="comparer">
    /// The comparer. If this is <c>null</c>, it defaults to <see cref="EqualityComparer{TElement}.Default"/>.
    /// </param>
    public Set(IEqualityComparer<TElement> comparer)
    {
        _comparer = comparer ?? EqualityComparer<TElement>.Default;
        _buckets = new int[7];
        _slots = new Slot[7];
    }

    // ---- Add ----

    /// <summary>
    /// Attempts to add an item to this set.
    /// </summary>
    /// <param name="value">The item to add.</param>
    /// <returns>
    /// <c>true</c> if the item was not in the set; otherwise, <c>false</c>.
    /// </returns>
    public bool Add(TElement value)
    {
        // Non-negative hash of the value (0 for null, otherwise taken from _comparer).
        int hashCode = InternalGetHashCode(value);

        // Walk the chain of the matching bucket. The last slot in a chain has _next == -1,
        // which ends the loop.
        for (int i = _buckets[hashCode % _buckets.Length] - 1; i >= 0; i = _slots[i]._next)
        {
            // Equals is only consulted when the stored hash code already matches.
            if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
            {
                return false;
            }
        }

        // All slots are used: grow to (2 * count + 1) before inserting.
        if (_count == _slots.Length)
        {
            Resize();
        }

        int index = _count;
        _count++;

        // The bucket index is the hash modulo the bucket count; it is computed after the
        // possible resize because the bucket count may have changed.
        int bucket = hashCode % _buckets.Length;
        _slots[index]._hashCode = hashCode;
        _slots[index]._value = value;
        _slots[index]._next = _buckets[bucket] - 1; // link to the previous head of the chain
        _buckets[bucket] = index + 1;               // the new slot becomes the head
        return true;
    }

    // ---- Remove ----

    /// <summary>
    /// Attempts to remove an item from this set.
    /// </summary>
    /// <remarks>
    /// The slot is unlinked and cleared, but the item count is not decremented.
    /// </remarks>
    /// <param name="value">The item to remove.</param>
    /// <returns>
    /// <c>true</c> if the item was in the set; otherwise, <c>false</c>.
    /// </returns>
    public bool Remove(TElement value)
    {
        int hashCode = InternalGetHashCode(value);
        int bucket = hashCode % _buckets.Length;
        int last = -1; // index of the previous slot in the chain, -1 while at the head
        for (int i = _buckets[bucket] - 1; i >= 0; last = i, i = _slots[i]._next)
        {
            if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
            {
                if (last < 0)
                {
                    // Removing the head: the bucket now points at the next slot.
                    _buckets[bucket] = _slots[i]._next + 1;
                }
                else
                {
                    // Removing from the middle or tail: bypass this slot.
                    _slots[last]._next = _slots[i]._next;
                }

                // Clear the slot so it no longer matches any lookup or holds a reference.
                _slots[i]._hashCode = -1;
                _slots[i]._value = default(TElement);
                _slots[i]._next = -1;
                return true;
            }
        }

        return false;
    }

    // ---- Resize ----

    /// <summary>
    /// Expands the capacity of this set to double the current capacity, plus one.
    /// </summary>
    private void Resize()
    {
        // checked: raise an overflow error instead of wrapping if the new size exceeds int range.
        int newSize = checked((_count * 2) + 1);
        int[] newBuckets = new int[newSize];
        Slot[] newSlots = new Slot[newSize];
        Array.Copy(_slots, 0, newSlots, 0, _count);

        // Rebuild every chain, because the bucket index depends on the bucket count.
        for (int i = 0; i < _count; i++)
        {
            int bucket = newSlots[i]._hashCode % newSize;
            newSlots[i]._next = newBuckets[bucket] - 1;
            newBuckets[bucket] = i + 1;
        }

        _buckets = newBuckets;
        _slots = newSlots;
    }

    /// <summary>
    /// Creates an array from the items in this set.
    /// </summary>
    /// <returns>An array of the items in this set.</returns>
    public TElement[] ToArray()
    {
        TElement[] array = new TElement[_count];
        for (int i = 0; i != array.Length; ++i)
        {
            array[i] = _slots[i]._value;
        }

        return array;
    }

    /// <summary>
    /// Creates a list from the items in this set.
    /// </summary>
    /// <returns>A list of the items in this set.</returns>
    public List<TElement> ToList()
    {
        int count = _count;
        List<TElement> list = new List<TElement>(count);
        for (int i = 0; i != count; ++i)
        {
            list.Add(_slots[i]._value);
        }

        return list;
    }

    // ---- Count and UnionWith (UnionWith simply calls Add for every item) ----

    /// <summary>
    /// The number of items in this set.
    /// </summary>
    public int Count => _count;

    /// <summary>
    /// Unions this set with an enumerable.
    /// </summary>
    /// <param name="other">The enumerable.</param>
    public void UnionWith(IEnumerable<TElement> other)
    {
        Debug.Assert(other != null);

        foreach (TElement item in other)
        {
            Add(item);
        }
    }

    // ---- Internal hashing ----

    /// <summary>
    /// Gets the hash code of the provided value with its sign bit zeroed out, so that modulo has a positive result.
    /// </summary>
    /// <param name="value">The value to hash.</param>
    /// <returns>The lower 31 bits of the value's hash code, or 0 when the value is <c>null</c>.</returns>
    private int InternalGetHashCode(TElement value) => value == null ? 0 : _comparer.GetHashCode(value) & 0x7FFFFFFF;
}

扩展distinct的关键

实现IEqualityComparer接口

// Defines how two values of type T are compared for equality and hashed.
// The Set shown earlier in this article only calls Equals for items whose hash codes
// already match, so objects that Equals treats as equal must return the same hash code.
public interface IEqualityComparer<in T>
{
// Returns true if the specified objects are equal; otherwise, false.
bool Equals(T x, T y);
// Returns a hash code for the specified object.
// Exceptions:
// T:System.ArgumentNullException:
// The type of obj is a reference type and obj is null.
int GetHashCode(T obj);
}

distinct扩展方法

使用params,支持多字段。

/// <summary>
/// Extension methods for de-duplicating sequences by selected fields.
/// </summary>
public static class ComparerHelper
{
    /// <summary>
    /// Custom Distinct extension: returns the distinct elements of <paramref name="source"/>,
    /// using a <see cref="CompareEntityFields{T}"/> comparer built from the given selectors.
    /// </summary>
    /// <typeparam name="T">The type of the objects to de-duplicate.</typeparam>
    /// <param name="source">The sequence to de-duplicate.</param>
    /// <param name="getfield">Delegates that return the field values used for comparison.</param>
    /// <returns>The sequence produced by <c>Distinct</c> with the field-based comparer.</returns>
    public static IEnumerable<T> DistinctEx<T>(this IEnumerable<T> source, params Func<T, object>[] getfield)
    {
        var fieldComparer = new CompareEntityFields<T>(getfield);
        return source.Distinct(fieldComparer);
    }
}
/// <summary>
/// An equality comparer that treats two objects as equal when every selected field
/// value compares equal. With no selectors it behaves like
/// <see cref="EqualityComparer{T}.Default"/>.
/// </summary>
/// <typeparam name="T">The type of objects to compare.</typeparam>
public class CompareEntityFields<T> : IEqualityComparer<T>
{
    private readonly Func<T, object>[] _compareFields;

    /// <summary>
    /// Creates a comparer that compares objects by the given fields.
    /// </summary>
    /// <param name="compareFields">Delegates that return the field values to compare.</param>
    public CompareEntityFields(params Func<T, object>[] compareFields)
    {
        _compareFields = compareFields;
    }

    /// <summary>Whether at least one field selector was supplied.</summary>
    private bool HasFields => _compareFields != null && _compareFields.Length > 0;

    /// <summary>Determines whether the specified objects are equal.</summary>
    /// <param name="x">The first object of type T to compare.</param>
    /// <param name="y">The second object of type T to compare.</param>
    /// <returns>true if the specified objects are equal; otherwise, false.</returns>
    bool IEqualityComparer<T>.Equals(T x, T y)
    {
        if (!HasFields)
        {
            return EqualityComparer<T>.Default.Equals(x, y);
        }

        // Null elements cannot be passed to the selectors: two nulls are equal,
        // and a null never equals a non-null element.
        if (x == null || y == null)
        {
            return x == null && y == null;
        }

        foreach (var func in _compareFields)
        {
            // object.Equals handles null field values on either side.
            if (!object.Equals(func(x), func(y)))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Returns a hash code for the specified object.</summary>
    /// <remarks>
    /// The hash is built from the same field values that Equals compares, so objects
    /// that compare equal always produce the same hash code. Hashing
    /// <c>obj.ToString()</c> does not guarantee that, which can leave field-equal
    /// objects un-deduplicated.
    /// </remarks>
    /// <param name="obj">The object for which a hash code is to be returned.</param>
    /// <returns>A hash code for the specified object; 0 when <paramref name="obj"/> is null.</returns>
    int IEqualityComparer<T>.GetHashCode(T obj)
    {
        if (obj == null)
        {
            return 0;
        }

        if (!HasFields)
        {
            return EqualityComparer<T>.Default.GetHashCode(obj);
        }

        // unchecked: overflow while mixing hash codes is expected and harmless.
        unchecked
        {
            int hash = 17;
            foreach (var func in _compareFields)
            {
                object fieldValue = func(obj);
                hash = (hash * 31) + (fieldValue == null ? 0 : fieldValue.GetHashCode());
            }

            return hash;
        }
    }
}

Core源码(二) Linq的Distinct扩展的更多相关文章

  1. 一个由正则表达式引发的血案 vs2017使用rdlc实现批量打印 vs2017使用rdlc [asp.net core 源码分析] 01 - Session SignalR sql for xml path用法 MemCahe C# 操作Excel图形——绘制、读取、隐藏、删除图形 IOC,DIP,DI,IoC容器

    1. 血案由来 近期我在为Lazada卖家中心做一个自助注册的项目,其中的shop name校验规则较为复杂,要求:1. 英文字母大小写2. 数字3. 越南文4. 一些特殊字符,如“&”,“- ...

  2. ASP.NET Core[源码分析篇] - WebHost

    _configureServicesDelegates的承接 在[ASP.NET Core[源码分析篇] - Startup]这篇文章中,我们得知了目前为止(UseStartup),所有的动作都是在_ ...

  3. ASP.NET Core[源码分析篇] - Authentication认证

    原文:ASP.NET Core[源码分析篇] - Authentication认证 追本溯源,从使用开始 首先看一下我们通常是如何使用微软自带的认证,一般在Startup里面配置我们所需的依赖认证服务 ...

  4. AQS源码二探-JUC系列

    本文已在公众号上发布,感谢关注,期待和你交流. AQS源码二探-JUC系列 共享模式 doAcquireShared 这个方法是共享模式下获取资源失败,执行入队和等待操作,等待的线程在被唤醒后也在这个 ...

  5. ASP.NET Core源码学习(一)Hosting

    ASP.NET Core源码的学习,我们从Hosting开始, Hosting的GitHub地址为:https://github.com/aspnet/Hosting.git 朋友们可以从以上链接克隆 ...

  6. Unity UGUI图文混排源码(二)

    Unity UGUI图文混排源码(一):http://blog.csdn.net/qq992817263/article/details/51112304 Unity UGUI图文混排源码(二):ht ...

  7. asp.net core源码地址

    https://github.com/dotnet/corefx 这个是.net core的 开源项目地址 https://github.com/aspnet 这个下面是asp.net core 框架 ...

  8. JMeter 源码二次开发函数示例

    JMeter 源码二次开发函数示例 一.JMeter 5.0 版本 实际测试中,依靠jmeter自带的函数已经无法满足我们需求,这个时候就需要二次开发.本次导入的是jmeter 5.0的源码进行实际的 ...

  9. 一起来看CORE源码(一) ConcurrentDictionary

    先贴源码地址 https://github.com/dotnet/corefx/blob/master/src/System.Collections.Concurrent/src/System/Col ...

  10. ASP .NET CORE 源码地址

    ASP .NET CORE 源码地址:https://github.com/dotnet/ 下拉可以查找相应的源码信息, 例如:查找 ASP .NET CORE Microsoft.Extension ...

随机推荐

  1. 【RTOS】为H7配套的uCOS-III模板也是可以用于MDK AC6的,提供个模板

    AC6模板 链接:https://pan.baidu.com/s/1_4z_Lg51jMT87RrRM6Qs3g   提取码:2gns 原始的这个模板直接修改为AC6: 编译有几十处警告,修改下面三个 ...

  2. CSS画一个三角形,CSS绘制空心三角形,CSS实现箭头

     壹 ❀ 引 这两天因为项目工作较少,闲下来去看了GitHub上关于面试题日更收录的文章,毕竟明年有新的打算.在CSS收录中有一题是 用css创建一个三角形,并简述原理 .当然对于我来说画一个三角形是 ...

  3. Windows安装与配置—Node.js

    一.搭建环境 1.下载软件 打开下载链接:https://nodejs.org/zh-cn/ , 2.双击安装,指定安装位置 3.测试是否安装成功 用管理员方式打开命令行cmd,输入node -v如果 ...

  4. python3 连接 zookeeper

    zookeeper的增 删 改 查 watch监听. from kazoo.client import KazooClient import time,os import timeit os.chdi ...

  5. 利用Python开发智能阅卷系统

    前言 本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理. 作者: 机器学习与统计学 PS:如有需要Python学习资料的小伙伴可以加 ...

  6. 利用Azure虚拟机安装Dynamics 365 Customer Engagement之五:安装SQL Server

    我是微软Dynamics 365 & Power Platform方面的工程师罗勇,也是2015年7月到2018年6月连续三年Dynamics CRM/Business Solutions方面 ...

  7. [转]Introduction - Run Excel Macro using VBScript

    本文转自:https://wellsr.com/vba/2015/excel/run-macro-without-opening-excel-using-vbscript/ Have you ever ...

  8. SAP 资产相关日期

    1. Capitalized Date(资本化日期) 可以手工输入资本化日期,或者如果不输入,则通常默认写入First Acquisition Date (资产第一次购置时输入资产价值日). 2.As ...

  9. Docker启动时提示Get Permission Denied while trying to connect解决方法

    环境描述 vmware15虚拟机安装centos7.4 64位系统,docker版本19.03.2 问题描述 安装完docker后,执行docker相关命令 docker run ubuntu:15. ...

  10. dedecmsV5.7 调用其他站点的数据库的数据的方法

    问题:网站是用dedecmsv5.7写的,后来加了一套论坛discuzX3.4.因为dede要调用dz的数据,本来用jsonp跨域请求的数据,但是m端掉用的时候会把请求的链接的域名后面自动加个/m(不 ...