Core源码(二) Linq的Distinct扩展
先贴源码地址
https://github.com/dotnet/corefx/tree/master/src/System.Linq/src
.NET CORE很大一个好处就是代码的开源,你可以详细的查看你使用类的源代码,并学习微软的写法和实现思路。我们这个系列熟悉基本类库是一个目的,另一个目的就是学习微软的实现思路和编程方法。
今天我们就单独讨论的问题是linq中的distinct方法是如何实现。最后还会有我们实际编程时候对distinct方法的扩展。
System.Linq
linq中Distinct方法在Enumerable类中
Enumerable
public static partial class Enumerable
内部去重方法实现有2个重载
1
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source) => Distinct(source, null);
2
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
if (source == null)
{
throw Error.ArgumentNull(nameof(source));
}
return new DistinctIterator<TSource>(source, comparer);
}
去重迭代器DistinctIterator
private sealed class DistinctIterator
去重迭代器,先把元素都加到Set<TSource> _set;中,然后用set的UnionWith去重
这里的set是内部实现的一个轻量级的hash set 具体代码下一部分介绍
/// <summary>
/// An iterator that yields the distinct values in an <see cref="IEnumerable{TSource}"/>.
/// </summary>
/// <typeparam name="TSource">The type of the source enumerable.</typeparam>
private sealed class DistinctIterator<TSource> : Iterator<TSource>, IIListProvider<TSource>
{
private readonly IEnumerable<TSource> _source;
private readonly IEqualityComparer<TSource> _comparer;
private Set<TSource> _set;
private IEnumerator<TSource> _enumerator; public DistinctIterator(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
Debug.Assert(source != null);
_source = source;
_comparer = comparer;
}
public override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer); public override bool MoveNext()
{
switch (_state)
{
case :
_enumerator = _source.GetEnumerator();
if (!_enumerator.MoveNext())
{
Dispose();
return false;
}
TSource element = _enumerator.Current;
_set = new Set<TSource>(_comparer);
_set.Add(element);
_current = element;
_state = ;
return true;
case :
while (_enumerator.MoveNext())
{
element = _enumerator.Current;
if (_set.Add(element))
{
_current = element;
return true;
}
}
break;
}
Dispose();
return false;
}
public override void Dispose()
{
if (_enumerator != null)
{
_enumerator.Dispose();
_enumerator = null;
_set = null;
}
base.Dispose();
}
private Set<TSource> FillSet()
{
Set<TSource> set = new Set<TSource>(_comparer);
set.UnionWith(_source);
return set;
} public TSource[] ToArray() => FillSet().ToArray();
public List<TSource> ToList() => FillSet().ToList();
public int GetCount(bool onlyIfCheap) => onlyIfCheap ? - : FillSet().Count;
}
内部密封的哈希Set
这部分其实是distinct实现的重点,所以内容较多。
在添加元素的时候会对数据进行过滤,如果相同返回false,不同的才会被加入哈希链表中。
/// <summary>
/// A lightweight hash set.
/// </summary>
/// <typeparam name="TElement">The type of the set's items.</typeparam>
internal sealed class Set<TElement>
{
变量
/// <summary>
/// The comparer used to hash and compare items in the set.
/// </summary>
private readonly IEqualityComparer<TElement> _comparer; /// <summary>
/// The hash buckets, which are used to index into the slots.
/// </summary>
private int[] _buckets; /// <summary>
/// The slots, each of which store an item and its hash code.
/// </summary>
private Slot[] _slots;
/// <summary>
/// An entry in the hash set.
/// </summary> private struct Slot
{
/// <summary>
/// The hash code of the item.
/// </summary>
internal int _hashCode; /// <summary>
/// In the case of a hash collision(碰撞), the index of the next slot to probe(查看).
/// </summary>
internal int _next; /// <summary>
/// The item held by this slot.
/// </summary>
internal TElement _value;
} /// <summary>
/// The number of items in this set.
/// </summary>
private int _count;
构造函数
/// <summary>
/// Constructs a set that compares items with the specified comparer.
/// </summary>
/// <param name="comparer">
/// The comparer. If this is <c>null</c>, it defaults to <see cref="EqualityComparer{TElement}.Default"/>.
/// </param>
public Set(IEqualityComparer<TElement> comparer)
{
_comparer = comparer ?? EqualityComparer<TElement>.Default;
_buckets = new int[];
_slots = new Slot[];
}
新增方法
/// <summary>
/// Attempts to add an item to this set.
/// </summary>
/// <param name="value">The item to add.</param>
/// <returns>
/// <c>true</c> if the item was not in the set; otherwise, <c>false</c>.
/// </returns>
public bool Add(TElement value)
{
//根据值获取哈希值 最终调用的是_comparer.GetHashCode(value)
int hashCode = InternalGetHashCode(value);
//遍历对比值 直接找到对应的桶,遍历桶中的元素 _slots[i]._next最后一个值会是-1,所以会跳出循环
for (int i = _buckets[hashCode % _buckets.Length] - ; i >= ; i = _slots[i]._next)
{
if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
{
return false;
}
}
//如果超出长度,就扩展 乘以2加1
if (_count == _slots.Length)
{
Resize();
} int index = _count;
_count++;
int bucket = hashCode % _buckets.Length;//这里具体桶的位置需要除以总体长度,这样空间利用率更好
_slots[index]._hashCode = hashCode;
_slots[index]._value = value;
_slots[index]._next = _buckets[bucket] - ;//桶中前一个元素的位置索引
_buckets[bucket] = index + ;
return true;
}
去除方法
/// <summary>
/// Attempts to remove an item from this set.
/// </summary>
/// <param name="value">The item to remove.</param>
/// <returns>
/// <c>true</c> if the item was in the set; otherwise, <c>false</c>.
/// </returns>
public bool Remove(TElement value)
{
int hashCode = InternalGetHashCode(value);
int bucket = hashCode % _buckets.Length;
int last = -;
for (int i = _buckets[bucket] - ; i >= ; last = i, i = _slots[i]._next)
{
if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
{
if (last < )
{
_buckets[bucket] = _slots[i]._next + ;
}
else
{
_slots[last]._next = _slots[i]._next;
} _slots[i]._hashCode = -;
_slots[i]._value = default(TElement);
_slots[i]._next = -;
return true;
}
} return false;
}
扩展set
/// <summary>
/// Expands the capacity of this set to double the current capacity, plus one.
/// </summary>
private void Resize()
{
int newSize = checked((_count * ) + );//这个要检测是否超出int长度限制
int[] newBuckets = new int[newSize];
Slot[] newSlots = new Slot[newSize];
Array.Copy(_slots, , newSlots, , _count);//赋值newSlots数组
for (int i = ; i < _count; i++)
{
int bucket = newSlots[i]._hashCode % newSize;
newSlots[i]._next = newBuckets[bucket] - ;//重新记录桶位置
newBuckets[bucket] = i + ;
} _buckets = newBuckets;
_slots = newSlots;
} /// <summary>
/// Creates an array from the items in this set.
/// </summary>
/// <returns>An array of the items in this set.</returns>
public TElement[] ToArray()
{
TElement[] array = new TElement[_count];
for (int i = ; i != array.Length; ++i)
{
array[i] = _slots[i]._value;
} return array;
} /// <summary>
/// Creates a list from the items in this set.
/// </summary>
/// <returns>A list of the items in this set.</returns>
public List<TElement> ToList()
{
int count = _count;
List<TElement> list = new List<TElement>(count);
for (int i = ; i != count; ++i)
{
list.Add(_slots[i]._value);
} return list;
}
UnionWith方法,实际是执行add
/// <summary>
/// The number of items in this set.
/// </summary>
public int Count => _count; /// <summary>
/// Unions this set with an enumerable.
/// </summary>
/// <param name="other">The enumerable.</param>
public void UnionWith(IEnumerable<TElement> other)
{
Debug.Assert(other != null); foreach (TElement item in other)
{
Add(item);
}
}
内部哈希方法
/// <summary>
/// Gets the hash code of the provided value with its sign bit zeroed out, so that modulo has a positive result.
/// </summary>
/// <param name="value">The value to hash.</param>
/// <returns>The lower 31 bits of the value's hash code.</returns>
private int InternalGetHashCode(TElement value) => value == null ? : _comparer.GetHashCode(value) & 0x7FFFFFFF;
}
扩展distinct的关键
实现IEqualityComparer接口
public interface IEqualityComparer<in T>
{
// true if the specified objects are equal; otherwise, false.
bool Equals(T x, T y);
// Returns a hash code for the specified object.
// 异常:
// T:System.ArgumentNullException:
// The type of obj is a reference type and obj is null.
int GetHashCode(T obj);
}
distinct扩展方法
使用params,支持多字段。
public static class ComparerHelper
{
/// <summary>
/// 自定义Distinct扩展方法
/// </summary>
/// <typeparam name="T">要去重的对象类</typeparam>
/// <param name="source">要去重的对象</param>
/// <param name="getfield">获取自定义去重字段的委托</param>
/// <returns></returns>
public static IEnumerable<T> DistinctEx<T>(this IEnumerable<T> source, params Func<T, object>[] getfield)
{
return source.Distinct(new CompareEntityFields<T>(getfield));
}
}
public class CompareEntityFields<T> : IEqualityComparer<T>
{
private readonly Func<T, object>[] _compareFields; /// <summary>
/// 可以根据字段比对数据
/// </summary>
/// <param name="compareFields">比对字段引用</param>
public CompareEntityFields(params Func<T, object>[] compareFields)
{
_compareFields = compareFields;
} /// <summary>Determines whether the specified objects are equal.</summary>
/// <param name="x">The first object of type T to compare.</param>
/// <param name="y">The second object of type T to compare.</param>
/// <returns>true if the specified objects are equal; otherwise, false.</returns>
bool IEqualityComparer<T>.Equals(T x, T y)
{
if (_compareFields == null || _compareFields.Length <= )
{
return EqualityComparer<T>.Default.Equals(x, y);
} bool result = true;
foreach (var func in _compareFields)
{
var xv = func(x);
var yv = func(y);
result = xv == null && yv == null || Equals(xv, yv);
if (!result) break;
} return result;
} /// <summary>Returns a hash code for the specified object.</summary>
/// <param name="obj">The <see cref="T:System.Object"></see> for which a hash code is to be returned.</param>
/// <returns>A hash code for the specified object.</returns>
/// <exception cref="T:System.ArgumentNullException">
/// The type of <paramref name="obj">obj</paramref> is a reference type
/// and <paramref name="obj">obj</paramref> is null.
/// </exception>
int IEqualityComparer<T>.GetHashCode(T obj)
{
return obj.ToString().GetHashCode();
}
}
Core源码(二) Linq的Distinct扩展的更多相关文章
- 一个由正则表达式引发的血案 vs2017使用rdlc实现批量打印 vs2017使用rdlc [asp.net core 源码分析] 01 - Session SignalR sql for xml path用法 MemCahe C# 操作Excel图形——绘制、读取、隐藏、删除图形 IOC,DIP,DI,IoC容器
1. 血案由来 近期我在为Lazada卖家中心做一个自助注册的项目,其中的shop name校验规则较为复杂,要求:1. 英文字母大小写2. 数字3. 越南文4. 一些特殊字符,如“&”,“- ...
- ASP.NET Core[源码分析篇] - WebHost
_configureServicesDelegates的承接 在[ASP.NET Core[源码分析篇] - Startup]这篇文章中,我们得知了目前为止(UseStartup),所有的动作都是在_ ...
- ASP.NET Core[源码分析篇] - Authentication认证
原文:ASP.NET Core[源码分析篇] - Authentication认证 追本溯源,从使用开始 首先看一下我们通常是如何使用微软自带的认证,一般在Startup里面配置我们所需的依赖认证服务 ...
- AQS源码二探-JUC系列
本文已在公众号上发布,感谢关注,期待和你交流. AQS源码二探-JUC系列 共享模式 doAcquireShared 这个方法是共享模式下获取资源失败,执行入队和等待操作,等待的线程在被唤醒后也在这个 ...
- ASP.NET Core源码学习(一)Hosting
ASP.NET Core源码的学习,我们从Hosting开始, Hosting的GitHub地址为:https://github.com/aspnet/Hosting.git 朋友们可以从以上链接克隆 ...
- Unity UGUI图文混排源码(二)
Unity UGUI图文混排源码(一):http://blog.csdn.net/qq992817263/article/details/51112304 Unity UGUI图文混排源码(二):ht ...
- asp.net core源码地址
https://github.com/dotnet/corefx 这个是.net core的 开源项目地址 https://github.com/aspnet 这个下面是asp.net core 框架 ...
- JMeter 源码二次开发函数示例
JMeter 源码二次开发函数示例 一.JMeter 5.0 版本 实际测试中,依靠jmeter自带的函数已经无法满足我们需求,这个时候就需要二次开发.本次导入的是jmeter 5.0的源码进行实际的 ...
- 一起来看CORE源码(一) ConcurrentDictionary
先贴源码地址 https://github.com/dotnet/corefx/blob/master/src/System.Collections.Concurrent/src/System/Col ...
- ASP .NET CORE 源码地址
ASP .NET CORE 源码地址:https://github.com/dotnet/ 下拉可以查找相应的源码信息, 例如:查找 ASP .NET CORE Microsoft.Extension ...
随机推荐
- 【RTOS】为H7配套的uCOS-III模板也是可以用于MDK AC6的,提供个模板
AC6模板 链接:https://pan.baidu.com/s/1_4z_Lg51jMT87RrRM6Qs3g 提取码:2gns 原始的这个模板直接修改为AC6: 编译有几十处警告,修改下面三个 ...
- CSS画一个三角形,CSS绘制空心三角形,CSS实现箭头
壹 ❀ 引 这两天因为项目工作较少,闲下来去看了GitHub上关于面试题日更收录的文章,毕竟明年有新的打算.在CSS收录中有一题是 用css创建一个三角形,并简述原理 .当然对于我来说画一个三角形是 ...
- Windows安装与配置—Node.js
一.搭建环境 1.下载软件 打开下载链接:https://nodejs.org/zh-cn/ , 2.双击安装,指定安装位置 3.测试是否安装成功 用管理员方式打开命令行cmd,输入node -v如果 ...
- python3 连接 zookeeper
zookeeper的增 删 改 查 watch监听. from kazoo.client import KazooClient import time,os import timeit os.chdi ...
- 利用Python开发智能阅卷系统
前言 本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理. 作者: 机器学习与统计学 PS:如有需要Python学习资料的小伙伴可以加 ...
- 利用Azure虚拟机安装Dynamics 365 Customer Engagement之五:安装SQL Server
我是微软Dynamics 365 & Power Platform方面的工程师罗勇,也是2015年7月到2018年6月连续三年Dynamics CRM/Business Solutions方面 ...
- [转]Introduction - Run Excel Macro using VBScript
本文转自:https://wellsr.com/vba/2015/excel/run-macro-without-opening-excel-using-vbscript/ Have you ever ...
- SAP 资产相关日期
1. Capitalized Date(资本化日期) 可以手工输入资本化日期,或者如果不输入,则通常默认写入First Acquisition Date (资产第一次购置时输入资产价值日). 2.As ...
- Docker启动时提示Get Permission Denied while trying to connect解决方法
环境描述 vmware15虚拟机安装centos7.4 64位系统,docker版本19.03.2 问题描述 安装完docker后,执行docker相关命令 docker run ubuntu:15. ...
- dedecmsV5.7 调用其他站点的数据库的数据的方法
问题:网站是用dedecmsv5.7写的,后来加了一套论坛discuzX3.4.因为dede要调用dz的数据,本来用jsonp跨域请求的数据,但是m端掉用的时候会把请求的链接的域名后面自动加个/m(不 ...