Core源码(二) Linq的Distinct扩展
先贴源码地址
https://github.com/dotnet/corefx/tree/master/src/System.Linq/src
.NET CORE很大一个好处就是代码的开源,你可以详细的查看你使用类的源代码,并学习微软的写法和实现思路。我们这个系列熟悉基本类库是一个目的,另一个目的就是学习微软的实现思路和编程方法。
今天我们单独讨论一个问题:LINQ 中的 Distinct 方法是如何实现的。最后还会给出实际编程时对 Distinct 方法的扩展。
System.Linq
linq中Distinct方法在Enumerable类中
Enumerable
public static partial class Enumerable
内部去重方法实现有2个重载
1
/// <summary>
/// Forwards to the comparer-taking <c>Distinct</c> overload, passing <c>null</c> as the comparer.
/// </summary>
/// <typeparam name="TSource">The element type of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to filter.</param>
/// <returns>Whatever the comparer-taking overload returns for <paramref name="source"/>.</returns>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
{
    return Distinct(source, null);
}
2
/// <summary>
/// Wraps <paramref name="source"/> in a <c>DistinctIterator</c> that uses <paramref name="comparer"/>.
/// </summary>
/// <typeparam name="TSource">The element type of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to filter; must not be <c>null</c>.</param>
/// <param name="comparer">The comparer handed to the iterator; may be <c>null</c>.</param>
/// <returns>A new <c>DistinctIterator</c> over <paramref name="source"/>.</returns>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
    if (source != null)
    {
        return new DistinctIterator<TSource>(source, comparer);
    }

    // Argument validation is eager: it happens at call time, not on first enumeration.
    throw Error.ArgumentNull(nameof(source));
}
去重迭代器DistinctIterator
private sealed class DistinctIterator
去重迭代器:逐个枚举(MoveNext)时调用 _set.Add 判断元素是否已经出现过,只返回首次出现的元素;而 ToArray / ToList / GetCount 则通过 FillSet 调用 Set 的 UnionWith 一次性完成去重。
这里的set是内部实现的一个轻量级的hash set 具体代码下一部分介绍
/// <summary>
/// An iterator that yields the distinct values in an <see cref="IEnumerable{TSource}"/>.
/// </summary>
/// <typeparam name="TSource">The type of the source enumerable.</typeparam>
private sealed class DistinctIterator<TSource> : Iterator<TSource>, IIListProvider<TSource>
{
    private readonly IEnumerable<TSource> _source;
    private readonly IEqualityComparer<TSource> _comparer;

    // Items yielded so far; created once the source produces its first element.
    private Set<TSource> _set;
    private IEnumerator<TSource> _enumerator;

    /// <summary>
    /// Creates an iterator over <paramref name="source"/> that compares items with <paramref name="comparer"/>.
    /// </summary>
    /// <param name="source">The sequence to filter; asserted non-null.</param>
    /// <param name="comparer">The comparer passed on to the internal set; may be <c>null</c>.</param>
    public DistinctIterator(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
    {
        Debug.Assert(source != null);
        _source = source;
        _comparer = comparer;
    }

    /// <summary>
    /// Creates a fresh iterator over the same source and comparer.
    /// </summary>
    public override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer);

    /// <summary>
    /// Advances to the next item that has not been yielded before.
    /// </summary>
    /// <returns><c>true</c> if a new distinct item is available; otherwise, <c>false</c>.</returns>
    public override bool MoveNext()
    {
        // NOTE(review): _state and _current come from the Iterator<TSource> base class, which is
        // not shown here; state 1 is assumed to mean "enumeration not started yet" - confirm.
        switch (_state)
        {
            case 1:
                _enumerator = _source.GetEnumerator();
                if (!_enumerator.MoveNext())
                {
                    Dispose();
                    return false;
                }

                // The first element is always distinct, so the set is only allocated
                // once the source is known to be non-empty.
                TSource element = _enumerator.Current;
                _set = new Set<TSource>(_comparer);
                _set.Add(element);
                _current = element;
                _state = 2;
                return true;
            case 2:
                while (_enumerator.MoveNext())
                {
                    element = _enumerator.Current;

                    // Add returns false for items already seen, which are skipped.
                    if (_set.Add(element))
                    {
                        _current = element;
                        return true;
                    }
                }

                break;
        }

        Dispose();
        return false;
    }

    /// <summary>
    /// Disposes the source enumerator (if one was created), drops the set, then disposes the base iterator.
    /// </summary>
    public override void Dispose()
    {
        if (_enumerator != null)
        {
            _enumerator.Dispose();
            _enumerator = null;
            _set = null;
        }

        base.Dispose();
    }

    /// <summary>
    /// Builds a new set containing every item of the source in one pass.
    /// </summary>
    private Set<TSource> FillSet()
    {
        Set<TSource> set = new Set<TSource>(_comparer);
        set.UnionWith(_source);
        return set;
    }

    /// <summary>Materializes the distinct items into an array.</summary>
    public TSource[] ToArray() => FillSet().ToArray();

    /// <summary>Materializes the distinct items into a list.</summary>
    public List<TSource> ToList() => FillSet().ToList();

    /// <summary>
    /// Counts the distinct items, which requires enumerating the whole source.
    /// </summary>
    /// <param name="onlyIfCheap">When <c>true</c>, no enumeration is performed and -1 is returned.</param>
    /// <returns>The number of distinct items, or -1 when <paramref name="onlyIfCheap"/> is <c>true</c>.</returns>
    public int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : FillSet().Count;
}
内部密封的哈希Set
这部分其实是distinct实现的重点,所以内容较多。
在添加元素的时候会对数据进行过滤,如果相同返回false,不同的才会被加入哈希链表中。
/// <summary>
/// A lightweight hash set.
/// </summary>
/// <typeparam name="TElement">The type of the set's items.</typeparam>
internal sealed class Set<TElement>
{
变量
/// <summary>
/// The comparer used to hash and compare items in the set.
/// </summary>
private readonly IEqualityComparer<TElement> _comparer;

/// <summary>
/// The hash buckets, which are used to index into the slots.
/// </summary>
private int[] _buckets;

/// <summary>
/// The slots, each of which stores an item and its hash code.
/// </summary>
private Slot[] _slots;

/// <summary>
/// An entry in the hash set.
/// </summary>
private struct Slot
{
    /// <summary>
    /// The hash code of the item.
    /// </summary>
    internal int _hashCode;

    /// <summary>
    /// In the case of a hash collision, the index of the next slot to probe.
    /// </summary>
    internal int _next;

    /// <summary>
    /// The item held by this slot.
    /// </summary>
    internal TElement _value;
}

/// <summary>
/// The number of items in this set.
/// </summary>
private int _count;
构造函数
/// <summary>
/// Constructs a set that compares items with the specified comparer.
/// </summary>
/// <param name="comparer">
/// The comparer. If this is <c>null</c>, it defaults to <see cref="EqualityComparer{TElement}.Default"/>.
/// </param>
public Set(IEqualityComparer<TElement> comparer)
{
    _comparer = comparer ?? EqualityComparer<TElement>.Default;

    // Buckets and slots always have the same length; start small and let Resize grow them.
    _buckets = new int[7];
    _slots = new Slot[7];
}
新增方法
/// <summary>
/// Attempts to add an item to this set.
/// </summary>
/// <param name="value">The item to add.</param>
/// <returns>
/// <c>true</c> if the item was not in the set; otherwise, <c>false</c>.
/// </returns>
public bool Add(TElement value)
{
    int hashCode = InternalGetHashCode(value);

    // Buckets store (slot index + 1), so a zero entry means "empty" and becomes -1 here.
    // Walk the chain of the target bucket until _next is -1, looking for an equal item.
    for (int i = _buckets[hashCode % _buckets.Length] - 1; i >= 0; i = _slots[i]._next)
    {
        if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
        {
            return false;
        }
    }

    // All slots are in use: grow before appending.
    if (_count == _slots.Length)
    {
        Resize();
    }

    int index = _count;
    _count++;

    // Computed after the possible Resize, because the bucket count may have changed.
    int bucket = hashCode % _buckets.Length;
    _slots[index]._hashCode = hashCode;
    _slots[index]._value = value;

    // Link the new slot in front of the bucket's current chain head.
    _slots[index]._next = _buckets[bucket] - 1;
    _buckets[bucket] = index + 1;
    return true;
}
去除方法
/// <summary>
/// Attempts to remove an item from this set.
/// </summary>
/// <param name="value">The item to remove.</param>
/// <returns>
/// <c>true</c> if the item was in the set; otherwise, <c>false</c>.
/// </returns>
public bool Remove(TElement value)
{
    int hashCode = InternalGetHashCode(value);
    int bucket = hashCode % _buckets.Length;

    // Index of the previous slot in the chain, or -1 while still at the chain head.
    int last = -1;
    for (int i = _buckets[bucket] - 1; i >= 0; last = i, i = _slots[i]._next)
    {
        if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
        {
            if (last < 0)
            {
                // Removing the chain head: the bucket now points at the next slot (stored as index + 1).
                _buckets[bucket] = _slots[i]._next + 1;
            }
            else
            {
                // Removing from the middle or end: bypass this slot.
                _slots[last]._next = _slots[i]._next;
            }

            // The slot is cleared in place; _count is not decremented and the slot is not reused.
            _slots[i]._hashCode = -1;
            _slots[i]._value = default(TElement);
            _slots[i]._next = -1;
            return true;
        }
    }

    return false;
}
扩展set
/// <summary>
/// Expands the capacity of this set to double the current capacity, plus one.
/// </summary>
private void Resize()
{
    // checked: fail loudly instead of wrapping around if the new size overflows int.
    int newSize = checked((_count * 2) + 1);
    int[] newBuckets = new int[newSize];
    Slot[] newSlots = new Slot[newSize];
    Array.Copy(_slots, 0, newSlots, 0, _count);

    // Every bucket index depends on the table length, so rebuild all chains for the new size.
    for (int i = 0; i < _count; i++)
    {
        int bucket = newSlots[i]._hashCode % newSize;
        newSlots[i]._next = newBuckets[bucket] - 1;
        newBuckets[bucket] = i + 1;
    }

    _buckets = newBuckets;
    _slots = newSlots;
}

/// <summary>
/// Creates an array from the items in this set.
/// </summary>
/// <returns>An array of the first <c>_count</c> slot values, in slot order.</returns>
public TElement[] ToArray()
{
    TElement[] array = new TElement[_count];
    for (int i = 0; i != array.Length; ++i)
    {
        array[i] = _slots[i]._value;
    }

    return array;
}

/// <summary>
/// Creates a list from the items in this set.
/// </summary>
/// <returns>A list of the first <c>_count</c> slot values, in slot order.</returns>
public List<TElement> ToList()
{
    int count = _count;
    List<TElement> list = new List<TElement>(count);
    for (int i = 0; i != count; ++i)
    {
        list.Add(_slots[i]._value);
    }

    return list;
}
UnionWith方法,实际是执行add
/// <summary>
/// Gets how many items have been added to this set.
/// </summary>
public int Count
{
    get { return _count; }
}

/// <summary>
/// Adds every item of <paramref name="other"/> to this set; items already present are ignored.
/// </summary>
/// <param name="other">The sequence to merge in; asserted non-null.</param>
public void UnionWith(IEnumerable<TElement> other)
{
    Debug.Assert(other != null);

    using (IEnumerator<TElement> items = other.GetEnumerator())
    {
        while (items.MoveNext())
        {
            // The boolean result is irrelevant here: duplicates are simply skipped.
            Add(items.Current);
        }
    }
}
内部哈希方法
/// <summary>
/// Gets the hash code of the provided value with its sign bit zeroed out, so that modulo has a positive result.
/// </summary>
/// <param name="value">The value to hash.</param>
/// <returns>
/// The lower 31 bits of the value's hash code, or 0 when <paramref name="value"/> is <c>null</c>
/// (the comparer is not consulted for <c>null</c>).
/// </returns>
private int InternalGetHashCode(TElement value) => value == null ? 0 : _comparer.GetHashCode(value) & 0x7FFFFFFF;
}
扩展distinct的关键
实现IEqualityComparer接口
// Contract for comparing two values of type T for equality and for hashing them.
// Both members must agree: values that Equals reports as equal must produce the same hash code,
// otherwise hash-based collections will not find them.
public interface IEqualityComparer<in T>
{
    // Returns true if the specified objects are equal; otherwise, false.
    bool Equals(T x, T y);
    // Returns a hash code for the specified object.
    // Exceptions:
    // T:System.ArgumentNullException:
    // The type of obj is a reference type and obj is null.
    int GetHashCode(T obj);
}
distinct扩展方法
使用params,支持多字段。
/// <summary>
/// Extension helpers for de-duplicating sequences by selected fields.
/// </summary>
public static class ComparerHelper
{
    /// <summary>
    /// Custom <c>Distinct</c> extension that compares items through the supplied field selectors.
    /// </summary>
    /// <typeparam name="T">The type of the items to de-duplicate.</typeparam>
    /// <param name="source">The sequence to de-duplicate.</param>
    /// <param name="getfield">Delegates that each extract one field to compare on.</param>
    /// <returns>The result of <c>Distinct</c> using a <see cref="CompareEntityFields{T}"/> comparer.</returns>
    public static IEnumerable<T> DistinctEx<T>(this IEnumerable<T> source, params Func<T, object>[] getfield)
    {
        CompareEntityFields<T> fieldComparer = new CompareEntityFields<T>(getfield);
        return source.Distinct(fieldComparer);
    }
}
/// <summary>
/// Equality comparer that treats two <typeparamref name="T"/> values as equal when every selected
/// field compares equal. With no selectors it falls back to <see cref="EqualityComparer{T}.Default"/>.
/// </summary>
/// <typeparam name="T">The type of the objects to compare.</typeparam>
public class CompareEntityFields<T> : IEqualityComparer<T>
{
    private readonly Func<T, object>[] _compareFields;

    /// <summary>
    /// Creates a comparer that compares objects by the given fields.
    /// </summary>
    /// <param name="compareFields">Delegates that each extract one field to compare on.</param>
    public CompareEntityFields(params Func<T, object>[] compareFields)
    {
        _compareFields = compareFields;
    }

    /// <summary>Determines whether the specified objects are equal.</summary>
    /// <param name="x">The first object of type T to compare.</param>
    /// <param name="y">The second object of type T to compare.</param>
    /// <returns>true if the specified objects are equal; otherwise, false.</returns>
    bool IEqualityComparer<T>.Equals(T x, T y)
    {
        if (_compareFields == null || _compareFields.Length == 0)
        {
            return EqualityComparer<T>.Default.Equals(x, y);
        }

        // Never hand a null item to the selectors: two nulls are equal, a null and a non-null are not.
        if (x == null || y == null)
        {
            return x == null && y == null;
        }

        foreach (Func<T, object> selector in _compareFields)
        {
            // object.Equals already treats two null field values as equal.
            if (!object.Equals(selector(x), selector(y)))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Returns a hash code for the specified object.</summary>
    /// <param name="obj">The object for which a hash code is to be returned.</param>
    /// <returns>
    /// A hash code built from the same fields that <c>Equals</c> compares, so objects that are equal
    /// always hash alike; 0 when <paramref name="obj"/> is null.
    /// </returns>
    int IEqualityComparer<T>.GetHashCode(T obj)
    {
        if (obj == null)
        {
            return 0;
        }

        if (_compareFields == null || _compareFields.Length == 0)
        {
            return EqualityComparer<T>.Default.GetHashCode(obj);
        }

        // Hashing obj.ToString() would disagree with Equals whenever two objects share the selected
        // fields but print differently, and hash-based Distinct would then keep both.
        unchecked
        {
            int hash = 17;
            foreach (Func<T, object> selector in _compareFields)
            {
                object fieldValue = selector(obj);
                hash = (hash * 31) + (fieldValue == null ? 0 : fieldValue.GetHashCode());
            }

            return hash;
        }
    }
}
Core源码(二) Linq的Distinct扩展的更多相关文章
- 一个由正则表达式引发的血案 vs2017使用rdlc实现批量打印 vs2017使用rdlc [asp.net core 源码分析] 01 - Session SignalR sql for xml path用法 MemCahe C# 操作Excel图形——绘制、读取、隐藏、删除图形 IOC,DIP,DI,IoC容器
1. 血案由来 近期我在为Lazada卖家中心做一个自助注册的项目,其中的shop name校验规则较为复杂,要求:1. 英文字母大小写2. 数字3. 越南文4. 一些特殊字符,如“&”,“- ...
- ASP.NET Core[源码分析篇] - WebHost
_configureServicesDelegates的承接 在[ASP.NET Core[源码分析篇] - Startup]这篇文章中,我们得知了目前为止(UseStartup),所有的动作都是在_ ...
- ASP.NET Core[源码分析篇] - Authentication认证
原文:ASP.NET Core[源码分析篇] - Authentication认证 追本溯源,从使用开始 首先看一下我们通常是如何使用微软自带的认证,一般在Startup里面配置我们所需的依赖认证服务 ...
- AQS源码二探-JUC系列
本文已在公众号上发布,感谢关注,期待和你交流. AQS源码二探-JUC系列 共享模式 doAcquireShared 这个方法是共享模式下获取资源失败,执行入队和等待操作,等待的线程在被唤醒后也在这个 ...
- ASP.NET Core源码学习(一)Hosting
ASP.NET Core源码的学习,我们从Hosting开始, Hosting的GitHub地址为:https://github.com/aspnet/Hosting.git 朋友们可以从以上链接克隆 ...
- Unity UGUI图文混排源码(二)
Unity UGUI图文混排源码(一):http://blog.csdn.net/qq992817263/article/details/51112304 Unity UGUI图文混排源码(二):ht ...
- asp.net core源码地址
https://github.com/dotnet/corefx 这个是.net core的 开源项目地址 https://github.com/aspnet 这个下面是asp.net core 框架 ...
- JMeter 源码二次开发函数示例
JMeter 源码二次开发函数示例 一.JMeter 5.0 版本 实际测试中,依靠jmeter自带的函数已经无法满足我们需求,这个时候就需要二次开发.本次导入的是jmeter 5.0的源码进行实际的 ...
- 一起来看CORE源码(一) ConcurrentDictionary
先贴源码地址 https://github.com/dotnet/corefx/blob/master/src/System.Collections.Concurrent/src/System/Col ...
- ASP .NET CORE 源码地址
ASP .NET CORE 源码地址:https://github.com/dotnet/ 下拉可以查找相应的源码信息, 例如:查找 ASP .NET CORE Microsoft.Extension ...
随机推荐
- 服务监控之 Spring Boot Admin.
一.概述 开始阅读这篇文章之前,建议先阅读下<SpringBoot 之Actuator>,该篇文章提到 Spring Boot Actuator 提供了对单个Spring Boot的监控 ...
- Linux配置部署_新手向(三)——MySql安装与配置
目录 前言 安装 防火墙 小结 前言 马上就要放假了,按捺不住激动的心情(其实是实在敲不下去代码),就继续鼓捣虚拟机来做些常规的安装与使用吧,毕竟闲着也是闲着,唉,opengl还是难啊. 安装 其实网 ...
- [WPF 自定义控件]使用WindowChrome自定义RibbonWindow
1. 为什么要自定义RibbonWindow 自定义Window有可能是设计或功能上的要求,可以是非必要的,而自定义RibbonWindow则不一样: 如果程序使用了自定义样式的Window,为了统一 ...
- IL指令列表
使用编译器可以将C#代码编译为中间语言(Intermediate Language,IL)代码,中间语言是一种平台无关的指令集,最终会由CLR将中间语言字节码转换为对应平台的机器码从而执行:阅读IL代 ...
- vue之前端鉴权
vue项目前端鉴权方式常用的有以下三种: 1.渲染菜单时控制模块按钮的显示隐藏(不足:直接输入链接仍然可以访问模块) 2.在路由导航守卫中拦截,针对没有权限的模块进行重定向(不足:每次访问模块都需要鉴 ...
- vue-awesome-swiper中的数据异步加载
<template> <div> //第一个轮播 加了v-if 判断,可以实现 loop 轮循 <swiper v-if="gglist.length>1 ...
- 使用elementUI的日期选择框,两选择框关联时间限值
elementui 本身也提供了在一个输入框内关联选择时间的组件,非常好使,但无奈项目需要用两个输入框去关联的选择: <el-date-picker class="datepicker ...
- nmap的简单使用
主机探测 1.扫描单个主机 nmap 192.168.1.2 2.扫描整个子网 nmap 192.168.1.1/24 3.扫描多个目标 nmap 192.168.1.1 192.168.1.1.5 ...
- Java反射01 : 概念、入门示例、用途及注意事项
1.Java反射定义 本文转载自:https://blog.csdn.net/hanchao5272/article/details/79360452 官方定义如下: Reflection enabl ...
- HTTP 简述
HTTP 简介: 1.Hyper Text Transfer Protocol(超文本传输协议),主要用于 Web 浏览器和 Web 服务器之间的通信 2.它基于 TCP/IP 通信协议来传输数据 3 ...