[分词] C#SegList分词辅助类,帮助类（转载）

主要功能如下
最新的SegList分词辅助类,帮助类
看下面代码吧

/// <summary>

/// 类说明：SegList

/// 编 码 人：苏飞

/// 联系方式：361983679

/// 更新网站：http://www.sufeinet.com/thread-655-1-1.html

/// </summary>

using System;

using System.Collections;

using System.IO;

using System.Text.RegularExpressions;

namespace DotNet.Utilities

{

    /// <summary>
    /// Helper list used by the segmenter: an <see cref="ArrayList"/> wrapper that
    /// remembers the length of the longest item added and can order its items
    /// from longest to shortest.
    /// </summary>
    public class SegList
    {
        /// <summary>
        /// Largest <c>ToString().Length</c> seen among the items passed to <see cref="Add"/>.
        /// Not updated by <see cref="SetElem"/>.
        /// </summary>
        public int MaxLength;

        private ArrayList m_seg;

        /// <summary>Number of items currently stored.</summary>
        public int Count
        {
            get { return m_seg.Count; }
        }

        /// <summary>Creates an empty list.</summary>
        public SegList()
        {
            m_seg = new ArrayList();
            MaxLength = 0;
        }

        /// <summary>
        /// Appends an item and raises <see cref="MaxLength"/> if its string form is longer
        /// than anything added before.
        /// </summary>
        /// <param name="obj">Item to append; its <c>ToString()</c> is used for the length.</param>
        public void Add(object obj)
        {
            m_seg.Add(obj);

            int length = obj.ToString().Length;
            if (MaxLength < length)
            {
                MaxLength = length;
            }
        }

        /// <summary>
        /// Returns the item at index <paramref name="i"/>, or null when
        /// <paramref name="i"/> is at or beyond <see cref="Count"/>.
        /// </summary>
        public object GetElem(int i)
        {
            if (i < this.Count)
                return m_seg[i];
            else
                return null;
        }

        /// <summary>Overwrites the item at index <paramref name="i"/>.</summary>
        public void SetElem(int i, object obj)
        {
            m_seg[i] = obj;
        }

        /// <summary>Returns true when the list contains <paramref name="obj"/>.</summary>
        public bool Contains(object obj)
        {
            return m_seg.Contains(obj);
        }

        /// <summary>
        /// Sorts this list by item length, longest first.
        /// </summary>
        public void Sort()
        {
            Sort(this);
        }

        /// <summary>
        /// Sorts <paramref name="list"/> in place by item length, longest first.
        /// The placeholder string "null" is treated as length 0, so it sinks to the end.
        /// </summary>
        /// <param name="list">List to reorder.</param>
        public void Sort(SegList list)
        {
            // Plain selection sort: the strict ">" comparison keeps the first of several
            // equally long candidates, matching the original ordering.
            for (int i = 0; i < list.Count - 1; ++i)
            {
                int max = i;
                for (int j = i + 1; j < list.Count; ++j)
                {
                    string str1 = list.GetElem(j).ToString();
                    string str2 = list.GetElem(max).ToString();

                    int l1 = (str1 == "null") ? 0 : str1.Length;
                    int l2 = (str2 == "null") ? 0 : str2.Length;

                    if (l1 > l2)
                        max = j;
                }

                object o = list.GetElem(max);
                list.SetElem(max, list.GetElem(i));
                list.SetElem(i, o);
            }
        }
    }

    /// <summary>

    /// 分词类

    /// </summary>

    //----------------调用----------------------

    //Segment seg = new Segment();

    //seg.InitWordDics();

    //seg.EnablePrefix = true;

    //seg.Separator =" ";

    //seg.SegmentText("字符串", false).Trim();

    //-------------------------------------------

    public class Segment

    {

        #region 私有字段

        // Dictionary file locations. Each default is resolved through Server.MapPath when the
        // instance is created, so constructing a Segment requires a current HttpContext.
        private string m_DicPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sDict.dic");

        private string m_NoisePath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sNoise.dic");

        private string m_NumberPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sNumber.dic");

        private string m_WordPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sWord.dic");

        private string m_PrefixPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sPrefix.dic");

        // Two-level dictionary: first character -> (second character -> SegList of word tails).
        private Hashtable htWords;

        // Line-per-entry word lists loaded from the noise/number/letter/prefix files.
        private ArrayList alNoise;

        private ArrayList alNumber;

        private ArrayList alWord;

        private ArrayList alPrefix;

        // Duration of the last load/segment/sort operation, in milliseconds.
        private double m_EventTime = 0;

        /// <summary>
        /// Separator inserted between segmented words.
        /// </summary>
        private string m_Separator = " ";

        /// <summary>
        /// Regular expression used to test whether a character is a Chinese character.
        /// </summary>
        private string strChinese = "[\u4e00-\u9fa5]";

        #endregion

        #region 公有属性

        /// <summary>
        /// Gets or sets the path of the main word dictionary file.
        /// </summary>
        public string DicPath
        {
            get { return this.m_DicPath; }
            set { this.m_DicPath = value; }
        }

        /// <summary>
        /// Stores a value in the ASP.NET application state under the given key.
        /// A null value is replaced by the string " " before it is stored.
        /// </summary>
        /// <param name="key">Cache key.</param>
        /// <param name="val">Value to cache.</param>
        private static void SetCache(string key, object val)
        {
            if (val == null) val = " ";

            System.Web.HttpContext.Current.Application.Lock();
            try
            {
                System.Web.HttpContext.Current.Application.Set(key, val);
            }
            finally
            {
                // Always release the application-state lock, even if Set throws.
                System.Web.HttpContext.Current.Application.UnLock();
            }
        }

        /// <summary>
        /// Reads the value stored in the ASP.NET application state under the given key.
        /// </summary>
        /// <param name="key">Cache key.</param>
        /// <returns>Whatever Application.Get returns for the key.</returns>
        private static object GetCache(string key)
        {
            object cached = System.Web.HttpContext.Current.Application.Get(key);
            return cached;
        }

        /// <summary>
        /// Gets or sets the path of the noise-word file. The original author marked this
        /// as "currently unused".
        /// </summary>
        public string NoisePath
        {
            get { return this.m_NoisePath; }
            set { this.m_NoisePath = value; }
        }

        /// <summary>
        /// Gets or sets the path of the number dictionary file.
        /// </summary>
        public string NumberPath
        {
            get { return this.m_NumberPath; }
            set { this.m_NumberPath = value; }
        }

        /// <summary>
        /// Gets or sets the path of the letter dictionary file.
        /// </summary>
        public string WordPath
        {
            get { return this.m_WordPath; }
            set { this.m_WordPath = value; }
        }

        /// <summary>
        /// Gets or sets the path of the name-prefix dictionary, used for correcting
        /// the segmentation of personal names.
        /// </summary>
        public string PrefixPath
        {
            get { return this.m_PrefixPath; }
            set { this.m_PrefixPath = value; }
        }

        /// <summary>
        /// Gets or sets whether name correction is enabled.
        /// Reading returns true when the prefix list holds at least one entry (false when it
        /// has not been loaded yet). Setting true reloads the list from
        /// <see cref="PrefixPath"/>; setting false replaces it with an empty list.
        /// </summary>
        public bool EnablePrefix
        {
            get
            {
                // alPrefix stays null until InitWordDics or this setter runs.
                return alPrefix != null && alPrefix.Count != 0;
            }
            set
            {
                if (value)
                    alPrefix = LoadWords(PrefixPath, alPrefix);
                else
                    alPrefix = new ArrayList();
            }
        }

        /// <summary>
        /// Gets the time taken by the most recent load or segmentation operation, in
        /// milliseconds. For short input strings the segmentation time may be reported as 0.
        /// </summary>
        public double EventTime
        {
            get { return this.m_EventTime; }
        }

        /// <summary>
        /// Gets or sets the separator placed between words; a single space by default.
        /// Assigning null or an empty string is ignored.
        /// </summary>
        public string Separator
        {
            get { return this.m_Separator; }
            set
            {
                if (string.IsNullOrEmpty(value))
                {
                    return;
                }

                this.m_Separator = value;
            }
        }

        #endregion

        #region 构造方法

        /// <summary>
        /// Creates a segmenter that uses the default dictionary paths. No dictionaries are
        /// loaded here; call <see cref="InitWordDics"/> before segmenting.
        /// </summary>
        public Segment()
        {
        }

        /// <summary>
        /// Creates a segmenter with explicit dictionary paths and loads the dictionaries
        /// immediately.
        /// </summary>
        /// <param name="p_DicPath">Path of the main word dictionary.</param>
        /// <param name="p_NoisePath">Path of the noise-word file.</param>
        /// <param name="p_NumberPath">Path of the number dictionary.</param>
        /// <param name="p_WordPath">Path of the letter dictionary.</param>
        public Segment(string p_DicPath, string p_NoisePath, string p_NumberPath, string p_WordPath)
        {
            // Each argument goes to its own field; previously all four overwrote m_WordPath,
            // so only p_WordPath took effect and the other paths were silently ignored.
            m_DicPath = p_DicPath;
            m_NoisePath = p_NoisePath;
            m_NumberPath = p_NumberPath;
            m_WordPath = p_WordPath;

            this.InitWordDics();
        }

        #endregion

        #region 公有方法

        /// <summary>
        /// Loads all dictionaries. The main dictionary is parsed from <see cref="DicPath"/>
        /// only when no "jcms_dict" entry exists in the application cache; otherwise the
        /// cached table is reused. The noise, number, letter and prefix lists are always
        /// reloaded from their files. Updates <see cref="EventTime"/>.
        /// </summary>
        public void InitWordDics()
        {
            DateTime start = DateTime.Now;

            if (GetCache("jcms_dict") == null)
            {
                htWords = new Hashtable();

                // "using" closes the file even when a malformed line makes parsing throw.
                using (StreamReader reader = new StreamReader(DicPath, System.Text.Encoding.UTF8))
                {
                    string strline = reader.ReadLine();

                    // Reading stops at end of file or at the first blank line.
                    while (strline != null && strline.Trim() != "")
                    {
                        // NOTE(review): the literals 0/1/2 below were missing from this copy
                        // of the source and are restored from context (first char, second
                        // char, remainder) - confirm against the original ShootSeg code.
                        string strChar1 = strline.Substring(0, 1);
                        string strChar2 = strline.Substring(1, 1);

                        // First level: keyed by the first character of the word.
                        Hashtable father = (Hashtable)htWords[strChar1];
                        if (father == null)
                        {
                            father = new Hashtable();
                            htWords.Add(strChar1, father);
                        }

                        // Second level: keyed by the second character, holding the word tails.
                        SegList list = (SegList)father[strChar2];
                        if (list == null)
                        {
                            list = new SegList();
                            father.Add(strChar2, list);
                        }

                        // A two-character word has no tail; it is recorded as the marker "null".
                        if (strline.Length > 2)
                            list.Add(strline.Substring(2));
                        else
                            list.Add("null");

                        strline = reader.ReadLine();
                    }
                }

                SetCache("jcms_dict", htWords);
            }

            htWords = (Hashtable)GetCache("jcms_dict");

            alNoise = LoadWords(NoisePath, alNoise);
            alNumber = LoadWords(NumberPath, alNumber);
            alWord = LoadWords(WordPath, alWord);
            alPrefix = LoadWords(PrefixPath, alPrefix);

            TimeSpan duration = DateTime.Now - start;
            m_EventTime = duration.TotalMilliseconds;
        }

        /// <summary>
        /// Reads a UTF-8 text file into a new <see cref="ArrayList"/>, one entry per line.
        /// </summary>
        /// <param name="strPath">Path of the file to read.</param>
        /// <param name="list">Ignored: a fresh list is always created and returned.</param>
        /// <returns>A new list holding every line of the file, in file order.</returns>
        public ArrayList LoadWords(string strPath, ArrayList list)
        {
            list = new ArrayList();

            // "using" guarantees the reader is closed even if reading throws.
            using (StreamReader reader = new StreamReader(strPath, System.Text.Encoding.UTF8))
            {
                string strline = reader.ReadLine();
                while (strline != null)
                {
                    list.Add(strline);
                    strline = reader.ReadLine();
                }
            }

            return list;
        }

        /// <summary>
        /// Writes every word of the loaded dictionary to the console, one per line, as
        /// first character + second character + stored tail.
        /// </summary>
        public void OutWords()
        {
            IDictionaryEnumerator idEnumerator1 = htWords.GetEnumerator();
            while (idEnumerator1.MoveNext())
            {
                IDictionaryEnumerator idEnumerator2 = ((Hashtable)idEnumerator1.Value).GetEnumerator();
                while (idEnumerator2.MoveNext())
                {
                    SegList aa = (SegList)idEnumerator2.Value;
                    for (int i = 0; i < aa.Count; i++)
                    {
                        Console.WriteLine(idEnumerator1.Key.ToString() + idEnumerator2.Key.ToString() + aa.GetElem(i).ToString());
                    }
                }
            }
        }

        /// <summary>
        /// Writes every item of <paramref name="list"/> to the console, one per line.
        /// Does nothing when <paramref name="list"/> is null.
        /// </summary>
        /// <param name="list">List to print; may be null.</param>
        public void OutArrayList(ArrayList list)
        {
            if (list == null) return;

            for (int i = 0; i < list.Count; i++)
            {
                Console.WriteLine(list[i].ToString());
            }
        }

        /// <summary>
        /// Segments one piece of text into words joined by <see cref="Separator"/>.
        /// Line breaks are not supported by this overload.
        /// A "$" sentinel is appended to the input before processing and " $" is removed
        /// from the result; when the dictionary is not loaded the input is returned with
        /// the sentinel still attached. Updates the elapsed-time field on completion.
        /// </summary>
        /// <param name="strText">Text to segment.</param>
        /// <returns>The segmented text.</returns>
        // NOTE(review): this copy of the method has lost its numeric literals (for example
        // "int length = ;", "case :", "Substring(i, )"), so it does not compile as shown.
        // The code is deliberately left untouched here; restore the literals from the
        // original ShootSeg source rather than guessing them.
        public string SegmentText(string strText)

        {

            strText = (strText + "$").Trim();

            if (htWords == null) return strText;

            if (strText.Length < ) return strText;

            DateTime start = DateTime.Now;

            int length = ;

            int preFix = ;

            bool word = false;

            bool number = false;

            string reText = "";

            string strPrefix = "";

            string strLastChar = "";

            string strLastWords = Separator;

            for (int i = ; i < strText.Length - ; i++)

            {

                #region 对于每一个字的处理过程

                // Per-character processing.

                string strChar1 = strText.Substring(i, );

                string strChar2 = strText.Substring(i + , ).Trim();

                bool yes;

                SegList l;

                Hashtable h;

                if (reText.Length > ) strLastChar = reText.Substring(reText.Length - );

                if (strChar1 == " ")

                {

                    if ((number || word) && strLastChar != Separator) reText += this.Separator;

                    yes = true;

                }

                else

                    yes = false;

                int CharType = GetCharType(strChar1);

                switch (CharType)

                {

                    case :

                        #region  如果是数字，如果数字的上一位是字母要和后面的数字分开

                        // Digit: if the previous character was a letter, separate it from the digits.

                        if (word)

                        {

                            reText += Separator;

                        }

                        word = false;

                        number = true;

                        strLastWords = "";

                        break;

                        #endregion

                    case :

                    case :

                        #region 如果是字母

                        // Letter.

                        if (number)

                            strLastWords = Separator;

                        else

                            strLastWords = "";

                        word = true;

                        number = false;

                        break;

                        #endregion

                    case :

                    case :

                        #region 第一级哈希表是否包含关键字，假如包含处理第二级哈希表

                        // The first-level hashtable contains the character: process the second level.

                        // Was the previous character a letter?

                        if (word) reText += Separator;

                        #region 检测上一个是否是数字，这个过程是用于修正数字后的量词的

                        // Check whether the previous character was a number; this corrects
                        // measure words that follow a number (looked up under the "n" key).

                        if (number && CharType != )

                        {

                            h = (Hashtable)htWords["n"];

                            if (h.ContainsKey(strChar1))

                            {

                                l = (SegList)h[strChar1];

                                if (l.Contains(strChar2))

                                {

                                    reText += strChar1 + strChar2 + Separator;

                                    yes = true;

                                    i++;

                                }

                                else if (l.Contains("null"))

                                {

                                    reText += strChar1 + Separator;

                                    yes = true;

                                }

                            }

                            else

                                reText += Separator;

                        }

                        #endregion

                        // A Chinese character that is not a Chinese numeral

                        if (CharType == )

                        {

                            word = false;

                            number = false;

                            strLastWords = Separator;

                        }

                        else

                        {

                            word = false;

                            number = true;

                            strLastWords = "";

                        }

                        // Fetch the second-level hashtable

                        h = (Hashtable)htWords[strChar1];

                        // Does the second-level hashtable contain the key?

                        if (h.ContainsKey(strChar2))

                        {

                            #region  第二级包含关键字

                            // The second level contains the key.

                            // Fetch the SegList stored under the second character

                            l = (SegList)h[strChar2];

                            // Walk every entry and see whether it completes a word

                            for (int j = ; j < l.Count; j++)

                            {

                                bool have = false;

                                string strChar3 = l.GetElem(j).ToString();

                                // Check each entry for a match, guarding against running past the end of the text

                                if ((strChar3.Length + i + ) < strText.Length)

                                {

                                    // Original note: take m characters starting at i+2

                                    string strChar = strText.Substring(i + , strChar3.Length).Trim();

                                    if (strChar3 == strChar && !yes)

                                    {

                                        if (strPrefix != "")

                                        {

                                            reText += strPrefix + Separator;

                                            strPrefix = "";

                                            preFix = ;

                                        }

                                        reText += strChar1 + strChar2 + strChar;

                                        i += strChar3.Length + ;

                                        have = true;

                                        yes = true;

                                        break;

                                    }

                                }

                                else if ((strChar3.Length + i + ) == strText.Length)

                                {

                                    string strChar = strText.Substring(i + ).Trim();

                                    if (strChar3 == strChar && !yes)

                                    {

                                        if (strPrefix != "")

                                        {

                                            reText += strPrefix + Separator;

                                            strPrefix = "";

                                            preFix = ;

                                        }

                                        reText += strChar1 + strChar2 + strChar;

                                        i += strChar3.Length + ;

                                        have = true;

                                        yes = true;

                                        break;

                                    }

                                }

                                if (!have && j == l.Count -  && l.Contains("null") && !yes)

                                {

                                    if (preFix == )

                                    {

                                        reText += strPrefix + strChar1 + strChar2;

                                        strPrefix = "";

                                        preFix = ;

                                    }

                                    else if (preFix > )

                                    {

                                        reText += strPrefix + strLastWords + strChar1 + strChar2;

                                        strPrefix = "";

                                        preFix = ;

                                    }

                                    else

                                    {

                                        // NOTE(review): both branches below append the same text.

                                        if (CharType == ) reText += strChar1 + strChar2;

                                        else reText += strChar1 + strChar2;

                                        strLastWords = this.Separator;

                                        number = false;

                                    }

                                    i++;

                                    yes = true;

                                    break;

                                }

                                else if (have)

                                {

                                    break;

                                }

                            }

                            #endregion

                            // If nothing matched, one case remains: the word is exactly these two
                            // characters and no longer word starting with them exists

                            if (!yes && l.Contains("null"))

                            {

                                if (preFix == )

                                {

                                    reText += strPrefix + strChar1 + strChar2;

                                    strPrefix = "";

                                    preFix = ;

                                }

                                else if (preFix > )

                                {

                                    reText += strPrefix + strLastWords + strChar1 + strChar2;

                                    strPrefix = "";

                                    preFix = ;

                                }

                                else

                                {

                                    if (CharType == ) reText += strChar1 + strChar2;

                                    else reText += strChar1 + strChar2;

                                    strLastWords = this.Separator;

                                    number = false;

                                }

                                i++;

                                yes = true;

                            }

                            if (reText.Length > ) strLastChar = reText.Substring(reText.Length - );

                            if (CharType ==  && GetCharType(strLastChar) == )

                            {

                                number = true;

                            }

                            else if (strLastChar != this.Separator) reText += this.Separator;

                        }

                        #endregion

                        break;

                    default:

                        #region 未知字符,可能是生僻字,也可能是标点符合之类

                        // Unknown character: possibly a rare character, possibly punctuation or similar.

                        if (word && !yes)

                        {

                            reText += Separator;

                        }

                        else if (number && !yes)

                        {

                            reText += Separator;

                        }

                        number = false;

                        word = false;

                        strLastWords = this.Separator;

                        break;

                        #endregion

                }

                if (!yes && number || !yes && word)

                {

                    reText += strChar1;

                    yes = true;

                }

                if (!yes)

                {

                    #region 处理姓名问题

                    // Name handling: candidate name characters are collected in strPrefix,
                    // with preFix counting how many have been collected.

                    if (preFix == )

                    {

                        if (alPrefix.Contains(strChar1 + strChar2))

                        {

                            i++;

                            strPrefix = strChar1 + strChar2;

                            preFix++;

                        }

                        else if (alPrefix.Contains(strChar1))

                        {

                            if (!number)

                            {

                                strPrefix = strChar1;

                                preFix++;

                            }

                            else

                            {

                                reText += strChar1 + strLastWords;

                                number = false;

                                word = false;

                            }

                        }

                        else

                        {

                            if (preFix == )

                            {

                                reText += strPrefix + Separator + strChar1 + Separator;

                                strPrefix = "";

                                preFix = ;

                            }

                            else if (preFix > )

                            {

                                if (Regex.IsMatch(strChar1, strChinese))

                                {

                                    strPrefix += strChar1;

                                    preFix++;

                                }

                                else

                                {

                                    reText += strPrefix + Separator + strChar1 + Separator;

                                    strPrefix = "";

                                    preFix = ;

                                }

                            }

                            else

                            {

                                reText += strChar1 + strLastWords;

                                number = false;

                                word = false;

                            }

                        }

                    }

                    else

                    {

                        if (preFix == )

                        {

                            reText += strPrefix + Separator + strChar1 + Separator;

                            strPrefix = "";

                            preFix = ;

                        }

                        else if (preFix > )

                        {

                            if (Regex.IsMatch(strChar1, strChinese))

                            {

                                strPrefix += strChar1;

                                preFix++;

                            }

                            else

                            {

                                reText += strPrefix + Separator + strChar1 + Separator;

                                strPrefix = "";

                                preFix = ;

                            }

                        }

                        else

                        {

                            reText += strChar1 + strLastWords;

                            number = false;

                        }

                    }

                    #endregion

                }

                // Remember the last index the loop handled, for the trailing-character check below.

                length = i;

                #endregion

            }

            #region 最后防止最后一个字的丢失

            // Finally, make sure the last character is not lost.

            if (length < strText.Length - )

            {

                string strLastChar1 = strText.Substring(strText.Length - ).Trim();

                string strLastChar2 = strText.Substring(strText.Length - ).Trim();

                if (reText.Length > ) strLastChar = reText.Substring(reText.Length - );

                if (preFix != )

                {

                    reText += strPrefix + strLastChar1;

                }

                else

                {

                    switch (GetCharType(strLastChar1))

                    {

                        case :

                            if (strLastChar1 != "." && strLastChar1 != "．")

                                reText += strLastChar1;

                            else

                                reText += Separator + strLastChar1;

                            break;

                        case :

                        case :

                            if (alWord.Contains(strLastChar2))

                                reText += strLastChar1;

                            break;

                        case :

                        case :

                            if ((number || word) && strLastChar != Separator)

                                reText += Separator + strLastChar1;

                            else

                                reText += strLastChar1;

                            break;

                        default:

                            if (strLastChar != Separator)

                                reText += Separator + strLastChar1;

                            else

                                reText += strLastChar1;

                            break;

                    }

                }

                if (reText.Length > ) strLastChar = (reText.Substring(reText.Length - ));

                if (strLastChar != this.Separator) reText += this.Separator;

            }

            #endregion

            TimeSpan duration = DateTime.Now - start;

            m_EventTime = duration.TotalMilliseconds;

            return reText.Replace(" $", ""); // drops the "$" sentinel appended at the top (original note: removed when it ends up as a one-character token)

        }

        /// <summary>
        /// Segments text, optionally line by line.
        /// </summary>
        /// <param name="strText">Text to segment.</param>
        /// <param name="Enter">
        /// When true, the text is split on '\n', each part is segmented separately and the
        /// results are joined with "\r\n" after every part. When false, the whole text is
        /// passed to <see cref="SegmentText(string)"/>.
        /// </param>
        /// <returns>The segmented text.</returns>
        public string SegmentText(string strText, bool Enter)
        {
            if (!Enter)
            {
                return SegmentText(strText);
            }

            DateTime start = DateTime.Now;

            string[] strArr = strText.Split('\n');

            // StringBuilder avoids re-copying the accumulated result for every line.
            System.Text.StringBuilder reText = new System.Text.StringBuilder();
            for (int i = 0; i < strArr.Length; i++)
            {
                reText.Append(SegmentText(strArr[i])).Append("\r\n");
            }

            // Overwrites the per-line timing with the total for the whole text.
            TimeSpan duration = DateTime.Now - start;
            m_EventTime = duration.TotalMilliseconds;

            return reText.ToString();
        }

        #region 判断字符类型

        /// <summary>
        /// Classifies a single character: 0 = unknown, 1 = digit, 2 = letter,
        /// 3 = Chinese character, 4 = Chinese numeral.
        /// </summary>
        /// <param name="p_Char">The character to classify, as a string.</param>
        /// <returns>
        /// The type code. Membership in the number list gives 1 and in the letter list
        /// gives 2 (letter wins if both); being a first-level dictionary key adds 3, so a
        /// letter that is also a dictionary key yields 5.
        /// </returns>
        private int GetCharType(string p_Char)
        {
            // NOTE(review): the literals 0/1/2/3 were missing from this copy of the source;
            // they are restored from the type codes listed in the summary above.
            int CharType = 0;

            if (alNumber.Contains(p_Char)) CharType = 1;

            if (alWord.Contains(p_Char)) CharType = 2;

            if (htWords.ContainsKey(p_Char)) CharType += 3;

            return CharType;
        }

        #endregion

        #region 对加载的词典排序并重新写入

        /// <summary>
        /// Sorts the loaded dictionary and writes it back to disk without reloading it.
        /// </summary>
        public void SortDic()
        {
            const bool reload = false;
            SortDic(reload);
        }

        /// <summary>
        /// Sorts every word-tail list of the loaded dictionary (longest first) and rewrites
        /// the file at <see cref="DicPath"/> with the result. Updates <see cref="EventTime"/>.
        /// </summary>
        /// <param name="Reload">When true, <see cref="InitWordDics"/> is called after writing.</param>
        public void SortDic(bool Reload)
        {
            DateTime start = DateTime.Now;

            // The file is truncated on open; "using" makes sure it is flushed and closed
            // even if enumeration throws part-way through.
            using (StreamWriter sw = new StreamWriter(DicPath, false, System.Text.Encoding.UTF8))
            {
                IDictionaryEnumerator idEnumerator1 = htWords.GetEnumerator();
                while (idEnumerator1.MoveNext())
                {
                    IDictionaryEnumerator idEnumerator2 = ((Hashtable)idEnumerator1.Value).GetEnumerator();
                    while (idEnumerator2.MoveNext())
                    {
                        SegList aa = (SegList)idEnumerator2.Value;
                        aa.Sort();

                        for (int i = 0; i < aa.Count; i++)
                        {
                            // The "null" marker stands for a two-character word with no tail.
                            if (aa.GetElem(i).ToString() == "null")
                                sw.WriteLine(idEnumerator1.Key.ToString() + idEnumerator2.Key.ToString());
                            else
                                sw.WriteLine(idEnumerator1.Key.ToString() + idEnumerator2.Key.ToString() + aa.GetElem(i).ToString());
                        }
                    }
                }
            }

            if (Reload) InitWordDics();

            TimeSpan duration = DateTime.Now - start;
            m_EventTime = duration.TotalMilliseconds;
        }

        #endregion

        /// <summary>
        /// Removes exact duplicate lines from the dictionary file at <see cref="DicPath"/>
        /// and rewrites it. The original author marked this as "currently unused".
        /// Updates <see cref="EventTime"/>.
        /// </summary>
        /// <returns>The number of duplicate lines that were dropped.</returns>
        public int Optimize()
        {
            int l = 0;

            DateTime start = DateTime.Now;

            Hashtable htOptimize = new Hashtable();

            using (StreamReader reader = new StreamReader(DicPath, System.Text.Encoding.UTF8))
            {
                string strline = reader.ReadLine();

                // Reading stops at end of file or at the first blank line.
                while (strline != null && strline.Trim() != "")
                {
                    if (!htOptimize.ContainsKey(strline))
                        htOptimize.Add(strline, null);
                    else
                        l++;

                    // Advance to the next line; without this the loop never terminated.
                    strline = reader.ReadLine();
                }
            }

            Console.WriteLine("ready");

            // The reader is closed before the same file is reopened for writing.
            // Lines are written in Hashtable enumeration order, not original file order.
            using (StreamWriter sw = new StreamWriter(DicPath, false, System.Text.Encoding.UTF8))
            {
                IDictionaryEnumerator ide = htOptimize.GetEnumerator();
                while (ide.MoveNext())
                    sw.WriteLine(ide.Key.ToString());
            }

            TimeSpan duration = DateTime.Now - start;
            m_EventTime = duration.TotalMilliseconds;

            return l;
        }

        #endregion

    }

}

[分词] C#SegList分词辅助类,帮助类（转载）的更多相关文章

ElasticSearch已经配置好ik分词和mmseg分词(转)
ElasticSearch是一个基于Lucene构建的开源,分布式,RESTful搜索引擎.设计用于云计算中,能够达到实时搜索,稳定,可靠,快速,安装使用方便.支持通过HTTP使用JSON进行数据索引 ...
为 Elasticsearch 添加中文分词，对比分词器效果
转自:http://keenwon.com/1404.html 为 Elasticsearch 添加中文分词,对比分词器效果 Posted in 后端 By KeenWon On 2014年12月12 ...
python中文分词：结巴分词
中文分词是中文文本处理的一个基础性工作,结巴分词利用进行中文分词.其基本实现原理有三点: 基于Trie树结构实现高效的词图扫描,生成句子中汉字所有可能成词情况所构成的有向无环图(DAG) 采用了动态规 ...
.添加索引和类型，同时设定edgengram分词和charsplit分词
1.添加索引和类型,同时设定edgengram分词和charsplit分词 curl -XPUT 'http://127.0.0.1:9200/userindex/' -d '{ "se ...
为Elasticsearch添加中文分词，对比分词器效果
http://keenwon.com/1404.html Elasticsearch中,内置了很多分词器(analyzers),例如standard (标准分词器).english(英文分词)和chi ...
ES 09 - 定制Elasticsearch的分词器 (自定义分词策略)
目录 1 索引的分析 1.1 分析器的组成 1.2 倒排索引的核心原理-normalization 2 ES的默认分词器 3 修改分词器 4 定制分词器 4.1 向索引中添加自定义的分词器 4.2 测 ...
Elasticsearch拼音分词和IK分词的安装及使用
一.Es插件配置及下载 1.IK分词器的下载安装关于IK分词器的介绍不再多少,一言以蔽之,IK分词是目前使用非常广泛分词效果比较好的中文分词器.做ES开发的,中文分词十有八九使用的都是IK分词器. ...
和我一起打造个简单搜索之IK分词以及拼音分词
elasticsearch 官方默认的分词插件,对中文分词效果不理想,它是把中文词语分成了一个一个的汉字.所以我们引入 es 插件 es-ik.同时为了提升用户体验,引入 es-pinyin 插件.本 ...
盘古分词+一元/二元分词Lucene
本文参考自:https://blog.csdn.net/mss359681091/article/details/52078147 http://www.cnblogs.com/top5/archiv ...

随机推荐

如何禁止anonymous用户访问nexus
nexus在默认情况下是允许anonymous用户浏览仓库,并下载仓库中的jar包的,如果我们搭建的私服是有公网IP的,必须要进行设置,使得anonymous用户的权限被禁用.操作步骤如下: 使用管理 ...
BZOJ_1208_&_Codevs_1258_[HNOI2004]_宠物收养所_(平衡树/set)
描述 http://www.lydsy.com/JudgeOnline/problem.php?id=1208 (据说codevs要更新?就不放codevs的地址了吧...) 有宠物和人,每个单位都有 ...
20140708郑州培训第二题Impossible Game
Impossible Game题目描述你发明了一个简单的单人电脑游戏.在开始游戏时,玩家必须输入一个长度为 K 的字符串,且这个字符串的元素只能为‘A’‘B’‘C’或者‘D’.每一种字符串都代表一种颜 ...
SharePoint 2010 母版页定制小思路介绍
转:http://tech.ddvip.com/2013-11/1384521515206064.html 介绍:我们使用SharePoint2010做门户网站,经常需要定制母版页,但是2010提供的 ...
OnClientClick事件
1.OnClientClick="return validation()" //注意return 2.//默认情况下返回true function validation() ...
java 枚举（括号赋值）
详解在这里要注意的是: 1. 通过括号赋值,而且必须带有一个参构造器和一个属性跟方法,否则编译出错2. 赋值必须都赋值或都不赋值,不能一部分赋值一部分不赋值:如果不赋值则不能写构造器,赋值编译也出错 ...
c#自动更新+安装程序的制作
一.自动更新的实现让客户端实现自动更新,通常做法是在客户端部署一个单独的自动更新程序.主程序启动后,访问服务端,检查配置文件是否有更新版本,有更新版本就启动更新程序,由更新负责下载更新版本,并更新客 ...
使用Notify 和 wait ，使用Linklist实现生产者消费者问题
ref:http://www.cnblogs.com/happyPawpaw/archive/2013/01/18/2865957.html 注释很清楚的, import java.util.Link ...
linux diff详解
diff是Unix系统的一个很重要的工具程序. 它用来比较两个文本文件的差异,是代码版本管理的基石之一.你在命令行下,输入: $ diff <变动前的文件> <变动后的文件> ...
解决oracle_4031错误的方法
原因分析: 大量的硬解析出现,产生大量小的free chunk突然出现大的sql 1.临时方法 alter system flush shared_pool; 2.共享SQL alter system ...

[分词] C#SegList分词辅助类,帮助类 （转载）

[分词] C#SegList分词辅助类,帮助类 （转载）的更多相关文章

随机推荐

热门专题

[分词] C#SegList分词辅助类,帮助类（转载）

[分词] C#SegList分词辅助类,帮助类（转载）的更多相关文章