Trie树的常见应用大总结（面试+附代码实现）

（一）Trie的简单介绍

Trie树，又称字典树，单词查找树或者前缀树。是一种用于高速检索的多叉树结构，如英文字母的字典树是一个26叉树。数字的字典树是一个10叉树。

它的核心思想是以空间换时间：空间消耗较大，但插入和查询都有非常优秀的时间复杂度。

（二）Trie的定义

Trie树的键不是直接保存在节点中，而是由节点在树中的位置决定。一个节点的全部子孙都有同样的前缀（prefix）,从根节点到当前结点的路径上的全部字母组成当前位置的字符串。结点能够保存当前字符串、出现次数、指针数组(指向子树)以及是否是结尾标志等等。

/*
 * One node of the trie.  The character a node stands for is not stored in
 * the node itself; it is implied by which slot of the parent's next[] array
 * points to it.
 */
struct Trie_Node
{
	char count[15];                  // 15-byte character buffer attached to the node
	struct Trie_Node* next[MAXN];    // child pointers, one slot per possible character
	bool exist;                      // true when the path from the root to this node spells a complete word
};

typedef struct Trie_Node Trie;

Trie树能够利用字符串的公共前缀来节约存储空间，如下图所示：

（此处原为一张Trie树结构示意图，图片标签已损坏。）

它有3个基本性质：

(1) 根节点不包含字符，除根节点外每个节点都只包含一个字符。

(2) 从根节点到某一节点，路径上经过的字符连接起来，为该节点相应的字符串。

(3) 每一个节点的全部子节点包含的字符都不相同。

（三）Trie树的基本操作

（1）插入操作

按下标索引逐个插入字母，若当前字母存在则继续下一个，否则new出当前字母的结点，所以插入的时间复杂度仅仅和字符串的长度n有关，为O(n)。

/*
 * Insert the key `s` into the trie rooted at `root` and attach the string
 * `add` to the node that terminates the key.
 *
 * root - root node of the trie; nothing is done when it is NULL
 * s    - NUL-terminated key; every character must lie in 'a' .. 'a'+MAXN-1,
 *        otherwise the key is rejected and the trie is left untouched
 * add  - NUL-terminated payload copied into the terminal node's `count`
 *        buffer; it is truncated if it does not fit
 */
void Insert(Trie *root, char* s,char *add)
{
	if(root==NULL || s==NULL || add==NULL)
		return;

	// Validate the whole key first, so a bad character can neither index
	// outside next[] nor leave a half-built path behind.
	for(const char *c=s; *c!='\0'; ++c)
	{
		if(*c<'a' || *c-'a'>=MAXN)
			return;
	}

	Trie *p=root;
	for(; *s!='\0'; ++s)
	{
		const int idx=*s-'a';
		if(p->next[idx]==NULL)
			p->next[idx]=createNode();
		p=p->next[idx];
	}
	p->exist=true;

	// Bounded copy: an unbounded strcpy would overflow `count` for long payloads.
	size_t n=strlen(add);
	if(n>=sizeof(p->count))
		n=sizeof(p->count)-1;
	memcpy(p->count,add,n);
	p->count[n]='\0';
}

（2）查询操作

和插入操作相仿，若查询途中某一个结点并不存在，则直接就return返回。否则继续下去，当字符串结束时，trie树上也有结束标志。那么证明此字符串存在，return true；

/*
 * Report whether the word `s` is stored in the trie rooted at `root`.
 *
 * Walks one child link per character.  Returns 0 as soon as a link is
 * missing or a character falls outside 'a' .. 'a'+MAXN-1 (such a key can
 * never have been inserted).  When the whole string has been consumed the
 * result is 1 if the reached node is flagged as the end of a word, else 0.
 */
int Search(Trie* root,const char* s)
{
	if(root==NULL || s==NULL)
		return 0;

	Trie *p=root;
	for(; *s!='\0'; ++s)
	{
		if(*s<'a' || *s-'a'>=MAXN)
			return 0;
		p=p->next[*s-'a'];
		if(p==NULL)
			return 0;
	}
	// `count` is a char buffer in this node layout, so it cannot be the
	// int result; the end-of-word flag is what tells us the word exists.
	return p->exist ? 1 : 0;
}

（3）删除操作

一般来说，对Trie单个结点的删除操作不常见，所以我在这里也仅仅提供递归删除整个树的操作

/*
 * Recursively destroy the whole subtree rooted at `root` (children first,
 * then the node itself).  Passing NULL is allowed and does nothing.
 * NOTE(review): nodes are released with `delete`, so they are assumed to
 * come from `new` -- confirm against createNode.
 */
void del(Trie *root)
{
	if(root==NULL)
		return;

	for(int i=0;i<MAXN;i++)
		del(root->next[i]);    // NULL children are handled by the guard above

	delete root;
}

（4）遍历操作

假设我们想要将trie中的字符串排序输出，直接先序遍历就可以。

/*
 * Pre-order traversal of the subtree rooted at `root`: prints "name: count"
 * for every node flagged as a complete word.  Children are visited in slot
 * order 0 .. MAXN-1, so words come out in ascending slot (alphabetical) order.
 * Passing NULL is allowed and prints nothing.
 */
void Print(Trie *root)
{
	if(root==NULL)
		return;

	if(root->exist)
		cout<<root->name<<": "<<root->count<<endl;

	// Use MAXN rather than a literal 26 so the loop always matches next[].
	for(int i=0;i<MAXN;i++)
		Print(root->next[i]);
}

（四）Trie树的详细应用

（1）统计前缀出现的次数

这是Trie最主要的应用，每一个结点的字母使用count记录出现的次数就可以。

这里提供一道题目，hdu 1251供大家练习。

//hdu 1251   统计前缀出现次数

#include <cstdio>

#include <iostream>

#include <string>

#include <cstring>

using namespace std;

// Fan-out of every trie node: one child slot per lowercase letter, indexed by (c - 'a').
const int MAXN=26;

/*
 * Trie node for the prefix-counting program.  The letter a node represents
 * is implied by its slot in the parent's next[] array.
 */
struct Trie_Node
{
	int count;                       // how many inserted words pass through this node (i.e. share this prefix)
	struct Trie_Node* next[MAXN];    // child pointers, one per letter
	bool exist;                      // true when a complete word ends at this node
};

typedef struct Trie_Node Trie;

/*
 * Allocate a fresh trie node with `new` and return it in its empty state:
 * no children, prefix counter 0, not the end of any word.
 */
Trie* createNode()
{
	Trie *node=new Trie;

	for(int i=0;i<MAXN;i++)
		node->next[i]=NULL;
	node->exist=false;
	node->count=0;

	return node;
}

/*
 * Insert the word `s` into the trie rooted at `root`, incrementing the
 * prefix counter of every node on its path and flagging the last node as
 * the end of a word.
 *
 * Words containing a character outside 'a' .. 'a'+MAXN-1 are rejected as a
 * whole and leave the trie untouched.  A NULL `root` or `s` is ignored.
 */
void Insert(Trie *root, const char* s)
{
	if(root==NULL || s==NULL)
		return;

	// Check every character before touching the trie: an out-of-range
	// character would otherwise index outside next[].
	for(const char *c=s; *c!='\0'; ++c)
	{
		if(*c<'a' || *c-'a'>=MAXN)
			return;
	}

	Trie *p=root;
	for(; *s!='\0'; ++s)
	{
		const int idx=*s-'a';
		if(p->next[idx]==NULL)
			p->next[idx]=createNode();
		p=p->next[idx];
		p->count+=1;    // one more word shares the prefix ending here
	}
	p->exist=true;
}

/*
 * Return the prefix counter stored at the node reached by following `s`
 * from `root`, i.e. the number of inserted words that start with `s`.
 * Returns 0 when the path does not exist, when `s` contains a character
 * outside 'a' .. 'a'+MAXN-1, or when `root`/`s` is NULL.  For the empty
 * string the root's own counter is returned.
 */
int Search(Trie* root,const char* s)
{
	if(root==NULL || s==NULL)
		return 0;

	Trie *p=root;
	for(; *s!='\0'; ++s)
	{
		// A character without a slot can never have been inserted.
		if(*s<'a' || *s-'a'>=MAXN)
			return 0;
		p=p->next[*s-'a'];
		if(p==NULL)
			return 0;
	}
	return p->count;
}

/*
 * Recursively free the subtree rooted at `root`: all children first, then
 * the node itself.  Nodes are released with `delete`, matching the `new`
 * used by createNode.  Passing NULL is allowed and does nothing.
 */
void del(Trie *root)
{
	if(root==NULL)
		return;

	for(int i=0;i<MAXN;i++)
		del(root->next[i]);    // empty slots fall into the NULL guard

	delete root;
}

/*
 * Driver for the prefix-counting program.
 *
 * Input is read line by line from stdin.  Every non-empty line before the
 * first empty line is inserted as a word; the first empty line switches to
 * query mode, after which each line is looked up and the number of words
 * having it as a prefix is printed on its own line.
 *
 * Lines are read with fgets instead of gets, so an over-long line can no
 * longer overflow the buffer; only its first 14 characters are used and the
 * remainder of that line is discarded.
 */
int main()
{
	char s[15];
	bool querying=false;
	Trie* root=createNode();

	while(fgets(s,sizeof(s),stdin)!=NULL)
	{
		size_t len=strcspn(s,"\r\n");
		if(s[len]=='\0')
		{
			// No line terminator in the buffer: drop the rest of the long line.
			int ch;
			while((ch=getchar())!='\n' && ch!=EOF)
			{
			}
		}
		s[len]='\0';

		if(querying)
			printf("%d\n",Search(root,s));
		else if(len==0)
			querying=true;      // the blank separator line ends the word list
		else
			Insert(root,s);
	}

	del(root);
	return 0;
}

（2）翻译（密码，明文）

给定若干组字符串对(s, k)：输入k时需要翻译成s，也就是说两者是映射关系。接下来我们给出一段话，让你翻译出正常的文章。

用map固然简便。可是Trie的效率更加高。

仅仅须要在k的结尾结点出记录下s就可以。

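这里也提供一道题目：hdu 1075。(被注释掉的是我原来的程序，WA了，有大神看出来麻烦告诉我一下。谢谢)。（注：注释版WA的一个原因是Search在遍历时不断移动指针s，匹配失败时printf("%s",s)输出的只是剩余的后缀；而走到单词末尾却不是完整单词时，s已指向'\0'，什么也不会输出。应当先保存单词的起始地址，失败时输出整个原单词。）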

/*

//hdu 1075映射

#include <cstdio>

#include <iostream>

#include <string>

#include <cstring>

#include <stdlib.h>

using namespace std;

const int MAXN=26;

typedef struct Trie_Node

{

   	char count[15];      //单词前缀出现的次数

   	struct Trie_Node* next[MAXN];    //指向各个子树的指针

	bool exist;    //标记结点处是否构成单词

}Trie;

Trie* createNode()

{

	Trie* p =(Trie*)malloc(sizeof(Trie));

	p->exist=false;

	memset(p->next,0,sizeof(p->next));

	return p;

}

void Insert(Trie *root, char* s,char *add)

{

	Trie *p=root;

	while(*s!='\0')

	{

		if(p->next[*s-'a']==NULL)

		{

			p->next[*s-'a']=createNode();

		}

		p=p->next[*s-'a'];

       // p->count=add;

        ++s;

	}

	p->exist=true;

    strcpy(p->count,add);

}

void Search(Trie* root, const char* s)

{

	Trie *p=root;

	while(*s!='\0')

	{

		if(p->next[*s-'a']==NULL)

		{

		    printf("%s",s);

		    return ;

		}

		p=p->next[*s-'a'];

	    ++s;

	}

	if(p->exist)

    printf("%s",p->count);

    else

    printf("%s",s);

}

void del(Trie *root)

{

	for(int i=0;i<MAXN;i++)

	{

		if(root->next[i]!=NULL)

		{

		   del(root->next[i]);

		}

	}

	free(root);

}

int main()

{

	char text[3013],from[15],to[15];

	Trie* root=createNode();

	scanf("%s",from);

    while(scanf("%s",from),strcmp(from,"END"))

    {

    	scanf("%s",to);

    	Insert(root,to,from);

    }

    scanf("%s",from);

    getchar();

    while(gets(text),strcmp(text,"END"))

    {

    	int len=strlen(text);

    	for(int i=0;i<len;i++)

    	{

	    	if(islower(text[i]))

	    	{

	    		int j=0;

	    		char temp[15];

	    		memset(temp,'\0',sizeof(temp));

	    		while(islower(text[i]))

	    		temp[j++]=text[i++];

	    		Search(root,temp);

	    	}

	    	if(!islower(text[i]))

	    	printf("%c",text[i]);

	    }

	    printf("\n");

    }

	return 0;

}

*/

#include<stdio.h>

#include<string.h>

#include<stdlib.h>

#include<string>

using namespace std;

/*
 * Trie node for the translation program.  The node reached by spelling a
 * word carries, in `dic`, the replacement text to print for that word.
 */
struct node
{
    char dic[15];      // replacement word stored at a terminal node
    node *next[26];    // child pointers, one per lowercase letter
    bool flag;         // true when a dictionary word ends at this node
};

// Root of the single, program-wide trie.
node *root;

/*
 * Allocate and initialise an empty trie node: no children, not the end of
 * a word, empty replacement string.  The node is obtained from malloc.
 * Never returns NULL: if the allocation fails the program prints a message
 * to stderr and exits, because no caller can continue without the node.
 */
node *build()
{
    node *p=(node *)malloc(sizeof(*p));
    if(p==NULL)
    {
        fprintf(stderr,"build: out of memory\n");
        exit(EXIT_FAILURE);
    }

    for(int i=0;i<26;i++)
        p->next[i]=NULL;
    p->flag=false;
    p->dic[0]='\0';    // keep dic a valid (empty) string until insert() fills it

    return p;
}

/*
 * Add one dictionary entry to the global trie: the key is `mars`, and the
 * node that terminates it stores `earth` as the text to print instead.
 *
 * earth - replacement word; truncated if it does not fit in node::dic
 * mars  - key word; must consist of 'a'..'z' only, otherwise the entry is
 *         ignored and the trie is left untouched
 *
 * Does nothing when the global root has not been built yet or an argument
 * is NULL.
 */
void insert(char *earth,char *mars)
{
    if(root==NULL || earth==NULL || mars==NULL)
        return;

    // Reject bad keys up front so next[] is never indexed out of range
    // and no partial path is created.
    for(const char *c=mars; *c!='\0'; ++c)
    {
        if(*c<'a' || *c>'z')
            return;
    }

    node *p=root;
    for(const char *c=mars; *c!='\0'; ++c)
    {
        const int idx=*c-'a';
        if(p->next[idx]==NULL)
            p->next[idx]=build();
        p=p->next[idx];
    }
    p->flag=true;

    // Bounded copy: an unbounded strcpy would overflow dic for long words.
    size_t n=strlen(earth);
    if(n>=sizeof(p->dic))
        n=sizeof(p->dic)-1;
    memcpy(p->dic,earth,n);
    p->dic[n]='\0';
}

/*
 * Print the translation of the word `earth` to stdout.
 *
 * If the word is a key in the global trie, the replacement stored at its
 * terminal node is printed; in every other case (unknown word, word that is
 * only a prefix of a key, character outside 'a'..'z', trie not built) the
 * word itself is printed unchanged.  No newline is added.
 */
void query(char *earth)
{
    if(earth==NULL)
        return;

    node *p=root;
    for(const char *c=earth; p!=NULL && *c!='\0'; ++c)
    {
        if(*c<'a' || *c>'z')
        {
            p=NULL;          // such a word cannot be in the dictionary
            break;
        }
        p=p->next[*c-'a'];   // becomes NULL when the path ends early
    }

    if(p!=NULL && p->flag)
        printf("%s",p->dic);
    else
        printf("%s",earth);
}

/*
 * Driver for the translation program.
 *
 * Input layout as consumed here: one header token (discarded), then pairs
 * "replacement key" until the token "END"; then another header token
 * (discarded) followed by text lines until a line equal to "END".
 * NOTE(review): the header tokens are presumably "START" as in HDU 1075 --
 * confirm against the problem statement.
 *
 * Every maximal run of lowercase letters in the text is passed to query();
 * every other character is echoed unchanged.  Each text line is followed by
 * a newline on output.
 *
 * Compared with the gets/%s version this one bounds every read, stops
 * cleanly on premature end of input, translates words in place (so long
 * words cannot overflow a 15-byte scratch buffer) and no longer emits a
 * stray NUL byte when a word ends the line.  Text lines longer than the
 * buffer are processed in pieces.
 */
int main()
{
    char earth[15],mars[15],ask[3010];

    root=build();

    if(scanf("%14s",earth)!=1)      // dictionary header
        return 0;
    while(scanf("%14s",earth)==1 && strcmp(earth,"END")!=0)
    {
        if(scanf("%14s",mars)!=1)
            break;
        insert(earth,mars);
    }

    if(scanf("%14s",earth)!=1)      // text header
        return 0;
    // Skip the remainder of the header line (handles both "\n" and "\r\n").
    int ch;
    while((ch=getchar())!='\n' && ch!=EOF)
    {
    }

    while(fgets(ask,sizeof(ask),stdin)!=NULL)
    {
        ask[strcspn(ask,"\r\n")]='\0';
        if(strcmp(ask,"END")==0)
            break;

        size_t i=0;
        while(ask[i]!='\0')
        {
            if(ask[i]>='a' && ask[i]<='z')
            {
                size_t start=i;
                while(ask[i]>='a' && ask[i]<='z')
                    ++i;
                // Terminate the word in place for query(), then restore.
                char saved=ask[i];
                ask[i]='\0';
                query(ask+start);
                ask[i]=saved;
            }
            else
            {
                putchar(ask[i]);
                ++i;
            }
        }
        printf("\n");
    }

    return 0;
}

（3）实现搜索引擎的热门搜索排名

我的初步想法是和(1)类似：对（1）中的trie进行先序遍历，将字符串和出现次数存进一个结构体数组，最后对这个数组进行快速排序，时间复杂度为O(nlogn)。网上说还可以利用分治+trie+最小堆，我还没有仔细搞清楚，以后研究完再补充。

（4）输入自动补全

其实原理都差不多。把输入字符串结尾处的结点当作root，进行先序遍历，即可得出全部以输入的字符串为前缀的答案。

// Autocomplete (自动补全)

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

using namespace std;

// Fan-out of every trie node: one child slot per lowercase letter, indexed by (c - 'a').
const int MAXN=26;

/*
 * Trie node for the autocomplete program.  A node that ends a word also
 * keeps a copy of that word so it can be printed during traversal.
 */
struct Trie_Node
{
	int count;                       // how many times the word ending here was inserted
	struct Trie_Node* next[MAXN];    // child pointers, one per letter
	bool exist;                      // true when a complete word ends at this node
	char name[15];                   // the complete word, filled in at terminal nodes
};

typedef struct Trie_Node Trie;

/*
 * Allocate and initialise an empty trie node: counter 0, not the end of a
 * word, empty name, no children.  The node comes from malloc and is later
 * released with free() by del().
 * Never returns NULL: on allocation failure a message is written to stderr
 * and the program exits, since callers dereference the result immediately.
 */
Trie* createNode()
{
	Trie* p =(Trie*)malloc(sizeof(Trie));
	if(p==NULL)
	{
		fprintf(stderr,"createNode: out of memory\n");
		exit(EXIT_FAILURE);
	}

	p->count=0;
	p->exist=false;
	p->name[0]='\0';    // keep name a valid (empty) string until Insert fills it
	memset(p->next,0,sizeof(p->next));

	return p;
}

/*
 * Insert `word` into the trie rooted at `root`.  The terminal node is
 * flagged as a word, its counter is incremented and a copy of the word is
 * stored in its `name` buffer (truncated if it does not fit).
 *
 * Words containing a character outside 'a' .. 'a'+MAXN-1 are rejected as a
 * whole and leave the trie untouched.  A NULL `root` or `word` is ignored.
 */
void Insert(Trie *root,char* word)
{
	if(root==NULL || word==NULL)
		return;

	// Validate first: an out-of-range character would index outside next[].
	for(const char *c=word; *c!='\0'; ++c)
	{
		if(*c<'a' || *c-'a'>=MAXN)
			return;
	}

	Trie *p=root;
	for(const char *s=word; *s!='\0'; ++s)
	{
		const int idx=*s-'a';
		if(p->next[idx]==NULL)
			p->next[idx]=createNode();
		p=p->next[idx];
	}

	p->exist=true;
	p->count+=1;

	// Bounded copy: an unbounded strcpy would overflow `name` for long words.
	size_t n=strlen(word);
	if(n>=sizeof(p->name))
		n=sizeof(p->name)-1;
	memcpy(p->name,word,n);
	p->name[n]='\0';
}

/*
 * Follow the characters of `s` from `root` and return the node reached,
 * which is the root of the subtree holding every stored word that starts
 * with `s`.  Returns NULL when no such path exists, when `s` contains a
 * character outside 'a' .. 'a'+MAXN-1, or when `root`/`s` is NULL.
 * For the empty string `root` itself is returned.
 */
Trie* Search(Trie* root, char* s)
{
	if(root==NULL || s==NULL)
		return NULL;

	Trie *p=root;
	for(; *s!='\0'; ++s)
	{
		// A character without a slot can never have been inserted.
		if(*s<'a' || *s-'a'>=MAXN)
			return NULL;
		p=p->next[*s-'a'];
		if(p==NULL)
			return NULL;
	}
	return p;
}

/*
 * Recursively release the subtree rooted at `root`: all children first,
 * then the node itself.  Nodes are released with free(), matching the
 * malloc used by createNode.  Passing NULL is allowed and does nothing.
 */
void del(Trie *root)
{
	if(root==NULL)
		return;

	for(int i=0;i<MAXN;i++)
		del(root->next[i]);    // empty slots fall into the NULL guard

	free(root);
}

/*
 * Pre-order traversal of the subtree rooted at `root`: prints
 * "name: count" followed by a newline for every node flagged as a complete
 * word.  Children are visited in slot order 0 .. MAXN-1, so the words come
 * out in alphabetical order.  Passing NULL is allowed and prints nothing.
 */
void Print(Trie *root)
{
	if(root==NULL)
		return;

	if(root->exist)
		cout<<root->name<<": "<<root->count<<endl;

	// Use MAXN rather than a literal 26 so the loop always matches next[].
	for(int i=0;i<MAXN;i++)
		Print(root->next[i]);
}

/*
 * Driver for the autocomplete demo.
 *
 * Reads up to five whitespace-separated words from stdin and inserts them
 * into a trie; then, for every further token, prints all stored words that
 * start with it (one "word: count" line each).  Nothing is printed for a
 * prefix that matches no stored word.
 *
 * The field width is set before every extraction so that at most 14
 * characters are stored in the 15-byte buffer; the rest of an over-long
 * token is read as the next token instead of overflowing `s`.
 */
int main()
{
	char s[15];
	Trie* root=createNode();

	for(int i=0;i<5;i++)
	{
		cin.width(sizeof(s));    // width resets after each >>, so set it every time
		if(!(cin>>s))
			break;               // input ended early: do not insert an unread buffer
		Insert(root,s);
	}

	for(;;)
	{
		cin.width(sizeof(s));
		if(!(cin>>s))
			break;

		Trie *ans=Search(root,s);
		if(ans)
			Print(ans);
	}

	del(root);
	return 0;
}

Trie树的常见应用大总结（面试+附代码实现）的更多相关文章

[POJ] #1002# 487-3279 : 桶排序/字典树(Trie树)/快速排序
一. 题目 487-3279 Time Limit: 2000MS Memory Limit: 65536K Total Submissions: 274040 Accepted: 48891 ...
剑指Offer——Trie树(字典树)
剑指Offer--Trie树(字典树) Trie树 Trie树,即字典树,又称单词查找树或键树,是一种树形结构,是一种的单词.对于每一个单词,我们要判断他出没出现过,如果出现了,求第一次出现在第几个位 ...
poj_3630 trie树
题目大意给定一系列电话号码,查看他们之间是否有i,j满足,号码i是号码j的前缀子串. 题目分析典型的trie树结构.直接使用trie树即可.但是需要注意,若使用指针形式的trie树,则在大数据量下 ...
双数组Trie树 (Double-array Trie) 及其应用
双数组Trie树(Double-array Trie, DAT)是由三个日本人提出的一种Trie树的高效实现 [1],兼顾了查询效率与空间存储.Ansj便是用DAT(虽然作者宣称是三数组Trie树,但 ...
Trie树之C-实现
title: Trie树之C++实现 comments: true date: 2016-10-02 16:59:54 categories: 算法 tags: Trie树前言之前写了一篇偏向于理 ...
Atitit 常见的树形结构红黑树二叉树 B树 B+树 Trie树 attilax理解与总结
Atitit 常见的树形结构红黑树二叉树 B树 B+树 Trie树 attilax理解与总结 1.1. 树形结构-- 一对多的关系1 1.2. 树的相关术语: 1 1.3. 常见的树形结构 ...
大数据处理-Trie树
大数据处理--Trie树 1.1.什么是Trie树 Trie树,即字典树,又称单词查找树或键树,是一种树形结构,是一种哈希树的变种.典型应用是用于统计和排序大量的字符串(但不仅限于字符串),所以经常被 ...
数据结构 | 30行代码，手把手带你实现Trie树
本文始发于个人公众号:TechFlow,原创不易,求个关注今天是算法和数据结构专题的第28篇文章,我们一起来聊聊一个经典的字符串处理数据结构--Trie. 在之前的4篇文章当中我们介绍了关于博弈论的 ...
[转]双数组TRIE树原理
原文名称: An Efficient Digital Search Algorithm by Using a Double-Array Structure 作者: JUN-ICHI AOE 译文: 使 ...

随机推荐

jQuery第二课点击弹出一个提示框
选择器允许您对元素组或单个元素进行操作. jQuery 选择器在前面的章节中,我们展示了一些有关如何选取 HTML 元素的实例. 关键点是学习 jQuery 选择器是如何准确地选取您希望应用效果的元 ...
rescan-scsi-bus.sh linux扫盘脚本
[root@ftp:/home/tools/shell] > yum install sg3_utils* Loaded plugins: fastestmirror Repository ba ...
pandas 3 设置值
from __future__ import print_function import pandas as pd import numpy as np np.random.seed(1) dates ...
简单搭建zookeeper集群分布式/伪分布式
分布式搭建一.下载zookeeper安装包自行下载:我用的是 zookeeper-3.5.4-beta.tar.gz 二.环境准备 1. 我的虚拟机自带的java是1.7的,这个版本要求java1 ...
ActiveMQ maven
http://outofmemory.cn/java/mq/apache-activemq-demo
c++_benchMark_vector_list_deque
title: c++_benchMark_vector_list_deque date: 2015-08-01 22:32:39 作者:titer1 + ZhangYu 出处:www.drysalte ...
Ignatius and the Princess III（杭电1028）（母函数）
Ignatius and the Princess III Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K ...
echarts 地图动态展示结合css+js
echarts地图展示功能非常强大,官网上静态展示的样例非常多了,动态的资料少.研究了下.我眼下实现的通过ajax从server获取数据动态展示地图. 另外,我们有时候希望在地图之上做些自己定义的东西 ...
android开发游记：ItemTouchHelper 使用RecyclerView打造可拖拽的GridView
以下是RecyclerView结合ItemTouchHelper实现的列表和网格布局的拖拽效果. 效果图例如以下:(gif图有点顿卡,事实上执行是非常流畅的) demo下载地址: DragRecycl ...
Linux 经常使用快捷键
桌面下: Alt+F5 取消最大化窗体 Alt+F9 最小化窗体 Alt+F10 最大化窗体 Alt+空格打开窗体的控制菜单 (点击窗体左上角图标出现的菜单) ctl+r ...

Trie树的常见应用大总结（面试+附代码实现）

Trie树的常见应用大总结（面试+附代码实现）的更多相关文章

随机推荐

热门专题