go路由httprouter中的压缩字典树算法图解及c++实现

go路由httprouter中的压缩字典树算法图解及c++实现

前言

准备从嵌入式往go后端转，今年准备学习一下gin框架，决定先从这个轻量级的路由请求器着手，本文讲讲它用到的压缩字典树算法。

httprouter简介

HttpRouter是一个Go编写的轻量级的高性能Http请求路由器(也可称为多路选择器multiplexer简称mux)

与Go的net/http包的默认mux不同，该路由器支持路由中的变量与请求方法进行匹配，同时具有很好的伸缩性。

该路由在具有高性能的同时，也优化了内存的占用；即使是很长的路径和大量的路径，它也能很好地扩展，采用压缩字典树(基数树)结构实现高效的匹配。

压缩字典树

概念

压缩字典树，是trie树的一种，也称单词查找树、前缀树，善于进行字符串的检索、取字符串最长公共前缀、以及排序，常应用在搜索引擎中，例如在百度输入“蔡”时，可能会自动弹出能匹配到的词语。

压缩trie和标准trie最大的不同点就是节点的数量与插入字符串的个数成正比，而不是与字符串的长度成正比，所以当字符串数量越来越多，越密集且相似度极高的情况下，会退化成标准trie树。

下面分别是/,/bear,/bell,/bid,/bull,/buy,/sell,/stock,/stop 的标准trie 和压缩trie

插入操作

下面图解一串子串插入压缩trie过程，/,/serach,/support,/blog , 在httprouter上截的一段例子，我们只插到/blog

插入/

插入/serach

插入/support

插入/blog

查询操作

查询比较简单，后面看代码也比较快。

1、先找共同前缀。

2、再找目录。

3、循环上面两步，直到当前path相等。

C++实现

这里注册了4个路径的回调函数，addRoute 即是插入操作，handler即是查询。

// httprouter.hpp

#pragma once

#include <string>

#include <vector>

#include <functional>

#include <memory>

namespace httprouter{

//! Callback type stored in the tree and returned by route lookups.
using handler_t = std::function<void(void)>;

//! A single node of the compressed trie (radix tree).
struct _tree_node {
	//! Path fragment stored on this node.
	std::string                                path;
	//! One character per child: indices[i] is the first character of
	//! children[i]'s path fragment.
	std::string                                indices;
	//! Child nodes, kept in the same order as `indices`.
	std::vector<std::shared_ptr<_tree_node>>   children;
	//! Handler registered for the route ending at this node; empty if none.
	handler_t                                  handle;
};

using tree_node_t = _tree_node;

//! Route table backed by a compressed trie of path fragments.
class node
{
public:
	//! Constructs the router with a single empty root node.
	node();

	~node();

	//! Routers are neither copy-constructible nor copy-assignable.
	node(const node&) = delete;
	node& operator=(const node&) = delete;

	//! Registers `handle` under `path`, adding or splitting tree nodes as needed.
	//! Not concurrency-safe!
	void addRoute(std::string path, handler_t handle);

	//! Returns the handler stored for `path`.
	handler_t handler(std::string path);

private:
	//! Stores `path` and `handle` on the given tree node.
	void insertChild(tree_node_t* node, std::string& path, handler_t handle);

	//! Root of the trie.
	std::shared_ptr<tree_node_t> node_;
};

}

// httprouter.cpp

#include <algorithm>

#include "httprouter.hpp"

using namespace httprouter;

//! Constructs the router with a single root node whose path and indices are
//! empty, which has no children and no handler.
//!
//! The node is value-initialized through std::make_shared rather than with the
//! GNU-only `field: value` initializer syntax, so the code stays standard C++.
node::node()
	: node_(std::make_shared<tree_node_t>())
{
}

//! Nothing to release by hand: the root node is held by a shared_ptr member.
node::~node() = default;

void node::addRoute(std::string path, handler_t handle) {

	std::string fullPath = path;

	auto node = node_;

	// no-empty tree

	if (node->path.size() > 0 || node->children.size() > 0) {

		while (true) {

			bool have_indices = false;

			//find the longest common prefix.

			std::size_t i = 0;

			auto max = std::min(node->path.size(), path.size());

			for (; i < max && path[i] == node->path[i];) {

				i++;

			}

			// Split edge

			if (i < node->path.size()) {

				auto child = std::shared_ptr<tree_node_t>(new tree_node_t{

				  path :      std::string(node->path.c_str() + i),

				  indices :   node->indices,

				  children :  std::move(node->children),

				  handle :    node->handle,

				});

				node->children = std::vector<std::shared_ptr<tree_node_t>>{ child };

				node->indices = std::string(node->path.c_str() + i, 1);

				node->path = std::string(path.c_str(), i);

				node->handle = nullptr;

			}

			// make new node a child of this node

			if (i < path.size()) {

				path = std::string(path.c_str() + i);

				char ch = path[0];

				// Check if a child with the next path byte exists

				for (std::size_t i = 0; i < node->indices.size(); i++) {

					if (ch == node->indices[i]) {

						//i = node.incrementChildPrio(i);

						node = node->children[i];

						have_indices = true;

						break;

					}

				}

				if (have_indices) {

					continue;

				}

				//otherwise insert it

				if (ch != ':' && ch != '*') {

					node->indices += ch;

					auto child = std::shared_ptr<tree_node_t>(new tree_node_t{

					path :      "",

					indices :   "",

					children :  {},

					handle :    nullptr,

					});

					node->children.push_back(child);

					node = child;

				}

				insertChild(node.get(), path, handle);

				return;

			}

			else if (i == path.size()) {

				if (node->handle) {

					printf("error ! handle already exists.");

                    exit(1);

				}

				node->handle = handle;

			}

			return;

		}

	}

	else { // Empty tree

		insertChild(node.get(), path, handle);

	}

}

//! Stores the route fragment and its handler on `node`.
//!
//! @param node   tree node to fill in
//! @param path   fragment to store; it is copied up to its first NUL character
//! @param handle callback to store on the node
void node::insertChild(tree_node_t* node, std::string& path, handler_t handle) {
	std::string fragment(path.c_str());
	node->path = std::move(fragment);
	node->handle = handle;
}

//! Looks up the handler registered for `path`.
//!
//! Starting at the root, each step requires the remaining path to begin with
//! the current node's fragment, strips that fragment, and then follows the
//! child whose index character equals the next path byte.
//!
//! @param path route to look up
//! @return the handler stored on the node that matches `path` exactly. The
//!         result is an empty handler_t when no node matches, or when the
//!         matching node has no handler, so callers should test it before
//!         invoking it.
handler_t node::handler(std::string path) {
	const tree_node_t* cur = node_.get();

	while (true) {
		const std::string& fragment = cur->path;

		// The remaining path must start with this node's fragment; a shorter
		// or diverging path cannot match anything below this node.
		if (path.compare(0, fragment.size(), fragment) != 0) {
			return handler_t{};
		}

		// The fragment consumed the whole remaining path: exact match.
		if (path.size() == fragment.size()) {
			return cur->handle;
		}

		path.erase(0, fragment.size());

		// Pick the child whose fragment starts with the next path byte.
		const std::size_t slot = cur->indices.find(path[0]);
		if (slot == std::string::npos) {
			// Wildcard children are not implemented, so this is a miss.
			return handler_t{};
		}
		cur = cur->children[slot].get();
	}
}

//main.cpp

#include "httprouter.hpp"

#include <iostream>

//! Demo handler: writes "hello1" and a newline to std::cout, then flushes.
void hello1() {
	std::cout << "hello1" << '\n' << std::flush;
}

//! Demo handler: writes "hello2" and a newline to std::cout, then flushes.
void hello2() {
  std::cout << "hello2" << '\n' << std::flush;
}

//! Demo handler: writes "hello3" and a newline to std::cout, then flushes.
void hello3() {
  std::cout << "hello3" << '\n' << std::flush;
}

//! Demo handler: writes "hello4" and a newline to std::cout, then flushes.
void hello4() {
  std::cout << "hello4" << '\n' << std::flush;
}

//! Demo handler: writes "hello5" and a newline to std::cout, then flushes.
void hello5() {
  std::cout << "hello5" << '\n' << std::flush;
}

int main() {

  httprouter::node no;

  no.addRoute("/", hello1);

  no.addRoute("/serach/", hello2);

  no.addRoute("/support/", hello3);

  no.addRoute("/blog/", hello4);

  no.handler("/")();

  no.handler("/serach/")();

  no.handler("/support/")();

  no.handler("/blog/")();

}

结果:

节点信息: