ipv4_conntrack_defrag

ipv4_conntrack_defrag对输入包进行检查,如果是分片包,则调用nf_ct_ipv4_gather_frags函数进行重组;

 static unsigned int ipv4_conntrack_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct sock *sk = skb->sk; if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
inet_sk(sk)->nodefrag)
return NF_ACCEPT; #if IS_ENABLED(CONFIG_NF_CONNTRACK)
#if !IS_ENABLED(CONFIG_NF_NAT)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
#endif
#endif
/* Gather fragments. */
/* 如果是分片的话进行分片重组 */
if (ip_is_fragment(ip_hdr(skb))) {
enum ip_defrag_users user =
nf_ct_defrag_user(state->hook, skb); if (nf_ct_ipv4_gather_frags(state->net, skb, user))
return NF_STOLEN;
}
return NF_ACCEPT;
}

nf_ct_ipv4_gather_frags内部调用了ip_defrag进行重组,ip_defrag相关分析,请移步IP分片重组;

 static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
u_int32_t user)
{
int err; local_bh_disable();
/* 分片重组 */
err = ip_defrag(net, skb, user);
local_bh_enable(); if (!err)
skb->ignore_df = ; return err;
}
ipv4_conntrack_in

ipv4_conntrack_in是对nf_conntrack_in的封装,是连接跟踪的输入本机或者由本机转发的入口函数,该函数获取l3proto ,l4proto,调用resolve_normal_ct检查是否有tuple节点,没有则创建,并且与skb关联,并调用l4proto->packet函数对连接状态进行处理;

 static unsigned int ipv4_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
 unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
int ret; /* 获取skb关联的nf_conn */
tmpl = nf_ct_get(skb, &ctinfo); /* 已经关联了nf_conn或者设置了不跟踪标记 */
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
/* Previously seen (loopback or untracked)? Ignore. */
/* 环回 || 不跟踪,返回accept */
if ((tmpl && !nf_ct_is_template(tmpl)) ||
ctinfo == IP_CT_UNTRACKED) {
NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
} /* 清空关联的nf_conn */
skb->_nfct = ;
} /* rcu_read_lock()ed by nf_hook_thresh */
/* 根据协议类型找到对应协议的l3proto */
l3proto = __nf_ct_l3proto_find(pf); /* 获取数据偏移和4层协议 */
ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
if (ret <= ) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = -ret;
goto out;
} /* 根据协议和4层协议号获取l4proto */
l4proto = __nf_ct_l4proto_find(pf, protonum); /* It may be an special packet, error, unclean...
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
/* 如果l4设置了错误检查函数,则进行检查 */
if (l4proto->error != NULL) {
ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
if (ret <= ) {
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = -ret;
goto out;
}
/* ICMP[v6] protocol trackers may assign one conntrack. */
if (skb->_nfct)
goto out;
}
repeat:
/* 查看hash中是否有对应tuple节点,没有则新建;更新nf_conn_info状态,并且与skb进行关联 */
ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
l3proto, l4proto);
if (ret < ) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
ret = NF_DROP;
goto out;
} /* 获取skb关联的nf_conn */
ct = nf_ct_get(skb, &ctinfo);
/* 没有关联的nf_conn,不是连接合法的一部分 */
if (!ct) {
/* Not valid part of a connection */
NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = NF_ACCEPT;
goto out;
} /* Decide what timeout policy we want to apply to this flow. */
/* 获取超时策略,扩展中的策略,或者默认l4proto中的策略 */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto); /* 处理4层协议的状态,tcp为tcp_packet */
ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
if (ret <= ) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put(&ct->ct_general);
skb->_nfct = ;
NF_CT_STAT_INC_ATOMIC(net, invalid);
if (ret == -NF_DROP)
NF_CT_STAT_INC_ATOMIC(net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
*/
if (ret == -NF_REPEAT)
goto repeat;
ret = -ret;
goto out;
} /* 第一次收到应答,则设置IPS_SEEN_REPLY_BIT标记,原值为0,则需要记录应答事件 */
if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
!test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
if (tmpl)
nf_ct_put(tmpl); return ret;
}

resolve_normal_ct函数将数据包中的相关字段设置到tuple中,并且检查hash中是否有该tuple,如果没有则新建tuple,而后设置连接状态,并且与skb进行关联;

 static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
struct nf_conn *ct;
u32 hash; /* 将源目的地址端口协议方向等字段设置到tuple */
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("Can't get tuple\n");
return ;
} /* look for tuple match */
/* 从hash中查找tuple */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash); /* 未找到该tuple */
if (!h) {
/* 创建一个节点 */
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
skb, dataoff, hash);
if (!h)
return ;
if (IS_ERR(h))
return PTR_ERR(h);
} /* 获取到nf_conn */
ct = nf_ct_tuplehash_to_ctrack(h); /* It exists; we have (non-exclusive) reference. */
/* 应答方向,已建立连接应答 */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
ctinfo = IP_CT_ESTABLISHED_REPLY;
}
/* 原始方向 */
else {
/* Once we've had two way comms, always ESTABLISHED. */
/* 已经见过应答了,那么是已连接状态 */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("normal packet for %p\n", ct);
ctinfo = IP_CT_ESTABLISHED;
}
/* 有期望连接标记,则设置关联字段 */
else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("related packet for %p\n", ct);
ctinfo = IP_CT_RELATED;
}
/* 其他情况,新连接 */
else {
pr_debug("new packet for %p\n", ct);
ctinfo = IP_CT_NEW;
}
} /* skb关联nf_conn */
nf_ct_set(skb, ct, ctinfo);
return ;
}
ipv4_conntrack_local

ipv4_conntrack_local是由本机发出的数据包连接跟踪的入口,是对nf_conntrack_in函数的封装;

 static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT; /* 分片,返回accpet */
if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
return NF_ACCEPT; /* 调用conntrack_in */
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
ipv4_helper

ipv4_helper函数查找已经注册的help扩展,如果存在则调用扩展的helper函数;

 static unsigned int ipv4_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */
/* 获取skb关联的nf_conn */
ct = nf_ct_get(skb, &ctinfo);
/* 未关联,或者是 已建立连接的关联连接的响应 */
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT; /* 获取help扩展 */
help = nfct_help(ct); /* 没有扩展 */
if (!help)
return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_thresh */
/* 或者helper */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT; /* 执行扩展的help函数 */
return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
}
ipv4_confirm

ipv4_confirm相关函数完成对连接的确认,并且将连接按照方向加入到对应的hash表中;

 static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo; /* 获取skb关联的nf_conn */
ct = nf_ct_get(skb, &ctinfo);
/* 未关联,或者是 已建立连接的关联连接的响应 */
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
goto out; /* adjust seqs for loopback traffic only in outgoing direction */
/* 有调整序号标记,且不是环回包,调整序号 */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
}
out:
/* We've seen it coming out the other side: confirm it */
/* 调用conntrack_confirm */
return nf_conntrack_confirm(skb);
}
 static inline int nf_conntrack_confirm(struct sk_buff *skb)
{
struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb);
int ret = NF_ACCEPT; /* nf_conn存在 */
if (ct) {
/* 未确认,则进行确认 */
if (!nf_ct_is_confirmed(ct))
ret = __nf_conntrack_confirm(skb);
/* accpet状态事件通知 */
if (likely(ret == NF_ACCEPT))
nf_ct_deliver_cached_events(ct);
}
return ret;
}
 int
__nf_conntrack_confirm(struct sk_buff *skb)
{
const struct nf_conntrack_zone *zone;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
unsigned int sequence;
int ret = NF_DROP; ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct); /* ipt_REJECT uses nf_conntrack_attach to attach related
ICMP/TCP RST packets in other direction. Actual packet
which created connection will be IP_CT_NEW or for an
expected connection, IP_CT_RELATED. */
/* 只对原始方向的连接进行确认,应答方向是已经处理过的 */
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
return NF_ACCEPT; zone = nf_ct_zone(ct);
local_bh_disable(); /* 计算原始方向和应答方向的hash */
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
/* reuse the hash saved before */
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* We're not in hash table, and we refuse to set up related
* connections for unconfirmed conns. But packet copies and
* REJECT will give spurious warnings here.
*/
/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ /* No external references means no one else could have
* confirmed us.
*/
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
nf_ct_del_from_dying_or_unconfirmed_list(ct); if (unlikely(nf_ct_is_dying(ct))) {
nf_ct_add_to_dying_list(ct);
goto dying;
} /* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */ /* 下面两个如果找到说明有冲突 */ /* 遍历原始方向hash,查找是否有相同节点 */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out; /* 遍历应答方向hash,查找是否有相同节点 */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out; /* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
/* 设置超时时间 */
ct->timeout += nfct_time_stamp;
/* 引用计数增加 */
atomic_inc(&ct->ct_general.use);
/* 更新为已确认 */
ct->status |= IPS_CONFIRMED; /* set conntrack timestamp, if enabled. */
/* 有时间戳扩展,则设置时间戳 */
tstamp = nf_conn_tstamp_find(ct);
if (tstamp) {
if (skb->tstamp == )
__net_timestamp(skb); tstamp->start = ktime_to_ns(skb->tstamp);
}
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
* guarantee that no other CPU can find the conntrack before the above
* stores are visible.
*/
/* 将原始节点和应答节点插入到对应的hash中 */
__nf_conntrack_hash_insert(ct, hash, reply_hash);
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable(); /* 事件通知 */
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct); nf_conntrack_event_cache(master_ct(ct) ?
IPCT_RELATED : IPCT_NEW, ct);
return NF_ACCEPT; out:
/* 加入到dying列表 */
nf_ct_add_to_dying_list(ct);
/* 解决冲突?? */
ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
dying:
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
return ret;
}

Netfilter 之 连接跟踪钩子函数分析的更多相关文章

  1. Netfilter之连接跟踪实现机制初步分析

    Netfilter之连接跟踪实现机制初步分析 原文: http://blog.chinaunix.net/uid-22227409-id-2656910.html 什么是连接跟踪 连接跟踪(CONNT ...

  2. Netfilter 之 连接跟踪相关数据结构

    Netfilter通过连接跟踪来记录和跟踪连接的状态,为状态防火墙和NAT提供基础支持: 钩子点与钩子函数 下图为钩子点和钩子函数的关系图(点击图片查看原图),其中ipv4_conntrack_def ...

  3. Netfilter 之 连接跟踪初始化

    基础参数初始化 nf_conntrack_init_start函数完成连接跟踪基础参数的初始化,包括了hash,slab,扩展项,GC任务等: int nf_conntrack_init_start( ...

  4. Netfilter 之 连接跟踪的helper

    注册helper nf_conntrack_ftp_init是连接跟踪ftp模块的初始化函数,可以看到其调用了nf_conntrack_helpers_register来注册helper: stati ...

  5. Netfilter&iptables:如何理解连接跟踪机制?

    如何理解Netfilter中的连接跟踪机制? 本篇我打算以一个问句开头,因为在知识探索的道路上只有多问然后充分调动起思考的机器才能让自己走得更远.连接跟踪定义很简单:用来记录和跟踪连接的状态. 问:为 ...

  6. 对于数据包的截取,使用linux中的netfilter钩子函数

    http://blog.csdn.net/wswifth/article/details/5115358 在师哥的代码(packet.c)中使用的是Linux2.4内核中的一个子系统:netfilte ...

  7. linux内核netfilter连接跟踪的hash算法

    linux内核netfilter连接跟踪的hash算法 linux内核中的netfilter是一款强大的基于状态的防火墙,具有连接跟踪(conntrack)的实现.conntrack是netfilte ...

  8. Netfilter 之 钩子函数与钩子点关系图

    概述 通过钩子点和优先级的代码追溯,得到如下对应关系图,图中横坐标为钩子点,纵坐标为优先级,每个钩子点上的钩子函数按照优先级排布: 详细分析 5个钩子点如下所示,在这个五个钩子点上的钩子函数按照上面的 ...

  9. Netfilter 之 钩子函数注册

    通过注册流程代码的分析,能够明确钩子函数的注册流程,理解存储钩子函数的数据结构,如下图(点击图片可查看原图): 废话不多说,开始分析: nf_hook_ops是注册的钩子函数的核心结构,字段含义如下所 ...

随机推荐

  1. canvas-八卦图和时钟实现

    八卦图: <body> canvas id="></canvas> <script> //获取到画布元素 let myCanvas = docume ...

  2. 客户想要的 vs 客户实际预算:漫画解读软件开发模式 ​​​​

    转自:http://blog.jobbole.com/113230/ 1913 年,美利坚工业之神——亨利福特,发明了世界上第一条流水线,汽车工业从此进入了大规模生产的时代.丰田公司提出的丰田生产系统 ...

  3. ASR测试方法---字错率(WER)、句错率(SER)统计

    一.基础概念 1.1.语音识别(ASR) 语音识别(speech recognition)技术,也被称为自动语音识别(英语:Automatic Speech Recognition, ASR), 狭隘 ...

  4. EEPROM IIC

    1. 数据位的有效性规定 I2C总线进行数据传送时,时钟信号为高电平期间,数据线上的数据必须保持稳定,只有在时钟线上的信号为低电平期间,数据线上的高电平或低电平状态才允许变化 2. 起始和终止信号 S ...

  5. liunx pip安装

    方法一 wget https://bootstrap.pypa.io/get-pip.py python get-pip.py 方法二 wget https://pypi.python.org/pac ...

  6. 基于SAML2.0的SAP云产品Identity Authentication过程介绍

    SAP官网的架构图 https://cloudplatform.sap.com/scenarios/usecases/authentication.html 上图介绍了用户访问SAP云平台时经历的Au ...

  7. 学习手写vue,理解原理

    class Compiler{ constructor(el,vm){ // 判断el属性 是不是 一个元素, 如果不是就获取 this.el = this.isElementNode(el)?el: ...

  8. 【loj#2524】【bzoj5303】 [Haoi2018]反色游戏(圆方树)

    题目传送门:loj bzoj 题意中的游戏方案可以转化为一个异或方程组的解,将边作为变量,点作为方程,因此若方程有解,方程的解的方案数就是2的自由元个数次方.我们观察一下方程,就可以发现自由元数量=边 ...

  9. openresty 阶段说明

    开发中常用的7阶段 set_by_lua*: 流程分支处理判断变量初始化 rewrite_by_lua*: 转发.重定向.缓存等功能(例如特定请求代理到外网) access_by_lua*: IP 准 ...

  10. R的数据结构--数组

    数组:可以认为数组是矩阵的扩展,它将矩阵扩展到2维以上.如果给定的数组是1维的则相当于向量,2维的相当于矩阵. R语言中的数组元素的类型也是单一的,可以是数值型,逻辑型,字符型或复数型 参数解释 ar ...