以下代码取自 kernel-2.6.24。

[数据结构]
/*
 * Per-type dispatch entry: one of these exists for every ICMP type and is
 * looked up by the type field of an incoming ICMP header.
 */
struct icmp_control {
void (*handler)(struct sk_buff *skb); //handler invoked for this ICMP type
short error; /* This ICMP is classed as an error message */
};
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+]; //每个icmp类型有一个项 [/数据结构]
[初始化]
文件net/ipv4/af_inet.c中,函数
static int __init inet_init(void)
{
	/* ... (excerpt: unrelated initialisation elided) ... */

	/*
	 * Register icmp_protocol as the receive handler for IPPROTO_ICMP.
	 * A negative return means registration failed.
	 */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");

	/* ... */

	/* Set up the ICMP layer (creates the per-CPU control sockets). */
	icmp_init(&inet_family_ops);

	/* ... */
}
icmp初始化函数
/* One ICMP control socket per CPU; each slot is filled in by icmp_init(). */
static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; //per-CPU variable
/*
 * icmp_init - create the per-CPU ICMP control sockets.
 * @ops: protocol family operations (not used by this function body).
 *
 * For every possible CPU a raw IPPROTO_ICMP kernel socket is created and
 * configured for sending replies.  Failure to create a socket is fatal.
 */
void __init icmp_init(struct net_proto_family *ops)
{
	struct inet_sock *inet;
	int i;

	for_each_possible_cpu(i) {
		int err;

		/* Create this CPU's kernel-side raw ICMP socket. */
		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
				       &per_cpu(__icmp_socket, i));
		if (err < 0)
			panic("Failed to create the ICMP control socket.\n");

		/* Allocations made on behalf of this socket use GFP_ATOMIC. */
		per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;

		/*
		 * Enough space for 2 * 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
			(2 * ((64 * 1024) + sizeof(struct sk_buff)));

		/* The sock was allocated as an inet_sock, so this view is valid. */
		inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
		inet->uc_ttl = -1;
		inet->pmtudisc = IP_PMTUDISC_DONT;

		/*
		 * Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
	}
}
[/初始化]
[协议处理实现]
注册的协议处理函数,当ip向上递交数据包时,如果发现是icmp协议就会调用这个函数。
/*
 * Protocol descriptor registered for IPPROTO_ICMP in inet_init(); its
 * handler, icmp_rcv(), is the entry point for received ICMP packets.
 */
static struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
};
处理进入的icmp包
int icmp_rcv(struct sk_buff *skb)
{
struct icmphdr *icmph;
struct rtable *rt = (struct rtable *)skb->dst; //路由缓存 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { //skb的ip校验和标志
case CHECKSUM_COMPLETE:
if (!csum_fold(skb->csum)) //没有伪头部的校验和检测
break; /* fall through */
case CHECKSUM_NONE:
skb->csum = ;
if (__skb_checksum_complete(skb)) //全部内容的校验和检测
goto error;
}
if (!pskb_pull(skb, sizeof(struct icmphdr))) //是否有icmp头空间,如果有移动data指针到icmp头后面
goto error; icmph = icmp_hdr(skb); //获取icmp头
ICMPMSGIN_INC_STATS_BH(icmph->type); /*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded.
*/
if (icmph->type > NR_ICMP_TYPES)
goto error;
//icmp是发送到本地的多播或广播地址
if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
/* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be silently ignored (we let user decide with a sysctl). * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently discarded if to broadcast/multicast.*/
if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && sysctl_icmp_echo_ignore_broadcasts) {
goto error;
}
//除了回显和时间截,地址掩码请求和应答,其他到广播和多播的icmp包全部丢弃
if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) {
goto error;
}
}
icmp_pointers[icmph->type].handler(skb); //根据icmp类型调用相应的处理函数
drop:
kfree_skb(skb); //处理完了释放skb
return ;
error:
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
goto drop;
}
类型处理函数在内核中被静态的初始化.
/*
 * Statically initialised dispatch table, indexed by ICMP type.
 * Types without a named constant use their numeric value as designator.
 * .error = 1 marks types classed as error messages; unset means 0.
 */
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
	[ICMP_ECHOREPLY] = {
		.handler = icmp_discard,	/* no-op handler */
	},
	[1] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[2] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[ICMP_DEST_UNREACH] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_SOURCE_QUENCH] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_REDIRECT] = {
		.handler = icmp_redirect,
		.error = 1,
	},
	[6] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[7] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[ICMP_ECHO] = {
		.handler = icmp_echo,
	},
	[9] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[10] = {
		.handler = icmp_discard,
		.error = 1,
	},
	[ICMP_TIME_EXCEEDED] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_PARAMETERPROB] = {
		.handler = icmp_unreach,
		.error = 1,
	},
	[ICMP_TIMESTAMP] = {
		.handler = icmp_timestamp,
	},
	[ICMP_TIMESTAMPREPLY] = {
		.handler = icmp_discard,
	},
	[ICMP_INFO_REQUEST] = {
		.handler = icmp_discard,
	},
	[ICMP_INFO_REPLY] = {
		.handler = icmp_discard,
	},
	[ICMP_ADDRESS] = {
		.handler = icmp_address,
	},
	[ICMP_ADDRESSREPLY] = {
		.handler = icmp_address_reply,
	},
};
我们一个一个看。
icmp接收到不可达包的处理,不可达包括ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH.
/*
 * icmp_unreach - handle received ICMP error messages.
 * @skb: received packet, skb->data pointing just past the ICMP header,
 *       i.e. at the IP header embedded in the error.
 *
 * This is the table handler for ICMP_DEST_UNREACH, ICMP_SOURCE_QUENCH,
 * ICMP_TIME_EXCEEDED and ICMP_PARAMETERPROB.  It validates the embedded
 * IP header, derives an "info" value (new MTU for FRAG_NEEDED, pointer
 * byte for PARAMETERPROB), and passes the error to matching raw sockets
 * and to the err_handler of the embedded protocol.
 */
static void icmp_unreach(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct icmphdr *icmph;
	int hash, protocol;
	struct net_protocol *ipprot;
	struct sock *raw_sk;
	u32 info = 0;

	/* Does the payload contain at least a basic embedded IP header? */
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out_err;

	icmph = icmp_hdr(skb);
	iph = (struct iphdr *)skb->data;	/* embedded (offending) IP header */

	if (iph->ihl < 5) /* Mangled header, drop. */
		goto out_err;

	if (icmph->type == ICMP_DEST_UNREACH) {
		switch (icmph->code & 15) {
		case ICMP_NET_UNREACH:
		case ICMP_HOST_UNREACH:
		case ICMP_PROT_UNREACH:
		case ICMP_PORT_UNREACH:
			break;
		case ICMP_FRAG_NEEDED:
			if (ipv4_config.no_pmtu_disc) {
				LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: fragmentation needed and DF set.\n", NIPQUAD(iph->daddr));
			} else {
				/*
				 * Pass the advertised MTU to the routing
				 * code; a zero result ends processing.
				 */
				info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu));
				if (!info)
					goto out;
			}
			/* Must not fall into the SR_FAILED log below. */
			break;
		case ICMP_SR_FAILED:
			LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source Route Failed.\n", NIPQUAD(iph->daddr));
			break;
		default:
			break;
		}
		/* Unknown unreachable code: ignore. */
		if (icmph->code > NR_ICMP_UNREACH)
			goto out;
	} else if (icmph->type == ICMP_PARAMETERPROB)
		/* The top byte of this word is passed on as info. */
		info = ntohl(icmph->un.gateway) >> 24;

	/*
	 * Some routers send bogus error responses to broadcast frames;
	 * log them (rate limited) and drop.
	 */
	if (!sysctl_icmp_ignore_bogus_error_responses && inet_addr_type(iph->daddr) == RTN_BROADCAST) {
		if (net_ratelimit())
			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP type %u, code %u "
			       "error to a broadcast: %u.%u.%u.%u on %s\n", NIPQUAD(ip_hdr(skb)->saddr),
			       icmph->type, icmph->code, NIPQUAD(iph->daddr), skb->dev->name);
		goto out;
	}

	/*
	 * Checkin full IP header plus 8 bytes of protocol to avoid
	 * additional coding at protocol handlers.
	 */
	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
		goto out;

	/* pskb_may_pull() may have moved the data; reload the pointer. */
	iph = (struct iphdr *)skb->data;
	protocol = iph->protocol;

	/* Deliver the error to every matching raw socket. */
	hash = protocol & (MAX_INET_PROTOS - 1);
	read_lock(&raw_v4_lock);
	if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, iph->saddr, skb->dev->ifindex)) != NULL) {
			raw_err(raw_sk, skb, info);
			raw_sk = sk_next(raw_sk);
			iph = (struct iphdr *)skb->data;
		}
	}
	read_unlock(&raw_v4_lock);

	/* Then hand it to the embedded protocol's error handler, if any. */
	rcu_read_lock();
	ipprot = rcu_dereference(inet_protos[hash]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, info);
	rcu_read_unlock();
out:
	return;
out_err:
	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
	goto out;
}
icmp重定向处理
static void icmp_redirect(struct sk_buff *skb)
{
struct iphdr *iph; if (skb->len < sizeof(struct iphdr)) 长度检测
goto out_err;
/* Get the copied header of the packet that caused the redirect */
if (!pskb_may_pull(skb, sizeof(struct iphdr))) //ip头长度检测
goto out; iph = (struct iphdr *)skb->data; //取出ip头 switch (icmp_hdr(skb)->code & ) { //编码
case ICMP_REDIR_NET: //网络重定向
case ICMP_REDIR_NETTOS:
/* As per RFC recommendations now handle it as a host redirect.*/
case ICMP_REDIR_HOST: //主机重定向
case ICMP_REDIR_HOSTTOS:
//在路由告诉缓存中,更新相同缓存项的rt_gateway字段
ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr, icmp_hdr(skb)->un.gateway, iph->saddr, skb->dev);
break;
}
out:
return;
out_err:
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
goto out;
}
icmp回显请求
/*
 * icmp_echo - answer an ICMP echo request.
 * @skb: the received request (skb->data is past the ICMP header).
 *
 * Unless sysctl_icmp_echo_ignore_all is set, builds an echo reply that
 * reuses the request header (type changed to ICMP_ECHOREPLY) and echoes
 * the whole remaining payload.
 */
static void icmp_echo(struct sk_buff *skb)
{
	if (!sysctl_icmp_echo_ignore_all) {
		struct icmp_bxm icmp_param;

		/* Start from the request's own ICMP header. */
		icmp_param.data.icmph = *icmp_hdr(skb);
		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
		icmp_param.skb = skb;
		icmp_param.offset = 0;			/* copy payload from its start */
		icmp_param.data_len = skb->len;		/* ... and all of it */
		icmp_param.head_len = sizeof(struct icmphdr);
		icmp_reply(&icmp_param, skb);
	}
}
/*
 * icmp_reply - send a reply described by @icmp_param in answer to @skb.
 * @icmp_param: reply header and payload description.
 * @skb:        the request being answered.
 *
 * Echoes the request's IP options, routes back to the sender, applies
 * ICMP rate limiting and pushes the reply out through the ICMP socket.
 * Returns silently if options cannot be echoed, the transmit lock cannot
 * be taken, or no route is found.
 */
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
	struct sock *sk = icmp_socket->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = (struct rtable *)skb->dst;	/* route cache entry */
	__be32 daddr;

	/* Build the reply's IP options from those of the request. */
	if (ip_options_echo(&icmp_param->replyopts, skb))
		return;

	/* Take the ICMP transmit lock; give up if that fails. */
	if (icmp_xmit_lock())
		return;

	/* The checksum is filled in later, in icmp_push_reply(). */
	icmp_param->data.icmph.checksum = 0;

	inet->tos = ip_hdr(skb)->tos;
	daddr = ipc.addr = rt->rt_src;		/* reply goes back to the sender */
	ipc.opt = NULL;
	if (icmp_param->replyopts.optlen) {
		ipc.opt = &icmp_param->replyopts;
		/* With source routing, route towards the first hop instead. */
		if (ipc.opt->srr)
			daddr = icmp_param->replyopts.faddr;
	}
	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    .proto = IPPROTO_ICMP };

		security_skb_classify_flow(skb, &fl);
		/* No route: nothing is sent. */
		if (ip_route_output_key(&rt, &fl))
			goto out_unlock;
	}
	/* Send only if the rate limiter allows it. */
	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code))
		icmp_push_reply(icmp_param, &ipc, rt);
	/* Drop the route reference obtained by ip_route_output_key(). */
	ip_rt_put(rt);
out_unlock:
	icmp_xmit_unlock();
}
判断应答是否发送
/*
 * icmpv4_xrlim_allow - decide whether an outgoing ICMP message may be sent.
 * @rt:   route the message will use.
 * @type: ICMP type of the message.
 * @code: ICMP code of the message.
 *
 * Returns 1 to send, 0 to suppress.  Only types selected in
 * sysctl_icmp_ratemask are rate limited; everything else is allowed.
 */
static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
{
	struct dst_entry *dst = &rt->u.dst;
	int rc = 1;

	/* Out-of-range types are not rate limited (rc stays 1). */
	if (type > NR_ICMP_TYPES)
		goto out;

	/* Don't limit PMTU discovery. */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
		goto out;

	/* No rate limit on loopback */
	if (dst->dev && (dst->dev->flags & IFF_LOOPBACK))
		goto out;

	/* Limit if icmp type is enabled in ratemask. */
	if ((1 << type) & sysctl_icmp_ratemask)
		rc = xrlim_allow(dst, sysctl_icmp_ratelimit);
out:
	return rc;
}
#define XRLIM_BURST_FACTOR 6
int xrlim_allow(struct dst_entry *dst, int timeout)
{
unsigned long now;
int rc = ; //不发送 now = jiffies;
dst->rate_tokens += now - dst->rate_last; //累加过去的时间
dst->rate_last = now; //最后使用时间 if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout) //累加时间超过指定的范围
dst->rate_tokens = XRLIM_BURST_FACTOR * timeout; //设为最大值 if (dst->rate_tokens >= timeout) { //超过用户配置的时间限制
dst->rate_tokens -= timeout; //递减配置的时间限制
rc = ; //发送
}
return rc;
}
发送icmp应答函数
static void icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt)
{
struct sk_buff *skb;
//分配skb拷贝接收的skb数据到新分配的skb内存中,新skb被链入到icmp_socket->sk->sk_write_queue中.
if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < )
ip_flush_pending_frames(icmp_socket->sk); //拷贝失败
else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { //提取分配的skb
struct icmphdr *icmph = icmp_hdr(skb);
__wsum csum = ;
struct sk_buff *skb1;
//计算校验和
skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
csum = csum_add(csum, skb1->csum);
}
csum = csum_partial_copy_nocheck((void *)&icmp_param->data, (char *)icmph, icmp_param->head_len, csum);
icmph->checksum = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(icmp_socket->sk); //发送队列中的skb
}
}
/*
 * icmp_glue_bits - getfrag callback passed to ip_append_data().
 * @from:   the struct icmp_bxm describing the reply.
 * @to:     destination buffer inside the skb being built.
 * @offset: offset into the reply payload.
 * @len:    number of bytes to copy.
 * @odd:    position argument forwarded to csum_block_add().
 * @skb:    skb being filled.
 *
 * Copies payload from the original skb while checksumming it, and
 * accumulates the checksum in skb->csum.  Always returns 0.
 */
static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
	__wsum csum;

	/* Copy and checksum in a single pass, starting from a zero sum. */
	csum = skb_copy_and_csum_bits(icmp_param->skb, icmp_param->offset + offset, to, len, 0);

	/* Fold this block's checksum into the skb's running checksum. */
	skb->csum = csum_block_add(skb->csum, csum, odd);
	/* For error-class replies, attach netfilter conntrack info from the original skb. */
	if (icmp_pointers[icmp_param->data.icmph.type].error)
		nf_ct_attach(skb, icmp_param->skb);
	return 0;
}
拷贝数据到ip数据负载部分,如果需要将所有碎片链入到sk->sk_write_queue队列中
/*
 * ip_append_data - append data to the socket's pending IP datagram.
 * @sk:          sending socket; fragments are queued on sk->sk_write_queue.
 * @getfrag:     callback that copies @len bytes at @offset from @from into
 *               @to, maintaining the checksum in @skb; negative on failure.
 * @from:        opaque source handed to @getfrag.
 * @length:      number of bytes to append (transport header included on
 *               the first call).
 * @transhdrlen: transport header length; non-zero only for the first chunk.
 * @ipc:         IP control data (options, address); read on the first call.
 * @rt:          route; read on the first call, afterwards the corked route
 *               is used.
 * @flags:       MSG_* flags (MSG_PROBE, MSG_MORE, MSG_DONTWAIT are examined).
 *
 * Data is laid out as a chain of skbs, each sized to become one IP
 * fragment once a header is added by ip_push_pending_frames().
 * Returns 0 on success or a negative errno.
 */
int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/* First chunk: set up the cork state. */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				/* Room for struct ip_options plus 40 bytes of option data. */
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			/* Keep a private copy of the caller's IP options. */
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		/*
		 * With IP_PMTUDISC_PROBE the path MTU is ignored and the
		 * device MTU is used as the fragment size.
		 * NOTE(review): upstream appears to take dst_hold(&rt->u.dst)
		 * before storing rt in the cork; this listing has no such
		 * call - confirm against the tree.
		 */
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		inet->cork.rt = rt;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			/* Extra header space requested by the route. */
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		/* Continuing a corked datagram: reuse the saved state. */
		rt = inet->cork.rt;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);	/* link-layer headroom */

	/* IP header length of every fragment (options included). */
	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	/* Largest fragment whose payload is a multiple of 8 bytes. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* The whole datagram must fit in the 16-bit IP total length. */
	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen && length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	inet->cork.length += length;

	/* Oversized UDP on a device with NETIF_F_UFO takes the UFO path. */
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && (rt->u.dst.dev->features & NETIF_F_UFO)) {
		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags);
		if (err)
			goto error;
		return 0;
	}

	/* Empty queue: jump straight to allocating the first skb. */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/*
		 * Check if the remaining data fits into current packet.
		 * mtu is tried first; only when more data remains than fits
		 * is the 8-byte aligned maxfraglen used as the limit.
		 */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			/* Bytes by which the previous skb exceeds maxfraglen. */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu, we know we need
			 * more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;	/* full fragment length */

			if ((flags & MSG_MORE) && !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be the
			 * last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				/* First skb of the datagram. */
				skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Later skbs: allow up to twice the send buffer. */
				if (atomic_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;

			/* Fill in the control structures */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);	/* leave link-layer headroom */

			/* Find where to start putting bytes. */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			/* The transport header follows the IP header and options. */
			skb->transport_header = (skb->network_header + fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				/*
				 * Move the overhanging tail of the previous
				 * skb into this one, fix both checksums, and
				 * trim the previous skb back to maxfraglen.
				 */
				skb->csum = skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum, skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/* datalen covers the transport header and the moved gap. */
			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/* Put the packet on the pending queue. */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				/* Space remains in the socket's current page. */
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				/* Start a new page. */
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	/* Un-account the bytes that were not appended. */
	inet->cork.length -= length;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}
ip_append_data函数失败就会调用这个函数释放所有skb
/*
 * ip_flush_pending_frames - discard everything queued for transmission.
 * @sk: socket whose sk_write_queue is emptied.
 *
 * Frees each queued skb, taking them from the tail of the queue, and then
 * releases the socket's cork state.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *pending;

	for (;;) {
		pending = __skb_dequeue_tail(&sk->sk_write_queue);
		if (pending == NULL)
			break;
		kfree_skb(pending);
	}

	ip_cork_release(inet_sk(sk));
}
icmp_push_reply-> 取出队列中的skb,然后添加完整的ip头然后发送出去
int ip_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = NULL;
struct rtable *rt = inet->cork.rt;
struct iphdr *iph;
__be16 df = ;
__u8 ttl;
int err = ; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) //取出一个skb
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list); //指向分片连表头 /* move skb->data to ip header from ext header */
if (skb->data < skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb)); //移动data指针到ip头位置 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { //循环出队所有skb
__skb_pull(tmp_skb, skb_network_header_len(skb)); //移动data到传输层头位置
*tail_skb = tmp_skb; //当执行第一次时等于是(skb_shinfo(skb)->frag_list) = tmp_skb
tail_skb = &(tmp_skb->next); //指向了tmp_skb的next
//累加这个包的长度
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
skb->truesize += tmp_skb->truesize;
__sock_put(tmp_skb->sk); //递减sock的引用计数
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
}
//到这就是把所有在sk->sk_write_queue中的skb(所有分片)组合到第一个skb的skb_shinfo(skb)->frag_list连表中了。 /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow to fragment the frame generated here.
* No matter, what transforms how transforms change size of the packet, it will come out. */
if (inet->pmtudisc < IP_PMTUDISC_DO)
skb->local_df = ; //不分片
/* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame locally. */
if (inet->pmtudisc >= IP_PMTUDISC_DO || (skb->len <= dst_mtu(&rt->u.dst) && ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF); //设置不分片标志
if (inet->cork.flags & IPCORK_OPT) //有ip选项
opt = inet->cork.opt; if (rt->rt_type == RTN_MULTICAST) //多播ttl
ttl = inet->mc_ttl;
else
ttl = ip_select_ttl(inet, &rt->u.dst); //单播,需要计算 iph = (struct iphdr *)skb->data; //在第一个skb中添加ip头
iph->version = ;
iph->ihl = ;
if (opt) {
iph->ihl += opt->optlen>>;
ip_options_build(skb, opt, inet->cork.addr, rt, );
}
iph->tos = inet->tos;
iph->tot_len = htons(skb->len);
iph->frag_off = df;
ip_select_ident(iph, &rt->u.dst, sk); //选择一个ip标识
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
iph->saddr = rt->rt_src;
iph->daddr = rt->rt_dst;
ip_send_check(iph); //校验和 skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst); if (iph->protocol == IPPROTO_ICMP)
icmp_out_count(((struct icmphdr *)skb_transport_header(skb))->type); //更新一些统计信息 //发送这个skb到netfilter的LOCAL_OUT hook
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
if (err) {
if (err > )
err = inet->recverr ? net_xmit_errno(err) : ; if (err)
goto error;
}
out:
ip_cork_release(inet);
return err;
error:
IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
goto out;
}
到这需要简单说一下:其实我们看的是icmp回显请求相关的流程,其中的ip分片应该根本不会发生,
但这些函数是ip层通用的,所以有些看起来十分复杂。
icmp时间戳请求处理
/*
 * icmp_timestamp - answer an ICMP timestamp request.
 * @skb: received request (skb->data is past the ICMP header).
 *
 * Builds a timestamp reply carrying three 32-bit times: the first is
 * copied from the request, the other two are set to the current time in
 * milliseconds since midnight UT.  Requests shorter than 4 bytes are
 * counted as errors.
 */
static void icmp_timestamp(struct sk_buff *skb)
{
	struct timeval tv;
	struct icmp_bxm icmp_param;

	/* Too short. */
	if (skb->len < 4)
		goto out_err;

	/* Fill in the current time as ms since midnight UT: */
	do_gettimeofday(&tv);
	icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
	icmp_param.data.times[2] = icmp_param.data.times[1];
	/* Copy the first 4 payload bytes (the originate timestamp in RFC 792 terms) into times[0]. */
	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
		BUG();
	icmp_param.data.icmph = *icmp_hdr(skb);
	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
	icmp_param.data.icmph.code = 0;
	icmp_param.skb = skb;
	icmp_param.offset = 0;
	icmp_param.data_len = 0;	/* nothing is copied from the request skb */
	/* The header block carries the ICMP header plus three 4-byte times. */
	icmp_param.head_len = sizeof(struct icmphdr) + 12;
	icmp_reply(&icmp_param, skb);
out:
	return;
out_err:
	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
	goto out;
}
地址掩码请求,linux没有实现它,参考内核中这函数的注释
/*
 * Table handler for ICMP_ADDRESS (address mask request).  It does
 * nothing: the only statement, a rate-limited debug printk, is compiled
 * out with #if 0.
 */
static void icmp_address(struct sk_buff *skb)
{
#if 0
if (net_ratelimit())
printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
#endif
}
地址掩码应答处理
static void icmp_address_reply(struct sk_buff *skb)
{
struct rtable *rt = (struct rtable *)skb->dst; //路由缓存
struct net_device *dev = skb->dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
//长度不对或没有标志重定向源地址
if (skb->len < || !(rt->rt_flags & RTCF_DIRECTSRC))
goto out;
in_dev = in_dev_get(dev);
if (!in_dev)
goto out;
rcu_read_lock();
//设备有地址,打开调试项,设备允许转发
if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) {
__be32 _mask, *mp;
//取出掩码
mp = skb_header_pointer(skb, , sizeof(_mask), &_mask);
BUG_ON(mp == NULL);
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
//循环所有地址,如果掩码匹配且路由地址也匹配
if (*mp == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa))
break;
}
if (!ifa && net_ratelimit()) { //都不匹配
printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from %s/%u.%u.%u.%u\n",
NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
}
}
rcu_read_unlock();
in_dev_put(in_dev);
out:;
}
[/协议处理实现]

ICMP 实现的更多相关文章

  1. [协议]ICMP协议剖析

    1.ICMP简介 ICMP全名为(INTERNET CONTROL MESSAGE PROTOCOL)网络控制消息协议. ICMP的协议号为1. ICMP报文就像是IP报文的小弟,总顶着IP报文的名头 ...

  2. 简单了解ICMP协议

    ping命令是什么协议? 维基百科: ping是一种电脑网络工具,用来测试数据包能否通过IP协议到达特定主机.ping的运作原理是向目标主机传出一个ICMP echo@要求数据包,并等待接受echo回 ...

  3. ICMP的应用--Traceroute

    Traceroute是用来侦测主机到目的主机之间所经路由情况的重要工具,也是最便利的工具.前面说到,尽管ping工具也可以进行侦测,但是,因为ip头的限制,ping不能完全的记录下所经过的路由器.所以 ...

  4. ICMP Protocol

    [ICMP Protocol] 参考: 1.ICMP Types and Codes:http://www.nthelp.com/icmp.html 2.RFC 792 - Internet Cont ...

  5. TCP协议学习记录 (一) ICMP时间戳请求

    程序只实现了获取时间戳,至于将时间戳转换成具体日期和时间,暂时没有好的办法. #define TIME_STAMP_REQUEST 13 struct iphdr { unsigned ; //包头长 ...

  6. 002.ICMP--拼接ICMP包,实现简单Ping程序(原始套接字)

    一.大致流程: 将ICMP头和时间数据设置好后,通过创建好的原始套接字socket发出去.目的主机计算效验和后会将数据原样返回,用当前时间和返回的数据结算时间差,计算出rtt. 二.数据结构: ICM ...

  7. linux原始套接字(2)-icmp请求与接收

    一.概述                                                    上一篇arp请求使用的是链路层的原始套接字.icmp封装在ip数据报里面,所以icmp请 ...

  8. 网络错误定位案例 ICMP host *** unreachable - admin prohibited

    1. 环境 一台物理服务器 9.115.251.86,上面创建两个虚机,每个虚机两个网卡: vm1:eth0 - 9.*.*.232 eth1:10.0.0.14 vm2: eth0 - 9.8.*. ...

  9. GO语言练习:网络编程 ICMP 示例

    1.代码 2.编译及运行 1.Go语言网络编程:ICMP示例代码 icmptest.go package main import ( "fmt" "net" & ...

  10. 一个ICMP单元

    unit ICMPUtils; interface {$IFDEF VER80} { This source file is *NOT* compatible with Delphi 1 becaus ...

随机推荐

  1. LSJ_NHibernate第二章 ManagerPage

    前言: 项目为传统的三层架构,可以根据个人的需求进行拓展. 很多人都在质疑B层的作用,我认为B层才是核心,这个取决于业务的复杂度 项目的结构也比较的简单,我们先从最底层说起,ManagerPage,这 ...

  2. string应用

    今天在网上搜了一些资料. C# string类应用 判断是否包含子串 想要判断一个字符串中是否包含某个子串,可以用Contains方法来实现: ? public bool Contains (stri ...

  3. kettle中通过 时间戳(timestamp)方式 来实现数据库的增量同步操作(一)

    这个实验主要思想是在创建数据库表的时候, 通过增加一个额外的字段,也就是时间戳字段, 例如在同步表 tt1 和表 tt2 的时候, 通过检查那个表是最新更新的,那个表就作为新表,而另外的表最为旧表被新 ...

  4. 解决Android Studio启动速度慢的问题。避免每次启动Android Studio都要fetching Android sdk compoment information。

    Android Studio每次启动都要去fetching sdk,由于Android sdk 官网在大陆连不上,所以每次启动时界面都会停在那里很久. 解决办法就是设置取消每次fetching sdk ...

  5. IOS-开发日志-UILabel相关

    UILabel属性 1.text:设置标签显示文本. 2.attributedText:设置标签属性文本. Ios代码 NSString *text = @"first"; NSM ...

  6. java新手笔记17 参数

    package com.yfs.javase; public class ParamDemo { public static void main(String[] args) { int a = 3, ...

  7. 安装aptana插件报Error opening the editor. java.lang.NullPointerException

    Aptana的官方网站下载eclipse的插件:  http://update.aptana.com/update/studio/3.2/ ,可以在线安装也可以下载插件后再安装,我是以在线的形式安装的 ...

  8. UVA 11384 Help is needed for Dexter(问题转化 递归)

    Help is needed for Dexter Time Limit: 3 Second Dexter is tired of Dee Dee. So he decided to keep Dee ...

  9. Codevs 1183 泥泞的道路

    1183 泥泞的道路 时间限制: 1 s 空间限制: 128000 KB 题目等级 : 钻石 Diamond 传送门 题目描述 Description CS有n个小区,并且任意小区之间都有两条单向道路 ...

  10. caffe源码阅读(3)-Datalayer

    DataLayer是把数据从文件导入到网络的层,从网络定义prototxt文件可以看一下数据层定义 layer { name: "data" type: "Data&qu ...