dctcp-2.6.26-rev1.1.0.patch

 diff -Naur linux-2.6./include/linux/sysctl.h linux-2.6.-dctcp-rev1.1.0/include/linux/sysctl.h
--- linux-2.6./include/linux/sysctl.h -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/include/linux/sysctl.h -- ::50.000000000 -
@@ -, +, @@
NET_TCP_ALLOWED_CONG_CONTROL=,
NET_TCP_MAX_SSTHRESH=,
NET_TCP_FRTO_RESPONSE=,
+ NET_TCP_DELAYED_ACK=,
+ NET_TCP_DCTCP_ENABLE=,
+ NET_TCP_DCTCP_SHIFT_G=,
}; enum {
diff -Naur linux-2.6./include/linux/tcp.h linux-2.6.-dctcp-rev1.1.0/include/linux/tcp.h
--- linux-2.6./include/linux/tcp.h -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/include/linux/tcp.h -- ::45.000000000 -
@@ -, +, @@
/* TCP MD5 Signagure Option information */
struct tcp_md5sig_info *md5sig_info;
#endif
+
+/* DCTCP Specific Parameters */
+ u32 acked_bytes_ecn;
+ u32 acked_bytes_total;
+ u32 prior_rcv_nxt;
+ u32 dctcp_alpha;
+ u32 next_seq;
+ u32 ce_state; /* 0: last pkt was non-ce , 1: last pkt was ce */
+ u32 delayed_ack_reserved;
}; static inline struct tcp_sock *tcp_sk(const struct sock *sk)
diff -Naur linux-2.6./include/net/tcp.h linux-2.6.-dctcp-rev1.1.0/include/net/tcp.h
--- linux-2.6./include/net/tcp.h -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/include/net/tcp.h -- ::50.000000000 -
@@ -, +, @@
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
+extern int sysctl_tcp_delayed_ack;
+extern int sysctl_tcp_dctcp_enable;
+extern int sysctl_tcp_dctcp_shift_g;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[];
extern int sysctl_tcp_wmem[];
diff -Naur linux-2.6./kernel/sysctl_check.c linux-2.6.-dctcp-rev1.1.0/kernel/sysctl_check.c
--- linux-2.6./kernel/sysctl_check.c -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/kernel/sysctl_check.c -- ::50.000000000 -
@@ -, +, @@
{ NET_TCP_FACK, "tcp_fack" },
{ NET_TCP_REORDERING, "tcp_reordering" },
{ NET_TCP_ECN, "tcp_ecn" },
+ { NET_TCP_DELAYED_ACK, "tcp_delayed_ack" },
+ { NET_TCP_DCTCP_ENABLE, "tcp_dctcp_enable" },
+ { NET_TCP_DCTCP_SHIFT_G, "tcp_dctcp_shift_g" },
{ NET_TCP_DSACK, "tcp_dsack" },
{ NET_TCP_MEM, "tcp_mem" },
{ NET_TCP_WMEM, "tcp_wmem" },
diff -Naur linux-2.6./net/ipv4/sysctl_net_ipv4.c linux-2.6.-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c
--- linux-2.6./net/ipv4/sysctl_net_ipv4.c -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c -- ::50.000000000 -
@@ -, +, @@
.proc_handler = &proc_dointvec
},
{
+ .ctl_name = NET_TCP_DELAYED_ACK,
+ .procname = "tcp_delayed_ack",
+ .data = &sysctl_tcp_delayed_ack,
+ .maxlen = sizeof(int),
+ .mode = ,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_TCP_DCTCP_ENABLE,
+ .procname = "tcp_dctcp_enable",
+ .data = &sysctl_tcp_dctcp_enable,
+ .maxlen = sizeof(int),
+ .mode = ,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_TCP_DCTCP_SHIFT_G,
+ .procname = "tcp_dctcp_shift_g",
+ .data = &sysctl_tcp_dctcp_shift_g,
+ .maxlen = sizeof(int),
+ .mode = ,
+ .proc_handler = &proc_dointvec
+ },
+ {
.ctl_name = NET_TCP_DSACK,
.procname = "tcp_dsack",
.data = &sysctl_tcp_dsack,
diff -Naur linux-2.6./net/ipv4/tcp_input.c linux-2.6.-dctcp-rev1.1.0/net/ipv4/tcp_input.c
--- linux-2.6./net/ipv4/tcp_input.c -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/net/ipv4/tcp_input.c -- ::21.000000000 -
@@ -, +, @@
int sysctl_tcp_fack __read_mostly = ;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_delayed_ack __read_mostly = ;
+int sysctl_tcp_dctcp_enable __read_mostly;
+int sysctl_tcp_dctcp_shift_g __read_mostly = ; /* g=1/2^5 */
int sysctl_tcp_dsack __read_mostly = ;
int sysctl_tcp_app_win __read_mostly = ;
int sysctl_tcp_adv_win_scale __read_mostly = ;
@@ -, +, @@
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
} -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_dctcp_check_ce(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
{
if (tp->ecn_flags & TCP_ECN_OK) {
- if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
- /* Funny extension: if ECT is not set on a segment,
- * it is surely retransmit. It is not in ECN RFC,
- * but Linux follows this rule. */
- else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
- tcp_enter_quickack_mode((struct sock *)tp);
+ u32 temp_rcv_nxt;
+
+ if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) {
+
+ /* rcv_nxt has already been updated by the earlier processing step (tcp_rcv_established) */
+
+ if(sysctl_tcp_dctcp_enable) {
+
+ /* state has changed from CE=0 to CE=1 and the delayed ACK has not been sent yet */
+ if(tp->ce_state == && tp->delayed_ack_reserved) {
+
+ /* save current rcv_nxt */
+ temp_rcv_nxt = tp->rcv_nxt;
+ /* generate previous ack with CE=0 */
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ tp->rcv_nxt = tp->prior_rcv_nxt;
+ tcp_send_ack(sk);
+ /* recover current rcv_nxt */
+ tp->rcv_nxt = temp_rcv_nxt;
+ }
+
+ tp->ce_state = ;
+ }
+
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+
+
+ /* Funny extension: if ECT is not set on a segment,
+ * it is surely retransmit. It is not in ECN RFC,
+ * but Linux follows this rule. */
+ } else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) {
+ tcp_enter_quickack_mode((struct sock *)tp);
+ }else {
+ /* It has ECT but it doesn't have CE */
+
+ if(sysctl_tcp_dctcp_enable) {
+
+ if(tp->ce_state != && tp->delayed_ack_reserved) {
+
+ /* save current rcv_nxt */
+ temp_rcv_nxt = tp->rcv_nxt;
+ /* generate previous ack with CE=1 */
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ tp->rcv_nxt = tp->prior_rcv_nxt;
+ tcp_send_ack(sk);
+ /* recover current rcv_nxt */
+ tp->rcv_nxt = temp_rcv_nxt;
+ }
+
+ tp->ce_state = ;
+
+ /* deassert only when DCTCP is enabled */
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ }
+
+ }
+
+ /* set current rcv_nxt to prior_rcv_nxt */
+ tp->prior_rcv_nxt = tp->rcv_nxt;
}
} @@ -, +, @@
*/
tcp_incr_quickack(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
+
+ tp->ce_state = ;
} else {
int m = now - icsk->icsk_ack.lrcvtime; @@ -, +, @@
}
icsk->icsk_ack.lrcvtime = now; - TCP_ECN_check_ce(tp, skb);
+ TCP_ECN_dctcp_check_ce(sk, tp, skb); if (skb->len >= )
tcp_grow_window(sk, skb);
@@ -, +, @@
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk); + __u32 ssthresh_old;
+ __u32 cwnd_old;
+ __u32 cwnd_new;
+
tp->prior_ssthresh = ;
tp->bytes_acked = ;
if (icsk->icsk_ca_state < TCP_CA_CWR) {
tp->undo_marker = ;
- if (set_ssthresh)
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
- tp->snd_cwnd = min(tp->snd_cwnd,
- tcp_packets_in_flight(tp) + 1U);
+
+ if(!sysctl_tcp_dctcp_enable) {
+
+ if (set_ssthresh)
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+
+ tp->snd_cwnd = min(tp->snd_cwnd,
+ tcp_packets_in_flight(tp) + 1U);
+
+ }else {
+
+ cwnd_new = max (tp->snd_cwnd - ((tp->snd_cwnd * tp->dctcp_alpha)>>) , 2U);
+
+ if(set_ssthresh) {
+
+ ssthresh_old = tp->snd_ssthresh;
+ tp->snd_ssthresh = cwnd_new;
+
+ /* printk("%llu alpha= %d ssth old= %d new= %d\n", */
+ /* ktime_to_us(ktime_get_real()), */
+ /* tp->dctcp_alpha, */
+ /* ssthresh_old, */
+ /* tp->snd_ssthresh); */
+ }
+
+ cwnd_old = tp->snd_cwnd;
+ tp->snd_cwnd = cwnd_new;
+
+ /* printk("%llu alpha= %d cwnd old= %d new= %d\n", */
+ /* ktime_to_us(ktime_get_real()), */
+ /* tp->dctcp_alpha, */
+ /* cwnd_old, */
+ /* tp->snd_cwnd); */
+ }
+
tp->snd_cwnd_cnt = ;
tp->high_seq = tp->snd_nxt;
tp->snd_cwnd_stamp = tcp_time_stamp;
TCP_ECN_queue_cwr(tp);
-
+
tcp_set_ca_state(sk, TCP_CA_CWR);
}
}
@@ -, +, @@
tcp_try_keep_open(sk);
tcp_moderate_cwnd(tp);
} else {
- tcp_cwnd_down(sk, flag);
+ if(!sysctl_tcp_dctcp_enable)
+ tcp_cwnd_down(sk, flag);
}
} @@ -, +, @@
int prior_packets;
int frto_cwnd = ; + __u32 alpha_old;
+ __u32 acked_bytes;
+
/* If the ack is newer than sent or older than previous acks
* then we can probably ignore it.
*/
@@ -, +, @@
tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
} +
+ /* START: DCTCP Processing */
+
+ /* calc acked bytes */
+ if(after(ack,prior_snd_una)) {
+ acked_bytes = ack - prior_snd_una;
+ } else {
+ acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
+ }
+
+ if(flag & FLAG_ECE)
+ tp->acked_bytes_ecn += acked_bytes;
+
+ tp->acked_bytes_total += acked_bytes;
+
+ /* Expired RTT */
+ if (!before(tp->snd_una,tp->next_seq)) {
+
+ /* Guard the denominator (acked_bytes_total) of the division below against being zero */
+ if(tp->acked_bytes_total == ) tp->acked_bytes_total = ;
+
+ alpha_old = tp->dctcp_alpha;
+
+ /* alpha = (1-g) * alpha + g * F */
+ tp->dctcp_alpha = alpha_old - (alpha_old >> sysctl_tcp_dctcp_shift_g)
+ + (tp->acked_bytes_ecn << ( - sysctl_tcp_dctcp_shift_g)) / tp->acked_bytes_total;
+
+ if(tp->dctcp_alpha > ) tp->dctcp_alpha = ; /* round to 0-1024 */
+
+ /* printk("bytes_ecn= %d total= %d alpha: old= %d new= %d\n", */
+ /* tp->acked_bytes_ecn, tp->acked_bytes_total, alpha_old, tp->dctcp_alpha); */
+
+ tp->acked_bytes_ecn = ;
+ tp->acked_bytes_total = ;
+ tp->next_seq = tp->snd_nxt;
+ }
+
+ /* END: DCTCP Processing */
+
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
@@ -, +, @@
goto queue_and_out;
} - TCP_ECN_check_ce(tp, skb);
+ TCP_ECN_dctcp_check_ce(sk, tp, skb); if (tcp_try_rmem_schedule(sk, skb->truesize))
goto drop;
@@ -, +, @@
&& __tcp_select_window(sk) >= tp->rcv_wnd) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
+ /* Delayed ACK is disabled or ... */
+ sysctl_tcp_delayed_ack == ||
/* We have out of order data. */
(ofo_possible && skb_peek(&tp->out_of_order_queue))) {
/* Then ack it now */
@@ -, +, @@
} EXPORT_SYMBOL(sysctl_tcp_ecn);
+EXPORT_SYMBOL(sysctl_tcp_delayed_ack);
+EXPORT_SYMBOL(sysctl_tcp_dctcp_enable);
+EXPORT_SYMBOL(sysctl_tcp_dctcp_shift_g);
EXPORT_SYMBOL(sysctl_tcp_reordering);
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
EXPORT_SYMBOL(tcp_parse_options);
diff -Naur linux-2.6./net/ipv4/tcp_minisocks.c linux-2.6.-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c
--- linux-2.6./net/ipv4/tcp_minisocks.c -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c -- ::45.000000000 -
@@ -, +, @@
newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + ;
newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + ; + /* Initialize DCTCP internal parameters */
+ newtp->next_seq = newtp->snd_nxt;
+ newtp->acked_bytes_ecn = ;
+ newtp->acked_bytes_total = ;
+
tcp_prequeue_init(newtp); tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
diff -Naur linux-2.6./net/ipv4/tcp_output.c linux-2.6.-dctcp-rev1.1.0/net/ipv4/tcp_output.c
--- linux-2.6./net/ipv4/tcp_output.c -- ::29.000000000 -
+++ linux-2.6.-dctcp-rev1.1.0/net/ipv4/tcp_output.c -- ::50.000000000 -
@@ -, +, @@
struct tcp_sock *tp = tcp_sk(sk); tp->ecn_flags = ;
- if (sysctl_tcp_ecn) {
+ if (sysctl_tcp_ecn || sysctl_tcp_dctcp_enable) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
tp->ecn_flags = TCP_ECN_OK;
}
@@ -, +, @@
TCP_ECN_send(sk, skb, tcp_header_size);
} + /* In DCTCP, assert the ECT bit on all packets */
+ if(sysctl_tcp_dctcp_enable)
+ INET_ECN_xmit(sk);
+
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
@@ -, +, @@
tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
TCP_ECN_send_syn(sk, buff); + /* Initialize DCTCP internal parameters */
+ tp->next_seq = tp->snd_nxt;
+ tp->acked_bytes_ecn = ;
+ tp->acked_bytes_total = ;
+
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
@@ -, +, @@
int ato = icsk->icsk_ack.ato;
unsigned long timeout; + /* Delayed ACK reserved flag for DCTCP */
+ struct tcp_sock *tp = tcp_sk(sk);
+ tp->delayed_ack_reserved = ;
+
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / ;
@@ -, +, @@
{
struct sk_buff *buff; + /* Delayed ACK reserved flag for DCTCP */
+ struct tcp_sock *tp = tcp_sk(sk);
+ tp->delayed_ack_reserved = ;
+
/* If we have been reset, we may not send again. */
if (sk->sk_state == TCP_CLOSE)
return;

https://github.com/myasuda/DCTCP-Linux/blob/master/dctcp-2.6.26-rev1.1.0.patch

dctcp-2.6.26-rev1.1.0.patch的更多相关文章

  1. ODA: After Apply ODA 12.2.1.2.0 Patch, Unable to Create TableSpace Due to [ORA-15001: diskgroup "DATA" does not exist or is not mounted | ORA-15040: diskgroup is incomplete] (Doc ID 2375553.1)

    ODA: After Apply ODA 12.2.1.2.0 Patch, Unable to Create TableSpace Due to [ORA-15001: diskgroup " ...

  2. 26、xcode8.0 解决没有iPhone4模拟器问题

    第一步:随便打开Xcode项目 ,点击电脑屏幕右上角 Xcode->Preference 第二步: 点击下载ios 8.1 Simulator 等到下载完成即可在xcode中添加iphone4s ...

  3. 静默方式安装10g数据库软件+升级patch+手工建库

    通常我们安装Oracle数据库软件,都是用OUI图形界面来完成的,但有些Unix/Linux系统中并未安装图形系统,也就无法使用图形界面来安装Oracle的产品了,对于这种场景,就只能采用静默方式来安 ...

  4. 批处理基本知识以及进阶 V2.0

    批处理基本知识以及进阶 将以要执行的程序指令 , 像在 dos 模式下一下写入记事本 , 保存成 bat 文件 , 就可以执行了 一 . 简单批处理内部命令简介 1.Echo 命令 打开回显或关闭请求 ...

  5. [Android6.0][RK3399] 双屏异显代码实现流程分析(二)【转】

    本文转载自:http://blog.csdn.net/dearsq/article/details/55050125 Patch Code dtsi rk3399-androiddtsi rk3399 ...

  6. kafka 0.11.0.3 源码编译

    首先下载 kafka 0.11.0.3 版本 源码: http://mirrors.hust.edu.cn/apache/kafka/0.11.0.3/ 下载源码 首先安装 gradle,不再说明 1 ...

  7. spark2.0的10个特性介绍

    1. Spark 2.0 ! 还记得我们的第七篇 Spark 博文里吗?里面我用三点来总结 spark dataframe 的好处: 当时是主要介绍 spark 里的 dataframe,今天是想总结 ...

  8. 弱省互测#0 t2

    题意 给定两个字符串 A 和 B,求下面四个问题的答案: 1.在 A 的子串中,不是 B 的子串的字符串的数量. 2.在 A 的子串中,不是 B 的子序列的字符串的数量. 3.在 A 的子序列中,不是 ...

  9. Consolidated Seed Table Upgrade Patch(Patch 17204589)

    $ adop phase=apply patches= hotpatch=yes abandon=no Enter the APPS password: Enter the SYSTEM passwo ...

随机推荐

  1. python3 md5

    参考: https://docs.python.org/3/library/hashlib.html?highlight=hashlib#credits https://blog.csdn.net/w ...

  2. 解决GitHub下载很慢的问题

    此方法参照 github下载慢,轻松提速教程 1.获取github的IP地址.访问:https://www.ipaddress.com/ 网址依次获取以下三个网址的IP github.com gith ...

  3. mysql语句插入前判断数据是否重复

    在mysql中插入数据有时需要判断数据插入是否重复 语句编写:insert into 表(相应字段) select 相应字段 from dual where not exists (select 相应 ...

  4. (转)python time模块和datetime模块详解

    python time模块和datetime模块详解 原文:http://www.cnblogs.com/tkqasn/p/6001134.html 一.time模块 time模块中时间表现的格式主要 ...

  5. Oracle给Select结果集加锁,Skip Locked(跳过加锁行获得可以加锁的结果集)

    1.通过select for update或select for update wait或select for update nowait给数据集加锁 具体实现参考select for update和 ...

  6. 关于在真实物理机器上用cloudermanger或ambari搭建大数据集群注意事项总结、经验和感悟心得(图文详解)

    写在前面的话 (1) 最近一段时间,因担任我团队实验室的大数据环境集群真实物理机器工作,至此,本人秉持负责.认真和细心的态度,先分别在虚拟机上模拟搭建ambari(基于CentOS6.5版本)和clo ...

  7. orcale 之 数据完整性约束

    数据完整性约束是对数据描述的某种约束的条件,在关系型数据库中一般有:完整性约束,实体完整性约束,参照完整性约束和用户自定义完整性约束. 实体完整性约束 在数据库中一张表一般对应一个现实生活中的实体,比 ...

  8. PHP读取文件的多种方法

    1.传统的方法 fopen, fclose feof:file.end of file 例子: $file_handle = fopen("c:\\myfile.txt", " ...

  9. html的framset使用

    frameset主要用在显示多个页面的需求下: 看代码: <html> <head> <title>html frameset test</title> ...

  10. 设置 mysql允许外网访问

    mysql的root账户,我在连接时通常用的是localhost或127.0.0.1,公司的测试服务器上的mysql也是localhost所以我想访问无法访问,测试暂停. 解决方法如下: 1,修改表, ...