summaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig11
-rw-r--r--net/dccp/Makefile8
-rw-r--r--net/dccp/ackvec.c134
-rw-r--r--net/dccp/ackvec.h23
-rw-r--r--net/dccp/ccid.c6
-rw-r--r--net/dccp/ccid.h26
-rw-r--r--net/dccp/ccids/Kconfig75
-rw-r--r--net/dccp/ccids/ccid2.c73
-rw-r--r--net/dccp/ccids/ccid2.h3
-rw-r--r--net/dccp/ccids/ccid3.c930
-rw-r--r--net/dccp/ccids/ccid3.h129
-rw-r--r--net/dccp/ccids/lib/loss_interval.c8
-rw-r--r--net/dccp/ccids/lib/loss_interval.h2
-rw-r--r--net/dccp/ccids/lib/packet_history.c219
-rw-r--r--net/dccp/ccids/lib/packet_history.h130
-rw-r--r--net/dccp/ccids/lib/tfrc.h23
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c241
-rw-r--r--net/dccp/dccp.h111
-rw-r--r--net/dccp/feat.c131
-rw-r--r--net/dccp/feat.h48
-rw-r--r--net/dccp/input.c114
-rw-r--r--net/dccp/ipv4.c553
-rw-r--r--net/dccp/ipv6.c639
-rw-r--r--net/dccp/minisocks.c53
-rw-r--r--net/dccp/options.c68
-rw-r--r--net/dccp/output.c111
-rw-r--r--net/dccp/probe.c8
-rw-r--r--net/dccp/proto.c70
-rw-r--r--net/dccp/sysctl.c60
-rw-r--r--net/dccp/timer.c140
30 files changed, 2183 insertions, 1964 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index e2a095d0fd80..b8a68dd41000 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -4,15 +4,15 @@ menu "DCCP Configuration (EXPERIMENTAL)"
config IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
---help---
- Datagram Congestion Control Protocol
+ Datagram Congestion Control Protocol (RFC 4340)
- From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
+ From http://www.ietf.org/rfc/rfc4340.txt:
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
congestion-controlled, unreliable datagrams. It should be suitable
for use by applications such as streaming media, Internet telephony,
- and on-line games
+ and on-line games.
To compile this protocol support as a module, choose M here: the
module will be called dccp.
@@ -38,6 +38,9 @@ config IP_DCCP_DEBUG
---help---
Only use this if you're hacking DCCP.
+ When compiling DCCP as a module, this debugging output can be toggled
+ by setting the parameter dccp_debug of the `dccp' module to 0 or 1.
+
Just say N.
config NET_DCCPPROBE
@@ -49,7 +52,7 @@ config NET_DCCPPROBE
DCCP congestion avoidance modules. If you don't understand
what was just said, you don't need it: say N.
- Documentation on how to use the packet generator can be found
+ Documentation on how to use DCCP connection probing can be found
at http://linux-net.osdl.org/index.php/DccpProbe
To compile this code as a module, choose M here: the
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 17ed99c46617..f4f8793aafff 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,13 +1,13 @@
-obj-$(CONFIG_IPV6) += dccp_ipv6.o
-
-dccp_ipv6-y := ipv6.o
-
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
dccp_ipv4-y := ipv4.o
+# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
+obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
+dccp_ipv6-y := ipv6.o
+
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 4d176d33983f..a086c6312d3b 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -21,8 +21,8 @@
#include <net/sock.h>
-static kmem_cache_t *dccp_ackvec_slab;
-static kmem_cache_t *dccp_ackvec_record_slab;
+static struct kmem_cache *dccp_ackvec_slab;
+static struct kmem_cache *dccp_ackvec_record_slab;
static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
{
@@ -67,15 +67,16 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT tx: " : "server tx: ";
-#endif
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
- int len = av->dccpav_vec_len + 2;
+ /* Figure out how many options do we need to represent the ackvec */
+ const u16 nr_opts = (av->dccpav_vec_len +
+ DCCP_MAX_ACKVEC_OPT_LEN - 1) /
+ DCCP_MAX_ACKVEC_OPT_LEN;
+ u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
struct timeval now;
u32 elapsed_time;
- unsigned char *to, *from;
+ const unsigned char *tail, *from;
+ unsigned char *to;
struct dccp_ackvec_record *avr;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
@@ -94,26 +95,39 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
- to = skb_push(skb, len);
- *to++ = DCCPO_ACK_VECTOR_0;
- *to++ = len;
-
+ to = skb_push(skb, len);
len = av->dccpav_vec_len;
from = av->dccpav_buf + av->dccpav_buf_head;
+ tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN;
+
+ for (i = 0; i < nr_opts; ++i) {
+ int copylen = len;
+
+ if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+ copylen = DCCP_MAX_ACKVEC_OPT_LEN;
- /* Check if buf_head wraps */
- if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) {
- const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head;
+ *to++ = DCCPO_ACK_VECTOR_0;
+ *to++ = copylen + 2;
- memcpy(to, from, tailsize);
- to += tailsize;
- len -= tailsize;
- from = av->dccpav_buf;
+ /* Check if buf_head wraps */
+ if (from + copylen > tail) {
+ const u16 tailsize = tail - from;
+
+ memcpy(to, from, tailsize);
+ to += tailsize;
+ len -= tailsize;
+ copylen -= tailsize;
+ from = av->dccpav_buf;
+ }
+
+ memcpy(to, from, copylen);
+ from += copylen;
+ to += copylen;
+ len -= copylen;
}
- memcpy(to, from, len);
/*
- * From draft-ietf-dccp-spec-11.txt:
+ * From RFC 4340, A.2:
*
* For each acknowledgement it sends, the HC-Receiver will add an
* acknowledgement record. ack_seqno will equal the HC-Receiver
@@ -129,9 +143,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
dccp_ackvec_insert_avr(av, avr);
- dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
+ dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
"ack_ackno=%llu\n",
- debug_prefix, avr->dccpavr_sent_len,
+ dccp_role(sk), avr->dccpavr_sent_len,
(unsigned long long)avr->dccpavr_ack_seqno,
(unsigned long long)avr->dccpavr_ack_ackno);
return 0;
@@ -145,7 +159,6 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
- av->dccpav_ack_ptr = 0;
av->dccpav_time.tv_sec = 0;
av->dccpav_time.tv_usec = 0;
av->dccpav_vec_len = 0;
@@ -174,13 +187,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
}
static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
- const u8 index)
+ const u32 index)
{
return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
}
static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
- const u8 index)
+ const u32 index)
{
return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
}
@@ -210,7 +223,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
gap = -new_head;
}
new_head += DCCP_MAX_ACKVEC_LEN;
- }
+ }
av->dccpav_buf_head = new_head;
@@ -224,7 +237,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
}
/*
- * Implements the draft-ietf-dccp-spec-11.txt Appendix A
+ * Implements the RFC 4340, Appendix A
*/
int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
const u64 ackno, const u8 state)
@@ -237,7 +250,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
* We may well decide to do buffer compression, etc, but for now lets
* just drop.
*
- * From Appendix A:
+ * From Appendix A.1.1 (`New Packets'):
*
* Of course, the circular buffer may overflow, either when the
* HC-Sender is sending data at a very high rate, when the
@@ -274,13 +287,13 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
/*
* A.1.2. Old Packets
*
- * When a packet with Sequence Number S arrives, and
- * S <= buf_ackno, the HC-Receiver will scan the table
- * for the byte corresponding to S. (Indexing structures
+ * When a packet with Sequence Number S <= buf_ackno
+ * arrives, the HC-Receiver will scan the table for
+ * the byte corresponding to S. (Indexing structures
* could reduce the complexity of this scan.)
*/
u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
- u8 index = av->dccpav_buf_head;
+ u32 index = av->dccpav_buf_head;
while (1) {
const u8 len = dccp_ackvec_len(av, index);
@@ -322,21 +335,18 @@ out_duplicate:
#ifdef CONFIG_IP_DCCP_DEBUG
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
{
- if (!dccp_debug)
- return;
-
- printk("ACK vector len=%d, ackno=%llu |", len,
- (unsigned long long)ackno);
+ dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len,
+ (unsigned long long)ackno);
while (len--) {
const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
- printk("%d,%d|", state, rl);
+ dccp_pr_debug_cat("%d,%d|", state, rl);
++vector;
}
- printk("\n");
+ dccp_pr_debug_cat("\n");
}
void dccp_ackvec_print(const struct dccp_ackvec *av)
@@ -380,24 +390,20 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
*/
list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
if (ackno == avr->dccpavr_ack_seqno) {
-#ifdef CONFIG_IP_DCCP_DEBUG
- struct dccp_sock *dp = dccp_sk(sk);
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx ack: " : "server rx ack: ";
-#endif
- dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+ dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
"ack_ackno=%llu, ACKED!\n",
- debug_prefix, 1,
+ dccp_role(sk), 1,
(unsigned long long)avr->dccpavr_ack_seqno,
(unsigned long long)avr->dccpavr_ack_ackno);
dccp_ackvec_throw_record(av, avr);
break;
- }
+ } else if (avr->dccpavr_ack_seqno > ackno)
+ break; /* old news */
}
}
static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
- struct sock *sk, u64 ackno,
+ struct sock *sk, u64 *ackno,
const unsigned char len,
const unsigned char *vector)
{
@@ -420,7 +426,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
u64 ackno_end_rl;
- dccp_set_seqno(&ackno_end_rl, ackno - rl);
+ dccp_set_seqno(&ackno_end_rl, *ackno - rl);
/*
* If our AVR sequence number is greater than the ack, go
@@ -428,25 +434,19 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
*/
list_for_each_entry_from(avr, &av->dccpav_records,
dccpavr_node) {
- if (!after48(avr->dccpavr_ack_seqno, ackno))
+ if (!after48(avr->dccpavr_ack_seqno, *ackno))
goto found;
}
/* End of the dccpav_records list, not found, exit */
break;
found:
- if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
+ if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) {
const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
-#ifdef CONFIG_IP_DCCP_DEBUG
- struct dccp_sock *dp = dccp_sk(sk);
- const char *debug_prefix =
- dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx ack: " : "server rx ack: ";
-#endif
- dccp_pr_debug("%sACK vector 0, len=%d, "
+ dccp_pr_debug("%s ACK vector 0, len=%d, "
"ack_seqno=%llu, ack_ackno=%llu, "
"ACKED!\n",
- debug_prefix, len,
+ dccp_role(sk), len,
(unsigned long long)
avr->dccpavr_ack_seqno,
(unsigned long long)
@@ -460,27 +460,23 @@ found:
*/
}
- dccp_set_seqno(&ackno, ackno_end_rl - 1);
+ dccp_set_seqno(ackno, ackno_end_rl - 1);
++vector;
}
}
int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u8 opt, const u8 *value, const u8 len)
+ u64 *ackno, const u8 opt, const u8 *value, const u8 len)
{
- if (len > DCCP_MAX_ACKVEC_LEN)
+ if (len > DCCP_MAX_ACKVEC_OPT_LEN)
return -1;
/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_ack_seq,
- len, value);
+ ackno, len, value);
return 0;
}
-static char dccp_ackvec_slab_msg[] __initdata =
- KERN_CRIT "DCCP: Unable to create ack vectors slab caches\n";
-
int __init dccp_ackvec_init(void)
{
dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
@@ -502,7 +498,7 @@ out_destroy_slab:
kmem_cache_destroy(dccp_ackvec_slab);
dccp_ackvec_slab = NULL;
out_err:
- printk(dccp_ackvec_slab_msg);
+ DCCP_CRIT("Unable to create Ack Vector slab cache");
return -ENOBUFS;
}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 2424effac7f6..96504a3b16e4 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -17,7 +17,9 @@
#include <linux/types.h>
/* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_LEN 253
+#define DCCP_MAX_ACKVEC_OPT_LEN 253
+/* We can spread an ack vector across multiple options */
+#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
#define DCCP_ACKVEC_STATE_RECEIVED 0
#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
@@ -28,8 +30,7 @@
/** struct dccp_ackvec - ack vector
*
- * This data structure is the one defined in the DCCP draft
- * Appendix A.
+ * This data structure is the one defined in RFC 4340, Appendix A.
*
* @dccpav_buf_head - circular buffer head
* @dccpav_buf_tail - circular buffer tail
@@ -42,7 +43,6 @@
* Ack Vectors it has recently sent. For each packet sent carrying an
* Ack Vector, it remembers four variables:
*
- * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
* @dccpav_records - list of dccp_ackvec_record
* @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
*
@@ -53,9 +53,8 @@ struct dccp_ackvec {
u64 dccpav_buf_ackno;
struct list_head dccpav_records;
struct timeval dccpav_time;
- u8 dccpav_buf_head;
- u8 dccpav_ack_ptr;
- u8 dccpav_vec_len;
+ u16 dccpav_buf_head;
+ u16 dccpav_vec_len;
u8 dccpav_buf_nonce;
u8 dccpav_ack_nonce;
u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN];
@@ -78,9 +77,9 @@ struct dccp_ackvec_record {
struct list_head dccpavr_node;
u64 dccpavr_ack_seqno;
u64 dccpavr_ack_ackno;
- u8 dccpavr_ack_ptr;
+ u16 dccpavr_ack_ptr;
+ u16 dccpavr_sent_len;
u8 dccpavr_ack_nonce;
- u8 dccpavr_sent_len;
};
struct sock;
@@ -99,7 +98,8 @@ extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
struct sock *sk, const u64 ackno);
extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u8 opt, const u8 *value, const u8 len);
+ u64 *ackno, const u8 opt,
+ const u8 *value, const u8 len);
extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
@@ -138,7 +138,8 @@ static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
}
static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u8 opt, const u8 *value, const u8 len)
+ const u64 *ackno, const u8 opt,
+ const u8 *value, const u8 len)
{
return -1;
}
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index ff05e59043cd..d8cf92f09e68 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -55,9 +55,9 @@ static inline void ccids_read_unlock(void)
#define ccids_read_unlock() do { } while(0)
#endif
-static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
+static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
{
- kmem_cache_t *slab;
+ struct kmem_cache *slab;
char slab_name_fmt[32], *slab_name;
va_list args;
@@ -75,7 +75,7 @@ static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
return slab;
}
-static void ccid_kmem_cache_destroy(kmem_cache_t *slab)
+static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
{
if (slab != NULL) {
const char *name = kmem_cache_name(slab);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index f7eb6c613414..c65cb2453e43 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -27,9 +27,9 @@ struct ccid_operations {
unsigned char ccid_id;
const char *ccid_name;
struct module *ccid_owner;
- kmem_cache_t *ccid_hc_rx_slab;
+ struct kmem_cache *ccid_hc_rx_slab;
__u32 ccid_hc_rx_obj_size;
- kmem_cache_t *ccid_hc_tx_slab;
+ struct kmem_cache *ccid_hc_tx_slab;
__u32 ccid_hc_tx_obj_size;
int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
@@ -43,8 +43,6 @@ struct ccid_operations {
unsigned char* value);
int (*ccid_hc_rx_insert_options)(struct sock *sk,
struct sk_buff *skb);
- int (*ccid_hc_tx_insert_options)(struct sock *sk,
- struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
int (*ccid_hc_tx_parse_options)(struct sock *sk,
@@ -52,9 +50,9 @@ struct ccid_operations {
unsigned char len, u16 idx,
unsigned char* value);
int (*ccid_hc_tx_send_packet)(struct sock *sk,
- struct sk_buff *skb, int len);
- void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
- int len);
+ struct sk_buff *skb);
+ void (*ccid_hc_tx_packet_sent)(struct sock *sk,
+ int more, unsigned int len);
void (*ccid_hc_rx_get_info)(struct sock *sk,
struct tcp_info *info);
void (*ccid_hc_tx_get_info)(struct sock *sk,
@@ -94,16 +92,16 @@ extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb, int len)
+ struct sk_buff *skb)
{
int rc = 0;
if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
- rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb, len);
+ rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
return rc;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
- int more, int len)
+ int more, unsigned int len)
{
if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
@@ -146,14 +144,6 @@ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
return rc;
}
-static inline int ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
-{
- if (ccid->ccid_ops->ccid_hc_tx_insert_options != NULL)
- return ccid->ccid_ops->ccid_hc_tx_insert_options(sk, skb);
- return 0;
-}
-
static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 32752f750447..80f469887691 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -22,19 +22,26 @@ config IP_DCCP_CCID2
for lost packets, would prefer CCID 2 to CCID 3. On-line games may
also prefer CCID 2.
- CCID 2 is further described in:
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid2-10.txt
+ CCID 2 is further described in RFC 4341,
+ http://www.ietf.org/rfc/rfc4341.txt
- This text was extracted from:
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
+ This text was extracted from RFC 4340 (sec. 10.1),
+ http://www.ietf.org/rfc/rfc4340.txt
+
+ To compile this CCID as a module, choose M here: the module will be
+ called dccp_ccid2.
If in doubt, say M.
config IP_DCCP_CCID2_DEBUG
- bool "CCID2 debug"
+ bool "CCID2 debugging messages"
depends on IP_DCCP_CCID2
---help---
- Enable CCID2 debug messages.
+ Enable CCID2-specific debugging messages.
+
+ When compiling CCID2 as a module, this debugging output can
+ additionally be toggled by setting the ccid2_debug module
+ parameter to 0 or 1.
If in doubt, say N.
@@ -53,20 +60,66 @@ config IP_DCCP_CCID3
suitable than CCID 2 for applications such streaming media where a
relatively smooth sending rate is of importance.
- CCID 3 is further described in:
-
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid3-11.txt.
+ CCID 3 is further described in RFC 4342,
+ http://www.ietf.org/rfc/rfc4342.txt
The TFRC congestion control algorithms were initially described in
RFC 3448.
- This text was extracted from:
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
+ This text was extracted from RFC 4340 (sec. 10.2),
+ http://www.ietf.org/rfc/rfc4340.txt
+ To compile this CCID as a module, choose M here: the module will be
+ called dccp_ccid3.
+
If in doubt, say M.
config IP_DCCP_TFRC_LIB
depends on IP_DCCP_CCID3
def_tristate IP_DCCP_CCID3
+config IP_DCCP_CCID3_DEBUG
+ bool "CCID3 debugging messages"
+ depends on IP_DCCP_CCID3
+ ---help---
+ Enable CCID3-specific debugging messages.
+
+ When compiling CCID3 as a module, this debugging output can
+ additionally be toggled by setting the ccid3_debug module
+ parameter to 0 or 1.
+
+ If in doubt, say N.
+
+config IP_DCCP_CCID3_RTO
+ int "Use higher bound for nofeedback timer"
+ default 100
+ depends on IP_DCCP_CCID3 && EXPERIMENTAL
+ ---help---
+ Use higher lower bound for nofeedback timer expiration.
+
+ The TFRC nofeedback timer normally expires after the maximum of 4
+ RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
+ with a small RTT this can mean a high processing load and reduced
+ performance, since then the nofeedback timer is triggered very
+ frequently.
+
+ This option enables to set a higher lower bound for the nofeedback
+ value. Values in units of milliseconds can be set here.
+
+ A value of 0 disables this feature by enforcing the value specified
+ in RFC 3448. The following values have been suggested as bounds for
+ experimental use:
+ * 16-20ms to match the typical multimedia inter-frame interval
+ * 100ms as a reasonable compromise [default]
+ * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
+
+ The default of 100ms is a compromise between a large value for
+ efficient DCCP implementations, and a small value to avoid disrupting
+ the network in times of congestion.
+
+ The purpose of the nofeedback timer is to slow DCCP down when there
+ is serious network congestion: experimenting with larger values should
+ therefore not be performed on WANs.
+
+
endmenu
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 2efb505aeb35..fd38b05d6f79 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -23,7 +23,7 @@
*/
/*
- * This implementation should follow: draft-ietf-dccp-ccid2-10.txt
+ * This implementation should follow RFC 4341
*
* BUGS:
* - sequence number wrapping
@@ -33,18 +33,11 @@
#include "../dccp.h"
#include "ccid2.h"
-static int ccid2_debug;
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-#define ccid2_pr_debug(format, a...) \
- do { if (ccid2_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
- } while (0)
-#else
-#define ccid2_pr_debug(format, a...)
-#endif
+static int ccid2_debug;
+#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
{
int len = 0;
@@ -86,7 +79,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
}
#else
-#define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
+#define ccid2_pr_debug(format, a...)
+#define ccid2_hc_tx_check_sanity(hctx)
#endif
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
@@ -131,8 +125,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
return 0;
}
-static int ccid2_hc_tx_send_packet(struct sock *sk,
- struct sk_buff *skb, int len)
+static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
struct ccid2_hc_tx_sock *hctx;
@@ -274,7 +267,7 @@ static void ccid2_start_rto_timer(struct sock *sk)
jiffies + hctx->ccid2hctx_rto);
}
-static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
+static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
@@ -352,14 +345,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
- ccid2_pr_debug("Sent: seq=%llu\n", seq);
+ ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
do {
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
while (seqp != hctx->ccid2hctx_seqh) {
ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
- seqp->ccid2s_seq, seqp->ccid2s_acked,
- seqp->ccid2s_sent);
+ (unsigned long long)seqp->ccid2s_seq,
+ seqp->ccid2s_acked, seqp->ccid2s_sent);
seqp = seqp->ccid2s_next;
}
} while (0);
@@ -426,7 +419,7 @@ static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
return -1;
out_invalid_option:
- BUG_ON(1); /* should never happen... options were previously parsed ! */
+ DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
return -1;
}
@@ -480,7 +473,8 @@ static inline void ccid2_new_ack(struct sock *sk,
/* first measurement */
if (hctx->ccid2hctx_srtt == -1) {
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
- r, jiffies, seqp->ccid2s_seq);
+ r, jiffies,
+ (unsigned long long)seqp->ccid2s_seq);
ccid2_change_srtt(hctx, r);
hctx->ccid2hctx_rttvar = r >> 1;
} else {
@@ -524,8 +518,8 @@ static inline void ccid2_new_ack(struct sock *sk,
hctx->ccid2hctx_lastrtt = jiffies;
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
- hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
- hctx->ccid2hctx_rto, HZ, r);
+ hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
+ hctx->ccid2hctx_rto, HZ, r);
hctx->ccid2hctx_sent = 0;
}
@@ -618,7 +612,17 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
- seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ if (after48(ackno, hctx->ccid2hctx_high_ack))
+ hctx->ccid2hctx_high_ack = ackno;
+
+ seqp = hctx->ccid2hctx_seqt;
+ while (before48(seqp->ccid2s_seq, ackno)) {
+ seqp = seqp->ccid2s_next;
+ if (seqp == hctx->ccid2hctx_seqh) {
+ seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ break;
+ }
+ }
/* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
* this single ack. I round up.
@@ -636,8 +640,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
u64 ackno_end_rl;
dccp_set_seqno(&ackno_end_rl, ackno - rl);
- ccid2_pr_debug("ackvec start:%llu end:%llu\n", ackno,
- ackno_end_rl);
+ ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+ (unsigned long long)ackno,
+ (unsigned long long)ackno_end_rl);
/* if the seqno we are analyzing is larger than the
* current ackno, then move towards the tail of our
* seqnos.
@@ -662,9 +667,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* new packet received or marked */
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
!seqp->ccid2s_acked) {
- if (state ==
+ if (state ==
DCCP_ACKVEC_STATE_ECN_MARKED) {
- ccid2_congestion_event(hctx,
+ ccid2_congestion_event(hctx,
seqp);
} else
ccid2_new_ack(sk, seqp,
@@ -672,7 +677,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
seqp->ccid2s_acked = 1;
ccid2_pr_debug("Got ack for %llu\n",
- seqp->ccid2s_seq);
+ (unsigned long long)seqp->ccid2s_seq);
ccid2_hc_tx_dec_pipe(sk);
}
if (seqp == hctx->ccid2hctx_seqt) {
@@ -695,7 +700,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* The state about what is acked should be correct now
* Check for NUMDUPACK
*/
- seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ seqp = hctx->ccid2hctx_seqt;
+ while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
+ seqp = seqp->ccid2s_next;
+ if (seqp == hctx->ccid2hctx_seqh) {
+ seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ break;
+ }
+ }
done = 0;
while (1) {
if (seqp->ccid2s_acked) {
@@ -718,7 +730,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
while (1) {
if (!seqp->ccid2s_acked) {
ccid2_pr_debug("Packet lost: %llu\n",
- seqp->ccid2s_seq);
+ (unsigned long long)seqp->ccid2s_seq);
/* XXX need to traverse from tail -> head in
* order to detect multiple congestion events in
* one ack vector.
@@ -769,6 +781,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hctx->ccid2hctx_lastrtt = 0;
hctx->ccid2hctx_rpdupack = -1;
hctx->ccid2hctx_last_cong = jiffies;
+ hctx->ccid2hctx_high_ack = 0;
hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
@@ -821,8 +834,10 @@ static struct ccid_operations ccid2 = {
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
};
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
module_param(ccid2_debug, int, 0444);
MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
+#endif
static __init int ccid2_module_init(void)
{
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 5b2ef4acb300..ebd79499c85a 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,7 +35,7 @@ struct ccid2_seq {
struct ccid2_seq *ccid2s_next;
};
-#define CCID2_SEQBUF_LEN 256
+#define CCID2_SEQBUF_LEN 1024
#define CCID2_SEQBUF_MAX 128
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
@@ -72,6 +72,7 @@ struct ccid2_hc_tx_sock {
int ccid2hctx_rpdupack;
int ccid2hctx_sendwait;
unsigned long ccid2hctx_last_cong;
+ u64 ccid2hctx_high_ack;
};
struct ccid2_hc_rx_sock {
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 67d2dc0e7c67..fa6b75372ed7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -41,32 +41,9 @@
#include "lib/tfrc.h"
#include "ccid3.h"
-/*
- * Reason for maths here is to avoid 32 bit overflow when a is big.
- * With this we get close to the limit.
- */
-static u32 usecs_div(const u32 a, const u32 b)
-{
- const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 :
- a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 :
- a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 :
- a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 :
- a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 :
- a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 :
- a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 :
- a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 :
- 100000;
- const u32 tmp = a * (USEC_PER_SEC / div);
- return (b >= 2 * div) ? tmp / (b / div) : tmp;
-}
-
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static int ccid3_debug;
-
-#ifdef CCID3_DEBUG
-#define ccid3_pr_debug(format, a...) \
- do { if (ccid3_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
- } while (0)
+#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a)
#else
#define ccid3_pr_debug(format, a...)
#endif
@@ -75,15 +52,7 @@ static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
-/* TFRC sender states */
-enum ccid3_hc_tx_states {
- TFRC_SSTATE_NO_SENT = 1,
- TFRC_SSTATE_NO_FBACK,
- TFRC_SSTATE_FBACK,
- TFRC_SSTATE_TERM,
-};
-
-#ifdef CCID3_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
static char *ccid3_state_names[] = {
@@ -110,325 +79,319 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
hctx->ccid3hctx_state = state;
}
-/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
-static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
+/*
+ * Recalculate scheduled nominal send time t_nom, inter-packet interval
+ * t_ipi, and delta value. Should be called after each change to X.
+ */
+static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
{
- /*
- * If no feedback spec says t_ipi is 1 second (set elsewhere and then
- * doubles after every no feedback timer (separate function)
- */
- if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
- hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x);
-}
+ timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
-/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
-static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
-{
+ /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+ hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
+ hctx->ccid3hctx_x >> 6);
+
+ /* Update nominal send time with regard to the new t_ipi */
+ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+
+ /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
TFRC_OPSYS_HALF_TIME_GRAN);
}
-
/*
* Update X by
* If (p > 0)
- * x_calc = calcX(s, R, p);
+ * X_calc = calcX(s, R, p);
* X = max(min(X_calc, 2 * X_recv), s / t_mbi);
* Else
* If (now - tld >= R)
* X = max(min(2 * X, 2 * X_recv), s / R);
* tld = now;
- */
-static void ccid3_hc_tx_update_x(struct sock *sk)
+ *
+ * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
+ * fine-grained resolution of sending rates. This requires scaling by 2^6
+ * throughout the code. Only X_calc is unscaled (in bytes/second).
+ *
+ * If X has changed, we also update the scheduled send time t_now,
+ * the inter-packet interval t_ipi, and the delta value.
+ */
+static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
+
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ const __u64 old_x = hctx->ccid3hctx_x;
- /* To avoid large error in calcX */
- if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
- hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
- hctx->ccid3hctx_rtt,
- hctx->ccid3hctx_p);
- hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc,
- 2 * hctx->ccid3hctx_x_recv),
- (hctx->ccid3hctx_s /
- TFRC_MAX_BACK_OFF_TIME));
- } else {
- struct timeval now;
+ if (hctx->ccid3hctx_p > 0) {
- dccp_timestamp(sk, &now);
- if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
- hctx->ccid3hctx_rtt) {
- hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
- hctx->ccid3hctx_x) * 2,
- usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_rtt));
- hctx->ccid3hctx_t_ld = now;
- }
+ hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
+ hctx->ccid3hctx_x_recv * 2);
+ hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ TFRC_T_MBI);
+
+ } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
+ (suseconds_t)hctx->ccid3hctx_rtt >= 0) {
+
+ hctx->ccid3hctx_x =
+ max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
+ scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
+ hctx->ccid3hctx_rtt));
+ hctx->ccid3hctx_t_ld = *now;
+ }
+
+ if (hctx->ccid3hctx_x != old_x)
+ ccid3_update_send_time(hctx);
+}
+
+/*
+ * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1)
+ * @len: DCCP packet payload size in bytes
+ */
+static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
+{
+ if (unlikely(len == 0))
+ ccid3_pr_debug("Packet payload length is 0 - not updating\n");
+ else
+ hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
+ (9 * hctx->ccid3hctx_s + len) / 10;
+ /*
+ * Note: We could do a potential optimisation here - when `s' changes,
+ * recalculate sending rate and consequently t_ipi, t_delta, and
+ * t_now. This is however non-standard, and the benefits are not
+ * clear, so it is currently left out.
+ */
+}
+
+/*
+ * Update Window Counter using the algorithm from [RFC 4342, 8.1].
+ * The algorithm is not applicable if RTT < 4 microseconds.
+ */
+static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
+ struct timeval *now)
+{
+ suseconds_t delta;
+ u32 quarter_rtts;
+
+ if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */
+ return;
+
+ delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
+ DCCP_BUG_ON(delta < 0);
+
+ quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);
+
+ if (quarter_rtts > 0) {
+ hctx->ccid3hctx_t_last_win_count = *now;
+ hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5);
+ hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */
+
+ ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
}
}
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
- unsigned long next_tmout = 0;
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
/* XXX: set some sensible MIB */
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + HZ / 5);
- goto out;
+ goto restart_timer;
}
- ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
+ ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
-
+
switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_TERM:
- goto out;
case TFRC_SSTATE_NO_FBACK:
- /* Halve send rate */
- hctx->ccid3hctx_x /= 2;
- if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
- TFRC_MAX_BACK_OFF_TIME))
- hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
- TFRC_MAX_BACK_OFF_TIME);
-
- ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
- "bytes/s\n",
- dccp_role(sk), sk,
+ /* RFC 3448, 4.4: Halve send rate directly */
+ hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ TFRC_T_MBI);
+
+ ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
+ "bytes/s\n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state),
- hctx->ccid3hctx_x);
- next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x),
- TFRC_INITIAL_TIMEOUT);
- /*
- * FIXME - not sure above calculation is correct. See section
- * 5 of CCID3 11 should adjust tx_t_ipi and double that to
- * achieve it really
- */
+ (unsigned)(hctx->ccid3hctx_x >> 6));
+ /* The value of R is still undefined and so we can not recompute
+ * the timout value. Keep initial value as per [RFC 4342, 5]. */
+ t_nfb = TFRC_INITIAL_TIMEOUT;
+ ccid3_update_send_time(hctx);
break;
case TFRC_SSTATE_FBACK:
/*
* Check if IDLE since last timeout and recv rate is less than
- * 4 packets per RTT
+ * 4 packets (in units of 64*bytes/sec) per RTT
*/
if (!hctx->ccid3hctx_idle ||
- (hctx->ccid3hctx_x_recv >=
- 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
- ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
+ (hctx->ccid3hctx_x_recv >= 4 *
+ scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
+ hctx->ccid3hctx_rtt))) {
+ struct timeval now;
+
+ ccid3_pr_debug("%s(%p, state=%s), not idle\n",
dccp_role(sk), sk,
- ccid3_tx_state_name(hctx->ccid3hctx_state));
- /* Halve sending rate */
+ ccid3_tx_state_name(hctx->ccid3hctx_state));
- /* If (X_calc > 2 * X_recv)
+ /*
+ * Modify the cached value of X_recv [RFC 3448, 4.4]
+ *
+ * If (p == 0 || X_calc > 2 * X_recv)
* X_recv = max(X_recv / 2, s / (2 * t_mbi));
* Else
* X_recv = X_calc / 4;
+ *
+ * Note that X_recv is scaled by 2^6 while X_calc is not
*/
- BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
- hctx->ccid3hctx_x_calc == 0);
-
- /* check also if p is zero -> x_calc is infinity? */
- if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
- hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
- hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
- hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
- else
- hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
-
- /* Update sending rate */
- ccid3_hc_tx_update_x(sk);
+ BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
+
+ if (hctx->ccid3hctx_p == 0 ||
+ (hctx->ccid3hctx_x_calc >
+ (hctx->ccid3hctx_x_recv >> 5))) {
+
+ hctx->ccid3hctx_x_recv =
+ max(hctx->ccid3hctx_x_recv / 2,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ (2 * TFRC_T_MBI));
+
+ if (hctx->ccid3hctx_p == 0)
+ dccp_timestamp(sk, &now);
+ } else {
+ hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+ hctx->ccid3hctx_x_recv <<= 4;
+ }
+ /* Now recalculate X [RFC 3448, 4.3, step (4)] */
+ ccid3_hc_tx_update_x(sk, &now);
}
/*
* Schedule no feedback timer to expire in
- * max(4 * R, 2 * s / X)
+ * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
+ * See comments in packet_recv() regarding the value of t_RTO.
*/
- next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
- 2 * usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x));
+ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
+ case TFRC_SSTATE_NO_SENT:
+ DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
+ /* fall through */
+ case TFRC_SSTATE_TERM:
goto out;
}
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
hctx->ccid3hctx_idle = 1;
+
+restart_timer:
+ sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+ jiffies + usecs_to_jiffies(t_nfb));
out:
bh_unlock_sock(sk);
sock_put(sk);
}
-static int ccid3_hc_tx_send_packet(struct sock *sk,
- struct sk_buff *skb, int len)
+/*
+ * returns
+ * > 0: delay (in msecs) that should pass before actually sending
+ * = 0: can send immediately
+ * < 0: error condition; do not send packet
+ */
+static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- struct dccp_tx_hist_entry *new_packet;
struct timeval now;
- long delay;
- int rc = -ENOTCONN;
+ suseconds_t delay;
- BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
+ BUG_ON(hctx == NULL);
- /* Check if pure ACK or Terminating*/
/*
- * XXX: We only call this function for DATA and DATAACK, on, these
- * packets can have zero length, but why the comment about "pure ACK"?
+ * This function is called only for Data and DataAck packets. Sending
+ * zero-sized Data(Ack)s is theoretically possible, but for congestion
+ * control this case is pathological - ignore it.
*/
- if (unlikely(len == 0))
- goto out;
-
- /* See if last packet allocated was not sent */
- new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
- if (new_packet == NULL || new_packet->dccphtx_sent) {
- new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
- SLAB_ATOMIC);
-
- rc = -ENOBUFS;
- if (unlikely(new_packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough "
- "mem to add to history, send refused\n",
- __FUNCTION__, dccp_role(sk), sk);
- goto out;
- }
-
- dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
- }
+ if (unlikely(skb->len == 0))
+ return -EBADMSG;
dccp_timestamp(sk, &now);
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_SENT:
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
+ (jiffies +
+ usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
hctx->ccid3hctx_last_win_count = 0;
hctx->ccid3hctx_t_last_win_count = now;
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
- hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI;
- /* Set nominal send time for initial packet */
+ /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
+ ccid3_hc_tx_update_s(hctx, skb->len);
+ hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+ hctx->ccid3hctx_x <<= 6;
+
+ /* First timeout, according to [RFC 3448, 4.2], is 1 second */
+ hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
+ /* Initial delta: minimum of 0.5 sec and t_gran/2 */
+ hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
+
+ /* Set t_0 for initial packet */
hctx->ccid3hctx_t_nom = now;
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_delta(hctx);
- rc = 0;
break;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
- delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
- hctx->ccid3hctx_delta);
- delay /= -1000;
- /* divide by -1000 is to convert to ms and get sign right */
- rc = delay > 0 ? delay : 0;
- break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
- rc = -EINVAL;
+ delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+ /*
+ * Scheduling of packet transmissions [RFC 3448, 4.6]
+ *
+ * if (t_now > t_nom - delta)
+ * // send the packet now
+ * else
+ * // send the packet in (t_nom - t_now) milliseconds.
+ */
+ if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
+ return delay / 1000L;
+
+ ccid3_hc_tx_update_win_count(hctx, &now);
break;
+ case TFRC_SSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+ return -EINVAL;
}
- /* Can we send? if so add options and add to packet history */
- if (rc == 0) {
- dp->dccps_hc_tx_insert_options = 1;
- new_packet->dccphtx_ccval =
- DCCP_SKB_CB(skb)->dccpd_ccval =
- hctx->ccid3hctx_last_win_count;
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- }
-out:
- return rc;
+ /* prepare to send now (add options etc.) */
+ dp->dccps_hc_tx_insert_options = 1;
+ DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+
+ /* set the nominal send time for the next following packet */
+ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+
+ return 0;
}
-static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
+ unsigned int len)
{
- const struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct timeval now;
+ struct dccp_tx_hist_entry *packet;
- BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
-
- dccp_timestamp(sk, &now);
-
- /* check if we have sent a data packet */
- if (len > 0) {
- unsigned long quarter_rtt;
- struct dccp_tx_hist_entry *packet;
-
- packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
- if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't "
- "exists in history!\n", __FUNCTION__);
- return;
- }
- if (unlikely(packet->dccphtx_sent)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in "
- "history!\n", __FUNCTION__);
- return;
- }
- packet->dccphtx_tstamp = now;
- packet->dccphtx_seqno = dp->dccps_gss;
- /*
- * Check if win_count have changed
- * Algorithm in "8.1. Window Counter Valuer" in
- * draft-ietf-dccp-ccid3-11.txt
- */
- quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
- if (likely(hctx->ccid3hctx_rtt > 8))
- quarter_rtt /= hctx->ccid3hctx_rtt / 4;
-
- if (quarter_rtt > 0) {
- hctx->ccid3hctx_t_last_win_count = now;
- hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
- min_t(unsigned long, quarter_rtt, 5)) % 16;
- ccid3_pr_debug("%s, sk=%p, window changed from "
- "%u to %u!\n",
- dccp_role(sk), sk,
- packet->dccphtx_ccval,
- hctx->ccid3hctx_last_win_count);
- }
+ BUG_ON(hctx == NULL);
- hctx->ccid3hctx_idle = 0;
- packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
- packet->dccphtx_sent = 1;
- } else
- ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
- dccp_role(sk), sk, dp->dccps_gss);
+ ccid3_hc_tx_update_s(hctx, len);
- switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_NO_SENT:
- /* if first wasn't pure ack */
- if (len != 0)
- printk(KERN_CRIT "%s: %s, First packet sent is noted "
- "as a data packet\n",
- __FUNCTION__, dccp_role(sk));
+ packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
+ if (unlikely(packet == NULL)) {
+ DCCP_CRIT("packet history - out of memory!");
return;
- case TFRC_SSTATE_NO_FBACK:
- case TFRC_SSTATE_FBACK:
- if (len > 0) {
- timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_t_ipi(hctx);
- ccid3_calc_new_delta(hctx);
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- }
- break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
- break;
}
+ dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);
+
+ dccp_timestamp(sk, &now);
+ packet->dccphtx_tstamp = now;
+ packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
+ packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
+ packet->dccphtx_sent = 1;
+ hctx->ccid3hctx_idle = 0;
}
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -438,13 +401,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct ccid3_options_received *opt_recv;
struct dccp_tx_hist_entry *packet;
struct timeval now;
- unsigned long next_tmout;
- u32 t_elapsed;
+ unsigned long t_nfb;
u32 pinv;
- u32 x_recv;
- u32 r_sample;
+ suseconds_t r_sample, t_elapsed;
- BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
+ BUG_ON(hctx == NULL);
/* we are only interested in ACKs */
if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
@@ -453,41 +414,49 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
opt_recv = &hctx->ccid3hctx_options_received;
- t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
- x_recv = opt_recv->ccid3or_receive_rate;
- pinv = opt_recv->ccid3or_loss_event_rate;
-
switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_NO_SENT:
- /* FIXME: what to do here? */
- return;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
- /* Calculate new round trip sample by
- * R_sample = (now - t_recvdata) - t_delay */
- /* get t_recvdata from history */
+ /* get packet from history to look up t_recvdata */
packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
- DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ DCCP_SKB_CB(skb)->dccpd_ack_seq);
if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno "
- "%llu(%s) does't exist in history!\n",
- __FUNCTION__, dccp_role(sk), sk,
+ DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
+ "in history!\n", dccp_role(sk), sk,
(unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
return;
}
- /* Update RTT */
+ /* Update receive rate in units of 64 * bytes/second */
+ hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+ hctx->ccid3hctx_x_recv <<= 6;
+
+ /* Update loss event rate */
+ pinv = opt_recv->ccid3or_loss_event_rate;
+ if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
+ hctx->ccid3hctx_p = 0;
+ else /* can not exceed 100% */
+ hctx->ccid3hctx_p = 1000000 / pinv;
+
dccp_timestamp(sk, &now);
- r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
+
+ /*
+ * Calculate new round trip sample as per [RFC 3448, 4.3] by
+ * R_sample = (now - t_recvdata) - t_elapsed
+ */
+ r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
+ t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
+
+ DCCP_BUG_ON(r_sample < 0);
if (unlikely(r_sample <= t_elapsed))
- LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, "
- "t_elapsed=%uus\n",
- __FUNCTION__, r_sample, t_elapsed);
+ DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
+ (int)r_sample, (int)t_elapsed);
else
r_sample -= t_elapsed;
+ CCID3_RTT_SANITY_CHECK(r_sample);
- /* Update RTT estimate by
+ /* Update RTT estimate by
* If (No feedback recv)
* R = R_sample;
* Else
@@ -496,97 +465,96 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* q is a constant, RFC 3448 recomments 0.9
*/
if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
- hctx->ccid3hctx_rtt = r_sample;
- } else
- hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
- r_sample / 10;
-
- ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
- "r_sample=%us\n", dccp_role(sk), sk,
- hctx->ccid3hctx_rtt, r_sample);
-
- /* Update timeout interval */
- hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
- USEC_PER_SEC);
+ /*
+ * Larger Initial Windows [RFC 4342, sec. 5]
+ * We deviate in that we use `s' instead of `MSS'.
+ */
+ __u64 w_init = min(4 * hctx->ccid3hctx_s,
+ max(2 * hctx->ccid3hctx_s, 4380));
+ hctx->ccid3hctx_rtt = r_sample;
+ hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample);
+ hctx->ccid3hctx_t_ld = now;
- /* Update receive rate */
- hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
+ ccid3_update_send_time(hctx);
- /* Update loss event rate */
- if (pinv == ~0 || pinv == 0)
- hctx->ccid3hctx_p = 0;
- else {
- hctx->ccid3hctx_p = 1000000 / pinv;
+ ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
+ "R_sample=%dus, X=%u\n", dccp_role(sk),
+ sk, hctx->ccid3hctx_s, w_init,
+ (int)r_sample,
+ (unsigned)(hctx->ccid3hctx_x >> 6));
- if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
- hctx->ccid3hctx_p = TFRC_SMALLEST_P;
- ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
- dccp_role(sk), sk);
- }
+ ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+ } else {
+ hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
+ (u32)r_sample) / 10;
+
+ /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
+ if (hctx->ccid3hctx_p > 0)
+ hctx->ccid3hctx_x_calc =
+ tfrc_calc_x(hctx->ccid3hctx_s,
+ hctx->ccid3hctx_rtt,
+ hctx->ccid3hctx_p);
+ ccid3_hc_tx_update_x(sk, &now);
+
+ ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
+ "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
+ dccp_role(sk),
+ sk, hctx->ccid3hctx_rtt, (int)r_sample,
+ hctx->ccid3hctx_s, hctx->ccid3hctx_p,
+ hctx->ccid3hctx_x_calc,
+ (unsigned)(hctx->ccid3hctx_x_recv >> 6),
+ (unsigned)(hctx->ccid3hctx_x >> 6));
}
/* unschedule no feedback timer */
sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
- /* Update sending rate */
- ccid3_hc_tx_update_x(sk);
-
- /* Update next send time */
- timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_t_ipi(hctx);
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_delta(hctx);
-
/* remove all packets older than the one acked from history */
dccp_tx_hist_purge_older(ccid3_tx_hist,
&hctx->ccid3hctx_hist, packet);
/*
- * As we have calculated new ipi, delta, t_nom it is possible that
- * we now can send a packet, so wake up dccp_wait_for_ccids.
+ * As we have calculated new ipi, delta, t_nom it is possible
+ * that we now can send a packet, so wake up dccp_wait_for_ccid
*/
sk->sk_write_space(sk);
/*
+ * Update timeout interval for the nofeedback timer.
+ * We use a configuration option to increase the lower bound.
+ * This can help avoid triggering the nofeedback timer too
+ * often ('spinning') on LANs with small RTTs.
+ */
+ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+ CONFIG_IP_DCCP_CCID3_RTO *
+ (USEC_PER_SEC/1000));
+ /*
* Schedule no feedback timer to expire in
- * max(4 * R, 2 * s / X)
+ * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
*/
- next_tmout = max(hctx->ccid3hctx_t_rto,
- 2 * usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x));
-
- ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
+ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
+
+ ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
"expire in %lu jiffies (%luus)\n",
- dccp_role(sk), sk,
- usecs_to_jiffies(next_tmout), next_tmout);
+ dccp_role(sk),
+ sk, usecs_to_jiffies(t_nfb), t_nfb);
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+ sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+ jiffies + usecs_to_jiffies(t_nfb));
/* set idle flag */
- hctx->ccid3hctx_idle = 1;
+ hctx->ccid3hctx_idle = 1;
break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
+ case TFRC_SSTATE_NO_SENT:
+ /*
+ * XXX when implementing bidirectional rx/tx check this again
+ */
+ DCCP_WARN("Illegal ACK received - no packet sent\n");
+ /* fall through */
+ case TFRC_SSTATE_TERM: /* ignore feedback when closing */
break;
}
}
-static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
-{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-
- BUG_ON(hctx == NULL);
-
- if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)
- DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
- return 0;
-}
-
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
unsigned char len, u16 idx,
unsigned char *value)
@@ -611,13 +579,14 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
switch (option) {
case TFRC_OPT_LOSS_EVENT_RATE:
if (unlikely(len != 4)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid "
- "len for TFRC_OPT_LOSS_EVENT_RATE\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), invalid len %d "
+ "for TFRC_OPT_LOSS_EVENT_RATE\n",
+ dccp_role(sk), sk, len);
rc = -EINVAL;
} else {
- opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value);
- ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
+ opt_recv->ccid3or_loss_event_rate =
+ ntohl(*(__be32 *)value);
+ ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_event_rate);
}
@@ -625,20 +594,21 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
case TFRC_OPT_LOSS_INTERVALS:
opt_recv->ccid3or_loss_intervals_idx = idx;
opt_recv->ccid3or_loss_intervals_len = len;
- ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
+ ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_intervals_idx,
opt_recv->ccid3or_loss_intervals_len);
break;
case TFRC_OPT_RECEIVE_RATE:
if (unlikely(len != 4)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid "
- "len for TFRC_OPT_RECEIVE_RATE\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), invalid len %d "
+ "for TFRC_OPT_RECEIVE_RATE\n",
+ dccp_role(sk), sk, len);
rc = -EINVAL;
} else {
- opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value);
- ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
+ opt_recv->ccid3or_receive_rate =
+ ntohl(*(__be32 *)value);
+ ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_receive_rate);
}
@@ -650,22 +620,15 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
- struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
- if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
- dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
- hctx->ccid3hctx_s = dp->dccps_packet_size;
- else
- hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
-
- /* Set transmission rate to 1 packet per second */
- hctx->ccid3hctx_x = hctx->ccid3hctx_s;
- hctx->ccid3hctx_t_rto = USEC_PER_SEC;
+ hctx->ccid3hctx_s = 0;
+ hctx->ccid3hctx_rtt = 0;
hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
- hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+ hctx->ccid3hctx_no_feedback_timer.function =
+ ccid3_hc_tx_no_feedback_timer;
hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
init_timer(&hctx->ccid3hctx_no_feedback_timer);
@@ -689,14 +652,7 @@ static void ccid3_hc_tx_exit(struct sock *sk)
* RX Half Connection methods
*/
-/* TFRC receiver states */
-enum ccid3_hc_rx_states {
- TFRC_RSTATE_NO_DATA = 1,
- TFRC_RSTATE_DATA,
- TFRC_RSTATE_TERM = 127,
-};
-
-#ifdef CCID3_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
static char *ccid3_rx_state_names[] = {
@@ -722,14 +678,24 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
hcrx->ccid3hcrx_state = state;
}
+static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
+{
+ if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */
+ ccid3_pr_debug("Packet payload length is 0 - not updating\n");
+ else
+ hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len :
+ (9 * hcrx->ccid3hcrx_s + len) / 10;
+}
+
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_rx_hist_entry *packet;
struct timeval now;
+ suseconds_t delta;
- ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+ ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
dccp_timestamp(sk, &now);
@@ -737,25 +703,22 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
case TFRC_RSTATE_NO_DATA:
hcrx->ccid3hcrx_x_recv = 0;
break;
- case TFRC_RSTATE_DATA: {
- const u32 delta = timeval_delta(&now,
- &hcrx->ccid3hcrx_tstamp_last_feedback);
- hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv,
- delta);
- }
+ case TFRC_RSTATE_DATA:
+ delta = timeval_delta(&now,
+ &hcrx->ccid3hcrx_tstamp_last_feedback);
+ DCCP_BUG_ON(delta < 0);
+ hcrx->ccid3hcrx_x_recv =
+ scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
- dump_stack();
+ case TFRC_RSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
return;
}
packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet "
- "in history!\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), no data packet in history!\n",
+ dccp_role(sk), sk);
return;
}
@@ -763,13 +726,19 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval;
hcrx->ccid3hcrx_bytes_recv = 0;
- /* Convert to multiples of 10us */
- hcrx->ccid3hcrx_elapsed_time =
- timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
+ /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
+ delta = timeval_delta(&now, &packet->dccphrx_tstamp);
+ DCCP_BUG_ON(delta < 0);
+ hcrx->ccid3hcrx_elapsed_time = delta / 10;
+
if (hcrx->ccid3hcrx_p == 0)
- hcrx->ccid3hcrx_pinv = ~0;
- else
+ hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
+ else if (hcrx->ccid3hcrx_p > 1000000) {
+ DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
+ hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
+ } else
hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
+
dp->dccps_hc_rx_insert_options = 1;
dccp_send_ack(sk);
}
@@ -797,9 +766,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
hcrx->ccid3hcrx_elapsed_time)) ||
dccp_insert_option_timestamp(sk, skb) ||
dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
- &pinv, sizeof(pinv)) ||
+ &pinv, sizeof(pinv)) ||
dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
- &x_recv, sizeof(x_recv)))
+ &x_recv, sizeof(x_recv)))
return -1;
return 0;
@@ -813,12 +782,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
- u32 rtt, delta, x_recv, fval, p, tmp2;
+ u32 x_recv, p;
+ suseconds_t rtt, delta;
struct timeval tstamp = { 0, };
int interval = 0;
int win_count = 0;
int step = 0;
- u64 tmp1;
+ u64 fval;
list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
dccphrx_node) {
@@ -843,58 +813,66 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
}
if (unlikely(step == 0)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history "
- "contains no data packets!\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), packet history has no data packets!\n",
+ dccp_role(sk), sk);
return ~0;
}
if (unlikely(interval == 0)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a "
- "win_count interval > 0. Defaulting to 1\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
+ "Defaulting to 1\n", dccp_role(sk), sk);
interval = 1;
}
found:
if (!tail) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n",
- __FUNCTION__);
+ DCCP_CRIT("tail is null\n");
return ~0;
}
- rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
- ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
- dccp_role(sk), sk, rtt);
- if (rtt == 0)
- rtt = 1;
- dccp_timestamp(sk, &tstamp);
- delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
- x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
+ delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
+ DCCP_BUG_ON(delta < 0);
- if (x_recv == 0)
- x_recv = hcrx->ccid3hcrx_x_recv;
+ rtt = delta * 4 / interval;
+ ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
+ dccp_role(sk), sk, (int)rtt);
- tmp1 = (u64)x_recv * (u64)rtt;
- do_div(tmp1,10000000);
- tmp2 = (u32)tmp1;
+ /*
+ * Determine the length of the first loss interval via inverse lookup.
+ * Assume that X_recv can be computed by the throughput equation
+ * s
+ * X_recv = --------
+ * R * fval
+ * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
+ */
+ if (rtt == 0) { /* would result in divide-by-zero */
+ DCCP_WARN("RTT==0, returning 1/p = 1\n");
+ return 1000000;
+ }
- if (!tmp2) {
- LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 "
- "%s: x_recv = %u, rtt =%u\n",
- __FUNCTION__, x_recv, rtt);
- return ~0;
+ dccp_timestamp(sk, &tstamp);
+ delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
+ DCCP_BUG_ON(delta <= 0);
+
+ x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+ if (x_recv == 0) { /* would also trigger divide-by-zero */
+ DCCP_WARN("X_recv==0\n");
+ if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+ DCCP_BUG("stored value of X_recv is zero");
+ return 1000000;
+ }
}
- fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
- /* do not alter order above or you will get overflow on 32 bit */
+ fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
+ fval = scaled_div32(fval, x_recv);
p = tfrc_calc_x_reverse_lookup(fval);
- ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
+
+ ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
"loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
if (p == 0)
return ~0;
else
- return 1000000 / p;
+ return 1000000 / p;
}
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
@@ -924,11 +902,10 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
/* new loss event detected */
/* calculate last interval length */
seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
- entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC);
+ entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);
if (entry == NULL) {
- printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__);
- dump_stack();
+ DCCP_BUG("out of memory - can not allocate entry");
return;
}
@@ -949,7 +926,8 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
struct dccp_rx_hist_entry *packet)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
+ struct dccp_rx_hist_entry *rx_hist =
+ dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
u64 seqno = packet->dccphrx_seqno;
u64 tmp_seqno;
int loss = 0;
@@ -977,7 +955,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
dccp_inc_seqno(&tmp_seqno);
while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
tmp_seqno, &ccval)) {
- hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+ hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
hcrx->ccid3hcrx_ccval_nonloss = ccval;
dccp_inc_seqno(&tmp_seqno);
}
@@ -1003,13 +981,11 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
const struct dccp_options_received *opt_recv;
struct dccp_rx_hist_entry *packet;
struct timeval now;
- u8 win_count;
- u32 p_prev, rtt_prev, r_sample, t_elapsed;
- int loss;
+ u32 p_prev, rtt_prev;
+ suseconds_t r_sample, t_elapsed;
+ int loss, payload_size;
- BUG_ON(hcrx == NULL ||
- !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
- hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
+ BUG_ON(hcrx == NULL);
opt_recv = &dccp_sk(sk)->dccps_options_received;
@@ -1026,12 +1002,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
r_sample = timeval_usecs(&now);
t_elapsed = opt_recv->dccpor_elapsed_time * 10;
+ DCCP_BUG_ON(r_sample < 0);
if (unlikely(r_sample <= t_elapsed))
- LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, "
- "t_elapsed=%uus\n",
- __FUNCTION__, r_sample, t_elapsed);
+ DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
+ r_sample, t_elapsed);
else
r_sample -= t_elapsed;
+ CCID3_RTT_SANITY_CHECK(r_sample);
if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
hcrx->ccid3hcrx_rtt = r_sample;
@@ -1040,8 +1017,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
r_sample / 10;
if (rtt_prev != hcrx->ccid3hcrx_rtt)
- ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
- dccp_role(sk), hcrx->ccid3hcrx_rtt,
+ ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
+ dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
opt_recv->dccpor_elapsed_time);
break;
case DCCP_PKT_DATA:
@@ -1051,52 +1028,48 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
- skb, SLAB_ATOMIC);
+ skb, GFP_ATOMIC);
if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to "
- "add rx packet to history, consider it lost!\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), Not enough mem to add rx packet "
+ "to history, consider it lost!\n", dccp_role(sk), sk);
return;
}
- win_count = packet->dccphrx_ccval;
-
loss = ccid3_hc_rx_detect_loss(sk, packet);
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
return;
+ payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+ ccid3_hc_rx_update_s(hcrx, payload_size);
+
switch (hcrx->ccid3hcrx_state) {
case TFRC_RSTATE_NO_DATA:
- ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
- "feedback\n",
- dccp_role(sk), sk,
+ ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
+ "feedback\n", dccp_role(sk), sk,
dccp_state_name(sk->sk_state), skb);
ccid3_hc_rx_send_feedback(sk);
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
return;
case TFRC_RSTATE_DATA:
- hcrx->ccid3hcrx_bytes_recv += skb->len -
- dccp_hdr(skb)->dccph_doff * 4;
+ hcrx->ccid3hcrx_bytes_recv += payload_size;
if (loss)
break;
dccp_timestamp(sk, &now);
- if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
- hcrx->ccid3hcrx_rtt) {
+ if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
+ (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
hcrx->ccid3hcrx_tstamp_last_ack = now;
ccid3_hc_rx_send_feedback(sk);
}
return;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
- dump_stack();
+ case TFRC_RSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
return;
}
/* Dealing with packet loss */
- ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
+ ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state));
p_prev = hcrx->ccid3hcrx_p;
@@ -1108,10 +1081,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* Scaling up by 1000000 as fixed decimal */
if (i_mean != 0)
hcrx->ccid3hcrx_p = 1000000 / i_mean;
- } else {
- printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__);
- dump_stack();
- }
+ } else
+ DCCP_BUG("empty loss history");
if (hcrx->ccid3hcrx_p > p_prev) {
ccid3_hc_rx_send_feedback(sk);
@@ -1121,23 +1092,17 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
- struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
- ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
-
- if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
- dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
- hcrx->ccid3hcrx_s = dp->dccps_packet_size;
- else
- hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
+ ccid3_pr_debug("entry\n");
hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
- hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */
+ hcrx->ccid3hcrx_s = 0;
+ hcrx->ccid3hcrx_rtt = 0;
return 0;
}
@@ -1166,9 +1131,9 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
BUG_ON(hcrx == NULL);
- info->tcpi_ca_state = hcrx->ccid3hcrx_state;
- info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
+ info->tcpi_ca_state = hcrx->ccid3hcrx_state;
+ info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+ info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
}
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
@@ -1249,7 +1214,6 @@ static struct ccid_operations ccid3 = {
.ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
.ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
- .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
.ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
.ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock),
.ccid_hc_rx_init = ccid3_hc_rx_init,
@@ -1261,9 +1225,11 @@ static struct ccid_operations ccid3 = {
.ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt,
.ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt,
};
-
+
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+#endif
static __init int ccid3_module_init(void)
{
@@ -1282,7 +1248,7 @@ static __init int ccid3_module_init(void)
goto out_free_tx;
rc = ccid_register(&ccid3);
- if (rc != 0)
+ if (rc != 0)
goto out_free_loss_interval_history;
out:
return rc;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0a2cb7536d26..15776a88c090 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,22 +42,24 @@
#include <linux/tfrc.h>
#include "../ccid.h"
-#define TFRC_MIN_PACKET_SIZE 16
-#define TFRC_STD_PACKET_SIZE 256
-#define TFRC_MAX_PACKET_SIZE 65535
-
-/* Two seconds as per CCID3 spec */
+/* Two seconds as per RFC 3448 4.2 */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
-#define TFRC_INITIAL_IPI (USEC_PER_SEC / 4)
-
/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
-/* In seconds */
-#define TFRC_MAX_BACK_OFF_TIME 64
+/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
+#define TFRC_T_MBI 64
+
+/* What we think is a reasonable upper limit on RTT values */
+#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC))
-#define TFRC_SMALLEST_P 40
+#define CCID3_RTT_SANITY_CHECK(rtt) do { \
+ if (rtt > CCID3_SANE_RTT_MAX) { \
+ DCCP_CRIT("RTT (%d) too large, substituting %d", \
+ (int)rtt, (int)CCID3_SANE_RTT_MAX); \
+ rtt = CCID3_SANE_RTT_MAX; \
+ } } while (0)
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
@@ -73,26 +75,36 @@ struct ccid3_options_received {
u32 ccid3or_receive_rate;
};
-/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
+/* TFRC sender states */
+enum ccid3_hc_tx_states {
+ TFRC_SSTATE_NO_SENT = 1,
+ TFRC_SSTATE_NO_FBACK,
+ TFRC_SSTATE_FBACK,
+ TFRC_SSTATE_TERM,
+};
+
+/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
*
- * @ccid3hctx_state - Sender state
- * @ccid3hctx_x - Current sending rate
- * @ccid3hctx_x_recv - Receive rate
- * @ccid3hctx_x_calc - Calculated send (?) rate
- * @ccid3hctx_s - Packet size
- * @ccid3hctx_rtt - Estimate of current round trip time in usecs
- * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
- * @ccid3hctx_last_win_count - Last window counter sent
- * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
- * with last_win_count value sent
- * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
- * @ccid3hctx_idle - FIXME
- * @ccid3hctx_t_ld - Time last doubled during slow start
- * @ccid3hctx_t_nom - Nominal send time of next packet
- * @ccid3hctx_t_ipi - Interpacket (send) interval
- * @ccid3hctx_delta - Send timer delta
- * @ccid3hctx_hist - Packet history
- */
+ * @ccid3hctx_x - Current sending rate in 64 * bytes per second
+ * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
+ * @ccid3hctx_x_calc - Calculated rate in bytes per second
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_s - Packet size in bytes
+ * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
+ * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
+ * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
+ * with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_idle - Flag indicating that sender is idling
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - Parsed set of retrieved options
+ */
struct ccid3_hc_tx_sock {
struct tfrc_tx_info ccid3hctx_tfrc;
#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
@@ -103,7 +115,7 @@ struct ccid3_hc_tx_sock {
#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
u16 ccid3hctx_s;
- u8 ccid3hctx_state;
+ enum ccid3_hc_tx_states ccid3hctx_state:8;
u8 ccid3hctx_last_win_count;
u8 ccid3hctx_idle;
struct timeval ccid3hctx_t_last_win_count;
@@ -115,23 +127,48 @@ struct ccid3_hc_tx_sock {
struct ccid3_options_received ccid3hctx_options_received;
};
+/* TFRC receiver states */
+enum ccid3_hc_rx_states {
+ TFRC_RSTATE_NO_DATA = 1,
+ TFRC_RSTATE_DATA,
+ TFRC_RSTATE_TERM = 127,
+};
+
+/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
+ *
+ * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
+ * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
+ * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4)
+ * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number
+ * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal
+ * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
+ * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
+ * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
+ * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
+ * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
+ * @ccid3hcrx_hist - Packet history
+ * @ccid3hcrx_li_hist - Loss Interval History
+ * @ccid3hcrx_s - Received packet size in bytes
+ * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
+ * @ccid3hcrx_elapsed_time - Time since packet reception
+ */
struct ccid3_hc_rx_sock {
- struct tfrc_rx_info ccid3hcrx_tfrc;
-#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
-#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
-#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
- u64 ccid3hcrx_seqno_nonloss:48,
- ccid3hcrx_ccval_nonloss:4,
- ccid3hcrx_state:8,
- ccid3hcrx_ccval_last_counter:4;
- u32 ccid3hcrx_bytes_recv;
- struct timeval ccid3hcrx_tstamp_last_feedback;
- struct timeval ccid3hcrx_tstamp_last_ack;
- struct list_head ccid3hcrx_hist;
- struct list_head ccid3hcrx_li_hist;
- u16 ccid3hcrx_s;
- u32 ccid3hcrx_pinv;
- u32 ccid3hcrx_elapsed_time;
+ struct tfrc_rx_info ccid3hcrx_tfrc;
+#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
+#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
+#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
+ u64 ccid3hcrx_seqno_nonloss:48,
+ ccid3hcrx_ccval_nonloss:4,
+ ccid3hcrx_ccval_last_counter:4;
+ enum ccid3_hc_rx_states ccid3hcrx_state:8;
+ u32 ccid3hcrx_bytes_recv;
+ struct timeval ccid3hcrx_tstamp_last_feedback;
+ struct timeval ccid3hcrx_tstamp_last_ack;
+ struct list_head ccid3hcrx_hist;
+ struct list_head ccid3hcrx_li_hist;
+ u16 ccid3hcrx_s;
+ u32 ccid3hcrx_pinv;
+ u32 ccid3hcrx_elapsed_time;
};
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 906c81ab9d4f..0a0baef16b3e 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -13,7 +13,7 @@
#include <linux/module.h>
#include <net/sock.h>
-
+#include "../../dccp.h"
#include "loss_interval.h"
struct dccp_li_hist *dccp_li_hist_new(const char *name)
@@ -109,7 +109,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
i_tot = max(i_tot0, i_tot1);
if (!w_tot) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__);
+ DCCP_WARN("w_tot = 0\n");
return 1;
}
@@ -125,10 +125,10 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
int i;
for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
- entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
+ entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC);
if (entry == NULL) {
dccp_li_hist_purge(hist, list);
- dump_stack();
+ DCCP_BUG("loss interval list entry is NULL");
return 0;
}
entry->dccplih_interval = ~0;
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 0ae85f0340b2..eb257014dd74 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -20,7 +20,7 @@
#define DCCP_LI_HIST_IVAL_F_LENGTH 8
struct dccp_li_hist {
- kmem_cache_t *dccplih_slab;
+ struct kmem_cache *dccplih_slab;
};
extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index b876c9c81c65..2e8ef42721e2 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -36,9 +36,100 @@
#include <linux/module.h>
#include <linux/string.h>
-
#include "packet_history.h"
+/*
+ * Transmitter History Routines
+ */
+struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
+{
+ struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+ static const char dccp_tx_hist_mask[] = "tx_hist_%s";
+ char *slab_name;
+
+ if (hist == NULL)
+ goto out;
+
+ slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
+ GFP_ATOMIC);
+ if (slab_name == NULL)
+ goto out_free_hist;
+
+ sprintf(slab_name, dccp_tx_hist_mask, name);
+ hist->dccptxh_slab = kmem_cache_create(slab_name,
+ sizeof(struct dccp_tx_hist_entry),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (hist->dccptxh_slab == NULL)
+ goto out_free_slab_name;
+out:
+ return hist;
+out_free_slab_name:
+ kfree(slab_name);
+out_free_hist:
+ kfree(hist);
+ hist = NULL;
+ goto out;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
+
+void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
+{
+ const char* name = kmem_cache_name(hist->dccptxh_slab);
+
+ kmem_cache_destroy(hist->dccptxh_slab);
+ kfree(name);
+ kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
+
+struct dccp_tx_hist_entry *
+ dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
+{
+ struct dccp_tx_hist_entry *packet = NULL, *entry;
+
+ list_for_each_entry(entry, list, dccphtx_node)
+ if (entry->dccphtx_seqno == seq) {
+ packet = entry;
+ break;
+ }
+
+ return packet;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
+
+void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
+{
+ struct dccp_tx_hist_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, list, dccphtx_node) {
+ list_del_init(&entry->dccphtx_node);
+ dccp_tx_hist_entry_delete(hist, entry);
+ }
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
+
+void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
+ struct list_head *list,
+ struct dccp_tx_hist_entry *packet)
+{
+ struct dccp_tx_hist_entry *next;
+
+ list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
+ list_del_init(&packet->dccphtx_node);
+ dccp_tx_hist_entry_delete(hist, packet);
+ }
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
+
+/*
+ * Receiver History Routines
+ */
struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
{
struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
@@ -83,18 +174,24 @@ void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
-void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
+int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+ u8 *ccval)
{
- struct dccp_rx_hist_entry *entry, *next;
+ struct dccp_rx_hist_entry *packet = NULL, *entry;
- list_for_each_entry_safe(entry, next, list, dccphrx_node) {
- list_del_init(&entry->dccphrx_node);
- kmem_cache_free(hist->dccprxh_slab, entry);
- }
-}
+ list_for_each_entry(entry, list, dccphrx_node)
+ if (entry->dccphrx_seqno == seq) {
+ packet = entry;
+ break;
+ }
-EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
+ if (packet)
+ *ccval = packet->dccphrx_ccval;
+ return packet != NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
struct dccp_rx_hist_entry *
dccp_rx_hist_find_data_packet(const struct list_head *list)
{
@@ -184,110 +281,18 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
-struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
-{
- struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
- static const char dccp_tx_hist_mask[] = "tx_hist_%s";
- char *slab_name;
-
- if (hist == NULL)
- goto out;
-
- slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
- GFP_ATOMIC);
- if (slab_name == NULL)
- goto out_free_hist;
-
- sprintf(slab_name, dccp_tx_hist_mask, name);
- hist->dccptxh_slab = kmem_cache_create(slab_name,
- sizeof(struct dccp_tx_hist_entry),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (hist->dccptxh_slab == NULL)
- goto out_free_slab_name;
-out:
- return hist;
-out_free_slab_name:
- kfree(slab_name);
-out_free_hist:
- kfree(hist);
- hist = NULL;
- goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
-
-void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
-{
- const char* name = kmem_cache_name(hist->dccptxh_slab);
-
- kmem_cache_destroy(hist->dccptxh_slab);
- kfree(name);
- kfree(hist);
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
-
-struct dccp_tx_hist_entry *
- dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
-{
- struct dccp_tx_hist_entry *packet = NULL, *entry;
-
- list_for_each_entry(entry, list, dccphtx_node)
- if (entry->dccphtx_seqno == seq) {
- packet = entry;
- break;
- }
-
- return packet;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
-
-int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
- u8 *ccval)
-{
- struct dccp_rx_hist_entry *packet = NULL, *entry;
-
- list_for_each_entry(entry, list, dccphrx_node)
- if (entry->dccphrx_seqno == seq) {
- packet = entry;
- break;
- }
-
- if (packet)
- *ccval = packet->dccphrx_ccval;
-
- return packet != NULL;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
-
-void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
- struct list_head *list,
- struct dccp_tx_hist_entry *packet)
+void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
{
- struct dccp_tx_hist_entry *next;
+ struct dccp_rx_hist_entry *entry, *next;
- list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
- list_del_init(&packet->dccphtx_node);
- dccp_tx_hist_entry_delete(hist, packet);
+ list_for_each_entry_safe(entry, next, list, dccphrx_node) {
+ list_del_init(&entry->dccphrx_node);
+ kmem_cache_free(hist->dccprxh_slab, entry);
}
}
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
-
-void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
-{
- struct dccp_tx_hist_entry *entry, *next;
-
- list_for_each_entry_safe(entry, next, list, dccphtx_node) {
- list_del_init(&entry->dccphtx_node);
- dccp_tx_hist_entry_delete(hist, entry);
- }
-}
+EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
"Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 067cf1c85a37..1f960c19ea1b 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -49,43 +49,27 @@
#define TFRC_WIN_COUNT_PER_RTT 4
#define TFRC_WIN_COUNT_LIMIT 16
+/*
+ * Transmitter History data structures and declarations
+ */
struct dccp_tx_hist_entry {
struct list_head dccphtx_node;
u64 dccphtx_seqno:48,
- dccphtx_ccval:4,
dccphtx_sent:1;
u32 dccphtx_rtt;
struct timeval dccphtx_tstamp;
};
-struct dccp_rx_hist_entry {
- struct list_head dccphrx_node;
- u64 dccphrx_seqno:48,
- dccphrx_ccval:4,
- dccphrx_type:4;
- u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
- struct timeval dccphrx_tstamp;
-};
-
struct dccp_tx_hist {
- kmem_cache_t *dccptxh_slab;
+ struct kmem_cache *dccptxh_slab;
};
extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
-extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
-
-struct dccp_rx_hist {
- kmem_cache_t *dccprxh_slab;
-};
-
-extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
-extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
-extern struct dccp_rx_hist_entry *
- dccp_rx_hist_find_data_packet(const struct list_head *list);
+extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
static inline struct dccp_tx_hist_entry *
- dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
- const gfp_t prio)
+ dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
+ const gfp_t prio)
{
struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
prio);
@@ -96,18 +80,20 @@ static inline struct dccp_tx_hist_entry *
return entry;
}
-static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
- struct dccp_tx_hist_entry *entry)
+static inline struct dccp_tx_hist_entry *
+ dccp_tx_hist_head(struct list_head *list)
{
- if (entry != NULL)
- kmem_cache_free(hist->dccptxh_slab, entry);
+ struct dccp_tx_hist_entry *head = NULL;
+
+ if (!list_empty(list))
+ head = list_entry(list->next, struct dccp_tx_hist_entry,
+ dccphtx_node);
+ return head;
}
extern struct dccp_tx_hist_entry *
dccp_tx_hist_find_entry(const struct list_head *list,
const u64 seq);
-extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
- u8 *ccval);
static inline void dccp_tx_hist_add_entry(struct list_head *list,
struct dccp_tx_hist_entry *entry)
@@ -115,30 +101,45 @@ static inline void dccp_tx_hist_add_entry(struct list_head *list,
list_add(&entry->dccphtx_node, list);
}
+static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
+ struct dccp_tx_hist_entry *entry)
+{
+ if (entry != NULL)
+ kmem_cache_free(hist->dccptxh_slab, entry);
+}
+
+extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
+ struct list_head *list);
+
extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
struct list_head *list,
struct dccp_tx_hist_entry *next);
-extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
- struct list_head *list);
+/*
+ * Receiver History data structures and declarations
+ */
+struct dccp_rx_hist_entry {
+ struct list_head dccphrx_node;
+ u64 dccphrx_seqno:48,
+ dccphrx_ccval:4,
+ dccphrx_type:4;
+ u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
+ struct timeval dccphrx_tstamp;
+};
-static inline struct dccp_tx_hist_entry *
- dccp_tx_hist_head(struct list_head *list)
-{
- struct dccp_tx_hist_entry *head = NULL;
+struct dccp_rx_hist {
+ struct kmem_cache *dccprxh_slab;
+};
- if (!list_empty(list))
- head = list_entry(list->next, struct dccp_tx_hist_entry,
- dccphtx_node);
- return head;
-}
+extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
+extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
static inline struct dccp_rx_hist_entry *
- dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
- const struct sock *sk,
- const u32 ndp,
- const struct sk_buff *skb,
- const gfp_t prio)
+ dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
+ const struct sock *sk,
+ const u32 ndp,
+ const struct sk_buff *skb,
+ const gfp_t prio)
{
struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
prio);
@@ -156,18 +157,8 @@ static inline struct dccp_rx_hist_entry *
return entry;
}
-static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
- struct dccp_rx_hist_entry *entry)
-{
- if (entry != NULL)
- kmem_cache_free(hist->dccprxh_slab, entry);
-}
-
-extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
- struct list_head *list);
-
static inline struct dccp_rx_hist_entry *
- dccp_rx_hist_head(struct list_head *list)
+ dccp_rx_hist_head(struct list_head *list)
{
struct dccp_rx_hist_entry *head = NULL;
@@ -177,6 +168,27 @@ static inline struct dccp_rx_hist_entry *
return head;
}
+extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+ u8 *ccval);
+extern struct dccp_rx_hist_entry *
+ dccp_rx_hist_find_data_packet(const struct list_head *list);
+
+extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+ struct list_head *rx_list,
+ struct list_head *li_list,
+ struct dccp_rx_hist_entry *packet,
+ u64 nonloss_seqno);
+
+static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
+ struct dccp_rx_hist_entry *entry)
+{
+ if (entry != NULL)
+ kmem_cache_free(hist->dccprxh_slab, entry);
+}
+
+extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
+ struct list_head *list);
+
static inline int
dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
{
@@ -184,12 +196,6 @@ static inline int
entry->dccphrx_type == DCCP_PKT_DATAACK;
}
-extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
- struct list_head *rx_list,
- struct list_head *li_list,
- struct dccp_rx_hist_entry *packet,
- u64 nonloss_seqno);
-
extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
struct list_head *li_list, u8 *win_loss);
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 45f30f59ea2a..faf5f7e219e3 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -13,8 +13,29 @@
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
-
#include <linux/types.h>
+#include <asm/div64.h>
+
+/* integer-arithmetic divisions of type (a * 1000000)/b */
+static inline u64 scaled_div(u64 a, u32 b)
+{
+ BUG_ON(b==0);
+ a *= 1000000;
+ do_div(a, b);
+ return a;
+}
+
+static inline u32 scaled_div32(u64 a, u32 b)
+{
+ u64 result = scaled_div(a, b);
+
+ if (result > UINT_MAX) {
+ DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U",
+ (unsigned long long)a, b);
+ return UINT_MAX;
+ }
+ return result;
+}
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 44076e0c6591..90009fd77e15 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -13,16 +13,83 @@
*/
#include <linux/module.h>
-
-#include <asm/div64.h>
-
+#include "../../dccp.h"
#include "tfrc.h"
#define TFRC_CALC_X_ARRSIZE 500
+#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */
+#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE)
-#define TFRC_CALC_X_SPLIT 50000
-/* equivalent to 0.05 */
-
+/*
+ TFRC TCP Reno Throughput Equation Lookup Table for f(p)
+
+ The following two-column lookup table implements a part of the TCP throughput
+ equation from [RFC 3448, sec. 3.1]:
+
+ s
+ X_calc = --------------------------------------------------------------
+ R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3))
+
+ Where:
+ X is the transmit rate in bytes/second
+ s is the packet size in bytes
+ R is the round trip time in seconds
+ p is the loss event rate, between 0 and 1.0, of the number of loss
+ events as a fraction of the number of packets transmitted
+ t_RTO is the TCP retransmission timeout value in seconds
+ b is the number of packets acknowledged by a single TCP ACK
+
+ We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes:
+
+ s
+ X_calc = -------------------------------------------------------
+ R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3))
+
+ which we can break down into:
+
+ s
+ X_calc = ---------
+ R * f(p)
+
+ where f(p) is given for 0 < p <= 1 by:
+
+ f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3)
+
+ Since this is kernel code, floating-point arithmetic is avoided in favour of
+ integer arithmetic. This means that nearly all fractional parameters are
+ scaled by 1000000:
+ * the parameters p and R
+ * the return result f(p)
+ The lookup table therefore actually tabulates the following function g(q):
+
+ g(q) = 1000000 * f(q/1000000)
+
+ Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer
+ granularity for the practically more relevant case of small values of p (up to
+ 5%), the second column is used; the first one ranges up to 100%. This split
+ corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also
+ determines the smallest resolution possible with this lookup table:
+
+ TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
+
+ The entire table is generated by:
+ for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) {
+ lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE);
+ lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE);
+ }
+
+ With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1,
+ lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%)
+ lookup[M][0] = g(1000000) = 1000000 * f(100%)
+ lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%)
+ lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%)
+
+ In summary, the two columns represent f(p) for the following ranges:
+ * The first column is for 0.002 <= p <= 1.0
+ * The second column is for 0.0001 <= p <= 0.05
+ Where the columns overlap, the second (finer-grained) is given preference,
+ i.e. the first column is used only for p >= 0.05.
+ */
static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
{ 37172, 8172 },
{ 53499, 11567 },
@@ -526,117 +593,105 @@ static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
{ 243315981, 271305 }
};
-/* Calculate the send rate as per section 3.1 of RFC3448
-
-Returns send rate in bytes per second
-
-Integer maths and lookups are used as not allowed floating point in kernel
-
-The function for Xcalc as per section 3.1 of RFC3448 is:
-
-X = s
- -------------------------------------------------------------
- R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
-
-where
-X is the trasmit rate in bytes/second
-s is the packet size in bytes
-R is the round trip time in seconds
-p is the loss event rate, between 0 and 1.0, of the number of loss events
- as a fraction of the number of packets transmitted
-t_RTO is the TCP retransmission timeout value in seconds
-b is the number of packets acknowledged by a single TCP acknowledgement
-
-we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
-
-X = s
- -----------------------------------------------------------------------
- R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
-
-
-which we can break down into:
-
-X = s
- --------
- R * f(p)
-
-where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
-
-Function parameters:
-s - bytes
-R - RTT in usecs
-p - loss rate (decimal fraction multiplied by 1,000,000)
-
-Returns Xcalc in bytes per second
-
-DON'T alter this code unless you run test cases against it as the code
-has been manipulated to stop underflow/overlow.
+/* return largest index i such that fval <= lookup[i][small] */
+static inline u32 tfrc_binsearch(u32 fval, u8 small)
+{
+ u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1;
+
+ while (low < high) {
+ try = (low + high) / 2;
+ if (fval <= tfrc_calc_x_lookup[try][small])
+ high = try;
+ else
+ low = try + 1;
+ }
+ return high;
+}
-*/
+/**
+ * tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448
+ *
+ * @s: packet size in bytes
+ * @R: RTT scaled by 1000000 (i.e., microseconds)
+ * @p: loss ratio estimate scaled by 1000000
+ * Returns X_calc in bytes per second (not scaled).
+ */
u32 tfrc_calc_x(u16 s, u32 R, u32 p)
{
- int index;
+ u16 index;
u32 f;
- u64 tmp1, tmp2;
+ u64 result;
- if (p < TFRC_CALC_X_SPLIT)
- index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
- else
- index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
+ /* check against invalid parameters and divide-by-zero */
+ BUG_ON(p > 1000000); /* p must not exceed 100% */
+ BUG_ON(p == 0); /* f(0) = 0, divide by zero */
+ if (R == 0) { /* possible divide by zero */
+ DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc.");
+ return ~0U;
+ }
- if (index < 0)
- /* p should be 0 unless there is a bug in my code */
- index = 0;
+ if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
+ if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
+ DCCP_WARN("Value of p (%d) below resolution. "
+ "Substituting %d\n", p, TFRC_SMALLEST_P);
+ index = 0;
+ } else /* 0.0001 <= p <= 0.05 */
+ index = p/TFRC_SMALLEST_P - 1;
- if (R == 0)
- R = 1; /* RTT can't be zero or else divide by zero */
+ f = tfrc_calc_x_lookup[index][1];
- BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
+ } else { /* 0.05 < p <= 1.00 */
+ index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1;
- if (p >= TFRC_CALC_X_SPLIT)
f = tfrc_calc_x_lookup[index][0];
- else
- f = tfrc_calc_x_lookup[index][1];
-
- tmp1 = ((u64)s * 100000000);
- tmp2 = ((u64)R * (u64)f);
- do_div(tmp2, 10000);
- do_div(tmp1, tmp2);
- /* Don't alter above math unless you test due to overflow on 32 bit */
-
- return (u32)tmp1;
+ }
+
+ /*
+ * Compute X = s/(R*f(p)) in bytes per second.
+ * Since f(p) and R are both scaled by 1000000, we need to multiply by
+ * 1000000^2. To avoid overflow, the result is computed in two stages.
+ * This works under almost all reasonable operational conditions, for a
+ * wide range of parameters. Yet, should some strange combination of
+ * parameters result in overflow, the use of scaled_div32 will catch
+ * this and return UINT_MAX - which is a logically adequate consequence.
+ */
+ result = scaled_div(s, R);
+ return scaled_div32(result, f);
}
EXPORT_SYMBOL_GPL(tfrc_calc_x);
/*
- * args: fvalue - function value to match
- * returns: p closest to that value
+ * tfrc_calc_x_reverse_lookup - try to find p given f(p)
*
- * both fvalue and p are multiplied by 1,000,000 to use ints
+ * @fvalue: function value to match, scaled by 1000000
+ * Returns closest match for p, also scaled by 1000000
*/
u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
{
- int ctr = 0;
- int small;
+ int index;
- if (fvalue < tfrc_calc_x_lookup[0][1])
+ if (fvalue == 0) /* f(p) = 0 whenever p = 0 */
return 0;
- if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
- small = 1;
- else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
+ /* Error cases. */
+ if (fvalue < tfrc_calc_x_lookup[0][1]) {
+ DCCP_WARN("fvalue %d smaller than resolution\n", fvalue);
+ return tfrc_calc_x_lookup[0][1];
+ }
+ if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
+ DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue);
return 1000000;
- else
- small = 0;
-
- while (fvalue > tfrc_calc_x_lookup[ctr][small])
- ctr++;
+ }
- if (small)
- return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
- else
- return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
+ if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) {
+ index = tfrc_binsearch(fvalue, 1);
+ return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE;
+ }
+
+ /* else ... it must be in the coarse-grained column */
+ index = tfrc_binsearch(fvalue, 0);
+ return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0a21be437ed3..a0900bf98e6b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -18,15 +18,33 @@
#include <net/tcp.h>
#include "ackvec.h"
+/*
+ * DCCP - specific warning and debugging macros.
+ */
+#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \
+ __FUNCTION__, ##a)
+#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \
+ __FILE__, __LINE__, __FUNCTION__)
+#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0)
+#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \
+ DCCP_BUG("\"%s\" holds (exception!)", \
+ __stringify(cond)); \
+ } while (0)
+
+#ifdef MODULE
+#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
+ printk(fmt, ##args); \
+ } while(0)
+#else
+#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args)
+#endif
+#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
+ "%s: " fmt, __FUNCTION__, ##a)
+
#ifdef CONFIG_IP_DCCP_DEBUG
extern int dccp_debug;
-
-#define dccp_pr_debug(format, a...) \
- do { if (dccp_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
- } while (0)
-#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
- printk(format, ##a); } while (0)
+#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
+#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
@@ -35,22 +53,26 @@ extern int dccp_debug;
extern struct inet_hashinfo dccp_hashinfo;
extern atomic_t dccp_orphan_count;
-extern int dccp_tw_count;
-extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
-/* FIXME: Right size this */
-#define DCCP_MAX_OPT_LEN 128
-
-#define DCCP_MAX_PACKET_HDR 32
-
-#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+/*
+ * Set safe upper bounds for header and option length. Since Data Offset is 8
+ * bits (RFC 4340, sec. 5.1), the total header length can never be more than
+ * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1):
+ * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR
+ * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
+ * Hence a safe upper bound for the maximum option length is 1020-28 = 992
+ */
+#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
+#define DCCP_MAX_PACKET_HDR 28
+#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
+#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
-/* draft-ietf-dccp-spec-11.txt initial RTO value */
+/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
/* Maximal interval between probes for local resources. */
@@ -58,6 +80,18 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/* sysctl variables for DCCP */
+extern int sysctl_dccp_request_retries;
+extern int sysctl_dccp_retries1;
+extern int sysctl_dccp_retries2;
+extern int sysctl_dccp_feat_sequence_window;
+extern int sysctl_dccp_feat_rx_ccid;
+extern int sysctl_dccp_feat_tx_ccid;
+extern int sysctl_dccp_feat_ack_ratio;
+extern int sysctl_dccp_feat_send_ack_vector;
+extern int sysctl_dccp_feat_send_ndp_count;
+extern int sysctl_dccp_tx_qlen;
+
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
@@ -123,10 +157,36 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_ADD_STATS_USER(field, val) \
SNMP_ADD_STATS_USER(dccp_statistics, field, val)
+/*
+ * Checksumming routines
+ */
+static inline int dccp_csum_coverage(const struct sk_buff *skb)
+{
+ const struct dccp_hdr* dh = dccp_hdr(skb);
+
+ if (dh->dccph_cscov == 0)
+ return skb->len;
+ return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32);
+}
+
+static inline void dccp_csum_outgoing(struct sk_buff *skb)
+{
+ int cov = dccp_csum_coverage(skb);
+
+ if (cov >= skb->len)
+ dccp_hdr(skb)->dccph_cscov = 0;
+
+ skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
+}
+
+extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
+
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
@@ -147,18 +207,7 @@ extern const char *dccp_state_name(const int state);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
-static inline void dccp_openreq_init(struct request_sock *req,
- struct dccp_sock *dp,
- struct sk_buff *skb)
-{
- /*
- * FIXME: fill in the other req fields from the DCCP options
- * received
- */
- inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->acked = 0;
- req->rcv_wnd = 0;
-}
+extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
@@ -217,14 +266,9 @@ extern void dccp_shutdown(struct sock *sk, int how);
extern int inet_dccp_listen(struct socket *sock, int backlog);
extern unsigned int dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
-extern void dccp_v4_send_check(struct sock *sk, int len,
- struct sk_buff *skb);
extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
-extern int dccp_v4_checksum(const struct sk_buff *skb,
- const __be32 saddr, const __be32 daddr);
-
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
@@ -388,6 +432,7 @@ static inline void timeval_sub_usecs(struct timeval *tv,
tv->tv_sec--;
tv->tv_usec += USEC_PER_SEC;
}
+ DCCP_BUG_ON(tv->tv_sec < 0);
}
#ifdef CONFIG_SYSCTL
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index a1b0682ee77c..95b6927ec653 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -12,7 +12,6 @@
#include <linux/module.h>
-#include "dccp.h"
#include "ccid.h"
#include "feat.h"
@@ -23,9 +22,17 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
{
struct dccp_opt_pend *opt;
- dccp_pr_debug("feat change type=%d feat=%d\n", type, feature);
+ dccp_feat_debug(type, feature, *val);
- /* XXX sanity check feat change request */
+ if (!dccp_feat_is_valid_type(type)) {
+ DCCP_WARN("option type %d invalid in negotiation\n", type);
+ return 1;
+ }
+ if (!dccp_feat_is_valid_length(type, feature, len)) {
+ DCCP_WARN("invalid length %d\n", len);
+ return 1;
+ }
+ /* XXX add further sanity checks */
/* check if that feature is already being negotiated */
list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
@@ -95,14 +102,14 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
/* XXX taking only u8 vals */
static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
{
- dccp_pr_debug("changing [%d] feat %d to %d\n", type, feat, val);
+ dccp_feat_debug(type, feat, val);
switch (feat) {
case DCCPF_CCID:
return dccp_feat_update_ccid(sk, type, val);
default:
- dccp_pr_debug("IMPLEMENT changing [%d] feat %d to %d\n",
- type, feat, val);
+ dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
+ dccp_feat_typename(type), feat);
break;
}
return 0;
@@ -162,7 +169,8 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
break;
default:
- WARN_ON(1); /* XXX implement res */
+ DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
+ /* XXX implement res */
return -EFAULT;
}
@@ -265,10 +273,10 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
u8 *copy;
int rc;
- /* NN features must be change L */
- if (type == DCCPO_CHANGE_R) {
- dccp_pr_debug("received CHANGE_R %d for NN feat %d\n",
- type, feature);
+ /* NN features must be Change L (sec. 6.3.2) */
+ if (type != DCCPO_CHANGE_L) {
+ dccp_pr_debug("received %s for NN feature %d\n",
+ dccp_feat_typename(type), feature);
return -EFAULT;
}
@@ -279,12 +287,11 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
if (opt == NULL)
return -ENOMEM;
- copy = kmalloc(len, GFP_ATOMIC);
+ copy = kmemdup(val, len, GFP_ATOMIC);
if (copy == NULL) {
kfree(opt);
return -ENOMEM;
}
- memcpy(copy, val, len);
opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
opt->dccpop_feat = feature;
@@ -299,7 +306,8 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
return rc;
}
- dccp_pr_debug("Confirming NN feature %d (val=%d)\n", feature, *copy);
+ dccp_feat_debug(type, feature, *copy);
+
list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
return 0;
@@ -318,14 +326,19 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
return;
}
- opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R :
- DCCPO_CONFIRM_L;
+ switch (type) {
+ case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break;
+ case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break;
+ default: DCCP_WARN("invalid type %d\n", type); return;
+
+ }
opt->dccpop_feat = feature;
opt->dccpop_val = NULL;
opt->dccpop_len = 0;
/* change feature */
- dccp_pr_debug("Empty confirm feature %d type %d\n", feature, type);
+ dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
+
list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
}
@@ -359,7 +372,7 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
{
int rc;
- dccp_pr_debug("got feat change type=%d feat=%d\n", type, feature);
+ dccp_feat_debug(type, feature, *val);
/* figure out if it's SP or NN feature */
switch (feature) {
@@ -375,6 +388,8 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
/* XXX implement other features */
default:
+ dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
+ dccp_feat_typename(type), feature);
rc = -EFAULT;
break;
}
@@ -403,20 +418,27 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
u8 t;
struct dccp_opt_pend *opt;
struct dccp_minisock *dmsk = dccp_msk(sk);
- int rc = 1;
+ int found = 0;
int all_confirmed = 1;
- dccp_pr_debug("got feat confirm type=%d feat=%d\n", type, feature);
-
- /* XXX sanity check type & feat */
+ dccp_feat_debug(type, feature, *val);
/* locate our change request */
- t = type == DCCPO_CONFIRM_L ? DCCPO_CHANGE_R : DCCPO_CHANGE_L;
+ switch (type) {
+ case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
+ case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
+ default: DCCP_WARN("invalid type %d\n", type);
+ return 1;
+
+ }
+ /* XXX sanity check feature value */
list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
if (!opt->dccpop_conf && opt->dccpop_type == t &&
opt->dccpop_feat == feature) {
- /* we found it */
+ found = 1;
+ dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
+
/* XXX do sanity check */
opt->dccpop_conf = 1;
@@ -425,9 +447,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
dccp_feat_update(sk, opt->dccpop_type,
opt->dccpop_feat, *val);
- dccp_pr_debug("feat %d type %d confirmed %d\n",
- feature, type, *val);
- rc = 0;
+ /* XXX check the return value of dccp_feat_update */
break;
}
@@ -446,9 +466,9 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
}
- if (rc)
- dccp_pr_debug("feat %d type %d never requested\n",
- feature, type);
+ if (!found)
+ dccp_pr_debug("%s(%d, ...) never requested\n",
+ dccp_feat_typename(type), feature);
return 0;
}
@@ -501,20 +521,18 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
struct dccp_opt_pend *newopt;
/* copy the value of the option */
- u8 *val = kmalloc(opt->dccpop_len, GFP_ATOMIC);
+ u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
if (val == NULL)
goto out_clean;
- memcpy(val, opt->dccpop_val, opt->dccpop_len);
- newopt = kmalloc(sizeof(*newopt), GFP_ATOMIC);
+ newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
if (newopt == NULL) {
kfree(val);
goto out_clean;
}
/* insert the option */
- memcpy(newopt, opt, sizeof(*newopt));
newopt->dccpop_val = val;
list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
@@ -545,10 +563,9 @@ static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
u8 *val, u8 len)
{
int rc = -ENOMEM;
- u8 *copy = kmalloc(len, GFP_KERNEL);
+ u8 *copy = kmemdup(val, len, GFP_KERNEL);
if (copy != NULL) {
- memcpy(copy, val, len);
rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
if (rc)
kfree(copy);
@@ -583,3 +600,45 @@ out:
}
EXPORT_SYMBOL_GPL(dccp_feat_init);
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+const char *dccp_feat_typename(const u8 type)
+{
+ switch(type) {
+ case DCCPO_CHANGE_L: return("ChangeL");
+ case DCCPO_CONFIRM_L: return("ConfirmL");
+ case DCCPO_CHANGE_R: return("ChangeR");
+ case DCCPO_CONFIRM_R: return("ConfirmR");
+ /* the following case must not appear in feature negotation */
+ default: dccp_pr_debug("unknown type %d [BUG!]\n", type);
+ }
+ return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_typename);
+
+const char *dccp_feat_name(const u8 feat)
+{
+ static const char *feature_names[] = {
+ [DCCPF_RESERVED] = "Reserved",
+ [DCCPF_CCID] = "CCID",
+ [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
+ [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
+ [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
+ [DCCPF_ACK_RATIO] = "Ack Ratio",
+ [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
+ [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
+ [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
+ [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
+ };
+ if (feat >= DCCPF_MIN_CCID_SPECIFIC)
+ return "CCID-specific";
+
+ if (dccp_feat_is_reserved(feat))
+ return feature_names[DCCPF_RESERVED];
+
+ return feature_names[feat];
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_name);
+#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index cee553d416ca..2c373ad7edcf 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -12,9 +12,46 @@
*/
#include <linux/types.h>
+#include "dccp.h"
-struct sock;
-struct dccp_minisock;
+static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len)
+{
+ /* sec. 6.1: Confirm has at least length 3,
+ * sec. 6.2: Change has at least length 4 */
+ if (len < 3)
+ return 1;
+ if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R))
+ return 1;
+ /* XXX: add per-feature length validation (sec. 6.6.8) */
+ return 0;
+}
+
+static inline int dccp_feat_is_reserved(const u8 feat)
+{
+ return (feat > DCCPF_DATA_CHECKSUM &&
+ feat < DCCPF_MIN_CCID_SPECIFIC) ||
+ feat == DCCPF_RESERVED;
+}
+
+/* feature negotiation knows only these four option types (RFC 4340, sec. 6) */
+static inline int dccp_feat_is_valid_type(const u8 optnum)
+{
+ return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R;
+
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern const char *dccp_feat_typename(const u8 type);
+extern const char *dccp_feat_name(const u8 feat);
+
+static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
+{
+ dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
+ dccp_feat_name(feat), feat, val);
+}
+#else
+#define dccp_feat_debug(type, feat, val)
+#endif /* CONFIG_IP_DCCP_DEBUG */
extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
u8 *val, u8 len, gfp_t gfp);
@@ -26,11 +63,4 @@ extern void dccp_feat_clean(struct dccp_minisock *dmsk);
extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
extern int dccp_feat_init(struct dccp_minisock *dmsk);
-extern int dccp_feat_default_sequence_window;
-extern int dccp_feat_default_rx_ccid;
-extern int dccp_feat_default_tx_ccid;
-extern int dccp_feat_default_ack_ratio;
-extern int dccp_feat_default_send_ack_vector;
-extern int dccp_feat_default_send_ndp_count;
-
#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 7f9dc6ac58c9..565bc80557ce 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -1,6 +1,6 @@
/*
* net/dccp/input.c
- *
+ *
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
@@ -82,7 +82,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
* Otherwise,
* Drop packet and return
*/
- if (dh->dccph_type == DCCP_PKT_SYNC ||
+ if (dh->dccph_type == DCCP_PKT_SYNC ||
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
dp->dccps_awl, dp->dccps_awh) &&
@@ -128,21 +128,18 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
DCCP_PKT_WITHOUT_ACK_SEQ))
dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
} else {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
- "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
- "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
- "sending SYNC...\n",
- dccp_packet_name(dh->dccph_type),
- (unsigned long long) lswl,
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_seq,
- (unsigned long long) dp->dccps_swh,
- (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
+ DCCP_WARN("DCCP: Step 6 failed for %s packet, "
+ "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
+ "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
+ "sending SYNC...\n", dccp_packet_name(dh->dccph_type),
+ (unsigned long long) lswl,
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq,
+ (unsigned long long) dp->dccps_swh,
+ (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
- (unsigned long long) lawl,
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_ack_seq,
- (unsigned long long) dp->dccps_awh);
+ (unsigned long long) lawl,
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (unsigned long long) dp->dccps_awh);
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
return -1;
}
@@ -188,8 +185,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
dccp_rcv_close(sk, skb);
return 0;
case DCCP_PKT_REQUEST:
- /* Step 7
- * or (S.is_server and P.type == Response)
+ /* Step 7
+ * or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state >= OPEN and P.type == Request
* and P.seqno >= S.OSR)
@@ -216,11 +213,11 @@ send_sync:
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_PKT_SYNCACK);
/*
- * From the draft:
+ * From RFC 4340, sec. 5.7
*
* As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
* MAY have non-zero-length application data areas, whose
- * contents * receivers MUST ignore.
+ * contents receivers MUST ignore.
*/
goto discard;
}
@@ -251,8 +248,18 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
DCCP_ACKVEC_STATE_RECEIVED))
goto discard;
- ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
- ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+ /*
+ * Deliver to the CCID module in charge.
+ * FIXME: Currently DCCP operates one-directional only, i.e. a listening
+ * server is not at the same time a connecting client. There is
+ * not much sense in delivering to both rx/tx sides at the moment
+ * (only one is active at a time); when moving to bidirectional
+ * service, this needs to be revised.
+ */
+ if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
+ ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+ else
+ ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
return __dccp_rcv_established(sk, skb, dh, len);
discard:
@@ -267,7 +274,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
const struct dccp_hdr *dh,
const unsigned len)
{
- /*
+ /*
* Step 4: Prepare sequence numbers in REQUEST
* If S.state == REQUEST,
* If (P.type == Response or P.type == Reset)
@@ -335,7 +342,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
* from the Response * /
* S.state := PARTOPEN
* Set PARTOPEN timer
- * Continue with S.state == PARTOPEN
+ * Continue with S.state == PARTOPEN
* / * Step 12 will send the Ack completing the
* three-way handshake * /
*/
@@ -366,7 +373,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
*/
__kfree_skb(skb);
return 0;
- }
+ }
dccp_send_ack(sk);
return -1;
}
@@ -374,7 +381,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
out_invalid_packet:
/* dccp_v4_do_rcv will send a reset */
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
- return 1;
+ return 1;
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -431,29 +438,25 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/*
* Step 3: Process LISTEN state
- * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
*
* If S.state == LISTEN,
- * If P.type == Request or P contains a valid Init Cookie
- * option,
- * * Must scan the packet's options to check for an Init
- * Cookie. Only the Init Cookie is processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *
- * * Generate a new socket and switch to that socket *
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookie
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- * Continue with S.state == RESPOND
- * * A Response packet will be generated in Step 11 *
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- *
- * NOTE: the check for the packet types is done in
- * dccp_rcv_state_process
+ * If P.type == Request or P contains a valid Init Cookie option,
+ * (* Must scan the packet's options to check for Init
+ * Cookies. Only Init Cookies are processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *)
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
+ * Cookies Continue with S.state == RESPOND
+ * (* A Response packet will be generated in Step 11 *)
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
*/
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
@@ -485,14 +488,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_ACKVEC_STATE_RECEIVED))
- goto discard;
+ DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_ACKVEC_STATE_RECEIVED))
+ goto discard;
- ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
- ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+ /* XXX see the comments in dccp_rcv_established about this */
+ if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
+ ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+ else
+ ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
}
/*
@@ -574,7 +580,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
}
- if (!queued) {
+ if (!queued) {
discard:
__kfree_skb(skb);
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7e746c4c1688..90c74b4adb73 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -113,13 +113,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* OK, now commit destination to socket. */
sk_setup_caps(sk, &rt->u.dst);
- dp->dccps_gar =
- dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
- inet->daddr,
- inet->sport,
- usin->sin_port);
- dccp_update_gss(sk, dp->dccps_iss);
-
+ dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr,
+ inet->sport, inet->dport);
inet->id = dp->dccps_iss ^ jiffies;
err = dccp_connect(sk);
@@ -162,7 +157,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
/* We don't check in the destentry if pmtu discovery is forbidden
* on this route. We just assume that no packet_to_big packets
* are send back when pmtu discovery is not active.
- * There is a small race when the user changes this flag in the
+ * There is a small race when the user changes this flag in the
* route, but I think that's acceptable.
*/
if ((dst = __sk_dst_check(sk, 0)) == NULL)
@@ -183,7 +178,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
dccp_sync_mss(sk, mtu);
/*
- * From: draft-ietf-dccp-spec-11.txt
+ * From RFC 4340, sec. 14.1:
*
* DCCP-Sync packets are the best choice for upward
* probing, since DCCP-Sync probes do not risk application
@@ -193,86 +188,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
} /* else let the usual retransmit timer handle it */
}
-static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb,
- struct request_sock *req)
-{
- int err;
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_ack_bits);
- struct sk_buff *skb;
-
- if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
- return;
-
- skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- /* Reserve space for headers. */
- skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
-
- skb->dst = dst_clone(rxskb->dst);
-
- skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_ack_len);
-
- /* Build DCCP header and checksum it. */
- dh->dccph_type = DCCP_PKT_ACK;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_ack_len / 4;
- dh->dccph_x = 1;
-
- dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
-
- bh_lock_sock(dccp_v4_ctl_socket->sk);
- err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
- rxskb->nh.iph->daddr,
- rxskb->nh.iph->saddr, NULL);
- bh_unlock_sock(dccp_v4_ctl_socket->sk);
-
- if (err == NET_XMIT_CN || err == 0) {
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
- }
-}
-
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
- struct dst_entry *dst)
-{
- int err = -1;
- struct sk_buff *skb;
-
- /* First, grab a route. */
-
- if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
- goto out;
-
- skb = dccp_make_response(sk, dst, req);
- if (skb != NULL) {
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr,
- ireq->rmt_addr);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
- ireq->rmt_addr,
- ireq->opt);
- if (err == NET_XMIT_CN)
- err = 0;
- }
-
-out:
- dst_release(dst);
- return err;
-}
-
/*
* This routine is called by the ICMP module when it gets some sort of error
* condition. If err < 0 then the socket should be closed and the error
@@ -329,7 +244,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
seq = dccp_hdr_seq(skb);
if (sk->sk_state != DCCP_LISTEN &&
!between48(seq, dp->dccps_swl, dp->dccps_swh)) {
- NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -429,19 +344,24 @@ out:
sock_put(sk);
}
-/* This routine computes an IPv4 DCCP checksum. */
-void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
+ __be32 src, __be32 dst)
+{
+ return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);
+}
+
+void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb)
{
const struct inet_sock *inet = inet_sk(sk);
struct dccp_hdr *dh = dccp_hdr(skb);
- dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr);
}
EXPORT_SYMBOL_GPL(dccp_v4_send_check);
-static inline u64 dccp_v4_init_sequence(const struct sock *sk,
- const struct sk_buff *skb)
+static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
{
return secure_dccp_sequence_number(skb->nh.iph->daddr,
skb->nh.iph->saddr,
@@ -449,93 +369,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
dccp_hdr(skb)->dccph_sport);
}
-int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
-{
- struct inet_request_sock *ireq;
- struct dccp_sock dp;
- struct request_sock *req;
- struct dccp_request_sock *dreq;
- const __be32 saddr = skb->nh.iph->saddr;
- const __be32 daddr = skb->nh.iph->daddr;
- const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
-
- /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
- if (((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST)) {
- reset_code = DCCP_RESET_CODE_NO_CONNECTION;
- goto drop;
- }
-
- if (dccp_bad_service_code(sk, service)) {
- reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
- goto drop;
- }
- /*
- * TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if (inet_csk_reqsk_queue_is_full(sk))
- goto drop;
-
- /*
- * Accept backlog is full. If we have already queued enough
- * of warm entries in syn queue, drop request. It is better than
- * clogging syn queue with openreqs with exponentially increasing
- * timeout.
- */
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
- goto drop;
-
- req = reqsk_alloc(sk->sk_prot->rsk_prot);
- if (req == NULL)
- goto drop;
-
- if (dccp_parse_options(sk, skb))
- goto drop_and_free;
-
- dccp_openreq_init(req, &dp, skb);
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
- ireq = inet_rsk(req);
- ireq->loc_addr = daddr;
- ireq->rmt_addr = saddr;
- req->rcv_wnd = dccp_feat_default_sequence_window;
- ireq->opt = NULL;
-
- /*
- * Step 3: Process LISTEN state
- *
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- *
- * In fact we defer setting S.GSR, S.SWL, S.SWH to
- * dccp_create_openreq_child.
- */
- dreq = dccp_rsk(req);
- dreq->dreq_isr = dcb->dccpd_seq;
- dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
- dreq->dreq_service = service;
-
- if (dccp_v4_send_response(sk, req, NULL))
- goto drop_and_free;
-
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- return 0;
-
-drop_and_free:
- reqsk_free(req);
-drop:
- DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
- dcb->dccpd_reset_code = reset_code;
- return -1;
-}
-
-EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
-
/*
* The three way handshake has completed - we got a valid ACK or DATAACK -
* now create the new socket.
@@ -621,47 +454,6 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr,
- const __be32 daddr)
-{
- const struct dccp_hdr* dh = dccp_hdr(skb);
- int checksum_len;
- u32 tmp;
-
- if (dh->dccph_cscov == 0)
- checksum_len = skb->len;
- else {
- checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
- checksum_len = checksum_len < skb->len ? checksum_len :
- skb->len;
- }
-
- tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
- return csum_tcpudp_magic(saddr, daddr, checksum_len,
- IPPROTO_DCCP, tmp);
-}
-
-EXPORT_SYMBOL_GPL(dccp_v4_checksum);
-
-static int dccp_v4_verify_checksum(struct sk_buff *skb,
- const __be32 saddr, const __be32 daddr)
-{
- struct dccp_hdr *dh = dccp_hdr(skb);
- int checksum_len;
- u32 tmp;
-
- if (dh->dccph_cscov == 0)
- checksum_len = skb->len;
- else {
- checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
- checksum_len = checksum_len < skb->len ? checksum_len :
- skb->len;
- }
- tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
- return csum_tcpudp_magic(saddr, daddr, checksum_len,
- IPPROTO_DCCP, tmp) == 0 ? 0 : -1;
-}
-
static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
struct sk_buff *skb)
{
@@ -675,7 +467,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
.uli_u = { .ports =
{ .sport = dccp_hdr(skb)->dccph_dport,
.dport = dccp_hdr(skb)->dccph_sport }
- }
+ }
};
security_skb_classify_flow(skb, &fl);
@@ -687,7 +479,37 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
return &rt->u.dst;
}
-static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+ struct dst_entry *dst)
+{
+ int err = -1;
+ struct sk_buff *skb;
+
+ /* First, grab a route. */
+
+ if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
+ goto out;
+
+ skb = dccp_make_response(sk, dst, req);
+ if (skb != NULL) {
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct dccp_hdr *dh = dccp_hdr(skb);
+
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
+ ireq->rmt_addr);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+ ireq->rmt_addr,
+ ireq->opt);
+ err = net_xmit_eval(err);
+ }
+
+out:
+ dst_release(dst);
+ return err;
+}
+
+static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
@@ -696,7 +518,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct dst_entry *dst;
- u64 seqno;
+ u64 seqno = 0;
/* Never send a reset in response to a reset. */
if (rxdh->dccph_type == DCCP_PKT_RESET)
@@ -718,9 +540,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
- skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_reset_len);
+ dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
/* Build DCCP header and checksum it. */
dh->dccph_type = DCCP_PKT_RESET;
@@ -731,17 +551,16 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
dccp_hdr_reset(skb)->dccph_reset_code =
DCCP_SKB_CB(rxskb)->dccpd_reset_code;
- /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
- seqno = 0;
+ /* See "8.3.1. Abnormal Termination" in RFC 4340 */
if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
dccp_hdr_set_seq(dh, seqno);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
- dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
- rxskb->nh.iph->daddr);
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
+ rxskb->nh.iph->daddr);
bh_lock_sock(dccp_v4_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
@@ -749,7 +568,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
rxskb->nh.iph->saddr, NULL);
bh_unlock_sock(dccp_v4_ctl_socket->sk);
- if (err == NET_XMIT_CN || err == 0) {
+ if (net_xmit_eval(err) == 0) {
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
}
@@ -757,6 +576,103 @@ out:
dst_release(dst);
}
+static void dccp_v4_reqsk_destructor(struct request_sock *req)
+{
+ kfree(inet_rsk(req)->opt);
+}
+
+static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
+ .family = PF_INET,
+ .obj_size = sizeof(struct dccp_request_sock),
+ .rtx_syn_ack = dccp_v4_send_response,
+ .send_ack = dccp_reqsk_send_ack,
+ .destructor = dccp_v4_reqsk_destructor,
+ .send_reset = dccp_v4_ctl_send_reset,
+};
+
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq;
+ struct request_sock *req;
+ struct dccp_request_sock *dreq;
+ const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
+
+ /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
+ if (((struct rtable *)skb->dst)->rt_flags &
+ (RTCF_BROADCAST | RTCF_MULTICAST)) {
+ reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+ goto drop;
+ }
+
+ if (dccp_bad_service_code(sk, service)) {
+ reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+ goto drop;
+ }
+ /*
+ * TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ if (inet_csk_reqsk_queue_is_full(sk))
+ goto drop;
+
+ /*
+ * Accept backlog is full. If we have already queued enough
+ * of warm entries in syn queue, drop request. It is better than
+ * clogging syn queue with openreqs with exponentially increasing
+ * timeout.
+ */
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ goto drop;
+
+ req = reqsk_alloc(&dccp_request_sock_ops);
+ if (req == NULL)
+ goto drop;
+
+ if (dccp_parse_options(sk, skb))
+ goto drop_and_free;
+
+ dccp_reqsk_init(req, skb);
+
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
+ ireq = inet_rsk(req);
+ ireq->loc_addr = skb->nh.iph->daddr;
+ ireq->rmt_addr = skb->nh.iph->saddr;
+ ireq->opt = NULL;
+
+ /*
+ * Step 3: Process LISTEN state
+ *
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+ *
+ * In fact we defer setting S.GSR, S.SWL, S.SWH to
+ * dccp_create_openreq_child.
+ */
+ dreq = dccp_rsk(req);
+ dreq->dreq_isr = dcb->dccpd_seq;
+ dreq->dreq_iss = dccp_v4_init_sequence(skb);
+ dreq->dreq_service = service;
+
+ if (dccp_v4_send_response(sk, req, NULL))
+ goto drop_and_free;
+
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ return 0;
+
+drop_and_free:
+ reqsk_free(req);
+drop:
+ DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ dcb->dccpd_reset_code = reset_code;
+ return -1;
+}
+
+EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
+
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_hdr *dh = dccp_hdr(skb);
@@ -769,24 +685,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
/*
* Step 3: Process LISTEN state
- * If S.state == LISTEN,
- * If P.type == Request or P contains a valid Init Cookie
- * option,
- * * Must scan the packet's options to check for an Init
- * Cookie. Only the Init Cookie is processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *
- * * Generate a new socket and switch to that socket *
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookie
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- * Continue with S.state == RESPOND
- * * A Response packet will be generated in Step 11 *
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
+ * If P.type == Request or P contains a valid Init Cookie option,
+ * (* Must scan the packet's options to check for Init
+ * Cookies. Only Init Cookies are processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *)
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
+ * Continue with S.state == RESPOND
+ * (* A Response packet will be generated in Step 11 *)
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
*
* NOTE: the check for the packet types is done in
* dccp_rcv_state_process
@@ -809,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v4_ctl_send_reset(skb);
+ dccp_v4_ctl_send_reset(sk, skb);
discard:
kfree_skb(skb);
return 0;
@@ -817,60 +732,74 @@ discard:
EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
+/**
+ * dccp_invalid_packet - check for malformed packets
+ * Implements RFC 4340, 8.5: Step 1: Check header basics
+ * Packets that fail these checks are ignored and do not receive Resets.
+ */
int dccp_invalid_packet(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
+ unsigned int cscov;
if (skb->pkt_type != PACKET_HOST)
return 1;
+ /* If the packet is shorter than 12 bytes, drop packet and return */
if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
+ DCCP_WARN("pskb_may_pull failed\n");
return 1;
}
dh = dccp_hdr(skb);
- /* If the packet type is not understood, drop packet and return */
+ /* If P.type is not understood, drop packet and return */
if (dh->dccph_type >= DCCP_PKT_INVALID) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
+ DCCP_WARN("invalid packet type\n");
return 1;
}
/*
- * If P.Data Offset is too small for packet type, or too large for
- * packet, drop packet and return
+ * If P.Data Offset is too small for packet type, drop packet and return
*/
if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
- "too small 1\n",
- dh->dccph_doff);
+ DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff);
return 1;
}
-
+ /*
+ * If P.Data Offset is too too large for packet, drop packet and return
+ */
if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
- "too small 2\n",
- dh->dccph_doff);
+ DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff);
return 1;
}
- dh = dccp_hdr(skb);
-
/*
* If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
* has short sequence numbers), drop packet and return
*/
- if (dh->dccph_x == 0 &&
- dh->dccph_type != DCCP_PKT_DATA &&
- dh->dccph_type != DCCP_PKT_ACK &&
- dh->dccph_type != DCCP_PKT_DATAACK) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
- "nor DataAck and P.X == 0\n",
- dccp_packet_name(dh->dccph_type));
+ if (dh->dccph_type >= DCCP_PKT_DATA &&
+ dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) {
+ DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
+ dccp_packet_name(dh->dccph_type));
return 1;
}
+ /*
+ * If P.CsCov is too large for the packet size, drop packet and return.
+ * This must come _before_ checksumming (not as RFC 4340 suggests).
+ */
+ cscov = dccp_csum_coverage(skb);
+ if (cscov > skb->len) {
+ DCCP_WARN("P.CsCov %u exceeds packet length %d\n",
+ dh->dccph_cscov, skb->len);
+ return 1;
+ }
+
+ /* If header checksum is incorrect, drop packet and return.
+ * (This step is completed in the AF-dependent functions.) */
+ skb->csum = skb_checksum(skb, 0, cscov, 0);
+
return 0;
}
@@ -881,17 +810,16 @@ static int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
struct sock *sk;
+ int min_cov;
- /* Step 1: Check header basics: */
+ /* Step 1: Check header basics */
if (dccp_invalid_packet(skb))
goto discard_it;
- /* If the header checksum is incorrect, drop packet and return */
- if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
- skb->nh.iph->daddr) < 0) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
- __FUNCTION__);
+ /* Step 1: If header checksum is incorrect, drop packet and return */
+ if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+ DCCP_WARN("dropped packet with invalid checksum\n");
goto discard_it;
}
@@ -913,23 +841,20 @@ static int dccp_v4_rcv(struct sk_buff *skb)
dccp_pr_debug_cat("\n");
} else {
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
- dccp_pr_debug_cat(", ack=%llu\n",
- (unsigned long long)
+ dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
/* Step 2:
- * Look up flow ID in table and get corresponding socket */
+ * Look up flow ID in table and get corresponding socket */
sk = __inet_lookup(&dccp_hashinfo,
skb->nh.iph->saddr, dh->dccph_sport,
skb->nh.iph->daddr, dh->dccph_dport,
inet_iif(skb));
- /*
+ /*
* Step 2:
- * If no socket ...
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
+ * If no socket ...
*/
if (sk == NULL) {
dccp_pr_debug("failed to look up flow ID in table and "
@@ -937,51 +862,61 @@ static int dccp_v4_rcv(struct sk_buff *skb)
goto no_dccp_socket;
}
- /*
+ /*
* Step 2:
- * ... or S.state == TIMEWAIT,
+ * ... or S.state == TIMEWAIT,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
-
if (sk->sk_state == DCCP_TIME_WAIT) {
- dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
- "do_time_wait\n");
- goto do_time_wait;
+ dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
+ inet_twsk_put(inet_twsk(sk));
+ goto no_dccp_socket;
+ }
+
+ /*
+ * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
+ * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+ * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
+ */
+ min_cov = dccp_sk(sk)->dccps_pcrlen;
+ if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) {
+ dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
+ dh->dccph_cscov, min_cov);
+ /* FIXME: "Such packets SHOULD be reported using Data Dropped
+ * options (Section 11.7) with Drop Code 0, Protocol
+ * Constraints." */
+ goto discard_and_relse;
}
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
nf_reset(skb);
- return sk_receive_skb(sk, skb);
+ return sk_receive_skb(sk, skb, 1);
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
/*
* Step 2:
+ * If no socket ...
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(skb);
+ dccp_v4_ctl_send_reset(sk, skb);
}
discard_it:
- /* Discard frame. */
kfree_skb(skb);
return 0;
discard_and_relse:
sock_put(sk);
goto discard_it;
-
-do_time_wait:
- inet_twsk_put(inet_twsk(sk));
- goto no_dccp_socket;
}
static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
@@ -1015,20 +950,6 @@ static int dccp_v4_init_sock(struct sock *sk)
return err;
}
-static void dccp_v4_reqsk_destructor(struct request_sock *req)
-{
- kfree(inet_rsk(req)->opt);
-}
-
-static struct request_sock_ops dccp_request_sock_ops = {
- .family = PF_INET,
- .obj_size = sizeof(struct dccp_request_sock),
- .rtx_syn_ack = dccp_v4_send_response,
- .send_ack = dccp_v4_reqsk_send_ack,
- .destructor = dccp_v4_reqsk_destructor,
- .send_reset = dccp_v4_ctl_send_reset,
-};
-
static struct timewait_sock_ops dccp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct inet_timewait_sock),
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7171a78671aa..6b91a9dd0411 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -36,13 +36,6 @@
/* Socket used for sending RSTs and ACKs */
static struct socket *dccp_v6_ctl_socket;
-static void dccp_v6_ctl_send_reset(struct sk_buff *skb);
-static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
- struct request_sock *req);
-static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb);
-
-static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-
static struct inet_connection_sock_af_ops dccp_ipv6_mapped;
static struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
@@ -65,205 +58,37 @@ static void dccp_v6_hash(struct sock *sk)
}
}
-static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len,
- struct in6_addr *saddr,
- struct in6_addr *daddr,
- unsigned long base)
+/* add pseudo-header to DCCP checksum stored in skb->csum */
+static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
+ struct in6_addr *saddr,
+ struct in6_addr *daddr)
{
- return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base);
+ return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
}
-static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
+static inline void dccp_v6_send_check(struct sock *sk, int unused_value,
+ struct sk_buff *skb)
{
- const struct dccp_hdr *dh = dccp_hdr(skb);
-
- if (skb->protocol == htons(ETH_P_IPV6))
- return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
- dh->dccph_dport,
- dh->dccph_sport);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct dccp_hdr *dh = dccp_hdr(skb);
- return secure_dccp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
- dh->dccph_dport,
- dh->dccph_sport);
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
}
-static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len)
+static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+ __be16 sport, __be16 dport )
{
- struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- struct in6_addr *saddr = NULL, *final_p = NULL, final;
- struct flowi fl;
- struct dst_entry *dst;
- int addr_type;
- int err;
-
- dp->dccps_role = DCCP_ROLE_CLIENT;
-
- if (addr_len < SIN6_LEN_RFC2133)
- return -EINVAL;
-
- if (usin->sin6_family != AF_INET6)
- return -EAFNOSUPPORT;
-
- memset(&fl, 0, sizeof(fl));
-
- if (np->sndflow) {
- fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
- IP6_ECN_flow_init(fl.fl6_flowlabel);
- if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
- struct ip6_flowlabel *flowlabel;
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
- if (flowlabel == NULL)
- return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
- fl6_sock_release(flowlabel);
- }
- }
- /*
- * connect() to INADDR_ANY means loopback (BSD'ism).
- */
- if (ipv6_addr_any(&usin->sin6_addr))
- usin->sin6_addr.s6_addr[15] = 1;
-
- addr_type = ipv6_addr_type(&usin->sin6_addr);
-
- if (addr_type & IPV6_ADDR_MULTICAST)
- return -ENETUNREACH;
-
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
- if (addr_len >= sizeof(struct sockaddr_in6) &&
- usin->sin6_scope_id) {
- /* If interface is set while binding, indices
- * must coincide.
- */
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id)
- return -EINVAL;
-
- sk->sk_bound_dev_if = usin->sin6_scope_id;
- }
-
- /* Connect to link-local address requires an interface */
- if (!sk->sk_bound_dev_if)
- return -EINVAL;
- }
-
- ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
- np->flow_label = fl.fl6_flowlabel;
-
- /*
- * DCCP over IPv4
- */
- if (addr_type == IPV6_ADDR_MAPPED) {
- u32 exthdrlen = icsk->icsk_ext_hdr_len;
- struct sockaddr_in sin;
-
- SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
- if (__ipv6_only_sock(sk))
- return -ENETUNREACH;
-
- sin.sin_family = AF_INET;
- sin.sin_port = usin->sin6_port;
- sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
-
- icsk->icsk_af_ops = &dccp_ipv6_mapped;
- sk->sk_backlog_rcv = dccp_v4_do_rcv;
-
- err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
- if (err) {
- icsk->icsk_ext_hdr_len = exthdrlen;
- icsk->icsk_af_ops = &dccp_ipv6_af_ops;
- sk->sk_backlog_rcv = dccp_v6_do_rcv;
- goto failure;
- } else {
- ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
- inet->saddr);
- ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
- inet->rcv_saddr);
- }
-
- return err;
- }
-
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
-
- fl.proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = usin->sin6_port;
- fl.fl_ip_sport = inet->sport;
- security_sk_classify_flow(sk, &fl);
-
- if (np->opt != NULL && np->opt->srcrt != NULL) {
- const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-
- ipv6_addr_copy(&final, &fl.fl6_dst);
- ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
- final_p = &final;
- }
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
- goto failure;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = xfrm_lookup(&dst, &fl, sk, 0);
- if (err < 0)
- goto failure;
-
- if (saddr == NULL) {
- saddr = &fl.fl6_src;
- ipv6_addr_copy(&np->rcv_saddr, saddr);
- }
-
- /* set the source address */
- ipv6_addr_copy(&np->saddr, saddr);
- inet->rcv_saddr = LOOPBACK4_IPV6;
-
- __ip6_dst_store(sk, dst, NULL, NULL);
-
- icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
-
- inet->dport = usin->sin6_port;
-
- dccp_set_state(sk, DCCP_REQUESTING);
- err = inet6_hash_connect(&dccp_death_row, sk);
- if (err)
- goto late_failure;
- /* FIXME */
-#if 0
- dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
- inet->sport,
- inet->dport);
-#endif
- err = dccp_connect(sk);
- if (err)
- goto late_failure;
+ return secure_tcpv6_sequence_number(saddr, daddr, sport, dport);
+}
- return 0;
+static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
+{
+ return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
+ skb->nh.ipv6h->saddr.s6_addr32,
+ dccp_hdr(skb)->dccph_dport,
+ dccp_hdr(skb)->dccph_sport );
-late_failure:
- dccp_set_state(sk, DCCP_CLOSED);
- __sk_dst_reset(sk);
-failure:
- inet->dport = 0;
- sk->sk_route_caps = 0;
- return err;
}
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -277,7 +102,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
- &hdr->saddr, dh->dccph_sport, skb->dev->ifindex);
+ &hdr->saddr, dh->dccph_sport, inet6_iif(skb));
if (sk == NULL) {
ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
@@ -464,16 +289,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
if (skb != NULL) {
struct dccp_hdr *dh = dccp_hdr(skb);
- dh->dccph_checksum = dccp_v6_check(dh, skb->len,
- &ireq6->loc_addr,
- &ireq6->rmt_addr,
- csum_partial((char *)dh,
- skb->len,
- skb->csum));
+ dh->dccph_checksum = dccp_v6_csum_finish(skb,
+ &ireq6->loc_addr,
+ &ireq6->rmt_addr);
ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
err = ip6_xmit(sk, skb, &fl, opt, 0);
- if (err == NET_XMIT_CN)
- err = 0;
+ err = net_xmit_eval(err);
}
done:
@@ -489,32 +310,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet6_rsk(req)->pktopts);
}
-static struct request_sock_ops dccp6_request_sock_ops = {
- .family = AF_INET6,
- .obj_size = sizeof(struct dccp6_request_sock),
- .rtx_syn_ack = dccp_v6_send_response,
- .send_ack = dccp_v6_reqsk_send_ack,
- .destructor = dccp_v6_reqsk_destructor,
- .send_reset = dccp_v6_ctl_send_reset,
-};
-
-static struct timewait_sock_ops dccp6_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
-};
-
-static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr,
- len, IPPROTO_DCCP,
- csum_partial((char *)dh,
- dh->dccph_doff << 2,
- skb->csum));
-}
-
-static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
@@ -522,7 +318,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct flowi fl;
- u64 seqno;
+ u64 seqno = 0;
if (rxdh->dccph_type == DCCP_PKT_RESET)
return;
@@ -533,13 +329,11 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
GFP_ATOMIC);
if (skb == NULL)
- return;
+ return;
skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
- skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_reset_len);
+ dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
/* Swap the send and the receive. */
dh->dccph_type = DCCP_PKT_RESET;
@@ -550,21 +344,21 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
dccp_hdr_reset(skb)->dccph_reset_code =
DCCP_SKB_CB(rxskb)->dccpd_reset_code;
- /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
- seqno = 0;
+ /* See "8.3.1. Abnormal Termination" in RFC 4340 */
if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
dccp_hdr_set_seq(dh, seqno);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
+ &rxskb->nh.ipv6h->daddr);
memset(&fl, 0, sizeof(fl));
ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
- dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
- sizeof(*dh), IPPROTO_DCCP,
- skb->csum);
+
fl.proto = IPPROTO_DCCP;
fl.oif = inet6_iif(rxskb);
fl.fl_ip_dport = dh->dccph_dport;
@@ -584,60 +378,14 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
kfree_skb(skb);
}
-static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
- struct request_sock *req)
-{
- struct flowi fl;
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_ack_bits);
- struct sk_buff *skb;
-
- skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
- GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
-
- skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_ack_len);
-
- /* Build DCCP header and checksum it. */
- dh->dccph_type = DCCP_PKT_ACK;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_ack_len / 4;
- dh->dccph_x = 1;
-
- dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
-
- memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
-
- /* FIXME: calculate checksum, IPv4 also should... */
-
- fl.proto = IPPROTO_DCCP;
- fl.oif = inet6_iif(rxskb);
- fl.fl_ip_dport = dh->dccph_dport;
- fl.fl_ip_sport = dh->dccph_sport;
- security_req_classify_flow(req, &fl);
-
- if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
- if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
- ip6_xmit(dccp_v6_ctl_socket->sk, skb, &fl, NULL, 0);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- return;
- }
- }
-
- kfree_skb(skb);
-}
+static struct request_sock_ops dccp6_request_sock_ops = {
+ .family = AF_INET6,
+ .obj_size = sizeof(struct dccp6_request_sock),
+ .rtx_syn_ack = dccp_v6_send_response,
+ .send_ack = dccp_reqsk_send_ack,
+ .destructor = dccp_v6_reqsk_destructor,
+ .send_reset = dccp_v6_ctl_send_reset,
+};
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
@@ -672,13 +420,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct inet_request_sock *ireq;
- struct dccp_sock dp;
struct request_sock *req;
struct dccp_request_sock *dreq;
struct inet6_request_sock *ireq6;
struct ipv6_pinfo *np = inet6_sk(sk);
- const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
+ const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -691,7 +437,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_bad_service_code(sk, service)) {
reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
- }
+ }
/*
* There are no SYN attacks on IPv6, yet...
*/
@@ -701,22 +447,21 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet6_reqsk_alloc(sk->sk_prot->rsk_prot);
+ req = inet6_reqsk_alloc(&dccp6_request_sock_ops);
if (req == NULL)
goto drop;
- /* FIXME: process options */
+ if (dccp_parse_options(sk, skb))
+ goto drop_and_free;
- dccp_openreq_init(req, &dp, skb);
+ dccp_reqsk_init(req, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
ireq6 = inet6_rsk(req);
- ireq = inet_rsk(req);
ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
- req->rcv_wnd = dccp_feat_default_sequence_window;
ireq6->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
@@ -735,14 +480,14 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
/*
* Step 3: Process LISTEN state
*
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*
- * In fact we defer setting S.GSR, S.SWL, S.SWH to
- * dccp_create_openreq_child.
+ * In fact we defer setting S.GSR, S.SWL, S.SWH to
+ * dccp_create_openreq_child.
*/
dreq = dccp_rsk(req);
dreq->dreq_isr = dcb->dccpd_seq;
- dreq->dreq_iss = dccp_v6_init_sequence(sk, skb);
+ dreq->dreq_iss = dccp_v6_init_sequence(skb);
dreq->dreq_service = service;
if (dccp_v6_send_response(sk, req, NULL))
@@ -992,14 +737,46 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
+ /*
+ * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
+ * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example.
+ */
opt_skb = skb_clone(skb, GFP_ATOMIC);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
+ if (opt_skb) {
+ /* XXX This is where we would goto ipv6_pktoptions. */
+ __kfree_skb(opt_skb);
+ }
return 0;
}
+ /*
+ * Step 3: Process LISTEN state
+ * If S.state == LISTEN,
+ * If P.type == Request or P contains a valid Init Cookie option,
+ * (* Must scan the packet's options to check for Init
+ * Cookies. Only Init Cookies are processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *)
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
+ * Continue with S.state == RESPOND
+ * (* A Response packet will be generated in Step 11 *)
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
+ *
+ * NOTE: the check for the packet types is done in
+ * dccp_rcv_state_process
+ */
if (sk->sk_state == DCCP_LISTEN) {
struct sock *nsk = dccp_v6_hnd_req(sk, skb);
@@ -1010,7 +787,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* otherwise we just shortcircuit this and continue with
* the new socket..
*/
- if (nsk != sk) {
+ if (nsk != sk) {
if (dccp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb != NULL)
@@ -1021,10 +798,14 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
+ if (opt_skb) {
+ /* XXX This is where we would goto ipv6_pktoptions. */
+ __kfree_skb(opt_skb);
+ }
return 0;
reset:
- dccp_v6_ctl_send_reset(skb);
+ dccp_v6_ctl_send_reset(sk, skb);
discard:
if (opt_skb != NULL)
__kfree_skb(opt_skb);
@@ -1037,12 +818,20 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
const struct dccp_hdr *dh;
struct sk_buff *skb = *pskb;
struct sock *sk;
+ int min_cov;
- /* Step 1: Check header basics: */
+ /* Step 1: Check header basics */
if (dccp_invalid_packet(skb))
goto discard_it;
+ /* Step 1: If header checksum is incorrect, drop packet and return. */
+ if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr)) {
+ DCCP_WARN("dropped packet with invalid checksum\n");
+ goto discard_it;
+ }
+
dh = dccp_hdr(skb);
DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
@@ -1054,63 +843,247 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
/* Step 2:
- * Look up flow ID in table and get corresponding socket */
+ * Look up flow ID in table and get corresponding socket */
sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
dh->dccph_sport,
&skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
inet6_iif(skb));
/*
* Step 2:
- * If no socket ...
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
+ * If no socket ...
*/
- if (sk == NULL)
+ if (sk == NULL) {
+ dccp_pr_debug("failed to look up flow ID in table and "
+ "get corresponding socket\n");
goto no_dccp_socket;
+ }
/*
* Step 2:
- * ... or S.state == TIMEWAIT,
+ * ... or S.state == TIMEWAIT,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
- if (sk->sk_state == DCCP_TIME_WAIT)
- goto do_time_wait;
+ if (sk->sk_state == DCCP_TIME_WAIT) {
+ dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
+ inet_twsk_put(inet_twsk(sk));
+ goto no_dccp_socket;
+ }
+
+ /*
+ * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
+ * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+ * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
+ */
+ min_cov = dccp_sk(sk)->dccps_pcrlen;
+ if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) {
+ dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
+ dh->dccph_cscov, min_cov);
+ /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
+ goto discard_and_relse;
+ }
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- return sk_receive_skb(sk, skb) ? -1 : 0;
+ return sk_receive_skb(sk, skb, 1) ? -1 : 0;
no_dccp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
/*
* Step 2:
+ * If no socket ...
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(skb);
+ dccp_v6_ctl_send_reset(sk, skb);
}
-discard_it:
-
- /*
- * Discard frame
- */
+discard_it:
kfree_skb(skb);
return 0;
discard_and_relse:
sock_put(sk);
goto discard_it;
+}
+
+static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct in6_addr *saddr = NULL, *final_p = NULL, final;
+ struct flowi fl;
+ struct dst_entry *dst;
+ int addr_type;
+ int err;
+
+ dp->dccps_role = DCCP_ROLE_CLIENT;
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (usin->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ memset(&fl, 0, sizeof(fl));
+
+ if (np->sndflow) {
+ fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+ IP6_ECN_flow_init(fl.fl6_flowlabel);
+ if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
+ struct ip6_flowlabel *flowlabel;
+ flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+ fl6_sock_release(flowlabel);
+ }
+ }
+ /*
+ * connect() to INADDR_ANY means loopback (BSD'ism).
+ */
+ if (ipv6_addr_any(&usin->sin6_addr))
+ usin->sin6_addr.s6_addr[15] = 1;
+
+ addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+ if (addr_type & IPV6_ADDR_MULTICAST)
+ return -ENETUNREACH;
+
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ usin->sin6_scope_id) {
+ /* If interface is set while binding, indices
+ * must coincide.
+ */
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != usin->sin6_scope_id)
+ return -EINVAL;
-do_time_wait:
- inet_twsk_put(inet_twsk(sk));
- goto no_dccp_socket;
+ sk->sk_bound_dev_if = usin->sin6_scope_id;
+ }
+
+ /* Connect to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if)
+ return -EINVAL;
+ }
+
+ ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+ np->flow_label = fl.fl6_flowlabel;
+
+ /*
+ * DCCP over IPv4
+ */
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ u32 exthdrlen = icsk->icsk_ext_hdr_len;
+ struct sockaddr_in sin;
+
+ SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+
+ if (__ipv6_only_sock(sk))
+ return -ENETUNREACH;
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = usin->sin6_port;
+ sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
+
+ icsk->icsk_af_ops = &dccp_ipv6_mapped;
+ sk->sk_backlog_rcv = dccp_v4_do_rcv;
+
+ err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
+ if (err) {
+ icsk->icsk_ext_hdr_len = exthdrlen;
+ icsk->icsk_af_ops = &dccp_ipv6_af_ops;
+ sk->sk_backlog_rcv = dccp_v6_do_rcv;
+ goto failure;
+ } else {
+ ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
+ inet->saddr);
+ ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
+ inet->rcv_saddr);
+ }
+
+ return err;
+ }
+
+ if (!ipv6_addr_any(&np->rcv_saddr))
+ saddr = &np->rcv_saddr;
+
+ fl.proto = IPPROTO_DCCP;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_dport = usin->sin6_port;
+ fl.fl_ip_sport = inet->sport;
+ security_sk_classify_flow(sk, &fl);
+
+ if (np->opt != NULL && np->opt->srcrt != NULL) {
+ const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err)
+ goto failure;
+
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+ err = xfrm_lookup(&dst, &fl, sk, 0);
+ if (err < 0)
+ goto failure;
+
+ if (saddr == NULL) {
+ saddr = &fl.fl6_src;
+ ipv6_addr_copy(&np->rcv_saddr, saddr);
+ }
+
+ /* set the source address */
+ ipv6_addr_copy(&np->saddr, saddr);
+ inet->rcv_saddr = LOOPBACK4_IPV6;
+
+ __ip6_dst_store(sk, dst, NULL, NULL);
+
+ icsk->icsk_ext_hdr_len = 0;
+ if (np->opt != NULL)
+ icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+ np->opt->opt_nflen);
+
+ inet->dport = usin->sin6_port;
+
+ dccp_set_state(sk, DCCP_REQUESTING);
+ err = inet6_hash_connect(&dccp_death_row, sk);
+ if (err)
+ goto late_failure;
+
+ dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
+ np->daddr.s6_addr32,
+ inet->sport, inet->dport);
+ err = dccp_connect(sk);
+ if (err)
+ goto late_failure;
+
+ return 0;
+
+late_failure:
+ dccp_set_state(sk, DCCP_CLOSED);
+ __sk_dst_reset(sk);
+failure:
+ inet->dport = 0;
+ sk->sk_route_caps = 0;
+ return err;
}
static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
@@ -1173,6 +1146,10 @@ static int dccp_v6_destroy_sock(struct sock *sk)
return inet6_destroy_sock(sk);
}
+static struct timewait_sock_ops dccp6_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
+};
+
static struct proto dccp_v6_prot = {
.name = "DCCPv6",
.owner = THIS_MODULE,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 9045438d6b36..6656bb497c7b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -11,6 +11,7 @@
*/
#include <linux/dccp.h>
+#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
@@ -31,8 +32,7 @@ struct inet_timewait_death_row dccp_death_row = {
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&dccp_death_row),
.twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
- inet_twdr_twkill_work,
- &dccp_death_row),
+ inet_twdr_twkill_work),
/* Short-time timewait calendar */
.twcal_hand = -1,
@@ -83,8 +83,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
- "table overflow\n");
+ DCCP_WARN("time wait bucket table overflow\n");
}
dccp_done(sk);
@@ -97,8 +96,8 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
/*
* Step 3: Process LISTEN state
*
- * // Generate a new socket and switch to that socket
- * Set S := new socket for this port pair
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
*/
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
@@ -147,9 +146,9 @@ out_free:
/*
* Step 3: Process LISTEN state
*
- * Choose S.ISS (initial seqno) or set from Init Cookie
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
- * Cookie
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
*/
/* See dccp_v4_conn_request */
@@ -183,7 +182,7 @@ out_free:
EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
-/*
+/*
* Process an incoming packet for RESPOND sockets represented
* as an request_sock.
*/
@@ -195,15 +194,17 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
- if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
- dccp_rsk(req)->dreq_isr)) {
- struct dccp_request_sock *dreq = dccp_rsk(req);
+ struct dccp_request_sock *dreq = dccp_rsk(req);
+ if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
dccp_pr_debug("Retransmitted REQUEST\n");
- /* Send another RESPONSE packet */
- dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
- dccp_set_seqno(&dreq->dreq_isr,
- DCCP_SKB_CB(skb)->dccpd_seq);
+ dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+ /*
+ * Send another RESPONSE packet
+ * To protect against Request floods, increment retrans
+ * counter (backoff, monitored by dccp_response_timer).
+ */
+ req->retrans++;
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
}
/* Network Duplicate, discard packet */
@@ -243,7 +244,7 @@ listen_overflow:
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(skb);
+ req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req, prev);
goto out;
@@ -283,3 +284,19 @@ int dccp_child_process(struct sock *parent, struct sock *child,
}
EXPORT_SYMBOL_GPL(dccp_child_process);
+
+void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk)
+{
+ DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
+}
+
+EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
+
+void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
+{
+ inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
+ inet_rsk(req)->acked = 0;
+ req->rcv_wnd = sysctl_dccp_feat_sequence_window;
+}
+
+EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 07a34696ac97..c03ba61eb6da 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -22,23 +22,23 @@
#include "dccp.h"
#include "feat.h"
-int dccp_feat_default_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
-int dccp_feat_default_rx_ccid = DCCPF_INITIAL_CCID;
-int dccp_feat_default_tx_ccid = DCCPF_INITIAL_CCID;
-int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
-int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
-int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
+int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
+int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
+int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
+int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
+int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
+int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-EXPORT_SYMBOL_GPL(dccp_feat_default_sequence_window);
+EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
- dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window;
- dmsk->dccpms_rx_ccid = dccp_feat_default_rx_ccid;
- dmsk->dccpms_tx_ccid = dccp_feat_default_tx_ccid;
- dmsk->dccpms_ack_ratio = dccp_feat_default_ack_ratio;
- dmsk->dccpms_send_ack_vector = dccp_feat_default_send_ack_vector;
- dmsk->dccpms_send_ndp_count = dccp_feat_default_send_ndp_count;
+ dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
+ dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
+ dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
+ dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
+ dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
+ dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
}
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
@@ -60,12 +60,9 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx opt: " : "server rx opt: ";
-#endif
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+ u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh +
@@ -119,7 +116,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
goto out_invalid_option;
opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
- dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
+ dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk),
opt_recv->dccpor_ndp);
break;
case DCCPO_CHANGE_L:
@@ -153,7 +150,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
break;
if (dccp_msk(sk)->dccpms_send_ack_vector &&
- dccp_ackvec_parse(sk, skb, opt, value, len))
+ dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
goto out_invalid_option;
break;
case DCCPO_TIMESTAMP:
@@ -165,8 +162,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
dccp_timestamp(sk, &dp->dccps_timestamp_time);
- dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
- debug_prefix, opt_recv->dccpor_timestamp,
+ dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
+ dccp_role(sk), opt_recv->dccpor_timestamp,
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
break;
@@ -176,8 +173,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
- dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
- debug_prefix,
+ dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
+ "ackno=%llu, ", dccp_role(sk),
opt_recv->dccpor_timestamp_echo,
len + 2,
(unsigned long long)
@@ -211,11 +208,11 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
- dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
- elapsed_time);
+ dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
+ dccp_role(sk), elapsed_time);
break;
/*
- * From draft-ietf-dccp-spec-11.txt:
+ * From RFC 4340, sec. 10.3:
*
* Option numbers 128 through 191 are for
* options sent from the HC-Sender to the
@@ -242,9 +239,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
}
break;
default:
- pr_info("DCCP(%p): option %d(len=%d) not "
- "implemented, ignoring\n",
- sk, opt, len);
+ DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
+ "implemented, ignoring", sk, opt, len);
break;
}
@@ -261,7 +257,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
- pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
+ DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
return -1;
}
@@ -451,8 +447,7 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
u8 *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
- LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small"
- " to insert feature %d option!\n", feat);
+ DCCP_WARN("packet too small for feature %d option!\n", feat);
return -1;
}
@@ -465,8 +460,10 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
if (len)
memcpy(to, val, len);
- dccp_pr_debug("option %d feat %d len %d\n", type, feat, len);
+ dccp_pr_debug("%s(%s (%d), ...), length %d\n",
+ dccp_feat_typename(type),
+ dccp_feat_name(feat), feat, len);
return 0;
}
@@ -560,11 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
return -1;
dp->dccps_hc_rx_insert_options = 0;
}
- if (dp->dccps_hc_tx_insert_options) {
- if (ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb))
- return -1;
- dp->dccps_hc_tx_insert_options = 0;
- }
/* Feature negotiation */
/* Data packets can't do feat negotiation */
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 7102e3aed4ca..824569659083 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -1,6 +1,6 @@
/*
* net/dccp/output.c
- *
+ *
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
@@ -88,16 +88,15 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
return -EPROTO;
}
- skb->h.raw = skb_push(skb, dccp_header_size);
- dh = dccp_hdr(skb);
/* Build DCCP header and checksum it. */
- memset(dh, 0, dccp_header_size);
+ dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_type = dcb->dccpd_type;
dh->dccph_sport = inet->sport;
dh->dccph_dport = inet->dport;
dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
dh->dccph_ccval = dcb->dccpd_ccval;
+ dh->dccph_cscov = dp->dccps_pcslen;
/* XXX For now we're using only 48 bits sequence numbers */
dh->dccph_x = 1;
@@ -117,7 +116,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
break;
}
- icsk->icsk_af_ops->send_check(sk, skb->len, skb);
+ icsk->icsk_af_ops->send_check(sk, 0, skb);
if (set_ack)
dccp_event_ack_sent(sk);
@@ -125,17 +124,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = icsk->icsk_af_ops->queue_xmit(skb, 0);
- if (err <= 0)
- return err;
-
- /* NET_XMIT_CN is special. It does not guarantee,
- * that this packet is lost. It tells that device
- * is about to start to drop packets or already
- * drops some packets of the same priority and
- * invokes us to send less aggressively.
- */
- return err == NET_XMIT_CN ? 0 : err;
+ err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0);
+ return net_xmit_eval(err);
}
return -ENOBUFS;
}
@@ -185,14 +175,12 @@ void dccp_write_space(struct sock *sk)
/**
* dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
* @sk: socket to wait for
- * @timeo: for how long
*/
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
- long *timeo)
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
DEFINE_WAIT(wait);
- long delay;
+ unsigned long delay;
int rc;
while (1) {
@@ -200,22 +188,16 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
if (sk->sk_err)
goto do_error;
- if (!*timeo)
- goto do_nonblock;
if (signal_pending(current))
goto do_interrupted;
- rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
- skb->len);
+ rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (rc <= 0)
break;
delay = msecs_to_jiffies(rc);
- if (delay > *timeo || delay < 0)
- goto do_nonblock;
-
sk->sk_write_pending++;
release_sock(sk);
- *timeo -= schedule_timeout(delay);
+ schedule_timeout(delay);
lock_sock(sk);
sk->sk_write_pending--;
}
@@ -226,11 +208,8 @@ out:
do_error:
rc = -EPIPE;
goto out;
-do_nonblock:
- rc = -EAGAIN;
- goto out;
do_interrupted:
- rc = sock_intr_errno(*timeo);
+ rc = -EINTR;
goto out;
}
@@ -251,12 +230,9 @@ void dccp_write_xmit(struct sock *sk, int block)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
- long timeo = 30000; /* If a packet is taking longer than 2 secs
- we have other issues */
while ((skb = skb_peek(&sk->sk_write_queue))) {
- int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
- skb->len);
+ int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (err > 0) {
if (!block) {
@@ -264,12 +240,9 @@ void dccp_write_xmit(struct sock *sk, int block)
msecs_to_jiffies(err)+jiffies);
break;
} else
- err = dccp_wait_for_ccid(sk, skb, &timeo);
- if (err) {
- printk(KERN_CRIT "%s:err at dccp_wait_for_ccid"
- " %d\n", __FUNCTION__, err);
- dump_stack();
- }
+ err = dccp_wait_for_ccid(sk, skb);
+ if (err && err != -EINTR)
+ DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
}
skb_dequeue(&sk->sk_write_queue);
@@ -291,14 +264,13 @@ void dccp_write_xmit(struct sock *sk, int block)
err = dccp_transmit_skb(sk, skb);
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
- if (err) {
- printk(KERN_CRIT "%s:err from "
- "ccid_hc_tx_packet_sent %d\n",
- __FUNCTION__, err);
- dump_stack();
- }
- } else
+ if (err)
+ DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
+ err);
+ } else {
+ dccp_pr_debug("packet discarded\n");
kfree(skb);
+ }
}
}
@@ -329,9 +301,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
- skb->csum = 0;
dreq = dccp_rsk(req);
+ if (inet_rsk(req)->acked) /* increase ISS upon retransmission */
+ dccp_inc_seqno(&dreq->dreq_iss);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
@@ -340,10 +313,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
return NULL;
}
- skb->h.raw = skb_push(skb, dccp_header_size);
-
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_header_size);
+ /* Build and checksum header */
+ dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_rsk(req)->rmt_port;
@@ -355,6 +326,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
+ dccp_csum_outgoing(skb);
+
+ /* We use `acked' to remember that a Response was already sent. */
+ inet_rsk(req)->acked = 1;
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
@@ -363,7 +338,6 @@ EXPORT_SYMBOL_GPL(dccp_make_response);
static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
const enum dccp_reset_codes code)
-
{
struct dccp_hdr *dh;
struct dccp_sock *dp = dccp_sk(sk);
@@ -379,7 +353,6 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
- skb->csum = 0;
dccp_inc_seqno(&dp->dccps_gss);
@@ -392,10 +365,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
return NULL;
}
- skb->h.raw = skb_push(skb, dccp_header_size);
-
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_header_size);
+ dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_sk(sk)->dport;
@@ -407,7 +377,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
dccp_hdr_reset(skb)->dccph_reset_code = code;
- inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb);
+ inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
@@ -426,9 +396,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
code);
if (skb != NULL) {
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
- if (err == NET_XMIT_CN)
- err = 0;
+ err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0);
+ return net_xmit_eval(err);
}
}
@@ -449,17 +418,21 @@ static inline void dccp_connect_init(struct sock *sk)
dccp_sync_mss(sk, dst_mtu(dst));
- dccp_update_gss(sk, dp->dccps_iss);
- /*
+ /*
* SWL and AWL are initially adjusted so that they are not less than
* the initial Sequence Numbers received and sent, respectively:
* SWL := max(GSR + 1 - floor(W/4), ISR),
* AWL := max(GSS - W' + 1, ISS).
* These adjustments MUST be applied only at the beginning of the
* connection.
- */
+ */
+ dccp_update_gss(sk, dp->dccps_iss);
dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
+ /* S.GAR - greatest valid acknowledgement number received on a non-Sync;
+ * initialized to S.ISS (sec. 8.5) */
+ dp->dccps_gar = dp->dccps_iss;
+
icsk->icsk_retransmits = 0;
init_timer(&dp->dccps_xmit_timer);
dp->dccps_xmit_timer.data = (unsigned long)sk;
@@ -481,7 +454,6 @@ int dccp_connect(struct sock *sk)
skb_reserve(skb, sk->sk_prot->max_header);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
- skb->csum = 0;
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
@@ -513,7 +485,6 @@ void dccp_send_ack(struct sock *sk)
/* Reserve space for headers */
skb_reserve(skb, sk->sk_prot->max_header);
- skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
dccp_transmit_skb(sk, skb);
}
@@ -567,7 +538,6 @@ void dccp_send_sync(struct sock *sk, const u64 seq,
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
- skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
DCCP_SKB_CB(skb)->dccpd_seq = seq;
@@ -593,7 +563,6 @@ void dccp_send_close(struct sock *sk, const int active)
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
- skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 146496fce2e2..f81e37de35d5 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -106,8 +106,10 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
static struct jprobe dccp_send_probe = {
- .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, },
- .entry = (kprobe_opcode_t *)&jdccp_sendmsg,
+ .kp = {
+ .symbol_name = "dccp_sendmsg",
+ },
+ .entry = JPROBE_ENTRY(jdccp_sendmsg),
};
static int dccpprobe_open(struct inode *inode, struct file *file)
@@ -160,6 +162,8 @@ static __init int dccpprobe_init(void)
init_waitqueue_head(&dccpw.wait);
spin_lock_init(&dccpw.lock);
dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
+ if (IS_ERR(dccpw.fifo))
+ return PTR_ERR(dccpw.fifo);
if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
goto err0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 72cbdcfc2c65..63b3fa20e14b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -52,6 +52,9 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
EXPORT_SYMBOL_GPL(dccp_hashinfo);
+/* the maximum queue length for tx in packets. 0 is no limit */
+int sysctl_dccp_tx_qlen __read_mostly = 5;
+
void dccp_set_state(struct sock *sk, const int state)
{
const int oldstate = sk->sk_state;
@@ -193,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
sk, GFP_KERNEL);
dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
sk, GFP_KERNEL);
- if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
+ if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
dp->dccps_hc_tx_ccid == NULL)) {
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
@@ -212,6 +215,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dccp_init_xmit_timers(sk);
icsk->icsk_rto = DCCP_TIMEOUT_INIT;
+ icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
sk->sk_write_space = dccp_write_space;
icsk->icsk_sync_mss = dccp_sync_mss;
@@ -262,12 +266,12 @@ int dccp_destroy_sock(struct sock *sk)
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
-static inline int dccp_listen_start(struct sock *sk)
+static inline int dccp_listen_start(struct sock *sk, int backlog)
{
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
- return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+ return inet_csk_listen_start(sk, backlog);
}
int dccp_disconnect(struct sock *sk, int flags)
@@ -386,7 +390,7 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_service_list *sl = NULL;
- if (service == DCCP_SERVICE_INVALID_VALUE ||
+ if (service == DCCP_SERVICE_INVALID_VALUE ||
optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
return -EINVAL;
@@ -451,9 +455,8 @@ out_free_val:
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
- struct dccp_sock *dp;
- int err;
- int val;
+ struct dccp_sock *dp = dccp_sk(sk);
+ int val, err = 0;
if (optlen < sizeof(int))
return -EINVAL;
@@ -465,14 +468,11 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
return dccp_setsockopt_service(sk, val, optval, optlen);
lock_sock(sk);
- dp = dccp_sk(sk);
- err = 0;
-
switch (optname) {
case DCCP_SOCKOPT_PACKET_SIZE:
- dp->dccps_packet_size = val;
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ err = 0;
break;
-
case DCCP_SOCKOPT_CHANGE_L:
if (optlen != sizeof(struct dccp_so_feat))
err = -EINVAL;
@@ -481,7 +481,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
(struct dccp_so_feat __user *)
optval);
break;
-
case DCCP_SOCKOPT_CHANGE_R:
if (optlen != sizeof(struct dccp_so_feat))
err = -EINVAL;
@@ -490,12 +489,26 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
(struct dccp_so_feat __user *)
optval);
break;
-
+ case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
+ if (val < 0 || val > 15)
+ err = -EINVAL;
+ else
+ dp->dccps_pcslen = val;
+ break;
+ case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
+ if (val < 0 || val > 15)
+ err = -EINVAL;
+ else {
+ dp->dccps_pcrlen = val;
+ /* FIXME: add feature negotiation,
+ * ChangeL(MinimumChecksumCoverage, val) */
+ }
+ break;
default:
err = -ENOPROTOOPT;
break;
}
-
+
release_sock(sk);
return err;
}
@@ -569,12 +582,17 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case DCCP_SOCKOPT_PACKET_SIZE:
- val = dp->dccps_packet_size;
- len = sizeof(dp->dccps_packet_size);
- break;
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ return 0;
case DCCP_SOCKOPT_SERVICE:
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
+ case DCCP_SOCKOPT_SEND_CSCOV:
+ val = dp->dccps_pcslen;
+ break;
+ case DCCP_SOCKOPT_RECV_CSCOV:
+ val = dp->dccps_pcrlen;
+ break;
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
len, (u32 __user *)optval, optlen);
@@ -630,6 +648,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
return -EMSGSIZE;
lock_sock(sk);
+
+ if (sysctl_dccp_tx_qlen &&
+ (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+ rc = -EAGAIN;
+ goto out_release;
+ }
+
timeo = sock_sndtimeo(sk, noblock);
/*
@@ -788,7 +813,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
* FIXME: here it probably should be sk->sk_prot->listen_start
* see tcp_listen_start
*/
- err = dccp_listen_start(sk);
+ err = dccp_listen_start(sk, backlog);
if (err)
goto out;
}
@@ -805,7 +830,7 @@ EXPORT_SYMBOL_GPL(inet_dccp_listen);
static const unsigned char dccp_new_state[] = {
/* current state: new state: action: */
[0] = DCCP_CLOSED,
- [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
+ [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_REQUESTING] = DCCP_CLOSED,
[DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_LISTEN] = DCCP_CLOSED,
@@ -1008,8 +1033,7 @@ static int __init dccp_init(void)
} while (!dccp_hashinfo.ehash && --ehash_order > 0);
if (!dccp_hashinfo.ehash) {
- printk(KERN_CRIT "Failed to allocate DCCP "
- "established hash table\n");
+ DCCP_CRIT("Failed to allocate DCCP established hash table");
goto out_free_bind_bucket_cachep;
}
@@ -1031,7 +1055,7 @@ static int __init dccp_init(void)
} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
if (!dccp_hashinfo.bhash) {
- printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
+ DCCP_CRIT("Failed to allocate DCCP bind hash table");
goto out_free_dccp_ehash;
}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 38bc157876f3..fdcfca3e9208 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
+#include "dccp.h"
#include "feat.h"
#ifndef CONFIG_SYSCTL
@@ -19,53 +20,76 @@
static struct ctl_table dccp_default_table[] = {
{
- .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW,
.procname = "seq_window",
- .data = &dccp_feat_default_sequence_window,
- .maxlen = sizeof(dccp_feat_default_sequence_window),
+ .data = &sysctl_dccp_feat_sequence_window,
+ .maxlen = sizeof(sysctl_dccp_feat_sequence_window),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_RX_CCID,
.procname = "rx_ccid",
- .data = &dccp_feat_default_rx_ccid,
- .maxlen = sizeof(dccp_feat_default_rx_ccid),
+ .data = &sysctl_dccp_feat_rx_ccid,
+ .maxlen = sizeof(sysctl_dccp_feat_rx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_TX_CCID,
.procname = "tx_ccid",
- .data = &dccp_feat_default_tx_ccid,
- .maxlen = sizeof(dccp_feat_default_tx_ccid),
+ .data = &sysctl_dccp_feat_tx_ccid,
+ .maxlen = sizeof(sysctl_dccp_feat_tx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_ACK_RATIO,
.procname = "ack_ratio",
- .data = &dccp_feat_default_ack_ratio,
- .maxlen = sizeof(dccp_feat_default_ack_ratio),
+ .data = &sysctl_dccp_feat_ack_ratio,
+ .maxlen = sizeof(sysctl_dccp_feat_ack_ratio),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_SEND_ACKVEC,
.procname = "send_ackvec",
- .data = &dccp_feat_default_send_ack_vector,
- .maxlen = sizeof(dccp_feat_default_send_ack_vector),
+ .data = &sysctl_dccp_feat_send_ack_vector,
+ .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_SEND_NDP,
.procname = "send_ndp",
- .data = &dccp_feat_default_send_ndp_count,
- .maxlen = sizeof(dccp_feat_default_send_ndp_count),
+ .data = &sysctl_dccp_feat_send_ndp_count,
+ .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count),
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "request_retries",
+ .data = &sysctl_dccp_request_retries,
+ .maxlen = sizeof(sysctl_dccp_request_retries),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "retries1",
+ .data = &sysctl_dccp_retries1,
+ .maxlen = sizeof(sysctl_dccp_retries1),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "retries2",
+ .data = &sysctl_dccp_retries2,
+ .maxlen = sizeof(sysctl_dccp_retries2),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tx_qlen",
+ .data = &sysctl_dccp_tx_qlen,
+ .maxlen = sizeof(sysctl_dccp_tx_qlen),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+
{ .ctl_name = 0, }
};
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8447742f5615..e5348f369c60 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -1,6 +1,6 @@
/*
* net/dccp/timer.c
- *
+ *
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
@@ -15,15 +15,10 @@
#include "dccp.h"
-static void dccp_write_timer(unsigned long data);
-static void dccp_keepalive_timer(unsigned long data);
-static void dccp_delack_timer(unsigned long data);
-
-void dccp_init_xmit_timers(struct sock *sk)
-{
- inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
- &dccp_keepalive_timer);
-}
+/* sysctl variables governing numbers of retransmission attempts */
+int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_dccp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_dccp_retries2 __read_mostly = TCP_RETR2;
static void dccp_write_err(struct sock *sk)
{
@@ -44,11 +39,10 @@ static int dccp_write_timeout(struct sock *sk)
if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
if (icsk->icsk_retransmits != 0)
dst_negative_advice(&sk->sk_dst_cache);
- retry_until = icsk->icsk_syn_retries ? :
- /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
+ retry_until = icsk->icsk_syn_retries ?
+ : sysctl_dccp_request_retries;
} else {
- if (icsk->icsk_retransmits >=
- /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
+ if (icsk->icsk_retransmits >= sysctl_dccp_retries1) {
/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
black hole detection. :-(
@@ -72,7 +66,7 @@ static int dccp_write_timeout(struct sock *sk)
dst_negative_advice(&sk->sk_dst_cache);
}
- retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
+ retry_until = sysctl_dccp_retries2;
/*
* FIXME: see tcp_write_timout and tcp_out_of_resources
*/
@@ -86,53 +80,6 @@ static int dccp_write_timeout(struct sock *sk)
return 0;
}
-/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
-{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later. */
- icsk->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
- sk_reset_timer(sk, &icsk->icsk_delack_timer,
- jiffies + TCP_DELACK_MIN);
- goto out;
- }
-
- if (sk->sk_state == DCCP_CLOSED ||
- !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
- goto out;
- if (time_after(icsk->icsk_ack.timeout, jiffies)) {
- sk_reset_timer(sk, &icsk->icsk_delack_timer,
- icsk->icsk_ack.timeout);
- goto out;
- }
-
- icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
-
- if (inet_csk_ack_scheduled(sk)) {
- if (!icsk->icsk_ack.pingpong) {
- /* Delayed ACK missed: inflate ATO. */
- icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
- icsk->icsk_rto);
- } else {
- /* Delayed ACK missed: leave pingpong mode and
- * deflate ATO.
- */
- icsk->icsk_ack.pingpong = 0;
- icsk->icsk_ack.ato = TCP_ATO_MIN;
- }
- dccp_send_ack(sk);
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
- }
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
/*
* The DCCP retransmit timer.
*/
@@ -142,7 +89,7 @@ static void dccp_retransmit_timer(struct sock *sk)
/* retransmit timer is used for feature negotiation throughout
* connection. In this case, no packet is re-transmitted, but rather an
- * ack is generated and pending changes are splaced into its options.
+ * ack is generated and pending changes are placed into its options.
*/
if (sk->sk_send_head == NULL) {
dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
@@ -154,12 +101,14 @@ static void dccp_retransmit_timer(struct sock *sk)
/*
* sk->sk_send_head has to have one skb with
* DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
- * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
- * (PARTOPEN timer), etc).
- */
+ * packet types. The only packets eligible for retransmission are:
+ * -- Requests in client-REQUEST state (sec. 8.1.1)
+ * -- Acks in client-PARTOPEN state (sec. 8.1.5)
+ * -- CloseReq in server-CLOSEREQ state (sec. 8.3)
+ * -- Close in node-CLOSING state (sec. 8.3) */
BUG_TRAP(sk->sk_send_head != NULL);
- /*
+ /*
* More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
*/
@@ -194,7 +143,7 @@ backoff:
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
DCCP_RTO_MAX);
- if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
+ if (icsk->icsk_retransmits > sysctl_dccp_retries1)
__sk_dst_reset(sk);
out:;
}
@@ -251,7 +200,7 @@ static void dccp_keepalive_timer(unsigned long data)
/* Only process if socket is not in use. */
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
- /* Try again later. */
+ /* Try again later. */
inet_csk_reset_keepalive_timer(sk, HZ / 20);
goto out;
}
@@ -264,3 +213,56 @@ out:
bh_unlock_sock(sk);
sock_put(sk);
}
+
+/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
+static void dccp_delack_timer(unsigned long data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ /* Try again later. */
+ icsk->icsk_ack.blocked = 1;
+ NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+ sk_reset_timer(sk, &icsk->icsk_delack_timer,
+ jiffies + TCP_DELACK_MIN);
+ goto out;
+ }
+
+ if (sk->sk_state == DCCP_CLOSED ||
+ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ goto out;
+ if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+ sk_reset_timer(sk, &icsk->icsk_delack_timer,
+ icsk->icsk_ack.timeout);
+ goto out;
+ }
+
+ icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
+
+ if (inet_csk_ack_scheduled(sk)) {
+ if (!icsk->icsk_ack.pingpong) {
+ /* Delayed ACK missed: inflate ATO. */
+ icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
+ icsk->icsk_rto);
+ } else {
+ /* Delayed ACK missed: leave pingpong mode and
+ * deflate ATO.
+ */
+ icsk->icsk_ack.pingpong = 0;
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
+ dccp_send_ack(sk);
+ NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+ }
+out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+void dccp_init_xmit_timers(struct sock *sk)
+{
+ inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
+ &dccp_keepalive_timer);
+}