summaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 21:49:40 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 21:49:40 +0100
commit0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/dccp
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6 (diff)
parentnet: Allow dependancies of FDDI & Tokenring to be modular. (diff)
downloadlinux-0191b625ca5a46206d2fb862bb08f36f2fcb3b31.tar.xz
linux-0191b625ca5a46206d2fb862bb08f36f2fcb3b31.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits) net: Allow dependancies of FDDI & Tokenring to be modular. igb: Fix build warning when DCA is disabled. net: Fix warning fallout from recent NAPI interface changes. gro: Fix potential use after free sfc: If AN is enabled, always read speed/duplex from the AN advertising bits sfc: When disabling the NIC, close the device rather than unregistering it sfc: SFT9001: Add cable diagnostics sfc: Add support for multiple PHY self-tests sfc: Merge top-level functions for self-tests sfc: Clean up PHY mode management in loopback self-test sfc: Fix unreliable link detection in some loopback modes sfc: Generate unique names for per-NIC workqueues 802.3ad: use standard ethhdr instead of ad_header 802.3ad: generalize out mac address initializer 802.3ad: initialize ports LACPDU from const initializer 802.3ad: remove typedef around ad_system 802.3ad: turn ports is_individual into a bool 802.3ad: turn ports is_enabled into a bool 802.3ad: make ntt bool ixgbe: Fix set_ringparam in ixgbe to use the same memory pools. ... Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due to the conversion to %pI (in this networking merge) and the addition of doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/ackvec.c9
-rw-r--r--net/dccp/ackvec.h5
-rw-r--r--net/dccp/ccid.c62
-rw-r--r--net/dccp/ccid.h26
-rw-r--r--net/dccp/ccids/ccid2.c6
-rw-r--r--net/dccp/dccp.h17
-rw-r--r--net/dccp/diag.c11
-rw-r--r--net/dccp/feat.c1458
-rw-r--r--net/dccp/feat.h130
-rw-r--r--net/dccp/input.c44
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/dccp/ipv6.c15
-rw-r--r--net/dccp/minisocks.c54
-rw-r--r--net/dccp/options.c229
-rw-r--r--net/dccp/output.c19
-rw-r--r--net/dccp/probe.c19
-rw-r--r--net/dccp/proto.c224
-rw-r--r--net/dccp/sysctl.c21
-rw-r--r--net/dccp/timer.c12
19 files changed, 1528 insertions, 846 deletions
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 1e8be246ad15..01e4d39fa232 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -12,7 +12,6 @@
#include "ackvec.h"
#include "dccp.h"
-#include <linux/dccp.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
/* Figure out how many options do we need to represent the ackvec */
- const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
+ const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
u16 len = av->av_vec_len + 2 * nr_opts, i;
u32 elapsed_time;
const unsigned char *tail, *from;
@@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
for (i = 0; i < nr_opts; ++i) {
int copylen = len;
- if (len > DCCP_MAX_ACKVEC_OPT_LEN)
- copylen = DCCP_MAX_ACKVEC_OPT_LEN;
+ if (len > DCCP_SINGLE_OPT_MAXLEN)
+ copylen = DCCP_SINGLE_OPT_MAXLEN;
*to++ = DCCPO_ACK_VECTOR_0;
*to++ = copylen + 2;
@@ -432,7 +431,7 @@ found:
int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
u64 *ackno, const u8 opt, const u8 *value, const u8 len)
{
- if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+ if (len > DCCP_SINGLE_OPT_MAXLEN)
return -1;
/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index bcb64fb4acef..4ccee030524e 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -11,15 +11,14 @@
* published by the Free Software Foundation.
*/
+#include <linux/dccp.h>
#include <linux/compiler.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/types.h>
-/* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_OPT_LEN 253
/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
+#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
#define DCCP_ACKVEC_STATE_RECEIVED 0
#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 8fe931a3d7a1..bcc643f992ae 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -13,6 +13,13 @@
#include "ccid.h"
+static u8 builtin_ccids[] = {
+ DCCPC_CCID2, /* CCID2 is supported by default */
+#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
+ DCCPC_CCID3,
+#endif
+};
+
static struct ccid_operations *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t ccids_lockct = ATOMIC_INIT(0);
@@ -86,6 +93,47 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
}
}
+/* check that up to @array_len members in @ccid_array are supported */
+bool ccid_support_check(u8 const *ccid_array, u8 array_len)
+{
+ u8 i, j, found;
+
+ for (i = 0, found = 0; i < array_len; i++, found = 0) {
+ for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++)
+ found = (ccid_array[i] == builtin_ccids[j]);
+ if (!found)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array
+ * @ccid_array: pointer to copy into
+ * @array_len: value to return length into
+ * This function allocates memory - caller must see that it is freed after use.
+ */
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
+{
+ *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any());
+ if (*ccid_array == NULL)
+ return -ENOBUFS;
+ *array_len = ARRAY_SIZE(builtin_ccids);
+ return 0;
+}
+
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+ char __user *optval, int __user *optlen)
+{
+ if (len < sizeof(builtin_ccids))
+ return -EINVAL;
+
+ if (put_user(sizeof(builtin_ccids), optlen) ||
+ copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids)))
+ return -EFAULT;
+ return 0;
+}
+
int ccid_register(struct ccid_operations *ccid_ops)
{
int err = -ENOBUFS;
@@ -205,20 +253,6 @@ out_module_put:
EXPORT_SYMBOL_GPL(ccid_new);
-struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
-{
- return ccid_new(id, sk, 1, gfp);
-}
-
-EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
-
-struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp)
-{
- return ccid_new(id, sk, 0, gfp);
-}
-
-EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
-
static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
{
struct ccid_operations *ccid_ops;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index fdeae7b57319..18f69423a708 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -103,13 +103,31 @@ static inline void *ccid_priv(const struct ccid *ccid)
return (void *)ccid->ccid_priv;
}
+extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
+extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
+extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+ char __user *, int __user *);
+
extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
gfp_t gfp);
-extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
- gfp_t gfp);
-extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
- gfp_t gfp);
+static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
+{
+ struct ccid *ccid = dp->dccps_hc_rx_ccid;
+
+ if (ccid == NULL || ccid->ccid_ops == NULL)
+ return -1;
+ return ccid->ccid_ops->ccid_id;
+}
+
+static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
+{
+ struct ccid *ccid = dp->dccps_hc_tx_ccid;
+
+ if (ccid == NULL || ccid->ccid_ops == NULL)
+ return -1;
+ return ccid->ccid_ops->ccid_id;
+}
extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9a430734530c..c9ea19a4d85e 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,7 +25,7 @@
/*
* This implementation should follow RFC 4341
*/
-
+#include "../feat.h"
#include "../ccid.h"
#include "../dccp.h"
#include "ccid2.h"
@@ -147,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
val = max_ratio;
}
- if (val > 0xFFFF) /* RFC 4340, 11.3 */
- val = 0xFFFF;
+ if (val > DCCPF_ACK_RATIO_MAX)
+ val = DCCPF_ACK_RATIO_MAX;
if (val == dp->dccps_l_ack_ratio)
return;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b4bc6e095a0e..0bc4c9a02e19 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -49,7 +49,7 @@ extern int dccp_debug;
extern struct inet_hashinfo dccp_hashinfo;
-extern atomic_t dccp_orphan_count;
+extern struct percpu_counter dccp_orphan_count;
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
@@ -98,9 +98,6 @@ extern int sysctl_dccp_retries2;
extern int sysctl_dccp_feat_sequence_window;
extern int sysctl_dccp_feat_rx_ccid;
extern int sysctl_dccp_feat_tx_ccid;
-extern int sysctl_dccp_feat_ack_ratio;
-extern int sysctl_dccp_feat_send_ack_vector;
-extern int sysctl_dccp_feat_send_ndp_count;
extern int sysctl_dccp_tx_qlen;
extern int sysctl_dccp_sync_ratelimit;
@@ -252,7 +249,8 @@ extern const char *dccp_state_name(const int state);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
-extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
+extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+ struct sk_buff const *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
@@ -435,12 +433,19 @@ static inline int dccp_ack_pending(const struct sock *sk)
const struct dccp_sock *dp = dccp_sk(sk);
return dp->dccps_timestamp_echo != 0 ||
#ifdef CONFIG_IP_DCCP_ACKVEC
- (dccp_msk(sk)->dccpms_send_ack_vector &&
+ (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
#endif
inet_csk_ack_scheduled(sk);
}
+extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
+extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
+extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
+ struct sk_buff *skb);
+extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
+extern void dccp_feat_list_purge(struct list_head *fn_list);
+
extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
extern int dccp_insert_option_elapsed_time(struct sock *sk,
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index d8a3509b26f6..b21f261da75e 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -29,11 +29,14 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_backoff = icsk->icsk_backoff;
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
- if (dccp_msk(sk)->dccpms_send_ack_vector)
+ if (dp->dccps_hc_rx_ackvec != NULL)
info->tcpi_options |= TCPI_OPT_SACK;
- ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
- ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
+ if (dp->dccps_hc_rx_ccid != NULL)
+ ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
+
+ if (dp->dccps_hc_tx_ccid != NULL)
+ ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
}
static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -45,7 +48,7 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
dccp_get_info(sk, _info);
}
-static struct inet_diag_handler dccp_diag_handler = {
+static const struct inet_diag_handler dccp_diag_handler = {
.idiag_hashinfo = &dccp_hashinfo,
.idiag_get_info = dccp_diag_get_info,
.idiag_type = DCCPDIAG_GETSOCK,
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 933a0ecf8d46..30f9fb76b921 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1,11 +1,17 @@
/*
* net/dccp/feat.c
*
- * An implementation of the DCCP protocol
- * Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ * Feature negotiation for the DCCP protocol (RFC 4340, section 6)
+ *
+ * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ * Rewrote from scratch, some bits from earlier code by
+ * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
*
* ASSUMPTIONS
* -----------
+ * o Feature negotiation is coordinated with connection setup (as in TCP), wild
+ * changes of parameters of an established connection are not supported.
* o All currently known SP features have 1-byte quantities. If in the future
* extensions of RFCs 4340..42 define features with item lengths larger than
* one byte, a feature-specific extension of the code will be required.
@@ -15,597 +21,1185 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
#include <linux/module.h>
-
#include "ccid.h"
#include "feat.h"
-#define DCCP_FEAT_SP_NOAGREE (-123)
-
-int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
- u8 *val, u8 len, gfp_t gfp)
-{
- struct dccp_opt_pend *opt;
-
- dccp_feat_debug(type, feature, *val);
-
- if (len > 3) {
- DCCP_WARN("invalid length %d\n", len);
- return -EINVAL;
- }
- /* XXX add further sanity checks */
-
- /* check if that feature is already being negotiated */
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- /* ok we found a negotiation for this option already */
- if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
- dccp_pr_debug("Replacing old\n");
- /* replace */
- BUG_ON(opt->dccpop_val == NULL);
- kfree(opt->dccpop_val);
- opt->dccpop_val = val;
- opt->dccpop_len = len;
- opt->dccpop_conf = 0;
- return 0;
- }
- }
-
- /* negotiation for a new feature */
- opt = kmalloc(sizeof(*opt), gfp);
- if (opt == NULL)
- return -ENOMEM;
-
- opt->dccpop_type = type;
- opt->dccpop_feat = feature;
- opt->dccpop_len = len;
- opt->dccpop_val = val;
- opt->dccpop_conf = 0;
- opt->dccpop_sc = NULL;
-
- BUG_ON(opt->dccpop_val == NULL);
-
- list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_change);
-
-static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
+/*
+ * Feature activation handlers.
+ *
+ * These all use an u64 argument, to provide enough room for NN/SP features. At
+ * this stage the negotiated values have been checked to be within their range.
+ */
+static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
- /* figure out if we are changing our CCID or the peer's */
- const int rx = type == DCCPO_CHANGE_R;
- const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid;
- struct ccid *new_ccid;
+ struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any());
- /* Check if nothing is being changed. */
- if (ccid_nr == new_ccid_nr)
- return 0;
-
- new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
if (new_ccid == NULL)
return -ENOMEM;
if (rx) {
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
dp->dccps_hc_rx_ccid = new_ccid;
- dmsk->dccpms_rx_ccid = new_ccid_nr;
} else {
ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
dp->dccps_hc_tx_ccid = new_ccid;
- dmsk->dccpms_tx_ccid = new_ccid_nr;
}
+ return 0;
+}
+static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
+{
+ if (!rx)
+ dccp_msk(sk)->dccpms_sequence_window = seq_win;
return 0;
}
-static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
+static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx)
{
- dccp_feat_debug(type, feat, val);
+ if (rx)
+ dccp_sk(sk)->dccps_r_ack_ratio = ratio;
+ else
+ dccp_sk(sk)->dccps_l_ack_ratio = ratio;
+ return 0;
+}
- switch (feat) {
- case DCCPF_CCID:
- return dccp_feat_update_ccid(sk, type, val);
- default:
- dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
- dccp_feat_typename(type), feat);
- break;
+static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ if (rx) {
+ if (enable && dp->dccps_hc_rx_ackvec == NULL) {
+ dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any());
+ if (dp->dccps_hc_rx_ackvec == NULL)
+ return -ENOMEM;
+ } else if (!enable) {
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ }
}
return 0;
}
-static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
- u8 *rpref, u8 rlen)
+static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
{
- struct dccp_sock *dp = dccp_sk(sk);
- u8 *spref, slen, *res = NULL;
- int i, j, rc, agree = 1;
+ if (!rx)
+ dccp_sk(sk)->dccps_send_ndp_count = (enable > 0);
+ return 0;
+}
- BUG_ON(rpref == NULL);
+/*
+ * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that
+ * `rx' holds when the sending peer informs about his partial coverage via a
+ * ChangeR() option. In the other case, we are the sender and the receiver
+ * announces its coverage via ChangeL() options. The policy here is to honour
+ * such communication by enabling the corresponding partial coverage - but only
+ * if it has not been set manually before; the warning here means that all
+ * packets will be dropped.
+ */
+static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
- /* check if we are the black sheep */
- if (dp->dccps_role == DCCP_ROLE_CLIENT) {
- spref = rpref;
- slen = rlen;
- rpref = opt->dccpop_val;
- rlen = opt->dccpop_len;
- } else {
- spref = opt->dccpop_val;
- slen = opt->dccpop_len;
+ if (rx)
+ dp->dccps_pcrlen = cscov;
+ else {
+ if (dp->dccps_pcslen == 0)
+ dp->dccps_pcslen = cscov;
+ else if (cscov > dp->dccps_pcslen)
+ DCCP_WARN("CsCov %u too small, peer requires >= %u\n",
+ dp->dccps_pcslen, (u8)cscov);
}
- /*
- * Now we have server preference list in spref and client preference in
- * rpref
- */
- BUG_ON(spref == NULL);
- BUG_ON(rpref == NULL);
+ return 0;
+}
- /* FIXME sanity check vals */
+static const struct {
+ u8 feat_num; /* DCCPF_xxx */
+ enum dccp_feat_type rxtx; /* RX or TX */
+ enum dccp_feat_type reconciliation; /* SP or NN */
+ u8 default_value; /* as in 6.4 */
+ int (*activation_hdlr)(struct sock *sk, u64 val, bool rx);
+/*
+ * Lookup table for location and type of features (from RFC 4340/4342)
+ * +--------------------------+----+-----+----+----+---------+-----------+
+ * | Feature | Location | Reconc. | Initial | Section |
+ * | | RX | TX | SP | NN | Value | Reference |
+ * +--------------------------+----+-----+----+----+---------+-----------+
+ * | DCCPF_CCID | | X | X | | 2 | 10 |
+ * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 |
+ * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 |
+ * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 |
+ * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 |
+ * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 |
+ * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 |
+ * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 |
+ * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 |
+ * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 |
+ * +--------------------------+----+-----+----+----+---------+-----------+
+ */
+} dccp_feat_table[] = {
+ { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2, dccp_hdlr_ccid },
+ { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0, NULL },
+ { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win },
+ { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0, NULL },
+ { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2, dccp_hdlr_ack_ratio},
+ { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_ackvec },
+ { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0, dccp_hdlr_ndp },
+ { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_min_cscov},
+ { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0, NULL },
+ { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0, NULL },
+};
+#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table)
+
+/**
+ * dccp_feat_index - Hash function to map feature number into array position
+ * Returns consecutive array index or -1 if the feature is not understood.
+ */
+static int dccp_feat_index(u8 feat_num)
+{
+ /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */
+ if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM)
+ return feat_num - 1;
- /* Are values in any order? XXX Lame "algorithm" here */
- for (i = 0; i < slen; i++) {
- for (j = 0; j < rlen; j++) {
- if (spref[i] == rpref[j]) {
- res = &spref[i];
- break;
- }
- }
- if (res)
- break;
+ /*
+ * Other features: add cases for new feature types here after adding
+ * them to the above table.
+ */
+ switch (feat_num) {
+ case DCCPF_SEND_LEV_RATE:
+ return DCCP_FEAT_SUPPORTED_MAX - 1;
}
+ return -1;
+}
- /* we didn't agree on anything */
- if (res == NULL) {
- /* confirm previous value */
- switch (opt->dccpop_feat) {
- case DCCPF_CCID:
- /* XXX did i get this right? =P */
- if (opt->dccpop_type == DCCPO_CHANGE_L)
- res = &dccp_msk(sk)->dccpms_tx_ccid;
- else
- res = &dccp_msk(sk)->dccpms_rx_ccid;
- break;
+static u8 dccp_feat_type(u8 feat_num)
+{
+ int idx = dccp_feat_index(feat_num);
- default:
- DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
- /* XXX implement res */
- return -EFAULT;
- }
+ if (idx < 0)
+ return FEAT_UNKNOWN;
+ return dccp_feat_table[idx].reconciliation;
+}
- dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
- agree = 0; /* this is used for mandatory options... */
- }
+static int dccp_feat_default_value(u8 feat_num)
+{
+ int idx = dccp_feat_index(feat_num);
+ /*
+ * There are no default values for unknown features, so encountering a
+ * negative index here indicates a serious problem somewhere else.
+ */
+ DCCP_BUG_ON(idx < 0);
- /* need to put result and our preference list */
- rlen = 1 + opt->dccpop_len;
- rpref = kmalloc(rlen, GFP_ATOMIC);
- if (rpref == NULL)
- return -ENOMEM;
+ return idx < 0 ? 0 : dccp_feat_table[idx].default_value;
+}
- *rpref = *res;
- memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);
+static int __dccp_feat_activate(struct sock *sk, const int idx,
+ const bool is_local, dccp_feat_val const *fval)
+{
+ bool rx;
+ u64 val;
- /* put it in the "confirm queue" */
- if (opt->dccpop_sc == NULL) {
- opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
- if (opt->dccpop_sc == NULL) {
- kfree(rpref);
- return -ENOMEM;
+ if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX)
+ return -1;
+ if (dccp_feat_table[idx].activation_hdlr == NULL)
+ return 0;
+
+ if (fval == NULL) {
+ val = dccp_feat_table[idx].default_value;
+ } else if (dccp_feat_table[idx].reconciliation == FEAT_SP) {
+ if (fval->sp.vec == NULL) {
+ /*
+ * This can happen when an empty Confirm is sent
+ * for an SP (i.e. known) feature. In this case
+ * we would be using the default anyway.
+ */
+ DCCP_CRIT("Feature #%d undefined: using default", idx);
+ val = dccp_feat_table[idx].default_value;
+ } else {
+ val = fval->sp.vec[0];
}
} else {
- /* recycle the confirm slot */
- BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
- kfree(opt->dccpop_sc->dccpoc_val);
- dccp_pr_debug("recycling confirm slot\n");
+ val = fval->nn;
}
- memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));
- opt->dccpop_sc->dccpoc_val = rpref;
- opt->dccpop_sc->dccpoc_len = rlen;
+ /* Location is RX if this is a local-RX or remote-TX feature */
+ rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
- /* update the option on our side [we are about to send the confirm] */
- rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
- if (rc) {
- kfree(opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc);
- opt->dccpop_sc = NULL;
- return rc;
- }
+ return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
+}
- dccp_pr_debug("Will confirm %d\n", *rpref);
+/* Test for "Req'd" feature (RFC 4340, 6.4) */
+static inline int dccp_feat_must_be_understood(u8 feat_num)
+{
+ return feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS ||
+ feat_num == DCCPF_SEQUENCE_WINDOW;
+}
- /* say we want to change to X but we just got a confirm X, suppress our
- * change
- */
- if (!opt->dccpop_conf) {
- if (*opt->dccpop_val == *res)
- opt->dccpop_conf = 1;
- dccp_pr_debug("won't ask for change of same feature\n");
+/* copy constructor, fval must not already contain allocated memory */
+static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
+{
+ fval->sp.len = len;
+ if (fval->sp.len > 0) {
+ fval->sp.vec = kmemdup(val, len, gfp_any());
+ if (fval->sp.vec == NULL) {
+ fval->sp.len = 0;
+ return -ENOBUFS;
+ }
}
+ return 0;
+}
- return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
+static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
+{
+ if (unlikely(val == NULL))
+ return;
+ if (dccp_feat_type(feat_num) == FEAT_SP)
+ kfree(val->sp.vec);
+ memset(val, 0, sizeof(*val));
}
-static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+static struct dccp_feat_entry *
+ dccp_feat_clone_entry(struct dccp_feat_entry const *original)
{
- struct dccp_minisock *dmsk = dccp_msk(sk);
- struct dccp_opt_pend *opt;
- int rc = 1;
- u8 t;
+ struct dccp_feat_entry *new;
+ u8 type = dccp_feat_type(original->feat_num);
- /*
- * We received a CHANGE. We gotta match it against our own preference
- * list. If we got a CHANGE_R it means it's a change for us, so we need
- * to compare our CHANGE_L list.
- */
- if (type == DCCPO_CHANGE_L)
- t = DCCPO_CHANGE_R;
- else
- t = DCCPO_CHANGE_L;
+ if (type == FEAT_UNKNOWN)
+ return NULL;
- /* find our preference list for this feature */
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- if (opt->dccpop_type != t || opt->dccpop_feat != feature)
- continue;
+ new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any());
+ if (new == NULL)
+ return NULL;
- /* find the winner from the two preference lists */
- rc = dccp_feat_reconcile(sk, opt, val, len);
- break;
+ if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
+ original->val.sp.vec,
+ original->val.sp.len)) {
+ kfree(new);
+ return NULL;
}
+ return new;
+}
- /* We didn't deal with the change. This can happen if we have no
- * preference list for the feature. In fact, it just shouldn't
- * happen---if we understand a feature, we should have a preference list
- * with at least the default value.
- */
- BUG_ON(rc == 1);
+static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
+{
+ if (entry != NULL) {
+ dccp_feat_val_destructor(entry->feat_num, &entry->val);
+ kfree(entry);
+ }
+}
- return rc;
+/*
+ * List management functions
+ *
+ * Feature negotiation lists rely on and maintain the following invariants:
+ * - each feat_num in the list is known, i.e. we know its type and default value
+ * - each feat_num/is_local combination is unique (old entries are overwritten)
+ * - SP values are always freshly allocated
+ * - list is sorted in increasing order of feature number (faster lookup)
+ */
+static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list,
+ u8 feat_num, bool is_local)
+{
+ struct dccp_feat_entry *entry;
+
+ list_for_each_entry(entry, fn_list, node) {
+ if (entry->feat_num == feat_num && entry->is_local == is_local)
+ return entry;
+ else if (entry->feat_num > feat_num)
+ break;
+ }
+ return NULL;
}
-static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+/**
+ * dccp_feat_entry_new - Central list update routine (called by all others)
+ * @head: list to add to
+ * @feat: feature number
+ * @local: whether the local (1) or remote feature with number @feat is meant
+ * This is the only constructor and serves to ensure the above invariants.
+ */
+static struct dccp_feat_entry *
+ dccp_feat_entry_new(struct list_head *head, u8 feat, bool local)
{
- struct dccp_opt_pend *opt;
- struct dccp_minisock *dmsk = dccp_msk(sk);
- u8 *copy;
- int rc;
+ struct dccp_feat_entry *entry;
+
+ list_for_each_entry(entry, head, node)
+ if (entry->feat_num == feat && entry->is_local == local) {
+ dccp_feat_val_destructor(entry->feat_num, &entry->val);
+ return entry;
+ } else if (entry->feat_num > feat) {
+ head = &entry->node;
+ break;
+ }
- /* NN features must be Change L (sec. 6.3.2) */
- if (type != DCCPO_CHANGE_L) {
- dccp_pr_debug("received %s for NN feature %d\n",
- dccp_feat_typename(type), feature);
- return -EFAULT;
+ entry = kmalloc(sizeof(*entry), gfp_any());
+ if (entry != NULL) {
+ entry->feat_num = feat;
+ entry->is_local = local;
+ list_add_tail(&entry->node, head);
}
+ return entry;
+}
- /* XXX sanity check opt val */
+/**
+ * dccp_feat_push_change - Add/overwrite a Change option in the list
+ * @fn_list: feature-negotiation list to update
+ * @feat: one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is meant
+ * @needs_mandatory: whether to use Mandatory feature negotiation options
+ * @fval: pointer to NN/SP value to be inserted (will be copied)
+ */
+static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
+ u8 mandatory, dccp_feat_val *fval)
+{
+ struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
- /* copy option so we can confirm it */
- opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
- if (opt == NULL)
+ if (new == NULL)
return -ENOMEM;
- copy = kmemdup(val, len, GFP_ATOMIC);
- if (copy == NULL) {
- kfree(opt);
- return -ENOMEM;
- }
+ new->feat_num = feat;
+ new->is_local = local;
+ new->state = FEAT_INITIALISING;
+ new->needs_confirm = 0;
+ new->empty_confirm = 0;
+ new->val = *fval;
+ new->needs_mandatory = mandatory;
- opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
- opt->dccpop_feat = feature;
- opt->dccpop_val = copy;
- opt->dccpop_len = len;
+ return 0;
+}
- /* change feature */
- rc = dccp_feat_update(sk, type, feature, *val);
- if (rc) {
- kfree(opt->dccpop_val);
- kfree(opt);
- return rc;
- }
+/**
+ * dccp_feat_push_confirm - Add a Confirm entry to the FN list
+ * @fn_list: feature-negotiation list to add to
+ * @feat: one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is being confirmed
+ * @fval: pointer to NN/SP value to be inserted or NULL
+ * Returns 0 on success, a Reset code for further processing otherwise.
+ */
+static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
+ dccp_feat_val *fval)
+{
+ struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
- dccp_feat_debug(type, feature, *copy);
+ if (new == NULL)
+ return DCCP_RESET_CODE_TOO_BUSY;
- list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
+ new->feat_num = feat;
+ new->is_local = local;
+ new->state = FEAT_STABLE; /* transition in 6.6.2 */
+ new->needs_confirm = 1;
+ new->empty_confirm = (fval == NULL);
+ new->val.nn = 0; /* zeroes the whole structure */
+ if (!new->empty_confirm)
+ new->val = *fval;
+ new->needs_mandatory = 0;
return 0;
}
-static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
- u8 type, u8 feature)
+static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local)
{
- /* XXX check if other confirms for that are queued and recycle slot */
- struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
+ return dccp_feat_push_confirm(fn_list, feat, local, NULL);
+}
- if (opt == NULL) {
- /* XXX what do we do? Ignoring should be fine. It's a change
- * after all =P
- */
- return;
- }
+static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry)
+{
+ list_del(&entry->node);
+ dccp_feat_entry_destructor(entry);
+}
- switch (type) {
- case DCCPO_CHANGE_L:
- opt->dccpop_type = DCCPO_CONFIRM_R;
- break;
- case DCCPO_CHANGE_R:
- opt->dccpop_type = DCCPO_CONFIRM_L;
- break;
- default:
- DCCP_WARN("invalid type %d\n", type);
- kfree(opt);
- return;
+void dccp_feat_list_purge(struct list_head *fn_list)
+{
+ struct dccp_feat_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, fn_list, node)
+ dccp_feat_entry_destructor(entry);
+ INIT_LIST_HEAD(fn_list);
+}
+EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
+
+/* generate @to as full clone of @from - @to must not contain any nodes */
+int dccp_feat_clone_list(struct list_head const *from, struct list_head *to)
+{
+ struct dccp_feat_entry *entry, *new;
+
+ INIT_LIST_HEAD(to);
+ list_for_each_entry(entry, from, node) {
+ new = dccp_feat_clone_entry(entry);
+ if (new == NULL)
+ goto cloning_failed;
+ list_add_tail(&new->node, to);
}
- opt->dccpop_feat = feature;
- opt->dccpop_val = NULL;
- opt->dccpop_len = 0;
+ return 0;
+
+cloning_failed:
+ dccp_feat_list_purge(to);
+ return -ENOMEM;
+}
- /* change feature */
- dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
+/**
+ * dccp_feat_valid_nn_length - Enforce length constraints on NN options
+ * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only,
+ * incoming options are accepted as long as their values are valid.
+ */
+static u8 dccp_feat_valid_nn_length(u8 feat_num)
+{
+ if (feat_num == DCCPF_ACK_RATIO) /* RFC 4340, 11.3 and 6.6.8 */
+ return 2;
+ if (feat_num == DCCPF_SEQUENCE_WINDOW) /* RFC 4340, 7.5.2 and 6.5 */
+ return 6;
+ return 0;
+}
- list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
+static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val)
+{
+ switch (feat_num) {
+ case DCCPF_ACK_RATIO:
+ return val <= DCCPF_ACK_RATIO_MAX;
+ case DCCPF_SEQUENCE_WINDOW:
+ return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX;
+ }
+ return 0; /* feature unknown - so we can't tell */
}
-static void dccp_feat_flush_confirm(struct sock *sk)
+/* check that SP values are within the ranges defined in RFC 4340 */
+static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val)
{
- struct dccp_minisock *dmsk = dccp_msk(sk);
- /* Check if there is anything to confirm in the first place */
- int yes = !list_empty(&dmsk->dccpms_conf);
+ switch (feat_num) {
+ case DCCPF_CCID:
+ return val == DCCPC_CCID2 || val == DCCPC_CCID3;
+ /* Type-check Boolean feature values: */
+ case DCCPF_SHORT_SEQNOS:
+ case DCCPF_ECN_INCAPABLE:
+ case DCCPF_SEND_ACK_VECTOR:
+ case DCCPF_SEND_NDP_COUNT:
+ case DCCPF_DATA_CHECKSUM:
+ case DCCPF_SEND_LEV_RATE:
+ return val < 2;
+ case DCCPF_MIN_CSUM_COVER:
+ return val < 16;
+ }
+ return 0; /* feature unknown */
+}
- if (!yes) {
- struct dccp_opt_pend *opt;
+static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len)
+{
+ if (sp_list == NULL || sp_len < 1)
+ return 0;
+ while (sp_len--)
+ if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++))
+ return 0;
+ return 1;
+}
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- if (opt->dccpop_conf) {
- yes = 1;
- break;
+/**
+ * dccp_feat_insert_opts - Generate FN options from current list state
+ * @skb: next sk_buff to be sent to the peer
+ * @dp: for client during handshake and general negotiation
+ * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND)
+ */
+int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
+ struct sk_buff *skb)
+{
+ struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
+ struct dccp_feat_entry *pos, *next;
+ u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN];
+ bool rpt;
+
+ /* put entries into @skb in the order they appear in the list */
+ list_for_each_entry_safe_reverse(pos, next, fn, node) {
+ opt = dccp_feat_genopt(pos);
+ type = dccp_feat_type(pos->feat_num);
+ rpt = false;
+
+ if (pos->empty_confirm) {
+ len = 0;
+ ptr = NULL;
+ } else {
+ if (type == FEAT_SP) {
+ len = pos->val.sp.len;
+ ptr = pos->val.sp.vec;
+ rpt = pos->needs_confirm;
+ } else if (type == FEAT_NN) {
+ len = dccp_feat_valid_nn_length(pos->feat_num);
+ ptr = nn_in_nbo;
+ dccp_encode_value_var(pos->val.nn, ptr, len);
+ } else {
+ DCCP_BUG("unknown feature %u", pos->feat_num);
+ return -1;
}
}
+
+ if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
+ return -1;
+ if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
+ return -1;
+ /*
+ * Enter CHANGING after transmitting the Change option (6.6.2).
+ */
+ if (pos->state == FEAT_INITIALISING)
+ pos->state = FEAT_CHANGING;
}
+ return 0;
+}
- if (!yes)
- return;
+/**
+ * __feat_register_nn - Register new NN value on socket
+ * @fn: feature-negotiation list to register with
+ * @feat: an NN feature from %dccp_feature_numbers
+ * @mandatory: use Mandatory option if 1
+ * @nn_val: value to register (restricted to 4 bytes)
+ * Note that NN features are local by definition (RFC 4340, 6.3.2).
+ */
+static int __feat_register_nn(struct list_head *fn, u8 feat,
+ u8 mandatory, u64 nn_val)
+{
+ dccp_feat_val fval = { .nn = nn_val };
- /* OK there is something to confirm... */
- /* XXX check if packet is in flight? Send delayed ack?? */
- if (sk->sk_state == DCCP_OPEN)
- dccp_send_ack(sk);
+ if (dccp_feat_type(feat) != FEAT_NN ||
+ !dccp_feat_is_valid_nn_val(feat, nn_val))
+ return -EINVAL;
+
+ /* Don't bother with default values, they will be activated anyway. */
+ if (nn_val - (u64)dccp_feat_default_value(feat) == 0)
+ return 0;
+
+ return dccp_feat_push_change(fn, feat, 1, mandatory, &fval);
}
-int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+/**
+ * __feat_register_sp - Register new SP value/list on socket
+ * @fn: feature-negotiation list to register with
+ * @feat: an SP feature from %dccp_feature_numbers
+ * @is_local: whether the local (1) or the remote (0) @feat is meant
+ * @mandatory: use Mandatory option if 1
+ * @sp_val: SP value followed by optional preference list
+ * @sp_len: length of @sp_val in bytes
+ */
+static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
+ u8 mandatory, u8 const *sp_val, u8 sp_len)
{
- int rc;
+ dccp_feat_val fval;
- dccp_feat_debug(type, feature, *val);
+ if (dccp_feat_type(feat) != FEAT_SP ||
+ !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
+ return -EINVAL;
- /* figure out if it's SP or NN feature */
- switch (feature) {
- /* deal with SP features */
- case DCCPF_CCID:
- rc = dccp_feat_sp(sk, type, feature, val, len);
- break;
+ /* Avoid negotiating alien CCIDs by only advertising supported ones */
+ if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
+ return -EOPNOTSUPP;
- /* deal with NN features */
- case DCCPF_ACK_RATIO:
- rc = dccp_feat_nn(sk, type, feature, val, len);
- break;
+ if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
+ return -ENOMEM;
- /* XXX implement other features */
- default:
- dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
- dccp_feat_typename(type), feature);
- rc = -EFAULT;
- break;
- }
+ return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
+}
- /* check if there were problems changing features */
- if (rc) {
- /* If we don't agree on SP, we sent a confirm for old value.
- * However we propagate rc to caller in case option was
- * mandatory
+/**
+ * dccp_feat_register_sp - Register requests to change SP feature values
+ * @sk: client or listening socket
+ * @feat: one of %dccp_feature_numbers
+ * @is_local: whether the local (1) or remote (0) @feat is meant
+ * @list: array of preferred values, in descending order of preference
+ * @len: length of @list in bytes
+ */
+int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+ u8 const *list, u8 len)
+{ /* any changes must be registered before establishing the connection */
+ if (sk->sk_state != DCCP_CLOSED)
+ return -EISCONN;
+ if (dccp_feat_type(feat) != FEAT_SP)
+ return -EINVAL;
+ return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local,
+ 0, list, len);
+}
+
+/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
+int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
+{
+ /* any changes must be registered before establishing the connection */
+ if (sk->sk_state != DCCP_CLOSED)
+ return -EISCONN;
+ if (dccp_feat_type(feat) != FEAT_NN)
+ return -EINVAL;
+ return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
+}
+
+/*
+ * Tracking features whose value depend on the choice of CCID
+ *
+ * This is designed with an extension in mind so that a list walk could be done
+ * before activating any features. However, the existing framework was found to
+ * work satisfactorily up until now, the automatic verification is left open.
+ * When adding new CCIDs, add a corresponding dependency table here.
+ */
+static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local)
+{
+ static const struct ccid_dependency ccid2_dependencies[2][2] = {
+ /*
+ * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX
+ * feature and Send Ack Vector is an RX feature, `is_local'
+ * needs to be reversed.
*/
- if (rc != DCCP_FEAT_SP_NOAGREE)
- dccp_feat_empty_confirm(dccp_msk(sk), type, feature);
+ { /* Dependencies of the receiver-side (remote) CCID2 */
+ {
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = true,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { 0, 0, 0, 0 }
+ },
+ { /* Dependencies of the sender-side (local) CCID2 */
+ {
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = false,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { 0, 0, 0, 0 }
+ }
+ };
+ static const struct ccid_dependency ccid3_dependencies[2][5] = {
+ { /*
+ * Dependencies of the receiver-side CCID3
+ */
+ { /* locally disable Ack Vectors */
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = true,
+ .is_mandatory = false,
+ .val = 0
+ },
+ { /* see below why Send Loss Event Rate is on */
+ .dependent_feat = DCCPF_SEND_LEV_RATE,
+ .is_local = true,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { /* NDP Count is needed as per RFC 4342, 6.1.1 */
+ .dependent_feat = DCCPF_SEND_NDP_COUNT,
+ .is_local = false,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { 0, 0, 0, 0 },
+ },
+ { /*
+ * CCID3 at the TX side: we request that the HC-receiver
+ * will not send Ack Vectors (they will be ignored, so
+ * Mandatory is not set); we enable Send Loss Event Rate
+ * (Mandatory since the implementation does not support
+ * the Loss Intervals option of RFC 4342, 8.6).
+ * The last two options are for peer's information only.
+ */
+ {
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = false,
+ .is_mandatory = false,
+ .val = 0
+ },
+ {
+ .dependent_feat = DCCPF_SEND_LEV_RATE,
+ .is_local = false,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { /* this CCID does not support Ack Ratio */
+ .dependent_feat = DCCPF_ACK_RATIO,
+ .is_local = true,
+ .is_mandatory = false,
+ .val = 0
+ },
+ { /* tell receiver we are sending NDP counts */
+ .dependent_feat = DCCPF_SEND_NDP_COUNT,
+ .is_local = true,
+ .is_mandatory = false,
+ .val = 1
+ },
+ { 0, 0, 0, 0 }
+ }
+ };
+ switch (ccid) {
+ case DCCPC_CCID2:
+ return ccid2_dependencies[is_local];
+ case DCCPC_CCID3:
+ return ccid3_dependencies[is_local];
+ default:
+ return NULL;
}
+}
- /* generate the confirm [if required] */
- dccp_feat_flush_confirm(sk);
-
+/**
+ * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID
+ * @fn: feature-negotiation list to update
+ * @id: CCID number to track
+ * @is_local: whether TX CCID (1) or RX CCID (0) is meant
+ * This function needs to be called after registering all other features.
+ */
+static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
+{
+ const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local);
+ int i, rc = (table == NULL);
+
+ for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++)
+ if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP)
+ rc = __feat_register_sp(fn, table[i].dependent_feat,
+ table[i].is_local,
+ table[i].is_mandatory,
+ &table[i].val, 1);
+ else
+ rc = __feat_register_nn(fn, table[i].dependent_feat,
+ table[i].is_mandatory,
+ table[i].val);
return rc;
}
-EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
+/**
+ * dccp_feat_finalise_settings - Finalise settings before starting negotiation
+ * @dp: client or listening socket (settings will be inherited)
+ * This is called after all registrations (socket initialisation, sysctls, and
+ * sockopt calls), and before sending the first packet containing Change options
+ * (ie. client-Request or server-Response), to ensure internal consistency.
+ */
+int dccp_feat_finalise_settings(struct dccp_sock *dp)
+{
+ struct list_head *fn = &dp->dccps_featneg;
+ struct dccp_feat_entry *entry;
+ int i = 2, ccids[2] = { -1, -1 };
-int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
- u8 *val, u8 len)
+ /*
+ * Propagating CCIDs:
+ * 1) not useful to propagate CCID settings if this host advertises more
+ * than one CCID: the choice of CCID may still change - if this is
+ * the client, or if this is the server and the client sends
+ * singleton CCID values.
+ * 2) since is that propagate_ccid changes the list, we defer changing
+ * the sorted list until after the traversal.
+ */
+ list_for_each_entry(entry, fn, node)
+ if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1)
+ ccids[entry->is_local] = entry->val.sp.vec[0];
+ while (i--)
+ if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
+ return -1;
+ return 0;
+}
+
+/**
+ * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features
+ * It is the server which resolves the dependencies once the CCID has been
+ * fully negotiated. If no CCID has been negotiated, it uses the default CCID.
+ */
+int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq)
{
- u8 t;
- struct dccp_opt_pend *opt;
- struct dccp_minisock *dmsk = dccp_msk(sk);
- int found = 0;
- int all_confirmed = 1;
+ struct list_head *fn = &dreq->dreq_featneg;
+ struct dccp_feat_entry *entry;
+ u8 is_local, ccid;
- dccp_feat_debug(type, feature, *val);
+ for (is_local = 0; is_local <= 1; is_local++) {
+ entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local);
- /* locate our change request */
- switch (type) {
- case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
- case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
- default: DCCP_WARN("invalid type %d\n", type);
- return 1;
+ if (entry != NULL && !entry->empty_confirm)
+ ccid = entry->val.sp.vec[0];
+ else
+ ccid = dccp_feat_default_value(DCCPF_CCID);
+ if (dccp_feat_propagate_ccid(fn, ccid, is_local))
+ return -1;
}
- /* XXX sanity check feature value */
+ return 0;
+}
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- if (!opt->dccpop_conf && opt->dccpop_type == t &&
- opt->dccpop_feat == feature) {
- found = 1;
- dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
+/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */
+static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen)
+{
+ u8 c, s;
- /* XXX do sanity check */
+ for (s = 0; s < slen; s++)
+ for (c = 0; c < clen; c++)
+ if (servlist[s] == clilist[c])
+ return servlist[s];
+ return -1;
+}
- opt->dccpop_conf = 1;
+/**
+ * dccp_feat_prefer - Move preferred entry to the start of array
+ * Reorder the @array_len elements in @array so that @preferred_value comes
+ * first. Returns >0 to indicate that @preferred_value does occur in @array.
+ */
+static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len)
+{
+ u8 i, does_occur = 0;
- /* We got a confirmation---change the option */
- dccp_feat_update(sk, opt->dccpop_type,
- opt->dccpop_feat, *val);
+ if (array != NULL) {
+ for (i = 0; i < array_len; i++)
+ if (array[i] == preferred_value) {
+ array[i] = array[0];
+ does_occur++;
+ }
+ if (does_occur)
+ array[0] = preferred_value;
+ }
+ return does_occur;
+}
- /* XXX check the return value of dccp_feat_update */
- break;
- }
+/**
+ * dccp_feat_reconcile - Reconcile SP preference lists
+ * @fval: SP list to reconcile into
+ * @arr: received SP preference list
+ * @len: length of @arr in bytes
+ * @is_server: whether this side is the server (and @fv is the server's list)
+ * @reorder: whether to reorder the list in @fv after reconciling with @arr
+ * When successful, > 0 is returned and the reconciled list is in @fval.
+ * A value of 0 means that negotiation failed (no shared entry).
+ */
+static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len,
+ bool is_server, bool reorder)
+{
+ int rc;
- if (!opt->dccpop_conf)
- all_confirmed = 0;
+ if (!fv->sp.vec || !arr) {
+ DCCP_CRIT("NULL feature value or array");
+ return 0;
}
- /* fix re-transmit timer */
- /* XXX gotta make sure that no option negotiation occurs during
- * connection shutdown. Consider that the CLOSEREQ is sent and timer is
- * on. if all options are confirmed it might kill timer which should
- * remain alive until close is received.
- */
- if (all_confirmed) {
- dccp_pr_debug("clear feat negotiation timer %p\n", sk);
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
- }
+ if (is_server)
+ rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len);
+ else
+ rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len);
- if (!found)
- dccp_pr_debug("%s(%d, ...) never requested\n",
- dccp_feat_typename(type), feature);
- return 0;
-}
+ if (!reorder)
+ return rc;
+ if (rc < 0)
+ return 0;
-EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
+ /*
+ * Reorder list: used for activating features and in dccp_insert_fn_opt.
+ */
+ return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len);
+}
-void dccp_feat_clean(struct dccp_minisock *dmsk)
+/**
+ * dccp_feat_change_recv - Process incoming ChangeL/R options
+ * @fn: feature-negotiation list to update
+ * @is_mandatory: whether the Change was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: NN value or SP value/preference list
+ * @len: length of @val in bytes
+ * @server: whether this node is the server (1) or the client (0)
+ */
+static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
+ u8 feat, u8 *val, u8 len, const bool server)
{
- struct dccp_opt_pend *opt, *next;
+ u8 defval, type = dccp_feat_type(feat);
+ const bool local = (opt == DCCPO_CHANGE_R);
+ struct dccp_feat_entry *entry;
+ dccp_feat_val fval;
- list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending,
- dccpop_node) {
- BUG_ON(opt->dccpop_val == NULL);
- kfree(opt->dccpop_val);
+ if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */
+ goto unknown_feature_or_value;
+
+ /*
+ * Negotiation of NN features: Change R is invalid, so there is no
+ * simultaneous negotiation; hence we do not look up in the list.
+ */
+ if (type == FEAT_NN) {
+ if (local || len > sizeof(fval.nn))
+ goto unknown_feature_or_value;
+
+ /* 6.3.2: "The feature remote MUST accept any valid value..." */
+ fval.nn = dccp_decode_value_var(val, len);
+ if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
+ goto unknown_feature_or_value;
+
+ return dccp_feat_push_confirm(fn, feat, local, &fval);
+ }
+
+ /*
+ * Unidirectional/simultaneous negotiation of SP features (6.3.1)
+ */
+ entry = dccp_feat_list_lookup(fn, feat, local);
+ if (entry == NULL) {
+ /*
+ * No particular preferences have been registered. We deal with
+ * this situation by assuming that all valid values are equally
+ * acceptable, and apply the following checks:
+ * - if the peer's list is a singleton, we accept a valid value;
+ * - if we are the server, we first try to see if the peer (the
+ * client) advertises the default value. If yes, we use it,
+ * otherwise we accept the preferred value;
+ * - else if we are the client, we use the first list element.
+ */
+ if (dccp_feat_clone_sp_val(&fval, val, 1))
+ return DCCP_RESET_CODE_TOO_BUSY;
+
+ if (len > 1 && server) {
+ defval = dccp_feat_default_value(feat);
+ if (dccp_feat_preflist_match(&defval, 1, val, len) > -1)
+ fval.sp.vec[0] = defval;
+ } else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) {
+ kfree(fval.sp.vec);
+ goto unknown_feature_or_value;
+ }
- if (opt->dccpop_sc != NULL) {
- BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
- kfree(opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc);
+ /* Treat unsupported CCIDs like invalid values */
+ if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) {
+ kfree(fval.sp.vec);
+ goto not_valid_or_not_known;
}
- kfree(opt);
+ return dccp_feat_push_confirm(fn, feat, local, &fval);
+
+ } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */
+ return 0;
}
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
- BUG_ON(opt == NULL);
- if (opt->dccpop_val != NULL)
- kfree(opt->dccpop_val);
- kfree(opt);
+ if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
+ entry->empty_confirm = 0;
+ } else if (is_mandatory) {
+ return DCCP_RESET_CODE_MANDATORY_ERROR;
+ } else if (entry->state == FEAT_INITIALISING) {
+ /*
+ * Failed simultaneous negotiation (server only): try to `save'
+ * the connection by checking whether entry contains the default
+ * value for @feat. If yes, send an empty Confirm to signal that
+ * the received Change was not understood - which implies using
+ * the default value.
+ * If this also fails, we use Reset as the last resort.
+ */
+ WARN_ON(!server);
+ defval = dccp_feat_default_value(feat);
+ if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
+ return DCCP_RESET_CODE_OPTION_ERROR;
+ entry->empty_confirm = 1;
}
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
-}
+ entry->needs_confirm = 1;
+ entry->needs_mandatory = 0;
+ entry->state = FEAT_STABLE;
+ return 0;
-EXPORT_SYMBOL_GPL(dccp_feat_clean);
+unknown_feature_or_value:
+ if (!is_mandatory)
+ return dccp_push_empty_confirm(fn, feat, local);
-/* this is to be called only when a listening sock creates its child. It is
- * assumed by the function---the confirm is not duplicated, but rather it is
- * "passed on".
+not_valid_or_not_known:
+ return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+ : DCCP_RESET_CODE_OPTION_ERROR;
+}
+
+/**
+ * dccp_feat_confirm_recv - Process received Confirm options
+ * @fn: feature-negotiation list to update
+ * @is_mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: NN value or SP value/preference list
+ * @len: length of @val in bytes
+ * @server: whether this node is server (1) or client (0)
*/
-int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
+static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
+ u8 feat, u8 *val, u8 len, const bool server)
{
- struct dccp_minisock *olddmsk = dccp_msk(oldsk);
- struct dccp_minisock *newdmsk = dccp_msk(newsk);
- struct dccp_opt_pend *opt;
- int rc = 0;
+ u8 *plist, plen, type = dccp_feat_type(feat);
+ const bool local = (opt == DCCPO_CONFIRM_R);
+ struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
- INIT_LIST_HEAD(&newdmsk->dccpms_pending);
- INIT_LIST_HEAD(&newdmsk->dccpms_conf);
+ if (entry == NULL) { /* nothing queued: ignore or handle error */
+ if (is_mandatory && type == FEAT_UNKNOWN)
+ return DCCP_RESET_CODE_MANDATORY_ERROR;
+
+ if (!local && type == FEAT_NN) /* 6.3.2 */
+ goto confirmation_failed;
+ return 0;
+ }
- list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
- struct dccp_opt_pend *newopt;
- /* copy the value of the option */
- u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
+ if (entry->state != FEAT_CHANGING) /* 6.6.2 */
+ return 0;
- if (val == NULL)
- goto out_clean;
+ if (len == 0) {
+ if (dccp_feat_must_be_understood(feat)) /* 6.6.7 */
+ goto confirmation_failed;
+ /*
+ * Empty Confirm during connection setup: this means reverting
+ * to the `old' value, which in this case is the default. Since
+ * we handle default values automatically when no other values
+ * have been set, we revert to the old value by removing this
+ * entry from the list.
+ */
+ dccp_feat_list_pop(entry);
+ return 0;
+ }
- newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
- if (newopt == NULL) {
- kfree(val);
- goto out_clean;
- }
+ if (type == FEAT_NN) {
+ if (len > sizeof(entry->val.nn))
+ goto confirmation_failed;
- /* insert the option */
- newopt->dccpop_val = val;
- list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
+ if (entry->val.nn == dccp_decode_value_var(val, len))
+ goto confirmation_succeeded;
- /* XXX what happens with backlogs and multiple connections at
- * once...
- */
- /* the master socket no longer needs to worry about confirms */
- opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
+ DCCP_WARN("Bogus Confirm for non-existing value\n");
+ goto confirmation_failed;
+ }
- /* reset state for a new socket */
- opt->dccpop_conf = 0;
+ /*
+ * Parsing SP Confirms: the first element of @val is the preferred
+ * SP value which the peer confirms, the remainder depends on @len.
+ * Note that only the confirmed value need to be a valid SP value.
+ */
+ if (!dccp_feat_is_valid_sp_val(feat, *val))
+ goto confirmation_failed;
+
+ if (len == 1) { /* peer didn't supply a preference list */
+ plist = val;
+ plen = len;
+ } else { /* preferred value + preference list */
+ plist = val + 1;
+ plen = len - 1;
}
- /* XXX not doing anything about the conf queue */
+ /* Check whether the peer got the reconciliation right (6.6.8) */
+ if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) {
+ DCCP_WARN("Confirm selected the wrong value %u\n", *val);
+ return DCCP_RESET_CODE_OPTION_ERROR;
+ }
+ entry->val.sp.vec[0] = *val;
-out:
- return rc;
+confirmation_succeeded:
+ entry->state = FEAT_STABLE;
+ return 0;
-out_clean:
- dccp_feat_clean(newdmsk);
- rc = -ENOMEM;
- goto out;
+confirmation_failed:
+ DCCP_WARN("Confirmation failed\n");
+ return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+ : DCCP_RESET_CODE_OPTION_ERROR;
}
-EXPORT_SYMBOL_GPL(dccp_feat_clone);
-
-static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
- u8 *val, u8 len)
+/**
+ * dccp_feat_parse_options - Process Feature-Negotiation Options
+ * @sk: for general use and used by the client during connection setup
+ * @dreq: used by the server during connection setup
+ * @mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: value contents of @opt
+ * @len: length of @val in bytes
+ * Returns 0 on success, a Reset code for ending the connection otherwise.
+ */
+int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
+ u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len)
{
- int rc = -ENOMEM;
- u8 *copy = kmemdup(val, len, GFP_KERNEL);
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
+ bool server = false;
- if (copy != NULL) {
- rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
- if (rc)
- kfree(copy);
+ switch (sk->sk_state) {
+ /*
+ * Negotiation during connection setup
+ */
+ case DCCP_LISTEN:
+ server = true; /* fall through */
+ case DCCP_REQUESTING:
+ switch (opt) {
+ case DCCPO_CHANGE_L:
+ case DCCPO_CHANGE_R:
+ return dccp_feat_change_recv(fn, mandatory, opt, feat,
+ val, len, server);
+ case DCCPO_CONFIRM_R:
+ case DCCPO_CONFIRM_L:
+ return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
+ val, len, server);
+ }
}
- return rc;
+ return 0; /* ignore FN options in all other states */
}
-int dccp_feat_init(struct dccp_minisock *dmsk)
+int dccp_feat_init(struct sock *sk)
{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
int rc;
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
-
- /* CCID L */
- rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
- &dmsk->dccpms_tx_ccid, 1);
- if (rc)
- goto out;
-
- /* CCID R */
- rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
- &dmsk->dccpms_rx_ccid, 1);
- if (rc)
- goto out;
+ INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */
+ INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */
/* Ack ratio */
- rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
- &dmsk->dccpms_ack_ratio, 1);
-out:
+ rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
+ dp->dccps_l_ack_ratio);
return rc;
}
EXPORT_SYMBOL_GPL(dccp_feat_init);
+int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_feat_entry *cur, *next;
+ int idx;
+ dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = {
+ [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL }
+ };
+
+ list_for_each_entry(cur, fn_list, node) {
+ /*
+ * An empty Confirm means that either an unknown feature type
+ * or an invalid value was present. In the first case there is
+ * nothing to activate, in the other the default value is used.
+ */
+ if (cur->empty_confirm)
+ continue;
+
+ idx = dccp_feat_index(cur->feat_num);
+ if (idx < 0) {
+ DCCP_BUG("Unknown feature %u", cur->feat_num);
+ goto activation_failed;
+ }
+ if (cur->state != FEAT_STABLE) {
+ DCCP_CRIT("Negotiation of %s %u failed in state %u",
+ cur->is_local ? "local" : "remote",
+ cur->feat_num, cur->state);
+ goto activation_failed;
+ }
+ fvals[idx][cur->is_local] = &cur->val;
+ }
+
+ /*
+ * Activate in decreasing order of index, so that the CCIDs are always
+ * activated as the last feature. This avoids the case where a CCID
+ * relies on the initialisation of one or more features that it depends
+ * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features).
+ */
+ for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;)
+ if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) ||
+ __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) {
+ DCCP_CRIT("Could not activate %d", idx);
+ goto activation_failed;
+ }
+
+ /* Clean up Change options which have been confirmed already */
+ list_for_each_entry_safe(cur, next, fn_list, node)
+ if (!cur->needs_confirm)
+ dccp_feat_list_pop(cur);
+
+ dccp_pr_debug("Activation OK\n");
+ return 0;
+
+activation_failed:
+ /*
+ * We clean up everything that may have been allocated, since
+ * it is difficult to track at which stage negotiation failed.
+ * This is ok, since all allocation functions below are robust
+ * against NULL arguments.
+ */
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ return -1;
+}
+
#ifdef CONFIG_IP_DCCP_DEBUG
const char *dccp_feat_typename(const u8 type)
{
@@ -639,6 +1233,8 @@ const char *dccp_feat_name(const u8 feat)
if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
return feature_names[DCCPF_RESERVED];
+ if (feat == DCCPF_SEND_LEV_RATE)
+ return "Send Loss Event Rate";
if (feat >= DCCPF_MIN_CCID_SPECIFIC)
return "CCID-specific";
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index e272222c7ace..9b46e2a7866e 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -3,17 +3,103 @@
/*
* net/dccp/feat.h
*
- * An implementation of the DCCP protocol
+ * Feature negotiation for the DCCP protocol (RFC 4340, section 6)
+ * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
* Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
*/
-
#include <linux/types.h>
#include "dccp.h"
+/*
+ * Known limit values
+ */
+/* Ack Ratio takes 2-byte integer values (11.3) */
+#define DCCPF_ACK_RATIO_MAX 0xFFFF
+/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
+#define DCCPF_SEQ_WMIN 32
+#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull
+/* Maximum number of SP values that fit in a single (Confirm) option */
+#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2)
+
+enum dccp_feat_type {
+ FEAT_AT_RX = 1, /* located at RX side of half-connection */
+ FEAT_AT_TX = 2, /* located at TX side of half-connection */
+ FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */
+ FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */
+ FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */
+};
+
+enum dccp_feat_state {
+ FEAT_DEFAULT = 0, /* using default values from 6.4 */
+ FEAT_INITIALISING, /* feature is being initialised */
+ FEAT_CHANGING, /* Change sent but not confirmed yet */
+ FEAT_UNSTABLE, /* local modification in state CHANGING */
+ FEAT_STABLE /* both ends (think they) agree */
+};
+
+/**
+ * dccp_feat_val - Container for SP or NN feature values
+ * @nn: single NN value
+ * @sp.vec: single SP value plus optional preference list
+ * @sp.len: length of @sp.vec in bytes
+ */
+typedef union {
+ u64 nn;
+ struct {
+ u8 *vec;
+ u8 len;
+ } sp;
+} dccp_feat_val;
+
+/**
+ * struct feat_entry - Data structure to perform feature negotiation
+ * @val: feature's current value (SP features may have preference list)
+ * @state: feature's current state
+ * @feat_num: one of %dccp_feature_numbers
+ * @needs_mandatory: whether Mandatory options should be sent
+ * @needs_confirm: whether to send a Confirm instead of a Change
+ * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm)
+ * @is_local: feature location (1) or feature-remote (0)
+ * @node: list pointers, entries arranged in FIFO order
+ */
+struct dccp_feat_entry {
+ dccp_feat_val val;
+ enum dccp_feat_state state:8;
+ u8 feat_num;
+
+ bool needs_mandatory,
+ needs_confirm,
+ empty_confirm,
+ is_local;
+
+ struct list_head node;
+};
+
+static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
+{
+ if (entry->needs_confirm)
+ return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R;
+ return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
+}
+
+/**
+ * struct ccid_dependency - Track changes resulting from choosing a CCID
+ * @dependent_feat: one of %dccp_feature_numbers
+ * @is_local: local (1) or remote (0) @dependent_feat
+ * @is_mandatory: whether presence of @dependent_feat is mission-critical or not
+ * @val: corresponding default value for @dependent_feat (u8 is sufficient here)
+ */
+struct ccid_dependency {
+ u8 dependent_feat;
+ bool is_local:1,
+ is_mandatory:1;
+ u8 val;
+};
+
#ifdef CONFIG_IP_DCCP_DEBUG
extern const char *dccp_feat_typename(const u8 type);
extern const char *dccp_feat_name(const u8 feat);
@@ -27,14 +113,30 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
#define dccp_feat_debug(type, feat, val)
#endif /* CONFIG_IP_DCCP_DEBUG */
-extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
- u8 *val, u8 len, gfp_t gfp);
-extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
- u8 *val, u8 len);
-extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
- u8 *val, u8 len);
-extern void dccp_feat_clean(struct dccp_minisock *dmsk);
-extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
-extern int dccp_feat_init(struct dccp_minisock *dmsk);
+extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+ u8 const *list, u8 len);
+extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
+extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
+ u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
+extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
+extern int dccp_feat_init(struct sock *sk);
+
+/*
+ * Encoding variable-length options and their maximum length.
+ *
+ * This affects NN options (SP options are all u8) and other variable-length
+ * options (see table 3 in RFC 4340). The limit is currently given the Sequence
+ * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other
+ * options consume less than 6 bytes (timestamps are 4 bytes).
+ * When updating this constant (e.g. due to new internet drafts / RFCs), make
+ * sure that you also update all code which refers to it.
+ */
+#define DCCP_OPTVAL_MAXLEN 6
+
+extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
+extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
+extern int dccp_insert_option_mandatory(struct sk_buff *skb);
+extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
+ u8 *val, u8 len, bool repeat_first);
#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 779d0ed9ae94..5eb443f656c1 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -163,7 +163,7 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- if (dccp_msk(sk)->dccpms_send_ack_vector)
+ if (dp->dccps_hc_rx_ackvec != NULL)
dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
@@ -375,7 +375,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
@@ -421,20 +421,19 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
goto out_invalid_packet;
}
+ /*
+ * If option processing (Step 8) failed, return 1 here so that
+ * dccp_v4_do_rcv() sends a Reset. The Reset code depends on
+ * the option type and is set in dccp_parse_options().
+ */
if (dccp_parse_options(sk, NULL, skb))
- goto out_invalid_packet;
+ return 1;
/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
dp->dccps_options_received.dccpor_timestamp_echo));
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
- dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_ACKVEC_STATE_RECEIVED))
- goto out_invalid_packet; /* FIXME: change error code */
-
/* Stop the REQUEST timer */
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
WARN_ON(sk->sk_send_head == NULL);
@@ -475,6 +474,15 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
*/
dccp_set_state(sk, DCCP_PARTOPEN);
+ /*
+ * If feature negotiation was successful, activate features now;
+ * an activation failure means that this host could not activate
+ * one ore more features (e.g. insufficient memory), which would
+ * leave at least one feature in an undefined state.
+ */
+ if (dccp_feat_activate_values(sk, &dp->dccps_featneg))
+ goto unable_to_proceed;
+
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
@@ -509,6 +517,16 @@ out_invalid_packet:
/* dccp_v4_do_rcv will send a reset */
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
return 1;
+
+unable_to_proceed:
+ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED;
+ /*
+ * We mark this socket as no longer usable, so that the loop in
+ * dccp_sendmsg() terminates and the application gets notified.
+ */
+ dccp_set_state(sk, DCCP_CLOSED);
+ sk->sk_err = ECOMM;
+ return 1;
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -590,8 +608,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
skb) < 0)
return 1;
-
- /* FIXME: do congestion control initialization */
goto discard;
}
if (dh->dccph_type == DCCP_PKT_RESET)
@@ -602,7 +618,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 1;
}
- if (sk->sk_state != DCCP_REQUESTING) {
+ if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
if (dccp_check_seqno(sk, skb))
goto discard;
@@ -615,7 +631,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
@@ -667,8 +683,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 1;
case DCCP_REQUESTING:
- /* FIXME: do congestion control initialization */
-
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
if (queued >= 0)
return queued;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e3dfddab21cc..d1dd95289b89 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -545,6 +545,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
+ dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
kfree(inet_rsk(req)->opt);
}
@@ -595,7 +596,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
- dccp_reqsk_init(req, skb);
+ if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+ goto drop_and_free;
dreq = dccp_rsk(req);
if (dccp_parse_options(sk, dreq, skb))
@@ -792,12 +794,10 @@ static int dccp_v4_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh);
DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
- dccp_pr_debug("%8.8s "
- "src=%u.%u.%u.%u@%-5d "
- "dst=%u.%u.%u.%u@%-5d seq=%llu",
+ dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu",
dccp_packet_name(dh->dccph_type),
- NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
- NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
+ &iph->saddr, ntohs(dh->dccph_sport),
+ &iph->daddr, ntohs(dh->dccph_dport),
(unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
if (dccp_packet_without_ack(skb)) {
@@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
.rsk_prot = &dccp_request_sock_ops,
.twsk_prot = &dccp_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d4ce1224e008..b963f35c65f6 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- err = xfrm_lookup(&dst, &fl, sk, 0);
+ err = xfrm_lookup(net, &dst, &fl, sk, 0);
if (err < 0) {
sk->sk_err_soft = -err;
goto out;
@@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- err = xfrm_lookup(&dst, &fl, sk, 0);
+ err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
if (err < 0)
goto done;
@@ -304,6 +304,7 @@ done:
static void dccp_v6_reqsk_destructor(struct request_sock *req)
{
+ dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
if (inet6_rsk(req)->pktopts != NULL)
kfree_skb(inet6_rsk(req)->pktopts);
}
@@ -342,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
/* sk = NULL, but it is safe for now. RST socket required. */
if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) {
- if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
+ if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) {
ip6_xmit(ctl_sk, skb, &fl, NULL, 0);
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
@@ -426,7 +427,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
- dccp_reqsk_init(req, skb);
+ if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+ goto drop_and_free;
dreq = dccp_rsk(req);
if (dccp_parse_options(sk, dreq, skb))
@@ -567,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
goto out;
}
@@ -1002,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
+ err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
if (err < 0) {
if (err == -EREMOTE)
err = ip6_dst_blackhole(sk, &dst, &fl);
@@ -1138,6 +1140,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e6bf99e3e41a..6821ae33dd37 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -45,11 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row);
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
- dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
- dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
- dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
- dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
- dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
}
void dccp_time_wait(struct sock *sk, int state, int timeo)
@@ -112,7 +107,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
- const struct dccp_request_sock *dreq = dccp_rsk(req);
+ struct dccp_request_sock *dreq = dccp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct dccp_sock *newdp = dccp_sk(newsk);
struct dccp_minisock *newdmsk = dccp_msk(newsk);
@@ -125,35 +120,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
- if (dccp_feat_clone(sk, newsk))
- goto out_free;
-
- if (newdmsk->dccpms_send_ack_vector) {
- newdp->dccps_hc_rx_ackvec =
- dccp_ackvec_alloc(GFP_ATOMIC);
- if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
- goto out_free;
- }
-
- newdp->dccps_hc_rx_ccid =
- ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
- newsk, GFP_ATOMIC);
- newdp->dccps_hc_tx_ccid =
- ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
- newsk, GFP_ATOMIC);
- if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
- newdp->dccps_hc_tx_ccid == NULL)) {
- dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
- ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
- ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
-out_free:
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
- sk_free(newsk);
- return NULL;
- }
-
+ INIT_LIST_HEAD(&newdp->dccps_featneg);
/*
* Step 3: Process LISTEN state
*
@@ -184,6 +151,17 @@ out_free:
dccp_set_seqno(&newdp->dccps_awl,
max48(newdp->dccps_awl, newdp->dccps_iss));
+ /*
+ * Activate features after initialising the sequence numbers,
+ * since CCID initialisation may depend on GSS, ISR, ISS etc.
+ */
+ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ newsk->sk_destruct = NULL;
+ sk_free(newsk);
+ return NULL;
+ }
dccp_init_xmit_timers(newsk);
DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
@@ -304,7 +282,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
-void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
+int dccp_reqsk_init(struct request_sock *req,
+ struct dccp_sock const *dp, struct sk_buff const *skb)
{
struct dccp_request_sock *dreq = dccp_rsk(req);
@@ -313,6 +292,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
inet_rsk(req)->acked = 0;
req->rcv_wnd = sysctl_dccp_feat_sequence_window;
dreq->dreq_timestamp_echo = 0;
+
+ /* inherit feature negotiation options from listening socket */
+ return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
}
EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0809b63cb055..7b1165c21f51 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -26,20 +26,21 @@
int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
-int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
-int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
+u64 dccp_decode_value_var(const u8 *bf, const u8 len)
{
- u32 value = 0;
+ u64 value = 0;
+ if (len >= DCCP_OPTVAL_MAXLEN)
+ value += ((u64)*bf++) << 40;
+ if (len > 4)
+ value += ((u64)*bf++) << 32;
if (len > 3)
- value += *bf++ << 24;
+ value += ((u64)*bf++) << 24;
if (len > 2)
- value += *bf++ << 16;
+ value += ((u64)*bf++) << 16;
if (len > 1)
- value += *bf++ << 8;
+ value += ((u64)*bf++) << 8;
if (len > 0)
value += *bf;
@@ -64,7 +65,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
(dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
- unsigned char *value;
+ unsigned char *uninitialized_var(value);
u32 elapsed_time;
__be32 opt_val;
int rc;
@@ -131,41 +132,19 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
(unsigned long long)opt_recv->dccpor_ndp);
break;
- case DCCPO_CHANGE_L:
- /* fall through */
- case DCCPO_CHANGE_R:
- if (pkt_type == DCCP_PKT_DATA)
+ case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
+ if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */
break;
- if (len < 2)
- goto out_invalid_option;
- rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
- len - 1);
- /*
- * When there is a change error, change_recv is
- * responsible for dealing with it. i.e. reply with an
- * empty confirm.
- * If the change was mandatory, then we need to die.
- */
- if (rc && mandatory)
- goto out_invalid_option;
- break;
- case DCCPO_CONFIRM_L:
- /* fall through */
- case DCCPO_CONFIRM_R:
- if (pkt_type == DCCP_PKT_DATA)
- break;
- if (len < 2) /* FIXME this disallows empty confirm */
- goto out_invalid_option;
- if (dccp_feat_confirm_recv(sk, opt, *value,
- value + 1, len - 1))
- goto out_invalid_option;
+ rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
+ *value, value + 1, len - 1);
+ if (rc)
+ goto out_featneg_failed;
break;
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
break;
-
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
goto out_invalid_option;
break;
@@ -289,8 +268,10 @@ out_nonsensical_length:
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
- DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
+ rc = DCCP_RESET_CODE_OPTION_ERROR;
+out_featneg_failed:
+ DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
+ DCCP_SKB_CB(skb)->dccpd_reset_code = rc;
DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
@@ -299,9 +280,12 @@ out_invalid_option:
EXPORT_SYMBOL_GPL(dccp_parse_options);
-static void dccp_encode_value_var(const u32 value, unsigned char *to,
- const unsigned int len)
+void dccp_encode_value_var(const u64 value, u8 *to, const u8 len)
{
+ if (len >= DCCP_OPTVAL_MAXLEN)
+ *to++ = (value & 0xFF0000000000ull) >> 40;
+ if (len > 4)
+ *to++ = (value & 0xFF00000000ull) >> 32;
if (len > 3)
*to++ = (value & 0xFF000000) >> 24;
if (len > 2)
@@ -461,23 +445,61 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
return 0;
}
-static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
- u8 *val, u8 len)
+/**
+ * dccp_insert_option_mandatory - Mandatory option (5.8.2)
+ * Note that since we are using skb_push, this function needs to be called
+ * _after_ inserting the option it is supposed to influence (stack order).
+ */
+int dccp_insert_option_mandatory(struct sk_buff *skb)
+{
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN)
+ return -1;
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len++;
+ *skb_push(skb, 1) = DCCPO_MANDATORY;
+ return 0;
+}
+
+/**
+ * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb
+ * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
+ * @feat: one out of %dccp_feature_numbers
+ * @val: NN value or SP array (preferred element first) to copy
+ * @len: true length of @val in bytes (excluding first element repetition)
+ * @repeat_first: whether to copy the first element of @val twice
+ * The last argument is used to construct Confirm options, where the preferred
+ * value and the preference list appear separately (RFC 4340, 6.3.1). Preference
+ * lists are kept such that the preferred entry is always first, so we only need
+ * to copy twice, and avoid the overhead of cloning into a bigger array.
+ */
+int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
+ u8 *val, u8 len, bool repeat_first)
{
- u8 *to;
+ u8 tot_len, *to;
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
- DCCP_WARN("packet too small for feature %d option!\n", feat);
+ /* take the `Feature' field and possible repetition into account */
+ if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) {
+ DCCP_WARN("length %u for feature %u too large\n", len, feat);
return -1;
}
- DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;
+ if (unlikely(val == NULL || len == 0))
+ len = repeat_first = 0;
+ tot_len = 3 + repeat_first + len;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
+ DCCP_WARN("packet too small for feature %d option!\n", feat);
+ return -1;
+ }
+ DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len;
- to = skb_push(skb, len + 3);
+ to = skb_push(skb, tot_len);
*to++ = type;
- *to++ = len + 3;
+ *to++ = tot_len;
*to++ = feat;
+ if (repeat_first)
+ *to++ = *val;
if (len)
memcpy(to, val, len);
@@ -487,69 +509,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
return 0;
}
-static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
- struct dccp_opt_pend *opt, *next;
- int change = 0;
-
- /* confirm any options [NN opts] */
- list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
- dccp_insert_feat_opt(skb, opt->dccpop_type,
- opt->dccpop_feat, opt->dccpop_val,
- opt->dccpop_len);
- /* fear empty confirms */
- if (opt->dccpop_val)
- kfree(opt->dccpop_val);
- kfree(opt);
- }
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
-
- /* see which features we need to send */
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- /* see if we need to send any confirm */
- if (opt->dccpop_sc) {
- dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
- opt->dccpop_feat,
- opt->dccpop_sc->dccpoc_val,
- opt->dccpop_sc->dccpoc_len);
-
- BUG_ON(!opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc);
- opt->dccpop_sc = NULL;
- }
-
- /* any option not confirmed, re-send it */
- if (!opt->dccpop_conf) {
- dccp_insert_feat_opt(skb, opt->dccpop_type,
- opt->dccpop_feat, opt->dccpop_val,
- opt->dccpop_len);
- change++;
- }
- }
-
- /* Retransmit timer.
- * If this is the master listening sock, we don't set a timer on it. It
- * should be fine because if the dude doesn't receive our RESPONSE
- * [which will contain the CHANGE] he will send another REQUEST which
- * will "retrnasmit" the change.
- */
- if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
- dccp_pr_debug("reset feat negotiation timer %p\n", sk);
-
- /* XXX don't reset the timer on re-transmissions. I.e. reset it
- * only when sending new stuff i guess. Currently the timer
- * never backs off because on re-transmission it just resets it!
- */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
- }
-
- return 0;
-}
-
/* The length of all options needs to be a multiple of 4 (5.8) */
static void dccp_insert_option_padding(struct sk_buff *skb)
{
@@ -565,19 +524,31 @@ static void dccp_insert_option_padding(struct sk_buff *skb)
int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
- if (dmsk->dccpms_send_ndp_count &&
- dccp_insert_option_ndp(sk, skb))
+ if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
return -1;
- if (!dccp_packet_without_ack(skb)) {
- if (dmsk->dccpms_send_ack_vector &&
- dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
- dccp_insert_option_ackvec(sk, skb))
+ if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
+
+ /* Feature Negotiation */
+ if (dccp_feat_insert_opts(dp, NULL, skb))
return -1;
+
+ if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
+ /*
+ * Obtain RTT sample from Request/Response exchange.
+ * This is currently used in CCID 3 initialisation.
+ */
+ if (dccp_insert_option_timestamp(sk, skb))
+ return -1;
+
+ } else if (dp->dccps_hc_rx_ackvec != NULL &&
+ dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+ dccp_insert_option_ackvec(sk, skb)) {
+ return -1;
+ }
}
if (dp->dccps_hc_rx_insert_options) {
@@ -586,21 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
dp->dccps_hc_rx_insert_options = 0;
}
- /* Feature negotiation */
- /* Data packets can't do feat negotiation */
- if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
- DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
- dccp_insert_options_feat(sk, skb))
- return -1;
-
- /*
- * Obtain RTT sample from Request/Response exchange.
- * This is currently used in CCID 3 initialisation.
- */
- if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
- dccp_insert_option_timestamp(sk, skb))
- return -1;
-
if (dp->dccps_timestamp_echo != 0 &&
dccp_insert_option_timestamp_echo(dp, NULL, skb))
return -1;
@@ -613,6 +569,9 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
{
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
+ if (dccp_feat_insert_opts(NULL, dreq, skb))
+ return -1;
+
if (dreq->dreq_timestamp_echo != 0 &&
dccp_insert_option_timestamp_echo(NULL, dreq, skb))
return -1;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 809d803d5006..22a618af4893 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -175,7 +175,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
* make it a multiple of 4
*/
- cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+ cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4);
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
@@ -339,10 +339,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
- if (dccp_insert_options_rsk(dreq, skb)) {
- kfree_skb(skb);
- return NULL;
- }
+ /* Resolve feature dependencies resulting from choice of CCID */
+ if (dccp_feat_server_ccid_dependencies(dreq))
+ goto response_failed;
+
+ if (dccp_insert_options_rsk(dreq, skb))
+ goto response_failed;
/* Build and checksum header */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -363,6 +365,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
inet_rsk(req)->acked = 1;
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
+response_failed:
+ kfree_skb(skb);
+ return NULL;
}
EXPORT_SYMBOL_GPL(dccp_make_response);
@@ -469,6 +474,10 @@ int dccp_connect(struct sock *sk)
struct sk_buff *skb;
struct inet_connection_sock *icsk = inet_csk(sk);
+ /* do not connect if feature negotiation setup fails */
+ if (dccp_feat_finalise_settings(dccp_sk(sk)))
+ return -EPROTO;
+
dccp_connect_init(sk);
skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 81368a7f5379..37731da41481 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -74,30 +74,27 @@ static void printl(const char *fmt, ...)
static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t size)
{
- const struct dccp_minisock *dmsk = dccp_msk(sk);
const struct inet_sock *inet = inet_sk(sk);
- const struct ccid3_hc_tx_sock *hctx;
+ struct ccid3_hc_tx_sock *hctx = NULL;
- if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
+ if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
hctx = ccid3_hc_tx_sk(sk);
- else
- hctx = NULL;
if (port == 0 || ntohs(inet->dport) == port ||
ntohs(inet->sport) == port) {
if (hctx)
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+ printl("%pI4:%u %pI4:%u %d %d %d %d %u "
"%llu %llu %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+ &inet->saddr, ntohs(inet->sport),
+ &inet->daddr, ntohs(inet->dport), size,
hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
hctx->ccid3hctx_x_recv >> 6,
hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
else
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+ printl("%pI4:%u %pI4:%u %d\n",
+ &inet->saddr, ntohs(inet->sport),
+ &inet->daddr, ntohs(inet->dport), size);
}
jprobe_return();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d0bd34819761..d5c2bacb713c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -40,16 +40,10 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics);
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
- .lhash_lock = RW_LOCK_UNLOCKED,
- .lhash_users = ATOMIC_INIT(0),
- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
+struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* the maximum queue length for tx in packets. 0 is no limit */
@@ -67,6 +61,9 @@ void dccp_set_state(struct sock *sk, const int state)
case DCCP_OPEN:
if (oldstate != DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+ /* Client retransmits all Confirm options until entering OPEN */
+ if (oldstate == DCCP_PARTOPEN)
+ dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
break;
case DCCP_CLOSED:
@@ -175,7 +172,6 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
dccp_minisock_init(&dp->dccps_minisock);
@@ -193,45 +189,10 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dccp_init_xmit_timers(sk);
- /*
- * FIXME: We're hardcoding the CCID, and doing this at this point makes
- * the listening (master) sock get CCID control blocks, which is not
- * necessary, but for now, to not mess with the test userspace apps,
- * lets leave it here, later the real solution is to do this in a
- * setsockopt(CCIDs-I-want/accept). -acme
- */
- if (likely(ctl_sock_initialized)) {
- int rc = dccp_feat_init(dmsk);
-
- if (rc)
- return rc;
-
- if (dmsk->dccpms_send_ack_vector) {
- dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
- if (dp->dccps_hc_rx_ackvec == NULL)
- return -ENOMEM;
- }
- dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
- sk, GFP_KERNEL);
- dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
- sk, GFP_KERNEL);
- if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
- dp->dccps_hc_tx_ccid == NULL)) {
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- if (dmsk->dccpms_send_ack_vector) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
- return -ENOMEM;
- }
- } else {
- /* control socket doesn't need feat nego */
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
- }
-
+ INIT_LIST_HEAD(&dp->dccps_featneg);
+ /* control socket doesn't need feat nego */
+ if (likely(ctl_sock_initialized))
+ return dccp_feat_init(sk);
return 0;
}
@@ -240,7 +201,6 @@ EXPORT_SYMBOL_GPL(dccp_init_sock);
void dccp_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
/*
* DCCP doesn't use sk_write_queue, just sk_send_head
@@ -258,7 +218,7 @@ void dccp_destroy_sock(struct sock *sk)
kfree(dp->dccps_service_list);
dp->dccps_service_list = NULL;
- if (dmsk->dccpms_send_ack_vector) {
+ if (dp->dccps_hc_rx_ackvec != NULL) {
dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
dp->dccps_hc_rx_ackvec = NULL;
}
@@ -267,7 +227,7 @@ void dccp_destroy_sock(struct sock *sk)
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
/* clean up feature negotiation state */
- dccp_feat_clean(dmsk);
+ dccp_feat_list_purge(&dp->dccps_featneg);
}
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
@@ -277,6 +237,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
+ /* do not start to listen if feature negotiation setup fails */
+ if (dccp_feat_finalise_settings(dp))
+ return -EPROTO;
return inet_csk_listen_start(sk, backlog);
}
@@ -466,42 +429,70 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
return 0;
}
-/* byte 1 is feature. the rest is the preference list */
-static int dccp_setsockopt_change(struct sock *sk, int type,
- struct dccp_so_feat __user *optval)
+static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
- struct dccp_so_feat opt;
- u8 *val;
- int rc;
+ u8 *list, len;
+ int i, rc;
- if (copy_from_user(&opt, optval, sizeof(opt)))
- return -EFAULT;
+ if (cscov < 0 || cscov > 15)
+ return -EINVAL;
/*
- * rfc4340: 6.1. Change Options
+ * Populate a list of permissible values, in the range cscov...15. This
+ * is necessary since feature negotiation of single values only works if
+ * both sides incidentally choose the same value. Since the list starts
+ * lowest-value first, negotiation will pick the smallest shared value.
*/
- if (opt.dccpsf_len < 1)
+ if (cscov == 0)
+ return 0;
+ len = 16 - cscov;
+
+ list = kmalloc(len, GFP_KERNEL);
+ if (list == NULL)
+ return -ENOBUFS;
+
+ for (i = 0; i < len; i++)
+ list[i] = cscov++;
+
+ rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
+
+ if (rc == 0) {
+ if (rx)
+ dccp_sk(sk)->dccps_pcrlen = cscov;
+ else
+ dccp_sk(sk)->dccps_pcslen = cscov;
+ }
+ kfree(list);
+ return rc;
+}
+
+static int dccp_setsockopt_ccid(struct sock *sk, int type,
+ char __user *optval, int optlen)
+{
+ u8 *val;
+ int rc = 0;
+
+ if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
return -EINVAL;
- val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
- if (!val)
+ val = kmalloc(optlen, GFP_KERNEL);
+ if (val == NULL)
return -ENOMEM;
- if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
- rc = -EFAULT;
- goto out_free_val;
+ if (copy_from_user(val, optval, optlen)) {
+ kfree(val);
+ return -EFAULT;
}
- rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
- val, opt.dccpsf_len, GFP_KERNEL);
- if (rc)
- goto out_free_val;
+ lock_sock(sk);
+ if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
+ rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
-out:
- return rc;
+ if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
+ rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
+ release_sock(sk);
-out_free_val:
kfree(val);
- goto out;
+ return rc;
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
@@ -510,7 +501,21 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
struct dccp_sock *dp = dccp_sk(sk);
int val, err = 0;
- if (optlen < sizeof(int))
+ switch (optname) {
+ case DCCP_SOCKOPT_PACKET_SIZE:
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ return 0;
+ case DCCP_SOCKOPT_CHANGE_L:
+ case DCCP_SOCKOPT_CHANGE_R:
+ DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+ return 0;
+ case DCCP_SOCKOPT_CCID:
+ case DCCP_SOCKOPT_RX_CCID:
+ case DCCP_SOCKOPT_TX_CCID:
+ return dccp_setsockopt_ccid(sk, optname, optval, optlen);
+ }
+
+ if (optlen < (int)sizeof(int))
return -EINVAL;
if (get_user(val, (int __user *)optval))
@@ -521,53 +526,24 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
lock_sock(sk);
switch (optname) {
- case DCCP_SOCKOPT_PACKET_SIZE:
- DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
- err = 0;
- break;
- case DCCP_SOCKOPT_CHANGE_L:
- if (optlen != sizeof(struct dccp_so_feat))
- err = -EINVAL;
- else
- err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
- (struct dccp_so_feat __user *)
- optval);
- break;
- case DCCP_SOCKOPT_CHANGE_R:
- if (optlen != sizeof(struct dccp_so_feat))
- err = -EINVAL;
- else
- err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
- (struct dccp_so_feat __user *)
- optval);
- break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
if (dp->dccps_role != DCCP_ROLE_SERVER)
err = -EOPNOTSUPP;
else
dp->dccps_server_timewait = (val != 0);
break;
- case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
- if (val < 0 || val > 15)
- err = -EINVAL;
- else
- dp->dccps_pcslen = val;
+ case DCCP_SOCKOPT_SEND_CSCOV:
+ err = dccp_setsockopt_cscov(sk, val, false);
break;
- case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
- if (val < 0 || val > 15)
- err = -EINVAL;
- else {
- dp->dccps_pcrlen = val;
- /* FIXME: add feature negotiation,
- * ChangeL(MinimumChecksumCoverage, val) */
- }
+ case DCCP_SOCKOPT_RECV_CSCOV:
+ err = dccp_setsockopt_cscov(sk, val, true);
break;
default:
err = -ENOPROTOOPT;
break;
}
-
release_sock(sk);
+
return err;
}
@@ -648,6 +624,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_GET_CUR_MPS:
val = dp->dccps_mss_cache;
break;
+ case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+ return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
+ case DCCP_SOCKOPT_TX_CCID:
+ val = ccid_get_current_tx_ccid(dp);
+ if (val < 0)
+ return -ENOPROTOOPT;
+ break;
+ case DCCP_SOCKOPT_RX_CCID:
+ val = ccid_get_current_rx_ccid(dp);
+ if (val < 0)
+ return -ENOPROTOOPT;
+ break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
val = dp->dccps_server_timewait;
break;
@@ -976,7 +964,7 @@ adjudge_to_death:
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- atomic_inc(sk->sk_prot->orphan_count);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
/*
* It is the last release_sock in its life. It will remove backlog.
@@ -1040,17 +1028,21 @@ static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
- int rc = -ENOBUFS;
+ int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
-
+ rc = percpu_counter_init(&dccp_orphan_count, 0);
+ if (rc)
+ goto out;
+ rc = -ENOBUFS;
+ inet_hashinfo_init(&dccp_hashinfo);
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out;
+ goto out_free_percpu;
/*
* Size and allocate the main established and bind bucket
@@ -1084,8 +1076,8 @@ static int __init dccp_init(void)
}
for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
- INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
+ INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
+ INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
}
if (inet_ehash_locks_alloc(&dccp_hashinfo))
@@ -1143,6 +1135,8 @@ out_free_dccp_ehash:
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_hashinfo.bind_bucket_cachep = NULL;
+out_free_percpu:
+ percpu_counter_destroy(&dccp_orphan_count);
goto out;
}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 21295993fdb8..018e210875e1 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -41,27 +41,6 @@ static struct ctl_table dccp_default_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "ack_ratio",
- .data = &sysctl_dccp_feat_ack_ratio,
- .maxlen = sizeof(sysctl_dccp_feat_ack_ratio),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "send_ackvec",
- .data = &sysctl_dccp_feat_send_ack_vector,
- .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "send_ndp",
- .data = &sysctl_dccp_feat_send_ndp_count,
- .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "request_retries",
.data = &sysctl_dccp_request_retries,
.maxlen = sizeof(sysctl_dccp_request_retries),
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 54b3c7e9e016..162d1e683c39 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -87,17 +87,6 @@ static void dccp_retransmit_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- /* retransmit timer is used for feature negotiation throughout
- * connection. In this case, no packet is re-transmitted, but rather an
- * ack is generated and pending changes are placed into its options.
- */
- if (sk->sk_send_head == NULL) {
- dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
- if (sk->sk_state == DCCP_OPEN)
- dccp_send_ack(sk);
- goto backoff;
- }
-
/*
* More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
@@ -126,7 +115,6 @@ static void dccp_retransmit_timer(struct sock *sk)
return;
}
-backoff:
icsk->icsk_backoff++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);