From f90f92eed74251034f251e3cdf4fa5c4c1f09df0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:30 +0000 Subject: dccp: Initialisation framework for feature negotiation This initialises feature negotiation from two tables, which are in turn are initialised from sysctls. As a novel feature, specifics of the implementation (e.g. that short seqnos and ECN are not yet available) are advertised for robustness. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/feat.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++-------- net/dccp/feat.h | 2 +- 2 files changed, 57 insertions(+), 10 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4152308958ab..67ffac9905f8 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1115,23 +1115,70 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return 0; /* ignore FN options in all other states */ } +/** + * dccp_feat_init - Seed feature negotiation with host-specific defaults + * This initialises global defaults, depending on the value of the sysctls. + * These can later be overridden by registering changes via setsockopt calls. + * The last link in the chain is finalise_settings, to make sure that between + * here and the start of actual feature negotiation no inconsistencies enter. + * + * All features not appearing below use either defaults or are otherwise + * later adjusted through dccp_feat_finalise_settings(). + */ int dccp_feat_init(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + u8 on = 1, off = 0; int rc; + struct { + u8 *val; + u8 len; + } tx, rx; + + /* Non-negotiable (NN) features */ + rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0, + sysctl_dccp_feat_sequence_window); + if (rc) + return rc; + + /* Server-priority (SP) features */ + + /* Advertise that short seqnos are not supported (7.6.1) */ + rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1); + if (rc) + return rc; - INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ - INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ + /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */ + rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1); + if (rc) + return rc; + + /* + * We advertise the available list of CCIDs and reorder according to + * preferences, to avoid failure resulting from negotiating different + * singleton values (which always leads to failure). + * These settings can still (later) be overridden via sockopts. + */ + if (ccid_get_builtin_ccids(&tx.val, &tx.len) || + ccid_get_builtin_ccids(&rx.val, &rx.len)) + return -ENOBUFS; - /* Ack ratio */ - rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, - dp->dccps_l_ack_ratio); + if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) || + !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len)) + goto free_ccid_lists; + + rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len); + if (rc) + goto free_ccid_lists; + + rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len); + +free_ccid_lists: + kfree(tx.val); + kfree(rx.val); return rc; } -EXPORT_SYMBOL_GPL(dccp_feat_init); - int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) { struct dccp_sock *dp = dccp_sk(sk); diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 9b46e2a7866e..5e7b8481cd04 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -113,13 +113,13 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #define dccp_feat_debug(type, feat, val) #endif /* CONFIG_IP_DCCP_DEBUG */ +extern int dccp_feat_init(struct sock *sk); extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, u8 const *list, u8 len); extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, u8 mand, u8 opt, u8 feat, u8 *val, u8 len); extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); -extern int dccp_feat_init(struct sock *sk); /* * Encoding variable-length options and their maximum length. -- cgit v1.2.3 From 792b48780e8b6435d017cef4b5c304876a48653e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:31 +0000 Subject: dccp: Implement both feature-local and feature-remote Sequence Window feature This adds full support for local/remote Sequence Window feature, from which the * sequence-number-validity (W) and * acknowledgment-number-validity (W') windows derive as specified in RFC 4340, 7.5.3. Specifically, the following is contained in this patch: * integrated new socket fields into dccp_sk; * updated the update_gsr/gss routines with regard to these fields; * updated handler code: the Sequence Window feature is located at the TX side, so the local feature is meant if the handler-rx flag is false; * the initialisation of `rcv_wnd' in reqsk is removed, since - rcv_wnd is not used by the code anywhere; - sequence number checks are not done in the LISTEN state (cf. 7.5.3); - dccp_check_req checks the Ack number validity more rigorously; * the `struct dccp_minisock' became empty and is now removed. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 3 ++- include/linux/dccp.h | 24 ++++-------------------- net/dccp/dccp.h | 16 +++++++--------- net/dccp/feat.c | 13 +++++++++++-- net/dccp/minisocks.c | 11 ----------- net/dccp/proto.c | 2 -- 6 files changed, 24 insertions(+), 45 deletions(-) (limited to 'net/dccp') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 7a3bb1abb830..b132e4a3cf0f 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -141,7 +141,8 @@ rx_ccid = 2 Default CCID for the receiver-sender half-connection; see tx_ccid. seq_window = 100 - The initial sequence window (sec. 7.5.2). + The initial sequence window (sec. 7.5.2) of the sender. This influences + the local ackno validity and the remote seqno validity windows (7.5.1). tx_qlen = 5 The size of the transmit buffer in packets. A value of 0 corresponds diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 990e97fa1f07..7a0502ab383a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -363,19 +363,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 -/** - * struct dccp_minisock - Minimal DCCP connection representation - * - * Will be used to pass the state from dccp_request_sock to dccp_sock. - * - * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - */ -struct dccp_minisock { - __u64 dccpms_sequence_window; -}; - -extern void dccp_minisock_init(struct dccp_minisock *dmsk); - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from @@ -464,13 +451,14 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio + * @dccps_l_seq_win - local Sequence Window (influences ack number validity) + * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) @@ -504,12 +492,13 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; + __u64 dccps_l_seq_win:48; + __u64 dccps_r_seq_win:48; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct dccp_minisock dccps_minisock; struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; @@ -527,11 +516,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } -static inline struct dccp_minisock *dccp_msk(const struct sock *sk) -{ - return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index f2230fc168e1..04ae91898a68 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -409,23 +409,21 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - const struct dccp_minisock *dmsk = dccp_msk(sk); dp->dccps_gsr = seq; - dccp_set_seqno(&dp->dccps_swl, - dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4)); - dccp_set_seqno(&dp->dccps_swh, - dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4); + /* Sequence validity window depends on remote Sequence Window (7.5.1) */ + dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); + dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); } static inline void dccp_update_gss(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - dp->dccps_awh = dp->dccps_gss = seq; - dccp_set_seqno(&dp->dccps_awl, - (dp->dccps_gss - - dccp_msk(sk)->dccpms_sequence_window + 1)); + dp->dccps_gss = seq; + /* Ack validity window depends on local Sequence Window value (7.5.1) */ + dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); + dp->dccps_awh = dp->dccps_gss; } static inline int dccp_ack_pending(const struct sock *sk) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 67ffac9905f8..7303f79705d2 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -51,8 +51,17 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) { - if (!rx) - dccp_msk(sk)->dccpms_sequence_window = seq_win; + struct dccp_sock *dp = dccp_sk(sk); + + if (rx) { + dp->dccps_r_seq_win = seq_win; + /* propagate changes to update SWL/SWH */ + dccp_update_gsr(sk, dp->dccps_gsr); + } else { + dp->dccps_l_seq_win = seq_win; + /* propagate changes to update AWL */ + dccp_update_gss(sk, dp->dccps_gss); + } return 0; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 6821ae33dd37..5ca49cec95f5 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -42,11 +42,6 @@ struct inet_timewait_death_row dccp_death_row = { EXPORT_SYMBOL_GPL(dccp_death_row); -void dccp_minisock_init(struct dccp_minisock *dmsk) -{ - dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; -} - void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -110,7 +105,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct dccp_request_sock *dreq = dccp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct dccp_sock *newdp = dccp_sk(newsk); - struct dccp_minisock *newdmsk = dccp_msk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; @@ -128,10 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies */ - - /* See dccp_v4_conn_request */ - newdmsk->dccpms_sequence_window = req->rcv_wnd; - newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); @@ -290,7 +280,6 @@ int dccp_reqsk_init(struct request_sock *req, inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; inet_rsk(req)->acked = 0; - req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; /* inherit feature negotiation options from listening socket */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 945b4d5d23b3..314a1b5c033c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -174,8 +174,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct dccp_sock *dp = dccp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - dccp_minisock_init(&dp->dccps_minisock); - icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; -- cgit v1.2.3 From 883ca833e5fb814fb03426c9d35e5489ce43e8da Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:32 +0000 Subject: dccp: Initialisation and type-checking of feature sysctls This patch takes care of initialising and type-checking sysctls related to feature negotiation. Type checking is important since some of the sysctls now directly impact the feature-negotiation process. The sysctls are initialised with the known default values for each feature. For the type-checking the value constraints from RFC 4340 are used: * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes), tested and confirmed that it works up to 4294967295 - for Gbps speed; * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer); * CCIDs are between 0 .. 255; * request_retries, retries1, retries2 also between 0..255 for good measure; * tx_qlen is checked to be non-negative; * sync_ratelimit remains as before. Notes: ------ 1. Die s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c. 2. As pointed out by Arnaldo, the pattern of type-checking repeats itself in other places, sometimes with exactly the same kind of definitions (e.g. "static int zero;"). It may be a good idea (kernel janitors?) to consolidate type checking. For the sake of keeping the changeset small and in order not to affect other subsystems, I have not strived to generalise here. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 8 -------- net/dccp/dccp.h | 3 --- net/dccp/feat.c | 11 ++++++++--- net/dccp/feat.h | 8 ++++++++ net/dccp/options.c | 4 ---- net/dccp/sysctl.c | 43 ++++++++++++++++++++++++++++++------------- 6 files changed, 46 insertions(+), 31 deletions(-) (limited to 'net/dccp') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7a0502ab383a..7434a8353e23 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -355,14 +355,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } - -/* initial values for each feature */ -#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 -#define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID DCCPC_CCID2 -/* FIXME: for now we're default to 1 but it should really be 0 */ -#define DCCPF_INITIAL_SEND_NDP_COUNT 1 - /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 04ae91898a68..44a5bc6f6785 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -95,9 +95,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; extern int sysctl_dccp_retries2; -extern int sysctl_dccp_feat_sequence_window; -extern int sysctl_dccp_feat_rx_ccid; -extern int sysctl_dccp_feat_tx_ccid; extern int sysctl_dccp_tx_qlen; extern int sysctl_dccp_sync_ratelimit; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 7303f79705d2..12006e9b2472 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -25,6 +25,11 @@ #include "ccid.h" #include "feat.h" +/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */ +unsigned long sysctl_dccp_sequence_window __read_mostly = 100; +int sysctl_dccp_rx_ccid __read_mostly = 2, + sysctl_dccp_tx_ccid __read_mostly = 2; + /* * Feature activation handlers. * @@ -1146,7 +1151,7 @@ int dccp_feat_init(struct sock *sk) /* Non-negotiable (NN) features */ rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0, - sysctl_dccp_feat_sequence_window); + sysctl_dccp_sequence_window); if (rc) return rc; @@ -1172,8 +1177,8 @@ int dccp_feat_init(struct sock *sk) ccid_get_builtin_ccids(&rx.val, &rx.len)) return -ENOBUFS; - if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) || - !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len)) + if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || + !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) goto free_ccid_lists; rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len); diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 5e7b8481cd04..40aa7a10bd5f 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -100,6 +100,13 @@ struct ccid_dependency { u8 val; }; +/* + * Sysctls to seed defaults for feature negotiation + */ +extern unsigned long sysctl_dccp_sequence_window; +extern int sysctl_dccp_rx_ccid; +extern int sysctl_dccp_tx_ccid; + #ifdef CONFIG_IP_DCCP_DEBUG extern const char *dccp_feat_typename(const u8 type); extern const char *dccp_feat_name(const u8 feat); @@ -114,6 +121,7 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #endif /* CONFIG_IP_DCCP_DEBUG */ extern int dccp_feat_init(struct sock *sk); +extern void dccp_feat_initialise_sysctls(void); extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, u8 const *list, u8 len); extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); diff --git a/net/dccp/options.c b/net/dccp/options.c index 7b1165c21f51..3e2726c7182d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -23,10 +23,6 @@ #include "dccp.h" #include "feat.h" -int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; -int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; - u64 dccp_decode_value_var(const u8 *bf, const u8 len) { u64 value = 0; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 018e210875e1..a5a1856234e7 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -18,55 +18,72 @@ #error This file should not be compiled without CONFIG_SYSCTL defined #endif +/* Boundary values */ +static int zero = 0, + u8_max = 0xFF; +static unsigned long seqw_min = 32; + static struct ctl_table dccp_default_table[] = { { .procname = "seq_window", - .data = &sysctl_dccp_feat_sequence_window, - .maxlen = sizeof(sysctl_dccp_feat_sequence_window), + .data = &sysctl_dccp_sequence_window, + .maxlen = sizeof(sysctl_dccp_sequence_window), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ }, { .procname = "rx_ccid", - .data = &sysctl_dccp_feat_rx_ccid, - .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), + .data = &sysctl_dccp_rx_ccid, + .maxlen = sizeof(sysctl_dccp_rx_ccid), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, /* RFC 4340, 10. */ }, { .procname = "tx_ccid", - .data = &sysctl_dccp_feat_tx_ccid, - .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), + .data = &sysctl_dccp_tx_ccid, + .maxlen = sizeof(sysctl_dccp_tx_ccid), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, /* RFC 4340, 10. */ }, { .procname = "request_retries", .data = &sysctl_dccp_request_retries, .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, }, { .procname = "retries1", .data = &sysctl_dccp_retries1, .maxlen = sizeof(sysctl_dccp_retries1), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, }, { .procname = "retries2", .data = &sysctl_dccp_retries2, .maxlen = sizeof(sysctl_dccp_retries2), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, }, { .procname = "tx_qlen", .data = &sysctl_dccp_tx_qlen, .maxlen = sizeof(sysctl_dccp_tx_qlen), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sync_ratelimit", -- cgit v1.2.3 From f3f3abb62ccb1a1c77bcce855c04e12356e6ac95 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:33 +0000 Subject: dccp: Debugging functions for feature negotiation Since all feature-negotiation processing now takes place in feat.c, functions for producing verbose debugging output are concentrated there. New functions to print out values, entry records, and options are provided, and also a macro is defined to not always have the function name in the output line. Thanks a lot to Wei Yongjun and Giuseppe Galeota for help and discussion with an earlier revision of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 + net/dccp/feat.c | 149 ++++++++++++++++++++++++++++++++++++++--------------- net/dccp/feat.h | 13 ----- net/dccp/options.c | 4 -- 4 files changed, 109 insertions(+), 59 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 44a5bc6f6785..08a569ff02d1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -42,9 +42,11 @@ extern int dccp_debug; #define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) +#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) +#define dccp_debug(format, a...) #endif extern struct inet_hashinfo dccp_hashinfo; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 12006e9b2472..b04160a2eea5 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -208,6 +208,100 @@ static int dccp_feat_default_value(u8 feat_num) return idx < 0 ? 0 : dccp_feat_table[idx].default_value; } +/* + * Debugging and verbose-printing section + */ +static const char *dccp_feat_fname(const u8 feat) +{ + static const char *feature_names[] = { + [DCCPF_RESERVED] = "Reserved", + [DCCPF_CCID] = "CCID", + [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", + [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", + [DCCPF_ECN_INCAPABLE] = "ECN Incapable", + [DCCPF_ACK_RATIO] = "Ack Ratio", + [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", + [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", + [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", + [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", + }; + if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) + return feature_names[DCCPF_RESERVED]; + + if (feat == DCCPF_SEND_LEV_RATE) + return "Send Loss Event Rate"; + if (feat >= DCCPF_MIN_CCID_SPECIFIC) + return "CCID-specific"; + + return feature_names[feat]; +} + +static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING", + "UNSTABLE", "STABLE" }; + +#ifdef CONFIG_IP_DCCP_DEBUG +static const char *dccp_feat_oname(const u8 opt) +{ + switch (opt) { + case DCCPO_CHANGE_L: return "Change_L"; + case DCCPO_CONFIRM_L: return "Confirm_L"; + case DCCPO_CHANGE_R: return "Change_R"; + case DCCPO_CONFIRM_R: return "Confirm_R"; + } + return NULL; +} + +static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val) +{ + u8 i, type = dccp_feat_type(feat_num); + + if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL)) + dccp_pr_debug_cat("(NULL)"); + else if (type == FEAT_SP) + for (i = 0; i < val->sp.len; i++) + dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]); + else if (type == FEAT_NN) + dccp_pr_debug_cat("%llu", (unsigned long long)val->nn); + else + dccp_pr_debug_cat("unknown type %u", type); +} + +static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len) +{ + u8 type = dccp_feat_type(feat_num); + dccp_feat_val fval = { .sp.vec = list, .sp.len = len }; + + if (type == FEAT_NN) + fval.nn = dccp_decode_value_var(list, len); + dccp_feat_printval(feat_num, &fval); +} + +static void dccp_feat_print_entry(struct dccp_feat_entry const *entry) +{ + dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote", + dccp_feat_fname(entry->feat_num)); + dccp_feat_printval(entry->feat_num, &entry->val); + dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state], + entry->needs_confirm ? "(Confirm pending)" : ""); +} + +#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \ + dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\ + dccp_feat_printvals(feat, val, len); \ + dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0) + +#define dccp_feat_print_fnlist(fn_list) { \ + const struct dccp_feat_entry *___entry; \ + \ + dccp_pr_debug("List Dump:\n"); \ + list_for_each_entry(___entry, fn_list, node) \ + dccp_feat_print_entry(___entry); \ +} +#else /* ! CONFIG_IP_DCCP_DEBUG */ +#define dccp_feat_print_opt(opt, feat, val, len, mandatory) +#define dccp_feat_print_fnlist(fn_list) +#endif + static int __dccp_feat_activate(struct sock *sk, const int idx, const bool is_local, dccp_feat_val const *fval) { @@ -240,6 +334,10 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, /* Location is RX if this is a local-RX or remote-TX feature */ rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); + dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX", + dccp_feat_fname(dccp_feat_table[idx].feat_num), + fval ? "" : "default ", (unsigned long long)val); + return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } @@ -544,6 +642,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; } } + dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0); if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) return -1; @@ -797,6 +896,7 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp) while (i--) if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) return -1; + dccp_feat_print_fnlist(fn); return 0; } @@ -915,6 +1015,8 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ goto unknown_feature_or_value; + dccp_feat_print_opt(opt, feat, val, len, is_mandatory); + /* * Negotiation of NN features: Change R is invalid, so there is no * simultaneous negotiation; hence we do not look up in the list. @@ -1020,6 +1122,8 @@ static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt, const bool local = (opt == DCCPO_CONFIRM_R); struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); + dccp_feat_print_opt(opt, feat, val, len, is_mandatory); + if (entry == NULL) { /* nothing queued: ignore or handle error */ if (is_mandatory && type == FEAT_UNKNOWN) return DCCP_RESET_CODE_MANDATORY_ERROR; @@ -1217,9 +1321,10 @@ int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) goto activation_failed; } if (cur->state != FEAT_STABLE) { - DCCP_CRIT("Negotiation of %s %u failed in state %u", + DCCP_CRIT("Negotiation of %s %s failed in state %s", cur->is_local ? "local" : "remote", - cur->feat_num, cur->state); + dccp_feat_fname(cur->feat_num), + dccp_feat_sname[cur->state]); goto activation_failed; } fvals[idx][cur->is_local] = &cur->val; @@ -1260,43 +1365,3 @@ activation_failed: dp->dccps_hc_rx_ackvec = NULL; return -1; } - -#ifdef CONFIG_IP_DCCP_DEBUG -const char *dccp_feat_typename(const u8 type) -{ - switch(type) { - case DCCPO_CHANGE_L: return("ChangeL"); - case DCCPO_CONFIRM_L: return("ConfirmL"); - case DCCPO_CHANGE_R: return("ChangeR"); - case DCCPO_CONFIRM_R: return("ConfirmR"); - /* the following case must not appear in feature negotation */ - default: dccp_pr_debug("unknown type %d [BUG!]\n", type); - } - return NULL; -} - -const char *dccp_feat_name(const u8 feat) -{ - static const char *feature_names[] = { - [DCCPF_RESERVED] = "Reserved", - [DCCPF_CCID] = "CCID", - [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", - [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", - [DCCPF_ECN_INCAPABLE] = "ECN Incapable", - [DCCPF_ACK_RATIO] = "Ack Ratio", - [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", - [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", - [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", - [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", - }; - if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) - return feature_names[DCCPF_RESERVED]; - - if (feat == DCCPF_SEND_LEV_RATE) - return "Send Loss Event Rate"; - if (feat >= DCCPF_MIN_CCID_SPECIFIC) - return "CCID-specific"; - - return feature_names[feat]; -} -#endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 40aa7a10bd5f..f96721619def 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -107,19 +107,6 @@ extern unsigned long sysctl_dccp_sequence_window; extern int sysctl_dccp_rx_ccid; extern int sysctl_dccp_tx_ccid; -#ifdef CONFIG_IP_DCCP_DEBUG -extern const char *dccp_feat_typename(const u8 type); -extern const char *dccp_feat_name(const u8 feat); - -static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) -{ - dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type), - dccp_feat_name(feat), feat, val); -} -#else -#define dccp_feat_debug(type, feat, val) -#endif /* CONFIG_IP_DCCP_DEBUG */ - extern int dccp_feat_init(struct sock *sk); extern void dccp_feat_initialise_sysctls(void); extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, diff --git a/net/dccp/options.c b/net/dccp/options.c index 3e2726c7182d..1b08cae9c65b 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -498,10 +498,6 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, *to++ = *val; if (len) memcpy(to, val, len); - - dccp_pr_debug("%s(%s (%d), ...), length %d\n", - dccp_feat_typename(type), - dccp_feat_name(feat), feat, len); return 0; } -- cgit v1.2.3 From 361a5c1dd0bd7bb2b90e7fe9127b366d3566522e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 27 Feb 2009 22:38:28 +0000 Subject: dccp: Minimise header option overhead in setting the MPS This patch resolves a long-standing FIXME to dynamically update the Maximum Packet Size depending on actual options usage. It uses the flags set by the feature-negotiation infrastructure to compute the required header option size. Most options are fixed-size, a notable exception are Ack Vectors (required currently only by CCID-2). These can have any length between 3 and 1020 bytes. As a result of testing, 16 bytes (2 bytes for type/length plus 14 Ack Vector cells) have been found to be sufficient for loss-free situations. There are currently no CCID-specific header options which may appear on data packets, thus it is not necessary to define a corresponding CCID field as suggested in the old comment. Further changes: ---------------- Adjusted the type of 'cur_mps' to match the unsigned return type of the function. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ackvec.h | 3 +++ net/dccp/output.c | 22 ++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 45f95e55f873..7ea557b7c6b1 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -20,6 +20,9 @@ /* We can spread an ack vector across multiple options */ #define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) +/* Estimated minimum average Ack Vector length - used for updating MPS */ +#define DCCPAV_MIN_OPTLEN 16 + #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) #define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) diff --git a/net/dccp/output.c b/net/dccp/output.c index 22a618af4893..27c79bcc6a1e 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -161,21 +161,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); u32 ccmps = dccp_determine_ccmps(dp); - int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; + u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; /* Account for header lengths and IPv4/v6 option overhead */ cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); /* - * FIXME: this should come from the CCID infrastructure, where, say, - * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets - * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED - * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to - * make it a multiple of 4 + * Leave enough headroom for common DCCP header options. + * This only considers options which may appear on DCCP-Data packets, as + * per table 3 in RFC 4340, 5.8. When running out of space for other + * options (eg. Ack Vector which can take up to 255 bytes), it is better + * to schedule a separate Ack. Thus we leave headroom for the following: + * - 1 byte for Slow Receiver (11.6) + * - 6 bytes for Timestamp (13.1) + * - 10 bytes for Timestamp Echo (13.3) + * - 8 bytes for NDP count (7.7, when activated) + * - 6 bytes for Data Checksum (9.3) + * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled) */ - - cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4); + cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 + + (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4); /* And store cached results */ icsk->icsk_pmtu_cookie = pmtu; -- cgit v1.2.3 From 86739fb96e8c8269fc5b3d300c959bede272a6f6 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 27 Feb 2009 22:38:29 +0000 Subject: dccp: Do not let initial option overhead shrink the MPS This fixes a problem caused by the overlap of the connection-setup and established-state phases of DCCP connections. During connection setup, the client retransmits Confirm Feature-Negotiation options until a response from the server signals that it can move from the half-established PARTOPEN into the OPEN state, whereupon the connection is fully established on both ends (RFC 4340, 8.1.5). However, since the client may already send data while it is in the PARTOPEN state, consequences arise for the Maximum Packet Size: the problem is that the initial option overhead is much higher than for the subsequent established phase, as it involves potentially many variable-length list-type options (server-priority options, RFC 4340, 6.4). Applying the standard MPS is insufficient here: especially with larger payloads this can lead to annoying, counter-intuitive EMSGSIZE errors. On the other hand, reducing the MPS available for the established phase by the added initial overhead is highly wasteful and inefficient. The solution chosen therefore is a two-phase strategy: If the payload length of the DataAck in PARTOPEN is too large, an Ack is sent to carry the options, and the feature-negotiation list is then flushed. This means that the server gets two Acks for one Response. If both Acks get lost, it is probably better to restart the connection anyway and devising yet another special-case does not seem worth the extra complexity. The result is a higher utilisation of the available packet space for the data transmission phase (established state) of a connection. The patch (over-)estimates the initial overhead to be 32*4 bytes -- commonly seen values were around 90 bytes for initial feature-negotiation options. It uses sizeof(u32) to mean "aligned units of 4 bytes". For consistency, another use of 4-byte alignment is adapted. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/dccp.h | 5 ++++- net/dccp/output.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 08a569ff02d1..d6bc47363b1c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -63,11 +63,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields * Hence a safe upper bound for the maximum option length is 1020-28 = 992 */ -#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) +#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t)) #define DCCP_MAX_PACKET_HDR 28 #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) +/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */ +#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t)) + #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 27c79bcc6a1e..36bcc00654d3 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -276,7 +276,20 @@ void dccp_write_xmit(struct sock *sk, int block) const int len = skb->len; if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ + const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; + /* + * See 8.1.5 - Handshake Completion. + * + * For robustness we resend Confirm options until the client has + * entered OPEN. During the initial feature negotiation, the MPS + * is smaller than usual, reduced by the Change/Confirm options. + */ + if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { + DCCP_WARN("Payload too large (%d) for featneg.\n", len); + dccp_send_ack(sk); + dccp_feat_list_purge(&dp->dccps_featneg); + } + inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, -- cgit v1.2.3