diff options
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 319 |
1 files changed, 124 insertions, 195 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb21fccf2089..03a5d1ed3340 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -39,7 +39,7 @@ #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> - +#include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> @@ -64,8 +64,6 @@ #include <net/tcp_memcontrol.h> #include <net/busy_poll.h> -#include <asm/uaccess.h> - #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -199,6 +197,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; + ip6_set_txhash(sk); + /* * TCP over IPv4 */ @@ -341,7 +341,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sock *sk; int err; struct tcp_sock *tp; - __u32 seq; + struct request_sock *fastopen; + __u32 seq, snd_una; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, @@ -372,8 +373,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp = tcp_sk(sk); seq = ntohl(th->seq); + /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ + fastopen = tp->fastopen_rsk; + snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { + !between(seq, snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -437,8 +441,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can, it SYNs are crossed. --ANK */ + case TCP_SYN_RECV: + /* Only in fast or simultaneous open. If a fast open socket is + * is already accepted it is treated as a connected one below. + */ + if (fastopen && fastopen->sk == NULL) + break; + if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -462,12 +471,14 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, - struct flowi6 *fl6, + struct flowi *fl, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; @@ -475,7 +486,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -494,16 +505,6 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) -{ - struct flowi6 fl6; - int res; - - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); - if (!res) - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return res; -} static void tcp_v6_reqsk_destructor(struct request_sock *req) { @@ -529,8 +530,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } -static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, - int optlen) +static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, + int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; @@ -665,7 +666,8 @@ clear_hash_noput: return 1; } -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +static int __tcp_v6_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -705,27 +707,83 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } return 0; } + +static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +{ + int ret; + + rcu_read_lock(); + ret = __tcp_v6_inbound_md5_hash(sk, skb); + rcu_read_unlock(); + + return ret; +} + #endif +static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + ireq->ir_iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); + + if (!TCP_SKB_CB(skb)->when && + (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || + np->rxopt.bits.rxohlim || np->repflow)) { + atomic_inc(&skb->users); + ireq->pktopts = skb; + } +} + +static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + if (strict) + *strict = true; + return inet6_csk_route_req(sk, &fl->u.ip6, req); +} + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, + .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { + .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr), +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, -}; #endif + .init_req = tcp_v6_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v6_init_sequence, +#endif + .route_req = tcp_v6_route_req, + .init_seq = tcp_v6_init_sequence, + .send_synack = tcp_v6_send_synack, + .queue_hash_add = inet6_csk_reqsk_queue_hash_add, +}; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, + u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, u8 tclass, u32 label) { @@ -797,8 +855,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = inet6_iif(skb); + else + fl6.flowi6_oif = oif; + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -832,6 +893,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif + int oif; if (th->rst) return; @@ -875,7 +937,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0); + oif = sk ? sk->sk_bound_dev_if : 0; + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -887,11 +950,11 @@ release_sk1: } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass, + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); } @@ -903,7 +966,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_tclass, (tw->tw_flowlabel << 12)); inet_twsk_put(tw); @@ -912,8 +975,13 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ + tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } @@ -952,142 +1020,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -/* FIXME: this is substantially similar to the ipv4 code. - * Can some kind of merge be done? -- erics - */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; - struct flowi6 fl6; - bool want_cookie = false; - if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); - if (!want_cookie) - goto drop; - } - - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } + return tcp_conn_request(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); - if (req == NULL) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, NULL); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - ireq->ir_iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - - if (!isn) { - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || - np->repflow) { - atomic_inc(&skb->users); - ireq->pktopts = skb; - } - - if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - goto have_isn; - } - - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = tcp_v6_init_sequence(skb); - } -have_isn: - tcp_rsk(req)->snt_isn = isn; - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; - - if (tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb)) || - want_cookie) - goto drop_and_free; - - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ @@ -1203,6 +1146,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; + ip6_set_txhash(newsk); + /* Now IPv6 options... First: no IPv4 options. @@ -1258,7 +1203,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); + if (key != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1288,26 +1234,6 @@ out: return NULL; } -static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = ~csum_unfold(tcp_v6_check(skb->len, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0)); - - if (skb->len <= 76) { - return __skb_checksum_complete(skb); - } - return 0; -} - /* The socket must have it's spinlock held when we get * here. * @@ -1333,11 +1259,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); -#ifdef CONFIG_TCP_MD5SIG - if (tcp_v6_inbound_md5_hash (sk, skb)) - goto discard; -#endif - if (sk_filter(sk, skb)) goto discard; @@ -1481,7 +1402,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; - if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); @@ -1510,6 +1431,11 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; +#ifdef CONFIG_TCP_MD5SIG + if (tcp_v6_inbound_md5_hash(sk, skb)) + goto discard_and_relse; +#endif + if (sk_filter(sk, skb)) goto discard_and_relse; @@ -1591,7 +1517,8 @@ do_time_wait: break; case TCP_TW_RST: goto no_tcp_socket; - case TCP_TW_SUCCESS:; + case TCP_TW_SUCCESS: + ; } goto discard_it; } @@ -1636,7 +1563,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, + .twsk_destructor = tcp_twsk_destructor, }; static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1657,6 +1584,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif + .mtu_reduced = tcp_v6_mtu_reduced, }; #ifdef CONFIG_TCP_MD5SIG @@ -1670,7 +1598,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { /* * TCP over IPv4 via INET6 API */ - static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, @@ -1688,6 +1615,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif + .mtu_reduced = tcp_v4_mtu_reduced, }; #ifdef CONFIG_TCP_MD5SIG @@ -1764,6 +1692,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); + struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1806,7 +1735,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh + sp->sk_state == TCP_LISTEN ? + (fastopenq ? fastopenq->max_qlen : 0) : + (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } @@ -1924,7 +1855,6 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .mtu_reduced = tcp_v6_mtu_reduced, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, @@ -1966,7 +1896,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; |