diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 6 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 2 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 10 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 10 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 7 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 42 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 8 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 8 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_queue.c | 2 | ||||
-rw-r--r-- | net/ipv4/ping.c | 27 | ||||
-rw-r--r-- | net/ipv4/proc.c | 1 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 16 | ||||
-rw-r--r-- | net/ipv4/tcp_bic.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 41 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_memcontrol.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp_diag.c | 15 |
24 files changed, 128 insertions, 112 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 335ca7abbd46..aa2a2c79776f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -408,8 +408,12 @@ config INET_TCP_DIAG def_tristate INET_DIAG config INET_UDP_DIAG + tristate "UDP: socket monitoring interface" depends on INET_DIAG - def_tristate INET_DIAG && IPV6 + default n + ---help--- + Support for UDP socket monitoring interface used by the ss tool. + If unsure, say Y. menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 65f01dc47565..e41c40f48cfe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev) ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ - RCU_INIT_POINTER(dev->ip_ptr, in_dev); + rcu_assign_pointer(dev->ip_ptr, in_dev); out: return in_dev; out_kfree: diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d04b13ae18fe..2b555a5521e0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -205,7 +205,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -/* Same as RCU_INIT_POINTER +/* Same as rcu_assign_pointer * but that macro() assumes that value is a pointer. */ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) @@ -529,7 +529,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * if (n) node_set_parent(n, tn); - RCU_INIT_POINTER(tn->child[i], n); + rcu_assign_pointer(tn->child[i], n); } #define MAX_WORK 10 @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) tp = node_parent((struct rt_trie_node *) tn); if (!tp) - RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1027,7 +1027,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1164,7 +1164,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)tn); } else { - RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; } } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fa057d105bef..450e5d21ed2a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -880,6 +880,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * to be intended in a v3 query. */ max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); + if (!max_delay) + max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return; @@ -1247,7 +1249,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - RCU_INIT_POINTER(in_dev->mc_list, im); + rcu_assign_pointer(in_dev->mc_list, im); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1819,7 +1821,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - RCU_INIT_POINTER(inet->mc_list, iml); + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -2006,7 +2008,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } - RCU_INIT_POINTER(pmc->sflist, newpsl); + rcu_assign_pointer(pmc->sflist, newpsl); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2109,7 +2111,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - RCU_INIT_POINTER(pmc->sflist, newpsl); + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2e4e24476c4c..19d66cefd7d3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -123,11 +123,14 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - spin_unlock(&head->lock); snum = smallest_rover; - goto have_snum; + goto tb_found; } } + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + snum = rover; + goto tb_found; + } goto next; } break; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2240a8e8c44d..fcf281819cd4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -71,7 +71,7 @@ static inline void inet_diag_unlock_handler( } int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -193,7 +193,7 @@ nlmsg_failure: EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -202,7 +202,7 @@ static int inet_csk_diag_fill(struct sock *sk, } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, struct inet_diag_req *req, + struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -253,7 +253,7 @@ nlmsg_failure: } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) @@ -264,7 +264,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req *req) + const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { int err; struct sock *sk; @@ -333,7 +333,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; int err; @@ -540,7 +540,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -554,7 +554,7 @@ static int inet_csk_diag_dump(struct sock *sk, static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (bc != NULL) { @@ -639,7 +639,7 @@ nlmsg_failure: static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb, - struct inet_diag_req *r, + struct inet_diag_req_v2 *r, const struct nlattr *bc) { struct inet_diag_entry entry; @@ -721,7 +721,7 @@ out: } void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc) + struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) { int i, num; int s_i, s_num; @@ -872,7 +872,7 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { const struct inet_diag_handler *handler; @@ -887,12 +887,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req); + int hdrlen = sizeof(struct inet_diag_req_v2); if (nlmsg_attrlen(cb->nlh, hdrlen)) bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc); + return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc); } static inline int inet_diag_type2proto(int type) @@ -909,10 +909,10 @@ static inline int inet_diag_type2proto(int type) static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) { - struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh); - struct inet_diag_req req; + struct inet_diag_req *rc = NLMSG_DATA(cb->nlh); + struct inet_diag_req_v2 req; struct nlattr *bc = NULL; - int hdrlen = sizeof(struct inet_diag_req_compat); + int hdrlen = sizeof(struct inet_diag_req); req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -929,8 +929,8 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c static int inet_diag_get_exact_compat(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { - struct inet_diag_req_compat *rc = NLMSG_DATA(nlh); - struct inet_diag_req req; + struct inet_diag_req *rc = NLMSG_DATA(nlh); + struct inet_diag_req_v2 req; req.sdiag_family = rc->idiag_family; req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); @@ -943,7 +943,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { - int hdrlen = sizeof(struct inet_diag_req_compat); + int hdrlen = sizeof(struct inet_diag_req); if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) @@ -970,7 +970,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { - int hdrlen = sizeof(struct inet_diag_req); + int hdrlen = sizeof(struct inet_diag_req_v2); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -990,7 +990,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) inet_diag_dump, NULL, 0); } - return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h)); + return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); } static struct sock_diag_handler inet_diag_handler = { diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 86f13c67ea85..bf4a9c4808e1 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -136,7 +136,7 @@ static int addr_compare(const struct inetpeer_addr *a, for (i = 0; i < n; i++) { if (a->addr.a6[i] == b->addr.a6[i]) continue; - if (a->addr.a6[i] < b->addr.a6[i]) + if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i]) return -1; return 1; } @@ -447,6 +447,7 @@ relookup: p->rate_last = 0; p->pmtu_expires = 0; p->pmtu_orig = 0; + p->redirect_genid = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2b53a1f7abf6..6b3ca5ba4450 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -422,6 +422,10 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + dev_hold(dev); ipgre_tunnel_link(ign, nt); return nt; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 7e4ec9fc2cef..6e412a60a91f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -141,7 +141,7 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ -u32 ic_dev_xid; /* Device under configuration */ +__be32 ic_dev_xid; /* Device under configuration */ /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -859,9 +859,9 @@ static int __init ic_bootp_string(char *dest, char *src, int len, int max) */ static void __init ic_do_bootp_ext(u8 *ext) { - u8 servers; - int i; - u16 mtu; + u8 servers; + int i; + __be16 mtu; #ifdef IPCONFIG_DEBUG u8 *c; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 413ed1ba7a5a..22a199315309 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - RCU_INIT_POINTER(*tp, t->next); + rcu_assign_pointer(*tp, t->next); break; } } @@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); - RCU_INIT_POINTER(*tp, t); + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); } static struct ip_tunnel * ipip_tunnel_locate(struct net *net, @@ -792,7 +792,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return -ENOMEM; dev_hold(dev); - RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel); + rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8e54490ee3f4..7bc2db6db8d4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1225,7 +1225,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { - RCU_INIT_POINTER(mrt->mroute_sk, sk); + rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; } rtnl_unlock(); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a057fe64debd..94d45e1f8882 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -431,7 +431,7 @@ __ipq_rcv_skb(struct sk_buff *skb) if (type <= IPQM_BASE) return; - if (security_netlink_recv(skb, CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); spin_lock_bh(&queue_lock); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 43d4c3b22369..aea5a199c37a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -140,13 +140,14 @@ static void ping_v4_unhash(struct sock *sk) write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sock_put(sk); - isk->inet_num = isk->inet_sport = 0; + isk->inet_num = 0; + isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(&ping_table.lock); } } -static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, +static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, u16 ident, int dif) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); @@ -154,15 +155,15 @@ static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, struct inet_sock *isk; struct hlist_nulls_node *hnode; - pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n", - (int)ident, (unsigned long)daddr, dif); + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk, - (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr, + pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, + (int)isk->inet_num, &isk->inet_rcv_saddr, sk->sk_bound_dev_if); pr_debug("iterate\n"); @@ -254,7 +255,7 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == INADDR_ANY) + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) chk_addr_ret = RTN_LOCAL; if ((sysctl_ip_nonlocal_bind == 0 && @@ -278,9 +279,9 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; } - pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n", + pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", (int)isk->inet_num, - (unsigned long) isk->inet_rcv_saddr, + &isk->inet_rcv_saddr, (int)sk->sk_bound_dev_if); err = 0; @@ -407,7 +408,7 @@ out: struct pingfakehdr { struct icmphdr icmph; struct iovec *iov; - u32 wcheck; + __wsum wcheck; }; static int ping_getfrag(void *from, char * to, @@ -459,7 +460,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct rtable *rt = NULL; struct ip_options_data opt_copy; int free = 0; - u32 saddr, daddr, faddr; + __be32 saddr, daddr, faddr; u8 tos; int err; @@ -696,8 +697,8 @@ void ping_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - u32 saddr = iph->saddr; - u32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + __be32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3569d8ecaeac..6afc807ee2ad 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), - SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS), SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4aa7e9dc0cbb..4cb9cd2f2c39 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -814,6 +814,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_rt_cache_rebuild_count = 4; + tcp_init_mem(net); limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9bcdec3ad772..06373b4a449a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3216,6 +3216,16 @@ static int __init set_thash_entries(char *str) } __setup("thash_entries=", set_thash_entries); +void tcp_init_mem(struct net *net) +{ + /* Set per-socket limits to no more than 1/128 the pressure threshold */ + unsigned long limit = nr_free_buffer_pages() / 8; + limit = max(limit, 128UL); + net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; + net->ipv4.sysctl_tcp_mem[1] = limit; + net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; +} + void __init tcp_init(void) { struct sk_buff *skb = NULL; @@ -3276,9 +3286,9 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1]) - << (PAGE_SHIFT - 7); + tcp_init_mem(&init_net); + limit = nr_free_buffer_pages() / 8; + limit = max(limit, 128UL); max_share = min(4UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 6187eb4d1dcf..f45e1c242440 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -63,7 +63,6 @@ static inline void bictcp_reset(struct bictcp *ca) { ca->cnt = 0; ca->last_max_cwnd = 0; - ca->loss_cwnd = 0; ca->last_cwnd = 0; ca->last_time = 0; ca->epoch_start = 0; @@ -72,7 +71,11 @@ static inline void bictcp_reset(struct bictcp *ca) static void bictcp_init(struct sock *sk) { - bictcp_reset(inet_csk_ca(sk)); + struct bictcp *ca = inet_csk_ca(sk); + + bictcp_reset(ca); + ca->loss_cwnd = 0; + if (initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } @@ -127,7 +130,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } /* if in slow start or link utilization is very low */ - if (ca->loss_cwnd == 0) { + if (ca->last_max_cwnd == 0) { if (ca->cnt > 20) /* increase cwnd 5% per RTT */ ca->cnt = 20; } @@ -185,7 +188,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct bictcp *ca = inet_csk_ca(sk); - return max(tp->snd_cwnd, ca->last_max_cwnd); + return max(tp->snd_cwnd, ca->loss_cwnd); } static void bictcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f376b05cca81..a9077f441cb2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -107,7 +107,6 @@ static inline void bictcp_reset(struct bictcp *ca) { ca->cnt = 0; ca->last_max_cwnd = 0; - ca->loss_cwnd = 0; ca->last_cwnd = 0; ca->last_time = 0; ca->bic_origin_point = 0; @@ -142,7 +141,10 @@ static inline void bictcp_hystart_reset(struct sock *sk) static void bictcp_init(struct sock *sk) { - bictcp_reset(inet_csk_ca(sk)); + struct bictcp *ca = inet_csk_ca(sk); + + bictcp_reset(ca); + ca->loss_cwnd = 0; if (hystart) bictcp_hystart_reset(sk); @@ -275,7 +277,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * The initial growth of cubic function may be too conservative * when the available bandwidth is still unknown. */ - if (ca->loss_cwnd == 0 && ca->cnt > 20) + if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ /* TCP Friendly */ @@ -342,7 +344,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); - return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); } static void bictcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 8cd357a8be79..ed3f2ad42e0f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -35,13 +35,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2877c3e09587..976034f82320 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ -#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ @@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, * These 6 states form finite state machine, controlled by the following events: * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) - * 3. Loss detection event of one of three flavors: + * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modfication, head until snd.fack is lost. - * B. SACK arrives sacking data transmitted after never retransmitted - * hole was sent out. - * C. SACK arrives sacking SND.NXT at the moment, when the + * A''. Its FACK modification, head until snd.fack is lost. + * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. * @@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, } /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". - * Event "C". Later note: FACK people cheated me again 8), we have to account + * Event "B". Later note: FACK people cheated me again 8), we have to account * for reordering! Ugly, but should help. * * Search retransmitted skbs from write_queue that were sent when snd_nxt was @@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (found_dup_sack && ((i + 1) == first_sack_index)) next_dup = &sp[i + 1]; - /* Event "B" in the comment above. */ - if (after(end_seq, tp->high_seq)) - state.flag |= FLAG_DATA_LOST; - /* Skip too early cached blocks */ while (tcp_sack_cache_ok(tp, cache) && !before(start_seq, cache->end_seq)) @@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk) tcp_verify_left_out(tp); } -/* Mark head of queue up as lost. With RFC3517 SACK, the packets is - * is against sacked "cnt", otherwise it's against facked "cnt" +/* Detect loss in event "A" above by marking head of queue up as lost. + * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * are considered lost. For RFC3517 SACK, a segment is considered lost if it + * has at least tp->reordering SACKed seqments above it; "packets" refers to + * the maximum SACKed segments to pass before reaching this limit. */ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { @@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) int cnt, oldcnt; int err; unsigned int mss; + /* Use SACK to deduce losses of new sequences sent during recovery */ + const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { @@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; - if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) break; oldcnt = cnt; @@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (tcp_check_sack_reneging(sk, flag)) return; - /* C. Process data loss notification, provided it is valid. */ - if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && - before(tp->snd_una, tp->high_seq) && - icsk->icsk_ca_state != TCP_CA_Open && - tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); - } - - /* D. Check consistency of the current state. */ + /* C. Check consistency of the current state. */ tcp_verify_left_out(tp); - /* E. Check state exit conditions. State can be terminated + /* D. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { WARN_ON(tp->retrans_out != 0); @@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, } } - /* F. Process state. */ + /* E. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1eb4ad57670e..337ba4cca052 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -631,7 +631,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; + key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 7fed04f875c1..49978788a9dc 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -108,7 +108,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) tcp = tcp_from_cgproto(cg_proto); percpu_counter_destroy(&tcp->tcp_sockets_allocated); - val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); + val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); if (val != RESOURCE_MAX) jump_label_dec(&memcg_socket_limit_enabled); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8c8de2780c7a..4ff3b6dc74fc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1141,11 +1141,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) sk_mem_uncharge(sk, len); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - /* Any change of skb->len requires recalculation of tso - * factor and mss. - */ + /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); return 0; } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 69f8a7ca63dd..8a949f19deb6 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -15,11 +15,10 @@ #include <linux/udp.h> #include <net/udp.h> #include <net/udplite.h> -#include <linux/inet_diag.h> #include <linux/sock_diag.h> static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req *req, + struct netlink_callback *cb, struct inet_diag_req_v2 *req, struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -30,7 +29,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req *req) + const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { int err = -EINVAL; struct sock *sk; @@ -88,7 +87,7 @@ out_nosk: } static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; @@ -136,13 +135,13 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udp_table, skb, cb, r, bc); } static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return udp_dump_one(&udp_table, in_skb, nlh, req); } @@ -154,13 +153,13 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udplite_table, skb, cb, r, bc); } static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req *req) + struct inet_diag_req_v2 *req) { return udp_dump_one(&udplite_table, in_skb, nlh, req); } |