diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 4 | ||||
-rw-r--r-- | net/ipv4/Makefile | 2 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/arp.c | 20 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 447 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 10 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 6 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_ULOG.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/iptable_filter.c | 2 | ||||
-rw-r--r-- | net/ipv4/proc.c | 6 | ||||
-rw-r--r-- | net/ipv4/route.c | 161 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 2 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 69 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 34 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 20 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 20 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_memcontrol.c | 272 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 15 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 8 | ||||
-rw-r--r-- | net/ipv4/tunnel4.c | 10 | ||||
-rw-r--r-- | net/ipv4/udp.c | 3 | ||||
-rw-r--r-- | net/ipv4/udp_diag.c | 201 | ||||
-rw-r--r-- | net/ipv4/xfrm4_tunnel.c | 6 |
29 files changed, 1064 insertions, 289 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index cbb505ba9324..1a8f93bd2d4f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -409,6 +409,10 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG +config INET_UDP_DIAG + depends on INET_DIAG + def_tristate INET_DIAG && IPV6 + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f2dc69cffb57..ff75d3bbcd6a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o +obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o @@ -47,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 15dc4c4828de..f7b5670744f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1672,6 +1672,8 @@ static int __init inet_init(void) ip_static_sysctl_init(); #endif + tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; + /* * Add all the base protocols. */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ff324ebc8893..381a0876c363 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -277,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh) default: break; case ARPHRD_ROSE: -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: -#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: #endif neigh->ops = &arp_broken_ops; @@ -629,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp->ar_pro = htons(ETH_P_IP); break; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: arp->ar_hrd = htons(ARPHRD_AX25); arp->ar_pro = htons(AX25_P_IP); break; -#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: arp->ar_hrd = htons(ARPHRD_NETROM); arp->ar_pro = htons(AX25_P_IP); @@ -643,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, #endif #endif -#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) +#if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: arp->ar_hrd = htons(ARPHRD_ETHER); arp->ar_pro = htons(ETH_P_IP); break; #endif -#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) +#if IS_ENABLED(CONFIG_TR) case ARPHRD_IEEE802_TR: arp->ar_hrd = htons(ARPHRD_IEEE802); arp->ar_pro = htons(ETH_P_IP); @@ -1036,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, return -EINVAL; } switch (dev->type) { -#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) +#if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: /* * According to RFC 1390, FDDI devices should accept ARP @@ -1282,7 +1282,7 @@ void __init arp_init(void) } #ifdef CONFIG_PROC_FS -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) /* ------------------------------------------------------------------------ */ /* @@ -1330,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, read_lock(&n->lock); /* Convert hardware address to XX:XX:XX:XX ... form. */ -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) ax2asc2((ax25_address *)n->ha, hbuffer); else { @@ -1343,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, if (k != 0) --k; hbuffer[k] = 0; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) } #endif sprintf(tbuf, "%pI4", n->primary_key); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a598768c616c..2e4e24476c4c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else #define AF_INET_FAMILY(fam) 1 diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 0a46c541b477..fb2e47ff59f7 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -33,6 +33,7 @@ #include <linux/stddef.h> #include <linux/inet_diag.h> +#include <linux/sock_diag.h> static const struct inet_diag_handler **inet_diag_table; @@ -45,24 +46,22 @@ struct inet_diag_entry { u16 userlocks; }; -static struct sock *idiagnl; - #define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) static DEFINE_MUTEX(inet_diag_table_mutex); -static const struct inet_diag_handler *inet_diag_lock_handler(int type) +static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { - if (!inet_diag_table[type]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_INET_DIAG, type); + if (!inet_diag_table[proto]) + request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET, proto); mutex_lock(&inet_diag_table_mutex); - if (!inet_diag_table[type]) + if (!inet_diag_table[proto]) return ERR_PTR(-ENOENT); - return inet_diag_table[type]; + return inet_diag_table[proto]; } static inline void inet_diag_unlock_handler( @@ -71,21 +70,21 @@ static inline void inet_diag_unlock_handler( mutex_unlock(&inet_diag_table_mutex); } -static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, +int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, + struct sk_buff *skb, struct inet_diag_req *req, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; void *info = NULL; struct inet_diag_meminfo *minfo = NULL; unsigned char *b = skb_tail_pointer(skb); const struct inet_diag_handler *handler; + int ext = req->idiag_ext; - handler = inet_diag_table[unlh->nlmsg_type]; + handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); @@ -97,25 +96,13 @@ static int inet_csk_diag_fill(struct sock *sk, if (ext & (1 << (INET_DIAG_MEMINFO - 1))) minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); - if (ext & (1 << (INET_DIAG_INFO - 1))) - info = INET_DIAG_PUT(skb, INET_DIAG_INFO, - handler->idiag_info_size); - - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { - const size_t len = strlen(icsk->icsk_ca_ops->name); - - strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), - icsk->icsk_ca_ops->name); - } - r->idiag_family = sk->sk_family; r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; r->id.idiag_if = sk->sk_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)sk; - r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + sock_diag_save_cookie(sk, r->id.idiag_cookie); r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; @@ -128,7 +115,7 @@ static int inet_csk_diag_fill(struct sock *sk, if (ext & (1 << (INET_DIAG_TOS - 1))) RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -139,6 +126,21 @@ static int inet_csk_diag_fill(struct sock *sk, } #endif + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = sock_i_ino(sk); + + if (minfo) { + minfo->idiag_rmem = sk_rmem_alloc_get(sk); + minfo->idiag_wmem = sk->sk_wmem_queued; + minfo->idiag_fmem = sk->sk_forward_alloc; + minfo->idiag_tmem = sk_wmem_alloc_get(sk); + } + + if (icsk == NULL) { + r->idiag_rqueue = r->idiag_wqueue = 0; + goto out; + } + #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) if (icsk->icsk_pending == ICSK_TIME_RETRANS) { @@ -159,14 +161,14 @@ static int inet_csk_diag_fill(struct sock *sk, } #undef EXPIRES_IN_MS - r->idiag_uid = sock_i_uid(sk); - r->idiag_inode = sock_i_ino(sk); + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info)); - if (minfo) { - minfo->idiag_rmem = sk_rmem_alloc_get(sk); - minfo->idiag_wmem = sk->sk_wmem_queued; - minfo->idiag_fmem = sk->sk_forward_alloc; - minfo->idiag_tmem = sk_wmem_alloc_get(sk); + if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { + const size_t len = strlen(icsk->icsk_ca_ops->name); + + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), + icsk->icsk_ca_ops->name); } handler->idiag_get_info(sk, r, info); @@ -175,6 +177,7 @@ static int inet_csk_diag_fill(struct sock *sk, icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) icsk->icsk_ca_ops->get_info(sk, ext, skb); +out: nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -183,10 +186,20 @@ nlmsg_failure: nlmsg_trim(skb, b); return -EMSGSIZE; } +EXPORT_SYMBOL_GPL(inet_sk_diag_fill); + +static int inet_csk_diag_fill(struct sock *sk, + struct sk_buff *skb, struct inet_diag_req *req, + u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + return inet_sk_diag_fill(sk, inet_csk(sk), + skb, req, pid, seq, nlmsg_flags, unlh); +} static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, int ext, u32 pid, - u32 seq, u16 nlmsg_flags, + struct sk_buff *skb, struct inet_diag_req *req, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; @@ -207,8 +220,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; r->id.idiag_if = tw->tw_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)tw; - r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1); + sock_diag_save_cookie(tw, r->id.idiag_cookie); r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; r->id.idiag_src[0] = tw->tw_rcv_saddr; @@ -220,7 +232,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); @@ -237,42 +249,31 @@ nlmsg_failure: } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, ext, pid, seq, nlmsg_flags, + skb, r, pid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); } -static int inet_diag_get_exact(struct sk_buff *in_skb, - const struct nlmsghdr *nlh) +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, + const struct nlmsghdr *nlh, struct inet_diag_req *req) { int err; struct sock *sk; - struct inet_diag_req *req = NLMSG_DATA(nlh); struct sk_buff *rep; - struct inet_hashinfo *hashinfo; - const struct inet_diag_handler *handler; - handler = inet_diag_lock_handler(nlh->nlmsg_type); - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - goto unlock; - } - - hashinfo = handler->idiag_hashinfo; err = -EINVAL; - - if (req->idiag_family == AF_INET) { + if (req->sdiag_family == AF_INET) { sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - else if (req->idiag_family == AF_INET6) { +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { sk = inet6_lookup(&init_net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, @@ -282,29 +283,26 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, } #endif else { - goto unlock; + goto out_nosk; } err = -ENOENT; if (sk == NULL) - goto unlock; + goto out_nosk; - err = -ESTALE; - if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || - req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) goto out; err = -ENOMEM; rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + sizeof(struct inet_diag_meminfo) + - handler->idiag_info_size + 64)), + sizeof(struct tcp_info) + 64)), GFP_KERNEL); if (!rep) goto out; - err = sk_diag_fill(sk, rep, req->idiag_ext, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -312,7 +310,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, kfree_skb(rep); goto out; } - err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -324,8 +322,25 @@ out: else sock_put(sk); } -unlock: +out_nosk: + return err; +} +EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); + +static int inet_diag_get_exact(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + struct inet_diag_req *req) +{ + const struct inet_diag_handler *handler; + int err; + + handler = inet_diag_lock_handler(req->sdiag_protocol); + if (IS_ERR(handler)) + err = PTR_ERR(handler); + else + err = handler->dump_one(in_skb, nlh, req); inet_diag_unlock_handler(handler); + return err; } @@ -356,9 +371,12 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) } -static int inet_diag_bc_run(const void *bc, int len, - const struct inet_diag_entry *entry) +static int inet_diag_bc_run(const struct nlattr *_bc, + const struct inet_diag_entry *entry) { + const void *bc = nla_data(_bc); + int len = nla_len(_bc); + while (len > 0) { int yes = 1; const struct inet_diag_bc_op *op = bc; @@ -432,6 +450,35 @@ static int inet_diag_bc_run(const void *bc, int len, return len == 0; } +int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) +{ + struct inet_diag_entry entry; + struct inet_sock *inet = inet_sk(sk); + + if (bc == NULL) + return 1; + + entry.family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + if (entry.family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + entry.saddr = np->rcv_saddr.s6_addr32; + entry.daddr = np->daddr.s6_addr32; + } else +#endif + { + entry.saddr = &inet->inet_rcv_saddr; + entry.daddr = &inet->inet_daddr; + } + entry.sport = inet->inet_num; + entry.dport = ntohs(inet->inet_dport); + entry.userlocks = sk->sk_userlocks; + + return inet_diag_bc_run(bc, &entry); +} +EXPORT_SYMBOL_GPL(inet_diag_bc_sk); + static int valid_cc(const void *bc, int len, int cc) { while (len >= 0) { @@ -488,57 +535,29 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req *r, + const struct nlattr *bc) { - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + if (!inet_diag_bc_sk(bc, sk)) + return 0; - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { - struct inet_diag_entry entry; - const struct nlattr *bc = nlmsg_find_attr(cb->nlh, - sizeof(*r), - INET_DIAG_REQ_BYTECODE); - struct inet_sock *inet = inet_sk(sk); - - entry.family = sk->sk_family; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; - } else -#endif - { - entry.saddr = &inet->inet_rcv_saddr; - entry.daddr = &inet->inet_daddr; - } - entry.sport = inet->inet_num; - entry.dport = ntohs(inet->inet_dport); - entry.userlocks = sk->sk_userlocks; - - if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) - return 0; - } - - return inet_csk_diag_fill(sk, skb, r->idiag_ext, + return inet_csk_diag_fill(sk, skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req *r, + const struct nlattr *bc) { - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { + if (bc != NULL) { struct inet_diag_entry entry; - const struct nlattr *bc = nlmsg_find_attr(cb->nlh, - sizeof(*r), - INET_DIAG_REQ_BYTECODE); entry.family = tw->tw_family; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); @@ -554,11 +573,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, entry.dport = ntohs(tw->tw_dport); entry.userlocks = 0; - if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) + if (!inet_diag_bc_run(bc, &entry)) return 0; } - return inet_twsk_diag_fill(tw, skb, r->idiag_ext, + return inet_twsk_diag_fill(tw, skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -584,8 +603,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_retrans = req->retrans; r->id.idiag_if = sk->sk_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)req; - r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); + sock_diag_save_cookie(req, r->id.idiag_cookie); tmo = req->expires - jiffies; if (tmo < 0) @@ -600,7 +618,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_wqueue = 0; r->idiag_uid = sock_i_uid(sk); r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr; *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; @@ -616,13 +634,13 @@ nlmsg_failure: } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req *r, + const struct nlattr *bc) { struct inet_diag_entry entry; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; - const struct nlattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); int j, s_j; int reqnum, s_reqnum; @@ -642,9 +660,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (!lopt || !lopt->qlen) goto out; - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { - bc = nlmsg_find_attr(cb->nlh, sizeof(*r), - INET_DIAG_REQ_BYTECODE); + if (bc != NULL) { entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -664,21 +680,20 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (bc) { entry.saddr = -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) (entry.family == AF_INET6) ? inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) (entry.family == AF_INET6) ? inet6_rsk(req)->rmt_addr.s6_addr32 : #endif &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); - if (!inet_diag_bc_run(nla_data(bc), - nla_len(bc), &entry)) + if (!inet_diag_bc_run(bc, &entry)) continue; } @@ -701,19 +716,11 @@ out: return err; } -static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, + struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc) { int i, num; int s_i, s_num; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - const struct inet_diag_handler *handler; - struct inet_hashinfo *hashinfo; - - handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); - if (IS_ERR(handler)) - goto unlock; - - hashinfo = handler->idiag_hashinfo; s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -738,6 +745,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_listen; @@ -747,7 +758,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[3] > 0) goto syn_recv; - if (inet_csk_diag_dump(sk, skb, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -756,7 +767,7 @@ syn_recv: if (!(r->idiag_states & TCPF_SYN_RECV)) goto next_listen; - if (inet_diag_dump_reqs(skb, sk, cb) < 0) { + if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -778,7 +789,7 @@ skip_listen_ht: } if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) - goto unlock; + goto out; for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; @@ -803,13 +814,16 @@ skip_listen_ht: goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_normal; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next_normal; - if (inet_csk_diag_dump(sk, skb, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(lock); goto done; } @@ -825,13 +839,16 @@ next_normal: if (num < s_num) goto next_dying; + if (r->sdiag_family != AF_UNSPEC && + tw->tw_family != r->sdiag_family) + goto next_dying; if (r->id.idiag_sport != tw->tw_sport && r->id.idiag_sport) goto next_dying; if (r->id.idiag_dport != tw->tw_dport && r->id.idiag_dport) goto next_dying; - if (inet_twsk_diag_dump(tw, skb, cb) < 0) { + if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { spin_unlock_bh(lock); goto done; } @@ -845,15 +862,85 @@ next_dying: done: cb->args[1] = i; cb->args[2] = num; -unlock: +out: + ; +} +EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); + +static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req *r, struct nlattr *bc) +{ + const struct inet_diag_handler *handler; + + handler = inet_diag_lock_handler(r->sdiag_protocol); + if (!IS_ERR(handler)) + handler->dump(skb, cb, r, bc); inet_diag_unlock_handler(handler); + return skb->len; } -static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *bc = NULL; int hdrlen = sizeof(struct inet_diag_req); + if (nlmsg_attrlen(cb->nlh, hdrlen)) + bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + + return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc); +} + +static inline int inet_diag_type2proto(int type) +{ + switch (type) { + case TCPDIAG_GETSOCK: + return IPPROTO_TCP; + case DCCPDIAG_GETSOCK: + return IPPROTO_DCCP; + default: + return 0; + } +} + +static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh); + struct inet_diag_req req; + struct nlattr *bc = NULL; + int hdrlen = sizeof(struct inet_diag_req_compat); + + req.sdiag_family = AF_UNSPEC; /* compatibility */ + req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); + req.idiag_ext = rc->idiag_ext; + req.idiag_states = rc->idiag_states; + req.id = rc->id; + + if (nlmsg_attrlen(cb->nlh, hdrlen)) + bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + + return __inet_diag_dump(skb, cb, &req, bc); +} + +static int inet_diag_get_exact_compat(struct sk_buff *in_skb, + const struct nlmsghdr *nlh) +{ + struct inet_diag_req_compat *rc = NLMSG_DATA(nlh); + struct inet_diag_req req; + + req.sdiag_family = rc->idiag_family; + req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); + req.idiag_ext = rc->idiag_ext; + req.idiag_states = rc->idiag_states; + req.id = rc->id; + + return inet_diag_get_exact(in_skb, nlh, &req); +} + +static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + int hdrlen = sizeof(struct inet_diag_req_compat); + if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) return -EINVAL; @@ -870,28 +957,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } - return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, NULL, 0); + return netlink_dump_start(sock_diag_nlsk, skb, nlh, + inet_diag_dump_compat, NULL, 0); } - return inet_diag_get_exact(skb, nlh); + return inet_diag_get_exact_compat(skb, nlh); } -static DEFINE_MUTEX(inet_diag_mutex); - -static void inet_diag_rcv(struct sk_buff *skb) +static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { - mutex_lock(&inet_diag_mutex); - netlink_rcv_skb(skb, &inet_diag_rcv_msg); - mutex_unlock(&inet_diag_mutex); + int hdrlen = sizeof(struct inet_diag_req); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + if (nlmsg_attrlen(h, hdrlen)) { + struct nlattr *attr; + attr = nlmsg_find_attr(h, hdrlen, + INET_DIAG_REQ_BYTECODE); + if (attr == NULL || + nla_len(attr) < sizeof(struct inet_diag_bc_op) || + inet_diag_bc_audit(nla_data(attr), nla_len(attr))) + return -EINVAL; + } + + return netlink_dump_start(sock_diag_nlsk, skb, h, + inet_diag_dump, NULL, 0); + } + + return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h)); } +static struct sock_diag_handler inet_diag_handler = { + .family = AF_INET, + .dump = inet_diag_handler_dump, +}; + +static struct sock_diag_handler inet6_diag_handler = { + .family = AF_INET6, + .dump = inet_diag_handler_dump, +}; + int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; int err = -EINVAL; - if (type >= INET_DIAG_GETSOCK_MAX) + if (type >= IPPROTO_MAX) goto out; mutex_lock(&inet_diag_table_mutex); @@ -910,7 +1023,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; - if (type >= INET_DIAG_GETSOCK_MAX) + if (type >= IPPROTO_MAX) return; mutex_lock(&inet_diag_table_mutex); @@ -921,7 +1034,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister); static int __init inet_diag_init(void) { - const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * + const int inet_diag_table_size = (IPPROTO_MAX * sizeof(struct inet_diag_handler *)); int err = -ENOMEM; @@ -929,25 +1042,35 @@ static int __init inet_diag_init(void) if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, - inet_diag_rcv, NULL, THIS_MODULE); - if (idiagnl == NULL) - goto out_free_table; - err = 0; + err = sock_diag_register(&inet_diag_handler); + if (err) + goto out_free_nl; + + err = sock_diag_register(&inet6_diag_handler); + if (err) + goto out_free_inet; + + sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); out: return err; -out_free_table: + +out_free_inet: + sock_diag_unregister(&inet_diag_handler); +out_free_nl: kfree(inet_diag_table); goto out; } static void __exit inet_diag_exit(void) { - netlink_kernel_release(idiagnl); + sock_diag_unregister(&inet6_diag_handler); + sock_diag_unregister(&inet_diag_handler); + sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); kfree(inet_diag_table); } module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2b32296b7958..2b53a1f7abf6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -46,7 +46,7 @@ #include <net/rtnetlink.h> #include <net/gre.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -729,9 +729,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); + struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; @@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); @@ -875,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((iph->ttl = tiph->ttl) == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0d5e5672f3d1..ff302bde8890 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour(dst); + neigh = dst_get_neighbour_noref(dst); if (neigh) { int res = neigh_output(neigh, skb); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 80d5fa450210..8aa87c19fa00 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -37,7 +37,7 @@ #include <net/route.h> #include <net/xfrm.h> #include <net/compat.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <net/transp_v6.h> #endif @@ -508,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, sock_owned_by_user(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && @@ -519,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (opt) icsk->icsk_ext_hdr_len += opt->opt.optlen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) } #endif } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 915eb5265b2e..7e4ec9fc2cef 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -253,6 +253,10 @@ static int __init ic_open_devs(void) } } + /* no point in waiting if we could not bring up at least one device */ + if (!ic_first_dev) + goto have_carrier; + /* wait for a carrier on at least one device */ start = jiffies; while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 94906908a416..413ed1ba7a5a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip_tunnel_link(ipn, nt); return nt; @@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); + struct ip_tunnel *t; int err; ipn->tunnels[0] = ipn->tunnels_wc; @@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net) if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(ipn->fb_tunnel_dev); + + strcpy(t->parms.name, ipn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b5508151e547..ba5756d20165 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -65,7 +65,7 @@ static unsigned int flushtimeout = 10; module_param(flushtimeout, uint, 0600); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); -static int nflog = 1; +static bool nflog = true; module_param(nflog, bool, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c37641e819f2..0e58f09e59fb 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static int forward = NF_ACCEPT; +static bool forward = NF_ACCEPT; module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 961eed4f510a..3569d8ecaeac 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -56,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) local_bh_disable(); orphans = percpu_counter_sum_positive(&tcp_orphan_count); - sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); + sockets = proto_sockets_allocated_sum_positive(&tcp_prot); local_bh_enable(); socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, tcp_death_row.tw_count, sockets, - atomic_long_read(&tcp_memory_allocated)); + proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), - atomic_long_read(&udp_memory_allocated)); + proto_memory_allocated(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7047069cf967..bcacf54e5418 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -91,6 +91,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/prefetch.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -111,7 +112,7 @@ #include <net/secure_seq.h> #define RT_FL_TOS(oldflp4) \ - ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) + ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) #define IP_MAX_MTU 0xFFF0 @@ -119,6 +120,7 @@ static int ip_rt_max_size; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; +static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; @@ -132,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; static int redirect_genid; +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; + /* * Interface to generic destination cache. */ @@ -419,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) int len, HHUptod; rcu_read_lock(); - n = dst_get_neighbour(&r->dst); + n = dst_get_neighbour_noref(&r->dst); HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; rcu_read_unlock(); @@ -829,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } +static void rt_check_expire(void) +{ + static unsigned int rover; + unsigned int i = rover, goal; + struct rtable *rth; + struct rtable __rcu **rthp; + unsigned long samples = 0; + unsigned long sum = 0, sum2 = 0; + unsigned long delta; + u64 mult; + + delta = jiffies - expires_ljiffies; + expires_ljiffies = jiffies; + mult = ((u64)delta) << rt_hash_log; + if (ip_rt_gc_timeout > 1) + do_div(mult, ip_rt_gc_timeout); + goal = (unsigned int)mult; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; + for (; goal > 0; goal--) { + unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; + + i = (i + 1) & rt_hash_mask; + rthp = &rt_hash_table[i].chain; + + if (need_resched()) + cond_resched(); + + samples++; + + if (rcu_dereference_raw(*rthp) == NULL) + continue; + length = 0; + spin_lock_bh(rt_hash_lock_addr(i)); + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { + prefetch(rth->dst.rt_next); + if (rt_is_expired(rth)) { + *rthp = rth->dst.rt_next; + rt_free(rth); + continue; + } + if (rth->dst.expires) { + /* Entry is expired even if it is in use */ + if (time_before_eq(jiffies, rth->dst.expires)) { +nofree: + tmo >>= 1; + rthp = &rth->dst.rt_next; + /* + * We only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + length += has_noalias(rt_hash_table[i].chain, rth); + continue; + } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; + + /* Cleanup aged off entries. */ + *rthp = rth->dst.rt_next; + rt_free(rth); + } + spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); + } + rover = i; +} + +/* + * rt_worker_func() is run in process context. + * we call rt_check_expire() to scan part of the hash table + */ +static void rt_worker_func(struct work_struct *work) +{ + rt_check_expire(); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); +} + /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -1265,7 +1361,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) { struct rtable *rt = (struct rtable *) dst; - if (rt) { + if (rt && !(rt->dst.flags & DST_NOPEER)) { if (rt->peer == NULL) rt_bind_peer(rt, rt->rt_dst, 1); @@ -1276,7 +1372,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) iph->id = htons(inet_getid(rt->peer, more)); return; } - } else + } else if (!rt) printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", __builtin_return_address(0)); @@ -1304,7 +1400,7 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; @@ -1315,21 +1411,19 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) rt->rt_gateway = peer->redirect_learned.a4; n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); - if (IS_ERR(n)) - return PTR_ERR(n); + if (IS_ERR(n)) { + rt->rt_gateway = orig_gw; + return; + } old_n = xchg(&rt->dst._neighbour, n); if (old_n) neigh_release(old_n); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); - rt->rt_gateway = orig_gw; - return -EAGAIN; + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); } else { rt->rt_flags |= RTCF_REDIRECTED; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } - return 0; } /* called in rcu_read_lock() section */ @@ -1687,7 +1781,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } -static struct rtable *ipv4_validate_peer(struct rtable *rt) +static void ipv4_validate_peer(struct rtable *rt) { if (rt->rt_peer_genid != rt_peer_genid()) { struct inet_peer *peer; @@ -1702,15 +1796,12 @@ static struct rtable *ipv4_validate_peer(struct rtable *rt) if (peer->redirect_genid != redirect_genid) peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(&rt->dst, peer)) - return NULL; - } + peer->redirect_learned.a4 != rt->rt_gateway) + check_peer_redir(&rt->dst, peer); } rt->rt_peer_genid = rt_peer_genid(); } - return rt; } static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) @@ -1719,7 +1810,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) if (rt_is_expired(rt)) return NULL; - dst = (struct dst_entry *) ipv4_validate_peer(rt); + ipv4_validate_peer(rt); return dst; } @@ -2374,9 +2465,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - rth = ipv4_validate_peer(rth); - if (!rth) - continue; + ipv4_validate_peer(rth); if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2435,11 +2524,11 @@ EXPORT_SYMBOL(ip_route_input_common); static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi4 *fl4, __be32 orig_daddr, __be32 orig_saddr, - int orig_oif, struct net_device *dev_out, + int orig_oif, __u8 orig_rtos, + struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; - u32 tos = RT_FL_TOS(fl4); struct in_device *in_dev; u16 type = res->type; struct rtable *rth; @@ -2490,7 +2579,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_key_tos = tos; + rth->rt_key_tos = orig_rtos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; @@ -2540,7 +2629,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) { struct net_device *dev_out = NULL; - u32 tos = RT_FL_TOS(fl4); + __u8 tos = RT_FL_TOS(fl4); unsigned int flags = 0; struct fib_result res; struct rtable *rth; @@ -2716,7 +2805,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) make_route: rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, - dev_out, flags); + tos, dev_out, flags); if (!IS_ERR(rth)) { unsigned int hash; @@ -2752,9 +2841,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - rth = ipv4_validate_peer(rth); - if (!rth) - continue; + ipv4_validate_peer(rth); dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -3182,6 +3269,13 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec_jiffies, }, { + .procname = "gc_interval", + .data = &ip_rt_gc_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), @@ -3391,6 +3485,11 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); + INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); + expires_ljiffies = jiffies; + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); + if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 90f6544c13e2..51fdbb490437 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) if (!sysctl_tcp_timestamps) return false; - tcp_opt->sack_ok = (options >> 4) & 0x1; + tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0; *ecn_ok = (options >> 5) & 1; if (*ecn_ok && !sysctl_tcp_ecn) return false; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69fd7201129a..4aa7e9dc0cbb 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/nsproxy.h> +#include <linux/swap.h> #include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> @@ -23,6 +24,7 @@ #include <net/cipso_ipv4.h> #include <net/inet_frag.h> #include <net/ping.h> +#include <net/tcp_memcontrol.h> static int zero; static int tcp_retr1_max = 255; @@ -73,7 +75,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, } -void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) +static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) { gid_t *data = table->data; unsigned seq; @@ -86,7 +88,7 @@ void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t } /* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, int range[2]) +static void set_ping_group_range(struct ctl_table *table, gid_t range[2]) { gid_t *data = table->data; write_seqlock(&sysctl_local_ports.lock); @@ -174,6 +176,49 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } +static int ipv4_tcp_mem(ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + unsigned long vec[3]; + struct net *net = current->nsproxy->net_ns; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + struct mem_cgroup *memcg; +#endif + + ctl_table tmp = { + .data = &vec, + .maxlen = sizeof(vec), + .mode = ctl->mode, + }; + + if (!write) { + ctl->data = &net->ipv4.sysctl_tcp_mem; + return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos); + } + + ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); + if (ret) + return ret; + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + rcu_read_lock(); + memcg = mem_cgroup_from_task(current); + + tcp_prot_mem(memcg, vec[0], 0); + tcp_prot_mem(memcg, vec[1], 1); + tcp_prot_mem(memcg, vec[2], 2); + rcu_read_unlock(); +#endif + + net->ipv4.sysctl_tcp_mem[0] = vec[0]; + net->ipv4.sysctl_tcp_mem[1] = vec[1]; + net->ipv4.sysctl_tcp_mem[2] = vec[2]; + + return 0; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -433,13 +478,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_mem", - .data = &sysctl_tcp_mem, - .maxlen = sizeof(sysctl_tcp_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { .procname = "tcp_wmem", .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), @@ -721,6 +759,12 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_ping_group_range, }, + { + .procname = "tcp_mem", + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), + .mode = 0644, + .proc_handler = ipv4_tcp_mem, + }, { } }; @@ -734,6 +778,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); static __net_init int ipv4_sysctl_init_net(struct net *net) { struct ctl_table *table; + unsigned long limit; table = ipv4_net_table; if (!net_eq(net, &init_net)) { @@ -769,6 +814,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_rt_cache_rebuild_count = 4; + limit = nr_free_buffer_pages() / 8; + limit = max(limit, 128UL); + net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; + net->ipv4.sysctl_tcp_mem[1] = limit; + net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; + net->ipv4.ipv4_hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); if (net->ipv4.ipv4_hdr == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a09fe253b917..9bcdec3ad772 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); -long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); @@ -888,9 +886,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -#define TCP_PAGE(sk) (sk->sk_sndmsg_page) -#define TCP_OFF(sk) (sk->sk_sndmsg_off) - static inline int select_size(const struct sock *sk, bool sg) { const struct tcp_sock *tp = tcp_sk(sk); @@ -1008,13 +1003,13 @@ new_segment: } else { int merge = 0; int i = skb_shinfo(skb)->nr_frags; - struct page *page = TCP_PAGE(sk); + struct page *page = sk->sk_sndmsg_page; int off; if (page && page_count(page) == 1) - TCP_OFF(sk) = 0; + sk->sk_sndmsg_off = 0; - off = TCP_OFF(sk); + off = sk->sk_sndmsg_off; if (skb_can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { @@ -1031,7 +1026,7 @@ new_segment: } else if (page) { if (off == PAGE_SIZE) { put_page(page); - TCP_PAGE(sk) = page = NULL; + sk->sk_sndmsg_page = page = NULL; off = 0; } } else @@ -1057,9 +1052,9 @@ new_segment: /* If this page was new, give it to the * socket so it does not get leaked. */ - if (!TCP_PAGE(sk)) { - TCP_PAGE(sk) = page; - TCP_OFF(sk) = 0; + if (!sk->sk_sndmsg_page) { + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; } goto do_error; } @@ -1069,15 +1064,15 @@ new_segment: skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); - if (TCP_PAGE(sk)) { + if (sk->sk_sndmsg_page) { get_page(page); } else if (off + copy < PAGE_SIZE) { get_page(page); - TCP_PAGE(sk) = page; + sk->sk_sndmsg_page = page; } } - TCP_OFF(sk) = off + copy; + sk->sk_sndmsg_off = off + copy; } if (!copied) @@ -3281,14 +3276,9 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - limit = nr_free_buffer_pages() / 8; - limit = max(limit, 128UL); - sysctl_tcp_mem[0] = limit / 4 * 3; - sysctl_tcp_mem[1] = limit; - sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); + limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1]) + << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 939edb3b8e4d..8cd357a8be79 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,11 +34,23 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } +static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req *r, struct nlattr *bc) +{ + inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); +} + +static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req *req) +{ + return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); +} + static const struct inet_diag_handler tcp_diag_handler = { - .idiag_hashinfo = &tcp_hashinfo, + .dump = tcp_diag_dump, + .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, - .idiag_type = TCPDIAG_GETSOCK, - .idiag_info_size = sizeof(struct tcp_info), + .idiag_type = IPPROTO_TCP, }; static int __init tcp_diag_init(void) @@ -54,4 +66,4 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0cbb44076cfa..2877c3e09587 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -322,7 +322,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_memory_pressure) { + !sk_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -411,8 +411,8 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && - !tcp_memory_pressure && - atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + !sk_under_memory_pressure(sk) && + sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); } @@ -865,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp) /* RFC3517 uses different metric in lost marker => reset on change */ if (tcp_is_fack(tp)) tp->lost_skb_hint = NULL; - tp->rx_opt.sack_ok &= ~2; + tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; } /* Take a notice that peer is sending D-SACKs */ static void tcp_dsack_seen(struct tcp_sock *tp) { - tp->rx_opt.sack_ok |= 4; + tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; } /* Initialize metrics on socket. */ @@ -2663,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", @@ -3878,7 +3878,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && !estab && sysctl_tcp_sack) { - opt_rx->sack_ok = 1; + opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } break; @@ -4866,7 +4866,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); - else if (tcp_memory_pressure) + else if (sk_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); @@ -4932,11 +4932,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk) return 0; /* If we are under global TCP memory pressure, do not expand. */ - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) return 0; /* If we are under soft global TCP memory pressure, do not expand. */ - if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) + if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) return 0; /* If we filled the congestion window, do not expand. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c4b8b09db9f5..1eb4ad57670e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -73,6 +73,7 @@ #include <net/xfrm.h> #include <net/netdma.h> #include <net/secure_seq.h> +#include <net/tcp_memcontrol.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -1917,7 +1918,8 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - percpu_counter_inc(&tcp_sockets_allocated); + sock_update_memcg(sk); + sk_sockets_allocated_inc(sk); local_bh_enable(); return 0; @@ -1973,7 +1975,8 @@ void tcp_v4_destroy_sock(struct sock *sk) tp->cookie_values = NULL; } - percpu_counter_dec(&tcp_sockets_allocated); + sk_sockets_allocated_dec(sk); + sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -2620,7 +2623,6 @@ struct proto tcp_prot = { .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, - .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, @@ -2634,10 +2636,14 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + .init_cgroup = tcp_init_cgroup, + .destroy_cgroup = tcp_destroy_cgroup, + .proto_cgroup = tcp_proto_cgroup, +#endif }; EXPORT_SYMBOL(tcp_prot); - static int __net_init tcp_sk_init(struct net *net) { return inet_ctl_sock_create(&net->ipv4.tcp_sock, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c new file mode 100644 index 000000000000..7fed04f875c1 --- /dev/null +++ b/net/ipv4/tcp_memcontrol.c @@ -0,0 +1,272 @@ +#include <net/tcp.h> +#include <net/tcp_memcontrol.h> +#include <net/sock.h> +#include <net/ip.h> +#include <linux/nsproxy.h> +#include <linux/memcontrol.h> +#include <linux/module.h> + +static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft); +static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, + const char *buffer); +static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event); + +static struct cftype tcp_files[] = { + { + .name = "kmem.tcp.limit_in_bytes", + .write_string = tcp_cgroup_write, + .read_u64 = tcp_cgroup_read, + .private = RES_LIMIT, + }, + { + .name = "kmem.tcp.usage_in_bytes", + .read_u64 = tcp_cgroup_read, + .private = RES_USAGE, + }, + { + .name = "kmem.tcp.failcnt", + .private = RES_FAILCNT, + .trigger = tcp_cgroup_reset, + .read_u64 = tcp_cgroup_read, + }, + { + .name = "kmem.tcp.max_usage_in_bytes", + .private = RES_MAX_USAGE, + .trigger = tcp_cgroup_reset, + .read_u64 = tcp_cgroup_read, + }, +}; + +static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) +{ + return container_of(cg_proto, struct tcp_memcontrol, cg_proto); +} + +static void memcg_tcp_enter_memory_pressure(struct sock *sk) +{ + if (sk->sk_cgrp->memory_pressure) + *sk->sk_cgrp->memory_pressure = 1; +} +EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); + +int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + /* + * The root cgroup does not use res_counters, but rather, + * rely on the data already collected by the network + * subsystem + */ + struct res_counter *res_parent = NULL; + struct cg_proto *cg_proto, *parent_cg; + struct tcp_memcontrol *tcp; + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct net *net = current->nsproxy->net_ns; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + goto create_files; + + tcp = tcp_from_cgproto(cg_proto); + + tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; + tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; + tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; + tcp->tcp_memory_pressure = 0; + + parent_cg = tcp_prot.proto_cgroup(parent); + if (parent_cg) + res_parent = parent_cg->memory_allocated; + + res_counter_init(&tcp->tcp_memory_allocated, res_parent); + percpu_counter_init(&tcp->tcp_sockets_allocated, 0); + + cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; + cg_proto->memory_pressure = &tcp->tcp_memory_pressure; + cg_proto->sysctl_mem = tcp->tcp_prot_mem; + cg_proto->memory_allocated = &tcp->tcp_memory_allocated; + cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; + cg_proto->memcg = memcg; + +create_files: + return cgroup_add_files(cgrp, ss, tcp_files, + ARRAY_SIZE(tcp_files)); +} +EXPORT_SYMBOL(tcp_init_cgroup); + +void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + struct cg_proto *cg_proto; + struct tcp_memcontrol *tcp; + u64 val; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return; + + tcp = tcp_from_cgproto(cg_proto); + percpu_counter_destroy(&tcp->tcp_sockets_allocated); + + val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); + + if (val != RESOURCE_MAX) + jump_label_dec(&memcg_socket_limit_enabled); +} +EXPORT_SYMBOL(tcp_destroy_cgroup); + +static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) +{ + struct net *net = current->nsproxy->net_ns; + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + u64 old_lim; + int i; + int ret; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return -EINVAL; + + if (val > RESOURCE_MAX) + val = RESOURCE_MAX; + + tcp = tcp_from_cgproto(cg_proto); + + old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); + ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); + if (ret) + return ret; + + for (i = 0; i < 3; i++) + tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, + net->ipv4.sysctl_tcp_mem[i]); + + if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) + jump_label_dec(&memcg_socket_limit_enabled); + else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) + jump_label_inc(&memcg_socket_limit_enabled); + + return 0; +} + +static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, + const char *buffer) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + unsigned long long val; + int ret = 0; + + switch (cft->private) { + case RES_LIMIT: + /* see memcontrol.c */ + ret = res_counter_memparse_write_strategy(buffer, &val); + if (ret) + break; + ret = tcp_update_limit(memcg, val); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return default_val; + + tcp = tcp_from_cgproto(cg_proto); + return res_counter_read_u64(&tcp->tcp_memory_allocated, type); +} + +static u64 tcp_read_usage(struct mem_cgroup *memcg) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; + + tcp = tcp_from_cgproto(cg_proto); + return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); +} + +static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + u64 val; + + switch (cft->private) { + case RES_LIMIT: + val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); + break; + case RES_USAGE: + val = tcp_read_usage(memcg); + break; + case RES_FAILCNT: + case RES_MAX_USAGE: + val = tcp_read_stat(memcg, cft->private, 0); + break; + default: + BUG(); + } + return val; +} + +static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) +{ + struct mem_cgroup *memcg; + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + memcg = mem_cgroup_from_cont(cont); + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return 0; + tcp = tcp_from_cgproto(cg_proto); + + switch (event) { + case RES_MAX_USAGE: + res_counter_reset_max(&tcp->tcp_memory_allocated); + break; + case RES_FAILCNT: + res_counter_reset_failcnt(&tcp->tcp_memory_allocated); + break; + } + + return 0; +} + +unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); + if (!cg_proto) + return 0; + + tcp = tcp_from_cgproto(cg_proto); + return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); +} + +void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return; + + tcp = tcp_from_cgproto(cg_proto); + + tcp->tcp_prot_mem[idx] = val; +} diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9dc146e5ed65..550e755747e0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -336,7 +336,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet6_timewait_sock *tw6; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 50788d67bdb7..8c8de2780c7a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; + eat = min_t(int, len, skb_headlen(skb)); + if (eat) { + __skb_pull(skb, eat); + len -= eat; + if (!len) + return; + } eat = len; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { @@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - /* If len == headlen, we avoid __skb_pull to preserve alignment. */ - if (unlikely(len < skb_headlen(skb))) - __skb_pull(skb, len); - else - __pskb_trim_head(skb, len - skb_headlen(skb)); + __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; @@ -1919,7 +1922,7 @@ u32 __tcp_select_window(struct sock *sk) if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2e0f0af76c19..a516d1e399df 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -171,13 +171,13 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; - bool do_reset, syn_set = 0; + bool do_reset, syn_set = false; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; - syn_set = 1; + syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { /* Black hole detection */ @@ -261,7 +261,7 @@ static void tcp_delack_timer(unsigned long data) } out: - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) sk_mem_reclaim(sk); out_unlock: bh_unlock_sock(sk); @@ -340,7 +340,7 @@ void tcp_retransmit_timer(struct sock *sk) &inet->inet_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index ac3b3ee4b07c..01775983b997 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -105,7 +105,7 @@ drop: return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int tunnel64_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; @@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info) break; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static void tunnel64_err(struct sk_buff *skb, u32 info) { struct xfrm_tunnel *handler; @@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = { .netns_ok = 1, }; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static const struct net_protocol tunnel64_protocol = { .handler = tunnel64_rcv, .err_handler = tunnel64_err, @@ -167,7 +167,7 @@ static int __init tunnel4_init(void) printk(KERN_ERR "tunnel4 init: can't add protocol\n"); return -EAGAIN; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { printk(KERN_ERR "tunnel64 init: can't add protocol\n"); inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); @@ -179,7 +179,7 @@ static int __init tunnel4_init(void) static void __exit tunnel4_fini(void) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6)) printk(KERN_ERR "tunnel64 close: can't remove protocol\n"); #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ad481b32f1e3..5d075b5f70fc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -445,7 +445,7 @@ exact_match: /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, +struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *udptable) { @@ -512,6 +512,7 @@ begin: rcu_read_unlock(); return result; } +EXPORT_SYMBOL_GPL(__udp4_lib_lookup); static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c new file mode 100644 index 000000000000..69f8a7ca63dd --- /dev/null +++ b/net/ipv4/udp_diag.c @@ -0,0 +1,201 @@ +/* + * udp_diag.c Module for monitoring UDP transport protocols sockets. + * + * Authors: Pavel Emelyanov, <xemul@parallels.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/module.h> +#include <linux/inet_diag.h> +#include <linux/udp.h> +#include <net/udp.h> +#include <net/udplite.h> +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, struct inet_diag_req *req, + struct nlattr *bc) +{ + if (!inet_diag_bc_sk(bc, sk)) + return 0; + + return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); +} + +static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, + const struct nlmsghdr *nlh, struct inet_diag_req *req) +{ + int err = -EINVAL; + struct sock *sk; + struct sk_buff *rep; + + if (req->sdiag_family == AF_INET) + sk = __udp4_lib_lookup(&init_net, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_if, tbl); +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) + sk = __udp6_lib_lookup(&init_net, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + req->id.idiag_if, tbl); +#endif + else + goto out_nosk; + + err = -ENOENT; + if (sk == NULL) + goto out_nosk; + + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) + goto out; + + err = -ENOMEM; + rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + + 64)), GFP_KERNEL); + if (!rep) + goto out; + + err = inet_sk_diag_fill(sk, NULL, rep, req, + NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, 0, nlh); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } + err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + if (sk) + sock_put(sk); +out_nosk: + return err; +} + +static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req *r, struct nlattr *bc) +{ + int num, s_num, slot, s_slot; + + s_slot = cb->args[0]; + num = s_num = cb->args[1]; + + for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { + struct sock *sk; + struct hlist_nulls_node *node; + struct udp_hslot *hslot = &table->hash[slot]; + + if (hlist_nulls_empty(&hslot->head)) + continue; + + spin_lock_bh(&hslot->lock); + sk_nulls_for_each(sk, node, &hslot->head) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next; + if (!(r->idiag_states & (1 << sk->sk_state))) + goto next; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next; + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + + if (sk_diag_dump(sk, skb, cb, r, bc) < 0) { + spin_unlock_bh(&hslot->lock); + goto done; + } +next: + num++; + } + spin_unlock_bh(&hslot->lock); + } +done: + cb->args[0] = slot; + cb->args[1] = num; +} + +static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req *r, struct nlattr *bc) +{ + udp_dump(&udp_table, skb, cb, r, bc); +} + +static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req *req) +{ + return udp_dump_one(&udp_table, in_skb, nlh, req); +} + +static const struct inet_diag_handler udp_diag_handler = { + .dump = udp_diag_dump, + .dump_one = udp_diag_dump_one, + .idiag_type = IPPROTO_UDP, +}; + +static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req *r, struct nlattr *bc) +{ + udp_dump(&udplite_table, skb, cb, r, bc); +} + +static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req *req) +{ + return udp_dump_one(&udplite_table, in_skb, nlh, req); +} + +static const struct inet_diag_handler udplite_diag_handler = { + .dump = udplite_diag_dump, + .dump_one = udplite_diag_dump_one, + .idiag_type = IPPROTO_UDPLITE, +}; + +static int __init udp_diag_init(void) +{ + int err; + + err = inet_diag_register(&udp_diag_handler); + if (err) + goto out; + err = inet_diag_register(&udplite_diag_handler); + if (err) + goto out_lite; +out: + return err; +out_lite: + inet_diag_unregister(&udp_diag_handler); + goto out; +} + +static void __exit udp_diag_exit(void) +{ + inet_diag_unregister(&udplite_diag_handler); + inet_diag_unregister(&udp_diag_handler); +} + +module_init(udp_diag_init); +module_exit(udp_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 82806455e859..9247d9d70e9d 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .priority = 2, }; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, @@ -84,7 +84,7 @@ static int __init ipip_init(void) xfrm_unregister_type(&ipip_type, AF_INET); return -EAGAIN; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n"); xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); @@ -97,7 +97,7 @@ static int __init ipip_init(void) static void __exit ipip_fini(void) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n"); #endif |