summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/arp.c20
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c447
-rw-r--r--net/ipv4/ip_gre.c10
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ipconfig.c4
-rw-r--r--net/ipv4/ipip.c7
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c2
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/proc.c6
-rw-r--r--net/ipv4/route.c161
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c69
-rw-r--r--net/ipv4/tcp.c34
-rw-r--r--net/ipv4/tcp_diag.c20
-rw-r--r--net/ipv4/tcp_input.c20
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv4/tcp_memcontrol.c272
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv4/tunnel4.c10
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/udp_diag.c201
-rw-r--r--net/ipv4/xfrm4_tunnel.c6
29 files changed, 1064 insertions, 289 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index cbb505ba9324..1a8f93bd2d4f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,6 +409,10 @@ config INET_TCP_DIAG
depends on INET_DIAG
def_tristate INET_DIAG
+config INET_UDP_DIAG
+ depends on INET_DIAG
+ def_tristate INET_DIAG && IPV6
+
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f2dc69cffb57..ff75d3bbcd6a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o
obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
+obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
@@ -47,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 15dc4c4828de..f7b5670744f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1672,6 +1672,8 @@ static int __init inet_init(void)
ip_static_sysctl_init();
#endif
+ tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
+
/*
* Add all the base protocols.
*/
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ff324ebc8893..381a0876c363 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -277,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh)
default:
break;
case ARPHRD_ROSE:
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
#endif
neigh->ops = &arp_broken_ops;
@@ -629,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
arp->ar_pro = htons(ETH_P_IP);
break;
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
arp->ar_hrd = htons(ARPHRD_AX25);
arp->ar_pro = htons(AX25_P_IP);
break;
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
arp->ar_hrd = htons(ARPHRD_NETROM);
arp->ar_pro = htons(AX25_P_IP);
@@ -643,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
#endif
#endif
-#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
+#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
arp->ar_hrd = htons(ARPHRD_ETHER);
arp->ar_pro = htons(ETH_P_IP);
break;
#endif
-#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
+#if IS_ENABLED(CONFIG_TR)
case ARPHRD_IEEE802_TR:
arp->ar_hrd = htons(ARPHRD_IEEE802);
arp->ar_pro = htons(ETH_P_IP);
@@ -1036,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
return -EINVAL;
}
switch (dev->type) {
-#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
+#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
/*
* According to RFC 1390, FDDI devices should accept ARP
@@ -1282,7 +1282,7 @@ void __init arp_init(void)
}
#ifdef CONFIG_PROC_FS
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
/* ------------------------------------------------------------------------ */
/*
@@ -1330,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
read_lock(&n->lock);
/* Convert hardware address to XX:XX:XX:XX ... form. */
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
ax2asc2((ax25_address *)n->ha, hbuffer);
else {
@@ -1343,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
if (k != 0)
--k;
hbuffer[k] = 0;
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
}
#endif
sprintf(tbuf, "%pI4", n->primary_key);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a598768c616c..2e4e24476c4c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) 1
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0a46c541b477..fb2e47ff59f7 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -33,6 +33,7 @@
#include <linux/stddef.h>
#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
static const struct inet_diag_handler **inet_diag_table;
@@ -45,24 +46,22 @@ struct inet_diag_entry {
u16 userlocks;
};
-static struct sock *idiagnl;
-
#define INET_DIAG_PUT(skb, attrtype, attrlen) \
RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
static DEFINE_MUTEX(inet_diag_table_mutex);
-static const struct inet_diag_handler *inet_diag_lock_handler(int type)
+static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
- if (!inet_diag_table[type])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_INET_DIAG, type);
+ if (!inet_diag_table[proto])
+ request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
- if (!inet_diag_table[type])
+ if (!inet_diag_table[proto])
return ERR_PTR(-ENOENT);
- return inet_diag_table[type];
+ return inet_diag_table[proto];
}
static inline void inet_diag_unlock_handler(
@@ -71,21 +70,21 @@ static inline void inet_diag_unlock_handler(
mutex_unlock(&inet_diag_table_mutex);
}
-static int inet_csk_diag_fill(struct sock *sk,
- struct sk_buff *skb,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
void *info = NULL;
struct inet_diag_meminfo *minfo = NULL;
unsigned char *b = skb_tail_pointer(skb);
const struct inet_diag_handler *handler;
+ int ext = req->idiag_ext;
- handler = inet_diag_table[unlh->nlmsg_type];
+ handler = inet_diag_table[req->sdiag_protocol];
BUG_ON(handler == NULL);
nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
@@ -97,25 +96,13 @@ static int inet_csk_diag_fill(struct sock *sk,
if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
- if (ext & (1 << (INET_DIAG_INFO - 1)))
- info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
- handler->idiag_info_size);
-
- if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
- const size_t len = strlen(icsk->icsk_ca_ops->name);
-
- strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
- icsk->icsk_ca_ops->name);
- }
-
r->idiag_family = sk->sk_family;
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
r->idiag_retrans = 0;
r->id.idiag_if = sk->sk_bound_dev_if;
- r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
- r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
r->id.idiag_sport = inet->inet_sport;
r->id.idiag_dport = inet->inet_dport;
@@ -128,7 +115,7 @@ static int inet_csk_diag_fill(struct sock *sk,
if (ext & (1 << (INET_DIAG_TOS - 1)))
RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -139,6 +126,21 @@ static int inet_csk_diag_fill(struct sock *sk,
}
#endif
+ r->idiag_uid = sock_i_uid(sk);
+ r->idiag_inode = sock_i_ino(sk);
+
+ if (minfo) {
+ minfo->idiag_rmem = sk_rmem_alloc_get(sk);
+ minfo->idiag_wmem = sk->sk_wmem_queued;
+ minfo->idiag_fmem = sk->sk_forward_alloc;
+ minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+ }
+
+ if (icsk == NULL) {
+ r->idiag_rqueue = r->idiag_wqueue = 0;
+ goto out;
+ }
+
#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
@@ -159,14 +161,14 @@ static int inet_csk_diag_fill(struct sock *sk,
}
#undef EXPIRES_IN_MS
- r->idiag_uid = sock_i_uid(sk);
- r->idiag_inode = sock_i_ino(sk);
+ if (ext & (1 << (INET_DIAG_INFO - 1)))
+ info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
- if (minfo) {
- minfo->idiag_rmem = sk_rmem_alloc_get(sk);
- minfo->idiag_wmem = sk->sk_wmem_queued;
- minfo->idiag_fmem = sk->sk_forward_alloc;
- minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+ if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+ const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+ strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+ icsk->icsk_ca_ops->name);
}
handler->idiag_get_info(sk, r, info);
@@ -175,6 +177,7 @@ static int inet_csk_diag_fill(struct sock *sk,
icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
icsk->icsk_ca_ops->get_info(sk, ext, skb);
+out:
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
@@ -183,10 +186,20 @@ nlmsg_failure:
nlmsg_trim(skb, b);
return -EMSGSIZE;
}
+EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
+
+static int inet_csk_diag_fill(struct sock *sk,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ return inet_sk_diag_fill(sk, inet_csk(sk),
+ skb, req, pid, seq, nlmsg_flags, unlh);
+}
static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
- struct sk_buff *skb, int ext, u32 pid,
- u32 seq, u16 nlmsg_flags,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
long tmo;
@@ -207,8 +220,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_family = tw->tw_family;
r->idiag_retrans = 0;
r->id.idiag_if = tw->tw_bound_dev_if;
- r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
- r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+ sock_diag_save_cookie(tw, r->id.idiag_cookie);
r->id.idiag_sport = tw->tw_sport;
r->id.idiag_dport = tw->tw_dport;
r->id.idiag_src[0] = tw->tw_rcv_saddr;
@@ -220,7 +232,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_wqueue = 0;
r->idiag_uid = 0;
r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
const struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
@@ -237,42 +249,31 @@ nlmsg_failure:
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
- skb, ext, pid, seq, nlmsg_flags,
+ skb, r, pid, seq, nlmsg_flags,
unlh);
- return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
+ return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
}
-static int inet_diag_get_exact(struct sk_buff *in_skb,
- const struct nlmsghdr *nlh)
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh, struct inet_diag_req *req)
{
int err;
struct sock *sk;
- struct inet_diag_req *req = NLMSG_DATA(nlh);
struct sk_buff *rep;
- struct inet_hashinfo *hashinfo;
- const struct inet_diag_handler *handler;
- handler = inet_diag_lock_handler(nlh->nlmsg_type);
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto unlock;
- }
-
- hashinfo = handler->idiag_hashinfo;
err = -EINVAL;
-
- if (req->idiag_family == AF_INET) {
+ if (req->sdiag_family == AF_INET) {
sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- else if (req->idiag_family == AF_INET6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (req->sdiag_family == AF_INET6) {
sk = inet6_lookup(&init_net, hashinfo,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
@@ -282,29 +283,26 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
}
#endif
else {
- goto unlock;
+ goto out_nosk;
}
err = -ENOENT;
if (sk == NULL)
- goto unlock;
+ goto out_nosk;
- err = -ESTALE;
- if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
- req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
- ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
- (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+ err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+ if (err)
goto out;
err = -ENOMEM;
rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
sizeof(struct inet_diag_meminfo) +
- handler->idiag_info_size + 64)),
+ sizeof(struct tcp_info) + 64)),
GFP_KERNEL);
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req->idiag_ext,
+ err = sk_diag_fill(sk, rep, req,
NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
@@ -312,7 +310,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
+ err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
MSG_DONTWAIT);
if (err > 0)
err = 0;
@@ -324,8 +322,25 @@ out:
else
sock_put(sk);
}
-unlock:
+out_nosk:
+ return err;
+}
+EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
+
+static int inet_diag_get_exact(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ const struct inet_diag_handler *handler;
+ int err;
+
+ handler = inet_diag_lock_handler(req->sdiag_protocol);
+ if (IS_ERR(handler))
+ err = PTR_ERR(handler);
+ else
+ err = handler->dump_one(in_skb, nlh, req);
inet_diag_unlock_handler(handler);
+
return err;
}
@@ -356,9 +371,12 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
}
-static int inet_diag_bc_run(const void *bc, int len,
- const struct inet_diag_entry *entry)
+static int inet_diag_bc_run(const struct nlattr *_bc,
+ const struct inet_diag_entry *entry)
{
+ const void *bc = nla_data(_bc);
+ int len = nla_len(_bc);
+
while (len > 0) {
int yes = 1;
const struct inet_diag_bc_op *op = bc;
@@ -432,6 +450,35 @@ static int inet_diag_bc_run(const void *bc, int len,
return len == 0;
}
+int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
+{
+ struct inet_diag_entry entry;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (bc == NULL)
+ return 1;
+
+ entry.family = sk->sk_family;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (entry.family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ entry.saddr = np->rcv_saddr.s6_addr32;
+ entry.daddr = np->daddr.s6_addr32;
+ } else
+#endif
+ {
+ entry.saddr = &inet->inet_rcv_saddr;
+ entry.daddr = &inet->inet_daddr;
+ }
+ entry.sport = inet->inet_num;
+ entry.dport = ntohs(inet->inet_dport);
+ entry.userlocks = sk->sk_userlocks;
+
+ return inet_diag_bc_run(bc, &entry);
+}
+EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
+
static int valid_cc(const void *bc, int len, int cc)
{
while (len >= 0) {
@@ -488,57 +535,29 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
static int inet_csk_diag_dump(struct sock *sk,
struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
- struct inet_diag_entry entry;
- const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
- sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
- struct inet_sock *inet = inet_sk(sk);
-
- entry.family = sk->sk_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- if (entry.family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- entry.saddr = np->rcv_saddr.s6_addr32;
- entry.daddr = np->daddr.s6_addr32;
- } else
-#endif
- {
- entry.saddr = &inet->inet_rcv_saddr;
- entry.daddr = &inet->inet_daddr;
- }
- entry.sport = inet->inet_num;
- entry.dport = ntohs(inet->inet_dport);
- entry.userlocks = sk->sk_userlocks;
-
- if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
- return 0;
- }
-
- return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+ return inet_csk_diag_fill(sk, skb, r,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
-
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ if (bc != NULL) {
struct inet_diag_entry entry;
- const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
- sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
@@ -554,11 +573,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
+ if (!inet_diag_bc_run(bc, &entry))
return 0;
}
- return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+ return inet_twsk_diag_fill(tw, skb, r,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -584,8 +603,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_retrans = req->retrans;
r->id.idiag_if = sk->sk_bound_dev_if;
- r->id.idiag_cookie[0] = (u32)(unsigned long)req;
- r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
+ sock_diag_save_cookie(req, r->id.idiag_cookie);
tmo = req->expires - jiffies;
if (tmo < 0)
@@ -600,7 +618,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_wqueue = 0;
r->idiag_uid = sock_i_uid(sk);
r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
*(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
*(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
@@ -616,13 +634,13 @@ nlmsg_failure:
}
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
struct inet_diag_entry entry;
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -642,9 +660,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
- bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
+ if (bc != NULL) {
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -664,21 +680,20 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (bc) {
entry.saddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
(entry.family == AF_INET6) ?
inet6_rsk(req)->loc_addr.s6_addr32 :
#endif
&ireq->loc_addr;
entry.daddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
(entry.family == AF_INET6) ?
inet6_rsk(req)->rmt_addr.s6_addr32 :
#endif
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(nla_data(bc),
- nla_len(bc), &entry))
+ if (!inet_diag_bc_run(bc, &entry))
continue;
}
@@ -701,19 +716,11 @@ out:
return err;
}
-static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
+ struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
{
int i, num;
int s_i, s_num;
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- const struct inet_diag_handler *handler;
- struct inet_hashinfo *hashinfo;
-
- handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
- if (IS_ERR(handler))
- goto unlock;
-
- hashinfo = handler->idiag_hashinfo;
s_i = cb->args[1];
s_num = num = cb->args[2];
@@ -738,6 +745,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_listen;
@@ -747,7 +758,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[3] > 0)
goto syn_recv;
- if (inet_csk_diag_dump(sk, skb, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -756,7 +767,7 @@ syn_recv:
if (!(r->idiag_states & TCPF_SYN_RECV))
goto next_listen;
- if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
+ if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -778,7 +789,7 @@ skip_listen_ht:
}
if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
- goto unlock;
+ goto out;
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
@@ -803,13 +814,16 @@ skip_listen_ht:
goto next_normal;
if (!(r->idiag_states & (1 << sk->sk_state)))
goto next_normal;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_normal;
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_normal;
if (r->id.idiag_dport != inet->inet_dport &&
r->id.idiag_dport)
goto next_normal;
- if (inet_csk_diag_dump(sk, skb, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -825,13 +839,16 @@ next_normal:
if (num < s_num)
goto next_dying;
+ if (r->sdiag_family != AF_UNSPEC &&
+ tw->tw_family != r->sdiag_family)
+ goto next_dying;
if (r->id.idiag_sport != tw->tw_sport &&
r->id.idiag_sport)
goto next_dying;
if (r->id.idiag_dport != tw->tw_dport &&
r->id.idiag_dport)
goto next_dying;
- if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
+ if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -845,15 +862,85 @@ next_dying:
done:
cb->args[1] = i;
cb->args[2] = num;
-unlock:
+out:
+ ;
+}
+EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
+
+static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ const struct inet_diag_handler *handler;
+
+ handler = inet_diag_lock_handler(r->sdiag_protocol);
+ if (!IS_ERR(handler))
+ handler->dump(skb, cb, r, bc);
inet_diag_unlock_handler(handler);
+
return skb->len;
}
-static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct nlattr *bc = NULL;
int hdrlen = sizeof(struct inet_diag_req);
+ if (nlmsg_attrlen(cb->nlh, hdrlen))
+ bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
+
+ return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
+}
+
+static inline int inet_diag_type2proto(int type)
+{
+ switch (type) {
+ case TCPDIAG_GETSOCK:
+ return IPPROTO_TCP;
+ case DCCPDIAG_GETSOCK:
+ return IPPROTO_DCCP;
+ default:
+ return 0;
+ }
+}
+
+static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
+ struct inet_diag_req req;
+ struct nlattr *bc = NULL;
+ int hdrlen = sizeof(struct inet_diag_req_compat);
+
+ req.sdiag_family = AF_UNSPEC; /* compatibility */
+ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+ if (nlmsg_attrlen(cb->nlh, hdrlen))
+ bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
+
+ return __inet_diag_dump(skb, cb, &req, bc);
+}
+
+static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh)
+{
+ struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
+ struct inet_diag_req req;
+
+ req.sdiag_family = rc->idiag_family;
+ req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+ return inet_diag_get_exact(in_skb, nlh, &req);
+}
+
+static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int hdrlen = sizeof(struct inet_diag_req_compat);
+
if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
nlmsg_len(nlh) < hdrlen)
return -EINVAL;
@@ -870,28 +957,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
}
- return netlink_dump_start(idiagnl, skb, nlh,
- inet_diag_dump, NULL, 0);
+ return netlink_dump_start(sock_diag_nlsk, skb, nlh,
+ inet_diag_dump_compat, NULL, 0);
}
- return inet_diag_get_exact(skb, nlh);
+ return inet_diag_get_exact_compat(skb, nlh);
}
-static DEFINE_MUTEX(inet_diag_mutex);
-
-static void inet_diag_rcv(struct sk_buff *skb)
+static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
- mutex_lock(&inet_diag_mutex);
- netlink_rcv_skb(skb, &inet_diag_rcv_msg);
- mutex_unlock(&inet_diag_mutex);
+ int hdrlen = sizeof(struct inet_diag_req);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ if (nlmsg_attrlen(h, hdrlen)) {
+ struct nlattr *attr;
+ attr = nlmsg_find_attr(h, hdrlen,
+ INET_DIAG_REQ_BYTECODE);
+ if (attr == NULL ||
+ nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+ inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+ return -EINVAL;
+ }
+
+ return netlink_dump_start(sock_diag_nlsk, skb, h,
+ inet_diag_dump, NULL, 0);
+ }
+
+ return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
}
+static struct sock_diag_handler inet_diag_handler = {
+ .family = AF_INET,
+ .dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler inet6_diag_handler = {
+ .family = AF_INET6,
+ .dump = inet_diag_handler_dump,
+};
+
int inet_diag_register(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
int err = -EINVAL;
- if (type >= INET_DIAG_GETSOCK_MAX)
+ if (type >= IPPROTO_MAX)
goto out;
mutex_lock(&inet_diag_table_mutex);
@@ -910,7 +1023,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
- if (type >= INET_DIAG_GETSOCK_MAX)
+ if (type >= IPPROTO_MAX)
return;
mutex_lock(&inet_diag_table_mutex);
@@ -921,7 +1034,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister);
static int __init inet_diag_init(void)
{
- const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX *
+ const int inet_diag_table_size = (IPPROTO_MAX *
sizeof(struct inet_diag_handler *));
int err = -ENOMEM;
@@ -929,25 +1042,35 @@ static int __init inet_diag_init(void)
if (!inet_diag_table)
goto out;
- idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
- inet_diag_rcv, NULL, THIS_MODULE);
- if (idiagnl == NULL)
- goto out_free_table;
- err = 0;
+ err = sock_diag_register(&inet_diag_handler);
+ if (err)
+ goto out_free_nl;
+
+ err = sock_diag_register(&inet6_diag_handler);
+ if (err)
+ goto out_free_inet;
+
+ sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
out:
return err;
-out_free_table:
+
+out_free_inet:
+ sock_diag_unregister(&inet_diag_handler);
+out_free_nl:
kfree(inet_diag_table);
goto out;
}
static void __exit inet_diag_exit(void)
{
- netlink_kernel_release(idiagnl);
+ sock_diag_unregister(&inet6_diag_handler);
+ sock_diag_unregister(&inet_diag_handler);
+ sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
kfree(inet_diag_table);
}
module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2b32296b7958..2b53a1f7abf6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -46,7 +46,7 @@
#include <net/rtnetlink.h>
#include <net/gre.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -729,9 +729,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if ((dst = rt->rt_gateway) == 0)
goto tx_error_icmp;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct neighbour *neigh = dst_get_neighbour(skb_dst(skb));
+ struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
const struct in6_addr *addr6;
int addr_type;
@@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
goto tx_error;
}
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
@@ -875,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if ((iph->ttl = tiph->ttl) == 0) {
if (skb->protocol == htons(ETH_P_IP))
iph->ttl = old_iph->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6))
iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0d5e5672f3d1..ff302bde8890 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour(dst);
+ neigh = dst_get_neighbour_noref(dst);
if (neigh) {
int res = neigh_output(neigh, skb);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 80d5fa450210..8aa87c19fa00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -37,7 +37,7 @@
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
@@ -508,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
sock_owned_by_user(sk));
if (inet->is_icsk) {
struct inet_connection_sock *icsk = inet_csk(sk);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
(!((1 << sk->sk_state) &
(TCPF_LISTEN | TCPF_CLOSE)) &&
@@ -519,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (opt)
icsk->icsk_ext_hdr_len += opt->opt.optlen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
}
#endif
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 915eb5265b2e..7e4ec9fc2cef 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -253,6 +253,10 @@ static int __init ic_open_devs(void)
}
}
+ /* no point in waiting if we could not bring up at least one device */
+ if (!ic_first_dev)
+ goto have_carrier;
+
/* wait for a carrier on at least one device */
start = jiffies;
while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 94906908a416..413ed1ba7a5a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
if (register_netdevice(dev) < 0)
goto failed_free;
+ strcpy(nt->parms.name, dev->name);
+
dev_hold(dev);
ipip_tunnel_link(ipn, nt);
return nt;
@@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
static int __net_init ipip_init_net(struct net *net)
{
struct ipip_net *ipn = net_generic(net, ipip_net_id);
+ struct ip_tunnel *t;
int err;
ipn->tunnels[0] = ipn->tunnels_wc;
@@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net)
if ((err = register_netdev(ipn->fb_tunnel_dev)))
goto err_reg_dev;
+ t = netdev_priv(ipn->fb_tunnel_dev);
+
+ strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
return 0;
err_reg_dev:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b5508151e547..ba5756d20165 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -65,7 +65,7 @@ static unsigned int flushtimeout = 10;
module_param(flushtimeout, uint, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-static int nflog = 1;
+static bool nflog = true;
module_param(nflog, bool, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c37641e819f2..0e58f09e59fb 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = NF_ACCEPT;
module_param(forward, bool, 0000);
static int __net_init iptable_filter_net_init(struct net *net)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 961eed4f510a..3569d8ecaeac 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -56,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
local_bh_disable();
orphans = percpu_counter_sum_positive(&tcp_orphan_count);
- sockets = percpu_counter_sum_positive(&tcp_sockets_allocated);
+ sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
local_bh_enable();
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_long_read(&tcp_memory_allocated));
+ proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_long_read(&udp_memory_allocated));
+ proto_memory_allocated(&udp_prot));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7047069cf967..bcacf54e5418 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -91,6 +91,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -111,7 +112,7 @@
#include <net/secure_seq.h>
#define RT_FL_TOS(oldflp4) \
- ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
#define IP_MAX_MTU 0xFFF0
@@ -119,6 +120,7 @@
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -132,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly = 256;
static int rt_chain_length_max __read_mostly = 20;
static int redirect_genid;
+static struct delayed_work expires_work;
+static unsigned long expires_ljiffies;
+
/*
* Interface to generic destination cache.
*/
@@ -419,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
int len, HHUptod;
rcu_read_lock();
- n = dst_get_neighbour(&r->dst);
+ n = dst_get_neighbour_noref(&r->dst);
HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
rcu_read_unlock();
@@ -829,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
return ONE;
}
+static void rt_check_expire(void)
+{
+ static unsigned int rover;
+ unsigned int i = rover, goal;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
+ unsigned long samples = 0;
+ unsigned long sum = 0, sum2 = 0;
+ unsigned long delta;
+ u64 mult;
+
+ delta = jiffies - expires_ljiffies;
+ expires_ljiffies = jiffies;
+ mult = ((u64)delta) << rt_hash_log;
+ if (ip_rt_gc_timeout > 1)
+ do_div(mult, ip_rt_gc_timeout);
+ goal = (unsigned int)mult;
+ if (goal > rt_hash_mask)
+ goal = rt_hash_mask + 1;
+ for (; goal > 0; goal--) {
+ unsigned long tmo = ip_rt_gc_timeout;
+ unsigned long length;
+
+ i = (i + 1) & rt_hash_mask;
+ rthp = &rt_hash_table[i].chain;
+
+ if (need_resched())
+ cond_resched();
+
+ samples++;
+
+ if (rcu_dereference_raw(*rthp) == NULL)
+ continue;
+ length = 0;
+ spin_lock_bh(rt_hash_lock_addr(i));
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
+ prefetch(rth->dst.rt_next);
+ if (rt_is_expired(rth)) {
+ *rthp = rth->dst.rt_next;
+ rt_free(rth);
+ continue;
+ }
+ if (rth->dst.expires) {
+ /* Entry is expired even if it is in use */
+ if (time_before_eq(jiffies, rth->dst.expires)) {
+nofree:
+ tmo >>= 1;
+ rthp = &rth->dst.rt_next;
+ /*
+ * We only count entries on
+ * a chain with equal hash inputs once
+ * so that entries for different QOS
+ * levels, and other non-hash input
+ * attributes don't unfairly skew
+ * the length computation
+ */
+ length += has_noalias(rt_hash_table[i].chain, rth);
+ continue;
+ }
+ } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
+ goto nofree;
+
+ /* Cleanup aged off entries. */
+ *rthp = rth->dst.rt_next;
+ rt_free(rth);
+ }
+ spin_unlock_bh(rt_hash_lock_addr(i));
+ sum += length;
+ sum2 += length*length;
+ }
+ if (samples) {
+ unsigned long avg = sum / samples;
+ unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
+ rt_chain_length_max = max_t(unsigned long,
+ ip_rt_gc_elasticity,
+ (avg + 4*sd) >> FRACT_BITS);
+ }
+ rover = i;
+}
+
+/*
+ * rt_worker_func() is run in process context.
+ * we call rt_check_expire() to scan part of the hash table
+ */
+static void rt_worker_func(struct work_struct *work)
+{
+ rt_check_expire();
+ schedule_delayed_work(&expires_work, ip_rt_gc_interval);
+}
+
/*
* Perturbation of rt_genid by a small quantity [1..256]
* Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1265,7 +1361,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
struct rtable *rt = (struct rtable *) dst;
- if (rt) {
+ if (rt && !(rt->dst.flags & DST_NOPEER)) {
if (rt->peer == NULL)
rt_bind_peer(rt, rt->rt_dst, 1);
@@ -1276,7 +1372,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
iph->id = htons(inet_getid(rt->peer, more));
return;
}
- } else
+ } else if (!rt)
printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
__builtin_return_address(0));
@@ -1304,7 +1400,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
-static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
{
struct rtable *rt = (struct rtable *) dst;
__be32 orig_gw = rt->rt_gateway;
@@ -1315,21 +1411,19 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
rt->rt_gateway = peer->redirect_learned.a4;
n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
- if (IS_ERR(n))
- return PTR_ERR(n);
+ if (IS_ERR(n)) {
+ rt->rt_gateway = orig_gw;
+ return;
+ }
old_n = xchg(&rt->dst._neighbour, n);
if (old_n)
neigh_release(old_n);
- if (!n || !(n->nud_state & NUD_VALID)) {
- if (n)
- neigh_event_send(n, NULL);
- rt->rt_gateway = orig_gw;
- return -EAGAIN;
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
} else {
rt->rt_flags |= RTCF_REDIRECTED;
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
- return 0;
}
/* called in rcu_read_lock() section */
@@ -1687,7 +1781,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
}
-static struct rtable *ipv4_validate_peer(struct rtable *rt)
+static void ipv4_validate_peer(struct rtable *rt)
{
if (rt->rt_peer_genid != rt_peer_genid()) {
struct inet_peer *peer;
@@ -1702,15 +1796,12 @@ static struct rtable *ipv4_validate_peer(struct rtable *rt)
if (peer->redirect_genid != redirect_genid)
peer->redirect_learned.a4 = 0;
if (peer->redirect_learned.a4 &&
- peer->redirect_learned.a4 != rt->rt_gateway) {
- if (check_peer_redir(&rt->dst, peer))
- return NULL;
- }
+ peer->redirect_learned.a4 != rt->rt_gateway)
+ check_peer_redir(&rt->dst, peer);
}
rt->rt_peer_genid = rt_peer_genid();
}
- return rt;
}
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
@@ -1719,7 +1810,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
if (rt_is_expired(rt))
return NULL;
- dst = (struct dst_entry *) ipv4_validate_peer(rt);
+ ipv4_validate_peer(rt);
return dst;
}
@@ -2374,9 +2465,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_mark == skb->mark &&
net_eq(dev_net(rth->dst.dev), net) &&
!rt_is_expired(rth)) {
- rth = ipv4_validate_peer(rth);
- if (!rth)
- continue;
+ ipv4_validate_peer(rth);
if (noref) {
dst_use_noref(&rth->dst, jiffies);
skb_dst_set_noref(skb, &rth->dst);
@@ -2435,11 +2524,11 @@ EXPORT_SYMBOL(ip_route_input_common);
static struct rtable *__mkroute_output(const struct fib_result *res,
const struct flowi4 *fl4,
__be32 orig_daddr, __be32 orig_saddr,
- int orig_oif, struct net_device *dev_out,
+ int orig_oif, __u8 orig_rtos,
+ struct net_device *dev_out,
unsigned int flags)
{
struct fib_info *fi = res->fi;
- u32 tos = RT_FL_TOS(fl4);
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
@@ -2490,7 +2579,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_genid = rt_genid(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
- rth->rt_key_tos = tos;
+ rth->rt_key_tos = orig_rtos;
rth->rt_dst = fl4->daddr;
rth->rt_src = fl4->saddr;
rth->rt_route_iif = 0;
@@ -2540,7 +2629,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
{
struct net_device *dev_out = NULL;
- u32 tos = RT_FL_TOS(fl4);
+ __u8 tos = RT_FL_TOS(fl4);
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
@@ -2716,7 +2805,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
make_route:
rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
- dev_out, flags);
+ tos, dev_out, flags);
if (!IS_ERR(rth)) {
unsigned int hash;
@@ -2752,9 +2841,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
(IPTOS_RT_MASK | RTO_ONLINK)) &&
net_eq(dev_net(rth->dst.dev), net) &&
!rt_is_expired(rth)) {
- rth = ipv4_validate_peer(rth);
- if (!rth)
- continue;
+ ipv4_validate_peer(rth);
dst_use(&rth->dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
@@ -3182,6 +3269,13 @@ static ctl_table ipv4_route_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
+ .procname = "gc_interval",
+ .data = &ip_rt_gc_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
.procname = "redirect_load",
.data = &ip_rt_redirect_load,
.maxlen = sizeof(int),
@@ -3391,6 +3485,11 @@ int __init ip_rt_init(void)
devinet_init();
ip_fib_init();
+ INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
+ expires_ljiffies = jiffies;
+ schedule_delayed_work(&expires_work,
+ net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
+
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 90f6544c13e2..51fdbb490437 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
if (!sysctl_tcp_timestamps)
return false;
- tcp_opt->sack_ok = (options >> 4) & 0x1;
+ tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
*ecn_ok = (options >> 5) & 1;
if (*ecn_ok && !sysctl_tcp_ecn)
return false;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 69fd7201129a..4aa7e9dc0cbb 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
+#include <linux/swap.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -23,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
+#include <net/tcp_memcontrol.h>
static int zero;
static int tcp_retr1_max = 255;
@@ -73,7 +75,7 @@ static int ipv4_local_port_range(ctl_table *table, int write,
}
-void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
+static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
{
gid_t *data = table->data;
unsigned seq;
@@ -86,7 +88,7 @@ void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t
}
/* Update system visible IP port range */
-static void set_ping_group_range(struct ctl_table *table, int range[2])
+static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
{
gid_t *data = table->data;
write_seqlock(&sysctl_local_ports.lock);
@@ -174,6 +176,49 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
return ret;
}
+static int ipv4_tcp_mem(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+ unsigned long vec[3];
+ struct net *net = current->nsproxy->net_ns;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ struct mem_cgroup *memcg;
+#endif
+
+ ctl_table tmp = {
+ .data = &vec,
+ .maxlen = sizeof(vec),
+ .mode = ctl->mode,
+ };
+
+ if (!write) {
+ ctl->data = &net->ipv4.sysctl_tcp_mem;
+ return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
+ }
+
+ ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+
+ tcp_prot_mem(memcg, vec[0], 0);
+ tcp_prot_mem(memcg, vec[1], 1);
+ tcp_prot_mem(memcg, vec[2], 2);
+ rcu_read_unlock();
+#endif
+
+ net->ipv4.sysctl_tcp_mem[0] = vec[0];
+ net->ipv4.sysctl_tcp_mem[1] = vec[1];
+ net->ipv4.sysctl_tcp_mem[2] = vec[2];
+
+ return 0;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -433,13 +478,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_mem",
- .data = &sysctl_tcp_mem,
- .maxlen = sizeof(sysctl_tcp_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
@@ -721,6 +759,12 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
},
+ {
+ .procname = "tcp_mem",
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
+ .mode = 0644,
+ .proc_handler = ipv4_tcp_mem,
+ },
{ }
};
@@ -734,6 +778,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
struct ctl_table *table;
+ unsigned long limit;
table = ipv4_net_table;
if (!net_eq(net, &init_net)) {
@@ -769,6 +814,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_rt_cache_rebuild_count = 4;
+ limit = nr_free_buffer_pages() / 8;
+ limit = max(limit, 128UL);
+ net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
+ net->ipv4.sysctl_tcp_mem[1] = limit;
+ net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
+
net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
net_ipv4_ctl_path, table);
if (net->ipv4.ipv4_hdr == NULL)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a09fe253b917..9bcdec3ad772 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
@@ -888,9 +886,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(tcp_sendpage);
-#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
-#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
static inline int select_size(const struct sock *sk, bool sg)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -1008,13 +1003,13 @@ new_segment:
} else {
int merge = 0;
int i = skb_shinfo(skb)->nr_frags;
- struct page *page = TCP_PAGE(sk);
+ struct page *page = sk->sk_sndmsg_page;
int off;
if (page && page_count(page) == 1)
- TCP_OFF(sk) = 0;
+ sk->sk_sndmsg_off = 0;
- off = TCP_OFF(sk);
+ off = sk->sk_sndmsg_off;
if (skb_can_coalesce(skb, i, page, off) &&
off != PAGE_SIZE) {
@@ -1031,7 +1026,7 @@ new_segment:
} else if (page) {
if (off == PAGE_SIZE) {
put_page(page);
- TCP_PAGE(sk) = page = NULL;
+ sk->sk_sndmsg_page = page = NULL;
off = 0;
}
} else
@@ -1057,9 +1052,9 @@ new_segment:
/* If this page was new, give it to the
* socket so it does not get leaked.
*/
- if (!TCP_PAGE(sk)) {
- TCP_PAGE(sk) = page;
- TCP_OFF(sk) = 0;
+ if (!sk->sk_sndmsg_page) {
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
}
goto do_error;
}
@@ -1069,15 +1064,15 @@ new_segment:
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, page, off, copy);
- if (TCP_PAGE(sk)) {
+ if (sk->sk_sndmsg_page) {
get_page(page);
} else if (off + copy < PAGE_SIZE) {
get_page(page);
- TCP_PAGE(sk) = page;
+ sk->sk_sndmsg_page = page;
}
}
- TCP_OFF(sk) = off + copy;
+ sk->sk_sndmsg_off = off + copy;
}
if (!copied)
@@ -3281,14 +3276,9 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);
- limit = nr_free_buffer_pages() / 8;
- limit = max(limit, 128UL);
- sysctl_tcp_mem[0] = limit / 4 * 3;
- sysctl_tcp_mem[1] = limit;
- sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
-
/* Set per-socket limits to no more than 1/128 the pressure threshold */
- limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
+ limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
+ << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 939edb3b8e4d..8cd357a8be79 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -34,11 +34,23 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
tcp_get_info(sk, info);
}
+static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
+}
+
+static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
+}
+
static const struct inet_diag_handler tcp_diag_handler = {
- .idiag_hashinfo = &tcp_hashinfo,
+ .dump = tcp_diag_dump,
+ .dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
- .idiag_type = TCPDIAG_GETSOCK,
- .idiag_info_size = sizeof(struct tcp_info),
+ .idiag_type = IPPROTO_TCP,
};
static int __init tcp_diag_init(void)
@@ -54,4 +66,4 @@ static void __exit tcp_diag_exit(void)
module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0cbb44076cfa..2877c3e09587 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -322,7 +322,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
- !tcp_memory_pressure) {
+ !sk_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
@@ -411,8 +411,8 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
- !tcp_memory_pressure &&
- atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ !sk_under_memory_pressure(sk) &&
+ sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -865,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp)
/* RFC3517 uses different metric in lost marker => reset on change */
if (tcp_is_fack(tp))
tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~2;
+ tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
}
/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
- tp->rx_opt.sack_ok |= 4;
+ tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
}
/* Initialize metrics on socket. */
@@ -2663,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
@@ -3878,7 +3878,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
!estab && sysctl_tcp_sack) {
- opt_rx->sack_ok = 1;
+ opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
break;
@@ -4866,7 +4866,7 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
- else if (tcp_memory_pressure)
+ else if (sk_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
@@ -4932,11 +4932,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk)
return 0;
/* If we are under global TCP memory pressure, do not expand. */
- if (tcp_memory_pressure)
+ if (sk_under_memory_pressure(sk))
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
return 0;
/* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c4b8b09db9f5..1eb4ad57670e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,6 +73,7 @@
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -1917,7 +1918,8 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- percpu_counter_inc(&tcp_sockets_allocated);
+ sock_update_memcg(sk);
+ sk_sockets_allocated_inc(sk);
local_bh_enable();
return 0;
@@ -1973,7 +1975,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
tp->cookie_values = NULL;
}
- percpu_counter_dec(&tcp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
+ sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2620,7 +2623,6 @@ struct proto tcp_prot = {
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
- .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
@@ -2634,10 +2636,14 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ .init_cgroup = tcp_init_cgroup,
+ .destroy_cgroup = tcp_destroy_cgroup,
+ .proto_cgroup = tcp_proto_cgroup,
+#endif
};
EXPORT_SYMBOL(tcp_prot);
-
static int __net_init tcp_sk_init(struct net *net)
{
return inet_ctl_sock_create(&net->ipv4.tcp_sock,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
new file mode 100644
index 000000000000..7fed04f875c1
--- /dev/null
+++ b/net/ipv4/tcp_memcontrol.c
@@ -0,0 +1,272 @@
+#include <net/tcp.h>
+#include <net/tcp_memcontrol.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <linux/nsproxy.h>
+#include <linux/memcontrol.h>
+#include <linux/module.h>
+
+static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft);
+static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer);
+static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event);
+
+static struct cftype tcp_files[] = {
+ {
+ .name = "kmem.tcp.limit_in_bytes",
+ .write_string = tcp_cgroup_write,
+ .read_u64 = tcp_cgroup_read,
+ .private = RES_LIMIT,
+ },
+ {
+ .name = "kmem.tcp.usage_in_bytes",
+ .read_u64 = tcp_cgroup_read,
+ .private = RES_USAGE,
+ },
+ {
+ .name = "kmem.tcp.failcnt",
+ .private = RES_FAILCNT,
+ .trigger = tcp_cgroup_reset,
+ .read_u64 = tcp_cgroup_read,
+ },
+ {
+ .name = "kmem.tcp.max_usage_in_bytes",
+ .private = RES_MAX_USAGE,
+ .trigger = tcp_cgroup_reset,
+ .read_u64 = tcp_cgroup_read,
+ },
+};
+
+static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
+{
+ return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
+}
+
+static void memcg_tcp_enter_memory_pressure(struct sock *sk)
+{
+ if (sk->sk_cgrp->memory_pressure)
+ *sk->sk_cgrp->memory_pressure = 1;
+}
+EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
+
+int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ /*
+ * The root cgroup does not use res_counters, but rather,
+ * rely on the data already collected by the network
+ * subsystem
+ */
+ struct res_counter *res_parent = NULL;
+ struct cg_proto *cg_proto, *parent_cg;
+ struct tcp_memcontrol *tcp;
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+ struct net *net = current->nsproxy->net_ns;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ goto create_files;
+
+ tcp = tcp_from_cgproto(cg_proto);
+
+ tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
+ tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
+ tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
+ tcp->tcp_memory_pressure = 0;
+
+ parent_cg = tcp_prot.proto_cgroup(parent);
+ if (parent_cg)
+ res_parent = parent_cg->memory_allocated;
+
+ res_counter_init(&tcp->tcp_memory_allocated, res_parent);
+ percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
+
+ cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
+ cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
+ cg_proto->sysctl_mem = tcp->tcp_prot_mem;
+ cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
+ cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
+ cg_proto->memcg = memcg;
+
+create_files:
+ return cgroup_add_files(cgrp, ss, tcp_files,
+ ARRAY_SIZE(tcp_files));
+}
+EXPORT_SYMBOL(tcp_init_cgroup);
+
+void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct cg_proto *cg_proto;
+ struct tcp_memcontrol *tcp;
+ u64 val;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ percpu_counter_destroy(&tcp->tcp_sockets_allocated);
+
+ val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+
+ if (val != RESOURCE_MAX)
+ jump_label_dec(&memcg_socket_limit_enabled);
+}
+EXPORT_SYMBOL(tcp_destroy_cgroup);
+
+static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+ u64 old_lim;
+ int i;
+ int ret;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return -EINVAL;
+
+ if (val > RESOURCE_MAX)
+ val = RESOURCE_MAX;
+
+ tcp = tcp_from_cgproto(cg_proto);
+
+ old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
+ ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < 3; i++)
+ tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
+ net->ipv4.sysctl_tcp_mem[i]);
+
+ if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX)
+ jump_label_dec(&memcg_socket_limit_enabled);
+ else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX)
+ jump_label_inc(&memcg_socket_limit_enabled);
+
+ return 0;
+}
+
+static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ unsigned long long val;
+ int ret = 0;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ /* see memcontrol.c */
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (ret)
+ break;
+ ret = tcp_update_limit(memcg, val);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return default_val;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
+}
+
+static u64 tcp_read_usage(struct mem_cgroup *memcg)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+}
+
+static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ u64 val;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
+ break;
+ case RES_USAGE:
+ val = tcp_read_usage(memcg);
+ break;
+ case RES_FAILCNT:
+ case RES_MAX_USAGE:
+ val = tcp_read_stat(memcg, cft->private, 0);
+ break;
+ default:
+ BUG();
+ }
+ return val;
+}
+
+static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
+{
+ struct mem_cgroup *memcg;
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ memcg = mem_cgroup_from_cont(cont);
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return 0;
+ tcp = tcp_from_cgproto(cg_proto);
+
+ switch (event) {
+ case RES_MAX_USAGE:
+ res_counter_reset_max(&tcp->tcp_memory_allocated);
+ break;
+ case RES_FAILCNT:
+ res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
+ break;
+ }
+
+ return 0;
+}
+
+unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
+ if (!cg_proto)
+ return 0;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
+}
+
+void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return;
+
+ tcp = tcp_from_cgproto(cg_proto);
+
+ tcp->tcp_prot_mem[idx] = val;
+}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 9dc146e5ed65..550e755747e0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -336,7 +336,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_timewait_sock *tw6;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 50788d67bdb7..8c8de2780c7a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
{
int i, k, eat;
+ eat = min_t(int, len, skb_headlen(skb));
+ if (eat) {
+ __skb_pull(skb, eat);
+ len -= eat;
+ if (!len)
+ return;
+ }
eat = len;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
- /* If len == headlen, we avoid __skb_pull to preserve alignment. */
- if (unlikely(len < skb_headlen(skb)))
- __skb_pull(skb, len);
- else
- __pskb_trim_head(skb, len - skb_headlen(skb));
+ __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1919,7 +1922,7 @@ u32 __tcp_select_window(struct sock *sk)
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
- if (tcp_memory_pressure)
+ if (sk_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2e0f0af76c19..a516d1e399df 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -171,13 +171,13 @@ static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
- bool do_reset, syn_set = 0;
+ bool do_reset, syn_set = false;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
dst_negative_advice(sk);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
- syn_set = 1;
+ syn_set = true;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
/* Black hole detection */
@@ -261,7 +261,7 @@ static void tcp_delack_timer(unsigned long data)
}
out:
- if (tcp_memory_pressure)
+ if (sk_under_memory_pressure(sk))
sk_mem_reclaim(sk);
out_unlock:
bh_unlock_sock(sk);
@@ -340,7 +340,7 @@ void tcp_retransmit_timer(struct sock *sk)
&inet->inet_daddr, ntohs(inet->inet_dport),
inet->inet_num, tp->snd_una, tp->snd_nxt);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ac3b3ee4b07c..01775983b997 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -105,7 +105,7 @@ drop:
return 0;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int tunnel64_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
@@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
break;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static void tunnel64_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
@@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = {
.netns_ok = 1,
};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static const struct net_protocol tunnel64_protocol = {
.handler = tunnel64_rcv,
.err_handler = tunnel64_err,
@@ -167,7 +167,7 @@ static int __init tunnel4_init(void)
printk(KERN_ERR "tunnel4 init: can't add protocol\n");
return -EAGAIN;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
printk(KERN_ERR "tunnel64 init: can't add protocol\n");
inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
@@ -179,7 +179,7 @@ static int __init tunnel4_init(void)
static void __exit tunnel4_fini(void)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
printk(KERN_ERR "tunnel64 close: can't remove protocol\n");
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ad481b32f1e3..5d075b5f70fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -445,7 +445,7 @@ exact_match:
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport,
int dif, struct udp_table *udptable)
{
@@ -512,6 +512,7 @@ begin:
rcu_read_unlock();
return result;
}
+EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
new file mode 100644
index 000000000000..69f8a7ca63dd
--- /dev/null
+++ b/net/ipv4/udp_diag.c
@@ -0,0 +1,201 @@
+/*
+ * udp_diag.c Module for monitoring UDP transport protocols sockets.
+ *
+ * Authors: Pavel Emelyanov, <xemul@parallels.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/module.h>
+#include <linux/inet_diag.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb, struct inet_diag_req *req,
+ struct nlattr *bc)
+{
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
+
+ return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+
+static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh, struct inet_diag_req *req)
+{
+ int err = -EINVAL;
+ struct sock *sk;
+ struct sk_buff *rep;
+
+ if (req->sdiag_family == AF_INET)
+ sk = __udp4_lib_lookup(&init_net,
+ req->id.idiag_src[0], req->id.idiag_sport,
+ req->id.idiag_dst[0], req->id.idiag_dport,
+ req->id.idiag_if, tbl);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (req->sdiag_family == AF_INET6)
+ sk = __udp6_lib_lookup(&init_net,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+ (struct in6_addr *)req->id.idiag_dst,
+ req->id.idiag_dport,
+ req->id.idiag_if, tbl);
+#endif
+ else
+ goto out_nosk;
+
+ err = -ENOENT;
+ if (sk == NULL)
+ goto out_nosk;
+
+ err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
+ sizeof(struct inet_diag_meminfo) +
+ 64)), GFP_KERNEL);
+ if (!rep)
+ goto out;
+
+ err = inet_sk_diag_fill(sk, NULL, rep, req,
+ NETLINK_CB(in_skb).pid,
+ nlh->nlmsg_seq, 0, nlh);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(rep);
+ goto out;
+ }
+ err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+out:
+ if (sk)
+ sock_put(sk);
+out_nosk:
+ return err;
+}
+
+static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ int num, s_num, slot, s_slot;
+
+ s_slot = cb->args[0];
+ num = s_num = cb->args[1];
+
+ for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) {
+ struct sock *sk;
+ struct hlist_nulls_node *node;
+ struct udp_hslot *hslot = &table->hash[slot];
+
+ if (hlist_nulls_empty(&hslot->head))
+ continue;
+
+ spin_lock_bh(&hslot->lock);
+ sk_nulls_for_each(sk, node, &hslot->head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (num < s_num)
+ goto next;
+ if (!(r->idiag_states & (1 << sk->sk_state)))
+ goto next;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next;
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next;
+ if (r->id.idiag_dport != inet->inet_dport &&
+ r->id.idiag_dport)
+ goto next;
+
+ if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
+ spin_unlock_bh(&hslot->lock);
+ goto done;
+ }
+next:
+ num++;
+ }
+ spin_unlock_bh(&hslot->lock);
+ }
+done:
+ cb->args[0] = slot;
+ cb->args[1] = num;
+}
+
+static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ udp_dump(&udp_table, skb, cb, r, bc);
+}
+
+static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return udp_dump_one(&udp_table, in_skb, nlh, req);
+}
+
+static const struct inet_diag_handler udp_diag_handler = {
+ .dump = udp_diag_dump,
+ .dump_one = udp_diag_dump_one,
+ .idiag_type = IPPROTO_UDP,
+};
+
+static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ udp_dump(&udplite_table, skb, cb, r, bc);
+}
+
+static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return udp_dump_one(&udplite_table, in_skb, nlh, req);
+}
+
+static const struct inet_diag_handler udplite_diag_handler = {
+ .dump = udplite_diag_dump,
+ .dump_one = udplite_diag_dump_one,
+ .idiag_type = IPPROTO_UDPLITE,
+};
+
+static int __init udp_diag_init(void)
+{
+ int err;
+
+ err = inet_diag_register(&udp_diag_handler);
+ if (err)
+ goto out;
+ err = inet_diag_register(&udplite_diag_handler);
+ if (err)
+ goto out_lite;
+out:
+ return err;
+out_lite:
+ inet_diag_unregister(&udp_diag_handler);
+ goto out;
+}
+
+static void __exit udp_diag_exit(void)
+{
+ inet_diag_unregister(&udplite_diag_handler);
+ inet_diag_unregister(&udp_diag_handler);
+}
+
+module_init(udp_diag_init);
+module_exit(udp_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 82806455e859..9247d9d70e9d 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
.priority = 2,
};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
.handler = xfrm_tunnel_rcv,
.err_handler = xfrm_tunnel_err,
@@ -84,7 +84,7 @@ static int __init ipip_init(void)
xfrm_unregister_type(&ipip_type, AF_INET);
return -EAGAIN;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) {
printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n");
xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET);
@@ -97,7 +97,7 @@ static int __init ipip_init(void)
static void __exit ipip_fini(void)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6))
printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n");
#endif