summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2023-12-14 11:49:00 +0100
committer: David S. Miller <davem@davemloft.net> 2023-12-15 12:01:27 +0100
commit: f5769faeec36b9d5b9df2c3e4f05a76d04ffd9c9 (patch)
tree: 8bb17e49ba064927fcad5296337c099f323e3709
parent: net: increase optmem_max default value (diff)
downloadlinux-f5769faeec36b9d5b9df2c3e4f05a76d04ffd9c9.tar.xz
linux-f5769faeec36b9d5b9df2c3e4f05a76d04ffd9c9.zip
net: Namespace-ify sysctl_optmem_max
optmem_max being used in tx zerocopy,
we want to be able to control it on a netns basis.

Following patch changes two tests.

Tested:

oqq130:~# cat /proc/sys/net/core/optmem_max
131072
oqq130:~# echo 1000000 >/proc/sys/net/core/optmem_max
oqq130:~# cat /proc/sys/net/core/optmem_max
1000000
oqq130:~# unshare -n
oqq130:~# cat /proc/sys/net/core/optmem_max
131072
oqq130:~# exit
logout
oqq130:~# cat /proc/sys/net/core/optmem_max
1000000

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/netns/core.h1
-rw-r--r--include/net/sock.h1
-rw-r--r--net/core/bpf_sk_storage.c3
-rw-r--r--net/core/filter.c12
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/sock.c10
-rw-r--r--net/core/sysctl_net_core.c15
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv6/ipv6_sockglue.c4
9 files changed, 29 insertions, 27 deletions
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index a91ef9f8de60..78214f1b43a2 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -13,6 +13,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ int sysctl_optmem_max;
u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
diff --git a/include/net/sock.h b/include/net/sock.h
index 1d6931caf0c3..8b6fe164b218 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2920,7 +2920,6 @@ extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_tstamp_allow_data;
-extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cca7594be92e..6c4d90b24d46 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
- int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
+ int optmem_max;
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
/* same check as in sock_kmalloc() */
if (size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
diff --git a/net/core/filter.c b/net/core/filter.c
index eedb33f3e998..6d89a9cf33c9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1219,8 +1219,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
*/
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
+ int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
u32 filter_size = bpf_prog_size(fp->prog->len);
- int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
if (filter_size <= optmem_max &&
@@ -1550,12 +1550,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog = __get_filter(fprog, sk);
- int err;
+ int err, optmem_max;
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ if (bpf_prog_size(prog->len) > optmem_max)
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1594,7 +1595,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog;
- int err;
+ int err, optmem_max;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
@@ -1622,7 +1623,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ if (bpf_prog_size(prog->len) > optmem_max) {
err = -ENOMEM;
goto err_prog_put;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb8bcbff9e83..72799533426b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -372,6 +372,10 @@ out_undo:
static int __net_init net_defaults_init_net(struct net *net)
{
net->core.sysctl_somaxconn = SOMAXCONN;
+ /* Limits per socket sk_omem_alloc usage.
+ * TCP zerocopy regular usage needs 128 KB.
+ */
+ net->core.sysctl_optmem_max = 128 * 1024;
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 08ecdc68d2df..446e945f736b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -283,12 +283,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-/* Limits per socket sk_omem_alloc usage.
- * TCP zerocopy regular usage needs 128 KB.
- */
-int sysctl_optmem_max __read_mostly = 128 * 1024;
-EXPORT_SYMBOL(sysctl_optmem_max);
-
int sysctl_tstamp_allow_data __read_mostly = 1;
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
@@ -2653,7 +2647,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- READ_ONCE(sysctl_optmem_max))
+ READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2671,7 +2665,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- int optmem_max = READ_ONCE(sysctl_optmem_max);
+ int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
if ((unsigned int)size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 03f1edb948d7..0f0cb1465e08 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -509,13 +509,6 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "optmem_max",
- .data = &sysctl_optmem_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tstamp_allow_data",
.data = &sysctl_tstamp_allow_data,
.maxlen = sizeof(int),
@@ -674,6 +667,14 @@ static struct ctl_table netns_core_table[] = {
.proc_handler = proc_dointvec_minmax
},
{
+ .procname = "optmem_max",
+ .data = &init_net.core.sysctl_optmem_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_dointvec_minmax
+ },
+ {
.procname = "txrehash",
.data = &init_net.core.sysctl_txrehash,
.maxlen = sizeof(u8),
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d7d13940774e..66247e8b429e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -775,7 +775,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -811,7 +811,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1254,7 +1254,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > READ_ONCE(sysctl_optmem_max)) {
+ if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9e8ebda170f1..56c3c467f9de 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);