diff options
author | Eric Dumazet <edumazet@google.com> | 2017-06-07 22:29:12 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-06-08 17:26:19 +0200 |
commit | 0604475119de5f80dc051a5db055c6a2a75bd542 (patch) | |
tree | 292c38e390647ef05fc445510c4d2a89b10b3f2a /net/ipv4 | |
parent | Merge branch 'tcp-Namespaceify-3-sysctls' (diff) | |
download | linux-0604475119de5f80dc051a5db055c6a2a75bd542.tar.xz linux-0604475119de5f80dc051a5db055c6a2a75bd542.zip |
tcp: add TCPMemoryPressuresChrono counter
DRAM supply shortage and poor memory pressure tracking in TCP
stack makes any change in SO_SNDBUF/SO_RCVBUF (or equivalent autotuning
limits) and tcp_mem[] quite hazardous.
TCPMemoryPressures SNMP counter is an indication of tcp_mem sysctl
limits being hit, but only tracking number of transitions.
If TCP stack behavior under stress was perfect :
1) It would maintain memory usage close to the limit.
2) Memory pressure state would be entered for short times.
We certainly prefer 100 events lasting 10ms compared to one event
lasting 200 seconds.
This patch adds a new SNMP counter tracking cumulative duration of
memory pressure events, given in ms units.
$ cat /proc/sys/net/ipv4/tcp_mem
3088 4117 6176
$ grep TCP /proc/net/sockstat
TCP: inuse 180 orphan 0 tw 2 alloc 234 mem 4140
$ nstat -n ; sleep 10 ; nstat |grep Pressure
TcpExtTCPMemoryPressures 1700
TcpExtTCPMemoryPressuresChrono 5209
v2: Used EXPORT_SYMBOL_GPL() instead of EXPORT_SYMBOL() as David
instructed.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/proc.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 |
3 files changed, 27 insertions, 6 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fa44e752a9a3..43eb6567b3a0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -250,6 +250,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO), SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 87981fcdfcf2..cc8fd8b747a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -320,17 +320,36 @@ struct tcp_splice_state { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ -int tcp_memory_pressure __read_mostly; -EXPORT_SYMBOL(tcp_memory_pressure); +unsigned long tcp_memory_pressure __read_mostly; +EXPORT_SYMBOL_GPL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) { - if (!tcp_memory_pressure) { + unsigned long val; + + if (tcp_memory_pressure) + return; + val = jiffies; + + if (!val) + val--; + if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); - tcp_memory_pressure = 1; - } } -EXPORT_SYMBOL(tcp_enter_memory_pressure); +EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); + +void tcp_leave_memory_pressure(struct sock *sk) +{ + unsigned long val; + + if (!tcp_memory_pressure) + return; + val = xchg(&tcp_memory_pressure, 0); + if (val) + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, + jiffies_to_msecs(jiffies - val)); +} +EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13c7ae7d4504..1dc8c449e16a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2387,6 +2387,7 @@ struct proto tcp_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, |