summaryrefslogtreecommitdiffstats
path: root/net/ipv4/icmp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/icmp.c')
-rw-r--r--net/ipv4/icmp.c119
1 files changed, 62 insertions, 57 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ab6d0d98dbc3..e1384e7331d8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#include <net/inet_dscp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/icmp.h>
@@ -220,61 +221,56 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock(&sk->sk_lock.slock);
}
-int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
-int sysctl_icmp_msgs_burst __read_mostly = 50;
-
-static struct {
- spinlock_t lock;
- u32 credit;
- u32 stamp;
-} icmp_global = {
- .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
-};
-
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
+ * @net: network namespace
*
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
- * Note: called with BH disabled
+ * Works in tandem with icmp_global_consume().
*/
-bool icmp_global_allow(void)
+bool icmp_global_allow(struct net *net)
{
- u32 credit, delta, incr = 0, now = (u32)jiffies;
- bool rc = false;
+ u32 delta, now, oldstamp;
+ int incr, new, old;
- /* Check if token bucket is empty and cannot be refilled
- * without taking the spinlock. The READ_ONCE() are paired
- * with the following WRITE_ONCE() in this same function.
+ /* Note: many cpus could find this condition true.
+ * Then later icmp_global_consume() could consume more credits,
+ * this is an acceptable race.
*/
- if (!READ_ONCE(icmp_global.credit)) {
- delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
- if (delta < HZ / 50)
- return false;
- }
+ if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
+ return true;
- spin_lock(&icmp_global.lock);
- delta = min_t(u32, now - icmp_global.stamp, HZ);
- if (delta >= HZ / 50) {
- incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
- if (incr)
- WRITE_ONCE(icmp_global.stamp, now);
- }
- credit = min_t(u32, icmp_global.credit + incr,
- READ_ONCE(sysctl_icmp_msgs_burst));
- if (credit) {
- /* We want to use a credit of one in average, but need to randomize
- * it for security reasons.
- */
- credit = max_t(int, credit - get_random_u32_below(3), 0);
- rc = true;
+ now = jiffies;
+ oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
+ delta = min_t(u32, now - oldstamp, HZ);
+ if (delta < HZ / 50)
+ return false;
+
+ incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
+ if (!incr)
+ return false;
+
+ if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
+ old = atomic_read(&net->ipv4.icmp_global_credit);
+ do {
+ new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
+ } while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
}
- WRITE_ONCE(icmp_global.credit, credit);
- spin_unlock(&icmp_global.lock);
- return rc;
+ return true;
}
EXPORT_SYMBOL(icmp_global_allow);
+void icmp_global_consume(struct net *net)
+{
+ int credits = get_random_u32_below(3);
+
+ /* Note: this might make icmp_global.credit negative. */
+ if (credits)
+ atomic_sub(credits, &net->ipv4.icmp_global_credit);
+}
+EXPORT_SYMBOL(icmp_global_consume);
+
static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
if (type > NR_ICMP_TYPES)
@@ -291,14 +287,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return false;
}
-static bool icmpv4_global_allow(struct net *net, int type, int code)
+static bool icmpv4_global_allow(struct net *net, int type, int code,
+ bool *apply_ratelimit)
{
if (icmpv4_mask_allow(net, type, code))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -308,15 +306,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
*/
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
- struct flowi4 *fl4, int type, int code)
+ struct flowi4 *fl4, int type, int code,
+ bool apply_ratelimit)
{
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
bool rc = true;
int vif;
- if (icmpv4_mask_allow(net, type, code))
- goto out;
+ if (!apply_ratelimit)
+ return true;
/* No rate limit on loopback */
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
@@ -331,6 +330,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
return rc;
}
@@ -402,6 +403,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
+ bool apply_ratelimit = false;
struct flowi4 fl4;
struct sock *sk;
struct inet_sock *inet;
@@ -413,11 +415,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
- /* global icmp_msgs_per_sec */
- if (!icmpv4_global_allow(net, type, code))
+ /* is global icmp_msgs_per_sec exhausted ? */
+ if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -443,14 +445,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
+ fl4.flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK;
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
- if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
@@ -496,7 +498,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -545,7 +547,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
- RT_TOS(tos), rt2->dst.dev);
+ tos, rt2->dst.dev);
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
@@ -596,6 +598,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
int room;
struct icmp_bxm icmp_param;
struct rtable *rt = skb_rtable(skb_in);
+ bool apply_ratelimit = false;
struct ipcm_cookie ipc;
struct flowi4 fl4;
__be32 saddr;
@@ -677,7 +680,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
}
}
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
@@ -685,7 +688,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/
if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
- !icmpv4_global_allow(net, type, code))
+ !icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -744,7 +747,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
goto out_unlock;
/* peer icmp_ratelimit */
- if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */
@@ -1487,6 +1490,8 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
net->ipv4.sysctl_icmp_ratemask = 0x1818;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
+ net->ipv4.sysctl_icmp_msgs_per_sec = 1000;
+ net->ipv4.sysctl_icmp_msgs_burst = 50;
return 0;
}