diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-01-10 01:30:37 +0100 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-01-10 01:30:38 +0100 |
commit | 6738fc77ffa2575f38b88d85583450184d59ed94 (patch) | |
tree | ec88a6b4b61d85f1dd7eb8706f5b609a7970a116 | |
parent | net/mlx5e: Fix build error in fec_set_block_stats() (diff) | |
parent | net: skb: use kfree_skb_reason() in __udp4_lib_rcv() (diff) | |
download | linux-6738fc77ffa2575f38b88d85583450184d59ed94.tar.xz linux-6738fc77ffa2575f38b88d85583450184d59ed94.zip |
Merge branch 'net-skb-introduce-kfree_skb_with_reason'
Menglong Dong says:
====================
net: skb: introduce kfree_skb_with_reason()
In this series patch, the interface kfree_skb_with_reason() is
introduced(), which is used to collect skb drop reason, and pass
it to 'kfree_skb' tracepoint. Therefor, 'drop_monitor' or eBPF is
able to monitor abnormal skb with detail reason.
In fact, this series patches are out of the intelligence of David
and Steve, I'm just a truck man :/
Previous discussion is here:
https://lore.kernel.org/netdev/20211118105752.1d46e990@gandalf.local.home/
https://lore.kernel.org/netdev/67b36bd8-2477-88ac-83a0-35a1eeaf40c9@gmail.com/
In the first patch, kfree_skb_with_reason() is introduced and
the 'reason' field is added to 'kfree_skb' tracepoint. In the
second patch, 'kfree_skb()' in replaced with 'kfree_skb_with_reason()'
in tcp_v4_rcv(). In the third patch, 'kfree_skb_with_reason()' is
used in __udp4_lib_rcv().
Changes since v3:
- fix some code style problems in skb.h
Changes since v2:
- rename kfree_skb_with_reason() to kfree_skb_reason()
- make kfree_skb() static inline, as Jakub suggested
Changes since v1:
- rename some drop reason, as David suggested
- add the third patch
====================
Link: https://lore.kernel.org/r/20220109063628.526990-1-imagedong@tencent.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/linux/skbuff.h | 28 | ||||
-rw-r--r-- | include/trace/events/skb.h | 41 | ||||
-rw-r--r-- | net/core/dev.c | 3 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 10 | ||||
-rw-r--r-- | net/core/skbuff.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 14 | ||||
-rw-r--r-- | net/ipv4/udp.c | 10 |
7 files changed, 96 insertions, 22 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 642acb0d1646..af64c7de9b53 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -305,6 +305,22 @@ struct sk_buff_head { struct sk_buff; +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_NO_SOCKET, + SKB_DROP_REASON_PKT_TOO_SMALL, + SKB_DROP_REASON_TCP_CSUM, + SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_UDP_CSUM, + SKB_DROP_REASON_MAX, +}; + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -1085,8 +1101,18 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); + +/** + * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason + * @skb: buffer to free + */ +static inline void kfree_skb(struct sk_buff *skb) +{ + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); +} + void skb_release_head_state(struct sk_buff *skb); -void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 9e92f22eb086..3e042ca2cedb 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,29 +9,56 @@ #include <linux/netdevice.h> #include <linux/tracepoint.h> +#define TRACE_SKB_DROP_REASON \ + EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ + EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ + EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ + EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ + EMe(SKB_DROP_REASON_MAX, MAX) + +#undef EM +#undef EMe + +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +TRACE_SKB_DROP_REASON + +#undef EM +#undef EMe +#define EM(a, b) { a, #b }, +#define EMe(a, b) { a, #b } + /* * Tracepoint for free an sk_buff: */ TRACE_EVENT(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), + TP_PROTO(struct sk_buff *skb, void *location, + enum skb_drop_reason reason), - TP_ARGS(skb, location), + TP_ARGS(skb, location, reason), TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( void *, location ) - __field( unsigned short, protocol ) + __field(void *, skbaddr) + __field(void *, location) + __field(unsigned short, protocol) + __field(enum skb_drop_reason, reason) ), TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; __entry->protocol = ntohs(skb->protocol); + __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) + TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", + __entry->skbaddr, __entry->protocol, __entry->location, + __print_symbolic(__entry->reason, + TRACE_SKB_DROP_REASON)) ); TRACE_EVENT(consume_skb, diff --git a/net/core/dev.c b/net/core/dev.c index 83a4089990a0..84a0d9542fe9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4899,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) trace_consume_skb(skb); else - trace_kfree_skb(skb, net_tx_action); + trace_kfree_skb(skb, net_tx_action, + SKB_DROP_REASON_NOT_SPECIFIED); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 3d0ab2eec916..7b288a121a41 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, - void *location); + void *location, + enum skb_drop_reason reason); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); @@ -262,7 +263,9 @@ out: spin_unlock_irqrestore(&data->lock, flags); } -static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, + void *location, + enum skb_drop_reason reason) { trace_drop_common(skb, location); } @@ -490,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, - void *location) + void *location, + enum skb_drop_reason reason) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e514a36bcffc..0118f0afaa4f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -759,21 +759,23 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); /** - * kfree_skb - free an sk_buff + * kfree_skb_reason - free an sk_buff with special reason * @skb: buffer to free + * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has - * hit zero. + * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' + * tracepoint. */ -void kfree_skb(struct sk_buff *skb) +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (!skb_unref(skb)) return; - trace_kfree_skb(skb, __builtin_return_address(0)); + trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb); +EXPORT_SYMBOL(kfree_skb_reason); void kfree_skb_list(struct sk_buff *segs) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9861786b8336..b3f34e366b27 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb) const struct tcphdr *th; bool refcounted; struct sock *sk; + int drop_reason; int ret; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1984,8 +1986,10 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; - if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; + } if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; @@ -2090,8 +2094,10 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb)) + if (tcp_filter(sk, skb)) { + drop_reason = SKB_DROP_REASON_TCP_FILTER; goto discard_and_relse; + } th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); @@ -2124,6 +2130,7 @@ put_and_return: return ret; no_tcp_socket: + drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; @@ -2131,6 +2138,7 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: + drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -2141,7 +2149,7 @@ bad_packet: discard_it: /* Discard frame. */ - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; discard_and_relse: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c2a4411d2b04..464590ea922e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2411,6 +2411,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __be32 saddr, daddr; struct net *net = dev_net(skb->dev); bool refcounted; + int drop_reason; + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* * Validate the packet. @@ -2466,6 +2469,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; + drop_reason = SKB_DROP_REASON_NO_SOCKET; __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -2473,10 +2477,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * Hmm. We got an UDP packet to a port to which we * don't wanna listen. Ignore it. */ - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; short_packet: + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), @@ -2489,6 +2494,7 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ + drop_reason = SKB_DROP_REASON_UDP_CSUM; net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), @@ -2496,7 +2502,7 @@ csum_error: __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; } |