From b4391db42308c9940944b5d7be5ca4b78fb88dd0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:19 +0200 Subject: netlink: fix nla_put_{u8,u16,u32} for KASAN When CONFIG_KASAN is enabled, the "--param asan-stack=1" causes rather large stack frames in some functions. This goes unnoticed normally because CONFIG_FRAME_WARN is disabled with CONFIG_KASAN by default as of commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y"). The kernelci.org build bot however has the warning enabled and that led me to investigate it a little further, as every build produces these warnings: net/wireless/nl80211.c:4389:1: warning: the frame size of 2240 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/wireless/nl80211.c:1895:1: warning: the frame size of 3776 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/wireless/nl80211.c:1410:1: warning: the frame size of 2208 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/bridge/br_netlink.c:1282:1: warning: the frame size of 2544 bytes is larger than 2048 bytes [-Wframe-larger-than=] Most of this problem is now solved in gcc-8, which can consolidate the stack slots for the inline function arguments. On older compilers we can add a workaround by declaring a local variable in each function to pass the inline function argument. Cc: stable@vger.kernel.org Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/netlink.h | 73 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index e51cf5f81597..14c289393071 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -773,7 +773,10 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, */ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) { - return nla_put(skb, attrtype, sizeof(u8), &value); + /* temporary variables to work around GCC PR81715 with asan-stack=1 */ + u8 tmp = value; + + return nla_put(skb, attrtype, sizeof(u8), &tmp); } /** @@ -784,7 +787,9 @@ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) */ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) { - return nla_put(skb, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return nla_put(skb, attrtype, sizeof(u16), &tmp); } /** @@ -795,7 +800,9 @@ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) */ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put(skb, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be16), &tmp); } /** @@ -806,7 +813,9 @@ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be16 tmp = value; + + return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -817,7 +826,9 @@ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) { - return nla_put(skb, attrtype, sizeof(__le16), &value); + __le16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le16), &tmp); } /** @@ -828,7 +839,9 @@ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) */ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) { - return nla_put(skb, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return nla_put(skb, attrtype, sizeof(u32), &tmp); } /** @@ -839,7 +852,9 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) */ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put(skb, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be32), &tmp); } /** @@ -850,7 +865,9 @@ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be32 tmp = value; + + return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -861,7 +878,9 @@ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) { - return nla_put(skb, attrtype, sizeof(__le32), &value); + __le32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le32), &tmp); } /** @@ -874,7 +893,9 @@ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, u64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr); + u64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** @@ -887,7 +908,9 @@ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); + __be64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr); } /** @@ -900,7 +923,9 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + __be64 tmp = value; + + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp, padattr); } @@ -914,7 +939,9 @@ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr); + __le64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr); } /** @@ -925,7 +952,9 @@ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, */ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) { - return nla_put(skb, attrtype, sizeof(s8), &value); + s8 tmp = value; + + return nla_put(skb, attrtype, sizeof(s8), &tmp); } /** @@ -936,7 +965,9 @@ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) */ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) { - return nla_put(skb, attrtype, sizeof(s16), &value); + s16 tmp = value; + + return nla_put(skb, attrtype, sizeof(s16), &tmp); } /** @@ -947,7 +978,9 @@ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) */ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) { - return nla_put(skb, attrtype, sizeof(s32), &value); + s32 tmp = value; + + return nla_put(skb, attrtype, sizeof(s32), &tmp); } /** @@ -960,7 +993,9 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr); + s64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } /** @@ -1010,7 +1045,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, __be32 addr) { - return nla_put_be32(skb, attrtype, addr); + __be32 tmp = addr; + + return nla_put_be32(skb, attrtype, tmp); } /** -- cgit v1.2.3 From 99d3cd27f755d63fd6cf85169eaa873d90769aa5 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 24 Aug 2017 17:21:44 +0300 Subject: net/mlx5: Fix FPGA capability location Currently, FPGA capability is located in (mdev)->caps.hca_cur, change the location to be (mdev)->caps.fpga, since hca_cur is reserved for HCA device capabilities. Fixes: e29341fb3a5b ("net/mlx5: FPGA, Add basic support for Innova") Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c | 3 +-- include/linux/mlx5/device.h | 5 ++--- include/linux/mlx5/driver.h | 1 + 5 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c index e37453d838db..c0fd2212e890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -71,11 +71,11 @@ int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, return 0; } -int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps) +int mlx5_fpga_caps(struct mlx5_core_dev *dev) { u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; - return mlx5_core_access_reg(dev, in, sizeof(in), caps, + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, MLX5_ST_SZ_BYTES(fpga_cap), MLX5_REG_FPGA_CAP, 0, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h index 94bdfd47c3f0..d05233c9b4f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -65,7 +65,7 @@ struct mlx5_fpga_qp_counters { u64 rx_total_drop; }; -int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps); +int mlx5_fpga_caps(struct mlx5_core_dev *dev); int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 9034e9960a76..dc8970346521 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -139,8 +139,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; - err = mlx5_fpga_caps(fdev->mdev, - fdev->mdev->caps.hca_cur[MLX5_CAP_FPGA]); + err = mlx5_fpga_caps(fdev->mdev); if (err) goto out; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index eaf4ad209c8f..e32dbc4934db 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -980,7 +980,6 @@ enum mlx5_cap_type { MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, - MLX5_CAP_FPGA, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1110,10 +1109,10 @@ enum mlx5_mcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_FPGA(mdev, cap) \ - MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP64_FPGA(mdev, cap) \ - MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) enum { MLX5_CMD_STAT_OK = 0x0, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 02ff700e4f30..401c8972cc3a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -774,6 +774,7 @@ struct mlx5_core_dev { u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; + u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; -- cgit v1.2.3 From 16f1c5bb3ed75b3cf3ced537db40f7e1a244debe Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Sun, 30 Jul 2017 11:02:51 +0300 Subject: net/mlx5: Check device capability for maximum flow counters Added check for the maximal number of flow counters attached to rule (FTE). Fixes: bd5251dbf156b ('net/mlx5_core: Introduce flow steering destination of type counter') Signed-off-by: Raed Salem Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 8 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 11 +++++++++++ include/linux/mlx5/mlx5_ifc.h | 3 ++- 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e0d0efd903bc..36ecc2b2e187 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -293,6 +293,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { @@ -305,12 +308,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, list_size); } err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5509a752f98e..48dd78975062 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -52,6 +52,7 @@ enum fs_flow_table_type { FS_FT_FDB = 0X4, FS_FT_SNIFFER_RX = 0X5, FS_FT_SNIFFER_TX = 0X6, + FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX, }; enum fs_flow_table_op_mod { @@ -260,4 +261,14 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_dst(pos, fte) \ fs_list_for_each_entry(pos, &(fte)->node.children) +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\ + ) + #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a528b35a022e..69772347f866 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -327,7 +327,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_80[0x18]; u8 log_max_destination[0x8]; - u8 reserved_at_a0[0x18]; + u8 log_max_flow_counter[0x8]; + u8 reserved_at_a8[0x10]; u8 log_max_flow[0x8]; u8 reserved_at_c0[0x40]; -- cgit v1.2.3 From 7487449c86c65202b3b725c4524cb48dd65e4e6f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Sep 2017 15:51:36 +0200 Subject: IPv4: early demux can return an error code Currently no error is emitted, but this infrastructure will used by the next patch to allow source address validation for mcast sockets. Since early demux can do a route lookup and an ipv4 route lookup can return an error code this is consistent with the current ipv4 route infrastructure. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/protocol.h | 4 ++-- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- net/ipv4/ip_input.c | 25 +++++++++++++++---------- net/ipv4/tcp_ipv4.c | 9 +++++---- net/ipv4/udp.c | 11 ++++++----- 6 files changed, 30 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index 65ba335b0e7e..4fc75f7ae23b 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,8 +39,8 @@ /* This is used to register protocols. */ struct net_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); + int (*early_demux)(struct sk_buff *skb); + int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, diff --git a/include/net/tcp.h b/include/net/tcp.h index 3bc910a9bfc6..89974c5286d8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); -void tcp_v4_early_demux(struct sk_buff *skb); +int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); diff --git a/include/net/udp.h b/include/net/udp.h index 12dfbfe2e2d7..6c759c8594e2 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); } -void udp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fa2dc8f692c6..57fc13c6ab2b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -311,9 +311,10 @@ drop: static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - struct rtable *rt; + int (*edemux)(struct sk_buff *skb); struct net_device *dev = skb->dev; - void (*edemux)(struct sk_buff *skb); + struct rtable *rt; + int err; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - edemux(skb); + err = edemux(skb); + if (unlikely(err)) + goto drop_error; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } @@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); - if (unlikely(err)) { - if (err == -EXDEV) - __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); - goto drop; - } + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, dev); + if (unlikely(err)) + goto drop_error; } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) drop: kfree_skb(skb); return NET_RX_DROP; + +drop_error: + if (err == -EXDEV) + __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); + goto drop; } /* diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9416b5162bc..85164d4d3e53 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1503,23 +1503,23 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -void tcp_v4_early_demux(struct sk_buff *skb) +int tcp_v4_early_demux(struct sk_buff *skb) { const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; if (skb->pkt_type != PACKET_HOST) - return; + return 0; if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return; + return 0; iph = ip_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) - return; + return 0; sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, @@ -1538,6 +1538,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb_dst_set_noref(skb, dst); } } + return 0; } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ef29df8648e4..9b30f821fe96 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2221,7 +2221,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, return NULL; } -void udp_v4_early_demux(struct sk_buff *skb) +int udp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; @@ -2234,7 +2234,7 @@ void udp_v4_early_demux(struct sk_buff *skb) /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) - return; + return 0; iph = ip_hdr(skb); uh = udp_hdr(skb); @@ -2244,14 +2244,14 @@ void udp_v4_early_demux(struct sk_buff *skb) struct in_device *in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) - return; + return 0; /* we are supposed to accept bcast packets */ if (skb->pkt_type == PACKET_MULTICAST) { ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, iph->protocol); if (!ours) - return; + return 0; } sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, @@ -2263,7 +2263,7 @@ void udp_v4_early_demux(struct sk_buff *skb) } if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) - return; + return 0; skb->sk = sk; skb->destructor = sock_efree; @@ -2278,6 +2278,7 @@ void udp_v4_early_demux(struct sk_buff *skb) */ skb_dst_set_noref(skb, dst); } + return 0; } int udp_rcv(struct sk_buff *skb) -- cgit v1.2.3 From bc044e8db7962e727a75b591b9851ff2ac5cf846 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Sep 2017 15:51:37 +0200 Subject: udp: perform source validation for mcast early demux The UDP early demux can leverate the rx dst cache even for multicast unconnected sockets. In such scenario the ipv4 source address is validated only on the first packet in the given flow. After that, when we fetch the dst entry from the socket rx cache, we stop enforcing the rp_filter and we even start accepting any kind of martian addresses. Disabling the dst cache for unconnected multicast socket will cause large performace regression, nearly reducing by half the max ingress tput. Instead we factor out a route helper to completely validate an skb source address for multicast packets and we call it from the UDP early demux for mcast packets landing on unconnected sockets, after successful fetching the related cached dst entry. This still gives a measurable, but limited performance regression: rp_filter = 0 rp_filter = 1 edmux disabled: 1182 Kpps 1127 Kpps edmux before: 2238 Kpps 2238 Kpps edmux after: 2037 Kpps 2019 Kpps The above figures are on top of current net tree. Applying the net-next commit 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") the delta with rp_filter == 0 will decrease even more. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/route.h | 4 +++- net/ipv4/route.c | 46 ++++++++++++++++++++++++++-------------------- net/ipv4/udp.c | 13 ++++++++++++- 3 files changed, 41 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 57dfc6850d37..d538e6db1afe 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } - +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, + struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 94d4cd2d5ea4..ac6fde5d45f1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev, EXPORT_SYMBOL(rt_dst_alloc); /* called in rcu_read_lock() section */ -static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, int our) +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, + struct in_device *in_dev, u32 *itag) { - struct rtable *rth; - struct in_device *in_dev = __in_dev_get_rcu(dev); - unsigned int flags = RTCF_MULTICAST; - u32 itag = 0; int err; /* Primary sanity checks. */ - if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || skb->protocol != htons(ETH_P_IP)) - goto e_inval; + return -EINVAL; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) - goto e_inval; + return -EINVAL; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) - goto e_inval; + return -EINVAL; } else { err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, &itag); + in_dev, itag); if (err < 0) - goto e_err; + return err; } + return 0; +} + +/* called in rcu_read_lock() section */ +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, int our) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + unsigned int flags = RTCF_MULTICAST; + struct rtable *rth; + u32 itag = 0; + int err; + + err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); + if (err) + return err; + if (our) flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) - goto e_nobufs; + return -ENOBUFS; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; @@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb_dst_set(skb, &rth->dst); return 0; - -e_nobufs: - return -ENOBUFS; -e_inval: - return -EINVAL; -e_err: - return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9b30f821fe96..5676237d2b0f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2224,6 +2224,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, int udp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct in_device *in_dev = NULL; const struct iphdr *iph; const struct udphdr *uh; struct sock *sk = NULL; @@ -2241,7 +2242,7 @@ int udp_v4_early_demux(struct sk_buff *skb) if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; @@ -2272,11 +2273,21 @@ int udp_v4_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, 0); if (dst) { + u32 itag = 0; + /* set noref for now. * any place which wants to hold dst has to call * dst_hold_safe() */ skb_dst_set_noref(skb, dst); + + /* for unconnected multicast sockets we need to validate + * the source on each packet + */ + if (!inet_sk(sk)->inet_daddr && in_dev) + return ip_mc_validate_source(skb, iph->daddr, + iph->saddr, iph->tos, + skb->dev, in_dev, &itag); } return 0; } -- cgit v1.2.3 From 90caccdd8cc0215705f18b92771b449b01e2474a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 3 Oct 2017 15:37:20 -0700 Subject: bpf: fix bpf_tail_call() x64 JIT - bpf prog_array just like all other types of bpf array accepts 32-bit index. Clarify that in the comment. - fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes - tighten corresponding check in the interpreter to stay consistent The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and though JIT code is wrong it will check bounds correctly. Hence two fixes tags. All other JITs don't have this problem. Signed-off-by: Alexei Starovoitov Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 4 ++-- include/uapi/linux/bpf.h | 2 +- kernel/bpf/core.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c9573660d51..0554e8aef4d5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog) /* if (index >= array->map.max_entries) * goto out; */ - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ #define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..f90860d1f897 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -312,7 +312,7 @@ union bpf_attr { * jump into another BPF program * @ctx: context pointer passed to next program * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run + * @index: 32-bit index inside array that selects specific program to run * Return: 0 on success or negative error * * int bpf_clone_redirect(skb, ifindex, flags) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..7b62df86be1d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1022,7 +1022,7 @@ select_insn: struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog; - u64 index = BPF_R3; + u32 index = BPF_R3; if (unlikely(index >= array->map.max_entries)) goto out; -- cgit v1.2.3