From 9c86336c15db1c48cbaddff56caf2be0a930e991 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 20 Aug 2018 10:51:05 +0800 Subject: ip6_vti: fix a null pointer deference when destroy vti6 tunnel If load ip6_vti module and create a network namespace when set fb_tunnels_only_for_init_net to 1, then exit the namespace will cause following crash: [ 6601.677036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 6601.679057] PGD 8000000425eca067 P4D 8000000425eca067 PUD 424292067 PMD 0 [ 6601.680483] Oops: 0000 [#1] SMP PTI [ 6601.681223] CPU: 7 PID: 93 Comm: kworker/u16:1 Kdump: loaded Tainted: G E 4.18.0+ #3 [ 6601.683153] Hardware name: Fedora Project OpenStack Nova, BIOS seabios-1.7.5-11.el7 04/01/2014 [ 6601.684919] Workqueue: netns cleanup_net [ 6601.685742] RIP: 0010:vti6_exit_batch_net+0x87/0xd0 [ip6_vti] [ 6601.686932] Code: 7b 08 48 89 e6 e8 b9 ea d3 dd 48 8b 1b 48 85 db 75 ec 48 83 c5 08 48 81 fd 00 01 00 00 75 d5 49 8b 84 24 08 01 00 00 48 89 e6 <48> 8b 78 08 e8 90 ea d3 dd 49 8b 45 28 49 39 c6 4c 8d 68 d8 75 a1 [ 6601.690735] RSP: 0018:ffffa897c2737de0 EFLAGS: 00010246 [ 6601.691846] RAX: 0000000000000000 RBX: 0000000000000000 RCX: dead000000000200 [ 6601.693324] RDX: 0000000000000015 RSI: ffffa897c2737de0 RDI: ffffffff9f2ea9e0 [ 6601.694824] RBP: 0000000000000100 R08: 0000000000000000 R09: 0000000000000000 [ 6601.696314] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8dc323c07e00 [ 6601.697812] R13: ffff8dc324a63100 R14: ffffa897c2737e30 R15: ffffa897c2737e30 [ 6601.699345] FS: 0000000000000000(0000) GS:ffff8dc33fdc0000(0000) knlGS:0000000000000000 [ 6601.701068] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6601.702282] CR2: 0000000000000008 CR3: 0000000424966002 CR4: 00000000001606e0 [ 6601.703791] Call Trace: [ 6601.704329] cleanup_net+0x1b4/0x2c0 [ 6601.705268] process_one_work+0x16c/0x370 [ 6601.706145] worker_thread+0x49/0x3e0 [ 6601.706942] kthread+0xf8/0x130 [ 6601.707626] ? rescuer_thread+0x340/0x340 [ 6601.708476] ? kthread_bind+0x10/0x10 [ 6601.709266] ret_from_fork+0x35/0x40 Reproduce: modprobe ip6_vti echo 1 > /proc/sys/net/core/fb_tunnels_only_for_init_net unshare -n exit This because ip6n->tnls_wc[0] point to fallback device in default, but in non-default namespace, ip6n->tnls_wc[0] will be NULL, so add the NULL check comparatively. Fixes: e2948e5af8ee ("ip6_vti: fix creating fallback tunnel device for vti6") Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 38dec9da90d3..5095367c7204 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1094,7 +1094,8 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, list); + if (t) + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) -- cgit v1.2.3 From 80f1a0f4e0cd4bfc8a74fc1c39843a6e7b206b95 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Aug 2018 13:02:41 -0700 Subject: net/ipv6: Put lwtstate when destroying fib6_info Prior to the introduction of fib6_info lwtstate was managed by the dst code. With fib6_info releasing lwtstate needs to be done when the struct is freed. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d212738e9d10..c861a6d4671d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -198,6 +198,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head) } } + lwtstate_put(f6i->fib6_nh.nh_lwtstate); + if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); -- cgit v1.2.3 From 093dee661d6004738c4cbcbf48835c1e6c6ebae3 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 13:58:08 +0000 Subject: sch_cake: Remove unused including Remove including that don't need it. Signed-off-by: Yue Haibing Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 35fc7252187c..4d26b0823cdf 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From c27f1e2e9f29563cb093e96261e87c1ef83aeb98 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 14:05:42 +0000 Subject: rds: tcp: remove duplicated include from tcp.c Remove duplicated include. Signed-off-by: Yue Haibing Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2c7b7c352d3e..b9bbcf3d6c63 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "rds.h" -- cgit v1.2.3 From edfaf94fa705181eeb2fe0c36c0b902dedbd40f1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:05 -0700 Subject: net_sched: improve and refactor tcf_action_put_many() tcf_action_put_many() is mostly called to clean up actions on failure path, but tcf_action_put_many(&actions[acts_deleted]) is used in the ugliest way: it passes a slice of the array and uses an additional NULL at the end to avoid out-of-bound access. acts_deleted is completely unnecessary since we can teach tcf_action_put_many() scan the whole array and checks against NULL pointer. Which also means tcf_action_delete() should set deleted action pointers to NULL to avoid double free. Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to actions") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 229d63c99be2..cd69a6afcf88 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -686,14 +686,18 @@ static int tcf_action_put(struct tc_action *p) return __tcf_action_put(p, false); } +/* Put all actions in this array, skip those NULL's. */ static void tcf_action_put_many(struct tc_action *actions[]) { int i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { struct tc_action *a = actions[i]; - const struct tc_action_ops *ops = a->ops; + const struct tc_action_ops *ops; + if (!a) + continue; + ops = a->ops; if (tcf_action_put(a)) module_put(ops->owner); } @@ -1176,7 +1180,7 @@ err_out: } static int tcf_action_delete(struct net *net, struct tc_action *actions[], - int *acts_deleted, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { u32 act_index; int ret, i; @@ -1196,20 +1200,17 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[], } else { /* now do the delete */ ret = ops->delete(net, act_index); - if (ret < 0) { - *acts_deleted = i + 1; + if (ret < 0) return ret; - } } + actions[i] = NULL; } - *acts_deleted = i; return 0; } static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - int *acts_deleted, u32 portid, size_t attr_size, - struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { int ret; struct sk_buff *skb; @@ -1227,7 +1228,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } /* now do the delete */ - ret = tcf_action_delete(net, actions, acts_deleted, extack); + ret = tcf_action_delete(net, actions, extack); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); @@ -1249,8 +1250,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t attr_size = 0; - struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {}; - int acts_deleted = 0; + struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) @@ -1280,14 +1280,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, actions, &acts_deleted, portid, - attr_size, extack); + ret = tcf_del_notify(net, n, actions, portid, attr_size, extack); if (ret) goto err; - return ret; + return 0; } err: - tcf_action_put_many(&actions[acts_deleted]); + tcf_action_put_many(actions); return ret; } -- cgit v1.2.3 From 97a3f84f2c84f81b859aedd2c186df09c2ee21a6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:06 -0700 Subject: net_sched: remove unnecessary ops->delete() All ops->delete() wants is getting the tn->idrinfo, but we already have tc_action before calling ops->delete(), and tc_action has a pointer ->idrinfo. More importantly, each type of action does the same thing, that is, just calling tcf_idr_delete_index(). So it can be just removed. Fixes: b409074e6693 ("net: sched: add 'delete' function to action ops") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 15 +++++++-------- net/sched/act_bpf.c | 8 -------- net/sched/act_connmark.c | 8 -------- net/sched/act_csum.c | 8 -------- net/sched/act_gact.c | 8 -------- net/sched/act_ife.c | 8 -------- net/sched/act_ipt.c | 16 ---------------- net/sched/act_mirred.c | 8 -------- net/sched/act_nat.c | 8 -------- net/sched/act_pedit.c | 8 -------- net/sched/act_police.c | 8 -------- net/sched/act_sample.c | 8 -------- net/sched/act_simple.c | 8 -------- net/sched/act_skbedit.c | 8 -------- net/sched/act_skbmod.c | 8 -------- net/sched/act_tunnel_key.c | 8 -------- net/sched/act_vlan.c | 8 -------- 18 files changed, 7 insertions(+), 146 deletions(-) (limited to 'net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 1ad5b19e83a9..e32708491d83 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -102,7 +102,6 @@ struct tc_action_ops { size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); void (*put_dev)(struct net_device *dev); - int (*delete)(struct net *net, u32 index); }; struct tc_action_net { @@ -158,7 +157,6 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index); int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); static inline int tcf_idr_release(struct tc_action *a, bool bind) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cd69a6afcf88..00bf7d2b0bdd 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -337,9 +337,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } EXPORT_SYMBOL(tcf_idr_check); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index) +static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { - struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret = 0; @@ -370,7 +369,6 @@ int tcf_idr_delete_index(struct tc_action_net *tn, u32 index) spin_unlock(&idrinfo->lock); return ret; } -EXPORT_SYMBOL(tcf_idr_delete_index); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, @@ -1182,24 +1180,25 @@ err_out: static int tcf_action_delete(struct net *net, struct tc_action *actions[], struct netlink_ext_ack *extack) { - u32 act_index; - int ret, i; + int i; for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { struct tc_action *a = actions[i]; const struct tc_action_ops *ops = a->ops; - /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. */ - act_index = a->tcfa_index; + struct tcf_idrinfo *idrinfo = a->idrinfo; + u32 act_index = a->tcfa_index; if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); } else { + int ret; + /* now do the delete */ - ret = ops->delete(net, act_index); + ret = tcf_idr_delete_index(idrinfo, act_index); if (ret < 0) return ret; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index d30b23e42436..0c68bc9cf0b4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -395,13 +395,6 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_bpf_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .type = TCA_ACT_BPF, @@ -412,7 +405,6 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .init = tcf_bpf_init, .walk = tcf_bpf_walker, .lookup = tcf_bpf_search, - .delete = tcf_bpf_delete, .size = sizeof(struct tcf_bpf), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 54c0bf54f2ac..6f0f273f1139 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -198,13 +198,6 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_connmark_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .type = TCA_ACT_CONNMARK, @@ -214,7 +207,6 @@ static struct tc_action_ops act_connmark_ops = { .init = tcf_connmark_init, .walk = tcf_connmark_walker, .lookup = tcf_connmark_search, - .delete = tcf_connmark_delete, .size = sizeof(struct tcf_connmark_info), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e698d3fe2080..b8a67ae3105a 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -659,13 +659,6 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_csum)); } -static int tcf_csum_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_csum_ops = { .kind = "csum", .type = TCA_ACT_CSUM, @@ -677,7 +670,6 @@ static struct tc_action_ops act_csum_ops = { .walk = tcf_csum_walker, .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, - .delete = tcf_csum_delete, .size = sizeof(struct tcf_csum), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 6a3f25a8ffb3..cd1d9bd32ef9 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -243,13 +243,6 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) return sz; } -static int tcf_gact_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, @@ -261,7 +254,6 @@ static struct tc_action_ops act_gact_ops = { .walk = tcf_gact_walker, .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, - .delete = tcf_gact_delete, .size = sizeof(struct tcf_gact), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index d1081bdf1bdb..92fcf8ba5bca 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -853,13 +853,6 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_ife_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_ife_ops = { .kind = "ife", .type = TCA_ACT_IFE, @@ -870,7 +863,6 @@ static struct tc_action_ops act_ife_ops = { .init = tcf_ife_init, .walk = tcf_ife_walker, .lookup = tcf_ife_search, - .delete = tcf_ife_delete, .size = sizeof(struct tcf_ife_info), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 51f235bbeb5b..23273b5303fd 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -337,13 +337,6 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_ipt_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .type = TCA_ACT_IPT, @@ -354,7 +347,6 @@ static struct tc_action_ops act_ipt_ops = { .init = tcf_ipt_init, .walk = tcf_ipt_walker, .lookup = tcf_ipt_search, - .delete = tcf_ipt_delete, .size = sizeof(struct tcf_ipt), }; @@ -395,13 +387,6 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_xt_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_xt_ops = { .kind = "xt", .type = TCA_ACT_XT, @@ -412,7 +397,6 @@ static struct tc_action_ops act_xt_ops = { .init = tcf_xt_init, .walk = tcf_xt_walker, .lookup = tcf_xt_search, - .delete = tcf_xt_delete, .size = sizeof(struct tcf_ipt), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 38fd20f10f67..8bf66d0a6800 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -395,13 +395,6 @@ static void tcf_mirred_put_dev(struct net_device *dev) dev_put(dev); } -static int tcf_mirred_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .type = TCA_ACT_MIRRED, @@ -416,7 +409,6 @@ static struct tc_action_ops act_mirred_ops = { .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, .put_dev = tcf_mirred_put_dev, - .delete = tcf_mirred_delete, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 822e903bfc25..4313aa102440 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -300,13 +300,6 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_nat_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_nat_ops = { .kind = "nat", .type = TCA_ACT_NAT, @@ -316,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .init = tcf_nat_init, .walk = tcf_nat_walker, .lookup = tcf_nat_search, - .delete = tcf_nat_delete, .size = sizeof(struct tcf_nat), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 8a7a7cb94e83..107034070019 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -460,13 +460,6 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_pedit_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .type = TCA_ACT_PEDIT, @@ -477,7 +470,6 @@ static struct tc_action_ops act_pedit_ops = { .init = tcf_pedit_init, .walk = tcf_pedit_walker, .lookup = tcf_pedit_search, - .delete = tcf_pedit_delete, .size = sizeof(struct tcf_pedit), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 06f0742db593..5d8bfa878477 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -320,13 +320,6 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_police_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, police_net_id); - - return tcf_idr_delete_index(tn, index); -} - MODULE_AUTHOR("Alexey Kuznetsov"); MODULE_DESCRIPTION("Policing actions"); MODULE_LICENSE("GPL"); @@ -340,7 +333,6 @@ static struct tc_action_ops act_police_ops = { .init = tcf_police_init, .walk = tcf_police_walker, .lookup = tcf_police_search, - .delete = tcf_police_delete, .size = sizeof(struct tcf_police), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 207b4132d1b0..44e9c00657bc 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -232,13 +232,6 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_sample_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_sample_ops = { .kind = "sample", .type = TCA_ACT_SAMPLE, @@ -249,7 +242,6 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, - .delete = tcf_sample_delete, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e616523ba3c1..52400d49f81f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -196,13 +196,6 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_simp_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_simp_ops = { .kind = "simple", .type = TCA_ACT_SIMP, @@ -213,7 +206,6 @@ static struct tc_action_ops act_simp_ops = { .init = tcf_simp_init, .walk = tcf_simp_walker, .lookup = tcf_simp_search, - .delete = tcf_simp_delete, .size = sizeof(struct tcf_defact), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 926d7bc4a89d..73e44ce2a883 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -299,13 +299,6 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_skbedit_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .type = TCA_ACT_SKBEDIT, @@ -316,7 +309,6 @@ static struct tc_action_ops act_skbedit_ops = { .cleanup = tcf_skbedit_cleanup, .walk = tcf_skbedit_walker, .lookup = tcf_skbedit_search, - .delete = tcf_skbedit_delete, .size = sizeof(struct tcf_skbedit), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d6a1af0c4171..588077fafd6c 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -259,13 +259,6 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_skbmod_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .type = TCA_ACT_SKBMOD, @@ -276,7 +269,6 @@ static struct tc_action_ops act_skbmod_ops = { .cleanup = tcf_skbmod_cleanup, .walk = tcf_skbmod_walker, .lookup = tcf_skbmod_search, - .delete = tcf_skbmod_delete, .size = sizeof(struct tcf_skbmod), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8f09cf08d8fe..420759153d5f 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -548,13 +548,6 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tunnel_key_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", .type = TCA_ACT_TUNNEL_KEY, @@ -565,7 +558,6 @@ static struct tc_action_ops act_tunnel_key_ops = { .cleanup = tunnel_key_release, .walk = tunnel_key_walker, .lookup = tunnel_key_search, - .delete = tunnel_key_delete, .size = sizeof(struct tcf_tunnel_key), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 209e70ad2c09..033d273afe50 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -296,13 +296,6 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_vlan_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .type = TCA_ACT_VLAN, @@ -313,7 +306,6 @@ static struct tc_action_ops act_vlan_ops = { .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, - .delete = tcf_vlan_delete, .size = sizeof(struct tcf_vlan), }; -- cgit v1.2.3 From b144e7ec51a132eac00a68bf897b6349d810022f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:07 -0700 Subject: net_sched: remove unused parameter for tcf_action_delete() Fixes: 16af6067392c ("net: sched: implement reference counted action release") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 00bf7d2b0bdd..ba55226928a3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1177,8 +1177,7 @@ err_out: return err; } -static int tcf_action_delete(struct net *net, struct tc_action *actions[], - struct netlink_ext_ack *extack) +static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { int i; @@ -1227,7 +1226,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } /* now do the delete */ - ret = tcf_action_delete(net, actions, extack); + ret = tcf_action_delete(net, actions); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); -- cgit v1.2.3 From 7d485c451fc82f8ae431cdb379521bc6d0641064 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:08 -0700 Subject: net_sched: remove unused tcf_idr_check() tcf_idr_check() is replaced by tcf_idr_check_alloc(), and __tcf_idr_check() now can be folded into tcf_idr_search(). Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 22 +++------------------- 2 files changed, 3 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/act_api.h b/include/net/act_api.h index e32708491d83..eaa0e8b93d5b 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -147,8 +147,6 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ba55226928a3..d76948f02a02 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -300,21 +300,17 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } EXPORT_SYMBOL(tcf_generic_walker); -static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, - struct tc_action **a, int bind) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; spin_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); - if (IS_ERR(p)) { + if (IS_ERR(p)) p = NULL; - } else if (p) { + else if (p) refcount_inc(&p->tcfa_refcnt); - if (bind) - atomic_inc(&p->tcfa_bindcnt); - } spin_unlock(&idrinfo->lock); if (p) { @@ -323,20 +319,8 @@ static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, } return false; } - -int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) -{ - return __tcf_idr_check(tn, index, a, 0); -} EXPORT_SYMBOL(tcf_idr_search); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) -{ - return __tcf_idr_check(tn, index, a, bind); -} -EXPORT_SYMBOL(tcf_idr_check); - static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; -- cgit v1.2.3 From 244cd96adb5f5ab39551081fb1f9009a54bb12ee Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:09 -0700 Subject: net_sched: remove list_head from tc_action After commit 90b73b77d08e, list_head is no longer needed. Now we just need to convert the list iteration to array iteration for drivers. Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to actions") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 6 ++---- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 10 ++++----- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 5 ++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++---- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 ++++++++-------- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 +-- .../net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++---- drivers/net/ethernet/netronome/nfp/flower/action.c | 6 ++---- drivers/net/ethernet/qlogic/qede/qede_filter.c | 6 ++---- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 5 ++--- include/net/act_api.h | 1 - include/net/pkt_cls.h | 25 ++++++++++++---------- net/dsa/slave.c | 4 +--- net/sched/act_api.c | 1 - 14 files changed, 43 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 139d96c5a023..092c817f8f11 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -110,16 +110,14 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, struct tcf_exts *tc_exts) { const struct tc_action *tc_act; - LIST_HEAD(tc_actions); - int rc; + int i, rc; if (!tcf_exts_has_actions(tc_exts)) { netdev_info(bp->dev, "no actions"); return -EINVAL; } - tcf_exts_to_list(tc_exts, &tc_actions); - list_for_each_entry(tc_act, &tc_actions, list) { + tcf_exts_for_each_action(i, tc_act, tc_exts) { /* Drop action */ if (is_tcf_gact_shot(tc_act)) { actions->flags |= BNXT_TC_ACTION_FLAG_DROP; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 623f73dd7738..c116f96956fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -417,10 +417,9 @@ static void cxgb4_process_flow_actions(struct net_device *in, struct ch_filter_specification *fs) { const struct tc_action *a; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { fs->action = FILTER_PASS; } else if (is_tcf_gact_shot(a)) { @@ -591,10 +590,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, bool act_redir = false; bool act_pedit = false; bool act_vlan = false; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { /* Do nothing */ } else if (is_tcf_gact_shot(a)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 18eb2aedd4cb..c7d2b4dc7568 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -93,14 +93,13 @@ static int fill_action_fields(struct adapter *adap, unsigned int num_actions = 0; const struct tc_action *a; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { /* Don't allow more than one action per rule. */ if (num_actions) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 447098005490..af4c9ae7f432 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9171,14 +9171,12 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; - LIST_HEAD(actions); + int i; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - + tcf_exts_for_each_action(i, a, exts) { /* Drop action */ if (is_tcf_gact_shot(a)) { *action = IXGBE_FDIR_DROP_QUEUE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9131a1376e7d..9fed54017659 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1982,14 +1982,15 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { + int k; + if (!is_tcf_pedit(a)) continue; nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); + for (k = 0; k < nkeys; k++) { + htype = tcf_pedit_htype(a, k); if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { modify_ip_header = true; @@ -2053,15 +2054,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, const struct tc_action *a; LIST_HEAD(actions); u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -2666,7 +2666,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); bool encap = false; u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; @@ -2674,8 +2674,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6070d1591d1e..930700413b1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1346,8 +1346,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(f->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(f->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct mlxsw_sp_port_mall_mirror_tc_entry *mirror; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ebd1b24ebaa5..8d211972c5e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -21,8 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { const struct tc_action *a; - LIST_HEAD(actions); - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return 0; @@ -32,8 +31,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_ok(a)) { err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 0ba0356ec4e6..9044496803e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -796,11 +796,10 @@ int nfp_flower_compile_action(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err, tun_out_cnt, out_cnt; + int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; enum nfp_flower_tun_type tun_type; const struct tc_action *a; u32 csum_updated = 0; - LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; @@ -810,8 +809,7 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - tcf_exts_to_list(flow->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, flow->exts) { err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 9673d19308e6..b16ce7d93caf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2006,18 +2006,16 @@ unlock: static int qede_parse_actions(struct qede_dev *edev, struct tcf_exts *exts) { - int rc = -EINVAL, num_act = 0; + int rc = -EINVAL, num_act = 0, i; const struct tc_action *a; bool is_drop = false; - LIST_HEAD(actions); if (!tcf_exts_has_actions(exts)) { DP_NOTICE(edev, "No tc actions received\n"); return rc; } - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { num_act++; if (is_tcf_gact_shot(a)) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1a96dd9c1091..531294f4978b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -61,7 +61,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, struct stmmac_tc_entry *action_entry = entry; const struct tc_action *act; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) @@ -69,8 +69,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, if (frag) action_entry = frag; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(act, &actions, list) { + tcf_exts_for_each_action(i, act, exts) { /* Accept */ if (is_tcf_gact_ok(act)) { action_entry->val.af = 1; diff --git a/include/net/act_api.h b/include/net/act_api.h index eaa0e8b93d5b..f9c4b871af88 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -23,7 +23,6 @@ struct tc_action { const struct tc_action_ops *ops; __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; - struct list_head list; struct tcf_idrinfo *idrinfo; u32 tcfa_index; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ef727f71336e..c17d51865469 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -298,19 +298,13 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) #endif } -static inline void tcf_exts_to_list(const struct tcf_exts *exts, - struct list_head *actions) -{ #ifdef CONFIG_NET_CLS_ACT - int i; - - for (i = 0; i < exts->nr_actions; i++) { - struct tc_action *a = exts->actions[i]; - - list_add_tail(&a->list, actions); - } +#define tcf_exts_for_each_action(i, a, exts) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++) +#else +#define tcf_exts_for_each_action(i, a, exts) \ + for (; 0; ) #endif -} static inline void tcf_exts_stats_update(const struct tcf_exts *exts, @@ -361,6 +355,15 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->actions[0]; +#else + return NULL; +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 962c4fd338ba..1c45c1d6d241 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -767,7 +767,6 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, const struct tc_action *a; struct dsa_port *to_dp; int err = -EOPNOTSUPP; - LIST_HEAD(actions); if (!ds->ops->port_mirror_add) return err; @@ -775,8 +774,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!tcf_exts_has_one_action(cls->exts)) return err; - tcf_exts_to_list(cls->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(cls->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct dsa_mall_mirror_tc_entry *mirror; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d76948f02a02..db83dac1e7f4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -391,7 +391,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->idrinfo = idrinfo; p->ops = ops; - INIT_LIST_HEAD(&p->list); *a = p; return 0; err3: -- cgit v1.2.3 From 8ce5be1c899d31f3cd047e5f707cd9dbfb81e076 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:11 -0700 Subject: Revert "net: sched: act_ife: disable bh when taking ife_mod_lock" This reverts commit 42c625a486f3 ("net: sched: act_ife: disable bh when taking ife_mod_lock"), because what ife_mod_lock protects is absolutely not touched in rate est timer BH context, they have no race. A better fix is following up. Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 92fcf8ba5bca..9decbb74b3ac 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -167,16 +167,16 @@ static struct tcf_meta_ops *find_ife_oplist(u16 metaid) { struct tcf_meta_ops *o; - read_lock_bh(&ife_mod_lock); + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { if (o->metaid == metaid) { if (!try_module_get(o->owner)) o = NULL; - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); return o; } } - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); return NULL; } @@ -190,12 +190,12 @@ int register_ife_op(struct tcf_meta_ops *mops) !mops->get || !mops->alloc) return -EINVAL; - write_lock_bh(&ife_mod_lock); + write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid || (strcmp(mops->name, m->name) == 0)) { - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return -EEXIST; } } @@ -204,7 +204,7 @@ int register_ife_op(struct tcf_meta_ops *mops) mops->release = ife_release_meta_gen; list_add_tail(&mops->list, &ifeoplist); - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return 0; } EXPORT_SYMBOL_GPL(unregister_ife_op); @@ -214,7 +214,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops) struct tcf_meta_ops *m; int err = -ENOENT; - write_lock_bh(&ife_mod_lock); + write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid) { list_del(&mops->list); @@ -222,7 +222,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops) break; } } - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return err; } @@ -343,13 +343,13 @@ static int use_all_metadata(struct tcf_ife_info *ife) int rc = 0; int installed = 0; - read_lock_bh(&ife_mod_lock); + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); if (installed) return 0; -- cgit v1.2.3 From 4e407ff5cd67ec76eeeea1deec227b7982dc7f66 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:12 -0700 Subject: act_ife: move tcfa_lock down to where necessary The only time we need to take tcfa_lock is when adding a new metainfo to an existing ife->metalist. We don't need to take tcfa_lock so early and so broadly in tcf_ife_init(). This means we can always take ife_mod_lock first, avoid the reverse locking ordering warning as reported by Vlad. Reported-by: Vlad Buslov Tested-by: Vlad Buslov Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9decbb74b3ac..244a8cf48183 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -265,11 +265,8 @@ static const char *ife_meta_id2name(u32 metaid) #endif /* called when adding new meta information - * under ife->tcf_lock for existing action */ -static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len, bool exists, - bool rtnl_held) +static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -277,15 +274,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - if (exists) - spin_unlock_bh(&ife->tcf_lock); if (rtnl_held) rtnl_unlock(); request_module("ife-meta-%s", ife_meta_id2name(metaid)); if (rtnl_held) rtnl_lock(); - if (exists) - spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -302,10 +295,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic) + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -332,12 +324,16 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, } } + if (exists) + spin_lock_bh(&ife->tcf_lock); list_add_tail(&mi->metalist, &ife->metalist); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; @@ -345,7 +341,7 @@ static int use_all_metadata(struct tcf_ife_info *ife) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true); + rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -422,7 +418,6 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } -/* under ife->tcf_lock for existing action */ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -436,12 +431,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len, exists, - rtnl_held); + rc = load_metaops_and_vet(i, val, len, rtnl_held); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, exists); + rc = add_metainfo(ife, i, val, len, false, exists); if (rc) return rc; } @@ -540,8 +534,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p->eth_type = ife_type; } - if (exists) - spin_lock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); @@ -551,10 +543,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL, NULL); if (err) { metadata_parse_err: - if (exists) - spin_unlock_bh(&ife->tcf_lock); tcf_idr_release(*a, bind); - kfree(p); return err; } @@ -569,17 +558,16 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { - if (exists) - spin_unlock_bh(&ife->tcf_lock); tcf_idr_release(*a, bind); - kfree(p); return err; } } + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ rcu_swap_protected(ife->params, p, 1); -- cgit v1.2.3 From 5ffe57da29b3802baeddaa40909682bbb4cb4d48 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:13 -0700 Subject: act_ife: fix a potential deadlock use_all_metadata() acquires read_lock(&ife_mod_lock), then calls add_metainfo() which calls find_ife_oplist() which acquires the same lock again. Deadlock! Introduce __add_metainfo() which accepts struct tcf_meta_ops *ops as an additional parameter and let its callers to decide how to find it. For use_all_metadata(), it already has ops, no need to find it again, just call __add_metainfo() directly. And, as ife_mod_lock is only needed for find_ife_oplist(), this means we can make non-atomic allocation for populate_metalist() now. Fixes: 817e9f2c5c26 ("act_ife: acquire ife_mod_lock before reading ifeoplist") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 244a8cf48183..196430aefe87 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -296,22 +296,16 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) /* called when adding new meta information */ -static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic, bool exists) +static int __add_metainfo(const struct tcf_meta_ops *ops, + struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; - struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; - if (!ops) - return -ENOENT; - mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); - if (!mi) { - /*put back what find_ife_oplist took */ - module_put(ops->owner); + if (!mi) return -ENOMEM; - } mi->metaid = metaid; mi->ops = ops; @@ -319,7 +313,6 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); - module_put(ops->owner); return ret; } } @@ -333,6 +326,21 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } +static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool exists) +{ + const struct tcf_meta_ops *ops = find_ife_oplist(metaid); + int ret; + + if (!ops) + return -ENOENT; + ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); + if (ret) + /*put back what find_ife_oplist took */ + module_put(ops->owner); + return ret; +} + static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; @@ -341,7 +349,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); + rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -435,7 +443,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, false, exists); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } -- cgit v1.2.3 From 96c26e04581667e3cd17ed74c2fc3499afea49b8 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Mon, 20 Aug 2018 09:54:25 +0900 Subject: xsk: fix return value of xdp_umem_assign_dev() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/ENOTSUPP/EOPNOTSUPP/ in function umem_assign_dev(). This function's return value is directly returned by xsk_bind(). EOPNOTSUPP is bind()'s possible return value. Fixes: f734607e819b ("xsk: refactor xdp_umem_assign_dev()") Signed-off-by: Prashant Bhole Acked-by: Song Liu Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 911ca6d3cb5a..bfe2dbea480b 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -74,14 +74,14 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, return 0; if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) - return force_zc ? -ENOTSUPP : 0; /* fail or fallback */ + return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */ bpf.command = XDP_QUERY_XSK_UMEM; rtnl_lock(); err = xdp_umem_query(dev, queue_id); if (err) { - err = err < 0 ? -ENOTSUPP : -EBUSY; + err = err < 0 ? -EOPNOTSUPP : -EBUSY; goto err_rtnl_unlock; } -- cgit v1.2.3 From 67db7cd249e71f64346f481b629724376d063e08 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:32 -0700 Subject: tls: possible hang when do_tcp_sendpages hits sndbuf is full case Currently, the lower protocols sk_write_space handler is not called if TLS is sending a scatterlist via tls_push_sg. However, normally tls_push_sg calls do_tcp_sendpage, which may be under memory pressure, that in turn may trigger a wait via sk_wait_event. Typically, this happens when the in-flight bytes exceed the sdnbuf size. In the normal case when enough ACKs are received sk_write_space() will be called and the sk_wait_event will be woken up allowing it to send more data and/or return to the user. But, in the TLS case because the sk_write_space() handler does not wake up the events the above send will wait until the sndtimeo is exceeded. By default this is MAX_SCHEDULE_TIMEOUT so it look like a hang to the user (especially this impatient user). To fix this pass the sk_write_space event to the lower layers sk_write_space event which in the TCP case will wake any pending events. I observed the above while integrating sockmap and ktls. It initially appeared as test_sockmap (modified to use ktls) occasionally hanging. To reliably reproduce this reduce the sndbuf size and stress the tls layer by sending many 1B sends. This results in every byte needing a header and each byte individually being sent to the crypto layer. Signed-off-by: John Fastabend Acked-by: Dave Watson Signed-off-by: Daniel Borkmann --- net/tls/tls_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 93c0c225ab34..180b6640e531 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -213,9 +213,14 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - /* We are already sending pages, ignore notification */ - if (ctx->in_tcp_sendpages) + /* If in_tcp_sendpages call lower protocol write space handler + * to ensure we wake up any waiting operations there. For example + * if do_tcp_sendpages where to call sk_wait_event. + */ + if (ctx->in_tcp_sendpages) { + ctx->sk_write_space(sk); return; + } if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; -- cgit v1.2.3 From 3d0371b313b84ba7c16ebf2526a7a34f1c57b19e Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 22 Aug 2018 14:57:44 +1000 Subject: net/ncsi: Fixup .dumpit message flags and ID check in Netlink handler The ncsi_pkg_info_all_nl() .dumpit handler is missing the NLM_F_MULTI flag, causing additional package information after the first to be lost. Also fixup a sanity check in ncsi_write_package_info() to reject out of range package IDs. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 82e6edf9c5d9..45f33d6dedf7 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -100,7 +100,7 @@ static int ncsi_write_package_info(struct sk_buff *skb, bool found; int rc; - if (id > ndp->package_num) { + if (id > ndp->package_num - 1) { netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id); return -ENODEV; } @@ -240,7 +240,7 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, return 0; /* done */ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); + &ncsi_genl_family, NLM_F_MULTI, NCSI_CMD_PKG_INFO); if (!hdr) { rc = -EMSGSIZE; goto err; -- cgit v1.2.3 From 93cfb6c17690c465509967aeb237717d10513a88 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 22 Aug 2018 12:29:43 +0200 Subject: sch_cake: Fix TC filter flow override and expand it to hosts as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TC filter flow mapping override completely skipped the call to cake_hash(); however that meant that the internal state was not being updated, which ultimately leads to deadlocks in some configurations. Fix that by passing the overridden flow ID into cake_hash() instead so it can react appropriately. In addition, the major number of the class ID can now be set to override the host mapping in host isolation mode. If both host and flow are overridden (or if the respective modes are disabled), flow dissection and hashing will be skipped entirely; otherwise, the hashing will be kept for the portions that are not set by the filter. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 4d26b0823cdf..c07c30b916d5 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -620,15 +620,20 @@ static bool cake_ddst(int flow_mode) } static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, - int flow_mode) + int flow_mode, u16 flow_override, u16 host_override) { - u32 flow_hash = 0, srchost_hash, dsthost_hash; + u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; u16 reduced_hash, srchost_idx, dsthost_idx; struct flow_keys keys, host_keys; if (unlikely(flow_mode == CAKE_FLOW_NONE)) return 0; + /* If both overrides are set we can skip packet dissection entirely */ + if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && + (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) + goto skip_hash; + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -675,6 +680,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, if (flow_mode & CAKE_FLOW_FLOWS) flow_hash = flow_hash_from_keys(&keys); +skip_hash: + if (flow_override) + flow_hash = flow_override - 1; + if (host_override) { + dsthost_hash = host_override - 1; + srchost_hash = host_override - 1; + } + if (!(flow_mode & CAKE_FLOW_FLOWS)) { if (flow_mode & CAKE_FLOW_SRC_IP) flow_hash ^= srchost_hash; @@ -1570,7 +1583,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, struct cake_sched_data *q = qdisc_priv(sch); struct tcf_proto *filter; struct tcf_result res; - u32 flow = 0; + u16 flow = 0, host = 0; int result; filter = rcu_dereference_bh(q->filter_list); @@ -1594,10 +1607,12 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, #endif if (TC_H_MIN(res.classid) <= CAKE_QUEUES) flow = TC_H_MIN(res.classid); + if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16)) + host = TC_H_MAJ(res.classid) >> 16; } hash: *t = cake_select_tin(sch, skb); - return flow ?: cake_hash(*t, skb, flow_mode) + 1; + return cake_hash(*t, skb, flow_mode, flow, host) + 1; } static void cake_reconfigure(struct Qdisc *sch); -- cgit v1.2.3 From e500c6d349f7f36120886841c6f057ce248b48b2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Aug 2018 12:58:34 -0700 Subject: addrconf: reduce unnecessary atomic allocations All the 3 callers of addrconf_add_mroute() assert RTNL lock, they don't take any additional lock either, so it is safe to convert it to GFP_KERNEL. Same for sit_add_v4_addrs(). Cc: David Ahern Signed-off-by: Cong Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fac4ad74867..d51a8c0b3372 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2398,7 +2398,7 @@ static void addrconf_add_mroute(struct net_device *dev) ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, GFP_ATOMIC, NULL); + ip6_route_add(&cfg, GFP_KERNEL, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -3062,7 +3062,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, - GFP_ATOMIC); + GFP_KERNEL); return; } @@ -3087,7 +3087,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, 0, idev->dev, - 0, pflags, GFP_ATOMIC); + 0, pflags, GFP_KERNEL); } } } -- cgit v1.2.3 From 431280eebed9f5079553daf003011097763e71fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2018 13:30:45 -0700 Subject: ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state tcp uses per-cpu (and per namespace) sockets (net->ipv4.tcp_sk) internally to send some control packets. 1) RST packets, through tcp_v4_send_reset() 2) ACK packets in SYN-RECV and TIME-WAIT state, through tcp_v4_send_ack() These packets assert IP_DF, and also use the hashed IP ident generator to provide an IPv4 ID number. Geoff Alexander reported this could be used to build off-path attacks. These packets should not be fragmented, since their size is smaller than IPV4_MIN_MTU. Only some tunneled paths could eventually have to fragment, regardless of inner IPID. We really can use zero IPID, to address the flaw, and as a bonus, avoid a couple of atomic operations in ip_idents_reserve() Signed-off-by: Eric Dumazet Reported-by: Geoff Alexander Tested-by: Geoff Alexander Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9e041fa5c545..44c09eddbb78 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2517,6 +2517,12 @@ static int __net_init tcp_sk_init(struct net *net) if (res) goto fail; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } -- cgit v1.2.3 From fb99886224294b2291d267da41395022fa4200e2 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:14 -0400 Subject: tcp_bbr: add bbr_check_probe_rtt_done() helper This patch add a helper function bbr_check_probe_rtt_done() to 1. check the condition to see if bbr should exit probe_rtt mode; 2. process the logic of exiting probe_rtt mode. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 13d34427ca3d..fd7bccf36a26 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -95,11 +95,10 @@ struct bbr { u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? */ - restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:12, + unused:13, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -396,17 +395,11 @@ static bool bbr_set_cwnd_to_recover_or_restore( cwnd = tcp_packets_in_flight(tp) + acked; } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { /* Exiting loss recovery; restore cwnd saved before recovery. */ - bbr->restore_cwnd = 1; + cwnd = max(cwnd, bbr->prior_cwnd); bbr->packet_conservation = 0; } bbr->prev_ca_state = state; - if (bbr->restore_cwnd) { - /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->restore_cwnd = 0; - } - if (bbr->packet_conservation) { *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); return true; /* yes, using packet conservation */ @@ -748,6 +741,20 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr_reset_mode(sk); +} + /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and * periodically drain the bottleneck queue, to converge to measure the true * min_rtt (unloaded propagation delay). This allows the flows to keep queues @@ -806,12 +813,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) } else if (bbr->probe_rtt_done_stamp) { if (bbr->round_start) bbr->probe_rtt_round_done = 1; - if (bbr->probe_rtt_round_done && - after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) { - bbr->min_rtt_stamp = tcp_jiffies32; - bbr->restore_cwnd = 1; /* snap to prior_cwnd */ - bbr_reset_mode(sk); - } + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); } } /* Restart after idle ends only once we process a new S/ACK for data */ @@ -862,7 +865,6 @@ static void bbr_init(struct sock *sk) bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); - bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; bbr->full_bw_reached = 0; -- cgit v1.2.3 From 5490b32dce6932ea7ee8e3b2f76db2957c92af6e Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:15 -0400 Subject: tcp_bbr: in restart from idle, see if we should exit PROBE_RTT This patch fix the case where BBR does not exit PROBE_RTT mode when it restarts from idle. When BBR restarts from idle and if BBR is in PROBE_RTT mode, BBR should check if it's time to exit PROBE_RTT. If yes, then BBR should exit PROBE_RTT mode and restore the cwnd to its full value. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index fd7bccf36a26..1d4bdd3b5e4d 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -174,6 +174,8 @@ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; +static void bbr_check_probe_rtt_done(struct sock *sk); + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -308,6 +310,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) */ if (bbr->mode == BBR_PROBE_BW) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); } } -- cgit v1.2.3 From 8e995bf14fdb7e33681d5c3312b602fa342b878a Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:16 -0400 Subject: tcp_bbr: apply PROBE_RTT cwnd cap even if acked==0 This commit fixes a corner case where TCP BBR would enter PROBE_RTT mode but not reduce its cwnd. If a TCP receiver ACKed less than one full segment, the number of delivered/acked packets was 0, so that bbr_set_cwnd() would short-circuit and exit early, without cutting cwnd to the value we want for PROBE_RTT. The fix is to instead make sure that even when 0 full packets are ACKed, we do apply all the appropriate caps, including the cap that applies in PROBE_RTT mode. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 1d4bdd3b5e4d..02ff2dde9609 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -420,10 +420,10 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = 0, target_cwnd = 0; + u32 cwnd = tp->snd_cwnd, target_cwnd = 0; if (!acked) - return; + goto done; /* no packet fully ACKed; just apply caps */ if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; -- cgit v1.2.3 From d23c4b6336ef30898dcdff351f21e633e7a64930 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 23 Aug 2018 11:31:37 +0800 Subject: net/ipv6: init ip6 anycast rt->dst.input as ip6_input Commit 6edb3c96a5f02 ("net/ipv6: Defer initialization of dst to data path") forgot to handle anycast route and init anycast rt->dst.input to ip6_forward. Fix it by setting anycast rt->dst.input back to ip6_input. Fixes: 6edb3c96a5f02 ("net/ipv6: Defer initialization of dst to data path") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7208c16302f6..c4ea13e8360b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -956,7 +956,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) rt->dst.error = 0; rt->dst.output = ip6_output; - if (ort->fib6_type == RTN_LOCAL) { + if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; -- cgit v1.2.3 From 98c8f125fd8a6240ea343c1aa50a1be9047791b8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Aug 2018 22:58:01 -0700 Subject: net: sched: Fix memory exposure from short TCA_U32_SEL Via u32_change(), TCA_U32_SEL has an unspecified type in the netlink policy, so max length isn't enforced, only minimum. This means nkeys (from userspace) was being trusted without checking the actual size of nla_len(), which could lead to a memory over-read, and ultimately an exposure via a call to u32_dump(). Reachability is CAP_NET_ADMIN within a namespace. Reported-by: Al Viro Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5d2a6dc3921..f218ccf1e2d9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -914,6 +914,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; u32 htid, flags = 0; + size_t sel_size; int err; #ifdef CONFIG_CLS_U32_PERF size_t size; @@ -1076,8 +1077,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } s = nla_data(tb[TCA_U32_SEL]); + sel_size = struct_size(s, keys, s->nkeys); + if (nla_len(tb[TCA_U32_SEL]) < sel_size) { + err = -EINVAL; + goto erridr; + } - n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); + n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL); if (n == NULL) { err = -ENOBUFS; goto erridr; @@ -1092,7 +1098,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&n->sel, s, sel_size); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; -- cgit v1.2.3