From 39215846740a9f29ac7dac276f9df98135f39bb0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Nov 2017 07:20:07 +0100 Subject: netfilter: conntrack: remove nlattr_size pointer from l4proto trackers similar to previous commit, but instead compute this at compile time and turn nlattr_size into an u16. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7ef56c13698a..0e5618ec8b9d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,6 +27,9 @@ struct nf_conntrack_l4proto { /* Resolve clashes on insertion races. */ bool allow_clash; + /* protoinfo nlattr size, closes a hole */ + u16 nlattr_size; + /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. */ bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, @@ -66,8 +69,6 @@ struct nf_conntrack_l4proto { /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct); - /* Calculate protoinfo nlattr size */ - int (*nlattr_size)(void); /* convert nfnetlink attributes to protoinfo */ int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); @@ -80,8 +81,6 @@ struct nf_conntrack_l4proto { struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; - size_t nla_size; - #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { int (*nlattr_to_obj)(struct nlattr *tb[], -- cgit v1.2.3 From cd9ceafc0a761a15b4cbfe6c0024edf88f861d66 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Nov 2017 07:20:08 +0100 Subject: netfilter: conntrack: constify list of builtin trackers Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 10 +++++----- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_proto.c | 12 ++++++------ 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 0e5618ec8b9d..7fbb8f64a96e 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,18 +125,18 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, void nf_ct_l4proto_pernet_unregister_one(struct net *net, const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *const proto[], + const struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *const proto[], + const struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], +int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], unsigned int num_proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], +void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], unsigned int num_proto); /* Generic netlink helpers */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 89af9d88ca21..bb2c868a5621 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -368,7 +368,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); -static struct nf_conntrack_l4proto *builtin_l4proto4[] = { +static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = { &nf_conntrack_l4proto_tcp4, &nf_conntrack_l4proto_udp4, &nf_conntrack_l4proto_icmp, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3b80a38f62b8..7340ca7cc362 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -368,7 +368,7 @@ static struct nf_sockopt_ops so_getorigdst6 = { .owner = THIS_MODULE, }; -static struct nf_conntrack_l4proto *builtin_l4proto6[] = { +static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = { &nf_conntrack_l4proto_tcp6, &nf_conntrack_l4proto_udp6, &nf_conntrack_l4proto_icmpv6, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 19c3b1b84544..afdeca53e88b 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -385,7 +385,7 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -498,7 +498,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], +int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], unsigned int num_proto) { int ret = -EINVAL, ver; @@ -520,7 +520,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *const l4proto[], + const struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { int ret = -EINVAL; @@ -541,7 +541,7 @@ int nf_ct_l4proto_pernet_register(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], +void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], unsigned int num_proto) { mutex_lock(&nf_ct_proto_mutex); @@ -551,12 +551,12 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], synchronize_net(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l4proto, l4proto); + nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *const l4proto[], + const struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { while (num_proto-- != 0) -- cgit v1.2.3 From 9dae47aba0a055f761176d9297371d5bb24289ec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Nov 2017 07:20:09 +0100 Subject: netfilter: conntrack: l4 protocol trackers can be const previous patches removed all writes to these structs so we can now mark them as const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 12 ++++++------ include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 12 ++++++------ include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 4 ++-- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udp.c | 8 ++++---- 11 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 4ed1040bbe4a..73f825732326 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -13,17 +13,17 @@ const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; #endif int nf_conntrack_ipv4_compat_init(void); diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 9cd55be95853..effa8dfba68c 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -4,17 +4,17 @@ extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; #endif #include diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7fbb8f64a96e..a7220eef9aee 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -108,7 +108,7 @@ struct nf_conntrack_l4proto { }; /* Existing built-in generic protocol */ -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 1849fedd9b81..669e586b6b8f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.icmp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 3ac0d826afc4..75a85e35a16b 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.icmpv6.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 2fee7c96ec09..abe647d5b8c6 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -861,7 +861,7 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.dccp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, .pkt_to_tuple = dccp_pkt_to_tuple, @@ -897,7 +897,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, .pkt_to_tuple = dccp_pkt_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 1f86ddf6649a..f2d22442c89e 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -163,7 +163,7 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.generic.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l3proto = PF_UNSPEC, .l4proto = 255, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a2503005d80b..a881c074a43a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -352,7 +352,7 @@ static int gre_init_net(struct net *net, u_int16_t proto) } /* protocol helper struct */ -static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { +static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, .pkt_to_tuple = gre_pkt_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index f5bff4de0386..69eaaca6f933 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -777,7 +777,7 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.sctp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, @@ -814,7 +814,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9875a3623676..44a6038f99bc 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1541,7 +1541,7 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.tcp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, @@ -1579,7 +1579,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3a5f727103af..59a20f61c364 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -296,7 +296,7 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.udp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, @@ -328,7 +328,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); #ifdef CONFIG_NF_CT_PROTO_UDPLITE -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, @@ -360,7 +360,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); #endif -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, @@ -392,7 +392,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); #ifdef CONFIG_NF_CT_PROTO_UDPLITE -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, -- cgit v1.2.3 From 6b3d933000cbe539e5b234d639b083da60bb275c Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 13 Nov 2017 22:58:18 +0800 Subject: netfilter: ipvs: Remove useless ipvsh param of frag_safe_skb_hp The param of frag_safe_skb_hp, ipvsh, isn't used now. So remove it and update the callers' codes too. Signed-off-by: Gao Feng Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 +-- net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 12 ++++++------ 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff68cf288f9b..eb0bec043c96 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -69,8 +69,7 @@ struct ip_vs_iphdr { }; static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, - int len, void *buffer, - const struct ip_vs_iphdr *ipvsh) + int len, void *buffer) { return skb_header_pointer(skb, offset, len, buffer); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 3e053cb30070..f489b8db2406 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -322,7 +322,7 @@ ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs, { __be16 _ports[2], *pptr; - pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (pptr == NULL) return 1; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5cb7cac9177d..5f6f73cf2174 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -433,7 +433,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * IPv6 frags, only the first hit here. */ - pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; @@ -566,7 +566,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct netns_ipvs *ipvs = svc->ipvs; struct net *net = ipvs->net; - pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (!pptr) return NF_DROP; dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; @@ -982,7 +982,7 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, unsigned int offset; *related = 1; - ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); + ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; @@ -1214,7 +1214,7 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, return NULL; pptr = frag_safe_skb_hp(skb, iph->len, - sizeof(_ports), _ports, iph); + sizeof(_ports), _ports); if (!pptr) return NULL; @@ -1407,7 +1407,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in __be16 _ports[2], *pptr; pptr = frag_safe_skb_hp(skb, iph.len, - sizeof(_ports), _ports, &iph); + sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, @@ -1741,7 +1741,7 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, *related = 1; - ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph); + ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; -- cgit v1.2.3 From 26888dfd7e7454686b8d3ea9ba5045d5f236e4d7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Dec 2017 00:21:03 +0100 Subject: netfilter: core: remove synchronize_net call if nfqueue is used since commit 960632ece6949b ("netfilter: convert hook list to an array") nfqueue no longer stores a pointer to the hook that caused the packet to be queued. Therefore no extra synchronize_net() call is needed after dropping the packets enqueued by the old rule blob. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 2 +- net/netfilter/core.c | 6 +----- net/netfilter/nf_internals.h | 2 +- net/netfilter/nf_queue.c | 7 ++----- net/netfilter/nfnetlink_queue.c | 9 ++------- 5 files changed, 7 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 814058d0f167..a50a69f5334c 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -25,7 +25,7 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - unsigned int (*nf_hook_drop)(struct net *net); + void (*nf_hook_drop)(struct net *net); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index d39bb2c583dc..9a84b6cb99e6 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -341,7 +341,6 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { struct nf_hook_entries __rcu **pp; struct nf_hook_entries *p; - unsigned int nfq; pp = nf_hook_entry_head(net, reg); if (!pp) @@ -364,10 +363,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) synchronize_net(); - /* other cpu might still process nfqueue verdict that used reg */ - nfq = nf_queue_nf_hook_drop(net); - if (nfq) - synchronize_net(); + nf_queue_nf_hook_drop(net); kvfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 44284cd2528d..18f6d7ae995b 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -10,7 +10,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *entries, unsigned int index, unsigned int verdict); -unsigned int nf_queue_nf_hook_drop(struct net *net); +void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index f7e21953b1de..4e42a4a68a0b 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,18 +96,15 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -unsigned int nf_queue_nf_hook_drop(struct net *net) +void nf_queue_nf_hook_drop(struct net *net) { const struct nf_queue_handler *qh; - unsigned int count = 0; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - count = qh->nf_hook_drop(net); + qh->nf_hook_drop(net); rcu_read_unlock(); - - return count; } EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c09b36755ed7..2db35f2d553d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static unsigned int nfqnl_nf_hook_drop(struct net *net) +static void nfqnl_nf_hook_drop(struct net *net) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); - unsigned int instances = 0; int i; for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; - hlist_for_each_entry_rcu(inst, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, NULL, 0); - instances++; - } } - - return instances; } static int -- cgit v1.2.3 From b0f38338aef2dae5ade3c16acf713737e3b15a73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Dec 2017 00:58:47 +0100 Subject: netfilter: reduce size of hook entry point locations struct net contains: struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; which store the hook entry point locations for the various protocol families and the hooks. Using array results in compact c code when doing accesses, i.e. x = rcu_dereference(net->nf.hooks[pf][hook]); but its also wasting a lot of memory, as most families are not used. So split the array into those families that are used, which are only 5 (instead of 13). In most cases, the 'pf' argument is constant, i.e. gcc removes switch statement. struct net before: /* size: 5184, cachelines: 81, members: 46 */ after: /* size: 4672, cachelines: 73, members: 46 */ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 24 ++++++++++++++++++++++-- include/net/netns/netfilter.h | 6 +++++- net/bridge/br_netfilter_hooks.c | 2 +- net/netfilter/core.c | 38 ++++++++++++++++++++++++++++++-------- net/netfilter/nf_queue.c | 21 +++++++++++++++++++-- 5 files changed, 77 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 792f6d535707..9dcbcdfa3b82 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -195,7 +195,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_entries *hook_head; + struct nf_hook_entries *hook_head = NULL; int ret = 1; #ifdef HAVE_JUMP_LABEL @@ -206,7 +206,27 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, #endif rcu_read_lock(); - hook_head = rcu_dereference(net->nf.hooks[pf][hook]); + switch (pf) { + case NFPROTO_IPV4: + hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); + break; + case NFPROTO_IPV6: + hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); + break; + case NFPROTO_ARP: + hook_head = rcu_dereference(net->nf.hooks_arp[hook]); + break; + case NFPROTO_BRIDGE: + hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); + break; + case NFPROTO_DECNET: + hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); + break; + default: + WARN_ON_ONCE(1); + break; + } + if (hook_head) { struct nf_hook_state state; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cc00af2ac2d7..b39c563c2fce 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,7 +17,11 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_ipv4[NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_ipv6[NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_arp[NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_bridge[NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_decnet[NF_MAX_HOOKS]; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c2eea1b8737a..27f1d4f2114a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -991,7 +991,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, unsigned int i; int ret; - e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + e = rcu_dereference(net->nf.hooks_bridge[hook]); if (!e) return okfn(net, sk, skb); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 6921f9f1cc81..a6eaaf303be8 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -264,8 +264,23 @@ out_assign: static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) { - if (reg->pf != NFPROTO_NETDEV) - return net->nf.hooks[reg->pf]+reg->hooknum; + switch (reg->pf) { + case NFPROTO_NETDEV: + break; + case NFPROTO_ARP: + return net->nf.hooks_arp + reg->hooknum; + case NFPROTO_BRIDGE: + return net->nf.hooks_bridge + reg->hooknum; + case NFPROTO_IPV4: + return net->nf.hooks_ipv4 + reg->hooknum; + case NFPROTO_IPV6: + return net->nf.hooks_ipv6 + reg->hooknum; + case NFPROTO_DECNET: + return net->nf.hooks_decnet + reg->hooknum; + default: + WARN_ON_ONCE(1); + return NULL; + } #ifdef CONFIG_NETFILTER_INGRESS if (reg->hooknum == NF_NETDEV_INGRESS) { @@ -534,14 +549,21 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static int __net_init netfilter_net_init(struct net *net) +static void __net_init __netfilter_net_init(struct nf_hook_entries *e[NF_MAX_HOOKS]) { - int i, h; + int h; - for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { - for (h = 0; h < NF_MAX_HOOKS; h++) - RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); - } + for (h = 0; h < NF_MAX_HOOKS; h++) + RCU_INIT_POINTER(e[h], NULL); +} + +static int __net_init netfilter_net_init(struct net *net) +{ + __netfilter_net_init(net->nf.hooks_ipv4); + __netfilter_net_init(net->nf.hooks_ipv6); + __netfilter_net_init(net->nf.hooks_arp); + __netfilter_net_init(net->nf.hooks_bridge); + __netfilter_net_init(net->nf.hooks_decnet); #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4e42a4a68a0b..836aeb08686e 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -201,6 +201,23 @@ repeat: return NF_ACCEPT; } +static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) +{ + switch (pf) { + case NFPROTO_BRIDGE: + return rcu_dereference(net->nf.hooks_bridge[hooknum]); + case NFPROTO_IPV4: + return rcu_dereference(net->nf.hooks_ipv4[hooknum]); + case NFPROTO_IPV6: + return rcu_dereference(net->nf.hooks_ipv6[hooknum]); + default: + WARN_ON_ONCE(1); + return NULL; + } + + return NULL; +} + /* Caller must hold rcu read-side lock */ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { @@ -216,12 +233,12 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) net = entry->state.net; pf = entry->state.pf; - hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]); + hooks = nf_hook_entries_head(net, pf, entry->state.hook); nf_queue_entry_release_refs(entry); i = entry->hook_index; - if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) { + if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { kfree_skb(skb); kfree(entry); return; -- cgit v1.2.3 From ef57170bbfdd6958281011332b1fd237712f69f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Dec 2017 16:28:24 +0100 Subject: netfilter: reduce hook array sizes to what is needed Not all families share the same hook count, adjust sizes to what is needed. struct net before: /* size: 6592, cachelines: 103, members: 46 */ after: /* size: 5952, cachelines: 93, members: 46 */ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/netfilter.h | 10 +++++----- net/netfilter/core.c | 24 +++++++++++++++++------- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index b39c563c2fce..8f756a4b9205 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,11 +17,11 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entries __rcu *hooks_ipv4[NF_MAX_HOOKS]; - struct nf_hook_entries __rcu *hooks_ipv6[NF_MAX_HOOKS]; - struct nf_hook_entries __rcu *hooks_arp[NF_MAX_HOOKS]; - struct nf_hook_entries __rcu *hooks_bridge[NF_MAX_HOOKS]; - struct nf_hook_entries __rcu *hooks_decnet[NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; + struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; + struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS]; + struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; + struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a6eaaf303be8..43643427b560 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -268,14 +268,24 @@ static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const case NFPROTO_NETDEV: break; case NFPROTO_ARP: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum)) + return NULL; return net->nf.hooks_arp + reg->hooknum; case NFPROTO_BRIDGE: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum)) + return NULL; return net->nf.hooks_bridge + reg->hooknum; case NFPROTO_IPV4: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum)) + return NULL; return net->nf.hooks_ipv4 + reg->hooknum; case NFPROTO_IPV6: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum)) + return NULL; return net->nf.hooks_ipv6 + reg->hooknum; case NFPROTO_DECNET: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum)) + return NULL; return net->nf.hooks_decnet + reg->hooknum; default: WARN_ON_ONCE(1); @@ -549,21 +559,21 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static void __net_init __netfilter_net_init(struct nf_hook_entries *e[NF_MAX_HOOKS]) +static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max) { int h; - for (h = 0; h < NF_MAX_HOOKS; h++) + for (h = 0; h < max; h++) RCU_INIT_POINTER(e[h], NULL); } static int __net_init netfilter_net_init(struct net *net) { - __netfilter_net_init(net->nf.hooks_ipv4); - __netfilter_net_init(net->nf.hooks_ipv6); - __netfilter_net_init(net->nf.hooks_arp); - __netfilter_net_init(net->nf.hooks_bridge); - __netfilter_net_init(net->nf.hooks_decnet); + __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4)); + __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6)); + __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp)); + __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); + __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", -- cgit v1.2.3 From bb4badf3a3dc81190f7c1c1fa063cdefb18df45f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Dec 2017 16:28:25 +0100 Subject: netfilter: don't allocate space for decnet hooks unless needed no need to define hook points if the family isn't supported. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 ++ include/net/netns/netfilter.h | 2 ++ net/netfilter/core.c | 4 ++++ 3 files changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9dcbcdfa3b82..ce4e91df8b56 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -219,9 +219,11 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, case NFPROTO_BRIDGE: hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); break; +#if IS_ENABLED(CONFIG_DECNET) case NFPROTO_DECNET: hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); break; +#endif default: WARN_ON_ONCE(1); break; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 8f756a4b9205..432609fd9899 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -21,7 +21,9 @@ struct netns_nf { struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS]; struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; +#if IS_ENABLED(CONFIG_DECNET) struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; +#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 43643427b560..4738d0d0ebac 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -283,10 +283,12 @@ static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum)) return NULL; return net->nf.hooks_ipv6 + reg->hooknum; +#if IS_ENABLED(CONFIG_DECNET) case NFPROTO_DECNET: if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum)) return NULL; return net->nf.hooks_decnet + reg->hooknum; +#endif default: WARN_ON_ONCE(1); return NULL; @@ -573,7 +575,9 @@ static int __net_init netfilter_net_init(struct net *net) __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6)); __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp)); __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); +#if IS_ENABLED(CONFIG_DECNET) __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); +#endif #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", -- cgit v1.2.3 From 2a95183a5e0375df756efb2ca37602d71e8455f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Dec 2017 16:28:26 +0100 Subject: netfilter: don't allocate space for arp/bridge hooks unless needed no need to define hook points if the family isn't supported. Because we need these hooks for either nftables, arp/ebtables or the 'call-iptables' hack we have in the bridge layer add two new dependencies, NETFILTER_FAMILY_{ARP,BRIDGE}, and have the users select them. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 4 ++++ include/net/netns/netfilter.h | 4 ++++ net/Kconfig | 1 + net/bridge/netfilter/Kconfig | 2 ++ net/ipv4/netfilter/Kconfig | 2 ++ net/netfilter/Kconfig | 6 ++++++ net/netfilter/core.c | 8 ++++++++ net/netfilter/nf_queue.c | 2 ++ 8 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ce4e91df8b56..ee7a9cbd8d81 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -214,10 +214,14 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; case NFPROTO_ARP: +#ifdef CONFIG_NETFILTER_FAMILY_ARP hook_head = rcu_dereference(net->nf.hooks_arp[hook]); +#endif break; case NFPROTO_BRIDGE: +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); +#endif break; #if IS_ENABLED(CONFIG_DECNET) case NFPROTO_DECNET: diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 432609fd9899..ca043342c0eb 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -19,8 +19,12 @@ struct netns_nf { #endif struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; +#ifdef CONFIG_NETFILTER_FAMILY_ARP struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS]; +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; +#endif #if IS_ENABLED(CONFIG_DECNET) struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; #endif diff --git a/net/Kconfig b/net/Kconfig index efe930db3c08..37ec8e67af57 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -182,6 +182,7 @@ config BRIDGE_NETFILTER depends on BRIDGE depends on NETFILTER && INET depends on NETFILTER_ADVANCED + select NETFILTER_FAMILY_BRIDGE default m ---help--- Enabling this option will let arptables resp. iptables see bridged diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index e7ef1a1ef3a6..225d1668dfdd 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,6 +4,7 @@ # menuconfig NF_TABLES_BRIDGE depends on BRIDGE && NETFILTER && NF_TABLES + select NETFILTER_FAMILY_BRIDGE tristate "Ethernet Bridge nf_tables support" if NF_TABLES_BRIDGE @@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" depends on BRIDGE && NETFILTER && NETFILTER_XTABLES + select NETFILTER_FAMILY_BRIDGE help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c11eb1744ab1..cee51045e2f7 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -72,6 +72,7 @@ endif # NF_TABLES_IPV4 config NF_TABLES_ARP tristate "ARP nf_tables support" + select NETFILTER_FAMILY_ARP help This option enables the ARP support for nf_tables. @@ -392,6 +393,7 @@ endif # IP_NF_IPTABLES config IP_NF_ARPTABLES tristate "ARP tables support" select NETFILTER_XTABLES + select NETFILTER_FAMILY_ARP depends on NETFILTER_ADVANCED help arptables is a general, extensible packet identification framework. diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e4a13cc8a2e7..263609a7e010 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -12,6 +12,12 @@ config NETFILTER_INGRESS config NETFILTER_NETLINK tristate +config NETFILTER_FAMILY_BRIDGE + bool + +config NETFILTER_FAMILY_ARP + bool + config NETFILTER_NETLINK_ACCT tristate "Netfilter NFACCT over NFNETLINK interface" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 4738d0d0ebac..ed8618f4efd7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -267,14 +267,18 @@ static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const switch (reg->pf) { case NFPROTO_NETDEV: break; +#ifdef CONFIG_NETFILTER_FAMILY_ARP case NFPROTO_ARP: if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum)) return NULL; return net->nf.hooks_arp + reg->hooknum; +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE case NFPROTO_BRIDGE: if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum)) return NULL; return net->nf.hooks_bridge + reg->hooknum; +#endif case NFPROTO_IPV4: if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum)) return NULL; @@ -573,8 +577,12 @@ static int __net_init netfilter_net_init(struct net *net) { __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4)); __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6)); +#ifdef CONFIG_NETFILTER_FAMILY_ARP __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp)); +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); +#endif #if IS_ENABLED(CONFIG_DECNET) __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); #endif diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 836aeb08686e..0c02fdb7efc9 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -204,8 +204,10 @@ repeat: static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) { switch (pf) { +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE case NFPROTO_BRIDGE: return rcu_dereference(net->nf.hooks_bridge[hooknum]); +#endif case NFPROTO_IPV4: return rcu_dereference(net->nf.hooks_ipv4[hooknum]); case NFPROTO_IPV6: -- cgit v1.2.3 From 7a4473a31a6974c0fbf9afe80ef16ac5bc67cf79 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Dec 2017 01:43:14 +0100 Subject: netfilter: nf_tables: explicit nft_set_pktinfo() call from hook path Instead of calling this function from the family specific variant, this reduces the code size in the fast path for the netdev, bridge and inet families. After this change, we must call nft_set_pktinfo() upfront from the chain hook indirection. Before: text data bss dec hex filename 2145 208 0 2353 931 net/netfilter/nf_tables_netdev.o After: text data bss dec hex filename 2125 208 0 2333 91d net/netfilter/nf_tables_netdev.o Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 12 ++---------- include/net/netfilter/nf_tables_ipv4.h | 25 ++++++++----------------- include/net/netfilter/nf_tables_ipv6.h | 27 +++++++++------------------ net/bridge/netfilter/nf_tables_bridge.c | 8 +++++--- net/ipv4/netfilter/nf_tables_arp.c | 3 ++- net/ipv4/netfilter/nf_tables_ipv4.c | 3 ++- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 3 ++- net/ipv4/netfilter/nft_chain_route_ipv4.c | 3 ++- net/ipv6/netfilter/nf_tables_ipv6.c | 3 ++- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 3 ++- net/ipv6/netfilter/nft_chain_route_ipv6.c | 3 ++- net/netfilter/nf_tables_netdev.c | 8 +++++--- 12 files changed, 43 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fecc6112c768..f6e4325b3306 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -54,8 +54,8 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->xt.state = state; } -static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, - struct sk_buff *skb) +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) { pkt->tprot_set = false; pkt->tprot = 0; @@ -63,14 +63,6 @@ static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, pkt->xt.fragoff = 0; } -static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - nft_set_pktinfo_proto_unspec(pkt, skb); -} - /** * struct nft_verdict - nf_tables verdict * diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index f0896ba456c4..b2deeb2755a4 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -5,15 +5,11 @@ #include #include -static inline void -nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + struct sk_buff *skb) { struct iphdr *ip; - nft_set_pktinfo(pkt, skb, state); - ip = ip_hdr(pkt->skb); pkt->tprot_set = true; pkt->tprot = ip->protocol; @@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } -static inline int -__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { struct iphdr *iph, _iph; u32 len, thoff; @@ -52,14 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, return 0; } -static inline void -nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { - nft_set_pktinfo(pkt, skb, state); - if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) - nft_set_pktinfo_proto_unspec(pkt, skb); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0) + nft_set_pktinfo_unspec(pkt, skb); } extern struct nft_af_info nft_af_ipv4; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index b8065b72f56e..1890c5bc3c3c 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -5,20 +5,16 @@ #include #include -static inline void -nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + struct sk_buff *skb) { unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; - nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { - nft_set_pktinfo_proto_unspec(pkt, skb); + nft_set_pktinfo_unspec(pkt, skb); return; } @@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, pkt->xt.fragoff = frag_off; } -static inline int -__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; @@ -68,14 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, #endif } -static inline void -nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) +static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb) { - nft_set_pktinfo(pkt, skb, state); - if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) - nft_set_pktinfo_proto_unspec(pkt, skb); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0) + nft_set_pktinfo_unspec(pkt, skb); } extern struct nft_af_info nft_af_ipv6; diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 97afdc0744e6..612bfd0737d5 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -25,15 +25,17 @@ nft_do_chain_bridge(void *priv, { struct nft_pktinfo pkt; + nft_set_pktinfo(&pkt, skb, state); + switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_set_pktinfo_ipv4_validate(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb); break; case htons(ETH_P_IPV6): - nft_set_pktinfo_ipv6_validate(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb); break; default: - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); break; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index ec47c12cd137..3fa7e1b22bdd 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 2840a29b2e04..35fa265d1ce3 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -24,7 +24,8 @@ static unsigned int nft_do_chain_ipv4(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index f5c66a7a4bf2..f2a490981594 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 30493beb611a..fb3d49fb62fe 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -38,7 +38,8 @@ static unsigned int nf_route_table_hook(void *priv, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index d6e4ba5de916..71bac94770dd 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,7 +22,8 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 443cd306c0b0..73fe2bd13fcf 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index f2727475895e..11d3c3b9aa18 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv, u32 mark, flowlabel; int err; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 403432988313..3cd127dd2895 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -21,15 +21,17 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, { struct nft_pktinfo pkt; + nft_set_pktinfo(&pkt, skb, state); + switch (skb->protocol) { case htons(ETH_P_IP): - nft_set_pktinfo_ipv4_validate(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb); break; case htons(ETH_P_IPV6): - nft_set_pktinfo_ipv6_validate(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb); break; default: - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); break; } -- cgit v1.2.3 From 408070d6ee3490da63430bc8ce13348cf2eb47ea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Nov 2017 13:39:57 +0100 Subject: netfilter: nf_tables: add nft_set_is_anonymous() helper Add helper function to test for the NFT_SET_ANONYMOUS flag. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ net/netfilter/nf_tables_api.c | 8 ++++---- net/netfilter/nft_dynset.c | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f6e4325b3306..169b562df226 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -416,6 +416,11 @@ struct nft_set { __attribute__((aligned(__alignof__(u64)))); }; +static inline bool nft_set_is_anonymous(const struct nft_set *set) +{ + return set->flags & NFT_SET_ANONYMOUS; +} + static inline void *nft_set_priv(const struct nft_set *set) { return (void *)set->data; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 36d38f8b0284..7bc1b0c92a7f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -774,7 +774,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (set->flags & NFT_SET_ANONYMOUS && + if (nft_set_is_anonymous(set) && !list_empty(&set->bindings)) continue; @@ -3284,7 +3284,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; if (binding->flags & NFT_SET_MAP) { @@ -3319,7 +3319,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, { list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && + if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } @@ -5157,7 +5157,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* This avoids hitting -EBUSY when deleting the table * from the transaction. */ - if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS && + if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) trans->ctx.table->use--; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 66221ad891a9..ec0fd78231d8 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -184,7 +184,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_EXPR] != NULL) { if (!(set->flags & NFT_SET_EVAL)) return -EINVAL; - if (!(set->flags & NFT_SET_ANONYMOUS)) + if (!nft_set_is_anonymous(set)) return -EOPNOTSUPP; priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); -- cgit v1.2.3 From 12355d3670dac0dde5aae3deefb59f8cc0a9ed2a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 15:36:24 +0100 Subject: netfilter: nf_tables_inet: don't use multihook infrastructure anymore Use new native NFPROTO_INET support in netfilter core, this gets rid of ad-hoc code in the nf_tables API codebase. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 2 - include/net/netfilter/nf_tables_ipv6.h | 2 - net/ipv4/netfilter/nf_tables_ipv4.c | 3 +- net/ipv6/netfilter/nf_tables_ipv6.c | 3 +- net/netfilter/nf_tables_inet.c | 70 ++++++++++++++++++++++++++++------ 5 files changed, 60 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index b2deeb2755a4..ed7b511f0a59 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -53,6 +53,4 @@ static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, nft_set_pktinfo_unspec(pkt, skb); } -extern struct nft_af_info nft_af_ipv4; - #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 1890c5bc3c3c..dabe6fdb553a 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -69,6 +69,4 @@ static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, nft_set_pktinfo_unspec(pkt, skb); } -extern struct nft_af_info nft_af_ipv6; - #endif diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 35fa265d1ce3..b6223f4b1315 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -45,7 +45,7 @@ static unsigned int nft_ipv4_output(void *priv, return nft_do_chain_ipv4(priv, skb, state); } -struct nft_af_info nft_af_ipv4 __read_mostly = { +static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, @@ -58,7 +58,6 @@ struct nft_af_info nft_af_ipv4 __read_mostly = { [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; -EXPORT_SYMBOL_GPL(nft_af_ipv4); static int nf_tables_ipv4_init_net(struct net *net) { diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 71bac94770dd..b1b5d3824fc1 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -42,7 +42,7 @@ static unsigned int nft_ipv6_output(void *priv, return nft_do_chain_ipv6(priv, skb, state); } -struct nft_af_info nft_af_ipv6 __read_mostly = { +static struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, @@ -55,7 +55,6 @@ struct nft_af_info nft_af_ipv6 __read_mostly = { [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; -EXPORT_SYMBOL_GPL(nft_af_ipv6); static int nf_tables_ipv6_init_net(struct net *net) { diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index f713cc205669..c6194b3509aa 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,26 +17,71 @@ #include #include -static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n) +static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { - struct nft_af_info *afi; - - if (n == 1) - afi = &nft_af_ipv4; - else - afi = &nft_af_ipv6; + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (state->pf) { + case NFPROTO_IPV4: + nft_set_pktinfo_ipv4(&pkt, skb); + break; + case NFPROTO_IPV6: + nft_set_pktinfo_ipv6(&pkt, skb); + break; + default: + break; + } + + return nft_do_chain(&pkt, priv); +} - ops->pf = afi->family; - if (afi->hooks[ops->hooknum]) - ops->hook = afi->hooks[ops->hooknum]; +static unsigned int nft_inet_output(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (state->pf) { + case NFPROTO_IPV4: + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("ignoring short SOCK_RAW packet\n"); + return NF_ACCEPT; + } + nft_set_pktinfo_ipv4(&pkt, skb); + break; + case NFPROTO_IPV6: + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { + if (net_ratelimit()) + pr_info("ignoring short SOCK_RAW packet\n"); + return NF_ACCEPT; + } + nft_set_pktinfo_ipv6(&pkt, skb); + break; + default: + break; + } + + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_inet __read_mostly = { .family = NFPROTO_INET, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .nops = 2, - .hook_ops_init = nft_inet_hook_ops_init, + .nops = 1, + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_inet, + [NF_INET_LOCAL_OUT] = nft_inet_output, + [NF_INET_FORWARD] = nft_do_chain_inet, + [NF_INET_PRE_ROUTING] = nft_do_chain_inet, + [NF_INET_POST_ROUTING] = nft_do_chain_inet, + }, }; static int __net_init nf_tables_inet_init_net(struct net *net) -- cgit v1.2.3 From c974a3a36468d1947c96f0c694c8a1b2e7810043 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 15:40:25 +0100 Subject: netfilter: nf_tables: remove multihook chains and families Since NFPROTO_INET is handled from the core, we don't need to maintain extra infrastructure in nf_tables to handle the double hook registration, one for IPv4 and another for IPv6. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 +-- net/bridge/netfilter/nf_tables_bridge.c | 1 - net/ipv4/netfilter/nf_tables_arp.c | 1 - net/ipv4/netfilter/nf_tables_ipv4.c | 1 - net/ipv6/netfilter/nf_tables_ipv6.c | 1 - net/netfilter/nf_tables_api.c | 102 ++++++++++++++------------------ net/netfilter/nf_tables_inet.c | 1 - net/netfilter/nf_tables_netdev.c | 3 +- net/netfilter/nft_compat.c | 8 +-- 9 files changed, 49 insertions(+), 78 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 169b562df226..a3560fd55f99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -902,8 +902,6 @@ struct nft_stats { struct u64_stats_sync syncp; }; -#define NFT_HOOK_OPS_MAX 2 - /** * struct nft_base_chain - nf_tables base chain * @@ -915,7 +913,7 @@ struct nft_stats { * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { - struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; + struct nf_hook_ops ops; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -976,8 +974,6 @@ enum nft_af_flags { * @owner: module owner * @tables: used internally * @flags: family flags - * @nops: number of hook ops in this family - * @hook_ops_init: initialization function for chain hook ops * @hooks: hookfn overrides for packet validation */ struct nft_af_info { @@ -987,9 +983,6 @@ struct nft_af_info { struct module *owner; struct list_head tables; u32 flags; - unsigned int nops; - void (*hook_ops_init)(struct nf_hook_ops *, - unsigned int); nf_hookfn *hooks[NF_MAX_HOOKS]; }; diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 612bfd0737d5..991d0abb46aa 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -46,7 +46,6 @@ static struct nft_af_info nft_af_bridge __read_mostly = { .family = NFPROTO_BRIDGE, .nhooks = NF_BR_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, [NF_BR_LOCAL_IN] = nft_do_chain_bridge, diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 3fa7e1b22bdd..3090f639bd89 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -31,7 +31,6 @@ static struct nft_af_info nft_af_arp __read_mostly = { .family = NFPROTO_ARP, .nhooks = NF_ARP_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_ARP_IN] = nft_do_chain_arp, [NF_ARP_OUT] = nft_do_chain_arp, diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index b6223f4b1315..51b363abd541 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -49,7 +49,6 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, [NF_INET_LOCAL_OUT] = nft_ipv4_output, diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index b1b5d3824fc1..78d34a2f3347 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -46,7 +46,6 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, [NF_INET_LOCAL_OUT] = nft_ipv6_output, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7bc1b0c92a7f..06fae437c9cb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -139,29 +139,26 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nf_tables_register_hooks(struct net *net, - const struct nft_table *table, - struct nft_chain *chain, - unsigned int hook_nops) +static int nf_tables_register_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) { if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return 0; - return nf_register_net_hooks(net, nft_base_chain(chain)->ops, - hook_nops); + return nf_register_net_hook(net, &nft_base_chain(chain)->ops); } -static void nf_tables_unregister_hooks(struct net *net, - const struct nft_table *table, - struct nft_chain *chain, - unsigned int hook_nops) +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) { if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return; - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); + nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -595,8 +592,7 @@ static void _nf_tables_table_disable(struct net *net, if (cnt && i++ == cnt) break; - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); + nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); } } @@ -613,8 +609,7 @@ static int nf_tables_table_enable(struct net *net, if (!nft_is_base_chain(chain)) continue; - err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); + err = nf_register_net_hook(net, &nft_base_chain(chain)->ops); if (err < 0) goto err; @@ -1026,7 +1021,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; struct nlattr *nest; nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); @@ -1252,8 +1247,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) free_percpu(basechain->stats); if (basechain->stats) static_branch_dec(&nft_counters_enabled); - if (basechain->ops[0].dev != NULL) - dev_put(basechain->ops[0].dev); + if (basechain->ops.dev != NULL) + dev_put(basechain->ops.dev); kfree(chain->name); kfree(basechain); } else { @@ -1354,7 +1349,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats; struct net *net = ctx->net; struct nft_chain *chain; - unsigned int i; int err; if (table->use == UINT_MAX) @@ -1393,21 +1387,18 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, basechain->type = hook.type; chain = &basechain->chain; - for (i = 0; i < afi->nops; i++) { - ops = &basechain->ops[i]; - ops->pf = family; - ops->hooknum = hook.num; - ops->priority = hook.priority; - ops->priv = chain; - ops->hook = afi->hooks[ops->hooknum]; - ops->dev = hook.dev; - if (hookfn) - ops->hook = hookfn; - if (afi->hook_ops_init) - afi->hook_ops_init(ops, i); - if (basechain->type->type == NFT_CHAIN_T_NAT) - ops->nat_hook = true; - } + ops = &basechain->ops; + ops->pf = family; + ops->hooknum = hook.num; + ops->priority = hook.priority; + ops->priv = chain; + ops->hook = afi->hooks[ops->hooknum]; + ops->dev = hook.dev; + if (hookfn) + ops->hook = hookfn; + + if (basechain->type->type == NFT_CHAIN_T_NAT) + ops->nat_hook = true; chain->flags |= NFT_BASE_CHAIN; basechain->policy = policy; @@ -1425,7 +1416,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err1; } - err = nf_tables_register_hooks(net, table, chain, afi->nops); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err1; @@ -1439,7 +1430,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return 0; err2: - nf_tables_unregister_hooks(net, table, chain, afi->nops); + nf_tables_unregister_hook(net, table, chain); err1: nf_tables_chain_destroy(chain); @@ -1452,14 +1443,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; struct nft_chain *chain = ctx->chain; - struct nft_af_info *afi = ctx->afi; struct nft_base_chain *basechain; struct nft_stats *stats = NULL; struct nft_chain_hook hook; const struct nlattr *name; struct nf_hook_ops *ops; struct nft_trans *trans; - int err, i; + int err; if (nla[NFTA_CHAIN_HOOK]) { if (!nft_is_base_chain(chain)) @@ -1476,14 +1466,12 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return -EBUSY; } - for (i = 0; i < afi->nops; i++) { - ops = &basechain->ops[i]; - if (ops->hooknum != hook.num || - ops->priority != hook.priority || - ops->dev != hook.dev) { - nft_chain_release_hook(&hook); - return -EBUSY; - } + ops = &basechain->ops; + if (ops->hooknum != hook.num || + ops->priority != hook.priority || + ops->dev != hook.dev) { + nft_chain_release_hook(&hook); + return -EBUSY; } nft_chain_release_hook(&hook); } @@ -5134,10 +5122,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELCHAIN: list_del_rcu(&trans->ctx.chain->list); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); - nf_tables_unregister_hooks(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain, - trans->ctx.afi->nops); + nf_tables_unregister_hook(trans->ctx.net, + trans->ctx.table, + trans->ctx.chain); break; case NFT_MSG_NEWRULE: nft_clear(trans->ctx.net, nft_trans_rule(trans)); @@ -5274,10 +5261,9 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } else { trans->ctx.table->use--; list_del_rcu(&trans->ctx.chain->list); - nf_tables_unregister_hooks(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain, - trans->ctx.afi->nops); + nf_tables_unregister_hook(trans->ctx.net, + trans->ctx.table, + trans->ctx.chain); } break; case NFT_MSG_DELCHAIN: @@ -5378,7 +5364,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); - if ((1 << basechain->ops[0].hooknum) & hook_flags) + if ((1 << basechain->ops.hooknum) & hook_flags) return 0; return -EOPNOTSUPP; @@ -5866,8 +5852,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) BUG_ON(!nft_is_base_chain(ctx->chain)); - nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, - ctx->afi->nops); + nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); ctx->chain->use--; @@ -5896,8 +5881,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) list_for_each_entry_safe(table, nt, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hooks(net, table, chain, - afi->nops); + nf_tables_unregister_hook(net, table, chain); /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index c6194b3509aa..edd7829a5753 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -74,7 +74,6 @@ static struct nft_af_info nft_af_inet __read_mostly = { .family = NFPROTO_INET, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_inet, [NF_INET_LOCAL_OUT] = nft_inet_output, diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 3cd127dd2895..018e2c5b4a49 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -43,7 +43,6 @@ static struct nft_af_info nft_af_netdev __read_mostly = { .nhooks = NF_NETDEV_NUMHOOKS, .owner = THIS_MODULE, .flags = NFT_AF_NEEDS_DEV, - .nops = 1, .hooks = { [NF_NETDEV_INGRESS] = nft_do_chain_netdev, }, @@ -98,7 +97,7 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, __nft_release_basechain(ctx); break; case NETDEV_CHANGENAME: - if (dev->ifindex != basechain->ops[0].dev->ifindex) + if (dev->ifindex != basechain->ops.dev->ifindex) return; strncpy(basechain->dev_name, dev->name, IFNAMSIZ); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index b89f4f65b2a0..dcff0dc8d28b 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -169,7 +169,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; par->hook_mask = 1 << ops->hooknum; } else { @@ -302,7 +302,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; hook_mask = 1 << ops->hooknum; if (target->hooks && !(hook_mask & target->hooks)) @@ -383,7 +383,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; par->hook_mask = 1 << ops->hooknum; } else { @@ -481,7 +481,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; hook_mask = 1 << ops->hooknum; if (match->hooks && !(hook_mask & match->hooks)) -- cgit v1.2.3 From c2f9eafee9aaeedaad9eadbf47913f4681d723df Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 15:43:17 +0100 Subject: netfilter: nf_tables: remove hooks from family definition They don't belong to the family definition, move them to the filter chain type definition instead. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +--- net/bridge/netfilter/nf_tables_bridge.c | 14 +++++++------- net/ipv4/netfilter/nf_tables_arp.c | 8 ++++---- net/ipv4/netfilter/nf_tables_ipv4.c | 14 +++++++------- net/ipv6/netfilter/nf_tables_ipv6.c | 14 +++++++------- net/netfilter/nf_tables_api.c | 6 +----- net/netfilter/nf_tables_inet.c | 14 +++++++------- net/netfilter/nf_tables_netdev.c | 6 +++--- 8 files changed, 37 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a3560fd55f99..e040b6151acc 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -880,7 +880,7 @@ enum nft_chain_type { * @family: address family * @owner: module owner * @hook_mask: mask of valid hooks - * @hooks: hookfn overrides + * @hooks: array of hook functions */ struct nf_chain_type { const char *name; @@ -974,7 +974,6 @@ enum nft_af_flags { * @owner: module owner * @tables: used internally * @flags: family flags - * @hooks: hookfn overrides for packet validation */ struct nft_af_info { struct list_head list; @@ -983,7 +982,6 @@ struct nft_af_info { struct module *owner; struct list_head tables; u32 flags; - nf_hookfn *hooks[NF_MAX_HOOKS]; }; int nft_register_afinfo(struct net *, struct nft_af_info *); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 991d0abb46aa..74260ffec74d 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -46,13 +46,6 @@ static struct nft_af_info nft_af_bridge __read_mostly = { .family = NFPROTO_BRIDGE, .nhooks = NF_BR_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, - [NF_BR_LOCAL_IN] = nft_do_chain_bridge, - [NF_BR_FORWARD] = nft_do_chain_bridge, - [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, - [NF_BR_POST_ROUTING] = nft_do_chain_bridge, - }, }; static int nf_tables_bridge_init_net(struct net *net) @@ -93,6 +86,13 @@ static const struct nf_chain_type filter_bridge = { (1 << NF_BR_FORWARD) | (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_POST_ROUTING), + .hooks = { + [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + [NF_BR_POST_ROUTING] = nft_do_chain_bridge, + }, }; static void nf_br_saveroute(const struct sk_buff *skb, diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 3090f639bd89..f84c17763f6f 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -31,10 +31,6 @@ static struct nft_af_info nft_af_arp __read_mostly = { .family = NFPROTO_ARP, .nhooks = NF_ARP_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_ARP_IN] = nft_do_chain_arp, - [NF_ARP_OUT] = nft_do_chain_arp, - }, }; static int nf_tables_arp_init_net(struct net *net) @@ -72,6 +68,10 @@ static const struct nf_chain_type filter_arp = { .owner = THIS_MODULE, .hook_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT), + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + }, }; static int __init nf_tables_arp_init(void) diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 51b363abd541..8aeb15c2b9b2 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -49,13 +49,6 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_ipv4_output, - [NF_INET_FORWARD] = nft_do_chain_ipv4, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, - }, }; static int nf_tables_ipv4_init_net(struct net *net) @@ -96,6 +89,13 @@ static const struct nf_chain_type filter_ipv4 = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, }; static int __init nf_tables_ipv4_init(void) diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 78d34a2f3347..d4c9ef030e4f 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -46,13 +46,6 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_ipv6_output, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, }; static int nf_tables_ipv6_init_net(struct net *net) @@ -93,6 +86,13 @@ static const struct nf_chain_type filter_ipv6 = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, + }, }; static int __init nf_tables_ipv6_init(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 06fae437c9cb..15773a3189ce 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1357,7 +1357,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nla[NFTA_CHAIN_HOOK]) { struct nft_chain_hook hook; struct nf_hook_ops *ops; - nf_hookfn *hookfn; err = nft_chain_parse_hook(net, nla, afi, &hook, create); if (err < 0) @@ -1383,7 +1382,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, static_branch_inc(&nft_counters_enabled); } - hookfn = hook.type->hooks[hook.num]; basechain->type = hook.type; chain = &basechain->chain; @@ -1392,10 +1390,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, ops->hooknum = hook.num; ops->priority = hook.priority; ops->priv = chain; - ops->hook = afi->hooks[ops->hooknum]; + ops->hook = hook.type->hooks[ops->hooknum]; ops->dev = hook.dev; - if (hookfn) - ops->hook = hookfn; if (basechain->type->type == NFT_CHAIN_T_NAT) ops->nat_hook = true; diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index edd7829a5753..313987e2b1fe 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -74,13 +74,6 @@ static struct nft_af_info nft_af_inet __read_mostly = { .family = NFPROTO_INET, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_inet, - [NF_INET_LOCAL_OUT] = nft_inet_output, - [NF_INET_FORWARD] = nft_do_chain_inet, - [NF_INET_PRE_ROUTING] = nft_do_chain_inet, - [NF_INET_POST_ROUTING] = nft_do_chain_inet, - }, }; static int __net_init nf_tables_inet_init_net(struct net *net) @@ -121,6 +114,13 @@ static const struct nf_chain_type filter_inet = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_inet, + [NF_INET_LOCAL_OUT] = nft_inet_output, + [NF_INET_FORWARD] = nft_do_chain_inet, + [NF_INET_PRE_ROUTING] = nft_do_chain_inet, + [NF_INET_POST_ROUTING] = nft_do_chain_inet, + }, }; static int __init nf_tables_inet_init(void) diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 018e2c5b4a49..42f6f6d42a6d 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -43,9 +43,6 @@ static struct nft_af_info nft_af_netdev __read_mostly = { .nhooks = NF_NETDEV_NUMHOOKS, .owner = THIS_MODULE, .flags = NFT_AF_NEEDS_DEV, - .hooks = { - [NF_NETDEV_INGRESS] = nft_do_chain_netdev, - }, }; static int nf_tables_netdev_init_net(struct net *net) @@ -82,6 +79,9 @@ static const struct nf_chain_type nft_filter_chain_netdev = { .family = NFPROTO_NETDEV, .owner = THIS_MODULE, .hook_mask = (1 << NF_NETDEV_INGRESS), + .hooks = { + [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + }, }; static void nft_netdev_event(unsigned long event, struct net_device *dev, -- cgit v1.2.3 From 625c556118f3c2fd28bb8ef6da18c53bd4037be4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 9 Dec 2017 21:01:08 +0100 Subject: netfilter: connlimit: split xt_connlimit into front and backend This allows to reuse xt_connlimit infrastructure from nf_tables. The upcoming nf_tables frontend can just pass in an nftables register as input key, this allows limiting by any nft-supported key, including concatenations. For xt_connlimit, pass in the zone and the ip/ipv6 address. With help from Yi-Hung Wei. Signed-off-by: Florian Westphal Acked-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 17 ++ include/uapi/linux/netfilter/xt_connlimit.h | 2 +- net/netfilter/Kconfig | 3 + net/netfilter/Makefile | 2 + net/netfilter/nf_conncount.c | 373 ++++++++++++++++++++++++++++ net/netfilter/xt_connlimit.c | 369 ++------------------------- 6 files changed, 420 insertions(+), 346 deletions(-) create mode 100644 include/net/netfilter/nf_conntrack_count.h create mode 100644 net/netfilter/nf_conncount.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h new file mode 100644 index 000000000000..adf8db44cf86 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_count.h @@ -0,0 +1,17 @@ +#ifndef _NF_CONNTRACK_COUNT_H +#define _NF_CONNTRACK_COUNT_H + +struct nf_conncount_data; + +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, + unsigned int keylen); +void nf_conncount_destroy(struct net *net, unsigned int family, + struct nf_conncount_data *data); + +unsigned int nf_conncount_count(struct net *net, + struct nf_conncount_data *data, + const u32 *key, + unsigned int family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); +#endif diff --git a/include/uapi/linux/netfilter/xt_connlimit.h b/include/uapi/linux/netfilter/xt_connlimit.h index 07e5e9d47882..d4d1943dcd11 100644 --- a/include/uapi/linux/netfilter/xt_connlimit.h +++ b/include/uapi/linux/netfilter/xt_connlimit.h @@ -27,7 +27,7 @@ struct xt_connlimit_info { __u32 flags; /* Used internally by the kernel */ - struct xt_connlimit_data *data __attribute__((aligned(8))); + struct nf_conncount_data *data __attribute__((aligned(8))); }; #endif /* _XT_CONNLIMIT_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 263609a7e010..af3d9f721b3f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -68,6 +68,8 @@ config NF_LOG_NETDEV select NF_LOG_COMMON if NF_CONNTRACK +config NETFILTER_CONNCOUNT + tristate config NF_CONNTRACK_MARK bool 'Connection mark tracking support' @@ -1126,6 +1128,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED + select NETFILTER_CONNCOUNT ---help--- This match allows you to match against the number of parallel connections to a server per client IP address (or address block). diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f78ed2470831..490a55e7166d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,6 +67,8 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o + # generic packet duplication from netdev family obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c new file mode 100644 index 000000000000..a95518261168 --- /dev/null +++ b/net/netfilter/nf_conncount.c @@ -0,0 +1,373 @@ +/* + * count the number of connections matching an arbitrary key. + * + * (C) 2017 Red Hat GmbH + * Author: Florian Westphal + * + * split from xt_connlimit.c: + * (c) 2000 Gerd Knorr + * Nov 2002: Martin Bene : + * only ignore TIME_WAIT or gone connections + * (C) CC Computer Consultants GmbH, 2007 + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CONNCOUNT_SLOTS 256U + +#ifdef CONFIG_LOCKDEP +#define CONNCOUNT_LOCK_SLOTS 8U +#else +#define CONNCOUNT_LOCK_SLOTS 256U +#endif + +#define CONNCOUNT_GC_MAX_NODES 8 +#define MAX_KEYLEN 5 + +/* we will save the tuples of all connections we care about */ +struct nf_conncount_tuple { + struct hlist_node node; + struct nf_conntrack_tuple tuple; +}; + +struct nf_conncount_rb { + struct rb_node node; + struct hlist_head hhead; /* connections/hosts in same subnet */ + u32 key[MAX_KEYLEN]; +}; + +static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; + +struct nf_conncount_data { + unsigned int keylen; + struct rb_root root[CONNCOUNT_SLOTS]; +}; + +static u_int32_t conncount_rnd __read_mostly; +static struct kmem_cache *conncount_rb_cachep __read_mostly; +static struct kmem_cache *conncount_conn_cachep __read_mostly; + +static inline bool already_closed(const struct nf_conn *conn) +{ + if (nf_ct_protonum(conn) == IPPROTO_TCP) + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || + conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; + else + return 0; +} + +static int key_diff(const u32 *a, const u32 *b, unsigned int klen) +{ + return memcmp(a, b, klen * sizeof(u32)); +} + +static bool add_hlist(struct hlist_head *head, + const struct nf_conntrack_tuple *tuple) +{ + struct nf_conncount_tuple *conn; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return false; + conn->tuple = *tuple; + hlist_add_head(&conn->node, head); + return true; +} + +static unsigned int check_hlist(struct net *net, + struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit) +{ + const struct nf_conntrack_tuple_hash *found; + struct nf_conncount_tuple *conn; + struct hlist_node *n; + struct nf_conn *found_ct; + unsigned int length = 0; + + *addit = true; + + /* check the saved connections */ + hlist_for_each_entry_safe(conn, n, head, node) { + found = nf_conntrack_find_get(net, zone, &conn->tuple); + if (found == NULL) { + hlist_del(&conn->node); + kmem_cache_free(conncount_conn_cachep, conn); + continue; + } + + found_ct = nf_ct_tuplehash_to_ctrack(found); + + if (nf_ct_tuple_equal(&conn->tuple, tuple)) { + /* + * Just to be sure we have it only once in the list. + * We should not see tuples twice unless someone hooks + * this into a table without "-p tcp --syn". + */ + *addit = false; + } else if (already_closed(found_ct)) { + /* + * we do not care about connections which are + * closed already -> ditch it + */ + nf_ct_put(found_ct); + hlist_del(&conn->node); + kmem_cache_free(conncount_conn_cachep, conn); + continue; + } + + nf_ct_put(found_ct); + length++; + } + + return length; +} + +static void tree_nodes_free(struct rb_root *root, + struct nf_conncount_rb *gc_nodes[], + unsigned int gc_count) +{ + struct nf_conncount_rb *rbconn; + + while (gc_count) { + rbconn = gc_nodes[--gc_count]; + rb_erase(&rbconn->node, root); + kmem_cache_free(conncount_rb_cachep, rbconn); + } +} + +static unsigned int +count_tree(struct net *net, struct rb_root *root, + const u32 *key, u8 keylen, + u8 family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; + struct rb_node **rbnode, *parent; + struct nf_conncount_rb *rbconn; + struct nf_conncount_tuple *conn; + unsigned int gc_count; + bool no_gc = false; + + restart: + gc_count = 0; + parent = NULL; + rbnode = &(root->rb_node); + while (*rbnode) { + int diff; + bool addit; + + rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); + + parent = *rbnode; + diff = key_diff(key, rbconn->key, keylen); + if (diff < 0) { + rbnode = &((*rbnode)->rb_left); + } else if (diff > 0) { + rbnode = &((*rbnode)->rb_right); + } else { + /* same source network -> be counted! */ + unsigned int count; + count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + + tree_nodes_free(root, gc_nodes, gc_count); + if (!addit) + return count; + + if (!add_hlist(&rbconn->hhead, tuple)) + return 0; /* hotdrop */ + + return count + 1; + } + + if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) + continue; + + /* only used for GC on hhead, retval and 'addit' ignored */ + check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + if (hlist_empty(&rbconn->hhead)) + gc_nodes[gc_count++] = rbconn; + } + + if (gc_count) { + no_gc = true; + tree_nodes_free(root, gc_nodes, gc_count); + /* tree_node_free before new allocation permits + * allocator to re-use newly free'd object. + * + * This is a rare event; in most cases we will find + * existing node to re-use. (or gc_count is 0). + */ + goto restart; + } + + /* no match, need to insert new node */ + rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); + if (rbconn == NULL) + return 0; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) { + kmem_cache_free(conncount_rb_cachep, rbconn); + return 0; + } + + conn->tuple = *tuple; + memcpy(rbconn->key, key, sizeof(u32) * keylen); + + INIT_HLIST_HEAD(&rbconn->hhead); + hlist_add_head(&conn->node, &rbconn->hhead); + + rb_link_node(&rbconn->node, parent, rbnode); + rb_insert_color(&rbconn->node, root); + return 1; +} + +unsigned int nf_conncount_count(struct net *net, + struct nf_conncount_data *data, + const u32 *key, + unsigned int family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + struct rb_root *root; + int count; + u32 hash; + + hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; + root = &data->root[hash]; + + spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + + count = count_tree(net, root, key, data->keylen, family, tuple, zone); + + spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + + return count; +} +EXPORT_SYMBOL_GPL(nf_conncount_count); + +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, + unsigned int keylen) +{ + struct nf_conncount_data *data; + int ret, i; + + if (keylen % sizeof(u32) || + keylen / sizeof(u32) > MAX_KEYLEN || + keylen == 0) + return ERR_PTR(-EINVAL); + + net_get_random_once(&conncount_rnd, sizeof(conncount_rnd)); + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + ret = nf_ct_netns_get(net, family); + if (ret < 0) { + kfree(data); + return ERR_PTR(ret); + } + + for (i = 0; i < ARRAY_SIZE(data->root); ++i) + data->root[i] = RB_ROOT; + + data->keylen = keylen / sizeof(u32); + + return data; +} +EXPORT_SYMBOL_GPL(nf_conncount_init); + +static void destroy_tree(struct rb_root *r) +{ + struct nf_conncount_tuple *conn; + struct nf_conncount_rb *rbconn; + struct hlist_node *n; + struct rb_node *node; + + while ((node = rb_first(r)) != NULL) { + rbconn = rb_entry(node, struct nf_conncount_rb, node); + + rb_erase(node, r); + + hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) + kmem_cache_free(conncount_conn_cachep, conn); + + kmem_cache_free(conncount_rb_cachep, rbconn); + } +} + +void nf_conncount_destroy(struct net *net, unsigned int family, + struct nf_conncount_data *data) +{ + unsigned int i; + + nf_ct_netns_put(net, family); + + for (i = 0; i < ARRAY_SIZE(data->root); ++i) + destroy_tree(&data->root[i]); + + kfree(data); +} +EXPORT_SYMBOL_GPL(nf_conncount_destroy); + +static int __init nf_conncount_modinit(void) +{ + int i; + + BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); + BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0); + + for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i) + spin_lock_init(&nf_conncount_locks[i]); + + conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", + sizeof(struct nf_conncount_tuple), + 0, 0, NULL); + if (!conncount_conn_cachep) + return -ENOMEM; + + conncount_rb_cachep = kmem_cache_create("nf_conncount_rb", + sizeof(struct nf_conncount_rb), + 0, 0, NULL); + if (!conncount_rb_cachep) { + kmem_cache_destroy(conncount_conn_cachep); + return -ENOMEM; + } + + return 0; +} + +static void __exit nf_conncount_modexit(void) +{ + kmem_cache_destroy(conncount_conn_cachep); + kmem_cache_destroy(conncount_rb_cachep); +} + +module_init(nf_conncount_modinit); +module_exit(nf_conncount_modexit); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Florian Westphal "); +MODULE_DESCRIPTION("netfilter: count number of connections matching a key"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index a6214f235333..b1b17b9353e1 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -12,292 +12,30 @@ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include #include -#include -#include #include #include + #include #include #include #include - -#define CONNLIMIT_SLOTS 256U - -#ifdef CONFIG_LOCKDEP -#define CONNLIMIT_LOCK_SLOTS 8U -#else -#define CONNLIMIT_LOCK_SLOTS 256U -#endif - -#define CONNLIMIT_GC_MAX_NODES 8 - -/* we will save the tuples of all connections we care about */ -struct xt_connlimit_conn { - struct hlist_node node; - struct nf_conntrack_tuple tuple; -}; - -struct xt_connlimit_rb { - struct rb_node node; - struct hlist_head hhead; /* connections/hosts in same subnet */ - union nf_inet_addr addr; /* search key */ -}; - -static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp; - -struct xt_connlimit_data { - struct rb_root climit_root[CONNLIMIT_SLOTS]; -}; - -static u_int32_t connlimit_rnd __read_mostly; -static struct kmem_cache *connlimit_rb_cachep __read_mostly; -static struct kmem_cache *connlimit_conn_cachep __read_mostly; - -static inline unsigned int connlimit_iphash(__be32 addr) -{ - return jhash_1word((__force __u32)addr, - connlimit_rnd) % CONNLIMIT_SLOTS; -} - -static inline unsigned int -connlimit_iphash6(const union nf_inet_addr *addr) -{ - return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), - connlimit_rnd) % CONNLIMIT_SLOTS; -} - -static inline bool already_closed(const struct nf_conn *conn) -{ - if (nf_ct_protonum(conn) == IPPROTO_TCP) - return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || - conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; - else - return 0; -} - -static int -same_source(const union nf_inet_addr *addr, - const union nf_inet_addr *u3, u_int8_t family) -{ - if (family == NFPROTO_IPV4) - return ntohl(addr->ip) - ntohl(u3->ip); - - return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6)); -} - -static bool add_hlist(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr) -{ - struct xt_connlimit_conn *conn; - - conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return false; - conn->tuple = *tuple; - hlist_add_head(&conn->node, head); - return true; -} - -static unsigned int check_hlist(struct net *net, - struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) -{ - const struct nf_conntrack_tuple_hash *found; - struct xt_connlimit_conn *conn; - struct hlist_node *n; - struct nf_conn *found_ct; - unsigned int length = 0; - - *addit = true; - - /* check the saved connections */ - hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, zone, &conn->tuple); - if (found == NULL) { - hlist_del(&conn->node); - kmem_cache_free(connlimit_conn_cachep, conn); - continue; - } - - found_ct = nf_ct_tuplehash_to_ctrack(found); - - if (nf_ct_tuple_equal(&conn->tuple, tuple)) { - /* - * Just to be sure we have it only once in the list. - * We should not see tuples twice unless someone hooks - * this into a table without "-p tcp --syn". - */ - *addit = false; - } else if (already_closed(found_ct)) { - /* - * we do not care about connections which are - * closed already -> ditch it - */ - nf_ct_put(found_ct); - hlist_del(&conn->node); - kmem_cache_free(connlimit_conn_cachep, conn); - continue; - } - - nf_ct_put(found_ct); - length++; - } - - return length; -} - -static void tree_nodes_free(struct rb_root *root, - struct xt_connlimit_rb *gc_nodes[], - unsigned int gc_count) -{ - struct xt_connlimit_rb *rbconn; - - while (gc_count) { - rbconn = gc_nodes[--gc_count]; - rb_erase(&rbconn->node, root); - kmem_cache_free(connlimit_rb_cachep, rbconn); - } -} - -static unsigned int -count_tree(struct net *net, struct rb_root *root, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, - u8 family, const struct nf_conntrack_zone *zone) -{ - struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; - struct rb_node **rbnode, *parent; - struct xt_connlimit_rb *rbconn; - struct xt_connlimit_conn *conn; - unsigned int gc_count; - bool no_gc = false; - - restart: - gc_count = 0; - parent = NULL; - rbnode = &(root->rb_node); - while (*rbnode) { - int diff; - bool addit; - - rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); - - parent = *rbnode; - diff = same_source(addr, &rbconn->addr, family); - if (diff < 0) { - rbnode = &((*rbnode)->rb_left); - } else if (diff > 0) { - rbnode = &((*rbnode)->rb_right); - } else { - /* same source network -> be counted! */ - unsigned int count; - count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); - - tree_nodes_free(root, gc_nodes, gc_count); - if (!addit) - return count; - - if (!add_hlist(&rbconn->hhead, tuple, addr)) - return 0; /* hotdrop */ - - return count + 1; - } - - if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) - continue; - - /* only used for GC on hhead, retval and 'addit' ignored */ - check_hlist(net, &rbconn->hhead, tuple, zone, &addit); - if (hlist_empty(&rbconn->hhead)) - gc_nodes[gc_count++] = rbconn; - } - - if (gc_count) { - no_gc = true; - tree_nodes_free(root, gc_nodes, gc_count); - /* tree_node_free before new allocation permits - * allocator to re-use newly free'd object. - * - * This is a rare event; in most cases we will find - * existing node to re-use. (or gc_count is 0). - */ - goto restart; - } - - /* no match, need to insert new node */ - rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC); - if (rbconn == NULL) - return 0; - - conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); - if (conn == NULL) { - kmem_cache_free(connlimit_rb_cachep, rbconn); - return 0; - } - - conn->tuple = *tuple; - rbconn->addr = *addr; - - INIT_HLIST_HEAD(&rbconn->hhead); - hlist_add_head(&conn->node, &rbconn->hhead); - - rb_link_node(&rbconn->node, parent, rbnode); - rb_insert_color(&rbconn->node, root); - return 1; -} - -static int count_them(struct net *net, - struct xt_connlimit_data *data, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, - u_int8_t family, - const struct nf_conntrack_zone *zone) -{ - struct rb_root *root; - int count; - u32 hash; - - if (family == NFPROTO_IPV6) - hash = connlimit_iphash6(addr); - else - hash = connlimit_iphash(addr->ip); - root = &data->climit_root[hash]; - - spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - - count = count_tree(net, root, tuple, addr, family, zone); - - spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - - return count; -} +#include static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; - union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; + u32 key[5]; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) { @@ -310,6 +48,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); + union nf_inet_addr addr; unsigned int i; memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? @@ -317,22 +56,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) addr.ip6[i] &= info->mask.ip6[i]; + memcpy(key, &addr, sizeof(addr.ip6)); + key[4] = zone->id; } else { const struct iphdr *iph = ip_hdr(skb); - addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? + key[0] = (info->flags & XT_CONNLIMIT_DADDR) ? iph->daddr : iph->saddr; - addr.ip &= info->mask.ip; + key[0] &= info->mask.ip; + key[1] = zone->id; } - connections = count_them(net, info->data, tuple_ptr, &addr, - xt_family(par), zone); + connections = nf_conncount_count(net, info->data, key, + xt_family(par), tuple_ptr, zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; - return (connections > info->limit) ^ - !!(info->flags & XT_CONNLIMIT_INVERT); + return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT); hotdrop: par->hotdrop = true; @@ -342,61 +83,27 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; - unsigned int i; - int ret; + unsigned int keylen; - net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - - ret = nf_ct_netns_get(par->net, par->family); - if (ret < 0) { - pr_info("cannot load conntrack support for " - "address family %u\n", par->family); - return ret; - } + keylen = sizeof(u32); + if (par->family == NFPROTO_IPV6) + keylen += sizeof(struct in6_addr); + else + keylen += sizeof(struct in_addr); /* init private data */ - info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); - if (info->data == NULL) { - nf_ct_netns_put(par->net, par->family); - return -ENOMEM; - } - - for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) - info->data->climit_root[i] = RB_ROOT; + info->data = nf_conncount_init(par->net, par->family, keylen); + if (IS_ERR(info->data)) + return PTR_ERR(info->data); return 0; } -static void destroy_tree(struct rb_root *r) -{ - struct xt_connlimit_conn *conn; - struct xt_connlimit_rb *rbconn; - struct hlist_node *n; - struct rb_node *node; - - while ((node = rb_first(r)) != NULL) { - rbconn = rb_entry(node, struct xt_connlimit_rb, node); - - rb_erase(node, r); - - hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) - kmem_cache_free(connlimit_conn_cachep, conn); - - kmem_cache_free(connlimit_rb_cachep, rbconn); - } -} - static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; - unsigned int i; - - nf_ct_netns_put(par->net, par->family); - - for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) - destroy_tree(&info->data->climit_root[i]); - kfree(info->data); + nf_conncount_destroy(par->net, par->family, info->data); } static struct xt_match connlimit_mt_reg __read_mostly = { @@ -413,40 +120,12 @@ static struct xt_match connlimit_mt_reg __read_mostly = { static int __init connlimit_mt_init(void) { - int ret, i; - - BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); - BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); - - for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i) - spin_lock_init(&xt_connlimit_locks[i]); - - connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", - sizeof(struct xt_connlimit_conn), - 0, 0, NULL); - if (!connlimit_conn_cachep) - return -ENOMEM; - - connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb", - sizeof(struct xt_connlimit_rb), - 0, 0, NULL); - if (!connlimit_rb_cachep) { - kmem_cache_destroy(connlimit_conn_cachep); - return -ENOMEM; - } - ret = xt_register_match(&connlimit_mt_reg); - if (ret != 0) { - kmem_cache_destroy(connlimit_conn_cachep); - kmem_cache_destroy(connlimit_rb_cachep); - } - return ret; + return xt_register_match(&connlimit_mt_reg); } static void __exit connlimit_mt_exit(void) { xt_unregister_match(&connlimit_mt_reg); - kmem_cache_destroy(connlimit_conn_cachep); - kmem_cache_destroy(connlimit_rb_cachep); } module_init(connlimit_mt_init); -- cgit v1.2.3 From 0befd061af59c4ba426588930f09eb9ea2475534 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Jan 2018 12:50:12 +0100 Subject: netfilter: nf_tables: remove nft_dereference() This macro is unnecessary, it just hides details for one single caller. nfnl_dereference() is just enough. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 --- net/netfilter/nf_tables_api.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e040b6151acc..e3ec02fd0f67 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1113,9 +1113,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, void nft_trace_notify(struct nft_traceinfo *info); -#define nft_dereference(p) \ - nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) - #define MODULE_ALIAS_NFT_FAMILY(family) \ MODULE_ALIAS("nft-afinfo-" __stringify(family)) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 15773a3189ce..fa564dac66a2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1222,13 +1222,13 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) static void nft_chain_stats_replace(struct nft_base_chain *chain, struct nft_stats __percpu *newstats) { + struct nft_stats __percpu *oldstats; + if (newstats == NULL) return; if (chain->stats) { - struct nft_stats __percpu *oldstats = - nft_dereference(chain->stats); - + oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES); rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); free_percpu(oldstats); -- cgit v1.2.3 From 3b49e2e94e6ebb8b23d0955d9e898254455734f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:07 +0100 Subject: netfilter: nf_tables: add flow table netlink frontend This patch introduces a netlink control plane to create, delete and dump flow tables. Flow tables are identified by name, this name is used from rules to refer to an specific flow table. Flow tables use the rhashtable class and a generic garbage collector to remove expired entries. This also adds the infrastructure to add different flow table types, so we can add one for each layer 3 protocol family. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 23 + include/net/netfilter/nf_tables.h | 48 ++ include/uapi/linux/netfilter/nf_tables.h | 53 +++ net/netfilter/nf_tables_api.c | 747 ++++++++++++++++++++++++++++++- 4 files changed, 870 insertions(+), 1 deletion(-) create mode 100644 include/net/netfilter/nf_flow_table.h (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h new file mode 100644 index 000000000000..3a0779589281 --- /dev/null +++ b/include/net/netfilter/nf_flow_table.h @@ -0,0 +1,23 @@ +#ifndef _NF_FLOW_TABLE_H +#define _NF_FLOW_TABLE_H + +#include + +struct nf_flowtable; + +struct nf_flowtable_type { + struct list_head list; + int family; + void (*gc)(struct work_struct *work); + const struct rhashtable_params *params; + nf_hookfn *hook; + struct module *owner; +}; + +struct nf_flowtable { + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; + struct delayed_work gc_work; +}; + +#endif /* _FLOW_OFFLOAD_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e3ec02fd0f67..dd238950df81 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #define NFT_JUMP_STACK_SIZE 16 @@ -943,6 +944,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @chains: chains in the table * @sets: sets in the table * @objects: stateful objects in the table + * @flowtables: flow tables in the table * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) @@ -954,6 +956,7 @@ struct nft_table { struct list_head chains; struct list_head sets; struct list_head objects; + struct list_head flowtables; u64 hgenerator; u32 use; u16 flags:14, @@ -1084,6 +1087,44 @@ struct nft_object_ops { int nft_register_obj(struct nft_object_type *obj_type); void nft_unregister_obj(struct nft_object_type *obj_type); +/** + * struct nft_flowtable - nf_tables flow table + * + * @list: flow table list node in table list + * @table: the table the flow table is contained in + * @name: name of this flow table + * @hooknum: hook number + * @priority: hook priority + * @ops_len: number of hooks in array + * @genmask: generation mask + * @use: number of references to this flow table + * @data: rhashtable and garbage collector + * @ops: array of hooks + */ +struct nft_flowtable { + struct list_head list; + struct nft_table *table; + char *name; + int hooknum; + int priority; + int ops_len; + u32 genmask:2, + use:30; + /* runtime data below here */ + struct nf_hook_ops *ops ____cacheline_aligned; + struct nf_flowtable data; +}; + +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask); +void nft_flow_table_iterate(struct net *net, + void (*iter)(struct nf_flowtable *flowtable, void *data), + void *data); + +void nft_register_flowtable_type(struct nf_flowtable_type *type); +void nft_unregister_flowtable_type(struct nf_flowtable_type *type); + /** * struct nft_traceinfo - nft tracing information and state * @@ -1317,4 +1358,11 @@ struct nft_trans_obj { #define nft_trans_obj(trans) \ (((struct nft_trans_obj *)trans->data)->obj) +struct nft_trans_flowtable { + struct nft_flowtable *flowtable; +}; + +#define nft_trans_flowtable(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flowtable) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2efbf9744c2a..591b53bce070 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -92,6 +92,9 @@ enum nft_verdicts { * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) + * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) + * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -116,6 +119,9 @@ enum nf_tables_msg_types { NFT_MSG_GETOBJ, NFT_MSG_DELOBJ, NFT_MSG_GETOBJ_RESET, + NFT_MSG_NEWFLOWTABLE, + NFT_MSG_GETFLOWTABLE, + NFT_MSG_DELFLOWTABLE, NFT_MSG_MAX, }; @@ -1309,6 +1315,53 @@ enum nft_object_attributes { }; #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) +/** + * enum nft_flowtable_attributes - nf_tables flow table netlink attributes + * + * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + */ +enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, + NFTA_FLOWTABLE_TABLE, + NFTA_FLOWTABLE_NAME, + NFTA_FLOWTABLE_HOOK, + NFTA_FLOWTABLE_USE, + __NFTA_FLOWTABLE_MAX +}; +#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) + +/** + * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes + * + * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED) + */ +enum nft_flowtable_hook_attributes { + NFTA_FLOWTABLE_HOOK_UNSPEC, + NFTA_FLOWTABLE_HOOK_NUM, + NFTA_FLOWTABLE_HOOK_PRIORITY, + NFTA_FLOWTABLE_HOOK_DEVS, + __NFTA_FLOWTABLE_HOOK_MAX +}; +#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) + +/** + * enum nft_device_attributes - nf_tables device netlink attributes + * + * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + */ +enum nft_devices_attributes { + NFTA_DEVICE_UNSPEC, + NFTA_DEVICE_NAME, + __NFTA_DEVICE_MAX +}; +#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) + + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fa564dac66a2..db0933256ec9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); +static LIST_HEAD(nf_tables_flowtables); /** * nft_register_afinfo - register nf_tables address family info @@ -345,6 +347,40 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } +static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, + sizeof(struct nft_trans_flowtable)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWFLOWTABLE) + nft_activate_next(ctx->net, flowtable); + + nft_trans_flowtable(trans) = flowtable; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nft_delflowtable(struct nft_ctx *ctx, + struct nft_flowtable *flowtable) +{ + int err; + + err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (err < 0) + return err; + + nft_deactivate_next(ctx->net, flowtable); + ctx->table->use--; + + return err; +} + /* * Tables */ @@ -728,6 +764,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); + INIT_LIST_HEAD(&table->flowtables); table->flags = flags; nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); @@ -749,10 +786,11 @@ err1: static int nft_flush_table(struct nft_ctx *ctx) { - int err; + struct nft_flowtable *flowtable, *nft; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_set *set, *ns; + int err; list_for_each_entry(chain, &ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) @@ -778,6 +816,12 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) { + err = nft_delflowtable(ctx, flowtable); + if (err < 0) + goto out; + } + list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { err = nft_delobj(ctx, obj); if (err < 0) @@ -4839,6 +4883,605 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx, ctx->afi->family, ctx->report, GFP_KERNEL); } +/* + * Flow tables + */ +void nft_register_flowtable_type(struct nf_flowtable_type *type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail_rcu(&type->list, &nf_tables_flowtables); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_register_flowtable_type); + +void nft_unregister_flowtable_type(struct nf_flowtable_type *type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del_rcu(&type->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type); + +static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { + [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, + [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, + [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, +}; + +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask) +{ + struct nft_flowtable *flowtable; + + list_for_each_entry(flowtable, &table->flowtables, list) { + if (!nla_strcmp(nla, flowtable->name) && + nft_active_genmask(flowtable, genmask)) + return flowtable; + } + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); + +#define NFT_FLOWTABLE_DEVICE_MAX 8 + +static int nf_tables_parse_devices(const struct nft_ctx *ctx, + const struct nlattr *attr, + struct net_device *dev_array[], int *len) +{ + const struct nlattr *tmp; + struct net_device *dev; + char ifname[IFNAMSIZ]; + int rem, n = 0, err; + + nla_for_each_nested(tmp, attr, rem) { + if (nla_type(tmp) != NFTA_DEVICE_NAME) { + err = -EINVAL; + goto err1; + } + + nla_strlcpy(ifname, tmp, IFNAMSIZ); + dev = dev_get_by_name(ctx->net, ifname); + if (!dev) { + err = -ENOENT; + goto err1; + } + + dev_array[n++] = dev; + if (n == NFT_FLOWTABLE_DEVICE_MAX) { + err = -EFBIG; + goto err1; + } + } + if (!len) + return -EINVAL; + + err = 0; +err1: + *len = n; + return err; +} + +static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = { + [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 }, + [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 }, + [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED }, +}; + +static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, + const struct nlattr *attr, + struct nft_flowtable *flowtable) +{ + struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX]; + struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; + struct nf_hook_ops *ops; + int hooknum, priority; + int err, n = 0, i; + + err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, + nft_flowtable_hook_policy, NULL); + if (err < 0) + return err; + + if (!tb[NFTA_FLOWTABLE_HOOK_NUM] || + !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] || + !tb[NFTA_FLOWTABLE_HOOK_DEVS]) + return -EINVAL; + + hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); + if (hooknum >= ctx->afi->nhooks) + return -EINVAL; + + priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY])); + + err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS], + dev_array, &n); + if (err < 0) + goto err1; + + ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); + if (!ops) { + err = -ENOMEM; + goto err1; + } + + flowtable->ops = ops; + flowtable->ops_len = n; + + for (i = 0; i < n; i++) { + flowtable->ops[i].pf = NFPROTO_NETDEV; + flowtable->ops[i].hooknum = hooknum; + flowtable->ops[i].priority = priority; + flowtable->ops[i].priv = &flowtable->data.rhashtable; + flowtable->ops[i].hook = flowtable->data.type->hook; + flowtable->ops[i].dev = dev_array[i]; + } + + err = 0; +err1: + for (i = 0; i < n; i++) + dev_put(dev_array[i]); + + return err; +} + +static const struct nf_flowtable_type * +__nft_flowtable_type_get(const struct nft_af_info *afi) +{ + const struct nf_flowtable_type *type; + + list_for_each_entry(type, &nf_tables_flowtables, list) { + if (afi->family == type->family) + return type; + } + return NULL; +} + +static const struct nf_flowtable_type * +nft_flowtable_type_get(const struct nft_af_info *afi) +{ + const struct nf_flowtable_type *type; + + type = __nft_flowtable_type_get(afi); + if (type != NULL && try_module_get(type->owner)) + return type; + +#ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nf-flowtable-%u", afi->family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_flowtable_type_get(afi)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +void nft_flow_table_iterate(struct net *net, + void (*iter)(struct nf_flowtable *flowtable, void *data), + void *data) +{ + struct nft_flowtable *flowtable; + const struct nft_af_info *afi; + const struct nft_table *table; + + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + iter(&flowtable->data, data); + } + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nft_flow_table_iterate); + +static void nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable) +{ + int i; + + for (i = 0; i < flowtable->ops_len; i++) { + if (!flowtable->ops[i].dev) + continue; + + nf_unregister_net_hook(net, &flowtable->ops[i]); + } +} + +static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nf_flowtable_type *type; + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_ctx ctx; + int err, i, k; + + if (!nla[NFTA_FLOWTABLE_TABLE] || + !nla[NFTA_FLOWTABLE_NAME] || + !nla[NFTA_FLOWTABLE_HOOK]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); + if (IS_ERR(flowtable)) { + err = PTR_ERR(flowtable); + if (err != -ENOENT) + return err; + } else { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + return 0; + } + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); + if (!flowtable) + return -ENOMEM; + + flowtable->table = table; + flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL); + if (!flowtable->name) { + err = -ENOMEM; + goto err1; + } + + type = nft_flowtable_type_get(afi); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err2; + } + + flowtable->data.type = type; + err = rhashtable_init(&flowtable->data.rhashtable, type->params); + if (err < 0) + goto err3; + + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], + flowtable); + if (err < 0) + goto err3; + + for (i = 0; i < flowtable->ops_len; i++) { + err = nf_register_net_hook(net, &flowtable->ops[i]); + if (err < 0) + goto err4; + } + + err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (err < 0) + goto err5; + + INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc); + queue_delayed_work(system_power_efficient_wq, + &flowtable->data.gc_work, HZ); + + list_add_tail_rcu(&flowtable->list, &table->flowtables); + table->use++; + + return 0; +err5: + i = flowtable->ops_len; +err4: + for (k = i - 1; k >= 0; k--) + nf_unregister_net_hook(net, &flowtable->ops[i]); + + kfree(flowtable->ops); +err3: + module_put(type->owner); +err2: + kfree(flowtable->name); +err1: + kfree(flowtable); + return err; +} + +static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_ctx ctx; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + if (flowtable->use > 0) + return -EBUSY; + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + return nft_delflowtable(&ctx, flowtable); +} + +static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, + u32 flags, int family, + struct nft_flowtable *flowtable) +{ + struct nlattr *nest, *nest_devs; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + int i; + + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || + nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use))) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || + nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) + goto nla_put_failure; + + nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS); + if (!nest_devs) + goto nla_put_failure; + + for (i = 0; i < flowtable->ops_len; i++) { + if (flowtable->ops[i].dev && + nla_put_string(skb, NFTA_DEVICE_NAME, + flowtable->ops[i].dev->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest_devs); + nla_nest_end(skb, nest); + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +struct nft_flowtable_filter { + char *table; +}; + +static int nf_tables_dump_flowtable(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nft_flowtable_filter *filter = cb->data; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + const struct nft_af_info *afi; + const struct nft_table *table; + + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + if (!nft_is_active(net, flowtable)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter && filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; + + if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWFLOWTABLE, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, flowtable) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } + } +done: + rcu_read_unlock(); + + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_dump_flowtable_done(struct netlink_callback *cb) +{ + struct nft_flowtable_filter *filter = cb->data; + + if (!filter) + return 0; + + kfree(filter->table); + kfree(filter); + + return 0; +} + +static struct nft_flowtable_filter * +nft_flowtable_filter_alloc(const struct nlattr * const nla[]) +{ + struct nft_flowtable_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); + + if (nla[NFTA_FLOWTABLE_TABLE]) { + filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], + GFP_KERNEL); + if (!filter->table) { + kfree(filter); + return ERR_PTR(-ENOMEM); + } + } + return filter; +} + +static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_flowtable, + .done = nf_tables_dump_flowtable_done, + }; + + if (nla[NFTA_FLOWTABLE_TABLE]) { + struct nft_flowtable_filter *filter; + + filter = nft_flowtable_filter_alloc(nla); + if (IS_ERR(filter)) + return -ENOMEM; + + c.data = filter; + } + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_FLOWTABLE_NAME]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); + if (IS_ERR(table)) + return PTR_ERR(flowtable); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFT_MSG_NEWFLOWTABLE, 0, family, + flowtable); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_flowtable_notify(struct nft_ctx *ctx, + struct nft_flowtable *flowtable, + int event) +{ + struct sk_buff *skb; + int err; + + if (ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + return; + + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, + ctx->seq, event, 0, + ctx->afi->family, flowtable); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; +err: + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); +} + +static void nft_flowtable_destroy(void *ptr, void *arg) +{ + kfree(ptr); +} + +static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) +{ + cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->name); + rhashtable_free_and_destroy(&flowtable->data.rhashtable, + nft_flowtable_destroy, NULL); + module_put(flowtable->data.type->owner); +} + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { @@ -4869,6 +5512,49 @@ nla_put_failure: return -EMSGSIZE; } +static void nft_flowtable_event(unsigned long event, struct net_device *dev, + struct nft_flowtable *flowtable) +{ + int i; + + for (i = 0; i < flowtable->ops_len; i++) { + if (flowtable->ops[i].dev != dev) + continue; + + nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]); + flowtable->ops[i].dev = NULL; + break; + } +} + +static int nf_tables_flowtable_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_flowtable *flowtable; + struct nft_table *table; + struct nft_af_info *afi; + + if (event != NETDEV_UNREGISTER) + return 0; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(flowtable, &table->flowtables, list) { + nft_flowtable_event(event, dev, flowtable); + } + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + + return NOTIFY_DONE; +} + +static struct notifier_block nf_tables_flowtable_notifier = { + .notifier_call = nf_tables_flowtable_event, +}; + static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) { @@ -5021,6 +5707,21 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, + [NFT_MSG_NEWFLOWTABLE] = { + .call_batch = nf_tables_newflowtable, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, + [NFT_MSG_GETFLOWTABLE] = { + .call = nf_tables_getflowtable, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, + [NFT_MSG_DELFLOWTABLE] = { + .call_batch = nf_tables_delflowtable, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -5066,6 +5767,9 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELOBJ: nft_obj_destroy(nft_trans_obj(trans)); break; + case NFT_MSG_DELFLOWTABLE: + nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); + break; } kfree(trans); } @@ -5183,6 +5887,21 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), NFT_MSG_DELOBJ); break; + case NFT_MSG_NEWFLOWTABLE: + nft_clear(net, nft_trans_flowtable(trans)); + nf_tables_flowtable_notify(&trans->ctx, + nft_trans_flowtable(trans), + NFT_MSG_NEWFLOWTABLE); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELFLOWTABLE: + list_del_rcu(&nft_trans_flowtable(trans)->list); + nf_tables_flowtable_notify(&trans->ctx, + nft_trans_flowtable(trans), + NFT_MSG_DELFLOWTABLE); + nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans)); + break; } } @@ -5220,6 +5939,9 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWOBJ: nft_obj_destroy(nft_trans_obj(trans)); break; + case NFT_MSG_NEWFLOWTABLE: + nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); + break; } kfree(trans); } @@ -5309,6 +6031,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; + case NFT_MSG_NEWFLOWTABLE: + trans->ctx.table->use--; + list_del_rcu(&nft_trans_flowtable(trans)->list); + nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans)); + break; + case NFT_MSG_DELFLOWTABLE: + trans->ctx.table->use++; + nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_trans_destroy(trans); + break; } } @@ -5865,6 +6598,7 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain); /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { + struct nft_flowtable *flowtable, *nf; struct nft_table *table, *nt; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; @@ -5878,6 +6612,9 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) list_for_each_entry_safe(table, nt, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_unregister_hook(net, table, chain); + list_for_each_entry(flowtable, &table->flowtables, list) + nf_unregister_net_hooks(net, flowtable->ops, + flowtable->ops_len); /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { @@ -5888,6 +6625,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) nf_tables_rule_destroy(&ctx, rule); } } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); + table->use--; + nf_tables_flowtable_destroy(flowtable); + } list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); table->use--; @@ -5932,6 +6674,8 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err3; + register_netdevice_notifier(&nf_tables_flowtable_notifier); + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); return register_pernet_subsys(&nf_tables_net_ops); err3: @@ -5946,6 +6690,7 @@ static void __exit nf_tables_module_exit(void) { unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); rcu_barrier(); nf_tables_core_module_exit(); kfree(info); -- cgit v1.2.3 From ac2a66665e231847cab11b8c8e844ce43207dd2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:11 +0100 Subject: netfilter: add generic flow table infrastructure This patch defines the API to interact with flow tables, this allows to add, delete and lookup for entries in the flow table. This also adds the generic garbage code that removes entries that have expired, ie. no traffic has been seen for a while. Users of the flow table infrastructure can delete entries via flow_offload_dead(), which sets the dying bit, this signals the garbage collector to release an entry from user context. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 94 ++++++++ net/netfilter/Kconfig | 7 + net/netfilter/Makefile | 3 + net/netfilter/nf_flow_table.c | 429 ++++++++++++++++++++++++++++++++++ 4 files changed, 533 insertions(+) create mode 100644 net/netfilter/nf_flow_table.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 3a0779589281..161f71ca78a0 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -1,7 +1,12 @@ #ifndef _NF_FLOW_TABLE_H #define _NF_FLOW_TABLE_H +#include +#include +#include #include +#include +#include struct nf_flowtable; @@ -20,4 +25,93 @@ struct nf_flowtable { struct delayed_work gc_work; }; +enum flow_offload_tuple_dir { + FLOW_OFFLOAD_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY, + __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, +}; +#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) + +struct flow_offload_tuple { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + struct { + __be16 src_port; + __be16 dst_port; + }; + + int iifidx; + + u8 l3proto; + u8 l4proto; + u8 dir; + + int oifidx; + + struct dst_entry *dst_cache; +}; + +struct flow_offload_tuple_rhash { + struct rhash_head node; + struct flow_offload_tuple tuple; +}; + +#define FLOW_OFFLOAD_SNAT 0x1 +#define FLOW_OFFLOAD_DNAT 0x2 +#define FLOW_OFFLOAD_DYING 0x4 + +struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; + u32 flags; + union { + /* Your private driver data here. */ + u32 timeout; + }; +}; + +#define NF_FLOW_TIMEOUT (30 * HZ) + +struct nf_flow_route { + struct { + struct dst_entry *dst; + int ifindex; + } tuple[FLOW_OFFLOAD_DIR_MAX]; +}; + +struct flow_offload *flow_offload_alloc(struct nf_conn *ct, + struct nf_flow_route *route); +void flow_offload_free(struct flow_offload *flow); + +int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow); +struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); +int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data); +void nf_flow_offload_work_gc(struct work_struct *work); +extern const struct rhashtable_params nf_flow_offload_rhash_params; + +void flow_offload_dead(struct flow_offload *flow); + +int nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); +int nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); + +struct flow_ports { + __be16 source, dest; +}; + +#define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + #endif /* _FLOW_OFFLOAD_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index af3d9f721b3f..264ce877ef49 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -657,6 +657,13 @@ endif # NF_TABLES_NETDEV endif # NF_TABLES +config NF_FLOW_TABLE + tristate "Netfilter flow table module" + help + This option adds the flow table core infrastructure. + + To compile it as a module, choose M here. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index eec0c3b72926..2930f2b854be 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -109,6 +109,9 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o +# flow table infrastructure +obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c new file mode 100644 index 000000000000..2f5099cb85b8 --- /dev/null +++ b/net/netfilter/nf_flow_table.c @@ -0,0 +1,429 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct flow_offload_entry { + struct flow_offload flow; + struct nf_conn *ct; + struct rcu_head rcu_head; +}; + +struct flow_offload * +flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) +{ + struct flow_offload_entry *entry; + struct flow_offload *flow; + + if (unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) + return NULL; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto err_ct_refcnt; + + flow = &entry->flow; + + if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) + goto err_dst_cache_original; + + if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) + goto err_dst_cache_reply; + + entry->ct = ct; + + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { + case NFPROTO_IPV4: + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in; + break; + case NFPROTO_IPV6: + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6; + break; + } + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache = + route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache = + route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir = + FLOW_OFFLOAD_DIR_ORIGINAL; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir = + FLOW_OFFLOAD_DIR_REPLY; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = + route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx = + route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = + route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx = + route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; + + if (ct->status & IPS_SRC_NAT) + flow->flags |= FLOW_OFFLOAD_SNAT; + else if (ct->status & IPS_DST_NAT) + flow->flags |= FLOW_OFFLOAD_DNAT; + + return flow; + +err_dst_cache_reply: + dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); +err_dst_cache_original: + kfree(entry); +err_ct_refcnt: + nf_ct_put(ct); + + return NULL; +} +EXPORT_SYMBOL_GPL(flow_offload_alloc); + +void flow_offload_free(struct flow_offload *flow) +{ + struct flow_offload_entry *e; + + dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); + dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); + e = container_of(flow, struct flow_offload_entry, flow); + kfree(e); +} +EXPORT_SYMBOL_GPL(flow_offload_free); + +void flow_offload_dead(struct flow_offload *flow) +{ + flow->flags |= FLOW_OFFLOAD_DYING; +} +EXPORT_SYMBOL_GPL(flow_offload_dead); + +int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) +{ + flow->timeout = (u32)jiffies; + + rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, + *flow_table->type->params); + rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + *flow_table->type->params); + return 0; +} +EXPORT_SYMBOL_GPL(flow_offload_add); + +void flow_offload_del(struct nf_flowtable *flow_table, + struct flow_offload *flow) +{ + struct flow_offload_entry *e; + + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, + *flow_table->type->params); + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + *flow_table->type->params); + + e = container_of(flow, struct flow_offload_entry, flow); + kfree_rcu(e, rcu_head); +} +EXPORT_SYMBOL_GPL(flow_offload_del); + +struct flow_offload_tuple_rhash * +flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple) +{ + return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, + *flow_table->type->params); +} +EXPORT_SYMBOL_GPL(flow_offload_lookup); + +static void nf_flow_release_ct(const struct flow_offload *flow) +{ + struct flow_offload_entry *e; + + e = container_of(flow, struct flow_offload_entry, flow); + nf_ct_delete(e->ct, 0, 0); + nf_ct_put(e->ct); +} + +int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct rhashtable_iter hti; + struct flow_offload *flow; + int err; + + err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); + if (err) + return err; + + rhashtable_walk_start(&hti); + + while ((tuplehash = rhashtable_walk_next(&hti))) { + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) + goto out; + + continue; + } + if (tuplehash->tuple.dir) + continue; + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + + iter(flow, data); + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + return err; +} +EXPORT_SYMBOL_GPL(nf_flow_table_iterate); + +static inline bool nf_flow_has_expired(const struct flow_offload *flow) +{ + return (__s32)(flow->timeout - (u32)jiffies) <= 0; +} + +static inline bool nf_flow_is_dying(const struct flow_offload *flow) +{ + return flow->flags & FLOW_OFFLOAD_DYING; +} + +void nf_flow_offload_work_gc(struct work_struct *work) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table; + struct rhashtable_iter hti; + struct flow_offload *flow; + int err; + + flow_table = container_of(work, struct nf_flowtable, gc_work.work); + + err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); + if (err) + goto schedule; + + rhashtable_walk_start(&hti); + + while ((tuplehash = rhashtable_walk_next(&hti))) { + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) + goto out; + + continue; + } + if (tuplehash->tuple.dir) + continue; + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + + if (nf_flow_has_expired(flow) || + nf_flow_is_dying(flow)) { + flow_offload_del(flow_table, flow); + nf_flow_release_ct(flow); + } + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +schedule: + queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); +} +EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); + +static u32 flow_offload_hash(const void *data, u32 len, u32 seed) +{ + const struct flow_offload_tuple *tuple = data; + + return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); +} + +static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct flow_offload_tuple_rhash *tuplehash = data; + + return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); +} + +static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct flow_offload_tuple *tuple = arg->key; + const struct flow_offload_tuple_rhash *x = ptr; + + if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) + return 1; + + return 0; +} + +const struct rhashtable_params nf_flow_offload_rhash_params = { + .head_offset = offsetof(struct flow_offload_tuple_rhash, node), + .hashfn = flow_offload_hash, + .obj_hashfn = flow_offload_hash_obj, + .obj_cmpfn = flow_offload_hash_cmp, + .automatic_shrinking = true, +}; +EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); + +static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); + + return 0; +} + +static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace2(&udph->check, skb, port, + new_port, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, + u8 protocol, __be16 port, __be16 new_port) +{ + switch (protocol) { + case IPPROTO_TCP: + if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) + return NF_DROP; + break; + } + + return 0; +} + +int nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) +{ + struct flow_ports *hdr; + __be16 port, new_port; + + if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || + skb_try_make_writable(skb, thoff + sizeof(*hdr))) + return -1; + + hdr = (void *)(skb_network_header(skb) + thoff); + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + port = hdr->source; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; + hdr->source = new_port; + break; + case FLOW_OFFLOAD_DIR_REPLY: + port = hdr->dest; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; + hdr->dest = new_port; + break; + default: + return -1; + } + + return nf_flow_nat_port(skb, thoff, protocol, port, new_port); +} +EXPORT_SYMBOL_GPL(nf_flow_snat_port); + +int nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) +{ + struct flow_ports *hdr; + __be16 port, new_port; + + if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || + skb_try_make_writable(skb, thoff + sizeof(*hdr))) + return -1; + + hdr = (void *)(skb_network_header(skb) + thoff); + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + port = hdr->dest; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; + hdr->dest = new_port; + break; + case FLOW_OFFLOAD_DIR_REPLY: + port = hdr->source; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; + hdr->source = new_port; + break; + default: + return -1; + } + + return nf_flow_nat_port(skb, thoff, protocol, port, new_port); +} +EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); -- cgit v1.2.3 From 0995210753a26c4fa1a3d8c63cc230e22a8537cd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:19 +0100 Subject: netfilter: flow table support for IPv6 This patch adds the IPv6 flow table type, that implements the datapath flow table to forward IPv6 traffic. This patch exports ip6_dst_mtu_forward() that is required to check for mtu to pass up packets that need PMTUD handling to the classic forwarding path. Signed-off-by: Pablo Neira Ayuso --- include/net/ipv6.h | 2 + net/ipv6/ip6_output.c | 3 +- net/ipv6/netfilter/Kconfig | 8 + net/ipv6/netfilter/Makefile | 3 + net/ipv6/netfilter/nf_flow_table_ipv6.c | 277 ++++++++++++++++++++++++++++++++ 5 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 25be4715578c..9dc1230d789c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -969,6 +969,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) &inet6_sk(sk)->cork); } +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst); + int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bcdb615aed6e..19adad6d90bc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) { unsigned int mtu; struct inet6_dev *idev; @@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } +EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward); static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6acb2eecd986..806e95375ec8 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -71,6 +71,14 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES +config NF_FLOW_TABLE_IPV6 + select NF_FLOW_TABLE + tristate "Netfilter flow table IPv6 module" + help + This option adds the flow table IPv6 support. + + To compile it as a module, choose M here. + config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c6ee0cdd0ba9..95611c4b39b0 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o +# flow table support +obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c new file mode 100644 index 000000000000..d7d073bb19ee --- /dev/null +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* For layer 4 checksum field offset. */ +#include +#include + +static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, + new_addr->s6_addr32, true); + + return 0; +} + +static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, + new_addr->s6_addr32, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, struct in6_addr *addr, + struct in6_addr *new_addr) +{ + switch (ip6h->nexthdr) { + case IPPROTO_TCP: + if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + } + + return 0; +} + +static int nf_flow_snat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; + ip6h->saddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; + ip6h->daddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +} + +static int nf_flow_dnat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; + ip6h->daddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; + ip6h->saddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +} + +static int nf_flow_nat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, + enum flow_offload_tuple_dir dir) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + unsigned int thoff = sizeof(*ip6h); + + if (flow->flags & FLOW_OFFLOAD_SNAT && + (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || + nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + return -1; + if (flow->flags & FLOW_OFFLOAD_DNAT && + (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || + nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + return -1; + + return 0; +} + +static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + struct ipv6hdr *ip6h; + unsigned int thoff; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -1; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return -1; + + thoff = sizeof(*ip6h); + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return -1; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v6 = ip6h->saddr; + tuple->dst_v6 = ip6h->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET6; + tuple->l4proto = ip6h->nexthdr; + tuple->iifidx = dev->ifindex; + + return 0; +} + +/* Based on ip_exceeds_mtu(). */ +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + return false; + + return true; +} + +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt) +{ + u32 mtu; + + mtu = ip6_dst_mtu_forward(&rt->dst); + if (__nf_flow_exceeds_mtu(skb, mtu)) + return true; + + return false; +} + +static unsigned int +nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + struct flow_offload_tuple tuple = {}; + enum flow_offload_tuple_dir dir; + struct flow_offload *flow; + struct net_device *outdev; + struct in6_addr *nexthop; + struct ipv6hdr *ip6h; + struct rt6_info *rt; + + if (skb->protocol != htons(ETH_P_IPV6)) + return NF_ACCEPT; + + if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) + return NF_ACCEPT; + + tuplehash = flow_offload_lookup(flow_table, &tuple); + if (tuplehash == NULL) + return NF_ACCEPT; + + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); + if (!outdev) + return NF_ACCEPT; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + + rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; + if (unlikely(nf_flow_exceeds_mtu(skb, rt))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*ip6h))) + return NF_DROP; + + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && + nf_flow_nat_ipv6(flow, skb, dir) < 0) + return NF_DROP; + + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + ip6h = ipv6_hdr(skb); + ip6h->hop_limit--; + + skb->dev = outdev; + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); + + return NF_STOLEN; +} + +static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_ipv6_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_ipv6_module_init(void) +{ + nft_register_flowtable_type(&flowtable_ipv6); + + return 0; +} + +static void __exit nf_flow_ipv6_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_ipv6); +} + +module_init(nf_flow_ipv6_module_init); +module_exit(nf_flow_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); -- cgit v1.2.3 From 7c23b629a8085b11daccd68c62b5116ff498f84a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:22 +0100 Subject: netfilter: flow table support for the mixed IPv4/IPv6 family This patch adds the IPv6 flow table type, that implements the datapath flow table to forward IPv6 traffic. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 5 ++++ net/ipv4/netfilter/nf_flow_table_ipv4.c | 3 ++- net/ipv6/netfilter/nf_flow_table_ipv6.c | 3 ++- net/netfilter/Kconfig | 8 ++++++ net/netfilter/Makefile | 1 + net/netfilter/nf_flow_table_inet.c | 48 +++++++++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nf_flow_table_inet.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 161f71ca78a0..b22b22082733 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -111,6 +111,11 @@ struct flow_ports { __be16 source, dest; }; +unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); +unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ MODULE_ALIAS("nf-flowtable-" __stringify(family)) diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index ac56c0f0492a..b2d01eb25f2c 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -202,7 +202,7 @@ static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) return false; } -static unsigned int +unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -254,6 +254,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_STOLEN; } +EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); static struct nf_flowtable_type flowtable_ipv4 = { .family = NFPROTO_IPV4, diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index d7d073bb19ee..0c3b9d32f64f 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -196,7 +196,7 @@ static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt) return false; } -static unsigned int +unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -248,6 +248,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_STOLEN; } +EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); static struct nf_flowtable_type flowtable_ipv6 = { .family = NFPROTO_IPV6, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 264ce877ef49..272803079bf2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -657,6 +657,14 @@ endif # NF_TABLES_NETDEV endif # NF_TABLES +config NF_FLOW_TABLE_INET + select NF_FLOW_TABLE + tristate "Netfilter flow table mixed IPv4/IPv6 module" + help + This option adds the flow table mixed IPv4/IPv6 support. + + To compile it as a module, choose M here. + config NF_FLOW_TABLE tristate "Netfilter flow table module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 2930f2b854be..061365875cde 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o # flow table infrastructure obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o +obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c new file mode 100644 index 000000000000..281209aeba8f --- /dev/null +++ b/net/netfilter/nf_flow_table_inet.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + return nf_flow_offload_ip_hook(priv, skb, state); + case htons(ETH_P_IPV6): + return nf_flow_offload_ipv6_hook(priv, skb, state); + } + + return NF_ACCEPT; +} + +static struct nf_flowtable_type flowtable_inet = { + .family = NFPROTO_INET, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_inet_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_inet_module_init(void) +{ + nft_register_flowtable_type(&flowtable_inet); + + return 0; +} + +static void __exit nf_flow_inet_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_inet); +} + +module_init(nf_flow_inet_module_init); +module_exit(nf_flow_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ -- cgit v1.2.3