diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/Kconfig | 2 | ||||
-rw-r--r-- | net/Makefile | 1 | ||||
-rw-r--r-- | net/bpfilter/Kconfig | 16 | ||||
-rw-r--r-- | net/bpfilter/Makefile | 30 | ||||
-rw-r--r-- | net/bpfilter/bpfilter_kern.c | 111 | ||||
-rw-r--r-- | net/bpfilter/main.c | 63 | ||||
-rw-r--r-- | net/bpfilter/msgfmt.h | 17 | ||||
-rw-r--r-- | net/core/devlink.c | 3 | ||||
-rw-r--r-- | net/ipv4/Makefile | 4 | ||||
-rw-r--r-- | net/ipv4/bpfilter/Makefile | 2 | ||||
-rw-r--r-- | net/ipv4/bpfilter/sockopt.c | 42 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 17 | ||||
-rw-r--r-- | net/ipv4/netlink.c | 23 | ||||
-rw-r--r-- | net/ipv4/route.c | 146 | ||||
-rw-r--r-- | net/ipv4/udp.c | 3 | ||||
-rw-r--r-- | net/ipv6/route.c | 17 | ||||
-rw-r--r-- | net/ipv6/udp.c | 3 |
18 files changed, 460 insertions, 43 deletions
diff --git a/net/Kconfig b/net/Kconfig index df8d45ef47d8..ba554cedb615 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -202,6 +202,8 @@ source "net/bridge/netfilter/Kconfig" endif +source "net/bpfilter/Kconfig" + source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index 77aaddedbd29..bdaf53925acd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_NET) += ipv6/ +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig new file mode 100644 index 000000000000..60725c5f79db --- /dev/null +++ b/net/bpfilter/Kconfig @@ -0,0 +1,16 @@ +menuconfig BPFILTER + bool "BPF based packet filtering framework (BPFILTER)" + default n + depends on NET && BPF + help + This builds experimental bpfilter framework that is aiming to + provide netfilter compatible functionality via BPF + +if BPFILTER +config BPFILTER_UMH + tristate "bpfilter kernel module with user mode helper" + default m + help + This builds bpfilter kernel module with embedded user mode helper +endif + diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile new file mode 100644 index 000000000000..2af752c8ef5e --- /dev/null +++ b/net/bpfilter/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux BPFILTER layer. +# + +hostprogs-y := bpfilter_umh +bpfilter_umh-objs := main.o +HOSTCFLAGS += -I. -Itools/include/ +ifeq ($(CONFIG_BPFILTER_UMH), y) +# builtin bpfilter_umh should be compiled with -static +# since rootfs isn't mounted at the time of __init +# function is called and do_execv won't find elf interpreter +HOSTLDFLAGS += -static +endif + +# a bit of elf magic to convert bpfilter_umh binary into a binary blob +# inside bpfilter_umh.o elf file referenced by +# _binary_net_bpfilter_bpfilter_umh_start symbol +# which bpfilter_kern.c passes further into umh blob loader at run-time +quiet_cmd_copy_umh = GEN $@ + cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \ + $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \ + -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \ + --rename-section .data=.init.rodata $< $@ + +$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh + $(call cmd,copy_umh) + +obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o +bpfilter-objs += bpfilter_kern.o bpfilter_umh.o diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c new file mode 100644 index 000000000000..7596314b61c7 --- /dev/null +++ b/net/bpfilter/bpfilter_kern.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/init.h> +#include <linux/module.h> +#include <linux/umh.h> +#include <linux/bpfilter.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/fs.h> +#include <linux/file.h> +#include "msgfmt.h" + +#define UMH_start _binary_net_bpfilter_bpfilter_umh_start +#define UMH_end _binary_net_bpfilter_bpfilter_umh_end + +extern char UMH_start; +extern char UMH_end; + +static struct umh_info info; +/* since ip_getsockopt() can run in parallel, serialize access to umh */ +static DEFINE_MUTEX(bpfilter_lock); + +static void shutdown_umh(struct umh_info *info) +{ + struct task_struct *tsk; + + tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); + if (tsk) + force_sig(SIGKILL, tsk); + fput(info->pipe_to_umh); + fput(info->pipe_from_umh); +} + +static void __stop_umh(void) +{ + if (bpfilter_process_sockopt) { + bpfilter_process_sockopt = NULL; + shutdown_umh(&info); + } +} + +static void stop_umh(void) +{ + mutex_lock(&bpfilter_lock); + __stop_umh(); + mutex_unlock(&bpfilter_lock); +} + +static int __bpfilter_process_sockopt(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set) +{ + struct mbox_request req; + struct mbox_reply reply; + loff_t pos; + ssize_t n; + int ret; + + req.is_set = is_set; + req.pid = current->pid; + req.cmd = optname; + req.addr = (long)optval; + req.len = optlen; + mutex_lock(&bpfilter_lock); + n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); + if (n != sizeof(req)) { + pr_err("write fail %zd\n", n); + __stop_umh(); + ret = -EFAULT; + goto out; + } + pos = 0; + n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); + if (n != sizeof(reply)) { + pr_err("read fail %zd\n", n); + __stop_umh(); + ret = -EFAULT; + goto out; + } + ret = reply.status; +out: + mutex_unlock(&bpfilter_lock); + return ret; +} + +static int __init load_umh(void) +{ + int err; + + /* fork usermode process */ + err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); + if (err) + return err; + pr_info("Loaded bpfilter_umh pid %d\n", info.pid); + + /* health check that usermode process started correctly */ + if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { + stop_umh(); + return -EFAULT; + } + bpfilter_process_sockopt = &__bpfilter_process_sockopt; + return 0; +} + +static void __exit fini_umh(void) +{ + stop_umh(); +} +module_init(load_umh); +module_exit(fini_umh); +MODULE_LICENSE("GPL"); diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c new file mode 100644 index 000000000000..81bbc1684896 --- /dev/null +++ b/net/bpfilter/main.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/uio.h> +#include <errno.h> +#include <stdio.h> +#include <sys/socket.h> +#include <fcntl.h> +#include <unistd.h> +#include "include/uapi/linux/bpf.h" +#include <asm/unistd.h> +#include "msgfmt.h" + +int debug_fd; + +static int handle_get_cmd(struct mbox_request *cmd) +{ + switch (cmd->cmd) { + case 0: + return 0; + default: + break; + } + return -ENOPROTOOPT; +} + +static int handle_set_cmd(struct mbox_request *cmd) +{ + return -ENOPROTOOPT; +} + +static void loop(void) +{ + while (1) { + struct mbox_request req; + struct mbox_reply reply; + int n; + + n = read(0, &req, sizeof(req)); + if (n != sizeof(req)) { + dprintf(debug_fd, "invalid request %d\n", n); + return; + } + + reply.status = req.is_set ? + handle_set_cmd(&req) : + handle_get_cmd(&req); + + n = write(1, &reply, sizeof(reply)); + if (n != sizeof(reply)) { + dprintf(debug_fd, "reply failed %d\n", n); + return; + } + } +} + +int main(void) +{ + debug_fd = open("/dev/console", 00000002 | 00000100); + dprintf(debug_fd, "Started bpfilter\n"); + loop(); + close(debug_fd); + return 0; +} diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h new file mode 100644 index 000000000000..98d121c62945 --- /dev/null +++ b/net/bpfilter/msgfmt.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_BPFILTER_MSGFMT_H +#define _NET_BPFILTER_MSGFMT_H + +struct mbox_request { + __u64 addr; + __u32 len; + __u32 is_set; + __u32 cmd; + __u32 pid; +}; + +struct mbox_reply { + __u32 status; +}; + +#endif diff --git a/net/core/devlink.c b/net/core/devlink.c index 5c8a40e1a01e..475246b355f0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2756,7 +2756,8 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_eswitch_set_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index b379520f9133..eec9569ffa5c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -14,7 +14,9 @@ obj-y := route.o inetpeer.o protocol.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ - metrics.o + metrics.o netlink.o + +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile new file mode 100644 index 000000000000..ce262d76cc48 --- /dev/null +++ b/net/ipv4/bpfilter/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BPFILTER) += sockopt.o + diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c new file mode 100644 index 000000000000..42a96d2d8d05 --- /dev/null +++ b/net/ipv4/bpfilter/sockopt.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/uaccess.h> +#include <linux/bpfilter.h> +#include <uapi/linux/bpf.h> +#include <linux/wait.h> +#include <linux/kmod.h> + +int (*bpfilter_process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); +EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); + +int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, + unsigned int optlen, bool is_set) +{ + if (!bpfilter_process_sockopt) { + int err = request_module("bpfilter"); + + if (err) + return err; + if (!bpfilter_process_sockopt) + return -ECHILD; + } + return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); +} + +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) +{ + return bpfilter_mbox_request(sk, optname, optval, optlen, true); +} + +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) +{ + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + return bpfilter_mbox_request(sk, optname, optval, len, false); +} diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d622112bf95..897ae92dff0f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -649,6 +649,9 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_IP_PROTO] = { .type = NLA_U8 }, + [RTA_SPORT] = { .type = NLA_U16 }, + [RTA_DPORT] = { .type = NLA_U16 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5ad2d8ed3a3f..e0791faacb24 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,6 +47,8 @@ #include <linux/errqueue.h> #include <linux/uaccess.h> +#include <linux/bpfilter.h> + /* * SOL_IP control messages. */ @@ -1244,6 +1246,11 @@ int ip_setsockopt(struct sock *sk, int level, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_SET_REPLACE && + optname < BPFILTER_IPT_SET_MAX) + err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1552,6 +1559,11 @@ int ip_getsockopt(struct sock *sk, int level, int err; err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_GET_INFO && + optname < BPFILTER_IPT_GET_MAX) + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1584,6 +1596,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_GET_INFO && + optname < BPFILTER_IPT_GET_MAX) + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c new file mode 100644 index 000000000000..f86bb4f06609 --- /dev/null +++ b/net/ipv4/netlink.c @@ -0,0 +1,23 @@ +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/types.h> +#include <net/net_namespace.h> +#include <net/netlink.h> +#include <net/ip.h> + +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, + struct netlink_ext_ack *extack) +{ + *ip_proto = nla_get_u8(attr); + + switch (*ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_ICMP: + return 0; + default: + NL_SET_ERR_MSG(extack, "Unsupported ip proto"); + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2cfa1b518f8d..0e401dc4e1bd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2574,11 +2574,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); /* called with rcu_read_lock held */ -static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, - struct flowi4 *fl4, struct sk_buff *skb, u32 portid, - u32 seq) +static int rt_fill_info(struct net *net, __be32 dst, __be32 src, + struct rtable *rt, u32 table_id, struct flowi4 *fl4, + struct sk_buff *skb, u32 portid, u32 seq) { - struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; @@ -2674,7 +2673,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, } } else #endif - if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex)) + if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) goto nla_put_failure; } @@ -2689,43 +2688,93 @@ nla_put_failure: return -EMSGSIZE; } +static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, + u8 ip_proto, __be16 sport, + __be16 dport) +{ + struct sk_buff *skb; + struct iphdr *iph; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return NULL; + + /* Reserve room for dummy headers, this skb can pass + * through good chunk of routing engine. + */ + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + iph = skb_put(skb, sizeof(struct iphdr)); + iph->protocol = ip_proto; + iph->saddr = src; + iph->daddr = dst; + iph->version = 0x4; + iph->frag_off = 0; + iph->ihl = 0x5; + skb_set_transport_header(skb, skb->len); + + switch (iph->protocol) { + case IPPROTO_UDP: { + struct udphdr *udph; + + udph = skb_put_zero(skb, sizeof(struct udphdr)); + udph->source = sport; + udph->dest = dport; + udph->len = sizeof(struct udphdr); + udph->check = 0; + break; + } + case IPPROTO_TCP: { + struct tcphdr *tcph; + + tcph = skb_put_zero(skb, sizeof(struct tcphdr)); + tcph->source = sport; + tcph->dest = dport; + tcph->doff = sizeof(struct tcphdr) / 4; + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), + src, dst, 0); + break; + } + case IPPROTO_ICMP: { + struct icmphdr *icmph; + + icmph = skb_put_zero(skb, sizeof(struct icmphdr)); + icmph->type = ICMP_ECHO; + icmph->code = 0; + } + } + + return skb; +} + static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); - struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + u32 table_id = RT_TABLE_MAIN; + __be16 sport = 0, dport = 0; struct fib_result res = {}; + u8 ip_proto = IPPROTO_UDP; struct rtable *rt = NULL; + struct sk_buff *skb; + struct rtmsg *rtm; struct flowi4 fl4; __be32 dst = 0; __be32 src = 0; + kuid_t uid; u32 iif; int err; int mark; - struct sk_buff *skb; - u32 table_id = RT_TABLE_MAIN; - kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err < 0) - goto errout; + return err; rtm = nlmsg_data(nlh); - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) { - err = -ENOBUFS; - goto errout; - } - - /* Reserve room for dummy headers, this skb can pass - through good chunk of routing engine. - */ - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; @@ -2735,14 +2784,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, else uid = (iif ? INVALID_UID : current_uid()); - /* Bugfix: need to give ip_route_input enough of an IP header to - * not gag. - */ - ip_hdr(skb)->protocol = IPPROTO_UDP; - ip_hdr(skb)->saddr = src; - ip_hdr(skb)->daddr = dst; + if (tb[RTA_IP_PROTO]) { + err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], + &ip_proto, extack); + if (err) + return err; + } + + if (tb[RTA_SPORT]) + sport = nla_get_be16(tb[RTA_SPORT]); - skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + if (tb[RTA_DPORT]) + dport = nla_get_be16(tb[RTA_DPORT]); + + skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); + if (!skb) + return -ENOBUFS; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2751,6 +2808,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; + if (sport) + fl4.fl4_sport = sport; + if (dport) + fl4.fl4_dport = dport; + fl4.flowi4_proto = ip_proto; rcu_read_lock(); @@ -2760,10 +2822,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, dev = dev_get_by_index_rcu(net, iif); if (!dev) { err = -ENODEV; - goto errout_free; + goto errout_rcu; } - skb->protocol = htons(ETH_P_IP); + fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, @@ -2783,7 +2845,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } if (err) - goto errout_free; + goto errout_rcu; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -2791,34 +2853,40 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = res.table ? res.table->tb_id : 0; + /* reset skb for netlink reply msg */ + skb_trim(skb, 0); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_reset_mac_header(skb); + if (rtm->rtm_flags & RTM_F_FIB_MATCH) { if (!res.fi) { err = fib_props[res.type].error; if (!err) err = -EHOSTUNREACH; - goto errout_free; + goto errout_rcu; } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, table_id, rt->rt_type, res.prefix, res.prefixlen, fl4.flowi4_tos, res.fi, 0); } else { - err = rt_fill_info(net, dst, src, table_id, &fl4, skb, + err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); } if (err < 0) - goto errout_free; + goto errout_rcu; rcu_read_unlock(); err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); -errout: - return err; errout_free: + return err; +errout_rcu: rcu_read_unlock(); kfree_skb(skb); - goto errout; + goto errout_free; } void ip_rt_multicast_event(struct in_device *in_dev) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ff4d4ba67735..d71f1f3e1155 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -788,7 +788,8 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, return -EINVAL; if (sk->sk_no_check_tx) return -EINVAL; - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || + dst_xfrm(skb_dst(skb))) return -EIO; skb_shinfo(skb)->gso_size = cork->gso_size; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bcb8785c0451..038d661d5ffc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -63,6 +63,7 @@ #include <net/lwtunnel.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <net/ip.h> #include <trace/events/fib6.h> #include <linux/uaccess.h> @@ -4083,6 +4084,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_IP_PROTO] = { .type = NLA_U8 }, + [RTA_SPORT] = { .type = NLA_U16 }, + [RTA_DPORT] = { .type = NLA_U16 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -4795,6 +4799,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, else fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (tb[RTA_SPORT]) + fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]); + + if (tb[RTA_DPORT]) + fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]); + + if (tb[RTA_IP_PROTO]) { + err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], + &fl6.flowi6_proto, extack); + if (err) + goto errout; + } + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2839c1bd1e58..426c9d2b418d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1053,7 +1053,8 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, return -EINVAL; if (udp_sk(sk)->no_check6_tx) return -EINVAL; - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || + dst_xfrm(skb_dst(skb))) return -EIO; skb_shinfo(skb)->gso_size = cork->gso_size; |