diff options
Diffstat (limited to 'net/ipv4')
47 files changed, 738 insertions, 479 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f96bd489b362..6490b845e17b 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,6 +303,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" + depends on IPV6 || IPV6=n select INET_TUNNEL select NET_IP_TUNNEL select XFRM diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9d97bace13c8..9e1a186a3671 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o +obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2fe295432c24..cf58e29cf746 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -872,7 +872,7 @@ int inet_shutdown(struct socket *sock, int how) err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ - /* fall through */ + fallthrough; default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) @@ -886,7 +886,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; - /* fall through */ + fallthrough; case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; @@ -1793,6 +1793,10 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); free_percpu(net->mib.tcp_statistics); +#ifdef CONFIG_MPTCP + /* allocated on demand, see mptcp_init_sock() */ + free_percpu(net->mib.mptcp_statistics); +#endif } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 974179b3b314..d99e1be94019 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -107,7 +107,7 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) if (optlen < 6) return -EINVAL; memcpy(daddr, optptr+optlen-4, 4); - /* Fall through */ + fallthrough; default: memset(optptr, 0, optlen); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 05eb42f347e8..687971d83b4e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1181,7 +1181,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - /* fall through */ + fallthrough; case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 574972bc7299..2bf3abeb1456 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -184,7 +184,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, { const struct tcp_congestion_ops *utcp_ca; struct tcp_congestion_ops *tcp_ca; - size_t tcp_ca_name_len; int prog_fd; u32 moff; @@ -199,13 +198,11 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, tcp_ca->flags = utcp_ca->flags; return 1; case offsetof(struct tcp_congestion_ops, name): - tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name)); - if (!tcp_ca_name_len || - tcp_ca_name_len == sizeof(utcp_ca->name)) + if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name, + sizeof(tcp_ca->name)) <= 0) return -EINVAL; if (tcp_ca_find(utcp_ca->name)) return -EEXIST; - memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name)); return 1; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 376882215919..0bd10a1f477f 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e4632bd2026d..30fa42f5997d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1566,11 +1566,11 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } } ip_mc_up(in_dev); - /* fall through */ + fallthrough; case NETDEV_CHANGEADDR: if (!IN_DEV_ARP_NOTIFY(in_dev)) break; - /* fall through */ + fallthrough; case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); @@ -1588,7 +1588,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ - /* fall through */ + fallthrough; case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 577db1d50a24..213be9c050ad 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -997,7 +997,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } + rcu_read_lock(); err = fib_table_dump(tb, skb, cb, &filter); + rcu_read_unlock(); return skb->len ? : err; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c092e9a55790..818916b2a04d 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -35,7 +35,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa) void fib_release_info(struct fib_info *); struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack); -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a803cdd9400a..6ed8c9317179 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -570,8 +570,9 @@ static int fib_detect_death(struct fib_info *fi, int order, return 1; } -int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, - u16 encap_type, void *cfg, gfp_t gfp_flags, +int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, + struct nlattr *encap, u16 encap_type, + void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { int err; @@ -589,8 +590,9 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, err = -EINVAL; goto lwt_failure; } - err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, - cfg, &lwtstate, extack); + err = lwtunnel_build_state(net, encap_type, encap, + nhc->nhc_family, cfg, &lwtstate, + extack); if (err) goto lwt_failure; @@ -614,7 +616,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_family = AF_INET; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, + err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) return err; @@ -814,7 +816,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int fib_encap_match(u16 encap_type, +static int fib_encap_match(struct net *net, u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, const struct fib_config *cfg, @@ -826,7 +828,7 @@ static int fib_encap_match(u16 encap_type, if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - ret = lwtunnel_build_state(encap_type, encap, AF_INET, + ret = lwtunnel_build_state(net, encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); @@ -836,7 +838,7 @@ static int fib_encap_match(u16 encap_type, return result; } -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -857,8 +859,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { - if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, - nh, cfg, extack)) + if (fib_encap_match(net, cfg->fc_encap_type, + cfg->fc_encap, nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1962,7 +1964,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; @@ -1984,7 +1986,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: fi->fib_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: fi->fib_flags |= RTNH_F_LINKDOWN; break; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ff0c24371e33..b01b748df9dd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -35,9 +35,6 @@ * Paul E. McKenney <paulmck@us.ibm.com> * Patrick McHardy <kaber@trash.net> */ - -#define VERSION "0.409" - #include <linux/cache.h> #include <linux/uaccess.h> #include <linux/bitops.h> @@ -304,8 +301,6 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -#define TNODE_KMALLOC_MAX \ - ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) #define TNODE_VMALLOC_MAX \ ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) @@ -1684,7 +1679,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi, extack) == 0 && + fib_nh_match(net, cfg, fi, extack) == 0 && fib_metrics_match(cfg, fi)) { fa_to_delete = fa; break; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 5fd6e8ed02b5..66fdbfe5447c 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f369e7ce685b..fc61f51d87a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) case 3: if (!icmp_tag_validation(iph->protocol)) goto out; - /* fall through */ + fallthrough; case 0: info = ntohs(icmph->un.frag.mtu); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3b9c7a2725a9..47f0502b2101 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,8 +107,6 @@ #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ -#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ) -#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ) #define IGMP_QUERY_INTERVAL (125*HZ) #define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a4db79b1b643..5f34eb951627 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, { struct sock *sk2; bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; + bool reuseport = !!sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -146,17 +146,21 @@ static int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2))))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && + if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { + if ((!relax || + (!reuseport_ok && + reuseport && sk2->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(uid, sock_i_uid(sk2))))) && + inet_rcv_saddr_equal(sk, sk2, true)) + break; + } else if (!reuseport_ok || + !reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2)))) { if (inet_rcv_saddr_equal(sk, sk2, true)) break; } @@ -176,12 +180,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * int port = 0; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); + bool relax = false; int i, low, high, attempt_half; struct inet_bind_bucket *tb; u32 remaining, offset; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); +ports_exhausted: attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; other_half_scan: inet_get_local_port_range(net, &low, &high); @@ -219,7 +225,7 @@ other_parity_scan: inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, false, false)) + if (!inet_csk_bind_conflict(sk, tb, relax, false)) goto success; goto next_port; } @@ -239,6 +245,12 @@ next_port: attempt_half = 2; goto other_half_scan; } + + if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + /* We still have a chance to connect to different destinations */ + relax = true; + goto ports_exhausted; + } return NULL; success: *port_ret = port; @@ -482,8 +494,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + out: release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * newsk->sk_memcg. + */ + lock_sock(newsk); + + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued. + */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } if (req) reqsk_put(req); return newsk; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f11e997e517b..5d50aad3cdbf 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -23,6 +23,7 @@ #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/inet6_hashtables.h> +#include <net/bpf_sk_storage.h> #include <net/netlink.h> #include <linux/inet.h> @@ -100,13 +101,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -147,6 +144,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -156,26 +171,28 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); +#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, - bool net_admin) + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; + struct inet_diag_dump_data *cb_data; int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + cb_data = cb->data; handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(!handler); - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -187,7 +204,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, + sk_user_ns(NETLINK_CB(cb->skb).sk), + net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -284,22 +303,46 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. - */ - if (!classid) - classid = sk->sk_priority; + /* Keep it at the end for potential retry with a larger skb, + * or else do best-effort fitting, which is only done for the + * first_nlmsg. + */ + if (cb_data->bpf_stg_diag) { + bool first_nlmsg = ((unsigned char *)nlh == skb->data); + unsigned int prev_min_dump_alloc; + unsigned int total_nla_size = 0; + unsigned int msg_len; + int err; + + msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, + INET_DIAG_SK_BPF_STORAGES, + &total_nla_size); + + if (!err) + goto out; + + total_nla_size += msg_len; + prev_min_dump_alloc = cb->min_dump_alloc; + if (total_nla_size > prev_min_dump_alloc) + cb->min_dump_alloc = min_t(u32, total_nla_size, + MAX_DUMP_ALLOC_SIZE); + + if (!first_nlmsg) + goto errout; - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + if (cb->min_dump_alloc > prev_min_dump_alloc) + /* Retry with pskb_expand_head() with + * __GFP_DIRECT_RECLAIM + */ goto errout; + + WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); + + /* Send what we have for this sk + * and move on to the next sk in the following + * dump() + */ } out: @@ -312,30 +355,19 @@ errout: } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); -static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, - const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, - bool net_admin) -{ - return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, - portid, seq, nlmsg_flags, unlh, net_admin); -} - static int inet_twsk_diag_fill(struct sock *sk, struct sk_buff *skb, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + struct netlink_callback *cb, + u16 nlmsg_flags) { struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, + sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -359,16 +391,16 @@ static int inet_twsk_diag_fill(struct sock *sk, } static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin) + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) { struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -397,21 +429,18 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin) + u16 nlmsg_flags, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh); + return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags); if (sk->sk_state == TCP_NEW_SYN_RECV) - return inet_req_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh, net_admin); + return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, - nlmsg_flags, unlh, net_admin); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, + net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, @@ -459,10 +488,10 @@ struct sock *inet_diag_find_one_icsk(struct net *net, EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { + struct sk_buff *in_skb = cb->skb; bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); struct net *net = sock_net(in_skb->sk); struct sk_buff *rep; @@ -479,10 +508,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, goto out; } - err = sk_diag_fill(sk, rep, req, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, net_admin); + err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); @@ -509,14 +535,21 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, int err; handler = inet_diag_lock_handler(req->sdiag_protocol); - if (IS_ERR(handler)) + if (IS_ERR(handler)) { err = PTR_ERR(handler); - else if (cmd == SOCK_DIAG_BY_FAMILY) - err = handler->dump_one(in_skb, nlh, req); - else if (cmd == SOCK_DESTROY && handler->destroy) + } else if (cmd == SOCK_DIAG_BY_FAMILY) { + struct inet_diag_dump_data empty_dump_data = {}; + struct netlink_callback cb = { + .nlh = nlh, + .skb = in_skb, + .data = &empty_dump_data, + }; + err = handler->dump_one(&cb, req); + } else if (cmd == SOCK_DESTROY && handler->destroy) { err = handler->destroy(in_skb, req); - else + } else { err = -EOPNOTSUPP; + } inet_diag_unlock_handler(handler); return err; @@ -847,23 +880,6 @@ static int inet_diag_bc_audit(const struct nlattr *attr, return len == 0 ? 0 : -EINVAL; } -static int inet_csk_diag_dump(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - const struct nlattr *bc, - bool net_admin) -{ - if (!inet_diag_bc_sk(bc, sk)) - return 0; - - return inet_csk_diag_fill(sk, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, - net_admin); -} - static void twsk_build_assert(void) { BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != @@ -892,14 +908,17 @@ static void twsk_build_assert(void) void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct inet_diag_dump_data *cb_data = cb->data; struct net *net = sock_net(skb->sk); u32 idiag_states = r->idiag_states; int i, num, s_i, s_num; + struct nlattr *bc; struct sock *sk; + bc = cb_data->inet_diag_nla_bc; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; s_i = cb->args[1]; @@ -935,8 +954,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, r->id.idiag_sport) goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, - bc, net_admin) < 0) { + if (!inet_diag_bc_sk(bc, sk)) + goto next_listen; + + if (inet_sk_diag_fill(sk, inet_csk(sk), skb, + cb, r, NLM_F_MULTI, + net_admin) < 0) { spin_unlock(&ilb->lock); goto done; } @@ -1014,11 +1037,8 @@ next_normal: res = 0; for (idx = 0; idx < accum; idx++) { if (res >= 0) { - res = sk_diag_fill(sk_arr[idx], skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh, net_admin); + res = sk_diag_fill(sk_arr[idx], skb, cb, r, + NLM_F_MULTI, net_admin); if (res < 0) num = num_arr[idx]; } @@ -1042,31 +1062,101 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc) + const struct inet_diag_req_v2 *r) { const struct inet_diag_handler *handler; + u32 prev_min_dump_alloc; int err = 0; +again: + prev_min_dump_alloc = cb->min_dump_alloc; handler = inet_diag_lock_handler(r->sdiag_protocol); if (!IS_ERR(handler)) - handler->dump(skb, cb, r, bc); + handler->dump(skb, cb, r); else err = PTR_ERR(handler); inet_diag_unlock_handler(handler); + /* The skb is not large enough to fit one sk info and + * inet_sk_diag_fill() has requested for a larger skb. + */ + if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) { + err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL); + if (!err) + goto again; + } + return err ? : skb->len; } static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int hdrlen = sizeof(struct inet_diag_req_v2); - struct nlattr *bc = NULL; + return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh)); +} + +static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) +{ + const struct nlmsghdr *nlh = cb->nlh; + struct inet_diag_dump_data *cb_data; + struct sk_buff *skb = cb->skb; + struct nlattr *nla; + int rem, err; + + cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); + if (!cb_data) + return -ENOMEM; + + nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), rem) { + int type = nla_type(nla); - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + if (type < __INET_DIAG_REQ_MAX) + cb_data->req_nlas[type] = nla; + } + + nla = cb_data->inet_diag_nla_bc; + if (nla) { + err = inet_diag_bc_audit(nla, skb); + if (err) { + kfree(cb_data); + return err; + } + } + + nla = cb_data->inet_diag_nla_bpf_stgs; + if (nla) { + struct bpf_sk_storage_diag *bpf_stg_diag; - return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); + bpf_stg_diag = bpf_sk_storage_diag_alloc(nla); + if (IS_ERR(bpf_stg_diag)) { + kfree(cb_data); + return PTR_ERR(bpf_stg_diag); + } + cb_data->bpf_stg_diag = bpf_stg_diag; + } + + cb->data = cb_data; + return 0; +} + +static int inet_diag_dump_start(struct netlink_callback *cb) +{ + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2)); +} + +static int inet_diag_dump_start_compat(struct netlink_callback *cb) +{ + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req)); +} + +static int inet_diag_dump_done(struct netlink_callback *cb) +{ + struct inet_diag_dump_data *cb_data = cb->data; + + bpf_sk_storage_diag_free(cb_data->bpf_stg_diag); + kfree(cb->data); + + return 0; } static int inet_diag_type2proto(int type) @@ -1085,9 +1175,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); - int hdrlen = sizeof(struct inet_diag_req); struct inet_diag_req_v2 req; - struct nlattr *bc = NULL; req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -1095,10 +1183,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - - return __inet_diag_dump(skb, cb, &req, bc); + return __inet_diag_dump(skb, cb, &req); } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, @@ -1126,22 +1211,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (nlh->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(nlh, hdrlen)) { - struct nlattr *attr; - int err; - - attr = nlmsg_find_attr(nlh, hdrlen, - INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr, skb); - if (err) - return err; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump_compat, - }; - return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start_compat, + .done = inet_diag_dump_done, + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } return inet_diag_get_exact_compat(skb, nlh); @@ -1157,22 +1232,12 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(h, hdrlen)) { - struct nlattr *attr; - int err; - - attr = nlmsg_find_attr(h, hdrlen, - INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr, skb); - if (err) - return err; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump, - }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start, + .done = inet_diag_dump_done, + .dump = inet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8274f98c511c..029b24eeafba 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1153,6 +1153,24 @@ static int ipgre_netlink_parms(struct net_device *dev, if (data[IFLA_GRE_FWMARK]) *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + return 0; +} + +static int erspan_netlink_parms(struct net_device *dev, + struct nlattr *data[], + struct nlattr *tb[], + struct ip_tunnel_parm *parms, + __u32 *fwmark) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = ipgre_netlink_parms(dev, data, tb, parms, fwmark); + if (err) + return err; + if (!data) + return 0; + if (data[IFLA_GRE_ERSPAN_VER]) { t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); @@ -1276,45 +1294,70 @@ static void ipgre_tap_setup(struct net_device *dev) ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int +ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[]) { - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; - __u32 fwmark = 0; - int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); - err = ip_tunnel_encap_setup(t, &ipencap); + int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } + return 0; +} + +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_parm p; + __u32 fwmark = 0; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; return ip_tunnel_newlink(dev, tb, &p, fwmark); } +static int erspan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_parm p; + __u32 fwmark = 0; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + + err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); + if (err) + return err; + return ip_tunnel_newlink(dev, tb, &p, fwmark); +} + static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_encap ipencap; __u32 fwmark = t->fwmark; struct ip_tunnel_parm p; int err; - if (ipgre_netlink_encap_parms(data, &ipencap)) { - err = ip_tunnel_encap_setup(t, &ipencap); - - if (err < 0) - return err; - } + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) @@ -1327,8 +1370,34 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], t->parms.i_flags = p.i_flags; t->parms.o_flags = p.o_flags; - if (strcmp(dev->rtnl_link_ops->kind, "erspan")) - ipgre_link_update(dev, !tb[IFLA_MTU]); + ipgre_link_update(dev, !tb[IFLA_MTU]); + + return 0; +} + +static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel *t = netdev_priv(dev); + __u32 fwmark = t->fwmark; + struct ip_tunnel_parm p; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + + err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); + if (err < 0) + return err; + + err = ip_tunnel_changelink(dev, tb, &p, fwmark); + if (err < 0) + return err; + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; return 0; } @@ -1519,8 +1588,8 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .priv_size = sizeof(struct ip_tunnel), .setup = erspan_setup, .validate = erspan_validate, - .newlink = ipgre_newlink, - .changelink = ipgre_changelink, + .newlink = erspan_newlink, + .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d84819893db9..090d3097ee15 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -263,7 +263,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, * insufficent MTU. */ features = netif_skb_features(skb); - BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); @@ -333,7 +333,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, switch (ret) { case NET_XMIT_CN: do_cn = true; - /* fall through */ + fallthrough; case NET_XMIT_SUCCESS: break; default: diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 47f8b947eef1..181b7a2a0247 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -432,7 +432,7 @@ static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info, return ip_tun_parse_opts(attr, info, extack); } -static int ip_tun_build_state(struct nlattr *attr, +static int ip_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) @@ -719,7 +719,7 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { [LWTUNNEL_IP6_OPTS] = { .type = NLA_NESTED }, }; -static int ip6_tun_build_state(struct nlattr *attr, +static int ip6_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 37cddd18f282..1b4e6f298648 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -187,17 +187,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - struct rtable *rt; - - fl->u.ip4.flowi4_oif = dev->ifindex; - fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; - rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); - if (IS_ERR(rt)) { + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + skb_dst_set(skb, dst); + break; +#endif + default: dev->stats.tx_carrier_errors++; goto tx_error_icmp; } - dst = &rt->dst; - skb_dst_set(skb, dst); } dst_hold(dst); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6e68def66822..9cf83cc85e4a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1465,7 +1465,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; - /* fall through */ + fallthrough; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f1f78a742b36..b167f4a5b684 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1057,7 +1057,7 @@ struct compat_arpt_replace { u32 underflow[NF_ARP_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; - struct compat_arpt_entry entries[0]; + struct compat_arpt_entry entries[]; }; static inline void compat_release_entry(struct compat_arpt_entry *e) @@ -1383,7 +1383,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, struct compat_arpt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_arpt_entry entrytable[0]; + struct compat_arpt_entry entrytable[]; }; static int compat_get_entries(struct net *net, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 10b91ebdf213..c2670eaa74e6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1211,7 +1211,7 @@ struct compat_ipt_replace { u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ - struct compat_ipt_entry entries[0]; + struct compat_ipt_entry entries[]; }; static int @@ -1562,7 +1562,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_ipt_entry entrytable[0]; + struct compat_ipt_entry entrytable[]; }; static int diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 4b2d49cc9f1a..0c72156130b6 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -173,7 +173,7 @@ static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ + fallthrough; case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: case ICMP_TIME_EXCEEDED: diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index b2aeb7bf5dac..3c25a467b3ef 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -168,7 +168,7 @@ pptp_outbound_pkt(struct sk_buff *skb, pr_debug("unknown outbound packet 0x%04x:%s\n", msg, msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]); - /* fall through */ + fallthrough; case PPTP_SET_LINK_INFO: /* only need to NAT in case PAC is behind NAT box */ case PPTP_START_SESSION_REQUEST: @@ -271,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("unknown inbound packet %s\n", msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]); - /* fall through */ + fallthrough; case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: case PPTP_STOP_SESSION_REQUEST: diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d072c326dd64..fdfca534d094 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1327,7 +1327,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, case AF_UNSPEC: if (tb[NHA_GROUP]) break; - /* fallthrough */ + fallthrough; default: NL_SET_ERR_MSG(extack, "Invalid address family"); goto out; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2580303249e2..75545a829a2b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -32,6 +32,7 @@ #include <net/icmp.h> #include <net/protocol.h> #include <net/tcp.h> +#include <net/mptcp.h> #include <net/udp.h> #include <net/udplite.h> #include <linux/bottom_half.h> @@ -485,6 +486,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) offsetof(struct ipstats_mib, syncp))); seq_putc(seq, '\n'); + mptcp_seq_show(seq); return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3183413ebc6c..47665919048f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1034,6 +1034,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(&h->lock) { struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); @@ -1056,6 +1057,7 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) + __releases(&h->lock) { struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index e35736b99300..1b5b8af27aaf 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -87,31 +87,30 @@ out_unlock: return sk ? sk : ERR_PTR(-ENOENT); } -static int raw_diag_dump_one(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, +static int raw_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - struct net *net = sock_net(in_skb->sk); + struct sk_buff *in_skb = cb->skb; struct sk_buff *rep; struct sock *sk; + struct net *net; int err; + net = sock_net(in_skb->sk); sk = raw_sock_get(net, r); if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); return -ENOMEM; } - err = inet_sk_diag_fill(sk, NULL, rep, r, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, + err = inet_sk_diag_fill(sk, NULL, rep, cb, r, 0, netlink_net_capable(in_skb, CAP_NET_ADMIN)); sock_put(sk); @@ -136,25 +135,25 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh, net_admin); + return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin); } static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; struct sock *sk = NULL; + struct nlattr *bc; if (IS_ERR(hashinfo)) return; + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ebe7060d0fc9..788c69d9bfe0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1621,12 +1621,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, - bool nopolicy, bool noxfrm, bool will_cache) + bool nopolicy, bool noxfrm) { struct rtable *rt; rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); @@ -1674,7 +1673,6 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) new_rt->rt_gw6 = rt->rt_gw6; INIT_LIST_HEAD(&new_rt->rt_uncached); - new_rt->dst.flags |= DST_HOST; new_rt->dst.input = rt->dst.input; new_rt->dst.output = rt->dst.output; new_rt->dst.error = rt->dst.error; @@ -1734,7 +1732,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) return -ENOBUFS; @@ -1851,7 +1849,7 @@ static int __mkroute_input(struct sk_buff *skb, rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); + IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -2219,7 +2217,7 @@ local_input: rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, flags | RTCF_LOCAL, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2443,8 +2441,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, add: rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM), - do_cache); + IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); @@ -2774,6 +2771,54 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); +struct rtable *ip_route_output_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache) +{ +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct rtable *rt = NULL; + struct flowi4 fl4; + __u8 tos; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } +#endif + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = protocol; + fl4.daddr = info->key.u.ipv4.dst; + fl4.saddr = info->key.u.ipv4.src; + tos = info->key.tos; + fl4.flowi4_tos = RT_TOS(tos); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); +#endif + *saddr = fl4.saddr; + return rt; +} +EXPORT_SYMBOL_GPL(ip_route_output_tunnel); + /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9684af02e0a5..81b267e990a1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -555,18 +555,6 @@ static struct ctl_table ipv4_table[] = { }, #endif /* CONFIG_NETLABEL */ { - .procname = "tcp_available_congestion_control", - .maxlen = TCP_CA_BUF_MAX, - .mode = 0444, - .proc_handler = proc_tcp_available_congestion_control, - }, - { - .procname = "tcp_allowed_congestion_control", - .maxlen = TCP_CA_BUF_MAX, - .mode = 0644, - .proc_handler = proc_allowed_congestion_control, - }, - { .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX, .mode = 0444, @@ -776,6 +764,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_autobind_reuse", + .data = &init_net.ipv4.sysctl_ip_autobind_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, .maxlen = sizeof(int), @@ -886,6 +883,18 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_tcp_congestion_control, }, { + .procname = "tcp_available_congestion_control", + .maxlen = TCP_CA_BUF_MAX, + .mode = 0444, + .proc_handler = proc_tcp_available_congestion_control, + }, + { + .procname = "tcp_allowed_congestion_control", + .maxlen = TCP_CA_BUF_MAX, + .mode = 0644, + .proc_handler = proc_allowed_congestion_control, + }, + { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1b685485a5b5..6d87de434377 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2251,7 +2251,7 @@ void tcp_set_state(struct sock *sk, int state) if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); - /* fall through */ + fallthrough; default: if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); @@ -2948,8 +2948,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EPERM; else if (tp->repair_queue == TCP_SEND_QUEUE) WRITE_ONCE(tp->write_seq, val); - else if (tp->repair_queue == TCP_RECV_QUEUE) + else if (tp->repair_queue == TCP_RECV_QUEUE) { WRITE_ONCE(tp->rcv_nxt, val); + WRITE_ONCE(tp->copied_seq, val); + } else err = -EINVAL; break; @@ -3344,6 +3346,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ + nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ 0; } @@ -3399,6 +3402,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); + nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, + max_t(int, 0, tp->write_seq - tp->snd_nxt)); return stats; } @@ -3669,8 +3674,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; if (len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; - if (len > sizeof(zc)) + if (len > sizeof(zc)) { len = sizeof(zc); + if (put_user(len, optlen)) + return -EFAULT; + } if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 645cc3009e64..f5f588b1f6e9 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -145,12 +145,13 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt, 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } /* diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8a01428f80c1..fe7b4fbc31c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -528,57 +528,7 @@ out_err: return copied ? copied : err; } -static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock) -{ - struct sk_psock_link *link; - - while ((link = sk_psock_link_pop(psock))) { - sk_psock_unlink(sk, link); - sk_psock_free_link(link); - } -} - -static void tcp_bpf_unhash(struct sock *sk) -{ - void (*saved_unhash)(struct sock *sk); - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - return; - } - - saved_unhash = psock->saved_unhash; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - saved_unhash(sk); -} - -static void tcp_bpf_close(struct sock *sk, long timeout) -{ - void (*saved_close)(struct sock *sk, long timeout); - struct sk_psock *psock; - - lock_sock(sk); - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - saved_close = psock->saved_close; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - release_sock(sk); - saved_close(sk, timeout); -} - +#ifdef CONFIG_BPF_STREAM_PARSER enum { TCP_BPF_IPV4, TCP_BPF_IPV6, @@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash; - prot[TCP_BPF_BASE].close = tcp_bpf_close; + prot[TCP_BPF_BASE].unhash = sock_map_unhash; + prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; @@ -629,26 +579,6 @@ static int __init tcp_bpf_v4_build_proto(void) } core_initcall(tcp_bpf_v4_build_proto); -static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]); -} - -static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed - * or added requiring sk_prot hook updates. We keep original saved - * hooks in this case. - */ - sk->sk_prot = &tcp_bpf_prots[family][config]; -} - static int tcp_bpf_assert_proto_ops(struct proto *ops) { /* In order to avoid retpoline, we make assumptions when we call @@ -660,34 +590,34 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; } -void tcp_bpf_reinit(struct sock *sk) +struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) { - struct sk_psock *psock; + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - sock_owned_by_me(sk); + if (!psock->sk_proto) { + struct proto *ops = READ_ONCE(sk->sk_prot); - rcu_read_lock(); - psock = sk_psock(sk); - tcp_bpf_reinit_sk_prot(sk, psock); - rcu_read_unlock(); + if (tcp_bpf_assert_proto_ops(ops)) + return ERR_PTR(-EINVAL); + + tcp_bpf_check_v6_needs_rebuild(sk, ops); + } + + return &tcp_bpf_prots[family][config]; } -int tcp_bpf_init(struct sock *sk) +/* If a child got cloned from a listening socket that had tcp_bpf + * protocol callbacks installed, we need to restore the callbacks to + * the default ones because the child does not inherit the psock state + * that tcp_bpf callbacks expect. + */ +void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) { - struct proto *ops = READ_ONCE(sk->sk_prot); - struct sk_psock *psock; - - sock_owned_by_me(sk); + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + struct proto *prot = newsk->sk_prot; - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock || psock->sk_proto || - tcp_bpf_assert_proto_ops(ops))) { - rcu_read_unlock(); - return -EINVAL; - } - tcp_bpf_check_v6_needs_rebuild(sk, ops); - tcp_bpf_update_sk_prot(sk, psock); - rcu_read_unlock(); - return 0; + if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) + newsk->sk_prot = sk->sk_prot_creator; } +#endif /* CONFIG_BPF_STREAM_PARSER */ diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0d08f9e2d8d0..75a1c985f49a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -179,15 +179,15 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); + inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r); } -static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, +static int tcp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); + return inet_diag_dump_one_icsk(&tcp_hashinfo, cb, req); } #ifdef CONFIG_INET_DIAG_DESTROY diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 316ebdf8151d..bf4ced9273e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2865,7 +2865,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, (*ack_flag & FLAG_LOST_RETRANS))) return; /* Change state if cwnd is undone or retransmits are lost */ - /* fall through */ + fallthrough; default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -6124,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { struct request_sock *req; - tcp_try_undo_loss(sk, false); + /* If we are still handling the SYNACK RTO, see if timestamp ECR allows + * undo. If peer SACKs triggered fast recovery, we can't undo here. + */ + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) + tcp_try_undo_loss(sk, false); /* Reset rtx states to prevent spurious retransmits_timed_out() */ tcp_sk(sk)->retrans_stamp = 0; @@ -6363,7 +6367,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) mptcp_incoming_options(sk, skb, &tp->rx_opt); break; } - /* fall through */ + fallthrough; case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: /* RFC 793 says to queue data in these states, @@ -6378,7 +6382,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 1; } } - /* Fall through */ + fallthrough; case TCP_ESTABLISHED: tcp_data_queue(sk, skb); queued = 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index df1166b76126..83a5d24e13b8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1019,7 +1019,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, if (!md5sig) return NULL; - hlist_for_each_entry_rcu(key, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node, + lockdep_sock_is_held(sk)) { if (key->family != family) continue; if (key->l3index && key->l3index != l3index) @@ -1064,7 +1065,8 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node, + lockdep_sock_is_held(sk)) { if (key->family != family) continue; if (key->l3index && key->l3index != l3index) @@ -2070,7 +2072,7 @@ do_time_wait: } } /* to ACK */ - /* fall through */ + fallthrough; case TCP_TW_ACK: tcp_v4_timewait_ack(sk, skb); break; @@ -2366,7 +2368,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) break; st->bucket = 0; st->state = TCP_SEQ_STATE_ESTABLISHED; - /* Fallthrough */ + fallthrough; case TCP_SEQ_STATE_ESTABLISHED: if (st->bucket > tcp_hashinfo.ehash_mask) break; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ad3b56d9fa71..7e40322cc5ec 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); + tcp_bpf_clone(sk, newsk); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); return newsk; @@ -772,6 +774,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) { + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + inet_csk_reqsk_queue_drop_and_put(sk, req); + return child; + } + sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); *req_stolen = !own_req; @@ -817,6 +825,7 @@ EXPORT_SYMBOL(tcp_check_req); int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) + __releases(&((child)->sk_lock.slock)) { int ret = 0; int state = child->sk_state; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 306e25d743e8..2f45cde168c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1109,6 +1109,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (unlikely(!skb)) return -ENOBUFS; + /* retransmit skbs might have a non zero value in skb->dev + * because skb->dev is aliased with skb->rbnode.rb_left + */ + skb->dev = NULL; } inet = inet_sk(sk); @@ -3037,8 +3041,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) tcp_skb_tsorted_save(skb) { nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + if (nskb) { + nskb->dev = NULL; + err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC); + } else { + err = -ENOBUFS; + } } tcp_skb_tsorted_restore(skb); if (!err) { diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 471571e1ab26..6cebf412d590 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -10,10 +10,9 @@ #include <net/tcp.h> /* These factors derived from the recommended values in the aer: - * .01 and and 7/8. We use 50 instead of 100 to account for - * delayed ack. + * .01 and and 7/8. */ -#define TCP_SCALABLE_AI_CNT 50U +#define TCP_SCALABLE_AI_CNT 100U #define TCP_SCALABLE_MD_SCALE 3 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -23,11 +22,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), - 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 38d3ad141161..7c27aa629af1 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -22,7 +22,8 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) { struct tcp_ulp_ops *e; - list_for_each_entry_rcu(e, &tcp_ulp_list, list) { + list_for_each_entry_rcu(e, &tcp_ulp_list, list, + lockdep_is_held(&tcp_ulp_list_lock)) { if (strcmp(e->name, name) == 0) return e; } @@ -104,12 +105,6 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto, { struct inet_connection_sock *icsk = inet_csk(sk); - if (!icsk->icsk_ulp_ops) { - sk->sk_write_space = write_space; - sk->sk_prot = proto; - return; - } - if (icsk->icsk_ulp_ops->update) icsk->icsk_ulp_ops->update(sk, proto, write_space); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 3b36bb1a0dda..50a9a6e2c4cd 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -153,31 +153,34 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; if (tcp_in_slow_start(tp)) { - /* Slow start. */ - tcp_slow_start(tp, acked); + /* Slow start. */ + acked = tcp_slow_start(tp, acked); + if (!acked) + goto done; + } + + /* Congestion avoidance. */ + if (veno->diff < beta) { + /* In the "non-congestive state", increase cwnd + * every rtt. + */ + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } else { - /* Congestion avoidance. */ - if (veno->diff < beta) { - /* In the "non-congestive state", increase cwnd - * every rtt. - */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); - } else { - /* In the "congestive state", increase cwnd - * every other rtt. - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (veno->inc && - tp->snd_cwnd < tp->snd_cwnd_clamp) { - tp->snd_cwnd++; - veno->inc = 0; - } else - veno->inc = 1; - tp->snd_cwnd_cnt = 0; + /* In the "congestive state", increase cwnd + * every other rtt. + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (veno->inc && + tp->snd_cwnd < tp->snd_cwnd_clamp) { + tp->snd_cwnd++; + veno->inc = 0; } else - tp->snd_cwnd_cnt++; - } + veno->inc = 1; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt += acked; } +done: if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e00570dd0a69..3bb448761ca3 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -36,8 +36,6 @@ struct yeah { u32 reno_count; u32 fast_count; - - u32 pkts_acked; }; static void tcp_yeah_init(struct sock *sk) @@ -57,18 +55,6 @@ static void tcp_yeah_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void tcp_yeah_pkts_acked(struct sock *sk, - const struct ack_sample *sample) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct yeah *yeah = inet_csk_ca(sk); - - if (icsk->icsk_ca_state == TCP_CA_Open) - yeah->pkts_acked = sample->pkts_acked; - - tcp_vegas_pkts_acked(sk, sample); -} - static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -77,24 +63,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + goto do_vegas; + } - else if (!yeah->doing_reno_now) { + if (!yeah->doing_reno_now) { /* Scalable */ - - tp->snd_cwnd_cnt += yeah->pkts_acked; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - - yeah->pkts_acked = 1; - + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. @@ -118,7 +99,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) * of bytes we send in an RTT is often less than our cwnd will allow. * So we keep track of our cwnd separately, in v_beg_snd_cwnd. */ - +do_vegas: if (after(ack, yeah->vegas.beg_snd_nxt)) { /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -232,7 +213,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = { .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, - .pkts_acked = tcp_yeah_pkts_acked, + .pkts_acked = tcp_vegas_pkts_acked, .owner = THIS_MODULE, .name = "yeah", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db76b9609299..7ea90eb4a1ba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1671,10 +1671,11 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, error = -EAGAIN; do { spin_lock_bh(&queue->lock); - skb = __skb_try_recv_from_queue(sk, queue, flags, - udp_skb_destructor, - off, err, &last); + skb = __skb_try_recv_from_queue(sk, queue, flags, off, + err, &last); if (skb) { + if (!(flags & MSG_PEEK)) + udp_skb_destructor(sk, skb); spin_unlock_bh(&queue->lock); return skb; } @@ -1692,9 +1693,10 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, queue); - skb = __skb_try_recv_from_queue(sk, queue, flags, - udp_skb_dtor_locked, - off, err, &last); + skb = __skb_try_recv_from_queue(sk, queue, flags, off, + err, &last); + if (skb && !(flags & MSG_PEEK)) + udp_skb_dtor_locked(sk, skb); spin_unlock(&sk_queue->lock); spin_unlock_bh(&queue->lock); if (skb) @@ -1857,8 +1859,12 @@ int __udp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { inet_reset_saddr(sk); + if (sk->sk_prot->rehash && + (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + sk->sk_prot->rehash(sk); + } if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); @@ -2103,7 +2109,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (likely(!udp_unexpected_gso(sk, skb))) return udp_queue_rcv_one_skb(sk, skb); - BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET); __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { @@ -2557,7 +2563,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_ENCAP_ESPINUDP_NON_IKE: up->encap_rcv = xfrm4_udp_encap_rcv; #endif - /* FALLTHROUGH */ + fallthrough; case UDP_ENCAP_L2TPINUDP: up->encap_type = val; lock_sock(sk); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c new file mode 100644 index 000000000000..eddd973e6575 --- /dev/null +++ b/net/ipv4/udp_bpf.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */ + +#include <linux/skmsg.h> +#include <net/sock.h> +#include <net/udp.h> + +enum { + UDP_BPF_IPV4, + UDP_BPF_IPV6, + UDP_BPF_NUM_PROTS, +}; + +static struct proto *udpv6_prot_saved __read_mostly; +static DEFINE_SPINLOCK(udpv6_prot_lock); +static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS]; + +static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->unhash = sock_map_unhash; + prot->close = sock_map_close; +} + +static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops) +{ + if (sk->sk_family == AF_INET6 && + unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) { + spin_lock_bh(&udpv6_prot_lock); + if (likely(ops != udpv6_prot_saved)) { + udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops); + smp_store_release(&udpv6_prot_saved, ops); + } + spin_unlock_bh(&udpv6_prot_lock); + } +} + +static int __init udp_bpf_v4_build_proto(void) +{ + udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); + return 0; +} +core_initcall(udp_bpf_v4_build_proto); + +struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) +{ + int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; + + if (!psock->sk_proto) + udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot)); + + return &udp_bpf_prots[family]; +} diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 910555a4d9fe..1dbece34496e 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -21,16 +21,15 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, req, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); + return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI, + net_admin); } -static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, +static int udp_dump_one(struct udp_table *tbl, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { + struct sk_buff *in_skb = cb->skb; int err = -EINVAL; struct sock *sk = NULL; struct sk_buff *rep; @@ -64,17 +63,15 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; - err = inet_sk_diag_fill(sk, NULL, rep, req, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, - netlink_net_capable(in_skb, CAP_NET_ADMIN)); + err = inet_sk_diag_fill(sk, NULL, rep, cb, req, 0, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); @@ -93,12 +90,16 @@ out_nosk: static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; + struct nlattr *bc; + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -146,15 +147,15 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - udp_dump(&udp_table, skb, cb, r, bc); + udp_dump(&udp_table, skb, cb, r); } -static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, +static int udp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return udp_dump_one(&udp_table, in_skb, nlh, req); + return udp_dump_one(&udp_table, cb, req); } static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -249,16 +250,15 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - udp_dump(&udplite_table, skb, cb, r, bc); + udp_dump(&udplite_table, skb, cb, r); } -static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, +static int udplite_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return udp_dump_one(&udplite_table, in_skb, nlh, req); + return udp_dump_one(&udplite_table, cb, req); } static const struct inet_diag_handler udplite_diag_handler = { |