summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c27
-rw-r--r--net/9p/trans_fd.c4
-rw-r--r--net/Kconfig5
-rw-r--r--net/batman-adv/bat_iv_ogm.c1
-rw-r--r--net/batman-adv/bat_v.c23
-rw-r--r--net/batman-adv/gateway_common.c162
-rw-r--r--net/batman-adv/gateway_common.h7
-rw-r--r--net/batman-adv/hard-interface.c20
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/netlink.c15
-rw-r--r--net/batman-adv/netlink.h6
-rw-r--r--net/batman-adv/routing.h4
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/types.h7
-rw-r--r--net/bluetooth/af_bluetooth.c53
-rw-r--r--net/bluetooth/amp.h1
-rw-r--r--net/bluetooth/bnep/sock.c10
-rw-r--r--net/bluetooth/coredump.c3
-rw-r--r--net/bluetooth/hci_conn.c684
-rw-r--r--net/bluetooth/hci_core.c34
-rw-r--r--net/bluetooth/hci_debugfs.c3
-rw-r--r--net/bluetooth/hci_event.c201
-rw-r--r--net/bluetooth/hci_request.c21
-rw-r--r--net/bluetooth/hci_sock.c77
-rw-r--r--net/bluetooth/hci_sync.c263
-rw-r--r--net/bluetooth/hidp/sock.c10
-rw-r--r--net/bluetooth/iso.c136
-rw-r--r--net/bluetooth/l2cap_sock.c29
-rw-r--r--net/bluetooth/mgmt.c27
-rw-r--r--net/bluetooth/msft.c412
-rw-r--r--net/bluetooth/rfcomm/sock.c13
-rw-r--r--net/bluetooth/sco.c34
-rw-r--r--net/bpf/test_run.c21
-rw-r--r--net/bridge/br.c8
-rw-r--r--net/bridge/br_forward.c1
-rw-r--r--net/bridge/br_netlink.c12
-rw-r--r--net/bridge/br_private.h20
-rw-r--r--net/bridge/br_switchdev.c15
-rw-r--r--net/bridge/br_vlan_tunnel.c15
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/core/dev.c367
-rw-r--r--net/core/dev_ioctl.c187
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/filter.c15
-rw-r--r--net/core/flow_dissector.c55
-rw-r--r--net/core/flow_offload.c7
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/netdev-genl.c54
-rw-r--r--net/core/of_net.c1
-rw-r--r--net/core/page_pool.c87
-rw-r--r--net/core/rtnetlink.c11
-rw-r--r--net/core/scm.c3
-rw-r--r--net/core/skbuff.c150
-rw-r--r--net/core/skmsg.c8
-rw-r--r--net/core/sock.c63
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/dccp/feat.h1
-rw-r--r--net/dccp/ipv4.c7
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/dccp/ipv6.h4
-rw-r--r--net/devlink/Makefile2
-rw-r--r--net/devlink/dev.c51
-rw-r--r--net/devlink/devl_internal.h54
-rw-r--r--net/devlink/health.c42
-rw-r--r--net/devlink/leftover.c447
-rw-r--r--net/devlink/netlink.c127
-rw-r--r--net/devlink/netlink_gen.c481
-rw-r--r--net/devlink/netlink_gen.h79
-rw-r--r--net/dsa/port.c53
-rw-r--r--net/dsa/slave.c9
-rw-r--r--net/dsa/tag_qca.c8
-rw-r--r--net/ethtool/channels.c2
-rw-r--r--net/ethtool/coalesce.c6
-rw-r--r--net/ethtool/common.c3
-rw-r--r--net/ethtool/debug.c2
-rw-r--r--net/ethtool/eee.c2
-rw-r--r--net/ethtool/eeprom.c9
-rw-r--r--net/ethtool/features.c2
-rw-r--r--net/ethtool/fec.c2
-rw-r--r--net/ethtool/ioctl.c91
-rw-r--r--net/ethtool/linkinfo.c2
-rw-r--r--net/ethtool/linkmodes.c2
-rw-r--r--net/ethtool/linkstate.c2
-rw-r--r--net/ethtool/mm.c2
-rw-r--r--net/ethtool/module.c5
-rw-r--r--net/ethtool/netlink.c96
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/pause.c5
-rw-r--r--net/ethtool/phc_vclocks.c2
-rw-r--r--net/ethtool/plca.c4
-rw-r--r--net/ethtool/privflags.c2
-rw-r--r--net/ethtool/pse-pd.c6
-rw-r--r--net/ethtool/rings.c5
-rw-r--r--net/ethtool/rss.c3
-rw-r--r--net/ethtool/stats.c5
-rw-r--r--net/ethtool/strset.c2
-rw-r--r--net/ethtool/tsinfo.c2
-rw-r--r--net/ethtool/tunnels.c73
-rw-r--r--net/ethtool/wol.c5
-rw-r--r--net/handshake/Makefile2
-rw-r--r--net/handshake/alert.c110
-rw-r--r--net/handshake/handshake.h6
-rw-r--r--net/handshake/tlshd.c23
-rw-r--r--net/handshake/trace.c2
-rw-r--r--net/hsr/hsr_netlink.h2
-rw-r--r--net/ieee802154/nl802154.c4
-rw-r--r--net/ipv4/af_inet.c62
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/ipv4/cipso_ipv4.c4
-rw-r--r--net/ipv4/devinet.c23
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_diag.c22
-rw-r--r--net/ipv4/inet_hashtables.c66
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_output.c9
-rw-r--r--net/ipv4/ip_sockglue.c405
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c19
-rw-r--r--net/ipv4/nexthop.c65
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv4/raw.c26
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp.c113
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c69
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_metrics.c19
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c40
-rw-r--r--net/ipv4/tcp_timer.c89
-rw-r--r--net/ipv4/udp.c97
-rw-r--r--net/ipv4/udp_tunnel_core.c2
-rw-r--r--net/ipv4/xfrm4_policy.c11
-rw-r--r--net/ipv6/addrconf.c90
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/datagram.c9
-rw-r--r--net/ipv6/exthdrs.c7
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ila/ila_main.c1
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/inet6_hashtables.c69
-rw-r--r--net/ipv6/ip6_fib.c55
-rw-r--r--net/ipv6/ip6_output.c16
-rw-r--r--net/ipv6/ipv6_sockglue.c22
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c17
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c11
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c17
-rw-r--r--net/ipv6/route.c23
-rw-r--r--net/ipv6/rpl_iptunnel.c3
-rw-r--r--net/ipv6/seg6_local.c108
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c99
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_policy.c6
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/llc/llc_conn.c11
-rw-r--r--net/mptcp/Makefile2
-rw-r--r--net/mptcp/bpf.c15
-rw-r--r--net/mptcp/ctrl.c14
-rw-r--r--net/mptcp/pm.c9
-rw-r--r--net/mptcp/pm_netlink.c33
-rw-r--r--net/mptcp/protocol.c497
-rw-r--r--net/mptcp/protocol.h41
-rw-r--r--net/mptcp/sched.c173
-rw-r--r--net/mptcp/sockopt.c77
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/ncsi/ncsi-netlink.c2
-rw-r--r--net/ncsi/ncsi-netlink.h2
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c4
-rw-r--r--net/netfilter/nf_bpf_link.c125
-rw-r--r--net/netfilter/nf_conntrack_bpf.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c8
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c22
-rw-r--r--net/netfilter/nf_tables_api.c6
-rw-r--r--net/netfilter/nf_tables_offload.c13
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nft_cmp.c2
-rw-r--r--net/netfilter/nft_ct.c4
-rw-r--r--net/netfilter/nft_fib.c15
-rw-r--r--net/netfilter/nft_lookup.c6
-rw-r--r--net/netfilter/nft_masq.c8
-rw-r--r--net/netfilter/nft_meta.c6
-rw-r--r--net/netfilter/nft_nat.c8
-rw-r--r--net/netfilter/nft_osf.c6
-rw-r--r--net/netfilter/nft_redir.c8
-rw-r--r--net/netfilter/x_tables.c5
-rw-r--r--net/netfilter/xt_repldata.h2
-rw-r--r--net/netlabel/netlabel_cipso_v4.h3
-rw-r--r--net/netlink/af_netlink.c128
-rw-r--r--net/netlink/af_netlink.h26
-rw-r--r--net/netlink/diag.c10
-rw-r--r--net/netlink/genetlink.c125
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/actions.c42
-rw-r--r--net/openvswitch/conntrack.c83
-rw-r--r--net/openvswitch/datapath.c45
-rw-r--r--net/openvswitch/drop.h41
-rw-r--r--net/openvswitch/flow_netlink.c10
-rw-r--r--net/openvswitch/meter.c10
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/qrtr/af_qrtr.c5
-rw-r--r--net/qrtr/ns.c139
-rw-r--r--net/rds/rdma_transport.h1
-rw-r--r--net/rds/rds.h3
-rw-r--r--net/rds/tcp.h1
-rw-r--r--net/sched/Kconfig4
-rw-r--r--net/sched/act_ct.c3
-rw-r--r--net/sched/cls_flower.c35
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_drr.c11
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sched/sch_htb.c17
-rw-r--r--net/sched/sch_ingress.c61
-rw-r--r--net/sched/sch_netem.c49
-rw-r--r--net/sched/sch_qfq.c12
-rw-r--r--net/sched/sch_taprio.c68
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/protocol.c5
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/smc/af_smc.c88
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_clc.c147
-rw-r--r--net/smc/smc_clc.h53
-rw-r--r--net/smc/smc_core.c13
-rw-r--r--net/smc/smc_core.h26
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_llc.c25
-rw-r--r--net/socket.c167
-rw-r--r--net/sunrpc/svcsock.c50
-rw-r--r--net/sunrpc/xprtsock.c45
-rw-r--r--net/switchdev/switchdev.c25
-rw-r--r--net/tipc/addr.h1
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/link.h2
-rw-r--r--net/tipc/name_distr.h1
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink_compat.c4
-rw-r--r--net/tipc/node.c4
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/tls/tls.h7
-rw-r--r--net/tls/tls_device.c6
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_strp.c3
-rw-r--r--net/tls/tls_sw.c139
-rw-r--r--net/unix/scm.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c104
-rw-r--r--net/vmw_vsock/vmci_transport.h3
-rw-r--r--net/xdp/xsk.c366
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xdp/xsk_queue.h95
-rw-r--r--net/xfrm/xfrm_device.c13
262 files changed, 6652 insertions, 3927 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b90781b9ece6..2a7f1b15714a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -354,6 +354,26 @@ out:
return 0;
}
+static int vlan_hwtstamp_get(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+
+ return generic_hwtstamp_get_lower(real_dev, cfg);
+}
+
+static int vlan_hwtstamp_set(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+
+ if (!net_eq(dev_net(dev), dev_net(real_dev)))
+ return -EOPNOTSUPP;
+
+ return generic_hwtstamp_set_lower(real_dev, cfg, extack);
+}
+
static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
@@ -365,14 +385,9 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
- case SIOCSHWTSTAMP:
- if (!net_eq(dev_net(dev), dev_net(real_dev)))
- break;
- fallthrough;
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCGHWTSTAMP:
if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
break;
@@ -1081,6 +1096,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_fix_features = vlan_dev_fix_features,
.ndo_get_iflink = vlan_dev_get_iflink,
.ndo_fill_forward_path = vlan_dev_fill_forward_path,
+ .ndo_hwtstamp_get = vlan_hwtstamp_get,
+ .ndo_hwtstamp_set = vlan_hwtstamp_set,
};
static void vlan_dev_free(struct net_device *dev)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 00b684616e8d..c4015f30f9fa 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1019,7 +1019,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
}
}
- err = csocket->ops->connect(csocket,
+ err = READ_ONCE(csocket->ops)->connect(csocket,
(struct sockaddr *)&sin_server,
sizeof(struct sockaddr_in), 0);
if (err < 0) {
@@ -1060,7 +1060,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
return err;
}
- err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+ err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server,
sizeof(struct sockaddr_un) - 1, 0);
if (err < 0) {
pr_err("%s (%d): problem connecting socket: %s: %d\n",
diff --git a/net/Kconfig b/net/Kconfig
index 2fb25b534df5..d532ec33f1fe 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,11 @@ config NET_INGRESS
config NET_EGRESS
bool
+config NET_XGRESS
+ select NET_INGRESS
+ select NET_EGRESS
+ bool
+
config NET_REDIRECT
bool
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 828fb393ee94..74b49c35ddc1 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2516,6 +2516,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
},
.gw = {
.init_sel_class = batadv_iv_init_sel_class,
+ .sel_class_max = BATADV_TQ_MAX_VALUE,
.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
.is_eligible = batadv_iv_gw_is_eligible,
.dump = batadv_iv_gw_dump,
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 54e41fc709c3..ac11f1f08db0 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
@@ -34,7 +35,6 @@
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
#include "gateway_client.h"
-#include "gateway_common.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -512,25 +512,6 @@ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->gw.sel_class, 50);
}
-static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
- char *buff, size_t count)
-{
- u32 old_class, class;
-
- if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
- "B.A.T.M.A.N. V GW selection class",
- &class))
- return -EINVAL;
-
- old_class = atomic_read(&bat_priv->gw.sel_class);
- atomic_set(&bat_priv->gw.sel_class, class);
-
- if (old_class != class)
- batadv_gw_reselect(bat_priv);
-
- return count;
-}
-
/**
* batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW
* @gw_node: the GW to retrieve the metric for
@@ -818,7 +799,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
},
.gw = {
.init_sel_class = batadv_v_init_sel_class,
- .store_sel_class = batadv_v_store_sel_class,
+ .sel_class_max = U32_MAX,
.get_best_gw_node = batadv_v_gw_get_best_gw_node,
.is_eligible = batadv_v_gw_is_eligible,
.dump = batadv_v_gw_dump,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 6a964a773f57..2dd36ef03c84 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -9,124 +9,15 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
-#include <linux/errno.h>
-#include <linux/kstrtox.h>
-#include <linux/limits.h>
-#include <linux/math64.h>
-#include <linux/netdevice.h>
#include <linux/stddef.h>
-#include <linux/string.h>
+#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "gateway_client.h"
-#include "log.h"
#include "tvlv.h"
/**
- * batadv_parse_throughput() - parse supplied string buffer to extract
- * throughput information
- * @net_dev: the soft interface net device
- * @buff: string buffer to parse
- * @description: text shown when throughput string cannot be parsed
- * @throughput: pointer holding the returned throughput information
- *
- * Return: false on parse error and true otherwise.
- */
-bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
- const char *description, u32 *throughput)
-{
- enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
- u64 lthroughput;
- char *tmp_ptr;
- int ret;
-
- if (strlen(buff) > 4) {
- tmp_ptr = buff + strlen(buff) - 4;
-
- if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
- bw_unit_type = BATADV_BW_UNIT_MBIT;
-
- if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
- bw_unit_type == BATADV_BW_UNIT_MBIT)
- *tmp_ptr = '\0';
- }
-
- ret = kstrtou64(buff, 10, &lthroughput);
- if (ret) {
- batadv_err(net_dev,
- "Invalid throughput speed for %s: %s\n",
- description, buff);
- return false;
- }
-
- switch (bw_unit_type) {
- case BATADV_BW_UNIT_MBIT:
- /* prevent overflow */
- if (U64_MAX / 10 < lthroughput) {
- batadv_err(net_dev,
- "Throughput speed for %s too large: %s\n",
- description, buff);
- return false;
- }
-
- lthroughput *= 10;
- break;
- case BATADV_BW_UNIT_KBIT:
- default:
- lthroughput = div_u64(lthroughput, 100);
- break;
- }
-
- if (lthroughput > U32_MAX) {
- batadv_err(net_dev,
- "Throughput speed for %s too large: %s\n",
- description, buff);
- return false;
- }
-
- *throughput = lthroughput;
-
- return true;
-}
-
-/**
- * batadv_parse_gw_bandwidth() - parse supplied string buffer to extract
- * download and upload bandwidth information
- * @net_dev: the soft interface net device
- * @buff: string buffer to parse
- * @down: pointer holding the returned download bandwidth information
- * @up: pointer holding the returned upload bandwidth information
- *
- * Return: false on parse error and true otherwise.
- */
-static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
- u32 *down, u32 *up)
-{
- char *slash_ptr;
- bool ret;
-
- slash_ptr = strchr(buff, '/');
- if (slash_ptr)
- *slash_ptr = 0;
-
- ret = batadv_parse_throughput(net_dev, buff, "download gateway speed",
- down);
- if (!ret)
- return false;
-
- /* we also got some upload info */
- if (slash_ptr) {
- ret = batadv_parse_throughput(net_dev, slash_ptr + 1,
- "upload gateway speed", up);
- if (!ret)
- return false;
- }
-
- return true;
-}
-
-/**
* batadv_gw_tvlv_container_update() - update the gw tvlv container after
* gateway setting change
* @bat_priv: the bat priv with all the soft interface information
@@ -156,57 +47,6 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
}
/**
- * batadv_gw_bandwidth_set() - Parse and set download/upload gateway bandwidth
- * from supplied string buffer
- * @net_dev: netdev struct of the soft interface
- * @buff: the buffer containing the user data
- * @count: number of bytes in the buffer
- *
- * Return: 'count' on success or a negative error code in case of failure
- */
-ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
- size_t count)
-{
- struct batadv_priv *bat_priv = netdev_priv(net_dev);
- u32 down_curr;
- u32 up_curr;
- u32 down_new = 0;
- u32 up_new = 0;
- bool ret;
-
- down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down);
- up_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_up);
-
- ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new);
- if (!ret)
- return -EINVAL;
-
- if (!down_new)
- down_new = 1;
-
- if (!up_new)
- up_new = down_new / 5;
-
- if (!up_new)
- up_new = 1;
-
- if (down_curr == down_new && up_curr == up_new)
- return count;
-
- batadv_gw_reselect(bat_priv);
- batadv_info(net_dev,
- "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n",
- down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10,
- down_new / 10, down_new % 10, up_new / 10, up_new % 10);
-
- atomic_set(&bat_priv->gw.bandwidth_down, down_new);
- atomic_set(&bat_priv->gw.bandwidth_up, up_new);
- batadv_gw_tvlv_container_update(bat_priv);
-
- return count;
-}
-
-/**
* batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 87c37f907261..5d097d6a1dd9 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -9,9 +9,6 @@
#include "main.h"
-#include <linux/netdevice.h>
-#include <linux/types.h>
-
/**
* enum batadv_bandwidth_units - bandwidth unit types
*/
@@ -27,12 +24,8 @@ enum batadv_bandwidth_units {
#define BATADV_GW_MODE_CLIENT_NAME "client"
#define BATADV_GW_MODE_SERVER_NAME "server"
-ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
- size_t count);
void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv);
void batadv_gw_init(struct batadv_priv *bat_priv);
void batadv_gw_free(struct batadv_priv *bat_priv);
-bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
- const char *description, u32 *throughput);
#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 24c9c0c3f316..96a412beab2d 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/gfp.h>
@@ -711,9 +712,14 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
struct batadv_priv *bat_priv;
__be16 ethertype = htons(ETH_P_BATMAN);
int max_header_len = batadv_max_header_len();
+ unsigned int required_mtu;
+ unsigned int hardif_mtu;
int ret;
- if (hard_iface->net_dev->mtu < ETH_MIN_MTU + max_header_len)
+ hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu);
+ required_mtu = READ_ONCE(soft_iface->mtu) + max_header_len;
+
+ if (hardif_mtu < ETH_MIN_MTU + max_header_len)
return -EINVAL;
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
@@ -746,18 +752,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->net_dev->name);
if (atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
+ hardif_mtu < required_mtu)
batadv_info(hard_iface->soft_iface,
"The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n",
- hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + max_header_len);
+ hard_iface->net_dev->name, hardif_mtu,
+ required_mtu);
if (!atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
+ hardif_mtu < required_mtu)
batadv_info(hard_iface->soft_iface,
"The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n",
- hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + max_header_len);
+ hard_iface->net_dev->name, hardif_mtu,
+ required_mtu);
if (batadv_hardif_is_iface_up(hard_iface))
batadv_hardif_activate_interface(hard_iface);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 156ed39eded1..10007c5894a1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.1"
+#define BATADV_SOURCE_VERSION "2023.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 6efbc9275aec..0c64d81a7761 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -377,7 +377,7 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
+static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
{
struct sk_buff *msg;
int ret;
@@ -551,15 +551,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
* algorithm in use implements the GW API
*/
- u32 sel_class_max = 0xffffffffu;
+ u32 sel_class_max = bat_priv->algo_ops->gw.sel_class_max;
u32 sel_class;
attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
sel_class = nla_get_u32(attr);
- if (!bat_priv->algo_ops->gw.store_sel_class)
- sel_class_max = BATADV_TQ_MAX_VALUE;
-
if (sel_class >= 1 && sel_class <= sel_class_max) {
atomic_set(&bat_priv->gw.sel_class, sel_class);
batadv_gw_reselect(bat_priv);
@@ -861,8 +858,8 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface)
{
struct sk_buff *msg;
int ret;
@@ -1076,8 +1073,8 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
- struct batadv_softif_vlan *vlan)
+static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan)
{
struct sk_buff *msg;
int ret;
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 48102cc7490c..876d2806a67d 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,12 +21,6 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
u8 result, u32 test_time, u64 total_bytes,
u32 cookie);
-int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
-int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface);
-int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
- struct batadv_softif_vlan *vlan);
-
extern struct genl_family batadv_netlink_family;
#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 5f387786e9a7..afd15b3879f1 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,10 +27,6 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
-int batadv_recv_tt_query(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
-int batadv_recv_roam_adv(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 85d00dc9ce32..1bf1232a4f75 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -156,7 +156,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
struct batadv_priv *bat_priv = netdev_priv(dev);
/* check ranges */
- if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
+ if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index cf1a0eafe3ab..17d5ea1d8e84 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -2197,11 +2197,10 @@ struct batadv_algo_gw_ops {
void (*init_sel_class)(struct batadv_priv *bat_priv);
/**
- * @store_sel_class: parse and stores a new GW selection class
- * (optional)
+ * @sel_class_max: maximum allowed GW selection class
*/
- ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
- size_t count);
+ u32 sel_class_max;
+
/**
* @get_best_gw_node: select the best GW from the list of available
* nodes (optional)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1c3c7ff5c3c6..336a76165454 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -140,6 +140,35 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto,
return err;
}
+struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
+ struct proto *prot, int proto, gfp_t prio, int kern)
+{
+ struct sock *sk;
+
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk);
+ INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+ sock_reset_flag(sk, SOCK_ZAPPED);
+
+ sk->sk_protocol = proto;
+ sk->sk_state = BT_OPEN;
+
+ /* Init peer information so it can be properly monitored */
+ if (!kern) {
+ spin_lock(&sk->sk_peer_lock);
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL(bt_sock_alloc);
+
void bt_sock_link(struct bt_sock_list *l, struct sock *sk)
{
write_lock(&l->lock);
@@ -158,6 +187,9 @@ EXPORT_SYMBOL(bt_sock_unlink);
void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
BT_DBG("parent %p, sk %p", parent, sk);
sock_hold(sk);
@@ -170,6 +202,19 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ /* Copy credentials from parent since for incoming connections the
+ * socket is allocated by the kernel.
+ */
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
+ sk->sk_peer_pid = get_pid(parent->sk_peer_pid);
+ sk->sk_peer_cred = get_cred(parent->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
+
if (bh)
bh_unlock_sock(sk);
else
@@ -288,8 +333,12 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
&msg->msg_namelen);
- if (bt_sk(sk)->skb_put_cmsg)
- bt_sk(sk)->skb_put_cmsg(skb, msg, sk);
+ if (test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags)) {
+ u8 pkt_status = hci_skb_pkt_status(skb);
+
+ put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
+ sizeof(pkt_status), &pkt_status);
+ }
}
skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h
index 832764dfbfb3..97c87abd129f 100644
--- a/net/bluetooth/amp.h
+++ b/net/bluetooth/amp.h
@@ -28,7 +28,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
int phylink_gen_key(struct hci_conn *hcon, u8 *data, u8 *len, u8 *type);
-void amp_read_loc_info(struct hci_dev *hdev, struct amp_mgr *mgr);
void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle);
void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr);
void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 57d509d77cb4..00d47bcf4d7d 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -205,21 +205,13 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern);
+ sk = bt_sock_alloc(net, sock, &bnep_proto, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
sock->ops = &bnep_sock_ops;
-
sock->state = SS_UNCONNECTED;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&bnep_sk_list, sk);
return 0;
}
diff --git a/net/bluetooth/coredump.c b/net/bluetooth/coredump.c
index d2d2624ec708..ec97a4bab1c9 100644
--- a/net/bluetooth/coredump.c
+++ b/net/bluetooth/coredump.c
@@ -100,8 +100,7 @@ void hci_devcd_reset(struct hci_dev *hdev)
/* Call with hci_dev_lock only. */
static void hci_devcd_free(struct hci_dev *hdev)
{
- if (hdev->dump.head)
- vfree(hdev->dump.head);
+ vfree(hdev->dump.head);
hci_devcd_reset(hdev);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 76222565e2df..234746721047 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -178,57 +178,6 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hci_conn_put(conn);
}
-static void le_scan_cleanup(struct work_struct *work)
-{
- struct hci_conn *conn = container_of(work, struct hci_conn,
- le_scan_cleanup);
- struct hci_dev *hdev = conn->hdev;
- struct hci_conn *c = NULL;
-
- BT_DBG("%s hcon %p", hdev->name, conn);
-
- hci_dev_lock(hdev);
-
- /* Check that the hci_conn is still around */
- rcu_read_lock();
- list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) {
- if (c == conn)
- break;
- }
- rcu_read_unlock();
-
- if (c == conn) {
- hci_connect_le_scan_cleanup(conn, 0x00);
- hci_conn_cleanup(conn);
- }
-
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
- hci_conn_put(conn);
-}
-
-static void hci_connect_le_scan_remove(struct hci_conn *conn)
-{
- BT_DBG("%s hcon %p", conn->hdev->name, conn);
-
- /* We can't call hci_conn_del/hci_conn_cleanup here since that
- * could deadlock with another hci_conn_del() call that's holding
- * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work).
- * Instead, grab temporary extra references to the hci_dev and
- * hci_conn and perform the necessary cleanup in a separate work
- * callback.
- */
-
- hci_dev_hold(conn->hdev);
- hci_conn_get(conn);
-
- /* Even though we hold a reference to the hdev, many other
- * things might get cleaned up meanwhile, including the hdev's
- * own workqueue, so we can't use that for scheduling.
- */
- schedule_work(&conn->le_scan_cleanup);
-}
-
static void hci_acl_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -679,13 +628,6 @@ static void hci_conn_timeout(struct work_struct *work)
if (refcnt > 0)
return;
- /* LE connections in scanning state need special handling */
- if (conn->state == BT_CONNECT && conn->type == LE_LINK &&
- test_bit(HCI_CONN_SCANNING, &conn->flags)) {
- hci_connect_le_scan_remove(conn);
- return;
- }
-
hci_abort_conn(conn, hci_proto_disconn_ind(conn));
}
@@ -791,7 +733,8 @@ struct iso_list_data {
u16 sync_handle;
};
int count;
- struct iso_cig_params pdu;
+ bool big_term;
+ bool big_sync_term;
};
static void bis_list(struct hci_conn *conn, void *data)
@@ -809,17 +752,6 @@ static void bis_list(struct hci_conn *conn, void *data)
d->count++;
}
-static void find_bis(struct hci_conn *conn, void *data)
-{
- struct iso_list_data *d = data;
-
- /* Ignore unicast */
- if (bacmp(&conn->dst, BDADDR_ANY))
- return;
-
- d->count++;
-}
-
static int terminate_big_sync(struct hci_dev *hdev, void *data)
{
struct iso_list_data *d = data;
@@ -828,11 +760,8 @@ static int terminate_big_sync(struct hci_dev *hdev, void *data)
hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);
- /* Check if ISO connection is a BIS and terminate BIG if there are
- * no other connections using it.
- */
- hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
- if (d->count)
+ /* Only terminate BIG if it has been created */
+ if (!d->big_term)
return 0;
return hci_le_terminate_big_sync(hdev, d->big,
@@ -844,19 +773,21 @@ static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
kfree(data);
}
-static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
+static int hci_le_terminate_big(struct hci_dev *hdev, struct hci_conn *conn)
{
struct iso_list_data *d;
int ret;
- bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
+ bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", conn->iso_qos.bcast.big,
+ conn->iso_qos.bcast.bis);
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
- d->big = big;
- d->bis = bis;
+ d->big = conn->iso_qos.bcast.big;
+ d->bis = conn->iso_qos.bcast.bis;
+ d->big_term = test_and_clear_bit(HCI_CONN_BIG_CREATED, &conn->flags);
ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d,
terminate_big_destroy);
@@ -873,31 +804,26 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", d->big,
d->sync_handle);
- /* Check if ISO connection is a BIS and terminate BIG if there are
- * no other connections using it.
- */
- hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
- if (d->count)
- return 0;
-
- hci_le_big_terminate_sync(hdev, d->big);
+ if (d->big_sync_term)
+ hci_le_big_terminate_sync(hdev, d->big);
return hci_le_pa_terminate_sync(hdev, d->sync_handle);
}
-static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
+static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn)
{
struct iso_list_data *d;
int ret;
- bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
+ bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle);
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->big = big;
- d->sync_handle = sync_handle;
+ d->sync_handle = conn->sync_handle;
+ d->big_sync_term = test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags);
ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
terminate_big_destroy);
@@ -916,6 +842,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
static void bis_cleanup(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
+ struct hci_conn *bis;
bt_dev_dbg(hdev, "conn %p", conn);
@@ -923,17 +850,29 @@ static void bis_cleanup(struct hci_conn *conn)
if (!test_and_clear_bit(HCI_CONN_PER_ADV, &conn->flags))
return;
- hci_le_terminate_big(hdev, conn->iso_qos.bcast.big,
- conn->iso_qos.bcast.bis);
+ /* Check if ISO connection is a BIS and terminate advertising
+ * set and BIG if there are no other connections using it.
+ */
+ bis = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big);
+ if (bis)
+ return;
+
+ hci_le_terminate_big(hdev, conn);
} else {
+ bis = hci_conn_hash_lookup_big_any_dst(hdev,
+ conn->iso_qos.bcast.big);
+
+ if (bis)
+ return;
+
hci_le_big_terminate(hdev, conn->iso_qos.bcast.big,
- conn->sync_handle);
+ conn);
}
}
static int remove_cig_sync(struct hci_dev *hdev, void *data)
{
- u8 handle = PTR_ERR(data);
+ u8 handle = PTR_UINT(data);
return hci_le_remove_cig_sync(hdev, handle);
}
@@ -942,7 +881,8 @@ static int hci_le_remove_cig(struct hci_dev *hdev, u8 handle)
{
bt_dev_dbg(hdev, "handle 0x%2.2x", handle);
- return hci_cmd_sync_queue(hdev, remove_cig_sync, ERR_PTR(handle), NULL);
+ return hci_cmd_sync_queue(hdev, remove_cig_sync, UINT_PTR(handle),
+ NULL);
}
static void find_cis(struct hci_conn *conn, void *data)
@@ -983,6 +923,25 @@ static void cis_cleanup(struct hci_conn *conn)
hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
}
+static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+ u16 handle = HCI_CONN_HANDLE_MAX + 1;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ /* Find the first unused handle */
+ if (handle == 0xffff || c->handle != handle)
+ break;
+ handle++;
+ }
+ rcu_read_unlock();
+
+ return handle;
+}
+
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role)
{
@@ -996,7 +955,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
- conn->handle = HCI_CONN_HANDLE_UNSET;
+ conn->handle = hci_conn_hash_alloc_unset(hdev);
conn->hdev = hdev;
conn->type = type;
conn->role = role;
@@ -1059,7 +1018,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
- INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup);
atomic_set(&conn->refcnt, 0);
@@ -1109,7 +1067,7 @@ static void hci_conn_unlink(struct hci_conn *conn)
*/
if ((child->type == SCO_LINK ||
child->type == ESCO_LINK) &&
- child->handle == HCI_CONN_HANDLE_UNSET)
+ HCI_CONN_HANDLE_UNSET(child->handle))
hci_conn_del(child);
}
@@ -1273,9 +1231,41 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
hci_conn_del(conn);
}
+/* This function requires the caller holds hdev->lock */
+u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "hcon %p handle 0x%4.4x", conn, handle);
+
+ if (conn->handle == handle)
+ return 0;
+
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ handle, HCI_CONN_HANDLE_MAX);
+ return HCI_ERROR_INVALID_PARAMETERS;
+ }
+
+ /* If abort_reason has been sent it means the connection is being
+ * aborted and the handle shall not be changed.
+ */
+ if (conn->abort_reason)
+ return conn->abort_reason;
+
+ conn->handle = handle;
+
+ return 0;
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return;
bt_dev_dbg(hdev, "err %d", err);
@@ -1300,10 +1290,17 @@ done:
static int hci_connect_le_sync(struct hci_dev *hdev, void *data)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return 0;
bt_dev_dbg(hdev, "conn %p", conn);
+ conn->state = BT_CONNECT;
+
return hci_le_create_conn_sync(hdev, conn);
}
@@ -1373,10 +1370,10 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
- conn->state = BT_CONNECT;
clear_bit(HCI_CONN_SCANNING, &conn->flags);
- err = hci_cmd_sync_queue(hdev, hci_connect_le_sync, conn,
+ err = hci_cmd_sync_queue(hdev, hci_connect_le_sync,
+ UINT_PTR(conn->handle),
create_le_conn_complete);
if (err) {
hci_conn_del(conn);
@@ -1440,25 +1437,23 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
{
- struct iso_list_data data;
+ struct hci_conn *conn;
+ u8 big;
/* Allocate a BIG if not set */
if (qos->bcast.big == BT_ISO_QOS_BIG_UNSET) {
- for (data.big = 0x00; data.big < 0xef; data.big++) {
- data.count = 0;
- data.bis = 0xff;
+ for (big = 0x00; big < 0xef; big++) {
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
- BT_BOUND, &data);
- if (!data.count)
+ conn = hci_conn_hash_lookup_big(hdev, big);
+ if (!conn)
break;
}
- if (data.big == 0xef)
+ if (big == 0xef)
return -EADDRNOTAVAIL;
/* Update BIG */
- qos->bcast.big = data.big;
+ qos->bcast.big = big;
}
return 0;
@@ -1466,28 +1461,27 @@ static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
{
- struct iso_list_data data;
+ struct hci_conn *conn;
+ u8 bis;
/* Allocate BIS if not set */
if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) {
/* Find an unused adv set to advertise BIS, skip instance 0x00
* since it is reserved as general purpose set.
*/
- for (data.bis = 0x01; data.bis < hdev->le_num_of_adv_sets;
- data.bis++) {
- data.count = 0;
+ for (bis = 0x01; bis < hdev->le_num_of_adv_sets;
+ bis++) {
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
- BT_BOUND, &data);
- if (!data.count)
+ conn = hci_conn_hash_lookup_bis(hdev, BDADDR_ANY, bis);
+ if (!conn)
break;
}
- if (data.bis == hdev->le_num_of_adv_sets)
+ if (bis == hdev->le_num_of_adv_sets)
return -EADDRNOTAVAIL;
/* Update BIS */
- qos->bcast.bis = data.bis;
+ qos->bcast.bis = bis;
}
return 0;
@@ -1495,10 +1489,10 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
/* This function requires the caller holds hdev->lock */
static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
- struct bt_iso_qos *qos)
+ struct bt_iso_qos *qos, __u8 base_len,
+ __u8 *base)
{
struct hci_conn *conn;
- struct iso_list_data data;
int err;
/* Let's make sure that le is enabled.*/
@@ -1516,24 +1510,26 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
if (err)
return ERR_PTR(err);
- data.big = qos->bcast.big;
- data.bis = qos->bcast.bis;
- data.count = 0;
-
- /* Check if there is already a matching BIG/BIS */
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, BT_BOUND, &data);
- if (data.count)
+ /* Check if the LE Create BIG command has already been sent */
+ conn = hci_conn_hash_lookup_per_adv_bis(hdev, dst, qos->bcast.big,
+ qos->bcast.big);
+ if (conn)
return ERR_PTR(-EADDRINUSE);
- conn = hci_conn_hash_lookup_bis(hdev, dst, qos->bcast.big, qos->bcast.bis);
- if (conn)
+ /* Check BIS settings against other bound BISes, since all
+ * BISes in a BIG must have the same value for all parameters
+ */
+ conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big);
+
+ if (conn && (memcmp(qos, &conn->iso_qos, sizeof(*qos)) ||
+ base_len != conn->le_per_adv_data_len ||
+ memcmp(conn->le_per_adv_data, base, base_len)))
return ERR_PTR(-EADDRINUSE);
conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
if (!conn)
return ERR_PTR(-ENOMEM);
- set_bit(HCI_CONN_PER_ADV, &conn->flags);
conn->state = BT_CONNECT;
hci_conn_hold(conn);
@@ -1707,52 +1703,25 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
return sco;
}
-static void cis_add(struct iso_list_data *d, struct bt_iso_qos *qos)
-{
- struct hci_cis_params *cis = &d->pdu.cis[d->pdu.cp.num_cis];
-
- cis->cis_id = qos->ucast.cis;
- cis->c_sdu = cpu_to_le16(qos->ucast.out.sdu);
- cis->p_sdu = cpu_to_le16(qos->ucast.in.sdu);
- cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy : qos->ucast.in.phy;
- cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy : qos->ucast.out.phy;
- cis->c_rtn = qos->ucast.out.rtn;
- cis->p_rtn = qos->ucast.in.rtn;
-
- d->pdu.cp.num_cis++;
-}
-
-static void cis_list(struct hci_conn *conn, void *data)
-{
- struct iso_list_data *d = data;
-
- /* Skip if broadcast/ANY address */
- if (!bacmp(&conn->dst, BDADDR_ANY))
- return;
-
- if (d->cig != conn->iso_qos.ucast.cig || d->cis == BT_ISO_QOS_CIS_UNSET ||
- d->cis != conn->iso_qos.ucast.cis)
- return;
-
- d->count++;
-
- if (d->pdu.cp.cig_id == BT_ISO_QOS_CIG_UNSET ||
- d->count >= ARRAY_SIZE(d->pdu.cis))
- return;
-
- cis_add(d, &conn->iso_qos);
-}
-
static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_create_big cp;
+ struct iso_list_data data;
memset(&cp, 0, sizeof(cp));
+ data.big = qos->bcast.big;
+ data.bis = qos->bcast.bis;
+ data.count = 0;
+
+ /* Create a BIS for each bound connection */
+ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
+ BT_BOUND, &data);
+
cp.handle = qos->bcast.big;
cp.adv_handle = qos->bcast.bis;
- cp.num_bis = 0x01;
+ cp.num_bis = data.count;
hci_cpu_to_le24(qos->bcast.out.interval, cp.bis.sdu_interval);
cp.bis.sdu = cpu_to_le16(qos->bcast.out.sdu);
cp.bis.latency = cpu_to_le16(qos->bcast.out.latency);
@@ -1766,25 +1735,62 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp);
}
-static void set_cig_params_complete(struct hci_dev *hdev, void *data, int err)
+static int set_cig_params_sync(struct hci_dev *hdev, void *data)
{
- struct iso_cig_params *pdu = data;
+ u8 cig_id = PTR_UINT(data);
+ struct hci_conn *conn;
+ struct bt_iso_qos *qos;
+ struct iso_cig_params pdu;
+ u8 cis_id;
- bt_dev_dbg(hdev, "");
+ conn = hci_conn_hash_lookup_cig(hdev, cig_id);
+ if (!conn)
+ return 0;
- if (err)
- bt_dev_err(hdev, "Unable to set CIG parameters: %d", err);
+ memset(&pdu, 0, sizeof(pdu));
- kfree(pdu);
-}
+ qos = &conn->iso_qos;
+ pdu.cp.cig_id = cig_id;
+ hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval);
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval);
+ pdu.cp.sca = qos->ucast.sca;
+ pdu.cp.packing = qos->ucast.packing;
+ pdu.cp.framing = qos->ucast.framing;
+ pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
+ pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
+
+ /* Reprogram all CIS(s) with the same CIG, valid range are:
+ * num_cis: 0x00 to 0x1F
+ * cis_id: 0x00 to 0xEF
+ */
+ for (cis_id = 0x00; cis_id < 0xf0 &&
+ pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) {
+ struct hci_cis_params *cis;
-static int set_cig_params_sync(struct hci_dev *hdev, void *data)
-{
- struct iso_cig_params *pdu = data;
- u32 plen;
+ conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id);
+ if (!conn)
+ continue;
+
+ qos = &conn->iso_qos;
+
+ cis = &pdu.cis[pdu.cp.num_cis++];
+ cis->cis_id = cis_id;
+ cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu);
+ cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu);
+ cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy :
+ qos->ucast.in.phy;
+ cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy :
+ qos->ucast.out.phy;
+ cis->c_rtn = qos->ucast.out.rtn;
+ cis->p_rtn = qos->ucast.in.rtn;
+ }
+
+ if (!pdu.cp.num_cis)
+ return 0;
- plen = sizeof(pdu->cp) + pdu->cp.num_cis * sizeof(pdu->cis[0]);
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, plen, pdu,
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS,
+ sizeof(pdu.cp) +
+ pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu,
HCI_CMD_TIMEOUT);
}
@@ -1792,7 +1798,6 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct iso_list_data data;
- struct iso_cig_params *pdu;
memset(&data, 0, sizeof(data));
@@ -1819,58 +1824,31 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
qos->ucast.cig = data.cig;
}
- data.pdu.cp.cig_id = qos->ucast.cig;
- hci_cpu_to_le24(qos->ucast.out.interval, data.pdu.cp.c_interval);
- hci_cpu_to_le24(qos->ucast.in.interval, data.pdu.cp.p_interval);
- data.pdu.cp.sca = qos->ucast.sca;
- data.pdu.cp.packing = qos->ucast.packing;
- data.pdu.cp.framing = qos->ucast.framing;
- data.pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
- data.pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
-
if (qos->ucast.cis != BT_ISO_QOS_CIS_UNSET) {
- data.count = 0;
- data.cig = qos->ucast.cig;
- data.cis = qos->ucast.cis;
-
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
- &data);
- if (data.count)
+ if (hci_conn_hash_lookup_cis(hdev, NULL, 0, qos->ucast.cig,
+ qos->ucast.cis))
return false;
-
- cis_add(&data, qos);
+ goto done;
}
- /* Reprogram all CIS(s) with the same CIG */
- for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0x11;
+ /* Allocate first available CIS if not set */
+ for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0xf0;
data.cis++) {
- data.count = 0;
-
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
- &data);
- if (data.count)
- continue;
-
- /* Allocate a CIS if not set */
- if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET) {
+ if (!hci_conn_hash_lookup_cis(hdev, NULL, 0, data.cig,
+ data.cis)) {
/* Update CIS */
qos->ucast.cis = data.cis;
- cis_add(&data, qos);
+ break;
}
}
- if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis)
+ if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET)
return false;
- pdu = kmemdup(&data.pdu, sizeof(*pdu), GFP_KERNEL);
- if (!pdu)
- return false;
-
- if (hci_cmd_sync_queue(hdev, set_cig_params_sync, pdu,
- set_cig_params_complete) < 0) {
- kfree(pdu);
+done:
+ if (hci_cmd_sync_queue(hdev, set_cig_params_sync,
+ UINT_PTR(qos->ucast.cig), NULL) < 0)
return false;
- }
return true;
}
@@ -1888,6 +1866,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-ENOMEM);
cis->cleanup = cis_cleanup;
cis->dst_type = dst_type;
+ cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET;
+ cis->iso_qos.ucast.cis = BT_ISO_QOS_CIS_UNSET;
}
if (cis->state == BT_CONNECTED)
@@ -1931,6 +1911,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EINVAL);
}
+ hci_conn_hold(cis);
+
cis->iso_qos = *qos;
cis->state = BT_BOUND;
@@ -1969,59 +1951,47 @@ bool hci_iso_setup_path(struct hci_conn *conn)
return true;
}
-static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+int hci_conn_check_create_cis(struct hci_conn *conn)
{
- return hci_le_create_cis_sync(hdev, data);
-}
+ if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY))
+ return -EINVAL;
-int hci_le_create_cis(struct hci_conn *conn)
-{
- struct hci_conn *cis;
- struct hci_link *link, *t;
- struct hci_dev *hdev = conn->hdev;
- int err;
+ if (!conn->parent || conn->parent->state != BT_CONNECTED ||
+ conn->state != BT_CONNECT || HCI_CONN_HANDLE_UNSET(conn->handle))
+ return 1;
- bt_dev_dbg(hdev, "hcon %p", conn);
+ return 0;
+}
- switch (conn->type) {
- case LE_LINK:
- if (conn->state != BT_CONNECTED || list_empty(&conn->link_list))
- return -EINVAL;
+static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_le_create_cis_sync(hdev);
+}
- cis = NULL;
+int hci_le_create_cis_pending(struct hci_dev *hdev)
+{
+ struct hci_conn *conn;
+ bool pending = false;
- /* hci_conn_link uses list_add_tail_rcu so the list is in
- * the same order as the connections are requested.
- */
- list_for_each_entry_safe(link, t, &conn->link_list, list) {
- if (link->conn->state == BT_BOUND) {
- err = hci_le_create_cis(link->conn);
- if (err)
- return err;
+ rcu_read_lock();
- cis = link->conn;
- }
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) {
+ rcu_read_unlock();
+ return -EBUSY;
}
- return cis ? 0 : -EINVAL;
- case ISO_LINK:
- cis = conn;
- break;
- default:
- return -EINVAL;
+ if (!hci_conn_check_create_cis(conn))
+ pending = true;
}
- if (cis->state == BT_CONNECT)
+ rcu_read_unlock();
+
+ if (!pending)
return 0;
/* Queue Create CIS */
- err = hci_cmd_sync_queue(hdev, hci_create_cis_sync, cis, NULL);
- if (err)
- return err;
-
- cis->state = BT_CONNECT;
-
- return 0;
+ return hci_cmd_sync_queue(hdev, hci_create_cis_sync, NULL, NULL);
}
static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
@@ -2051,16 +2021,6 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
qos->latency = conn->le_conn_latency;
}
-static void hci_bind_bis(struct hci_conn *conn,
- struct bt_iso_qos *qos)
-{
- /* Update LINK PHYs according to QoS preference */
- conn->le_tx_phy = qos->bcast.out.phy;
- conn->le_tx_phy = qos->bcast.out.phy;
- conn->iso_qos = *qos;
- conn->state = BT_BOUND;
-}
-
static int create_big_sync(struct hci_dev *hdev, void *data)
{
struct hci_conn *conn = data;
@@ -2183,27 +2143,80 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
}
}
-struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
- __u8 base_len, __u8 *base)
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
{
struct hci_conn *conn;
- int err;
+ __u8 eir[HCI_MAX_PER_AD_LENGTH];
+
+ if (base_len && base)
+ base_len = eir_append_service_data(eir, 0, 0x1851,
+ base, base_len);
/* We need hci_conn object using the BDADDR_ANY as dst */
- conn = hci_add_bis(hdev, dst, qos);
+ conn = hci_add_bis(hdev, dst, qos, base_len, eir);
if (IS_ERR(conn))
return conn;
- hci_bind_bis(conn, qos);
+ /* Update LINK PHYs according to QoS preference */
+ conn->le_tx_phy = qos->bcast.out.phy;
+ conn->le_tx_phy = qos->bcast.out.phy;
/* Add Basic Announcement into Peridic Adv Data if BASE is set */
if (base_len && base) {
- base_len = eir_append_service_data(conn->le_per_adv_data, 0,
- 0x1851, base, base_len);
+ memcpy(conn->le_per_adv_data, eir, sizeof(eir));
conn->le_per_adv_data_len = base_len;
}
+ hci_iso_qos_setup(hdev, conn, &qos->bcast.out,
+ conn->le_tx_phy ? conn->le_tx_phy :
+ hdev->le_tx_def_phys);
+
+ conn->iso_qos = *qos;
+ conn->state = BT_BOUND;
+
+ return conn;
+}
+
+static void bis_mark_per_adv(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Skip if not broadcast/ANY address */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ if (d->big != conn->iso_qos.bcast.big ||
+ d->bis == BT_ISO_QOS_BIS_UNSET ||
+ d->bis != conn->iso_qos.bcast.bis)
+ return;
+
+ set_bit(HCI_CONN_PER_ADV, &conn->flags);
+}
+
+struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
+{
+ struct hci_conn *conn;
+ int err;
+ struct iso_list_data data;
+
+ conn = hci_bind_bis(hdev, dst, qos, base_len, base);
+ if (IS_ERR(conn))
+ return conn;
+
+ data.big = qos->bcast.big;
+ data.bis = qos->bcast.bis;
+
+ /* Set HCI_CONN_PER_ADV for all bound connections, to mark that
+ * the start periodic advertising and create BIG commands have
+ * been queued
+ */
+ hci_conn_hash_list_state(hdev, bis_mark_per_adv, ISO_LINK,
+ BT_BOUND, &data);
+
/* Queue start periodic advertising and create BIG */
err = hci_cmd_sync_queue(hdev, create_big_sync, conn,
create_big_complete);
@@ -2212,10 +2225,6 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(err);
}
- hci_iso_qos_setup(hdev, conn, &qos->bcast.out,
- conn->le_tx_phy ? conn->le_tx_phy :
- hdev->le_tx_def_phys);
-
return conn;
}
@@ -2257,11 +2266,12 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-ENOLINK);
}
- /* If LE is already connected and CIS handle is already set proceed to
- * Create CIS immediately.
- */
- if (le->state == BT_CONNECTED && cis->handle != HCI_CONN_HANDLE_UNSET)
- hci_le_create_cis(cis);
+ /* Link takes the refcount */
+ hci_conn_drop(cis);
+
+ cis->state = BT_CONNECT;
+
+ hci_le_create_cis_pending(hdev);
return cis;
}
@@ -2848,81 +2858,49 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
return phys;
}
-int hci_abort_conn(struct hci_conn *conn, u8 reason)
+static int abort_conn_sync(struct hci_dev *hdev, void *data)
{
- int r = 0;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
return 0;
- switch (conn->state) {
- case BT_CONNECTED:
- case BT_CONFIG:
- if (conn->type == AMP_LINK) {
- struct hci_cp_disconn_phy_link cp;
+ return hci_abort_conn_sync(hdev, conn, conn->abort_reason);
+}
- cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
- cp.reason = reason;
- r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp);
- } else {
- struct hci_cp_disconnect dc;
+int hci_abort_conn(struct hci_conn *conn, u8 reason)
+{
+ struct hci_dev *hdev = conn->hdev;
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = reason;
- r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT,
- sizeof(dc), &dc);
- }
+ /* If abort_reason has already been set it means the connection is
+ * already being aborted so don't attempt to overwrite it.
+ */
+ if (conn->abort_reason)
+ return 0;
- conn->state = BT_DISCONN;
+ bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason);
- break;
- case BT_CONNECT:
- if (conn->type == LE_LINK) {
- if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- break;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
- } else if (conn->type == ACL_LINK) {
- if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2)
- break;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_CREATE_CONN_CANCEL,
- 6, &conn->dst);
- }
- break;
- case BT_CONNECT2:
- if (conn->type == ACL_LINK) {
- struct hci_cp_reject_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
- rej.reason = reason;
-
- r = hci_send_cmd(conn->hdev,
- HCI_OP_REJECT_CONN_REQ,
- sizeof(rej), &rej);
- } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
- struct hci_cp_reject_sync_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
-
- /* SCO rejection has its own limited set of
- * allowed error values (0x0D-0x0F) which isn't
- * compatible with most values passed to this
- * function. To be safe hard-code one of the
- * values that's suitable for SCO.
- */
- rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
+ conn->abort_reason = reason;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_REJECT_SYNC_CONN_REQ,
- sizeof(rej), &rej);
+ /* If the connection is pending check the command opcode since that
+ * might be blocking on hci_cmd_sync_work while waiting its respective
+ * event so we need to hci_cmd_sync_cancel to cancel it.
+ *
+ * hci_connect_le serializes the connection attempts so only one
+ * connection can be in BT_CONNECT at time.
+ */
+ if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) {
+ switch (hci_skb_event(hdev->sent_cmd)) {
+ case HCI_EV_LE_CONN_COMPLETE:
+ case HCI_EV_LE_ENHANCED_CONN_COMPLETE:
+ case HCI_EVT_LE_CIS_ESTABLISHED:
+ hci_cmd_sync_cancel(hdev, -ECANCELED);
+ break;
}
- break;
- default:
- conn->state = BT_CLOSED;
- break;
}
- return r;
+ return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
+ NULL);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1ec83985f1ab..a5992f1b3c9b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1074,9 +1074,9 @@ void hci_uuids_clear(struct hci_dev *hdev)
void hci_link_keys_clear(struct hci_dev *hdev)
{
- struct link_key *key;
+ struct link_key *key, *tmp;
- list_for_each_entry(key, &hdev->link_keys, list) {
+ list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) {
list_del_rcu(&key->list);
kfree_rcu(key, rcu);
}
@@ -1084,9 +1084,9 @@ void hci_link_keys_clear(struct hci_dev *hdev)
void hci_smp_ltks_clear(struct hci_dev *hdev)
{
- struct smp_ltk *k;
+ struct smp_ltk *k, *tmp;
- list_for_each_entry(k, &hdev->long_term_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@@ -1094,9 +1094,9 @@ void hci_smp_ltks_clear(struct hci_dev *hdev)
void hci_smp_irks_clear(struct hci_dev *hdev)
{
- struct smp_irk *k;
+ struct smp_irk *k, *tmp;
- list_for_each_entry(k, &hdev->identity_resolving_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@@ -1104,9 +1104,9 @@ void hci_smp_irks_clear(struct hci_dev *hdev)
void hci_blocked_keys_clear(struct hci_dev *hdev)
{
- struct blocked_key *b;
+ struct blocked_key *b, *tmp;
- list_for_each_entry(b, &hdev->blocked_keys, list) {
+ list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) {
list_del_rcu(&b->list);
kfree_rcu(b, rcu);
}
@@ -1949,15 +1949,15 @@ int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE:
- bt_dev_dbg(hdev, "%s add monitor %d status %d", hdev->name,
+ bt_dev_dbg(hdev, "add monitor %d status %d",
monitor->handle, status);
/* Message was not forwarded to controller - not an error */
break;
case HCI_ADV_MONITOR_EXT_MSFT:
status = msft_add_monitor_pattern(hdev, monitor);
- bt_dev_dbg(hdev, "%s add monitor %d msft status %d", hdev->name,
- monitor->handle, status);
+ bt_dev_dbg(hdev, "add monitor %d msft status %d",
+ handle, status);
break;
}
@@ -1976,15 +1976,15 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
- bt_dev_dbg(hdev, "%s remove monitor %d status %d", hdev->name,
+ bt_dev_dbg(hdev, "remove monitor %d status %d",
monitor->handle, status);
goto free_monitor;
case HCI_ADV_MONITOR_EXT_MSFT:
handle = monitor->handle;
status = msft_remove_monitor(hdev, monitor);
- bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
- hdev->name, handle, status);
+ bt_dev_dbg(hdev, "remove monitor %d msft status %d",
+ handle, status);
break;
}
@@ -2436,6 +2436,9 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return NOTIFY_DONE;
+ /* To avoid a potential race with hci_unregister_dev. */
+ hci_dev_hold(hdev);
+
if (action == PM_SUSPEND_PREPARE)
ret = hci_suspend_dev(hdev);
else if (action == PM_POST_SUSPEND)
@@ -2445,6 +2448,7 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
action, ret);
+ hci_dev_put(hdev);
return NOTIFY_DONE;
}
@@ -3891,7 +3895,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
if (conn) {
/* Send to upper protocol */
- bt_cb(skb)->sco.pkt_status = flags & 0x03;
+ hci_skb_pkt_status(skb) = flags & 0x03;
sco_recv_scodata(conn, skb);
return;
} else {
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index ec0df2f9188e..6b7741f6e95b 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -22,6 +22,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/kstrtox.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -1152,7 +1153,7 @@ static ssize_t force_no_mitm_write(struct file *file,
return -EFAULT;
buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
+ if (kstrtobool(buf, &enable))
return -EINVAL;
if (enable == hci_dev_test_flag(hdev, HCI_FORCE_NO_MITM))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 31ca320ce38d..559b6080706c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1639,7 +1639,7 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data,
hci_dev_set_flag(hdev, HCI_LE_ADV);
- if (adv)
+ if (adv && !adv->periodic)
adv->enabled = true;
conn = hci_lookup_le_connect(hdev);
@@ -1747,7 +1747,7 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
{
struct discovery_state *d = &hdev->discovery;
- if (len > HCI_MAX_AD_LENGTH)
+ if (len > max_adv_len(hdev))
return;
bacpy(&d->last_adv_addr, bdaddr);
@@ -3173,19 +3173,15 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
goto unlock;
}
if (!status) {
- conn->handle = __le16_to_cpu(ev->handle);
- if (conn->handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
- conn->handle, HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
+ status = hci_conn_set_handle(conn, __le16_to_cpu(ev->handle));
+ if (status)
goto done;
- }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3803,6 +3799,22 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
return rp->status;
}
+static void hci_unbound_cis_failed(struct hci_dev *hdev, u8 cig, u8 status)
+{
+ struct hci_conn *conn, *tmp;
+
+ lockdep_assert_held(&hdev->lock);
+
+ list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
+ if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY) ||
+ conn->state == BT_OPEN || conn->iso_qos.ucast.cig != cig)
+ continue;
+
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ hci_conn_failed(conn, status);
+ }
+}
+
static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3810,6 +3822,7 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct hci_cp_le_set_cig_params *cp;
struct hci_conn *conn;
u8 status = rp->status;
+ bool pending = false;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
@@ -3823,12 +3836,15 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 4, Part E page 2554
+ *
+ * If the Status return parameter is non-zero, then the state of the CIG
+ * and its CIS configurations shall not be changed by the command. If
+ * the CIG did not already exist, it shall not be created.
+ */
if (status) {
- while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) {
- conn->state = BT_CLOSED;
- hci_connect_cfm(conn, status);
- hci_conn_del(conn);
- }
+ /* Keep current configuration, fail only the unbound CIS */
+ hci_unbound_cis_failed(hdev, rp->cig_id, status);
goto unlock;
}
@@ -3848,17 +3864,17 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
if (conn->state != BT_BOUND && conn->state != BT_CONNECT)
continue;
- conn->handle = __le16_to_cpu(rp->handle[i]);
-
- bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn,
- conn->handle, conn->parent);
+ if (hci_conn_set_handle(conn, __le16_to_cpu(rp->handle[i])))
+ continue;
- /* Create CIS if LE is already connected */
- if (conn->parent && conn->parent->state == BT_CONNECTED)
- hci_le_create_cis(conn);
+ if (conn->state == BT_CONNECT)
+ pending = true;
}
unlock:
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
return rp->status;
@@ -3938,24 +3954,47 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_ev_status *rp = data;
- __u8 *sent;
+ struct hci_cp_le_set_per_adv_enable *cp;
+ struct adv_info *adv = NULL, *n;
+ u8 per_adv_cnt = 0;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
if (rp->status)
return rp->status;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
- if (!sent)
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
+ if (!cp)
return rp->status;
hci_dev_lock(hdev);
- if (*sent)
+ adv = hci_find_adv_instance(hdev, cp->handle);
+
+ if (cp->enable) {
hci_dev_set_flag(hdev, HCI_LE_PER_ADV);
- else
+
+ if (adv)
+ adv->enabled = true;
+ } else {
+ /* If just one instance was disabled check if there are
+ * any other instance enabled before clearing HCI_LE_PER_ADV.
+ * The current periodic adv instance will be marked as
+ * disabled once extended advertising is also disabled.
+ */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list) {
+ if (adv->periodic && adv->enabled)
+ per_adv_cnt++;
+ }
+
+ if (per_adv_cnt > 1)
+ goto unlock;
+
hci_dev_clear_flag(hdev, HCI_LE_PER_ADV);
+ }
+unlock:
hci_dev_unlock(hdev);
return rp->status;
@@ -4224,6 +4263,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data,
static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
{
struct hci_cp_le_create_cis *cp;
+ bool pending = false;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", status);
@@ -4246,12 +4286,18 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
conn = hci_conn_hash_lookup_handle(hdev, handle);
if (conn) {
+ if (test_and_clear_bit(HCI_CONN_CREATE_CIS,
+ &conn->flags))
+ pending = true;
conn->state = BT_CLOSED;
hci_connect_cfm(conn, status);
hci_conn_del(conn);
}
}
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -4999,18 +5045,15 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
goto unlock;
}
switch (status) {
case 0x00:
- conn->handle = __le16_to_cpu(ev->handle);
- if (conn->handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
- conn->handle, HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
+ status = hci_conn_set_handle(conn, __le16_to_cpu(ev->handle));
+ if (status) {
conn->state = BT_CLOSED;
break;
}
@@ -5863,7 +5906,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
goto unlock;
}
@@ -6216,8 +6259,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
return;
}
- if (!ext_adv && len > HCI_MAX_AD_LENGTH) {
- bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes");
+ if (len > max_adv_len(hdev)) {
+ bt_dev_err_ratelimited(hdev,
+ "adv larger than maximum supported");
return;
}
@@ -6282,7 +6326,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved,
type);
- if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
+ if (!ext_adv && conn && type == LE_ADV_IND &&
+ len <= max_adv_len(hdev)) {
/* Store report for later inclusion by
* mgmt_device_connected
*/
@@ -6423,7 +6468,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
info->length + 1))
break;
- if (info->length <= HCI_MAX_AD_LENGTH) {
+ if (info->length <= max_adv_len(hdev)) {
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, NULL, 0, rssi,
@@ -6790,6 +6835,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_evt_le_cis_established *ev = data;
struct hci_conn *conn;
struct bt_iso_qos *qos;
+ bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6813,6 +6859,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos = &conn->iso_qos;
+ pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
+
/* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
qos->ucast.out.interval = qos->ucast.in.interval;
@@ -6854,10 +6902,14 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
+ conn->state = BT_CLOSED;
hci_connect_cfm(conn, ev->status);
hci_conn_del(conn);
unlock:
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -6936,6 +6988,7 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_create_big_complete *ev = data;
struct hci_conn *conn;
+ __u8 i = 0;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -6944,33 +6997,46 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
return;
hci_dev_lock(hdev);
+ rcu_read_lock();
- conn = hci_conn_hash_lookup_big(hdev, ev->handle);
- if (!conn)
- goto unlock;
+ /* Connect all BISes that are bound to the BIG */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (bacmp(&conn->dst, BDADDR_ANY) ||
+ conn->type != ISO_LINK ||
+ conn->iso_qos.bcast.big != ev->handle)
+ continue;
- if (conn->type != ISO_LINK) {
- bt_dev_err(hdev,
- "Invalid connection link type handle 0x%2.2x",
- ev->handle);
- goto unlock;
- }
+ if (hci_conn_set_handle(conn,
+ __le16_to_cpu(ev->bis_handle[i++])))
+ continue;
- if (ev->num_bis)
- conn->handle = __le16_to_cpu(ev->bis_handle[0]);
+ if (!ev->status) {
+ conn->state = BT_CONNECTED;
+ set_bit(HCI_CONN_BIG_CREATED, &conn->flags);
+ rcu_read_unlock();
+ hci_debugfs_create_conn(conn);
+ hci_conn_add_sysfs(conn);
+ hci_iso_setup_path(conn);
+ rcu_read_lock();
+ continue;
+ }
- if (!ev->status) {
- conn->state = BT_CONNECTED;
- hci_debugfs_create_conn(conn);
- hci_conn_add_sysfs(conn);
- hci_iso_setup_path(conn);
- goto unlock;
+ hci_connect_cfm(conn, ev->status);
+ rcu_read_unlock();
+ hci_conn_del(conn);
+ rcu_read_lock();
}
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+ if (!ev->status && !i)
+ /* If no BISes have been connected for the BIG,
+ * terminate. This is in case all bound connections
+ * have been closed before the BIG creation
+ * has completed.
+ */
+ hci_le_terminate_big_sync(hdev, ev->handle,
+ HCI_ERROR_LOCAL_HOST_TERM);
-unlock:
+ rcu_read_unlock();
hci_dev_unlock(hdev);
}
@@ -6987,9 +7053,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
flex_array_size(ev, bis, ev->num_bis)))
return;
- if (ev->status)
- return;
-
hci_dev_lock(hdev);
for (i = 0; i < ev->num_bis; i++) {
@@ -7013,9 +7076,25 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.bcast.in.latency = le16_to_cpu(ev->interval) * 125 / 100;
bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu);
- hci_iso_setup_path(bis);
+ if (!ev->status) {
+ set_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_iso_setup_path(bis);
+ }
}
+ /* In case BIG sync failed, notify each failed connection to
+ * the user after all hci connections have been added
+ */
+ if (ev->status)
+ for (i = 0; i < ev->num_bis; i++) {
+ u16 handle = le16_to_cpu(ev->bis[i]);
+
+ bis = hci_conn_hash_lookup_handle(hdev, handle);
+
+ set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags);
+ hci_connect_cfm(bis, ev->status);
+ }
+
hci_dev_unlock(hdev);
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f7e006a36382..6e023b0104b0 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -629,27 +629,6 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
}
}
-/* Returns true if an le connection is in the scanning state */
-static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type == LE_LINK && c->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &c->flags)) {
- rcu_read_unlock();
- return true;
- }
- }
-
- rcu_read_unlock();
-
- return false;
-}
-
static void set_random_addr(struct hci_request *req, bdaddr_t *rpa);
static int hci_update_random_address(struct hci_request *req,
bool require_privacy, bool use_rpa,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d249d839819..5e4f718073b7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -264,6 +264,53 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb_copy);
}
+static void hci_sock_copy_creds(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_creds *creds;
+
+ if (!sk || WARN_ON(!skb))
+ return;
+
+ creds = &bt_cb(skb)->creds;
+
+ /* Check if peer credentials is set */
+ if (!sk->sk_peer_pid) {
+ /* Check if parent peer credentials is set */
+ if (bt_sk(sk)->parent && bt_sk(sk)->parent->sk_peer_pid)
+ sk = bt_sk(sk)->parent;
+ else
+ return;
+ }
+
+ /* Check if scm_creds already set */
+ if (creds->pid == pid_vnr(sk->sk_peer_pid))
+ return;
+
+ memset(creds, 0, sizeof(*creds));
+
+ creds->pid = pid_vnr(sk->sk_peer_pid);
+ if (sk->sk_peer_cred) {
+ creds->uid = sk->sk_peer_cred->uid;
+ creds->gid = sk->sk_peer_cred->gid;
+ }
+}
+
+static struct sk_buff *hci_skb_clone(struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+
+ if (!skb)
+ return NULL;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+
+ hci_sock_copy_creds(skb->sk, nskb);
+
+ return nskb;
+}
+
/* Send frame to sockets with specific channel */
static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
int flag, struct sock *skip_sk)
@@ -289,7 +336,7 @@ static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
if (hci_pi(sk)->channel != channel)
continue;
- nskb = skb_clone(skb, GFP_ATOMIC);
+ nskb = hci_skb_clone(skb);
if (!nskb)
continue;
@@ -356,6 +403,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
if (!skb_copy)
return;
+ hci_sock_copy_creds(skb->sk, skb_copy);
+
/* Put header before the data */
hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE);
hdr->opcode = opcode;
@@ -531,10 +580,12 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
return NULL;
}
- skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC);
+ skb = bt_skb_alloc(14 + TASK_COMM_LEN, GFP_ATOMIC);
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0;
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
@@ -580,6 +631,8 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk)
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
__net_timestamp(skb);
@@ -606,6 +659,8 @@ static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index,
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
put_unaligned_le16(opcode, skb_put(skb, 2));
@@ -638,6 +693,8 @@ send_monitor_note(struct sock *sk, const char *fmt, ...)
if (!skb)
return;
+ hci_sock_copy_creds(sk, skb);
+
va_start(args, fmt);
vsprintf(skb_put(skb, len), fmt, args);
*(u8 *)skb_put(skb, 1) = 0;
@@ -1494,6 +1551,7 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
+ struct scm_cookie scm;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1538,11 +1596,16 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
break;
}
+ memset(&scm, 0, sizeof(scm));
+ scm.creds = bt_cb(skb)->creds;
+
skb_free_datagram(sk, skb);
if (flags & MSG_TRUNC)
copied = skblen;
+ scm_recv(sock, msg, &scm, flags);
+
return err ? : copied;
}
@@ -2143,18 +2206,12 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &hci_sock_ops;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern);
+ sk = bt_sock_alloc(net, sock, &hci_sk_proto, protocol, GFP_ATOMIC,
+ kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
-
sock->state = SS_UNCONNECTED;
- sk->sk_state = BT_OPEN;
sk->sk_destruct = hci_sock_destruct;
bt_sock_link(&hci_sk_list, sk);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 4d1e32bb6a9c..5eb30ba21370 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3,6 +3,7 @@
* BlueZ - Bluetooth protocol stack for Linux
*
* Copyright (C) 2021 Intel Corporation
+ * Copyright 2023 NXP
*/
#include <linux/property.h>
@@ -1319,9 +1320,11 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance)
static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
+ struct adv_info *adv = NULL;
/* If periodic advertising already disabled there is nothing to do. */
- if (!hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->periodic || !adv->enabled)
return 0;
memset(&cp, 0, sizeof(cp));
@@ -1386,9 +1389,11 @@ static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
+ struct adv_info *adv = NULL;
/* If periodic advertising already enabled there is nothing to do. */
- if (hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ adv = hci_find_adv_instance(hdev, instance);
+ if (adv && adv->periodic && adv->enabled)
return 0;
memset(&cp, 0, sizeof(cp));
@@ -1458,22 +1463,19 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
sync_interval);
if (IS_ERR(adv))
return PTR_ERR(adv);
+ adv->pending = false;
added = true;
}
}
- /* Only start advertising if instance 0 or if a dedicated instance has
- * been added.
- */
- if (!adv || added) {
- err = hci_start_ext_adv_sync(hdev, instance);
- if (err < 0)
- goto fail;
+ /* Start advertising */
+ err = hci_start_ext_adv_sync(hdev, instance);
+ if (err < 0)
+ goto fail;
- err = hci_adv_bcast_annoucement(hdev, adv);
- if (err < 0)
- goto fail;
- }
+ err = hci_adv_bcast_annoucement(hdev, adv);
+ if (err < 0)
+ goto fail;
err = hci_set_per_adv_params_sync(hdev, instance, min_interval,
max_interval);
@@ -2670,27 +2672,6 @@ done:
return filter_policy;
}
-/* Returns true if an le connection is in the scanning state */
-static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type == LE_LINK && c->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &c->flags)) {
- rcu_read_unlock();
- return true;
- }
- }
-
- rcu_read_unlock();
-
- return false;
-}
-
static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
u16 interval, u16 window,
u8 own_addr_type, u8 filter_policy)
@@ -4133,10 +4114,13 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
}
if (bis_capable(hdev)) {
+ events[1] |= 0x20; /* LE PA Report */
+ events[1] |= 0x40; /* LE PA Sync Established */
events[3] |= 0x04; /* LE Create BIG Complete */
events[3] |= 0x08; /* LE Terminate BIG Complete */
events[3] |= 0x10; /* LE BIG Sync Established */
events[3] |= 0x20; /* LE BIG Sync Loss */
+ events[4] |= 0x02; /* LE BIG Info Advertising Report */
}
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK,
@@ -5269,26 +5253,64 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
}
static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
- struct hci_conn *conn)
+ struct hci_conn *conn, u8 reason)
{
+ /* Return reason if scanning since the connection shall probably be
+ * cleanup directly.
+ */
if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- return 0;
+ return reason;
- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ if (conn->role == HCI_ROLE_SLAVE ||
+ test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL,
0, NULL, HCI_CMD_TIMEOUT);
}
-static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn)
+static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ u8 reason)
{
if (conn->type == LE_LINK)
- return hci_le_connect_cancel_sync(hdev, conn);
+ return hci_le_connect_cancel_sync(hdev, conn, reason);
+
+ if (conn->type == ISO_LINK) {
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 1857:
+ *
+ * If this command is issued for a CIS on the Central and the
+ * CIS is successfully terminated before being established,
+ * then an HCI_LE_CIS_Established event shall also be sent for
+ * this CIS with the Status Operation Cancelled by Host (0x44).
+ */
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ return hci_disconnect_sync(hdev, conn, reason);
+
+ /* CIS with no Create CIS sent have nothing to cancel */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return HCI_ERROR_LOCAL_HOST_TERM;
+
+ /* There is no way to cancel a BIS without terminating the BIG
+ * which is done later on connection cleanup.
+ */
+ return 0;
+ }
if (hdev->hci_ver < BLUETOOTH_VER_1_2)
return 0;
+ /* Wait for HCI_EV_CONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
+ * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
+ * used when suspending or powering off, where we don't want to wait
+ * for the peer's response.
+ */
+ if (reason != HCI_ERROR_REMOTE_POWER_OFF)
+ return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN_CANCEL,
+ 6, &conn->dst,
+ HCI_EV_CONN_COMPLETE,
+ HCI_CMD_TIMEOUT, NULL);
+
return __hci_cmd_sync_status(hdev, HCI_OP_CREATE_CONN_CANCEL,
6, &conn->dst, HCI_CMD_TIMEOUT);
}
@@ -5312,11 +5334,27 @@ static int hci_reject_sco_sync(struct hci_dev *hdev, struct hci_conn *conn,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int hci_le_reject_cis_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ u8 reason)
+{
+ struct hci_cp_le_reject_cis cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.reason = reason;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_REJECT_CIS,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_reject_conn_req cp;
+ if (conn->type == ISO_LINK)
+ return hci_le_reject_cis_sync(hdev, conn, reason);
+
if (conn->type == SCO_LINK || conn->type == ESCO_LINK)
return hci_reject_sco_sync(hdev, conn, reason);
@@ -5330,31 +5368,52 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
- int err;
+ int err = 0;
+ u16 handle = conn->handle;
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
- return hci_disconnect_sync(hdev, conn, reason);
+ err = hci_disconnect_sync(hdev, conn, reason);
+ break;
case BT_CONNECT:
- err = hci_connect_cancel_sync(hdev, conn);
- /* Cleanup hci_conn object if it cannot be cancelled as it
- * likelly means the controller and host stack are out of sync.
- */
- if (err) {
- hci_dev_lock(hdev);
- hci_conn_failed(conn, err);
- hci_dev_unlock(hdev);
- }
- return err;
+ err = hci_connect_cancel_sync(hdev, conn, reason);
+ break;
case BT_CONNECT2:
- return hci_reject_conn_sync(hdev, conn, reason);
+ err = hci_reject_conn_sync(hdev, conn, reason);
+ break;
+ case BT_OPEN:
+ case BT_BOUND:
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, reason);
+ hci_dev_unlock(hdev);
+ return 0;
default:
conn->state = BT_CLOSED;
- break;
+ return 0;
}
- return 0;
+ /* Cleanup hci_conn object if it cannot be cancelled as it
+ * likelly means the controller and host stack are out of sync
+ * or in case of LE it was still scanning so it can be cleanup
+ * safely.
+ */
+ if (err) {
+ struct hci_conn *c;
+
+ /* Check if the connection hasn't been cleanup while waiting
+ * commands to complete.
+ */
+ c = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!c || c != conn)
+ return 0;
+
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, err);
+ hci_dev_unlock(hdev);
+ }
+
+ return err;
}
static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
@@ -6253,63 +6312,99 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
done:
if (err == -ETIMEDOUT)
- hci_le_connect_cancel_sync(hdev, conn);
+ hci_le_connect_cancel_sync(hdev, conn, 0x00);
/* Re-enable advertising after the connection attempt is finished. */
hci_resume_advertising_sync(hdev);
return err;
}
-int hci_le_create_cis_sync(struct hci_dev *hdev, struct hci_conn *conn)
+int hci_le_create_cis_sync(struct hci_dev *hdev)
{
struct {
struct hci_cp_le_create_cis cp;
struct hci_cis cis[0x1f];
} cmd;
- u8 cig;
- struct hci_conn *hcon = conn;
+ struct hci_conn *conn;
+ u8 cig = BT_ISO_QOS_CIG_UNSET;
+
+ /* The spec allows only one pending LE Create CIS command at a time. If
+ * the command is pending now, don't do anything. We check for pending
+ * connections after each CIS Established event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2566:
+ *
+ * If the Host issues this command before all the
+ * HCI_LE_CIS_Established events from the previous use of the
+ * command have been generated, the Controller shall return the
+ * error code Command Disallowed (0x0C).
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2567:
+ *
+ * When the Controller receives the HCI_LE_Create_CIS command, the
+ * Controller sends the HCI_Command_Status event to the Host. An
+ * HCI_LE_CIS_Established event will be generated for each CIS when it
+ * is established or if it is disconnected or considered lost before
+ * being established; until all the events are generated, the command
+ * remains pending.
+ */
memset(&cmd, 0, sizeof(cmd));
- cmd.cis[0].acl_handle = cpu_to_le16(conn->parent->handle);
- cmd.cis[0].cis_handle = cpu_to_le16(conn->handle);
- cmd.cp.num_cis++;
- cig = conn->iso_qos.ucast.cig;
hci_dev_lock(hdev);
rcu_read_lock();
+ /* Wait until previous Create CIS has completed */
list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ goto done;
+ }
- if (conn == hcon || conn->type != ISO_LINK ||
- conn->state == BT_CONNECTED ||
- conn->iso_qos.ucast.cig != cig)
+ /* Find CIG with all CIS ready */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_conn *link;
+
+ if (hci_conn_check_create_cis(conn))
continue;
- /* Check if all CIS(s) belonging to a CIG are ready */
- if (!conn->parent || conn->parent->state != BT_CONNECTED ||
- conn->state != BT_CONNECT) {
- cmd.cp.num_cis = 0;
- break;
+ cig = conn->iso_qos.ucast.cig;
+
+ list_for_each_entry_rcu(link, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_cis(link) > 0 &&
+ link->iso_qos.ucast.cig == cig &&
+ link->state != BT_CONNECTED) {
+ cig = BT_ISO_QOS_CIG_UNSET;
+ break;
+ }
}
- /* Group all CIS with state BT_CONNECT since the spec don't
- * allow to send them individually:
- *
- * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
- * page 2566:
- *
- * If the Host issues this command before all the
- * HCI_LE_CIS_Established events from the previous use of the
- * command have been generated, the Controller shall return the
- * error code Command Disallowed (0x0C).
- */
+ if (cig != BT_ISO_QOS_CIG_UNSET)
+ break;
+ }
+
+ if (cig == BT_ISO_QOS_CIG_UNSET)
+ goto done;
+
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+
+ if (hci_conn_check_create_cis(conn) ||
+ conn->iso_qos.ucast.cig != cig)
+ continue;
+
+ set_bit(HCI_CONN_CREATE_CIS, &conn->flags);
cis->acl_handle = cpu_to_le16(conn->parent->handle);
cis->cis_handle = cpu_to_le16(conn->handle);
cmd.cp.num_cis++;
+
+ if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis))
+ break;
}
+done:
rcu_read_unlock();
hci_dev_unlock(hdev);
@@ -6433,7 +6528,7 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
{
- u8 instance = PTR_ERR(data);
+ u8 instance = PTR_UINT(data);
return hci_update_adv_data_sync(hdev, instance);
}
@@ -6441,5 +6536,5 @@ static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
{
return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
- ERR_PTR(instance), NULL);
+ UINT_PTR(instance), NULL);
}
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 369ed92dac99..c93aaeb3a3fa 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -256,21 +256,13 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern);
+ sk = bt_sock_alloc(net, sock, &hidp_proto, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
sock->ops = &hidp_sock_ops;
-
sock->state = SS_UNCONNECTED;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&hidp_sk_list, sk);
return 0;
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 505d62247268..3c03e49422c7 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -48,6 +48,11 @@ static void iso_sock_kill(struct sock *sk);
#define EIR_SERVICE_DATA_LENGTH 4
#define BASE_MAX_LENGTH (HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH)
+/* iso_pinfo flags values */
+enum {
+ BT_SK_BIG_SYNC,
+};
+
struct iso_pinfo {
struct bt_sock bt;
bdaddr_t src;
@@ -58,7 +63,7 @@ struct iso_pinfo {
__u8 bc_num_bis;
__u8 bc_bis[ISO_MAX_NUM_BIS];
__u16 sync_handle;
- __u32 flags;
+ unsigned long flags;
struct bt_iso_qos qos;
bool qos_user_set;
__u8 base_len;
@@ -287,13 +292,24 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
- le_addr_type(iso_pi(sk)->dst_type),
- &iso_pi(sk)->qos, iso_pi(sk)->base_len,
- iso_pi(sk)->base);
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto unlock;
+ /* Just bind if DEFER_SETUP has been set */
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst,
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
+ } else {
+ hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
}
conn = iso_conn_add(hcon);
@@ -317,6 +333,9 @@ static int iso_connect_bis(struct sock *sk)
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
+ } else if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECT;
} else {
sk->sk_state = BT_CONNECT;
iso_sock_set_timer(sk, sk->sk_sndtimeo);
@@ -600,18 +619,6 @@ static void iso_sock_kill(struct sock *sk)
sock_put(sk);
}
-static void iso_conn_defer_reject(struct hci_conn *conn)
-{
- struct hci_cp_le_reject_cis cp;
-
- BT_DBG("conn %p", conn);
-
- memset(&cp, 0, sizeof(cp));
- cp.handle = cpu_to_le16(conn->handle);
- cp.reason = HCI_ERROR_REJ_BAD_ADDR;
- hci_send_cmd(conn->hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp);
-}
-
static void __iso_sock_close(struct sock *sk)
{
BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
@@ -621,6 +628,7 @@ static void __iso_sock_close(struct sock *sk)
iso_sock_cleanup_listen(sk);
break;
+ case BT_CONNECT:
case BT_CONNECTED:
case BT_CONFIG:
if (iso_pi(sk)->conn->hcon) {
@@ -636,21 +644,6 @@ static void __iso_sock_close(struct sock *sk)
break;
case BT_CONNECT2:
- if (iso_pi(sk)->conn->hcon)
- iso_conn_defer_reject(iso_pi(sk)->conn->hcon);
- iso_chan_del(sk, ECONNRESET);
- break;
- case BT_CONNECT:
- /* In case of DEFER_SETUP the hcon would be bound to CIG which
- * needs to be removed so just call hci_conn_del so the cleanup
- * callback do what is needed.
- */
- if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
- iso_pi(sk)->conn->hcon) {
- hci_conn_del(iso_pi(sk)->conn->hcon);
- iso_pi(sk)->conn->hcon = NULL;
- }
-
iso_chan_del(sk, ECONNRESET);
break;
case BT_DISCONN:
@@ -724,21 +717,13 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &iso_proto, kern);
+ sk = bt_sock_alloc(net, sock, &iso_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = iso_sock_destruct;
sk->sk_sndtimeo = ISO_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
/* Set address type as public as default src address is BDADDR_ANY */
iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
@@ -1202,6 +1187,12 @@ static bool check_io_qos(struct bt_iso_io_qos *qos)
static bool check_ucast_qos(struct bt_iso_qos *qos)
{
+ if (qos->ucast.cig > 0xef && qos->ucast.cig != BT_ISO_QOS_CIG_UNSET)
+ return false;
+
+ if (qos->ucast.cis > 0xef && qos->ucast.cis != BT_ISO_QOS_CIS_UNSET)
+ return false;
+
if (qos->ucast.sca > 0x07)
return false;
@@ -1291,6 +1282,18 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
break;
+ case BT_PKT_STATUS:
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (opt)
+ set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+ else
+ clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+ break;
+
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
sk->sk_state != BT_CONNECT2) {
@@ -1376,6 +1379,12 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
break;
+ case BT_PKT_STATUS:
+ if (put_user(test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags),
+ (int __user *)optval))
+ err = -EFAULT;
+ break;
+
case BT_ISO_QOS:
qos = iso_sock_get_qos(sk);
@@ -1466,7 +1475,7 @@ static int iso_sock_release(struct socket *sock)
iso_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ if (sock_flag(sk, SOCK_LINGER) && READ_ONCE(sk->sk_lingertime) &&
!(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
@@ -1563,6 +1572,12 @@ static void iso_conn_ready(struct iso_conn *conn)
hci_conn_hold(hcon);
iso_chan_add(conn, sk, parent);
+ if (ev && ((struct hci_evt_le_big_sync_estabilished *)ev)->status) {
+ /* Trigger error signal on child socket */
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_error_report(sk);
+ }
+
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
sk->sk_state = BT_CONNECT2;
else
@@ -1631,15 +1646,17 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
- err = hci_le_big_create_sync(hdev,
- &iso_pi(sk)->qos,
- iso_pi(sk)->sync_handle,
- iso_pi(sk)->bc_num_bis,
- iso_pi(sk)->bc_bis);
- if (err) {
- bt_dev_err(hdev, "hci_le_big_create_sync: %d",
- err);
- sk = NULL;
+ if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
+ err = hci_le_big_create_sync(hdev,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
+ if (err) {
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ sk = NULL;
+ }
}
}
} else {
@@ -1676,13 +1693,18 @@ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
}
/* Create CIS if pending */
- hci_le_create_cis(hcon);
+ hci_le_create_cis_pending(hcon->hdev);
return;
}
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
- if (!status) {
+ /* Similar to the success case, if HCI_CONN_BIG_SYNC_FAILED is set,
+ * queue the failed bis connection into the accept queue of the
+ * listening socket and wake up userspace, to inform the user about
+ * the BIG sync failed event.
+ */
+ if (!status || test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
struct iso_conn *conn;
conn = iso_conn_add(hcon);
@@ -1757,6 +1779,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (len == skb->len) {
/* Complete frame received */
+ hci_skb_pkt_status(skb) = flags & 0x03;
iso_recv_frame(conn, skb);
return;
}
@@ -1778,6 +1801,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (!conn->rx_skb)
goto drop;
+ hci_skb_pkt_status(conn->rx_skb) = flags & 0x03;
skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
skb->len);
conn->rx_len = len - skb->len;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 947ca580bb9a..3bdfc3f1e73d 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -178,21 +178,6 @@ done:
return err;
}
-static void l2cap_sock_init_pid(struct sock *sk)
-{
- struct l2cap_chan *chan = l2cap_pi(sk)->chan;
-
- /* Only L2CAP_MODE_EXT_FLOWCTL ever need to access the PID in order to
- * group the channels being requested.
- */
- if (chan->mode != L2CAP_MODE_EXT_FLOWCTL)
- return;
-
- spin_lock(&sk->sk_peer_lock);
- sk->sk_peer_pid = get_pid(task_tgid(current));
- spin_unlock(&sk->sk_peer_lock);
-}
-
static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
@@ -268,8 +253,6 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
chan->mode != L2CAP_MODE_EXT_FLOWCTL)
chan->mode = L2CAP_MODE_LE_FLOWCTL;
- l2cap_sock_init_pid(sk);
-
err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid),
&la.l2_bdaddr, la.l2_bdaddr_type);
if (err)
@@ -325,8 +308,6 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
goto done;
}
- l2cap_sock_init_pid(sk);
-
sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
@@ -1858,21 +1839,13 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
struct sock *sk;
struct l2cap_chan *chan;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern);
+ sk = bt_sock_alloc(net, sock, &l2cap_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = l2cap_sock_destruct;
sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d4498037fadc..d6c9b7bc8592 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -944,6 +944,12 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (cis_peripheral_capable(hdev))
settings |= MGMT_SETTING_CIS_PERIPHERAL;
+ if (bis_capable(hdev))
+ settings |= MGMT_SETTING_ISO_BROADCASTER;
+
+ if (sync_recv_capable(hdev))
+ settings |= MGMT_SETTING_ISO_SYNC_RECEIVER;
+
return settings;
}
@@ -3580,18 +3586,6 @@ unlock:
return err;
}
-static int abort_conn_sync(struct hci_dev *hdev, void *data)
-{
- struct hci_conn *conn;
- u16 handle = PTR_ERR(data);
-
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- return 0;
-
- return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
-}
-
static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -3642,8 +3636,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
le_addr_type(addr->type));
if (conn->conn_reason == CONN_REASON_PAIR_DEVICE)
- hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle),
- NULL);
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
unlock:
hci_dev_unlock(hdev);
@@ -8435,8 +8428,8 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
supported_flags = get_supported_adv_flags(hdev);
rp->supported_flags = cpu_to_le32(supported_flags);
- rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
- rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
+ rp->max_adv_data_len = max_adv_len(hdev);
+ rp->max_scan_rsp_len = max_adv_len(hdev);
rp->max_instances = hdev->le_num_of_adv_sets;
rp->num_instances = hdev->adv_instance_cnt;
@@ -8472,7 +8465,7 @@ static u8 calculate_name_len(struct hci_dev *hdev)
static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags,
bool is_adv_data)
{
- u8 max_len = HCI_MAX_AD_LENGTH;
+ u8 max_len = max_adv_len(hdev);
if (is_adv_data) {
if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index bf5cee48916c..abbafa6194ca 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -91,6 +91,33 @@ struct msft_ev_le_monitor_device {
struct msft_monitor_advertisement_handle_data {
__u8 msft_handle;
__u16 mgmt_handle;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 cond_type;
+ struct list_head list;
+};
+
+enum monitor_addr_filter_state {
+ AF_STATE_IDLE,
+ AF_STATE_ADDING,
+ AF_STATE_ADDED,
+ AF_STATE_REMOVING,
+};
+
+#define MSFT_MONITOR_ADVERTISEMENT_TYPE_ADDR 0x04
+struct msft_monitor_addr_filter_data {
+ __u8 msft_handle;
+ __u8 pattern_handle; /* address filters pertain to */
+ __u16 mgmt_handle;
+ int state;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 addr_type;
+ bdaddr_t bdaddr;
struct list_head list;
};
@@ -99,9 +126,12 @@ struct msft_data {
__u8 evt_prefix_len;
__u8 *evt_prefix;
struct list_head handle_map;
+ struct list_head address_filters;
__u8 resuming;
__u8 suspending;
__u8 filter_enabled;
+ /* To synchronize add/remove address filter and monitor device event.*/
+ struct mutex filter_lock;
};
bool msft_monitor_supported(struct hci_dev *hdev)
@@ -180,6 +210,24 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
return NULL;
}
+/* This function requires the caller holds msft->filter_lock */
+static struct msft_monitor_addr_filter_data *msft_find_address_data
+ (struct hci_dev *hdev, u8 addr_type, bdaddr_t *addr,
+ u8 pattern_handle)
+{
+ struct msft_monitor_addr_filter_data *entry;
+ struct msft_data *msft = hdev->msft_data;
+
+ list_for_each_entry(entry, &msft->address_filters, list) {
+ if (entry->pattern_handle == pattern_handle &&
+ addr_type == entry->addr_type &&
+ !bacmp(addr, &entry->bdaddr))
+ return entry;
+ }
+
+ return NULL;
+}
+
/* This function requires the caller holds hdev->lock */
static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
bdaddr_t *bdaddr, __u8 addr_type,
@@ -240,6 +288,7 @@ static int msft_le_monitor_advertisement_cb(struct hci_dev *hdev, u16 opcode,
handle_data->mgmt_handle = monitor->handle;
handle_data->msft_handle = rp->handle;
+ handle_data->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN;
INIT_LIST_HEAD(&handle_data->list);
list_add(&handle_data->list, &msft->handle_map);
@@ -254,6 +303,70 @@ unlock:
return status;
}
+/* This function requires the caller holds hci_req_sync_lock */
+static void msft_remove_addr_filters_sync(struct hci_dev *hdev, u8 handle)
+{
+ struct msft_monitor_addr_filter_data *address_filter, *n;
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct list_head head;
+ struct sk_buff *skb;
+
+ INIT_LIST_HEAD(&head);
+
+ /* Cancel all corresponding address monitors */
+ mutex_lock(&msft->filter_lock);
+
+ list_for_each_entry_safe(address_filter, n, &msft->address_filters,
+ list) {
+ if (address_filter->pattern_handle != handle)
+ continue;
+
+ list_del(&address_filter->list);
+
+ /* Keep the address filter and let
+ * msft_add_address_filter_sync() remove and free the address
+ * filter.
+ */
+ if (address_filter->state == AF_STATE_ADDING) {
+ address_filter->state = AF_STATE_REMOVING;
+ continue;
+ }
+
+ /* Keep the address filter and let
+ * msft_cancel_address_filter_sync() remove and free the address
+ * filter
+ */
+ if (address_filter->state == AF_STATE_REMOVING)
+ continue;
+
+ list_add_tail(&address_filter->list, &head);
+ }
+
+ mutex_unlock(&msft->filter_lock);
+
+ list_for_each_entry_safe(address_filter, n, &head, list) {
+ list_del(&address_filter->list);
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = address_filter->msft_handle;
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ kfree(address_filter);
+ continue;
+ }
+
+ kfree_skb(skb);
+
+ bt_dev_dbg(hdev, "MSFT: Canceled device %pMR address filter",
+ &address_filter->bdaddr);
+
+ kfree(address_filter);
+ }
+}
+
static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
u16 opcode,
struct adv_monitor *monitor,
@@ -263,6 +376,7 @@ static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_data *msft = hdev->msft_data;
int status = 0;
+ u8 msft_handle;
rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data;
if (skb->len < sizeof(*rp)) {
@@ -293,11 +407,17 @@ static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
NULL, 0, false);
}
+ msft_handle = handle_data->msft_handle;
+
list_del(&handle_data->list);
kfree(handle_data);
- }
- hci_dev_unlock(hdev);
+ hci_dev_unlock(hdev);
+
+ msft_remove_addr_filters_sync(hdev, msft_handle);
+ } else {
+ hci_dev_unlock(hdev);
+ }
done:
return status;
@@ -394,12 +514,14 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
{
struct msft_cp_le_monitor_advertisement *cp;
struct msft_le_monitor_advertisement_pattern_data *pattern_data;
+ struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_le_monitor_advertisement_pattern *pattern;
struct adv_pattern *entry;
size_t total_size = sizeof(*cp) + sizeof(*pattern_data);
ptrdiff_t offset = 0;
u8 pattern_count = 0;
struct sk_buff *skb;
+ int err;
if (!msft_monitor_pattern_valid(monitor))
return -EINVAL;
@@ -436,16 +558,31 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp,
HCI_CMD_TIMEOUT);
- kfree(cp);
if (IS_ERR_OR_NULL(skb)) {
- if (!skb)
- return -EIO;
- return PTR_ERR(skb);
+ err = PTR_ERR(skb);
+ goto out_free;
}
- return msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
- monitor, skb);
+ err = msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
+ monitor, skb);
+ if (err)
+ goto out_free;
+
+ handle_data = msft_find_handle_data(hdev, monitor->handle, true);
+ if (!handle_data) {
+ err = -ENODATA;
+ goto out_free;
+ }
+
+ handle_data->rssi_high = cp->rssi_high;
+ handle_data->rssi_low = cp->rssi_low;
+ handle_data->rssi_low_interval = cp->rssi_low_interval;
+ handle_data->rssi_sampling_period = cp->rssi_sampling_period;
+
+out_free:
+ kfree(cp);
+ return err;
}
/* This function requires the caller holds hci_req_sync_lock */
@@ -538,6 +675,7 @@ void msft_do_close(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
struct msft_monitor_advertisement_handle_data *handle_data, *tmp;
+ struct msft_monitor_addr_filter_data *address_filter, *n;
struct adv_monitor *monitor;
if (!msft)
@@ -559,6 +697,14 @@ void msft_do_close(struct hci_dev *hdev)
kfree(handle_data);
}
+ mutex_lock(&msft->filter_lock);
+ list_for_each_entry_safe(address_filter, n, &msft->address_filters,
+ list) {
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ }
+ mutex_unlock(&msft->filter_lock);
+
hci_dev_lock(hdev);
/* Clear any devices that are being monitored and notify device lost */
@@ -568,6 +714,49 @@ void msft_do_close(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
+static int msft_cancel_address_filter_sync(struct hci_dev *hdev, void *data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = data;
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct sk_buff *skb;
+ int err = 0;
+
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT: msft data is freed");
+ return -EINVAL;
+ }
+
+ /* The address filter has been removed by hci dev close */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return 0;
+
+ mutex_lock(&msft->filter_lock);
+ list_del(&address_filter->list);
+ mutex_unlock(&msft->filter_lock);
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = address_filter->msft_handle;
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ bt_dev_err(hdev, "MSFT: Failed to cancel address (%pMR) filter",
+ &address_filter->bdaddr);
+ err = -EIO;
+ goto done;
+ }
+ kfree_skb(skb);
+
+ bt_dev_dbg(hdev, "MSFT: Canceled device %pMR address filter",
+ &address_filter->bdaddr);
+
+done:
+ kfree(address_filter);
+
+ return err;
+}
+
void msft_register(struct hci_dev *hdev)
{
struct msft_data *msft = NULL;
@@ -581,7 +770,9 @@ void msft_register(struct hci_dev *hdev)
}
INIT_LIST_HEAD(&msft->handle_map);
+ INIT_LIST_HEAD(&msft->address_filters);
hdev->msft_data = msft;
+ mutex_init(&msft->filter_lock);
}
void msft_unregister(struct hci_dev *hdev)
@@ -596,6 +787,7 @@ void msft_unregister(struct hci_dev *hdev)
hdev->msft_data = NULL;
kfree(msft->evt_prefix);
+ mutex_destroy(&msft->filter_lock);
kfree(msft);
}
@@ -645,11 +837,149 @@ static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
return data;
}
+static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = data;
+ struct msft_rp_le_monitor_advertisement *rp;
+ struct msft_cp_le_monitor_advertisement *cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct sk_buff *skb = NULL;
+ bool remove = false;
+ size_t size;
+
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT: msft data is freed");
+ return -EINVAL;
+ }
+
+ /* The address filter has been removed by hci dev close */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return -ENODEV;
+
+ /* We are safe to use the address filter from now on.
+ * msft_monitor_device_evt() wouldn't delete this filter because it's
+ * not been added by now.
+ * And all other functions that requiring hci_req_sync_lock wouldn't
+ * touch this filter before this func completes because it's protected
+ * by hci_req_sync_lock.
+ */
+
+ if (address_filter->state == AF_STATE_REMOVING) {
+ mutex_lock(&msft->filter_lock);
+ list_del(&address_filter->list);
+ mutex_unlock(&msft->filter_lock);
+ kfree(address_filter);
+ return 0;
+ }
+
+ size = sizeof(*cp) +
+ sizeof(address_filter->addr_type) +
+ sizeof(address_filter->bdaddr);
+ cp = kzalloc(size, GFP_KERNEL);
+ if (!cp) {
+ bt_dev_err(hdev, "MSFT: Alloc cmd param err");
+ remove = true;
+ goto done;
+ }
+ cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT;
+ cp->rssi_high = address_filter->rssi_high;
+ cp->rssi_low = address_filter->rssi_low;
+ cp->rssi_low_interval = address_filter->rssi_low_interval;
+ cp->rssi_sampling_period = address_filter->rssi_sampling_period;
+ cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_ADDR;
+ cp->data[0] = address_filter->addr_type;
+ memcpy(&cp->data[1], &address_filter->bdaddr,
+ sizeof(address_filter->bdaddr));
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ bt_dev_err(hdev, "Failed to enable address %pMR filter",
+ &address_filter->bdaddr);
+ skb = NULL;
+ remove = true;
+ goto done;
+ }
+
+ rp = skb_pull_data(skb, sizeof(*rp));
+ if (!rp || rp->sub_opcode != MSFT_OP_LE_MONITOR_ADVERTISEMENT ||
+ rp->status)
+ remove = true;
+
+done:
+ mutex_lock(&msft->filter_lock);
+
+ if (remove) {
+ bt_dev_warn(hdev, "MSFT: Remove address (%pMR) filter",
+ &address_filter->bdaddr);
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ } else {
+ address_filter->state = AF_STATE_ADDED;
+ address_filter->msft_handle = rp->handle;
+ bt_dev_dbg(hdev, "MSFT: Address %pMR filter enabled",
+ &address_filter->bdaddr);
+ }
+ mutex_unlock(&msft->filter_lock);
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
+/* This function requires the caller holds msft->filter_lock */
+static struct msft_monitor_addr_filter_data *msft_add_address_filter
+ (struct hci_dev *hdev, u8 addr_type, bdaddr_t *bdaddr,
+ struct msft_monitor_advertisement_handle_data *handle_data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = NULL;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+
+ address_filter = kzalloc(sizeof(*address_filter), GFP_KERNEL);
+ if (!address_filter)
+ return NULL;
+
+ address_filter->state = AF_STATE_ADDING;
+ address_filter->msft_handle = 0xff;
+ address_filter->pattern_handle = handle_data->msft_handle;
+ address_filter->mgmt_handle = handle_data->mgmt_handle;
+ address_filter->rssi_high = handle_data->rssi_high;
+ address_filter->rssi_low = handle_data->rssi_low;
+ address_filter->rssi_low_interval = handle_data->rssi_low_interval;
+ address_filter->rssi_sampling_period = handle_data->rssi_sampling_period;
+ address_filter->addr_type = addr_type;
+ bacpy(&address_filter->bdaddr, bdaddr);
+
+ /* With the above AF_STATE_ADDING, duplicated address filter can be
+ * avoided when receiving monitor device event (found/lost) frequently
+ * for the same device.
+ */
+ list_add_tail(&address_filter->list, &msft->address_filters);
+
+ err = hci_cmd_sync_queue(hdev, msft_add_address_filter_sync,
+ address_filter, NULL);
+ if (err < 0) {
+ bt_dev_err(hdev, "MSFT: Add address %pMR filter err", bdaddr);
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ return NULL;
+ }
+
+ bt_dev_dbg(hdev, "MSFT: Add device %pMR address filter",
+ &address_filter->bdaddr);
+
+ return address_filter;
+}
+
/* This function requires the caller holds hdev->lock */
static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
+ struct msft_monitor_addr_filter_data *n, *address_filter = NULL;
struct msft_ev_le_monitor_device *ev;
struct msft_monitor_advertisement_handle_data *handle_data;
+ struct msft_data *msft = hdev->msft_data;
+ u16 mgmt_handle = 0xffff;
u8 addr_type;
ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
@@ -662,9 +992,53 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
ev->monitor_state, &ev->bdaddr);
handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
- if (!handle_data)
+
+ if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) {
+ if (!handle_data)
+ return;
+ mgmt_handle = handle_data->mgmt_handle;
+ goto report_state;
+ }
+
+ if (handle_data) {
+ /* Don't report any device found/lost event from pattern
+ * monitors. Pattern monitor always has its address filters for
+ * tracking devices.
+ */
+
+ address_filter = msft_find_address_data(hdev, ev->addr_type,
+ &ev->bdaddr,
+ handle_data->msft_handle);
+ if (address_filter)
+ return;
+
+ if (ev->monitor_state && handle_data->cond_type ==
+ MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN)
+ msft_add_address_filter(hdev, ev->addr_type,
+ &ev->bdaddr, handle_data);
+
return;
+ }
+ /* This device event is not from pattern monitor.
+ * Report it if there is a corresponding address_filter for it.
+ */
+ list_for_each_entry(n, &msft->address_filters, list) {
+ if (n->state == AF_STATE_ADDED &&
+ n->msft_handle == ev->monitor_handle) {
+ mgmt_handle = n->mgmt_handle;
+ address_filter = n;
+ break;
+ }
+ }
+
+ if (!address_filter) {
+ bt_dev_warn(hdev, "MSFT: Unexpected device event %pMR, %u, %u",
+ &ev->bdaddr, ev->monitor_handle, ev->monitor_state);
+ return;
+ }
+
+report_state:
switch (ev->addr_type) {
case ADDR_LE_DEV_PUBLIC:
addr_type = BDADDR_LE_PUBLIC;
@@ -681,12 +1055,18 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- if (ev->monitor_state)
- msft_device_found(hdev, &ev->bdaddr, addr_type,
- handle_data->mgmt_handle);
- else
- msft_device_lost(hdev, &ev->bdaddr, addr_type,
- handle_data->mgmt_handle);
+ if (ev->monitor_state) {
+ msft_device_found(hdev, &ev->bdaddr, addr_type, mgmt_handle);
+ } else {
+ if (address_filter && address_filter->state == AF_STATE_ADDED) {
+ address_filter->state = AF_STATE_REMOVING;
+ hci_cmd_sync_queue(hdev,
+ msft_cancel_address_filter_sync,
+ address_filter,
+ NULL);
+ }
+ msft_device_lost(hdev, &ev->bdaddr, addr_type, mgmt_handle);
+ }
}
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
@@ -724,7 +1104,9 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
switch (*evt) {
case MSFT_EV_LE_MONITOR_DEVICE:
+ mutex_lock(&msft->filter_lock);
msft_monitor_device_evt(hdev, skb);
+ mutex_unlock(&msft->filter_lock);
break;
default:
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4397e14ff560..b54e8a530f55 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -268,18 +268,16 @@ static struct proto rfcomm_proto = {
.obj_size = sizeof(struct rfcomm_pinfo)
};
-static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
+static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock,
+ int proto, gfp_t prio, int kern)
{
struct rfcomm_dlc *d;
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern);
+ sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
d = rfcomm_dlc_alloc(prio);
if (!d) {
sk_free(sk);
@@ -298,11 +296,6 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int
sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&rfcomm_sk_list, sk);
BT_DBG("sk %p", sk);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 7762604ddfc0..c736186aba26 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -68,7 +68,6 @@ struct sco_pinfo {
bdaddr_t dst;
__u32 flags;
__u16 setting;
- __u8 cmsg_mask;
struct bt_codec codec;
struct sco_conn *conn;
};
@@ -471,15 +470,6 @@ static void sco_sock_close(struct sock *sk)
release_sock(sk);
}
-static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
- struct sock *sk)
-{
- if (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS)
- put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
- sizeof(bt_cb(skb)->sco.pkt_status),
- &bt_cb(skb)->sco.pkt_status);
-}
-
static void sco_sock_init(struct sock *sk, struct sock *parent)
{
BT_DBG("sk %p", sk);
@@ -488,8 +478,6 @@ static void sco_sock_init(struct sock *sk, struct sock *parent)
sk->sk_type = parent->sk_type;
bt_sk(sk)->flags = bt_sk(parent)->flags;
security_sk_clone(parent, sk);
- } else {
- bt_sk(sk)->skb_put_cmsg = sco_skb_put_cmsg;
}
}
@@ -504,21 +492,13 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern);
+ sk = bt_sock_alloc(net, sock, &sco_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = sco_sock_destruct;
sk->sk_sndtimeo = SCO_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
sco_pi(sk)->codec.id = BT_CODEC_CVSD;
sco_pi(sk)->codec.cid = 0xffff;
@@ -915,9 +895,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
}
if (opt)
- sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS;
+ set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
else
- sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS;
+ clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
break;
case BT_CODEC:
@@ -1048,7 +1028,6 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
int len, err = 0;
struct bt_voice voice;
u32 phys;
- int pkt_status;
int buf_len;
struct codec_list *c;
u8 num_codecs, i, __user *ptr;
@@ -1102,9 +1081,8 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- pkt_status = (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS);
-
- if (put_user(pkt_status, (int __user *)optval))
+ if (put_user(test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags),
+ (int __user *)optval))
err = -EFAULT;
break;
@@ -1267,7 +1245,7 @@ static int sco_sock_release(struct socket *sock)
sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ if (sock_flag(sk, SOCK_LINGER) && READ_ONCE(sk->sk_lingertime) &&
!(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2321bd2f9964..57a7a64b84ed 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -15,11 +15,12 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
#include <linux/netfilter.h>
+#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -555,12 +556,23 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
+void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
+{
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
+__bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
+ void *e, char f, int g)
+{
+ *b += 1;
+ return a + *b + c + d + (long)e + f + g;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -596,6 +608,7 @@ __diag_pop();
BTF_SET8_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
+BTF_ID_FLAGS(func, bpf_modify_return_test2)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
BTF_SET8_END(bpf_test_modify_return_ids)
@@ -663,7 +676,11 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
case BPF_MODIFY_RETURN:
ret = bpf_modify_return_test(1, &b);
if (b != 2)
- side_effect = 1;
+ side_effect++;
+ b = 2;
+ ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7);
+ if (b != 2)
+ side_effect++;
break;
default:
goto out;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 4f5098d33a46..a6e94ceb7c9a 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -234,6 +234,14 @@ static int br_switchdev_blocking_event(struct notifier_block *nb,
br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
b->blocking_nb);
break;
+ case SWITCHDEV_BRPORT_REPLAY:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_replay(p, b->dev, b->ctx, b->atomic_nb,
+ b->blocking_nb, extack);
+ err = notifier_from_errno(err);
+ break;
}
out:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6116eba1bd89..9d7bc8b96b53 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -154,6 +154,7 @@ void br_forward(const struct net_bridge_port *to,
backup_port = rcu_dereference(to->backup_port);
if (unlikely(!backup_port))
goto out;
+ BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid);
to = backup_port;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 05c5863d2e20..10f0d33d8ccf 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -211,6 +211,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_BACKUP_NHID */
+ 0;
}
@@ -319,6 +320,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
backup_p->dev->ifindex);
rcu_read_unlock();
+ if (p->backup_nhid &&
+ nla_put_u32(skb, IFLA_BRPORT_BACKUP_NHID, p->backup_nhid))
+ return -EMSGSIZE;
+
return 0;
}
@@ -895,6 +900,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_BRPORT_BACKUP_NHID] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1065,6 +1071,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
return err;
}
+ if (tb[IFLA_BRPORT_BACKUP_NHID]) {
+ u32 backup_nhid = nla_get_u32(tb[IFLA_BRPORT_BACKUP_NHID]);
+
+ WRITE_ONCE(p->backup_nhid, backup_nhid);
+ }
+
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a63b32c1638e..a1f4acfa6994 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -387,6 +387,7 @@ struct net_bridge_port {
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
struct net_bridge_port __rcu *backup_port;
+ u32 backup_nhid;
/* STP */
u8 priority;
@@ -605,6 +606,8 @@ struct br_input_skb_cb {
*/
unsigned long fwd_hwdoms;
#endif
+
+ u32 backup_nhid;
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -971,7 +974,6 @@ int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router);
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack);
int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val);
-int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
unsigned long val);
#if IS_ENABLED(CONFIG_IPV6)
@@ -2115,6 +2117,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
+
bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
@@ -2165,6 +2173,16 @@ br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
{
}
+static inline int
+br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
return false;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ba95c4d74a60..ee84e783e1df 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -727,6 +727,8 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
err = br_switchdev_mdb_replay_one(nb, dev,
SWITCHDEV_OBJ_PORT_MDB(obj),
action, ctx, extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
if (err)
goto out_free_mdb;
}
@@ -759,8 +761,10 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
extack);
- if (err && err != -EOPNOTSUPP)
+ if (err) {
+ /* -EOPNOTSUPP not propagated from MDB replay. */
return err;
+ }
err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
if (err && err != -EOPNOTSUPP)
@@ -825,3 +829,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
nbp_switchdev_del(p);
}
+
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6399a8a69d07..81833ca7a2c7 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -201,6 +201,21 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
if (err)
return err;
+ if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
+ tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+ tunnel_id, 0);
+ if (!tunnel_dst)
+ return -ENOMEM;
+
+ tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
+ IP_TUNNEL_INFO_BRIDGE;
+ tunnel_dst->u.tun_info.key.nhid =
+ BR_INPUT_SKB_CB(skb)->backup_nhid;
+ skb_dst_set(skb, &tunnel_dst->dst);
+
+ return 0;
+ }
+
tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
skb_dst_set(skb, &tunnel_dst->dst);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 757ec46fc45a..aa23479b20b2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2115,8 +2115,7 @@ static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *ba
return ret;
offsets[0] = sizeof(struct ebt_entry); /* matches come first */
- memcpy(&offsets[1], &entry->watchers_offset,
- sizeof(offsets) - sizeof(offsets[0]));
+ memcpy(&offsets[1], &entry->offsets, sizeof(entry->offsets));
if (state->buf_kern_start) {
buf_start = state->buf_kern_start + state->buf_kern_offset;
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..17e6281e408c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -107,6 +107,7 @@
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <net/tcx.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -132,6 +133,7 @@
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <trace/events/qdisc.h>
+#include <trace/events/xdp.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
@@ -150,11 +152,11 @@
#include <linux/pm_runtime.h>
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include <net/netdev_rx_queue.h>
#include "dev.h"
#include "net-sysfs.h"
-
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
@@ -388,6 +390,8 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+ /* We reserved the ifindex, this can't fail */
+ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
dev_base_seq_inc(net);
}
@@ -397,8 +401,12 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct net *net = dev_net(dev);
+
ASSERT_RTNL();
+ xa_erase(&net->dev_by_index, dev->ifindex);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -2384,8 +2392,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
struct xps_map *map = NULL;
int pos;
- if (dev_maps)
- map = xmap_dereference(dev_maps->attr_map[tci]);
+ map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@@ -3882,69 +3889,198 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(dev_loopback_xmit);
#ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif /* CONFIG_NET_EGRESS */
+
+#ifdef CONFIG_NET_XGRESS
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+{
+ int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
- struct tcf_result cl_res;
+ struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
+ struct tcf_result res;
if (!miniq)
- return skb;
+ return ret;
- /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
- mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+ ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+ /* Only tcf related quirks below. */
+ switch (ret) {
+ case TC_ACT_SHOT:
+ mini_qdisc_qstats_cpu_drop(miniq);
+ break;
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
+ skb->tc_index = TC_H_MIN(res.classid);
break;
+ }
+#endif /* CONFIG_NET_CLS_ACT */
+ return ret;
+}
+
+static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
+
+void tcx_inc(void)
+{
+ static_branch_inc(&tcx_needed_key);
+}
+
+void tcx_dec(void)
+{
+ static_branch_dec(&tcx_needed_key);
+}
+
+static __always_inline enum tcx_action_base
+tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
+ const bool needs_mac)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *prog;
+ int ret = TCX_NEXT;
+
+ if (needs_mac)
+ __skb_push(skb, skb->mac_len);
+ bpf_mprog_foreach_prog(entry, fp, prog) {
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run(prog, skb);
+ if (ret != TCX_NEXT)
+ break;
+ }
+ if (needs_mac)
+ __skb_pull(skb, skb->mac_len);
+ return tcx_action_code(skb, ret);
+}
+
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ tcx_set_ingress(skb, true);
+
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, true);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto ingress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+ingress_verdict:
+ switch (sch_ret) {
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by BPF, so we can safely
+ * push the L2 header back before redirecting to another
+ * netdev.
+ */
+ __skb_push(skb, skb->mac_len);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
+ *ret = NET_RX_SUCCESS;
+ return NULL;
case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- *ret = NET_XMIT_DROP;
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
return NULL;
+ /* used by tc_run */
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
- *ret = NET_XMIT_SUCCESS;
consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
return NULL;
+ }
+
+ return skb;
+}
+
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+
+ /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
+ * already set by the caller.
+ */
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, false);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto egress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+egress_verdict:
+ switch (sch_ret) {
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
skb_do_redirect(skb);
*ret = NET_XMIT_SUCCESS;
return NULL;
- default:
- break;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ *ret = NET_XMIT_DROP;
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ *ret = NET_XMIT_SUCCESS;
+ return NULL;
}
-#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-
-static struct netdev_queue *
-netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
-{
- int qm = skb_get_queue_mapping(skb);
-
- return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
-}
-
-static bool netdev_xmit_txqueue_skipped(void)
+#else
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
{
- return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+ return skb;
}
-void netdev_xmit_skip_txqueue(bool skip)
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+ return skb;
}
-EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
-#endif /* CONFIG_NET_EGRESS */
+#endif /* CONFIG_NET_XGRESS */
#ifdef CONFIG_XPS
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
@@ -4128,9 +4264,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_update_prio(skb);
qdisc_pkt_len_init(skb);
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_at_ingress = 0;
-#endif
+ tcx_set_ingress(skb, false);
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
if (nf_hook_egress_active()) {
@@ -5064,72 +5198,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-static inline struct sk_buff *
-sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev, bool *another)
-{
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
- struct tcf_result cl_res;
-
- /* If there's at least one ingress present somewhere (so
- * we get here via enabled static key), remaining devices
- * that are not configured with an ingress qdisc will bail
- * out here.
- */
- if (!miniq)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- qdisc_skb_cb(skb)->pkt_len = skb->len;
- tc_skb_cb(skb)->mru = 0;
- tc_skb_cb(skb)->post_ct = false;
- skb->tc_at_ingress = 1;
- mini_qdisc_bstats_cpu_update(miniq, skb);
-
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
- case TC_ACT_OK:
- case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
- break;
- case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
- *ret = NET_RX_DROP;
- return NULL;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- case TC_ACT_TRAP:
- consume_skb(skb);
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_REDIRECT:
- /* skb_mac_header check was done by cls/act_bpf, so
- * we can safely push the L2 header back before
- * redirecting to another netdev
- */
- __skb_push(skb, skb->mac_len);
- if (skb_do_redirect(skb) == -EAGAIN) {
- __skb_pull(skb, skb->mac_len);
- *another = true;
- break;
- }
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_CONSUMED:
- *ret = NET_RX_SUCCESS;
- return NULL;
- default:
- break;
- }
-#endif /* CONFIG_NET_CLS_ACT */
- return skb;
-}
-
/**
* netdev_is_rx_handler_busy - check if receive handler is registered
* @dev: device to check
@@ -6316,12 +6384,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (threaded)
- set_bit(NAPI_STATE_THREADED, &napi->state);
- else
- clear_bit(NAPI_STATE_THREADED, &napi->state);
- }
+ list_for_each_entry(napi, &dev->napi_list, dev_list)
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
return err;
}
@@ -9413,6 +9477,7 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct net *net = current->nsproxy->net_ns;
struct bpf_link_primer link_primer;
+ struct netlink_ext_ack extack = {};
struct bpf_xdp_link *link;
struct net_device *dev;
int err, fd;
@@ -9440,12 +9505,13 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
- err = dev_xdp_attach_link(dev, NULL, link);
+ err = dev_xdp_attach_link(dev, &extack, link);
rtnl_unlock();
if (err) {
link->dev = NULL;
bpf_link_cleanup(&link_primer);
+ trace_bpf_xdp_link_attach_failed(extack._msg);
goto out_put_dev;
}
@@ -9509,23 +9575,40 @@ err_out:
}
/**
- * dev_new_index - allocate an ifindex
- * @net: the applicable net namespace
+ * dev_index_reserve() - allocate an ifindex in a namespace
+ * @net: the applicable net namespace
+ * @ifindex: requested ifindex, pass %0 to get one allocated
+ *
+ * Allocate a ifindex for a new device. Caller must either use the ifindex
+ * to store the device (via list_netdevice()) or call dev_index_release()
+ * to give the index up.
*
- * Returns a suitable unique value for a new device interface
- * number. The caller must hold the rtnl semaphore or the
- * dev_base_lock to be sure it remains unique.
+ * Return: a suitable unique value for a new device interface number or -errno.
*/
-static int dev_new_index(struct net *net)
+static int dev_index_reserve(struct net *net, u32 ifindex)
{
- int ifindex = net->ifindex;
+ int err;
- for (;;) {
- if (++ifindex <= 0)
- ifindex = 1;
- if (!__dev_get_by_index(net, ifindex))
- return net->ifindex = ifindex;
+ if (ifindex > INT_MAX) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return -EINVAL;
}
+
+ if (!ifindex)
+ err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
+ xa_limit_31b, &net->ifindex, GFP_KERNEL);
+ else
+ err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ return ifindex;
+}
+
+static void dev_index_release(struct net *net, int ifindex)
+{
+ /* Expect only unused indexes, unlist_netdevice() removes the used */
+ WARN_ON(xa_erase(&net->dev_by_index, ifindex));
}
/* Delayed registration/unregisteration */
@@ -9995,11 +10078,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
}
- ret = -EBUSY;
- if (!dev->ifindex)
- dev->ifindex = dev_new_index(net);
- else if (__dev_get_by_index(net, dev->ifindex))
+ ret = dev_index_reserve(net, dev->ifindex);
+ if (ret < 0)
goto err_uninit;
+ dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
@@ -10046,7 +10128,7 @@ int register_netdevice(struct net_device *dev)
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
- goto err_uninit;
+ goto err_ifindex_release;
ret = netdev_register_kobject(dev);
write_lock(&dev_base_lock);
@@ -10102,6 +10184,8 @@ out:
err_uninit_notify:
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
+err_ifindex_release:
+ dev_index_release(net, dev->ifindex);
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -10617,6 +10701,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->xdp_zc_max_segs = 1;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
@@ -10838,7 +10923,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* Shutdown queueing discipline. */
dev_shutdown(dev);
-
+ dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
@@ -10978,9 +11063,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
}
/* Check that new_ifindex isn't used yet. */
- err = -EBUSY;
- if (new_ifindex && __dev_get_by_index(net, new_ifindex))
- goto out;
+ if (new_ifindex) {
+ err = dev_index_reserve(net, new_ifindex);
+ if (err < 0)
+ goto out;
+ } else {
+ /* If there is an ifindex conflict assign a new one */
+ err = dev_index_reserve(net, dev->ifindex);
+ if (err == -EBUSY)
+ err = dev_index_reserve(net, 0);
+ if (err < 0)
+ goto out;
+ new_ifindex = err;
+ }
/*
* And now a mini version of register_netdevice unregister_netdevice.
@@ -11008,13 +11103,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
rcu_barrier();
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
- /* If there is an ifindex conflict assign a new one */
- if (!new_ifindex) {
- if (__dev_get_by_index(net, dev->ifindex))
- new_ifindex = dev_new_index(net);
- else
- new_ifindex = dev->ifindex;
- }
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
new_ifindex);
@@ -11192,6 +11280,8 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1);
+
RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
return 0;
@@ -11289,6 +11379,7 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ xa_destroy(&net->dev_by_index);
if (net != &init_net)
WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 3730945ee294..b46aedc36939 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -5,6 +5,7 @@
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
+#include <linux/phylib_stubs.h>
#include <linux/wireless.h>
#include <linux/if_bridge.h>
#include <net/dsa_stubs.h>
@@ -252,14 +253,121 @@ static int dev_eth_ioctl(struct net_device *dev,
return ops->ndo_eth_ioctl(dev, ifr, cmd);
}
+/**
+ * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC
+ * or of attached phylib PHY
+ * @dev: Network device
+ * @cfg: Timestamping configuration structure
+ *
+ * Helper for enforcing a common policy that phylib timestamping, if available,
+ * should take precedence in front of hardware timestamping provided by the
+ * netdev.
+ *
+ * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and
+ * there only exists a phydev->mii_ts->hwtstamp() method. So this will return
+ * -EOPNOTSUPP for phylib for now, which is still more accurate than letting
+ * the netdev handle the GET request.
+ */
+static int dev_get_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ if (phy_has_hwtstamp(dev->phydev))
+ return phy_hwtstamp_get(dev->phydev, cfg);
+
+ return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
+}
+
static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP);
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct kernel_hwtstamp_config kernel_cfg = {};
+ struct hwtstamp_config cfg;
+ int err;
+
+ if (!ops->ndo_hwtstamp_get)
+ return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP); /* legacy */
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ kernel_cfg.ifr = ifr;
+ err = dev_get_hwtstamp_phylib(dev, &kernel_cfg);
+ if (err)
+ return err;
+
+ /* If the request was resolved through an unconverted driver, omit
+ * the copy_to_user(), since the implementation has already done that
+ */
+ if (!kernel_cfg.copied_to_user) {
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * dev_set_hwtstamp_phylib() - Change hardware timestamping of NIC
+ * or of attached phylib PHY
+ * @dev: Network device
+ * @cfg: Timestamping configuration structure
+ * @extack: Netlink extended ack message structure, for error reporting
+ *
+ * Helper for enforcing a common policy that phylib timestamping, if available,
+ * should take precedence in front of hardware timestamping provided by the
+ * netdev. If the netdev driver needs to perform specific actions even for PHY
+ * timestamping to work properly (a switch port must trap the timestamped
+ * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
+ * dev->priv_flags.
+ */
+static int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ bool phy_ts = phy_has_hwtstamp(dev->phydev);
+ struct kernel_hwtstamp_config old_cfg = {};
+ bool changed = false;
+ int err;
+
+ cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
+
+ if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ err = ops->ndo_hwtstamp_get(dev, &old_cfg);
+ if (err)
+ return err;
+ }
+
+ if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ err = ops->ndo_hwtstamp_set(dev, cfg, extack);
+ if (err) {
+ if (extack->_msg)
+ netdev_err(dev, "%s\n", extack->_msg);
+ return err;
+ }
+ }
+
+ if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS))
+ changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
+
+ if (phy_ts) {
+ err = phy_hwtstamp_set(dev->phydev, cfg, extack);
+ if (err) {
+ if (changed)
+ ops->ndo_hwtstamp_set(dev, &old_cfg, NULL);
+ return err;
+ }
+ }
+
+ return 0;
}
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- struct kernel_hwtstamp_config kernel_cfg;
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct kernel_hwtstamp_config kernel_cfg = {};
struct netlink_ext_ack extack = {};
struct hwtstamp_config cfg;
int err;
@@ -268,6 +376,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
return -EFAULT;
hwtstamp_config_to_kernel(&kernel_cfg, &cfg);
+ kernel_cfg.ifr = ifr;
err = net_hwtstamp_validate(&kernel_cfg);
if (err)
@@ -280,8 +389,80 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
return err;
}
- return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP);
+ if (!ops->ndo_hwtstamp_set)
+ return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP); /* legacy */
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack);
+ if (err)
+ return err;
+
+ /* The driver may have modified the configuration, so copy the
+ * updated version of it back to user space
+ */
+ if (!kernel_cfg.copied_to_user) {
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int generic_hwtstamp_ioctl_lower(struct net_device *dev, int cmd,
+ struct kernel_hwtstamp_config *kernel_cfg)
+{
+ struct ifreq ifrr;
+ int err;
+
+ strscpy_pad(ifrr.ifr_name, dev->name, IFNAMSIZ);
+ ifrr.ifr_ifru = kernel_cfg->ifr->ifr_ifru;
+
+ err = dev_eth_ioctl(dev, &ifrr, cmd);
+ if (err)
+ return err;
+
+ kernel_cfg->ifr->ifr_ifru = ifrr.ifr_ifru;
+ kernel_cfg->copied_to_user = true;
+
+ return 0;
+}
+
+int generic_hwtstamp_get_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (ops->ndo_hwtstamp_get)
+ return dev_get_hwtstamp_phylib(dev, kernel_cfg);
+
+ /* Legacy path: unconverted lower driver */
+ return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg);
+}
+EXPORT_SYMBOL(generic_hwtstamp_get_lower);
+
+int generic_hwtstamp_set_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (ops->ndo_hwtstamp_set)
+ return dev_set_hwtstamp_phylib(dev, kernel_cfg, extack);
+
+ /* Legacy path: unconverted lower driver */
+ return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg);
}
+EXPORT_SYMBOL(generic_hwtstamp_set_lower);
static int dev_siocbond(struct net_device *dev,
struct ifreq *ifr, unsigned int cmd)
diff --git a/net/core/dst.c b/net/core/dst.c
index 79d9306ad1ee..980e2fd2f013 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -152,7 +152,7 @@ void dst_dev_put(struct dst_entry *dst)
dst->obsolete = DST_OBSOLETE_DEAD;
if (dst->ops->ifdown)
- dst->ops->ifdown(dst, dev, true);
+ dst->ops->ifdown(dst, dev);
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = blackhole_netdev;
diff --git a/net/core/filter.c b/net/core/filter.c
index 28a59596987a..a094694899c9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4339,13 +4339,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
- if (map_type == BPF_MAP_TYPE_XSKMAP) {
- /* XDP_REDIRECT is not supported AF_XDP yet. */
- if (unlikely(xdp_buff_has_frags(xdp)))
- return -EOPNOTSUPP;
-
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
- }
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
xdp_prog);
@@ -7350,8 +7345,8 @@ BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
return -EOPNOTSUPP;
if (unlikely(dev_net(skb->dev) != sock_net(sk)))
return -ENETUNREACH;
- if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
- return -ESOCKTNOSUPPORT;
+ if (sk_unhashed(sk))
+ return -EOPNOTSUPP;
if (sk_is_refcounted(sk) &&
unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
return -ENOENT;
@@ -9306,7 +9301,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, read skb->tstamp as is if tstamp_type_access is true.
@@ -9340,7 +9335,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
__u8 value_reg = si->src_reg;
__u8 skb_reg = si->dst_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 85a2d0d9bd39..89d15ceaf9af 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -40,7 +40,7 @@
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
- flow_dissector->used_keys |= (1 << key_id);
+ flow_dissector->used_keys |= (1ULL << key_id);
}
void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
@@ -205,6 +205,50 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
}
+static void __skb_flow_dissect_ah(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_ipsec *key_ah;
+ struct ip_auth_hdr _hdr, *hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_ah = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPSEC,
+ target_container);
+
+ key_ah->spi = hdr->spi;
+}
+
+static void __skb_flow_dissect_esp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_ipsec *key_esp;
+ struct ip_esp_hdr _hdr, *hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_esp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPSEC,
+ target_container);
+
+ key_esp->spi = hdr->spi;
+}
+
static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, const void *data,
@@ -1571,7 +1615,14 @@ ip_proto_again:
__skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
-
+ case IPPROTO_ESP:
+ __skb_flow_dissect_esp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+ case IPPROTO_AH:
+ __skb_flow_dissect_ah(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
default:
break;
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index acfc1f88ea79..bc5169482710 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -146,6 +146,13 @@ void flow_rule_match_tcp(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_tcp);
+void flow_rule_match_ipsec(const struct flow_rule *rule,
+ struct flow_match_ipsec *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPSEC, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipsec);
+
void flow_rule_match_icmp(const struct flow_rule *rule,
struct flow_match_icmp *out)
{
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 15e3f4606b5f..fccaa5bac0ed 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -23,6 +23,7 @@
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include <net/netdev_rx_queue.h>
#include "dev.h"
#include "net-sysfs.h"
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index a4270fafdf11..c1aea8b756b6 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -10,11 +10,11 @@
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
- u32 portid, u32 seq, int flags, u32 cmd)
+ const struct genl_info *info)
{
void *hdr;
- hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd);
+ hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
@@ -25,6 +25,14 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
return -EINVAL;
}
+ if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ netdev->xdp_zc_max_segs)) {
+ genlmsg_cancel(rsp, hdr);
+ return -EINVAL;
+ }
+ }
+
genlmsg_end(rsp, hdr);
return 0;
@@ -33,17 +41,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
static void
netdev_genl_dev_notify(struct net_device *netdev, int cmd)
{
+ struct genl_info info;
struct sk_buff *ntf;
if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
NETDEV_NLGRP_MGMT))
return;
+ genl_info_init_ntf(&info, &netdev_nl_family, cmd);
+
ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!ntf)
return;
- if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) {
+ if (netdev_nl_dev_fill(netdev, ntf, &info)) {
nlmsg_free(ntf);
return;
}
@@ -72,8 +83,7 @@ int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
netdev = __dev_get_by_index(genl_info_net(info), ifindex);
if (netdev)
- err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid,
- info->snd_seq, 0, info->genlhdr->cmd);
+ err = netdev_nl_dev_fill(netdev, rsp, info);
else
err = -ENODEV;
@@ -93,43 +103,19 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
- int idx = 0, s_idx;
- int h, s_h;
- int err;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
+ int err = 0;
rtnl_lock();
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
-
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(netdev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = netdev_nl_dev_fill(netdev, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NETDEV_CMD_DEV_GET);
- if (err < 0)
- break;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, netdev, cb->args[0]) {
+ err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
+ if (err < 0)
+ break;
}
-
rtnl_unlock();
if (err != -EMSGSIZE)
return err;
- cb->args[1] = idx;
- cb->args[0] = h;
- cb->seq = net->dev_base_seq;
-
return skb->len;
}
diff --git a/net/core/of_net.c b/net/core/of_net.c
index 55d3fe229269..93ea425b9248 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/of_net.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/phy.h>
#include <linux/export.h>
#include <linux/device.h>
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a3e12a61d456..77cb75e63aca 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/device.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@@ -58,6 +58,17 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
"rx_pp_recycle_released_ref",
};
+/**
+ * page_pool_get_stats() - fetch page pool stats
+ * @pool: pool from which page was allocated
+ * @stats: struct page_pool_stats to fill in
+ *
+ * Retrieve statistics about the page_pool. This API is only available
+ * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
+ * A pointer to a caller allocated struct page_pool_stats structure
+ * is passed to this API which is filled in. The caller can then report
+ * those stats to the user (perhaps via ethtool, debugfs, etc.).
+ */
bool page_pool_get_stats(struct page_pool *pool,
struct page_pool_stats *stats)
{
@@ -224,6 +235,10 @@ static int page_pool_init(struct page_pool *pool,
return 0;
}
+/**
+ * page_pool_create() - create a page pool.
+ * @params: parameters, see struct page_pool_params
+ */
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
struct page_pool *pool;
@@ -492,7 +507,7 @@ static s32 page_pool_inflight(struct page_pool *pool)
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_release_page(struct page_pool *pool, struct page *page)
+static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
int count;
@@ -518,13 +533,6 @@ skip_dma_unmap:
*/
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page, count);
-}
-EXPORT_SYMBOL(page_pool_release_page);
-
-/* Return a page to the page allocator, cleaning up our state */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
-{
- page_pool_release_page(pool, page);
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
@@ -579,6 +587,8 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ lockdep_assert_no_hardirq();
+
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
@@ -616,9 +626,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- /* Do not replace this with page_pool_return_page() */
- page_pool_release_page(pool, page);
- put_page(page);
+ page_pool_return_page(pool, page);
return NULL;
}
@@ -635,7 +643,21 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
}
EXPORT_SYMBOL(page_pool_put_defragged_page);
-/* Caller must not use data area after call, as this function overwrites it */
+/**
+ * page_pool_put_page_bulk() - release references on multiple pages
+ * @pool: pool from which pages were allocated
+ * @data: array holding page pointers
+ * @count: number of pages in @data
+ *
+ * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring
+ * producer lock. If the ptr_ring is full, page_pool_put_page_bulk()
+ * will release leftover pages to the page allocator.
+ * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
+ * completion loop for the XDP_REDIRECT use case.
+ *
+ * Please note the caller must not use data area after running
+ * page_pool_put_page_bulk(), as this function overwrites it.
+ */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
@@ -915,42 +937,3 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
-
-bool page_pool_return_skb_page(struct page *page, bool napi_safe)
-{
- struct napi_struct *napi;
- struct page_pool *pp;
- bool allow_direct;
-
- page = compound_head(page);
-
- /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
- * in order to preserve any existing bits, such as bit 0 for the
- * head page of compound page and bit 1 for pfmemalloc page, so
- * mask those bits for freeing side when doing below checking,
- * and page_is_pfmemalloc() is checked in __page_pool_put_page()
- * to avoid recycling the pfmemalloc page.
- */
- if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
- return false;
-
- pp = page->pp;
-
- /* Allow direct recycle if we have reasons to believe that we are
- * in the same context as the consumer would run, so there's
- * no possible race.
- */
- napi = READ_ONCE(pp->p.napi);
- allow_direct = napi_safe && napi &&
- READ_ONCE(napi->list_owner) == smp_processor_id();
-
- /* Driver set this to memory recycling info. Reset it on recycle.
- * This will *not* work for NIC using a split-page memory model.
- * The page will be returned to the pool here regardless of the
- * 'flipped' fragment being in use or not.
- */
- page_pool_put_full_page(pp, page, allow_direct);
-
- return true;
-}
-EXPORT_SYMBOL(page_pool_return_skb_page);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00c94d9622b4..4a2ec33bfb51 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -61,7 +61,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 43
+#define RTNL_SLAVE_MAX_TYPE 44
struct rtnl_link {
rtnl_doit_func doit;
@@ -1273,7 +1273,6 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct net_device *dev,
int vfs_num,
- struct nlattr *vfinfo,
u32 ext_filter_mask)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
@@ -1343,7 +1342,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
vf_trust.setting = ivi.trusted;
vf = nla_nest_start_noflag(skb, IFLA_VF_INFO);
if (!vf)
- goto nla_put_vfinfo_failure;
+ return -EMSGSIZE;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
@@ -1414,8 +1413,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_vf_failure:
nla_nest_cancel(skb, vf);
-nla_put_vfinfo_failure:
- nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
}
@@ -1441,8 +1438,10 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return -EMSGSIZE;
for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask))
+ if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) {
+ nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
+ }
}
nla_nest_end(skb, vfinfo);
diff --git a/net/core/scm.c b/net/core/scm.c
index 3cd7dd377e53..880027ecf516 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ EXPORT_SYMBOL(__scm_destroy);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct cmsghdr *cmsg;
int err;
@@ -153,7 +154,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
- if (!sock->ops || sock->ops->family != PF_UNIX)
+ if (!ops || ops->family != PF_UNIX)
goto error;
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a298992060e6..faa6c86da2a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,7 +73,7 @@
#include <net/mpls.h>
#include <net/mptcp.h>
#include <net/mctp.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/dropreason.h>
#include <linux/uaccess.h>
@@ -879,11 +879,56 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_put_page(struct page *page, bool napi_safe)
+{
+ bool allow_direct = false;
+ struct page_pool *pp;
+
+ page = compound_head(page);
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+ * head page of compound page and bit 1 for pfmemalloc page, so
+ * mask those bits for freeing side when doing below checking,
+ * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+ return false;
+
+ pp = page->pp;
+
+ /* Allow direct recycle if we have reasons to believe that we are
+ * in the same context as the consumer would run, so there's
+ * no possible race.
+ * __page_pool_put_page() makes sure we're not in hardirq context
+ * and interrupts are enabled prior to accessing the cache.
+ */
+ if (napi_safe || in_softirq()) {
+ const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+
+ allow_direct = napi &&
+ READ_ONCE(napi->list_owner) == smp_processor_id();
+ }
+
+ /* Driver set this to memory recycling info. Reset it on recycle.
+ * This will *not* work for NIC using a split-page memory model.
+ * The page will be returned to the pool here regardless of the
+ * 'flipped' fragment being in use or not.
+ */
+ page_pool_put_full_page(pp, page, allow_direct);
+
+ return true;
+}
+EXPORT_SYMBOL(napi_pp_put_page);
+#endif
+
static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return page_pool_return_skb_page(virt_to_page(data), napi_safe);
+ return napi_pp_put_page(virt_to_page(data), napi_safe);
}
static void skb_kfree_head(void *head, unsigned int end_offset)
@@ -3656,20 +3701,23 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
EXPORT_SYMBOL(skb_dequeue_tail);
/**
- * skb_queue_purge - empty a list
+ * skb_queue_purge_reason - empty a list
* @list: list to empty
+ * @reason: drop reason
*
* Delete all buffers on an &sk_buff list. Each buffer is removed from
* the list and one reference dropped. This function takes the list
* lock and is atomic with respect to other list locking functions.
*/
-void skb_queue_purge(struct sk_buff_head *list)
+void skb_queue_purge_reason(struct sk_buff_head *list,
+ enum skb_drop_reason reason)
{
struct sk_buff *skb;
+
while ((skb = skb_dequeue(list)) != NULL)
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
-EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_purge_reason);
/**
* skb_rbtree_purge - empty a skb rbtree
@@ -3697,6 +3745,27 @@ unsigned int skb_rbtree_purge(struct rb_root *root)
return sum;
}
+void skb_errqueue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb, *next;
+ struct sk_buff_head kill;
+ unsigned long flags;
+
+ __skb_queue_head_init(&kill);
+
+ spin_lock_irqsave(&list->lock, flags);
+ skb_queue_walk_safe(list, skb, next) {
+ if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY ||
+ SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING)
+ continue;
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&kill, skb);
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+ __skb_queue_purge(&kill);
+}
+EXPORT_SYMBOL(skb_errqueue_purge);
+
/**
* skb_queue_head - queue a buffer at the list head
* @list: list to use
@@ -4750,12 +4819,23 @@ static void skb_extensions_init(void)
static void skb_extensions_init(void) {}
#endif
+/* The SKB kmem_cache slab is critical for network performance. Never
+ * merge/alias the slab with similar sized objects. This avoids fragmentation
+ * that hurts performance of kmem_cache_{alloc,free}_bulk APIs.
+ */
+#ifndef CONFIG_SLUB_TINY
+#define FLAG_SKB_NO_MERGE SLAB_NO_MERGE
+#else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */
+#define FLAG_SKB_NO_MERGE 0
+#endif
+
void __init skb_init(void)
{
skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|
+ FLAG_SKB_NO_MERGE,
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
@@ -6204,7 +6284,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
*
* @header_len: size of linear part
* @data_len: needed length in frags
- * @max_page_order: max page order desired.
+ * @order: max page order desired.
* @errcode: pointer to error code if any
* @gfp_mask: allocation mask
*
@@ -6212,21 +6292,17 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
*/
struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
unsigned long data_len,
- int max_page_order,
+ int order,
int *errcode,
gfp_t gfp_mask)
{
- int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
unsigned long chunk;
struct sk_buff *skb;
struct page *page;
- int i;
+ int nr_frags = 0;
*errcode = -EMSGSIZE;
- /* Note this test could be relaxed, if we succeed to allocate
- * high order pages...
- */
- if (npages > MAX_SKB_FRAGS)
+ if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order)))
return NULL;
*errcode = -ENOBUFS;
@@ -6234,34 +6310,32 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (!skb)
return NULL;
- skb->truesize += npages << PAGE_SHIFT;
-
- for (i = 0; npages > 0; i++) {
- int order = max_page_order;
-
- while (order) {
- if (npages >= 1 << order) {
- page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP |
- __GFP_NOWARN,
- order);
- if (page)
- goto fill_page;
- /* Do not retry other high order allocations */
- order = 1;
- max_page_order = 0;
- }
+ while (data_len) {
+ if (nr_frags == MAX_SKB_FRAGS - 1)
+ goto failure;
+ while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order))
order--;
+
+ if (order) {
+ page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_COMP |
+ __GFP_NOWARN,
+ order);
+ if (!page) {
+ order--;
+ continue;
+ }
+ } else {
+ page = alloc_page(gfp_mask);
+ if (!page)
+ goto failure;
}
- page = alloc_page(gfp_mask);
- if (!page)
- goto failure;
-fill_page:
chunk = min_t(unsigned long, data_len,
PAGE_SIZE << order);
- skb_fill_page_desc(skb, i, page, 0, chunk);
+ skb_fill_page_desc(skb, nr_frags, page, 0, chunk);
+ nr_frags++;
+ skb->truesize += (PAGE_SIZE << order);
data_len -= chunk;
- npages -= 1 << order;
}
return skb;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ef1a2eb6520b..a0659fc29bcc 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1204,13 +1204,17 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ const struct proto_ops *ops;
int copied;
trace_sk_data_ready(sk);
- if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
+ if (unlikely(!sock))
return;
- copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ ops = READ_ONCE(sock->ops);
+ if (!ops || !ops->read_skb)
+ return;
+ copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
struct sk_psock *psock;
diff --git a/net/core/sock.c b/net/core/sock.c
index c9cffb7acbea..666a17cab4f5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -767,7 +767,7 @@ bool sk_mc_loop(struct sock *sk)
return true;
switch (sk->sk_family) {
case AF_INET:
- return inet_sk(sk)->mc_loop;
+ return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return inet6_sk(sk)->mc_loop;
@@ -797,7 +797,7 @@ EXPORT_SYMBOL(sock_set_reuseport);
void sock_no_linger(struct sock *sk)
{
lock_sock(sk);
- sk->sk_lingertime = 0;
+ WRITE_ONCE(sk->sk_lingertime, 0);
sock_set_flag(sk, SOCK_LINGER);
release_sock(sk);
}
@@ -1230,15 +1230,15 @@ set_sndbuf:
ret = -EFAULT;
break;
}
- if (!ling.l_onoff)
+ if (!ling.l_onoff) {
sock_reset_flag(sk, SOCK_LINGER);
- else {
-#if (BITS_PER_LONG == 32)
- if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
- sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+ } else {
+ unsigned long t_sec = ling.l_linger;
+
+ if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
+ WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
else
-#endif
- sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+ WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
sock_set_flag(sk, SOCK_LINGER);
}
break;
@@ -1247,17 +1247,11 @@ set_sndbuf:
break;
case SO_PASSCRED:
- if (valbool)
- set_bit(SOCK_PASSCRED, &sock->flags);
- else
- clear_bit(SOCK_PASSCRED, &sock->flags);
+ assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
break;
case SO_PASSPIDFD:
- if (valbool)
- set_bit(SOCK_PASSPIDFD, &sock->flags);
- else
- clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
break;
case SO_TIMESTAMP_OLD:
@@ -1283,14 +1277,19 @@ set_sndbuf:
break;
case SO_RCVLOWAT:
+ {
+ int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
+
if (val < 0)
val = INT_MAX;
- if (sock && sock->ops->set_rcvlowat)
- ret = sock->ops->set_rcvlowat(sk, val);
+ if (sock)
+ set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
+ if (set_rcvlowat)
+ ret = set_rcvlowat(sk, val);
else
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
-
+ }
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
@@ -1361,10 +1360,7 @@ set_sndbuf:
break;
case SO_PASSSEC:
- if (valbool)
- set_bit(SOCK_PASSSEC, &sock->flags);
- else
- clear_bit(SOCK_PASSSEC, &sock->flags);
+ assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
break;
case SO_MARK:
if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
@@ -1388,11 +1384,16 @@ set_sndbuf:
break;
case SO_PEEK_OFF:
- if (sock->ops->set_peek_off)
- ret = sock->ops->set_peek_off(sk, val);
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
+ }
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
@@ -1691,7 +1692,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_LINGER:
lv = sizeof(v.ling);
v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
- v.ling.l_linger = sk->sk_lingertime / HZ;
+ v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
break;
case SO_BSDCOMPAT:
@@ -1823,14 +1824,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_PEERNAME:
{
- char address[128];
+ struct sockaddr_storage address;
- lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
- if (copy_to_sockptr(optval, address, len))
+ if (copy_to_sockptr(optval, &address, len))
return -EFAULT;
goto lenout;
}
@@ -1867,7 +1868,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_PEEK_OFF:
- if (!sock->ops->set_peek_off)
+ if (!READ_ONCE(sock->ops)->set_peek_off)
return -EOPNOTSUPP;
v.val = READ_ONCE(sk->sk_peek_off);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 8362130bf085..a70670fe9a2d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -14,7 +14,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index d76c9be5bfca..57d9c026aa3f 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -105,7 +105,6 @@ extern int sysctl_dccp_rx_ccid;
extern int sysctl_dccp_tx_ccid;
int dccp_feat_init(struct sock *sk);
-void dccp_feat_initialise_sysctls(void);
int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
u8 const *list, u8 len);
int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a545ad71201c..1591b061105a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -247,7 +247,6 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
const u8 offset = iph->ihl << 2;
const struct dccp_hdr *dh;
struct dccp_sock *dp;
- struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -361,8 +360,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
* --ANK (980905)
*/
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
+ if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
@@ -474,7 +472,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
.flowi4_oif = inet_iif(skb),
.daddr = iph->saddr,
.saddr = iph->daddr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
+ .flowi4_tos = ip_sock_rt_tos(sk),
+ .flowi4_scope = ip_sock_rt_scope(sk),
.flowi4_proto = sk->sk_protocol,
.fl4_sport = dccp_hdr(skb)->dccph_dport,
.fl4_dport = dccp_hdr(skb)->dccph_sport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d29d1163203d..686090bc5945 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1056,6 +1056,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 7e4c2a3b322b..c5d14c48def1 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
- /*
- * ipv6_pinfo has to be the last member of dccp6_sock,
- * see inet6_sk_generic.
- */
struct ipv6_pinfo inet6;
};
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index ef91a76646a3..a087af581847 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := leftover.o core.o netlink.o dev.o health.o
+obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o health.o
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index bf1d6f1bcfc7..abf3393a7a17 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -196,7 +196,7 @@ void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -217,17 +217,18 @@ int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
}
static int
-devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
}
-const struct devlink_cmd devl_cmd_get = {
- .dump_one = devlink_nl_cmd_get_dump_one,
-};
+int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
+}
static void devlink_reload_failed_set(struct devlink *devlink,
bool reload_failed)
@@ -804,7 +805,7 @@ err_cancel_msg:
return err;
}
-int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -826,23 +827,24 @@ int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
}
static int
-devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
int err;
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
-const struct devlink_cmd devl_cmd_info_get = {
- .dump_one = devlink_nl_cmd_info_get_dump_one,
-};
+int devlink_nl_info_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(msg, cb, devlink_nl_info_get_dump_one);
+}
static int devlink_nl_flash_update_fill(struct sk_buff *msg,
struct devlink *devlink,
@@ -1204,8 +1206,7 @@ err_cancel_msg:
return err;
}
-int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -1228,23 +1229,25 @@ int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int
-devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
if (!devlink->ops->selftest_check)
return 0;
return devlink_nl_selftests_fill(msg, devlink,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
}
-const struct devlink_cmd devl_cmd_selftests_get = {
- .dump_one = devlink_nl_cmd_selftests_get_dump_one,
-};
+int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_selftests_get_dump_one);
+}
static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
enum devlink_selftest_status test_status)
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 62921b2eb0d3..eb1d5066c73f 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -12,6 +12,8 @@
#include <net/devlink.h>
#include <net/net_namespace.h>
+#include "netlink_gen.h"
+
#define DEVLINK_REGISTERED XA_MARK_1
#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
@@ -90,9 +92,6 @@ static inline bool devl_is_registered(struct devlink *devlink)
/* Netlink */
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
-#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
-#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
@@ -114,12 +113,12 @@ struct devlink_nl_dump_state {
};
};
-struct devlink_cmd {
- int (*dump_one)(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb);
-};
+typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags);
-extern const struct genl_small_ops devlink_nl_ops[56];
+extern const struct genl_small_ops devlink_nl_small_ops[40];
struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
@@ -127,8 +126,8 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
void devlink_notify_unregister(struct devlink *devlink);
void devlink_notify_register(struct devlink *devlink);
-int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb);
+int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
+ devlink_nl_dump_one_func_t *dump_one);
static inline struct devlink_nl_dump_state *
devlink_dump_state(struct netlink_callback *cb)
@@ -148,24 +147,6 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
-/* Commands */
-extern const struct devlink_cmd devl_cmd_get;
-extern const struct devlink_cmd devl_cmd_port_get;
-extern const struct devlink_cmd devl_cmd_sb_get;
-extern const struct devlink_cmd devl_cmd_sb_pool_get;
-extern const struct devlink_cmd devl_cmd_sb_port_pool_get;
-extern const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get;
-extern const struct devlink_cmd devl_cmd_param_get;
-extern const struct devlink_cmd devl_cmd_region_get;
-extern const struct devlink_cmd devl_cmd_info_get;
-extern const struct devlink_cmd devl_cmd_health_reporter_get;
-extern const struct devlink_cmd devl_cmd_trap_get;
-extern const struct devlink_cmd devl_cmd_trap_group_get;
-extern const struct devlink_cmd devl_cmd_trap_policer_get;
-extern const struct devlink_cmd devl_cmd_rate_get;
-extern const struct devlink_cmd devl_cmd_linecard_get;
-extern const struct devlink_cmd devl_cmd_selftests_get;
-
/* Notify */
void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
@@ -199,31 +180,16 @@ int devlink_resources_validate(struct devlink *devlink,
struct devlink_resource *resource,
struct genl_info *info);
-/* Line cards */
-struct devlink_linecard;
-
-struct devlink_linecard *
-devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
-
/* Rates */
int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
-struct devlink_rate *
-devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
-struct devlink_rate *
-devlink_rate_node_get_from_info(struct devlink *devlink,
- struct genl_info *info);
+
/* Devlink nl cmds */
-int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
- struct genl_info *info);
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 194340a8bb86..638cad8d5c65 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -356,8 +356,8 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
}
-int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -384,18 +384,29 @@ int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int
-devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_health_reporter_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
+ unsigned long port_index_end = ULONG_MAX;
+ struct nlattr **attrs = info->attrs;
+ unsigned long port_index_start = 0;
struct devlink_port *port;
unsigned long port_index;
int idx = 0;
int err;
+ if (attrs && attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ port_index_start = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ port_index_end = port_index_start;
+ flags |= NLM_F_DUMP_FILTERED;
+ goto per_port_dump;
+ }
+
list_for_each_entry(reporter, &devlink->reporter_list, list) {
if (idx < state->idx) {
idx++;
@@ -405,14 +416,16 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
DEVLINK_CMD_HEALTH_REPORTER_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ flags);
if (err) {
state->idx = idx;
return err;
}
idx++;
}
- xa_for_each(&devlink->ports, port_index, port) {
+per_port_dump:
+ xa_for_each_range(&devlink->ports, port_index, port,
+ port_index_start, port_index_end) {
list_for_each_entry(reporter, &port->reporter_list, list) {
if (idx < state->idx) {
idx++;
@@ -422,7 +435,7 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
DEVLINK_CMD_HEALTH_REPORTER_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ flags);
if (err) {
state->idx = idx;
return err;
@@ -434,9 +447,12 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
return 0;
}
-const struct devlink_cmd devl_cmd_health_reporter_get = {
- .dump_one = devlink_nl_cmd_health_reporter_get_dump_one,
-};
+int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb,
+ devlink_nl_health_reporter_get_dump_one);
+}
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info)
@@ -1248,7 +1264,7 @@ out:
static struct devlink_health_reporter *
devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index bfed7929a904..e2cd13958cc2 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -232,13 +232,13 @@ devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
return devlink_rate_node_get_by_name(devlink, rate_node_name);
}
-struct devlink_rate *
+static struct devlink_rate *
devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
{
return devlink_rate_node_get_from_attrs(devlink, info->attrs);
}
-struct devlink_rate *
+static struct devlink_rate *
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
{
struct nlattr **attrs = info->attrs;
@@ -285,7 +285,7 @@ devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
return ERR_PTR(-EINVAL);
}
-struct devlink_linecard *
+static struct devlink_linecard *
devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
{
return devlink_linecard_get_from_attrs(devlink, info->attrs);
@@ -1005,8 +1005,8 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
}
static int
-devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_rate *devlink_rate;
@@ -1022,8 +1022,7 @@ devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
continue;
}
err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, NULL);
+ cb->nlh->nlmsg_seq, flags, NULL);
if (err) {
state->idx = idx;
break;
@@ -1034,17 +1033,22 @@ devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
return err;
}
-const struct devlink_cmd devl_cmd_rate_get = {
- .dump_one = devlink_nl_cmd_rate_get_dump_one,
-};
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_rate_get_dump_one);
+}
-static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct devlink_rate *devlink_rate = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *devlink_rate;
struct sk_buff *msg;
int err;
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate))
+ return PTR_ERR(devlink_rate);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -1072,8 +1076,7 @@ devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
return false;
}
-static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct sk_buff *msg;
@@ -1095,8 +1098,8 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
}
static int
-devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_port *devlink_port;
@@ -1107,8 +1110,8 @@ devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
err = devlink_nl_port_fill(msg, devlink_port,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->extack);
+ cb->nlh->nlmsg_seq, flags,
+ cb->extack);
if (err) {
state->idx = port_index;
break;
@@ -1118,9 +1121,10 @@ devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
return err;
}
-const struct devlink_cmd devl_cmd_port_get = {
- .dump_one = devlink_nl_cmd_port_get_dump_one,
-};
+int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_port_get_dump_one);
+}
static int devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type port_type)
@@ -1629,11 +1633,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_rate *devlink_rate = info->user_ptr[1];
- struct devlink *devlink = devlink_rate->devlink;
- const struct devlink_ops *ops = devlink->ops;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *devlink_rate;
+ const struct devlink_ops *ops;
int err;
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate))
+ return PTR_ERR(devlink_rate);
+
+ ops = devlink->ops;
if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
return -EOPNOTSUPP;
@@ -1704,18 +1713,22 @@ err_strdup:
static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_rate *rate_node = info->user_ptr[1];
- struct devlink *devlink = rate_node->devlink;
- const struct devlink_ops *ops = devlink->ops;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
int err;
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node))
+ return PTR_ERR(rate_node);
+
if (refcount_read(&rate_node->refcnt) > 1) {
NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
return -EBUSY;
}
devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
- err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+ err = devlink->ops->rate_node_del(rate_node, rate_node->priv,
+ info->extack);
if (rate_node->parent)
refcount_dec(&rate_node->parent->refcnt);
list_del(&rate_node->list);
@@ -1811,14 +1824,17 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct devlink_linecard *linecard = info->user_ptr[1];
- struct devlink *devlink = linecard->devlink;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_linecard *linecard;
struct sk_buff *msg;
int err;
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard))
+ return PTR_ERR(linecard);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -1837,9 +1853,10 @@ static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_linecard_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_linecard *linecard;
@@ -1855,8 +1872,7 @@ static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
err = devlink_nl_linecard_fill(msg, devlink, linecard,
DEVLINK_CMD_LINECARD_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
mutex_unlock(&linecard->state_lock);
if (err) {
@@ -1869,9 +1885,11 @@ static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
return err;
}
-const struct devlink_cmd devl_cmd_linecard_get = {
- .dump_one = devlink_nl_cmd_linecard_get_dump_one,
-};
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_linecard_get_dump_one);
+}
static struct devlink_linecard_type *
devlink_linecard_type_lookup(struct devlink_linecard *linecard,
@@ -2008,10 +2026,15 @@ out:
static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_linecard *linecard = info->user_ptr[1];
struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_linecard *linecard;
int err;
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard))
+ return PTR_ERR(linecard);
+
if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
const char *type;
@@ -2068,8 +2091,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_sb *devlink_sb;
@@ -2096,8 +2118,8 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
}
static int
-devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
@@ -2112,8 +2134,7 @@ devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
DEVLINK_CMD_SB_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
if (err) {
state->idx = idx;
break;
@@ -2124,9 +2145,10 @@ devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
return err;
}
-const struct devlink_cmd devl_cmd_sb_get = {
- .dump_one = devlink_nl_cmd_sb_get_dump_one,
-};
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_get_dump_one);
+}
static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
@@ -2171,8 +2193,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_sb *devlink_sb;
@@ -2210,7 +2231,7 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
struct devlink *devlink,
struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
+ u32 portid, u32 seq, int flags)
{
u16 pool_count = devlink_sb_pool_count(devlink_sb);
u16 pool_index;
@@ -2225,7 +2246,7 @@ static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
devlink_sb,
pool_index,
DEVLINK_CMD_SB_POOL_NEW,
- portid, seq, NLM_F_MULTI);
+ portid, seq, flags);
if (err)
return err;
(*p_idx)++;
@@ -2234,9 +2255,8 @@ static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
}
static int
-devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_sb_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
@@ -2250,7 +2270,7 @@ devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
err = __sb_pool_get_dumpit(msg, state->idx, &idx,
devlink, devlink_sb,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
+ cb->nlh->nlmsg_seq, flags);
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
@@ -2262,9 +2282,11 @@ devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
return err;
}
-const struct devlink_cmd devl_cmd_sb_pool_get = {
- .dump_one = devlink_nl_cmd_sb_pool_get_dump_one,
-};
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_pool_get_dump_one);
+}
static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
@@ -2371,8 +2393,8 @@ sb_occ_get_failure:
return err;
}
-static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
@@ -2412,7 +2434,7 @@ static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
struct devlink *devlink,
struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
+ u32 portid, u32 seq, int flags)
{
struct devlink_port *devlink_port;
u16 pool_count = devlink_sb_pool_count(devlink_sb);
@@ -2431,8 +2453,7 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
devlink_sb,
pool_index,
DEVLINK_CMD_SB_PORT_POOL_NEW,
- portid, seq,
- NLM_F_MULTI);
+ portid, seq, flags);
if (err)
return err;
(*p_idx)++;
@@ -2442,9 +2463,9 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
}
static int
-devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_sb_port_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
@@ -2458,7 +2479,7 @@ devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
devlink, devlink_sb,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
+ cb->nlh->nlmsg_seq, flags);
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
@@ -2470,9 +2491,11 @@ devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
return err;
}
-const struct devlink_cmd devl_cmd_sb_port_pool_get = {
- .dump_one = devlink_nl_cmd_sb_port_pool_get_dump_one,
-};
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_port_pool_get_dump_one);
+}
static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
@@ -2580,8 +2603,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
@@ -2628,7 +2651,7 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
int start, int *p_idx,
struct devlink *devlink,
struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
+ u32 portid, u32 seq, int flags)
{
struct devlink_port *devlink_port;
unsigned long port_index;
@@ -2649,7 +2672,7 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
DEVLINK_SB_POOL_TYPE_INGRESS,
DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
portid, seq,
- NLM_F_MULTI);
+ flags);
if (err)
return err;
(*p_idx)++;
@@ -2667,7 +2690,7 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
DEVLINK_SB_POOL_TYPE_EGRESS,
DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
portid, seq,
- NLM_F_MULTI);
+ flags);
if (err)
return err;
(*p_idx)++;
@@ -2676,10 +2699,10 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
return 0;
}
-static int
-devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
@@ -2693,7 +2716,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
devlink, devlink_sb,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
+ cb->nlh->nlmsg_seq, flags);
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
@@ -2705,9 +2728,12 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
return err;
}
-const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get = {
- .dump_one = devlink_nl_cmd_sb_tc_pool_bind_get_dump_one,
-};
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb,
+ devlink_nl_sb_tc_pool_bind_get_dump_one);
+}
static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 tc_index,
@@ -3946,7 +3972,7 @@ static int devlink_param_get(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get || devlink->reload_failed)
+ if (!param->get)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -3955,7 +3981,7 @@ static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set || devlink->reload_failed)
+ if (!param->set)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
@@ -4155,9 +4181,10 @@ static void devlink_param_notify(struct devlink *devlink,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-static int
-devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_param_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_param_item *param_item;
@@ -4168,8 +4195,7 @@ devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
err = devlink_nl_param_fill(msg, devlink, 0, param_item,
DEVLINK_CMD_PARAM_GET,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
@@ -4181,9 +4207,11 @@ devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
return err;
}
-const struct devlink_cmd devl_cmd_param_get = {
- .dump_one = devlink_nl_cmd_param_get_dump_one,
-};
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_param_get_dump_one);
+}
static int
devlink_param_type_get_from_info(struct genl_info *info,
@@ -4272,8 +4300,8 @@ devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
return devlink_param_find_by_name(params, param_name);
}
-static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_param_item *param_item;
@@ -4770,8 +4798,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region,
kfree(snapshot);
}
-static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_port *port = NULL;
@@ -4819,8 +4846,7 @@ static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
struct netlink_callback *cb,
struct devlink_port *port,
- int *idx,
- int start)
+ int *idx, int start, int flags)
{
struct devlink_region *region;
int err = 0;
@@ -4834,7 +4860,7 @@ static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
DEVLINK_CMD_REGION_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
+ flags, region);
if (err)
goto out;
(*idx)++;
@@ -4844,9 +4870,10 @@ out:
return err;
}
-static int
-devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_region_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_region *region;
@@ -4863,8 +4890,8 @@ devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
err = devlink_nl_region_fill(msg, devlink,
DEVLINK_CMD_REGION_GET,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
+ cb->nlh->nlmsg_seq, flags,
+ region);
if (err) {
state->idx = idx;
return err;
@@ -4874,7 +4901,7 @@ devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
xa_for_each(&devlink->ports, port_index, port) {
err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
- state->idx);
+ state->idx, flags);
if (err) {
state->idx = idx;
return err;
@@ -4884,9 +4911,11 @@ devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
return 0;
}
-const struct devlink_cmd devl_cmd_region_get = {
- .dump_one = devlink_nl_cmd_region_get_dump_one,
-};
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one);
+}
static int devlink_nl_cmd_region_del(struct sk_buff *skb,
struct genl_info *info)
@@ -5172,7 +5201,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
u64 ret_offset, start_offset, end_offset = U64_MAX;
- struct nlattr **attrs = info->attrs;
+ struct nlattr **attrs = info->info.attrs;
struct devlink_port *port = NULL;
devlink_chunk_fill_t *region_cb;
struct devlink_region *region;
@@ -5195,8 +5224,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto out_unlock;
}
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
port = devlink_port_get_by_index(devlink, index);
if (!port) {
@@ -5632,8 +5661,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct netlink_ext_ack *extack = info->extack;
struct devlink *devlink = info->user_ptr[0];
@@ -5667,9 +5695,9 @@ err_trap_fill:
return err;
}
-static int
-devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_trap_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_item *trap_item;
@@ -5684,8 +5712,7 @@ devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
err = devlink_nl_trap_fill(msg, devlink, trap_item,
DEVLINK_CMD_TRAP_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
if (err) {
state->idx = idx;
break;
@@ -5696,9 +5723,10 @@ devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
return err;
}
-const struct devlink_cmd devl_cmd_trap_get = {
- .dump_one = devlink_nl_cmd_trap_get_dump_one,
-};
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_get_dump_one);
+}
static int __devlink_trap_action_set(struct devlink *devlink,
struct devlink_trap_item *trap_item,
@@ -5843,8 +5871,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct netlink_ext_ack *extack = info->extack;
struct devlink *devlink = info->user_ptr[0];
@@ -5878,10 +5905,10 @@ err_trap_group_fill:
return err;
}
-static int
-devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_trap_group_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_group_item *group_item;
@@ -5897,8 +5924,7 @@ devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
err = devlink_nl_trap_group_fill(msg, devlink, group_item,
DEVLINK_CMD_TRAP_GROUP_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
if (err) {
state->idx = idx;
break;
@@ -5909,9 +5935,11 @@ devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
return err;
}
-const struct devlink_cmd devl_cmd_trap_group_get = {
- .dump_one = devlink_nl_cmd_trap_group_get_dump_one,
-};
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_group_get_dump_one);
+}
static int
__devlink_trap_group_action_set(struct devlink *devlink,
@@ -6137,8 +6165,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink_trap_policer_item *policer_item;
struct netlink_ext_ack *extack = info->extack;
@@ -6172,10 +6200,10 @@ err_trap_policer_fill:
return err;
}
-static int
-devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_trap_policer_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_policer_item *policer_item;
@@ -6190,8 +6218,7 @@ devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
DEVLINK_CMD_TRAP_POLICER_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
if (err) {
state->idx = idx;
break;
@@ -6202,9 +6229,11 @@ devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
return err;
}
-const struct devlink_cmd devl_cmd_trap_policer_get = {
- .dump_one = devlink_nl_cmd_trap_policer_get_dump_one,
-};
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_policer_get_dump_one);
+}
static int
devlink_trap_policer_set(struct devlink *devlink,
@@ -6278,22 +6307,7 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
return devlink_trap_policer_set(devlink, policer_item, info);
}
-const struct genl_small_ops devlink_nl_ops[56] = {
- {
- .cmd = DEVLINK_CMD_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
+const struct genl_small_ops devlink_nl_small_ops[40] = {
{
.cmd = DEVLINK_CMD_PORT_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -6302,17 +6316,9 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
- .cmd = DEVLINK_CMD_RATE_GET,
- .doit = devlink_nl_cmd_rate_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_RATE_SET,
.doit = devlink_nl_cmd_rate_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
},
{
.cmd = DEVLINK_CMD_RATE_NEW,
@@ -6323,7 +6329,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.cmd = DEVLINK_CMD_RATE_DEL,
.doit = devlink_nl_cmd_rate_del_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
},
{
.cmd = DEVLINK_CMD_PORT_SPLIT,
@@ -6350,32 +6355,11 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
- {
- .cmd = DEVLINK_CMD_LINECARD_GET,
- .doit = devlink_nl_cmd_linecard_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- /* can be retrieved by unprivileged users */
- },
+
{
.cmd = DEVLINK_CMD_LINECARD_SET,
.doit = devlink_nl_cmd_linecard_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- },
- {
- .cmd = DEVLINK_CMD_SB_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_POOL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_pool_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_SB_POOL_SET,
@@ -6384,14 +6368,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_port_pool_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_set_doit,
@@ -6399,14 +6375,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
- .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
@@ -6480,13 +6448,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_PARAM_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_param_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_PARAM_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_set_doit,
@@ -6508,13 +6469,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
- .cmd = DEVLINK_CMD_REGION_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
.cmd = DEVLINK_CMD_REGION_NEW,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_new,
@@ -6534,21 +6488,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_INFO_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_info_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_set_doit,
@@ -6597,45 +6536,21 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_TRAP_GET,
- .doit = devlink_nl_cmd_trap_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_TRAP_SET,
.doit = devlink_nl_cmd_trap_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
- .doit = devlink_nl_cmd_trap_group_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
.doit = devlink_nl_cmd_trap_group_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
- .doit = devlink_nl_cmd_trap_policer_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_TRAP_POLICER_SET,
.doit = devlink_nl_cmd_trap_policer_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
- .cmd = DEVLINK_CMD_SELFTESTS_GET,
- .doit = devlink_nl_cmd_selftests_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_SELFTESTS_RUN,
.doit = devlink_nl_cmd_selftests_run,
.flags = GENL_ADMIN_PERM,
@@ -6846,8 +6761,10 @@ int devl_port_register_with_ops(struct devlink *devlink,
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
- if (err)
+ if (err) {
+ devlink_port->registered = false;
return err;
+ }
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index 7a332eb70f70..72a5005a64cd 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -109,10 +109,9 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
return ERR_PTR(-ENODEV);
}
-static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
- struct devlink_linecard *linecard;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -122,42 +121,17 @@ static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
return PTR_ERR(devlink);
info->user_ptr[0] = devlink;
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ if (flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
err = PTR_ERR(devlink_port);
goto unlock;
}
info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ } else if (flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
- struct devlink_rate *devlink_rate;
-
- devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate)) {
- err = PTR_ERR(devlink_rate);
- goto unlock;
- }
- info->user_ptr[1] = devlink_rate;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_from_info(devlink, info);
- if (IS_ERR(rate_node)) {
- err = PTR_ERR(rate_node);
- goto unlock;
- }
- info->user_ptr[1] = rate_node;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = devlink_linecard_get_from_info(devlink, info);
- if (IS_ERR(linecard)) {
- err = PTR_ERR(linecard);
- goto unlock;
- }
- info->user_ptr[1] = linecard;
}
return 0;
@@ -167,8 +141,27 @@ unlock:
return err;
}
-static void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, ops->internal_flags);
+}
+
+int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
+}
+
+int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
+}
+
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink;
@@ -177,42 +170,41 @@ static void devlink_nl_post_doit(const struct genl_split_ops *ops,
devlink_put(devlink);
}
-static const struct devlink_cmd *devl_cmds[] = {
- [DEVLINK_CMD_GET] = &devl_cmd_get,
- [DEVLINK_CMD_PORT_GET] = &devl_cmd_port_get,
- [DEVLINK_CMD_SB_GET] = &devl_cmd_sb_get,
- [DEVLINK_CMD_SB_POOL_GET] = &devl_cmd_sb_pool_get,
- [DEVLINK_CMD_SB_PORT_POOL_GET] = &devl_cmd_sb_port_pool_get,
- [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = &devl_cmd_sb_tc_pool_bind_get,
- [DEVLINK_CMD_PARAM_GET] = &devl_cmd_param_get,
- [DEVLINK_CMD_REGION_GET] = &devl_cmd_region_get,
- [DEVLINK_CMD_INFO_GET] = &devl_cmd_info_get,
- [DEVLINK_CMD_HEALTH_REPORTER_GET] = &devl_cmd_health_reporter_get,
- [DEVLINK_CMD_TRAP_GET] = &devl_cmd_trap_get,
- [DEVLINK_CMD_TRAP_GROUP_GET] = &devl_cmd_trap_group_get,
- [DEVLINK_CMD_TRAP_POLICER_GET] = &devl_cmd_trap_policer_get,
- [DEVLINK_CMD_RATE_GET] = &devl_cmd_rate_get,
- [DEVLINK_CMD_LINECARD_GET] = &devl_cmd_linecard_get,
- [DEVLINK_CMD_SELFTESTS_GET] = &devl_cmd_selftests_get,
-};
+static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb, int flags,
+ devlink_nl_dump_one_func_t *dump_one,
+ struct nlattr **attrs)
+{
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+ err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
+
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+ if (err != -EMSGSIZE)
+ return err;
+ return msg->len;
+}
-int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int devlink_nl_inst_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb, int flags,
+ devlink_nl_dump_one_func_t *dump_one)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- const struct devlink_cmd *cmd;
struct devlink *devlink;
int err = 0;
- cmd = devl_cmds[info->op.cmd];
-
while ((devlink = devlinks_xa_find_get(sock_net(msg->sk),
&state->instance))) {
devl_lock(devlink);
if (devl_is_registered(devlink))
- err = cmd->dump_one(msg, devlink, cb);
+ err = dump_one(msg, devlink, cb, flags);
else
err = 0;
@@ -233,6 +225,21 @@ int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
return msg->len;
}
+int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
+ devlink_nl_dump_one_func_t *dump_one)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct nlattr **attrs = info->attrs;
+ int flags = NLM_F_MULTI;
+
+ if (attrs &&
+ (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME]))
+ return devlink_nl_inst_single_dumpit(msg, cb, flags, dump_one,
+ attrs);
+ else
+ return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one);
+}
+
struct genl_family devlink_nl_family __ro_after_init = {
.name = DEVLINK_GENL_NAME,
.version = DEVLINK_GENL_VERSION,
@@ -243,8 +250,10 @@ struct genl_family devlink_nl_family __ro_after_init = {
.pre_doit = devlink_nl_pre_doit,
.post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
- .small_ops = devlink_nl_ops,
- .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
+ .small_ops = devlink_nl_small_ops,
+ .n_small_ops = ARRAY_SIZE(devlink_nl_small_ops),
+ .split_ops = devlink_nl_ops,
+ .n_split_ops = ARRAY_SIZE(devlink_nl_ops),
.resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
new file mode 100644
index 000000000000..467b7a431de1
--- /dev/null
+++ b/net/devlink/netlink_gen.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netlink_gen.h"
+
+#include <uapi/linux/devlink.h>
+
+/* DEVLINK_CMD_GET - do */
+static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PORT_GET - do */
+static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_GET - dump */
+static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_GET - do */
+static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_SB_GET - dump */
+static const struct nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_POOL_GET - do */
+static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_POOL_GET - dump */
+static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET - do */
+static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */
+static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */
+static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */
+static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PARAM_GET - do */
+static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PARAM_GET - dump */
+static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_REGION_GET - do */
+static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_REGION_GET - dump */
+static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_INFO_GET - do */
+static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */
+static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */
+static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_TRAP_GET - do */
+static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GET - dump */
+static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GROUP_GET - do */
+static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GROUP_GET - dump */
+static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_POLICER_GET - do */
+static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_TRAP_POLICER_GET - dump */
+static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_GET - do */
+static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_GET - dump */
+static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_LINECARD_GET - do */
+static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_LINECARD_GET - dump */
+static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SELFTESTS_GET - do */
+static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* Ops table for devlink */
+const struct genl_split_ops devlink_nl_ops[32] = {
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .dumpit = devlink_nl_port_get_dumpit,
+ .policy = devlink_port_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .dumpit = devlink_nl_sb_get_dumpit,
+ .policy = devlink_sb_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_pool_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_pool_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .dumpit = devlink_nl_sb_pool_get_dumpit,
+ .policy = devlink_sb_pool_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_port_pool_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_port_pool_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .dumpit = devlink_nl_sb_port_pool_get_dumpit,
+ .policy = devlink_sb_port_pool_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_tc_pool_bind_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_tc_pool_bind_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_TC_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .dumpit = devlink_nl_sb_tc_pool_bind_get_dumpit,
+ .policy = devlink_sb_tc_pool_bind_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_param_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_param_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_PARAM_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_GET,
+ .dumpit = devlink_nl_param_get_dumpit,
+ .policy = devlink_param_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_region_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_region_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_REGION_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_GET,
+ .dumpit = devlink_nl_region_get_dumpit,
+ .policy = devlink_region_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_INFO_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_info_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_info_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_INFO_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_info_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .dumpit = devlink_nl_health_reporter_get_dumpit,
+ .policy = devlink_health_reporter_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .dumpit = devlink_nl_trap_get_dumpit,
+ .policy = devlink_trap_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_group_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_group_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_GROUP_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .dumpit = devlink_nl_trap_group_get_dumpit,
+ .policy = devlink_trap_group_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_policer_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_policer_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .dumpit = devlink_nl_trap_policer_get_dumpit,
+ .policy = devlink_trap_policer_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_NODE_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .dumpit = devlink_nl_rate_get_dumpit,
+ .policy = devlink_rate_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_linecard_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_linecard_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_LINECARD_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .dumpit = devlink_nl_linecard_get_dumpit,
+ .policy = devlink_linecard_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_selftests_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_selftests_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_selftests_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
new file mode 100644
index 000000000000..f8bbc93e39be
--- /dev/null
+++ b/net/devlink/netlink_gen.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_DEVLINK_GEN_H
+#define _LINUX_DEVLINK_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/devlink.h>
+
+/* Ops table for devlink */
+extern const struct genl_split_ops devlink_nl_ops[32];
+
+int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info);
+void
+devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+
+int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_info_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+#endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2f6195d7b741..37ab238e8304 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1568,27 +1568,6 @@ static void dsa_port_phylink_validate(struct phylink_config *config,
phylink_generic_validate(config, supported, state);
}
-static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
- int err;
-
- /* Only called for inband modes */
- if (!ds->ops->phylink_mac_link_state) {
- state->link = 0;
- return;
- }
-
- err = ds->ops->phylink_mac_link_state(ds, dp->index, state);
- if (err < 0) {
- dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n",
- dp->index, err);
- state->link = 0;
- }
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1646,17 +1625,6 @@ static int dsa_port_phylink_mac_finish(struct phylink_config *config,
return err;
}
-static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_an_restart)
- return;
-
- ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
@@ -1700,11 +1668,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
- .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
.mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
.mac_finish = dsa_port_phylink_mac_finish,
- .mac_an_restart = dsa_port_phylink_mac_an_restart,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
};
@@ -1720,21 +1686,18 @@ int dsa_port_phylink_create(struct dsa_port *dp)
if (err)
mode = PHY_INTERFACE_MODE_NA;
- /* Presence of phylink_mac_link_state or phylink_mac_an_restart is
- * an indicator of a legacy phylink driver.
- */
- if (ds->ops->phylink_mac_link_state ||
- ds->ops->phylink_mac_an_restart)
- dp->pl_config.legacy_pre_march2020 = true;
-
if (ds->ops->phylink_get_caps) {
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
} else {
/* For legacy drivers */
- __set_bit(PHY_INTERFACE_MODE_INTERNAL,
- dp->pl_config.supported_interfaces);
- __set_bit(PHY_INTERFACE_MODE_GMII,
- dp->pl_config.supported_interfaces);
+ if (mode != PHY_INTERFACE_MODE_NA) {
+ __set_bit(mode, dp->pl_config.supported_interfaces);
+ } else {
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ dp->pl_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ dp->pl_config.supported_interfaces);
+ }
}
pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 527b1d576460..48db91b33390 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <linux/if_hsr.h>
#include <net/dcbnl.h>
#include <linux/netpoll.h>
+#include <linux/string.h>
#include "dsa.h"
#include "port.h"
@@ -1056,10 +1057,10 @@ static void dsa_slave_get_strings(struct net_device *dev,
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
- strncpy(data, "tx_packets", len);
- strncpy(data + len, "tx_bytes", len);
- strncpy(data + 2 * len, "rx_packets", len);
- strncpy(data + 3 * len, "rx_bytes", len);
+ strscpy_pad(data, "tx_packets", len);
+ strscpy_pad(data + len, "tx_bytes", len);
+ strscpy_pad(data + 2 * len, "rx_packets", len);
+ strscpy_pad(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
ds->ops->get_strings(ds, dp->index, stringset,
data + 4 * len);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index e757c8de06f1..e5ff7c34e577 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -75,10 +75,6 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* Remove QCA tag and recalculate checksum */
- skb_pull_rcsum(skb, QCA_HDR_LEN);
- dsa_strip_etype_header(skb, QCA_HDR_LEN);
-
/* Get source port information */
port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
@@ -86,6 +82,10 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
if (!skb->dev)
return NULL;
+ /* Remove QCA tag and recalculate checksum */
+ skb_pull_rcsum(skb, QCA_HDR_LEN);
+ dsa_strip_etype_header(skb, QCA_HDR_LEN);
+
return skb;
}
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 61c40e889a4d..7b4bbd674bae 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_channels_get_policy[] = {
static int channels_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct channels_reply_data *data = CHANNELS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 01a59ce211c8..83112c1a71ae 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -59,10 +59,9 @@ const struct nla_policy ethnl_coalesce_get_policy[] = {
static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -73,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce,
- &data->kernel_coalesce, extack);
+ &data->kernel_coalesce,
+ info->extack);
ethnl_ops_complete(dev);
return ret;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 5fb19050991e..f5598c5f50de 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -665,9 +665,8 @@ const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
{
- rtnl_lock();
+ ASSERT_RTNL();
ethtool_phy_ops = ops;
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops);
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index e4369769817e..0b2dea56d461 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -23,7 +23,7 @@ const struct nla_policy ethnl_debug_get_policy[] = {
static int debug_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct debug_reply_data *data = DEBUG_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 42104bcb0e47..2853394d06a8 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -26,7 +26,7 @@ const struct nla_policy ethnl_eee_get_policy[] = {
static int eee_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct eee_reply_data *data = EEE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 49c0a2a77f02..6209c3a9c8f7 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -51,8 +51,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
}
static int eeprom_fallback(struct eeprom_req_info *request,
- struct eeprom_reply_data *reply,
- struct genl_info *info)
+ struct eeprom_reply_data *reply)
{
struct net_device *dev = reply->base.dev;
struct ethtool_modinfo modinfo = {0};
@@ -103,7 +102,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
@@ -124,7 +123,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
if (ret)
goto err_free;
- ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL);
+ ret = get_module_eeprom_by_page(dev, &page_data, info->extack);
if (ret < 0)
goto err_ops;
@@ -140,7 +139,7 @@ err_free:
kfree(page_data.data);
if (ret == -EOPNOTSUPP)
- return eeprom_fallback(request, reply, info);
+ return eeprom_fallback(request, reply);
return ret;
}
diff --git a/net/ethtool/features.c b/net/ethtool/features.c
index 55d449a2d3fc..a79af8c25a07 100644
--- a/net/ethtool/features.c
+++ b/net/ethtool/features.c
@@ -35,7 +35,7 @@ static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src)
static int features_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct features_reply_data *data = FEATURES_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 0d9a3d153170..e7d3f2c352a3 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -92,7 +92,7 @@ fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats)
static int fec_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {};
struct fec_reply_data *data = FEC_REPDATA(reply_base);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 4a51e0ec295c..0b0ce4f81c01 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -907,6 +907,38 @@ static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
return 0;
}
+static int ethtool_rxnfc_copy_struct(u32 cmd, struct ethtool_rxnfc *info,
+ size_t *info_size, void __user *useraddr)
+{
+ /* struct ethtool_rxnfc was originally defined for
+ * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+ * members. User-space might still be using that
+ * definition.
+ */
+ if (cmd == ETHTOOL_GRXFH || cmd == ETHTOOL_SRXFH)
+ *info_size = (offsetof(struct ethtool_rxnfc, data) +
+ sizeof(info->data));
+
+ if (ethtool_rxnfc_copy_from_user(info, useraddr, *info_size))
+ return -EFAULT;
+
+ if ((cmd == ETHTOOL_GRXFH || cmd == ETHTOOL_SRXFH) && info->flow_type & FLOW_RSS) {
+ *info_size = sizeof(*info);
+ if (ethtool_rxnfc_copy_from_user(info, useraddr, *info_size))
+ return -EFAULT;
+ /* Since malicious users may modify the original data,
+ * we need to check whether FLOW_RSS is still requested.
+ */
+ if (!(info->flow_type & FLOW_RSS))
+ return -EINVAL;
+ }
+
+ if (info->cmd != cmd)
+ return -EINVAL;
+
+ return 0;
+}
+
static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
const struct ethtool_rxnfc *rxnfc,
size_t size, const u32 *rule_buf)
@@ -944,16 +976,9 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (!dev->ethtool_ops->set_rxnfc)
return -EOPNOTSUPP;
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_SRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
+ rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (rc)
+ return rc;
rc = dev->ethtool_ops->set_rxnfc(dev, &info);
if (rc)
@@ -978,33 +1003,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (!ops->get_rxnfc)
return -EOPNOTSUPP;
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_GRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
-
- /* If FLOW_RSS was requested then user-space must be using the
- * new definition, as FLOW_RSS is newer.
- */
- if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
- info_size = sizeof(info);
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
- /* Since malicious users may modify the original data,
- * we need to check whether FLOW_RSS is still requested.
- */
- if (!(info.flow_type & FLOW_RSS))
- return -EINVAL;
- }
-
- if (info.cmd != cmd)
- return -EINVAL;
+ ret = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (ret)
+ return ret;
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
@@ -3207,7 +3208,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v4_m_spec->ip4src ||
v4_m_spec->ip4dst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
offsetof(struct ethtool_rx_flow_key, ipv4);
}
@@ -3222,7 +3223,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v4_m_spec->psrc ||
v4_m_spec->pdst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
offsetof(struct ethtool_rx_flow_key, tp);
}
@@ -3259,7 +3260,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src) ||
!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
offsetof(struct ethtool_rx_flow_key, ipv6);
}
@@ -3274,7 +3275,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v6_m_spec->psrc ||
v6_m_spec->pdst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
offsetof(struct ethtool_rx_flow_key, tp);
}
@@ -3282,7 +3283,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
match->key.ip.tos = v6_spec->tclass;
match->mask.ip.tos = v6_m_spec->tclass;
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IP);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IP);
match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
offsetof(struct ethtool_rx_flow_key, ip);
}
@@ -3306,7 +3307,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
break;
}
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
offsetof(struct ethtool_rx_flow_key, basic);
@@ -3339,7 +3340,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (ext_m_spec->vlan_etype ||
ext_m_spec->vlan_tci) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_VLAN);
+ BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct ethtool_rx_flow_key, vlan);
}
@@ -3354,7 +3355,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
ETH_ALEN);
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
offsetof(struct ethtool_rx_flow_key, eth_addrs);
}
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 310dfe63292a..5c317d23787b 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -23,7 +23,7 @@ const struct nla_policy ethnl_linkinfo_get_policy[] = {
static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 20165e07ef90..b2591db49f7d 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -27,7 +27,7 @@ const struct nla_policy ethnl_linkmodes_get_policy[] = {
static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 2158c17a0b32..b2de2108b356 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -81,7 +81,7 @@ static int linkstate_get_link_ext_state(struct net_device *dev,
static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
index 4058a557b5a4..2816bb23c3ad 100644
--- a/net/ethtool/mm.c
+++ b/net/ethtool/mm.c
@@ -27,7 +27,7 @@ const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = {
static int mm_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct mm_reply_data *data = MM_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index e0d539b21423..ceb575efc290 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -38,10 +38,9 @@ static int module_get_power_mode(struct net_device *dev,
static int module_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct module_reply_data *data = MODULE_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -49,7 +48,7 @@ static int module_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- ret = module_get_power_mode(dev, data, extack);
+ ret = module_get_power_mode(dev, data, info->extack);
if (ret < 0)
goto out_complete;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 39a459b0111b..3bbd5afb7b31 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
* @ops: request ops of currently processed message type
* @req_info: parsed request header of processed request
* @reply_data: data needed to compose the reply
- * @pos_hash: saved iteration position - hashbucket
- * @pos_idx: saved iteration position - index
+ * @pos_ifindex: saved iteration position - ifindex
*
* These parameters are kept in struct netlink_callback as context preserved
* between iterations. They are initialized by ethnl_default_start() and used
@@ -263,8 +262,7 @@ struct ethnl_dump_ctx {
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
struct ethnl_reply_data *reply_data;
- int pos_hash;
- int pos_idx;
+ unsigned long pos_ifindex;
};
static const struct ethnl_request_ops *
@@ -318,10 +316,8 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
/**
* ethnl_default_parse() - Parse request message
* @req_info: pointer to structure to put data into
- * @tb: parsed attributes
- * @net: request netns
+ * @info: genl_info from the request
* @request_ops: struct request_ops for request type
- * @extack: netlink extack for error reporting
* @require_dev: fail if no device identified in header
*
* Parse universal request header and call request specific ->parse_request()
@@ -330,19 +326,21 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
* Return: 0 on success or negative error code
*/
static int ethnl_default_parse(struct ethnl_req_info *req_info,
- struct nlattr **tb, struct net *net,
+ const struct genl_info *info,
const struct ethnl_request_ops *request_ops,
- struct netlink_ext_ack *extack, bool require_dev)
+ bool require_dev)
{
+ struct nlattr **tb = info->attrs;
int ret;
ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr],
- net, extack, require_dev);
+ genl_info_net(info), info->extack,
+ require_dev);
if (ret < 0)
return ret;
if (request_ops->parse_request) {
- ret = request_ops->parse_request(req_info, tb, extack);
+ ret = request_ops->parse_request(req_info, tb, info->extack);
if (ret < 0)
return ret;
}
@@ -395,8 +393,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
}
- ret = ethnl_default_parse(req_info, info->attrs, genl_info_net(info),
- ops, info->extack, !ops->allow_nodev_do);
+ ret = ethnl_default_parse(req_info, info, ops, !ops->allow_nodev_do);
if (ret < 0)
goto err_dev;
ethnl_init_reply_data(reply_data, ops, req_info->dev);
@@ -447,12 +444,12 @@ err_dev:
static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
const struct ethnl_dump_ctx *ctx,
- struct netlink_callback *cb)
+ const struct genl_info *info)
{
void *ehdr;
int ret;
- ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ ehdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&ethtool_genl_family, NLM_F_MULTI,
ctx->ops->reply_cmd);
if (!ehdr)
@@ -460,7 +457,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
rtnl_lock();
- ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
+ ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info);
rtnl_unlock();
if (ret < 0)
goto out;
@@ -490,55 +487,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
{
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
rtnl_lock();
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
- unsigned int seq;
-
- head = &net->dev_index_head[h];
-
-restart_chain:
- seq = net->dev_base_seq;
- cb->seq = seq;
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- dev_hold(dev);
- rtnl_unlock();
-
- ret = ethnl_default_dump_one(skb, dev, ctx, cb);
- dev_put(dev);
- if (ret < 0) {
- if (ret == -EOPNOTSUPP)
- goto lock_and_cont;
- if (likely(skb->len))
- ret = skb->len;
- goto out;
- }
-lock_and_cont:
- rtnl_lock();
- if (net->dev_base_seq != seq) {
- s_idx = idx + 1;
- goto restart_chain;
- }
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
+ dev_hold(dev);
+ rtnl_unlock();
+
+ ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
+ rtnl_lock();
+ dev_put(dev);
+
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ ret = skb->len;
+ break;
+ }
}
rtnl_unlock();
-out:
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
return ret;
}
@@ -568,8 +537,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
goto free_req_info;
}
- ret = ethnl_default_parse(req_info, info->attrs, sock_net(cb->skb->sk),
- ops, cb->extack, false);
+ ret = ethnl_default_parse(req_info, &info->info, ops, false);
if (req_info->dev) {
/* We ignore device specification in dump requests but as the
* same parser as for non-dump (doit) requests is used, it
@@ -584,8 +552,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
ctx->ops = ops;
ctx->req_info = req_info;
ctx->reply_data = reply_data;
- ctx->pos_hash = 0;
- ctx->pos_idx = 0;
+ ctx->pos_ifindex = 0;
return 0;
@@ -680,11 +647,14 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
struct ethnl_reply_data *reply_data;
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
+ struct genl_info info;
struct sk_buff *skb;
void *reply_payload;
int reply_len;
int ret;
+ genl_info_init_ntf(&info, &ethtool_genl_family, cmd);
+
if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX ||
!ethnl_default_notify_ops[cmd],
"unexpected notification type %u\n", cmd))
@@ -703,7 +673,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
ethnl_init_reply_data(reply_data, ops, dev);
- ret = ops->prepare_data(req_info, reply_data, NULL);
+ ret = ops->prepare_data(req_info, reply_data, &info);
if (ret < 0)
goto err_cleanup;
ret = ops->reply_size(req_info, reply_data);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 79424b34b553..9a333a8d04c1 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -355,7 +355,7 @@ struct ethnl_request_ops {
struct netlink_ext_ack *extack);
int (*prepare_data)(const struct ethnl_req_info *req_info,
struct ethnl_reply_data *reply_data,
- struct genl_info *info);
+ const struct genl_info *info);
int (*reply_size)(const struct ethnl_req_info *req_info,
const struct ethnl_reply_data *reply_data);
int (*fill_reply)(struct sk_buff *skb,
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 6657d0b888d8..f7c847aeb1a2 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -51,10 +51,9 @@ static int pause_parse_request(struct ethnl_req_info *req_base,
static int pause_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct pause_reply_data *data = PAUSE_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
@@ -74,7 +73,7 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
!__ethtool_dev_mm_supported(dev)) {
- NL_SET_ERR_MSG_MOD(extack,
+ NL_SET_ERR_MSG_MOD(info->extack,
"Device does not support MAC merge layer");
ethnl_ops_complete(dev);
return -EOPNOTSUPP;
diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c
index 637b2f5297d5..cadaabed60bd 100644
--- a/net/ethtool/phc_vclocks.c
+++ b/net/ethtool/phc_vclocks.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_phc_vclocks_get_policy[] = {
static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index 5a8cab4df0c9..b238a1afe9ae 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -40,7 +40,7 @@ const struct nla_policy ethnl_plca_get_cfg_policy[] = {
static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -183,7 +183,7 @@ const struct nla_policy ethnl_plca_get_status_policy[] = {
static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c
index 23264a1ebf12..297be6a13ab9 100644
--- a/net/ethtool/privflags.c
+++ b/net/ethtool/privflags.c
@@ -57,7 +57,7 @@ static int ethnl_get_priv_flags_info(struct net_device *dev,
static int privflags_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 530b8b99e6df..cc478af77111 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -53,8 +53,8 @@ static int pse_get_pse_attributes(struct net_device *dev,
}
static int pse_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
{
struct pse_reply_data *data = PSE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- ret = pse_get_pse_attributes(dev, info ? info->extack : NULL, data);
+ ret = pse_get_pse_attributes(dev, info->extack, data);
ethnl_ops_complete(dev);
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index 1c4972526142..fb09f774ea01 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -24,10 +24,9 @@ const struct nla_policy ethnl_rings_get_policy[] = {
static int rings_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct rings_reply_data *data = RINGS_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -39,7 +38,7 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
dev->ethtool_ops->get_ringparam(dev, &data->ringparam,
- &data->kernel_ringparam, extack);
+ &data->kernel_ringparam, info->extack);
ethnl_ops_complete(dev);
return 0;
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index be260ab34e58..5764202e6cb6 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -42,7 +42,8 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
static int
rss_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base, struct genl_info *info)
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
{
struct rss_reply_data *data = RSS_REPDATA(reply_base);
struct rss_req_info *request = RSS_REQINFO(req_base);
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index 010ed19ccc99..912f0c4fff2f 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -114,10 +114,9 @@ static int stats_parse_request(struct ethnl_req_info *req_base,
static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct stats_req_info *req_info = STATS_REQINFO(req_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct stats_reply_data *data = STATS_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
@@ -130,7 +129,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
!__ethtool_dev_mm_supported(dev)) {
- NL_SET_ERR_MSG_MOD(extack,
+ NL_SET_ERR_MSG_MOD(info->extack,
"Device does not support MAC merge layer");
ethnl_ops_complete(dev);
return -EOPNOTSUPP;
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 3f7de54d85fb..c678b484a079 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -274,7 +274,7 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
static int strset_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
struct strset_reply_data *data = STRSET_REPDATA(reply_base);
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 63b5814bd460..9daed0aab162 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -25,7 +25,7 @@ const struct nla_policy ethnl_tsinfo_get_policy[] = {
static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
index 67fb414ca859..b4ce47dd2aa6 100644
--- a/net/ethtool/tunnels.c
+++ b/net/ethtool/tunnels.c
@@ -212,15 +212,14 @@ err_unlock_rtnl:
struct ethnl_tunnel_info_dump_ctx {
struct ethnl_req_info req_info;
- int pos_hash;
- int pos_idx;
+ unsigned long ifindex;
};
int ethnl_tunnel_info_start(struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
int ret;
BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
@@ -243,57 +242,39 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
void *ehdr;
rtnl_lock();
- cb->seq = net->dev_base_seq;
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
-
- head = &net->dev_index_head[h];
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
-
- ehdr = ethnl_dump_put(skb, cb,
- ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
- if (!ehdr) {
- ret = -EMSGSIZE;
- goto out;
- }
-
- ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER);
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- goto out;
- }
-
- ctx->req_info.dev = dev;
- ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
- ctx->req_info.dev = NULL;
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- if (ret == -EOPNOTSUPP)
- goto cont;
- goto out;
- }
- genlmsg_end(skb, ehdr);
-cont:
- idx++;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ehdr = ethnl_dump_put(skb, cb,
+ ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev,
+ ETHTOOL_A_TUNNEL_INFO_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
}
+
+ ctx->req_info.dev = dev;
+ ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
+ ctx->req_info.dev = NULL;
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ if (ret == -EOPNOTSUPP)
+ continue;
+ break;
+ }
+ genlmsg_end(skb, ehdr);
}
-out:
rtnl_unlock();
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
if (ret == -EMSGSIZE && skb->len)
return skb->len;
return ret;
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index a4a43d9e6e9d..0ed56c9ac1bc 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_wol_get_policy[] = {
static int wol_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct wol_reply_data *data = WOL_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -39,7 +39,8 @@ static int wol_prepare_data(const struct ethnl_req_info *req_base,
dev->ethtool_ops->get_wol(dev, &data->wol);
ethnl_ops_complete(dev);
/* do not include password in notifications */
- data->show_sopass = info && (data->wol.supported & WAKE_MAGICSECURE);
+ data->show_sopass = !genl_info_is_ntf(info) &&
+ (data->wol.supported & WAKE_MAGICSECURE);
return 0;
}
diff --git a/net/handshake/Makefile b/net/handshake/Makefile
index 247d73c6ff6e..ef4d9a2112bd 100644
--- a/net/handshake/Makefile
+++ b/net/handshake/Makefile
@@ -8,6 +8,6 @@
#
obj-y += handshake.o
-handshake-y := genl.o netlink.o request.o tlshd.o trace.o
+handshake-y := alert.o genl.o netlink.o request.o tlshd.o trace.o
obj-$(CONFIG_NET_HANDSHAKE_KUNIT_TEST) += handshake-test.o
diff --git a/net/handshake/alert.c b/net/handshake/alert.c
new file mode 100644
index 000000000000..329d91984683
--- /dev/null
+++ b/net/handshake/alert.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handle the TLS Alert protocol
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+
+#include <net/sock.h>
+#include <net/handshake.h>
+#include <net/tls.h>
+#include <net/tls_prot.h>
+
+#include "handshake.h"
+
+#include <trace/events/handshake.h>
+
+/**
+ * tls_alert_send - send a TLS Alert on a kTLS socket
+ * @sock: open kTLS socket to send on
+ * @level: TLS Alert level
+ * @description: TLS Alert description
+ *
+ * Returns zero on success or a negative errno.
+ */
+int tls_alert_send(struct socket *sock, u8 level, u8 description)
+{
+ u8 record_type = TLS_RECORD_TYPE_ALERT;
+ u8 buf[CMSG_SPACE(sizeof(record_type))];
+ struct msghdr msg = { 0 };
+ struct cmsghdr *cmsg;
+ struct kvec iov;
+ u8 alert[2];
+ int ret;
+
+ trace_tls_alert_send(sock->sk, level, description);
+
+ alert[0] = level;
+ alert[1] = description;
+ iov.iov_base = alert;
+ iov.iov_len = sizeof(alert);
+
+ memset(buf, 0, sizeof(buf));
+ msg.msg_control = buf;
+ msg.msg_controllen = sizeof(buf);
+ msg.msg_flags = MSG_DONTWAIT;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(record_type));
+ memcpy(CMSG_DATA(cmsg), &record_type, sizeof(record_type));
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, iov.iov_len);
+ ret = sock_sendmsg(sock, &msg);
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * tls_get_record_type - Look for TLS RECORD_TYPE information
+ * @sk: socket (for IP address information)
+ * @cmsg: incoming message to be parsed
+ *
+ * Returns zero or a TLS_RECORD_TYPE value.
+ */
+u8 tls_get_record_type(const struct sock *sk, const struct cmsghdr *cmsg)
+{
+ u8 record_type;
+
+ if (cmsg->cmsg_level != SOL_TLS)
+ return 0;
+ if (cmsg->cmsg_type != TLS_GET_RECORD_TYPE)
+ return 0;
+
+ record_type = *((u8 *)CMSG_DATA(cmsg));
+ trace_tls_contenttype(sk, record_type);
+ return record_type;
+}
+EXPORT_SYMBOL(tls_get_record_type);
+
+/**
+ * tls_alert_recv - Parse TLS Alert messages
+ * @sk: socket (for IP address information)
+ * @msg: incoming message to be parsed
+ * @level: OUT - TLS AlertLevel value
+ * @description: OUT - TLS AlertDescription value
+ *
+ */
+void tls_alert_recv(const struct sock *sk, const struct msghdr *msg,
+ u8 *level, u8 *description)
+{
+ const struct kvec *iov;
+ u8 *data;
+
+ iov = msg->msg_iter.kvec;
+ data = iov->iov_base;
+ *level = data[0];
+ *description = data[1];
+
+ trace_tls_alert_recv(sk, *level, *description);
+}
+EXPORT_SYMBOL(tls_alert_recv);
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
index 4dac965c99df..a48163765a7a 100644
--- a/net/handshake/handshake.h
+++ b/net/handshake/handshake.h
@@ -41,8 +41,11 @@ struct handshake_req {
enum hr_flags_bits {
HANDSHAKE_F_REQ_COMPLETED,
+ HANDSHAKE_F_REQ_SESSION,
};
+struct genl_info;
+
/* Invariants for all handshake requests for one transport layer
* security protocol
*/
@@ -63,6 +66,9 @@ enum hp_flags_bits {
HANDSHAKE_F_PROTO_NOTIFY,
};
+/* alert.c */
+int tls_alert_send(struct socket *sock, u8 level, u8 description);
+
/* netlink.c */
int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
gfp_t flags);
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index b735f5cced2f..bbfb4095ddd6 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -18,6 +18,7 @@
#include <net/sock.h>
#include <net/handshake.h>
#include <net/genetlink.h>
+#include <net/tls_prot.h>
#include <uapi/linux/keyctl.h>
#include <uapi/linux/handshake.h>
@@ -100,6 +101,9 @@ static void tls_handshake_done(struct handshake_req *req,
if (info)
tls_handshake_remote_peerids(treq, info);
+ if (!status)
+ set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags);
+
treq->th_consumer_done(treq->th_consumer_data, -status,
treq->th_peerid[0]);
}
@@ -424,3 +428,22 @@ bool tls_handshake_cancel(struct sock *sk)
return handshake_req_cancel(sk);
}
EXPORT_SYMBOL(tls_handshake_cancel);
+
+/**
+ * tls_handshake_close - send a Closure alert
+ * @sock: an open socket
+ *
+ */
+void tls_handshake_close(struct socket *sock)
+{
+ struct handshake_req *req;
+
+ req = handshake_req_hash_lookup(sock->sk);
+ if (!req)
+ return;
+ if (!test_and_clear_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags))
+ return;
+ tls_alert_send(sock, TLS_ALERT_LEVEL_WARNING,
+ TLS_ALERT_DESC_CLOSE_NOTIFY);
+}
+EXPORT_SYMBOL(tls_handshake_close);
diff --git a/net/handshake/trace.c b/net/handshake/trace.c
index 1c4d8e27e17a..44432d0857b9 100644
--- a/net/handshake/trace.c
+++ b/net/handshake/trace.c
@@ -8,8 +8,10 @@
*/
#include <linux/types.h>
+#include <linux/ipv6.h>
#include <net/sock.h>
+#include <net/inet_sock.h>
#include <net/netlink.h>
#include <net/genetlink.h>
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
index 501552d9753b..8c99e64e1cea 100644
--- a/net/hsr/hsr_netlink.h
+++ b/net/hsr/hsr_netlink.h
@@ -23,7 +23,5 @@ void __exit hsr_netlink_exit(void);
void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
struct hsr_port *port);
void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]);
-void hsr_nl_framedrop(int dropcount, int dev_idx);
-void hsr_nl_linkdown(int dev_idx);
#endif /* __HSR_NETLINK_H */
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index d610c1886160..1a265a421308 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -262,7 +262,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- info->attrs);
+ info->info.attrs);
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -570,7 +570,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct nl802154_dump_wpan_phy_state *state)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
if (tb[NL802154_ATTR_WPAN_PHY])
state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 02736b83c303..3d2e30e20473 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -187,24 +187,13 @@ static int inet_autobind(struct sock *sk)
return 0;
}
-/*
- * Move a socket into listening state.
- */
-int inet_listen(struct socket *sock, int backlog)
+int __inet_listen_sk(struct sock *sk, int backlog)
{
- struct sock *sk = sock->sk;
- unsigned char old_state;
+ unsigned char old_state = sk->sk_state;
int err, tcp_fastopen;
- lock_sock(sk);
-
- err = -EINVAL;
- if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
- goto out;
-
- old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
- goto out;
+ return -EINVAL;
WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
@@ -227,10 +216,27 @@ int inet_listen(struct socket *sock, int backlog)
err = inet_csk_listen_start(sk);
if (err)
- goto out;
+ return err;
+
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
- err = 0;
+ return 0;
+}
+
+/*
+ * Move a socket into listening state.
+ */
+int inet_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+ int err = -EINVAL;
+
+ lock_sock(sk);
+
+ if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
+ goto out;
+
+ err = __inet_listen_sk(sk, backlog);
out:
release_sock(sk);
@@ -325,14 +331,14 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
- inet->nodefrag = 0;
+ inet_clear_bit(NODEFRAG, sk);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
@@ -350,9 +356,9 @@ lookup_protocol:
sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
- inet->mc_all = 1;
+ inet_set_bit(MC_ALL, sk);
inet->mc_index = 0;
inet->mc_list = NULL;
inet->rcv_tos = 0;
@@ -431,9 +437,8 @@ int inet_release(struct socket *sock)
}
EXPORT_SYMBOL(inet_release);
-int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
int err;
@@ -454,6 +459,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet_bind(sk, uaddr, addr_len, flags);
}
+
+int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
@@ -519,7 +529,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -646,7 +656,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -EISCONN;
goto out;
case SS_CONNECTING:
- if (inet_sk(sk)->defer_connect)
+ if (inet_test_bit(DEFER_CONNECT, sk))
err = is_sendmsg ? -EINPROGRESS : -EISCONN;
else
err = -EALREADY;
@@ -669,7 +679,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
- if (!err && inet_sk(sk)->defer_connect)
+ if (!err && inet_test_bit(DEFER_CONNECT, sk))
goto out;
/* Just entered SS_CONNECTING state; the only
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4406d796cc2f..39dcccf0f174 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset)
return false;
}
-extern struct btf *btf_vmlinux;
-
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 79ae7204e8ed..d048aa833293 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1881,7 +1881,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
old = rcu_dereference_protected(sk_inet->inet_opt,
lockdep_sock_is_held(sk));
- if (sk_inet->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
sk_conn = inet_csk(sk);
if (old)
sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
@@ -2051,7 +2051,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
sk_inet = inet_sk(sk);
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
- if (sk_inet->is_icsk && hdr_delta > 0) {
+ if (inet_test_bit(IS_ICSK, sk) && hdr_delta > 0) {
struct inet_connection_sock *sk_conn = inet_csk(sk);
sk_conn->icsk_ext_hdr_len -= hdr_delta;
sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5deac0517ef7..c3658b8755bc 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -509,6 +509,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
return -EEXIST;
}
if (ifa1->ifa_scope != ifa->ifa_scope) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
inet_free_ifa(ifa);
return -EINVAL;
}
@@ -664,6 +665,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
in_dev = inetdev_by_index(net, ifm->ifa_index);
if (!in_dev) {
+ NL_SET_ERR_MSG(extack, "ipv4: Device not found");
err = -ENODEV;
goto errout;
}
@@ -688,6 +690,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+ NL_SET_ERR_MSG(extack, "ipv4: Address not found");
err = -EADDRNOTAVAIL;
errout:
return err;
@@ -839,13 +842,23 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
err = -EINVAL;
- if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
+
+ if (ifm->ifa_prefixlen > 32) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
+ goto errout;
+ }
+
+ if (!tb[IFA_LOCAL]) {
+ NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
goto errout;
+ }
dev = __dev_get_by_index(net, ifm->ifa_index);
err = -ENODEV;
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "ipv4: Device not found");
goto errout;
+ }
in_dev = __in_dev_get_rtnl(dev);
err = -ENOBUFS;
@@ -897,6 +910,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+ NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
err = -EINVAL;
goto errout_free;
}
@@ -954,6 +968,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
int ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
inet_free_ifa(ifa);
return ret;
}
@@ -967,8 +982,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
return -EEXIST;
+ }
ifa = ifa_existing;
if (ifa->ifa_rt_priority != new_metric) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 48ff5f13e797..0c9e768e5628 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2658,7 +2658,7 @@ int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
(sdif && pmc->multi.imr_ifindex == sdif)))
break;
}
- ret = inet->mc_all;
+ ret = inet_test_bit(MC_ALL, sk);
if (!pmc)
goto unlock;
psl = rcu_dereference(pmc->sflist);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f7426926a104..e13a84433413 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -182,17 +182,17 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
r->idiag_inode = sock_i_ino(sk);
memset(&inet_sockopt, 0, sizeof(inet_sockopt));
- inet_sockopt.recverr = inet->recverr;
- inet_sockopt.is_icsk = inet->is_icsk;
- inet_sockopt.freebind = inet->freebind;
- inet_sockopt.hdrincl = inet->hdrincl;
- inet_sockopt.mc_loop = inet->mc_loop;
- inet_sockopt.transparent = inet->transparent;
- inet_sockopt.mc_all = inet->mc_all;
- inet_sockopt.nodefrag = inet->nodefrag;
- inet_sockopt.bind_address_no_port = inet->bind_address_no_port;
- inet_sockopt.recverr_rfc4884 = inet->recverr_rfc4884;
- inet_sockopt.defer_connect = inet->defer_connect;
+ inet_sockopt.recverr = inet_test_bit(RECVERR, sk);
+ inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk);
+ inet_sockopt.freebind = inet_test_bit(FREEBIND, sk);
+ inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk);
+ inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk);
+ inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk);
+ inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk);
+ inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk);
+ inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
+ inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk);
+ inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk);
if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
&inet_sockopt))
goto errout;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 0819d6001b9a..7876b7d703cb 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,9 +28,9 @@
#include <net/tcp.h>
#include <net/sock_reuseport.h>
-static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+u32 inet_ehashfn(const struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
{
static u32 inet_ehash_secret __read_mostly;
@@ -39,6 +39,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
return __inet_ehashfn(laddr, lport, faddr, fport,
inet_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet_ehashfn);
/* This function handles inet_sock, but also timewait and request sockets
* for IPv4/IPv6.
@@ -332,20 +333,38 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum)
+/**
+ * inet_lookup_reuseport() - execute reuseport logic on AF_INET socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum,
+ inet_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet_lookup_reuseport);
/*
* Here are some nice properties to exploit here. The BSD API
@@ -369,8 +388,8 @@ static struct sock *inet_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet_ehashfn);
if (result)
return result;
@@ -382,24 +401,23 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum, const int dif)
+struct sock *inet_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum, const int dif,
+ inet_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v4(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum,
+ ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
@@ -417,9 +435,11 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet_ehashfn);
if (result)
goto done;
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2c1b245dba8e..dd37a5bf6881 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -203,7 +203,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_reuseport = sk->sk_reuseport;
tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
- tw->tw_transparent = inet->transparent;
+ tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ba1a0fafbaa..ce6257860a40 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(ip_local_out);
static inline int ip_select_ttl(const struct inet_sock *inet,
const struct dst_entry *dst)
{
- int ttl = inet->uc_ttl;
+ int ttl = READ_ONCE(inet->uc_ttl);
if (ttl < 0)
ttl = ip4_dst_hoplimit(dst);
@@ -236,7 +236,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
- return -EINVAL;
+ return PTR_ERR(neigh);
}
static int ip_finish_output_gso(struct net *net, struct sock *sk,
@@ -1039,7 +1039,7 @@ static int __ip_append_data(struct sock *sk,
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1467,7 +1467,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* so icmphdr does not in skb linear region and can not get icmp_type
* by icmp_hdr(skb)->type.
*/
- if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ if (sk->sk_type == SOCK_RAW &&
+ !inet_test_bit(HDRINCL, sk))
icmp_type = fl4->fl4_icmp_type;
else
icmp_type = icmp_hdr(skb)->type;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d41bce8927b2..54ad0f0d5c2d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -171,8 +171,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset)
{
- struct inet_sock *inet = inet_sk(sk);
- unsigned int flags = inet->cmsg_flags;
+ unsigned long flags = inet_cmsg_flags(inet_sk(sk));
+
+ if (!flags)
+ return;
/* Ordered by supposed usage frequency */
if (flags & IP_CMSG_PKTINFO) {
@@ -431,7 +433,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
if (skb_pull(skb, payload - skb->data)) {
- if (inet_sk(sk)->recverr_rfc4884)
+ if (inet_test_bit(RECVERR_RFC4884, sk))
ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
@@ -444,12 +446,11 @@ EXPORT_SYMBOL_GPL(ip_icmp_error);
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
- struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
struct iphdr *iph;
struct sk_buff *skb;
- if (!inet->recverr)
+ if (!inet_test_bit(RECVERR, sk))
return;
skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
@@ -568,7 +569,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
@@ -607,17 +608,13 @@ EXPORT_SYMBOL(ip_sock_set_tos);
void ip_sock_set_freebind(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->freebind = true;
- release_sock(sk);
+ inet_set_bit(FREEBIND, sk);
}
EXPORT_SYMBOL(ip_sock_set_freebind);
void ip_sock_set_recverr(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->recverr = true;
- release_sock(sk);
+ inet_set_bit(RECVERR, sk);
}
EXPORT_SYMBOL(ip_sock_set_recverr);
@@ -634,9 +631,7 @@ EXPORT_SYMBOL(ip_sock_set_mtu_discover);
void ip_sock_set_pktinfo(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
- release_sock(sk);
+ inet_set_bit(PKTINFO, sk);
}
EXPORT_SYMBOL(ip_sock_set_pktinfo);
@@ -950,6 +945,104 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ inet_assign_bit(PKTINFO, sk, val);
+ return 0;
+ case IP_RECVTTL:
+ inet_assign_bit(TTL, sk, val);
+ return 0;
+ case IP_RECVTOS:
+ inet_assign_bit(TOS, sk, val);
+ return 0;
+ case IP_RECVOPTS:
+ inet_assign_bit(RECVOPTS, sk, val);
+ return 0;
+ case IP_RETOPTS:
+ inet_assign_bit(RETOPTS, sk, val);
+ return 0;
+ case IP_PASSSEC:
+ inet_assign_bit(PASSSEC, sk, val);
+ return 0;
+ case IP_RECVORIGDSTADDR:
+ inet_assign_bit(ORIGDSTADDR, sk, val);
+ return 0;
+ case IP_RECVFRAGSIZE:
+ if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+ return -EINVAL;
+ inet_assign_bit(RECVFRAGSIZE, sk, val);
+ return 0;
+ case IP_RECVERR:
+ inet_assign_bit(RECVERR, sk, val);
+ if (!val)
+ skb_errqueue_purge(&sk->sk_error_queue);
+ return 0;
+ case IP_RECVERR_RFC4884:
+ if (val < 0 || val > 1)
+ return -EINVAL;
+ inet_assign_bit(RECVERR_RFC4884, sk, val);
+ return 0;
+ case IP_FREEBIND:
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(FREEBIND, sk, val);
+ return 0;
+ case IP_HDRINCL:
+ if (sk->sk_type != SOCK_RAW)
+ return -ENOPROTOOPT;
+ inet_assign_bit(HDRINCL, sk, val);
+ return 0;
+ case IP_MULTICAST_LOOP:
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(MC_LOOP, sk, val);
+ return 0;
+ case IP_MULTICAST_ALL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ inet_assign_bit(MC_ALL, sk, val);
+ return 0;
+ case IP_TRANSPARENT:
+ if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ break;
+ }
+ if (optlen < 1)
+ goto e_inval;
+ inet_assign_bit(TRANSPARENT, sk, val);
+ return 0;
+ case IP_NODEFRAG:
+ if (sk->sk_type != SOCK_RAW)
+ return -ENOPROTOOPT;
+ inet_assign_bit(NODEFRAG, sk, val);
+ return 0;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val);
+ return 0;
+ case IP_TTL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val != -1 && (val < 1 || val > 255))
+ return -EINVAL;
+ WRITE_ONCE(inet->uc_ttl, val);
+ return 0;
+ case IP_MINTTL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&ip4_min_ttl);
+
+ WRITE_ONCE(inet->min_ttl, val);
+ return 0;
+ }
+
err = 0;
if (needs_rtnl)
rtnl_lock();
@@ -967,7 +1060,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
break;
old = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
- if (inet->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
@@ -989,111 +1082,27 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
kfree_rcu(old, rcu);
break;
}
- case IP_PKTINFO:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PKTINFO;
- else
- inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
- break;
- case IP_RECVTTL:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TTL;
- else
- inet->cmsg_flags &= ~IP_CMSG_TTL;
- break;
- case IP_RECVTOS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TOS;
- else
- inet->cmsg_flags &= ~IP_CMSG_TOS;
- break;
- case IP_RECVOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
- break;
- case IP_RETOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RETOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
- break;
- case IP_PASSSEC:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PASSSEC;
- else
- inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
- break;
- case IP_RECVORIGDSTADDR:
- if (val)
- inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
- else
- inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
- break;
case IP_CHECKSUM:
if (val) {
- if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
+ if (!(inet_test_bit(CHECKSUM, sk))) {
inet_inc_convert_csum(sk);
- inet->cmsg_flags |= IP_CMSG_CHECKSUM;
+ inet_set_bit(CHECKSUM, sk);
}
} else {
- if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
+ if (inet_test_bit(CHECKSUM, sk)) {
inet_dec_convert_csum(sk);
- inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
+ inet_clear_bit(CHECKSUM, sk);
}
}
break;
- case IP_RECVFRAGSIZE:
- if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
- goto e_inval;
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
- break;
case IP_TOS: /* This sets both TOS and Precedence */
__ip_sock_set_tos(sk, val);
break;
- case IP_TTL:
- if (optlen < 1)
- goto e_inval;
- if (val != -1 && (val < 1 || val > 255))
- goto e_inval;
- inet->uc_ttl = val;
- break;
- case IP_HDRINCL:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
- break;
- }
- inet->hdrincl = val ? 1 : 0;
- break;
- case IP_NODEFRAG:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
- break;
- }
- inet->nodefrag = val ? 1 : 0;
- break;
- case IP_BIND_ADDRESS_NO_PORT:
- inet->bind_address_no_port = val ? 1 : 0;
- break;
case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
- case IP_RECVERR:
- inet->recverr = !!val;
- if (!val)
- skb_queue_purge(&sk->sk_error_queue);
- break;
- case IP_RECVERR_RFC4884:
- if (val < 0 || val > 1)
- goto e_inval;
- inet->recverr_rfc4884 = !!val;
- break;
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -1105,11 +1114,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
goto e_inval;
inet->mc_ttl = val;
break;
- case IP_MULTICAST_LOOP:
- if (optlen < 1)
- goto e_inval;
- inet->mc_loop = !!val;
- break;
case IP_UNICAST_IF:
{
struct net_device *dev = NULL;
@@ -1214,7 +1218,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
struct ip_mreqn mreq;
err = -EPROTO;
- if (inet_sk(sk)->is_icsk)
+ if (inet_test_bit(IS_ICSK, sk))
break;
if (optlen < sizeof(struct ip_mreq))
@@ -1325,20 +1329,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
else
err = ip_set_mcast_msfilter(sk, optval, optlen);
break;
- case IP_MULTICAST_ALL:
- if (optlen < 1)
- goto e_inval;
- if (val != 0 && val != 1)
- goto e_inval;
- inet->mc_all = val;
- break;
-
- case IP_FREEBIND:
- if (optlen < 1)
- goto e_inval;
- inet->freebind = !!val;
- break;
-
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
@@ -1347,32 +1337,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IP_TRANSPARENT:
- if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
- err = -EPERM;
- break;
- }
- if (optlen < 1)
- goto e_inval;
- inet->transparent = !!val;
- break;
-
- case IP_MINTTL:
- if (optlen < 1)
- goto e_inval;
- if (val < 0 || val > 255)
- goto e_inval;
-
- if (val)
- static_branch_enable(&ip4_min_ttl);
-
- /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
- * while we are changint it.
- */
- WRITE_ONCE(inet->min_ttl, val);
- break;
-
case IP_LOCAL_PORT_RANGE:
{
const __u16 lo = val;
@@ -1415,7 +1379,7 @@ e_inval:
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
- bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
+ bool prepare = inet_test_bit(PKTINFO, sk) ||
ipv6_sk_rxinfo(sk);
if (prepare && skb_rtable(skb)) {
@@ -1566,6 +1530,72 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < 0)
return -EINVAL;
+ /* Handle options that can be read without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ val = inet_test_bit(PKTINFO, sk);
+ goto copyval;
+ case IP_RECVTTL:
+ val = inet_test_bit(TTL, sk);
+ goto copyval;
+ case IP_RECVTOS:
+ val = inet_test_bit(TOS, sk);
+ goto copyval;
+ case IP_RECVOPTS:
+ val = inet_test_bit(RECVOPTS, sk);
+ goto copyval;
+ case IP_RETOPTS:
+ val = inet_test_bit(RETOPTS, sk);
+ goto copyval;
+ case IP_PASSSEC:
+ val = inet_test_bit(PASSSEC, sk);
+ goto copyval;
+ case IP_RECVORIGDSTADDR:
+ val = inet_test_bit(ORIGDSTADDR, sk);
+ goto copyval;
+ case IP_CHECKSUM:
+ val = inet_test_bit(CHECKSUM, sk);
+ goto copyval;
+ case IP_RECVFRAGSIZE:
+ val = inet_test_bit(RECVFRAGSIZE, sk);
+ goto copyval;
+ case IP_RECVERR:
+ val = inet_test_bit(RECVERR, sk);
+ goto copyval;
+ case IP_RECVERR_RFC4884:
+ val = inet_test_bit(RECVERR_RFC4884, sk);
+ goto copyval;
+ case IP_FREEBIND:
+ val = inet_test_bit(FREEBIND, sk);
+ goto copyval;
+ case IP_HDRINCL:
+ val = inet_test_bit(HDRINCL, sk);
+ goto copyval;
+ case IP_MULTICAST_LOOP:
+ val = inet_test_bit(MC_LOOP, sk);
+ goto copyval;
+ case IP_MULTICAST_ALL:
+ val = inet_test_bit(MC_ALL, sk);
+ goto copyval;
+ case IP_TRANSPARENT:
+ val = inet_test_bit(TRANSPARENT, sk);
+ goto copyval;
+ case IP_NODEFRAG:
+ val = inet_test_bit(NODEFRAG, sk);
+ goto copyval;
+ case IP_BIND_ADDRESS_NO_PORT:
+ val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
+ goto copyval;
+ case IP_TTL:
+ val = READ_ONCE(inet->uc_ttl);
+ if (val < 0)
+ val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl);
+ goto copyval;
+ case IP_MINTTL:
+ val = READ_ONCE(inet->min_ttl);
+ goto copyval;
+ }
+
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -1600,53 +1630,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
- case IP_PKTINFO:
- val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
- break;
- case IP_RECVTTL:
- val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
- break;
- case IP_RECVTOS:
- val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
- break;
- case IP_RECVOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
- break;
- case IP_RETOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
- break;
- case IP_PASSSEC:
- val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
- break;
- case IP_RECVORIGDSTADDR:
- val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
- break;
- case IP_CHECKSUM:
- val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
- break;
- case IP_RECVFRAGSIZE:
- val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
- break;
case IP_TOS:
val = inet->tos;
break;
- case IP_TTL:
- {
- struct net *net = sock_net(sk);
- val = (inet->uc_ttl == -1 ?
- READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
- inet->uc_ttl);
- break;
- }
- case IP_HDRINCL:
- val = inet->hdrincl;
- break;
- case IP_NODEFRAG:
- val = inet->nodefrag;
- break;
- case IP_BIND_ADDRESS_NO_PORT:
- val = inet->bind_address_no_port;
- break;
case IP_MTU_DISCOVER:
val = inet->pmtudisc;
break;
@@ -1665,18 +1651,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
}
break;
}
- case IP_RECVERR:
- val = inet->recverr;
- break;
- case IP_RECVERR_RFC4884:
- val = inet->recverr_rfc4884;
- break;
case IP_MULTICAST_TTL:
val = inet->mc_ttl;
break;
- case IP_MULTICAST_LOOP:
- val = inet->mc_loop;
- break;
case IP_UNICAST_IF:
val = (__force int)htonl((__u32) inet->uc_index);
break;
@@ -1715,9 +1692,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
else
err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- case IP_MULTICAST_ALL:
- val = inet->mc_all;
- break;
case IP_PKTOPTIONS:
{
struct msghdr msg;
@@ -1737,7 +1711,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_controllen = len;
msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
- if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+ if (inet_test_bit(PKTINFO, sk)) {
struct in_pktinfo info;
info.ipi_addr.s_addr = inet->inet_rcv_saddr;
@@ -1745,26 +1719,17 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
info.ipi_ifindex = inet->mc_index;
put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
- if (inet->cmsg_flags & IP_CMSG_TTL) {
+ if (inet_test_bit(TTL, sk)) {
int hlim = inet->mc_ttl;
put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
}
- if (inet->cmsg_flags & IP_CMSG_TOS) {
+ if (inet_test_bit(TOS, sk)) {
int tos = inet->rcv_tos;
put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
}
len -= msg.msg_controllen;
return copy_to_sockptr(optlen, &len, sizeof(int));
}
- case IP_FREEBIND:
- val = inet->freebind;
- break;
- case IP_TRANSPARENT:
- val = inet->transparent;
- break;
- case IP_MINTTL:
- val = inet->min_ttl;
- break;
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
@@ -1776,7 +1741,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -ENOPROTOOPT;
}
sockopt_release_sock(sk);
-
+copyval:
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index e61ea428ea18..265b39bc435b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -7,6 +7,7 @@
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <net/netns/generic.h>
#include <net/route.h>
@@ -65,7 +66,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
struct sock *sk = skb->sk;
if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
- inet_sk(sk)->nodefrag)
+ inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -113,17 +114,31 @@ static void __net_exit defrag4_net_exit(struct net *net)
}
}
+static const struct nf_defrag_hook defrag_hook = {
+ .owner = THIS_MODULE,
+ .enable = nf_defrag_ipv4_enable,
+ .disable = nf_defrag_ipv4_disable,
+};
+
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
};
static int __init nf_defrag_init(void)
{
- return register_pernet_subsys(&defrag4_net_ops);
+ int err;
+
+ err = register_pernet_subsys(&defrag4_net_ops);
+ if (err)
+ return err;
+
+ rcu_assign_pointer(nf_defrag_v4_hook, &defrag_hook);
+ return err;
}
static void __exit nf_defrag_fini(void)
{
+ rcu_assign_pointer(nf_defrag_v4_hook, NULL);
unregister_pernet_subsys(&defrag4_net_ops);
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index be5498f5dd31..bbff68b5b5d4 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+static bool nexthop_is_good_nh(const struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference(nh->nh_info);
+
+ switch (nhi->family) {
+ case AF_INET:
+ return ipv4_good_nh(&nhi->fib_nh);
+ case AF_INET6:
+ return ipv6_good_nh(&nhi->fib6_nh);
+ }
+
+ return false;
+}
+
+static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
int i;
- for (i = 0; i < nhg->num_nh; ++i) {
+ for (i = 0; i < nhg->num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- struct nh_info *nhi;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
- nhi = rcu_dereference(nhge->nh->nh_info);
- if (nhi->fdb_nh)
- return nhge->nh;
+ return nhge->nh;
+ }
+
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+{
+ struct nexthop *rc = NULL;
+ int i;
+
+ if (nhg->fdb_nh)
+ return nexthop_select_path_fdb(nhg, hash);
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- switch (nhi->family) {
- case AF_INET:
- if (ipv4_good_nh(&nhi->fib_nh))
- return nhge->nh;
- break;
- case AF_INET6:
- if (ipv6_good_nh(&nhi->fib6_nh))
- return nhge->nh;
- break;
- }
+ if (!nexthop_is_good_nh(nhge->nh))
+ continue;
if (!rc)
rc = nhge->nh;
+
+ if (hash > atomic_read(&nhge->hthr.upper_bound))
+ continue;
+
+ return nhge->nh;
}
- return rc;
+ return rc ? : nhg->nh_entries[0].nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -3186,7 +3209,6 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb,
return err;
}
- ctx->idx++;
return 0;
}
@@ -3314,7 +3336,6 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
struct rtm_dump_res_bucket_ctx {
struct rtm_dump_nh_ctx nh;
u16 bucket_index;
- u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
};
static struct rtm_dump_res_bucket_ctx *
@@ -3343,9 +3364,6 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
u16 bucket_index;
int err;
- if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
- return 0;
-
nhg = rtnl_dereference(nh->nh_grp);
res_table = rtnl_dereference(nhg->res_table);
for (bucket_index = dd->ctx->bucket_index;
@@ -3372,7 +3390,6 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
return err;
}
- dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
dd->ctx->bucket_index = 0;
return 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 25dd78cee179..75e0aee35eb7 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -580,7 +580,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if ((family == AF_INET && !inet_sock->recverr) ||
+ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
(family == AF_INET6 && !inet6_sk(sk)->recverr)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
@@ -894,7 +894,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
*addr_len = sizeof(*sin);
}
- if (isk->cmsg_flags)
+ if (inet_cmsg_flags(isk))
ip_cmsg_recv(msg, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -921,7 +921,8 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
if (skb->protocol == htons(ETH_P_IPV6) &&
inet6_sk(sk)->rxopt.all)
pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb);
- else if (skb->protocol == htons(ETH_P_IP) && isk->cmsg_flags)
+ else if (skb->protocol == htons(ETH_P_IP) &&
+ inet_cmsg_flags(isk))
ip_cmsg_recv(msg, skb);
#endif
} else {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cb381f5aa464..4b5db5d1edc2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -203,8 +203,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
struct inet_sock *inet = inet_sk(sk);
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- int err = 0;
int harderr = 0;
+ bool recverr;
+ int err = 0;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
@@ -218,7 +219,8 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
2. Socket is connected (otherwise the error indication
is useless without ip_recverr and error is hard.
*/
- if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
+ recverr = inet_test_bit(RECVERR, sk);
+ if (!recverr && sk->sk_state != TCP_ESTABLISHED)
return;
switch (type) {
@@ -245,16 +247,16 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
}
}
- if (inet->recverr) {
+ if (recverr) {
const struct iphdr *iph = (const struct iphdr *)skb->data;
u8 *payload = skb->data + (iph->ihl << 2);
- if (inet->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
payload = skb->data;
ip_icmp_error(sk, skb, err, 0, info, payload);
}
- if (inet->recverr || harderr) {
+ if (recverr || harderr) {
sk->sk_err = err;
sk_error_report(sk);
}
@@ -413,7 +415,7 @@ error_free:
kfree_skb(skb);
error:
IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
- if (err == -ENOBUFS && !inet->recverr)
+ if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk))
err = 0;
return err;
}
@@ -489,12 +491,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (len > 0xFFFF)
goto out;
- /* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields.
- * Doing this indirectly yields the same result.
- */
- hdrincl = inet->hdrincl;
- hdrincl = READ_ONCE(hdrincl);
+ hdrincl = inet_test_bit(HDRINCL, sk);
+
/*
* Check the flags.
*/
@@ -645,7 +643,7 @@ back_from_confirm:
ip_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE)) {
err = ip_push_pending_frames(sk, &fl4);
- if (err == -ENOBUFS && !inet->recverr)
+ if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk))
err = 0;
}
release_sock(sk);
@@ -767,7 +765,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv(msg, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92fede388d52..a4e153dd615b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -515,13 +515,12 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
__u8 scope = RT_SCOPE_UNIVERSE;
if (sk) {
- const struct inet_sock *inet = inet_sk(sk);
-
oif = sk->sk_bound_dev_if;
mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
- prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+ prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW :
+ sk->sk_protocol;
}
flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
@@ -555,7 +554,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ inet_test_bit(HDRINCL, sk) ?
+ IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
rcu_read_unlock();
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8ed52e1e3c99..cee1e548660c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk)
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+ tcp_scaling_ratio_init(sk);
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
@@ -582,7 +583,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (urg_data & TCP_URG_VALID)
mask |= EPOLLPRI;
- } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT &&
+ inet_test_bit(DEFER_CONNECT, sk)) {
/* Active TCP fastopen socket with defer_connect
* Return EPOLLOUT so application can call write()
* in order for kernel to generate SYN+data
@@ -1006,7 +1008,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
tp->fastopen_req->size = size;
tp->fastopen_req->uarg = uarg;
- if (inet->defer_connect) {
+ if (inet_test_bit(DEFER_CONNECT, sk)) {
err = tcp_connect(sk);
/* Same failure procedure as in tcp_v4/6_connect */
if (err) {
@@ -1024,7 +1026,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
if (tp->fastopen_req) {
*copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
- inet->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
}
return err;
}
@@ -1065,7 +1067,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
zc = MSG_SPLICE_PAGES;
}
- if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+ if (unlikely(flags & MSG_FASTOPEN ||
+ inet_test_bit(DEFER_CONNECT, sk)) &&
!tp->repair) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
if (err == -EINPROGRESS && copied_syn > 0)
@@ -1700,7 +1703,7 @@ EXPORT_SYMBOL(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
{
- int cap;
+ int space, cap;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
@@ -1715,10 +1718,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
- val <<= 1;
- if (val > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, val);
- tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ space = tcp_space_from_win(sk, val);
+ if (space > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ tcp_sk(sk)->window_clamp = val;
}
return 0;
}
@@ -2864,7 +2867,7 @@ adjudge_to_death:
if (sk->sk_state == TCP_FIN_WAIT2) {
struct tcp_sock *tp = tcp_sk(sk);
- if (tp->linger2 < 0) {
+ if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk),
@@ -3087,7 +3090,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
- inet->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
@@ -3290,18 +3293,21 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_SYNCNT)
return -EINVAL;
- lock_sock(sk);
WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
- release_sock(sk);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_syncnt);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+int tcp_sock_set_user_timeout(struct sock *sk, int val)
{
- lock_sock(sk);
+ /* Cap the max time in ms TCP will retry or probe the window
+ * before giving up and aborting (ETIMEDOUT) a connection.
+ */
+ if (val < 0)
+ return -EINVAL;
+
WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
- release_sock(sk);
+ return 0;
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3344,9 +3350,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPINTVL)
return -EINVAL;
- lock_sock(sk);
WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
- release_sock(sk);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_keepintvl);
@@ -3356,10 +3360,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPCNT)
return -EINVAL;
- lock_sock(sk);
/* Paired with READ_ONCE() in keepalive_probes() */
WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
- release_sock(sk);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_keepcnt);
@@ -3461,6 +3463,32 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case TCP_SYNCNT:
+ return tcp_sock_set_syncnt(sk, val);
+ case TCP_USER_TIMEOUT:
+ return tcp_sock_set_user_timeout(sk, val);
+ case TCP_KEEPINTVL:
+ return tcp_sock_set_keepintvl(sk, val);
+ case TCP_KEEPCNT:
+ return tcp_sock_set_keepcnt(sk, val);
+ case TCP_LINGER2:
+ if (val < 0)
+ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+ else
+ WRITE_ONCE(tp->linger2, val * HZ);
+ return 0;
+ case TCP_DEFER_ACCEPT:
+ /* Translate value in seconds to number of retransmits */
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ));
+ return 0;
+ }
+
sockopt_lock_sock(sk);
switch (optname) {
@@ -3556,25 +3584,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_KEEPIDLE:
err = tcp_sock_set_keepidle_locked(sk, val);
break;
- case TCP_KEEPINTVL:
- if (val < 1 || val > MAX_TCP_KEEPINTVL)
- err = -EINVAL;
- else
- WRITE_ONCE(tp->keepalive_intvl, val * HZ);
- break;
- case TCP_KEEPCNT:
- if (val < 1 || val > MAX_TCP_KEEPCNT)
- err = -EINVAL;
- else
- WRITE_ONCE(tp->keepalive_probes, val);
- break;
- case TCP_SYNCNT:
- if (val < 1 || val > MAX_TCP_SYNCNT)
- err = -EINVAL;
- else
- WRITE_ONCE(icsk->icsk_syn_retries, val);
- break;
-
case TCP_SAVE_SYN:
/* 0: disable, 1: enable, 2: start from ether_header */
if (val < 0 || val > 2)
@@ -3583,22 +3592,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
tp->save_syn = val;
break;
- case TCP_LINGER2:
- if (val < 0)
- WRITE_ONCE(tp->linger2, -1);
- else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
- WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
- else
- WRITE_ONCE(tp->linger2, val * HZ);
- break;
-
- case TCP_DEFER_ACCEPT:
- /* Translate value in seconds to number of retransmits */
- WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
- TCP_RTO_MAX / HZ));
- break;
-
case TCP_WINDOW_CLAMP:
err = tcp_set_window_clamp(sk, val);
break;
@@ -3613,16 +3606,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
break;
#endif
- case TCP_USER_TIMEOUT:
- /* Cap the max time in ms TCP will retry or probe the window
- * before giving up and aborting (ETIMEDOUT) a connection.
- */
- if (val < 0)
- err = -EINVAL;
- else
- WRITE_ONCE(icsk->icsk_user_timeout, val);
- break;
-
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 85e4953f1182..8ed54e7334a9 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -451,7 +451,7 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
if (tp->fastopen_connect && !tp->fastopen_req) {
if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
- inet_sk(sk)->defer_connect = 1;
+ inet_set_bit(DEFER_CONNECT, sk);
return true;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 57c8af1859c1..06fe1cf645d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,6 +237,16 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
len = skb_shinfo(skb)->gso_size ? : skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
+ /* Note: divides are still a bit expensive.
+ * For the moment, only adjust scaling_ratio
+ * when we update icsk_ack.rcv_mss.
+ */
+ if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
+ u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+
+ do_div(val, skb->truesize);
+ tcp_sk(sk)->scaling_ratio = val ? val : 1;
+ }
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
@@ -287,7 +297,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -295,7 +305,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -727,8 +736,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
@@ -740,12 +749,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(grow, tp->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
- rcvmem += 128;
-
- do_div(rcvwin, tp->advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -3521,7 +3525,7 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
{
return after(ack, tp->snd_una) ||
after(ack_seq, tp->snd_wl1) ||
- (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
+ (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin));
}
/* If we update tp->snd_una, also update tp->bytes_acked */
@@ -4122,9 +4126,8 @@ void tcp_parse_options(const struct net *net,
break;
#ifdef CONFIG_TCP_MD5SIG
case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
+ /* The MD5 Hash has already been
+ * checked (see tcp_v{4,6}_rcv()).
*/
break;
#endif
@@ -4308,10 +4311,16 @@ static inline bool tcp_paws_discard(const struct sock *sk,
* (borrowed from freebsd)
*/
-static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
+static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
+ u32 seq, u32 end_seq)
{
- return !before(end_seq, tp->rcv_wup) &&
- !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
+ if (before(end_seq, tp->rcv_wup))
+ return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
+
+ if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+
+ return SKB_NOT_DROPPED_YET;
}
/* When we get a reset we do this. */
@@ -5050,13 +5059,19 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
/* Ok. In sequence. In window. */
queue_and_out:
- if (skb_queue_len(&sk->sk_receive_queue) == 0)
- sk_forced_mem_schedule(sk, skb->truesize);
- else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
- reason = SKB_DROP_REASON_PROTO_MEM;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ /* TODO: maybe ratelimit these WIN 0 ACK ? */
+ inet_csk(sk)->icsk_ack.pending |=
+ (ICSK_ACK_NOMEM | ICSK_ACK_NOW);
+ inet_csk_schedule_ack(sk);
sk->sk_data_ready(sk);
- goto drop;
+
+ if (skb_queue_len(&sk->sk_receive_queue)) {
+ reason = SKB_DROP_REASON_PROTO_MEM;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ goto drop;
+ }
+ sk_forced_mem_schedule(sk, skb->truesize);
}
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5734,7 +5749,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
/* Step 1: check sequence number */
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+ reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ if (reason) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
@@ -5751,7 +5767,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
} else if (tcp_reset_check(sk, skb)) {
goto reset;
}
- SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
@@ -6315,7 +6330,7 @@ consume:
if (fastopen_fail)
return -1;
if (sk->sk_write_pending ||
- icsk->icsk_accept_queue.rskq_defer_accept ||
+ READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept) ||
inet_csk_in_pingpong_mode(sk)) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
@@ -6615,7 +6630,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
}
- if (tp->linger2 < 0) {
+ if (READ_ONCE(tp->linger2) < 0) {
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
@@ -6985,7 +7000,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
- inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
+ inet_rsk(req)->no_srccheck = inet_test_bit(TRANSPARENT, sk);
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2dbdc26da86e..27140e5cdc06 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -476,7 +477,6 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
struct tcp_sock *tp;
- struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -624,8 +624,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
* --ANK (980905)
*/
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
+ if (!sock_owned_by_user(sk) &&
+ inet_test_bit(RECVERR, sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else { /* Only an error on timeout */
@@ -2448,6 +2448,8 @@ static void *established_get_first(struct seq_file *seq)
struct hlist_nulls_node *node;
spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
+ cond_resched();
+
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(hinfo, st))
continue;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 99ac5efe244d..c196759f1d3b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -990,7 +990,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = {
.resv_start_op = TCP_METRICS_CMD_DEL + 1,
};
-static unsigned int tcpmhash_entries;
+static unsigned int tcpmhash_entries __initdata;
static int __init set_tcpmhash_entries(char *str)
{
ssize_t ret;
@@ -1006,15 +1006,11 @@ static int __init set_tcpmhash_entries(char *str)
}
__setup("tcpmhash_entries=", set_tcpmhash_entries);
-static int __net_init tcp_net_metrics_init(struct net *net)
+static void __init tcp_metrics_hash_alloc(void)
{
+ unsigned int slots = tcpmhash_entries;
size_t size;
- unsigned int slots;
- if (!net_eq(net, &init_net))
- return 0;
-
- slots = tcpmhash_entries;
if (!slots) {
if (totalram_pages() >= 128 * 1024)
slots = 16 * 1024;
@@ -1027,9 +1023,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
tcp_metrics_hash = kvzalloc(size, GFP_KERNEL);
if (!tcp_metrics_hash)
- return -ENOMEM;
-
- return 0;
+ panic("Could not allocate the tcp_metrics hash table\n");
}
static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
@@ -1038,7 +1032,6 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
- .init = tcp_net_metrics_init,
.exit_batch = tcp_net_metrics_exit_batch,
};
@@ -1046,9 +1039,11 @@ void __init tcp_metrics_init(void)
{
int ret;
+ tcp_metrics_hash_alloc();
+
ret = register_pernet_subsys(&tcp_net_metrics_ops);
if (ret < 0)
- panic("Could not allocate the tcp_metrics hash table\n");
+ panic("Could not register tcp_net_metrics_ops\n");
ret = genl_register_family(&tcp_metrics_nl_family);
if (ret < 0)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index c8f2aa003387..b98d476f1594 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -289,9 +289,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
- struct inet_sock *inet = inet_sk(sk);
- tw->tw_transparent = inet->transparent;
+ tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_mark = sk->sk_mark;
tw->tw_priority = sk->sk_priority;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
@@ -570,8 +569,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
- newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
@@ -794,7 +791,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return sk;
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
- if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 51d8638d4b4c..e6b4fbd642f7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -257,11 +257,19 @@ EXPORT_SYMBOL(tcp_select_initial_window);
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 old_win = tp->rcv_wnd;
- u32 cur_win = tcp_receive_window(tp);
- u32 new_win = __tcp_select_window(sk);
struct net *net = sock_net(sk);
+ u32 old_win = tp->rcv_wnd;
+ u32 cur_win, new_win;
+
+ /* Make the window 0 if we failed to queue the data because we
+ * are out of memory. The window is temporary, so we don't store
+ * it on the socket.
+ */
+ if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM))
+ return 0;
+ cur_win = tcp_receive_window(tp);
+ new_win = __tcp_select_window(sk);
if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
@@ -1293,14 +1301,21 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- /* if no packet is in qdisc/device queue, then allow XPS to select
- * another queue. We can be called from tcp_tsq_handler()
- * which holds one reference to sk.
- *
- * TODO: Ideally, in-flight pure ACK packets should not matter here.
- * One way to get this would be to set skb->truesize = 2 on them.
+ /* We set skb->ooo_okay to one if this packet can select
+ * a different TX queue than prior packets of this flow,
+ * to avoid self inflicted reorders.
+ * The 'other' queue decision is based on current cpu number
+ * if XPS is enabled, or sk->sk_txhash otherwise.
+ * We can switch to another (and better) queue if:
+ * 1) No packet with payload is in qdisc/device queues.
+ * Delays in TX completion can defeat the test
+ * even if packets were already sent.
+ * 2) Or rtx queue is empty.
+ * This mitigates above case if ACK packets for
+ * all prior packets were already processed.
*/
- skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
+ skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) ||
+ tcp_rtx_queue_empty(sk);
/* If we had to use memory reserve to allocate this skb,
* this might cause drops if packet is looped back :
@@ -3741,11 +3756,6 @@ static void tcp_connect_init(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 206418b6d7c4..984ab4a0421e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 elapsed, start_ts;
+ u32 elapsed, start_ts, user_timeout;
s32 remaining;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (!icsk->icsk_user_timeout)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- remaining = icsk->icsk_user_timeout - elapsed;
+ remaining = user_timeout - elapsed;
if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
@@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 remaining;
+ u32 remaining, user_timeout;
s32 elapsed;
- if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout || !icsk->icsk_probes_tstamp)
return when;
elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
if (unlikely(elapsed < 0))
elapsed = 0;
- remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+ remaining = msecs_to_jiffies(user_timeout) - elapsed;
remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
return min_t(u32, remaining, when);
@@ -239,7 +241,8 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? :
+ /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
+ retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
max_retransmits = retry_until;
@@ -269,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
}
if (!expired)
expired = retransmits_timed_out(sk, retry_until,
- icsk->icsk_user_timeout);
+ READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -383,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- if (!icsk->icsk_probes_tstamp)
+ if (!icsk->icsk_probes_tstamp) {
icsk->icsk_probes_tstamp = tcp_jiffies32;
- else if (icsk->icsk_user_timeout &&
- (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
- msecs_to_jiffies(icsk->icsk_user_timeout))
- goto abort;
+ } else {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (user_timeout &&
+ (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+ msecs_to_jiffies(user_timeout))
+ goto abort;
+ }
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -421,8 +427,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
req->rsk_ops->syn_ack_timeout(req);
- /* add one more retry for fastopen */
- max_retries = icsk->icsk_syn_retries ? :
+ /* Add one more retry for fastopen.
+ * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
+ */
+ max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
if (req->num_timeout >= max_retries) {
@@ -446,6 +454,22 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
req->timeout << req->num_timeout, TCP_RTO_MAX);
}
+static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const int timeout = TCP_RTO_MAX * 2;
+ u32 rcv_delta, rtx_delta;
+
+ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ if (rcv_delta <= timeout)
+ return false;
+
+ rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
+ (tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
+
+ return rtx_delta > timeout;
+}
/**
* tcp_retransmit_timer() - The TCP retransmit timeout handler
@@ -495,23 +519,26 @@ void tcp_retransmit_timer(struct sock *sk)
* we cannot allow such beasts to hang infinitely.
*/
struct inet_sock *inet = inet_sk(sk);
+ u32 rtx_delta;
+
+ rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
if (sk->sk_family == AF_INET) {
- net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &inet->inet_daddr,
- ntohs(inet->inet_dport),
- inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+ &inet->inet_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt,
+ jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+ rtx_delta);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &sk->sk_v6_daddr,
- ntohs(inet->inet_dport),
- inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt,
+ jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+ rtx_delta);
}
#endif
- if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
+ if (tcp_rtx_probe0_timed_out(sk, skb)) {
tcp_write_err(sk);
goto out;
}
@@ -708,7 +735,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
- if (tp->linger2 >= 0) {
+ if (READ_ONCE(tp->linger2) >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
if (tmo > 0) {
@@ -733,13 +760,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
elapsed = keepalive_time_elapsed(tp);
if (elapsed >= keepalive_time_when(tp)) {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
/* If the TCP_USER_TIMEOUT option is enabled, use that
* to determine when to timeout instead.
*/
- if ((icsk->icsk_user_timeout != 0 &&
- elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+ if ((user_timeout != 0 &&
+ elapsed >= msecs_to_jiffies(user_timeout) &&
icsk->icsk_probes_out > 0) ||
- (icsk->icsk_user_timeout == 0 &&
+ (user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abfa860367aa..0794a2c46a56 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -407,9 +407,9 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport)
{
static u32 udp_ehash_secret __read_mostly;
@@ -419,22 +419,6 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum)
-{
- struct sock *reuse_sk = NULL;
- u32 hash;
-
- if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
- reuse_sk = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- }
- return reuse_sk;
-}
-
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -452,42 +436,36 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- result = lookup_reuseport(net, sk, skb,
- saddr, sport, daddr, hnum);
+ badness = score;
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ result = sk;
+ continue;
+ }
+
+ result = inet_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, udp_ehashfn);
+ if (!result) {
+ result = sk;
+ continue;
+ }
+
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk))
+ if (!reuseport_has_conns(sk))
return result;
- result = result ? : sk;
- badness = score;
+ /* Reuseport logic returned an error, keep original score. */
+ if (IS_ERR(result))
+ continue;
+
+ badness = compute_score(result, net, saddr, sport,
+ daddr, hnum, dif, sdif);
+
}
}
return result;
}
-static struct sock *udp4_lookup_run_bpf(struct net *net,
- struct udp_table *udptable,
- struct sk_buff *skb,
- __be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum, const int dif)
-{
- struct sock *sk, *reuse_sk;
- bool no_reuseport;
-
- if (udptable != net->ipv4.udp_table)
- return NULL; /* only UDP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
- daddr, hnum, dif, &sk);
- if (no_reuseport || IS_ERR_OR_NULL(sk))
- return sk;
-
- reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk)
- sk = reuse_sk;
- return sk;
-}
-
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -512,9 +490,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto done;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- sk = udp4_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ udptable == net->ipv4.udp_table) {
+ sk = inet_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, dif,
+ udp_ehashfn);
if (sk) {
result = sk;
goto done;
@@ -799,7 +779,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
(u8 *)(uh+1));
goto out;
}
- if (!inet->recverr) {
+ if (!inet_test_bit(RECVERR, sk)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else
@@ -982,7 +962,8 @@ csum_partial:
send:
err = ip_send_skb(sock_net(sk), skb);
if (err) {
- if (err == -ENOBUFS && !inet->recverr) {
+ if (err == -ENOBUFS &&
+ !inet_test_bit(RECVERR, sk)) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
@@ -1557,7 +1538,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock(&list->lock);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
busylock_release(busy);
return 0;
@@ -1890,7 +1871,7 @@ try_again:
if (udp_sk(sk)->gro_enabled)
udp_cmsg_recv(msg, sk, skb);
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
err = copied;
@@ -2412,7 +2393,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
- sk = skb_steal_sock(skb, &refcounted);
+ sk = inet_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+ &refcounted, udp_ehashfn);
+ if (IS_ERR(sk))
+ goto no_sk;
+
if (sk) {
struct dst_entry *dst = skb_dst(skb);
int ret;
@@ -2433,7 +2418,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk)
return udp_unicast_rcv_skb(sk, skb, uh);
-
+no_sk:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
nf_reset_ct(skb);
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 5f8104cf082d..9b18f371af0d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct sock *sk = sock->sk;
/* Disable multicast loopback */
- inet_sk(sk)->mc_loop = 0;
+ inet_clear_bit(MC_LOOP, sk);
/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
inet_inc_convert_csum(sk);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9403bbaf1b61..cdcc0f6b4f0a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -124,22 +124,13 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
xfrm_dst_destroy(xdst);
}
-static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
-{
- if (!unregister)
- return;
-
- xfrm_dst_ifdown(dst, dev);
-}
-
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
- .ifdown = xfrm4_dst_ifdown,
+ .ifdown = xfrm_dst_ifdown,
.local_out = __ip_local_out,
.gc_thresh = 32768,
};
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 94cec2075eee..47d1dd8501b7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -1061,20 +1063,28 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
struct fib6_info *f6i = NULL;
int err = 0;
- if (addr_type == IPV6_ADDR_ANY ||
- (addr_type & IPV6_ADDR_MULTICAST &&
- !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
- (!(idev->dev->flags & IFF_LOOPBACK) &&
- !netif_is_l3_master(idev->dev) &&
- addr_type & IPV6_ADDR_LOOPBACK))
+ if (addr_type == IPV6_ADDR_ANY) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid address");
return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (addr_type & IPV6_ADDR_MULTICAST &&
+ !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign multicast address without \"IFA_F_MCAUTOJOIN\" flag");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (!(idev->dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(idev->dev) &&
+ addr_type & IPV6_ADDR_LOOPBACK) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign loopback address on this device");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ }
if (idev->dead) {
- err = -ENODEV; /*XXX*/
+ NL_SET_ERR_MSG_MOD(extack, "device is going away");
+ err = -ENODEV;
goto out;
}
if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
err = -EACCES;
goto out;
}
@@ -1101,7 +1111,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags);
+ f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags, extack);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
f6i = NULL;
@@ -2731,6 +2741,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
return;
}
+ if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft)
+ goto put;
+
/*
* Two things going on here:
* 1) Add routes for on-link prefixes
@@ -2925,30 +2938,40 @@ static int inet6_addr_add(struct net *net, int ifindex,
ASSERT_RTNL();
- if (cfg->plen > 128)
+ if (cfg->plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
/* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
+ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
return -EINVAL;
+ }
- if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64)
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
+ NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
if (!dev)
return -ENODEV;
idev = addrconf_add_dev(dev);
- if (IS_ERR(idev))
+ if (IS_ERR(idev)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return PTR_ERR(idev);
+ }
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
true, cfg->pfx, ifindex);
- if (ret < 0)
+ if (ret < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
return ret;
+ }
}
cfg->scope = ipv6_addr_scope(cfg->pfx);
@@ -3005,22 +3028,29 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
- const struct in6_addr *pfx, unsigned int plen)
+ const struct in6_addr *pfx, unsigned int plen,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct net_device *dev;
- if (plen > 128)
+ if (plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
idev = __in6_dev_get(dev);
- if (!idev)
+ if (!idev) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
+ }
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
@@ -3043,6 +3073,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
}
}
read_unlock_bh(&idev->lock);
+
+ NL_SET_ERR_MSG_MOD(extack, "address not found");
return -EADDRNOTAVAIL;
}
@@ -3085,7 +3117,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
- ireq.ifr6_prefixlen);
+ ireq.ifr6_prefixlen, NULL);
rtnl_unlock();
return err;
}
@@ -3488,7 +3520,7 @@ static int fixup_permanent_addr(struct net *net,
struct fib6_info *f6i, *prev;
f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false,
- GFP_ATOMIC);
+ GFP_ATOMIC, NULL);
if (IS_ERR(f6i))
return PTR_ERR(f6i);
@@ -4698,7 +4730,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa_flags &= IFA_F_MANAGETEMPADDR;
return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen);
+ ifm->ifa_prefixlen, extack);
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
@@ -4903,8 +4935,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
dev = __dev_get_by_index(net, ifm->ifa_index);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
if (tb[IFA_FLAGS])
cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
@@ -4939,10 +4973,12 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
- else
+ } else {
err = inet6_addr_modify(net, ifa, &cfg);
+ }
in6_ifa_put(ifa);
@@ -5602,6 +5638,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
}
static inline size_t inet6_ifla6_size(void)
@@ -6796,6 +6833,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_lft",
+ .data = &ipv6_devconf.accept_ra_min_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5d593ddc0347..368824fe9719 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
@@ -200,12 +200,12 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
sk->sk_destruct = inet6_sock_destruct;
@@ -229,7 +229,7 @@ lookup_protocol:
*/
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
inet->mc_index = 0;
RCU_INIT_POINTER(inet->mc_list, NULL);
@@ -399,7 +399,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -435,10 +435,8 @@ out_unlock:
goto out;
}
-/* bind for INET6 API */
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
const struct proto *prot;
int err = 0;
@@ -462,6 +460,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet6_bind(sk, uaddr, addr_len, flags);
}
+
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet6_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index dacdea7fcb62..bb17f484ee2c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -305,7 +305,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
}
net = dev_net(idev->dev);
- f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+ f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
goto out;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9b6818453afe..41ebc4e57473 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
+ const struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
int oif = sk->sk_bound_dev_if;
memset(fl6, 0, sizeof(*fl6));
@@ -523,7 +524,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
} else {
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin->sin6_addr);
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 202fc3aaa83c..4952ae792450 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -612,8 +612,6 @@ looped_back:
kfree(buf);
- skb_dst_drop(skb);
-
ip6_route_input(skb);
if (skb_dst(skb)->error) {
@@ -650,7 +648,6 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(skb->dev);
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
- struct in6_addr daddr;
int n, i;
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
@@ -798,9 +795,7 @@ looped_back:
return -1;
}
- daddr = *addr;
- *addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = daddr;
+ swap(*addr, ipv6_hdr(skb)->daddr);
ip6_route_input(skb);
if (skb_dst(skb)->error) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 65fa5014bc85..6d88f5248c1f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1034,11 +1034,9 @@ drop_no_count:
return 0;
}
-void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
- u8 type,
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif)
+ const struct in6_addr *daddr, int oif)
{
memset(fl6, 0, sizeof(*fl6));
fl6->saddr = *saddr;
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 3faf62530d6a..69caed07315f 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index bee45dfeb187..67e8c9440977 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -5,7 +5,6 @@
#include <linux/rhashtable.h>
#include <linux/vmalloc.h>
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b64b49012655..b0e8d278e8a9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -39,6 +39,7 @@ u32 inet6_ehashfn(const struct net *net,
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet6_ehashfn);
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
@@ -111,22 +112,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned short hnum)
+/**
+ * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
@@ -143,8 +162,8 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet6_ehashfn);
if (result)
return result;
@@ -156,30 +175,30 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet6_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum, const int dif)
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -193,9 +212,11 @@ struct sock *inet6_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet6_ehashfn);
if (result)
goto done;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bac768d36cc1..28b01a068412 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -1057,6 +1060,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires_locked(rt);
}
/*
@@ -1118,9 +1123,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES))
- fib6_clean_expires(iter);
+ fib6_clean_expires_locked(iter);
else
- fib6_set_expires(iter, rt->expires);
+ fib6_set_expires_locked(iter,
+ rt->expires);
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1479,6 +1485,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (fib6_has_expires(rt))
+ hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -2285,9 +2295,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2295,7 +2304,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
* Routes are expired even if they are in use.
*/
- if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
+ if (fib6_has_expires(rt) && rt->expires) {
if (time_after(now, rt->expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
@@ -2312,6 +2321,40 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+ fib6_gc_table(net, table, gc_args);
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2327,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e8c90e97608..f8a1f6bb3f87 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1591,7 +1591,7 @@ emsgsize:
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet_sk(sk)->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1693,7 +1693,10 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
copy = datalen - transhdrlen - fraggap - pagedlen;
- if (copy < 0) {
+ /* [!] NOTE: copy may be negative if pagedlen>0
+ * because then the equation may reduces to -fraggap.
+ */
+ if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
err = -EINVAL;
goto error;
}
@@ -1744,6 +1747,8 @@ alloc_new_skb:
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1791,6 +1796,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
@@ -1986,7 +1995,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
- if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ if (sk->sk_socket->type == SOCK_RAW &&
+ !inet_test_bit(HDRINCL, sk))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ae818ff46224..0e2a0847b387 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -102,7 +102,7 @@ int ip6_ra_control(struct sock *sk, int sel)
struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
struct ipv6_txoptions *opt)
{
- if (inet_sk(sk)->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
if (opt &&
!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
@@ -474,8 +474,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(sk->sk_prot, &tcp_prot);
/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
- sk->sk_socket->ops = &inet_stream_ops;
- sk->sk_family = PF_INET;
+ WRITE_ONCE(sk->sk_socket->ops, &inet_stream_ops);
+ WRITE_ONCE(sk->sk_family, PF_INET);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct proto *prot = &udp_prot;
@@ -488,8 +488,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */
WRITE_ONCE(sk->sk_prot, prot);
- sk->sk_socket->ops = &inet_dgram_ops;
- sk->sk_family = PF_INET;
+ WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops);
+ WRITE_ONCE(sk->sk_family, PF_INET);
}
/* Disable all options not to allocate memory anymore,
@@ -633,7 +633,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
/* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
- inet_sk(sk)->transparent = valbool;
+ inet_assign_bit(TRANSPARENT, sk, valbool);
retv = 0;
break;
@@ -641,7 +641,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
/* we also don't have a separate freebind bit for IPV6 */
- inet_sk(sk)->freebind = valbool;
+ inet_assign_bit(FREEBIND, sk, valbool);
retv = 0;
break;
@@ -831,7 +831,7 @@ done:
goto e_inval;
retv = -EPROTO;
- if (inet_sk(sk)->is_icsk)
+ if (inet_test_bit(IS_ICSK, sk))
break;
retv = -EFAULT;
@@ -923,7 +923,7 @@ done:
goto e_inval;
np->recverr = valbool;
if (!val)
- skb_queue_purge(&sk->sk_error_queue);
+ skb_errqueue_purge(&sk->sk_error_queue);
retv = 0;
break;
case IPV6_FLOWINFO_SEND:
@@ -1330,11 +1330,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_TRANSPARENT:
- val = inet_sk(sk)->transparent;
+ val = inet_test_bit(TRANSPARENT, sk);
break;
case IPV6_FREEBIND:
- val = inet_sk(sk)->freebind;
+ val = inet_test_bit(FREEBIND, sk);
break;
case IPV6_RECVORIGDSTADDR:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 714cdc9e2b8e..5ce25bcb9974 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1699,11 +1699,9 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int proto, int len)
+static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
+ struct net_device *dev, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int proto, int len)
{
struct ipv6hdr *hdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a42be96ae209..553c8664e0a7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1267,10 +1267,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
}
#endif
- /*
- * set the RA_RECV flag in the interface
- */
-
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev) {
ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
@@ -1328,6 +1324,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+ if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ ND_PRINTK(2, info,
+ "RA: router lifetime (%ds) is too short: %s\n",
+ lifetime, skb->dev->name);
+ goto skip_defrtr;
+ }
+
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
@@ -1340,8 +1344,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
- lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
-
#ifdef CONFIG_IPV6_ROUTER_PREF
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
@@ -1517,6 +1519,9 @@ skip_linkparms:
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->lifetime != 0 &&
+ ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ continue;
if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cb4eb1d2c620..d59b296b4f51 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
+#include <linux/rcupdate.h>
#include <linux/sysctl.h>
#include <net/ipv6_frag.h>
@@ -96,6 +97,12 @@ static void __net_exit defrag6_net_exit(struct net *net)
}
}
+static const struct nf_defrag_hook defrag_hook = {
+ .owner = THIS_MODULE,
+ .enable = nf_defrag_ipv6_enable,
+ .disable = nf_defrag_ipv6_disable,
+};
+
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
};
@@ -114,6 +121,9 @@ static int __init nf_defrag_init(void)
pr_err("nf_defrag_ipv6: can't register pernet ops\n");
goto cleanup_frag6;
}
+
+ rcu_assign_pointer(nf_defrag_v6_hook, &defrag_hook);
+
return ret;
cleanup_frag6:
@@ -124,6 +134,7 @@ cleanup_frag6:
static void __exit nf_defrag_fini(void)
{
+ rcu_assign_pointer(nf_defrag_v6_hook, NULL);
unregister_pernet_subsys(&defrag6_net_ops);
nf_ct_frag6_cleanup();
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c2c291827a2c..1b2772834972 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 49381f35b623..0eae7661a85c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,7 +291,6 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int err;
int harderr;
@@ -315,7 +314,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
}
if (np->recverr) {
u8 *payload = skb->data;
- if (!inet->hdrincl)
+ if (!inet_test_bit(HDRINCL, sk))
payload += offset;
ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
}
@@ -406,7 +405,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb->len,
inet->inet_num, 0));
- if (inet->hdrincl) {
+ if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
@@ -762,12 +761,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- /* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields.
- * Doing this indirectly yields the same result.
- */
- hdrincl = inet->hdrincl;
- hdrincl = READ_ONCE(hdrincl);
+ hdrincl = inet_test_bit(HDRINCL, sk);
/*
* Get and verify the address.
@@ -1000,7 +994,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_HDRINCL:
if (sk->sk_type != SOCK_RAW)
return -EINVAL;
- inet_sk(sk)->hdrincl = !!val;
+ inet_assign_bit(HDRINCL, sk, val);
return 0;
case IPV6_CHECKSUM:
if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
@@ -1068,7 +1062,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_HDRINCL:
- val = inet_sk(sk)->hdrincl;
+ val = inet_test_bit(HDRINCL, sk);
break;
case IPV6_CHECKSUM:
/*
@@ -1216,6 +1210,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 56a55585eb79..846aec8e0093 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -90,7 +90,7 @@ unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
- struct net_device *dev, int how);
+ struct net_device *dev);
static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
@@ -371,8 +371,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
fib6_info_release(from);
}
-static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int how)
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
@@ -3761,10 +3760,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->dst_nocount = true;
if (cfg->fc_flags & RTF_EXPIRES)
- fib6_set_expires(rt, jiffies +
- clock_t_to_jiffies(cfg->fc_expires));
+ fib6_set_expires_locked(rt, jiffies +
+ clock_t_to_jiffies(cfg->fc_expires));
else
- fib6_clean_expires(rt);
+ fib6_clean_expires_locked(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -4544,7 +4543,8 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
struct fib6_info *addrconf_f6i_alloc(struct net *net,
struct inet6_dev *idev,
const struct in6_addr *addr,
- bool anycast, gfp_t gfp_flags)
+ bool anycast, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
@@ -4566,7 +4566,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
cfg.fc_flags |= RTF_LOCAL;
}
- f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
+ f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
@@ -4581,21 +4581,19 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
- struct net_device *dev;
struct net *net;
struct in6_addr *addr;
};
static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
{
- struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
if (!rt->nh &&
- ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
- ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
+ ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
+ !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
@@ -4608,7 +4606,6 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
struct arg_dev_net_ip adni = {
- .dev = ifp->idev->dev,
.net = net,
.addr = &ifp->addr,
};
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index b1c028df686e..a013b92cbb86 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -272,8 +272,6 @@ static int rpl_input(struct sk_buff *skb)
dst = dst_cache_get(&rlwt->cache);
preempt_enable();
- skb_dst_drop(skb);
-
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
@@ -284,6 +282,7 @@ static int rpl_input(struct sk_buff *skb)
preempt_enable();
}
} else {
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index dd433cc265c8..24e2b4b494cb 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -109,15 +109,19 @@ struct bpf_lwt_prog {
#define next_csid_chk_lcnode_fn_bits(flen) \
next_csid_chk_lcblock_bits(flen)
+/* flag indicating that flavors are set up for a given End* behavior */
+#define SEG6_F_LOCAL_FLAVORS SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)
+
#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname)
+#define SEG6_F_LOCAL_FLV_NEXT_CSID SEG6_F_LOCAL_FLV_OP(NEXT_CSID)
#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP)
/* Supported RFC8986 Flavor operations are reported in this bitmask */
#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP
-/* Supported Flavor operations are reported in this bitmask */
-#define SEG6_LOCAL_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_OP(NEXT_CSID) | \
+#define SEG6_LOCAL_END_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_NEXT_CSID | \
SEG6_LOCAL_FLV8986_SUPP_OPS)
+#define SEG6_LOCAL_END_X_FLV_SUPP_OPS SEG6_F_LOCAL_FLV_NEXT_CSID
struct seg6_flavors_info {
/* Flavor operations */
@@ -411,9 +415,72 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
return input_action_end_finish(skb, slwt);
}
+static int input_action_end_x_finish(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+
+ return dst_input(skb);
+}
+
+static int input_action_end_x_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ return input_action_end_x_finish(skb, slwt);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int end_x_next_csid_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+
+ if (seg6_next_csid_is_arg_zero(daddr, finfo))
+ return input_action_end_x_core(skb, slwt);
+
+ /* update DA */
+ seg6_next_csid_advance_arg(daddr, finfo);
+
+ return input_action_end_x_finish(skb, slwt);
+}
+
static bool seg6_next_csid_enabled(__u32 fops)
{
- return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
+ return fops & SEG6_F_LOCAL_FLV_NEXT_CSID;
+}
+
+/* Processing of SRv6 End, End.X, and End.T behaviors can be extended through
+ * the flavors framework. These behaviors must report the subset of (flavor)
+ * operations they currently implement. In this way, if a user specifies a
+ * flavor combination that is not supported by a given End* behavior, the
+ * kernel refuses to instantiate the tunnel reporting the error.
+ */
+static int seg6_flv_supp_ops_by_action(int action, __u32 *fops)
+{
+ switch (action) {
+ case SEG6_LOCAL_ACTION_END:
+ *fops = SEG6_LOCAL_END_FLV_SUPP_OPS;
+ break;
+ case SEG6_LOCAL_ACTION_END_X:
+ *fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
/* We describe the packet state in relation to the absence/presence of the SRH
@@ -746,21 +813,14 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
- struct ipv6_sr_hdr *srh;
-
- srh = get_and_validate_srh(skb);
- if (!srh)
- goto drop;
-
- advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
-
- seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ __u32 fops = finfo->flv_ops;
- return dst_input(skb);
+ /* check for the presence of NEXT-C-SID since it applies first */
+ if (seg6_next_csid_enabled(fops))
+ return end_x_next_csid_core(skb, slwt);
-drop:
- kfree_skb(skb);
- return -EINVAL;
+ return input_action_end_x_core(skb, slwt);
}
static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
@@ -1404,13 +1464,14 @@ static struct seg6_action_desc seg6_action_table[] = {
.action = SEG6_LOCAL_ACTION_END,
.attrs = 0,
.optattrs = SEG6_F_LOCAL_COUNTERS |
- SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
+ SEG6_F_LOCAL_FLAVORS,
.input = input_action_end,
},
{
.action = SEG6_LOCAL_ACTION_END_X,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
- .optattrs = SEG6_F_LOCAL_COUNTERS,
+ .optattrs = SEG6_F_LOCAL_COUNTERS |
+ SEG6_F_LOCAL_FLAVORS,
.input = input_action_end_x,
},
{
@@ -2070,7 +2131,8 @@ static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
{
struct seg6_flavors_info *finfo = &slwt->flv_info;
struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
- unsigned long fops;
+ int action = slwt->action;
+ __u32 fops, supp_fops;
int rc;
rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
@@ -2086,7 +2148,8 @@ static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
return -EINVAL;
fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
- if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
+ rc = seg6_flv_supp_ops_by_action(action, &supp_fops);
+ if (rc < 0 || (fops & ~supp_fops)) {
NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
return -EOPNOTSUPP;
}
@@ -2618,6 +2681,11 @@ int __init seg6_local_init(void)
*/
BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
+ /* Check whether the number of defined flavors exceeds the maximum
+ * allowed value.
+ */
+ BUILD_BUG_ON(SEG6_LOCAL_FLV_OP_MAX + 1 > BITS_PER_TYPE(__u32));
+
/* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
* bits) have been changed with invalid values, kernel build stops
* here.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6e86721e1cdb..3a88545a265d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2176,6 +2176,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f787e6b8424c..ebc6ae47cfea 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -72,11 +72,12 @@ int udpv6_init_sock(struct sock *sk)
return 0;
}
-static u32 udp6_ehashfn(const struct net *net,
- const struct in6_addr *laddr,
- const u16 lport,
- const struct in6_addr *faddr,
- const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
{
static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;
@@ -161,24 +162,6 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned int hnum)
-{
- struct sock *reuse_sk = NULL;
- u32 hash;
-
- if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
- reuse_sk = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- }
- return reuse_sk;
-}
-
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -195,44 +178,35 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- result = lookup_reuseport(net, sk, skb,
- saddr, sport, daddr, hnum);
+ badness = score;
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ result = sk;
+ continue;
+ }
+
+ result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, udp6_ehashfn);
+ if (!result) {
+ result = sk;
+ continue;
+ }
+
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk))
+ if (!reuseport_has_conns(sk))
return result;
- result = result ? : sk;
- badness = score;
+ /* Reuseport logic returned an error, keep original score. */
+ if (IS_ERR(result))
+ continue;
+
+ badness = compute_score(sk, net, saddr, sport,
+ daddr, hnum, dif, sdif);
}
}
return result;
}
-static inline struct sock *udp6_lookup_run_bpf(struct net *net,
- struct udp_table *udptable,
- struct sk_buff *skb,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- u16 hnum, const int dif)
-{
- struct sock *sk, *reuse_sk;
- bool no_reuseport;
-
- if (udptable != net->ipv4.udp_table)
- return NULL; /* only UDP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
- daddr, hnum, dif, &sk);
- if (no_reuseport || IS_ERR_OR_NULL(sk))
- return sk;
-
- reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk)
- sk = reuse_sk;
- return sk;
-}
-
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -257,9 +231,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto done;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- sk = udp6_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ udptable == net->ipv4.udp_table) {
+ sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, dif,
+ udp6_ehashfn);
if (sk) {
result = sk;
goto done;
@@ -444,7 +420,7 @@ try_again:
ip6_datagram_recv_common_ctl(sk, msg, skb);
if (is_udp4) {
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv_offset(msg, sk, skb,
sizeof(struct udphdr), off);
} else {
@@ -992,7 +968,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
/* Check if the socket is already available, e.g. due to early demux */
- sk = skb_steal_sock(skb, &refcounted);
+ sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+ &refcounted, udp6_ehashfn);
+ if (IS_ERR(sk))
+ goto no_sk;
+
if (sk) {
struct dst_entry *dst = skb_dst(skb);
int ret;
@@ -1026,7 +1006,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto report_csum_error;
return udp6_unicast_rcv_skb(sk, skb, uh);
}
-
+no_sk:
reason = SKB_DROP_REASON_NO_SOCKET;
if (!uh->check)
@@ -1802,6 +1782,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 8e010d07917a..267d491e9707 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index eecc5e59da17..188224a76685 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -124,14 +124,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
xfrm_dst_destroy(xdst);
}
-static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
+static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct xfrm_dst *xdst;
- if (!unregister)
- return;
-
xdst = (struct xfrm_dst *)dst;
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b4ea4cf9fad4..d68d01804dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1281,7 +1281,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
natt->encap_dport = n_port->sadb_x_nat_t_port_port;
}
- memset(&natt->encap_oa, 0, sizeof(natt->encap_oa));
}
err = xfrm_init_state(x);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index f9073bc7281f..9a2a9ed3ba47 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -552,7 +552,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv(msg, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ff78217f0cb1..ed8ebb6f5909 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
- /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- * inet6_sk_generic
- */
struct ipv6_pinfo inet6;
};
@@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
+ .ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index d037009ee10f..0a3f5e0bec00 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -14,14 +14,15 @@
#include <linux/init.h>
#include <linux/slab.h>
-#include <net/llc_sap.h>
-#include <net/llc_conn.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/llc_c_ev.h>
+#include <net/llc.h>
#include <net/llc_c_ac.h>
+#include <net/llc_c_ev.h>
#include <net/llc_c_st.h>
+#include <net/llc_conn.h>
#include <net/llc_pdu.h>
+#include <net/llc_sap.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
#if 0
#define dprintk(args...) printk(KERN_DEBUG args)
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index a3829ce548f9..84e531f86b82 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 5a0a84ad94af..8a16672b94e2 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -19,3 +19,18 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
return NULL;
}
+
+BTF_SET8_START(bpf_mptcp_fmodret_ids)
+BTF_ID_FLAGS(func, update_socket_protocol)
+BTF_SET8_END(bpf_mptcp_fmodret_ids)
+
+static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_mptcp_fmodret_ids,
+};
+
+static int __init bpf_mptcp_kfunc_init(void)
+{
+ return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
+}
+late_initcall(bpf_mptcp_kfunc_init);
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..c46c22a84d23 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
u8 pm_type;
+ char scheduler[MPTCP_SCHED_NAME_MAX];
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
return mptcp_get_pernet(net)->pm_type;
}
+const char *mptcp_get_scheduler(const struct net *net)
+{
+ return mptcp_get_pernet(net)->scheduler;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+ strcpy(pernet->scheduler, "default");
}
#ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &mptcp_pm_type_max
},
+ {
+ .procname = "scheduler",
+ .maxlen = MPTCP_SCHED_NAME_MAX,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
{}
};
@@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
+ table[6].data = &pernet->scheduler;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7dbbad1e4f55..d8da5374d9e1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -299,15 +299,8 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
msk = mptcp_sk(sk);
- if (subflow->backup != bkup) {
+ if (subflow->backup != bkup)
subflow->backup = bkup;
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk))
- msk->last_snd = NULL;
- else
- __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
- mptcp_data_unlock(sk);
- }
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5692daf57a4d..9661f3812682 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -9,6 +9,7 @@
#include <linux/inet.h>
#include <linux/kernel.h>
#include <net/tcp.h>
+#include <net/inet_common.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
#include <net/genetlink.h>
@@ -471,9 +472,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
slow = lock_sock_fast(ssk);
if (prio) {
- if (subflow->backup != backup)
- msk->last_snd = NULL;
-
subflow->send_mp_prio = 1;
subflow->backup = backup;
subflow->request_bkup = backup;
@@ -1005,8 +1003,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
bool is_ipv6 = sk->sk_family == AF_INET6;
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *newsk, *ssk;
int backlog = 1024;
int err;
@@ -1032,28 +1029,32 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
&mptcp_keys[is_ipv6]);
lock_sock(newsk);
- ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
release_sock(newsk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (entry->addr.family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
+ if (ssk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (ssk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
+#endif
if (err)
return err;
inet_sk_state_store(newsk, TCP_LISTEN);
- err = kernel_listen(ssock, backlog);
- if (err)
- return err;
-
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
- return 0;
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ if (!err)
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
+ release_sock(ssk);
+ return err;
}
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d80658547836..933b257eee02 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -67,11 +67,11 @@ static bool mptcp_is_tcpsk(struct sock *sk)
* Hand the socket over to tcp so all further socket ops
* bypass mptcp.
*/
- sock->ops = &inet_stream_ops;
+ WRITE_ONCE(sock->ops, &inet_stream_ops);
return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- sock->ops = &inet6_stream_ops;
+ WRITE_ONCE(sock->ops, &inet6_stream_ops);
return true;
#endif
}
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
+ msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
WRITE_ONCE(msk->first, ssock->sk);
- WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
@@ -101,6 +101,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
mptcp_sock_graft(msk->first, sk->sk_socket);
+ iput(SOCK_INODE(ssock));
return 0;
}
@@ -108,7 +109,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* If the MPC handshake is not started, returns the first subflow,
* eventually allocating it.
*/
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
int ret;
@@ -116,10 +117,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
return ERR_PTR(-EINVAL);
- if (!msk->subflow) {
- if (msk->first)
- return ERR_PTR(-EINVAL);
-
+ if (!msk->first) {
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
@@ -127,7 +125,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
mptcp_sockopt_sync(msk, msk->first);
}
- return msk->subflow;
+ return msk->first;
}
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
@@ -1368,7 +1366,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
* returns the subflow that will transmit the next DSS
* additionally updates the rtx timeout
*/
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[SSK_MODE_MAX];
struct mptcp_subflow_context *subflow;
@@ -1379,23 +1377,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- msk_owned_by_me(msk);
-
- if (__mptcp_check_fallback(msk)) {
- if (!msk->first)
- return NULL;
- return __tcp_can_send(msk->first) &&
- sk_stream_memory_free(msk->first) ? msk->first : NULL;
- }
-
- /* re-use last subflow, if the burst allow that */
- if (msk->last_snd && msk->snd_burst > 0 &&
- sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
- mptcp_set_timeout(sk);
- return msk->last_snd;
- }
-
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < SSK_MODE_MAX; ++i) {
send_info[i].ssk = NULL;
@@ -1448,16 +1429,13 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
wmem = READ_ONCE(ssk->sk_wmem_queued);
- if (!burst) {
- msk->last_snd = NULL;
+ if (!burst)
return ssk;
- }
subflow = mptcp_subflow_ctx(ssk);
subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
READ_ONCE(ssk->sk_pacing_rate) * burst,
burst + wmem);
- msk->last_snd = ssk;
msk->snd_burst = burst;
return ssk;
}
@@ -1501,64 +1479,106 @@ void mptcp_check_and_set_pending(struct sock *sk)
mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
-void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
+ struct mptcp_sendmsg_info *info)
{
- struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct mptcp_sendmsg_info info = {
- .flags = flags,
- };
- bool do_check_data_fin = false;
struct mptcp_data_frag *dfrag;
- int len;
+ int len, copied = 0, err = 0;
while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
+ info->sent = dfrag->already_sent;
+ info->limit = dfrag->data_len;
len = dfrag->data_len - dfrag->already_sent;
while (len > 0) {
int ret = 0;
- prev_ssk = ssk;
- ssk = mptcp_subflow_get_send(msk);
-
- /* First check. If the ssk has changed since
- * the last round, release prev_ssk
- */
- if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(prev_ssk, &info);
- if (!ssk)
- goto out;
-
- /* Need to lock the new subflow only if different
- * from the previous one, otherwise we are still
- * helding the relevant lock
- */
- if (ssk != prev_ssk)
- lock_sock(ssk);
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
if (ret <= 0) {
- if (ret == -EAGAIN)
- continue;
- mptcp_push_release(ssk, &info);
+ err = copied ? : ret;
goto out;
}
- do_check_data_fin = true;
- info.sent += ret;
+ info->sent += ret;
+ copied += ret;
len -= ret;
mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+ if (msk->snd_burst <= 0 ||
+ !sk_stream_memory_free(ssk) ||
+ !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) {
+ err = copied;
+ goto out;
+ }
+ mptcp_set_timeout(sk);
+ }
+ err = copied;
+
+out:
+ return err;
+}
+
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+{
+ struct sock *prev_ssk = NULL, *ssk = NULL;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_sendmsg_info info = {
+ .flags = flags,
+ };
+ bool do_check_data_fin = false;
+ int push_count = 1;
+
+ while (mptcp_send_head(sk) && (push_count > 0)) {
+ struct mptcp_subflow_context *subflow;
+ int ret = 0;
+
+ if (mptcp_sched_get_send(msk))
+ break;
+
+ push_count = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ mptcp_subflow_set_scheduled(subflow, false);
+
+ prev_ssk = ssk;
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ if (ssk != prev_ssk) {
+ /* First check. If the ssk has changed since
+ * the last round, release prev_ssk
+ */
+ if (prev_ssk)
+ mptcp_push_release(prev_ssk, &info);
+
+ /* Need to lock the new subflow only if different
+ * from the previous one, otherwise we are still
+ * helding the relevant lock
+ */
+ lock_sock(ssk);
+ }
+
+ push_count++;
+
+ ret = __subflow_push_pending(sk, ssk, &info);
+ if (ret <= 0) {
+ if (ret != -EAGAIN ||
+ (1 << ssk->sk_state) &
+ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSE))
+ push_count--;
+ continue;
+ }
+ do_check_data_fin = true;
+ }
+ }
}
/* at this point we held the socket lock for the last subflow we used */
if (ssk)
mptcp_push_release(ssk, &info);
-out:
/* ensure the rtx timer is running */
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
@@ -1572,42 +1592,49 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
struct mptcp_sendmsg_info info = {
.data_lock_held = true,
};
- struct mptcp_data_frag *dfrag;
+ bool keep_pushing = true;
struct sock *xmit_ssk;
- int len, copied = 0;
+ int copied = 0;
info.flags = 0;
- while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
- len = dfrag->data_len - dfrag->already_sent;
- while (len > 0) {
- int ret = 0;
+ while (mptcp_send_head(sk) && keep_pushing) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ int ret = 0;
- /* check for a different subflow usage only after
- * spooling the first chunk of data
- */
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
- if (!xmit_ssk)
- goto out;
- if (xmit_ssk != ssk) {
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
- MPTCP_DELEGATE_SEND);
- goto out;
- }
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ /* check for a different subflow usage only after
+ * spooling the first chunk of data
+ */
+ if (first) {
+ mptcp_subflow_set_scheduled(subflow, false);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ first = false;
if (ret <= 0)
- goto out;
+ break;
+ copied += ret;
+ continue;
+ }
+
+ if (mptcp_sched_get_send(msk))
+ goto out;
- info.sent += ret;
+ if (READ_ONCE(subflow->scheduled)) {
+ mptcp_subflow_set_scheduled(subflow, false);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ if (ret <= 0)
+ keep_pushing = false;
copied += ret;
- len -= ret;
- first = false;
+ }
- mptcp_update_post_push(msk, dfrag, ret);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ xmit_ssk = mptcp_subflow_tcp_sock(subflow);
+ if (xmit_ssk != ssk) {
+ mptcp_subflow_delegate(subflow,
+ MPTCP_DELEGATE_SEND);
+ keep_pushing = false;
+ }
+ }
}
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
out:
@@ -1642,7 +1669,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
struct sock *ssk;
int ret;
@@ -1653,9 +1679,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* fastopen attempt, no need to check for additional subflow status.
*/
if (msg->msg_flags & MSG_FASTOPEN) {
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
}
if (!msk->first)
return -EINVAL;
@@ -1689,7 +1715,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (!mptcp_disconnect(sk, 0))
sk->sk_socket->state = SS_UNCONNECTED;
}
- inet_sk(sk)->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
return ret;
}
@@ -1707,7 +1733,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
+ if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
+ msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
@@ -1881,6 +1908,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
+ u8 scaling_ratio = U8_MAX;
u32 time, advmss = 1;
u64 rtt_us, mstamp;
@@ -1911,9 +1939,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
rtt_us = max(sf_rtt_us, rtt_us);
advmss = max(sf_advmss, advmss);
+ scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
}
msk->rcvq_space.rtt_us = rtt_us;
+ msk->scaling_ratio = scaling_ratio;
if (time < (rtt_us >> 3) || rtt_us == 0)
return;
@@ -1922,8 +1952,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
@@ -1932,18 +1962,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(grow, msk->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < advmss)
- rcvmem += 128;
-
- do_div(rcvwin, advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
- window_clamp = tcp_win_from_space(sk, rcvbuf);
+ window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make subflows follow along. If we do not do this, we
@@ -2202,17 +2227,12 @@ static void mptcp_timeout_timer(struct timer_list *t)
*
* A backup subflow is returned only if that is the only kind available.
*/
-static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
{
struct sock *backup = NULL, *pick = NULL;
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
- msk_owned_by_me(msk);
-
- if (__mptcp_check_fallback(msk))
- return NULL;
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -2243,14 +2263,6 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
return min_stale_count > 1 ? backup : NULL;
}
-static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
-{
- if (msk->subflow) {
- iput(SOCK_INODE(msk->subflow));
- WRITE_ONCE(msk->subflow, NULL);
- }
-}
-
bool __mptcp_retransmit_pending_data(struct sock *sk)
{
struct mptcp_data_frag *cur, *rtx_head;
@@ -2329,7 +2341,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out_release;
}
- dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ dispose_it = msk->free_first || ssk != msk->first;
if (dispose_it)
list_del(&subflow->node);
@@ -2350,7 +2362,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* disconnect should never fail
*/
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
- msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2383,9 +2394,6 @@ out_release:
WRITE_ONCE(msk->first, NULL);
out:
- if (ssk == msk->last_snd)
- msk->last_snd = NULL;
-
if (need_push)
__mptcp_push_pending(sk, 0);
}
@@ -2502,16 +2510,17 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
- size_t copied = 0;
struct sock *ssk;
- int ret;
+ int ret, err;
+ u16 len = 0;
mptcp_clean_una_wakeup(sk);
/* first check ssk: need to kick "stale" logic */
- ssk = mptcp_subflow_get_retrans(msk);
+ err = mptcp_sched_get_retrans(msk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -2530,32 +2539,45 @@ static void __mptcp_retrans(struct sock *sk)
goto reset_timer;
}
- if (!ssk)
+ if (err)
goto reset_timer;
- lock_sock(ssk);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ u16 copied = 0;
- /* limit retransmission to the bytes already sent on some subflows */
- info.sent = 0;
- info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
- while (info.sent < info.limit) {
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
- if (ret <= 0)
- break;
+ mptcp_subflow_set_scheduled(subflow, false);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
- copied += ret;
- info.sent += ret;
- }
- if (copied) {
- dfrag->already_sent = max(dfrag->already_sent, info.sent);
- msk->bytes_retrans += copied;
- tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
- info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* limit retransmission to the bytes already sent on some subflows */
+ info.sent = 0;
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
+ dfrag->already_sent;
+ while (info.sent < info.limit) {
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ if (ret <= 0)
+ break;
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
+ copied += ret;
+ info.sent += ret;
+ }
+ if (copied) {
+ len = max(copied, len);
+ tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+ info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ }
+
+ release_sock(ssk);
+ }
}
- release_sock(ssk);
+ msk->bytes_retrans += len;
+ dfrag->already_sent = max(dfrag->already_sent, len);
reset_timer:
mptcp_check_and_set_pending(sk);
@@ -2663,7 +2685,7 @@ unlock:
sock_put(sk);
}
-static int __mptcp_init_sock(struct sock *sk)
+static void __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2690,8 +2712,6 @@ static int __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
-
- return 0;
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2711,9 +2731,7 @@ static int mptcp_init_sock(struct sock *sk)
struct net *net = sock_net(sk);
int ret;
- ret = __mptcp_init_sock(sk);
- if (ret)
- return ret;
+ __mptcp_init_sock(sk);
if (!mptcp_is_enabled(net))
return -ENOPROTOOPT;
@@ -2721,6 +2739,11 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
+ ret = mptcp_init_sched(mptcp_sk(sk),
+ mptcp_sched_find(mptcp_get_scheduler(net)));
+ if (ret)
+ return ret;
+
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2866,6 +2889,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ mptcp_release_sched(msk);
sk->sk_prot->destroy(sk);
@@ -3055,7 +3079,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
- msk->last_snd = NULL;
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
msk->push_pending = 0;
@@ -3111,7 +3134,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
- WRITE_ONCE(msk->subflow, NULL);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3122,6 +3144,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+ mptcp_init_sched(msk, mptcp_sk(sk)->sched);
/* passive msk is created after the first/MPC subflow */
msk->subflow_id = 2;
@@ -3175,25 +3198,17 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
-static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
bool kern)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *listener;
struct sock *newsk;
- listener = READ_ONCE(msk->subflow);
- if (WARN_ON_ONCE(!listener)) {
- *err = -EINVAL;
- return NULL;
- }
-
- pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
- newsk = inet_csk_accept(listener->sk, flags, err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, err, kern);
if (!newsk)
return NULL;
- pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -3210,9 +3225,9 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
}
newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
- MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
}
@@ -3253,10 +3268,8 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- /* clears msk->subflow, allowing the following to close
- * even the initial subflow
- */
- mptcp_dispose_initial_subflow(msk);
+ /* allow the following to close even the initial subflow */
+ msk->free_first = 1;
mptcp_destroy_common(msk, 0);
sk_sockets_allocated_dec(sk);
}
@@ -3336,8 +3349,6 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
- if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
- msk->last_snd = NULL;
}
__mptcp_update_rmem(sk);
@@ -3406,14 +3417,12 @@ static void mptcp_unhash(struct sock *sk)
static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
- ssock = msk->subflow;
- pr_debug("msk=%p, subflow=%p", msk, ssock);
- if (WARN_ON_ONCE(!ssock))
+ pr_debug("msk=%p, ssk=%p", msk, msk->first);
+ if (WARN_ON_ONCE(!msk->first))
return -EINVAL;
- return inet_csk_get_port(ssock->sk, snum);
+ return inet_csk_get_port(msk->first, snum);
}
void mptcp_finish_connect(struct sock *ssk)
@@ -3588,25 +3597,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int err = -EINVAL;
+ struct sock *ssk;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
+ subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
if (likely(!__mptcp_check_fallback(msk)))
@@ -3615,25 +3623,42 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
- else
- err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
- inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+ if (!msk->fastopening)
+ lock_sock(ssk);
+
+ /* the following mirrors closely a very small chunk of code from
+ * __inet_stream_connect()
+ */
+ if (ssk->sk_state != TCP_CLOSE)
+ goto out;
+
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
+ err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
+ err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
+ if (err < 0)
+ goto out;
+
+ inet_assign_bit(DEFER_CONNECT, sk, inet_test_bit(DEFER_CONNECT, ssk));
+
+out:
+ if (!msk->fastopening)
+ release_sock(ssk);
/* on successful connect, the msk state will be moved to established by
* subflow_finish_connect()
*/
- if (unlikely(err && err != -EINPROGRESS)) {
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (unlikely(err)) {
+ /* avoid leaving a dangling token in an unconnected socket */
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
return err;
}
- mptcp_copy_inaddrs(sk, ssock->sk);
-
- /* silence EINPROGRESS and let the caller inet_stream_connect
- * handle the connection in progress
- */
+ mptcp_copy_inaddrs(sk, ssk);
return 0;
}
@@ -3674,22 +3699,27 @@ static struct proto mptcp_prot = {
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- int err;
+ struct sock *ssk, *sk = sock->sk;
+ int err = -EINVAL;
- lock_sock(sock->sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ lock_sock(sk);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- err = ssock->ops->bind(ssock, uaddr, addr_len);
+ if (sk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, uaddr, addr_len);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (sk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, uaddr, addr_len);
+#endif
if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_copy_inaddrs(sk, ssk);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
@@ -3697,7 +3727,7 @@ static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *sk = sock->sk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
pr_debug("msk=%p", msk);
@@ -3708,22 +3738,24 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto unlock;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
- err = ssock->ops->listen(ssock, backlog);
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ release_sock(ssk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
+
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- mptcp_copy_inaddrs(sk, ssock->sk);
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+ mptcp_copy_inaddrs(sk, ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
}
unlock:
@@ -3735,8 +3767,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *ssk, *newsk;
int err;
pr_debug("msk=%p", msk);
@@ -3744,11 +3775,11 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
*/
- ssock = READ_ONCE(msk->subflow);
- if (!ssock)
+ ssk = READ_ONCE(msk->first);
+ if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(sock->sk, flags, &err, kern);
+ newsk = mptcp_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
@@ -3775,11 +3806,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
- if (msk->first &&
- unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
- if (unlikely(list_empty(&msk->conn_list)))
+ if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
}
@@ -3818,12 +3848,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN) {
- struct socket *ssock = READ_ONCE(msk->subflow);
+ struct sock *ssk = READ_ONCE(msk->first);
- if (WARN_ON_ONCE(!ssock || !ssock->sk))
+ if (WARN_ON_ONCE(!ssk))
return 0;
- return inet_csk_listen_poll(ssock->sk);
+ return inet_csk_listen_poll(ssk);
}
shutdown = READ_ONCE(sk->sk_shutdown);
@@ -3838,7 +3868,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= EPOLLOUT | EPOLLWRNORM;
else
mask |= mptcp_check_writeable(msk);
- } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT &&
+ inet_test_bit(DEFER_CONNECT, sk)) {
/* cf tcp_poll() note about TFO */
mask |= EPOLLOUT | EPOLLWRNORM;
}
@@ -3934,6 +3965,7 @@ void __init mptcp_proto_init(void)
mptcp_subflow_init();
mptcp_pm_init();
+ mptcp_sched_init();
mptcp_token_init();
if (proto_register(&mptcp_prot, 1) != 0)
@@ -3987,6 +4019,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+ mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ba2a873a4d2e..7254b3562575 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -123,7 +123,6 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
-#define MPTCP_RESET_SCHEDULER 7
struct mptcp_skb_cb {
u64 map_seq;
@@ -269,7 +268,6 @@ struct mptcp_sock {
u64 rcv_data_fin_seq;
u64 bytes_retrans;
int rmem_fwd_alloc;
- struct sock *last_snd;
int snd_burst;
int old_wspace;
u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
@@ -299,7 +297,8 @@ struct mptcp_sock {
cork:1,
nodelay:1,
fastopening:1,
- in_accept_queue:1;
+ in_accept_queue:1,
+ free_first:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -308,19 +307,19 @@ struct mptcp_sock {
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
struct list_head join_list;
- struct socket *subflow; /* outgoing connect/listener/!mp_capable
- * The mptcp ops can safely dereference, using suitable
- * ONCE annotation, the subflow outside the socket
- * lock as such sock is freed after close().
- */
- struct sock *first;
+ struct sock *first; /* The mptcp ops can safely dereference, using suitable
+ * ONCE annotation, the subflow outside the socket
+ * lock as such sock is freed after close().
+ */
struct mptcp_pm_data pm;
+ struct mptcp_sched_ops *sched;
struct {
u32 space; /* bytes copied in last measurement window */
u32 copied; /* bytes copied in this measurement window */
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
+ u8 scaling_ratio;
u32 subflow_id;
u32 setsockopt_seq;
@@ -350,9 +349,14 @@ static inline int __mptcp_rmem(const struct sock *sk)
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
+static inline int mptcp_win_from_space(const struct sock *sk, int space)
+{
+ return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+ return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -487,6 +491,7 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
__unused : 9;
enum mptcp_data_avail data_avail;
+ bool scheduled;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -620,6 +625,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
+const char *mptcp_get_scheduler(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -634,7 +640,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
@@ -652,6 +658,19 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
+struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_sched_init(void);
+int mptcp_init_sched(struct mptcp_sock *msk,
+ struct mptcp_sched_ops *sched);
+void mptcp_release_sched(struct mptcp_sock *msk);
+void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
+ bool scheduled);
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
+struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
+int mptcp_sched_get_send(struct mptcp_sock *msk);
+int mptcp_sched_get_retrans(struct mptcp_sock *msk);
static inline bool __tcp_can_send(const struct sock *ssk)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
new file mode 100644
index 000000000000..4ab0693c069c
--- /dev/null
+++ b/net/mptcp/sched.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, SUSE.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include "protocol.h"
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk,
+ struct mptcp_sched_data *data)
+{
+ struct sock *ssk;
+
+ ssk = data->reinject ? mptcp_subflow_get_retrans(msk) :
+ mptcp_subflow_get_send(msk);
+ if (!ssk)
+ return -EINVAL;
+
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true);
+ return 0;
+}
+
+static struct mptcp_sched_ops mptcp_sched_default = {
+ .get_subflow = mptcp_sched_default_get_subflow,
+ .name = "default",
+ .owner = THIS_MODULE,
+};
+
+/* Must be called with rcu read lock held */
+struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+ struct mptcp_sched_ops *sched, *ret = NULL;
+
+ list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
+ if (!strcmp(sched->name, name)) {
+ ret = sched;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+ if (!sched->get_subflow)
+ return -EINVAL;
+
+ spin_lock(&mptcp_sched_list_lock);
+ if (mptcp_sched_find(sched->name)) {
+ spin_unlock(&mptcp_sched_list_lock);
+ return -EEXIST;
+ }
+ list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+ spin_unlock(&mptcp_sched_list_lock);
+
+ pr_debug("%s registered", sched->name);
+ return 0;
+}
+
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
+{
+ if (sched == &mptcp_sched_default)
+ return;
+
+ spin_lock(&mptcp_sched_list_lock);
+ list_del_rcu(&sched->list);
+ spin_unlock(&mptcp_sched_list_lock);
+}
+
+void mptcp_sched_init(void)
+{
+ mptcp_register_scheduler(&mptcp_sched_default);
+}
+
+int mptcp_init_sched(struct mptcp_sock *msk,
+ struct mptcp_sched_ops *sched)
+{
+ if (!sched)
+ sched = &mptcp_sched_default;
+
+ if (!bpf_try_module_get(sched, sched->owner))
+ return -EBUSY;
+
+ msk->sched = sched;
+ if (msk->sched->init)
+ msk->sched->init(msk);
+
+ pr_debug("sched=%s", msk->sched->name);
+
+ return 0;
+}
+
+void mptcp_release_sched(struct mptcp_sock *msk)
+{
+ struct mptcp_sched_ops *sched = msk->sched;
+
+ if (!sched)
+ return;
+
+ msk->sched = NULL;
+ if (sched->release)
+ sched->release(msk);
+
+ bpf_module_put(sched, sched->owner);
+}
+
+void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
+ bool scheduled)
+{
+ WRITE_ONCE(subflow->scheduled, scheduled);
+}
+
+int mptcp_sched_get_send(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sched_data data;
+
+ msk_owned_by_me(msk);
+
+ /* the following check is moved out of mptcp_subflow_get_send */
+ if (__mptcp_check_fallback(msk)) {
+ if (msk->first &&
+ __tcp_can_send(msk->first) &&
+ sk_stream_memory_free(msk->first)) {
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
+ return 0;
+ }
+ return -EINVAL;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled))
+ return 0;
+ }
+
+ data.reinject = false;
+ if (msk->sched == &mptcp_sched_default || !msk->sched)
+ return mptcp_sched_default_get_subflow(msk, &data);
+ return msk->sched->get_subflow(msk, &data);
+}
+
+int mptcp_sched_get_retrans(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sched_data data;
+
+ msk_owned_by_me(msk);
+
+ /* the following check is moved out of mptcp_subflow_get_retrans */
+ if (__mptcp_check_fallback(msk))
+ return -EINVAL;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled))
+ return 0;
+ }
+
+ data.reinject = true;
+ if (msk->sched == &mptcp_sched_default || !msk->sched)
+ return mptcp_sched_default_get_subflow(msk, &data);
+ return msk->sched->get_subflow(msk, &data);
+}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a3f1fe810cc9..8260202c0066 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -292,7 +292,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *ssock;
+ struct sock *ssk;
int ret;
switch (optname) {
@@ -301,22 +301,22 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTODEVICE:
case SO_BINDTOIFINDEX:
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
+ ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
if (ret == 0) {
if (optname == SO_REUSEPORT)
- sk->sk_reuseport = ssock->sk->sk_reuseport;
+ sk->sk_reuseport = ssk->sk_reuseport;
else if (optname == SO_REUSEADDR)
- sk->sk_reuse = ssock->sk->sk_reuse;
+ sk->sk_reuse = ssk->sk_reuse;
else if (optname == SO_BINDTODEVICE)
- sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+ sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
else if (optname == SO_BINDTOIFINDEX)
- sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+ sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
}
release_sock(sk);
return ret;
@@ -390,20 +390,20 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
{
struct sock *sk = (struct sock *)msk;
int ret = -EOPNOTSUPP;
- struct socket *ssock;
+ struct sock *ssk;
switch (optname) {
case IPV6_V6ONLY:
case IPV6_TRANSPARENT:
case IPV6_FREEBIND:
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
+ ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
if (ret != 0) {
release_sock(sk);
return ret;
@@ -413,13 +413,15 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
switch (optname) {
case IPV6_V6ONLY:
- sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+ sk->sk_ipv6only = ssk->sk_ipv6only;
break;
case IPV6_TRANSPARENT:
- inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
+ inet_assign_bit(TRANSPARENT, sk,
+ inet_test_bit(TRANSPARENT, ssk));
break;
case IPV6_FREEBIND:
- inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
+ inet_assign_bit(FREEBIND, sk,
+ inet_test_bit(FREEBIND, ssk));
break;
}
@@ -684,8 +686,7 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct inet_sock *issk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
@@ -694,20 +695,19 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- issk = inet_sk(ssock->sk);
-
switch (optname) {
case IP_FREEBIND:
- issk->freebind = inet_sk(sk)->freebind;
+ inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
break;
case IP_TRANSPARENT:
- issk->transparent = inet_sk(sk)->transparent;
+ inet_assign_bit(TRANSPARENT, ssk,
+ inet_test_bit(TRANSPARENT, sk));
break;
default:
release_sock(sk);
@@ -763,18 +763,18 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *sock;
+ struct sock *ssk;
int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
- sock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(sock)) {
- ret = PTR_ERR(sock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ ret = PTR_ERR(ssk);
goto unlock;
}
- ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+ ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
unlock:
release_sock(sk);
@@ -864,9 +864,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
char __user *optval, int __user *optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *ssock;
- int ret;
struct sock *ssk;
+ int ret;
lock_sock(sk);
ssk = msk->first;
@@ -875,13 +874,13 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
goto out;
}
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- ret = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ ret = PTR_ERR(ssk);
goto out;
}
- ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
+ ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
out:
release_sock(sk);
@@ -1441,8 +1440,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
__tcp_sock_set_cork(ssk, !!msk->cork);
__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
- inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
- inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
+ inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
+ inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 94ae7dd01c65..9bf3c7bc1762 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1359,7 +1359,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
const struct sock *sk = subflow->conn;
*space = __mptcp_space(sk);
- *full_space = tcp_full_space(sk);
+ *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
void __mptcp_error_report(struct sock *sk)
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index d27f4eccce6d..a3a6753a1db7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -563,7 +563,7 @@ int ncsi_send_netlink_timeout(struct ncsi_request *nr,
int ncsi_send_netlink_err(struct net_device *dev,
u32 snd_seq,
u32 snd_portid,
- struct nlmsghdr *nlhdr,
+ const struct nlmsghdr *nlhdr,
int err)
{
struct nlmsghdr *nlh;
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 39a1a9d7bf77..747767ea0aae 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -19,7 +19,7 @@ int ncsi_send_netlink_timeout(struct ncsi_request *nr,
int ncsi_send_netlink_err(struct net_device *dev,
u32 snd_seq,
u32 snd_portid,
- struct nlmsghdr *nlhdr,
+ const struct nlmsghdr *nlhdr,
int err);
#endif /* __NCSI_NETLINK_H__ */
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5f76ae86a656..ef4e76e5aef9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -680,6 +680,12 @@ EXPORT_SYMBOL_GPL(nfnl_ct_hook);
const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
+const struct nf_defrag_hook __rcu *nf_defrag_v4_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v4_hook);
+
+const struct nf_defrag_hook __rcu *nf_defrag_v6_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v6_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
u8 nf_ctnetlink_has_listener;
EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 0b68e2e2824e..e564b5174261 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -872,7 +872,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
BUG_ON(!set);
read_lock_bh(&ip_set_ref_lock);
- strncpy(name, set->name, IPSET_MAXNAMELEN);
+ strscpy_pad(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -1326,7 +1326,7 @@ static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
goto out;
}
}
- strncpy(set->name, name2, IPSET_MAXNAMELEN);
+ strscpy_pad(set->name, name2, IPSET_MAXNAMELEN);
out:
write_unlock_bh(&ip_set_ref_lock);
@@ -1380,9 +1380,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
return -EBUSY;
}
- strncpy(from_name, from->name, IPSET_MAXNAMELEN);
- strncpy(from->name, to->name, IPSET_MAXNAMELEN);
- strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+ strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN);
+ strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN);
+ strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index cb83ca506c5c..3230506ae3ff 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1346,7 +1346,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
@@ -1946,7 +1946,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 264f2f87a437..da5af28ff57b 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1297,11 +1297,9 @@ static void set_sock_size(struct sock *sk, int mode, int val)
*/
static void set_mcast_loop(struct sock *sk, u_char loop)
{
- struct inet_sock *inet = inet_sk(sk);
-
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk);
- inet->mc_loop = loop ? 1 : 0;
+ inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index c36da56d756f..e502ec00b2fe 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +25,90 @@ struct bpf_nf_link {
struct nf_hook_ops hook_ops;
struct net *net;
u32 dead;
+ const struct nf_defrag_hook *defrag_hook;
};
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+static const struct nf_defrag_hook *
+get_proto_defrag_hook(struct bpf_nf_link *link,
+ const struct nf_defrag_hook __rcu *global_hook,
+ const char *mod)
+{
+ const struct nf_defrag_hook *hook;
+ int err;
+
+ /* RCU protects us from races against module unloading */
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ if (!hook) {
+ rcu_read_unlock();
+ err = request_module(mod);
+ if (err)
+ return ERR_PTR(err < 0 ? err : -EINVAL);
+
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ }
+
+ if (hook && try_module_get(hook->owner)) {
+ /* Once we have a refcnt on the module, we no longer need RCU */
+ hook = rcu_pointer_handoff(hook);
+ } else {
+ WARN_ONCE(!hook, "%s has bad registration", mod);
+ hook = ERR_PTR(-ENOENT);
+ }
+ rcu_read_unlock();
+
+ if (!IS_ERR(hook)) {
+ err = hook->enable(link->net);
+ if (err) {
+ module_put(hook->owner);
+ hook = ERR_PTR(err);
+ }
+ }
+
+ return hook;
+}
+#endif
+
+static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook __maybe_unused *hook;
+
+ switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ case NFPROTO_IPV4:
+ hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ case NFPROTO_IPV6:
+ hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook *hook = link->defrag_hook;
+
+ if (!hook)
+ return;
+ hook->disable(link->net);
+ module_put(hook->owner);
+}
+
static void bpf_nf_link_release(struct bpf_link *link)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -32,11 +116,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
if (nf_link->dead)
return;
- /* prevent hook-not-found warning splat from netfilter core when
- * .detach was already called
- */
- if (!cmpxchg(&nf_link->dead, 0, 1))
+ /* do not double release in case .detach was already called */
+ if (!cmpxchg(&nf_link->dead, 0, 1)) {
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+ bpf_nf_disable_defrag(nf_link);
+ }
}
static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +176,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
+ int prio;
+
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
@@ -102,19 +188,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
return -EAFNOSUPPORT;
}
- if (attr->link_create.netfilter.flags)
+ if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;
- /* make sure conntrack confirm is always last.
- *
- * In the future, if userspace can e.g. request defrag, then
- * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
- * should fail.
- */
- switch (attr->link_create.netfilter.priority) {
- case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
- case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
- }
+ /* make sure conntrack confirm is always last */
+ prio = attr->link_create.netfilter.priority;
+ if (prio == NF_IP_PRI_FIRST)
+ return -ERANGE; /* sabotage_in and other warts */
+ else if (prio == NF_IP_PRI_LAST)
+ return -ERANGE; /* e.g. conntrack confirm */
+ else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
+ prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
+ return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
return 0;
}
@@ -149,6 +234,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
link->net = net;
link->dead = false;
+ link->defrag_hook = NULL;
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
@@ -156,8 +242,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
+ err = bpf_nf_enable_defrag(link);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ }
+
err = nf_register_net_hook(net, &link->hook_ops);
if (err) {
+ bpf_nf_disable_defrag(link);
bpf_link_cleanup(&link_primer);
return err;
}
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 0d36d7285e3f..c7a6114091ae 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
+#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 992393102d5f..9f6f2e643575 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1756,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 96948e98ec53..81ca348915c9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple, bool unlink)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
- if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 69c8c8c7e9b8..334db22199c1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1321,15 +1321,11 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nlattr *tb[CTA_IP_MAX+1];
int ret = 0;
- ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL);
+ ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr,
+ cta_ip_nla_policy, NULL);
if (ret < 0)
return ret;
- ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX,
- cta_ip_nla_policy, NULL);
- if (ret)
- return ret;
-
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index d4fd626d2b8c..e2db1f4ec2df 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -69,6 +69,7 @@
#define DCCP_MSL (2 * 60 * HZ)
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static const char * const dccp_state_names[] = {
[CT_DCCP_NONE] = "NONE",
[CT_DCCP_REQUEST] = "REQUEST",
@@ -81,6 +82,7 @@ static const char * const dccp_state_names[] = {
[CT_DCCP_IGNORE] = "IGNORE",
[CT_DCCP_INVALID] = "INVALID",
};
+#endif
#define sNO CT_DCCP_NONE
#define sRQ CT_DCCP_REQUEST
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 1c26f03fc661..a010b25076ca 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -34,7 +34,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
- unsigned int enc_keys;
+ unsigned long long enc_keys;
if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
return;
@@ -43,8 +43,8 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
mask->enc_key_id.keyid = 0xffffffff;
- enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+ enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL);
if (ip_tunnel_info_af(tun_info) == AF_INET) {
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -55,7 +55,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
mask->enc_ipv4.src = 0xffffffff;
if (key->enc_ipv4.dst)
mask->enc_ipv4.dst = 0xffffffff;
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
} else {
memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
@@ -70,7 +70,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
sizeof(struct in6_addr)))
memset(&mask->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
@@ -163,14 +163,14 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
return -EOPNOTSUPP;
}
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(key->control.addr_type);
+ match->dissector.used_keys |= BIT_ULL(key->control.addr_type);
mask->basic.n_proto = 0xffff;
switch (tuple->l4proto) {
case IPPROTO_TCP:
key->tcp.flags = 0;
mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP);
break;
case IPPROTO_UDP:
case IPPROTO_GRE:
@@ -182,9 +182,9 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->basic.ip_proto = tuple->l4proto;
mask->basic.ip_proto = 0xff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
- BIT(FLOW_DISSECTOR_KEY_CONTROL) |
- BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
switch (tuple->l4proto) {
case IPPROTO_TCP:
@@ -194,7 +194,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->tp.dst = tuple->dst_port;
mask->tp.dst = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
break;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index eb8b1167dced..41b826dff6f5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3675,6 +3675,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
return -EMLINK;
list_for_each_entry(rule, &chain->rules, list) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
if (!nft_is_active_next(ctx->net, rule))
continue;
@@ -10485,6 +10488,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (ctx->chain == chain)
return -ELOOP;
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
list_for_each_entry(rule, &chain->rules, list) {
nft_rule_for_each_expr(expr, last, rule) {
struct nft_immediate_expr *priv;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 910ef881c3b8..12ab78fa5d84 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -35,12 +35,12 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
struct nft_flow_key *mask = &match->mask;
struct nft_flow_key *key = &match->key;
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL))
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))
return;
key->control.addr_type = addr_type;
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL);
match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
offsetof(struct nft_flow_key, control);
}
@@ -59,7 +59,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
.mask = match->mask.basic.n_proto,
};
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) &&
(match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
@@ -70,8 +70,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
offsetof(struct nft_flow_key, cvlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
- } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN);
+ } else if (match->dissector.used_keys &
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) &&
(match->key.basic.n_proto == htons(ETH_P_8021Q) ||
match->key.basic.n_proto == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.vlan.vlan_tpid;
@@ -80,7 +81,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct nft_flow_key, vlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e57eb168ee13..53c9e76473ba 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -470,7 +470,6 @@ __build_packet_message(struct nfnl_log_net *log,
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
- ktime_t tstamp;
nlh = nfnl_msg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
@@ -599,10 +598,9 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- tstamp = skb_tstamp_cond(skb, false);
- if (hooknum <= NF_INET_FORWARD && tstamp) {
+ if (hooknum <= NF_INET_FORWARD) {
+ struct timespec64 kts = ktime_to_timespec64(skb_tstamp_cond(skb, true));
struct nfulnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 6eb21a4f5698..cd4652259095 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -162,7 +162,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
memcpy(key + reg->offset, data, reg->len);
memcpy(mask + reg->offset, datamask, reg->len);
- flow->match.dissector.used_keys |= BIT(reg->key);
+ flow->match.dissector.used_keys |= BIT_ULL(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
if (reg->key == FLOW_DISSECTOR_KEY_META &&
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 38958e067aa8..86bb9d7797d9 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -108,7 +108,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
+ strscpy_pad((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP;
return;
}
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
}
nf_ct_set(skb, ct, IP_CT_NEW);
@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
+ __set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 6e049fd48760..04b51f285332 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -14,17 +14,18 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
+
const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
[NFTA_FIB_DREG] = { .type = NLA_U32 },
[NFTA_FIB_RESULT] = { .type = NLA_U32 },
- [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL),
};
EXPORT_SYMBOL(nft_fib_policy);
-#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
- NFTA_FIB_F_PRESENT)
-
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
{
@@ -77,7 +78,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
- if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ if (priv->flags == 0)
return -EINVAL;
if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
@@ -150,7 +151,7 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
if (priv->flags & NFTA_FIB_F_PRESENT)
*dreg = !!dev;
else
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 29ac48cdd6db..870e5b113d13 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -90,7 +90,8 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
- [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV),
};
static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -120,9 +121,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (tb[NFTA_LOOKUP_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
- if (flags & ~NFT_LOOKUP_F_INV)
- return -EINVAL;
-
if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index b115d77fbbc7..8a14aaca93bb 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -20,7 +20,8 @@ struct nft_masq {
};
static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
- [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
[NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
@@ -47,11 +48,8 @@ static int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- if (tb[NFTA_MASQ_FLAGS]) {
+ if (tb[NFTA_MASQ_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8fdc7318c03c..f7da7c43333b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -185,12 +185,12 @@ static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
case NFT_META_IIFKIND:
if (!in || !in->rtnl_link_ops)
return false;
- strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
break;
case NFT_META_OIFKIND:
if (!out || !out->rtnl_link_ops)
return false;
- strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
break;
default:
return false;
@@ -206,7 +206,7 @@ static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
{
- strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad((char *)dest, dev ? dev->name : "", IFNAMSIZ);
}
static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 5c29915ab028..583885ce7232 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -132,7 +132,8 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_nat_validate(const struct nft_ctx *ctx,
@@ -246,11 +247,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_NAT_FLAGS]) {
+ if (tb[NFTA_NAT_FLAGS])
priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EOPNOTSUPP;
- }
return nf_ct_netns_get(ctx->net, family);
}
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 70820c66b591..7f61506e5b44 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -23,7 +23,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct nft_osf *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
struct sk_buff *skb = pkt->skb;
- char os_match[NFT_OSF_MAXGENRELEN + 1];
+ char os_match[NFT_OSF_MAXGENRELEN];
const struct tcphdr *tcp;
struct nf_osf_data data;
struct tcphdr _tcph;
@@ -45,7 +45,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
- strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
} else {
if (priv->flags & NFT_OSF_F_VERSION)
snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
@@ -53,7 +53,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
- strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
}
}
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a70196ffcb1e..a58bd8d291ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -22,7 +22,8 @@ struct nft_redir {
static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
[NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_REDIR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_REDIR_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_redir_validate(const struct nft_ctx *ctx,
@@ -68,11 +69,8 @@ static int nft_redir_init(const struct nft_ctx *ctx,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_REDIR_FLAGS]) {
+ if (tb[NFTA_REDIR_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
return nf_ct_netns_get(ctx->net, ctx->family);
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 470282cf3fae..21624d68314f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -768,7 +768,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
m->u.user.match_size = msize;
strscpy(name, match->name, sizeof(name));
module_put(match->me);
- strncpy(m->u.user.name, name, sizeof(m->u.user.name));
+ strscpy_pad(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
@@ -1148,7 +1148,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
t->u.user.target_size = tsize;
strscpy(name, target->name, sizeof(name));
module_put(target->me);
- strncpy(t->u.user.name, name, sizeof(t->u.user.name));
+ strscpy_pad(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
@@ -2014,4 +2014,3 @@ static void __exit xt_fini(void)
module_init(xt_init);
module_exit(xt_fini);
-
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 68ccbe50bb1e..5d1fb7018dba 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -29,7 +29,7 @@
if (tbl == NULL) \
return NULL; \
term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
- strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+ strscpy_pad(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
*term = (struct type##_error)typ2##_ERROR_INIT; \
tbl->repl.valid_hooks = hook_mask; \
tbl->repl.num_entries = nhooks + 1; \
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 85d7ecb05728..9518ab56ec98 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -149,7 +149,4 @@ enum {
/* NetLabel protocol functions */
int netlbl_cipsov4_genl_init(void);
-/* Free the memory associated with a CIPSOv4 DOI definition */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry);
-
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 383631873748..642b9d382fb4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -84,7 +84,7 @@ struct listeners {
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
+ return nlk_test_bit(KERNEL_SOCKET, sk);
}
struct netlink_table *nl_table __read_mostly;
@@ -349,9 +349,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
static void netlink_overrun(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
@@ -677,6 +675,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sock, unsigned long *groups);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -704,6 +703,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
+ release = nl_table[protocol].release;
netlink_unlock_table();
if (err < 0)
@@ -719,6 +719,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
nlk->module = module;
nlk->netlink_bind = bind;
nlk->netlink_unbind = unbind;
+ nlk->netlink_release = release;
out:
return err;
@@ -763,6 +764,8 @@ static int netlink_release(struct socket *sock)
* OK. Socket is unlinked, any packets that arrive now
* will be purged.
*/
+ if (nlk->netlink_release)
+ nlk->netlink_release(sk, nlk->groups);
/* must not acquire netlink_table_lock in any way again before unbind
* and notifying genetlink is done as otherwise it might deadlock
@@ -1402,9 +1405,7 @@ EXPORT_SYMBOL_GPL(netlink_has_listeners);
bool netlink_strict_get_check(struct sk_buff *skb)
{
- const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);
-
- return nlk->flags & NETLINK_F_STRICT_CHK;
+ return nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
}
EXPORT_SYMBOL_GPL(netlink_strict_get_check);
@@ -1432,6 +1433,8 @@ struct netlink_broadcast_data {
int delivered;
gfp_t allocation;
struct sk_buff *skb, *skb2;
+ int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+ void *tx_data;
};
static void do_one_broadcast(struct sock *sk,
@@ -1448,7 +1451,7 @@ static void do_one_broadcast(struct sock *sk,
return;
if (!net_eq(sock_net(sk), p->net)) {
- if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ if (!nlk_test_bit(LISTEN_ALL_NSID, sk))
return;
if (!peernet_has_id(sock_net(sk), p->net))
@@ -1481,10 +1484,17 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
p->delivery_failure = 1;
goto out;
}
+
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ kfree_skb(p->skb2);
+ p->skb2 = NULL;
+ goto out;
+ }
+
if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
@@ -1496,7 +1506,7 @@ static void do_one_broadcast(struct sock *sk,
val = netlink_broadcast_deliver(sk, p->skb2);
if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
p->delivery_failure = 1;
} else {
p->congested |= val;
@@ -1507,8 +1517,12 @@ out:
sock_put(sk);
}
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
- u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+ u32 portid,
+ u32 group, gfp_t allocation,
+ int (*filter)(struct sock *dsk,
+ struct sk_buff *skb, void *data),
+ void *filter_data)
{
struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
@@ -1527,6 +1541,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
info.allocation = allocation;
info.skb = skb;
info.skb2 = NULL;
+ info.tx_filter = filter;
+ info.tx_data = filter_data;
/* While we sleep in clone, do not allow to change socket list */
@@ -1552,6 +1568,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
}
return -ESRCH;
}
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
+ u32 group, gfp_t allocation)
+{
+ return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
+ NULL, NULL);
+}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
@@ -1576,7 +1600,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
ret = 1;
goto out;
}
@@ -1629,10 +1653,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
old = test_bit(group - 1, nlk->groups);
subscriptions = nlk->subscriptions - old + new;
- if (new)
- __set_bit(group - 1, nlk->groups);
- else
- __clear_bit(group - 1, nlk->groups);
+ __assign_bit(group - 1, nlk->groups, new);
netlink_update_subscriptions(&nlk->sk, subscriptions);
netlink_update_listeners(&nlk->sk);
}
@@ -1643,7 +1664,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
unsigned int val = 0;
- int err;
+ int nr = -1;
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
@@ -1654,14 +1675,12 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
- if (val)
- nlk->flags |= NETLINK_F_RECV_PKTINFO;
- else
- nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
- err = 0;
+ nr = NETLINK_F_RECV_PKTINFO;
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
+ int err;
+
if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
@@ -1681,61 +1700,38 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
nlk->netlink_unbind(sock_net(sk), val);
- err = 0;
break;
}
case NETLINK_BROADCAST_ERROR:
- if (val)
- nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
- else
- nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
- err = 0;
+ nr = NETLINK_F_BROADCAST_SEND_ERROR;
break;
case NETLINK_NO_ENOBUFS:
+ assign_bit(NETLINK_F_RECV_NO_ENOBUFS, &nlk->flags, val);
if (val) {
- nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
- } else {
- nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
- err = 0;
break;
case NETLINK_LISTEN_ALL_NSID:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
return -EPERM;
-
- if (val)
- nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
- else
- nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
- err = 0;
+ nr = NETLINK_F_LISTEN_ALL_NSID;
break;
case NETLINK_CAP_ACK:
- if (val)
- nlk->flags |= NETLINK_F_CAP_ACK;
- else
- nlk->flags &= ~NETLINK_F_CAP_ACK;
- err = 0;
+ nr = NETLINK_F_CAP_ACK;
break;
case NETLINK_EXT_ACK:
- if (val)
- nlk->flags |= NETLINK_F_EXT_ACK;
- else
- nlk->flags &= ~NETLINK_F_EXT_ACK;
- err = 0;
+ nr = NETLINK_F_EXT_ACK;
break;
case NETLINK_GET_STRICT_CHK:
- if (val)
- nlk->flags |= NETLINK_F_STRICT_CHK;
- else
- nlk->flags &= ~NETLINK_F_STRICT_CHK;
- err = 0;
+ nr = NETLINK_F_STRICT_CHK;
break;
default:
- err = -ENOPROTOOPT;
+ return -ENOPROTOOPT;
}
- return err;
+ if (nr >= 0)
+ assign_bit(nr, &nlk->flags, val);
+ return 0;
}
static int netlink_getsockopt(struct socket *sock, int level, int optname,
@@ -1802,7 +1798,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & flag ? 1 : 0;
+ val = test_bit(flag, &nlk->flags);
if (put_user(len, optlen) ||
copy_to_user(optval, &val, len))
@@ -1979,9 +1975,9 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ if (nlk_test_bit(RECV_PKTINFO, sk))
netlink_cmsg_recv_pktinfo(msg, skb);
- if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ if (nlk_test_bit(LISTEN_ALL_NSID, sk))
netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
@@ -2058,7 +2054,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_F_KERNEL_SOCKET;
+ set_bit(NETLINK_F_KERNEL_SOCKET, &nlk->flags);
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2069,6 +2065,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (cfg) {
nl_table[unit].bind = cfg->bind;
nl_table[unit].unbind = cfg->unbind;
+ nl_table[unit].release = cfg->release;
nl_table[unit].flags = cfg->flags;
}
nl_table[unit].registered = 1;
@@ -2192,7 +2189,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) {
+ if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
nlmsg_end(skb, nlh);
@@ -2321,8 +2318,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *control)
{
- struct netlink_sock *nlk, *nlk2;
struct netlink_callback *cb;
+ struct netlink_sock *nlk;
struct sock *sk;
int ret;
@@ -2357,8 +2354,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
- nlk2 = nlk_sk(NETLINK_CB(skb).sk);
- cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
+ cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
if (control->start) {
cb->extack = control->extack;
@@ -2402,7 +2398,7 @@ netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
{
size_t tlvlen;
- if (!extack || !(nlk->flags & NETLINK_F_EXT_ACK))
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
return 0;
tlvlen = 0;
@@ -2474,7 +2470,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
* requests to cap the error message, and get extra error data if
* requested.
*/
- if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
+ if (err && !test_bit(NETLINK_F_CAP_ACK, &nlk->flags))
payload += nlmsg_len(nlh);
else
flags |= NLM_F_CAPPED;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 90a3198a9b7f..2145979b9986 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -8,14 +8,16 @@
#include <net/sock.h>
/* flags */
-#define NETLINK_F_KERNEL_SOCKET 0x1
-#define NETLINK_F_RECV_PKTINFO 0x2
-#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_F_RECV_NO_ENOBUFS 0x8
-#define NETLINK_F_LISTEN_ALL_NSID 0x10
-#define NETLINK_F_CAP_ACK 0x20
-#define NETLINK_F_EXT_ACK 0x40
-#define NETLINK_F_STRICT_CHK 0x80
+enum {
+ NETLINK_F_KERNEL_SOCKET,
+ NETLINK_F_RECV_PKTINFO,
+ NETLINK_F_BROADCAST_SEND_ERROR,
+ NETLINK_F_RECV_NO_ENOBUFS,
+ NETLINK_F_LISTEN_ALL_NSID,
+ NETLINK_F_CAP_ACK,
+ NETLINK_F_EXT_ACK,
+ NETLINK_F_STRICT_CHK,
+};
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
@@ -23,10 +25,10 @@
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct sock sk;
+ unsigned long flags;
u32 portid;
u32 dst_portid;
u32 dst_group;
- u32 flags;
u32 subscriptions;
u32 ngroups;
unsigned long *groups;
@@ -42,6 +44,8 @@ struct netlink_sock {
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
+ void (*netlink_release)(struct sock *sk,
+ unsigned long *groups);
struct module *module;
struct rhash_head node;
@@ -54,6 +58,8 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
+#define nlk_test_bit(nr, sk) test_bit(NETLINK_F_##nr, &nlk_sk(sk)->flags)
+
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
@@ -64,6 +70,8 @@ struct netlink_table {
struct module *module;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sk,
+ unsigned long *groups);
int registered;
};
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index e4f21b1067bc..9c4f231be275 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -27,15 +27,15 @@ static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
if (nlk->cb_running)
flags |= NDIAG_FLAG_CB_RUNNING;
- if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ if (nlk_test_bit(RECV_PKTINFO, sk))
flags |= NDIAG_FLAG_PKTINFO;
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
flags |= NDIAG_FLAG_BROADCAST_ERROR;
- if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)
+ if (nlk_test_bit(RECV_NO_ENOBUFS, sk))
flags |= NDIAG_FLAG_NO_ENOBUFS;
- if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ if (nlk_test_bit(LISTEN_ALL_NSID, sk))
flags |= NDIAG_FLAG_LISTEN_ALL_NSID;
- if (nlk->flags & NETLINK_F_CAP_ACK)
+ if (nlk_test_bit(CAP_ACK, sk))
flags |= NDIAG_FLAG_CAP_ACK;
return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a157247a1e45..8315d31b53db 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -52,6 +52,18 @@ static void genl_unlock_all(void)
up_write(&cb_lock);
}
+static void genl_op_lock(const struct genl_family *family)
+{
+ if (!family->parallel_ops)
+ genl_lock();
+}
+
+static void genl_op_unlock(const struct genl_family *family)
+{
+ if (!family->parallel_ops)
+ genl_unlock();
+}
+
static DEFINE_IDR(genl_fam_idr);
/*
@@ -593,8 +605,12 @@ static int genl_validate_ops(const struct genl_family *family)
return -EINVAL;
/* Check sort order */
- if (a->cmd < b->cmd)
+ if (a->cmd < b->cmd) {
continue;
+ } else if (a->cmd > b->cmd) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
if (a->internal_flags != b->internal_flags ||
((a->flags ^ b->flags) & ~(GENL_CMD_CAP_DO |
@@ -828,64 +844,63 @@ static int genl_start(struct netlink_callback *cb)
genl_family_rcv_msg_attrs_free(attrs);
return -ENOMEM;
}
- info->family = ctx->family;
info->op = *ops;
- info->attrs = attrs;
+ info->info.family = ctx->family;
+ info->info.snd_seq = cb->nlh->nlmsg_seq;
+ info->info.snd_portid = NETLINK_CB(cb->skb).portid;
+ info->info.nlhdr = cb->nlh;
+ info->info.genlhdr = nlmsg_data(cb->nlh);
+ info->info.attrs = attrs;
+ genl_info_net_set(&info->info, sock_net(cb->skb->sk));
+ info->info.extack = cb->extack;
+ memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr));
cb->data = info;
if (ops->start) {
- if (!ctx->family->parallel_ops)
- genl_lock();
+ genl_op_lock(ctx->family);
rc = ops->start(cb);
- if (!ctx->family->parallel_ops)
- genl_unlock();
+ genl_op_unlock(ctx->family);
}
if (rc) {
- genl_family_rcv_msg_attrs_free(info->attrs);
+ genl_family_rcv_msg_attrs_free(info->info.attrs);
genl_dumpit_info_free(info);
cb->data = NULL;
}
return rc;
}
-static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+static int genl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct genl_split_ops *ops = &genl_dumpit_info(cb)->op;
+ struct genl_dumpit_info *dump_info = cb->data;
+ const struct genl_split_ops *ops = &dump_info->op;
+ struct genl_info *info = &dump_info->info;
int rc;
- genl_lock();
+ info->extack = cb->extack;
+
+ genl_op_lock(info->family);
rc = ops->dumpit(skb, cb);
- genl_unlock();
+ genl_op_unlock(info->family);
return rc;
}
-static int genl_lock_done(struct netlink_callback *cb)
+static int genl_done(struct netlink_callback *cb)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- const struct genl_split_ops *ops = &info->op;
+ struct genl_dumpit_info *dump_info = cb->data;
+ const struct genl_split_ops *ops = &dump_info->op;
+ struct genl_info *info = &dump_info->info;
int rc = 0;
+ info->extack = cb->extack;
+
if (ops->done) {
- genl_lock();
+ genl_op_lock(info->family);
rc = ops->done(cb);
- genl_unlock();
+ genl_op_unlock(info->family);
}
genl_family_rcv_msg_attrs_free(info->attrs);
- genl_dumpit_info_free(info);
- return rc;
-}
-
-static int genl_parallel_done(struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- const struct genl_split_ops *ops = &info->op;
- int rc = 0;
-
- if (ops->done)
- rc = ops->done(cb);
- genl_family_rcv_msg_attrs_free(info->attrs);
- genl_dumpit_info_free(info);
+ genl_dumpit_info_free(dump_info);
return rc;
}
@@ -897,6 +912,14 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
int hdrlen, struct net *net)
{
struct genl_start_context ctx;
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = &ctx,
+ .start = genl_start,
+ .dump = genl_dumpit,
+ .done = genl_done,
+ .extack = extack,
+ };
int err;
ctx.family = family;
@@ -905,31 +928,9 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
ctx.ops = ops;
ctx.hdrlen = hdrlen;
- if (!family->parallel_ops) {
- struct netlink_dump_control c = {
- .module = family->module,
- .data = &ctx,
- .start = genl_start,
- .dump = genl_lock_dumpit,
- .done = genl_lock_done,
- .extack = extack,
- };
-
- genl_unlock();
- err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- genl_lock();
- } else {
- struct netlink_dump_control c = {
- .module = family->module,
- .data = &ctx,
- .start = genl_start,
- .dump = ops->dumpit,
- .done = genl_parallel_done,
- .extack = extack,
- };
-
- err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- }
+ genl_op_unlock(family);
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ genl_op_lock(family);
return err;
}
@@ -953,9 +954,9 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
info.snd_seq = nlh->nlmsg_seq;
info.snd_portid = NETLINK_CB(skb).portid;
+ info.family = family;
info.nlhdr = nlh;
info.genlhdr = nlmsg_data(nlh);
- info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
info.extack = extack;
genl_info_net_set(&info, net);
@@ -1061,13 +1062,9 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (family == NULL)
return -ENOENT;
- if (!family->parallel_ops)
- genl_lock();
-
+ genl_op_lock(family);
err = genl_family_rcv_msg(family, skb, nlh, extack);
-
- if (!family->parallel_ops)
- genl_unlock();
+ genl_op_unlock(family);
return err;
}
@@ -1392,7 +1389,7 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx;
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
const struct genl_family *rt;
struct genl_op_iter i;
int err;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index e9ac6a6f934e..aa1dbf654c3e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -110,10 +110,10 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
struct nfc_dev *dev;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->info.attrs[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(info->info.attrs[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index cab1e02b63e0..fd66014d8a76 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -27,6 +27,7 @@
#include <net/sctp/checksum.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
@@ -781,7 +782,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk,
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
return -ENOMEM;
}
@@ -852,6 +853,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
struct sk_buff *skb, u16 mru,
struct sw_flow_key *key)
{
+ enum ovs_drop_reason reason;
u16 orig_network_offset = 0;
if (eth_p_mpls(skb->protocol)) {
@@ -861,6 +863,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
+ reason = OVS_DROP_FRAG_L2_TOO_LONG;
goto err;
}
@@ -901,12 +904,13 @@ static void ovs_fragment(struct net *net, struct vport *vport,
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(key->eth.type), mru,
vport->dev->mtu);
+ reason = OVS_DROP_FRAG_INVALID_PROTO;
goto err;
}
return;
err:
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, reason);
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
@@ -933,10 +937,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ovs_fragment(net, vport, skb, mru, key);
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
}
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
}
}
@@ -1010,7 +1014,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
return clone_execute(dp, skb, key, 0, nla_data(actions),
nla_len(actions), true, false);
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
return 0;
}
@@ -1036,7 +1040,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
if (last)
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1297,6 +1301,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (trace_ovs_do_execute_action_enabled())
trace_ovs_do_execute_action(dp, skb, key, a, rem);
+ /* Actions that rightfully have to consume the skb should do it
+ * and return directly.
+ */
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT: {
int port = nla_get_u32(a);
@@ -1332,6 +1339,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
output_userspace(dp, skb, key, a, attr,
len, OVS_CB(skb)->cutlen);
OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
break;
case OVS_ACTION_ATTR_HASH:
@@ -1446,7 +1457,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_METER:
if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_METER);
return 0;
}
break;
@@ -1477,15 +1488,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return dec_ttl_exception_handler(dp, skb,
key, a);
break;
+
+ case OVS_ACTION_ATTR_DROP: {
+ enum ovs_drop_reason reason = nla_get_u32(a)
+ ? OVS_DROP_EXPLICIT_WITH_ERROR
+ : OVS_DROP_EXPLICIT;
+
+ ovs_kfree_skb_reason(skb, reason);
+ return 0;
+ }
}
if (unlikely(err)) {
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
return err;
}
}
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1547,7 +1567,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
/* Out of per CPU action FIFO space. Drop the 'skb' and
* log an error.
*/
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);
if (net_ratelimit()) {
if (actions) { /* Sample action */
@@ -1599,7 +1619,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
err = -ENETDOWN;
goto out;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 331730fd3580..0b9a785dea45 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_act_ct.h>
#include "datapath.h"
+#include "drop.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
@@ -455,45 +456,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -852,36 +814,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -1094,7 +1036,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
skb_push_rcsum(skb, nh_ofs);
if (err)
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
return err;
}
@@ -1460,7 +1402,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
@@ -1662,7 +1605,7 @@ static struct sk_buff *
ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *skb;
skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 3d7a91e64c88..11c69415c605 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -41,6 +41,7 @@
#include <net/pkt_cls.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
@@ -589,7 +590,7 @@ out:
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
@@ -966,7 +967,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow *flow = NULL, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
@@ -1213,7 +1214,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow_key key;
struct sw_flow *flow;
struct sk_buff *reply = NULL;
@@ -1314,7 +1315,7 @@ error:
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1373,7 +1374,7 @@ unlock:
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1641,7 +1642,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
{
struct datapath *dp;
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
info->attrs);
if (IS_ERR(dp))
return;
@@ -1934,7 +1935,8 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -1967,7 +1969,8 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -2002,7 +2005,8 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
goto err_unlock_free;
@@ -2245,7 +2249,7 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct vport_parms parms;
struct sk_buff *reply;
struct vport *vport;
@@ -2347,7 +2351,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2403,7 +2407,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2446,7 +2450,7 @@ exit_unlock_free:
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *reply;
struct vport *vport;
int err;
@@ -2702,6 +2706,17 @@ static struct pernet_operations ovs_net_ops = {
.size = sizeof(struct ovs_net),
};
+static const char * const ovs_drop_reasons[] = {
+#define S(x) (#x),
+ OVS_DROP_REASONS(S)
+#undef S
+};
+
+static struct drop_reason_list drop_reason_list_ovs = {
+ .reasons = ovs_drop_reasons,
+ .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
+};
+
static int __init dp_init(void)
{
int err;
@@ -2743,6 +2758,9 @@ static int __init dp_init(void)
if (err < 0)
goto error_unreg_netdev;
+ drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+ &drop_reason_list_ovs);
+
return 0;
error_unreg_netdev:
@@ -2769,6 +2787,7 @@ static void dp_cleanup(void)
ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
+ drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h
new file mode 100644
index 000000000000..cedf9b7b5796
--- /dev/null
+++ b/net/openvswitch/drop.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * OpenvSwitch drop reason list.
+ */
+
+#ifndef OPENVSWITCH_DROP_H
+#define OPENVSWITCH_DROP_H
+#include <linux/skbuff.h>
+#include <net/dropreason.h>
+
+#define OVS_DROP_REASONS(R) \
+ R(OVS_DROP_LAST_ACTION) \
+ R(OVS_DROP_ACTION_ERROR) \
+ R(OVS_DROP_EXPLICIT) \
+ R(OVS_DROP_EXPLICIT_WITH_ERROR) \
+ R(OVS_DROP_METER) \
+ R(OVS_DROP_RECURSION_LIMIT) \
+ R(OVS_DROP_DEFERRED_LIMIT) \
+ R(OVS_DROP_FRAG_L2_TOO_LONG) \
+ R(OVS_DROP_FRAG_INVALID_PROTO) \
+ R(OVS_DROP_CONNTRACK) \
+ R(OVS_DROP_IP_TTL) \
+ /* deliberate comment for trailing \ */
+
+enum ovs_drop_reason {
+ __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+#define ENUM(x) x,
+ OVS_DROP_REASONS(ENUM)
+#undef ENUM
+
+ OVS_DROP_MAX,
+};
+
+static inline void
+ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason)
+{
+ kfree_skb_reason(skb, (u32)reason);
+}
+
+#endif /* OPENVSWITCH_DROP_H */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 41116361433d..88965e2068ac 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -38,6 +38,7 @@
#include <net/tun_proto.h>
#include <net/erspan.h>
+#include "drop.h"
#include "flow_netlink.h"
struct ovs_len_tbl {
@@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_DROP:
break;
case OVS_ACTION_ATTR_CT:
@@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
if (!actions)
return;
@@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
+ [OVS_ACTION_ATTR_DROP] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3453,6 +3456,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_DROP:
+ if (!nla_is_last(a, rem))
+ return -EINVAL;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index c4ebf810e4b1..cc08e0403909 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -211,7 +211,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
struct sk_buff *skb;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
@@ -272,7 +272,7 @@ error:
static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr *nla, *band_nla;
struct sk_buff *reply;
@@ -409,7 +409,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct dp_meter *meter, *old_meter;
struct sk_buff *reply;
struct ovs_header *ovs_reply_header;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct dp_meter_table *meter_tbl;
struct datapath *dp;
int err;
@@ -482,7 +482,7 @@ exit_free_meter:
static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *meter;
@@ -535,7 +535,7 @@ exit_unlock:
static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *old_meter;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a2935bd18ed9..8f97648d652f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2931,8 +2931,10 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
if (prepad + len < PAGE_SIZE || !linear)
linear = len;
+ if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
- err, 0);
+ err, PAGE_ALLOC_COSTLY_ORDER);
if (!skb)
return NULL;
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 78beb74146e7..41ece61eb57a 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -23,6 +23,8 @@
#define QRTR_EPH_PORT_RANGE \
XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
+#define QRTR_PORT_CTRL_LEGACY 0xffff
+
/**
* struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
@@ -495,6 +497,9 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
+ if (cb->dst_port == QRTR_PORT_CTRL_LEGACY)
+ cb->dst_port = QRTR_PORT_CTRL;
+
if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 0f7a729f1a1f..b1db0b519179 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -16,7 +16,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/qrtr.h>
-static RADIX_TREE(nodes, GFP_KERNEL);
+static DEFINE_XARRAY(nodes);
static struct {
struct socket *sock;
@@ -66,14 +66,14 @@ struct qrtr_server {
struct qrtr_node {
unsigned int id;
- struct radix_tree_root servers;
+ struct xarray servers;
};
static struct qrtr_node *node_get(unsigned int node_id)
{
struct qrtr_node *node;
- node = radix_tree_lookup(&nodes, node_id);
+ node = xa_load(&nodes, node_id);
if (node)
return node;
@@ -83,8 +83,9 @@ static struct qrtr_node *node_get(unsigned int node_id)
return NULL;
node->id = node_id;
+ xa_init(&node->servers);
- if (radix_tree_insert(&nodes, node_id, node)) {
+ if (xa_store(&nodes, node_id, node, GFP_KERNEL)) {
kfree(node);
return NULL;
}
@@ -193,40 +194,23 @@ static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv,
static int announce_servers(struct sockaddr_qrtr *sq)
{
- struct radix_tree_iter iter;
struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
int ret;
node = node_get(qrtr_ns.local_node);
if (!node)
return 0;
- rcu_read_lock();
/* Announce the list of servers registered in this node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&node->servers, index, srv) {
ret = service_announce_new(sq, srv);
if (ret < 0) {
pr_err("failed to announce new service\n");
return ret;
}
-
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -256,14 +240,17 @@ static struct qrtr_server *server_add(unsigned int service,
goto err;
/* Delete the old server on the same port */
- old = radix_tree_lookup(&node->servers, port);
+ old = xa_store(&node->servers, port, srv, GFP_KERNEL);
if (old) {
- radix_tree_delete(&node->servers, port);
- kfree(old);
+ if (xa_is_err(old)) {
+ pr_err("failed to add server [0x%x:0x%x] ret:%d\n",
+ srv->service, srv->instance, xa_err(old));
+ goto err;
+ } else {
+ kfree(old);
+ }
}
- radix_tree_insert(&node->servers, port, srv);
-
trace_qrtr_ns_server_add(srv->service, srv->instance,
srv->node, srv->port);
@@ -280,11 +267,11 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
struct qrtr_server *srv;
struct list_head *li;
- srv = radix_tree_lookup(&node->servers, port);
+ srv = xa_load(&node->servers, port);
if (!srv)
return -ENOENT;
- radix_tree_delete(&node->servers, port);
+ xa_erase(&node->servers, port);
/* Broadcast the removal of local servers */
if (srv->node == qrtr_ns.local_node && bcast)
@@ -344,13 +331,12 @@ static int ctrl_cmd_hello(struct sockaddr_qrtr *sq)
static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_ctrl_pkt pkt;
struct qrtr_server *srv;
struct sockaddr_qrtr sq;
struct msghdr msg = { };
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -361,22 +347,9 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
if (!node)
return 0;
- rcu_read_lock();
/* Advertise removal of this client to all servers of remote node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
+ xa_for_each(&node->servers, index, srv)
server_del(node, srv->port, true);
- rcu_read_lock();
- }
- rcu_read_unlock();
/* Advertise the removal of this client to all local servers */
local_node = node_get(qrtr_ns.local_node);
@@ -387,18 +360,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE);
pkt.client.node = cpu_to_le32(from->sq_node);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -411,11 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pr_err("failed to send bye cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -423,7 +381,6 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
unsigned int node_id, unsigned int port)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_lookup *lookup;
struct qrtr_ctrl_pkt pkt;
struct msghdr msg = { };
@@ -432,7 +389,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
struct qrtr_node *node;
struct list_head *tmp;
struct list_head *li;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -477,18 +434,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pkt.client.node = cpu_to_le32(node_id);
pkt.client.port = cpu_to_le32(port);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -501,11 +447,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pr_err("failed to send del client cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -576,13 +518,12 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
unsigned int service, unsigned int instance)
{
- struct radix_tree_iter node_iter;
struct qrtr_server_filter filter;
- struct radix_tree_iter srv_iter;
struct qrtr_lookup *lookup;
+ struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **node_slot;
- void __rcu **srv_slot;
+ unsigned long node_idx;
+ unsigned long srv_idx;
/* Accept only local observers */
if (from->sq_node != qrtr_ns.local_node)
@@ -601,40 +542,14 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
filter.service = service;
filter.instance = instance;
- rcu_read_lock();
- radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) {
- node = radix_tree_deref_slot(node_slot);
- if (!node)
- continue;
- if (radix_tree_deref_retry(node)) {
- node_slot = radix_tree_iter_retry(&node_iter);
- continue;
- }
- node_slot = radix_tree_iter_resume(node_slot, &node_iter);
-
- radix_tree_for_each_slot(srv_slot, &node->servers,
- &srv_iter, 0) {
- struct qrtr_server *srv;
-
- srv = radix_tree_deref_slot(srv_slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- srv_slot = radix_tree_iter_retry(&srv_iter);
- continue;
- }
-
+ xa_for_each(&nodes, node_idx, node) {
+ xa_for_each(&node->servers, srv_idx, srv) {
if (!server_match(srv, &filter))
continue;
- srv_slot = radix_tree_iter_resume(srv_slot, &srv_iter);
-
- rcu_read_unlock();
lookup_notify(from, srv, true);
- rcu_read_lock();
}
}
- rcu_read_unlock();
/* Empty notification, to indicate end of listing */
lookup_notify(from, NULL, true);
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ca4c3a667091..d2fdb1529585 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -17,7 +17,6 @@
*/
#define RDS_RDMA_REJ_INCOMPAT 1
-int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
diff --git a/net/rds/rds.h b/net/rds/rds.h
index d35d1fc39807..dc360252c515 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -863,7 +863,6 @@ int rds_message_next_extension(struct rds_header *hdr,
unsigned int *pos, void *buf, unsigned int *buflen);
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
-void rds_message_inc_free(struct rds_incoming *inc);
void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
@@ -1013,7 +1012,5 @@ void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
struct rds_transport *rds_trans_get(int t_type);
-int rds_trans_init(void);
-void rds_trans_exit(void);
#endif
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8b5930d7b34..053aa7da87ef 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -56,7 +56,6 @@ void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
-u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
void rds_tcp_accept_work(struct sock *sk);
int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..470c70deffe2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE
config NET_SCH_INGRESS
tristate "Ingress/classifier-action Qdisc"
depends on NET_CLS_ACT
- select NET_INGRESS
- select NET_EGRESS
+ select NET_XGRESS
help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
@@ -679,6 +678,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
+ select NET_XGRESS
help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index abc71a06d634..7c652d14528b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net,
}
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ if (p->ct_action & TCA_CT_ACT_COMMIT)
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
nf_ct_put(p->tmpl);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9f0711da9c95..e5314a31f75a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -72,6 +72,7 @@ struct fl_flow_key {
struct flow_dissector_key_num_of_vlans num_of_vlans;
struct flow_dissector_key_pppoe pppoe;
struct flow_dissector_key_l2tpv3 l2tpv3;
+ struct flow_dissector_key_ipsec ipsec;
struct flow_dissector_key_cfm cfm;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
@@ -726,6 +727,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_SPI] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 },
[TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1),
[TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED },
};
@@ -796,6 +799,24 @@ static void fl_set_key_val(struct nlattr **tb,
nla_memcpy(mask, tb[mask_type], len);
}
+static int fl_set_key_spi(struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ if (key->basic.ip_proto != IPPROTO_ESP &&
+ key->basic.ip_proto != IPPROTO_AH) {
+ NL_SET_ERR_MSG(extack,
+ "Protocol must be either ESP or AH");
+ return -EINVAL;
+ }
+
+ fl_set_key_val(tb, &key->ipsec.spi,
+ TCA_FLOWER_KEY_SPI,
+ &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+ sizeof(key->ipsec.spi));
+ return 0;
+}
+
static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1895,6 +1916,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
return ret;
}
+ if (tb[TCA_FLOWER_KEY_SPI]) {
+ ret = fl_set_key_spi(tb, key, mask, extack);
+ if (ret)
+ return ret;
+ }
+
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -2068,6 +2095,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPSEC, ipsec);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_CFM, cfm);
skb_flow_dissector_init(dissector, keys, cnt);
@@ -3365,6 +3394,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->l2tpv3.session_id)))
goto nla_put_failure;
+ if (key->ipsec.spi &&
+ fl_dump_key_val(skb, &key->ipsec.spi, TCA_FLOWER_KEY_SPI,
+ &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+ sizeof(key->ipsec.spi)))
+ goto nla_put_failure;
+
if ((key->basic.ip_proto == IPPROTO_TCP ||
key->basic.ip_proto == IPPROTO_UDP ||
key->basic.ip_proto == IPPROTO_SCTP) &&
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 6fdba069f6bf..da34fd4c9269 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -502,7 +502,7 @@ META_COLLECTOR(int_sk_lingertime)
*err = -1;
return;
}
- dst->value = sk->sk_lingertime / HZ;
+ dst->value = READ_ONCE(sk->sk_lingertime) / HZ;
}
META_COLLECTOR(int_sk_err_qlen)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e35a4e90f4e6..19901e77cd3b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -17,7 +17,6 @@
struct drr_class {
struct Qdisc_class_common common;
- unsigned int filter_cnt;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
@@ -150,8 +149,10 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl = (struct drr_class *)arg;
- if (cl->filter_cnt > 0)
+ if (qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG(extack, "DRR class is in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -187,8 +188,8 @@ static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
{
struct drr_class *cl = drr_find_class(sch, classid);
- if (cl != NULL)
- cl->filter_cnt++;
+ if (cl)
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -197,7 +198,7 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 70b0c5873d32..98805303218d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,6 @@ struct hfsc_class {
struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
struct tcf_block *block;
- unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
struct hfsc_sched *sched; /* scheduler data */
@@ -1094,8 +1093,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
- if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+ if (cl->level > 0 || qdisc_class_in_use(&cl->cl_common) ||
+ cl == &q->root) {
+ NL_SET_ERR_MSG(extack, "HFSC class in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -1223,7 +1225,7 @@ hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
if (cl != NULL) {
if (p != NULL && p->level <= cl->level)
return 0;
- cl->filter_cnt++;
+ qdisc_class_get(&cl->cl_common);
}
return (unsigned long)cl;
@@ -1234,7 +1236,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->cl_common);
}
static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 325c29041c7d..0d947414e616 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -102,7 +102,6 @@ struct htb_class {
struct tcf_proto __rcu *filter_list; /* class attached filters */
struct tcf_block *block;
- int filter_cnt;
int level; /* our level (see above) */
unsigned int children;
@@ -1710,8 +1709,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
* tc subsys guarantee us that in htb_destroy it holds no class
* refs so that we can remove children safely there ?
*/
- if (cl->children || cl->filter_cnt)
+ if (cl->children || qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG(extack, "HTB class in use");
return -EBUSY;
+ }
if (!cl->level && htb_parent_last_child(cl))
last_child = 1;
@@ -1810,10 +1811,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
goto failure;
}
- if (hopt->quantum) {
- NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
- goto failure;
- }
}
/* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1910,6 +1907,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -1931,6 +1929,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2017,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2108,7 +2108,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
* be broken by class during destroy IIUC.
*/
if (cl)
- cl->filter_cnt++;
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -2116,8 +2116,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- if (cl)
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index e43a45499372..a463a63192c3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -13,6 +13,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tcx.h>
struct ingress_sched_data {
struct tcf_block *block;
@@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_INGRESS)
@@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
- mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->block_info.chain_head_change = clsact_chain_head_change;
@@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress);
if (sch->parent != TC_H_INGRESS)
return;
tcf_block_put_ext(q->block, sch, &q->block_info);
+
+ if (entry) {
+ tcx_miniq_set_active(entry, false);
+ if (!tcx_entry_is_active(entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(entry);
+ }
+ }
+
net_dec_ingress_queue();
}
@@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
{
struct clsact_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_CLSACT)
@@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
net_inc_egress_queue();
- mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
@@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
- mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+ entry = tcx_entry_fetch_or_create(dev, false, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, false);
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
q->egress_block_info.chain_head_change = clsact_chain_head_change;
@@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress);
+ struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress);
if (sch->parent != TC_H_CLSACT)
return;
- tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+
+ if (ingress_entry) {
+ tcx_miniq_set_active(ingress_entry, false);
+ if (!tcx_entry_is_active(ingress_entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(ingress_entry);
+ }
+ }
+
+ if (egress_entry) {
+ tcx_miniq_set_active(egress_entry, false);
+ if (!tcx_entry_is_active(egress_entry)) {
+ tcx_entry_update(dev, NULL, false);
+ tcx_entry_free(egress_entry);
+ }
+ }
net_dec_ingress_queue();
net_dec_egress_queue();
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 38d9aa0cd30e..4ad39a4a3cf5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -105,6 +105,11 @@ struct netem_sched_data {
u32 rho;
} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
+ struct prng {
+ u64 seed;
+ struct rnd_state prng_state;
+ } prng;
+
struct disttable *delay_dist;
enum {
@@ -179,15 +184,16 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
* Next number depends on last value.
* rho is scaled to avoid floating point.
*/
-static u32 get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state, struct prng *p)
{
u64 value, rho;
unsigned long answer;
+ struct rnd_state *s = &p->prng_state;
if (!state || state->rho == 0) /* no correlation */
- return get_random_u32();
+ return prandom_u32_state(s);
- value = get_random_u32();
+ value = prandom_u32_state(s);
rho = (u64)state->rho + 1;
answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
state->last = answer;
@@ -201,7 +207,7 @@ static u32 get_crandom(struct crndstate *state)
static bool loss_4state(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
- u32 rnd = get_random_u32();
+ u32 rnd = prandom_u32_state(&q->prng.prng_state);
/*
* Makes a comparison between rnd and the transition
@@ -266,18 +272,19 @@ static bool loss_4state(struct netem_sched_data *q)
static bool loss_gilb_ell(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
+ struct rnd_state *s = &q->prng.prng_state;
switch (clg->state) {
case GOOD_STATE:
- if (get_random_u32() < clg->a1)
+ if (prandom_u32_state(s) < clg->a1)
clg->state = BAD_STATE;
- if (get_random_u32() < clg->a4)
+ if (prandom_u32_state(s) < clg->a4)
return true;
break;
case BAD_STATE:
- if (get_random_u32() < clg->a2)
+ if (prandom_u32_state(s) < clg->a2)
clg->state = GOOD_STATE;
- if (get_random_u32() > clg->a3)
+ if (prandom_u32_state(s) > clg->a3)
return true;
}
@@ -289,7 +296,7 @@ static bool loss_event(struct netem_sched_data *q)
switch (q->loss_model) {
case CLG_RANDOM:
/* Random packet drop 0 => none, ~0 => all */
- return q->loss && q->loss >= get_crandom(&q->loss_cor);
+ return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
case CLG_4_STATES:
/* 4state loss model algorithm (used also for GI model)
@@ -318,6 +325,7 @@ static bool loss_event(struct netem_sched_data *q)
*/
static s64 tabledist(s64 mu, s32 sigma,
struct crndstate *state,
+ struct prng *prng,
const struct disttable *dist)
{
s64 x;
@@ -327,7 +335,7 @@ static s64 tabledist(s64 mu, s32 sigma,
if (sigma == 0)
return mu;
- rnd = get_crandom(state);
+ rnd = get_crandom(state, prng);
/* default uniform distribution */
if (dist == NULL)
@@ -449,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
skb->prev = NULL;
/* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
++count;
/* Drop packet? */
@@ -492,7 +500,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* If packet is going to be hardware checksummed, then
* do it now in software before we mangle it.
*/
- if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
if (skb_is_gso(skb)) {
skb = netem_segment(skb, sch, to_free);
if (!skb)
@@ -530,12 +538,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cb = netem_skb_cb(skb);
if (q->gap == 0 || /* not doing reordering */
q->counter < q->gap - 1 || /* inside last reordering gap */
- q->reorder < get_crandom(&q->reorder_cor)) {
+ q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
u64 now;
s64 delay;
delay = tabledist(q->latency, q->jitter,
- &q->delay_cor, q->delay_dist);
+ &q->delay_cor, &q->prng, q->delay_dist);
now = ktime_get_ns();
@@ -639,7 +647,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
else
next_delay = tabledist(q->slot_config.dist_delay,
(s32)(q->slot_config.dist_jitter),
- NULL, q->slot_dist);
+ NULL, &q->prng, q->slot_dist);
q->slot.slot_next = now + next_delay;
q->slot.packets_left = q->slot_config.max_packets;
@@ -922,6 +930,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
[TCA_NETEM_JITTER64] = { .type = NLA_S64 },
[TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
+ [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -1040,6 +1049,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
/* capping jitter to the range acceptable by tabledist() */
q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+ if (tb[TCA_NETEM_PRNG_SEED])
+ q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
+ else
+ q->prng.seed = get_random_u64();
+ prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+
unlock:
sch_tree_unlock(sch);
@@ -1203,6 +1218,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
}
+ if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
+ TCA_NETEM_PAD))
+ goto nla_put_failure;
+
return nla_nest_end(skb, nla);
nla_put_failure:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index befaf74b33ca..1a25752f1a9a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -130,8 +130,6 @@ struct qfq_aggregate;
struct qfq_class {
struct Qdisc_class_common common;
- unsigned int filter_cnt;
-
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
@@ -545,8 +543,10 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
- if (cl->filter_cnt > 0)
+ if (qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG_MOD(extack, "QFQ class in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -580,8 +580,8 @@ static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
{
struct qfq_class *cl = qfq_find_class(sch, classid);
- if (cl != NULL)
- cl->filter_cnt++;
+ if (cl)
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -590,7 +590,7 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct qfq_class *cl = (struct qfq_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8c9cfff7fd05..1cb5e41c0ec7 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -2099,11 +2099,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
return -EOPNOTSUPP;
}
- /* pre-allocate qdisc, attachment can't fail */
- q->qdiscs = kcalloc(dev->num_tx_queues,
- sizeof(q->qdiscs[0]),
+ q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
GFP_KERNEL);
-
if (!q->qdiscs)
return -ENOMEM;
@@ -2145,25 +2142,32 @@ static void taprio_attach(struct Qdisc *sch)
/* Attach underlying qdisc */
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- struct Qdisc *qdisc = q->qdiscs[ntx];
- struct Qdisc *old;
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+ struct Qdisc *old, *dev_queue_qdisc;
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ struct Qdisc *qdisc = q->qdiscs[ntx];
+
+ /* In offload mode, the root taprio qdisc is bypassed
+ * and the netdev TX queues see the children directly
+ */
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
- old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ dev_queue_qdisc = qdisc;
} else {
- old = dev_graft_qdisc(qdisc->dev_queue, sch);
- qdisc_refcount_inc(sch);
+ /* In software mode, attach the root taprio qdisc
+ * to all netdev TX queues, so that dev_qdisc_enqueue()
+ * goes through taprio_enqueue().
+ */
+ dev_queue_qdisc = sch;
}
+ old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
+ /* The qdisc's refcount requires to be elevated once
+ * for each netdev TX queue it is grafted onto
+ */
+ qdisc_refcount_inc(dev_queue_qdisc);
if (old)
qdisc_put(old);
}
-
- /* access to the child qdiscs is not needed in offload mode */
- if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- kfree(q->qdiscs);
- q->qdiscs = NULL;
- }
}
static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
@@ -2192,13 +2196,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
+ /* In offload mode, the child Qdisc is directly attached to the netdev
+ * TX queue, and thus, we need to keep its refcount elevated in order
+ * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
+ * However, save the reference to the new qdisc in the private array in
+ * both software and offload cases, to have an up-to-date reference to
+ * our children.
+ */
+ *old = q->qdiscs[cl - 1];
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- *old = dev_graft_qdisc(dev_queue, new);
- } else {
- *old = q->qdiscs[cl - 1];
- q->qdiscs[cl - 1] = new;
+ WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
+ if (new)
+ qdisc_refcount_inc(new);
+ if (*old)
+ qdisc_put(*old);
}
+ q->qdiscs[cl - 1] = new;
if (new)
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2436,12 +2450,14 @@ start_error:
static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx = cl - 1;
- if (!dev_queue)
+ if (ntx >= dev->num_tx_queues)
return NULL;
- return rtnl_dereference(dev_queue->qdisc_sleeping);
+ return q->qdiscs[ntx];
}
static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
@@ -2456,11 +2472,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct Qdisc *child = taprio_leaf(sch, cl);
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
+ tcm->tcm_info = child->handle;
return 0;
}
@@ -2470,16 +2486,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__releases(d->lock)
__acquires(d->lock)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct Qdisc *child = taprio_leaf(sch, cl);
struct tc_taprio_qopt_offload offload = {
.cmd = TAPRIO_CMD_QUEUE_STATS,
.queue_stats = {
.queue = cl - 1,
},
};
- struct Qdisc *child;
- child = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
qdisc_qstats_copy(d, child) < 0)
return -1;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2613c4d74b16..17fcaa9b0df9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -581,7 +581,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
default:
return;
}
- if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+ if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 274d07bd774f..2185f44198de 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -360,7 +360,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
ret = inet_addr_type_table(net, addr->v4.sin_addr.s_addr, tb_id);
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
- !sp->inet.freebind &&
+ !inet_test_bit(FREEBIND, sk) &&
!READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
@@ -435,7 +435,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
+ fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 76f1bce49a8e..fd0631e70d46 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9482,7 +9482,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
atomic_set(&newinet->inet_id, get_random_u16());
newinet->uc_ttl = inet->uc_ttl;
- newinet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, newsk);
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
@@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f5834af5fad5..bacdd971615e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -641,20 +641,22 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- /* optional 2nd link, receive ADD LINK request from server */
- qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
- SMC_LLC_ADD_LINK);
- if (!qentry) {
- struct smc_clc_msg_decline dclc;
-
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- if (rc == -EAGAIN)
- rc = 0; /* no DECLINE received, go with one link */
- return rc;
+ if (link->lgr->max_links > 1) {
+ /* optional 2nd link, receive ADD LINK request from server */
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK);
+ if (!qentry) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ if (rc == -EAGAIN)
+ rc = 0; /* no DECLINE received, go with one link */
+ return rc;
+ }
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+ smc_llc_cli_add_link(link, qentry);
}
- smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
- smc_llc_cli_add_link(link, qentry);
return 0;
}
@@ -1144,7 +1146,7 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
#define SMC_CLC_MAX_ACCEPT_LEN \
(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
- sizeof(struct smc_clc_first_contact_ext) + \
+ sizeof(struct smc_clc_first_contact_ext_v2x) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
@@ -1198,8 +1200,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
- (struct smc_clc_first_contact_ext *)
- (((u8 *)clc_v2) + sizeof(*clc_v2));
+ smc_get_clc_first_contact_ext(clc_v2, false);
+ int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
return 0;
@@ -1218,6 +1220,12 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
return SMC_CLC_DECL_NOINDIRECT;
}
}
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+
return 0;
}
@@ -1236,6 +1244,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
if (reason_code)
@@ -1386,6 +1396,16 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
+ if (ini->first_contact_peer) {
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(aclc_v2, true);
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+ }
+
rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
if (rc)
return rc;
@@ -1420,7 +1440,7 @@ static int smc_connect_ism(struct smc_sock *smc,
}
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
- aclc->hdr.version, eid, NULL);
+ aclc->hdr.version, eid, ini);
if (rc)
goto connect_abort;
mutex_unlock(&smc_server_lgr_pending);
@@ -1820,7 +1840,7 @@ void smc_close_non_accepted(struct sock *sk)
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
- sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
__smc_release(smc);
release_sock(sk);
sock_put(sk); /* sock_hold above */
@@ -1870,10 +1890,12 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- down_write(&link->lgr->llc_conf_mutex);
- /* initial contact - try to establish second link */
- smc_llc_srv_add_link(link, NULL);
- up_write(&link->lgr->llc_conf_mutex);
+ if (link->lgr->max_links > 1) {
+ down_write(&link->lgr->llc_conf_mutex);
+ /* initial contact - try to establish second link */
+ smc_llc_srv_add_link(link, NULL);
+ up_write(&link->lgr->llc_conf_mutex);
+ }
return 0;
}
@@ -1996,6 +2018,10 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
}
}
+ ini->release_nr = pclc_v2_ext->hdr.flag.release;
+ if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE)
+ ini->release_nr = SMC_RELEASE;
+
out:
if (!ini->smcd_version && !ini->smcr_version)
return rc;
@@ -2430,6 +2456,10 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
+ rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ if (rc)
+ goto out_decl;
+
mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
@@ -2443,7 +2473,7 @@ static void smc_listen_work(struct work_struct *work)
/* send SMC Accept CLC message */
accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
- accept_version, ini->negotiated_eid);
+ accept_version, ini->negotiated_eid, ini);
if (rc)
goto out_unlock;
@@ -2462,6 +2492,18 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
}
+ rc = smc_clc_v2x_features_confirm_check(cclc, ini);
+ if (rc) {
+ if (!ini->is_smcd)
+ goto out_unlock;
+ goto out_decl;
+ }
+
+ /* fce smc release version is needed in smc_listen_rdma_finish,
+ * so save fce info here.
+ */
+ smc_conn_save_peer_info_fce(new_smc, cclc);
+
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1f2b912c43d1..24745fde4ac2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -21,7 +21,10 @@
#define SMC_V1 1 /* SMC version V1 */
#define SMC_V2 2 /* SMC version V2 */
-#define SMC_RELEASE 0
+
+#define SMC_RELEASE_0 0
+#define SMC_RELEASE_1 1
+#define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index c90d9e5dda54..8deb46c28f1d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -391,9 +391,7 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
return false;
} else {
if (hdr->typev1 == SMC_TYPE_D &&
- ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
- (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
- sizeof(struct smc_clc_first_contact_ext)))
+ ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2)
return false;
if (hdr->typev1 == SMC_TYPE_R &&
ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
@@ -420,13 +418,29 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
return true;
}
-static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
+static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
+ struct smc_init_info *ini)
{
+ int ret = sizeof(*fce);
+
memset(fce, 0, sizeof(*fce));
- fce->os_type = SMC_CLC_OS_LINUX;
- fce->release = SMC_RELEASE;
- memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
- (*len) += sizeof(*fce);
+ fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
+ fce->fce_v2_base.release = ini->release_nr;
+ memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname));
+ if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
+ ret = sizeof(struct smc_clc_first_contact_ext);
+ goto out;
+ }
+
+ if (ini->release_nr >= SMC_RELEASE_1) {
+ if (!ini->is_smcd) {
+ fce->max_conns = ini->max_conns;
+ fce->max_links = ini->max_links;
+ }
+ }
+
+out:
+ return ret;
}
/* check if received message has a correct header length and contains valid
@@ -927,8 +941,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid);
}
}
- if (smcr_indicated(ini->smc_type_v2))
+ if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
+ v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
+ v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ }
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -986,13 +1003,13 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
u8 *eid, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct smc_clc_first_contact_ext_v2x fce;
struct smc_clc_msg_accept_confirm *clc;
- struct smc_clc_first_contact_ext fce;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
+ int i, len, fce_len;
struct kvec vec[5];
struct msghdr msg;
- int i, len;
/* send SMC Confirm CLC msg */
clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
@@ -1018,8 +1035,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (eid && eid[0])
memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact)
- smc_clc_fill_fce(&fce, &len);
+ if (first_contact) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ }
clc_v2->hdr.length = htons(len);
}
memcpy(trl.eyecatcher, SMCD_EYECATCHER,
@@ -1063,15 +1082,14 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
- smc_clc_fill_fce(&fce, &len);
- fce.v2_direct = !link->lgr->uses_gateway;
- memset(&gle, 0, sizeof(gle));
- if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway;
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ memset(&gle, 0, sizeof(gle));
gle.gid_cnt = ini->smcrv2.gidlist.len;
len += sizeof(gle);
len += gle.gid_cnt * sizeof(gle.gid[0]);
- } else {
- len += sizeof(gle.reserved);
}
}
clc_v2->hdr.length = htons(len);
@@ -1094,7 +1112,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
sizeof(trl);
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
- vec[i++].iov_len = sizeof(fce);
+ vec[i++].iov_len = fce_len;
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
vec[i].iov_base = &gle;
@@ -1102,9 +1120,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
vec[i].iov_base = &ini->smcrv2.gidlist.list;
vec[i++].iov_len = gle.gid_cnt *
sizeof(gle.gid[0]);
- } else {
- vec[i].iov_base = &gle.reserved;
- vec[i++].iov_len = sizeof(gle.reserved);
}
}
}
@@ -1141,7 +1156,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid)
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
@@ -1149,13 +1164,95 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
- version, negotiated_eid, NULL);
+ version, negotiated_eid, ini);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_v2_extension *pclc_v2_ext;
+
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
+
+ if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
+ ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ pclc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!pclc_v2_ext)
+ return SMC_CLC_DECL_NOV2EXT;
+
+ if (ini->smcr_version & SMC_V2) {
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+ ini->max_conns = fce_v2x->max_conns;
+
+ if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX ||
+ fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ ini->max_links = fce_v2x->max_links;
+ }
+
+ return 0;
+}
+
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)cclc;
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd);
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (cclc->hdr.version == SMC_V1 ||
+ !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return 0;
+
+ if (ini->release_nr != fce->release)
+ return SMC_CLC_DECL_RELEASEERR;
+
+ if (fce->release < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns != ini->max_conns)
+ return SMC_CLC_DECL_MAXCONNERR;
+ if (fce_v2x->max_links != ini->max_links)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
void smc_clc_get_hostname(u8 **host)
{
*host = &smc_hostname[0];
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5fee545c9a10..c5c8e7db775a 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -45,6 +45,9 @@
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
+#define SMC_CLC_DECL_RELEASEERR 0x03030009 /* release version negotiate failed */
+#define SMC_CLC_DECL_MAXCONNERR 0x0303000a /* max connections negotiate failed */
+#define SMC_CLC_DECL_MAXLINKERR 0x0303000b /* max links negotiate failed */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
@@ -133,7 +136,9 @@ struct smc_clc_smcd_gid_chid {
struct smc_clc_v2_extension {
struct smc_clnt_opts_area_hdr hdr;
u8 roce[16]; /* RoCEv2 GID */
- u8 reserved[16];
+ u8 max_conns;
+ u8 max_links;
+ u8 reserved[14];
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -147,7 +152,9 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
struct smc_clc_msg_smcd { /* SMC-D GID information */
struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
- u8 reserved[28];
+ u8 vendor_oui[3]; /* vendor organizationally unique identifier */
+ u8 vendor_exp_options[5];
+ u8 reserved[20];
};
struct smc_clc_smcd_v2_extension {
@@ -231,8 +238,19 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
+struct smc_clc_first_contact_ext_v2x {
+ struct smc_clc_first_contact_ext fce_v2_base;
+ u8 max_conns; /* for SMC-R only */
+ u8 max_links; /* for SMC-R only */
+ u8 reserved3[2];
+ __be32 vendor_exp_options;
+ u8 reserved4[8];
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2 (Third Edition)
+ * (https://www.ibm.com/support/pages/node/7009315)
+ */
+
struct smc_clc_fce_gid_ext {
- u8 reserved[16];
u8 gid_cnt;
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE];
@@ -370,6 +388,27 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
ntohs(prop_v2ext->hdr.smcd_v2_ext_offset));
}
+static inline struct smc_clc_first_contact_ext *
+smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ bool is_smcd)
+{
+ int clc_v2_len;
+
+ if (clc_v2->hdr.version == SMC_V1 ||
+ !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return NULL;
+
+ if (is_smcd)
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, d1);
+ else
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, r1);
+
+ return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) +
+ clc_v2_len);
+}
+
struct smcd_dev;
struct smc_init_info;
@@ -382,7 +421,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid);
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini);
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini);
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini);
void smc_clc_init(void) __init;
void smc_clc_exit(void);
void smc_clc_get_hostname(u8 **host);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 6b78075404d7..bd01dd31e4bd 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -319,6 +319,10 @@ static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
+ goto errv2attr;
nla_nest_end(skb, v2_attrs);
return 0;
@@ -895,9 +899,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->uses_gateway = ini->smcrv2.uses_gateway;
memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
ETH_ALEN);
+ lgr->max_conns = ini->max_conns;
+ lgr->max_links = ini->max_links;
} else {
ibdev = ini->ib_dev;
ibport = ini->ib_port;
+ lgr->max_conns = SMC_CONN_PER_LGR_MAX;
+ lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
@@ -1664,6 +1672,9 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
!rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
continue;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ continue;
+
/* trigger local add link processing */
link = smc_llc_usable_link(lgr);
if (link)
@@ -1888,7 +1899,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ (lgr->conns_num < lgr->max_conns &&
!bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1645fba0d2d3..120027d40469 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -22,6 +22,15 @@
#include "smc_ib.h"
#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */
+#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */
+#define SMC_CONN_PER_LGR_MAX 255 /* max. # of connections per link group,
+ * also is the default value for SMC-R v1 and v2.0
+ */
+#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distrubutions may modify it to a value between
+ * 16-255 as needed.
+ */
struct smc_lgr_list { /* list of link group definition */
struct list_head list;
@@ -164,6 +173,15 @@ struct smc_link {
*/
#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
+#define SMC_LINKS_ADD_LNK_MIN 1 /* min. # of links per link group */
+#define SMC_LINKS_ADD_LNK_MAX 2 /* max. # of links per link group, also is the
+ * default value for smc-r v1.0 and v2.0
+ */
+#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distrubutions may modify it to a value between
+ * 1-2 as needed.
+ */
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
@@ -331,6 +349,10 @@ struct smc_link_group {
__be32 saddr;
/* net namespace */
struct net *net;
+ u8 max_conns;
+ /* max conn can be assigned to lgr */
+ u8 max_links;
+ /* max links can be added in lgr */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -374,6 +396,9 @@ struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
u8 smc_type_v2;
+ u8 release_nr;
+ u8 max_conns;
+ u8 max_links;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
@@ -539,7 +564,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 034295676e88..4df5f8c8a0a1 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -96,7 +96,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk);
int smc_ib_create_queue_pair(struct smc_link *lnk);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
-int smc_ib_modify_qp_reset(struct smc_link *lnk);
int smc_ib_modify_qp_error(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 90f0b60b196a..018ce8133b02 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -52,14 +52,13 @@ struct smc_llc_msg_confirm_link { /* type 0x01 */
u8 link_num;
u8 link_uid[SMC_LGR_ID_SIZE];
u8 max_links;
- u8 reserved[9];
+ u8 max_conns;
+ u8 reserved[8];
};
#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
-#define SMC_LLC_ADD_LNK_MAX_LINKS 2
-
struct smc_llc_msg_add_link { /* type 0x02 */
struct smc_llc_hdr hd;
u8 sender_mac[ETH_ALEN];
@@ -471,7 +470,12 @@ int smc_llc_send_confirm_link(struct smc_link *link,
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
+ confllc->max_links = SMC_LINKS_ADD_LNK_MAX;
+ if (link->lgr->smc_version == SMC_V2 &&
+ link->lgr->peer_smc_release >= SMC_RELEASE_1) {
+ confllc->max_conns = link->lgr->max_conns;
+ confllc->max_links = link->lgr->max_links;
+ }
/* send llc message */
rc = smc_wr_tx_send(link, pend);
put_out:
@@ -1041,6 +1045,11 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_reject;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out_reject;
+ }
+
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
ini->check_smcrv2 = true;
@@ -1165,6 +1174,9 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link,
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ goto out;
+
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini)
goto out;
@@ -1410,6 +1422,11 @@ int smc_llc_srv_add_link(struct smc_link *link,
goto out;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out;
+ }
+
/* ignore client add link recommendation, start new flow */
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
diff --git a/net/socket.c b/net/socket.c
index 2b0e54b2405c..848116d06b51 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -136,9 +136,10 @@ static void sock_splice_eof(struct file *file);
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
struct socket *sock = f->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
- if (sock->ops->show_fdinfo)
- sock->ops->show_fdinfo(m, sock);
+ if (ops->show_fdinfo)
+ ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
@@ -646,12 +647,14 @@ EXPORT_SYMBOL(sock_alloc);
static void __sock_release(struct socket *sock, struct inode *inode)
{
- if (sock->ops) {
- struct module *owner = sock->ops->owner;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
+
+ if (ops) {
+ struct module *owner = ops->owner;
if (inode)
inode_lock(inode);
- sock->ops->release(sock);
+ ops->release(sock);
sock->sk = NULL;
if (inode)
inode_unlock(inode);
@@ -722,7 +725,7 @@ static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
+ int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
@@ -786,13 +789,14 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
struct socket *sock = sk->sk_socket;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
- if (!sock->ops->sendmsg_locked)
+ if (!ops->sendmsg_locked)
return sock_no_sendmsg_locked(sk, msg, size);
iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
- return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+ return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
}
EXPORT_SYMBOL(kernel_sendmsg_locked);
@@ -1017,7 +1021,8 @@ static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int f
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
+ inet6_recvmsg,
inet_recvmsg, sock, msg,
msg_data_left(msg), flags);
if (trace_sock_recv_length_enabled())
@@ -1072,19 +1077,23 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
unsigned int flags)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops;
- if (unlikely(!sock->ops->splice_read))
+ ops = READ_ONCE(sock->ops);
+ if (unlikely(!ops->splice_read))
return copy_splice_read(file, ppos, pipe, len, flags);
- return sock->ops->splice_read(sock, ppos, pipe, len, flags);
+ return ops->splice_read(sock, ppos, pipe, len, flags);
}
static void sock_splice_eof(struct file *file)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops;
- if (sock->ops->splice_eof)
- sock->ops->splice_eof(sock);
+ ops = READ_ONCE(sock->ops);
+ if (ops->splice_eof)
+ ops->splice_eof(sock);
}
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -1181,13 +1190,14 @@ EXPORT_SYMBOL(vlan_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct ifreq ifr;
bool need_copyout;
int err;
void __user *argp = (void __user *)arg;
void __user *data;
- err = sock->ops->ioctl(sock, cmd, arg);
+ err = ops->ioctl(sock, cmd, arg);
/*
* If this ioctl is unknown try to hand it down
@@ -1216,6 +1226,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
+ const struct proto_ops *ops;
struct socket *sock;
struct sock *sk;
void __user *argp = (void __user *)arg;
@@ -1223,6 +1234,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
struct net *net;
sock = file->private_data;
+ ops = READ_ONCE(sock->ops);
sk = sock->sk;
net = sock_net(sk);
if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
@@ -1280,23 +1292,23 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
- if (!sock->ops->gettstamp) {
+ if (!ops->gettstamp) {
err = -ENOIOCTLCMD;
break;
}
- err = sock->ops->gettstamp(sock, argp,
- cmd == SIOCGSTAMP_OLD,
- !IS_ENABLED(CONFIG_64BIT));
+ err = ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_OLD,
+ !IS_ENABLED(CONFIG_64BIT));
break;
case SIOCGSTAMP_NEW:
case SIOCGSTAMPNS_NEW:
- if (!sock->ops->gettstamp) {
+ if (!ops->gettstamp) {
err = -ENOIOCTLCMD;
break;
}
- err = sock->ops->gettstamp(sock, argp,
- cmd == SIOCGSTAMP_NEW,
- false);
+ err = ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_NEW,
+ false);
break;
case SIOCGIFCONF:
@@ -1357,9 +1369,10 @@ EXPORT_SYMBOL(sock_create_lite);
static __poll_t sock_poll(struct file *file, poll_table *wait)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
__poll_t events = poll_requested_events(wait), flag = 0;
- if (!sock->ops->poll)
+ if (!ops->poll)
return 0;
if (sk_can_busy_loop(sock->sk)) {
@@ -1371,14 +1384,14 @@ static __poll_t sock_poll(struct file *file, poll_table *wait)
flag = POLL_BUSY_LOOP;
}
- return sock->ops->poll(file, sock, wait) | flag;
+ return ops->poll(file, sock, wait) | flag;
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
struct socket *sock = file->private_data;
- return sock->ops->mmap(file, sock, vma);
+ return READ_ONCE(sock->ops)->mmap(file, sock, vma);
}
static int sock_close(struct inode *inode, struct file *filp)
@@ -1644,12 +1657,36 @@ struct file *__sys_socket_file(int family, int type, int protocol)
return sock_alloc_file(sock, flags, NULL);
}
+/* A hook for bpf progs to attach to and update socket protocol.
+ *
+ * A static noinline declaration here could cause the compiler to
+ * optimize away the function. A global noinline declaration will
+ * keep the definition, but may optimize away the callsite.
+ * Therefore, __weak is needed to ensure that the call is still
+ * emitted, by telling the compiler that we don't know what the
+ * function might eventually be.
+ *
+ * __diag_* below are needed to dismiss the missing prototype warning.
+ */
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "A fmod_ret entry point for BPF programs");
+
+__weak noinline int update_socket_protocol(int family, int type, int protocol)
+{
+ return protocol;
+}
+
+__diag_pop();
+
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
int flags;
- sock = __sys_socket_create(family, type, protocol);
+ sock = __sys_socket_create(family, type,
+ update_socket_protocol(family, type, protocol));
if (IS_ERR(sock))
return PTR_ERR(sock);
@@ -1728,7 +1765,7 @@ int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
goto out;
}
- err = sock1->ops->socketpair(sock1, sock2);
+ err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
if (unlikely(err < 0)) {
sock_release(sock2);
sock_release(sock1);
@@ -1789,7 +1826,7 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
(struct sockaddr *)&address,
addrlen);
if (!err)
- err = sock->ops->bind(sock,
+ err = READ_ONCE(sock->ops)->bind(sock,
(struct sockaddr *)
&address, addrlen);
}
@@ -1823,7 +1860,7 @@ int __sys_listen(int fd, int backlog)
err = security_socket_listen(sock, backlog);
if (!err)
- err = sock->ops->listen(sock, backlog);
+ err = READ_ONCE(sock->ops)->listen(sock, backlog);
fput_light(sock->file, fput_needed);
}
@@ -1843,6 +1880,7 @@ struct file *do_accept(struct file *file, unsigned file_flags,
struct file *newfile;
int err, len;
struct sockaddr_storage address;
+ const struct proto_ops *ops;
sock = sock_from_file(file);
if (!sock)
@@ -1851,15 +1889,16 @@ struct file *do_accept(struct file *file, unsigned file_flags,
newsock = sock_alloc();
if (!newsock)
return ERR_PTR(-ENFILE);
+ ops = READ_ONCE(sock->ops);
newsock->type = sock->type;
- newsock->ops = sock->ops;
+ newsock->ops = ops;
/*
* We don't need try_module_get here, as the listening socket (sock)
* has the protocol module (sock->ops->owner) held.
*/
- __module_get(newsock->ops->owner);
+ __module_get(ops->owner);
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
if (IS_ERR(newfile))
@@ -1869,14 +1908,13 @@ struct file *do_accept(struct file *file, unsigned file_flags,
if (err)
goto out_fd;
- err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
+ err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
false);
if (err < 0)
goto out_fd;
if (upeer_sockaddr) {
- len = newsock->ops->getname(newsock,
- (struct sockaddr *)&address, 2);
+ len = ops->getname(newsock, (struct sockaddr *)&address, 2);
if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
@@ -1989,8 +2027,8 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
if (err)
goto out;
- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
- sock->file->f_flags | file_flags);
+ err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
+ addrlen, sock->file->f_flags | file_flags);
out:
return err;
}
@@ -2039,7 +2077,7 @@ int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
if (err < 0)
goto out_put;
/* "err" is actually length in this case */
@@ -2071,13 +2109,15 @@ int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
+
err = security_socket_getpeername(sock);
if (err) {
fput_light(sock->file, fput_needed);
return err;
}
- err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ err = ops->getname(sock, (struct sockaddr *)&address, 1);
if (err >= 0)
/* "err" is actually length in this case */
err = move_addr_to_user(&address, err, usockaddr,
@@ -2227,6 +2267,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
int optlen)
{
sockptr_t optval = USER_SOCKPTR(user_optval);
+ const struct proto_ops *ops;
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2255,12 +2296,13 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
if (kernel_optval)
optval = KERNEL_SOCKPTR(kernel_optval);
+ ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
err = sock_setsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!sock->ops->setsockopt))
+ else if (unlikely(!ops->setsockopt))
err = -EOPNOTSUPP;
else
- err = sock->ops->setsockopt(sock, level, optname, optval,
+ err = ops->setsockopt(sock, level, optname, optval,
optlen);
kfree(kernel_optval);
out_put:
@@ -2285,6 +2327,7 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen)
{
int max_optlen __maybe_unused;
+ const struct proto_ops *ops;
int err, fput_needed;
struct socket *sock;
@@ -2299,12 +2342,13 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
if (!in_compat_syscall())
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+ ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET)
err = sock_getsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!sock->ops->getsockopt))
+ else if (unlikely(!ops->getsockopt))
err = -EOPNOTSUPP;
else
- err = sock->ops->getsockopt(sock, level, optname, optval,
+ err = ops->getsockopt(sock, level, optname, optval,
optlen);
if (!in_compat_syscall())
@@ -2332,7 +2376,7 @@ int __sys_shutdown_sock(struct socket *sock, int how)
err = security_socket_shutdown(sock, how);
if (!err)
- err = sock->ops->shutdown(sock, how);
+ err = READ_ONCE(sock->ops)->shutdown(sock, how);
return err;
}
@@ -3324,6 +3368,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
void __user *argp = compat_ptr(arg);
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
+ const struct proto_ops *ops;
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
return sock_ioctl(file, cmd, (unsigned long)argp);
@@ -3333,10 +3378,11 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
return compat_siocwandev(net, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
- if (!sock->ops->gettstamp)
+ ops = READ_ONCE(sock->ops);
+ if (!ops->gettstamp)
return -ENOIOCTLCMD;
- return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
- !COMPAT_USE_64BIT_TIME);
+ return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
+ !COMPAT_USE_64BIT_TIME);
case SIOCETHTOOL:
case SIOCBONDSLAVEINFOQUERY:
@@ -3417,6 +3463,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
int ret = -ENOIOCTLCMD;
struct sock *sk;
struct net *net;
@@ -3424,8 +3471,8 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
sk = sock->sk;
net = sock_net(sk);
- if (sock->ops->compat_ioctl)
- ret = sock->ops->compat_ioctl(sock, cmd, arg);
+ if (ops->compat_ioctl)
+ ret = ops->compat_ioctl(sock, cmd, arg);
if (ret == -ENOIOCTLCMD &&
(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
@@ -3449,7 +3496,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return sock->ops->bind(sock, addr, addrlen);
+ return READ_ONCE(sock->ops)->bind(sock, addr, addrlen);
}
EXPORT_SYMBOL(kernel_bind);
@@ -3463,7 +3510,7 @@ EXPORT_SYMBOL(kernel_bind);
int kernel_listen(struct socket *sock, int backlog)
{
- return sock->ops->listen(sock, backlog);
+ return READ_ONCE(sock->ops)->listen(sock, backlog);
}
EXPORT_SYMBOL(kernel_listen);
@@ -3481,6 +3528,7 @@ EXPORT_SYMBOL(kernel_listen);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
int err;
err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
@@ -3488,15 +3536,15 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = sock->ops->accept(sock, *newsock, flags, true);
+ err = ops->accept(sock, *newsock, flags, true);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
goto done;
}
- (*newsock)->ops = sock->ops;
- __module_get((*newsock)->ops->owner);
+ (*newsock)->ops = ops;
+ __module_get(ops->owner);
done:
return err;
@@ -3519,7 +3567,12 @@ EXPORT_SYMBOL(kernel_accept);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
- return sock->ops->connect(sock, addr, addrlen, flags);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
+ addrlen, flags);
}
EXPORT_SYMBOL(kernel_connect);
@@ -3534,7 +3587,7 @@ EXPORT_SYMBOL(kernel_connect);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, 0);
+ return READ_ONCE(sock->ops)->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
@@ -3549,7 +3602,7 @@ EXPORT_SYMBOL(kernel_getsockname);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, 1);
+ return READ_ONCE(sock->ops)->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
@@ -3563,7 +3616,7 @@ EXPORT_SYMBOL(kernel_getpeername);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
- return sock->ops->shutdown(sock, how);
+ return READ_ONCE(sock->ops)->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2eb8df44f894..8c9a8ee76aa0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -43,7 +43,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
@@ -226,27 +226,30 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
}
static int
-svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
+svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -ENOTCONN : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
@@ -258,13 +261,14 @@ svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
struct cmsghdr cmsg;
u8 buf[CMSG_SPACE(sizeof(u8))];
} u;
+ struct socket *sock = svsk->sk_sock;
int ret;
msg->msg_control = &u;
msg->msg_controllen = sizeof(u);
- ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
+ ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
if (unlikely(msg->msg_controllen != sizeof(u)))
- ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
+ ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret);
return ret;
}
@@ -1624,6 +1628,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ tls_handshake_close(svsk->sk_sock);
+
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9f010369100a..268a2cc61acd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,7 +47,7 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/bvec.h>
@@ -360,24 +360,27 @@ static int
xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -EACCES : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
@@ -777,6 +780,8 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
}
if (ret == -ESHUTDOWN)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else if (ret == -EACCES)
+ xprt_wake_pending_tasks(&transport->xprt, -EACCES);
else
xs_poll_check_readable(transport);
out:
@@ -1292,6 +1297,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (transport->sock)
+ tls_handshake_close(transport->sock);
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8cc42aea19c7..5b045284849e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -862,3 +862,28 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
NULL);
}
EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
+
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_REPLAY,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_replay);
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 0772cfadaa0d..93f82398283d 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -131,6 +131,5 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
void tipc_set_node_id(struct net *net, u8 *id);
void tipc_set_node_addr(struct net *net, u32 addr);
char *tipc_nodeid2string(char *str, u8 *id);
-u32 tipc_node_id2hash(u8 *id128);
#endif
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 1ee60649bd17..41eac1ee0c09 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -214,8 +214,6 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
-int tipc_media_set_priority(const char *name, u32 new_value);
-int tipc_media_set_window(const char *name, u32 new_value);
int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attrs[]);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index a16f401fdabd..d80f5649b395 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -148,8 +148,6 @@ int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
struct sk_buff_head *xmitq,
struct sk_buff_head *retrq);
-void tipc_link_build_bc_sync_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index e231e6964d61..c677f6f082df 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -67,7 +67,6 @@ struct distr_item {
__be32 key;
};
-void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
diff --git a/net/tipc/net.h b/net/tipc/net.h
index d0c91d2df20a..1cb1e43cf34a 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -43,7 +43,6 @@ extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
void tipc_net_finalize_work(struct work_struct *work);
-void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 9b47c8409231..5bc076f2fa74 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -208,7 +208,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
goto err_out;
}
- info.attrs = attrbuf;
+ info.info.attrs = attrbuf;
if (nlmsg_len(cb.nlh) > 0) {
err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
@@ -1294,7 +1294,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
struct tipc_nl_compat_msg msg;
struct nlmsghdr *req_nlh;
struct nlmsghdr *rep_nlh;
- struct tipc_genlmsghdr *req_userhdr = info->userhdr;
+ struct tipc_genlmsghdr *req_userhdr = genl_info_userhdr(info);
memset(&msg, 0, sizeof(msg));
diff --git a/net/tipc/node.c b/net/tipc/node.c
index a9c5b6594889..3105abe97bb9 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2662,7 +2662,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
@@ -2870,7 +2870,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
int err;
if (!prev_node) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
if (!attrs[TIPC_NLA_MON])
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ef8e5139a873..bb1118d02f95 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3791,7 +3791,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_sock *tsk;
if (!tsk_portid) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
if (!attrs[TIPC_NLA_SOCK])
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 926232557e77..f892b0903dba 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -465,7 +465,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
int i;
if (!bid && !skip_cnt) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct net *net = sock_net(skb->sk);
struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
char *bname;
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 86cef1c68e03..164d6a955e26 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -39,6 +39,7 @@
#include <linux/types.h>
#include <linux/skmsg.h>
#include <net/tls.h>
+#include <net/tls_prot.h>
#define TLS_PAGE_ORDER (min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER, \
TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT))
@@ -86,10 +87,6 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
void update_sk_prot(struct sock *sk, struct tls_context *ctx);
int wait_on_pending_writer(struct sock *sk, long *timeo);
-int tls_sk_query(struct sock *sk, int optname, char __user *optval,
- int __user *optlen);
-int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen);
void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
@@ -110,6 +107,8 @@ bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t read_actor);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
void tls_device_splice_eof(struct socket *sock);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 529101eb20bd..2392d06845aa 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -440,9 +440,13 @@ static int tls_push_data(struct sock *sk,
long timeo;
if (flags &
- ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SPLICE_PAGES))
+ ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SPLICE_PAGES | MSG_EOR))
return -EOPNOTSUPP;
+ if ((flags & (MSG_MORE | MSG_EOR)) == (MSG_MORE | MSG_EOR))
+ return -EINVAL;
+
if (unlikely(sk->sk_err))
return -sk->sk_err;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 4a8ee2f6badb..f550c84f3408 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -959,10 +959,12 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll;
+ ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock;
ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll;
+ ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock;
#ifdef CONFIG_TLS_DEVICE
ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index f37f4a0fcd3c..ca1e0e198ceb 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -369,7 +369,6 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
static int tls_strp_read_copyin(struct tls_strparser *strp)
{
- struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
desc.arg.data = strp;
@@ -377,7 +376,7 @@ static int tls_strp_read_copyin(struct tls_strparser *strp)
desc.count = 1; /* give more than one skb per call */
/* sk should be locked here, so okay to do read_sock */
- sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin);
+ tcp_read_sock(strp->sk, &desc, tls_strp_copyin);
return desc.error;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 53f944e6d8ef..5c122d7bb784 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -984,6 +984,9 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int ret = 0;
int pending;
+ if (!eor && (msg->msg_flags & MSG_EOR))
+ return -EINVAL;
+
if (unlikely(msg->msg_controllen)) {
ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1193,7 +1196,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
+ MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | MSG_EOR |
MSG_SENDPAGE_NOPOLICY))
return -EOPNOTSUPP;
@@ -1845,13 +1848,10 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
return sk_flush_backlog(sk);
}
-static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
- bool nonblock)
+static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
{
long timeo;
- int err;
-
- lock_sock(sk);
timeo = sock_rcvtimeo(sk, nonblock);
@@ -1865,26 +1865,30 @@ static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
!READ_ONCE(ctx->reader_present), &wait);
remove_wait_queue(&ctx->wq, &wait);
- if (timeo <= 0) {
- err = -EAGAIN;
- goto err_unlock;
- }
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto err_unlock;
- }
+ if (timeo <= 0)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return sock_intr_errno(timeo);
}
WRITE_ONCE(ctx->reader_present, 1);
return 0;
+}
-err_unlock:
- release_sock(sk);
+static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
+{
+ int err;
+
+ lock_sock(sk);
+ err = tls_rx_reader_acquire(sk, ctx, nonblock);
+ if (err)
+ release_sock(sk);
return err;
}
-static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx)
{
if (unlikely(ctx->reader_contended)) {
if (wq_has_sleeper(&ctx->wq))
@@ -1896,6 +1900,11 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
}
WRITE_ONCE(ctx->reader_present, 0);
+}
+
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+ tls_rx_reader_release(sk, ctx);
release_sock(sk);
}
@@ -2193,6 +2202,102 @@ splice_requeue:
goto splice_read_end;
}
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t read_actor)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm = NULL;
+ struct sk_buff *skb = NULL;
+ struct sk_psock *psock;
+ size_t flushed_at = 0;
+ bool released = true;
+ struct tls_msg *tlm;
+ ssize_t copied = 0;
+ ssize_t decrypted;
+ int err, used;
+
+ psock = sk_psock_get(sk);
+ if (psock) {
+ sk_psock_put(sk, psock);
+ return -EINVAL;
+ }
+ err = tls_rx_reader_acquire(sk, ctx, true);
+ if (err < 0)
+ return err;
+
+ /* If crypto failed the connection is broken */
+ err = ctx->async_wait.err;
+ if (err)
+ goto read_sock_end;
+
+ decrypted = 0;
+ do {
+ if (!skb_queue_empty(&ctx->rx_list)) {
+ skb = __skb_dequeue(&ctx->rx_list);
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+ } else {
+ struct tls_decrypt_arg darg;
+
+ err = tls_rx_rec_wait(sk, NULL, true, released);
+ if (err <= 0)
+ goto read_sock_end;
+
+ memset(&darg.inargs, 0, sizeof(darg.inargs));
+
+ err = tls_rx_one_record(sk, NULL, &darg);
+ if (err < 0) {
+ tls_err_abort(sk, -EBADMSG);
+ goto read_sock_end;
+ }
+
+ released = tls_read_flush_backlog(sk, prot, INT_MAX,
+ 0, decrypted,
+ &flushed_at);
+ skb = darg.skb;
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+ decrypted += rxm->full_len;
+
+ tls_rx_rec_done(ctx);
+ }
+
+ /* read_sock does not support reading control messages */
+ if (tlm->control != TLS_RECORD_TYPE_DATA) {
+ err = -EINVAL;
+ goto read_sock_requeue;
+ }
+
+ used = read_actor(desc, skb, rxm->offset, rxm->full_len);
+ if (used <= 0) {
+ if (!copied)
+ err = used;
+ goto read_sock_requeue;
+ }
+ copied += used;
+ if (used < rxm->full_len) {
+ rxm->offset += used;
+ rxm->full_len -= used;
+ if (!desc->count)
+ goto read_sock_requeue;
+ } else {
+ consume_skb(skb);
+ if (!desc->count)
+ skb = NULL;
+ }
+ } while (skb);
+
+read_sock_end:
+ tls_rx_reader_release(sk, ctx);
+ return copied ? : err;
+
+read_sock_requeue:
+ __skb_queue_head(&ctx->rx_list, skb);
+ goto read_sock_end;
+}
+
bool tls_sw_sock_is_readable(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
diff --git a/net/unix/scm.c b/net/unix/scm.c
index f9152881d77f..e9dde7176c8a 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -29,10 +29,11 @@ struct sock *unix_get_socket(struct file *filp)
/* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct sock *s = sock->sk;
/* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
+ if (s && ops && ops->family == PF_UNIX)
u_sock = s;
} else {
/* Could be an io_uring instance */
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index b769fc258931..352d042b130b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -348,37 +348,34 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- size_t bytes, total = 0, off;
- struct sk_buff *skb, *tmp;
- int err = -EFAULT;
+ struct sk_buff *skb;
+ size_t total = 0;
+ int err;
spin_lock_bh(&vvs->rx_lock);
- skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
- off = 0;
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ size_t bytes;
- if (total == len)
- break;
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
- while (total < len && off < skb->len) {
- bytes = len - total;
- if (bytes > skb->len - off)
- bytes = skb->len - off;
+ spin_unlock_bh(&vvs->rx_lock);
- /* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
- */
- spin_unlock_bh(&vvs->rx_lock);
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ goto out;
- err = memcpy_to_msg(msg, skb->data + off, bytes);
- if (err)
- goto out;
+ total += bytes;
- spin_lock_bh(&vvs->rx_lock);
+ spin_lock_bh(&vvs->rx_lock);
- total += bytes;
- off += bytes;
- }
+ if (total == len)
+ break;
}
spin_unlock_bh(&vvs->rx_lock);
@@ -463,6 +460,63 @@ out:
return err;
}
+static ssize_t
+virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
+ struct msghdr *msg)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct sk_buff *skb;
+ size_t total, len;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ if (!vvs->msg_count) {
+ spin_unlock_bh(&vvs->rx_lock);
+ return 0;
+ }
+
+ total = 0;
+ len = msg_data_left(msg);
+
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ struct virtio_vsock_hdr *hdr;
+
+ if (total < len) {
+ size_t bytes;
+ int err;
+
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ return err;
+
+ spin_lock_bh(&vvs->rx_lock);
+ }
+
+ total += skb->len;
+ hdr = virtio_vsock_hdr(skb);
+
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
+
+ break;
+ }
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return total;
+}
+
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
int flags)
@@ -557,9 +611,9 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
int flags)
{
if (flags & MSG_PEEK)
- return -EOPNOTSUPP;
-
- return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
+ return virtio_transport_seqpacket_do_peek(vsk, msg);
+ else
+ return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index b7b072194282..dbda3ababa14 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -116,9 +116,6 @@ struct vmci_transport {
spinlock_t lock; /* protects sk. */
};
-int vmci_transport_register(void);
-void vmci_transport_unregister(void);
-
int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
struct sockaddr_vm *src);
int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 10ea85c03147..fcfc8472f73d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
+#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
#include "xsk_queue.h"
@@ -135,14 +136,14 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
return 0;
}
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
+ u32 flags)
{
- struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
u64 addr;
int err;
addr = xp_get_handle(xskb);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
return err;
@@ -152,48 +153,138 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
}
-static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *from_buf, *to_buf;
- u32 metalen;
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ u32 frags = xdp_buff_has_frags(xdp);
+ struct xdp_buff_xsk *pos, *tmp;
+ struct list_head *xskb_list;
+ u32 contd = 0;
+ int err;
- if (unlikely(xdp_data_meta_unsupported(from))) {
- from_buf = from->data;
- to_buf = to->data;
- metalen = 0;
- } else {
- from_buf = from->data_meta;
- metalen = from->data - from->data_meta;
- to_buf = to->data - metalen;
+ if (frags)
+ contd = XDP_PKT_CONTD;
+
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err || likely(!frags))
+ goto out;
+
+ xskb_list = &xskb->pool->xskb_list;
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ if (list_is_singular(xskb_list))
+ contd = 0;
+ len = pos->xdp.data_end - pos->xdp.data;
+ err = __xsk_rcv_zc(xs, pos, len, contd);
+ if (err)
+ return err;
+ list_del(&pos->xskb_list_node);
}
- memcpy(to_buf, from_buf, len + metalen);
+out:
+ return err;
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static void *xsk_copy_xdp_start(struct xdp_buff *from)
{
+ if (unlikely(xdp_data_meta_unsupported(from)))
+ return from->data;
+ else
+ return from->data_meta;
+}
+
+static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
+ u32 *from_len, skb_frag_t **frag, u32 rem)
+{
+ u32 copied = 0;
+
+ while (1) {
+ u32 copy_len = min_t(u32, *from_len, to_len);
+
+ memcpy(to, *from, copy_len);
+ copied += copy_len;
+ if (rem == copied)
+ return copied;
+
+ if (*from_len == copy_len) {
+ *from = skb_frag_address(*frag);
+ *from_len = skb_frag_size((*frag)++);
+ } else {
+ *from += copy_len;
+ *from_len -= copy_len;
+ }
+ if (to_len == copy_len)
+ return copied;
+
+ to_len -= copy_len;
+ to += copy_len;
+ }
+}
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+{
+ u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
+ void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
+ u32 from_len, meta_len, rem, num_desc;
+ struct xdp_buff_xsk *xskb;
struct xdp_buff *xsk_xdp;
- int err;
- u32 len;
+ skb_frag_t *frag;
- len = xdp->data_end - xdp->data;
- if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
- xs->rx_dropped++;
- return -ENOSPC;
+ from_len = xdp->data_end - copy_from;
+ meta_len = xdp->data - copy_from;
+ rem = len + meta_len;
+
+ if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
+ int err;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ if (!xsk_xdp) {
+ xs->rx_dropped++;
+ return -ENOMEM;
+ }
+ memcpy(xsk_xdp->data - meta_len, copy_from, rem);
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ err = __xsk_rcv_zc(xs, xskb, len, 0);
+ if (err) {
+ xsk_buff_free(xsk_xdp);
+ return err;
+ }
+
+ return 0;
}
- xsk_xdp = xsk_buff_alloc(xs->pool);
- if (!xsk_xdp) {
+ num_desc = (len - 1) / frame_size + 1;
+
+ if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
xs->rx_dropped++;
return -ENOMEM;
}
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ return -ENOBUFS;
+ }
- xsk_copy_xdp(xsk_xdp, xdp, len);
- err = __xsk_rcv_zc(xs, xsk_xdp, len);
- if (err) {
- xsk_buff_free(xsk_xdp);
- return err;
+ if (xdp_buff_has_frags(xdp)) {
+ struct skb_shared_info *sinfo;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ frag = &sinfo->frags[0];
}
+
+ do {
+ u32 to_len = frame_size + meta_len;
+ u32 copied;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ copy_to = xsk_xdp->data - meta_len;
+
+ copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
+ rem -= copied;
+
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ __xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
+ meta_len = 0;
+ } while (rem);
+
return 0;
}
@@ -215,7 +306,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
if (!xsk_is_bound(xs))
return -ENXIO;
@@ -223,6 +314,11 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
+ if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
+ xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -236,12 +332,13 @@ static void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
spin_lock_bh(&xs->rx_lock);
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (!err) {
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
xsk_flush(xs);
}
spin_unlock_bh(&xs->rx_lock);
@@ -250,19 +347,19 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
- u32 len;
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (err)
return err;
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
len = xdp->data_end - xdp->data;
- return __xsk_rcv_zc(xs, xdp, len);
+ return xsk_rcv_zc(xs, xdp, len);
}
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
if (!err)
xdp_return_buff(xdp);
return err;
@@ -321,7 +418,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx))
+ xskq_cons_release(xs->tx);
continue;
}
@@ -408,37 +506,91 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static void xsk_destruct_skb(struct sk_buff *skb)
+static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+
+ return ret;
+}
+
+static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
{
- u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
- struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_addr(xs->pool->cq, addr);
+ xskq_prod_submit_n(xs->pool->cq, n);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel_n(xs->pool->cq, n);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static u32 xsk_get_num_desc(struct sk_buff *skb)
+{
+ return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+}
+
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+ xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
+static void xsk_set_destructor_arg(struct sk_buff *skb)
+{
+ long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+
+ skb_shinfo(skb)->destructor_arg = (void *)num;
+}
+
+static void xsk_consume_skb(struct sk_buff *skb)
+{
+ struct xdp_sock *xs = xdp_sk(skb->sk);
+
+ skb->destructor = sock_wfree;
+ xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ /* Free skb without triggering the perf drop trace */
+ consume_skb(skb);
+ xs->skb = NULL;
+}
+
+static void xsk_drop_skb(struct sk_buff *skb)
+{
+ xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
+ xsk_consume_skb(skb);
+}
+
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
int err, i;
u64 addr;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
- skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+ if (unlikely(!skb))
+ return ERR_PTR(err);
- skb_reserve(skb, hr);
+ skb_reserve(skb, hr);
+ }
addr = desc->addr;
len = desc->len;
@@ -448,7 +600,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
offset = offset_in_page(buffer);
addr = buffer - pool->addrs;
- for (copied = 0, i = 0; copied < len; i++) {
+ for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
+ if (unlikely(i >= MAX_SKB_FRAGS))
+ return ERR_PTR(-EFAULT);
+
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -473,43 +628,77 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct net_device *dev = xs->dev;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
+ int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
skb = xsk_build_skb_zerocopy(xs, desc);
- if (IS_ERR(skb))
- return skb;
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto free_err;
+ }
} else {
u32 hr, tr, len;
void *buffer;
- int err;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
- tr = dev->needed_tailroom;
+ buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
len = desc->len;
- skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+ tr = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+ if (unlikely(!skb))
+ goto free_err;
- skb_reserve(skb, hr);
- skb_put(skb, len);
+ skb_reserve(skb, hr);
+ skb_put(skb, len);
- buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
- err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
- return ERR_PTR(err);
+ err = skb_store_bits(skb, 0, buffer, len);
+ if (unlikely(err))
+ goto free_err;
+ } else {
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct page *page;
+ u8 *vaddr;
+
+ if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
+ err = -EFAULT;
+ goto free_err;
+ }
+
+ page = alloc_page(xs->sk.sk_allocation);
+ if (unlikely(!page)) {
+ err = -EAGAIN;
+ goto free_err;
+ }
+
+ vaddr = kmap_local_page(page);
+ memcpy(vaddr, buffer, len);
+ kunmap_local(vaddr);
+
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
}
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
skb->mark = READ_ONCE(xs->sk.sk_mark);
- skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;
+ xsk_set_destructor_arg(skb);
return skb;
+
+free_err:
+ if (err == -EAGAIN) {
+ xsk_cq_cancel_locked(xs, 1);
+ } else {
+ xsk_set_destructor_arg(skb);
+ xsk_drop_skb(skb);
+ xskq_cons_release(xs->tx);
+ }
+
+ return ERR_PTR(err);
}
static int __xsk_generic_xmit(struct sock *sk)
@@ -519,7 +708,6 @@ static int __xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
- unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -544,47 +732,51 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- if (xskq_prod_reserve(xs->pool->cq)) {
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ if (xsk_cq_reserve_addr_locked(xs, desc.addr))
goto out;
- }
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- goto out;
+ if (err == -EAGAIN)
+ goto out;
+ err = 0;
+ continue;
+ }
+
+ xskq_cons_release(xs->tx);
+
+ if (xp_mb_desc(&desc)) {
+ xs->skb = skb;
+ continue;
}
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
- skb->destructor = sock_wfree;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- /* Free skb without triggering the perf drop trace */
- consume_skb(skb);
+ xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
+ xsk_consume_skb(skb);
err = -EAGAIN;
goto out;
}
- xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP) {
/* SKB completed but not sent */
err = -EBUSY;
+ xs->skb = NULL;
goto out;
}
sent_frame = true;
+ xs->skb = NULL;
}
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx)) {
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+ xskq_cons_release(xs->tx);
+ }
out:
if (sent_frame)
@@ -834,6 +1026,9 @@ static int xsk_release(struct socket *sock)
net = sock_net(sk);
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+
mutex_lock(&net->xdp.lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->xdp.lock);
@@ -897,7 +1092,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
flags = sxdp->sxdp_flags;
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
- XDP_USE_NEED_WAKEUP))
+ XDP_USE_NEED_WAKEUP | XDP_USE_SG))
return -EINVAL;
bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
@@ -929,7 +1124,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct socket *sock;
if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
- (flags & XDP_USE_NEED_WAKEUP)) {
+ (flags & XDP_USE_NEED_WAKEUP) || (flags & XDP_USE_SG)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
@@ -1029,6 +1224,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
+ xs->sg = !!(flags & XDP_USE_SG);
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 26f6d304451e..b3f7b310811e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->umem = umem;
pool->addrs = umem->addrs;
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
spin_lock_init(&pool->cq_lock);
@@ -99,6 +100,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
INIT_LIST_HEAD(&xskb->free_list_node);
+ INIT_LIST_HEAD(&xskb->xskb_list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -187,6 +189,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6d40a77fccbe..13354a1e4280 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -48,6 +48,11 @@ struct xsk_queue {
size_t ring_vmalloc_size;
};
+struct parsed_desc {
+ u32 mb;
+ u32 valid;
+};
+
/* The structure of the shared state of the rings are a simple
* circular buffer, as outlined in
* Documentation/core-api/circular-buffers.rst. For the Rx and
@@ -130,18 +135,26 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
return false;
}
+static inline bool xp_unused_options_set(u32 options)
+{
+ return options & ~XDP_PKT_CONTD;
+}
+
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 offset = desc->addr & (pool->chunk_size - 1);
+ if (!desc->len)
+ return false;
+
if (offset + desc->len > pool->chunk_size)
return false;
if (desc->addr >= pool->addrs_cnt)
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -151,6 +164,9 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
{
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ if (!desc->len)
+ return false;
+
if (desc->len > pool->chunk_size)
return false;
@@ -158,7 +174,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -170,6 +186,11 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
xp_aligned_validate_desc(pool, desc);
}
+static inline bool xskq_has_descs(struct xsk_queue *q)
+{
+ return q->cached_cons != q->cached_prod;
+}
+
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
struct xsk_buff_pool *pool)
@@ -185,17 +206,15 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xsk_buff_pool *pool)
{
- while (q->cached_cons != q->cached_prod) {
+ if (q->cached_cons != q->cached_prod) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = q->cached_cons & q->ring_mask;
*desc = ring->desc[idx];
- if (xskq_cons_is_valid_desc(q, desc, pool))
- return true;
-
- q->cached_cons++;
+ return xskq_cons_is_valid_desc(q, desc, pool);
}
+ q->queue_empty_descs++;
return false;
}
@@ -204,30 +223,52 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
q->cached_cons += cnt;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
+static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ struct xdp_desc *desc, struct parsed_desc *parsed)
+{
+ parsed->valid = xskq_cons_is_valid_desc(q, desc, pool);
+ parsed->mb = xp_mb_desc(desc);
+}
+
+static inline
+u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
struct xdp_desc *descs = pool->tx_descs;
+ u32 total_descs = 0, nr_frags = 0;
+ /* track first entry, if stumble upon *any* invalid descriptor, rewind
+ * current packet that consists of frags and stop the processing
+ */
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = cached_cons & q->ring_mask;
+ struct parsed_desc parsed;
descs[nb_entries] = ring->desc[idx];
- if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
- /* Skip the entry */
- cached_cons++;
- continue;
+ cached_cons++;
+ parse_desc(q, pool, &descs[nb_entries], &parsed);
+ if (unlikely(!parsed.valid))
+ break;
+
+ if (likely(!parsed.mb)) {
+ total_descs += (nr_frags + 1);
+ nr_frags = 0;
+ } else {
+ nr_frags++;
+ if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+ nr_frags = 0;
+ break;
+ }
}
-
nb_entries++;
- cached_cons++;
}
+ cached_cons -= nr_frags;
/* Release valid plus any invalid entries */
xskq_cons_release_n(q, cached_cons - q->cached_cons);
- return nb_entries;
+ return total_descs;
}
/* Functions for consumers */
@@ -292,6 +333,11 @@ static inline void xskq_cons_release(struct xsk_queue *q)
q->cached_cons++;
}
+static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
+{
+ q->cached_cons -= cnt;
+}
+
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
@@ -319,9 +365,9 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
-static inline void xskq_prod_cancel(struct xsk_queue *q)
+static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
{
- q->cached_prod--;
+ q->cached_prod -= cnt;
}
static inline int xskq_prod_reserve(struct xsk_queue *q)
@@ -360,7 +406,7 @@ static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
}
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
- u64 addr, u32 len)
+ u64 addr, u32 len, u32 flags)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx;
@@ -372,6 +418,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
idx = q->cached_prod++ & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
+ ring->desc[idx].options = flags;
return 0;
}
@@ -386,16 +433,6 @@ static inline void xskq_prod_submit(struct xsk_queue *q)
__xskq_prod_submit(q, q->cached_prod);
}
-static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
-{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- u32 idx = q->ring->producer;
-
- ring->desc[idx++ & q->ring_mask] = addr;
-
- __xskq_prod_submit(q, idx);
-}
-
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
__xskq_prod_submit(q, q->ring->producer + nb_entries);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 533697e2488f..3784534c9185 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -247,12 +247,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- /* We don't yet support UDP encapsulation and TFC padding. */
- if (x->encap || x->tfcpad) {
- NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
- return -EINVAL;
- }
-
if (xuo->flags &
~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) {
NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
@@ -260,6 +254,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
+
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if ((!is_packet_offload && x->encap) || x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
+ return -EINVAL;
+ }
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {