summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-04-28 22:01:50 +0200
committerJakub Kicinski <kuba@kernel.org>2022-04-28 22:02:01 +0200
commit0e55546b189fc5f1ce5149445d7df083f26d4f25 (patch)
tree0ed58feac725c81dd27a4533bea24c328898e877 /net
parentnet: make sure net_rx_action() calls skb_defer_free_flush() (diff)
parentMerge tag 'net-5.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/net... (diff)
downloadlinux-0e55546b189fc5f1ce5149445d7df083f26d4f25.tar.xz
linux-0e55546b189fc5f1ce5149445d7df083f26d4f25.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
include/linux/netdevice.h net/core/dev.c 6510ea973d8d ("net: Use this_cpu_inc() to increment net->core_stats") 794c24e9921f ("net-core: rx_otherhost_dropped to core_stats") https://lore.kernel.org/all/20220428111903.5f4304e0@canb.auug.org.au/ drivers/net/wan/cosa.c d48fea8401cf ("net: cosa: fix error check return value of register_chrdev()") 89fbca3307d4 ("net: wan: remove support for COSA and SRP synchronous serial boards") https://lore.kernel.org/all/20220428112130.1f689e5e@canb.auug.org.au/ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/hci_conn.c32
-rw-r--r--net/bluetooth/hci_event.c80
-rw-r--r--net/bluetooth/hci_sync.c11
-rw-r--r--net/bpf/test_run.c5
-rw-r--r--net/bridge/br_switchdev.c2
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/dsa/port.c2
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ipv4/ip_gre.c12
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c0
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/tcp_rate.c11
-rw-r--r--net/ipv6/ip6_gre.c16
-rw-r--r--net/ipv6/netfilter.c10
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/mctp/device.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c21
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c6
-rw-r--r--net/netfilter/nft_socket.c52
-rw-r--r--net/sctp/sm_sideeffect.c4
-rw-r--r--net/smc/af_smc.c137
-rw-r--r--net/smc/smc.h29
-rw-r--r--net/smc/smc_close.c5
-rw-r--r--net/tls/tls_device.c12
-rw-r--r--net/xdp/xsk.c15
-rw-r--r--net/xdp/xsk_buff_pool.c16
32 files changed, 368 insertions, 170 deletions
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 84312c836549..fe803bee419a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work)
/* Disable LE Advertising */
le_disable_advertising(hdev);
hci_dev_lock(hdev);
- hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
hci_dev_unlock(hdev);
return;
}
@@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
EXPORT_SYMBOL(hci_get_route);
/* This function requires the caller holds hdev->lock */
-void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
struct hci_conn_params *params;
@@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
params->conn = NULL;
}
- conn->state = BT_CLOSED;
-
/* If the status indicates successful cancellation of
* the attempt (i.e. Unknown Connection Id) there's no point of
* notifying failure since we'll go back to keep trying to
@@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
- hci_connect_cfm(conn, status);
-
- hci_conn_del(conn);
-
/* Since we may have temporarily stopped the background scanning in
* favor of connection establishment, we should restart it.
*/
@@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
hci_enable_advertising(hdev);
}
+/* This function requires the caller holds hdev->lock */
+void hci_conn_failed(struct hci_conn *conn, u8 status)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ switch (conn->type) {
+ case LE_LINK:
+ hci_le_conn_failed(conn, status);
+ break;
+ case ACL_LINK:
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ break;
+ }
+
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
struct hci_conn *conn = data;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index abaabfae19cc..66451661283c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2834,7 +2834,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -2859,7 +2859,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -3067,18 +3067,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle");
- return;
- }
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (ev->status)
+ goto unlock;
+
/* Connection may not exist if auto-connected. Check the bredr
* allowlist to see if this device is allowed to auto connect.
* If link is an ACL type, create a connection class
@@ -3122,8 +3124,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- if (!ev->status) {
+ if (!status) {
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ goto done;
+ }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3164,19 +3172,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp),
&cp);
}
- } else {
- conn->state = BT_CLOSED;
- if (conn->type == ACL_LINK)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, ev->status);
}
if (conn->type == ACL_LINK)
hci_sco_setup(conn, ev->status);
- if (ev->status) {
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+done:
+ if (status) {
+ hci_conn_failed(conn, status);
} else if (ev->link_type == SCO_LINK) {
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_CVSD:
@@ -3185,7 +3188,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
break;
}
- hci_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, status);
}
unlock:
@@ -4676,6 +4679,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_sync_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
switch (ev->link_type) {
case SCO_LINK:
@@ -4690,12 +4694,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
return;
}
- if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle");
- return;
- }
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
@@ -4729,9 +4728,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- switch (ev->status) {
+ switch (status) {
case 0x00:
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ conn->state = BT_CLOSED;
+ break;
+ }
+
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4775,8 +4782,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- hci_connect_cfm(conn, ev->status);
- if (ev->status)
+ hci_connect_cfm(conn, status);
+ if (status)
hci_conn_del(conn);
unlock:
@@ -5527,11 +5534,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
struct smp_irk *irk;
u8 addr_type;
- if (handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle");
- return;
- }
-
hci_dev_lock(hdev);
/* All controllers implicitly stop advertising in the event of a
@@ -5541,6 +5543,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn = hci_lookup_le_connect(hdev);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (status)
+ goto unlock;
+
conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
@@ -5603,8 +5611,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
+ HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ }
+
if (status) {
- hci_le_conn_failed(conn, status);
+ hci_conn_failed(conn, status);
goto unlock;
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8f4c5698913d..13600bf120b0 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4408,12 +4408,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
+ int err;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
return hci_disconnect_sync(hdev, conn, reason);
case BT_CONNECT:
- return hci_connect_cancel_sync(hdev, conn);
+ err = hci_connect_cancel_sync(hdev, conn);
+ /* Cleanup hci_conn object if it cannot be cancelled as it
+ * likelly means the controller and host stack are out of sync.
+ */
+ if (err)
+ hci_conn_failed(conn, err);
+
+ return err;
case BT_CONNECT2:
return hci_reject_conn_sync(hdev, conn, reason);
default:
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 7a1579c91432..8d54fef9a568 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -108,6 +108,7 @@ struct xdp_test_data {
struct page_pool *pp;
struct xdp_frame **frames;
struct sk_buff **skbs;
+ struct xdp_mem_info mem;
u32 batch_size;
u32 frame_cnt;
};
@@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
{
- struct xdp_mem_info mem = {};
struct page_pool *pp;
int err = -ENOMEM;
struct page_pool_params pp_params = {
@@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c
}
/* will copy 'mem.id' into pp->xdp_mem_id */
- err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp);
+ err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
if (err)
goto err_mmodel;
@@ -202,6 +202,7 @@ err_skbs:
static void xdp_test_run_teardown(struct xdp_test_data *xdp)
{
+ xdp_unreg_mem_model(&xdp->mem);
page_pool_destroy(xdp->pp);
kfree(xdp->frames);
kfree(xdp->skbs);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 81400e0b26ac..8f3d76c751dd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -354,6 +354,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
attr.orig_dev = br_dev;
vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (v->msti) {
diff --git a/net/core/dev.c b/net/core/dev.c
index e09cd202fc57..7a61690d3137 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10358,7 +10358,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
{
struct net_device_core_stats __percpu *p;
@@ -10369,11 +10369,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
free_percpu(p);
/* This READ_ONCE() pairs with the cmpxchg() above */
- p = READ_ONCE(dev->core_stats);
- if (!p)
- return NULL;
-
- return this_cpu_ptr(p);
+ return READ_ONCE(dev->core_stats);
}
EXPORT_SYMBOL(netdev_core_stats_alloc);
@@ -10410,10 +10406,10 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
for_each_possible_cpu(i) {
core_stats = per_cpu_ptr(p, i);
- storage->rx_dropped += local_read(&core_stats->rx_dropped);
- storage->tx_dropped += local_read(&core_stats->tx_dropped);
- storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
- storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped);
+ storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+ storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+ storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
+ storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
}
}
return storage;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 349480ef68a5..8b6b5e72b217 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 8856ed6afd05..48e5a309ca5c 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1652,8 +1652,10 @@ int dsa_port_link_register_of(struct dsa_port *dp)
if (ds->ops->phylink_mac_link_down)
ds->ops->phylink_mac_link_down(ds, port,
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+ of_node_put(phy_np);
return dsa_port_phylink_register(dp);
}
+ of_node_put(phy_np);
return 0;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 63da683d4660..5ee0aced9410 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -285,7 +285,7 @@ static void dsa_port_manage_cpu_flood(struct dsa_port *dp)
if (other_dp->slave->flags & IFF_ALLMULTI)
flags.val |= BR_MCAST_FLOOD;
if (other_dp->slave->flags & IFF_PROMISC)
- flags.val |= BR_FLOOD;
+ flags.val |= BR_FLOOD | BR_MCAST_FLOOD;
}
err = dsa_port_pre_bridge_flags(dp, flags, NULL);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 365caebf51ab..aacee9dd771b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -581,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ /dev/null
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2cb3b852d148..f33c31dd7366 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk,
struct sk_buff *skb)
{
@@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
return NULL;
treq = tcp_rsk(req);
+
+ /* treq->af_specific might be used to perform TCP_MD5 lookup */
+ treq->af_specific = af_ops;
+
treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
#if IS_ENABLED(CONFIG_MPTCP)
treq->is_mptcp = sk_is_mptcp(sk);
@@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index daff631b9486..cc3de8dc5797 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
if (!(flag & FLAG_DATA))
@@ -5450,7 +5451,17 @@ static void tcp_new_space(struct sock *sk)
INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
/* pairs with tcp_poll() */
smp_mb();
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6366df7aaf2a..6854bb1fb32b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c221f3bce975..5f91a9536e00 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 617b8187c03d..a8f6d9d06f2e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
*
* If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
* called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the most recently sent time and the highest
+ * sequence.
*/
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ u64 tx_tstamp;
if (!scb->tx.delivered_mstamp)
return;
+ tx_tstamp = tcp_skb_timestamp_us(skb);
if (!rs->prior_delivered ||
- after(scb->tx.delivered, rs->prior_delivered)) {
+ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ scb->end_seq, rs->last_end_seq)) {
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
+ tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
scb->tx.first_tx_mstamp);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 976236736146..5136959b3dc5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
{
struct ip6_tnl *tunnel = netdev_priv(dev);
__be16 protocol;
+ __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -739,7 +740,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
- __be16 flags;
int tun_hlen;
tun_info = skb_tunnel_info_txcheck(skb);
@@ -766,19 +766,19 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
return -ENOMEM;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ flags = tunnel->parms.o_flags;
+
+ gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1056,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* Push GRE header. */
proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
: htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1da332450d98..8ce60ab89015 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(sk_partial);
+ struct net_device *dev = skb_dst(skb)->dev;
struct flow_keys flkeys;
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
- .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- strict ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
};
int err;
+ if (sk && sk->sk_bound_dev_if)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ else if (strict)
+ fl6.flowi6_oif = dev->ifindex;
+ else
+ fl6.flowi6_oif = l3mdev_master_ifindex(dev);
+
fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d1b61d00368e..9cc123f000fb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/mctp/device.c b/net/mctp/device.c
index f49be882e98e..99a3bda8852f 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev)
void mctp_dev_put(struct mctp_dev *mdev)
{
if (mdev && refcount_dec_and_test(&mdev->refs)) {
+ kfree(mdev->addrs);
dev_put(mdev->dev);
kfree_rcu(mdev, rcu);
}
@@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev)
mctp_route_remove_dev(mdev);
mctp_neigh_remove_dev(mdev);
- kfree(mdev->addrs);
mctp_dev_put(mdev);
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 2c467c422dc6..fb67f1ca2495 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void)
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8ec55cd72572..204a5cdff5b1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -556,24 +556,14 @@ static bool tcp_in_window(struct nf_conn *ct,
}
}
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
+ } else if (tcph->syn &&
+ after(end, sender->td_end) &&
+ (state->state == TCP_CONNTRACK_SYN_SENT ||
+ state->state == TCP_CONNTRACK_SYN_RECV)) {
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, dataoff, tcph, sender);
- } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
- state->state == TCP_CONNTRACK_SYN_SENT) {
- /* Retransmitted syn-ack, or syn (simultaneous open).
*
* Re-init state for this direction, just like for the first
* syn(-ack) reply, it might differ in seq, ack or tcp options.
@@ -581,7 +571,8 @@ static bool tcp_in_window(struct nf_conn *ct,
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
end, win);
- if (!tcph->ack)
+
+ if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return true;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3e1afd10a9b6..55aa55b252b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
-#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
.procname = "nf_flowtable_udp_timeout",
.maxlen = sizeof(unsigned int),
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d600a566da32..7325bee7d144 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*ext = &rbe->ext;
return -EEXIST;
} else {
- p = &parent->rb_left;
+ overlap = false;
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 6d9e8e0a3a7d..05ae5a338b6f 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -54,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
}
#endif
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -67,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
sk = NULL;
if (!sk)
- switch(nft_pf(pkt)) {
- case NFPROTO_IPV4:
- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
- break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- case NFPROTO_IPV6:
- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
- break;
-#endif
- default:
- WARN_ON_ONCE(1);
- regs->verdict.code = NFT_BREAK;
- return;
- }
+ sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
@@ -224,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
return nft_expr_reduce_bitwise(track, expr);
}
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
@@ -231,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = {
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
+ .validate = nft_socket_validate,
.reduce = nft_socket_reduce,
};
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b3815b568e8e..463c4a58d2c3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
goto out_unlock;
}
+ /* This happens when the response arrives after the timer is triggered. */
+ if (!asoc->strreset_chunk)
+ goto out_unlock;
+
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
asoc->state, asoc->ep, asoc,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fc7b6eb22143..fce16b9d6e1a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -243,11 +243,27 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);
+static void smc_fback_restore_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = NULL;
+
+ smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
+ smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
+ smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
+ smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
+}
+
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
smc->clcsock->file->private_data = smc->sk.sk_socket;
smc->clcsock->file = NULL;
+ smc_fback_restore_callbacks(smc);
}
}
@@ -373,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
mutex_init(&smc->clcsock_release_lock);
+ smc_init_saved_callbacks(smc);
return sk;
}
@@ -744,47 +761,73 @@ out:
static void smc_fback_state_change(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_state_change);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_data_ready(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_data_ready);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_write_space(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_write_space);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_error_report(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_error_report);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_replace_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+
+ smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
+ &smc->clcsk_state_change);
+ smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
+ &smc->clcsk_data_ready);
+ smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
+ &smc->clcsk_write_space);
+ smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
+ &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
}
static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
- struct sock *clcsk;
int rc = 0;
mutex_lock(&smc->clcsock_release_lock);
@@ -792,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
rc = -EBADF;
goto out;
}
- clcsk = smc->clcsock->sk;
- if (smc->use_fallback)
- goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -810,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
* in smc sk->sk_wq and they should be woken up
* as clcsock's wait queue is woken up.
*/
- smc->clcsk_state_change = clcsk->sk_state_change;
- smc->clcsk_data_ready = clcsk->sk_data_ready;
- smc->clcsk_write_space = clcsk->sk_write_space;
- smc->clcsk_error_report = clcsk->sk_error_report;
-
- clcsk->sk_state_change = smc_fback_state_change;
- clcsk->sk_data_ready = smc_fback_data_ready;
- clcsk->sk_write_space = smc_fback_write_space;
- clcsk->sk_error_report = smc_fback_error_report;
-
- smc->clcsock->sk->sk_user_data =
- (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_fback_replace_callbacks(smc);
}
out:
mutex_unlock(&smc->clcsock_release_lock);
@@ -1475,6 +1504,8 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_state = SMC_CLOSED;
if (rc == -EPIPE || rc == -EAGAIN)
smc->sk.sk_err = EPIPE;
+ else if (rc == -ECONNREFUSED)
+ smc->sk.sk_err = ECONNREFUSED;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
sock_put(&smc->sk); /* passive closing */
@@ -1594,6 +1625,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
* function; switch it back to the original sk_data_ready function
*/
new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
+
+ /* if new clcsock has also inherited the fallback-specific callback
+ * functions, switch them back to the original ones.
+ */
+ if (lsmc->use_fallback) {
+ if (lsmc->clcsk_state_change)
+ new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
+ if (lsmc->clcsk_write_space)
+ new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
+ if (lsmc->clcsk_error_report)
+ new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
+ }
+
(*new_smc)->clcsock = new_clcsock;
out:
return rc;
@@ -2353,17 +2397,20 @@ out:
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
- struct smc_sock *lsmc =
- smc_clcsock_user_data(listen_clcsock);
+ struct smc_sock *lsmc;
+ read_lock_bh(&listen_clcsock->sk_callback_lock);
+ lsmc = smc_clcsock_user_data(listen_clcsock);
if (!lsmc)
- return;
+ goto out;
lsmc->clcsk_data_ready(listen_clcsock);
if (lsmc->sk.sk_state == SMC_LISTEN) {
sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
sock_put(&lsmc->sk);
}
+out:
+ read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
static int smc_listen(struct socket *sock, int backlog)
@@ -2395,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog)
/* save original sk_data_ready function and establish
* smc-specific sk_data_ready function
*/
- smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
- smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
+ smc_clcsock_data_ready, &smc->clcsk_data_ready);
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
/* save original ops */
smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
@@ -2413,7 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog)
rc = kernel_listen(smc->clcsock, backlog);
if (rc) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
+ smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
goto out;
}
sk->sk_max_ack_backlog = backlog;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ea0620529ebe..5ed765ea0c73 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline void smc_init_saved_callbacks(struct smc_sock *smc)
+{
+ smc->clcsk_state_change = NULL;
+ smc->clcsk_data_ready = NULL;
+ smc->clcsk_write_space = NULL;
+ smc->clcsk_error_report = NULL;
+}
+
static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
{
return (struct smc_sock *)
((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
}
+/* save target_cb in saved_cb, and replace target_cb with new_cb */
+static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
+ void (*new_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ /* only save once */
+ if (!*saved_cb)
+ *saved_cb = *target_cb;
+ *target_cb = new_cb;
+}
+
+/* restore target_cb to saved_cb, and reset saved_cb to NULL */
+static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ if (!*saved_cb)
+ return;
+ *target_cb = *saved_cb;
+ *saved_cb = NULL;
+}
+
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 676cb2333d3c..31db7438857c 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -214,8 +214,11 @@ again:
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk); /* wake up accept */
if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
}
smc_close_cleanup_listen(sk);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 5e4bd8ba48d3..b12f81a2b44c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -483,11 +483,13 @@ handle_error:
copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
copy = min_t(size_t, copy, (max_open_record_len - record->len));
- rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
- if (rc)
- goto handle_error;
- tls_append_frag(record, pfrag, copy);
+ if (copy) {
+ rc = tls_device_copy_data(page_address(pfrag->page) +
+ pfrag->offset, copy, msg_iter);
+ if (rc)
+ goto handle_error;
+ tls_append_frag(record, pfrag, copy);
+ }
size -= copy;
if (!size) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 040c73345b7c..e0a4526ab66b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (sk_can_busy_loop(sk))
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
- if (xsk_no_wakeup(sk))
+ if (xs->zc && xsk_no_wakeup(sk))
return 0;
pool = xs->pool;
@@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xp_get_pool(umem_xs->pool);
xs->pool = umem_xs->pool;
+
+ /* If underlying shared umem was created without Tx
+ * ring, allocate Tx descs array that Tx batching API
+ * utilizes
+ */
+ if (xs->tx && !xs->pool->tx_descs) {
+ err = xp_alloc_tx_descs(xs->pool, xs);
+ if (err) {
+ xp_put_pool(xs->pool);
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+ }
}
xdp_get_umem(umem_xs->umem);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index af040ffa14ff..87bdd71c7bb6 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool)
kvfree(pool);
}
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
+{
+ pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
+ GFP_KERNEL);
+ if (!pool->tx_descs)
+ return -ENOMEM;
+
+ return 0;
+}
+
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
@@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
- if (xs->tx) {
- pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL);
- if (!pool->tx_descs)
+ if (xs->tx)
+ if (xp_alloc_tx_descs(pool, xs))
goto out;
- }
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;