Diffstat (limited to 'net')
285 files changed, 7684 insertions, 4814 deletions
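Much of this diff is a mechanical conversion: the per-protocol .sendpage socket operation is removed (appletalk, atm, ax25, caif, can, and others below) and in-kernel callers switch to sock_sendmsg() with MSG_SPLICE_PAGES. As a reading aid, here is a minimal sketch of that conversion pattern, condensed from the ceph_tcp_sendpage() hunk further down; the function name example_send_page() is illustrative, not part of the diff:

	#include <linux/net.h>
	#include <linux/socket.h>
	#include <linux/bvec.h>
	#include <linux/uio.h>

	/* Illustrative only: send one page, splicing it when that is safe. */
	static int example_send_page(struct socket *sock, struct page *page,
				     int offset, size_t size, int more)
	{
		struct msghdr msg = {
			.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more,
		};
		struct bio_vec bvec;

		/* Slab and zero-refcount pages must be copied, not spliced. */
		if (sendpage_ok(page))
			msg.msg_flags |= MSG_SPLICE_PAGES;

		bvec_set_page(&bvec, page, size, offset);
		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);

		return sock_sendmsg(sock, &msg);
	}

Callers that used to pass MSG_SENDPAGE_NOTLAST now simply pass MSG_MORE, as the messenger_v1.c hunks show.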
diff --git a/net/Kconfig b/net/Kconfig index 7d39c1773eb4..2fb25b534df5 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -324,7 +324,7 @@ config CGROUP_NET_CLASSID config NET_RX_BUSY_POLL bool - default y if !PREEMPT_RT + default y if !PREEMPT_RT || (PREEMPT_RT && !NETCONSOLE) config BQL bool diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a06f4d4a6f47..8978fb6212ff 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1929,7 +1929,6 @@ static const struct proto_ops atalk_dgram_ops = { .sendmsg = atalk_sendmsg, .recvmsg = atalk_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct notifier_block ddp_notifier = { diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 53e7d3f39e26..66d9a9bd5896 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -126,7 +126,6 @@ static const struct proto_ops pvc_proto_ops = { .sendmsg = vcc_sendmsg, .recvmsg = vcc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; diff --git a/net/atm/svc.c b/net/atm/svc.c index d83556d8beb9..36a814f1fbd1 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -654,7 +654,6 @@ static const struct proto_ops svc_proto_ops = { .sendmsg = vcc_sendmsg, .recvmsg = vcc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d8da400cb4de..5db805d5f74d 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -2022,7 +2022,6 @@ static const struct proto_ops ax25_proto_ops = { .sendmsg = ax25_sendmsg, .recvmsg = ax25_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index e79e3a415ca9..2321bd2f9964 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -561,29 +561,6 @@ __bpf_kfunc int bpf_modify_return_test(int a, int *b) return a + *b; } -__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) -{ - return a + b + c + d; -} - -__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) -{ - return a + b; -} - -__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) -{ - return sk; -} - -long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) -{ - /* Provoke the compiler to assume that the caller has sign-extended a, - * b and c on platforms where this is required (e.g. s390x). 
- */ - return (long)a + (long)b + (long)c + d; -} - int noinline bpf_fentry_shadow_test(int a) { return a + 1; @@ -606,32 +583,6 @@ struct prog_test_ref_kfunc { refcount_t cnt; }; -static struct prog_test_ref_kfunc prog_test_struct = { - .a = 42, - .b = 108, - .next = &prog_test_struct, - .cnt = REFCOUNT_INIT(1), -}; - -__bpf_kfunc struct prog_test_ref_kfunc * -bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) -{ - refcount_inc(&prog_test_struct.cnt); - return &prog_test_struct; -} - -__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) -{ - WARN_ON_ONCE(1); -} - -__bpf_kfunc struct prog_test_member * -bpf_kfunc_call_memb_acquire(void) -{ - WARN_ON_ONCE(1); - return NULL; -} - __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { refcount_dec(&p->cnt); @@ -641,134 +592,6 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } -__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) -{ - WARN_ON_ONCE(1); -} - -static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) -{ - if (size > 2 * sizeof(int)) - return NULL; - - return (int *)p; -} - -__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, - const int rdwr_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); -} - -__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, - const int rdonly_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); -} - -/* the next 2 ones can't be really used for testing expect to ensure - * that the verifier rejects the call. - * Acquire functions must return struct pointers, so these ones are - * failing. - */ -__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, - const int rdonly_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); -} - -__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) -{ -} - -struct prog_test_pass1 { - int x0; - struct { - int x1; - struct { - int x2; - struct { - int x3; - }; - }; - }; -}; - -struct prog_test_pass2 { - int len; - short arr1[4]; - struct { - char arr2[4]; - unsigned long arr3[8]; - } x; -}; - -struct prog_test_fail1 { - void *p; - int x; -}; - -struct prog_test_fail2 { - int x8; - struct prog_test_pass1 x; -}; - -struct prog_test_fail3 { - int len; - char arr1[2]; - char arr2[]; -}; - -__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) -{ - /* p != NULL, but p->cnt could be 0 */ -} - -__bpf_kfunc void bpf_kfunc_call_test_destructive(void) -{ -} - -__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) -{ - return arg; -} - __diag_pop(); BTF_SET8_START(bpf_test_modify_return_ids) @@ -782,32 +605,8 @@ static const struct btf_kfunc_id_set 
bpf_test_modify_return_set = { }; BTF_SET8_START(test_sk_check_kfunc_ids) -BTF_ID_FLAGS(func, bpf_kfunc_call_test1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test3) -BTF_ID_FLAGS(func, bpf_kfunc_call_test4) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, @@ -1415,11 +1214,10 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, page); data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); - skb_frag_size_set(frag, data_len); + skb_frag_fill_page_desc(frag, page, 0, data_len); if (copy_from_user(page_address(page), data_in + size, data_len)) { diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 422ec6e7ccff..97e129e3f31c 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -21,7 +21,7 @@ static void shutdown_umh(void) if (tgid) { kill_pid(tgid, SIGKILL, 1); wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - bpfilter_umh_cleanup(info); + umd_cleanup_helper(info); } } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8eca8a5c80c6..9a5ea06236bd 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) u16 vid = 0; memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + br_tc_skb_miss_set(skb, false); rcu_read_lock(); nf_ops = rcu_dereference(nf_br_ops); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 84d6dd5e5b1a..6116eba1bd89 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -203,6 +203,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, struct net_bridge_port *prev = NULL; struct net_bridge_port *p; + br_tc_skb_miss_set(skb, pkt_type != BR_PKT_BROADCAST); + list_for_each_entry_rcu(p, &br->port_list, list) { /* Do not flood unicast traffic to ports that turn it off, nor * other traffic if flood off, except for traffic we originate @@ -295,6 +297,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, allow_mode_include = false; } else { p = NULL; + br_tc_skb_miss_set(skb, true); 
} while (p || rp) { diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index fc17b9fd93e6..c34a0b0901b0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -334,6 +334,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + br_tc_skb_miss_set(skb, false); p = br_port_get_rcu(skb->dev); if (p->flags & BR_VLAN_TUNNEL) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2119729ded2b..a63b32c1638e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -15,6 +15,7 @@ #include <linux/u64_stats_sync.h> #include <net/route.h> #include <net/ip6_fib.h> +#include <net/pkt_cls.h> #include <linux/if_vlan.h> #include <linux/rhashtable.h> #include <linux/refcount.h> @@ -754,6 +755,32 @@ void br_boolopt_multi_get(const struct net_bridge *br, struct br_boolopt_multi *bm); void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on); +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss) +{ + struct tc_skb_ext *ext; + + if (!tc_skb_ext_tc_enabled()) + return; + + ext = skb_ext_find(skb, TC_SKB_EXT); + if (ext) { + ext->l2_miss = miss; + return; + } + if (!miss) + return; + ext = tc_skb_ext_alloc(skb); + if (!ext) + return; + ext->l2_miss = true; +} +#else +static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss) +{ +} +#endif + /* br_device.c */ void br_dev_setup(struct net_device *dev); void br_dev_delete(struct net_device *dev, struct list_head *list); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4eebcc66c19a..9c82698da4f5 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -976,7 +976,6 @@ static const struct proto_ops caif_seqpacket_ops = { .sendmsg = caif_seqpkt_sendmsg, .recvmsg = caif_seqpkt_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const struct proto_ops caif_stream_ops = { @@ -996,7 +995,6 @@ static const struct proto_ops caif_stream_ops = { .sendmsg = caif_stream_sendmsg, .recvmsg = caif_stream_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* This function is called when a socket is finally destroyed. 
*/ diff --git a/net/can/bcm.c b/net/can/bcm.c index a962ec2b8ba5..9ba35685b043 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1703,7 +1703,6 @@ static const struct proto_ops bcm_ops = { .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto bcm_proto __read_mostly = { diff --git a/net/can/isotp.c b/net/can/isotp.c index 84f9aba02901..99770ed28531 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1112,8 +1112,9 @@ wait_free_buffer: if (err) goto err_event_drop; - if (sk->sk_err) - return -sk->sk_err; + err = sock_error(sk); + if (err) + return err; } return size; @@ -1699,7 +1700,6 @@ static const struct proto_ops isotp_ops = { .sendmsg = isotp_sendmsg, .recvmsg = isotp_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto isotp_proto __read_mostly = { diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 35970c25496a..feaec4ad6d16 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1306,7 +1306,6 @@ static const struct proto_ops j1939_ops = { .sendmsg = j1939_sk_sendmsg, .recvmsg = j1939_sk_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto j1939_proto __read_mostly = { diff --git a/net/can/raw.c b/net/can/raw.c index f64469b98260..15c79b079184 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -962,7 +962,6 @@ static const struct proto_ops raw_ops = { .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto raw_proto __read_mostly = { diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index d664cb1593a7..3d57bb48a2b4 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -75,18 +75,19 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, } /* - * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST + * @more: MSG_MORE or 0. */ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int more) { - ssize_t (*sendpage)(struct socket *sock, struct page *page, - int offset, size_t size, int flags); - int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more, + }; + struct bio_vec bvec; int ret; /* - * sendpage cannot properly handle pages with page_count == 0, + * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0, * we need to fall back to sendmsg if that's the case. * * Same goes for slab pages: skb_can_coalesce() allows @@ -94,11 +95,12 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, * triggers one of hardened usercopy checks. 
*/ if (sendpage_ok(page)) - sendpage = sock->ops->sendpage; - else - sendpage = sock_no_sendpage; + msg.msg_flags |= MSG_SPLICE_PAGES; + + bvec_set_page(&bvec, page, size, offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - ret = sendpage(sock, page, offset, size, flags); + ret = sock_sendmsg(sock, &msg); if (ret == -EAGAIN) ret = 0; @@ -464,7 +466,6 @@ static int write_partial_message_data(struct ceph_connection *con) struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); - int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; u32 crc; dout("%s %p msg %p\n", __func__, con, msg); @@ -493,10 +494,8 @@ static int write_partial_message_data(struct ceph_connection *con) } page = ceph_msg_data_next(cursor, &page_offset, &length); - if (length == cursor->total_resid) - more = MSG_MORE; ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, - more); + MSG_MORE); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); @@ -526,17 +525,14 @@ static int write_partial_message_data(struct ceph_connection *con) */ static int write_partial_skip(struct ceph_connection *con) { - int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; int ret; dout("%s %p %d left\n", __func__, con, con->v1.out_skip); while (con->v1.out_skip > 0) { size_t size = min(con->v1.out_skip, (int)PAGE_SIZE); - if (size == con->v1.out_skip) - more = MSG_MORE; ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size, - more); + MSG_MORE); if (ret <= 0) goto out; con->v1.out_skip -= ret; diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 301a991dc6a6..1a888b86a494 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -155,7 +155,7 @@ static int do_try_sendpage(struct socket *sock, struct iov_iter *it) it->bvec->bv_offset + it->iov_offset); /* - * sendpage cannot properly handle pages with + * MSG_SPLICE_PAGES cannot properly handle pages with * page_count == 0, we need to fall back to sendmsg if * that's the case. * @@ -163,14 +163,13 @@ static int do_try_sendpage(struct socket *sock, struct iov_iter *it) * coalescing neighboring slab objects into a single frag * which triggers one of hardened usercopy checks. */ - if (sendpage_ok(bv.bv_page)) { - ret = sock->ops->sendpage(sock, bv.bv_page, - bv.bv_offset, bv.bv_len, - CEPH_MSG_FLAGS); - } else { - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len); - ret = sock_sendmsg(sock, &msg); - } + if (sendpage_ok(bv.bv_page)) + msg.msg_flags |= MSG_SPLICE_PAGES; + else + msg.msg_flags &= ~MSG_SPLICE_PAGES; + + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len); + ret = sock_sendmsg(sock, &msg); if (ret <= 0) { if (ret == -EAGAIN) ret = 0; @@ -185,7 +184,7 @@ static int do_try_sendpage(struct socket *sock, struct iov_iter *it) /* * Write as much as possible. The socket is expected to be corked, - * so we don't bother with MSG_MORE/MSG_SENDPAGE_NOTLAST here. + * so we don't bother with MSG_MORE here. 
* * Return: * 1 - done, nothing (else) to write diff --git a/net/core/Makefile b/net/core/Makefile index 8f367813bc68..731db2eaa610 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -13,7 +13,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o gro.o \ - netdev-genl.o netdev-genl-gen.o + netdev-genl.o netdev-genl-gen.o gso.o obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o diff --git a/net/core/dev.c b/net/core/dev.c index c29f3e1db3ca..69a3e544676c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -758,29 +758,43 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) } EXPORT_SYMBOL(dev_get_by_name_rcu); +/* Deprecated for new users, call netdev_get_by_name() instead */ +struct net_device *dev_get_by_name(struct net *net, const char *name) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + dev_hold(dev); + rcu_read_unlock(); + return dev; +} +EXPORT_SYMBOL(dev_get_by_name); + /** - * dev_get_by_name - find a device by its name + * netdev_get_by_name() - find a device by its name * @net: the applicable net namespace * @name: name to find + * @tracker: tracking object for the acquired reference + * @gfp: allocation flags for the tracker * * Find an interface by name. This can be called from any * context and does its own locking. The returned handle has - * the usage count incremented and the caller must use dev_put() to + * the usage count incremented and the caller must use netdev_put() to * release it when it is no longer needed. %NULL is returned if no * matching device is found. */ - -struct net_device *dev_get_by_name(struct net *net, const char *name) +struct net_device *netdev_get_by_name(struct net *net, const char *name, + netdevice_tracker *tracker, gfp_t gfp) { struct net_device *dev; - rcu_read_lock(); - dev = dev_get_by_name_rcu(net, name); - dev_hold(dev); - rcu_read_unlock(); + dev = dev_get_by_name(net, name); + if (dev) + netdev_tracker_alloc(dev, tracker, gfp); return dev; } -EXPORT_SYMBOL(dev_get_by_name); +EXPORT_SYMBOL(netdev_get_by_name); /** * __dev_get_by_index - find a device by its ifindex @@ -831,29 +845,42 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) } EXPORT_SYMBOL(dev_get_by_index_rcu); +/* Deprecated for new users, call netdev_get_by_index() instead */ +struct net_device *dev_get_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); + dev_hold(dev); + rcu_read_unlock(); + return dev; +} +EXPORT_SYMBOL(dev_get_by_index); /** - * dev_get_by_index - find a device by its ifindex + * netdev_get_by_index() - find a device by its ifindex * @net: the applicable net namespace * @ifindex: index of device + * @tracker: tracking object for the acquired reference + * @gfp: allocation flags for the tracker * * Search for an interface by index. Returns NULL if the device * is not found or a pointer to the device. The device returned has * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. + * netdev_put() to indicate they have finished with it. 
*/ - -struct net_device *dev_get_by_index(struct net *net, int ifindex) +struct net_device *netdev_get_by_index(struct net *net, int ifindex, + netdevice_tracker *tracker, gfp_t gfp) { struct net_device *dev; - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, ifindex); - dev_hold(dev); - rcu_read_unlock(); + dev = dev_get_by_index(net, ifindex); + if (dev) + netdev_tracker_alloc(dev, tracker, gfp); return dev; } -EXPORT_SYMBOL(dev_get_by_index); +EXPORT_SYMBOL(netdev_get_by_index); /** * dev_get_by_napi_id - find a device by napi_id @@ -3209,7 +3236,7 @@ static u16 skb_tx_hash(const struct net_device *dev, return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } -static void skb_warn_bad_offload(const struct sk_buff *skb) +void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features; struct net_device *dev = skb->dev; @@ -3338,74 +3365,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) return vlan_get_protocol_and_depth(skb, type, depth); } -/* openvswitch calls this on rx path, so we need a different check. - */ -static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) -{ - if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_UNNECESSARY; - - return skb->ip_summed == CHECKSUM_NONE; -} - -/** - * __skb_gso_segment - Perform segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * @tx_path: whether it is called in TX path - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. - * - * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. - */ -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path) -{ - struct sk_buff *segs; - - if (unlikely(skb_needs_check(skb, tx_path))) { - int err; - - /* We're going to init ->check field in TCP or UDP header */ - err = skb_cow_head(skb, 0); - if (err < 0) - return ERR_PTR(err); - } - - /* Only report GSO partial support if it will enable us to - * support segmentation on this frame without needing additional - * work. - */ - if (features & NETIF_F_GSO_PARTIAL) { - netdev_features_t partial_features = NETIF_F_GSO_ROBUST; - struct net_device *dev = skb->dev; - - partial_features |= dev->features & dev->gso_partial_features; - if (!skb_gso_ok(skb, features | partial_features)) - features &= ~NETIF_F_GSO_PARTIAL; - } - - BUILD_BUG_ON(SKB_GSO_CB_OFFSET + - sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); - - SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); - SKB_GSO_CB(skb)->encap_level = 0; - - skb_reset_mac_header(skb); - skb_reset_mac_len(skb); - - segs = skb_mac_gso_segment(skb, features); - - if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) - skb_warn_bad_offload(skb); - - return segs; -} -EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. 
*/ #ifdef CONFIG_BUG @@ -6199,7 +6158,8 @@ restart: if (!napi) goto out; - preempt_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); for (;;) { int work = 0; @@ -6241,7 +6201,8 @@ count: if (unlikely(need_resched())) { if (napi_poll) busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); - preempt_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); rcu_read_unlock(); cond_resched(); if (loop_end(loop_end_arg, start_time)) @@ -6252,7 +6213,8 @@ count: } if (napi_poll) busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); - preempt_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); out: rcu_read_unlock(); } @@ -8822,9 +8784,11 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack); if (err) return err; - err = ops->ndo_set_mac_address(dev, sa); - if (err) - return err; + if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) { + err = ops->ndo_set_mac_address(dev, sa); + if (err) + return err; + } dev->addr_assign_type = NET_ADDR_SET; call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -10570,8 +10534,10 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) { WARN_ON(dev->reg_state == NETREG_REGISTERED); - dev->gro_flush_timeout = 20000; - dev->napi_defer_hard_irqs = 1; + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + dev->gro_flush_timeout = 20000; + dev->napi_defer_hard_irqs = 1; + } } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); @@ -10632,7 +10598,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; - ref_tracker_dir_init(&dev->refcnt_tracker, 128); + ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) diff --git a/net/core/filter.c b/net/core/filter.c index d9ce04ca22ce..06ba0e56e369 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3948,20 +3948,21 @@ void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) { - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); u32 size = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; void *addr = xdp->data; int i; if (unlikely(offset > 0xffff || len > 0xffff)) return ERR_PTR(-EFAULT); - if (offset + len > xdp_get_buff_len(xdp)) + if (unlikely(offset + len > xdp_get_buff_len(xdp))) return ERR_PTR(-EINVAL); - if (offset < size) /* linear area */ + if (likely(offset < size)) /* linear area */ goto out; + sinfo = xdp_get_shared_info_from_buff(xdp); offset -= size; for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ u32 frag_size = skb_frag_size(&sinfo->frags[i]); @@ -5803,6 +5804,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5936,6 +5943,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? 
: RT_TABLE_MAIN; struct fib6_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -6008,7 +6021,7 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) @@ -6555,12 +6568,11 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, static struct sock * __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) + u64 flags, int sdif) { struct sock *sk = NULL; struct net *net; u8 family; - int sdif; if (len == sizeof(tuple->ipv4)) family = AF_INET; @@ -6572,10 +6584,12 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; - if (family == AF_INET) - sdif = inet_sdif(skb); - else - sdif = inet6_sdif(skb); + if (sdif < 0) { + if (family == AF_INET) + sdif = inet_sdif(skb); + else + sdif = inet6_sdif(skb); + } if ((s32)netns_id < 0) { net = caller_net; @@ -6595,10 +6609,11 @@ out: static struct sock * __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) + u64 flags, int sdif) { struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, - ifindex, proto, netns_id, flags); + ifindex, proto, netns_id, flags, + sdif); if (sk) { struct sock *sk2 = sk_to_full_sk(sk); @@ -6638,7 +6653,7 @@ bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, } return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, - netns_id, flags); + netns_id, flags, -1); } static struct sock * @@ -6727,6 +6742,78 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + struct net_device *dev = skb->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); + + return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags, sdif); +} + +static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = { + .func = bpf_tc_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + struct net_device *dev = skb->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); + + return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags, sdif); +} + +static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = { + .func = bpf_tc_sk_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 
+ .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + struct net_device *dev = skb->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); + + return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, + ifindex, IPPROTO_UDP, netns_id, + flags, sdif); +} + +static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = { + .func = bpf_tc_sk_lookup_udp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_sk_release, struct sock *, sk) { if (sk && sk_is_refcounted(sk)) @@ -6744,12 +6831,13 @@ static const struct bpf_func_proto bpf_sk_release_proto = { BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { - struct net *caller_net = dev_net(ctx->rxq->dev); - int ifindex = ctx->rxq->dev->ifindex; + struct net_device *dev = ctx->rxq->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_UDP, netns_id, - flags); + flags, sdif); } static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { @@ -6767,12 +6855,13 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { - struct net *caller_net = dev_net(ctx->rxq->dev); - int ifindex = ctx->rxq->dev->ifindex; + struct net_device *dev = ctx->rxq->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, - flags); + flags, sdif); } static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { @@ -6790,12 +6879,13 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { - struct net *caller_net = dev_net(ctx->rxq->dev); - int ifindex = ctx->rxq->dev->ifindex; + struct net_device *dev = ctx->rxq->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, - flags); + flags, sdif); } static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { @@ -6815,7 +6905,8 @@ BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, { return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_TCP, netns_id, flags); + IPPROTO_TCP, netns_id, flags, + -1); } static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { @@ -6834,7 +6925,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, { return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_TCP, - netns_id, flags); + netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { @@ -6853,7 +6944,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct 
bpf_sock_addr_kern *, ctx, { return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_UDP, - netns_id, flags); + netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { @@ -6916,6 +7007,8 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, FIELD)); \ } while (0) + BTF_TYPE_EMIT(struct bpf_tcp_sock); + switch (si->off) { case offsetof(struct bpf_tcp_sock, rtt_min): BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) != @@ -7980,9 +8073,9 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: - return &bpf_sk_lookup_tcp_proto; + return &bpf_tc_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: - return &bpf_sk_lookup_udp_proto; + return &bpf_tc_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_tcp_sock: @@ -7990,7 +8083,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; case BPF_FUNC_skc_lookup_tcp: - return &bpf_skc_lookup_tcp_proto; + return &bpf_tc_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: @@ -11721,3 +11814,66 @@ static int __init bpf_kfunc_init(void) return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); } late_initcall(bpf_kfunc_init); + +/* Disables missing prototype warnings */ +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. + * + * The function expects a non-NULL pointer to a socket, and invokes the + * protocol specific socket destroy handlers. + * + * The helper can only be called from BPF contexts that have acquired the socket + * locks. + * + * Parameters: + * @sock: Pointer to socket to be destroyed + * + * Return: + * On error, may return EPROTONOSUPPORT, EINVAL. + * EPROTONOSUPPORT if protocol specific destroy handler is not supported. + * 0 otherwise + */ +__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) +{ + struct sock *sk = (struct sock *)sock; + + /* The locking semantics that allow for synchronous execution of the + * destroy handlers are only supported for TCP and UDP. + * Supporting protocols will need to acquire sock lock in the BPF context + * prior to invoking this kfunc. 
+ */ + if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP && + sk->sk_protocol != IPPROTO_UDP)) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, ECONNABORTED); +} + +__diag_pop() + +BTF_SET8_START(bpf_sk_iter_kfunc_ids) +BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) +BTF_SET8_END(bpf_sk_iter_kfunc_ids) + +static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) && + prog->expected_attach_type != BPF_TRACE_ITER) + return -EACCES; + return 0; +} + +static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_sk_iter_kfunc_ids, + .filter = tracing_iter_filter, +}; + +static int init_subsystem(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set); +} +late_initcall(init_subsystem); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 25fb0bbc310f..85a2d0d9bd39 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -27,6 +27,7 @@ #include <linux/tcp.h> #include <linux/ptp_classify.h> #include <net/flow_dissector.h> +#include <net/pkt_cls.h> #include <scsi/fc/fc_fcoe.h> #include <uapi/linux/batadv_packet.h> #include <linux/bpf.h> @@ -241,6 +242,15 @@ void skb_flow_dissect_meta(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_META, target_container); meta->ingress_ifindex = skb->skb_iif; +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (tc_skb_ext_tc_enabled()) { + struct tc_skb_ext *ext; + + ext = skb_ext_find(skb, TC_SKB_EXT); + if (ext) + meta->l2_miss = ext->l2_miss; + } +#endif } EXPORT_SYMBOL(skb_flow_dissect_meta); @@ -548,6 +558,30 @@ __skb_flow_dissect_arp(const struct sk_buff *skb, } static enum flow_dissect_ret +__skb_flow_dissect_cfm(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, const void *data, + int nhoff, int hlen) +{ + struct flow_dissector_key_cfm *key, *hdr, _hdr; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CFM)) + return FLOW_DISSECT_RET_OUT_GOOD; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(*key), data, hlen, &_hdr); + if (!hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + key = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CFM, + target_container); + + key->mdl_ver = hdr->mdl_ver; + key->opcode = hdr->opcode; + + return FLOW_DISSECT_RET_OUT_GOOD; +} + +static enum flow_dissect_ret __skb_flow_dissect_gre(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, struct flow_dissector *flow_dissector, @@ -1390,6 +1424,12 @@ proto_again: break; } + case htons(ETH_P_CFM): + fdret = __skb_flow_dissect_cfm(skb, flow_dissector, + target_container, data, + nhoff, hlen); + break; + default: fdret = FLOW_DISSECT_RET_OUT_BAD; break; diff --git a/net/core/gro.c b/net/core/gro.c index 2d84165cb4f1..0759277dc14e 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -10,7 +10,7 @@ #define GRO_MAX_HEAD (MAX_HEADER + 128) static DEFINE_SPINLOCK(offload_lock); -static struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); +struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); /* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ int gro_normal_batch __read_mostly = 8; @@ -92,63 +92,6 @@ void dev_remove_offload(struct packet_offload *po) } EXPORT_SYMBOL(dev_remove_offload); -/** - * skb_eth_gso_segment - segmentation handler for ethernet protocols. 
- * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * @type: Ethernet Protocol ID - */ -struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, - netdev_features_t features, __be16 type) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->callbacks.gso_segment) { - segs = ptype->callbacks.gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - return segs; -} -EXPORT_SYMBOL(skb_eth_gso_segment); - -/** - * skb_mac_gso_segment - mac layer segmentation handler. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; - int vlan_depth = skb->mac_len; - __be16 type = skb_network_protocol(skb, &vlan_depth); - - if (unlikely(!type)) - return ERR_PTR(-EINVAL); - - __skb_pull(skb, vlan_depth); - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->callbacks.gso_segment) { - segs = ptype->callbacks.gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - __skb_push(skb, skb->data - skb_mac_header(skb)); - - return segs; -} -EXPORT_SYMBOL(skb_mac_gso_segment); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) { @@ -239,9 +182,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; - __skb_frag_set_page(frag, page); - skb_frag_off_set(frag, first_offset); - skb_frag_size_set(frag, first_size); + skb_frag_fill_page_desc(frag, page, first_offset, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ @@ -363,6 +304,24 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) } EXPORT_SYMBOL(napi_gro_flush); +static unsigned long gro_list_prepare_tc_ext(const struct sk_buff *skb, + const struct sk_buff *p, + unsigned long diffs) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *skb_ext; + struct tc_skb_ext *p_ext; + + skb_ext = skb_ext_find(skb, TC_SKB_EXT); + p_ext = skb_ext_find(p, TC_SKB_EXT); + + diffs |= (!!p_ext) ^ (!!skb_ext); + if (!diffs && unlikely(skb_ext)) + diffs |= p_ext->chain ^ skb_ext->chain; +#endif + return diffs; +} + static void gro_list_prepare(const struct list_head *head, const struct sk_buff *skb) { @@ -397,23 +356,11 @@ static void gro_list_prepare(const struct list_head *head, * avoid trying too hard to skip each of them individually */ if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) { -#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - struct tc_skb_ext *skb_ext; - struct tc_skb_ext *p_ext; -#endif - diffs |= p->sk != skb->sk; diffs |= skb_metadata_dst_cmp(p, skb); diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); -#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - skb_ext = skb_ext_find(skb, TC_SKB_EXT); - p_ext = skb_ext_find(p, TC_SKB_EXT); - - diffs |= (!!p_ext) ^ (!!skb_ext); - if (!diffs && unlikely(skb_ext)) - diffs |= p_ext->chain ^ skb_ext->chain; -#endif + diffs |= gro_list_prepare_tc_ext(skb, p, diffs); } NAPI_GRO_CB(p)->same_flow = !diffs; @@ -460,6 +407,14 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) } } +static void 
gro_try_pull_from_frag0(struct sk_buff *skb) +{ + int grow = skb_gro_offset(skb) - skb_headlen(skb); + + if (grow > 0) + gro_pull_from_frag0(skb, grow); +} + static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) { struct sk_buff *oldest; @@ -489,7 +444,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff struct sk_buff *pp = NULL; enum gro_result ret; int same_flow; - int grow; if (netif_elide_gro(skb->dev)) goto normal; @@ -564,17 +518,14 @@ found_ptype: else gro_list->count++; + /* Must be called before setting NAPI_GRO_CB(skb)->{age|last} */ + gro_try_pull_from_frag0(skb); NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; if (!skb_is_gso(skb)) skb_shinfo(skb)->gso_size = skb_gro_len(skb); list_add(&skb->list, &gro_list->list); ret = GRO_HELD; - -pull: - grow = skb_gro_offset(skb) - skb_headlen(skb); - if (grow > 0) - gro_pull_from_frag0(skb, grow); ok: if (gro_list->count) { if (!test_bit(bucket, &napi->gro_bitmask)) @@ -587,7 +538,8 @@ ok: normal: ret = GRO_NORMAL; - goto pull; + gro_try_pull_from_frag0(skb); + goto ok; } struct packet_offload *gro_find_receive_by_type(__be16 type) diff --git a/net/core/gso.c b/net/core/gso.c new file mode 100644 index 000000000000..9e1803bfc9c6 --- /dev/null +++ b/net/core/gso.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/skbuff.h> +#include <linux/sctp.h> +#include <net/gso.h> +#include <net/gro.h> + +/** + * skb_eth_gso_segment - segmentation handler for ethernet protocols. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @type: Ethernet Protocol ID + */ +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + return segs; +} +EXPORT_SYMBOL(skb_eth_gso_segment); + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + + __skb_pull(skb, vlan_depth); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb_mac_header(skb)); + + return segs; +} +EXPORT_SYMBOL(skb_mac_gso_segment); +/* openvswitch calls this on rx path, so we need a different check. + */ +static bool skb_needs_check(const struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; + + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. 
+ * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + struct sk_buff *segs; + + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + /* We're going to init ->check field in TCP or UDP header */ + err = skb_cow_head(skb, 0); + if (err < 0) + return ERR_PTR(err); + } + + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + segs = skb_mac_gso_segment(skb, features); + + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + skb_warn_bad_offload(skb); + + return segs; +} +EXPORT_SYMBOL(__skb_gso_segment); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. + */ +static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int thlen = 0; + + if (skb->encapsulation) { + thlen = skb_inner_transport_header(skb) - + skb_transport_header(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + thlen += inner_tcp_hdrlen(skb); + } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + thlen = tcp_hdrlen(skb); + } else if (unlikely(skb_is_gso_sctp(skb))) { + thlen = sizeof(struct sctphdr); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + thlen = sizeof(struct udphdr); + } + /* UFO sets gso_size to the size of the fragmentation + * payload, i.e. the size of the L4 (UDP) header is already + * accounted for. + */ + return thlen + shinfo->gso_size; +} + +/** + * skb_gso_network_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_network_seglen is used to determine the real size of the + * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). + * + * The MAC/L2 header is not accounted for. 
+ */ +static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - + skb_network_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). + */ +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS + * + * There are a couple of instances where we have a GSO skb, and we + * want to determine what size it would be after it is segmented. + * + * We might want to check: + * - L3+L4+payload size (e.g. IP forwarding) + * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) + * + * This is a helper to do that correctly considering GSO_BY_FRAGS. + * + * @skb: GSO skb + * + * @seg_len: The segmented length (from skb_gso_*_seglen). In the + * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. + * + * @max_len: The maximum permissible length. + * + * Returns true if the segmented length <= max length. + */ +static inline bool skb_gso_size_check(const struct sk_buff *skb, + unsigned int seg_len, + unsigned int max_len) { + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct sk_buff *iter; + + if (shinfo->gso_size != GSO_BY_FRAGS) + return seg_len <= max_len; + + /* Undo this so we can re-use header sizes */ + seg_len -= GSO_BY_FRAGS; + + skb_walk_frags(skb, iter) { + if (seg_len + skb_headlen(iter) > max_len) + return false; + } + + return true; +} + +/** + * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? + * + * @skb: GSO skb + * @mtu: MTU to validate against + * + * skb_gso_validate_network_len validates if a given skb will fit a + * wanted MTU once split. It considers L3 headers, L4 headers, and the + * payload. + */ +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) +{ + return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); + +/** + * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? + * + * @skb: GSO skb + * @len: length to validate against + * + * skb_gso_validate_mac_len validates if a given skb will fit a wanted + * length once split, including L2, L3 and L4 headers and the payload. + */ +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) +{ + return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); + diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3e3598cd49f2..f4183c4c1ec8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id); /* init code that must occur even if setup_net() is not called. 
*/ static __net_init void preinit_net(struct net *net) { - ref_tracker_dir_init(&net->notrefcnt_tracker, 128); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt"); } /* @@ -322,7 +322,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) LIST_HEAD(net_exit_list); refcount_set(&net->ns.count, 1); - ref_tracker_dir_init(&net->refcnt_tracker, 128); + ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index de17ca2f7dbf..ea9231378aa6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -8,7 +8,7 @@ #include "netdev-genl-gen.h" -#include <linux/netdev.h> +#include <uapi/linux/netdev.h> /* NETDEV_CMD_DEV_GET - do */ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 74d74fc23167..7b370c073e7d 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -9,7 +9,7 @@ #include <net/netlink.h> #include <net/genetlink.h> -#include <linux/netdev.h> +#include <uapi/linux/netdev.h> int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e6a739b1afa9..543007f159f9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -690,7 +690,7 @@ int netpoll_setup(struct netpoll *np) err = -ENODEV; goto unlock; } - dev_hold(ndev); + netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL); if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); @@ -783,12 +783,11 @@ put_noaddr: err = __netpoll_setup(np, ndev); if (err) goto put; - netdev_tracker_alloc(ndev, &np->dev_tracker, GFP_KERNEL); rtnl_unlock(); return 0; put: - dev_put(ndev); + netdev_put(ndev, &np->dev_tracker); unlock: rtnl_unlock(); return err; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 760238196db1..f56b8d697014 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2785,14 +2785,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, break; } get_page(pkt_dev->page); - skb_frag_set_page(skb, i, pkt_dev->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0); + /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_frag_size_set(&skb_shinfo(skb)->frags[i], - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE)); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, + (datalen < PAGE_SIZE ? 
+ datalen : PAGE_SIZE)); else - skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 41de3a2f29e1..3ad4e030846d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -961,24 +961,27 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + - nla_total_size(0) + /* nest IFLA_VF_STATS */ - /* IFLA_VF_STATS_RX_PACKETS */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_PACKETS */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_RX_BYTES */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_BYTES */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_BROADCAST */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_MULTICAST */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_RX_DROPPED */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_DROPPED */ - nla_total_size_64bit(sizeof(__u64)) + nla_total_size(sizeof(struct ifla_vf_trust))); + if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { + size += num_vfs * + (nla_total_size(0) + /* nest IFLA_VF_STATS */ + /* IFLA_VF_STATS_RX_PACKETS */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_PACKETS */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_BYTES */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_BYTES */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_BROADCAST */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_MULTICAST */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_DROPPED */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_DROPPED */ + nla_total_size_64bit(sizeof(__u64))); + } return size; } else return 0; @@ -1270,7 +1273,8 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct net_device *dev, int vfs_num, - struct nlattr *vfinfo) + struct nlattr *vfinfo, + u32 ext_filter_mask) { struct ifla_vf_rss_query_en vf_rss_query_en; struct nlattr *vf, *vfstats, *vfvlanlist; @@ -1376,33 +1380,35 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, goto nla_put_vf_failure; } nla_nest_end(skb, vfvlanlist); - memset(&vf_stats, 0, sizeof(vf_stats)); - if (dev->netdev_ops->ndo_get_vf_stats) - dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, - &vf_stats); - vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); - if (!vfstats) - goto nla_put_vf_failure; - if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, - vf_stats.rx_packets, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, - vf_stats.tx_packets, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, - vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, - vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, - vf_stats.broadcast, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, - vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || 
- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, - vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { - nla_nest_cancel(skb, vfstats); - goto nla_put_vf_failure; + if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, + &vf_stats); + vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); + if (!vfstats) + goto nla_put_vf_failure; + if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, + vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, + vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfstats); } - nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; @@ -1435,7 +1441,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, return -EMSGSIZE; for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask)) return -EMSGSIZE; } @@ -2377,45 +2383,43 @@ static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { - if (dev) { - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - return -EINVAL; + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + return -EINVAL; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - return -EINVAL; + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + return -EINVAL; - if (tb[IFLA_GSO_MAX_SIZE] && - nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) { - NL_SET_ERR_MSG(extack, "too big gso_max_size"); - return -EINVAL; - } + if (tb[IFLA_GSO_MAX_SIZE] && + nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) { + NL_SET_ERR_MSG(extack, "too big gso_max_size"); + return -EINVAL; + } - if (tb[IFLA_GSO_MAX_SEGS] && - (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS || - nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) { - NL_SET_ERR_MSG(extack, "too big gso_max_segs"); - return -EINVAL; - } + if (tb[IFLA_GSO_MAX_SEGS] && + (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS || + nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) { + NL_SET_ERR_MSG(extack, "too big gso_max_segs"); + return -EINVAL; + } - if (tb[IFLA_GRO_MAX_SIZE] && - nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) { - NL_SET_ERR_MSG(extack, "too big gro_max_size"); - return -EINVAL; - } + if (tb[IFLA_GRO_MAX_SIZE] && + nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_max_size"); + return -EINVAL; + } - if (tb[IFLA_GSO_IPV4_MAX_SIZE] && - nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) { - NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size"); - return -EINVAL; - } + if (tb[IFLA_GSO_IPV4_MAX_SIZE] && + 
nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) { + NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size"); + return -EINVAL; + } - if (tb[IFLA_GRO_IPV4_MAX_SIZE] && - nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) { - NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size"); - return -EINVAL; - } + if (tb[IFLA_GRO_IPV4_MAX_SIZE] && + nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size"); + return -EINVAL; } if (tb[IFLA_AF_SPEC]) { @@ -2736,10 +2740,6 @@ static int do_setlink(const struct sk_buff *skb, char ifname[IFNAMSIZ]; int err; - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - return err; - if (tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -3156,6 +3156,10 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } + err = validate_linkmsg(dev, tb, extack); + if (err < 0) + goto errout; + err = do_setlink(skb, dev, ifm, extack, tb, 0); errout: return err; @@ -3399,6 +3403,9 @@ static int rtnl_group_changelink(const struct sk_buff *skb, for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { + err = validate_linkmsg(dev, tb, extack); + if (err < 0) + return err; err = do_setlink(skb, dev, ifm, extack, tb, 0); if (err < 0) return err; @@ -3556,10 +3563,6 @@ replay: m_ops = master_dev->rtnl_link_ops; } - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - return err; - if (tb[IFLA_LINKINFO]) { err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO], @@ -3623,6 +3626,10 @@ replay: if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + err = validate_linkmsg(dev, tb, extack); + if (err < 0) + return err; + if (linkinfo[IFLA_INFO_DATA]) { if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) @@ -4090,7 +4097,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, ndm->ndm_ifindex = dev->ifindex; ndm->ndm_state = ndm_state; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr)) goto nla_put_failure; if (vid) if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) @@ -4104,10 +4111,10 @@ nla_put_failure: return -EMSGSIZE; } -static inline size_t rtnl_fdb_nlmsg_size(void) +static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + - nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(dev->addr_len) + /* NDA_LLADDR */ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ 0; } @@ -4119,7 +4126,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC); + skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC); if (!skb) goto errout; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cea28d30abb5..6c5915efbc17 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -67,6 +67,7 @@ #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> +#include <net/gso.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> @@ -92,15 +93,7 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init; static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif -/* skb_small_head_cache and related code is only supported - * for CONFIG_SLAB and CONFIG_SLUB. - * As soon as SLOB is removed from the kernel, we can clean up this. 
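Earlier in this rtnetlink hunk, FDB add/del notifications switch from a hardcoded ETH_ALEN to dev->addr_len, so link layers with longer hardware addresses (InfiniBand uses 20 bytes) get a correctly sized NDA_LLADDR attribute and skb. A minimal sketch of the sizing arithmetic with a hypothetical helper name; only the NLMSG_ALIGN()/nla_total_size() calls are real kernel API:

#include <linux/netdevice.h>
#include <linux/neighbour.h>
#include <net/netlink.h>

/* Hypothetical restatement of the new rtnl_fdb_nlmsg_size() rule:
 * fixed ndmsg header plus one link-layer address attribute sized by
 * the device, plus one VLAN attribute, each padded via nla_total_size().
 */
static size_t fdb_notify_size(const struct net_device *dev)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg)) +
	       nla_total_size(dev->addr_len) +	/* NDA_LLADDR: 6 (Ethernet), 20 (InfiniBand) */
	       nla_total_size(sizeof(u16));	/* NDA_VLAN */
}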
- */ -#if !defined(CONFIG_SLOB) -# define HAVE_SKB_SMALL_HEAD_CACHE 1 -#endif -#ifdef HAVE_SKB_SMALL_HEAD_CACHE static struct kmem_cache *skb_small_head_cache __ro_after_init; #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) @@ -117,7 +110,6 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init; #define SKB_SMALL_HEAD_HEADROOM \ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) -#endif /* HAVE_SKB_SMALL_HEAD_CACHE */ int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); @@ -562,7 +554,6 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, void *obj; obj_size = SKB_HEAD_ALIGN(*size); -#ifdef HAVE_SKB_SMALL_HEAD_CACHE if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { obj = kmem_cache_alloc_node(skb_small_head_cache, @@ -576,7 +567,6 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); goto out; } -#endif *size = obj_size = kmalloc_size_roundup(obj_size); /* * Try a regular allocation, when that fails and we're not entitled @@ -898,11 +888,9 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe) static void skb_kfree_head(void *head, unsigned int end_offset) { -#ifdef HAVE_SKB_SMALL_HEAD_CACHE if (end_offset == SKB_SMALL_HEAD_HEADROOM) kmem_cache_free(skb_small_head_cache, head); else -#endif kfree(head); } @@ -2160,7 +2148,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; -#ifdef HAVE_SKB_SMALL_HEAD_CACHE /* We can not change skb->end if the original or new value * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). */ @@ -2174,7 +2161,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) WARN_ON_ONCE(1); return 0; } -#endif shinfo = skb_shinfo(skb); @@ -3003,32 +2989,32 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, } EXPORT_SYMBOL_GPL(skb_splice_bits); -static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg, - struct kvec *vec, size_t num, size_t size) +static int sendmsg_locked(struct sock *sk, struct msghdr *msg) { struct socket *sock = sk->sk_socket; + size_t size = msg_data_left(msg); if (!sock) return -EINVAL; - return kernel_sendmsg(sock, msg, vec, num, size); + + if (!sock->ops->sendmsg_locked) + return sock_no_sendmsg_locked(sk, msg, size); + + return sock->ops->sendmsg_locked(sk, msg, size); } -static int sendpage_unlocked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg) { struct socket *sock = sk->sk_socket; if (!sock) return -EINVAL; - return kernel_sendpage(sock, page, offset, size, flags); + return sock_sendmsg(sock, msg); } -typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg, - struct kvec *vec, size_t num, size_t size); -typedef int (*sendpage_func)(struct sock *sk, struct page *page, int offset, - size_t size, int flags); +typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, - int len, sendmsg_func sendmsg, sendpage_func sendpage) + int len, sendmsg_func sendmsg) { unsigned int orig_len = len; struct sk_buff *head = skb; @@ -3048,8 +3034,9 @@ do_frag_list: memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT; - ret = INDIRECT_CALL_2(sendmsg, kernel_sendmsg_locked, - sendmsg_unlocked, sk, &msg, &kv, 1, slen); + 
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); + ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, + sendmsg_unlocked, sk, &msg); if (ret <= 0) goto error; @@ -3080,11 +3067,18 @@ do_frag_list: slen = min_t(size_t, len, skb_frag_size(frag) - offset); while (slen) { - ret = INDIRECT_CALL_2(sendpage, kernel_sendpage_locked, - sendpage_unlocked, sk, - skb_frag_page(frag), - skb_frag_off(frag) + offset, - slen, MSG_DONTWAIT); + struct bio_vec bvec; + struct msghdr msg = { + .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT, + }; + + bvec_set_page(&bvec, skb_frag_page(frag), slen, + skb_frag_off(frag) + offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, + slen); + + ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, + sendmsg_unlocked, sk, &msg); if (ret <= 0) goto error; @@ -3121,16 +3115,14 @@ error: int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len) { - return __skb_send_sock(sk, skb, offset, len, kernel_sendmsg_locked, - kernel_sendpage_locked); + return __skb_send_sock(sk, skb, offset, len, sendmsg_locked); } EXPORT_SYMBOL_GPL(skb_send_sock_locked); /* Send skb data on a socket. Socket must be unlocked. */ int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) { - return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked, - sendpage_unlocked); + return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked); } /** @@ -4203,13 +4195,13 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, EXPORT_SYMBOL(skb_find_text); int skb_append_pagefrags(struct sk_buff *skb, struct page *page, - int offset, size_t size) + int offset, size_t size, size_t max_frags) { int i = skb_shinfo(skb)->nr_frags; if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); - } else if (i < MAX_SKB_FRAGS) { + } else if (i < max_frags) { skb_zcopy_downgrade_managed(skb); get_page(page); skb_fill_page_desc_noacc(skb, i, page, offset, size); @@ -4249,10 +4241,9 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) struct page *page; page = virt_to_head_page(frag_skb->head); - __skb_frag_set_page(&head_frag, page); - skb_frag_off_set(&head_frag, frag_skb->data - - (unsigned char *)page_address(page)); - skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); + skb_frag_fill_page_desc(&head_frag, page, frag_skb->data - + (unsigned char *)page_address(page), + skb_headlen(frag_skb)); return head_frag; } @@ -4768,7 +4759,6 @@ void __init skb_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); -#ifdef HAVE_SKB_SMALL_HEAD_CACHE /* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes. * struct skb_shared_info is located at the end of skb->head, * and should not be copied to/from user. @@ -4780,7 +4770,6 @@ void __init skb_init(void) 0, SKB_SMALL_HEAD_HEADROOM, NULL); -#endif skb_extensions_init(); } @@ -5784,147 +5773,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) } EXPORT_SYMBOL_GPL(skb_scrub_packet); -/** - * skb_gso_transport_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_transport_seglen is used to determine the real size of the - * individual segments, including Layer4 headers (TCP/UDP). - * - * The MAC/L2 or network (IP, IPv6) headers are not accounted for. 
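The frag loop of __skb_send_sock() above now builds a bvec-backed msghdr carrying MSG_SPLICE_PAGES instead of going through the removed ->sendpage() path, so linear and paged data both travel via sendmsg. A self-contained sketch of that pattern, assuming a connected struct socket and a page range the caller owns:

#include <linux/bvec.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

/* Minimal sketch of the new frag-send pattern: describe the page range
 * with a bio_vec, point the msghdr's iterator at it, and let sendmsg
 * splice it without copying when the protocol supports MSG_SPLICE_PAGES.
 */
static int splice_one_page(struct socket *sock, struct page *page,
			   size_t off, size_t len)
{
	struct bio_vec bvec;
	struct msghdr msg = {
		.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT,
	};

	bvec_set_page(&bvec, page, len, off);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);

	return sock_sendmsg(sock, &msg);
}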
- */ -static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - unsigned int thlen = 0; - - if (skb->encapsulation) { - thlen = skb_inner_transport_header(skb) - - skb_transport_header(skb); - - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - thlen += inner_tcp_hdrlen(skb); - } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { - thlen = tcp_hdrlen(skb); - } else if (unlikely(skb_is_gso_sctp(skb))) { - thlen = sizeof(struct sctphdr); - } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { - thlen = sizeof(struct udphdr); - } - /* UFO sets gso_size to the size of the fragmentation - * payload, i.e. the size of the L4 (UDP) header is already - * accounted for. - */ - return thlen + shinfo->gso_size; -} - -/** - * skb_gso_network_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_network_seglen is used to determine the real size of the - * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). - * - * The MAC/L2 header is not accounted for. - */ -static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - - skb_network_header(skb); - - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_mac_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_mac_seglen is used to determine the real size of the - * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 - * headers (TCP/UDP). - */ -static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS - * - * There are a couple of instances where we have a GSO skb, and we - * want to determine what size it would be after it is segmented. - * - * We might want to check: - * - L3+L4+payload size (e.g. IP forwarding) - * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) - * - * This is a helper to do that correctly considering GSO_BY_FRAGS. - * - * @skb: GSO skb - * - * @seg_len: The segmented length (from skb_gso_*_seglen). In the - * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. - * - * @max_len: The maximum permissible length. - * - * Returns true if the segmented length <= max length. - */ -static inline bool skb_gso_size_check(const struct sk_buff *skb, - unsigned int seg_len, - unsigned int max_len) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const struct sk_buff *iter; - - if (shinfo->gso_size != GSO_BY_FRAGS) - return seg_len <= max_len; - - /* Undo this so we can re-use header sizes */ - seg_len -= GSO_BY_FRAGS; - - skb_walk_frags(skb, iter) { - if (seg_len + skb_headlen(iter) > max_len) - return false; - } - - return true; -} - -/** - * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? - * - * @skb: GSO skb - * @mtu: MTU to validate against - * - * skb_gso_validate_network_len validates if a given skb will fit a - * wanted MTU once split. It considers L3 headers, L4 headers, and the - * payload. 
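These GSO seglen/validate helpers are removed from skbuff.c without a behaviour change; the new <net/gso.h> includes elsewhere in this diff suggest they move to a dedicated GSO translation unit. A worked example of the arithmetic they implement, with assumed header sizes:

/* Worked example (header sizes assumed): a TCPv4 GSO skb with
 *   L2 (Ethernet) header:  14 bytes
 *   L3 (IPv4) header:      20 bytes
 *   L4 (TCP) header:       20 bytes
 *   gso_size (payload):  1448 bytes per segment
 *
 * skb_gso_transport_seglen() = 20 + 1448 = 1468
 * skb_gso_network_seglen()   = 20 + 1468 = 1488
 * skb_gso_mac_seglen()       = 14 + 1488 = 1502
 *
 * So skb_gso_validate_network_len(skb, 1500) is true (1488 <= 1500),
 * while the same 1500-byte limit applied at L2 would fail (1502 > 1500).
 */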
- */ -bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) -{ - return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); -} -EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); - -/** - * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? - * - * @skb: GSO skb - * @len: length to validate against - * - * skb_gso_validate_mac_len validates if a given skb will fit a wanted - * length once split, including L2, L3 and L4 headers and the payload. - */ -bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) -{ - return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); -} -EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); - static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len, meta_len; @@ -6912,3 +6760,91 @@ nodefer: __kfree_skb(skb); if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) smp_call_function_single_async(cpu, &sd->defer_csd); } + +static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, + size_t offset, size_t len) +{ + const char *kaddr; + __wsum csum; + + kaddr = kmap_local_page(page); + csum = csum_partial(kaddr + offset, len, 0); + kunmap_local(kaddr); + skb->csum = csum_block_add(skb->csum, csum, skb->len); +} + +/** + * skb_splice_from_iter - Splice (or copy) pages to skbuff + * @skb: The buffer to add pages to + * @iter: Iterator representing the pages to be added + * @maxsize: Maximum amount of pages to be added + * @gfp: Allocation flags + * + * This is a common helper function for supporting MSG_SPLICE_PAGES. It + * extracts pages from an iterator and adds them to the socket buffer if + * possible, copying them to fragments if not possible (such as if they're slab + * pages). + * + * Returns the amount of data spliced/copied or -EMSGSIZE if there's + * insufficient space in the buffer to transfer anything. 
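skb_splice_from_iter() is the core helper behind the MSG_SPLICE_PAGES conversions in this series. A hypothetical caller fragment, assuming the protocol has already allocated an skb; pages must pass sendpage_ok() in the version shown above (it returns -EIO otherwise), and real callers do fuller memory accounting:

#include <linux/skbuff.h>
#include <linux/socket.h>
#include <net/sock.h>

/* Hypothetical sendmsg fragment: splice the caller's pages straight
 * into an already-built skb when MSG_SPLICE_PAGES is set.
 */
static ssize_t proto_splice_data(struct sock *sk, struct sk_buff *skb,
				 struct msghdr *msg, size_t len)
{
	ssize_t spliced;

	if (!(msg->msg_flags & MSG_SPLICE_PAGES))
		return -EINVAL;		/* caller falls back to copying */

	spliced = skb_splice_from_iter(skb, &msg->msg_iter, len,
				       sk->sk_allocation);
	if (spliced > 0)
		sk_wmem_queued_add(sk, spliced);	/* rough accounting */
	return spliced;
}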
+ */ +ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, + ssize_t maxsize, gfp_t gfp) +{ + size_t frag_limit = READ_ONCE(sysctl_max_skb_frags); + struct page *pages[8], **ppages = pages; + ssize_t spliced = 0, ret = 0; + unsigned int i; + + while (iter->count > 0) { + ssize_t space, nr, len; + size_t off; + + ret = -EMSGSIZE; + space = frag_limit - skb_shinfo(skb)->nr_frags; + if (space < 0) + break; + + /* We might be able to coalesce without increasing nr_frags */ + nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages)); + + len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off); + if (len <= 0) { + ret = len ?: -EIO; + break; + } + + i = 0; + do { + struct page *page = pages[i++]; + size_t part = min_t(size_t, PAGE_SIZE - off, len); + + ret = -EIO; + if (WARN_ON_ONCE(!sendpage_ok(page))) + goto out; + + ret = skb_append_pagefrags(skb, page, off, part, + frag_limit); + if (ret < 0) { + iov_iter_revert(iter, len); + goto out; + } + + if (skb->ip_summed == CHECKSUM_NONE) + skb_splice_csum_page(skb, page, off, part); + + off = 0; + spliced += part; + maxsize -= part; + len -= part; + } while (len > 0); + + if (maxsize <= 0) + break; + } + +out: + skb_len_add(skb, spliced); + return spliced ?: ret; +} +EXPORT_SYMBOL(skb_splice_from_iter); diff --git a/net/core/sock.c b/net/core/sock.c index 24f2761bdb1d..9370fd50aa2c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -114,6 +114,9 @@ #include <linux/memcontrol.h> #include <linux/prefetch.h> #include <linux/compat.h> +#include <linux/mroute.h> +#include <linux/mroute6.h> +#include <linux/icmpv6.h> #include <linux/uaccess.h> @@ -138,6 +141,7 @@ #include <net/tcp.h> #include <net/busy_poll.h> +#include <net/phonet/phonet.h> #include <linux/ethtool.h> @@ -1246,6 +1250,13 @@ set_sndbuf: clear_bit(SOCK_PASSCRED, &sock->flags); break; + case SO_PASSPIDFD: + if (valbool) + set_bit(SOCK_PASSPIDFD, &sock->flags); + else + clear_bit(SOCK_PASSPIDFD, &sock->flags); + break; + case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: @@ -1362,12 +1373,6 @@ set_sndbuf: __sock_set_mark(sk, val); break; case SO_RCVMARK: - if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { - ret = -EPERM; - break; - } - sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; @@ -1732,6 +1737,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); break; + case SO_PASSPIDFD: + v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags); + break; + case SO_PEERCRED: { struct ucred peercred; @@ -1747,6 +1756,39 @@ int sk_getsockopt(struct sock *sk, int level, int optname, goto lenout; } + case SO_PEERPIDFD: + { + struct pid *peer_pid; + struct file *pidfd_file = NULL; + int pidfd; + + if (len > sizeof(pidfd)) + len = sizeof(pidfd); + + spin_lock(&sk->sk_peer_lock); + peer_pid = get_pid(sk->sk_peer_pid); + spin_unlock(&sk->sk_peer_lock); + + if (!peer_pid) + return -ESRCH; + + pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file); + put_pid(peer_pid); + if (pidfd < 0) + return pidfd; + + if (copy_to_sockptr(optval, &pidfd, len) || + copy_to_sockptr(optlen, &len, sizeof(int))) { + put_unused_fd(pidfd); + fput(pidfd_file); + + return -EFAULT; + } + + fd_install(pidfd, pidfd_file); + return 0; + } + case SO_PEERGROUPS: { const struct cred *cred; @@ -2556,13 +2598,24 @@ kuid_t sock_i_uid(struct sock *sk) } EXPORT_SYMBOL(sock_i_uid); -unsigned long sock_i_ino(struct sock *sk) +unsigned long __sock_i_ino(struct sock *sk) { 
unsigned long ino; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); + return ino; +} +EXPORT_SYMBOL(__sock_i_ino); + +unsigned long sock_i_ino(struct sock *sk) +{ + unsigned long ino; + + local_bh_disable(); + ino = __sock_i_ino(sk); + local_bh_enable(); return ino; } EXPORT_SYMBOL(sock_i_ino); @@ -3219,36 +3272,6 @@ void __receive_sock(struct file *file) } } -ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -{ - ssize_t res; - struct msghdr msg = {.msg_flags = flags}; - struct kvec iov; - char *kaddr = kmap(page); - iov.iov_base = kaddr + offset; - iov.iov_len = size; - res = kernel_sendmsg(sock, &msg, &iov, 1, size); - kunmap(page); - return res; -} -EXPORT_SYMBOL(sock_no_sendpage); - -ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - ssize_t res; - struct msghdr msg = {.msg_flags = flags}; - struct kvec iov; - char *kaddr = kmap(page); - - iov.iov_base = kaddr + offset; - iov.iov_len = size; - res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size); - kunmap(page); - return res; -} -EXPORT_SYMBOL(sock_no_sendpage_locked); - /* * Default Socket Callbacks */ @@ -4004,7 +4027,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) { seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " - "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", + "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, sock_prot_inuse_get(seq_file_net(seq), proto), @@ -4025,7 +4048,6 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) proto_method_implemented(proto->getsockopt), proto_method_implemented(proto->sendmsg), proto_method_implemented(proto->recvmsg), - proto_method_implemented(proto->sendpage), proto_method_implemented(proto->bind), proto_method_implemented(proto->backlog_rcv), proto_method_implemented(proto->hash), @@ -4046,7 +4068,7 @@ static int proto_seq_show(struct seq_file *seq, void *v) "maxhdr", "slab", "module", - "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); + "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n"); else proto_seq_printf(seq, list_entry(v, struct proto, node)); return 0; @@ -4106,3 +4128,63 @@ int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) return sk->sk_prot->bind_add(sk, addr, addr_len); } EXPORT_SYMBOL(sock_bind_add); + +/* Copy 'size' bytes from userspace and return `size` back to userspace */ +int sock_ioctl_inout(struct sock *sk, unsigned int cmd, + void __user *arg, void *karg, size_t size) +{ + int ret; + + if (copy_from_user(karg, arg, size)) + return -EFAULT; + + ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg); + if (ret) + return ret; + + if (copy_to_user(arg, karg, size)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL(sock_ioctl_inout); + +/* This is the most common ioctl prep function, where the result (4 bytes) is + * copied back to userspace if the ioctl() returns successfully. No input is + * copied from userspace as input argument. 
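The dccp and ieee802154 hunks below follow the same conversion: protocol ioctl handlers now receive kernel memory (int *karg), and the single copy back to userspace happens in sock_ioctl_out()/sock_ioctl_inout(). A minimal sketch of a handler written against the new convention (myproto_ioctl is hypothetical):

#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/sockios.h>
#include <net/sock.h>

/* Hypothetical protocol ->ioctl() under the kernel-memory convention:
 * the core copies *karg back to userspace on success, so the handler
 * never touches __user pointers itself.
 */
static int myproto_ioctl(struct sock *sk, int cmd, int *karg)
{
	struct sk_buff *skb;

	switch (cmd) {
	case SIOCOUTQ:
		*karg = sk_wmem_alloc_get(sk);	/* bytes queued for TX */
		return 0;
	case SIOCINQ:
		skb = skb_peek(&sk->sk_receive_queue);	/* locking elided */
		*karg = skb ? skb->len : 0;
		return 0;
	}
	return -ENOIOCTLCMD;
}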
+ */ +static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg) +{ + int ret, karg = 0; + + ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg); + if (ret) + return ret; + + return put_user(karg, (int __user *)arg); +} + +/* A wrapper around sock ioctls, which copies the data from userspace + * (depending on the protocol/ioctl), and copies back the result to userspace. + * The main motivation for this function is to pass kernel memory to the + * protocol ioctl callbacks, instead of userspace memory. + */ +int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + int rc = 1; + + if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET) + rc = ipmr_sk_ioctl(sk, cmd, arg); + else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6) + rc = ip6mr_sk_ioctl(sk, cmd, arg); + else if (sk_is_phonet(sk)) + rc = phonet_sk_ioctl(sk, cmd, arg); + + /* If ioctl was processed, returns its value */ + if (rc <= 0) + return rc; + + /* Otherwise call the default handler */ + return sock_ioctl_out(sk, cmd, arg); +} +EXPORT_SYMBOL(sk_ioctl); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 00afb66cd095..19538d628714 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -32,8 +32,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size != 4 || (attr->value_size != sizeof(u32) && @@ -1085,8 +1083,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) struct bpf_shtab *htab; int i, err; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size == 0 || (attr->value_size != sizeof(u32) && diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 9ddc3a9e89e4..1f748ed1279d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -292,7 +292,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); -int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int dccp_ioctl(struct sock *sk, int cmd, int *karg); int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3ab68415d121..fa8079303cb0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1010,7 +1010,6 @@ static const struct proto_ops inet_dccp_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct inet_protosw dccp_v4_protosw = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 93c98990d726..7249ef218178 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1087,7 +1087,6 @@ static const struct proto_ops inet6_dccp_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b0ebf853cb07..f331e5977a84 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -362,7 +362,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, EXPORT_SYMBOL_GPL(dccp_poll); -int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) +int dccp_ioctl(struct sock *sk, int cmd, int *karg) { int rc = -ENOTCONN; @@ -373,17 +373,17 @@ int dccp_ioctl(struct sock *sk, int cmd, 
unsigned long arg) switch (cmd) { case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); + *karg = sk_wmem_alloc_get(sk); /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and * always 0, comparably to UDP. */ - rc = put_user(amount, (int __user *)arg); + rc = 0; } break; case SIOCINQ: { struct sk_buff *skb; - unsigned long amount = 0; + *karg = 0; skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { @@ -391,9 +391,9 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) * We will only return the amount of this packet since * that is all that will be read. */ - amount = skb->len; + *karg = skb->len; } - rc = put_user(amount, (int __user *)arg); + rc = 0; } break; default: diff --git a/net/devlink/health.c b/net/devlink/health.c index 0839706d5741..194340a8bb86 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -480,7 +480,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, int err; WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); + ASSERT_DEVLINK_REGISTERED(devlink); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index cd0254968076..1f00f874471f 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -447,18 +447,18 @@ static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps, caps->value |= cap; } -static int devlink_port_fn_roce_fill(const struct devlink_ops *ops, - struct devlink_port *devlink_port, +static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port, struct nla_bitfield32 *caps, struct netlink_ext_ack *extack) { bool is_enable; int err; - if (!ops->port_fn_roce_get) + if (!devlink_port->ops->port_fn_roce_get) return 0; - err = ops->port_fn_roce_get(devlink_port, &is_enable, extack); + err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable, + extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -469,19 +469,19 @@ static int devlink_port_fn_roce_fill(const struct devlink_ops *ops, return 0; } -static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops, - struct devlink_port *devlink_port, +static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port, struct nla_bitfield32 *caps, struct netlink_ext_ack *extack) { bool is_enable; int err; - if (!ops->port_fn_migratable_get || + if (!devlink_port->ops->port_fn_migratable_get || devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) return 0; - err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack); + err = devlink_port->ops->port_fn_migratable_get(devlink_port, + &is_enable, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -492,8 +492,7 @@ static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops, return 0; } -static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, - struct devlink_port *devlink_port, +static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port, struct sk_buff *msg, struct netlink_ext_ack *extack, bool *msg_updated) @@ -501,11 +500,11 @@ static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, struct nla_bitfield32 caps = {}; int err; - err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack); + err = devlink_port_fn_roce_fill(devlink_port, &caps, extack); if (err) return err; - err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack); + err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack); if (err) return 
err; @@ -691,8 +690,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } -static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, - struct devlink_port *port, +static int devlink_port_fn_hw_addr_fill(struct devlink_port *port, struct sk_buff *msg, struct netlink_ext_ack *extack, bool *msg_updated) @@ -701,10 +699,10 @@ static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, int hw_addr_len; int err; - if (!ops->port_function_hw_addr_get) + if (!port->ops->port_fn_hw_addr_get) return 0; - err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len, + err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len, extack); if (err) { if (err == -EOPNOTSUPP) @@ -789,8 +787,7 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; } -static int devlink_port_fn_state_fill(const struct devlink_ops *ops, - struct devlink_port *port, +static int devlink_port_fn_state_fill(struct devlink_port *port, struct sk_buff *msg, struct netlink_ext_ack *extack, bool *msg_updated) @@ -799,10 +796,10 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops, enum devlink_port_fn_state state; int err; - if (!ops->port_fn_state_get) + if (!port->ops->port_fn_state_get) return 0; - err = ops->port_fn_state_get(port, &state, &opstate, extack); + err = port->ops->port_fn_state_get(port, &state, &opstate, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -829,18 +826,16 @@ static int devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = devlink_port->devlink->ops; - - return ops->port_fn_migratable_set(devlink_port, enable, extack); + return devlink_port->ops->port_fn_migratable_set(devlink_port, enable, + extack); } static int devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = devlink_port->devlink->ops; - - return ops->port_fn_roce_set(devlink_port, enable, extack); + return devlink_port->ops->port_fn_roce_set(devlink_port, enable, + extack); } static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, @@ -874,7 +869,6 @@ static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops; struct nlattr *function_attr; bool msg_updated = false; int err; @@ -883,16 +877,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (!function_attr) return -EMSGSIZE; - ops = port->devlink->ops; - err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated); if (err) goto out; - err = devlink_port_fn_caps_fill(ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated); if (err) goto out; - err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); + err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -1137,14 +1128,13 @@ static int devlink_port_type_set(struct devlink_port *devlink_port, { int err; - if (!devlink_port->devlink->ops->port_type_set) + if (!devlink_port->ops->port_type_set) return -EOPNOTSUPP; if (port_type == devlink_port->type) return 0; - err = devlink_port->devlink->ops->port_type_set(devlink_port, - port_type); + err = 
devlink_port->ops->port_type_set(devlink_port, port_type); if (err) return err; @@ -1157,7 +1147,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = port->devlink->ops; const u8 *hw_addr; int hw_addr_len; @@ -1178,7 +1167,7 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port, } } - return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, + return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len, extack); } @@ -1187,22 +1176,20 @@ static int devlink_port_fn_state_set(struct devlink_port *port, struct netlink_ext_ack *extack) { enum devlink_port_fn_state state; - const struct devlink_ops *ops; state = nla_get_u8(attr); - ops = port->devlink->ops; - return ops->port_fn_state_set(port, state, extack); + return port->ops->port_fn_state_set(port, state, extack); } static int devlink_port_function_validate(struct devlink_port *devlink_port, struct nlattr **tb, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = devlink_port->devlink->ops; + const struct devlink_port_ops *ops = devlink_port->ops; struct nlattr *attr; if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] && - !ops->port_function_hw_addr_set) { + !ops->port_fn_hw_addr_set) { NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], "Port doesn't support function attributes"); return -EOPNOTSUPP; @@ -1320,7 +1307,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT)) return -EINVAL; - if (!devlink->ops->port_split) + if (!devlink_port->ops->port_split) return -EOPNOTSUPP; count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); @@ -1339,8 +1326,8 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, return -EINVAL; } - return devlink->ops->port_split(devlink, devlink_port, count, - info->extack); + return devlink_port->ops->port_split(devlink, devlink_port, count, + info->extack); } static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, @@ -1349,40 +1336,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; - if (!devlink->ops->port_unsplit) + if (!devlink_port->ops->port_unsplit) return -EOPNOTSUPP; - return devlink->ops->port_unsplit(devlink, devlink_port, info->extack); -} - -static int devlink_port_new_notify(struct devlink *devlink, - unsigned int port_index, - struct genl_info *info) -{ - struct devlink_port *devlink_port; - struct sk_buff *msg; - int err; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - lockdep_assert_held(&devlink->lock); - devlink_port = devlink_port_get_by_index(devlink, port_index); - if (!devlink_port) { - err = -ENODEV; - goto out; - } - - err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, - info->snd_portid, info->snd_seq, 0, NULL); - if (err) - goto out; - - return genlmsg_reply(msg, info); - -out: - nlmsg_free(msg); - return err; + return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack); } static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, @@ -1391,10 +1347,11 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct netlink_ext_ack *extack = info->extack; struct devlink_port_new_attrs new_attrs = {}; struct devlink *devlink = info->user_ptr[0]; - unsigned int new_port_index; + struct devlink_port *devlink_port; + struct 
sk_buff *msg; int err; - if (!devlink->ops->port_new || !devlink->ops->port_del) + if (!devlink->ops->port_new) return -EOPNOTSUPP; if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] || @@ -1423,36 +1380,43 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, new_attrs.sfnum_valid = true; } - err = devlink->ops->port_new(devlink, &new_attrs, extack, - &new_port_index); + err = devlink->ops->port_new(devlink, &new_attrs, + extack, &devlink_port); if (err) return err; - err = devlink_port_new_notify(devlink, new_port_index, info); - if (err && err != -ENODEV) { - /* Fail to send the response; destroy newly created port. */ - devlink->ops->port_del(devlink, new_port_index, extack); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto err_out_port_del; } + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, + info->snd_portid, info->snd_seq, 0, NULL); + if (WARN_ON_ONCE(err)) + goto err_out_msg_free; + err = genlmsg_reply(msg, info); + if (err) + goto err_out_port_del; + return 0; + +err_out_msg_free: + nlmsg_free(msg); +err_out_port_del: + devlink_port->ops->port_del(devlink, devlink_port, NULL); return err; } static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; - unsigned int port_index; - if (!devlink->ops->port_del) + if (!devlink_port->ops->port_del) return -EOPNOTSUPP; - if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) { - NL_SET_ERR_MSG(extack, "Port index is not specified"); - return -EINVAL; - } - port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); - - return devlink->ops->port_del(devlink, port_index, extack); + return devlink_port->ops->port_del(devlink, devlink_port, extack); } static int @@ -6384,6 +6348,7 @@ const struct genl_small_ops devlink_nl_ops[56] = { .cmd = DEVLINK_CMD_PORT_DEL, .doit = devlink_nl_cmd_port_del_doit, .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_LINECARD_GET, @@ -6772,7 +6737,10 @@ void devlink_notify_unregister(struct devlink *devlink) static void devlink_port_type_warn(struct work_struct *work) { - WARN(true, "Type was not set for devlink port."); + struct devlink_port *port = container_of(to_delayed_work(work), + struct devlink_port, + type_warn_dw); + dev_warn(port->devlink->dev, "Type was not set for devlink port."); } static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) @@ -6809,7 +6777,7 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port) * @devlink: devlink * @devlink_port: devlink port * - * Initialize essencial stuff that is needed for functions + * Initialize essential stuff that is needed for functions * that may be called before devlink port registration. * Call to this function is optional and not needed * in case the driver does not use such functions. @@ -6830,7 +6798,7 @@ EXPORT_SYMBOL_GPL(devlink_port_init); * * @devlink_port: devlink port * - * Deinitialize essencial stuff that is in use for functions + * Deinitialize essential stuff that is in use for functions * that may be called after devlink port unregistration. * Call to this function is optional and not needed * in case the driver does not use such functions. 
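The devlink changes that follow move port callbacks from the global devlink_ops to a per-port devlink_port_ops registered together with the port. A hypothetical driver fragment, assuming the signatures shown in this diff:

#include <linux/errno.h>
#include <net/devlink.h>

/* Hypothetical driver: split/unsplit (and the port-function callbacks)
 * are now looked up on devlink_port->ops rather than devlink->ops.
 */
static int my_port_split(struct devlink *devlink, struct devlink_port *port,
			 unsigned int count, struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;	/* stub */
}

static const struct devlink_port_ops my_port_ops = {
	.port_split = my_port_split,
};

static int my_register_port(struct devlink *devlink,
			    struct devlink_port *port, unsigned int index)
{
	/* Passing NULL ops falls back to devlink_port_dummy_ops below */
	return devlink_port_register_with_ops(devlink, port, index,
					      &my_port_ops);
}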
@@ -6841,12 +6809,15 @@ void devlink_port_fini(struct devlink_port *devlink_port) } EXPORT_SYMBOL_GPL(devlink_port_fini); +static const struct devlink_port_ops devlink_port_dummy_ops = {}; + /** - * devl_port_register() - Register devlink port + * devl_port_register_with_ops() - Register devlink port * * @devlink: devlink * @devlink_port: devlink port * @port_index: driver-specific numerical identifier of the port + * @ops: port ops * * Register devlink port with provided port index. User can use * any indexing, even hw-related one. devlink_port structure @@ -6854,9 +6825,10 @@ EXPORT_SYMBOL_GPL(devlink_port_fini); * Note that the caller should take care of zeroing the devlink_port * structure. */ -int devl_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index) +int devl_port_register_with_ops(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index, + const struct devlink_port_ops *ops) { int err; @@ -6867,6 +6839,7 @@ int devl_port_register(struct devlink *devlink, devlink_port_init(devlink, devlink_port); devlink_port->registered = true; devlink_port->index = port_index; + devlink_port->ops = ops ? ops : &devlink_port_dummy_ops; spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL); @@ -6878,14 +6851,15 @@ int devl_port_register(struct devlink *devlink, devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); return 0; } -EXPORT_SYMBOL_GPL(devl_port_register); +EXPORT_SYMBOL_GPL(devl_port_register_with_ops); /** - * devlink_port_register - Register devlink port + * devlink_port_register_with_ops - Register devlink port * * @devlink: devlink * @devlink_port: devlink port * @port_index: driver-specific numerical identifier of the port + * @ops: port ops * * Register devlink port with provided port index. User can use * any indexing, even hw-related one. devlink_port structure @@ -6895,18 +6869,20 @@ EXPORT_SYMBOL_GPL(devl_port_register); * * Context: Takes and release devlink->lock <mutex>. */ -int devlink_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index) +int devlink_port_register_with_ops(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index, + const struct devlink_port_ops *ops) { int err; devl_lock(devlink); - err = devl_port_register(devlink, devlink_port, port_index); + err = devl_port_register_with_ops(devlink, devlink_port, + port_index, ops); devl_unlock(devlink); return err; } -EXPORT_SYMBOL_GPL(devlink_port_register); +EXPORT_SYMBOL_GPL(devlink_port_register_with_ops); /** * devl_port_unregister() - Unregister devlink port diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ab1afe67fd18..ccbdb98109f8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. 
Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + /* Prefer a local CPU port */ dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ @@ -1084,7 +1106,8 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) mutex_init(&dp->vlans_lock); INIT_LIST_HEAD(&dp->fdbs); INIT_LIST_HEAD(&dp->mdbs); INIT_LIST_HEAD(&dp->vlans); + INIT_LIST_HEAD(&dp->user_vlans); INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); diff --git a/net/dsa/port.c b/net/dsa/port.c index 71ba30538411..0ce8fd311c78 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1603,6 +1603,21 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config, return pcs; } +static int dsa_port_phylink_mac_prepare(struct phylink_config *config, + unsigned int mode, + phy_interface_t interface) +{ + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct dsa_switch *ds = dp->ds; + int err = 0; + + if (ds->ops->phylink_mac_prepare) + err = ds->ops->phylink_mac_prepare(ds, dp->index, mode, + interface); + + return err; +} + static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -1616,6 +1631,21 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config, ds->ops->phylink_mac_config(ds, dp->index, mode, state); } +static int dsa_port_phylink_mac_finish(struct phylink_config *config, + unsigned int mode, + phy_interface_t interface) +{ + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct dsa_switch *ds = dp->ds; + int err = 0; + + if (ds->ops->phylink_mac_finish) + err = ds->ops->phylink_mac_finish(ds, dp->index, mode, + interface); + + return err; +} + static void dsa_port_phylink_mac_an_restart(struct phylink_config *config) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); @@ -1671,7 +1701,9 @@ static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .validate = dsa_port_phylink_validate, .mac_select_pcs = dsa_port_phylink_mac_select_pcs, .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state, + .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, + .mac_finish = dsa_port_phylink_mac_finish, .mac_an_restart = dsa_port_phylink_mac_an_restart, .mac_link_down = dsa_port_phylink_mac_link_down, .mac_link_up = dsa_port_phylink_mac_link_up, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 165bb2cb8431..527b1d576460 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -27,6 +27,7 @@ #include "master.h" #include "netlink.h" #include "slave.h" +#include "switch.h" #include "tag.h" struct dsa_switchdev_event_work { @@ -161,8 +162,7 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, return 0; } -static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, - void *arg) +static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid) { struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; @@ 
-170,6 +170,28 @@ static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, ctx->addr, vid); } +static int dsa_slave_vlan_for_each(struct net_device *dev, + int (*cb)(void *arg, int vid), void *arg) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_vlan *v; + int err; + + lockdep_assert_held(&dev->addr_list_lock); + + err = cb(arg, 0); + if (err) + return err; + + list_for_each_entry(v, &dp->user_vlans, list) { + err = cb(arg, v->vid); + if (err) + return err; + } + + return 0; +} + static int dsa_slave_sync_uc(struct net_device *dev, const unsigned char *addr) { @@ -180,18 +202,14 @@ static int dsa_slave_sync_uc(struct net_device *dev, .addr = addr, .event = DSA_UC_ADD, }; - int err; dev_uc_add(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } static int dsa_slave_unsync_uc(struct net_device *dev, @@ -204,18 +222,14 @@ static int dsa_slave_unsync_uc(struct net_device *dev, .addr = addr, .event = DSA_UC_DEL, }; - int err; dev_uc_del(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } static int dsa_slave_sync_mc(struct net_device *dev, @@ -228,18 +242,14 @@ static int dsa_slave_sync_mc(struct net_device *dev, .addr = addr, .event = DSA_MC_ADD, }; - int err; dev_mc_add(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } static int dsa_slave_unsync_mc(struct net_device *dev, @@ -252,18 +262,14 @@ static int dsa_slave_unsync_mc(struct net_device *dev, .addr = addr, .event = DSA_MC_DEL, }; - int err; dev_mc_del(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } void dsa_slave_sync_ha(struct net_device *dev) @@ -1759,6 +1765,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, struct netlink_ext_ack extack = {0}; struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; + struct dsa_vlan *v; int ret; /* User port... 
*/ @@ -1782,8 +1789,17 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, !dsa_switch_supports_mc_filtering(ds)) return 0; + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) { + ret = -ENOMEM; + goto rollback; + } + netif_addr_lock_bh(dev); + v->vid = vid; + list_add_tail(&v->list, &dp->user_vlans); + if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, @@ -1803,6 +1819,12 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, dsa_flush_workqueue(); return 0; + +rollback: + dsa_port_host_vlan_del(dp, &vlan); + dsa_port_vlan_del(dp, &vlan); + + return ret; } static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, @@ -1816,6 +1838,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, }; struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; + struct dsa_vlan *v; int err; err = dsa_port_vlan_del(dp, &vlan); @@ -1832,6 +1855,15 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, netif_addr_lock_bh(dev); + v = dsa_vlan_find(&dp->user_vlans, &vlan); + if (!v) { + netif_addr_unlock_bh(dev); + return -ENOENT; + } + + list_del(&v->list); + kfree(v); + if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 8c9a9f94b756..1a42f9317334 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -673,8 +673,8 @@ static bool dsa_port_host_vlan_match(struct dsa_port *dp, return false; } -static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, - const struct switchdev_obj_port_vlan *vlan) +struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, + const struct switchdev_obj_port_vlan *vlan) { struct dsa_vlan *v; diff --git a/net/dsa/switch.h b/net/dsa/switch.h index 15e67b95eb6e..ea034677da15 100644 --- a/net/dsa/switch.h +++ b/net/dsa/switch.h @@ -111,6 +111,9 @@ struct dsa_notifier_master_state_info { bool operational; }; +struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, + const struct switchdev_obj_port_vlan *vlan); + int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); int dsa_broadcast(unsigned long e, void *v); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 6bb778e10461..4a51e0ec295c 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1436,15 +1436,26 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { - struct ethtool_wolinfo wol; + struct ethtool_wolinfo wol, cur_wol; int ret; - if (!dev->ethtool_ops->set_wol) + if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol) return -EOPNOTSUPP; + memset(&cur_wol, 0, sizeof(struct ethtool_wolinfo)); + cur_wol.cmd = ETHTOOL_GWOL; + dev->ethtool_ops->get_wol(dev, &cur_wol); + if (copy_from_user(&wol, useraddr, sizeof(wol))) return -EFAULT; + if (wol.wolopts & ~cur_wol.supported) + return -EINVAL; + + if (wol.wolopts == cur_wol.wolopts && + !memcmp(wol.sopass, cur_wol.sopass, sizeof(wol.sopass))) + return 0; + ret = dev->ethtool_ops->set_wol(dev, &wol); if (ret) return ret; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 08120095cc68..39a459b0111b 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -96,6 +96,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, int ret; if (!header) { + if 
(!require_dev) + return 0; NL_SET_ERR_MSG(extack, "request header missing"); return -EINVAL; } @@ -113,7 +115,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) { u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]); - dev = dev_get_by_index(net, ifindex); + dev = netdev_get_by_index(net, ifindex, &req_info->dev_tracker, + GFP_KERNEL); if (!dev) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_HEADER_DEV_INDEX], @@ -123,13 +126,14 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, /* if both ifindex and ifname are passed, they must match */ if (devname_attr && strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) { - dev_put(dev); + netdev_put(dev, &req_info->dev_tracker); NL_SET_ERR_MSG_ATTR(extack, header, "ifindex and name do not match"); return -ENODEV; } } else if (devname_attr) { - dev = dev_get_by_name(net, nla_data(devname_attr)); + dev = netdev_get_by_name(net, nla_data(devname_attr), + &req_info->dev_tracker, GFP_KERNEL); if (!dev) { NL_SET_ERR_MSG_ATTR(extack, devname_attr, "no device matches name"); @@ -142,8 +146,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, } req_info->dev = dev; - if (dev) - netdev_tracker_alloc(dev, &req_info->dev_tracker, GFP_KERNEL); req_info->flags = flags; return 0; } diff --git a/net/handshake/genl.c b/net/handshake/genl.c index 9f29efb1493e..233be5cbfec9 100644 --- a/net/handshake/genl.c +++ b/net/handshake/genl.c @@ -8,7 +8,7 @@ #include "genl.h" -#include <linux/handshake.h> +#include <uapi/linux/handshake.h> /* HANDSHAKE_CMD_ACCEPT - do */ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = { diff --git a/net/handshake/genl.h b/net/handshake/genl.h index 2c1f1aa6a02a..ae72a596f6cc 100644 --- a/net/handshake/genl.h +++ b/net/handshake/genl.h @@ -9,7 +9,7 @@ #include <net/netlink.h> #include <net/genetlink.h> -#include <linux/handshake.h> +#include <uapi/linux/handshake.h> int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info); int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 5a236aae2366..306f942c3b28 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -531,6 +531,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_add_master; + /* HSR forwarding offload supported in lower device? */ + if ((slave[0]->features & NETIF_F_HW_HSR_FWD) && + (slave[1]->features & NETIF_F_HW_HSR_FWD)) + hsr->fwd_offloaded = true; + res = register_netdevice(hsr_dev); if (res) goto err_unregister; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 5584c80a5c79..6851e33df7d1 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -208,6 +208,7 @@ struct hsr_priv { u8 net_id; /* for PRP, it occupies most significant 3 bits * of lan_id */ + bool fwd_offloaded; /* Forwarding offloaded to HW */ unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16)); /* Align to u16 boundary to avoid unaligned access * in ether_addr_equal diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index b70e6bbf6021..e5742f2a2d52 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -131,9 +131,14 @@ static int hsr_portdev_setup(struct hsr_priv *hsr, struct net_device *dev, struct hsr_port *master; int res; - res = dev_set_promiscuity(dev, 1); - if (res) - return res; + /* Don't use promiscuous mode for offload since L2 frame forward + * happens at the offloaded hardware. 
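hsr->fwd_offloaded, set in hsr_dev_finalize() above, is what lets hsr_portdev_setup() skip promiscuous mode here. A small sketch of the feature test, assuming both slave net_devices are already known:

#include <linux/netdevice.h>

/* Sketch of the check hsr_dev_finalize() performs: the ring only counts
 * as offloaded when both slave devices advertise NETIF_F_HW_HSR_FWD,
 * since frames must be forwarded in hardware on either port.
 */
static bool hsr_slaves_offload_fwd(const struct net_device *slave_a,
				   const struct net_device *slave_b)
{
	return (slave_a->features & NETIF_F_HW_HSR_FWD) &&
	       (slave_b->features & NETIF_F_HW_HSR_FWD);
}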
+ */ + if (!port->hsr->fwd_offloaded) { + res = dev_set_promiscuity(dev, 1); + if (res) + return res; + } master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); hsr_dev = master->dev; @@ -152,7 +157,9 @@ static int hsr_portdev_setup(struct hsr_priv *hsr, struct net_device *dev, fail_rx_handler: netdev_upper_dev_unlink(dev, hsr_dev); fail_upper_dev_link: - dev_set_promiscuity(dev, -1); + if (!port->hsr->fwd_offloaded) + dev_set_promiscuity(dev, -1); + return res; } diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index 35d384dfe29d..41a556be1017 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -120,6 +120,29 @@ ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr) } EXPORT_SYMBOL_GPL(ieee802154_hdr_push); +int ieee802154_mac_cmd_push(struct sk_buff *skb, void *f, + const void *pl, unsigned int pl_len) +{ + struct ieee802154_mac_cmd_frame *frame = f; + struct ieee802154_mac_cmd_pl *mac_pl = &frame->mac_pl; + struct ieee802154_hdr *mhr = &frame->mhr; + int ret; + + skb_reserve(skb, sizeof(*mhr)); + ret = ieee802154_hdr_push(skb, mhr); + if (ret < 0) + return ret; + + skb_reset_mac_header(skb); + skb->mac_len = ret; + + skb_put_data(skb, mac_pl, sizeof(*mac_pl)); + skb_put_data(skb, pl, pl_len); + + return 0; +} +EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_push); + int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon) { @@ -284,6 +307,19 @@ ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr) } EXPORT_SYMBOL_GPL(ieee802154_hdr_pull); +int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb, + struct ieee802154_mac_cmd_pl *mac_pl) +{ + if (!pskb_may_pull(skb, sizeof(*mac_pl))) + return -EINVAL; + + memcpy(mac_pl, skb->data, sizeof(*mac_pl)); + skb_pull(skb, sizeof(*mac_pl)); + + return 0; +} +EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_pl_pull); + int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) { diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 832e3c50816c..d610c1886160 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -233,7 +233,7 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { NLA_POLICY_RANGE(NLA_U8, NL802154_SCAN_DONE_REASON_FINISHED, NL802154_SCAN_DONE_REASON_ABORTED), [NL802154_ATTR_BEACON_INTERVAL] = - NLA_POLICY_MAX(NLA_U8, IEEE802154_MAX_SCAN_DURATION), + NLA_POLICY_MAX(NLA_U8, IEEE802154_ACTIVE_SCAN_DURATION), #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, @@ -1417,6 +1417,11 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + request = kzalloc(sizeof(*request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1426,6 +1431,7 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) type = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE]); switch (type) { + case NL802154_SCAN_ACTIVE: case NL802154_SCAN_PASSIVE: request->type = type; break; @@ -1583,6 +1589,11 @@ nl802154_send_beacons(struct sk_buff *skb, struct genl_info *info) return -EPERM; } + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + request = kzalloc(sizeof(*request), GFP_KERNEL); if (!request) return -ENOMEM; diff --git a/net/ieee802154/socket.c 
b/net/ieee802154/socket.c index 1fa2fe041ec0..00302e8b9615 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -162,7 +162,7 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); + return sk_ioctl(sk, cmd, (void __user *)arg); } } @@ -426,7 +426,6 @@ static const struct proto_ops ieee802154_raw_ops = { .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* DGRAM Sockets (802.15.4 dataframes) */ @@ -531,22 +530,21 @@ out: return err; } -static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int dgram_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); + *karg = sk_wmem_alloc_get(sk); - return put_user(amount, (int __user *)arg); + return 0; } case SIOCINQ: { struct sk_buff *skb; - unsigned long amount; - amount = 0; + *karg = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) { @@ -554,10 +552,10 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) * of this packet since that is all * that will be read. */ - amount = skb->len - ieee802154_hdr_length(skb); + *karg = skb->len - ieee802154_hdr_length(skb); } spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); + return 0; } } @@ -990,7 +988,6 @@ static const struct proto_ops ieee802154_dgram_ops = { .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static void ieee802154_sock_destruct(struct sock *sk) diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index e5d8439b9e45..c16db0b326fa 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -13,7 +13,7 @@ #define MAXNAME 32 #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) -#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \ +#define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(wpan_phy), \ MAXNAME) #define WPAN_PHY_PR_FMT "%s" diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4a76ebf793b8..9b2ca2fcc5a1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -100,6 +100,7 @@ #include <net/ip_fib.h> #include <net/inet_connection_sock.h> #include <net/gro.h> +#include <net/gso.h> #include <net/tcp.h> #include <net/udp.h> #include <net/udplite.h> @@ -732,6 +733,20 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_stream_connect); +void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk) +{ + sock_rps_record_flow(newsk); + WARN_ON(!((1 << newsk->sk_state) & + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); + + if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) + set_bit(SOCK_SUPPORT_ZC, &newsock->flags); + sock_graft(newsk, newsock); + + newsock->state = SS_CONNECTED; +} + /* * Accept a pending connection. The TCP layer now gives BSD semantics. */ @@ -745,24 +760,12 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); if (!sk2) - goto do_err; + return err; lock_sock(sk2); - - sock_rps_record_flow(sk2); - WARN_ON(!((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_SYN_RECV | - TCPF_CLOSE_WAIT | TCPF_CLOSE))); - - if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) - set_bit(SOCK_SUPPORT_ZC, &newsock->flags); - sock_graft(sk2, newsock); - - newsock->state = SS_CONNECTED; - err = 0; + __inet_accept(sock, newsock, sk2); release_sock(sk2); -do_err: - return err; + return 0; } EXPORT_SYMBOL(inet_accept); @@ -829,22 +832,20 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } EXPORT_SYMBOL(inet_sendmsg); -ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) +void inet_splice_eof(struct socket *sock) { - struct sock *sk = sock->sk; const struct proto *prot; + struct sock *sk = sock->sk; if (unlikely(inet_send_prepare(sk))) - return -EAGAIN; + return; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); - if (prot->sendpage) - return prot->sendpage(sk, page, offset, size, flags); - return sock_no_sendpage(sock, page, offset, size, flags); + if (prot->splice_eof) + prot->splice_eof(sock); } -EXPORT_SYMBOL(inet_sendpage); +EXPORT_SYMBOL_GPL(inet_splice_eof); INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); @@ -980,7 +981,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; default: if (sk->sk_prot->ioctl) - err = sk->sk_prot->ioctl(sk, cmd, arg); + err = sk_ioctl(sk, cmd, (void __user *)arg); else err = -ENOIOCTLCMD; break; @@ -1048,12 +1049,11 @@ const struct proto_ops inet_stream_ops = { #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif - .sendpage = inet_sendpage, + .splice_eof = inet_splice_eof, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, - .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, @@ -1082,7 +1082,7 @@ const struct proto_ops inet_dgram_ops = { .read_skb = udp_read_skb, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, + .splice_eof = inet_splice_eof, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, @@ -1113,7 +1113,7 @@ static const struct proto_ops inet_sockraw_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, + .splice_eof = inet_splice_eof, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 1b34cb9a7708..193bcc2acccc 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -12,15 +12,6 @@ struct bpfilter_umh_ops bpfilter_ops; EXPORT_SYMBOL_GPL(bpfilter_ops); -void bpfilter_umh_cleanup(struct umd_info *info) -{ - fput(info->pipe_to_umh); - fput(info->pipe_from_umh); - put_pid(info->tgid); - info->tgid = NULL; -} -EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup); - static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen, bool is_set) { @@ -38,7 +29,7 @@ static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, } if (bpfilter_ops.info.tgid && thread_group_exited(bpfilter_ops.info.tgid)) - bpfilter_umh_cleanup(&bpfilter_ops.info); + umd_cleanup_helper(&bpfilter_ops.info); if (!bpfilter_ops.info.tgid) { err = bpfilter_ops.start(); diff 
--git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 3969fa805679..10e96ed6c9e3 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/module.h> #include <net/gro.h> +#include <net/gso.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> @@ -340,6 +341,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 6c37c4f98cca..98b90107b5ab 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -8,7 +8,7 @@ #include "fou_nl.h" -#include <linux/fou.h> +#include <uapi/linux/fou.h> /* Global operation policy for fou */ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = { diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index dbd0780a5d34..63a6c4ed803d 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -9,7 +9,7 @@ #include <net/netlink.h> #include <net/genetlink.h> -#include <linux/fou.h> +#include <uapi/linux/fou.h> /* Global operation policy for fou */ extern const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1]; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 2b9cb5398335..311e70bfce40 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -11,6 +11,7 @@ #include <net/protocol.h> #include <net/gre.h> #include <net/gro.h> +#include <net/gso.h> static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1386787eaf1a..0cc19cfbb673 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -706,20 +706,23 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) out: release_sock(sk); if (newsk && mem_cgroup_sockets_enabled) { - int amt; + int amt = 0; /* atomically get the memory usage, set and charge the * newsk->sk_memcg. */ lock_sock(newsk); - /* The socket has not been accepted yet, no need to look at - * newsk->sk_wmem_queued. - */ - amt = sk_mem_pages(newsk->sk_forward_alloc + - atomic_read(&newsk->sk_rmem_alloc)); mem_cgroup_sk_alloc(newsk); - if (newsk->sk_memcg && amt) + if (newsk->sk_memcg) { + /* The socket has not been accepted yet, no need + * to look at newsk->sk_wmem_queued. + */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + } + + if (amt) mem_cgroup_charge_skmem(newsk->sk_memcg, amt, GFP_KERNEL | __GFP_NOFAIL); @@ -792,7 +795,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, - RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, @@ -830,7 +833,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, fl4 = &newinet->cork.fl.u.ip4; flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, - RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e55a20264960..81a1cce1a7d1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -189,10 +189,10 @@ static int ipgre_err(struct sk_buff *skb, u32 info, } #if IS_ENABLED(CONFIG_IPV6) - if (tpi->proto == htons(ETH_P_IPV6) && - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, - type, data_len)) - return 0; + if (tpi->proto == htons(ETH_P_IPV6) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, + type, data_len)) + return 0; #endif if (t->parms.iph.daddr == 0 || diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 61892268e8a6..6e70839257f7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include <net/arp.h> #include <net/icmp.h> #include <net/checksum.h> +#include <net/gso.h> #include <net/inetpeer.h> #include <net/inet_ecn.h> #include <net/lwtunnel.h> @@ -946,17 +947,6 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk } EXPORT_SYMBOL(ip_generic_getfrag); -static inline __wsum -csum_page(struct page *page, int offset, int copy) -{ - char *kaddr; - __wsum csum; - kaddr = kmap(page); - csum = csum_partial(kaddr + offset, copy, 0); - kunmap(page); - return csum; -} - static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, @@ -1048,6 +1038,15 @@ static int __ip_append_data(struct sock *sk, skb_zcopy_set(skb, uarg, &extra_uref); } } + } else if ((flags & MSG_SPLICE_PAGES) && length) { + if (inet->hdrincl) + return -EPERM; + if (rt->dst.dev->features & NETIF_F_SG && + getfrag == ip_generic_getfrag) + /* We need an empty buffer to attach stuff to */ + paged = true; + else + flags &= ~MSG_SPLICE_PAGES; } cork->length += length; @@ -1207,6 +1206,15 @@ alloc_new_skb: err = -EFAULT; goto error; } + } else if (flags & MSG_SPLICE_PAGES) { + struct msghdr *msg = from; + + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, + sk->sk_allocation); + if (err < 0) + goto error; + copy = err; + wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; @@ -1310,10 +1318,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, } /* - * ip_append_data() and ip_append_page() can make one large IP datagram - * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Each piece can be a page - * or non-page data. + * ip_append_data() can make one large IP datagram from many pieces of + * data. Each piece will be held on the socket until + * ip_push_pending_frames() is called. Each piece can be a page or + * non-page data. * * Not only UDP, other transport protocols - e.g. raw sockets - can use * this interface potentially. 
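[Editor's aside: with the hunks above, __ip_append_data() accepts MSG_SPLICE_PAGES and feeds the iterator to skb_splice_from_iter(), which is what lets in-kernel callers retire the sendpage path: instead of prot->sendpage(), the caller builds a bio_vec-backed msghdr and calls sendmsg. A hedged sketch of that caller-side pattern, mirroring the tcp_bpf_push() conversion later in this series; splice_one_page() is a hypothetical name.]

/* Sketch only: send one page by splicing rather than sendpage().
 * Assumes an in-kernel caller that already holds the socket lock,
 * as tcp_bpf does.
 */
static int splice_one_page(struct sock *sk, struct page *page,
			   int offset, size_t size)
{
	struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES };
	struct bio_vec bvec;

	bvec_set_page(&bvec, page, size, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
	return tcp_sendmsg_locked(sk, &msg, size);
}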
@@ -1346,134 +1354,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, from, length, transhdrlen, flags); } -ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, - int offset, size_t size, int flags) -{ - struct inet_sock *inet = inet_sk(sk); - struct sk_buff *skb; - struct rtable *rt; - struct ip_options *opt = NULL; - struct inet_cork *cork; - int hh_len; - int mtu; - int len; - int err; - unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize; - - if (inet->hdrincl) - return -EPERM; - - if (flags&MSG_PROBE) - return 0; - - if (skb_queue_empty(&sk->sk_write_queue)) - return -EINVAL; - - cork = &inet->cork.base; - rt = (struct rtable *)cork->dst; - if (cork->flags & IPCORK_OPT) - opt = cork->opt; - - if (!(rt->dst.dev->features & NETIF_F_SG)) - return -EOPNOTSUPP; - - hh_len = LL_RESERVED_SPACE(rt->dst.dev); - mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; - - fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; - - if (cork->length + size > maxnonfragsize - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - mtu - (opt ? opt->optlen : 0)); - return -EMSGSIZE; - } - - skb = skb_peek_tail(&sk->sk_write_queue); - if (!skb) - return -EINVAL; - - cork->length += size; - - while (size > 0) { - /* Check if the remaining data fits into current packet. */ - len = mtu - skb->len; - if (len < size) - len = maxfraglen - skb->len; - - if (len <= 0) { - struct sk_buff *skb_prev; - int alloclen; - - skb_prev = skb; - fraggap = skb_prev->len - maxfraglen; - - alloclen = fragheaderlen + hh_len + fraggap + 15; - skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); - if (unlikely(!skb)) { - err = -ENOBUFS; - goto error; - } - - /* - * Fill in the control structures - */ - skb->ip_summed = CHECKSUM_NONE; - skb->csum = 0; - skb_reserve(skb, hh_len); - - /* - * Find where to start putting bytes. - */ - skb_put(skb, fragheaderlen + fraggap); - skb_reset_network_header(skb); - skb->transport_header = (skb->network_header + - fragheaderlen); - if (fraggap) { - skb->csum = skb_copy_and_csum_bits(skb_prev, - maxfraglen, - skb_transport_header(skb), - fraggap); - skb_prev->csum = csum_sub(skb_prev->csum, - skb->csum); - pskb_trim_unique(skb_prev, maxfraglen); - } - - /* - * Put the packet on the pending queue. 
- */ - __skb_queue_tail(&sk->sk_write_queue, skb); - continue; - } - - if (len > size) - len = size; - - if (skb_append_pagefrags(skb, page, offset, len)) { - err = -EMSGSIZE; - goto error; - } - - if (skb->ip_summed == CHECKSUM_NONE) { - __wsum csum; - csum = csum_page(page, offset, len); - skb->csum = csum_block_add(skb->csum, csum, skb->len); - } - - skb_len_add(skb, len); - refcount_add(len, &sk->sk_wmem_alloc); - offset += len; - size -= len; - } - return 0; - -error: - cork->length -= size; - IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); - return err; -} - static void ip_cork_release(struct inet_cork *cork) { cork->flags &= ~IPCORK_OPT; @@ -1692,7 +1572,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, - unsigned int len, u64 transmit_time) + unsigned int len, u64 transmit_time, u32 txhash) { struct ip_options_data replyopts; struct ipcm_cookie ipc; @@ -1755,6 +1635,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; nskb->mono_delivery_time = !!transmit_time; + if (txhash) + skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); ip_push_pending_frames(sk, &fl4); } out: diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e90bc0aa85c7..c56b6fe6f0d7 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -665,6 +665,9 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; +/* DHCPACK can overwrite DNS if fallback was set upon first BOOTP reply */ +static int ic_nameservers_fallback __initdata; + /* * Initialize DHCP/BOOTP extension fields in the request. */ @@ -938,7 +941,8 @@ static void __init ic_do_bootp_ext(u8 *ext) if (servers > CONF_NAMESERVERS_MAX) servers = CONF_NAMESERVERS_MAX; for (i = 0; i < servers; i++) { - if (ic_nameservers[i] == NONE) + if (ic_nameservers[i] == NONE || + ic_nameservers_fallback) memcpy(&ic_nameservers[i], ext+1+4*i, 4); } break; @@ -1158,8 +1162,10 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_addrservaddr = b->iph.saddr; if (ic_gateway == NONE && b->relay_ip) ic_gateway = b->relay_ip; - if (ic_nameservers[0] == NONE) + if (ic_nameservers[0] == NONE) { ic_nameservers[0] = ic_servaddr; + ic_nameservers_fallback = 1; + } ic_got_reply = IC_BOOTP; drop_unlock: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index eec1f6df80d8..3f0c6d602fb7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1547,6 +1547,28 @@ out: return ret; } +/* Execute if this ioctl is a special mroute ioctl */ +int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + switch (cmd) { + /* These userspace buffers will be consumed by ipmr_ioctl() */ + case SIOCGETVIFCNT: { + struct sioc_vif_req buffer; + + return sock_ioctl_inout(sk, cmd, arg, &buffer, + sizeof(buffer)); + } + case SIOCGETSGCNT: { + struct sioc_sg_req buffer; + + return sock_ioctl_inout(sk, cmd, arg, &buffer, + sizeof(buffer)); + } + } + /* return code > 0 means that the ioctl was not executed */ + return 1; +} + /* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, sockptr_t optlen) @@ -1593,13 +1615,13 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, } /* The IP multicast ioctl support routines. 
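[Editor's aside: the new ipmr_sk_ioctl() above hands sock_ioctl_inout() an on-stack buffer, which is what lets ipmr_ioctl() below take a kernel pointer instead of a __user one. The helper's body is not part of this diff; the following is an assumed sketch of its likely shape, not a quote of the implementation.]

/* Assumed shape of sock_ioctl_inout(): copy the user buffer into the
 * caller-supplied kernel buffer, run the protocol handler on the kernel
 * copy, and copy the result back out on success.
 */
static int sock_ioctl_inout_sketch(struct sock *sk, unsigned int cmd,
				   void __user *arg, void *karg, size_t size)
{
	int ret;

	if (copy_from_user(karg, arg, size))
		return -EFAULT;

	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
	if (ret)
		return ret;

	return copy_to_user(arg, karg, size) ? -EFAULT : 0;
}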
*/ -int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) +int ipmr_ioctl(struct sock *sk, int cmd, void *arg) { - struct sioc_sg_req sr; - struct sioc_vif_req vr; struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); + struct sioc_vif_req *vr; + struct sioc_sg_req *sr; struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); @@ -1608,40 +1630,33 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) switch (cmd) { case SIOCGETVIFCNT: - if (copy_from_user(&vr, arg, sizeof(vr))) - return -EFAULT; - if (vr.vifi >= mrt->maxvif) + vr = (struct sioc_vif_req *)arg; + if (vr->vifi >= mrt->maxvif) return -EINVAL; - vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); + vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif); rcu_read_lock(); - vif = &mrt->vif_table[vr.vifi]; - if (VIF_EXISTS(mrt, vr.vifi)) { - vr.icount = READ_ONCE(vif->pkt_in); - vr.ocount = READ_ONCE(vif->pkt_out); - vr.ibytes = READ_ONCE(vif->bytes_in); - vr.obytes = READ_ONCE(vif->bytes_out); + vif = &mrt->vif_table[vr->vifi]; + if (VIF_EXISTS(mrt, vr->vifi)) { + vr->icount = READ_ONCE(vif->pkt_in); + vr->ocount = READ_ONCE(vif->pkt_out); + vr->ibytes = READ_ONCE(vif->bytes_in); + vr->obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); - if (copy_to_user(arg, &vr, sizeof(vr))) - return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: - if (copy_from_user(&sr, arg, sizeof(sr))) - return -EFAULT; + sr = (struct sioc_sg_req *)arg; rcu_read_lock(); - c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); + c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr); if (c) { - sr.pktcnt = c->_c.mfc_un.res.pkt; - sr.bytecnt = c->_c.mfc_un.res.bytes; - sr.wrong_if = c->_c.mfc_un.res.wrong_if; + sr->pktcnt = c->_c.mfc_un.res.pkt; + sr->bytecnt = c->_c.mfc_un.res.bytes; + sr->wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); - - if (copy_to_user(arg, &sr, sizeof(sr))) - return -EFAULT; return 0; } rcu_read_unlock(); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 5178a3f3cb53..25dd78cee179 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -49,13 +49,8 @@ #include <net/transp_v6.h> #endif -#define ping_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) -#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) - struct ping_table { - struct hlist_nulls_head hash[PING_HTABLE_SIZE]; + struct hlist_head hash[PING_HTABLE_SIZE]; spinlock_t lock; }; @@ -74,17 +69,16 @@ static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) } EXPORT_SYMBOL_GPL(ping_hash); -static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, - struct net *net, unsigned int num) +static inline struct hlist_head *ping_hashslot(struct ping_table *table, + struct net *net, unsigned int num) { return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } int ping_get_port(struct sock *sk, unsigned short ident) { - struct hlist_nulls_node *node; - struct hlist_nulls_head *hlist; struct inet_sock *isk, *isk2; + struct hlist_head *hlist; struct sock *sk2 = NULL; isk = inet_sk(sk); @@ -98,7 +92,7 @@ int ping_get_port(struct sock *sk, unsigned short ident) result++; /* avoid zero */ hlist = ping_hashslot(&ping_table, sock_net(sk), result); - ping_portaddr_for_each_entry(sk2, node, hlist) { + sk_for_each(sk2, hlist) { isk2 = inet_sk(sk2); if (isk2->inet_num == result) @@ -115,7 +109,7 @@ next_port: goto 
fail; } else { hlist = ping_hashslot(&ping_table, sock_net(sk), ident); - ping_portaddr_for_each_entry(sk2, node, hlist) { + sk_for_each(sk2, hlist) { isk2 = inet_sk(sk2); /* BUG? Why is this reuse and not reuseaddr? ping.c @@ -133,9 +127,8 @@ next_port: isk->inet_num = ident; if (sk_unhashed(sk)) { pr_debug("was not hashed\n"); - sock_hold(sk); + sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } spin_unlock(&ping_table.lock); @@ -161,9 +154,7 @@ void ping_unhash(struct sock *sk) pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); spin_lock(&ping_table.lock); - if (sk_hashed(sk)) { - hlist_nulls_del_init_rcu(&sk->sk_nulls_node); - sock_put(sk); + if (sk_del_node_init_rcu(sk)) { isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); @@ -175,10 +166,9 @@ EXPORT_SYMBOL_GPL(ping_unhash); /* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { - struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); + struct hlist_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; - struct hlist_nulls_node *hnode; int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { @@ -197,7 +187,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) return NULL; } - ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) { + sk_for_each_rcu(sk, hslot) { isk = inet_sk(sk); pr_debug("iterate\n"); @@ -715,7 +705,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip_options_data opt_copy; int free = 0; __be32 saddr, daddr, faddr; - u8 tos; + u8 tos, scope; int err; pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -779,11 +769,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) faddr = ipc.opt->opt.faddr; } tos = get_rttos(&ipc, inet); - if (sock_flag(sk, SOCK_LOCALROUTE) || - (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->opt.is_strictroute)) { - tos |= RTO_ONLINK; - } + scope = ip_sendmsg_scope(inet, &ipc, msg); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) @@ -793,10 +779,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!ipc.oif) ipc.oif = inet->uc_index; - flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, - RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, - sk->sk_uid); + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, + sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, + saddr, 0, 0, sk->sk_uid); fl4.fl4_icmp_type = user_icmph.type; fl4.fl4_icmp_code = user_icmph.code; @@ -1045,15 +1030,14 @@ static struct sock *ping_get_first(struct seq_file *seq, int start) for (state->bucket = start; state->bucket < PING_HTABLE_SIZE; ++state->bucket) { - struct hlist_nulls_node *node; - struct hlist_nulls_head *hslot; + struct hlist_head *hslot; hslot = &ping_table.hash[state->bucket]; - if (hlist_nulls_empty(hslot)) + if (hlist_empty(hslot)) continue; - sk_nulls_for_each(sk, node, hslot) { + sk_for_each(sk, hslot) { if (net_eq(sock_net(sk), net) && sk->sk_family == state->family) goto found; @@ -1070,7 +1054,7 @@ static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); 
} while (sk && (!net_eq(sock_net(sk), net))); if (!sk) @@ -1206,6 +1190,6 @@ void __init ping_init(void) int i; for (i = 0; i < PING_HTABLE_SIZE; i++) - INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i); + INIT_HLIST_HEAD(&ping_table.hash[i]); spin_lock_init(&ping_table.lock); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index eadf1c9ef7e4..7782ff5e6539 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -476,10 +476,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; + u8 tos, scope; int free = 0; __be32 daddr; __be32 saddr; - u8 tos; int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; @@ -575,9 +575,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = ipc.opt->opt.faddr; } } - tos = get_rtconn_flags(&ipc, sk); - if (msg->msg_flags & MSG_DONTROUTE) - tos |= RTO_ONLINK; + tos = get_rttos(&ipc, inet); + scope = ip_sendmsg_scope(inet, &ipc, msg); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) @@ -600,8 +599,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } } - flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, - RT_SCOPE_UNIVERSE, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), @@ -858,29 +856,29 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, return do_raw_getsockopt(sk, level, optname, optval, optlen); } -static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int raw_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); + *karg = sk_wmem_alloc_get(sk); + return 0; } case SIOCINQ: { struct sk_buff *skb; - int amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) - amount = skb->len; + *karg = skb->len; + else + *karg = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); + return 0; } default: #ifdef CONFIG_IP_MROUTE - return ipmr_ioctl(sk, cmd, (void __user *)arg); + return ipmr_ioctl(sk, cmd, karg); #else return -ENOIOCTLCMD; #endif diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26fb97d1d4d9..dc478a0574cb 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -418,8 +418,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) * no easy way to do this. */ flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, - RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, - inet_sk_flowi_flags(sk), + ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), + IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? 
opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 88dfe51e68f3..2afb0870648b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -34,6 +34,7 @@ static int ip_ttl_min = 1; static int ip_ttl_max = 255; static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; +static int tcp_syn_linear_timeouts_max = MAX_TCP_SYNCNT; static unsigned long ip_ping_group_range_min[] = { 0, 0 }; static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static u32 u32_max_div_HZ = UINT_MAX / HZ; @@ -1470,6 +1471,24 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &tcp_plb_max_cong_thresh, }, + { + .procname = "tcp_syn_linear_timeouts", + .data = &init_net.ipv4.sysctl_tcp_syn_linear_timeouts, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &tcp_syn_linear_timeouts_max, + }, + { + .procname = "tcp_shrink_window", + .data = &init_net.ipv4.sysctl_tcp_shrink_window, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8d20d9221238..e03e08745308 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -599,7 +599,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } EXPORT_SYMBOL(tcp_poll); -int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +int tcp_ioctl(struct sock *sk, int cmd, int *karg) { struct tcp_sock *tp = tcp_sk(sk); int answ; @@ -641,7 +641,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } - return put_user(answ, (int __user *)arg); + *karg = answ; + return 0; } EXPORT_SYMBOL(tcp_ioctl); @@ -838,7 +839,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, tss.len -= ret; spliced += ret; - if (!timeo) + if (!tss.len || !timeo) break; release_sock(sk); lock_sock(sk); @@ -858,12 +859,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } EXPORT_SYMBOL(tcp_splice_read); -struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, +struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule) { struct sk_buff *skb; - skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp); + skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp); if (likely(skb)) { bool mem_scheduled; @@ -922,11 +923,10 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -/* In some cases, both sendpage() and sendmsg() could have added - * an skb to the write queue, but failed adding payload on it. - * We need to remove it to consume less memory, but more - * importantly be able to generate EPOLLOUT for Edge Trigger epoll() - * users. +/* In some cases, sendmsg() could have added an skb to the write queue, + * but failed adding payload on it. We need to remove it to consume less + * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger + * epoll() users.
*/ void tcp_remove_empty_skb(struct sock *sk) { @@ -957,7 +957,7 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb) } -static int tcp_wmem_schedule(struct sock *sk, int copy) +int tcp_wmem_schedule(struct sock *sk, int copy) { int left; @@ -974,191 +974,6 @@ static int tcp_wmem_schedule(struct sock *sk, int copy) return min(copy, sk->sk_forward_alloc); } -static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, - struct page *page, int offset, size_t *size) -{ - struct sk_buff *skb = tcp_write_queue_tail(sk); - struct tcp_sock *tp = tcp_sk(sk); - bool can_coalesce; - int copy, i; - - if (!skb || (copy = size_goal - skb->len) <= 0 || - !tcp_skb_can_collapse_to(skb)) { -new_segment: - if (!sk_stream_memory_free(sk)) - return NULL; - - skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, - tcp_rtx_and_write_queues_empty(sk)); - if (!skb) - return NULL; - -#ifdef CONFIG_TLS_DEVICE - skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); -#endif - tcp_skb_entail(sk, skb); - copy = size_goal; - } - - if (copy > *size) - copy = *size; - - i = skb_shinfo(skb)->nr_frags; - can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { - tcp_mark_push(tp, skb); - goto new_segment; - } - if (tcp_downgrade_zcopy_pure(sk, skb)) - return NULL; - - copy = tcp_wmem_schedule(sk, copy); - if (!copy) - return NULL; - - if (can_coalesce) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - get_page(page); - skb_fill_page_desc_noacc(skb, i, page, offset, copy); - } - - if (!(flags & MSG_NO_SHARED_FRAGS)) - skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; - - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; - sk_wmem_queued_add(sk, copy); - sk_mem_charge(sk, copy); - WRITE_ONCE(tp->write_seq, tp->write_seq + copy); - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); - - *size = copy; - return skb; -} - -ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct tcp_sock *tp = tcp_sk(sk); - int mss_now, size_goal; - int err; - ssize_t copied; - long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - - if (IS_ENABLED(CONFIG_DEBUG_VM) && - WARN_ONCE(!sendpage_ok(page), - "page must not be a Slab one and have page_count > 0")) - return -EINVAL; - - /* Wait for a connection to finish. One exception is TCP Fast Open - * (passive side) where data is allowed to be sent before a connection - * is fully established. 
- */ - if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(sk)) { - err = sk_stream_wait_connect(sk, &timeo); - if (err != 0) - goto out_err; - } - - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - mss_now = tcp_send_mss(sk, &size_goal, flags); - copied = 0; - - err = -EPIPE; - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto out_err; - - while (size > 0) { - struct sk_buff *skb; - size_t copy = size; - - skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy); - if (!skb) - goto wait_for_space; - - if (!copied) - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; - - copied += copy; - offset += copy; - size -= copy; - if (!size) - goto out; - - if (skb->len < size_goal || (flags & MSG_OOB)) - continue; - - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); - } else if (skb == tcp_send_head(sk)) - tcp_push_one(sk, mss_now); - continue; - -wait_for_space: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); - - err = sk_stream_wait_memory(sk, &timeo); - if (err != 0) - goto do_error; - - mss_now = tcp_send_mss(sk, &size_goal, flags); - } - -out: - if (copied) { - tcp_tx_timestamp(sk, sk->sk_tsflags); - if (!(flags & MSG_SENDPAGE_NOTLAST)) - tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); - } - return copied; - -do_error: - tcp_remove_empty_skb(sk); - if (copied) - goto out; -out_err: - /* make sure we wake any epoll edge trigger waiter */ - if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { - sk->sk_write_space(sk); - tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); - } - return sk_stream_error(sk, flags, err); -} -EXPORT_SYMBOL_GPL(do_tcp_sendpages); - -int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - if (!(sk->sk_route_caps & NETIF_F_SG)) - return sock_no_sendpage_locked(sk, page, offset, size, flags); - - tcp_rate_check_app_limited(sk); /* is sending application-limited?
*/ - - return do_tcp_sendpages(sk, page, offset, size, flags); -} -EXPORT_SYMBOL_GPL(tcp_sendpage_locked); - -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - int ret; - - lock_sock(sk); - ret = tcp_sendpage_locked(sk, page, offset, size, flags); - release_sock(sk); - - return ret; -} -EXPORT_SYMBOL(tcp_sendpage); - void tcp_free_fastopen_req(struct tcp_sock *tp) { if (tp->fastopen_req) { @@ -1223,28 +1038,31 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; int process_backlog = 0; - bool zc = false; + int zc = 0; long timeo; flags = msg->msg_flags; if ((flags & MSG_ZEROCOPY) && size) { - skb = tcp_write_queue_tail(sk); - if (msg->msg_ubuf) { uarg = msg->msg_ubuf; - net_zcopy_get(uarg); - zc = sk->sk_route_caps & NETIF_F_SG; + if (sk->sk_route_caps & NETIF_F_SG) + zc = MSG_ZEROCOPY; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { + skb = tcp_write_queue_tail(sk); uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb)); if (!uarg) { err = -ENOBUFS; goto out_err; } - zc = sk->sk_route_caps & NETIF_F_SG; - if (!zc) + if (sk->sk_route_caps & NETIF_F_SG) + zc = MSG_ZEROCOPY; + else uarg_to_msgzc(uarg)->zerocopy = 0; } + } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) { + if (sk->sk_route_caps & NETIF_F_SG) + zc = MSG_SPLICE_PAGES; } if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) && @@ -1307,7 +1125,7 @@ restart: goto do_error; while (msg_data_left(msg)) { - int copy = 0; + ssize_t copy = 0; skb = tcp_write_queue_tail(sk); if (skb) @@ -1326,7 +1144,7 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, + skb = tcp_stream_alloc_skb(sk, sk->sk_allocation, first_skb); if (!skb) goto wait_for_space; @@ -1348,7 +1166,7 @@ new_segment: if (copy > msg_data_left(msg)) copy = msg_data_left(msg); - if (!zc) { + if (zc == 0) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1393,7 +1211,7 @@ new_segment: page_ref_inc(pfrag->page); } pfrag->offset += copy; - } else { + } else if (zc == MSG_ZEROCOPY) { /* First append to a fragless skb builds initial * pure zerocopy skb */ @@ -1414,6 +1232,30 @@ new_segment: if (err < 0) goto do_error; copy = err; + } else if (zc == MSG_SPLICE_PAGES) { + /* Splice in data if we can; copy if we can't. 
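[Editor's aside: in the tcp_sendmsg_locked() hunks above, zc becomes a tri-state rather than a bool: 0 means copy into page frags, MSG_ZEROCOPY means pin user pages and complete via the error queue, MSG_SPLICE_PAGES means attach the caller's pages directly. A condensed sketch of the selection, with the uarg bookkeeping elided; not verbatim kernel code.]

/* Condensed, not verbatim: how zc is chosen at the top of
 * tcp_sendmsg_locked(). NETIF_F_SG is required in both non-copy modes.
 */
int zc = 0;

if ((flags & MSG_ZEROCOPY) && size &&
    (sk->sk_route_caps & NETIF_F_SG))
	zc = MSG_ZEROCOPY;		/* completion via MSG_ERRQUEUE */
else if ((flags & MSG_SPLICE_PAGES) && size &&
	 (sk->sk_route_caps & NETIF_F_SG))
	zc = MSG_SPLICE_PAGES;		/* skb_splice_from_iter() below */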
*/ + if (tcp_downgrade_zcopy_pure(sk, skb)) + goto wait_for_space; + copy = tcp_wmem_schedule(sk, copy); + if (!copy) + goto wait_for_space; + + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, + sk->sk_allocation); + if (err < 0) { + if (err == -EMSGSIZE) { + tcp_mark_push(tp, skb); + goto new_segment; + } + goto do_error; + } + copy = err; + + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; + + sk_wmem_queued_add(sk, copy); + sk_mem_charge(sk, copy); } if (!copied) @@ -1459,7 +1301,9 @@ out: tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: - net_zcopy_put(uarg); + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ + if (uarg && !msg->msg_ubuf) + net_zcopy_put(uarg); return copied + copied_syn; do_error: @@ -1468,7 +1312,9 @@ do_error: if (copied + copied_syn) goto out; out_err: - net_zcopy_put_abort(uarg, true); + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ + if (uarg && !msg->msg_ubuf) + net_zcopy_put_abort(uarg, true); err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { @@ -1491,6 +1337,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } EXPORT_SYMBOL(tcp_sendmsg); +void tcp_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct tcp_sock *tp = tcp_sk(sk); + int mss_now, size_goal; + + if (!tcp_write_queue_tail(sk)) + return; + + lock_sock(sk); + mss_now = tcp_send_mss(sk, &size_goal, 0); + tcp_push(sk, 0, mss_now, tp->nonagle, size_goal); + release_sock(sk); +} +EXPORT_SYMBOL_GPL(tcp_splice_eof); + /* * Handle reading urgent data. BSD has very simple semantics for * this, no blocking and very strange errors 8) @@ -1877,7 +1739,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb, } #ifdef CONFIG_MMU -static const struct vm_operations_struct tcp_vm_ops = { +const struct vm_operations_struct tcp_vm_ops = { }; int tcp_mmap(struct file *file, struct socket *sock, @@ -2176,6 +2038,34 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk, } } +static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm, + unsigned long address, + bool *mmap_locked) +{ + struct vm_area_struct *vma = NULL; + +#ifdef CONFIG_PER_VMA_LOCK + vma = lock_vma_under_rcu(mm, address); +#endif + if (vma) { + if (!vma_is_tcp(vma)) { + vma_end_read(vma); + return NULL; + } + *mmap_locked = false; + return vma; + } + + mmap_read_lock(mm); + vma = vma_lookup(mm, address); + if (!vma || !vma_is_tcp(vma)) { + mmap_read_unlock(mm); + return NULL; + } + *mmap_locked = true; + return vma; +} + #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc, @@ -2193,6 +2083,7 @@ static int tcp_zerocopy_receive(struct sock *sk, u32 seq = tp->copied_seq; u32 total_bytes_to_map; int inq = tcp_inq(sk); + bool mmap_locked; int ret; zc->copybuf_len = 0; @@ -2217,13 +2108,10 @@ static int tcp_zerocopy_receive(struct sock *sk, return 0; } - mmap_read_lock(current->mm); - - vma = vma_lookup(current->mm, address); - if (!vma || vma->vm_ops != &tcp_vm_ops) { - mmap_read_unlock(current->mm); + vma = find_tcp_vma(current->mm, address, &mmap_locked); + if (!vma) return -EINVAL; - } + vma_len = min_t(unsigned long, zc->length, vma->vm_end - address); avail_len = min_t(u32, vma_len, inq); total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1); @@ -2297,7 +2185,10 @@ static int tcp_zerocopy_receive(struct sock *sk, zc, 
total_bytes_to_map); } out: - mmap_read_unlock(current->mm); + if (mmap_locked) + mmap_read_unlock(current->mm); + else + vma_end_read(vma); /* Try to copy straggler data. */ if (!ret) copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss); @@ -4680,8 +4571,10 @@ int tcp_abort(struct sock *sk, int err) return 0; } - /* Don't race with userspace socket closes such as tcp_close. */ - lock_sock(sk); + /* BPF context ensures sock locking. */ + if (!has_current_bpf_ctx()) + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); @@ -4705,7 +4598,8 @@ int tcp_abort(struct sock *sk, int err) bh_unlock_sock(sk); local_bh_enable(); tcp_write_queue_purge(sk); - release_sock(sk); + if (!has_current_bpf_ctx()) + release_sock(sk); return 0; } EXPORT_SYMBOL_GPL(tcp_abort); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 5f93918c063c..81f0dff69e0b 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -88,6 +88,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes, int flags, bool uncharge) { + struct msghdr msghdr = {}; bool apply = apply_bytes; struct scatterlist *sge; struct page *page; @@ -95,6 +96,7 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes, u32 off; while (1) { + struct bio_vec bvec; bool has_tx_ulp; sge = sk_msg_elem(msg, msg->sg.start); @@ -105,17 +107,20 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes, tcp_rate_check_app_limited(sk); retry: + msghdr.msg_flags = flags | MSG_SPLICE_PAGES; has_tx_ulp = tls_sw_has_ctx_tx(sk); - if (has_tx_ulp) { - flags |= MSG_SENDPAGE_NOPOLICY; - ret = kernel_sendpage_locked(sk, - page, off, size, flags); - } else { - ret = do_tcp_sendpages(sk, page, off, size, flags); - } + if (has_tx_ulp) + msghdr.msg_flags |= MSG_SENDPAGE_NOPOLICY; + + if (size < sge->length && msg->sg.start != msg->sg.end) + msghdr.msg_flags |= MSG_MORE; + bvec_set_page(&bvec, page, size, off); + iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size); + ret = tcp_sendmsg_locked(sk, &msghdr, size); if (ret <= 0) return ret; + if (apply) apply_bytes -= ret; msg->sg.size -= ret; @@ -481,7 +486,7 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) long timeo; int flags; - /* Don't let internal do_tcp_sendpages() flags through */ + /* Don't let internal flags through */ flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED); flags |= MSG_NO_SHARED_FRAGS; @@ -561,54 +566,6 @@ out_err: return copied ? copied : err; } -static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct sk_msg tmp, *msg = NULL; - int err = 0, copied = 0; - struct sk_psock *psock; - bool enospc = false; - - psock = sk_psock_get(sk); - if (unlikely(!psock)) - return tcp_sendpage(sk, page, offset, size, flags); - - lock_sock(sk); - if (psock->cork) { - msg = psock->cork; - } else { - msg = &tmp; - sk_msg_init(msg); - } - - /* Catch case where ring is full and sendpage is stalled. 
*/ - if (unlikely(sk_msg_full(msg))) - goto out_err; - - sk_msg_page_add(msg, page, size, offset); - sk_mem_charge(sk, size); - copied = size; - if (sk_msg_full(msg)) - enospc = true; - if (psock->cork_bytes) { - if (size > psock->cork_bytes) - psock->cork_bytes = 0; - else - psock->cork_bytes -= size; - if (psock->cork_bytes && !enospc) - goto out_err; - /* All cork bytes are accounted, rerun the prog. */ - psock->eval = __SK_NONE; - psock->cork_bytes = 0; - } - - err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags); -out_err: - release_sock(sk); - sk_psock_put(sk, psock); - return copied ? copied : err; -} - enum { TCP_BPF_IPV4, TCP_BPF_IPV6, @@ -638,7 +595,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; - prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage; prot[TCP_BPF_RX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_RX].recvmsg = tcp_bpf_recvmsg_parser; @@ -673,8 +629,7 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) * indeed valid assumptions. */ return ops->recvmsg == tcp_recvmsg && - ops->sendmsg == tcp_sendmsg && - ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; + ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP; } int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf8b22218dd4..6f072095211e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2867,7 +2867,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, } if (tcp_is_reno(tp)) { /* A Reno DUPACK means new data in F-RTO step 2.b above are - * delivered. Lower inflight to clock out (re)tranmissions. + * delivered. Lower inflight to clock out (re)transmissions. */ if (after(tp->snd_nxt, tp->high_seq) && num_dupack) tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 06d2573685ca..fd365de4d5ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) u64 transmit_time = 0; struct sock *ctl_sk; struct net *net; + u32 txhash = 0; /* Never send a reset in response to a reset. */ if (th->rst) @@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) inet_twsk(sk)->tw_priority : sk->sk_priority; transmit_time = tcp_transmit_time(sk); xfrm_sk_clone_policy(ctl_sk, sk); + txhash = (sk->sk_state == TCP_TIME_WAIT) ? 
+ inet_twsk(sk)->tw_txhash : sk->sk_txhash; } else { ctl_sk->sk_mark = 0; ctl_sk->sk_priority = 0; @@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, - transmit_time); + transmit_time, txhash); xfrm_sk_free_policy(ctl_sk); sock_net_set(ctl_sk, &init_net); @@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, - int reply_flags, u8 tos) + int reply_flags, u8 tos, u32 txhash) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, - transmit_time); + transmit_time, txhash); sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); @@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, - tw->tw_tos + tw->tw_tos, + tw->tw_txhash ); inet_twsk_put(tw); @@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 0, tcp_md5_do_lookup(sk, l3index, addr, AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, - ip_hdr(skb)->tos); + ip_hdr(skb)->tos, tcp_rsk(req)->txhash); } /* @@ -2963,7 +2967,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; - bool slow; uid_t uid; int ret; @@ -2971,7 +2974,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) return 0; if (sk_fullsock(sk)) - slow = lock_sock_fast(sk); + lock_sock(sk); if (unlikely(sk_unhashed(sk))) { ret = SEQ_SKIP; @@ -2995,7 +2998,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) unlock: if (sk_fullsock(sk)) - unlock_sock_fast(sk, slow); + release_sock(sk); return ret; } @@ -3113,7 +3116,7 @@ struct proto tcp_prot = { .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, - .sendpage = tcp_sendpage, + .splice_eof = tcp_splice_eof, .backlog_rcv = tcp_v4_do_rcv, .release_cb = tcp_release_cb, .hash = inet_hash, @@ -3276,6 +3279,9 @@ static int __net_init tcp_sk_init(struct net *net) else net->ipv4.tcp_congestion_control = &tcp_reno; + net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; + net->ipv4.sysctl_tcp_shrink_window = 0; + return 0; } @@ -3356,7 +3362,7 @@ static struct bpf_iter_reg tcp_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__tcp, sk_common), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .get_func_proto = bpf_iter_tcp_get_func_proto, .seq_info = &tcp_seq_info, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dac0d62120e6..04fc328727e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -303,6 +303,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_offset = tp->tsoffset; tcptw->tw_last_oow_ack_time = 0; tcptw->tw_tx_delay = tp->tcp_tx_delay; + tw->tw_txhash = sk->sk_txhash; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -311,7 +312,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = 
be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); - tw->tw_txhash = sk->sk_txhash; tw->tw_ipv6only = sk->sk_ipv6only; } #endif diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4851211aa60d..8311c38267b5 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -9,6 +9,7 @@ #include <linux/indirect_call_wrapper.h> #include <linux/skbuff.h> #include <net/gro.h> +#include <net/gso.h> #include <net/tcp.h> #include <net/protocol.h> @@ -295,7 +296,7 @@ out: return pp; } -int tcp_gro_complete(struct sk_buff *skb) +void tcp_gro_complete(struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); @@ -310,8 +311,6 @@ int tcp_gro_complete(struct sk_buff *skb) if (skb->encapsulation) skb->inner_transport_header = skb->transport_header; - - return 0; } EXPORT_SYMBOL(tcp_gro_complete); @@ -341,7 +340,8 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) if (NAPI_GRO_CB(skb)->is_atomic) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; - return tcp_gro_complete(skb); + tcp_gro_complete(skb); + return 0; } static const struct net_offload tcpv4_offload = { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cfe128b81a01..2cb39b6dad02 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -260,8 +260,8 @@ static u16 tcp_select_window(struct sock *sk) u32 old_win = tp->rcv_wnd; u32 cur_win = tcp_receive_window(tp); u32 new_win = __tcp_select_window(sk); + struct net *net = sock_net(sk); - /* Never shrink the offered window */ if (new_win < cur_win) { /* Danger Will Robinson! * Don't update rcv_wup/rcv_wnd here or else @@ -270,11 +270,14 @@ static u16 tcp_select_window(struct sock *sk) * * Relax Will Robinson. */ - if (new_win == 0) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPWANTZEROWINDOWADV); - new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); + if (!READ_ONCE(net->ipv4.sysctl_tcp_shrink_window) || !tp->rx_opt.rcv_wscale) { + /* Never shrink the offered window */ + if (new_win == 0) + NET_INC_STATS(net, LINUX_MIB_TCPWANTZEROWINDOWADV); + new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); + } } + tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; @@ -282,7 +285,7 @@ static u16 tcp_select_window(struct sock *sk) * scaled window. */ if (!tp->rx_opt.rcv_wscale && - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) + READ_ONCE(net->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -294,10 +297,9 @@ static u16 tcp_select_window(struct sock *sk) if (new_win == 0) { tp->pred_flags = 0; if (old_win) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTOZEROWINDOWADV); + NET_INC_STATS(net, LINUX_MIB_TCPTOZEROWINDOWADV); } else if (old_win == 0) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); + NET_INC_STATS(net, LINUX_MIB_TCPFROMZEROWINDOWADV); } return new_win; @@ -1530,7 +1532,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int nsize, old_factor; + int old_factor; long limit; int nlen; u8 flags; @@ -1538,9 +1540,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (WARN_ON(len > skb->len)) return -EINVAL; - nsize = skb_headlen(skb) - len; - if (nsize < 0) - nsize = 0; + DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb)); /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb. 
* We need some allowance to not penalize applications setting small @@ -1560,7 +1560,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, return -ENOMEM; /* Get a new skb... force flag on. */ - buff = tcp_stream_alloc_skb(sk, nsize, gfp, true); + buff = tcp_stream_alloc_skb(sk, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ skb_copy_decrypted(buff, skb); @@ -1568,7 +1568,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); - nlen = skb->len - len - nsize; + nlen = skb->len - len; buff->truesize += nlen; skb->truesize -= nlen; @@ -1626,13 +1626,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) struct skb_shared_info *shinfo; int i, k, eat; - eat = min_t(int, len, skb_headlen(skb)); - if (eat) { - __skb_pull(skb, eat); - len -= eat; - if (!len) - return 0; - } + DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb)); eat = len; k = 0; shinfo = skb_shinfo(skb); @@ -1671,12 +1665,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) TCP_SKB_CB(skb)->seq += len; - if (delta_truesize) { - skb->truesize -= delta_truesize; - sk_wmem_queued_add(sk, -delta_truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_uncharge(sk, delta_truesize); - } + skb->truesize -= delta_truesize; + sk_wmem_queued_add(sk, -delta_truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_uncharge(sk, delta_truesize); /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) @@ -2126,11 +2118,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, u8 flags; /* All of a TSO frame must be composed of paged data. */ - if (skb->len != skb->data_len) - return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, - skb, len, mss_now, gfp); + DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len); - buff = tcp_stream_alloc_skb(sk, 0, gfp, true); + buff = tcp_stream_alloc_skb(sk, gfp, true); if (unlikely(!buff)) return -ENOMEM; skb_copy_decrypted(buff, skb); @@ -2319,6 +2309,57 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) return true; } +static int tcp_clone_payload(struct sock *sk, struct sk_buff *to, + int probe_size) +{ + skb_frag_t *lastfrag = NULL, *fragto = skb_shinfo(to)->frags; + int i, todo, len = 0, nr_frags = 0; + const struct sk_buff *skb; + + if (!sk_wmem_schedule(sk, to->truesize + probe_size)) + return -ENOMEM; + + skb_queue_walk(&sk->sk_write_queue, skb) { + const skb_frag_t *fragfrom = skb_shinfo(skb)->frags; + + if (skb_headlen(skb)) + return -EINVAL; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, fragfrom++) { + if (len >= probe_size) + goto commit; + todo = min_t(int, skb_frag_size(fragfrom), + probe_size - len); + len += todo; + if (lastfrag && + skb_frag_page(fragfrom) == skb_frag_page(lastfrag) && + skb_frag_off(fragfrom) == skb_frag_off(lastfrag) + + skb_frag_size(lastfrag)) { + skb_frag_size_add(lastfrag, todo); + continue; + } + if (unlikely(nr_frags == MAX_SKB_FRAGS)) + return -E2BIG; + skb_frag_page_copy(fragto, fragfrom); + skb_frag_off_copy(fragto, fragfrom); + skb_frag_size_set(fragto, todo); + nr_frags++; + lastfrag = fragto++; + } + } +commit: + WARN_ON_ONCE(len != probe_size); + for (i = 0; i < nr_frags; i++) + skb_frag_ref(to, i); + + skb_shinfo(to)->nr_frags = nr_frags; + to->truesize += probe_size; + to->len += probe_size; + to->data_len += probe_size; + __skb_header_release(to); + return 0; +} + /* Create a new MTU probe if we are ready. 
* MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -2395,9 +2436,15 @@ static int tcp_mtu_probe(struct sock *sk) return -1; /* We're allowed to probe. Build it now. */ - nskb = tcp_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); + nskb = tcp_stream_alloc_skb(sk, GFP_ATOMIC, false); if (!nskb) return -1; + + /* build the payload, and be prepared to abort if this fails. */ + if (tcp_clone_payload(sk, nskb, probe_size)) { + consume_skb(nskb); + return -1; + } sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); @@ -2415,7 +2462,6 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); if (skb->len <= copy) { /* We've eaten all the data from this skb. @@ -2431,12 +2477,8 @@ static int tcp_mtu_probe(struct sock *sk) } else { TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & ~(TCPHDR_FIN|TCPHDR_PSH); - if (!skb_shinfo(skb)->nr_frags) { - skb_pull(skb, copy); - } else { - __pskb_trim_head(skb, copy); - tcp_set_skb_tso_segs(skb, mss_now); - } + __pskb_trim_head(skb, copy); + tcp_set_skb_tso_segs(skb, mss_now); TCP_SKB_CB(skb)->seq += copy; } @@ -2947,6 +2989,7 @@ u32 __tcp_select_window(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); /* MSS for the peer's data. Previous versions used mss_clamp * here. I don't know if the value based on our guesses * of peer's MSS is better for the performance. It's more correct @@ -2968,6 +3011,15 @@ u32 __tcp_select_window(struct sock *sk) if (mss <= 0) return 0; } + + /* Only allow window shrink if the sysctl is enabled and we have + * a non-zero scaling factor in effect. + */ + if (READ_ONCE(net->ipv4.sysctl_tcp_shrink_window) && tp->rx_opt.rcv_wscale) + goto shrink_window_allowed; + + /* do not allow window to shrink */ + if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; @@ -3022,6 +3074,36 @@ u32 __tcp_select_window(struct sock *sk) } return window; + +shrink_window_allowed: + /* new window should always be an exact multiple of scaling factor */ + free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale); + + if (free_space < (full_space >> 1)) { + icsk->icsk_ack.quick = 0; + + if (tcp_under_memory_pressure(sk)) + tcp_adjust_rcv_ssthresh(sk); + + /* if free space is too low, return a zero window */ + if (free_space < (allowed_space >> 4) || free_space < mss || + free_space < (1 << tp->rx_opt.rcv_wscale)) + return 0; + } + + if (free_space > tp->rcv_ssthresh) { + free_space = tp->rcv_ssthresh; + /* new window should always be an exact multiple of scaling factor + * + * For this case, we ALIGN "up" (increase free_space) because + * we know free_space is not zero here, it has been reduced from + * the memory-based limit, and rcv_ssthresh is not a hard limit + * (unlike sk_rcvbuf). 
+ */ + free_space = ALIGN(free_space, (1 << tp->rx_opt.rcv_wscale)); + } + + return free_space; } void tcp_skb_collapse_tstamp(struct sk_buff *skb, @@ -3746,8 +3828,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int space, err = 0; + struct page_frag *pfrag = sk_page_frag(sk); struct sk_buff *syn_data; + int space, err = 0; tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie)) @@ -3766,25 +3849,31 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) space = min_t(size_t, space, fo->size); - /* limit to order-0 allocations */ - space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); - - syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false); + if (space && + !skb_page_frag_refill(min_t(size_t, space, PAGE_SIZE), + pfrag, sk->sk_allocation)) + goto fallback; + syn_data = tcp_stream_alloc_skb(sk, sk->sk_allocation, false); if (!syn_data) goto fallback; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); if (space) { - int copied = copy_from_iter(skb_put(syn_data, space), space, - &fo->data->msg_iter); - if (unlikely(!copied)) { + space = min_t(size_t, space, pfrag->size - pfrag->offset); + space = tcp_wmem_schedule(sk, space); + } + if (space) { + space = copy_page_from_iter(pfrag->page, pfrag->offset, + space, &fo->data->msg_iter); + if (unlikely(!space)) { tcp_skb_tsorted_anchor_cleanup(syn_data); kfree_skb(syn_data); goto fallback; } - if (copied != space) { - skb_trim(syn_data, copied); - space = copied; - } + skb_fill_page_desc(syn_data, 0, pfrag->page, + pfrag->offset, space); + page_ref_inc(pfrag->page); + pfrag->offset += space; + skb_len_add(syn_data, space); skb_zcopy_set(syn_data, fo->uarg, NULL); } /* No more data pending in inet_wait_for_connect() */ @@ -3849,7 +3938,7 @@ int tcp_connect(struct sock *sk) return 0; } - buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true); + buff = tcp_stream_alloc_skb(sk, sk->sk_allocation, true); if (unlikely(!buff)) return -ENOBUFS; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 39eb947fe392..470f581eedd4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -234,14 +234,19 @@ static int tcp_write_timeout(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool expired = false, do_reset; - int retry_until; + int retry_until, max_retransmits; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? 
: READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); - expired = icsk->icsk_retransmits >= retry_until; + + max_retransmits = retry_until; + if (sk->sk_state == TCP_SYN_SENT) + max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts); + + expired = icsk->icsk_retransmits >= max_retransmits; } else { if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ @@ -587,8 +592,12 @@ out_reset_timer: icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); - } else { - /* Use normal (exponential) backoff */ + } else if (sk->sk_state != TCP_SYN_SENT || + icsk->icsk_backoff > + READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { + /* Use normal (exponential) backoff unless linear timeouts are + * activated. + */ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9482def1f310..42a96b3547c9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -103,6 +103,7 @@ #include <net/ip_tunnels.h> #include <net/route.h> #include <net/checksum.h> +#include <net/gso.h> #include <net/xfrm.h> #include <trace/events/udp.h> #include <linux/static_key.h> @@ -1062,8 +1063,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int free = 0; int connected = 0; __be32 daddr, faddr, saddr; + u8 tos, scope; __be16 dport; - u8 tos; int err, is_udplite = IS_UDPLITE(sk); int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1183,12 +1184,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) connected = 0; } tos = get_rttos(&ipc, inet); - if (sock_flag(sk, SOCK_LOCALROUTE) || - (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->opt.is_strictroute)) { - tos |= RTO_ONLINK; + scope = ip_sendmsg_scope(inet, &ipc, msg); + if (scope == RT_SCOPE_LINK) connected = 0; - } if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) @@ -1221,11 +1219,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, - RT_SCOPE_UNIVERSE, sk->sk_protocol, - flow_flags, - faddr, saddr, dport, inet->inet_sport, - sk->sk_uid); + flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope, + sk->sk_protocol, flow_flags, faddr, saddr, + dport, inet->inet_sport, sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); @@ -1329,58 +1325,20 @@ do_confirm: } EXPORT_SYMBOL(udp_sendmsg); -int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +void udp_splice_eof(struct socket *sock) { - struct inet_sock *inet = inet_sk(sk); + struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - int ret; - - if (flags & MSG_SENDPAGE_NOTLAST) - flags |= MSG_MORE; - if (!up->pending) { - struct msghdr msg = { .msg_flags = flags|MSG_MORE }; - - /* Call udp_sendmsg to specify destination address which - * sendpage interface can't pass. - * This will succeed only when the socket is connected. 
- */ - ret = udp_sendmsg(sk, &msg, 0); - if (ret < 0) - return ret; - } + if (!up->pending || READ_ONCE(up->corkflag)) + return; lock_sock(sk); - - if (unlikely(!up->pending)) { - release_sock(sk); - - net_dbg_ratelimited("cork failed\n"); - return -EINVAL; - } - - ret = ip_append_page(sk, &inet->cork.fl.u.ip4, - page, offset, size, flags); - if (ret == -EOPNOTSUPP) { - release_sock(sk); - return sock_no_sendpage(sk->sk_socket, page, offset, - size, flags); - } - if (ret < 0) { - udp_flush_pending_frames(sk); - goto out; - } - - up->len += size; - if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE))) - ret = udp_push_pending_frames(sk); - if (!ret) - ret = size; -out: + if (up->pending && !READ_ONCE(up->corkflag)) + udp_push_pending_frames(sk); release_sock(sk); - return ret; } +EXPORT_SYMBOL_GPL(udp_splice_eof); #define UDP_SKB_IS_STATELESS 0x80000000 @@ -1720,21 +1678,19 @@ static int first_packet_length(struct sock *sk) * IOCTL requests applicable to the UDP protocol */ -int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) +int udp_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); + *karg = sk_wmem_alloc_get(sk); + return 0; } case SIOCINQ: { - int amount = max_t(int, 0, first_packet_length(sk)); - - return put_user(amount, (int __user *)arg); + *karg = max_t(int, 0, first_packet_length(sk)); + return 0; } default: @@ -2927,7 +2883,8 @@ EXPORT_SYMBOL(udp_poll); int udp_abort(struct sock *sk, int err) { - lock_sock(sk); + if (!has_current_bpf_ctx()) + lock_sock(sk); /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing * with close() @@ -2940,7 +2897,8 @@ int udp_abort(struct sock *sk, int err) __udp_disconnect(sk, 0); out: - release_sock(sk); + if (!has_current_bpf_ctx()) + release_sock(sk); return 0; } @@ -2960,7 +2918,7 @@ struct proto udp_prot = { .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, + .splice_eof = udp_splice_eof, .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, @@ -2985,9 +2943,30 @@ EXPORT_SYMBOL(udp_prot); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo, - struct net *net) +static unsigned short seq_file_family(const struct seq_file *seq); +static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) { + unsigned short family = seq_file_family(seq); + + /* AF_UNSPEC is used as a match all */ + return ((family == AF_UNSPEC || family == sk->sk_family) && + net_eq(sock_net(sk), seq_file_net(seq))); +} + +#ifdef CONFIG_BPF_SYSCALL +static const struct seq_operations bpf_iter_udp_seq_ops; +#endif +static struct udp_table *udp_get_table_seq(struct seq_file *seq, + struct net *net) +{ + const struct udp_seq_afinfo *afinfo; + +#ifdef CONFIG_BPF_SYSCALL + if (seq->op == &bpf_iter_udp_seq_ops) + return net->ipv4.udp_table; +#endif + + afinfo = pde_data(file_inode(seq->file)); return afinfo->udp_table ? 
: net->ipv4.udp_table; } @@ -2995,16 +2974,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) { struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct udp_seq_afinfo *afinfo; struct udp_table *udptable; struct sock *sk; - if (state->bpf_seq_afinfo) - afinfo = state->bpf_seq_afinfo; - else - afinfo = pde_data(file_inode(seq->file)); - - udptable = udp_get_table_afinfo(afinfo, net); + udptable = udp_get_table_seq(seq, net); for (state->bucket = start; state->bucket <= udptable->mask; ++state->bucket) { @@ -3015,10 +2988,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) spin_lock_bh(&hslot->lock); sk_for_each(sk, &hslot->head) { - if (!net_eq(sock_net(sk), net)) - continue; - if (afinfo->family == AF_UNSPEC || - sk->sk_family == afinfo->family) + if (seq_sk_match(seq, sk)) goto found; } spin_unlock_bh(&hslot->lock); @@ -3032,22 +3002,14 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct udp_seq_afinfo *afinfo; struct udp_table *udptable; - if (state->bpf_seq_afinfo) - afinfo = state->bpf_seq_afinfo; - else - afinfo = pde_data(file_inode(seq->file)); - do { sk = sk_next(sk); - } while (sk && (!net_eq(sock_net(sk), net) || - (afinfo->family != AF_UNSPEC && - sk->sk_family != afinfo->family))); + } while (sk && !seq_sk_match(seq, sk)); if (!sk) { - udptable = udp_get_table_afinfo(afinfo, net); + udptable = udp_get_table_seq(seq, net); if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3093,15 +3055,9 @@ EXPORT_SYMBOL(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; - struct udp_seq_afinfo *afinfo; struct udp_table *udptable; - if (state->bpf_seq_afinfo) - afinfo = state->bpf_seq_afinfo; - else - afinfo = pde_data(file_inode(seq->file)); - - udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq)); + udptable = udp_get_table_seq(seq, seq_file_net(seq)); if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3154,6 +3110,143 @@ struct bpf_iter__udp { int bucket __aligned(8); }; +struct bpf_udp_iter_state { + struct udp_iter_state state; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + int offset; + struct sock **batch; + bool st_bucket_done; +}; + +static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, + unsigned int new_batch_sz); +static struct sock *bpf_iter_udp_batch(struct seq_file *seq) +{ + struct bpf_udp_iter_state *iter = seq->private; + struct udp_iter_state *state = &iter->state; + struct net *net = seq_file_net(seq); + struct udp_table *udptable; + unsigned int batch_sks = 0; + bool resized = false; + struct sock *sk; + + /* The current batch is done, so advance the bucket. */ + if (iter->st_bucket_done) { + state->bucket++; + iter->offset = 0; + } + + udptable = udp_get_table_seq(seq, net); + +again: + /* New batch for the next bucket. + * Iterate over the hash table to find a bucket with sockets matching + * the iterator attributes, and return the first matching socket from + * the bucket. The remaining matched sockets from the bucket are batched + * before releasing the bucket lock. This allows BPF programs that are + * called in seq_show to acquire the bucket lock if needed. 
+ */ + iter->cur_sk = 0; + iter->end_sk = 0; + iter->st_bucket_done = false; + batch_sks = 0; + + for (; state->bucket <= udptable->mask; state->bucket++) { + struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; + + if (hlist_empty(&hslot2->head)) { + iter->offset = 0; + continue; + } + + spin_lock_bh(&hslot2->lock); + udp_portaddr_for_each_entry(sk, &hslot2->head) { + if (seq_sk_match(seq, sk)) { + /* Resume from the last iterated socket at the + * offset in the bucket before iterator was stopped. + */ + if (iter->offset) { + --iter->offset; + continue; + } + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + batch_sks++; + } + } + spin_unlock_bh(&hslot2->lock); + + if (iter->end_sk) + break; + + /* Reset the current bucket's offset before moving to the next bucket. */ + iter->offset = 0; + } + + /* All done: no batch made. */ + if (!iter->end_sk) + return NULL; + + if (iter->end_sk == batch_sks) { + /* Batching is done for the current bucket; return the first + * socket to be iterated from the batch. + */ + iter->st_bucket_done = true; + goto done; + } + if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) { + resized = true; + /* After allocating a larger batch, retry one more time to grab + * the whole bucket. + */ + state->bucket--; + goto again; + } +done: + return iter->batch[0]; +} + +static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_udp_iter_state *iter = seq->private; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so unref the iter->cur_sk. + */ + if (iter->cur_sk < iter->end_sk) { + sock_put(iter->batch[iter->cur_sk++]); + ++iter->offset; + } + + /* After updating iter->cur_sk, check if there are more sockets + * available in the current bucket batch. + */ + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + /* Prepare a new batch. */ + sk = bpf_iter_udp_batch(seq); + + ++*pos; + return sk; +} + +static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos) +{ + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. 
+ */ + if (*pos) + return bpf_iter_udp_batch(seq); + + return SEQ_START_TOKEN; +} + static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) { @@ -3174,18 +3267,37 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v) struct bpf_prog *prog; struct sock *sk = v; uid_t uid; + int ret; if (v == SEQ_START_TOKEN) return 0; + lock_sock(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return udp_prog_seq_show(prog, &meta, v, uid, state->bucket); + ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket); + +unlock: + release_sock(sk); + return ret; +} + +static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); } static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) { + struct bpf_udp_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -3196,17 +3308,35 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) (void)udp_prog_seq_show(prog, &meta, v, 0, 0); } - udp_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) { + bpf_iter_udp_put_batch(iter); + iter->st_bucket_done = false; + } } static const struct seq_operations bpf_iter_udp_seq_ops = { - .start = udp_seq_start, - .next = udp_seq_next, + .start = bpf_iter_udp_seq_start, + .next = bpf_iter_udp_seq_next, .stop = bpf_iter_udp_seq_stop, .show = bpf_iter_udp_seq_show, }; #endif +static unsigned short seq_file_family(const struct seq_file *seq) +{ + const struct udp_seq_afinfo *afinfo; + +#ifdef CONFIG_BPF_SYSCALL + /* BPF iterator: bpf programs to filter sockets. 
*/ + if (seq->op == &bpf_iter_udp_seq_ops) + return AF_UNSPEC; +#endif + + /* Proc fs iterator */ + afinfo = pde_data(file_inode(seq->file)); + return afinfo->family; +} + const struct seq_operations udp_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, @@ -3415,38 +3545,55 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) -static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) +static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, + unsigned int new_batch_sz) { - struct udp_iter_state *st = priv_data; - struct udp_seq_afinfo *afinfo; - int ret; + struct sock **new_batch; - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); - if (!afinfo) + new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch), + GFP_USER | __GFP_NOWARN); + if (!new_batch) return -ENOMEM; - afinfo->family = AF_UNSPEC; - afinfo->udp_table = NULL; - st->bpf_seq_afinfo = afinfo; + bpf_iter_udp_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +#define INIT_BATCH_SZ 16 + +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct bpf_udp_iter_state *iter = priv_data; + int ret; + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) - kfree(afinfo); + return ret; + + ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ); + if (ret) + bpf_iter_fini_seq_net(priv_data); + return ret; } static void bpf_iter_fini_udp(void *priv_data) { - struct udp_iter_state *st = priv_data; + struct bpf_udp_iter_state *iter = priv_data; - kfree(st->bpf_seq_afinfo); bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); } static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, - .seq_priv_size = sizeof(struct udp_iter_state), + .seq_priv_size = sizeof(struct bpf_udp_iter_state), }; static struct bpf_iter_reg udp_reg_info = { @@ -3454,7 +3601,7 @@ static struct bpf_iter_reg udp_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &udp_seq_info, }; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 4ba7a88a1b1d..e1ff3a375996 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -19,8 +19,6 @@ int udp_getsockopt(struct sock *sk, int level, int optname, int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); -int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, - int flags); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1f01e15ca24f..75aa4de5b731 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -8,6 +8,7 @@ #include <linux/skbuff.h> #include <net/gro.h> +#include <net/gso.h> #include <net/udp.h> #include <net/protocol.h> #include <net/inet_common.h> diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 143f93a12f25..39ecdad1b50c 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -56,7 +56,6 @@ struct proto udplite_prot = { .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, diff --git a/net/ipv4/xfrm4_input.c 
b/net/ipv4/xfrm4_input.c index ad2afeef4f10..eac206a290d0 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -164,6 +164,7 @@ drop: kfree_skb(skb); return 0; } +EXPORT_SYMBOL(xfrm4_udp_encap_rcv); int xfrm4_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3797917237d0..5479da08ef40 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3633,8 +3633,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, idev->if_flags |= IF_READY; } - pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", - dev->name); + pr_debug("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", + dev->name); run_pending = 1; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2bbf13216a3d..5d593ddc0347 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -579,7 +579,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) prot = READ_ONCE(sk->sk_prot); if (!prot->ioctl) return -ENOIOCTLCMD; - return prot->ioctl(sk, cmd, arg); + return sk_ioctl(sk, cmd, (void __user *)arg); } /*NOTREACHED*/ return 0; @@ -695,9 +695,8 @@ const struct proto_ops inet6_stream_ops = { #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif - .sendpage = inet_sendpage, + .splice_eof = inet_splice_eof, .sendmsg_locked = tcp_sendmsg_locked, - .sendpage_locked = tcp_sendpage_locked, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, @@ -728,7 +727,6 @@ const struct proto_ops inet6_dgram_ops = { .recvmsg = inet6_recvmsg, /* retpoline's sake */ .read_skb = udp_read_skb, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 75c02992c520..a189e08370a5 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/module.h> #include <net/gro.h> +#include <net/gso.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> @@ -374,6 +375,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5fa0e37305d9..202fc3aaa83c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -126,9 +126,6 @@ static bool ip6_parse_tlv(bool hopbyhop, max_count = -max_count; } - if (skb_transport_offset(skb) + len > skb_headlen(skb)) - goto bad; - off += 2; len -= 2; @@ -402,11 +399,7 @@ looped_back: skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); - - if (!pskb_pull(skb, offset)) { - kfree_skb(skb); - return -1; - } + skb_pull(skb, offset); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); @@ -444,9 +437,9 @@ looped_back: kfree_skb(skb); return -1; } - } - hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + } hdr->segments_left--; addr = hdr->segments + hdr->segments_left; @@ -458,8 +451,6 @@ looped_back: ipv6_hdr(skb)->daddr = *addr; - skb_dst_drop(skb); - ip6_route_input(skb); if (skb_dst(skb)->error) { @@ -519,11 +510,7 @@ looped_back: skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); - - if (!pskb_pull(skb, offset)) { - kfree_skb(skb); - return -1; - } + skb_pull(skb, offset); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); @@ 
-545,11 +532,6 @@ looped_back: return 1; } - if (!pskb_may_pull(skb, sizeof(*hdr))) { - kfree_skb(skb); - return -1; - } - n = (hdr->hdrlen << 3) - hdr->pad - (16 - hdr->cmpre); r = do_div(n, (16 - hdr->cmpri)); /* checks if calculation was without remainder and n fits into @@ -569,12 +551,6 @@ looped_back: return -1; } - if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri, - hdr->cmpre))) { - kfree_skb(skb); - return -1; - } - hdr->segments_left--; i = n - hdr->segments_left; @@ -588,8 +564,7 @@ looped_back: ipv6_rpl_srh_decompress(ohdr, hdr, &ipv6_hdr(skb)->daddr, n); chdr = (struct ipv6_rpl_sr_hdr *)(buf + ((ohdr->hdrlen + 1) << 3)); - if ((ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST) || - (ipv6_addr_type(&ohdr->rpl_segaddr[i]) & IPV6_ADDR_MULTICAST)) { + if (ipv6_addr_is_multicast(&ohdr->rpl_segaddr[i])) { kfree_skb(skb); kfree(buf); return -1; @@ -827,7 +802,6 @@ looped_back: *addr = ipv6_hdr(skb)->daddr; ipv6_hdr(skb)->daddr = daddr; - skb_dst_drop(skb); ip6_route_input(skb); if (skb_dst(skb)->error) { skb_push(skb, skb->data - skb_network_header(skb)); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 00dc2e3b0184..d6314287338d 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -16,6 +16,7 @@ #include <net/tcp.h> #include <net/udp.h> #include <net/gro.h> +#include <net/gso.h> #include "ip6_offload.h" diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9554cf46ed88..1e8c90e97608 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -42,6 +42,7 @@ #include <net/sock.h> #include <net/snmp.h> +#include <net/gso.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> @@ -1589,6 +1590,15 @@ emsgsize: skb_zcopy_set(skb, uarg, &extra_uref); } } + } else if ((flags & MSG_SPLICE_PAGES) && length) { + if (inet_sk(sk)->hdrincl) + return -EPERM; + if (rt->dst.dev->features & NETIF_F_SG && + getfrag == ip_generic_getfrag) + /* We need an empty buffer to attach stuff to */ + paged = true; + else + flags &= ~MSG_SPLICE_PAGES; } /* @@ -1778,6 +1788,15 @@ alloc_new_skb: err = -EFAULT; goto error; } + } else if (flags & MSG_SPLICE_PAGES) { + struct msghdr *msg = from; + + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, + sk->sk_allocation); + if (err < 0) + goto error; + copy = err; + wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 51cf37abd142..cc3d5ad17257 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1879,11 +1879,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, /* * The IP multicast ioctl support routines. 
*/ - -int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) +int ip6mr_ioctl(struct sock *sk, int cmd, void *arg) { - struct sioc_sg_req6 sr; - struct sioc_mif_req6 vr; + struct sioc_sg_req6 *sr; + struct sioc_mif_req6 *vr; struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); @@ -1895,40 +1894,33 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) switch (cmd) { case SIOCGETMIFCNT_IN6: - if (copy_from_user(&vr, arg, sizeof(vr))) - return -EFAULT; - if (vr.mifi >= mrt->maxvif) + vr = (struct sioc_mif_req6 *)arg; + if (vr->mifi >= mrt->maxvif) return -EINVAL; - vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); + vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif); rcu_read_lock(); - vif = &mrt->vif_table[vr.mifi]; - if (VIF_EXISTS(mrt, vr.mifi)) { - vr.icount = READ_ONCE(vif->pkt_in); - vr.ocount = READ_ONCE(vif->pkt_out); - vr.ibytes = READ_ONCE(vif->bytes_in); - vr.obytes = READ_ONCE(vif->bytes_out); + vif = &mrt->vif_table[vr->mifi]; + if (VIF_EXISTS(mrt, vr->mifi)) { + vr->icount = READ_ONCE(vif->pkt_in); + vr->ocount = READ_ONCE(vif->pkt_out); + vr->ibytes = READ_ONCE(vif->bytes_in); + vr->obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); - - if (copy_to_user(arg, &vr, sizeof(vr))) - return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: - if (copy_from_user(&sr, arg, sizeof(sr))) - return -EFAULT; + sr = (struct sioc_sg_req6 *)arg; rcu_read_lock(); - c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); + c = ip6mr_cache_find(mrt, &sr->src.sin6_addr, + &sr->grp.sin6_addr); if (c) { - sr.pktcnt = c->_c.mfc_un.res.pkt; - sr.bytecnt = c->_c.mfc_un.res.bytes; - sr.wrong_if = c->_c.mfc_un.res.wrong_if; + sr->pktcnt = c->_c.mfc_un.res.pkt; + sr->bytecnt = c->_c.mfc_un.res.bytes; + sr->wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); - - if (copy_to_user(arg, &sr, sizeof(sr))) - return -EFAULT; return 0; } rcu_read_unlock(); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 44ee7a2e72ac..ac1cef094c5f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1118,29 +1118,29 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return do_rawv6_getsockopt(sk, level, optname, optval, optlen); } -static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int rawv6_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); + *karg = sk_wmem_alloc_get(sk); + return 0; } case SIOCINQ: { struct sk_buff *skb; - int amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) - amount = skb->len; + *karg = skb->len; + else + *karg = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); + return 0; } default: #ifdef CONFIG_IPV6_MROUTE - return ip6mr_ioctl(sk, cmd, (void __user *)arg); + return ip6mr_ioctl(sk, cmd, karg); #else return -ENOIOCTLCMD; #endif @@ -1296,7 +1296,6 @@ const struct proto_ops inet6_sockraw_ops = { .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 392aaa373b66..64e873f5895f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3360,6 +3360,7 @@ static int ip6_route_check_nh_onlink(struct net *net, static int ip6_route_check_nh(struct net *net, struct fib6_config *cfg, 
struct net_device **_dev, + netdevice_tracker *dev_tracker, struct inet6_dev **idev) { const struct in6_addr *gw_addr = &cfg->fc_gateway; @@ -3404,7 +3405,7 @@ static int ip6_route_check_nh(struct net *net, err = -EHOSTUNREACH; } else { *_dev = dev = res.nh->fib_nh_dev; - dev_hold(dev); + netdev_hold(dev, dev_tracker, GFP_ATOMIC); *idev = in6_dev_get(dev); } @@ -3412,7 +3413,9 @@ static int ip6_route_check_nh(struct net *net, } static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, - struct net_device **_dev, struct inet6_dev **idev, + struct net_device **_dev, + netdevice_tracker *dev_tracker, + struct inet6_dev **idev, struct netlink_ext_ack *extack) { const struct in6_addr *gw_addr = &cfg->fc_gateway; @@ -3453,7 +3456,8 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, if (cfg->fc_flags & RTNH_F_ONLINK) err = ip6_route_check_nh_onlink(net, cfg, dev, extack); else - err = ip6_route_check_nh(net, cfg, _dev, idev); + err = ip6_route_check_nh(net, cfg, _dev, dev_tracker, + idev); rcu_read_unlock(); @@ -3503,6 +3507,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { + netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; int addr_type; @@ -3520,7 +3525,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, err = -ENODEV; if (cfg->fc_ifindex) { - dev = dev_get_by_index(net, cfg->fc_ifindex); + dev = netdev_get_by_index(net, cfg->fc_ifindex, + dev_tracker, gfp_flags); if (!dev) goto out; idev = in6_dev_get(dev); @@ -3554,11 +3560,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, /* hold loopback dev/idev if we haven't done so. */ if (dev != net->loopback_dev) { if (dev) { - dev_put(dev); + netdev_put(dev, dev_tracker); in6_dev_put(idev); } dev = net->loopback_dev; - dev_hold(dev); + netdev_hold(dev, dev_tracker, gfp_flags); idev = in6_dev_get(dev); if (!idev) { err = -ENODEV; @@ -3569,7 +3575,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, } if (cfg->fc_flags & RTF_GATEWAY) { - err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + err = ip6_validate_gw(net, cfg, &dev, dev_tracker, + &idev, extack); if (err) goto out; @@ -3610,8 +3617,6 @@ pcpu_alloc: } fib6_nh->fib_nh_dev = dev; - netdev_tracker_alloc(dev, &fib6_nh->fib_nh_dev_tracker, gfp_flags); - fib6_nh->fib_nh_oif = dev->ifindex; err = 0; out: @@ -3621,7 +3626,7 @@ out: if (err) { lwtstate_put(fib6_nh->fib_nh_lws); fib6_nh->fib_nh_lws = NULL; - dev_put(dev); + netdev_put(dev, dev_tracker); } return err; diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c index d1876f192225..e186998bfbf7 100644 --- a/net/ipv6/rpl.c +++ b/net/ipv6/rpl.c @@ -29,13 +29,6 @@ static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i) return (void *)&hdr->rpl_segdata[i * IPV6_PFXTAIL_LEN(hdr->cmpri)]; } -size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri, - unsigned char cmpre) -{ - return sizeof(struct ipv6_rpl_sr_hdr) + (n * IPV6_PFXTAIL_LEN(cmpri)) + - IPV6_PFXTAIL_LEN(cmpre); -} - void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr, const struct ipv6_rpl_sr_hdr *inhdr, const struct in6_addr *daddr, unsigned char n) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 34db881204d2..03b877ff4558 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -470,8 +470,6 @@ static int seg6_input_core(struct net *net, struct sock *sk, dst = dst_cache_get(&slwt->cache); 
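/*
 * Editor's note on the seg6_input_core() hunk above: the unconditional
 * skb_dst_drop() before the lookup goes away; the skb's dst is now dropped
 * only on the cache-hit path, immediately before skb_dst_set(), while a
 * cache miss lets ip6_route_input() install the freshly resolved dst.
 * Below is a minimal sketch of the per-CPU dst_cache pattern this code
 * relies on; example_get_cached_dst() is a hypothetical helper for
 * illustration, not part of the patch.
 */
#include <linux/ipv6.h>
#include <net/dst_cache.h>
#include <net/ip6_route.h>

static struct dst_entry *example_get_cached_dst(struct dst_cache *cache,
						struct sk_buff *skb)
{
	struct dst_entry *dst;

	preempt_disable();
	dst = dst_cache_get(cache);	/* lockless per-CPU cache lookup */
	preempt_enable();

	if (!dst) {
		/* Miss: full routing lookup, then seed the cache. */
		ip6_route_input(skb);
		dst = skb_dst(skb);
		if (!dst->error) {
			preempt_disable();
			dst_cache_set_ip6(cache, dst, &ipv6_hdr(skb)->saddr);
			preempt_enable();
		}
	} else {
		/* Hit: replace whatever dst the skb carried on input. */
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);
	}
	return dst;
}
/* End of editor's note; the hunk continues below. */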
preempt_enable(); - skb_dst_drop(skb); - if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); @@ -482,6 +480,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, preempt_enable(); } } else { + skb_dst_drop(skb); skb_dst_set(skb, dst); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7132eb213a7a..40dd92a2f480 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -93,12 +93,8 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, * This avoids a dereference and allow compiler optimizations. * It is a specialized version of inet6_sk_generic(). */ -static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) -{ - unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); - - return (struct ipv6_pinfo *)(((u8 *)sk) + offset); -} +#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ + struct tcp6_sock, tcp)->inet6) static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { @@ -533,7 +529,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = tcp_inet6_sk(sk); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; @@ -2154,7 +2150,7 @@ struct proto tcpv6_prot = { .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, - .sendpage = tcp_sendpage, + .splice_eof = tcp_splice_eof, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, .hash = inet6_hash, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 39db5a226855..bf0c957e4b5e 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -36,7 +36,8 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) &iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; - return tcp_gro_complete(skb); + tcp_gro_complete(skb); + return 0; } static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e5a337e6b970..317b01c9bc39 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1653,6 +1653,20 @@ do_confirm: } EXPORT_SYMBOL(udpv6_sendmsg); +static void udpv6_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct udp_sock *up = udp_sk(sk); + + if (!up->pending || READ_ONCE(up->corkflag)) + return; + + lock_sock(sk); + if (up->pending && !READ_ONCE(up->corkflag)) + udp_v6_push_pending_frames(sk); + release_sock(sk); +} + void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); @@ -1764,6 +1778,7 @@ struct proto udpv6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, + .splice_eof = udpv6_splice_eof, .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index c39c1e32f980..ad3b8726873e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -14,6 +14,7 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" #include <net/gro.h> +#include <net/gso.h> static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 04cbeefd8982..4907ab241d6b 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; __u16 encap_type = up->encap_type; + if 
(skb->protocol == htons(ETH_P_IP)) + return xfrm4_udp_encap_rcv(sk, skb); + /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index cfe828bd7fc6..393f01b2a7e6 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -581,12 +581,10 @@ static void kcm_report_tx_retry(struct kcm_sock *kcm) */ static int kcm_write_msgs(struct kcm_sock *kcm) { + unsigned int total_sent = 0; struct sock *sk = &kcm->sk; struct kcm_psock *psock; - struct sk_buff *skb, *head; - struct kcm_tx_msg *txm; - unsigned short fragidx, frag_offset; - unsigned int sent, total_sent = 0; + struct sk_buff *head; int ret = 0; kcm->tx_wait_more = false; @@ -600,72 +598,57 @@ static int kcm_write_msgs(struct kcm_sock *kcm) if (skb_queue_empty(&sk->sk_write_queue)) return 0; - kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; - - } else if (skb_queue_empty(&sk->sk_write_queue)) { - return 0; + kcm_tx_msg(skb_peek(&sk->sk_write_queue))->started_tx = false; } - head = skb_peek(&sk->sk_write_queue); - txm = kcm_tx_msg(head); +retry: + while ((head = skb_peek(&sk->sk_write_queue))) { + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, + }; + struct kcm_tx_msg *txm = kcm_tx_msg(head); + struct sk_buff *skb; + unsigned int msize; + int i; - if (txm->sent) { - /* Send of first skbuff in queue already in progress */ - if (WARN_ON(!psock)) { - ret = -EINVAL; - goto out; + if (!txm->started_tx) { + psock = reserve_psock(kcm); + if (!psock) + goto out; + skb = head; + txm->frag_offset = 0; + txm->sent = 0; + txm->started_tx = true; + } else { + if (WARN_ON(!psock)) { + ret = -EINVAL; + goto out; + } + skb = txm->frag_skb; } - sent = txm->sent; - frag_offset = txm->frag_offset; - fragidx = txm->fragidx; - skb = txm->frag_skb; - goto do_frag; - } - -try_again: - psock = reserve_psock(kcm); - if (!psock) - goto out; - - do { - skb = head; - txm = kcm_tx_msg(head); - sent = 0; - -do_frag_list: if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { ret = -EINVAL; goto out; } - for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; - fragidx++) { - skb_frag_t *frag; + msize = 0; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + msize += skb_shinfo(skb)->frags[i].bv_len; - frag_offset = 0; -do_frag: - frag = &skb_shinfo(skb)->frags[fragidx]; - if (WARN_ON(!skb_frag_size(frag))) { - ret = -EINVAL; - goto out; - } + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, + skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, + msize); + iov_iter_advance(&msg.msg_iter, txm->frag_offset); - ret = kernel_sendpage(psock->sk->sk_socket, - skb_frag_page(frag), - skb_frag_off(frag) + frag_offset, - skb_frag_size(frag) - frag_offset, - MSG_DONTWAIT); + do { + ret = sock_sendmsg(psock->sk->sk_socket, &msg); if (ret <= 0) { if (ret == -EAGAIN) { /* Save state to try again when there's * write space on the socket */ - txm->sent = sent; - txm->frag_offset = frag_offset; - txm->fragidx = fragidx; txm->frag_skb = skb; - ret = 0; goto out; } @@ -678,45 +661,44 @@ do_frag: kcm_abort_tx_psock(psock, ret ? 
-ret : EPIPE, true); unreserve_psock(kcm); + psock = NULL; - txm->sent = 0; + txm->started_tx = false; kcm_report_tx_retry(kcm); ret = 0; - - goto try_again; + goto retry; } - sent += ret; - frag_offset += ret; + txm->sent += ret; + txm->frag_offset += ret; KCM_STATS_ADD(psock->stats.tx_bytes, ret); - if (frag_offset < skb_frag_size(frag)) { - /* Not finished with this frag */ - goto do_frag; - } - } + } while (msg.msg_iter.count > 0); if (skb == head) { if (skb_has_frag_list(skb)) { - skb = skb_shinfo(skb)->frag_list; - goto do_frag_list; + txm->frag_skb = skb_shinfo(skb)->frag_list; + txm->frag_offset = 0; + continue; } } else if (skb->next) { - skb = skb->next; - goto do_frag_list; + txm->frag_skb = skb->next; + txm->frag_offset = 0; + continue; } /* Successfully sent the whole packet, account for it. */ + sk->sk_wmem_queued -= txm->sent; + total_sent += txm->sent; skb_dequeue(&sk->sk_write_queue); kfree_skb(head); - sk->sk_wmem_queued -= sent; - total_sent += sent; KCM_STATS_INCR(psock->stats.tx_msgs); - } while ((head = skb_peek(&sk->sk_write_queue))); + } out: if (!head) { /* Done with all queued messages. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); - unreserve_psock(kcm); + if (psock) + unreserve_psock(kcm); } /* Check if write space is available */ @@ -761,149 +743,6 @@ static void kcm_push(struct kcm_sock *kcm) kcm_write_msgs(kcm); } -static ssize_t kcm_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) - -{ - struct sock *sk = sock->sk; - struct kcm_sock *kcm = kcm_sk(sk); - struct sk_buff *skb = NULL, *head = NULL; - long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - bool eor; - int err = 0; - int i; - - if (flags & MSG_SENDPAGE_NOTLAST) - flags |= MSG_MORE; - - /* No MSG_EOR from splice, only look at MSG_MORE */ - eor = !(flags & MSG_MORE); - - lock_sock(sk); - - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - err = -EPIPE; - if (sk->sk_err) - goto out_error; - - if (kcm->seq_skb) { - /* Previously opened message */ - head = kcm->seq_skb; - skb = kcm_tx_msg(head)->last_skb; - i = skb_shinfo(skb)->nr_frags; - - if (skb_can_coalesce(skb, i, page, offset)) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); - skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; - goto coalesced; - } - - if (i >= MAX_SKB_FRAGS) { - struct sk_buff *tskb; - - tskb = alloc_skb(0, sk->sk_allocation); - while (!tskb) { - kcm_push(kcm); - err = sk_stream_wait_memory(sk, &timeo); - if (err) - goto out_error; - } - - if (head == skb) - skb_shinfo(head)->frag_list = tskb; - else - skb->next = tskb; - - skb = tskb; - skb->ip_summed = CHECKSUM_UNNECESSARY; - i = 0; - } - } else { - /* Call the sk_stream functions to manage the sndbuf mem. 
*/ - if (!sk_stream_memory_free(sk)) { - kcm_push(kcm); - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - err = sk_stream_wait_memory(sk, &timeo); - if (err) - goto out_error; - } - - head = alloc_skb(0, sk->sk_allocation); - while (!head) { - kcm_push(kcm); - err = sk_stream_wait_memory(sk, &timeo); - if (err) - goto out_error; - } - - skb = head; - i = 0; - } - - get_page(page); - skb_fill_page_desc_noacc(skb, i, page, offset, size); - skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; - -coalesced: - skb->len += size; - skb->data_len += size; - skb->truesize += size; - sk->sk_wmem_queued += size; - sk_mem_charge(sk, size); - - if (head != skb) { - head->len += size; - head->data_len += size; - head->truesize += size; - } - - if (eor) { - bool not_busy = skb_queue_empty(&sk->sk_write_queue); - - /* Message complete, queue it on send buffer */ - __skb_queue_tail(&sk->sk_write_queue, head); - kcm->seq_skb = NULL; - KCM_STATS_INCR(kcm->stats.tx_msgs); - - if (flags & MSG_BATCH) { - kcm->tx_wait_more = true; - } else if (kcm->tx_wait_more || not_busy) { - err = kcm_write_msgs(kcm); - if (err < 0) { - /* We got a hard error in write_msgs but have - * already queued this message. Report an error - * in the socket, but don't affect return value - * from sendmsg - */ - pr_warn("KCM: Hard failure on kcm_write_msgs\n"); - report_csk_error(&kcm->sk, -err); - } - } - } else { - /* Message not complete, save state */ - kcm->seq_skb = head; - kcm_tx_msg(head)->last_skb = skb; - } - - KCM_STATS_ADD(kcm->stats.tx_bytes, size); - - release_sock(sk); - return size; - -out_error: - kcm_push(kcm); - - err = sk_stream_error(sk, flags, err); - - /* make sure we wake any epoll edge trigger waiter */ - if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) - sk->sk_write_space(sk); - - release_sock(sk); - return err; -} - static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -989,29 +828,52 @@ start: merge = false; } - copy = min_t(int, msg_data_left(msg), - pfrag->size - pfrag->offset); + if (msg->msg_flags & MSG_SPLICE_PAGES) { + copy = msg_data_left(msg); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_memory; + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, + sk->sk_allocation); + if (err < 0) { + if (err == -EMSGSIZE) + goto wait_for_memory; + goto out_error; + } - err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, - pfrag->page, - pfrag->offset, - copy); - if (err) - goto out_error; + copy = err; + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; + sk_wmem_queued_add(sk, copy); + sk_mem_charge(sk, copy); - /* Update the skb. */ - if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + if (head != skb) + head->truesize += copy; } else { - skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, copy); - get_page(pfrag->page); + copy = min_t(int, msg_data_left(msg), + pfrag->size - pfrag->offset); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; + + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto out_error; + + /* Update the skb. 
*/ + if (merge) { + skb_frag_size_add( + &skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); + } + + pfrag->offset += copy; } - pfrag->offset += copy; copied += copy; if (head != skb) { head->len += copy; @@ -1088,6 +950,19 @@ out_error: return err; } +static void kcm_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct kcm_sock *kcm = kcm_sk(sk); + + if (skb_queue_empty_lockless(&sk->sk_write_queue)) + return; + + lock_sock(sk); + kcm_write_msgs(kcm); + release_sock(sk); +} + static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -1875,7 +1750,7 @@ static const struct proto_ops kcm_dgram_ops = { .sendmsg = kcm_sendmsg, .recvmsg = kcm_recvmsg, .mmap = sock_no_mmap, - .sendpage = kcm_sendpage, + .splice_eof = kcm_splice_eof, }; static const struct proto_ops kcm_seqpacket_ops = { @@ -1896,7 +1771,7 @@ static const struct proto_ops kcm_seqpacket_ops = { .sendmsg = kcm_sendmsg, .recvmsg = kcm_recvmsg, .mmap = sock_no_mmap, - .sendpage = kcm_sendpage, + .splice_eof = kcm_splice_eof, .splice_read = kcm_splice_read, }; diff --git a/net/key/af_key.c b/net/key/af_key.c index 31ab12fd720a..ede3c6a60353 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3761,7 +3761,6 @@ static const struct proto_ops pfkey_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, /* Now the operations that really occur. */ .release = pfkey_release, diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a88e070b431d..91ebf0a3f499 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -272,7 +272,7 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); /* IOCTL helper for IP encap modules. */ -int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int l2tp_ioctl(struct sock *sk, int cmd, int *karg); /* Extract the tunnel structure from a socket's sk_user_data pointer, * validating the tunnel magic feather. diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 41a74fc84ca1..f9073bc7281f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -562,19 +562,18 @@ out: return err ? err : copied; } -int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) +int l2tp_ioctl(struct sock *sk, int cmd, int *karg) { struct sk_buff *skb; - int amount; switch (cmd) { case SIOCOUTQ: - amount = sk_wmem_alloc_get(sk); + *karg = sk_wmem_alloc_get(sk); break; case SIOCINQ: spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - amount = skb ? skb->len : 0; + *karg = skb ? 
skb->len : 0; spin_unlock_bh(&sk->sk_receive_queue.lock); break; @@ -582,7 +581,7 @@ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } - return put_user(amount, (int __user *)arg); + return 0; } EXPORT_SYMBOL_GPL(l2tp_ioctl); @@ -625,7 +624,6 @@ static const struct proto_ops l2tp_ip_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct inet_protosw l2tp_ip_protosw = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 5137ea1861ce..b1623f9c4f92 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -751,7 +751,6 @@ static const struct proto_ops l2tp_ip6_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9ffbc667be6c..57c35c960b2c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1232,7 +1232,6 @@ static const struct proto_ops llc_ui_ops = { .sendmsg = llc_ui_sendmsg, .recvmsg = llc_ui_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const char llc_proc_err_msg[] __initconst = diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 3b651e7f5a73..b6b772685881 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation */ #include <linux/ieee80211.h> @@ -457,6 +457,12 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, u8 tid = tid_tx->tid; u16 buf_size; + if (WARN_ON_ONCE(test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state) || + test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))) + return; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", @@ -795,9 +801,15 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) return; + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state) || + test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) + return; + if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) { ieee80211_send_addba_with_timeout(sta, tid_tx); /* RESPONSE_RECEIVED state whould trigger the flow again */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f2d08dbccfb7..e7ac24603892 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -35,7 +35,7 @@ ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id, * the return value at all (if it's not a pairwise key), * so in that case (require_valid==false) don't error. 
*/ - if (require_valid && sdata->vif.valid_links) + if (require_valid && ieee80211_vif_is_mld(&sdata->vif)) return ERR_PTR(-EINVAL); return &sdata->deflink; @@ -228,7 +228,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return 0; /* FIXME: no support for 4-addr MLO yet */ - if (sdata->vif.valid_links) + if (ieee80211_vif_is_mld(&sdata->vif)) return -EOPNOTSUPP; sdata->u.mgd.use_4addr = params->use_4addr; @@ -913,24 +913,30 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) return 0; - mutex_lock(&local->mtx); if (local->use_chanctx) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata) { + sdata_lock(sdata); + mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, chandef, IEEE80211_CHANCTX_EXCLUSIVE); + mutex_unlock(&local->mtx); + sdata_unlock(sdata); + } + } else { + mutex_lock(&local->mtx); + if (local->open_count == local->monitors) { + local->_oper_chandef = *chandef; + ieee80211_hw_config(local, 0); } - } else if (local->open_count == local->monitors) { - local->_oper_chandef = *chandef; - ieee80211_hw_config(local, 0); + mutex_unlock(&local->mtx); } if (ret == 0) local->monitor_chandef = *chandef; - mutex_unlock(&local->mtx); return ret; } @@ -1101,18 +1107,20 @@ ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, return offset; } -static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_link_data *link, - struct cfg80211_beacon_data *params, - const struct ieee80211_csa_settings *csa, - const struct ieee80211_color_change_settings *cca) +static int +ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_data *link, + struct cfg80211_beacon_data *params, + const struct ieee80211_csa_settings *csa, + const struct ieee80211_color_change_settings *cca, + u64 *changed) { struct cfg80211_mbssid_elems *mbssid = NULL; struct cfg80211_rnr_elems *rnr = NULL; struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; - u32 changed = BSS_CHANGED_BEACON; + u64 _changed = BSS_CHANGED_BEACON; struct ieee80211_bss_conf *link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); @@ -1219,7 +1227,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, return err; } if (err == 0) - changed |= BSS_CHANGED_AP_PROBE_RESP; + _changed |= BSS_CHANGED_AP_PROBE_RESP; if (params->ftm_responder != -1) { link_conf->ftm_responder = params->ftm_responder; @@ -1235,7 +1243,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, return err; } - changed |= BSS_CHANGED_FTM_RESPONDER; + _changed |= BSS_CHANGED_FTM_RESPONDER; } rcu_assign_pointer(link->u.ap.beacon, new); @@ -1244,7 +1252,8 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, if (old) kfree_rcu(old, rcu_head); - return changed; + *changed |= _changed; + return 0; } static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, @@ -1446,10 +1455,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) link_conf->beacon_tx_rate = params->beacon_rate; - err = ieee80211_assign_beacon(sdata, link, ¶ms->beacon, NULL, NULL); + err = ieee80211_assign_beacon(sdata, link, ¶ms->beacon, NULL, NULL, + &changed); if (err < 0) goto error; - changed |= err; if (params->fils_discovery.max_interval) { err = 
ieee80211_set_fils_discovery(sdata, @@ -1506,6 +1515,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; + u64 changed = 0; sdata_assert_lock(sdata); @@ -1525,17 +1535,18 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL); + err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL, + &changed); if (err < 0) return err; if (params->he_bss_color_valid && params->he_bss_color.enabled != link_conf->he_bss_color.enabled) { link_conf->he_bss_color.enabled = params->he_bss_color.enabled; - err |= BSS_CHANGED_HE_BSS_COLOR; + changed |= BSS_CHANGED_HE_BSS_COLOR; } - ieee80211_link_info_change_notify(sdata, link, err); + ieee80211_link_info_change_notify(sdata, link, changed); return 0; } @@ -1718,7 +1729,7 @@ static void sta_apply_mesh_params(struct ieee80211_local *local, { #ifdef CONFIG_MAC80211_MESH struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed = 0; + u64 changed = 0; if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { @@ -2665,7 +2676,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_supported_band *sband; - u32 changed = 0; + u64 changed = 0; link = ieee80211_link_or_deflink(sdata, params->link_id, true); if (IS_ERR(link)) @@ -3585,7 +3596,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t sdata->deflink.csa_block_tx = block_tx; sdata_info(sdata, "channel switch failed, disconnecting\n"); - ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); + wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); @@ -3601,25 +3612,22 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, err = ieee80211_assign_beacon(sdata, &sdata->deflink, sdata->deflink.u.ap.next_beacon, - NULL, NULL); + NULL, NULL, changed); ieee80211_free_next_beacon(&sdata->deflink); if (err < 0) return err; - *changed |= err; break; case NL80211_IFTYPE_ADHOC: - err = ieee80211_ibss_finish_csa(sdata); + err = ieee80211_ibss_finish_csa(sdata, changed); if (err < 0) return err; - *changed |= err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - err = ieee80211_mesh_finish_csa(sdata); + err = ieee80211_mesh_finish_csa(sdata, changed); if (err < 0) return err; - *changed |= err; break; #endif default: @@ -3730,7 +3738,7 @@ unlock: static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *params, - u32 *changed) + u64 *changed) { struct ieee80211_csa_settings csa = {}; int err; @@ -3777,12 +3785,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, err = ieee80211_assign_beacon(sdata, &sdata->deflink, ¶ms->beacon_csa, &csa, - NULL); + NULL, changed); if (err < 0) { ieee80211_free_next_beacon(&sdata->deflink); return err; } - *changed |= err; break; case NL80211_IFTYPE_ADHOC: @@ -3814,10 +3821,9 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { - err = ieee80211_ibss_csa_beacon(sdata, params); + err = ieee80211_ibss_csa_beacon(sdata, params, changed); if (err < 0) return err; - *changed |= err; } 
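
The hunks above and below all apply one conversion: BSS "changed" bitmaps are widened from u32 to u64, so ieee80211_assign_beacon() and its IBSS/mesh counterparts can no longer return flag bits through their int return value (which also has to carry negative errnos); they now return 0 or an error and OR the flags into a u64 out-parameter. A minimal sketch of the resulting call pattern — not a standalone function, just the convention, with the names taken from the ieee80211_change_beacon() hunk above:

	u64 changed = 0;
	int err;

	err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL,
				      &changed);
	if (err < 0)
		return err;

	/* on success the callee OR'ed its BSS_CHANGED_* bits into 'changed' */
	ieee80211_link_info_change_notify(sdata, link, changed);
	return 0;
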
ieee80211_send_action_csa(sdata, params); @@ -3842,12 +3848,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { - err = ieee80211_mesh_csa_beacon(sdata, params); + err = ieee80211_mesh_csa_beacon(sdata, params, changed); if (err < 0) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; return err; } - *changed |= err; } if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) @@ -3881,7 +3886,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel_switch ch_switch; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; - u32 changed = 0; + u64 changed = 0; int err; sdata_assert_lock(sdata); @@ -4614,7 +4619,7 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy, static int ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, - u32 *changed) + u64 *changed) { switch (sdata->vif.type) { case NL80211_IFTYPE_AP: { @@ -4625,13 +4630,12 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, ret = ieee80211_assign_beacon(sdata, &sdata->deflink, sdata->deflink.u.ap.next_beacon, - NULL, NULL); + NULL, NULL, changed); ieee80211_free_next_beacon(&sdata->deflink); if (ret < 0) return ret; - *changed |= ret; break; } default: @@ -4645,7 +4649,7 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, static int ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_color_change_settings *params, - u32 *changed) + u64 *changed) { struct ieee80211_color_change_settings color_change = {}; int err; @@ -4668,12 +4672,11 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, err = ieee80211_assign_beacon(sdata, &sdata->deflink, ¶ms->beacon_color_change, - NULL, &color_change); + NULL, &color_change, changed); if (err < 0) { ieee80211_free_next_beacon(&sdata->deflink); return err; } - *changed |= err; break; default: return -EOPNOTSUPP; @@ -4684,7 +4687,7 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, static void ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, - u8 color, int enable, u32 changed) + u8 color, int enable, u64 changed) { sdata->vif.bss_conf.he_bss_color.color = color; sdata->vif.bss_conf.he_bss_color.enabled = enable; @@ -4712,7 +4715,7 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - u32 changed = 0; + u64 changed = 0; int err; sdata_assert_lock(sdata); @@ -4809,7 +4812,7 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - u32 changed = 0; + u64 changed = 0; int err; sdata_assert_lock(sdata); @@ -4871,7 +4874,7 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, return -EOPNOTSUPP; mutex_lock(&sdata->local->mtx); - res = ieee80211_vif_set_links(sdata, wdev->valid_links); + res = ieee80211_vif_set_links(sdata, wdev->valid_links, 0); mutex_unlock(&sdata->local->mtx); return res; @@ -4884,7 +4887,7 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); mutex_lock(&sdata->local->mtx); - ieee80211_vif_set_links(sdata, wdev->valid_links); + ieee80211_vif_set_links(sdata, wdev->valid_links, 
0); mutex_unlock(&sdata->local->mtx); } @@ -5053,6 +5056,7 @@ const struct cfg80211_ops mac80211_config_ops = { .join_ocb = ieee80211_join_ocb, .leave_ocb = ieee80211_leave_ocb, .change_bss = ieee80211_change_bss, + .inform_bss = ieee80211_inform_bss, .set_txq_params = ieee80211_set_txq_params, .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 77c90ed8f5d7..68952752b599 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -802,6 +802,11 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, } } + if (WARN_ON_ONCE(!compat)) { + rcu_read_unlock(); + return; + } + /* TDLS peers can sometimes affect the chandef width */ list_for_each_entry_rcu(sta, &local->sta_list, list) { if (!sta->uploaded || @@ -1205,8 +1210,8 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) &link->csa_finalize_work); break; case NL80211_IFTYPE_STATION: - ieee80211_queue_work(&sdata->local->hw, - &link->u.mgd.chswitch_work); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &link->u.mgd.chswitch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_AP_VLAN: @@ -1257,7 +1262,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) struct ieee80211_vif_chanctx_switch vif_chsw[1] = {}; struct ieee80211_chanctx *old_ctx, *new_ctx; const struct cfg80211_chan_def *chandef; - u32 changed = 0; + u64 changed = 0; int err; lockdep_assert_held(&local->mtx); @@ -1653,7 +1658,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) reserved_chanctx_list) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_bss_conf *link_conf = link->conf; - u32 changed = 0; + u64 changed = 0; if (!ieee80211_link_has_in_place_reservation(link)) continue; diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index b4c20f5e778e..d49894df2351 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022 - 2023 Intel Corporation */ #ifndef __MAC80211_DEBUG_H #define __MAC80211_DEBUG_H @@ -136,7 +136,7 @@ do { \ #define link_info(link, fmt, ...) \ do { \ - if ((link)->sdata->vif.valid_links) \ + if (ieee80211_vif_is_mld(&(link)->sdata->vif)) \ _sdata_info((link)->sdata, "[link %d] " fmt, \ (link)->link_id, \ ##__VA_ARGS__); \ @@ -145,7 +145,7 @@ do { \ } while (0) #define link_err(link, fmt, ...) \ do { \ - if ((link)->sdata->vif.valid_links) \ + if (ieee80211_vif_is_mld(&(link)->sdata->vif)) \ _sdata_err((link)->sdata, "[link %d] " fmt, \ (link)->link_id, \ ##__VA_ARGS__); \ @@ -154,7 +154,7 @@ do { \ } while (0) #define link_dbg(link, fmt, ...) 
\ do { \ - if ((link)->sdata->vif.valid_links) \ + if (ieee80211_vif_is_mld(&(link)->sdata->vif)) \ _sdata_dbg(1, (link)->sdata, "[link %d] " fmt, \ (link)->link_id, \ ##__VA_ARGS__); \ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index b0cef37eb394..63250286dc8b 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #include <linux/kernel.h> @@ -267,6 +267,9 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; int err; + if (sdata->vif.driver_flags & IEEE80211_VIF_DISABLE_SMPS_OVERRIDE) + return -EOPNOTSUPP; + if (!(local->hw.wiphy->features & NL80211_FEATURE_STATIC_SMPS) && smps_mode == IEEE80211_SMPS_STATIC) return -EINVAL; @@ -690,6 +693,19 @@ IEEE80211_IF_FILE(dot11MeshConnectedToAuthServer, debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \ sdata, &name##_ops) +#define DEBUGFS_ADD_X(_bits, _name, _mode) \ + debugfs_create_x##_bits(#_name, _mode, sdata->vif.debugfs_dir, \ + &sdata->vif._name) + +#define DEBUGFS_ADD_X8(_name, _mode) \ + DEBUGFS_ADD_X(8, _name, _mode) + +#define DEBUGFS_ADD_X16(_name, _mode) \ + DEBUGFS_ADD_X(16, _name, _mode) + +#define DEBUGFS_ADD_X32(_name, _mode) \ + DEBUGFS_ADD_X(32, _name, _mode) + #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) static void add_common_files(struct ieee80211_sub_if_data *sdata) @@ -717,8 +733,9 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(uapsd_queues, 0600); DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600); DEBUGFS_ADD_MODE(tdls_wider_bw, 0600); - DEBUGFS_ADD_MODE(valid_links, 0200); + DEBUGFS_ADD_MODE(valid_links, 0400); DEBUGFS_ADD_MODE(active_links, 0600); + DEBUGFS_ADD_X16(dormant_links, 0400); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index f1914bf39f0e..5a97fb248c85 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -1035,6 +1035,190 @@ out: } LINK_STA_OPS(he_capa); +static ssize_t link_sta_eht_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + char *buf, *p; + size_t buf_sz = PAGE_SIZE; + struct link_sta_info *link_sta = file->private_data; + struct ieee80211_sta_eht_cap *bec = &link_sta->pub->eht_cap; + struct ieee80211_eht_cap_elem_fixed *fixed = &bec->eht_cap_elem; + struct ieee80211_eht_mcs_nss_supp *nss = &bec->eht_mcs_nss_supp; + u8 *cap; + int i; + ssize_t ret; + static const char *mcs_desc[] = { "0-7", "8-9", "10-11", "12-13"}; + + buf = kmalloc(buf_sz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = buf; + + p += scnprintf(p, buf_sz + buf - p, "EHT %ssupported\n", + bec->has_eht ? "" : "not "); + if (!bec->has_eht) + goto out; + + p += scnprintf(p, buf_sz + buf - p, + "MAC-CAP: %#.2x %#.2x\n", + fixed->mac_cap_info[0], fixed->mac_cap_info[1]); + p += scnprintf(p, buf_sz + buf - p, + "PHY-CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n", + fixed->phy_cap_info[0], fixed->phy_cap_info[1], + fixed->phy_cap_info[2], fixed->phy_cap_info[3], + fixed->phy_cap_info[4], fixed->phy_cap_info[5], + fixed->phy_cap_info[6], fixed->phy_cap_info[7], + fixed->phy_cap_info[8]); + +#define PRINT(fmt, ...) 
\ + p += scnprintf(p, buf_sz + buf - p, "\t\t" fmt "\n", \ + ##__VA_ARGS__) + +#define PFLAG(t, n, a, b) \ + do { \ + if (cap[n] & IEEE80211_EHT_##t##_CAP##n##_##a) \ + PRINT("%s", b); \ + } while (0) + + cap = fixed->mac_cap_info; + PFLAG(MAC, 0, EPCS_PRIO_ACCESS, "EPCS-PRIO-ACCESS"); + PFLAG(MAC, 0, OM_CONTROL, "OM-CONTROL"); + PFLAG(MAC, 0, TRIG_TXOP_SHARING_MODE1, "TRIG-TXOP-SHARING-MODE1"); + PFLAG(MAC, 0, TRIG_TXOP_SHARING_MODE2, "TRIG-TXOP-SHARING-MODE2"); + PFLAG(MAC, 0, RESTRICTED_TWT, "RESTRICTED-TWT"); + PFLAG(MAC, 0, SCS_TRAFFIC_DESC, "SCS-TRAFFIC-DESC"); + switch ((cap[0] & 0xc0) >> 6) { + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895: + PRINT("MAX-MPDU-LEN: 3895"); + break; + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991: + PRINT("MAX-MPDU-LEN: 7991"); + break; + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: + PRINT("MAX-MPDU-LEN: 11454"); + break; + } + + cap = fixed->phy_cap_info; + PFLAG(PHY, 0, 320MHZ_IN_6GHZ, "320MHZ-IN-6GHZ"); + PFLAG(PHY, 0, 242_TONE_RU_GT20MHZ, "242-TONE-RU-GT20MHZ"); + PFLAG(PHY, 0, NDP_4_EHT_LFT_32_GI, "NDP-4-EHT-LFT-32-GI"); + PFLAG(PHY, 0, PARTIAL_BW_UL_MU_MIMO, "PARTIAL-BW-UL-MU-MIMO"); + PFLAG(PHY, 0, SU_BEAMFORMER, "SU-BEAMFORMER"); + PFLAG(PHY, 0, SU_BEAMFORMEE, "SU-BEAMFORMEE"); + i = cap[0] >> 7; + i |= (cap[1] & 0x3) << 1; + PRINT("BEAMFORMEE-80-NSS: %i", i); + PRINT("BEAMFORMEE-160-NSS: %i", (cap[1] >> 2) & 0x7); + PRINT("BEAMFORMEE-320-NSS: %i", (cap[1] >> 5) & 0x7); + PRINT("SOUNDING-DIM-80-NSS: %i", (cap[2] & 0x7)); + PRINT("SOUNDING-DIM-160-NSS: %i", (cap[2] >> 3) & 0x7); + i = cap[2] >> 6; + i |= (cap[3] & 0x1) << 3; + PRINT("SOUNDING-DIM-320-NSS: %i", i); + + PFLAG(PHY, 3, NG_16_SU_FEEDBACK, "NG-16-SU-FEEDBACK"); + PFLAG(PHY, 3, NG_16_MU_FEEDBACK, "NG-16-MU-FEEDBACK"); + PFLAG(PHY, 3, CODEBOOK_4_2_SU_FDBK, "CODEBOOK-4-2-SU-FDBK"); + PFLAG(PHY, 3, CODEBOOK_7_5_MU_FDBK, "CODEBOOK-7-5-MU-FDBK"); + PFLAG(PHY, 3, TRIG_SU_BF_FDBK, "TRIG-SU-BF-FDBK"); + PFLAG(PHY, 3, TRIG_MU_BF_PART_BW_FDBK, "TRIG-MU-BF-PART-BW-FDBK"); + PFLAG(PHY, 3, TRIG_CQI_FDBK, "TRIG-CQI-FDBK"); + + PFLAG(PHY, 4, PART_BW_DL_MU_MIMO, "PART-BW-DL-MU-MIMO"); + PFLAG(PHY, 4, PSR_SR_SUPP, "PSR-SR-SUPP"); + PFLAG(PHY, 4, POWER_BOOST_FACT_SUPP, "POWER-BOOST-FACT-SUPP"); + PFLAG(PHY, 4, EHT_MU_PPDU_4_EHT_LTF_08_GI, "EHT-MU-PPDU-4-EHT-LTF-08-GI"); + PRINT("MAX_NC: %i", cap[4] >> 4); + + PFLAG(PHY, 5, NON_TRIG_CQI_FEEDBACK, "NON-TRIG-CQI-FEEDBACK"); + PFLAG(PHY, 5, TX_LESS_242_TONE_RU_SUPP, "TX-LESS-242-TONE-RU-SUPP"); + PFLAG(PHY, 5, RX_LESS_242_TONE_RU_SUPP, "RX-LESS-242-TONE-RU-SUPP"); + PFLAG(PHY, 5, PPE_THRESHOLD_PRESENT, "PPE_THRESHOLD_PRESENT"); + switch (cap[5] >> 4 & 0x3) { + case IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_0US: + PRINT("NOMINAL_PKT_PAD: 0us"); + break; + case IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_8US: + PRINT("NOMINAL_PKT_PAD: 8us"); + break; + case IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US: + PRINT("NOMINAL_PKT_PAD: 16us"); + break; + case IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_20US: + PRINT("NOMINAL_PKT_PAD: 20us"); + break; + } + i = cap[5] >> 6; + i |= cap[6] & 0x7; + PRINT("MAX-NUM-SUPP-EHT-LTF: %i", i); + PFLAG(PHY, 5, SUPP_EXTRA_EHT_LTF, "SUPP-EXTRA-EHT-LTF"); + + i = (cap[6] >> 3) & 0xf; + PRINT("MCS15-SUPP-MASK: %i", i); + PFLAG(PHY, 6, EHT_DUP_6GHZ_SUPP, "EHT-DUP-6GHZ-SUPP"); + + PFLAG(PHY, 7, 20MHZ_STA_RX_NDP_WIDER_BW, "20MHZ-STA-RX-NDP-WIDER-BW"); + PFLAG(PHY, 7, NON_OFDMA_UL_MU_MIMO_80MHZ, "NON-OFDMA-UL-MU-MIMO-80MHZ"); + PFLAG(PHY, 7, NON_OFDMA_UL_MU_MIMO_160MHZ, "NON-OFDMA-UL-MU-MIMO-160MHZ"); + PFLAG(PHY, 7,
NON_OFDMA_UL_MU_MIMO_320MHZ, "NON-OFDMA-UL-MU-MIMO-320MHZ"); + PFLAG(PHY, 7, MU_BEAMFORMER_80MHZ, "MU-BEAMFORMER-80MHZ"); + PFLAG(PHY, 7, MU_BEAMFORMER_160MHZ, "MU-BEAMFORMER-160MHZ"); + PFLAG(PHY, 7, MU_BEAMFORMER_320MHZ, "MU-BEAMFORMER-320MHZ"); + PFLAG(PHY, 7, TB_SOUNDING_FDBK_RATE_LIMIT, "TB-SOUNDING-FDBK-RATE-LIMIT"); + + PFLAG(PHY, 8, RX_1024QAM_WIDER_BW_DL_OFDMA, "RX-1024QAM-WIDER-BW-DL-OFDMA"); + PFLAG(PHY, 8, RX_4096QAM_WIDER_BW_DL_OFDMA, "RX-4096QAM-WIDER-BW-DL-OFDMA"); + +#undef PFLAG + + PRINT(""); /* newline */ + if (!(link_sta->pub->he_cap.he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) { + u8 *mcs_vals = (u8 *)(&nss->only_20mhz); + + for (i = 0; i < 4; i++) + PRINT("EHT bw=20 MHz, max NSS for MCS %s: Rx=%u, Tx=%u", + mcs_desc[i], + mcs_vals[i] & 0xf, mcs_vals[i] >> 4); + } else { + u8 *mcs_vals = (u8 *)(&nss->bw._80); + + for (i = 0; i < 3; i++) + PRINT("EHT bw <= 80 MHz, max NSS for MCS %s: Rx=%u, Tx=%u", + mcs_desc[i + 1], + mcs_vals[i] & 0xf, mcs_vals[i] >> 4); + + mcs_vals = (u8 *)(&nss->bw._160); + for (i = 0; i < 3; i++) + PRINT("EHT bw <= 160 MHz, max NSS for MCS %s: Rx=%u, Tx=%u", + mcs_desc[i + 1], + mcs_vals[i] & 0xf, mcs_vals[i] >> 4); + + mcs_vals = (u8 *)(&nss->bw._320); + for (i = 0; i < 3; i++) + PRINT("EHT bw <= 320 MHz, max NSS for MCS %s: Rx=%u, Tx=%u", + mcs_desc[i + 1], + mcs_vals[i] & 0xf, mcs_vals[i] >> 4); + } + + if (cap[5] & IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT) { + u8 ppe_size = ieee80211_eht_ppe_size(bec->eht_ppe_thres[0], cap); + + p += scnprintf(p, buf_sz + buf - p, "EHT PPE Thresholds: "); + for (i = 0; i < ppe_size; i++) + p += scnprintf(p, buf_sz + buf - p, "0x%02x ", + bec->eht_ppe_thres[i]); + PRINT(""); /* newline */ + } + +out: + ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return ret; +} +LINK_STA_OPS(eht_capa); + #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ sta->debugfs_dir, sta, &sta_ ##name## _ops) @@ -1128,6 +1312,7 @@ void ieee80211_link_sta_debugfs_add(struct link_sta_info *link_sta) DEBUGFS_ADD(ht_capa); DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(he_capa); + DEBUGFS_ADD(eht_capa); DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates); DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 45d3e53c7383..c4505593ba7a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016 Intel Deutschland GmbH -* Copyright (C) 2018 - 2019, 2021 Intel Corporation +* Copyright (C) 2018 - 2019, 2021 - 2023 Intel Corporation */ #ifndef __MAC80211_DRIVER_OPS @@ -13,9 +13,11 @@ #include "trace.h" #define check_sdata_in_driver(sdata) ({ \ - !WARN_ONCE(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), \ - "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", \ - sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); \ + WARN_ONCE(!sdata->local->reconfig_failure && \ + !(sdata->flags & IEEE80211_SDATA_IN_DRIVER), \ + "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", \ + sdata->dev ? 
sdata->dev->name : sdata->name, sdata->flags); \ + !!(sdata->flags & IEEE80211_SDATA_IN_DRIVER); \ }) static inline struct ieee80211_sub_if_data * diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 18bc6b78b267..ddc7acc68335 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -2,7 +2,7 @@ /* * EHT handling * - * Copyright(c) 2021-2022 Intel Corporation + * Copyright(c) 2021-2023 Intel Corporation */ #include "ieee80211_i.h" @@ -25,8 +25,7 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, memset(eht_cap, 0, sizeof(*eht_cap)); if (!eht_cap_ie_elem || - !ieee80211_get_eht_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) + !ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) return; mcs_nss_size = ieee80211_eht_mcs_nss_size(he_cap_ie_elem, diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 0322abae0825..9f5ffdc9db28 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -128,8 +128,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, return; own_he_cap_ptr = - ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); if (!own_he_cap_ptr) return; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 5315ab750280..33729870ad8a 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH - * Copyright(c) 2020-2022 Intel Corporation + * Copyright(c) 2020-2023 Intel Corporation */ #include <linux/ieee80211.h> @@ -602,7 +602,8 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id, goto out; link->u.mgd.driver_smps_mode = smps_mode; - ieee80211_queue_work(&sdata->local->hw, &link->u.mgd.request_smps_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &link->u.mgd.request_smps_work); out: rcu_read_unlock(); } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 9dffc3079588..e1900077bc4b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2022 Intel Corporation + * Copyright(c) 2018-2023 Intel Corporation */ #include <linux/delay.h> @@ -226,7 +226,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct cfg80211_bss *bss; - u32 bss_change; + u64 bss_change; struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; struct beacon_data *presp; @@ -478,7 +478,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_csa_settings *csa_settings) + struct cfg80211_csa_settings *csa_settings, + u64 *changed) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct beacon_data *presp, *old_presp; @@ -520,10 +521,11 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, if (old_presp) kfree_rcu(old_presp, rcu_head); - return BSS_CHANGED_BEACON; + *changed |= BSS_CHANGED_BEACON; + return 0; } -int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) +int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; @@ -552,14 +554,15 @@ int 
ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) ifibss->chandef = sdata->deflink.csa_chandef; /* generate the beacon */ - return ieee80211_ibss_csa_beacon(sdata, NULL); + return ieee80211_ibss_csa_beacon(sdata, NULL, changed); } void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - cancel_work_sync(&ifibss->csa_connection_drop_work); + wiphy_work_cancel(sdata->local->hw.wiphy, + &ifibss->csa_connection_drop_work); } static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) @@ -728,7 +731,8 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->mtx); } -static void ieee80211_csa_connection_drop_work(struct work_struct *work) +static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -741,7 +745,7 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); sdata_unlock(sdata); } @@ -894,8 +898,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, return true; disconnect: ibss_dbg(sdata, "Can't handle channel switch, disconnect\n"); - ieee80211_queue_work(&sdata->local->hw, - &ifibss->csa_connection_drop_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifibss->csa_connection_drop_work); ieee80211_ibss_csa_mark_radar(sdata); @@ -1242,7 +1246,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); spin_unlock(&ifibss->incomplete_lock); - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); } static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) @@ -1721,7 +1725,7 @@ static void ieee80211_ibss_timer(struct timer_list *t) struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.ibss.timer); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) @@ -1731,8 +1735,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); - INIT_WORK(&ifibss->csa_connection_drop_work, - ieee80211_csa_connection_drop_work); + wiphy_work_init(&ifibss->csa_connection_drop_work, + ieee80211_csa_connection_drop_work); } /* scan finished notification */ @@ -1754,7 +1758,7 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { - u32 changed = 0; + u64 changed = 0; u32 rate_flags; struct ieee80211_supported_band *sband; enum ieee80211_chanctx_mode chanmode; @@ -1856,7 +1860,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->deflink.needed_rx_chains = local->rx_chains; sdata->control_port_over_nl80211 = params->control_port_over_nl80211; - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 
4159fb65038b..91633a0b723e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -410,6 +410,8 @@ struct ieee80211_mgd_assoc_data { ieee80211_conn_flags_t conn_flags; u16 status; + + bool disabled; } link[IEEE80211_MLD_MAX_NUM_LINKS]; u8 ap_addr[ETH_ALEN] __aligned(2); @@ -466,8 +468,8 @@ struct ieee80211_if_managed { struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; struct work_struct monitor_work; - struct work_struct beacon_connection_loss_work; - struct work_struct csa_connection_drop_work; + struct wiphy_work beacon_connection_loss_work; + struct wiphy_work csa_connection_drop_work; unsigned long beacon_timeout; unsigned long probe_timeout; @@ -549,11 +551,14 @@ struct ieee80211_if_managed { */ u8 *assoc_req_ies; size_t assoc_req_ies_len; + + struct wiphy_delayed_work ml_reconf_work; + u16 removed_links; }; struct ieee80211_if_ibss { struct timer_list timer; - struct work_struct csa_connection_drop_work; + struct wiphy_work csa_connection_drop_work; unsigned long last_scan_completed; @@ -918,10 +923,9 @@ struct ieee80211_link_data_managed { bool csa_waiting_bcn; bool csa_ignored_same_chan; - struct timer_list chswitch_timer; - struct work_struct chswitch_work; + struct wiphy_delayed_work chswitch_work; - struct work_struct request_smps_work; + struct wiphy_work request_smps_work; bool beacon_crc_valid; u32 beacon_crc; struct ewma_beacon_signal ave_beacon_signal; @@ -947,6 +951,8 @@ struct ieee80211_link_data_managed { int wmm_last_param_set; int mu_edca_last_param_set; + u8 bss_param_ch_cnt; + struct cfg80211_bss *bss; }; @@ -1061,7 +1067,7 @@ struct ieee80211_sub_if_data { /* used to reconfigure hardware SM PS */ struct work_struct recalc_smps; - struct work_struct work; + struct wiphy_work work; struct sk_buff_head skb_queue; struct sk_buff_head status_queue; @@ -1394,6 +1400,9 @@ struct ieee80211_local { /* device is during a HW reconfig */ bool in_reconfig; + /* reconfiguration failed ... suppress some warnings etc. 
*/ + bool reconfig_failure; + /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; @@ -1612,7 +1621,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; - WARN_ON(sdata->vif.valid_links); + WARN_ON(ieee80211_vif_is_mld(&sdata->vif)); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); @@ -1722,7 +1731,8 @@ struct ieee802_11_elems { const struct ieee80211_aid_response_ie *aid_resp; const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_operation; - const struct ieee80211_multi_link_elem *multi_link; + const struct ieee80211_multi_link_elem *ml_basic; + const struct ieee80211_multi_link_elem *ml_reconf; /* length of them, respectively */ u8 ext_capab_len; @@ -1747,7 +1757,14 @@ struct ieee802_11_elems { u8 eht_cap_len; /* mult-link element can be de-fragmented and thus u8 is not sufficient */ - size_t multi_link_len; + size_t ml_basic_len; + size_t ml_reconf_len; + + /* The basic Multi-Link element in the original IEs */ + const struct element *ml_basic_elem; + + /* The reconfiguration Multi-Link element in the original IEs */ + const struct element *ml_reconf_elem; /* * store the per station profile pointer and length in case that the @@ -1827,7 +1844,7 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, u64 changed); void ieee80211_configure_filter(struct ieee80211_local *local); -u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); +u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, @@ -1887,8 +1904,10 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_csa_settings *csa_settings); -int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata); + struct cfg80211_csa_settings *csa_settings, + u64 *changed); +int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, + u64 *changed); void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata); /* OCB code */ @@ -1905,8 +1924,10 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_csa_settings *csa_settings); -int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata); + struct cfg80211_csa_settings *csa_settings, + u64 *changed); +int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, + u64 *changed); /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); @@ -1921,6 +1942,9 @@ void ieee80211_scan_cancel(struct ieee80211_local *local); void ieee80211_run_deferred_scan(struct ieee80211_local *local); void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb); +void ieee80211_inform_bss(struct wiphy *wiphy, struct cfg80211_bss *bss, + const struct cfg80211_bss_ies *ies, void *data); + void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, @@ -2013,8 +2037,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data 
*sdata, struct ieee80211_bss_conf *link_conf); void ieee80211_link_stop(struct ieee80211_link_data *link); int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, - u16 new_links); + u16 new_links, u16 dormant_links); void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata); +int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); @@ -2269,8 +2294,6 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, * (or re-association) response frame if this is given * @from_ap: frame is received from an AP (currently used only * for EHT capabilities parsing) - * @scratch_len: if non zero, specifies the requested length of the scratch - * buffer; otherwise, 'len' is used. */ struct ieee80211_elems_parse_params { const u8 *start; @@ -2281,7 +2304,6 @@ struct ieee80211_elems_parse_params { struct cfg80211_bss *bss; int link_id; bool from_ap; - size_t scratch_len; }; struct ieee802_11_elems * @@ -2421,6 +2443,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); +u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end); enum { IEEE80211_PROBE_FLAG_DIRECTED = BIT(0), @@ -2560,10 +2583,10 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, /* TDLS */ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, u32 peer_capability, - bool initiator, const u8 *extra_ies, - size_t extra_ies_len); + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, u16 status_code, + u32 peer_capability, bool initiator, + const u8 *extra_ies, size_t extra_ies_len); int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); void ieee80211_tdls_peer_del_work(struct work_struct *wk); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bd2c48870add..be586bc0b5b7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -43,7 +43,7 @@ * by either the RTNL, the iflist_mtx or RCU. */ -static void ieee80211_iface_work(struct work_struct *work); +static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) { @@ -521,7 +521,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do cancel_work_sync(&sdata->recalc_smps); sdata_lock(sdata); - WARN(sdata->vif.valid_links, + WARN(ieee80211_vif_is_mld(&sdata->vif), "destroying interface with valid links 0x%04x\n", sdata->vif.valid_links); @@ -614,7 +614,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do RCU_INIT_POINTER(local->p2p_sdata, NULL); fallthrough; default: - cancel_work_sync(&sdata->work); + wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work); /* * When we get here, the interface is marked down. 
* Free the remaining keys, if there are any @@ -1133,6 +1133,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; + mutex_init(&sdata->wdev.mtx); ieee80211_sdata_init(local, sdata); @@ -1157,23 +1158,26 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); + sdata_lock(sdata); mutex_lock(&local->mtx); ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); mutex_unlock(&local->mtx); + sdata_unlock(sdata); if (ret) { mutex_lock(&local->iflist_mtx); RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); + mutex_destroy(&sdata->wdev.mtx); kfree(sdata); return ret; } skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); - INIT_WORK(&sdata->work, ieee80211_iface_work); + wiphy_work_init(&sdata->work, ieee80211_iface_work); return 0; } @@ -1202,12 +1206,15 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) synchronize_net(); + sdata_lock(sdata); mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); mutex_unlock(&local->mtx); + sdata_unlock(sdata); drv_remove_interface(local, sdata); + mutex_destroy(&sdata->wdev.mtx); kfree(sdata); } @@ -1221,7 +1228,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct net_device *dev = wdev->netdev; struct ieee80211_local *local = sdata->local; - u32 changed = 0; + u64 changed = 0; int res; u32 hw_reconf_flags = 0; @@ -1281,6 +1288,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) } if (local->open_count == 0) { + /* here we can consider everything in good order (again) */ + local->reconfig_failure = false; + res = drv_start(local); if (res) goto err_del_bss; @@ -1622,7 +1632,7 @@ static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata, } } -static void ieee80211_iface_work(struct work_struct *work) +static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, work); @@ -1734,7 +1744,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); - INIT_WORK(&sdata->work, ieee80211_iface_work); + wiphy_work_init(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work); @@ -1812,7 +1822,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, return -EBUSY; /* for now, don't support changing while links exist */ - if (sdata->vif.valid_links) + if (ieee80211_vif_is_mld(&sdata->vif)) return -EBUSY; switch (sdata->vif.type) { @@ -2255,7 +2265,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; LIST_HEAD(unreg_list); - LIST_HEAD(wdev_list); ASSERT_RTNL(); @@ -2278,23 +2287,18 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ieee80211_txq_teardown_flows(local); mutex_lock(&local->iflist_mtx); - list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { - list_del(&sdata->list); - - if 
(sdata->dev) - unregister_netdevice_queue(sdata->dev, &unreg_list); - else - list_add(&sdata->list, &wdev_list); - } + list_splice_init(&local->interfaces, &unreg_list); mutex_unlock(&local->iflist_mtx); - unregister_netdevice_many(&unreg_list); - wiphy_lock(local->hw.wiphy); - list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { + list_for_each_entry_safe(sdata, tmp, &unreg_list, list) { + bool netdev = sdata->dev; + list_del(&sdata->list); cfg80211_unregister_wdev(&sdata->wdev); - kfree(sdata); + + if (!netdev) + kfree(sdata); } wiphy_unlock(local->hw.wiphy); } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e8f6c1e5eabf..21cf5a208910 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -6,7 +6,7 @@ * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright 2018-2020, 2022 Intel Corporation + * Copyright 2018-2020, 2022-2023 Intel Corporation */ #include <linux/if_ether.h> @@ -510,8 +510,12 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) + if (!new->local->wowlan) { ret = ieee80211_key_enable_hw_accel(new); + } else { + assert_key_lock(new->local); + new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + } } if (ret) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 40f030b8ece9..6148208b320e 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -142,25 +142,34 @@ static int ieee80211_check_dup_link_addrs(struct ieee80211_sub_if_data *sdata) } static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata, - u16 links) + u16 valid_links, u16 dormant_links) { - sdata->vif.valid_links = links; - - if (!links) { + sdata->vif.valid_links = valid_links; + sdata->vif.dormant_links = dormant_links; + + if (!valid_links || + WARN((~valid_links & dormant_links) || + !(valid_links & ~dormant_links), + "Invalid links: valid=0x%x, dormant=0x%x", + valid_links, dormant_links)) { sdata->vif.active_links = 0; + sdata->vif.dormant_links = 0; return; } switch (sdata->vif.type) { case NL80211_IFTYPE_AP: /* in an AP all links are always active */ - sdata->vif.active_links = links; + sdata->vif.active_links = valid_links; + + /* AP links are not expected to be disabled */ + WARN_ON(dormant_links); break; case NL80211_IFTYPE_STATION: if (sdata->vif.active_links) break; - WARN_ON(hweight16(links) > 1); - sdata->vif.active_links = links; + sdata->vif.active_links = valid_links & ~dormant_links; + WARN_ON(hweight16(sdata->vif.active_links) > 1); break; default: WARN_ON(1); @@ -169,7 +178,7 @@ static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata, static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct link_container **to_free, - u16 new_links) + u16 new_links, u16 dormant_links) { u16 old_links = sdata->vif.valid_links; u16 old_active = sdata->vif.active_links; @@ -245,7 +254,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, /* for keys we will not be able to undo this */ ieee80211_tear_down_links(sdata, to_free, rem); - ieee80211_set_vif_links_bitmaps(sdata, new_links); + ieee80211_set_vif_links_bitmaps(sdata, new_links, dormant_links); /* tell the driver */ ret = drv_change_vif_links(sdata->local, sdata, @@ -258,7 +267,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, /* restore config */ memcpy(sdata->link, old_data, 
sizeof(old_data)); memcpy(sdata->vif.link_conf, old, sizeof(old)); - ieee80211_set_vif_links_bitmaps(sdata, old_links); + ieee80211_set_vif_links_bitmaps(sdata, old_links, dormant_links); /* and free (only) the newly allocated links */ memset(to_free, 0, sizeof(links)); goto free; @@ -282,12 +291,13 @@ deinit: } int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, - u16 new_links) + u16 new_links, u16 dormant_links) { struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS]; int ret; - ret = ieee80211_vif_update_links(sdata, links, new_links); + ret = ieee80211_vif_update_links(sdata, links, new_links, + dormant_links); ieee80211_free_links(sdata, links); return ret; @@ -304,7 +314,7 @@ void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) */ sdata_lock(sdata); - ieee80211_vif_update_links(sdata, links, 0); + ieee80211_vif_update_links(sdata, links, 0, 0); sdata_unlock(sdata); ieee80211_free_links(sdata, links); @@ -328,8 +338,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; - /* cannot activate links that don't exist */ - if (active_links & ~sdata->vif.valid_links) + if (active_links & ~ieee80211_vif_usable_links(&sdata->vif)) return -EINVAL; /* nothing to do */ @@ -438,14 +447,14 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) +int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; u16 old_active; int ret; - sdata_lock(sdata); + sdata_assert_lock(sdata); mutex_lock(&local->sta_mtx); mutex_lock(&local->mtx); mutex_lock(&local->key_mtx); @@ -467,6 +476,17 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) mutex_unlock(&local->key_mtx); mutex_unlock(&local->mtx); mutex_unlock(&local->sta_mtx); + + return ret; +} + +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + int ret; + + sdata_lock(sdata); + ret = __ieee80211_set_active_links(vif, active_links); sdata_unlock(sdata); return ret; @@ -484,8 +504,7 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif, if (sdata->vif.type != NL80211_IFTYPE_STATION) return; - /* cannot activate links that don't exist */ - if (active_links & ~sdata->vif.valid_links) + if (active_links & ~ieee80211_vif_usable_links(&sdata->vif)) return; /* nothing to do */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 55cdfaef0f5d..24315d7b3126 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <net/mac80211.h> @@ -291,7 +291,7 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata, drv_link_info_changed(local, sdata, link->conf, link->link_id, changed); } -u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) +u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { sdata->vif.bss_conf.use_cts_prot = false; sdata->vif.bss_conf.use_short_preamble = false; @@ -364,7 +364,8 @@ static void ieee80211_restart_work(struct work_struct *work) * The exception is 
ieee80211_chswitch_done. * Then we can have a race... */ - cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work); + wiphy_work_cancel(local->hw.wiphy, + &sdata->u.mgd.csa_connection_drop_work); if (sdata->vif.bss_conf.csa_active) { sdata_lock(sdata); ieee80211_sta_connection_lost(sdata, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f72333201903..af8c5fc2db14 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation * Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> */ @@ -45,7 +45,7 @@ static void ieee80211_mesh_housekeeping_timer(struct timer_list *t) set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); } /** @@ -133,10 +133,10 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) * * Returns: beacon changed flag if the beacon content changed. */ -u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) +u64 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) { bool free_plinks; - u32 changed = 0; + u64 changed = 0; /* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0, * the mesh interface might be able to establish plinks with peers that @@ -162,7 +162,7 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed = mesh_plink_deactivate(sta); + u64 changed = mesh_plink_deactivate(sta); if (changed) ieee80211_mbss_info_change_notify(sdata, changed); @@ -703,7 +703,7 @@ static void ieee80211_mesh_path_timer(struct timer_list *t) struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.mesh.mesh_path_timer); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } static void ieee80211_mesh_path_root_timer(struct timer_list *t) @@ -714,7 +714,7 @@ static void ieee80211_mesh_path_root_timer(struct timer_list *t) set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -923,7 +923,7 @@ unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u32 changed; + u64 changed; if (ifmsh->mshcfg.plink_timeout > 0) ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); @@ -1164,7 +1164,7 @@ ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) } void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, - u32 changed) + u64 changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; unsigned long bits = changed; @@ -1177,14 +1177,14 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) set_bit(bit, &ifmsh->mbss_changed); set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = 
&sdata->u.mesh; struct ieee80211_local *local = sdata->local; - u32 changed = BSS_CHANGED_BEACON | + u64 changed = BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | @@ -1202,7 +1202,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ifmsh->sync_offset_clockdrift_max = 0; set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; @@ -1525,12 +1525,11 @@ free: kfree(elems); } -int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) +int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_csa_settings *tmp_csa_settings; int ret = 0; - int changed = 0; /* Reset the TTL value and Initiator flag */ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; @@ -1545,15 +1544,16 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) if (ret) return -EINVAL; - changed |= BSS_CHANGED_BEACON; + *changed |= BSS_CHANGED_BEACON; mcsa_dbg(sdata, "complete switching to center freq %d MHz", sdata->vif.bss_conf.chandef.chan->center_freq); - return changed; + return 0; } int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_csa_settings *csa_settings) + struct cfg80211_csa_settings *csa_settings, + u64 *changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_csa_settings *tmp_csa_settings; @@ -1579,7 +1579,8 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, return ret; } - return BSS_CHANGED_BEACON; + *changed |= BSS_CHANGED_BEACON; + return 0; } static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, @@ -1720,7 +1721,8 @@ out: static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u32 bit, changed = 0; + u32 bit; + u64 changed = 0; for_each_set_bit(bit, &ifmsh->mbss_changed, sizeof(changed) * BITS_PER_BYTE) { diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 022f41292a05..6c94222a9df5 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008, 2009 open80211s Ltd. 
+ * Copyright (C) 2023 Intel Corporation * Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> */ @@ -252,11 +253,11 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method); /* wrapper for ieee80211_bss_info_change_notify() */ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, - u32 changed); + u64 changed); /* mesh power save */ -u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata); -u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, +u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata); +u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, enum nl80211_mesh_power_mode pm); void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, @@ -303,12 +304,12 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *ie, struct ieee80211_rx_status *rx_status); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); -u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); +u64 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_timer(struct timer_list *t); void mesh_plink_broken(struct sta_info *sta); -u32 mesh_plink_deactivate(struct sta_info *sta); -u32 mesh_plink_open(struct sta_info *sta); -u32 mesh_plink_block(struct sta_info *sta); +u64 mesh_plink_deactivate(struct sta_info *sta); +u64 mesh_plink_open(struct sta_info *sta); +u64 mesh_plink_block(struct sta_info *sta); void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status); @@ -349,14 +350,14 @@ void mesh_path_refresh(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_MESH static inline -u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +u64 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.estab_plinks); return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline -u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +u64 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.estab_plinks); return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 5217e1d97dd6..51369072984e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. 
- * Copyright (C) 2019, 2021-2022 Intel Corporation + * Copyright (C) 2019, 2021-2023 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ @@ -1026,14 +1026,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } else mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 8f168bc4e4b8..f3d5bb0a59f1 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021-2022 Intel Corporation + * Copyright (C) 2019, 2021-2023 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ #include <linux/gfp.h> @@ -90,12 +90,13 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) * * Returns BSS_CHANGED_ERP_SLOT or 0 for no change. */ -static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) +static u64 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct sta_info *sta; - u32 erp_rates = 0, changed = 0; + u32 erp_rates = 0; + u64 changed = 0; int i; bool short_slot = false; @@ -153,7 +154,7 @@ out: * is selected if all peers in our 20/40MHz MBSS support HT and at least one * HT20 peer is present. Otherwise no-protection mode is selected. 
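A note on the u32 -> u64 conversions running through these mesh hunks: the BSS_CHANGED_* bitmap outgrew 32 bits once mac80211 defined flags past BIT(31), so every helper that accumulates or returns changed-flags is widened, and ieee80211_mesh_finish_csa()/ieee80211_mesh_csa_beacon() above additionally switch from returning the flags to filling a u64 out-parameter and returning an error code. A minimal standalone sketch of the truncation the widening avoids (the flag values here are illustrative, not the real BSS_CHANGED_* assignments):

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative flags; the real BSS_CHANGED_* live in mac80211.h */
    #define CHANGED_BEACON    (1ULL << 0)
    #define CHANGED_HIGH_FLAG (1ULL << 32)  /* does not fit in a u32 */

    static uint64_t collect_changes(void)
    {
        return CHANGED_BEACON | CHANGED_HIGH_FLAG;
    }

    int main(void)
    {
        uint32_t narrow = (uint32_t)collect_changes(); /* silently drops bit 32 */
        uint64_t wide = collect_changes();

        printf("u32 accumulator: %#x (high flag lost)\n", (unsigned)narrow);
        printf("u64 accumulator: %#llx\n", (unsigned long long)wide);
        return 0;
    }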
*/ -static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) +static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; @@ -365,10 +366,10 @@ free: * * Locking: the caller must hold sta->mesh->plink_lock */ -static u32 __mesh_plink_deactivate(struct sta_info *sta) +static u64 __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed = 0; + u64 changed = 0; lockdep_assert_held(&sta->mesh->plink_lock); @@ -390,10 +391,10 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) * * All mesh paths with this peer as next hop will be flushed */ -u32 mesh_plink_deactivate(struct sta_info *sta) +u64 mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u64 changed; spin_lock_bh(&sta->mesh->plink_lock); changed = __mesh_plink_deactivate(sta); @@ -622,7 +623,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { struct sta_info *sta; - u32 changed = 0; + u64 changed = 0; sta = mesh_sta_info_get(sdata, hw_addr, elems, rx_status); if (!sta) @@ -775,10 +776,10 @@ static u16 mesh_get_new_llid(struct ieee80211_sub_if_data *sdata) return llid; } -u32 mesh_plink_open(struct sta_info *sta) +u64 mesh_plink_open(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u64 changed; if (!test_sta_flag(sta, WLAN_STA_AUTH)) return 0; @@ -805,9 +806,9 @@ u32 mesh_plink_open(struct sta_info *sta) return changed; } -u32 mesh_plink_block(struct sta_info *sta) +u64 mesh_plink_block(struct sta_info *sta) { - u32 changed; + u64 changed; spin_lock_bh(&sta->mesh->plink_lock); changed = __mesh_plink_deactivate(sta); @@ -831,11 +832,11 @@ static void mesh_plink_close(struct ieee80211_sub_if_data *sdata, mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); } -static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata, +static u64 mesh_plink_establish(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; - u32 changed = 0; + u64 changed = 0; del_timer(&sta->mesh->plink_timer); sta->mesh->plink_state = NL80211_PLINK_ESTAB; @@ -857,12 +858,12 @@ static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata, * * Return: changed MBSS flags */ -static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, +static u64 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, enum plink_event event) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; enum ieee80211_self_protected_actioncode action = 0; - u32 changed = 0; + u64 changed = 0; bool flush = false; mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, @@ -1117,7 +1118,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; enum plink_event event; enum ieee80211_self_protected_actioncode ftype; - u32 changed = 0; + u64 changed = 0; u8 ie_len = elems->peering_len; u16 plid, llid = 0; diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3fbd0b9ff913..35eacca43e49 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -3,6 +3,7 @@ * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de> * Copyright 2012-2013, cozybit Inc. 
* Copyright (C) 2021 Intel Corporation + * Copyright (C) 2023 Intel Corporation */ #include "mesh.h" @@ -77,14 +78,14 @@ static void mps_qos_null_tx(struct sta_info *sta) * sets the non-peer power mode and triggers the driver PS (re-)configuration * Return BSS_CHANGED_BEACON if a beacon update is necessary. */ -u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) +u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct sta_info *sta; bool peering = false; int light_sleep_cnt = 0; int deep_sleep_cnt = 0; - u32 changed = 0; + u64 changed = 0; enum nl80211_mesh_power_mode nonpeer_pm; rcu_read_lock(); @@ -148,7 +149,7 @@ u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) * @pm: the power mode to set * Return BSS_CHANGED_BEACON if a beacon update is in order. */ -u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, +u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, enum nl80211_mesh_power_mode pm) { struct ieee80211_sub_if_data *sdata = sta->sdata; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a4303130ef2..f93eb38ae0b8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -511,16 +511,14 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, /* don't check HE if we associated as non-HE station */ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE || - !ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) { + !ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) { he_oper = NULL; eht_oper = NULL; } /* don't check EHT if we associated as non-EHT station */ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT || - !ieee80211_get_eht_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) + !ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) eht_oper = NULL; /* @@ -776,8 +774,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, const struct ieee80211_sta_he_cap *he_cap; u8 he_cap_size; - he_cap = ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); if (WARN_ON(!he_cap)) return; @@ -806,10 +803,8 @@ static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, const struct ieee80211_sta_eht_cap *eht_cap; u8 eht_cap_size; - he_cap = ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); - eht_cap = ieee80211_get_eht_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); /* * EHT capabilities element is only added if the HE capabilities element @@ -1287,7 +1282,7 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, u8 *ml_elem_len; void *capab_pos; - if (!sdata->vif.valid_links) + if (!ieee80211_vif_is_mld(&sdata->vif)) return; ift_ext_capa = cfg80211_get_iftype_ext_capa(local->hw.wiphy, @@ -1462,7 +1457,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) capab |= WLAN_CAPABILITY_PRIVACY; } - if (sdata->vif.valid_links) { + if (ieee80211_vif_is_mld(&sdata->vif)) { /* consider the multi-link element with STA profile */ size += sizeof(struct ieee80211_multi_link_elem); /* max common info field in basic multi-link element */ @@ -1680,10 +1675,12 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, } /* spectrum management related things */ -static void 
ieee80211_chswitch_work(struct work_struct *work) +static void ieee80211_chswitch_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_link_data *link = - container_of(work, struct ieee80211_link_data, u.mgd.chswitch_work); + container_of(work, struct ieee80211_link_data, + u.mgd.chswitch_work.work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1723,8 +1720,8 @@ static void ieee80211_chswitch_work(struct work_struct *work) sdata_info(sdata, "failed to use reserved channel context, disconnecting (err=%d)\n", ret); - ieee80211_queue_work(&sdata->local->hw, - &ifmgd->csa_connection_drop_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); goto out; } @@ -1735,8 +1732,8 @@ static void ieee80211_chswitch_work(struct work_struct *work) &link->csa_chandef)) { sdata_info(sdata, "failed to finalize channel switch, disconnecting\n"); - ieee80211_queue_work(&sdata->local->hw, - &ifmgd->csa_connection_drop_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); goto out; } @@ -1780,8 +1777,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) if (ret) { sdata_info(sdata, "driver post channel switch failed, disconnecting\n"); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); return; } @@ -1793,31 +1790,23 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) success = false; trace_api_chswitch_done(sdata, success); if (!success) { sdata_info(sdata, "driver channel switch failed, disconnecting\n"); - ieee80211_queue_work(&sdata->local->hw, - &ifmgd->csa_connection_drop_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); } else { - ieee80211_queue_work(&sdata->local->hw, - &sdata->deflink.u.mgd.chswitch_work); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->deflink.u.mgd.chswitch_work, + 0); } } EXPORT_SYMBOL(ieee80211_chswitch_done); -static void ieee80211_chswitch_timer(struct timer_list *t) -{ - struct ieee80211_link_data *link = - from_timer(link, t, u.mgd.chswitch_timer); - - ieee80211_queue_work(&link->sdata->local->hw, - &link->u.mgd.chswitch_work); -} - static void ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) { @@ -1861,6 +1850,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, struct ieee80211_csa_ie csa_ie; struct ieee80211_channel_switch ch_switch; struct ieee80211_bss *bss; + unsigned long timeout; int res; sdata_assert_lock(sdata); @@ -1868,9 +1858,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, if (!cbss) return; - if (local->scanning) - return; - current_band = cbss->channel->band; bss = (void *)cbss->priv; res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, @@ -1994,8 +1981,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&local->mtx); - cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, 0, - csa_ie.count, csa_ie.mode, 0); + cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, + link->link_id, csa_ie.count, + csa_ie.mode, 0); if 
(local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -2004,12 +1992,11 @@ static void ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* channel switch handled in software */ - if (csa_ie.count <= 1) - ieee80211_queue_work(&local->hw, &link->u.mgd.chswitch_work); - else - mod_timer(&link->u.mgd.chswitch_timer, - TU_TO_EXP_TIME((csa_ie.count - 1) * - cbss->beacon_interval)); + timeout = TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * + cbss->beacon_interval); + wiphy_delayed_work_queue(local->hw.wiphy, + &link->u.mgd.chswitch_work, + timeout); return; lock_and_drop_connection: mutex_lock(&local->mtx); @@ -2025,7 +2012,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, link->conf->csa_active = true; link->csa_block_tx = csa_ie.mode; - ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); mutex_unlock(&local->mtx); } @@ -2116,7 +2104,7 @@ static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata, *pwr_level = (__s8)cisco_dtpc_ie[4]; } -static u32 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link, +static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link, struct ieee80211_channel *channel, struct ieee80211_mgmt *mgmt, const u8 *country_ie, u8 country_ie_len, @@ -2650,9 +2638,9 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].aifs = pos[0] & 0x0f; if (params[ac].aifs < 2) { - sdata_info(sdata, - "AP has invalid WMM params (AIFSN=%d for ACI %d), will use 2\n", - params[ac].aifs, aci); + link_info(link, + "AP has invalid WMM params (AIFSN=%d for ACI %d), will use 2\n", + params[ac].aifs, aci); params[ac].aifs = 2; } params[ac].cw_max = ecw2cw((pos[1] & 0xf0) >> 4); @@ -2663,9 +2651,9 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, if (params[ac].cw_min == 0 || params[ac].cw_min > params[ac].cw_max) { - sdata_info(sdata, - "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", - params[ac].cw_min, params[ac].cw_max, aci); + link_info(link, + "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", + params[ac].cw_min, params[ac].cw_max, aci); return false; } ieee80211_regulatory_limit_wmm_params(sdata, &params[ac], ac); @@ -2674,9 +2662,9 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, /* WMM specification requires all 4 ACIs.
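The WMM hunks above keep the existing sanity checks — an AIFSN below 2 is clamped, and a zero CWmin or CWmin greater than CWmax rejects the parameters — and only reroute the diagnostics from sdata_info() to link_info() so the output is attributable per link under MLO. A standalone sketch of the same validation, with ecw2cw() spelled out from its usual definition (CW = 2^ECW - 1); the struct here is illustrative, not mac80211's ieee80211_tx_queue_params:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct wmm_ac {
        uint8_t aifs;
        uint16_t cw_min, cw_max;
    };

    /* contention window from its exponent: CW = 2^ECW - 1 */
    static uint16_t ecw2cw(uint8_t ecw)
    {
        return (1 << ecw) - 1;
    }

    static bool wmm_ac_usable(struct wmm_ac *ac)
    {
        if (ac->aifs < 2) {
            fprintf(stderr, "invalid AIFSN=%u, clamping to 2\n", ac->aifs);
            ac->aifs = 2;   /* clamp, as the hunk above does */
        }
        /* a zero CWmin or CWmin > CWmax means the AP sent garbage */
        return ac->cw_min != 0 && ac->cw_min <= ac->cw_max;
    }

    int main(void)
    {
        struct wmm_ac ac = { .aifs = 1, .cw_min = ecw2cw(4), .cw_max = ecw2cw(10) };

        printf("params %s\n", wmm_ac_usable(&ac) ? "usable" : "rejected");
        return 0;
    }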
*/ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { if (params[ac].cw_min == 0) { - sdata_info(sdata, - "AP has invalid WMM params (missing AC %d), using defaults\n", - ac); + link_info(link, + "AP has invalid WMM params (missing AC %d), using defaults\n", + ac); return false; } } @@ -2706,12 +2694,12 @@ static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->mtx); } -static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, +static u64 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, u16 capab, bool erp_valid, u8 erp) { struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_supported_band *sband; - u32 changed = 0; + u64 changed = 0; bool use_protection; bool use_short_preamble; bool use_short_slot; @@ -2757,7 +2745,7 @@ static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link, struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_bss *bss = (void *)cbss->priv; - u32 changed = BSS_CHANGED_QOS; + u64 changed = BSS_CHANGED_QOS; /* not really used in MLO */ sdata->u.mgd.beacon_timeout = @@ -2831,6 +2819,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) continue; + if (ieee80211_vif_is_mld(&sdata->vif) && + !(ieee80211_vif_usable_links(&sdata->vif) & BIT(link_id))) + continue; + link = sdata_dereference(sdata->link[link_id], sdata); if (WARN_ON(!link)) return; @@ -2849,7 +2841,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, if (vif_cfg->arp_addr_cnt) vif_changed |= BSS_CHANGED_ARP_FILTER; - if (sdata->vif.valid_links) { + if (ieee80211_vif_is_mld(&sdata->vif)) { for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { @@ -2857,6 +2849,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; if (!cbss || + !(BIT(link_id) & + ieee80211_vif_usable_links(&sdata->vif)) || assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) continue; @@ -2881,7 +2875,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); /* leave this here to not change ordering in non-MLO cases */ - if (!sdata->vif.valid_links) + if (!ieee80211_vif_is_mld(&sdata->vif)) ieee80211_recalc_smps(sdata, &sdata->deflink); ieee80211_recalc_ps_vif(sdata); @@ -2895,7 +2889,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; unsigned int link_id; - u32 changed = 0; + u64 changed = 0; struct ieee80211_prep_tx_info info = { .subtype = stype, }; @@ -2977,7 +2971,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sta_info_flush(sdata); /* finally reset all BSS / config parameters */ - if (!sdata->vif.valid_links) + if (!ieee80211_vif_is_mld(&sdata->vif)) changed |= ieee80211_reset_erp_info(sdata); ieee80211_led_assoc(local, 0); @@ -3002,7 +2996,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sizeof(sdata->vif.bss_conf.mu_group.membership)); memset(sdata->vif.bss_conf.mu_group.position, 0, sizeof(sdata->vif.bss_conf.mu_group.position)); - if (!sdata->vif.valid_links) + if (!ieee80211_vif_is_mld(&sdata->vif)) changed |= BSS_CHANGED_MU_GROUPS; sdata->vif.bss_conf.mu_mimo_owner = false; @@ -3016,7 +3010,7 @@ static void 
ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_ARP_FILTER; sdata->vif.bss_conf.qos = false; - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { changed |= BSS_CHANGED_QOS; /* The BSSID (not really interesting) and HT changed */ changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; @@ -3031,7 +3025,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.conn_mon_timer); del_timer_sync(&sdata->u.mgd.bcn_mon_timer); del_timer_sync(&sdata->u.mgd.timer); - del_timer_sync(&sdata->deflink.u.mgd.chswitch_timer); sdata->vif.bss_conf.dtim_period = 0; sdata->vif.bss_conf.beacon_rate = NULL; @@ -3072,7 +3065,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(sdata->vif.bss_conf.tx_pwr_env, 0, sizeof(sdata->vif.bss_conf.tx_pwr_env)); - ieee80211_vif_set_links(sdata, 0); + ieee80211_vif_set_links(sdata, 0, 0); } static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) @@ -3162,7 +3155,7 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.probe_send_count = 0; else sdata->u.mgd.nullfunc_failed = true; - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata, @@ -3186,7 +3179,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) u8 unicast_limit = max(1, max_probe_tries - 3); struct sta_info *sta; - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) return; /* @@ -3234,7 +3227,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; - if (WARN_ON_ONCE(sdata->vif.valid_links)) + if (WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif))) return; if (!ieee80211_sdata_running(sdata)) @@ -3309,7 +3302,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, int ssid_len; if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - sdata->vif.valid_links)) + ieee80211_vif_is_mld(&sdata->vif))) return NULL; sdata_assert_lock(sdata); @@ -3360,21 +3353,19 @@ static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata, drv_event_callback(sdata->local, sdata, &event); } -static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) +static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; bool tx; - sdata_lock(sdata); - if (!ifmgd->associated) { - sdata_unlock(sdata); + if (!ifmgd->associated) return; - } /* in MLO assume we have a link where we can TX the frame */ - tx = sdata->vif.valid_links || !sdata->deflink.csa_block_tx; + tx = ieee80211_vif_is_mld(&sdata->vif) || + !sdata->deflink.csa_block_tx; if (!ifmgd->driver_disconnect) { unsigned int link_id; @@ -3419,11 +3410,17 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, ifmgd->reconnect); ifmgd->reconnect = false; +} +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) +{ + sdata_lock(sdata); + ___ieee80211_disconnect(sdata); sdata_unlock(sdata); } -static void ieee80211_beacon_connection_loss_work(struct work_struct *work) +static void ieee80211_beacon_connection_loss_work(struct wiphy *wiphy, + struct wiphy_work 
*work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -3448,7 +3445,8 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) } } -static void ieee80211_csa_connection_drop_work(struct work_struct *work) +static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -3465,7 +3463,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) trace_api_beacon_loss(sdata); sdata->u.mgd.connection_loss = false; - ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); + wiphy_work_queue(hw->wiphy, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_beacon_loss); @@ -3477,7 +3475,7 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif) trace_api_connection_loss(sdata); sdata->u.mgd.connection_loss = true; - ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); + wiphy_work_queue(hw->wiphy, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_connection_loss); @@ -3493,7 +3491,7 @@ void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect) sdata->u.mgd.driver_disconnect = true; sdata->u.mgd.reconnect = reconnect; - ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); + wiphy_work_queue(hw->wiphy, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_disconnect); @@ -3522,7 +3520,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - ieee80211_vif_set_links(sdata, 0); + ieee80211_vif_set_links(sdata, 0, 0); mutex_unlock(&sdata->local->mtx); } @@ -3573,7 +3571,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(data.bss); i++) data.bss[i] = assoc_data->link[i].bss; - if (sdata->vif.valid_links) + if (ieee80211_vif_is_mld(&sdata->vif)) data.ap_mld_addr = assoc_data->ap_addr; cfg80211_assoc_failure(sdata->dev, &data); @@ -3581,7 +3579,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - ieee80211_vif_set_links(sdata, 0); + ieee80211_vif_set_links(sdata, 0, 0); mutex_unlock(&sdata->local->mtx); } @@ -3909,8 +3907,8 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, *have_higher_than_11mbit = true; /* - * Skip HT, VHT, HE and SAE H2E only BSS membership selectors - * since they're not rates. + * Skip HT, VHT, HE, EHT and SAE H2E only BSS membership + * selectors since they're not rates. 
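The hunk above adds the EHT selector to the values skipped when walking the (Extended) Supported Rates element: a BSS membership selector is encoded like a basic rate (0x80 bit set) but names a required capability rather than a rate. A standalone sketch of the filtering; the selector values follow the IEEE 802.11 assignments (HT 127, VHT 126, SAE H2E 123, HE 122, EHT 121) and should be treated as an assumption of this sketch rather than a quote of ieee80211.h:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum {
        SEL_EHT_PHY = 121,
        SEL_HE_PHY  = 122,
        SEL_SAE_H2E = 123,
        SEL_VHT_PHY = 126,
        SEL_HT_PHY  = 127,
    };

    static bool is_membership_selector(uint8_t rate_byte)
    {
        uint8_t v = rate_byte & 0x7f;

        if (!(rate_byte & 0x80))    /* selectors carry the basic-rate bit */
            return false;
        return v == SEL_HT_PHY || v == SEL_VHT_PHY || v == SEL_HE_PHY ||
               v == SEL_EHT_PHY || v == SEL_SAE_H2E;
    }

    int main(void)
    {
        /* Supported Rates: basic 1 Mb/s, 54 Mb/s, then an HT selector */
        uint8_t rates[] = { 0x82, 0x6c, 0x80 | SEL_HT_PHY };

        for (unsigned int i = 0; i < sizeof(rates); i++) {
            if (is_membership_selector(rates[i]))
                continue;   /* not a rate; skip it like ieee80211_get_rates() */
            printf("rate: %u kb/s\n", (rates[i] & 0x7f) * 500);
        }
        return 0;
    }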
* * Note: Even though the membership selector and the basic * rate flag share the same bit, they are not exactly @@ -3919,6 +3917,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) || supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY) || supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY) || + supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_EHT_PHY) || supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_SAE_H2E)) continue; @@ -3949,8 +3948,7 @@ static bool ieee80211_twt_req_supported(struct ieee80211_sub_if_data *sdata, const struct ieee802_11_elems *elems) { const struct ieee80211_sta_he_cap *own_he_cap = - ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); if (elems->ext_capab_len < 10) return false; @@ -3965,7 +3963,7 @@ static bool ieee80211_twt_req_supported(struct ieee80211_sub_if_data *sdata, IEEE80211_HE_MAC_CAP0_TWT_REQ); } -static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata, +static u64 ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, struct ieee80211_link_data *link, struct link_sta_info *link_sta, @@ -3986,8 +3984,7 @@ static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta) { const struct ieee80211_sta_he_cap *own_he_cap = - ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); return bss_conf->he_support && (link_sta->pub->he_cap.he_cap_elem.mac_cap_info[2] & @@ -4021,6 +4018,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, const struct cfg80211_bss_ies *bss_ies = NULL; struct ieee80211_supported_band *sband; struct ieee802_11_elems *elems; + const __le16 prof_bss_param_ch_present = + cpu_to_le16(IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT); u16 capab_info; bool ret; @@ -4036,7 +4035,17 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, * successful, so set the status directly to success */ assoc_data->link[link_id].status = WLAN_STATUS_SUCCESS; - } else if (!elems->prof) { + if (elems->ml_basic) { + if (!(elems->ml_basic->control & + cpu_to_le16(IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT))) { + ret = false; + goto out; + } + link->u.mgd.bss_param_ch_cnt = + ieee80211_mle_get_bss_param_ch_cnt(elems->ml_basic); + } + } else if (!elems->prof || + !(elems->prof->control & prof_bss_param_ch_present)) { ret = false; goto out; } else { @@ -4049,6 +4058,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, */ capab_info = get_unaligned_le16(ptr); assoc_data->link[link_id].status = get_unaligned_le16(ptr + 2); + link->u.mgd.bss_param_ch_cnt = + ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(elems->prof); if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) { link_info(link, "association response status code=%u\n", @@ -4624,8 +4635,7 @@ ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata, const struct ieee80211_he_operation *he_op) { const struct ieee80211_sta_he_cap *sta_he_cap = - ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); u16 ap_min_req_set; int i; @@ -4698,6 +4708,89 @@ ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata, return false; } +static u8 +ieee80211_get_eht_cap_mcs_nss(const 
struct ieee80211_sta_he_cap *sta_he_cap, + const struct ieee80211_sta_eht_cap *sta_eht_cap, + unsigned int idx, int bw) +{ + u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0]; + u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0]; + + /* handle us being a 20 MHz-only EHT STA - with four values + * for MCS 0-7, 8-9, 10-11, 12-13. + */ + if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) + return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx]; + + /* the others have MCS 0-9 together, rather than separately from 0-7 */ + if (idx > 0) + idx--; + + switch (bw) { + case 0: + return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx]; + case 1: + if (!(he_phy_cap0 & + (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G))) + return 0xff; /* pass check */ + return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx]; + case 2: + if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)) + return 0xff; /* pass check */ + return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx]; + } + + WARN_ON(1); + return 0; +} + +static bool +ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_eht_operation *eht_op) +{ + const struct ieee80211_sta_he_cap *sta_he_cap = + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + const struct ieee80211_sta_eht_cap *sta_eht_cap = + ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); + const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req; + unsigned int i; + + if (!sta_he_cap || !sta_eht_cap || !eht_op) + return false; + + req = &eht_op->basic_mcs_nss; + + for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) { + u8 req_rx_nss, req_tx_nss; + unsigned int bw; + + req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i], + IEEE80211_EHT_MCS_NSS_RX); + req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i], + IEEE80211_EHT_MCS_NSS_TX); + + for (bw = 0; bw < 3; bw++) { + u8 have, have_rx_nss, have_tx_nss; + + have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap, + sta_eht_cap, + i, bw); + have_rx_nss = u8_get_bits(have, + IEEE80211_EHT_MCS_NSS_RX); + have_tx_nss = u8_get_bits(have, + IEEE80211_EHT_MCS_NSS_TX); + + if (req_rx_nss > have_rx_nss || + req_tx_nss > have_tx_nss) + return false; + } + } + + return true; +} + static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_bss *cbss, @@ -4716,7 +4809,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_elems_parse_params parse_params = { - .bss = cbss, .link_id = -1, .from_ap = true, }; @@ -4759,15 +4851,13 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, *conn_flags |= IEEE80211_CONN_DISABLE_EHT; } - if (!ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) { + if (!ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) { mlme_dbg(sdata, "HE not supported, disabling HE and EHT\n"); *conn_flags |= IEEE80211_CONN_DISABLE_HE; *conn_flags |= IEEE80211_CONN_DISABLE_EHT; } - if (!ieee80211_get_eht_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) { + if (!ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) { mlme_dbg(sdata, "EHT not supported, disabling EHT\n"); *conn_flags |= IEEE80211_CONN_DISABLE_EHT; } @@ -4844,6 +4934,7 @@ static int ieee80211_prep_channel(struct 
ieee80211_sub_if_data *sdata, IEEE80211_CONN_DISABLE_EHT)) && he_oper) { const struct cfg80211_bss_ies *cbss_ies; + const struct element *eht_ml_elem; const u8 *eht_oper_ie; cbss_ies = rcu_dereference(cbss->ies); @@ -4854,6 +4945,24 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, eht_oper = (void *)(eht_oper_ie + 3); else eht_oper = NULL; + + if (!ieee80211_verify_sta_eht_mcs_support(sdata, sband, eht_oper)) + *conn_flags |= IEEE80211_CONN_DISABLE_EHT; + + eht_ml_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK, + cbss_ies->data, cbss_ies->len); + + /* data + 1 / datalen - 1 since it's an extended element */ + if (!(*conn_flags & IEEE80211_CONN_DISABLE_EHT) && + eht_ml_elem && + ieee80211_mle_type_ok(eht_ml_elem->data + 1, + IEEE80211_ML_CONTROL_TYPE_BASIC, + eht_ml_elem->datalen - 1)) { + sdata->vif.cfg.eml_cap = + ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1); + sdata->vif.cfg.eml_med_sync_delay = + ieee80211_mle_get_eml_med_sync_delay(eht_ml_elem->data + 1); + } } /* Allow VHT if at least one channel on the sband supports 80 MHz */ @@ -4980,7 +5089,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, unsigned int link_id; struct sta_info *sta; u64 changed[IEEE80211_MLD_MAX_NUM_LINKS] = {}; - u16 valid_links = 0; + u16 valid_links = 0, dormant_links = 0; int err; mutex_lock(&sdata->local->sta_mtx); @@ -4992,20 +5101,22 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!sta)) goto out_err; - if (sdata->vif.valid_links) { + if (ieee80211_vif_is_mld(&sdata->vif)) { for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!assoc_data->link[link_id].bss) continue; - valid_links |= BIT(link_id); - if (link_id != assoc_data->assoc_link_id) { + valid_links |= BIT(link_id); + if (assoc_data->link[link_id].disabled) { + dormant_links |= BIT(link_id); + } else if (link_id != assoc_data->assoc_link_id) { err = ieee80211_sta_allocate_link(sta, link_id); if (err) goto out_err; } } - ieee80211_vif_set_links(sdata, valid_links); + ieee80211_vif_set_links(sdata, valid_links, dormant_links); } for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { @@ -5013,14 +5124,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link; struct link_sta_info *link_sta; - if (!cbss) + if (!cbss || assoc_data->link[link_id].disabled) continue; link = sdata_dereference(sdata->link[link_id], sdata); if (WARN_ON(!link)) goto out_err; - if (sdata->vif.valid_links) + if (ieee80211_vif_is_mld(&sdata->vif)) link_info(link, "local address %pM, AP link address %pM%s\n", link->conf->addr, @@ -5085,7 +5196,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } /* links might have changed due to rejected ones, set them again */ - ieee80211_vif_set_links(sdata, valid_links); + ieee80211_vif_set_links(sdata, valid_links, dormant_links); rate_control_rate_init(sta); @@ -5269,25 +5380,25 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ifmgd->broken_ap = true; } - if (sdata->vif.valid_links) { - if (!elems->multi_link) { + if (ieee80211_vif_is_mld(&sdata->vif)) { + if (!elems->ml_basic) { sdata_info(sdata, "MLO association with %pM but no multi-link element in response!\n", assoc_data->ap_addr); goto abandon_assoc; } - if (le16_get_bits(elems->multi_link->control, + if (le16_get_bits(elems->ml_basic->control, IEEE80211_ML_CONTROL_TYPE) != IEEE80211_ML_CONTROL_TYPE_BASIC) { sdata_info(sdata, "bad 
multi-link element (control=0x%x)\n", - le16_to_cpu(elems->multi_link->control)); + le16_to_cpu(elems->ml_basic->control)); goto abandon_assoc; } else { struct ieee80211_mle_basic_common_info *common; - common = (void *)elems->multi_link->variable; + common = (void *)elems->ml_basic->variable; if (memcmp(assoc_data->ap_addr, common->mld_mac_addr, ETH_ALEN)) { @@ -5336,7 +5447,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, resp.uapsd_queues |= ieee80211_ac_to_qos_mask[ac]; } - if (sdata->vif.valid_links) { + if (ieee80211_vif_is_mld(&sdata->vif)) { ether_addr_copy(ap_mld_addr, sdata->vif.cfg.ap_addr); resp.ap_mld_addr = ap_mld_addr; } @@ -5598,6 +5709,169 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, return true; } +static void ieee80211_ml_reconf_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.ml_reconf_work.work); + u16 new_valid_links, new_active_links, new_dormant_links; + int ret; + + sdata_lock(sdata); + if (!sdata->u.mgd.removed_links) { + sdata_unlock(sdata); + return; + } + + sdata_info(sdata, + "MLO Reconfiguration: work: valid=0x%x, removed=0x%x\n", + sdata->vif.valid_links, sdata->u.mgd.removed_links); + + new_valid_links = sdata->vif.valid_links & ~sdata->u.mgd.removed_links; + if (new_valid_links == sdata->vif.valid_links) { + sdata_unlock(sdata); + return; + } + + if (!new_valid_links || + !(new_valid_links & ~sdata->vif.dormant_links)) { + sdata_info(sdata, "No valid links after reconfiguration\n"); + ret = -EINVAL; + goto out; + } + + new_active_links = sdata->vif.active_links & ~sdata->u.mgd.removed_links; + if (new_active_links != sdata->vif.active_links) { + if (!new_active_links) + new_active_links = + BIT(ffs(new_valid_links & + ~sdata->vif.dormant_links) - 1); + + ret = __ieee80211_set_active_links(&sdata->vif, + new_active_links); + if (ret) { + sdata_info(sdata, + "Failed setting active links\n"); + goto out; + } + } + + new_dormant_links = sdata->vif.dormant_links & ~sdata->u.mgd.removed_links; + + ret = ieee80211_vif_set_links(sdata, new_valid_links, + new_dormant_links); + if (ret) + sdata_info(sdata, "Failed setting valid links\n"); + +out: + if (!ret) + cfg80211_links_removed(sdata->dev, sdata->u.mgd.removed_links); + else + ___ieee80211_disconnect(sdata); + + sdata->u.mgd.removed_links = 0; + + sdata_unlock(sdata); +} + +static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems) +{ + const struct ieee80211_multi_link_elem *ml; + const struct element *sub; + size_t ml_len; + unsigned long removed_links = 0; + u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; + u8 link_id; + u32 delay; + + if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf) + return; + + ml_len = cfg80211_defragment_element(elems->ml_reconf_elem, + elems->ie_start, + elems->total_len, + elems->scratch_pos, + elems->scratch + elems->scratch_len - + elems->scratch_pos, + WLAN_EID_FRAGMENT); + + elems->ml_reconf = (const void *)elems->scratch_pos; + elems->ml_reconf_len = ml_len; + ml = elems->ml_reconf; + + /* Directly parse the sub elements as the common information doesn't + * hold any useful information. 
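For the scheduling done at the end of ieee80211_ml_reconfiguration() below: each link slated for removal advertises an AP Removal Timer counted in beacon intervals, the per-link delay is beacon_int * timer (in TUs), and the work is queued after the smallest such delay. A standalone sketch of that reduction, using an explicit TU-to-milliseconds conversion (1 TU = 1024 us) where the kernel uses TU_TO_JIFFIES; the link data is made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t tu_to_ms(uint32_t tu)
    {
        return (uint32_t)(((uint64_t)tu * 1024) / 1000);
    }

    int main(void)
    {
        /* illustrative removed links: beacon interval (TU) and removal timer */
        struct { uint16_t beacon_int, removal_timer; } links[] = {
            { 100, 50 },    /* 50 beacons of 100 TU */
            { 200, 10 },    /* 10 beacons of 200 TU -> earliest removal */
        };
        uint32_t delay = 0;

        for (unsigned int i = 0; i < 2; i++) {
            uint32_t link_delay = (uint32_t)links[i].beacon_int *
                                  links[i].removal_timer;

            if (!delay || link_delay < delay)
                delay = link_delay; /* keep the minimum, as the hunk does */
        }
        printf("queue removal work in %u TU (~%u ms)\n", delay, tu_to_ms(delay));
        return 0;
    }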
+ */ + for_each_mle_subelement(sub, (u8 *)ml, ml_len) { + struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; + u8 *pos = prof->variable; + u16 control; + + if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) + continue; + + if (!ieee80211_mle_reconf_sta_prof_size_ok(sub->data, + sub->datalen)) + return; + + control = le16_to_cpu(prof->control); + link_id = control & IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID; + + removed_links |= BIT(link_id); + + /* the MAC address should not be included, but handle it */ + if (control & + IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT) + pos += 6; + + /* According to Draft P802.11be_D3.0, the control should + * include the AP Removal Timer present. If the AP Removal Timer + * is not present assume immediate removal. + */ + if (control & + IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT) + link_removal_timeout[link_id] = le16_to_cpu(*(__le16 *)pos); + } + + removed_links &= sdata->vif.valid_links; + if (!removed_links) { + /* In case the removal was cancelled, abort it */ + if (sdata->u.mgd.removed_links) { + sdata->u.mgd.removed_links = 0; + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.ml_reconf_work); + } + return; + } + + delay = 0; + for_each_set_bit(link_id, &removed_links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct ieee80211_bss_conf *link_conf = + sdata_dereference(sdata->vif.link_conf[link_id], sdata); + u32 link_delay; + + if (!link_conf) { + removed_links &= ~BIT(link_id); + continue; + } + + link_delay = link_conf->beacon_int * + link_removal_timeout[link_id]; + + if (!delay) + delay = link_delay; + else + delay = min(delay, link_delay); + } + + sdata->u.mgd.removed_links = removed_links; + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.ml_reconf_work, + TU_TO_JIFFIES(delay)); +} + static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_hdr *hdr, size_t len, struct ieee80211_rx_status *rx_status) @@ -5662,7 +5936,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, rcu_read_unlock(); if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && - !WARN_ON(sdata->vif.valid_links) && + !WARN_ON(ieee80211_vif_is_mld(&sdata->vif)) && ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->link[0].bss)) { parse_params.bss = ifmgd->assoc_data->link[0].bss; elems = ieee802_11_parse_elems_full(&parse_params); @@ -5927,6 +6201,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } } + ieee80211_ml_reconfiguration(sdata, elems); + ieee80211_link_info_change_notify(sdata, link, changed); free: kfree(elems); @@ -5997,6 +6273,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: + if (!sdata->u.mgd.associated || + !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) + break; + if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { struct ieee802_11_elems *elems; @@ -6060,7 +6340,7 @@ static void ieee80211_sta_timer(struct timer_list *t) struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.mgd.timer); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, @@ -6204,7 +6484,7 @@ void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.status_acked = acked; sdata->u.mgd.status_received = true; - ieee80211_queue_work(&local->hw, &sdata->work); + 
wiphy_work_queue(local->hw.wiphy, &sdata->work); } void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) @@ -6356,7 +6636,7 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.mgd.bcn_mon_timer); - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) return; if (sdata->vif.bss_conf.csa_active && @@ -6367,8 +6647,8 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) return; sdata->u.mgd.connection_loss = false; - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.beacon_connection_loss_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.beacon_connection_loss_work); } static void ieee80211_sta_conn_mon_timer(struct timer_list *t) @@ -6380,7 +6660,7 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) struct sta_info *sta; unsigned long timeout; - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) return; if (sdata->vif.bss_conf.csa_active && @@ -6524,7 +6804,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) sdata_unlock(sdata); } -static void ieee80211_request_smps_mgd_work(struct work_struct *work) +static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, @@ -6542,12 +6823,14 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); - INIT_WORK(&ifmgd->beacon_connection_loss_work, - ieee80211_beacon_connection_loss_work); - INIT_WORK(&ifmgd->csa_connection_drop_work, - ieee80211_csa_connection_drop_work); + wiphy_work_init(&ifmgd->beacon_connection_loss_work, + ieee80211_beacon_connection_loss_work); + wiphy_work_init(&ifmgd->csa_connection_drop_work, + ieee80211_csa_connection_drop_work); INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); + wiphy_delayed_work_init(&ifmgd->ml_reconf_work, + ieee80211_ml_reconf_work); timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0); timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0); timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); @@ -6574,15 +6857,15 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) link->u.mgd.conn_flags = 0; link->conf->bssid = link->u.mgd.bssid; - INIT_WORK(&link->u.mgd.request_smps_work, - ieee80211_request_smps_mgd_work); + wiphy_work_init(&link->u.mgd.request_smps_work, + ieee80211_request_smps_mgd_work); if (local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS) link->u.mgd.req_smps = IEEE80211_SMPS_AUTOMATIC; else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - INIT_WORK(&link->u.mgd.chswitch_work, ieee80211_chswitch_work); - timer_setup(&link->u.mgd.chswitch_timer, ieee80211_chswitch_timer, 0); + wiphy_delayed_work_init(&link->u.mgd.chswitch_work, + ieee80211_chswitch_work); if (sdata->u.mgd.assoc_data) ether_addr_copy(link->conf->addr, @@ -6623,12 +6906,12 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, mlo = true; if (WARN_ON(!ap_mld_addr)) return -EINVAL; - err = ieee80211_vif_set_links(sdata, BIT(link_id)); + err = ieee80211_vif_set_links(sdata, BIT(link_id), 0); } else { if (WARN_ON(ap_mld_addr)) return -EINVAL; ap_mld_addr = cbss->bssid; - err = ieee80211_vif_set_links(sdata, 0); + err = ieee80211_vif_set_links(sdata, 0, 0); link_id = 0; mlo = false; } @@ -6780,7 
+7063,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, out_err: ieee80211_link_release_channel(&sdata->deflink); - ieee80211_vif_set_links(sdata, 0); + ieee80211_vif_set_links(sdata, 0, 0); return err; } @@ -6935,7 +7218,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return 0; err_clear: - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { eth_zero_addr(sdata->deflink.u.mgd.bssid); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BSSID); @@ -7320,10 +7603,11 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(assoc_data->link); i++) { assoc_data->link[i].conn_flags = conn_flags; assoc_data->link[i].bss = req->links[i].bss; + assoc_data->link[i].disabled = req->links[i].disabled; } /* if there was no authentication, set up the link */ - err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id)); + err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id), 0); if (err) goto err_clear; } else { @@ -7538,8 +7822,10 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) { - cancel_work_sync(&link->u.mgd.request_smps_work); - cancel_work_sync(&link->u.mgd.chswitch_work); + wiphy_work_cancel(link->sdata->local->hw.wiphy, + &link->u.mgd.request_smps_work); + wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, + &link->u.mgd.chswitch_work); } void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) @@ -7552,9 +7838,13 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) * cancelled when disconnecting. */ cancel_work_sync(&ifmgd->monitor_work); - cancel_work_sync(&ifmgd->beacon_connection_loss_work); - cancel_work_sync(&ifmgd->csa_connection_drop_work); + wiphy_work_cancel(sdata->local->hw.wiphy, + &ifmgd->beacon_connection_loss_work); + wiphy_work_cancel(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &ifmgd->ml_reconf_work); sdata_lock(sdata); if (ifmgd->assoc_data) diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index a57dcbe99a0d..b44896e14522 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -4,7 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022 - 2023 Intel Corporation * Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz> * Funded by: Volkswagen Group Research */ @@ -81,7 +81,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, spin_lock(&ifocb->incomplete_lock); list_add(&sta->list, &ifocb->incomplete_stations); spin_unlock(&ifocb->incomplete_lock); - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); } static struct sta_info *ieee80211_ocb_finish_sta(struct sta_info *sta) @@ -157,7 +157,7 @@ static void ieee80211_ocb_housekeeping_timer(struct timer_list *t) set_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); } void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata) @@ -175,7 +175,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; - u32 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID; + u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID; 
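The ieee80211_queue_work() -> wiphy_work_queue() conversions throughout this series (mesh, OCB, MLME) move per-interface work onto cfg80211's wiphy-bound workqueue, whose callbacks run with the wiphy mutex held; plain timers such as chswitch_timer become wiphy_delayed_work. A compile-only sketch of the lifecycle against the cfg80211 API these hunks rely on; the my_* names and the surrounding state are hypothetical, and cancellation assumes the caller holds the wiphy mutex as the cancel helpers require:

    #include <net/cfg80211.h>

    struct my_state {
        struct wiphy *wiphy;
        struct wiphy_work work;
        struct wiphy_delayed_work dwork;
    };

    /* both callbacks run on cfg80211's workqueue with the wiphy mutex held */
    static void my_work_fn(struct wiphy *wiphy, struct wiphy_work *work)
    {
        struct my_state *st = container_of(work, struct my_state, work);

        (void)st;   /* mutex-protected work goes here */
    }

    static void my_dwork_fn(struct wiphy *wiphy, struct wiphy_work *work)
    {
        struct my_state *st = container_of(work, struct my_state, dwork.work);

        (void)st;
    }

    static void my_state_start(struct my_state *st)
    {
        wiphy_work_init(&st->work, my_work_fn);
        wiphy_delayed_work_init(&st->dwork, my_dwork_fn);

        wiphy_work_queue(st->wiphy, &st->work);
        wiphy_delayed_work_queue(st->wiphy, &st->dwork, HZ);  /* ~1 second */
    }

    static void my_state_stop(struct my_state *st)
    {
        /* every queue must be paired with a cancel before teardown */
        wiphy_work_cancel(st->wiphy, &st->work);
        wiphy_delayed_work_cancel(st->wiphy, &st->dwork);
    }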
int err; if (ifocb->joined == true) @@ -197,7 +197,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, ifocb->joined = true; set_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); netif_carrier_on(sdata->dev); return 0; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index d78c82d6b696..cdf991e74ab9 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -8,7 +8,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2019, 2022 Intel Corporation + * Copyright (C) 2019, 2022-2023 Intel Corporation */ #include <linux/export.h> #include <net/mac80211.h> @@ -1014,7 +1014,7 @@ void ieee80211_roc_purge(struct ieee80211_local *local, if (roc->started) { if (local->ops->remain_on_channel) { /* can race, so ignore return value */ - drv_cancel_remain_on_channel(local, sdata); + drv_cancel_remain_on_channel(local, roc->sdata); ieee80211_roc_notify_destroy(roc); } else { roc->abort = true; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d996aa2579df..4f707d2a160f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/jiffies.h> @@ -229,7 +229,7 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, } skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); if (sta) sta->deflink.rx_stats.packets++; } @@ -1732,7 +1732,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { link_sta->rx_stats.last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control) && + if (ieee80211_is_data_present(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) link_sta->rx_stats.last_rate = sta_stats_encode_rate(status); @@ -1746,7 +1746,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * match the current local configuration when processed. 
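The two rx.c hunks around this point switch the IBSS rate bookkeeping from ieee80211_is_data() to ieee80211_is_data_present(), which additionally requires the null-function subtype bit (0x40 in the frame control field) to be clear, so null-data frames no longer feed last_rate. A standalone sketch of the two predicates, mirroring their ieee80211.h definitions; the values are in host order for illustration, whereas the kernel operates on __le16:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FCTL_FTYPE 0x000c   /* frame type mask */
    #define FTYPE_DATA 0x0008   /* data frames */

    static bool is_data(uint16_t fc)
    {
        return (fc & FCTL_FTYPE) == FTYPE_DATA;
    }

    /* data frame that actually carries a payload: null-function bit clear */
    static bool is_data_present(uint16_t fc)
    {
        return (fc & (FCTL_FTYPE | 0x40)) == FTYPE_DATA;
    }

    int main(void)
    {
        uint16_t qos_data = 0x0088; /* QoS data */
        uint16_t nullfunc = 0x0048; /* null function (no data) */

        printf("QoS data: data=%d, data_present=%d\n",
               is_data(qos_data), is_data_present(qos_data));
        printf("nullfunc: data=%d, data_present=%d\n",
               is_data(nullfunc), is_data_present(nullfunc));
        return 0;
    }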
*/ link_sta->rx_stats.last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) + if (ieee80211_is_data_present(hdr->frame_control)) link_sta->rx_stats.last_rate = sta_stats_encode_rate(status); } @@ -2110,7 +2110,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE && + if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) && rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); @@ -2405,9 +2405,9 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - __le16 fc = hdr->frame_control; + struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + __le16 fc = mgmt->frame_control; /* * Pass through unencrypted frames if the hardware has @@ -2416,15 +2416,27 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_DECRYPTED) return 0; + /* drop unicast protected dual (that wasn't protected) */ + if (ieee80211_is_action(fc) && + mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) + return -EACCES; + if (rx->sta && test_sta_flag(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && - ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && - rx->key)) { + ieee80211_is_unicast_robust_mgmt_frame(rx->skb))) { if (ieee80211_is_deauth(fc) || - ieee80211_is_disassoc(fc)) + ieee80211_is_disassoc(fc)) { + /* + * Permit unprotected deauth/disassoc frames + * during 4-way-HS (key is installed after HS). 
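To summarize the decision tree ieee80211_drop_unencrypted_mgmt() implements across this hunk: with MFP negotiated, an unprotected unicast deauth/disassoc is normally reported to userspace and dropped, except during the 4-way handshake when no key is installed yet. A simplified standalone sketch of that policy; the function name, parameters, and verdict enum are mine, not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    enum verdict { ACCEPT, DROP, REPORT_AND_DROP };

    /* mfp: peer negotiated management frame protection
     * protected_frame: Protected bit set in the frame header
     * deauth_disassoc: deauth or disassoc subtype
     * have_key: pairwise key installed (false during the 4-way handshake)
     */
    static enum verdict robust_mgmt_verdict(bool mfp, bool protected_frame,
                                            bool deauth_disassoc, bool have_key)
    {
        if (protected_frame)
            return ACCEPT;  /* the crypto layer already vetted it */
        if (!mfp)
            return ACCEPT;  /* without MFP, unprotected is expected */
        if (deauth_disassoc && !have_key)
            return ACCEPT;  /* 4-way handshake window */
        return deauth_disassoc ? REPORT_AND_DROP : DROP;
    }

    int main(void)
    {
        printf("deauth during handshake: %d\n",
               robust_mgmt_verdict(true, false, true, false)); /* ACCEPT */
        printf("deauth after keys:      %d\n",
               robust_mgmt_verdict(true, false, true, true));  /* REPORT_AND_DROP */
        return 0;
    }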
+ */ + if (!rx->key) + return 0; + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); + } return -EACCES; } /* BIP does not use Protected field, so need to check MMIE */ @@ -2451,6 +2463,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (unlikely(ieee80211_is_action(fc) && !rx->key && ieee80211_is_robust_mgmt_frame(rx->skb))) return -EACCES; + + /* drop unicast public action frames when using MPF */ + if (is_unicast_ether_addr(mgmt->da) && + ieee80211_is_public_action((void *)rx->skb->data, + rx->skb->len)) + return -EACCES; } return 0; @@ -2505,7 +2523,7 @@ bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(sdata->vif.addr, addr)) return true; - if (!sdata->vif.valid_links) + if (!ieee80211_vif_is_mld(&sdata->vif)) return false; for (link_id = 0; link_id < ARRAY_SIZE(sdata->vif.link_conf); link_id++) { @@ -3356,6 +3374,11 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) if (!ieee80211_is_mgmt(mgmt->frame_control)) return RX_DROP_MONITOR; + /* drop too small action frames */ + if (ieee80211_is_action(mgmt->frame_control) && + rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + return RX_DROP_UNUSABLE; + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && ieee80211_is_beacon(mgmt->frame_control) && !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { @@ -3445,10 +3468,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!ieee80211_is_action(mgmt->frame_control)) return RX_CONTINUE; - /* drop too small frames */ - if (len < IEEE80211_MIN_ACTION_SIZE) - return RX_DROP_UNUSABLE; - if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED && mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 32fa8aca7005..0805aa8603c6 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/if_arp.h> @@ -55,27 +55,45 @@ static bool is_uapsd_supported(struct ieee802_11_elems *elems) return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD; } -static void -ieee80211_update_bss_from_elems(struct ieee80211_local *local, - struct ieee80211_bss *bss, - struct ieee802_11_elems *elems, - struct ieee80211_rx_status *rx_status, - bool beacon) +struct inform_bss_update_data { + struct ieee80211_rx_status *rx_status; + bool beacon; +}; + +void ieee80211_inform_bss(struct wiphy *wiphy, + struct cfg80211_bss *cbss, + const struct cfg80211_bss_ies *ies, + void *data) { + struct ieee80211_local *local = wiphy_priv(wiphy); + struct inform_bss_update_data *update_data = data; + struct ieee80211_bss *bss = (void *)cbss->priv; + struct ieee80211_rx_status *rx_status; + struct ieee802_11_elems *elems; int clen, srlen; - if (beacon) + /* This happens while joining an IBSS */ + if (!update_data) + return; + + elems = ieee802_11_parse_elems(ies->data, ies->len, false, NULL); + if (!elems) + return; + + rx_status = update_data->rx_status; + + if (update_data->beacon) bss->device_ts_beacon = rx_status->device_timestamp; else bss->device_ts_presp = rx_status->device_timestamp; if (elems->parse_error) { - if (beacon) + if (update_data->beacon) bss->corrupt_data |= IEEE80211_BSS_CORRUPT_BEACON; else bss->corrupt_data |= 
IEEE80211_BSS_CORRUPT_PROBE_RESP; } else { - if (beacon) + if (update_data->beacon) bss->corrupt_data &= ~IEEE80211_BSS_CORRUPT_BEACON; else bss->corrupt_data &= ~IEEE80211_BSS_CORRUPT_PROBE_RESP; @@ -124,7 +142,7 @@ ieee80211_update_bss_from_elems(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } - if (beacon) { + if (update_data->beacon) { struct ieee80211_supported_band *sband = local->hw.wiphy->bands[rx_status->band]; if (!(rx_status->encoding == RX_ENC_HT) && @@ -138,6 +156,8 @@ ieee80211_update_bss_from_elems(struct ieee80211_local *local, le32_to_cpu(elems->vht_cap_elem->vht_cap_info); else bss->vht_cap_info = 0; + + kfree(elems); } struct ieee80211_bss * @@ -148,16 +168,17 @@ ieee80211_bss_info_update(struct ieee80211_local *local, { bool beacon = ieee80211_is_beacon(mgmt->frame_control) || ieee80211_is_s1g_beacon(mgmt->frame_control); - struct cfg80211_bss *cbss, *non_tx_cbss; - struct ieee80211_bss *bss, *non_tx_bss; + struct cfg80211_bss *cbss; + struct inform_bss_update_data update_data = { + .rx_status = rx_status, + .beacon = beacon, + }; struct cfg80211_inform_bss bss_meta = { .boottime_ns = rx_status->boottime_ns, + .drv_data = (void *)&update_data, }; bool signal_valid; struct ieee80211_sub_if_data *scan_sdata; - struct ieee802_11_elems *elems; - size_t baselen; - u8 *elements; if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL) bss_meta.signal = 0; /* invalid signal indication */ @@ -192,50 +213,12 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (!cbss) return NULL; - if (ieee80211_is_probe_resp(mgmt->frame_control)) { - elements = mgmt->u.probe_resp.variable; - baselen = offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); - } else if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - struct ieee80211_ext *ext = (void *) mgmt; - - baselen = offsetof(struct ieee80211_ext, u.s1g_beacon.variable); - elements = ext->u.s1g_beacon.variable; - } else { - baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable); - elements = mgmt->u.beacon.variable; - } - - if (baselen > len) - return NULL; - - elems = ieee802_11_parse_elems(elements, len - baselen, false, cbss); - if (!elems) - return NULL; - /* In case the signal is invalid update the status */ signal_valid = channel == cbss->channel; if (!signal_valid) rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; - bss = (void *)cbss->priv; - ieee80211_update_bss_from_elems(local, bss, elems, rx_status, beacon); - kfree(elems); - - list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) { - non_tx_bss = (void *)non_tx_cbss->priv; - - elems = ieee802_11_parse_elems(elements, len - baselen, false, - non_tx_cbss); - if (!elems) - continue; - - ieee80211_update_bss_from_elems(local, non_tx_bss, elems, - rx_status, beacon); - kfree(elems); - } - - return bss; + return (void *)cbss->priv; } static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, @@ -502,7 +485,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) */ list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (ieee80211_sdata_running(sdata)) - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } if (was_scanning) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1400512e0dde..7751f8ba960e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 
Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/module.h> @@ -355,8 +355,9 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, struct sta_link_alloc *alloc = NULL; struct link_sta_info *link_sta; - link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sta->local->sta_mtx)); + link_sta = rcu_access_pointer(sta->link[link_id]); + if (link_sta != &sta->deflink) + lockdep_assert_held(&sta->local->sta_mtx); if (WARN_ON(!link_sta)) return; @@ -1274,7 +1275,117 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) return 0; } -static void __sta_info_destroy_part2(struct sta_info *sta) +static int _sta_info_move_state(struct sta_info *sta, + enum ieee80211_sta_state new_state, + bool recalc) +{ + might_sleep(); + + if (sta->sta_state == new_state) + return 0; + + /* check allowed transitions first */ + + switch (new_state) { + case IEEE80211_STA_NONE: + if (sta->sta_state != IEEE80211_STA_AUTH) + return -EINVAL; + break; + case IEEE80211_STA_AUTH: + if (sta->sta_state != IEEE80211_STA_NONE && + sta->sta_state != IEEE80211_STA_ASSOC) + return -EINVAL; + break; + case IEEE80211_STA_ASSOC: + if (sta->sta_state != IEEE80211_STA_AUTH && + sta->sta_state != IEEE80211_STA_AUTHORIZED) + return -EINVAL; + break; + case IEEE80211_STA_AUTHORIZED: + if (sta->sta_state != IEEE80211_STA_ASSOC) + return -EINVAL; + break; + default: + WARN(1, "invalid state %d", new_state); + return -EINVAL; + } + + sta_dbg(sta->sdata, "moving STA %pM to state %d\n", + sta->sta.addr, new_state); + + /* notify the driver before the actual changes so it can + * fail the transition + */ + if (test_sta_flag(sta, WLAN_STA_INSERTED)) { + int err = drv_sta_state(sta->local, sta->sdata, sta, + sta->sta_state, new_state); + if (err) + return err; + } + + /* reflect the change in all state variables */ + + switch (new_state) { + case IEEE80211_STA_NONE: + if (sta->sta_state == IEEE80211_STA_AUTH) + clear_bit(WLAN_STA_AUTH, &sta->_flags); + break; + case IEEE80211_STA_AUTH: + if (sta->sta_state == IEEE80211_STA_NONE) { + set_bit(WLAN_STA_AUTH, &sta->_flags); + } else if (sta->sta_state == IEEE80211_STA_ASSOC) { + clear_bit(WLAN_STA_ASSOC, &sta->_flags); + if (recalc) { + ieee80211_recalc_min_chandef(sta->sdata, -1); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + } + break; + case IEEE80211_STA_ASSOC: + if (sta->sta_state == IEEE80211_STA_AUTH) { + set_bit(WLAN_STA_ASSOC, &sta->_flags); + sta->assoc_at = ktime_get_boottime_ns(); + if (recalc) { + ieee80211_recalc_min_chandef(sta->sdata, -1); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { + ieee80211_vif_dec_num_mcast(sta->sdata); + clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_clear_fast_xmit(sta); + ieee80211_clear_fast_rx(sta); + } + break; + case IEEE80211_STA_AUTHORIZED: + if (sta->sta_state == IEEE80211_STA_ASSOC) { + ieee80211_vif_inc_num_mcast(sta->sdata); + set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_check_fast_xmit(sta); + ieee80211_check_fast_rx(sta); + } + if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_AP) + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); + break; + default: + break; + } + + sta->sta_state = new_state; + + return 0; +} + +int sta_info_move_state(struct sta_info *sta, + enum ieee80211_sta_state 
new_state) +{ + return _sta_info_move_state(sta, new_state, true); +} + +static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -1290,7 +1401,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) lockdep_assert_held(&local->sta_mtx); if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc); WARN_ON_ONCE(ret); } @@ -1318,7 +1429,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) local->sta_generation++; while (sta->sta_state > IEEE80211_STA_NONE) { - ret = sta_info_move_state(sta, sta->sta_state - 1); + ret = _sta_info_move_state(sta, sta->sta_state - 1, recalc); if (ret) { WARN_ON_ONCE(1); break; @@ -1355,7 +1466,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) synchronize_net(); - __sta_info_destroy_part2(sta); + __sta_info_destroy_part2(sta, true); return 0; } @@ -1462,9 +1573,18 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) } if (!list_empty(&free_list)) { + bool support_p2p_ps = true; + synchronize_net(); - list_for_each_entry_safe(sta, tmp, &free_list, free_list) - __sta_info_destroy_part2(sta); + list_for_each_entry_safe(sta, tmp, &free_list, free_list) { + if (!sta->sta.support_p2p_ps) + support_p2p_ps = false; + __sta_info_destroy_part2(sta, false); + } + + ieee80211_recalc_min_chandef(sdata, -1); + if (!support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sdata); } mutex_unlock(&local->sta_mtx); @@ -2252,106 +2372,6 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, } } -int sta_info_move_state(struct sta_info *sta, - enum ieee80211_sta_state new_state) -{ - might_sleep(); - - if (sta->sta_state == new_state) - return 0; - - /* check allowed transitions first */ - - switch (new_state) { - case IEEE80211_STA_NONE: - if (sta->sta_state != IEEE80211_STA_AUTH) - return -EINVAL; - break; - case IEEE80211_STA_AUTH: - if (sta->sta_state != IEEE80211_STA_NONE && - sta->sta_state != IEEE80211_STA_ASSOC) - return -EINVAL; - break; - case IEEE80211_STA_ASSOC: - if (sta->sta_state != IEEE80211_STA_AUTH && - sta->sta_state != IEEE80211_STA_AUTHORIZED) - return -EINVAL; - break; - case IEEE80211_STA_AUTHORIZED: - if (sta->sta_state != IEEE80211_STA_ASSOC) - return -EINVAL; - break; - default: - WARN(1, "invalid state %d", new_state); - return -EINVAL; - } - - sta_dbg(sta->sdata, "moving STA %pM to state %d\n", - sta->sta.addr, new_state); - - /* - * notify the driver before the actual changes so it can - * fail the transition - */ - if (test_sta_flag(sta, WLAN_STA_INSERTED)) { - int err = drv_sta_state(sta->local, sta->sdata, sta, - sta->sta_state, new_state); - if (err) - return err; - } - - /* reflect the change in all state variables */ - - switch (new_state) { - case IEEE80211_STA_NONE: - if (sta->sta_state == IEEE80211_STA_AUTH) - clear_bit(WLAN_STA_AUTH, &sta->_flags); - break; - case IEEE80211_STA_AUTH: - if (sta->sta_state == IEEE80211_STA_NONE) { - set_bit(WLAN_STA_AUTH, &sta->_flags); - } else if (sta->sta_state == IEEE80211_STA_ASSOC) { - clear_bit(WLAN_STA_ASSOC, &sta->_flags); - ieee80211_recalc_min_chandef(sta->sdata, -1); - if (!sta->sta.support_p2p_ps) - ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); - } - break; - case IEEE80211_STA_ASSOC: - if (sta->sta_state == IEEE80211_STA_AUTH) { - set_bit(WLAN_STA_ASSOC, &sta->_flags); - sta->assoc_at = ktime_get_boottime_ns(); 
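A brief aside on the sta_info.c hunks above: sta_info_move_state() is now a thin wrapper around _sta_info_move_state(), whose recalc flag lets __sta_info_flush() tear down many stations and then recalculate the minimal chandef and P2P GO powersave state once at the end, instead of once per station. A minimal, self-contained sketch of the transition-validation core that the refactor keeps (state names and error handling simplified here; this is not the kernel's exact API):

	/* Sketch of the "check allowed transitions first" half of
	 * _sta_info_move_state(); states and return codes simplified. */
	#include <errno.h>

	enum sta_state { STA_NONE, STA_AUTH, STA_ASSOC, STA_AUTHORIZED };

	static int check_transition(enum sta_state cur, enum sta_state new_state)
	{
		switch (new_state) {
		case STA_NONE:
			return cur == STA_AUTH ? 0 : -EINVAL;
		case STA_AUTH:
			return (cur == STA_NONE || cur == STA_ASSOC) ? 0 : -EINVAL;
		case STA_ASSOC:
			return (cur == STA_AUTH || cur == STA_AUTHORIZED) ? 0 : -EINVAL;
		case STA_AUTHORIZED:
			return cur == STA_ASSOC ? 0 : -EINVAL;
		default:
			return -EINVAL;	/* unknown target state */
		}
	}

As in the hunk, this check runs before any side effects: only when it succeeds does the real function notify the driver (which may still fail the transition) and then update the station flags.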
- ieee80211_recalc_min_chandef(sta->sdata, -1); - if (!sta->sta.support_p2p_ps) - ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); - } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { - ieee80211_vif_dec_num_mcast(sta->sdata); - clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); - ieee80211_clear_fast_xmit(sta); - ieee80211_clear_fast_rx(sta); - } - break; - case IEEE80211_STA_AUTHORIZED: - if (sta->sta_state == IEEE80211_STA_ASSOC) { - ieee80211_vif_inc_num_mcast(sta->sdata); - set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); - ieee80211_check_fast_xmit(sta); - ieee80211_check_fast_rx(sta); - } - if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sta->sdata->vif.type == NL80211_IFTYPE_AP) - cfg80211_send_layer2_update(sta->sdata->dev, - sta->sta.addr); - break; - default: - break; - } - - sta->sta_state = new_state; - - return 0; -} - static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { @@ -2913,6 +2933,8 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) if (!test_sta_flag(sta, WLAN_STA_INSERTED)) goto hash; + ieee80211_recalc_min_chandef(sdata, link_id); + /* Ensure the values are updated for the driver, * redone by sta_remove_link on failure. */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 2b13a52ce96c..44d83da60aee 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2008-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2021-2022 Intel Corporation + * Copyright 2021-2023 Intel Corporation */ #include <linux/export.h> @@ -747,8 +747,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (qskb) { skb_queue_tail(&sdata->status_queue, qskb); - ieee80211_queue_work(&local->hw, - &sdata->work); + wiphy_work_queue(local->hw.wiphy, + &sdata->work); } } } else { diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index b255f3b5bf01..a4af3b7675ef 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019, 2021-2022 Intel Corporation + * Copyright (C) 2019, 2021-2023 Intel Corporation */ #include <linux/ieee80211.h> @@ -39,9 +39,10 @@ void ieee80211_tdls_peer_del_work(struct work_struct *wk) mutex_unlock(&local->mtx); } -static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, +static void ieee80211_tdls_add_ext_capab(struct ieee80211_link_data *link, struct sk_buff *skb) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool chan_switch = local->hw.wiphy->features & @@ -50,7 +51,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, !ifmgd->tdls_wider_bw_prohibited; bool buffer_sta = ieee80211_hw_check(&local->hw, SUPPORTS_TDLS_BUFFER_STA); - struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata); + struct ieee80211_supported_band *sband = ieee80211_get_link_sband(link); bool vht = sband && sband->vht_cap.vht_supported; u8 *pos = skb_put(skb, 10); @@ -152,13 +153,13 @@ ieee80211_tdls_add_supp_channels(struct ieee80211_sub_if_data *sdata, *pos = 2 * subband_cnt; } -static void ieee80211_tdls_add_oper_classes(struct ieee80211_sub_if_data *sdata, +static void ieee80211_tdls_add_oper_classes(struct 
ieee80211_link_data *link, struct sk_buff *skb) { u8 *pos; u8 op_class; - if (!ieee80211_chandef_to_operating_class(&sdata->vif.bss_conf.chandef, + if (!ieee80211_chandef_to_operating_class(&link->conf->chandef, &op_class)) return; @@ -180,7 +181,7 @@ static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb) *pos++ = WLAN_BSS_COEX_INFORMATION_REQUEST; } -static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, +static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_link_data *link, u16 status_code) { struct ieee80211_supported_band *sband; @@ -189,7 +190,8 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, if (status_code != 0) return 0; - sband = ieee80211_get_sband(sdata); + sband = ieee80211_get_link_sband(link); + if (sband && sband->band == NL80211_BAND_2GHZ) { return WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; @@ -198,10 +200,11 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, return 0; } -static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, +static void ieee80211_tdls_add_link_ie(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, bool initiator) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_tdls_lnkie *lnkid; const u8 *init_addr, *rsp_addr; @@ -218,7 +221,7 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, lnkid->ie_type = WLAN_EID_LINK_ID; lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; - memcpy(lnkid->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(lnkid->bssid, link->u.mgd.bssid, ETH_ALEN); memcpy(lnkid->init_sta, init_addr, ETH_ALEN); memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN); } @@ -359,21 +362,24 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, } static void -ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, +ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, u8 action_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_supported_band *sband; struct ieee80211_local *local = sdata->local; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + const struct ieee80211_sta_he_cap *he_cap; + const struct ieee80211_sta_eht_cap *eht_cap; struct sta_info *sta = NULL; size_t offset = 0, noffset; u8 *pos; - sband = ieee80211_get_sband(sdata); - if (!sband) + sband = ieee80211_get_link_sband(link); + if (WARN_ON_ONCE(!sband)) return; ieee80211_add_srates_ie(sdata, skb, false, sband->band); @@ -397,7 +403,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - ieee80211_tdls_add_ext_capab(sdata, skb); + ieee80211_tdls_add_ext_capab(link, skb); /* add the QoS element if we support it */ if (local->hw.queues >= IEEE80211_NUM_ACS && @@ -426,20 +432,16 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - mutex_lock(&local->sta_mtx); - /* we should have the peer STA if we're already responding */ if (action_code == WLAN_TDLS_SETUP_RESPONSE) { sta = sta_info_get(sdata, peer); - if (WARN_ON_ONCE(!sta)) { - mutex_unlock(&local->sta_mtx); + if (WARN_ON_ONCE(!sta)) return; - } - sta->tdls_chandef = sdata->vif.bss_conf.chandef; + sta->tdls_chandef = link->conf->chandef; } - ieee80211_tdls_add_oper_classes(sdata, skb); + ieee80211_tdls_add_oper_classes(link, skb); /* * with TDLS we can 
switch channels, and HT-caps are not necessarily @@ -472,7 +474,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, (ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) ieee80211_tdls_add_bss_coex_ie(skb); - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + ieee80211_tdls_add_link_ie(link, skb, peer, initiator); /* add any custom IEs that go before VHT capabilities */ if (extra_ies_len) { @@ -497,17 +499,21 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - /* build the VHT-cap similarly to the HT-cap */ + /* add AID if VHT, HE or EHT capabilities supported */ memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); + if ((vht_cap.vht_supported || he_cap || eht_cap) && + (action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_TDLS_SETUP_RESPONSE)) + ieee80211_tdls_add_aid(sdata, skb); + + /* build the VHT-cap similarly to the HT-cap */ if ((action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) && vht_cap.vht_supported) { ieee80211_apply_vhtcap_overrides(sdata, &vht_cap); - /* the AID is present only when VHT is implemented */ - if (action_code == WLAN_TDLS_SETUP_REQUEST) - ieee80211_tdls_add_aid(sdata, skb); - pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && @@ -515,9 +521,6 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, /* the peer caps are already intersected with our own */ memcpy(&vht_cap, &sta->sta.deflink.vht_cap, sizeof(vht_cap)); - /* the AID is present only when VHT is implemented */ - ieee80211_tdls_add_aid(sdata, skb); - pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); @@ -529,7 +532,80 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_chandef_vht_upgrade(sdata, sta); } - mutex_unlock(&local->sta_mtx); + /* add any custom IEs that go before HE capabilities */ + if (extra_ies_len) { + static const u8 before_he_cap[] = { + WLAN_EID_EXTENSION, + WLAN_EID_EXT_FILS_REQ_PARAMS, + WLAN_EID_AP_CSN, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_he_cap, + ARRAY_SIZE(before_he_cap), + offset); + skb_put_data(skb, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* build the HE-cap from sband */ + if (he_cap && + (action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_TDLS_SETUP_RESPONSE || + action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) { + __le16 he_6ghz_capa; + u8 cap_size; + + cap_size = + 2 + 1 + sizeof(he_cap->he_cap_elem) + + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], + he_cap->he_cap_elem.phy_cap_info); + pos = skb_put(skb, cap_size); + pos = ieee80211_ie_build_he_cap(0, pos, he_cap, pos + cap_size); + + /* Build HE 6Ghz capa IE from sband */ + if (sband->band == NL80211_BAND_6GHZ) { + cap_size = 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa); + pos = skb_put(skb, cap_size); + he_6ghz_capa = + ieee80211_get_he_6ghz_capa_vif(sband, &sdata->vif); + pos = ieee80211_write_he_6ghz_cap(pos, he_6ghz_capa, + pos + cap_size); + } + } + + /* add any custom IEs that go before EHT capabilities */ + if (extra_ies_len) { + static const u8 before_he_cap[] = { + WLAN_EID_EXTENSION, + 
WLAN_EID_EXT_FILS_REQ_PARAMS, + WLAN_EID_AP_CSN, + }; + + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_he_cap, + ARRAY_SIZE(before_he_cap), + offset); + skb_put_data(skb, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* build the EHT-cap from sband */ + if (he_cap && eht_cap && + (action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_TDLS_SETUP_RESPONSE || + action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) { + u8 cap_size; + + cap_size = + 2 + 1 + sizeof(eht_cap->eht_cap_elem) + + ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, + &eht_cap->eht_cap_elem, false) + + ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], + eht_cap->eht_cap_elem.phy_cap_info); + pos = skb_put(skb, cap_size); + ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + cap_size, false); + } /* add any remaining IEs */ if (extra_ies_len) { @@ -540,31 +616,29 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, } static void -ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, +ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; size_t offset = 0, noffset; struct sta_info *sta, *ap_sta; struct ieee80211_supported_band *sband; u8 *pos; - sband = ieee80211_get_sband(sdata); - if (!sband) + sband = ieee80211_get_link_sband(link); + if (WARN_ON_ONCE(!sband)) return; - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, peer); - ap_sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); - if (WARN_ON_ONCE(!sta || !ap_sta)) { - mutex_unlock(&local->sta_mtx); + ap_sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); + + if (WARN_ON_ONCE(!sta || !ap_sta)) return; - } - sta->tdls_chandef = sdata->vif.bss_conf.chandef; + sta->tdls_chandef = link->conf->chandef; /* add any custom IEs that go before the QoS IE */ if (extra_ies_len) { @@ -610,11 +684,11 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap, - &sdata->vif.bss_conf.chandef, prot, + &link->conf->chandef, prot, true); } - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + ieee80211_tdls_add_link_ie(link, skb, peer, initiator); /* only include VHT-operation if not on the 2.4GHz band */ if (sband->band != NL80211_BAND_2GHZ && @@ -631,8 +705,6 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, &sta->tdls_chandef); } - mutex_unlock(&local->sta_mtx); - /* add any remaining IEs */ if (extra_ies_len) { noffset = extra_ies_len; @@ -641,7 +713,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, } static void -ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata, +ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, bool initiator, const u8 *extra_ies, size_t extra_ies_len, u8 oper_class, @@ -670,7 +742,7 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + ieee80211_tdls_add_link_ie(link, skb, peer, initiator); /* add any remaining IEs */ if (extra_ies_len) { @@ -680,20 +752,20 @@ ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_sub_if_data *sdata, } static void 
-ieee80211_tdls_add_chan_switch_resp_ies(struct ieee80211_sub_if_data *sdata, +ieee80211_tdls_add_chan_switch_resp_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, u16 status_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { if (status_code == 0) - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + ieee80211_tdls_add_link_ie(link, skb, peer, initiator); if (extra_ies_len) skb_put_data(skb, extra_ies, extra_ies_len); } -static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, +static void ieee80211_tdls_add_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, u8 action_code, u16 status_code, bool initiator, const u8 *extra_ies, @@ -705,7 +777,8 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, case WLAN_TDLS_SETUP_RESPONSE: case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: if (status_code == 0) - ieee80211_tdls_add_setup_start_ies(sdata, skb, peer, + ieee80211_tdls_add_setup_start_ies(link, + skb, peer, action_code, initiator, extra_ies, @@ -713,7 +786,7 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, break; case WLAN_TDLS_SETUP_CONFIRM: if (status_code == 0) - ieee80211_tdls_add_setup_cfm_ies(sdata, skb, peer, + ieee80211_tdls_add_setup_cfm_ies(link, skb, peer, initiator, extra_ies, extra_ies_len); break; @@ -722,16 +795,17 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, if (extra_ies_len) skb_put_data(skb, extra_ies, extra_ies_len); if (status_code == 0 || action_code == WLAN_TDLS_TEARDOWN) - ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + ieee80211_tdls_add_link_ie(link, skb, + peer, initiator); break; case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: - ieee80211_tdls_add_chan_switch_req_ies(sdata, skb, peer, + ieee80211_tdls_add_chan_switch_req_ies(link, skb, peer, initiator, extra_ies, extra_ies_len, oper_class, chandef); break; case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: - ieee80211_tdls_add_chan_switch_resp_ies(sdata, skb, peer, + ieee80211_tdls_add_chan_switch_resp_ies(link, skb, peer, status_code, initiator, extra_ies, extra_ies_len); @@ -742,6 +816,7 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, static int ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_link_data *link, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { @@ -766,7 +841,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, skb_put(skb, sizeof(tf->u.setup_req)); tf->u.setup_req.dialog_token = dialog_token; tf->u.setup_req.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata, + cpu_to_le16(ieee80211_get_tdls_sta_capab(link, status_code)); break; case WLAN_TDLS_SETUP_RESPONSE: @@ -777,7 +852,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_resp.status_code = cpu_to_le16(status_code); tf->u.setup_resp.dialog_token = dialog_token; tf->u.setup_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata, + cpu_to_le16(ieee80211_get_tdls_sta_capab(link, status_code)); break; case WLAN_TDLS_SETUP_CONFIRM: @@ -824,7 +899,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, struct ieee80211_link_data *link, + u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { struct 
ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -833,8 +909,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, mgmt = skb_put_zero(skb, 24); memcpy(mgmt->da, peer, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); - + memcpy(mgmt->bssid, link->u.mgd.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -847,7 +922,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, mgmt->u.action.u.tdls_discover_resp.dialog_token = dialog_token; mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata, + cpu_to_le16(ieee80211_get_tdls_sta_capab(link, status_code)); break; default: @@ -859,15 +934,23 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, static struct sk_buff * ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, - const u8 *peer, u8 action_code, - u8 dialog_token, u16 status_code, - bool initiator, const u8 *extra_ies, - size_t extra_ies_len, u8 oper_class, + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, + u16 status_code, bool initiator, + const u8 *extra_ies, size_t extra_ies_len, + u8 oper_class, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int ret; + struct ieee80211_link_data *link; + + link_id = link_id >= 0 ? link_id : 0; + rcu_read_lock(); + link = rcu_dereference(sdata->link[link_id]); + if (WARN_ON(!link)) + goto unlock; skb = netdev_alloc_skb(sdata->dev, local->hw.extra_tx_headroom + @@ -880,6 +963,13 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, sizeof(struct ieee80211_ht_operation)) + 2 + max(sizeof(struct ieee80211_vht_cap), sizeof(struct ieee80211_vht_operation)) + + 2 + 1 + sizeof(struct ieee80211_he_cap_elem) + + sizeof(struct ieee80211_he_mcs_nss_supp) + + IEEE80211_HE_PPE_THRES_MAX_LEN + + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + + 2 + 1 + sizeof(struct ieee80211_eht_cap_elem) + + sizeof(struct ieee80211_eht_mcs_nss_supp) + + IEEE80211_EHT_PPE_THRES_MAX_LEN + 50 + /* supported channels */ 3 + /* 40/20 BSS coex */ 4 + /* AID */ @@ -887,7 +977,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, extra_ies_len + sizeof(struct ieee80211_tdls_lnkie)); if (!skb) - return NULL; + goto unlock; skb_reserve(skb, local->hw.extra_tx_headroom); @@ -900,13 +990,13 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: ret = ieee80211_prep_tdls_encap_data(local->hw.wiphy, - sdata->dev, peer, + sdata->dev, link, peer, action_code, dialog_token, status_code, skb); break; case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: ret = ieee80211_prep_tdls_direct(local->hw.wiphy, sdata->dev, - peer, action_code, + peer, link, action_code, dialog_token, status_code, skb); break; @@ -918,19 +1008,23 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, if (ret < 0) goto fail; - ieee80211_tdls_add_ies(sdata, skb, peer, action_code, status_code, + ieee80211_tdls_add_ies(link, skb, peer, action_code, status_code, initiator, extra_ies, extra_ies_len, oper_class, chandef); + rcu_read_unlock(); return skb; fail: dev_kfree_skb(skb); +unlock: + rcu_read_unlock(); return NULL; } static int ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 
action_code, u8 dialog_token, + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len, u8 oper_class, @@ -988,7 +1082,8 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, if (ret < 0) goto fail; - skb = ieee80211_tdls_build_mgmt_packet_data(sdata, peer, action_code, + skb = ieee80211_tdls_build_mgmt_packet_data(sdata, peer, + link_id, action_code, dialog_token, status_code, initiator, extra_ies, extra_ies_len, oper_class, @@ -999,7 +1094,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, } if (action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) { - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; } @@ -1066,7 +1161,8 @@ fail: static int ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { @@ -1115,7 +1211,8 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, mutex_unlock(&local->mtx); /* we cannot take the mutex while preparing the setup packet */ - ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, + ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, + link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len, 0, @@ -1139,7 +1236,8 @@ out_unlock: static int ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len) @@ -1159,7 +1257,8 @@ ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); ieee80211_flush_queues(local, sdata, false); - ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, + ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, + link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len, 0, @@ -1185,10 +1284,10 @@ ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, } int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, u32 peer_capability, - bool initiator, const u8 *extra_ies, - size_t extra_ies_len) + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, u16 status_code, + u32 peer_capability, bool initiator, + const u8 *extra_ies, size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; @@ -1204,13 +1303,14 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - ret = ieee80211_tdls_mgmt_setup(wiphy, dev, peer, action_code, + ret = ieee80211_tdls_mgmt_setup(wiphy, dev, peer, + link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len); break; case WLAN_TDLS_TEARDOWN: - ret = ieee80211_tdls_mgmt_teardown(wiphy, dev, peer, + ret = ieee80211_tdls_mgmt_teardown(wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, @@ -1228,7 +1328,7 @@ int ieee80211_tdls_mgmt(struct 
wiphy *wiphy, struct net_device *dev, case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: /* no special handling */ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, - action_code, + link_id, action_code, dialog_token, status_code, peer_capability, @@ -1240,8 +1340,8 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, break; } - tdls_dbg(sdata, "TDLS mgmt action %d peer %pM status %d\n", - action_code, peer, ret); + tdls_dbg(sdata, "TDLS mgmt action %d peer %pM link_id %d status %d\n", + action_code, peer, link_id, ret); return ret; } @@ -1431,8 +1531,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, } if (ret == 0) - ieee80211_queue_work(&sdata->local->hw, - &sdata->deflink.u.mgd.request_smps_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->deflink.u.mgd.request_smps_work); mutex_unlock(&local->mtx); sdata_unlock(sdata); @@ -1497,6 +1597,7 @@ ieee80211_tdls_ch_sw_tmpl_get(struct sta_info *sta, u8 oper_class, int extra_ies_len = 2 + sizeof(struct ieee80211_ch_switch_timing); u8 *pos = extra_ies; struct sk_buff *skb; + int link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0; /* * if chandef points to a wide channel add a Secondary-Channel @@ -1524,6 +1625,7 @@ ieee80211_tdls_ch_sw_tmpl_get(struct sta_info *sta, u8 oper_class, iee80211_tdls_add_ch_switch_timing(pos, 0, 0); skb = ieee80211_tdls_build_mgmt_packet_data(sdata, sta->sta.addr, + link_id, WLAN_TDLS_CHANNEL_SWITCH_REQUEST, 0, 0, !sta->sta.tdls_initiator, extra_ies, extra_ies_len, @@ -1644,11 +1746,13 @@ ieee80211_tdls_ch_sw_resp_tmpl_get(struct sta_info *sta, struct ieee80211_sub_if_data *sdata = sta->sdata; struct sk_buff *skb; u8 extra_ies[2 + sizeof(struct ieee80211_ch_switch_timing)]; + int link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0; /* initial timing are always zero in the template */ iee80211_tdls_add_ch_switch_timing(extra_ies, 0, 0); skb = ieee80211_tdls_build_mgmt_packet_data(sdata, sta->sta.addr, + link_id, WLAN_TDLS_CHANNEL_SWITCH_RESPONSE, 0, 0, !sta->sta.tdls_initiator, extra_ies, sizeof(extra_ies), 0, NULL); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index db0d0132c58c..b8c53b4a710b 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -17,7 +17,7 @@ #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wiphy_name, 32) -#define LOCAL_ASSIGN strlcpy(__entry->wiphy_name, wiphy_name(local->hw.wiphy), MAXNAME) +#define LOCAL_ASSIGN strscpy(__entry->wiphy_name, wiphy_name(local->hw.wiphy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wiphy_name @@ -634,6 +634,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY + __field(u32, cmd) KEY_ENTRY ), @@ -641,12 +642,13 @@ TRACE_EVENT(drv_set_key, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; + __entry->cmd = cmd; KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " cmd: %d" KEY_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->cmd, KEY_PR_ARG ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 13b522dab0a3..7fe7280e8437 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -26,6 +26,7 @@ #include <net/codel_impl.h> #include <asm/unaligned.h> #include 
<net/fq_impl.h> +#include <net/gso.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -581,25 +582,9 @@ ieee80211_select_link_key(struct ieee80211_tx_data *tx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - enum { - USE_NONE, - USE_MGMT_KEY, - USE_MCAST_KEY, - } which_key = USE_NONE; struct ieee80211_link_data *link; unsigned int link_id; - if (ieee80211_is_group_privacy_action(tx->skb)) - which_key = USE_MCAST_KEY; - else if (ieee80211_is_mgmt(hdr->frame_control) && - is_multicast_ether_addr(hdr->addr1) && - ieee80211_is_robust_mgmt_frame(tx->skb)) - which_key = USE_MGMT_KEY; - else if (is_multicast_ether_addr(hdr->addr1)) - which_key = USE_MCAST_KEY; - else - return NULL; - link_id = u32_get_bits(info->control.flags, IEEE80211_TX_CTRL_MLO_LINK); if (link_id == IEEE80211_LINK_UNSPECIFIED) { link = &tx->sdata->deflink; @@ -609,14 +594,14 @@ ieee80211_select_link_key(struct ieee80211_tx_data *tx) return NULL; } - switch (which_key) { - case USE_NONE: - break; - case USE_MGMT_KEY: + if (ieee80211_is_group_privacy_action(tx->skb)) + return rcu_dereference(link->default_multicast_key); + else if (ieee80211_is_mgmt(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + ieee80211_is_robust_mgmt_frame(tx->skb)) return rcu_dereference(link->default_mgmt_key); - case USE_MCAST_KEY: + else if (is_multicast_ether_addr(hdr->addr1)) return rcu_dereference(link->default_multicast_key); - } return NULL; } @@ -860,7 +845,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) /* SNS11 from 802.11be 10.3.2.14 */ if (unlikely(is_multicast_ether_addr(hdr->addr1) && - info->control.vif->valid_links && + ieee80211_vif_is_mld(info->control.vif) && info->control.vif->type == NL80211_IFTYPE_AP)) { if (info->control.flags & IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX) tx->sdata->mld_mcast_seq += 0x10; @@ -2626,7 +2611,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, ethertype = (skb->data[12] << 8) | skb->data[13]; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); - if (!sdata->vif.valid_links) + if (!ieee80211_vif_is_mld(&sdata->vif)) chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); @@ -2643,7 +2628,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); wme_sta = sta->sta.wme; } - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { struct ieee80211_sub_if_data *ap_sdata; /* override chanctx_conf from AP (we don't have one) */ @@ -2661,7 +2646,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); - if (sdata->vif.valid_links && sta && !sta->sta.mlo) { + if (ieee80211_vif_is_mld(&sdata->vif) && sta && !sta->sta.mlo) { struct ieee80211_link_data *link; link_id = sta->deflink.link_id; @@ -2769,10 +2754,20 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, tdls_peer = test_sta_flag(sta, WLAN_STA_TDLS_PEER); if (tdls_peer) { + /* For TDLS only one link can be valid with peer STA */ + int tdls_link_id = sta->sta.valid_links ? 
+ __ffs(sta->sta.valid_links) : 0; + struct ieee80211_link_data *link; + /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN); + link = rcu_dereference(sdata->link[tdls_link_id]); + if (WARN_ON_ONCE(!link)) { + ret = -EINVAL; + goto free; + } + memcpy(hdr.addr3, link->u.mgd.bssid, ETH_ALEN); hdrlen = 24; } else if (sdata->u.mgd.use_4addr && cpu_to_be16(ethertype) != sdata->control_port_protocol) { @@ -2813,7 +2808,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } if (!chanctx_conf) { - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { ret = -ENOTCONN; goto free; } @@ -3055,7 +3050,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG)) goto out; - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); @@ -3082,10 +3077,18 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) break; case NL80211_IFTYPE_STATION: if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* For TDLS only one link can be valid with peer STA */ + int tdls_link_id = sta->sta.valid_links ? + __ffs(sta->sta.valid_links) : 0; + struct ieee80211_link_data *link; + /* DA SA BSSID */ build.da_offs = offsetof(struct ieee80211_hdr, addr1); build.sa_offs = offsetof(struct ieee80211_hdr, addr2); - memcpy(hdr->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN); + link = rcu_dereference(sdata->link[tdls_link_id]); + if (WARN_ON_ONCE(!link)) + break; + memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); build.hdr_len = 24; break; } @@ -3126,7 +3129,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ build.da_offs = offsetof(struct ieee80211_hdr, addr1); - if (sta->sta.mlo || !sdata->vif.valid_links) { + if (sta->sta.mlo || !ieee80211_vif_is_mld(&sdata->vif)) { memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); } else { unsigned int link_id = sta->deflink.link_id; @@ -4495,7 +4498,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __ieee80211_subif_start_xmit(skb, dev, 0, IEEE80211_TX_CTRL_MLO_LINK_UNSPEC, NULL); - } else if (sdata->vif.valid_links && + } else if (ieee80211_vif_is_mld(&sdata->vif) && sdata->vif.type == NL80211_IFTYPE_AP && !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) { ieee80211_mlo_multicast_tx(dev, skb); @@ -4771,7 +4774,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, if (info->control.flags & IEEE80211_TX_INTCFL_NEED_TXPROCESSING) { /* update band only for non-MLD */ - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (unlikely(!chanctx_conf)) { @@ -6018,7 +6021,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, BUILD_BUG_ON(!FIELD_FIT(IEEE80211_TX_CTRL_MLO_LINK, IEEE80211_LINK_UNSPECIFIED)); - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { link = 0; } else if (link_id >= 0) { link = link_id; @@ -6064,7 +6067,7 @@ void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, enum nl80211_band band; rcu_read_lock(); - if (!sdata->vif.valid_links) { + if (!ieee80211_vif_is_mld(&sdata->vif)) { WARN_ON(link_id >= 0); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); diff --git a/net/mac80211/util.c b/net/mac80211/util.c 
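An aside on the tx.c hunks just above (before the util.c changes): a TDLS peer can have only one valid link, so both ieee80211_build_hdr() and ieee80211_check_fast_xmit() derive the link id as the lowest set bit of valid_links, falling back to link 0 for a non-MLO peer. A self-contained sketch of that selection (__builtin_ctz stands in here for the kernel's __ffs()):

	/* Sketch: pick the single TDLS link id from a valid_links bitmap. */
	#include <stdint.h>

	static unsigned int tdls_link_id(uint16_t valid_links)
	{
		/* exactly one link can be valid with a TDLS peer; an empty
		 * bitmap means a non-MLO connection, which uses link 0 */
		return valid_links ? (unsigned int)__builtin_ctz(valid_links) : 0;
	}

Note __builtin_ctz() is undefined for zero input, which is why the non-MLO case is handled by the ternary rather than the bit scan, mirroring the guard in the hunks above.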
index 3bd07a0a782f..8a6917cf63cf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * utilities for mac80211 */ @@ -918,6 +918,7 @@ ieee80211_parse_extension_element(u32 *crc, struct ieee80211_elems_parse_params *params) { const void *data = elem->data + 1; + bool calc_crc = false; u8 len; if (!elem->datalen) @@ -927,12 +928,9 @@ ieee80211_parse_extension_element(u32 *crc, switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: - if (len >= sizeof(*elems->mu_edca_param_set)) { + calc_crc = true; + if (len >= sizeof(*elems->mu_edca_param_set)) elems->mu_edca_param_set = data; - if (crc) - *crc = crc32_be(*crc, (void *)elem, - elem->datalen + 2); - } break; case WLAN_EID_EXT_HE_CAPABILITY: if (ieee80211_he_capa_size_ok(data, len)) { @@ -941,13 +939,10 @@ ieee80211_parse_extension_element(u32 *crc, } break; case WLAN_EID_EXT_HE_OPERATION: + calc_crc = true; if (len >= sizeof(*elems->he_operation) && - len >= ieee80211_he_oper_size(data) - 1) { - if (crc) - *crc = crc32_be(*crc, (void *)elem, - elem->datalen + 2); + len >= ieee80211_he_oper_size(data) - 1) elems->he_operation = data; - } break; case WLAN_EID_EXT_UORA: if (len >= 1) @@ -981,14 +976,36 @@ ieee80211_parse_extension_element(u32 *crc, case WLAN_EID_EXT_EHT_OPERATION: if (ieee80211_eht_oper_size_ok(data, len)) elems->eht_operation = data; + calc_crc = true; break; case WLAN_EID_EXT_EHT_MULTI_LINK: + calc_crc = true; + if (ieee80211_mle_size_ok(data, len)) { - elems->multi_link = (void *)data; - elems->multi_link_len = len; + const struct ieee80211_multi_link_elem *mle = + (void *)data; + + switch (le16_get_bits(mle->control, + IEEE80211_ML_CONTROL_TYPE)) { + case IEEE80211_ML_CONTROL_TYPE_BASIC: + elems->ml_basic_elem = (void *)elem; + elems->ml_basic = data; + elems->ml_basic_len = len; + break; + case IEEE80211_ML_CONTROL_TYPE_RECONF: + elems->ml_reconf_elem = (void *)elem; + elems->ml_reconf = data; + elems->ml_reconf_len = len; + break; + default: + break; + } } break; } + + if (crc && calc_crc) + *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); } static u32 @@ -1458,56 +1475,11 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } -static void ieee80211_defragment_element(struct ieee802_11_elems *elems, - void **elem_ptr, size_t *len, - size_t total_len, u8 frag_id) -{ - u8 *data = *elem_ptr, *pos, *start; - const struct element *elem; - - /* - * Since 'data' points to the data of the element, not the element - * itself, allow 254 in case it was an extended element where the - * extended ID isn't part of the data we see here and thus not part of - * 'len' either. 
- */ - if (!data || (*len != 254 && *len != 255)) - return; - - start = elems->scratch_pos; - - if (WARN_ON(*len > (elems->scratch + elems->scratch_len - - elems->scratch_pos))) - return; - - memcpy(elems->scratch_pos, data, *len); - elems->scratch_pos += *len; - - pos = data + *len; - total_len -= *len; - for_each_element(elem, pos, total_len) { - if (elem->id != frag_id) - break; - - if (WARN_ON(elem->datalen > - (elems->scratch + elems->scratch_len - - elems->scratch_pos))) - return; - - memcpy(elems->scratch_pos, elem->data, elem->datalen); - elems->scratch_pos += elem->datalen; - - *len += elem->datalen; - } - - *elem_ptr = start; -} - static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, u8 link_id) { - const struct ieee80211_multi_link_elem *ml = elems->multi_link; - size_t ml_len = elems->multi_link_len; + const struct ieee80211_multi_link_elem *ml = elems->ml_basic; + ssize_t ml_len = elems->ml_basic_len; const struct element *sub; if (!ml || !ml_len) @@ -1519,12 +1491,14 @@ static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, for_each_mle_subelement(sub, (u8 *)ml, ml_len) { struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; + ssize_t sta_prof_len; u16 control; if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) continue; - if (!ieee80211_mle_sta_prof_size_ok(sub->data, sub->datalen)) + if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data, + sub->datalen)) return; control = le16_to_cpu(prof->control); @@ -1536,14 +1510,23 @@ static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE)) return; - elems->prof = prof; - elems->sta_prof_len = sub->datalen; - /* the sub element can be fragmented */ - ieee80211_defragment_element(elems, (void **)&elems->prof, - &elems->sta_prof_len, - ml_len - (sub->data - (u8 *)ml), - IEEE80211_MLE_SUBELEM_FRAGMENT); + sta_prof_len = + cfg80211_defragment_element(sub, + (u8 *)ml, ml_len, + elems->scratch_pos, + elems->scratch + + elems->scratch_len - + elems->scratch_pos, + IEEE80211_MLE_SUBELEM_FRAGMENT); + + if (sta_prof_len < 0) + return; + + elems->prof = (void *)elems->scratch_pos; + elems->sta_prof_len = sta_prof_len; + elems->scratch_pos += sta_prof_len; + return; } } @@ -1557,17 +1540,27 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, .from_ap = params->from_ap, .link_id = -1, }; + ssize_t ml_len = elems->ml_basic_len; const struct element *non_inherit = NULL; const u8 *end; if (params->link_id == -1) return; - ieee80211_defragment_element(elems, (void **)&elems->multi_link, - &elems->multi_link_len, - elems->total_len - ((u8 *)elems->multi_link - - elems->ie_start), - WLAN_EID_FRAGMENT); + ml_len = cfg80211_defragment_element(elems->ml_basic_elem, + elems->ie_start, + elems->total_len, + elems->scratch_pos, + elems->scratch + + elems->scratch_len - + elems->scratch_pos, + WLAN_EID_FRAGMENT); + + if (ml_len < 0) + return; + + elems->ml_basic = (const void *)elems->scratch_pos; + elems->ml_basic_len = ml_len; ieee80211_mle_get_sta_prof(elems, params->link_id); prof = elems->prof; @@ -1604,7 +1597,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; - size_t scratch_len = params->scratch_len ?: 3 * params->len; + size_t scratch_len = 3 * params->len; elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC); if (!elems) @@ -1824,7 +1817,7 @@ void ieee80211_send_auth(struct 
ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - bool multi_link = sdata->vif.valid_links; + bool multi_link = ieee80211_vif_is_mld(&sdata->vif); struct { u8 id; u8 len; @@ -1918,7 +1911,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, } } -static u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end) +u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end) { if ((end - pos) < 5) return pos; @@ -2121,8 +2114,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, *offset = noffset; } - he_cap = ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); if (he_cap && cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE)) { @@ -2131,8 +2123,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, goto out_err; } - eht_cap = ieee80211_get_eht_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); + eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); if (eht_cap && cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), @@ -2150,8 +2141,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband6; sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ]; - he_cap = ieee80211_get_he_iftype_cap(sband6, - ieee80211_vif_type_p2p(&sdata->vif)); + he_cap = ieee80211_get_he_iftype_cap_vif(sband6, &sdata->vif); if (he_cap) { enum nl80211_iftype iftype = @@ -2373,6 +2363,7 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) local->resuming = false; local->suspended = false; local->in_reconfig = false; + local->reconfig_failure = true; ieee80211_flush_completed_scan(local, true); @@ -2475,6 +2466,35 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) return 0; } +static void ieee80211_reconfig_ap_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 changed) +{ + int link_id; + + for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { + struct ieee80211_link_data *link; + + if (!(sdata->vif.active_links & BIT(link_id))) + continue; + + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + continue; + + if (rcu_access_pointer(link->u.ap.beacon)) + drv_start_ap(local, sdata, link->conf); + + if (!link->conf->enable_beacon) + continue; + + changed |= BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED; + + ieee80211_link_info_change_notify(sdata, link, changed); + } +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -2624,21 +2644,55 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* Finally also reconfigure all the BSS information */ list_for_each_entry(sdata, &local->interfaces, list) { + /* common change flags for all interface types - link only */ + u64 changed = BSS_CHANGED_ERP_CTS_PROT | + BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT | + BSS_CHANGED_BSSID | + BSS_CHANGED_CQM | + BSS_CHANGED_QOS | + BSS_CHANGED_TXPOWER | + BSS_CHANGED_MCAST_RATE; + struct ieee80211_link_data *link = NULL; unsigned int link_id; - u32 changed; + u32 active_links = 0; if (!ieee80211_sdata_running(sdata)) continue; sdata_lock(sdata); + if (ieee80211_vif_is_mld(&sdata->vif)) { + struct ieee80211_bss_conf 
*old[IEEE80211_MLD_MAX_NUM_LINKS] = { + [0] = &sdata->vif.bss_conf, + }; + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* start with a single active link */ + active_links = sdata->vif.active_links; + link_id = ffs(active_links) - 1; + sdata->vif.active_links = BIT(link_id); + } + + drv_change_vif_links(local, sdata, 0, + sdata->vif.active_links, + old); + } + for (link_id = 0; link_id < ARRAY_SIZE(sdata->vif.link_conf); link_id++) { - struct ieee80211_link_data *link; + if (ieee80211_vif_is_mld(&sdata->vif) && + !(sdata->vif.active_links & BIT(link_id))) + continue; link = sdata_dereference(sdata->link[link_id], sdata); - if (link) - ieee80211_assign_chanctx(local, sdata, link); + if (!link) + continue; + + ieee80211_assign_chanctx(local, sdata, link); } switch (sdata->vif.type) { @@ -2658,42 +2712,42 @@ int ieee80211_reconfig(struct ieee80211_local *local) &sdata->deflink.tx_conf[i]); break; } - sdata_unlock(sdata); - - /* common change flags for all interface types */ - changed = BSS_CHANGED_ERP_CTS_PROT | - BSS_CHANGED_ERP_PREAMBLE | - BSS_CHANGED_ERP_SLOT | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT | - BSS_CHANGED_BSSID | - BSS_CHANGED_CQM | - BSS_CHANGED_QOS | - BSS_CHANGED_IDLE | - BSS_CHANGED_TXPOWER | - BSS_CHANGED_MCAST_RATE; if (sdata->vif.bss_conf.mu_mimo_owner) changed |= BSS_CHANGED_MU_GROUPS; + if (!ieee80211_vif_is_mld(&sdata->vif)) + changed |= BSS_CHANGED_IDLE; + switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - changed |= BSS_CHANGED_ASSOC | - BSS_CHANGED_ARP_FILTER | - BSS_CHANGED_PS; - - /* Re-send beacon info report to the driver */ - if (sdata->deflink.u.mgd.have_beacon) - changed |= BSS_CHANGED_BEACON_INFO; - - if (sdata->vif.bss_conf.max_idle_period || - sdata->vif.bss_conf.protected_keep_alive) - changed |= BSS_CHANGED_KEEP_ALIVE; - - sdata_lock(sdata); - ieee80211_bss_info_change_notify(sdata, changed); - sdata_unlock(sdata); + if (!ieee80211_vif_is_mld(&sdata->vif)) { + changed |= BSS_CHANGED_ASSOC | + BSS_CHANGED_ARP_FILTER | + BSS_CHANGED_PS; + + /* Re-send beacon info report to the driver */ + if (sdata->deflink.u.mgd.have_beacon) + changed |= BSS_CHANGED_BEACON_INFO; + + if (sdata->vif.bss_conf.max_idle_period || + sdata->vif.bss_conf.protected_keep_alive) + changed |= BSS_CHANGED_KEEP_ALIVE; + + if (sdata->vif.bss_conf.eht_puncturing) + changed |= BSS_CHANGED_EHT_PUNCTURING; + + ieee80211_bss_info_change_notify(sdata, + changed); + } else if (!WARN_ON(!link)) { + ieee80211_link_info_change_notify(sdata, link, + changed); + changed = BSS_CHANGED_ASSOC | + BSS_CHANGED_IDLE | + BSS_CHANGED_PS | + BSS_CHANGED_ARP_FILTER; + ieee80211_vif_cfg_change_notify(sdata, changed); + } break; case NL80211_IFTYPE_OCB: changed |= BSS_CHANGED_OCB; @@ -2703,7 +2757,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) changed |= BSS_CHANGED_IBSS; fallthrough; case NL80211_IFTYPE_AP: - changed |= BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS; + changed |= BSS_CHANGED_P2P_PS; + + if (ieee80211_vif_is_mld(&sdata->vif)) + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_SSID); + else + changed |= BSS_CHANGED_SSID; if (sdata->vif.bss_conf.ftm_responder == 1 && wiphy_ext_feature_isset(sdata->local->hw.wiphy, @@ -2713,6 +2773,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP) { changed |= BSS_CHANGED_AP_PROBE_RESP; + if (ieee80211_vif_is_mld(&sdata->vif)) { + ieee80211_reconfig_ap_links(local, + sdata, + changed); + break; + } + if (rcu_access_pointer(sdata->deflink.u.ap.beacon)) 
drv_start_ap(local, sdata, sdata->deflink.conf); @@ -2728,6 +2795,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_NAN: res = ieee80211_reconfig_nan(sdata); if (res < 0) { + sdata_unlock(sdata); ieee80211_handle_reconfig_failure(local); return res; } @@ -2745,6 +2813,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(1); break; } + sdata_unlock(sdata); + + if (active_links) + ieee80211_set_active_links(&sdata->vif, active_links); } ieee80211_recalc_ps(local); @@ -2860,7 +2932,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* Requeue all works */ list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_queue_work(&local->hw, &sdata->work); + wiphy_work_queue(local->hw.wiphy, &sdata->work); } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, @@ -3801,10 +3873,8 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, } eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype); - if (!eht_cap) { - sdata_info(sdata, "Missing iftype sband data/EHT cap"); + if (!eht_cap) eht_oper = NULL; - } he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 63bab99ed368..c347ec9ff8c9 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -58,6 +58,7 @@ struct ieee802154_local { /* Scanning */ u8 scan_page; u8 scan_channel; + struct ieee802154_beacon_req_frame scan_beacon_req; struct cfg802154_scan_request __rcu *scan_req; struct delayed_work scan_work; @@ -70,6 +71,8 @@ struct ieee802154_local { /* Asynchronous tasks */ struct list_head rx_beacon_list; struct work_struct rx_beacon_work; + struct list_head rx_mac_cmd_list; + struct work_struct rx_mac_cmd_work; bool started; bool suspended; @@ -154,6 +157,22 @@ ieee802154_sdata_running(struct ieee802154_sub_if_data *sdata) return test_bit(SDATA_STATE_RUNNING, &sdata->state); } +static inline int ieee802154_get_mac_cmd(struct sk_buff *skb, u8 *mac_cmd) +{ + struct ieee802154_mac_cmd_pl mac_pl; + int ret; + + if (mac_cb(skb)->type != IEEE802154_FC_TYPE_MAC_CMD) + return -EINVAL; + + ret = ieee802154_mac_cmd_pl_pull(skb, &mac_pl); + if (ret) + return ret; + + *mac_cmd = mac_pl.cmd_id; + return 0; +} + extern struct ieee802154_mlme_ops mac802154_mlme_wpan; void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb); @@ -275,6 +294,8 @@ static inline bool mac802154_is_beaconing(struct ieee802154_local *local) return test_bit(IEEE802154_IS_BEACONING, &local->ongoing); } +void mac802154_rx_mac_cmd_worker(struct work_struct *work); + /* interface handling */ int ieee802154_iface_init(void); void ieee802154_iface_exit(void); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index ee23e234b998..357ece67432b 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -90,6 +90,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_LIST_HEAD(&local->interfaces); INIT_LIST_HEAD(&local->rx_beacon_list); + INIT_LIST_HEAD(&local->rx_mac_cmd_list); mutex_init(&local->iflist_mtx); tasklet_setup(&local->tasklet, ieee802154_tasklet_handler); @@ -100,6 +101,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_DELAYED_WORK(&local->scan_work, mac802154_scan_worker); INIT_WORK(&local->rx_beacon_work, mac802154_rx_beacon_worker); INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker); + INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker); /* init supported flags with 802.15.4 default ranges 
*/ phy->supported.max_minbe = 8; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index da0628ee3c89..e2434b4fe514 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -47,6 +47,62 @@ void mac802154_rx_beacon_worker(struct work_struct *work) kfree(mac_pkt); } +static bool mac802154_should_answer_beacon_req(struct ieee802154_local *local) +{ + struct cfg802154_beacon_request *beacon_req; + unsigned int interval; + + rcu_read_lock(); + beacon_req = rcu_dereference(local->beacon_req); + if (!beacon_req) { + rcu_read_unlock(); + return false; + } + + interval = beacon_req->interval; + rcu_read_unlock(); + + if (!mac802154_is_beaconing(local)) + return false; + + return interval == IEEE802154_ACTIVE_SCAN_DURATION; +} + +void mac802154_rx_mac_cmd_worker(struct work_struct *work) +{ + struct ieee802154_local *local = + container_of(work, struct ieee802154_local, rx_mac_cmd_work); + struct cfg802154_mac_pkt *mac_pkt; + u8 mac_cmd; + int rc; + + mac_pkt = list_first_entry_or_null(&local->rx_mac_cmd_list, + struct cfg802154_mac_pkt, node); + if (!mac_pkt) + return; + + rc = ieee802154_get_mac_cmd(mac_pkt->skb, &mac_cmd); + if (rc) + goto out; + + switch (mac_cmd) { + case IEEE802154_CMD_BEACON_REQ: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing BEACON REQ\n"); + if (!mac802154_should_answer_beacon_req(local)) + break; + + queue_delayed_work(local->mac_wq, &local->beacon_work, 0); + break; + default: + break; + } + +out: + list_del(&mac_pkt->node); + kfree_skb(mac_pkt->skb); + kfree(mac_pkt); +} + static int ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, struct sk_buff *skb, const struct ieee802154_hdr *hdr) @@ -140,8 +196,20 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, list_add_tail(&mac_pkt->node, &sdata->local->rx_beacon_list); queue_work(sdata->local->mac_wq, &sdata->local->rx_beacon_work); return NET_RX_SUCCESS; - case IEEE802154_FC_TYPE_ACK: + case IEEE802154_FC_TYPE_MAC_CMD: + dev_dbg(&sdata->dev->dev, "MAC COMMAND received\n"); + mac_pkt = kzalloc(sizeof(*mac_pkt), GFP_ATOMIC); + if (!mac_pkt) + goto fail; + + mac_pkt->skb = skb_get(skb); + mac_pkt->sdata = sdata; + list_add_tail(&mac_pkt->node, &sdata->local->rx_mac_cmd_list); + queue_work(sdata->local->mac_wq, &sdata->local->rx_mac_cmd_work); + return NET_RX_SUCCESS; + + case IEEE802154_FC_TYPE_ACK: goto fail; case IEEE802154_FC_TYPE_DATA: diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c index 5c191bedd72c..d9658f2c4ae6 100644 --- a/net/mac802154/scan.c +++ b/net/mac802154/scan.c @@ -18,8 +18,12 @@ #define IEEE802154_BEACON_MHR_SZ 13 #define IEEE802154_BEACON_PL_SZ 4 +#define IEEE802154_MAC_CMD_MHR_SZ 23 +#define IEEE802154_MAC_CMD_PL_SZ 1 #define IEEE802154_BEACON_SKB_SZ (IEEE802154_BEACON_MHR_SZ + \ IEEE802154_BEACON_PL_SZ) +#define IEEE802154_MAC_CMD_SKB_SZ (IEEE802154_MAC_CMD_MHR_SZ + \ + IEEE802154_MAC_CMD_PL_SZ) /* mac802154_scan_cleanup_locked() must be called upon scan completion or abort. 
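mac802154_rx_mac_cmd_worker() above drains exactly one queued frame per invocation; the receive path (shown in ieee802154_subif_frame() below) only grabs an skb reference, appends a cfg802154_mac_pkt to rx_mac_cmd_list and schedules the work, deferring all parsing to process context. A rough userspace analogue of that enqueue/process-one split; the list handling is deliberately simplistic (LIFO rather than the kernel's list_add_tail FIFO) and all names are invented:

#include <stdio.h>
#include <stdlib.h>

struct mac_pkt {
        int cmd_id;
        struct mac_pkt *next;
};

static struct mac_pkt *rx_list;

static void rx_path_enqueue(int cmd_id)   /* cheap: no parsing here */
{
        struct mac_pkt *p = calloc(1, sizeof(*p));

        if (!p)
                return;                   /* drop on allocation failure */
        p->cmd_id = cmd_id;
        p->next = rx_list;
        rx_list = p;
        /* kernel: queue_work(local->mac_wq, &local->rx_mac_cmd_work); */
}

static void worker_process_one(void)      /* one packet per invocation */
{
        struct mac_pkt *p = rx_list;

        if (!p)
                return;
        rx_list = p->next;
        printf("processing MAC command 0x%02x\n", p->cmd_id);
        free(p);
}

int main(void)
{
        rx_path_enqueue(0x07);            /* beacon request, for instance */
        worker_process_one();
        return 0;
}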
* - Completions are asynchronous, not locked by the rtnl and decided by the @@ -131,6 +135,42 @@ static int mac802154_scan_find_next_chan(struct ieee802154_local *local, return 0; } +static int mac802154_scan_prepare_beacon_req(struct ieee802154_local *local) +{ + memset(&local->scan_beacon_req, 0, sizeof(local->scan_beacon_req)); + local->scan_beacon_req.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + local->scan_beacon_req.mhr.fc.dest_addr_mode = IEEE802154_SHORT_ADDRESSING; + local->scan_beacon_req.mhr.fc.version = IEEE802154_2003_STD; + local->scan_beacon_req.mhr.fc.source_addr_mode = IEEE802154_NO_ADDRESSING; + local->scan_beacon_req.mhr.dest.mode = IEEE802154_ADDR_SHORT; + local->scan_beacon_req.mhr.dest.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); + local->scan_beacon_req.mhr.dest.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + local->scan_beacon_req.mac_pl.cmd_id = IEEE802154_CMD_BEACON_REQ; + + return 0; +} + +static int mac802154_transmit_beacon_req(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata) +{ + struct sk_buff *skb; + int ret; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &local->scan_beacon_req, NULL, 0); + if (ret) { + kfree_skb(skb); + return ret; + } + + return ieee802154_mlme_tx(local, sdata, skb); +} + void mac802154_scan_worker(struct work_struct *work) { struct ieee802154_local *local = @@ -206,6 +246,13 @@ void mac802154_scan_worker(struct work_struct *work) goto end_scan; } + if (scan_req->type == NL802154_SCAN_ACTIVE) { + ret = mac802154_transmit_beacon_req(local, sdata); + if (ret) + dev_err(&sdata->dev->dev, + "Error when transmitting beacon request (%d)\n", ret); + } + ieee802154_configure_durations(wpan_phy, page, channel); scan_duration = mac802154_scan_get_channel_time(scan_req_duration, wpan_phy->symbol_duration); @@ -231,8 +278,8 @@ int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata, if (mac802154_is_scanning(local)) return -EBUSY; - /* TODO: support other scanning type */ - if (request->type != NL802154_SCAN_PASSIVE) + if (request->type != NL802154_SCAN_PASSIVE && + request->type != NL802154_SCAN_ACTIVE) return -EOPNOTSUPP; /* Store scanning parameters */ @@ -247,6 +294,8 @@ int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata, local->scan_page = request->page; local->scan_channel = -1; set_bit(IEEE802154_IS_SCANNING, &local->ongoing); + if (request->type == NL802154_SCAN_ACTIVE) + mac802154_scan_prepare_beacon_req(local); nl802154_scan_started(request->wpan_phy, request->wpan_dev); @@ -354,6 +403,7 @@ void mac802154_beacon_worker(struct work_struct *work) struct cfg802154_beacon_request *beacon_req; struct ieee802154_sub_if_data *sdata; struct wpan_dev *wpan_dev; + u8 interval; int ret; rcu_read_lock(); @@ -374,6 +424,7 @@ void mac802154_beacon_worker(struct work_struct *work) } wpan_dev = beacon_req->wpan_dev; + interval = beacon_req->interval; rcu_read_unlock(); @@ -383,8 +434,9 @@ void mac802154_beacon_worker(struct work_struct *work) dev_err(&sdata->dev->dev, "Beacon could not be transmitted (%d)\n", ret); - queue_delayed_work(local->mac_wq, &local->beacon_work, - local->beacon_interval); + if (interval < IEEE802154_ACTIVE_SCAN_DURATION) + queue_delayed_work(local->mac_wq, &local->beacon_work, + local->beacon_interval); } int mac802154_stop_beacons_locked(struct ieee802154_local *local, @@ -439,13 +491,17 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data 
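mac802154_scan_prepare_beacon_req() above builds the canonical active-scan probe: a MAC command frame, 2003 frame version, short destination addressing with both PAN ID and short address set to broadcast, and no source address. A standalone sketch of the destination encoding, using htole16() where the kernel uses cpu_to_le16() and assuming the usual 802.15.4 broadcast value 0xffff for both fields:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define PANID_BROADCAST 0xffff  /* assumed IEEE802154_PANID_BROADCAST */
#define ADDR_BROADCAST  0xffff  /* assumed IEEE802154_ADDR_BROADCAST */

struct beacon_req_dest {
        uint16_t pan_id;        /* little-endian on the air */
        uint16_t short_addr;
};

int main(void)
{
        struct beacon_req_dest dest = {
                .pan_id = htole16(PANID_BROADCAST),
                .short_addr = htole16(ADDR_BROADCAST),
        };

        /* any coordinator in range matches and may answer with a beacon */
        printf("dest pan=0x%04x short=0x%04x\n",
               le16toh(dest.pan_id), le16toh(dest.short_addr));
        return 0;
}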
*sdata, local->beacon.mhr.source.pan_id = request->wpan_dev->pan_id; local->beacon.mhr.source.extended_addr = request->wpan_dev->extended_addr; local->beacon.mac_pl.beacon_order = request->interval; - local->beacon.mac_pl.superframe_order = request->interval; + if (request->interval <= IEEE802154_MAX_SCAN_DURATION) + local->beacon.mac_pl.superframe_order = request->interval; local->beacon.mac_pl.final_cap_slot = 0xf; local->beacon.mac_pl.battery_life_ext = 0; - /* TODO: Fill this field depending on the coordinator capacity */ + /* TODO: Fill this field with the coordinator situation in the network */ local->beacon.mac_pl.pan_coordinator = 1; local->beacon.mac_pl.assoc_permit = 1; + if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION) + return 0; + /* Start the beacon work */ local->beacon_interval = mac802154_scan_get_channel_time(request->interval, diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h index 689396d6c76a..1574ecc48075 100644 --- a/net/mac802154/trace.h +++ b/net/mac802154/trace.h @@ -14,7 +14,7 @@ #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME) -#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \ +#define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(local->hw.phy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wpan_phy_name diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index bb4bd0b6a4f7..f6be58b68c6f 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -485,7 +485,6 @@ static const struct proto_ops mctp_dgram_ops = { .sendmsg = mctp_sendmsg, .recvmsg = mctp_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = mctp_compat_ioctl, #endif diff --git a/net/mctp/route.c b/net/mctp/route.c index f51a05ec7162..ab62fe447038 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1249,9 +1249,6 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, mtu = nla_get_u32(tbx[RTAX_MTU]); } - if (rtm->rtm_type != RTN_UNICAST) - return -EINVAL; - rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, rtm->rtm_type); return rc; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index dc5165d3eec4..bf6e81d56263 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -12,6 +12,7 @@ #include <linux/nospec.h> #include <linux/vmalloc.h> #include <linux/percpu.h> +#include <net/gso.h> #include <net/ip.h> #include <net/dst.h> #include <net/sock.h> diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1482259de9b5..533d082f0701 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -14,6 +14,7 @@ #include <linux/netdev_features.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <net/gso.h> #include <net/mpls.h> static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 0dac2863c6e1..a0990c365a2e 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -34,7 +34,11 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), + SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), + SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), + SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX), + SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), 
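The TX-side MIB entries added here (and the matching enum values in mib.h just below) show up with the rest of the MPTCP counters, which the kernel exposes per network namespace on the MPTcpExt lines of /proc/net/netstat. A small reader for one counter; it only assumes the usual two-row netstat layout (a header row of names followed by a row of values):

#include <stdio.h>
#include <string.h>

int main(void)
{
        char names[4096] = "", vals[4096] = "", line[4096];
        char *n, *v, *np, *vp;
        FILE *f = fopen("/proc/net/netstat", "r");

        if (!f)
                return 1;
        while (fgets(line, sizeof(line), f)) {
                if (strncmp(line, "MPTcpExt:", 9))
                        continue;
                if (!names[0])
                        snprintf(names, sizeof(names), "%s", line);
                else
                        snprintf(vals, sizeof(vals), "%s", line);
        }
        fclose(f);

        /* the two rows are parallel: walk them in lockstep */
        n = strtok_r(names + 9, " \n", &np);
        v = strtok_r(vals + 9, " \n", &vp);
        while (n && v) {
                if (!strcmp(n, "AddAddrTx"))
                        printf("AddAddrTx = %s\n", v);
                n = strtok_r(NULL, " \n", &np);
                v = strtok_r(NULL, " \n", &vp);
        }
        return 0;
}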
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), @@ -44,6 +48,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), + SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), + SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2be3596374f4..cae71d947252 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -27,7 +27,15 @@ enum linux_mptcp_mib_field { MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */ MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */ MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ADDADDRTX, /* Sent ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ADDADDRTXDROP, /* ADD_ADDR with echo-flag=0 not send due to + * resource exhaustion + */ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_ECHOADDTX, /* Send ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_ECHOADDTXDROP, /* ADD_ADDR with echo-flag=1 not send due + * to resource exhaustion + */ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */ @@ -37,6 +45,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */ + MPTCP_MIB_RMADDRTX, /* Sent RM_ADDR */ + MPTCP_MIB_RMADDRTXDROP, /* RM_ADDR not sent due to resource exhaustion */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ @@ -63,6 +73,14 @@ struct mptcp_mib { unsigned long mibs[LINUX_MIB_MPTCP_MAX]; }; +static inline void MPTCP_ADD_STATS(struct net *net, + enum linux_mptcp_mib_field field, + int val) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_ADD_STATS(net->mib.mptcp_statistics, field, val); +} + static inline void MPTCP_INC_STATS(struct net *net, enum linux_mptcp_mib_field field) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 19a01b6566f1..c254accb14de 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -687,9 +687,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, &opts->addr); + } else { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); @@ -723,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, for (i = 0; i < opts->rm_list.nr; i++) pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); - + MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } @@ -1023,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) return cur_seq; } +static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) +{ + msk->bytes_acked += new_snd_una - msk->snd_una; + msk->snd_una = new_snd_una; +} + static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, 
struct mptcp_options_received *mp_opt) @@ -1054,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { - msk->snd_una = new_snd_una; + __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } mptcp_data_unlock(sk); @@ -1116,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); + + /* on fallback we just need to ignore the msk-level snd_una, as + * this is really plain TCP + */ + __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); + __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 76612bca275a..7dbbad1e4f55 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -26,7 +26,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, if (add_addr & (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { - pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); + MPTCP_INC_STATS(sock_net((struct sock *)msk), + echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP); return -EINVAL; } @@ -48,7 +49,8 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); if (rm_addr) { - pr_warn("addr_signal error, rm_addr=%d", rm_addr); + MPTCP_ADD_STATS(sock_net((struct sock *)msk), + MPTCP_MIB_RMADDRTXDROP, rm_list->nr); return -EINVAL; } @@ -413,7 +415,46 @@ out_unlock: int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { - return mptcp_pm_nl_get_local_id(msk, skc); + struct mptcp_addr_info skc_local; + struct mptcp_addr_info msk_local; + + if (WARN_ON_ONCE(!msk)) + return -1; + + /* The 0 ID mapping is defined by the first subflow, copied into the msk + * addr + */ + mptcp_local_address((struct sock_common *)msk, &msk_local); + mptcp_local_address((struct sock_common *)skc, &skc_local); + if (mptcp_addresses_equal(&msk_local, &skc_local, false)) + return 0; + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_local_id(msk, &skc_local); + return mptcp_pm_nl_get_local_id(msk, &skc_local); +} + +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex) +{ + *flags = 0; + *ifindex = 0; + + if (!id) + return 0; + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); + return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); +} + +int mptcp_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup) +{ + if (token) + return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup); + return mptcp_pm_nl_set_flags(net, loc, bkup); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 59f8f3124855..5692daf57a4d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -25,9 +25,9 @@ static int pm_nl_pernet_id; struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; + u8 retrans_times; struct timer_list add_timer; struct mptcp_sock *sock; - u8 retrans_times; }; struct pm_nl_pernet { @@ -86,8 +86,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, return a->port == b->port; } -static void local_address(const struct sock_common *skc, - struct 
mptcp_addr_info *addr) +void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { addr->family = skc->skc_family; addr->port = htons(skc->skc_num); @@ -122,7 +121,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, list_for_each_entry(subflow, list, node) { skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); - local_address(skc, &cur); + mptcp_local_address(skc, &cur); if (mptcp_addresses_equal(&cur, saddr, saddr->port)) return true; } @@ -263,7 +262,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) struct mptcp_addr_info saddr; bool ret = false; - local_address((struct sock_common *)sk, &saddr); + mptcp_local_address((struct sock_common *)sk, &saddr); spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { @@ -342,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, } bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry) + const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; @@ -350,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { if (mptcp_pm_is_kernel(msk)) @@ -367,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, list_add(&add_entry->list, &msk->pm.anno_list); - add_entry->addr = entry->addr; + add_entry->addr = *addr; add_entry->sock = msk; add_entry->retrans_times = 0; @@ -541,7 +540,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) struct mptcp_addr_info mpc_addr; bool backup = false; - local_address((struct sock_common *)msk->first, &mpc_addr); + mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); rcu_read_lock(); entry = __lookup_addr(pernet, &mpc_addr, false); if (entry) { @@ -577,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) return; if (local) { - if (mptcp_pm_alloc_anno_list(msk, local)) { + if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); @@ -752,7 +751,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; - local_address((struct sock_common *)ssk, &local); + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; @@ -1047,6 +1046,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + inet_sk_state_store(newsk, TCP_LISTEN); err = kernel_listen(ssock, backlog); if (err) return err; @@ -1056,33 +1056,17 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return 0; } -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info skc_local; - struct mptcp_addr_info msk_local; struct pm_nl_pernet *pernet; int ret = -1; - if (WARN_ON_ONCE(!msk)) - return -1; - - /* The 0 ID mapping is defined by the first subflow, copied into the msk - * addr - */ - local_address((struct sock_common *)msk, &msk_local); - local_address((struct sock_common *)skc, &skc_local); - if 
(mptcp_addresses_equal(&msk_local, &skc_local, false)) - return 0; - - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_local_id(msk, &skc_local); - pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { ret = entry->addr.id; break; } @@ -1096,7 +1080,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) if (!entry) return -ENOMEM; - entry->addr = skc_local; + entry->addr = *skc; entry->addr.id = 0; entry->addr.port = 0; entry->ifindex = 0; @@ -1373,31 +1357,20 @@ out_free: return ret; } -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) +int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex) { struct mptcp_pm_addr_entry *entry; struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); - *flags = 0; - *ifindex = 0; - - if (id) { - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, - id, - flags, - ifindex); - - rcu_read_lock(); - entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); - if (entry) { - *flags = entry->flags; - *ifindex = entry->ifindex; - } - rcu_read_unlock(); + rcu_read_lock(); + entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); + if (entry) { + *flags = entry->flags; + *ifindex = entry->ifindex; } + rcu_read_unlock(); return 0; } @@ -1491,7 +1464,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; - local_address((struct sock_common *)msk, &msk_local); + mptcp_local_address((struct sock_common *)msk, &msk_local); if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) goto next; @@ -1910,18 +1883,50 @@ next: return ret; } +int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; + struct mptcp_pm_addr_entry *entry; + u8 lookup_by_id = 0; + + if (addr->addr.family == AF_UNSPEC) { + lookup_by_id = 1; + if (!addr->addr.id) + return -EOPNOTSUPP; + } + + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr->addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + + changed = (addr->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr->flags & mask); + *addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_set_flags(net, &addr->addr, bkup, changed); + return 0; +} + static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) { - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; + struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | - 
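mptcp_pm_nl_set_flags() above only lets BACKUP and FULLMESH be toggled: the XOR isolates which maskable bits would actually change, and the masked merge rewrites those bits while leaving everything else (SIGNAL, SUBFLOW, ...) untouched. The same two lines in isolation, with stand-in flag values rather than the real MPTCP_PM_ADDR_FLAG_* constants:

#include <assert.h>
#include <stdint.h>

#define FLAG_SIGNAL   0x1   /* stand-ins, not the uapi values */
#define FLAG_BACKUP   0x2
#define FLAG_FULLMESH 0x8

int main(void)
{
        const uint8_t mask = FLAG_BACKUP | FLAG_FULLMESH;
        uint8_t entry_flags = FLAG_SIGNAL | FLAG_BACKUP; /* stored entry */
        uint8_t req_flags = FLAG_FULLMESH;               /* netlink request */
        uint8_t changed;

        changed = (req_flags ^ entry_flags) & mask;
        entry_flags = (entry_flags & ~mask) | (req_flags & mask);

        assert(changed == (FLAG_BACKUP | FLAG_FULLMESH));     /* both toggled */
        assert(entry_flags == (FLAG_SIGNAL | FLAG_FULLMESH)); /* SIGNAL kept */
        return 0;
}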
MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); - u8 bkup = 0, lookup_by_id = 0; + u8 bkup = 0; int ret; ret = mptcp_pm_parse_entry(attr, info, false, &addr); @@ -1936,34 +1941,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; - if (addr.addr.family == AF_UNSPEC) { - lookup_by_id = 1; - if (!addr.addr.id) - return -EOPNOTSUPP; - } - - if (token) - return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup); - spin_lock_bh(&pernet->lock); - entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); - if (!entry) { - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - - changed = (addr.flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr.flags & mask); - addr = *entry; - spin_unlock_bh(&pernet->lock); - - mptcp_nl_set_flags(net, &addr.addr, bkup, changed); - return 0; + return mptcp_pm_set_flags(net, token, &addr, &remote, bkup); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b06aa58dfcf2..b5a8aa4c1ebd 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -111,9 +111,6 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, { struct mptcp_pm_addr_entry *entry, *match = NULL; - *flags = 0; - *ifindex = 0; - spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { if (id == entry->addr.id) { @@ -196,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) lock_sock((struct sock *)msk); spin_lock_bh(&msk->pm.lock); - if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); mptcp_pm_nl_addr_send_ack(msk); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 67311e7d5b21..e892673deb73 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -44,7 +44,7 @@ enum { static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp; static void __mptcp_destroy_sock(struct sock *sk); -static void __mptcp_check_send_data_fin(struct sock *sk); +static void mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; @@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; + subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ subflow->local_id_valid = 1; @@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ + msk->bytes_received += copy_len; WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail = skb_peek_tail(&sk->sk_receive_queue); if (tail && mptcp_try_coalesce(sk, tail, skb)) @@ -424,8 +426,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - return !__mptcp_check_fallback(msk) && - ((1 << sk->sk_state) & + return ((1 << sk->sk_state) & (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && msk->write_seq == READ_ONCE(msk->snd_una); } @@ 
-583,9 +584,6 @@ static bool mptcp_check_data_fin(struct sock *sk) u64 rcv_data_fin_seq; bool ret = false; - if (__mptcp_check_fallback(msk)) - return ret; - /* Need to ack a DATA_FIN received from a peer while this side * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2. * msk->rcv_data_fin was set when parsing the incoming options @@ -623,7 +621,8 @@ static bool mptcp_check_data_fin(struct sock *sk) } ret = true; - mptcp_send_ack(msk); + if (!__mptcp_check_fallback(msk)) + mptcp_send_ack(msk); mptcp_close_wake_up(sk); } return ret; @@ -760,6 +759,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) MPTCP_SKB_CB(skb)->map_seq += delta; __skb_queue_tail(&sk->sk_receive_queue, skb); } + msk->bytes_received += end_seq - msk->ack_seq; msk->ack_seq = end_seq; moved = true; } @@ -845,17 +845,18 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); + mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); return true; } -static void __mptcp_flush_join_list(struct sock *sk) +static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list) { struct mptcp_subflow_context *tmp, *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) { + list_for_each_entry_safe(subflow, tmp, join_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); @@ -897,49 +898,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -void mptcp_subflow_eof(struct sock *sk) -{ - if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags)) - mptcp_schedule_work(sk); -} - -static void mptcp_check_for_eof(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int receivers = 0; - - mptcp_for_each_subflow(msk, subflow) - receivers += !subflow->rx_eof; - if (receivers) - return; - - if (!(sk->sk_shutdown & RCV_SHUTDOWN)) { - /* hopefully temporary hack: propagate shutdown status - * to msk, when all subflows agree on it - */ - WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); - - smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - sk->sk_data_ready(sk); - } - - switch (sk->sk_state) { - case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); - break; - case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); - break; - default: - return; - } - mptcp_close_wake_up(sk); -} - static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -1004,12 +962,6 @@ static void __mptcp_clean_una(struct sock *sk) struct mptcp_data_frag *dtmp, *dfrag; u64 snd_una; - /* on fallback we just need to ignore snd_una, as this is really - * plain TCP - */ - if (__mptcp_check_fallback(msk)) - msk->snd_una = READ_ONCE(msk->snd_nxt); - snd_una = msk->snd_una; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) @@ -1537,8 +1489,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, * that has been handed to the subflow for transmission * and skip update in case it was old dfrag. 
*/ - if (likely(after64(snd_nxt_new, msk->snd_nxt))) + if (likely(after64(snd_nxt_new, msk->snd_nxt))) { + msk->bytes_sent += snd_nxt_new - msk->snd_nxt; msk->snd_nxt = snd_nxt_new; + } } void mptcp_check_and_set_pending(struct sock *sk) @@ -1609,7 +1563,7 @@ out: if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); if (do_check_data_fin) - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) @@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) *copied_syn = 0; } else if (ret && ret != -EINPROGRESS) { - mptcp_disconnect(sk, 0); + /* The disconnect() op called by tcp_sendmsg_fastopen()/ + * __inet_stream_connect() can fail, due to the locking check, + * see mptcp_disconnect(). + * Attempt it again outside the problematic scope. + */ + if (!mptcp_disconnect(sk, 0)) + sk->sk_socket->state = SS_UNCONNECTED; } inet_sk(sk)->defer_connect = 0; @@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; } - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - if (sk->sk_shutdown & RCV_SHUTDOWN) { /* race breaker: the shutdown could be after the * previous receive queue check @@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { - tcp_disconnect(ssk, 0); + /* The MPTCP code never waits on the subflow sockets, so a TCP-level + * disconnect should never fail + */ + WARN_ON_ONCE(tcp_disconnect(ssk, 0)); msk->subflow->state = SS_UNCONNECTED; mptcp_subflow_ctx_reset(subflow); release_sock(ssk); @@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } - __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2596,6 +2549,7 @@ static void __mptcp_retrans(struct sock *sk) } if (copied) { dfrag->already_sent = max(dfrag->already_sent, info.sent); + msk->bytes_retrans += copied; tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); WRITE_ONCE(msk->allow_infinite_fallback, false); @@ -2654,6 +2608,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); + inet_sk_state_store(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2671,16 +2626,12 @@ static void mptcp_worker(struct work_struct *work) if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN))) goto unlock; - mptcp_check_data_fin_ack(sk); - mptcp_check_fastclose(msk); mptcp_pm_nl_work(msk); - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); + mptcp_check_data_fin_ack(sk); mptcp_check_data_fin(sk); if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2691,10 +2642,9 @@ static void mptcp_worker(struct work_struct *work) * even if it is orphaned and in FIN_WAIT2 state */ if (sock_flag(sk, SOCK_DEAD)) { - if
(mptcp_should_close(sk)) { - inet_sk_state_store(sk, TCP_CLOSE); + if (mptcp_should_close(sk)) mptcp_do_fastclose(sk); - } + if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); goto unlock; @@ -2733,6 +2683,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery = false; + msk->subflow_id = 1; mptcp_pm_data_init(msk); @@ -2812,13 +2763,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) break; fallthrough; case TCP_SYN_SENT: - tcp_disconnect(ssk, O_NONBLOCK); + WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK)); break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { pr_debug("Fallback"); ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); + + /* simulate the data_fin ack reception to let the state + * machine move forward + */ + WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); + mptcp_schedule_work(sk); } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); @@ -2858,7 +2815,7 @@ static int mptcp_close_state(struct sock *sk) return next & TCP_ACTION_FIN; } -static void __mptcp_check_send_data_fin(struct sock *sk) +static void mptcp_check_send_data_fin(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); @@ -2876,19 +2833,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk) WRITE_ONCE(msk->snd_nxt, msk->write_seq); - /* fallback socket will not get data_fin/ack, can move to the next - * state now - */ - if (__mptcp_check_fallback(msk)) { - WRITE_ONCE(msk->snd_una, msk->write_seq); - if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { - inet_sk_state_store(sk, TCP_CLOSE); - mptcp_close_wake_up(sk); - } else if (sk->sk_state == TCP_FIN_WAIT1) { - inet_sk_state_store(sk, TCP_FIN_WAIT2); - } - } - mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); @@ -2908,7 +2852,7 @@ static void __mptcp_wr_shutdown(struct sock *sk) WRITE_ONCE(msk->write_seq, msk->write_seq + 1); WRITE_ONCE(msk->snd_data_fin_enable, 1); - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_destroy_sock(struct sock *sk) @@ -2936,7 +2880,6 @@ static void __mptcp_destroy_sock(struct sock *sk) void __mptcp_unaccepted_force_close(struct sock *sk) { sock_set_flag(sk, SOCK_DEAD); - inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); __mptcp_destroy_sock(sk); } @@ -2953,10 +2896,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; } -static void mptcp_listen_inuse_dec(struct sock *sk) +static void mptcp_check_listen_stop(struct sock *sk) { - if (inet_sk_state_load(sk) == TCP_LISTEN) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + struct sock *ssk; + + if (inet_sk_state_load(sk) != TCP_LISTEN) + return; + + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + ssk = mptcp_sk(sk)->first; + if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) + return; + + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); + tcp_set_state(ssk, TCP_CLOSE); + release_sock(ssk); } bool __mptcp_close(struct sock *sk, long timeout) @@ -2969,7 +2926,7 @@ bool __mptcp_close(struct sock *sk, long timeout) WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { - mptcp_listen_inuse_dec(sk); + 
mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -2978,7 +2935,6 @@ bool __mptcp_close(struct sock *sk, long timeout) /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ - inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); timeout = 0; } else if (mptcp_close_state(sk)) { @@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under - * msk->firstsocket lock). Do nothing and leave the cleanup to the - * caller. + * msk->firstsocket lock). */ if (msk->fastopening) - return 0; + return -EBUSY; - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); @@ -3108,6 +3069,10 @@ static int mptcp_disconnect(struct sock *sk, int flags) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); + msk->bytes_acked = 0; + msk->bytes_received = 0; + msk->bytes_sent = 0; + msk->bytes_retrans = 0; WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); @@ -3140,6 +3105,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); @@ -3157,6 +3123,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; + /* passive msk is created after the first/MPC subflow */ + msk->subflow_id = 2; + sock_reset_flag(nsk, SOCK_RCU_FREE); security_inet_csk_clone(nsk, req); @@ -3327,9 +3296,14 @@ static void mptcp_release_cb(struct sock *sk) for (;;) { unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | msk->push_pending; + struct list_head join_list; + if (!flags) break; + INIT_LIST_HEAD(&join_list); + list_splice_init(&msk->join_list, &join_list); + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope @@ -3340,8 +3314,9 @@ static void mptcp_release_cb(struct sock *sk) msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); + if (flags & BIT(MPTCP_FLUSH_JOIN_LIST)) - __mptcp_flush_join_list(sk); + __mptcp_flush_join_list(sk, &join_list); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) @@ -3570,11 +3545,10 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) return (int)delta; } -static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) { struct mptcp_sock *msk = mptcp_sk(sk); bool slow; - int answ; switch (cmd) { case SIOCINQ: @@ -3583,24 +3557,24 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) lock_sock(sk); __mptcp_move_skbs(msk); - answ = mptcp_inq_hint(sk); + *karg = mptcp_inq_hint(sk); release_sock(sk); break; case SIOCOUTQ: slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); + *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); unlock_sock_fast(sk, slow); break; case SIOCOUTQNSD: slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, 
msk->snd_nxt); + *karg = mptcp_ioctl_outq(msk, msk->snd_nxt); unlock_sock_fast(sk, slow); break; default: return -ENOIOCTLCMD; } - return put_user(answ, (int __user *)arg); + return 0; } static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, @@ -3758,6 +3732,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct socket *ssock; + struct sock *newsk; int err; pr_debug("msk=%p", msk); @@ -3769,17 +3744,20 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) return -EINVAL; - err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsock->sk); + newsk = mptcp_accept(sock->sk, flags, &err, kern); + if (!newsk) + return err; + + lock_sock(newsk); + + __inet_accept(sock, newsock, newsk); + if (!mptcp_is_tcpsk(newsock->sk)) { + struct mptcp_sock *msk = mptcp_sk(newsk); struct mptcp_subflow_context *subflow; - struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); msk->in_accept_queue = 0; - lock_sock(newsk); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -3800,11 +3778,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (unlikely(list_empty(&msk->conn_list))) inet_sk_state_store(newsk, TCP_CLOSE); } - - release_sock(newsk); } + release_sock(newsk); - return err; + return 0; } static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) @@ -3889,7 +3866,6 @@ static const struct proto_ops mptcp_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, }; static struct inet_protosw mptcp_protosw = { @@ -3984,7 +3960,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { .sendmsg = inet6_sendmsg, .recvmsg = inet6_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 70c957bc56a8..37fbe22e2433 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,7 +113,6 @@ /* MPTCP socket atomic flags */ #define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 -#define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 @@ -262,10 +261,13 @@ struct mptcp_sock { u64 local_key; u64 remote_key; u64 write_seq; + u64 bytes_sent; u64 snd_nxt; + u64 bytes_received; u64 ack_seq; atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; + u64 bytes_retrans; int rmem_fwd_alloc; struct sock *last_snd; int snd_burst; @@ -274,6 +276,7 @@ struct mptcp_sock { * recovery related fields are under data_lock * protection */ + u64 bytes_acked; u64 snd_una; u64 wnd_end; unsigned long timer_ival; @@ -319,7 +322,8 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; - u32 setsockopt_seq; + u32 subflow_id; + u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; struct mptcp_sock *dl_next; }; @@ -476,14 +480,13 @@ struct mptcp_subflow_context { send_mp_fail : 1, send_fastclose : 1, send_infinite_map : 1, - rx_eof : 1, remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - 
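The mptcp_ioctl() rework in protocol.c above switches from put_user() on a __user pointer to filling a kernel-provided int (*karg), with the common socket code now doing the copy-out; the userspace ABI is unchanged. A minimal caller, assuming fd is an already-connected MPTCP socket:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>      /* SIOCINQ, SIOCOUTQ, SIOCOUTQNSD */

/* print receive/send queue occupancy for a connected MPTCP socket */
int print_queues(int fd)
{
        int inq, outq, outqnsd;

        if (ioctl(fd, SIOCINQ, &inq) < 0 ||
            ioctl(fd, SIOCOUTQ, &outq) < 0 ||
            ioctl(fd, SIOCOUTQNSD, &outqnsd) < 0)
                return -1;

        printf("inq=%d outq=%d outq-not-sent=%d\n", inq, outq, outqnsd);
        return 0;
}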
__unused : 8; + __unused : 9; enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -500,6 +503,8 @@ struct mptcp_subflow_context { u8 reset_reason:4; u8 stale_count; + u32 subflow_id; + long delegated_status; unsigned long fail_tout; @@ -638,6 +643,7 @@ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); +void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -720,7 +726,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); -void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { @@ -809,7 +814,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *rem, u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry); + const struct mptcp_addr_info *addr); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -821,9 +826,15 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); +int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex); int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); +int mptcp_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup); +int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup); int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, struct mptcp_pm_addr_entry *loc, struct mptcp_pm_addr_entry *rem, u8 bkup); @@ -916,13 +927,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d4258869ac48..63f7a09335c5 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -14,7 +14,8 @@ #include <net/mptcp.h> #include "protocol.h" -#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_INFO_OPTLEN_SIZE 16 +#define 
MIN_FULL_INFO_OPTLEN_SIZE 40 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { @@ -355,6 +356,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BROADCAST: case SO_BSDCOMPAT: case SO_PASSCRED: + case SO_PASSPIDFD: case SO_PASSSEC: case SO_RXQ_OVFL: case SO_WIFI_STATUS: @@ -888,7 +890,9 @@ out: void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { + struct sock *sk = (struct sock *)msk; u32 flags = 0; + bool slow; memset(info, 0, sizeof(*info)); @@ -897,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); + if (inet_sk_state_load(sk) == TCP_LISTEN) + return; + /* The following limits only make sense for the in-kernel PM */ if (mptcp_pm_is_kernel(msk)) { info->mptcpi_subflows_max = @@ -914,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags = flags; - info->mptcpi_token = READ_ONCE(msk->token); - info->mptcpi_write_seq = READ_ONCE(msk->write_seq); - info->mptcpi_snd_una = READ_ONCE(msk->snd_una); - info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); - info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); + mptcp_data_lock(sk); + info->mptcpi_snd_una = msk->snd_una; + info->mptcpi_rcv_nxt = msk->ack_seq; + info->mptcpi_bytes_acked = msk->bytes_acked; + mptcp_data_unlock(sk); + + slow = lock_sock_fast(sk); + info->mptcpi_csum_enabled = msk->csum_enabled; + info->mptcpi_token = msk->token; + info->mptcpi_write_seq = msk->write_seq; + info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; + info->mptcpi_bytes_sent = msk->bytes_sent; + info->mptcpi_bytes_received = msk->bytes_received; + info->mptcpi_bytes_retrans = msk->bytes_retrans; + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); @@ -965,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, } static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, int __user *optlen) + char __user *optval, + int __user *optlen) { int len, copylen; @@ -1146,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o return 0; } +static int mptcp_get_full_info(struct mptcp_full_info *mfi, + char __user *optval, + int __user *optlen) +{ + int len; + + BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != + MIN_FULL_INFO_OPTLEN_SIZE); + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < MIN_FULL_INFO_OPTLEN_SIZE) + return -EINVAL; + + memset(mfi, 0, sizeof(*mfi)); + if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) + return -EFAULT; + + if (mfi->size_tcpinfo_kernel || + mfi->size_sfinfo_kernel || + mfi->num_subflows) + return -EINVAL; + + if (mfi->size_sfinfo_user > INT_MAX || + mfi->size_tcpinfo_user > INT_MAX) + return -EINVAL; + + return len - MIN_FULL_INFO_OPTLEN_SIZE; +} + +static int mptcp_put_full_info(struct mptcp_full_info *mfi, + char __user *optval, + u32 copylen, + int __user *optlen) +{ + copylen += MIN_FULL_INFO_OPTLEN_SIZE; + if (put_user(copylen, optlen)) + return -EFAULT; + + if (copy_to_user(optval, mfi, copylen)) + return -EFAULT; + return 0; +} + +static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, + int __user *optlen) +{ + unsigned int sfcount = 0, copylen = 0; + struct mptcp_subflow_context 
*subflow; + struct sock *sk = (struct sock *)msk; + void __user *tcpinfoptr, *sfinfoptr; + struct mptcp_full_info mfi; + int len; + + len = mptcp_get_full_info(&mfi, optval, optlen); + if (len < 0) + return len; + + /* don't bother filling the mptcp info if there is not enough + * user-space-provided storage + */ + if (len > 0) { + mptcp_diag_fill_info(msk, &mfi.mptcp_info); + copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); + } + + mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); + mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, + sizeof(struct tcp_info)); + sfinfoptr = u64_to_user_ptr(mfi.subflow_info); + mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); + mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, + sizeof(struct mptcp_subflow_info)); + tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_subflow_info sfinfo; + struct tcp_info tcp_info; + + if (sfcount++ >= mfi.size_arrays_user) + continue; + + /* fetch addr/tcp_info only if the user space buffers + * are wide enough + */ + memset(&sfinfo, 0, sizeof(sfinfo)); + sfinfo.id = subflow->subflow_id; + if (mfi.size_sfinfo_user > + offsetof(struct mptcp_subflow_info, addrs)) + mptcp_get_sub_addrs(ssk, &sfinfo.addrs); + if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) + goto fail_release; + + if (mfi.size_tcpinfo_user) { + tcp_get_info(ssk, &tcp_info); + if (copy_to_user(tcpinfoptr, &tcp_info, + mfi.size_tcpinfo_user)) + goto fail_release; + } + + tcpinfoptr += mfi.size_tcpinfo_user; + sfinfoptr += mfi.size_sfinfo_user; + } + release_sock(sk); + + mfi.num_subflows = sfcount; + if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) + return -EFAULT; + + return 0; + +fail_release: + release_sock(sk); + return -EFAULT; +} + static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, int __user *optlen, int val) { @@ -1219,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, switch (optname) { case MPTCP_INFO: return mptcp_getsockopt_info(msk, optval, optlen); + case MPTCP_FULL_INFO: + return mptcp_getsockopt_full_info(msk, optval, optlen); case MPTCP_TCPINFO: return mptcp_getsockopt_tcpinfo(msk, optval, optlen); case MPTCP_SUBFLOW_ADDRS: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4688daa6b38b..9ee3b7abbaf6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -819,6 +819,7 @@ create_child: if (!ctx->conn) goto fallback; + ctx->subflow_id = 1; owner = mptcp_sk(ctx->conn); mptcp_pm_new_connection(owner, child, 1); @@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->subflow_id = msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); sock_hold(ssk); @@ -1668,6 +1670,10 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING); + err = security_mptcp_add_subflow(sk, sf->sk); + if (err) + goto release_ssk; + /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1680,6 +1686,8 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); err = tcp_set_ulp(sf->sk, "mptcp"); + 
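mptcp_getsockopt_full_info() above defines the calling convention for the new MPTCP_FULL_INFO option: userspace zeroes the struct, fills the *_user sizes, the array capacity and the two buffer pointers, and the kernel clamps the per-element sizes, reports the subflow count in num_subflows and fills at most size_arrays_user slots. A sketch of a caller; the #ifndef fallbacks are assumptions for builds against older uapi headers, and fd must be an MPTCP socket:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <netinet/tcp.h>        /* struct tcp_info */
#include <linux/mptcp.h>        /* struct mptcp_full_info */

#ifndef SOL_MPTCP
#define SOL_MPTCP 284
#endif
#ifndef MPTCP_FULL_INFO
#define MPTCP_FULL_INFO 4       /* assumed option value */
#endif

#define MAX_SF 8

int dump_full_info(int fd)
{
        struct mptcp_subflow_info sf[MAX_SF];
        struct tcp_info ti[MAX_SF];
        struct mptcp_full_info mfi;
        socklen_t len = sizeof(mfi);

        memset(&mfi, 0, sizeof(mfi));   /* kernel-filled fields must be 0 */
        mfi.size_arrays_user = MAX_SF;
        mfi.size_sfinfo_user = sizeof(sf[0]);
        mfi.size_tcpinfo_user = sizeof(ti[0]);
        mfi.subflow_info = (uintptr_t)sf;
        mfi.tcp_info = (uintptr_t)ti;

        if (getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &len) < 0)
                return -1;

        printf("%u subflows, %llu bytes acked\n", mfi.num_subflows,
               (unsigned long long)mfi.mptcp_info.mptcpi_bytes_acked);
        return 0;
}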
+release_ssk: release_sock(sf->sk); if (err) { @@ -1749,14 +1757,16 @@ static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct mptcp_sock *msk; __subflow_state_change(sk); + msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); - mptcp_rcv_space_init(mptcp_sk(parent), sk); - pr_fallback(mptcp_sk(parent)); + mptcp_rcv_space_init(msk, sk); + pr_fallback(msk); subflow->conn_finished = 1; mptcp_set_connected(parent); } @@ -1772,11 +1782,12 @@ static void subflow_state_change(struct sock *sk) subflow_sched_work_if_closed(mptcp_sk(parent), sk); - if (__mptcp_check_fallback(mptcp_sk(parent)) && - !subflow->rx_eof && subflow_is_done(sk)) { - subflow->rx_eof = 1; - mptcp_subflow_eof(parent); - } + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) + mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 6447a09932f5..069c2659074b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -611,14 +611,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) return 0; } -/* Response handler for Mellanox command Get Mac Address */ -static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr) +/* Response handler for Get Mac Address command */ +static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) { struct ncsi_dev_priv *ndp = nr->ndp; struct net_device *ndev = ndp->ndev.dev; - const struct net_device_ops *ops = ndev->netdev_ops; struct ncsi_rsp_oem_pkt *rsp; struct sockaddr saddr; + u32 mac_addr_off = 0; int ret = 0; /* Get the response header */ @@ -626,11 +626,25 @@ static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr) saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN); + if (mfr_id == NCSI_OEM_MFR_BCM_ID) + mac_addr_off = BCM_MAC_ADDR_OFFSET; + else if (mfr_id == NCSI_OEM_MFR_MLX_ID) + mac_addr_off = MLX_MAC_ADDR_OFFSET; + else if (mfr_id == NCSI_OEM_MFR_INTEL_ID) + mac_addr_off = INTEL_MAC_ADDR_OFFSET; + + memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN); + if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID) + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + /* Set the flag for GMA command which should only be called once */ ndp->gma_flag = 1; - ret = ops->ndo_set_mac_address(ndev, &saddr); + rtnl_lock(); + ret = dev_set_mac_address(ndev, &saddr, NULL); + rtnl_unlock(); if (ret < 0) netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); @@ -649,41 +663,10 @@ static int ncsi_rsp_handler_oem_mlx(struct ncsi_request *nr) if (mlx->cmd == NCSI_OEM_MLX_CMD_GMA && mlx->param == NCSI_OEM_MLX_CMD_GMA_PARAM) - return ncsi_rsp_handler_oem_mlx_gma(nr); + return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_MLX_ID); return 0; } -/* Response handler for Broadcom command Get Mac Address */ -static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) -{ - struct ncsi_dev_priv *ndp = nr->ndp; - struct net_device *ndev = ndp->ndev.dev; - const struct net_device_ops *ops = 
ndev->netdev_ops; - struct ncsi_rsp_oem_pkt *rsp; - struct sockaddr saddr; - int ret = 0; - - /* Get the response header */ - rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); - - saddr.sa_family = ndev->type; - ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); - /* Increase mac address by 1 for BMC's address */ - eth_addr_inc((u8 *)saddr.sa_data); - if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) - return -ENXIO; - - /* Set the flag for GMA command which should only be called once */ - ndp->gma_flag = 1; - - ret = ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) - netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); - - return ret; -} - /* Response handler for Broadcom card */ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) { @@ -695,42 +678,10 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data); if (bcm->type == NCSI_OEM_BCM_CMD_GMA) - return ncsi_rsp_handler_oem_bcm_gma(nr); + return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_BCM_ID); return 0; } -/* Response handler for Intel command Get Mac Address */ -static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr) -{ - struct ncsi_dev_priv *ndp = nr->ndp; - struct net_device *ndev = ndp->ndev.dev; - const struct net_device_ops *ops = ndev->netdev_ops; - struct ncsi_rsp_oem_pkt *rsp; - struct sockaddr saddr; - int ret = 0; - - /* Get the response header */ - rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); - - saddr.sa_family = ndev->type; - ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN); - /* Increase mac address by 1 for BMC's address */ - eth_addr_inc((u8 *)saddr.sa_data); - if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) - return -ENXIO; - - /* Set the flag for GMA command which should only be called once */ - ndp->gma_flag = 1; - - ret = ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) - netdev_warn(ndev, - "NCSI: 'Writing mac address to device failed\n"); - - return ret; -} - /* Response handler for Intel card */ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr) { @@ -742,7 +693,7 @@ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr) intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data); if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA) - return ncsi_rsp_handler_oem_intel_gma(nr); + return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_INTEL_ID); return 0; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9a6b64779e64..0b68e2e2824e 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -739,9 +739,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; - rcu_read_lock_bh(); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); - rcu_read_unlock_bh(); if (ret == -EAGAIN) { /* Type requests element to be completed */ diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 031073286236..95aeb31c60e0 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -40,7 +40,7 @@ MODULE_ALIAS("ip_set_hash:net,iface"); #define IP_SET_HASH_WITH_MULTI #define IP_SET_HASH_WITH_NET0 -#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ) +#define STRSCPY(a, b) strscpy(a, b, IFNAMSIZ) /* IPv4 variant */ @@ -182,11 +182,11 @@ 
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (!eiface) return -EINVAL; - STRLCPY(e.iface, eiface); + STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { - STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) @@ -400,11 +400,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (!eiface) return -EINVAL; - STRLCPY(e.iface, eiface); + STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { - STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 271da8447b29..2a3017b9c001 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -44,7 +44,8 @@ config IP_VS_DEBUG config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" - range 8 20 + range 8 20 if !64BIT + range 8 27 if 64BIT default 12 help The IPVS connection hash table uses the chaining scheme to handle @@ -54,24 +55,24 @@ config IP_VS_TAB_BITS Note the table size must be power of 2. The table size will be the value of 2 to the your input number power. The number to choose is - from 8 to 20, the default number is 12, which means the table size - is 4096. Don't input the number too small, otherwise you will lose - performance on it. You can adapt the table size yourself, according - to your virtual server application. It is good to set the table size - not far less than the number of connections per second multiplying - average lasting time of connection in the table. For example, your - virtual server gets 200 connections per second, the connection lasts - for 200 seconds in average in the connection table, the table size - should be not far less than 200x200, it is good to set the table - size 32768 (2**15). + from 8 to 27 for 64BIT(20 otherwise), the default number is 12, + which means the table size is 4096. Don't input the number too + small, otherwise you will lose performance on it. You can adapt the + table size yourself, according to your virtual server application. + It is good to set the table size not far less than the number of + connections per second multiplying average lasting time of + connection in the table. For example, your virtual server gets 200 + connections per second, the connection lasts for 200 seconds in + average in the connection table, the table size should be not far + less than 200x200, it is good to set the table size 32768 (2**15). Another note that each connection occupies 128 bytes effectively and each hash entry uses 8 bytes, so you can estimate how much memory is needed for your box. You can overwrite this number setting conn_tab_bits module parameter - or by appending ip_vs.conn_tab_bits=? to the kernel command line - if IP VS was compiled built-in. + or by appending ip_vs.conn_tab_bits=? to the kernel command line if + IP VS was compiled built-in. 
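The sizing rule of thumb in the help text above is easy to mechanize. Below is a minimal userspace sketch — not part of this patch, with a hypothetical suggest_tab_bits() helper — that picks the largest exponent in the new 8..27 range whose power of two does not exceed rate x average connection lifetime:

#include <stdio.h>

/* Largest bits in [8, 27] such that 2^bits <= rate * avg lifetime,
 * mirroring the help text's rule of thumb.
 */
static unsigned int suggest_tab_bits(unsigned long rate_per_sec,
                                     unsigned long avg_life_secs)
{
        unsigned long target = rate_per_sec * avg_life_secs;
        unsigned int bits = 8;  /* Kconfig minimum */

        while (bits < 27 && (1UL << (bits + 1)) <= target)
                bits++;
        return bits;
}

int main(void)
{
        /* help text example: 200 conn/s lasting ~200 s on average */
        printf("conn_tab_bits = %u\n", suggest_tab_bits(200, 200));
        return 0;
}

For the help text's example this prints 15, i.e. a table size of 32768 (2**15), matching the recommendation above.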
comment "IPVS transport protocol load balancing support" diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 928e64653837..9065da3cdd12 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -26,7 +26,6 @@ #include <linux/net.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/vmalloc.h> #include <linux/proc_fs.h> /* for proc_net_* */ #include <linux/slab.h> #include <linux/seq_file.h> @@ -1482,13 +1481,21 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) int __init ip_vs_conn_init(void) { size_t tab_array_size; + int max_avail; +#if BITS_PER_LONG > 32 + int max = 27; +#else + int max = 20; +#endif + int min = 8; int idx; - /* Compute size and mask */ - if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) { - pr_info("conn_tab_bits not in [8, 20]. Using default value\n"); - ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; - } + max_avail = order_base_2(totalram_pages()) + PAGE_SHIFT; + max_avail -= 2; /* ~4 in hash row */ + max_avail -= 1; /* IPVS up to 1/2 of mem */ + max_avail -= order_base_2(sizeof(struct ip_vs_conn)); + max = clamp(max, min, max_avail); + ip_vs_conn_tab_bits = clamp_val(ip_vs_conn_tab_bits, min, max); ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; @@ -1497,7 +1504,8 @@ int __init ip_vs_conn_init(void) */ tab_array_size = array_size(ip_vs_conn_tab_size, sizeof(*ip_vs_conn_tab)); - ip_vs_conn_tab = vmalloc(tab_array_size); + ip_vs_conn_tab = kvmalloc_array(ip_vs_conn_tab_size, + sizeof(*ip_vs_conn_tab), GFP_KERNEL); if (!ip_vs_conn_tab) return -ENOMEM; @@ -1506,7 +1514,7 @@ int __init ip_vs_conn_init(void) sizeof(struct ip_vs_conn), 0, SLAB_HWCACHE_ALIGN, NULL); if (!ip_vs_conn_cachep) { - vfree(ip_vs_conn_tab); + kvfree(ip_vs_conn_tab); return -ENOMEM; } @@ -1534,5 +1542,5 @@ void ip_vs_conn_cleanup(void) rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - vfree(ip_vs_conn_tab); + kvfree(ip_vs_conn_tab); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index feb1d7fcb09f..9193e109e6b3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -139,7 +139,7 @@ retry: if (PTR_ERR(rt) == -EINVAL && *saddr && rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { *saddr = 0; - flowi4_update_output(&fl4, 0, 0, daddr, 0); + flowi4_update_output(&fl4, 0, daddr, 0); goto retry; } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -147,7 +147,7 @@ retry: } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); *saddr = fl4.saddr; - flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); + flowi4_update_output(&fl4, 0, daddr, fl4.saddr); loop = true; goto retry; } @@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; @@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index c1557d47ccd1..d4fd626d2b8c 100644 --- 
a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -432,9 +432,19 @@ static bool dccp_error(const struct dccp_hdr *dh, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST | + 1 << DCCP_PKT_RESPONSE | + 1 << DCCP_PKT_CLOSEREQ | + 1 << DCCP_PKT_CLOSE | + 1 << DCCP_PKT_RESET | + 1 << DCCP_PKT_SYNC | + 1 << DCCP_PKT_SYNCACK; unsigned int dccp_len = skb->len - dataoff; unsigned int cscov; const char *msg; + u8 type; + + BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG); if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || dh->dccph_doff * 4 > dccp_len) { @@ -459,34 +469,70 @@ static bool dccp_error(const struct dccp_hdr *dh, goto out_invalid; } - if (dh->dccph_type >= DCCP_PKT_INVALID) { + type = dh->dccph_type; + if (type >= DCCP_PKT_INVALID) { msg = "nf_ct_dccp: reserved packet type "; goto out_invalid; } + + if (test_bit(type, &require_seq48) && !dh->dccph_x) { + msg = "nf_ct_dccp: type lacks 48bit sequence numbers"; + goto out_invalid; + } + return false; out_invalid: nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg); return true; } +struct nf_conntrack_dccp_buf { + struct dccp_hdr dh; /* generic header part */ + struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */ + union { /* depends on header type */ + struct dccp_hdr_ack_bits ack; + struct dccp_hdr_request req; + struct dccp_hdr_response response; + struct dccp_hdr_reset rst; + } u; +}; + +static struct dccp_hdr * +dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh, + struct nf_conntrack_dccp_buf *buf) +{ + unsigned int hdrlen = __dccp_hdr_len(dh); + + if (hdrlen > sizeof(*buf)) + return NULL; + + return skb_header_pointer(skb, offset, hdrlen, buf); +} + int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct dccp_hdr _dh, *dh; + struct nf_conntrack_dccp_buf _dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; unsigned int *timeouts; + struct dccp_hdr *dh; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); + dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); if (!dh) return NF_DROP; if (dccp_error(dh, skb, dataoff, state)) return -NF_ACCEPT; + /* pull again, including possible 48 bit sequences and subtype header */ + dh = dccp_header_pointer(skb, dataoff, dh, &_dh); + if (!dh) + return NF_DROP; + type = dh->dccph_type; if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state)) return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 728eeb0aea87..ad6f0ca40cd2 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -296,6 +296,7 @@ void nf_conntrack_gre_init_net(struct net *net) /* protocol helper struct */ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, + .allow_clash = true, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 77f5e82d8e3f..d0eac27f6ba0 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -611,7 +611,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, start += strlen(name); *val = simple_strtoul(start, &end, 0); if (start == 
end) - return 0; + return -1; if (matchoff && matchlen) { *matchoff = start - dptr; *matchlen = end - start; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b0ef48b21dcb..1d34d700bd09 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow, break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: - if (!dst_hold_safe(route->tuple[dir].dst)) - return -1; - flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; @@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow, dst_release(flow->tuplehash[dir].tuple.dst_cache); } -int flow_offload_route_init(struct flow_offload *flow, +void flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route) { - int err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); - if (err < 0) - return err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); - if (err < 0) - goto err_route_reply; - + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; - - return 0; - -err_route_reply: - nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); - - return err; } EXPORT_SYMBOL_GPL(flow_offload_route_init); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 3bbaf9c7ea46..e45fade76409 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -8,6 +8,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/if_ether.h> +#include <net/gso.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -163,38 +164,43 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, } } -static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, - struct flow_offload_tuple *tuple, u32 *hdrsize, - u32 offset) +struct nf_flowtable_ctx { + const struct net_device *in; + u32 offset; + u32 hdrsize; +}; + +static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, + struct flow_offload_tuple *tuple) { struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; u8 ipproto; - if (!pskb_may_pull(skb, sizeof(*iph) + offset)) + if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset)) return -1; - iph = (struct iphdr *)(skb_network_header(skb) + offset); + iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4); if (ip_is_fragment(iph) || unlikely(ip_has_options(thoff))) return -1; - thoff += offset; + thoff += ctx->offset; ipproto = iph->protocol; switch (ipproto) { case IPPROTO_TCP: - *hdrsize = sizeof(struct tcphdr); + ctx->hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: - *hdrsize = sizeof(struct udphdr); + ctx->hdrsize = sizeof(struct udphdr); break; #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: - *hdrsize = sizeof(struct gre_base_hdr); + ctx->hdrsize = sizeof(struct gre_base_hdr); break; #endif default: @@ -204,7 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, if (iph->ttl <= 1) return -1; - if (!pskb_may_pull(skb, thoff + *hdrsize)) + if (!pskb_may_pull(skb, thoff + ctx->hdrsize)) return -1; switch (ipproto) { @@ -224,13 +230,13 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, } } - iph = (struct iphdr *)(skb_network_header(skb) + offset); 
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; tuple->l3proto = AF_INET; tuple->l4proto = ipproto; - tuple->iifidx = dev->ifindex; + tuple->iifidx = ctx->in->ifindex; nf_flow_tuple_encap(skb, tuple); return 0; @@ -336,58 +342,56 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, return NF_STOLEN; } -unsigned int -nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +static struct flow_offload_tuple_rhash * +nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, + struct nf_flowtable *flow_table, struct sk_buff *skb) { - struct flow_offload_tuple_rhash *tuplehash; - struct nf_flowtable *flow_table = priv; struct flow_offload_tuple tuple = {}; - enum flow_offload_tuple_dir dir; - struct flow_offload *flow; - struct net_device *outdev; - u32 hdrsize, offset = 0; - unsigned int thoff, mtu; - struct rtable *rt; - struct iphdr *iph; - __be32 nexthop; - int ret; if (skb->protocol != htons(ETH_P_IP) && - !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset)) - return NF_ACCEPT; + !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset)) + return NULL; - if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0) - return NF_ACCEPT; + if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0) + return NULL; - tuplehash = flow_offload_lookup(flow_table, &tuple); - if (tuplehash == NULL) - return NF_ACCEPT; + return flow_offload_lookup(flow_table, &tuple); +} + +static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, + struct nf_flowtable *flow_table, + struct flow_offload_tuple_rhash *tuplehash, + struct sk_buff *skb) +{ + enum flow_offload_tuple_dir dir; + struct flow_offload *flow; + unsigned int thoff, mtu; + struct iphdr *iph; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); - mtu = flow->tuplehash[dir].tuple.mtu + offset; + mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) - return NF_ACCEPT; + return 0; - iph = (struct iphdr *)(skb_network_header(skb) + offset); - thoff = (iph->ihl * 4) + offset; + iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); + thoff = (iph->ihl * 4) + ctx->offset; if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) - return NF_ACCEPT; + return 0; if (!nf_flow_dst_check(&tuplehash->tuple)) { flow_offload_teardown(flow); - return NF_ACCEPT; + return 0; } - if (skb_try_make_writable(skb, thoff + hdrsize)) - return NF_DROP; + if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + return -1; flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); - thoff -= offset; + thoff -= ctx->offset; iph = ip_hdr(skb); nf_flow_nat_ip(flow, skb, thoff, dir, iph); @@ -398,6 +402,35 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); + return 1; +} + +unsigned int +nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + enum flow_offload_tuple_dir dir; + struct nf_flowtable_ctx ctx = { + .in = state->in, + }; + struct flow_offload *flow; + struct net_device *outdev; + struct rtable *rt; + __be32 nexthop; + int ret; + + tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb); + if (!tuplehash) + return NF_ACCEPT; + + ret = 
nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb); + if (ret < 0) + return NF_DROP; + else if (ret == 0) + return NF_ACCEPT; + if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { rt = (struct rtable *)tuplehash->tuple.dst_cache; memset(skb->cb, 0, sizeof(struct inet_skb_parm)); @@ -406,6 +439,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return nf_flow_xmit_xfrm(skb, state, &rt->dst); } + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = (struct rtable *)tuplehash->tuple.dst_cache; @@ -535,32 +571,31 @@ static void nf_flow_nat_ipv6(const struct flow_offload *flow, } } -static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, - struct flow_offload_tuple *tuple, u32 *hdrsize, - u32 offset) +static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb, + struct flow_offload_tuple *tuple) { struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; u8 nexthdr; - thoff = sizeof(*ip6h) + offset; + thoff = sizeof(*ip6h) + ctx->offset; if (!pskb_may_pull(skb, thoff)) return -1; - ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); nexthdr = ip6h->nexthdr; switch (nexthdr) { case IPPROTO_TCP: - *hdrsize = sizeof(struct tcphdr); + ctx->hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: - *hdrsize = sizeof(struct udphdr); + ctx->hdrsize = sizeof(struct udphdr); break; #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: - *hdrsize = sizeof(struct gre_base_hdr); + ctx->hdrsize = sizeof(struct gre_base_hdr); break; #endif default: @@ -570,7 +605,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, if (ip6h->hop_limit <= 1) return -1; - if (!pskb_may_pull(skb, thoff + *hdrsize)) + if (!pskb_may_pull(skb, thoff + ctx->hdrsize)) return -1; switch (nexthdr) { @@ -590,65 +625,47 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, } } - ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; tuple->l3proto = AF_INET6; tuple->l4proto = nexthdr; - tuple->iifidx = dev->ifindex; + tuple->iifidx = ctx->in->ifindex; nf_flow_tuple_encap(skb, tuple); return 0; } -unsigned int -nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, + struct nf_flowtable *flow_table, + struct flow_offload_tuple_rhash *tuplehash, + struct sk_buff *skb) { - struct flow_offload_tuple_rhash *tuplehash; - struct nf_flowtable *flow_table = priv; - struct flow_offload_tuple tuple = {}; enum flow_offload_tuple_dir dir; - const struct in6_addr *nexthop; struct flow_offload *flow; - struct net_device *outdev; unsigned int thoff, mtu; - u32 hdrsize, offset = 0; struct ipv6hdr *ip6h; - struct rt6_info *rt; - int ret; - - if (skb->protocol != htons(ETH_P_IPV6) && - !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset)) - return NF_ACCEPT; - - if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0) - return NF_ACCEPT; - - tuplehash = flow_offload_lookup(flow_table, &tuple); - if (tuplehash == NULL) - return NF_ACCEPT; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); - mtu = 
flow->tuplehash[dir].tuple.mtu + offset; + mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) - return NF_ACCEPT; + return 0; - ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - thoff = sizeof(*ip6h) + offset; + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); + thoff = sizeof(*ip6h) + ctx->offset; if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) - return NF_ACCEPT; + return 0; if (!nf_flow_dst_check(&tuplehash->tuple)) { flow_offload_teardown(flow); - return NF_ACCEPT; + return 0; } - if (skb_try_make_writable(skb, thoff + hdrsize)) - return NF_DROP; + if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + return -1; flow_offload_refresh(flow_table, flow, false); @@ -663,6 +680,52 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); + return 1; +} + +static struct flow_offload_tuple_rhash * +nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx, + struct nf_flowtable *flow_table, + struct sk_buff *skb) +{ + struct flow_offload_tuple tuple = {}; + + if (skb->protocol != htons(ETH_P_IPV6) && + !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset)) + return NULL; + + if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0) + return NULL; + + return flow_offload_lookup(flow_table, &tuple); +} + +unsigned int +nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + enum flow_offload_tuple_dir dir; + struct nf_flowtable_ctx ctx = { + .in = state->in, + }; + const struct in6_addr *nexthop; + struct flow_offload *flow; + struct net_device *outdev; + struct rt6_info *rt; + int ret; + + tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb); + if (tuplehash == NULL) + return NF_ACCEPT; + + ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb); + if (ret < 0) + return NF_DROP; + else if (ret == 0) + return NF_ACCEPT; + if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { rt = (struct rt6_info *)tuplehash->tuple.dst_cache; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); @@ -671,6 +734,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return nf_flow_xmit_xfrm(skb, state, &rt->dst); } + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = (struct rt6_info *)tuplehash->tuple.dst_cache; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ce829d434f13..fadbd4ed3dc0 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -27,6 +27,9 @@ #include "nf_internals.h" +#define NF_NAT_MAX_ATTEMPTS 128 +#define NF_NAT_HARDER_THRESH (NF_NAT_MAX_ATTEMPTS / 4) + static spinlock_t nf_nat_locks[CONNTRACK_LOCKS]; static DEFINE_MUTEX(nf_nat_proto_mutex); @@ -197,6 +200,88 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, return nf_conntrack_tuple_taken(&reply, ignored_conntrack); } +static bool nf_nat_may_kill(struct nf_conn *ct, unsigned long flags) +{ + static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT | + IPS_DYING; + static const unsigned long flags_needed = IPS_SRC_NAT; + enum tcp_conntrack old_state; + + old_state = READ_ONCE(ct->proto.tcp.state); + if (old_state < TCP_CONNTRACK_TIME_WAIT) + return false; + + if (flags & flags_refuse) + 
return false; + + return (flags & flags_needed) == flags_needed; +} + +/* reverse direction will send packets to new source, so + * make sure such packets are invalid. + */ +static bool nf_seq_has_advanced(const struct nf_conn *old, const struct nf_conn *new) +{ + return (__s32)(new->proto.tcp.seen[0].td_end - + old->proto.tcp.seen[0].td_end) > 0; +} + +static int +nf_nat_used_tuple_harder(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack, + unsigned int attempts_left) +{ + static const unsigned long flags_offload = IPS_OFFLOAD | IPS_HW_OFFLOAD; + struct nf_conntrack_tuple_hash *thash; + const struct nf_conntrack_zone *zone; + struct nf_conntrack_tuple reply; + unsigned long flags; + struct nf_conn *ct; + bool taken = true; + struct net *net; + + nf_ct_invert_tuple(&reply, tuple); + + if (attempts_left > NF_NAT_HARDER_THRESH || + tuple->dst.protonum != IPPROTO_TCP || + ignored_conntrack->proto.tcp.state != TCP_CONNTRACK_SYN_SENT) + return nf_conntrack_tuple_taken(&reply, ignored_conntrack); + + /* Last few attempts to find a free tcp port. Destructive + * action: evict the colliding entry if it's in timewait state and the + * tcp sequence number has advanced past the one used by the + * old entry. + */ + net = nf_ct_net(ignored_conntrack); + zone = nf_ct_zone(ignored_conntrack); + + thash = nf_conntrack_find_get(net, zone, &reply); + if (!thash) + return false; + + ct = nf_ct_tuplehash_to_ctrack(thash); + + if (thash->tuple.dst.dir == IP_CT_DIR_ORIGINAL) + goto out; + + if (WARN_ON_ONCE(ct == ignored_conntrack)) + goto out; + + flags = READ_ONCE(ct->status); + if (!nf_nat_may_kill(ct, flags)) + goto out; + + if (!nf_seq_has_advanced(ct, ignored_conntrack)) + goto out; + + /* Even if we can evict do not reuse if entry is offloaded. */ + if (nf_ct_kill(ct)) + taken = flags & flags_offload; +out: + nf_ct_put(ct); + return taken; +} + static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, const struct nf_nat_range2 *range) { @@ -385,7 +470,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, unsigned int range_size, min, max, i, attempts; __be16 *keyptr; u16 off; - static const unsigned int max_attempts = 128; switch (tuple->dst.protonum) { case IPPROTO_ICMP: @@ -471,8 +555,8 @@ find_free_id: off = get_random_u16(); attempts = range_size; - if (attempts > max_attempts) - attempts = max_attempts; + if (attempts > NF_NAT_MAX_ATTEMPTS) + attempts = NF_NAT_MAX_ATTEMPTS; /* We are in softirq; doing a search of the entire range risks * soft lockup when all tuples are already used.
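The nf_seq_has_advanced() helper added above relies on wrap-safe serial comparison of 32-bit sequence counters. A standalone sketch of the same idiom, with illustrative values (not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Same idiom as nf_seq_has_advanced(): interpret the unsigned
 * difference as signed, so "a is after b" survives 32-bit wraparound.
 */
static int seq_after(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) > 0;
}

int main(void)
{
        printf("%d\n", seq_after(1000, 500));       /* 1: plainly after  */
        printf("%d\n", seq_after(5, 0xfffffff0u));  /* 1: after, wrapped */
        printf("%d\n", seq_after(0xfffffff0u, 5));  /* 0: before         */
        return 0;
}

Casting the unsigned difference to a signed 32-bit value keeps "has advanced" well defined even when the counter wraps, which is why the eviction test remains correct for long-lived sequence spaces.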
@@ -483,7 +567,7 @@ find_free_id: another_round: for (i = 0; i < attempts; i++, off++) { *keyptr = htons(min + off % range_size); - if (!nf_nat_used_tuple(tuple, ct)) + if (!nf_nat_used_tuple_harder(tuple, ct, attempts - i)) return; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 69bceefaa5c8..9573a8fcad79 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -163,13 +164,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; @@ -183,16 +191,80 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; + break; + } + } +} + +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void __nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain, bool bind) +{ + struct nftables_pernet *nft_net; + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_chain_binding(chain)) + return; + + nft_net = nft_pernet(net); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain(trans) == chain) + nft_trans_chain_bound(trans) = bind; + break; + case NFT_MSG_NEWRULE: + if (trans->ctx.chain == chain) + nft_trans_rule_bound(trans) = bind; break; } } } +static void nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, true); +} + +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + if (!nft_chain_binding(chain)) + return 0; + + if (nft_chain_binding(ctx->chain)) + return -EOPNOTSUPP; + + if (chain->bound) + return -EBUSY; + + chain->bound = true; + chain->use++; + nft_chain_trans_bind(ctx, chain); + + return 0; +} + +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, false); +} + static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { @@ -292,6 +364,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr { struct nftables_pernet *nft_net = nft_pernet(net); + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + 
nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + list_add_tail(&trans->list, &nft_net->commit_list); } @@ -338,8 +423,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - + nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); + return trans; } @@ -357,8 +443,7 @@ static int nft_delchain(struct nft_ctx *ctx) return 0; } -static void nft_rule_expr_activate(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr; @@ -371,9 +456,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } } -static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule, - enum nft_trans_phase phase) +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; @@ -483,6 +567,7 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, nft_trans_set_update(trans) = true; nft_trans_set_gc_int(trans) = desc->gc_int; nft_trans_set_timeout(trans) = desc->timeout; + nft_trans_set_size(trans) = desc->size; } nft_trans_commit_list_add_tail(ctx->net, trans); @@ -495,6 +580,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, return __nft_trans_set_add(ctx, msg_type, set, NULL); } +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +struct nft_set_elem_catchall { + struct list_head list; + struct rcu_head rcu; + void *elem; +}; + +static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_deactivate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_deactivate(ctx, set); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -503,6 +640,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); ctx->table->use--; @@ -2226,7 +2366,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } -static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +int nft_chain_add(struct nft_table *table, struct nft_chain *chain) { int err; @@ -2528,6 
+2668,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_basechain(trans) = basechain; INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); list_splice(&hook.list, &nft_trans_chain_hooks(trans)); + if (nla[NFTA_CHAIN_HOOK]) + module_put(hook.type->owner); nft_trans_commit_list_add_tail(ctx->net, trans); @@ -2670,21 +2812,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); } -static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain, +static int nft_delchain_hook(struct nft_ctx *ctx, + struct nft_base_chain *basechain, struct netlink_ext_ack *extack) { + const struct nft_chain *chain = &basechain->chain; const struct nlattr * const *nla = ctx->nla; struct nft_chain_hook chain_hook = {}; - struct nft_base_chain *basechain; struct nft_hook *this, *hook; LIST_HEAD(chain_del_list); struct nft_trans *trans; int err; - if (!nft_is_base_chain(chain)) - return -EOPNOTSUPP; - - basechain = nft_base_chain(chain); err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) @@ -2769,7 +2908,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, if (chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; - return nft_delchain_hook(&ctx, chain, extack); + if (nft_is_base_chain(chain)) { + struct nft_base_chain *basechain = nft_base_chain(chain); + + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) + return nft_delchain_hook(&ctx, basechain, extack); + } } if (info->nlh->nlmsg_flags & NLM_F_NONREC && @@ -3490,8 +3634,7 @@ err_fill_rule_info: return err; } -static void nf_tables_rule_destroy(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr, *next; @@ -3508,7 +3651,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) +static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -3596,12 +3739,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, return 0; } -struct nft_set_elem_catchall { - struct list_head list; - struct rcu_head rcu; - void *elem; -}; - int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); @@ -3844,7 +3981,7 @@ err_destroy_flow_rule: if (flow) nft_flow_rule_destroy(flow); err_release_rule: - nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); nf_tables_rule_destroy(&ctx, rule); err_release_expr: for (i = 0; i < n; i++) { @@ -4777,6 +4914,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4785,6 +4925,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -4831,6 +4975,9 @@ static 
int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); if (err < 0) return err; @@ -4934,7 +5081,7 @@ err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); err_set_destroy: - ops->destroy(set); + ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: @@ -4949,7 +5096,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); - nft_set_elem_destroy(set, catchall->elem, true); + nf_tables_set_elem_destroy(ctx, set, catchall->elem); kfree_rcu(catchall, rcu); } } @@ -4964,7 +5111,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(ctx, set->exprs[i]); - set->ops->destroy(set); + set->ops->destroy(ctx, set); nft_set_catchall_destroy(ctx, set); kfree(set->name); kvfree(set); @@ -5129,10 +5276,60 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_catchall_activate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_activate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_activate(ctx, set); +} + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (nft_set_is_anonymous(set)) + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + nft_clear(ctx->net, set); + } set->use++; } @@ -5143,14 +5340,30 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase) { switch (phase) { - case NFT_TRANS_PREPARE: + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); + else + list_del_rcu(&binding->list); set->use--; + break; + case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_deactivate_next(ctx->net, set); + } + set->use--; return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + set->use--; fallthrough; default: @@ -5232,7 +5445,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int 
nft_set_elem_expr_dump(struct sk_buff *skb, const struct nft_set *set, - const struct nft_set_ext *ext) + const struct nft_set_ext *ext, + bool reset) { struct nft_set_elem_expr *elem_expr; u32 size, num_exprs = 0; @@ -5245,7 +5459,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb, if (num_exprs == 1) { expr = nft_setelem_expr_at(elem_expr, 0); - if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr, false) < 0) + if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr, reset) < 0) return -1; return 0; @@ -5256,7 +5470,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb, nft_setelem_expr_foreach(expr, elem_expr, size) { expr = nft_setelem_expr_at(elem_expr, size); - if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, false) < 0) + if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, reset) < 0) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -5269,11 +5483,13 @@ nla_put_failure: static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + bool reset) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; + u64 timeout = 0; nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) @@ -5296,7 +5512,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && - nft_set_elem_expr_dump(skb, set, ext)) + nft_set_elem_expr_dump(skb, set, ext, reset)) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && @@ -5309,11 +5525,15 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && - nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { + timeout = *nft_set_ext_timeout(ext); + if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + nf_jiffies64_to_msecs(timeout), + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; + } else if (set->flags & NFT_SET_TIMEOUT) { + timeout = READ_ONCE(set->timeout); + } if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { u64 expires, now = get_jiffies_64(); @@ -5328,6 +5548,9 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; + + if (reset) + *nft_set_ext_expiration(ext) = now + timeout; } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { @@ -5351,6 +5574,7 @@ struct nft_set_dump_args { const struct netlink_callback *cb; struct nft_set_iter iter; struct sk_buff *skb; + bool reset; }; static int nf_tables_dump_setelem(const struct nft_ctx *ctx, @@ -5361,7 +5585,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, struct nft_set_dump_args *args; args = container_of(iter, struct nft_set_dump_args, iter); - return nf_tables_fill_setelem(args->skb, set, elem); + return nf_tables_fill_setelem(args->skb, set, elem, args->reset); } struct nft_set_dump_ctx { @@ -5370,7 +5594,7 @@ struct nft_set_dump_ctx { }; static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, - const struct nft_set *set) + const struct nft_set *set, bool reset) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); @@ -5385,7 +5609,7 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, continue; elem.priv = catchall->elem; - ret = nf_tables_fill_setelem(skb, 
set, &elem); + ret = nf_tables_fill_setelem(skb, set, &elem, reset); break; } @@ -5403,6 +5627,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) bool set_found = false; struct nlmsghdr *nlh; struct nlattr *nest; + bool reset = false; u32 portid, seq; int event; @@ -5450,8 +5675,12 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; + args.cb = cb; args.skb = skb; + args.reset = reset; args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; @@ -5460,7 +5689,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) set->ops->walk(&dump_ctx->ctx, set, &args.iter); if (!args.iter.err && args.iter.count == cb->args[0]) - args.iter.err = nft_set_catchall_dump(net, skb, set); + args.iter.err = nft_set_catchall_dump(net, skb, set, reset); rcu_read_unlock(); nla_nest_end(skb, nest); @@ -5498,7 +5727,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + bool reset) { struct nlmsghdr *nlh; struct nlattr *nest; @@ -5519,7 +5749,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - err = nf_tables_fill_setelem(skb, set, elem); + err = nf_tables_fill_setelem(skb, set, elem, reset); if (err < 0) goto nla_put_failure; @@ -5625,7 +5855,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set, } static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr) + const struct nlattr *attr, bool reset) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_elem elem; @@ -5669,7 +5899,8 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, - NFT_MSG_NEWSETELEM, 0, set, &elem); + NFT_MSG_NEWSETELEM, 0, set, &elem, + reset); if (err < 0) goto err_fill_setelem; @@ -5693,6 +5924,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct nft_set *set; struct nlattr *attr; struct nft_ctx ctx; + bool reset = false; int rem, err = 0; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, @@ -5727,8 +5959,11 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&ctx, set, attr); + err = nft_get_set_elem(&ctx, set, attr, reset); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; @@ -5761,7 +5996,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, - set, elem); + set, elem, false); if (err < 0) { kfree_skb(skb); goto err; @@ -5903,6 +6138,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, __nft_set_elem_expr_destroy(ctx, expr); } +/* Drop references and destroy. Called from gc, dynset and abort path. 
*/ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -5924,11 +6160,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_setelem_data_deactivate() already deals - * with the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -6491,19 +6727,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; + if (obj) { + *nft_set_ext_obj(ext) = obj; + obj->use++; + } if (ulen > 0) { if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) { err = -EINVAL; - goto err_elem_userdata; + goto err_elem_free; } udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { - *nft_set_ext_obj(ext) = obj; - obj->use++; - } err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs); if (err < 0) goto err_elem_free; @@ -6543,10 +6779,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_element_clash; } - if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size && - !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) { - err = -ENFILE; - goto err_set_full; + if (!(flags & NFT_SET_ELEM_CATCHALL)) { + unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX; + + if (!atomic_add_unless(&set->nelems, 1, max)) { + err = -ENFILE; + goto err_set_full; + } } nft_trans_elem(trans) = elem; @@ -6558,10 +6797,9 @@ err_set_full: err_element_clash: kfree(trans); err_elem_free: + nf_tables_set_elem_destroy(ctx, set, elem.priv); if (obj) obj->use--; -err_elem_userdata: - nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&elem.data.val, desc.type); @@ -6605,7 +6843,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -6638,7 +6877,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb, void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { struct nft_chain *chain; - struct nft_rule *rule; if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -6646,15 +6884,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) case NFT_GOTO: chain = data->verdict.chain; chain->use++; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use++; - list_for_each_entry(rule, &chain->rules, list) - chain->use++; - - nft_chain_add(chain->table, chain); break; } } @@ -6889,7 +7118,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb, set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, 
net, skb, info->nlh, family, table, NULL, nla); @@ -7671,6 +7902,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: @@ -8715,6 +8947,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, + [NFT_MSG_GETSETELEM_RESET] = { + .call = nf_tables_getsetelem, + .type = NFNL_CB_RCU, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, [NFT_MSG_DELSETELEM] = { .call = nf_tables_delsetelem, .type = NFNL_CB_BATCH, @@ -8943,7 +9181,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -9308,6 +9546,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans)) && + !nft_trans_chain_bound(trans)) { + pr_warn_once("nftables ruleset with unbound chain\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; @@ -9444,6 +9703,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); + + if (nft_trans_set_size(trans)) + WRITE_ONCE(set->size, nft_trans_set_size(trans)); } else { nft_clear(net, nft_trans_set(trans)); /* This avoids hitting -EBUSY when deleting the table @@ -9677,7 +9939,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - if (nft_chain_is_bound(trans->ctx.chain)) { + if (nft_trans_chain_bound(trans)) { nft_trans_destroy(trans); break; } @@ -9700,6 +9962,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: + if (nft_trans_rule_bound(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, @@ -9734,6 +10000,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSET: trans->ctx.table->use++; nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -9814,7 +10083,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nf_tables_abort_release(trans); } @@ -10263,22 +10532,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) 
{ struct nft_chain *chain; - struct nft_rule *rule; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; chain->use--; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use--; - list_for_each_entry(rule, &chain->rules, list) - chain->use--; - - nft_chain_del(chain); break; } } @@ -10513,6 +10772,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); table->use--; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { @@ -10597,6 +10859,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index ee6840bd5933..8f1bfa6ccc2d 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -439,3 +439,4 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e311462f6d98..556bc902af00 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -30,6 +30,7 @@ #include <linux/netfilter/nf_conntrack_common.h> #include <linux/list.h> #include <linux/cgroup-defs.h> +#include <net/gso.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 2527a01486ef..ca857afbf061 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -86,7 +86,7 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, - [NFTA_BITWISE_OP] = { .type = NLA_U32 }, + [NFTA_BITWISE_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BITWISE_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index b66647a5a171..9a85e797ed58 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -88,9 +88,9 @@ void nft_byteorder_eval(const struct nft_expr *expr, static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, - [NFTA_BYTEORDER_OP] = { .type = NLA_U32 }, - [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 }, - [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_byteorder_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index b9c84499438b..38958e067aa8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -332,7 +332,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr, static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_DREG] = { .type = NLA_U32 }, - [NFTA_CT_KEY] = { .type = NLA_U32 }, + [NFTA_CT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_CT_DIRECTION] = { 
.type = NLA_U8 }, [NFTA_CT_SREG] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index bd19c7aec92e..4fb34d76dbea 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -148,7 +148,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, - [NFTA_DYNSET_OP] = { .type = NLA_U32 }, + [NFTA_DYNSET_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a54a7f772cec..7f856ceb3a66 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -10,6 +10,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> +#include <linux/dccp.h> #include <linux/sctp.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> @@ -406,13 +407,89 @@ err: regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_dccp_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int thoff, dataoff, optoff, optlen, i; + u32 *dest = &regs->data[priv->dreg]; + const struct dccp_hdr *dh; + struct dccp_hdr _dh; + + if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff) + goto err; + + thoff = nft_thoff(pkt); + + dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh); + if (!dh) + goto err; + + dataoff = dh->dccph_doff * sizeof(u32); + optoff = __dccp_hdr_len(dh); + if (dataoff <= optoff) + goto err; + + optlen = dataoff - optoff; + + for (i = 0; i < optlen; ) { + /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in + * length; the remaining options are at least 2B long. In + * all cases, the first byte contains the option type. In + * multi-byte options, the second byte contains the option + * length, which must be at least two: 1 for the type plus 1 for + * the length plus 0-253 for any following option data. We + * aren't interested in the option data, only the type and the + * length, so we don't need to read more than two bytes at a + * time.
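+ * As a hedged illustration (the option bytes here are an assumed
+ * example, not part of the original patch): walking the 8-byte
+ * options area { 0x00, 0x29, 0x06, 0xde, 0xad, 0xbe, 0xef, 0x00 }
+ * visits i = 0 (type 0x00, padding, advance by one byte), i = 1
+ * (type 0x29, DCCPO_TIMESTAMP, len byte 0x06, advance to i = 7)
+ * and i = 7 (padding again), after which the loop ends and *dest
+ * stays 0 unless one of the visited types matched priv->type.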
+ */ + unsigned int buflen = optlen - i; + u8 buf[2], *bufp; + u8 type, len; + + if (buflen > sizeof(buf)) + buflen = sizeof(buf); + + bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen, + &buf); + if (!bufp) + goto err; + + type = bufp[0]; + + if (type == priv->type) { + *dest = 1; + return; + } + + if (type <= DCCPO_MAX_RESERVED) { + i++; + continue; + } + + if (buflen < 2) + goto err; + + len = bufp[1]; + + if (len < 2) + goto err; + + i += len; + } + +err: + *dest = 0; +} + static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, - [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, + [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, - [NFTA_EXTHDR_OP] = { .type = NLA_U32 }, + [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, }; @@ -557,6 +634,22 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, return 0; } +static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + int err = nft_exthdr_init(ctx, expr, tb); + + if (err < 0) + return err; + + if (!(priv->flags & NFT_EXTHDR_F_PRESENT)) + return -EOPNOTSUPP; + + return 0; +} + static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) @@ -686,6 +779,15 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = { .reduce = nft_exthdr_reduce, }; +static const struct nft_expr_ops nft_exthdr_dccp_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_dccp_eval, + .init = nft_exthdr_dccp_init, + .dump = nft_exthdr_dump, + .reduce = nft_exthdr_reduce, +}; + static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -720,6 +822,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_sctp_ops; break; + case NFT_EXTHDR_OP_DCCP: + if (tb[NFTA_EXTHDR_DREG]) + return &nft_exthdr_dccp_ops; + break; } return ERR_PTR(-EOPNOTSUPP); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index e860d8fe0e5e..5ef9146e74ad 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; } + if (!dst_hold_safe(this_dst)) + return -ENOENT; + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); - if (!other_dst) + if (!other_dst) { + dst_release(this_dst); return -ENOENT; + } nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, other_dst, !dir); @@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (flow_offload_route_init(flow, &route) < 0) - goto err_flow_add; + flow_offload_route_init(flow, &route); if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; @@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; - dst_release(route.tuple[!dir].dst); return; err_flow_add: flow_offload_free(flow); err_flow_alloc: + dst_release(route.tuple[dir].dst); dst_release(route.tuple[!dir].dst); err_flow_route: clear_bit(IPS_OFFLOAD_BIT, &ct->status); diff --git 
a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 7b9d4d1bd17c..a5268e6dd32f 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -40,7 +40,7 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 }, [NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 }, - [NFTA_FWD_NFPROTO] = { .type = NLA_U32 }, + [NFTA_FWD_NFPROTO] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_fwd_netdev_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index ee8d487b69c0..92d47e469204 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -59,7 +59,7 @@ static void nft_symhash_eval(const struct nft_expr *expr, static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, - [NFTA_HASH_LEN] = { .type = NLA_U32 }, + [NFTA_HASH_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c9d2f7c29f53..3d76ebfe8939 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, switch (priv->data.verdict.code) { case NFT_JUMP: case NFT_GOTO: - if (nft_chain_is_bound(chain)) { - err = -EBUSY; - goto err1; - } - chain->bound = true; + err = nf_tables_bind_chain(ctx, chain); + if (err < 0) + return err; break; default: break; @@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_activate(&chain_ctx, rule); + + nft_clear(ctx->net, chain); + break; + default: + break; + } + } return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); } @@ -107,6 +130,43 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_deactivate(&chain_ctx, rule, phase); + + switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nf_tables_unbind_chain(ctx, chain); + fallthrough; + case NFT_TRANS_PREPARE: + nft_deactivate_next(ctx->net, chain); + break; + default: + nft_chain_del(chain); + chain->bound = false; + chain->table->use--; + break; + } + break; + default: + break; + } + } if (phase == NFT_TRANS_COMMIT) return; @@ -131,15 +191,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, case NFT_GOTO: chain = 
data->verdict.chain; - if (!nft_chain_is_bound(chain)) + if (!nft_chain_binding(chain)) + break; + + /* Rule construction failed, but chain is already bound: + * let the transaction records release this chain and its rules. + */ + if (chain->bound) { + chain->use--; break; + } + /* Rule has been deleted, release chain and its rules. */ chain_ctx = *ctx; chain_ctx.chain = chain; - list_for_each_entry_safe(rule, n, &chain->rules, list) - nf_tables_rule_release(&chain_ctx, rule); - + chain->use--; + list_for_each_entry_safe(rule, n, &chain->rules, list) { + chain->use--; + list_del(&rule->list); + nf_tables_rule_destroy(&chain_ctx, rule); + } nf_tables_chain_destroy(&chain_ctx); break; default: diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 03ef4fdaa460..29ac48cdd6db 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -19,6 +19,7 @@ struct nft_lookup { struct nft_set *set; u8 sreg; u8 dreg; + bool dreg_set; bool invert; struct nft_set_binding binding; }; @@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr, } if (ext) { - if (set->flags & NFT_SET_MAP) + if (priv->dreg_set) nft_data_copy(&regs->data[priv->dreg], nft_set_ext_data(ext), set->dlen); @@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (flags & ~NFT_LOOKUP_F_INV) return -EINVAL; - if (flags & NFT_LOOKUP_F_INV) { - if (set->flags & NFT_SET_MAP) - return -EINVAL; + if (flags & NFT_LOOKUP_F_INV) priv->invert = true; - } } if (tb[NFTA_LOOKUP_DREG] != NULL) { @@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx, set->dlen); if (err < 0) return err; - } else if (set->flags & NFT_SET_MAP) - return -EINVAL; + priv->dreg_set = true; + } else if (set->flags & NFT_SET_MAP) { + /* Map given, but user asks for lookup only (i.e. to + * ignore value associated with key). + * + * This makes no sense for anonymous maps since they are + * scoped to the rule, but for named sets this can be useful.
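+ * As a hedged illustration (assumed nft CLI syntax, not part of
+ * this patch), a named map can now back a presence-only match:
+ * nft add map inet t m '{ type ipv4_addr : verdict; }'
+ * nft add rule inet t c ip saddr @m counter
+ * Here the rule only tests whether the source address is a key in
+ * @m; the stored verdicts are ignored since no destination
+ * register is requested.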
+ */ + if (set->flags & NFT_SET_ANONYMOUS) + return -EINVAL; + } priv->binding.flags = set->flags & NFT_SET_MAP; @@ -188,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb, goto nla_put_failure; if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg)) goto nla_put_failure; - if (priv->set->flags & NFT_SET_MAP) + if (priv->dreg_set) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e384e0de7a54..8fdc7318c03c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -458,7 +458,7 @@ EXPORT_SYMBOL_GPL(nft_meta_set_eval); const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, - [NFTA_META_KEY] = { .type = NLA_U32 }, + [NFTA_META_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_META_SREG] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(nft_meta_policy); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 3a3c7746e88f..8cb800989947 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -171,7 +171,8 @@ void nft_payload_eval(const struct nft_expr *expr, if (!skb_mac_header_was_set(skb)) goto err; - if (skb_vlan_tag_present(skb)) { + if (skb_vlan_tag_present(skb) && + priv->offset >= offsetof(struct ethhdr, h_proto)) { if (!nft_payload_copy_vlan(dest, skb, priv->offset, priv->len)) goto err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 0566d6aaf1e5..51ae64cd268f 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -42,7 +42,7 @@ void nft_range_eval(const struct nft_expr *expr, static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { [NFTA_RANGE_SREG] = { .type = NLA_U32 }, - [NFTA_RANGE_OP] = { .type = NLA_U32 }, + [NFTA_RANGE_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index f2addc844dd2..ed2e668474d6 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -18,7 +18,7 @@ #include <linux/icmpv6.h> const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { - [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, + [NFTA_REJECT_TYPE] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, }; EXPORT_SYMBOL_GPL(nft_reject_policy); diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 5990fdd7b3cc..35a2c28caa60 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -104,7 +104,7 @@ err: static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { [NFTA_RT_DREG] = { .type = NLA_U32 }, - [NFTA_RT_KEY] = { .type = NLA_U32 }, + [NFTA_RT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_rt_get_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 96081ac8d2b4..1e5e7a181e0b 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set, return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, be); 
} static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 76de6c8d9865..0b73cb0e752f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set, return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set, return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set) for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, he); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 15e451dc3fc4..db526cb7a485 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1274,8 +1274,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) struct nft_pipapo_match *new; int i; - new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count, - GFP_KERNEL); + new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); @@ -1974,12 +1973,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); struct nft_pipapo_match *m; struct nft_pipapo_field *f; int i, r; rcu_read_lock(); - m = rcu_dereference(priv->match); + if (iter->genmask == nft_genmask_cur(net)) + m = rcu_dereference(priv->match); + else + m = priv->clone; if (unlikely(!m)) goto out; @@ -2080,8 +2083,7 @@ static int nft_pipapo_init(const struct nft_set *set, if (field_count > NFT_PIPAPO_MAX_FIELDS) return -EINVAL; - m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count, - GFP_KERNEL); + m = kmalloc(struct_size(m, f, field_count), GFP_KERNEL); if (!m) return -ENOMEM; @@ -2148,10 +2150,12 @@ out_scratch: /** * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @ctx: context * @set: nftables API set representation * @m: matching data pointing to key mapping array */ -static void nft_set_pipapo_match_destroy(const struct nft_set *set, +static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; @@ -2168,15 +2172,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set, e = 
f->mt[r].e; - nft_set_elem_destroy(set, e, true); + nf_tables_set_elem_destroy(ctx, set, e); } } /** * nft_pipapo_destroy() - Free private data for set and all committed elements + * @ctx: context * @set: nftables API set representation */ -static void nft_pipapo_destroy(const struct nft_set *set) +static void nft_pipapo_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; @@ -2186,7 +2192,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2203,7 +2209,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) m = priv->clone; if (priv->dirty) - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 2f114aa10f1a..5c05c9b990fb 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -664,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set, return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -675,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, rbe); } } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 85f8df87efda..84def74698b7 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -138,9 +138,9 @@ static void nft_socket_eval(const struct nft_expr *expr, } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { - [NFTA_SOCKET_KEY] = { .type = NLA_U32 }, + [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, - [NFTA_SOCKET_LEVEL] = { .type = NLA_U32 }, + [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_socket_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index ea83f661417e..ae15cd693f0e 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -183,7 +183,7 @@ static void nft_tproxy_eval(const struct nft_expr *expr, } static const struct nla_policy nft_tproxy_policy[NFTA_TPROXY_MAX + 1] = { - [NFTA_TPROXY_FAMILY] = { .type = NLA_U32 }, + [NFTA_TPROXY_FAMILY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TPROXY_REG_ADDR] = { .type = NLA_U32 }, [NFTA_TPROXY_REG_PORT] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index b059aa541798..9f21953c7433 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -66,9 +66,9 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, } static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { - [NFTA_TUNNEL_KEY] = { .type = NLA_U32 }, + [NFTA_TUNNEL_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, - [NFTA_TUNNEL_MODE] = { .type = NLA_U32 }, + [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_tunnel_get_init(const struct nft_ctx *ctx, diff --git 
a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index c88fd078a9ae..452f8587adda 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -16,9 +16,9 @@ #include <net/xfrm.h> static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { - [NFTA_XFRM_KEY] = { .type = NLA_U32 }, + [NFTA_XFRM_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_XFRM_DIR] = { .type = NLA_U8 }, - [NFTA_XFRM_SPNUM] = { .type = NLA_U32 }, + [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_XFRM_DREG] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index e1990baf3a3b..dc9485854002 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); MODULE_DESCRIPTION("Passive OS fingerprint matching."); MODULE_ALIAS("ipt_osf"); MODULE_ALIAS("ip6t_osf"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 9f80972ae39b..7eaa35fdd9bd 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -57,8 +57,8 @@ struct netlbl_domaddr6_map { struct netlbl_dom_map { char *domain; - u16 family; struct netlbl_dommap_def def; + u16 family; u32 valid; struct list_head list; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3a1e0fd5bf14..383631873748 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1600,6 +1600,7 @@ out: int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; + unsigned long flags; struct sock *sk; int ret = 0; @@ -1609,12 +1610,12 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) /* sk->sk_err wants a positive error value */ info.code = -code; - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); return ret; } EXPORT_SYMBOL(netlink_set_err); @@ -2360,7 +2361,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK); if (control->start) { + cb->extack = control->extack; ret = control->start(cb); + cb->extack = NULL; if (ret) goto error_put; } @@ -2813,7 +2816,6 @@ static const struct proto_ops netlink_ops = { .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const struct net_proto_family netlink_family_ops = { diff --git a/net/netlink/diag.c b/net/netlink/diag.c index c6255eac305c..e4f21b1067bc 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -94,6 +94,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; + unsigned long flags; struct sock *sk; int num = 2; int ret = 0; @@ -152,7 +153,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num++; mc_list: - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &tbl->mc_list) { if (sk_hashed(sk)) continue; @@ -167,13 +168,13 @@ mc_list: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - sock_i_ino(sk)) < 0) { + __sock_i_ino(sk)) < 0) { ret = 1; break; } num++; } - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); done: cb->args[0] = 
num; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 04c4036bf406..a157247a1e45 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -912,6 +912,7 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family, .start = genl_start, .dump = genl_lock_dumpit, .done = genl_lock_done, + .extack = extack, }; genl_unlock(); @@ -924,6 +925,7 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family, .start = genl_start, .dump = ops->dumpit, .done = genl_parallel_done, + .extack = extack, }; err = __netlink_dump_start(net->genl_sock, skb, nlh, &c); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5a4cb796150f..eb8ccbd58df7 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1364,7 +1364,6 @@ static const struct proto_ops nr_proto_ops = { .sendmsg = nr_sendmsg, .recvmsg = nr_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct notifier_block nr_dev_notifier = { diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index c1d9be636933..d8345ed57c95 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -201,7 +201,6 @@ void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s); void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s); void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); -struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local); int nfc_llcp_local_put(struct nfc_llcp_local *local); u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, struct nfc_llcp_sock *sock); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 41e3a20c8935..e2680a3bef79 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -359,6 +359,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) struct sk_buff *skb; struct nfc_llcp_local *local; u16 size = 0; + int err; local = nfc_llcp_find_local(dev); if (local == NULL) @@ -368,8 +369,10 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; skb = alloc_skb(size, GFP_KERNEL); - if (skb == NULL) - return -ENOMEM; + if (skb == NULL) { + err = -ENOMEM; + goto out; + } skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); @@ -379,8 +382,11 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); - return nfc_data_exchange(dev, local->target_idx, skb, + err = nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); +out: + nfc_llcp_local_put(local); + return err; } int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) @@ -390,7 +396,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) const u8 *service_name_tlv = NULL; const u8 *miux_tlv = NULL; const u8 *rw_tlv = NULL; - u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw; + u8 service_name_tlv_length = 0; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index a27e1842b2a0..f60e424e0607 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -17,6 +17,8 @@ static u8 llcp_magic[3] = {0x46, 0x66, 0x6d}; static LIST_HEAD(llcp_devices); +/* Protects llcp_devices list */ +static DEFINE_SPINLOCK(llcp_devices_lock); static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb); @@ -141,7 +143,7 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, write_unlock(&local->raw_sockets.lock); } -struct nfc_llcp_local 
*nfc_llcp_local_get(struct nfc_llcp_local *local) +static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) { kref_get(&local->ref); @@ -169,7 +171,6 @@ static void local_release(struct kref *ref) local = container_of(ref, struct nfc_llcp_local, ref); - list_del(&local->list); local_cleanup(local); kfree(local); } @@ -282,12 +283,33 @@ static void nfc_llcp_sdreq_timer(struct timer_list *t) struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) { struct nfc_llcp_local *local; + struct nfc_llcp_local *res = NULL; + spin_lock(&llcp_devices_lock); list_for_each_entry(local, &llcp_devices, list) - if (local->dev == dev) + if (local->dev == dev) { + res = nfc_llcp_local_get(local); + break; + } + spin_unlock(&llcp_devices_lock); + + return res; +} + +static struct nfc_llcp_local *nfc_llcp_remove_local(struct nfc_dev *dev) +{ + struct nfc_llcp_local *local, *tmp; + + spin_lock(&llcp_devices_lock); + list_for_each_entry_safe(local, tmp, &llcp_devices, list) + if (local->dev == dev) { + list_del(&local->list); + spin_unlock(&llcp_devices_lock); return local; + } + spin_unlock(&llcp_devices_lock); - pr_debug("No device found\n"); + pr_warn("Shutting down device not found\n"); return NULL; } @@ -608,12 +630,15 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) *general_bytes_len = local->gb_len; + nfc_llcp_local_put(local); + return local->gb; } int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; + int err; if (gb_len < 3 || gb_len > NFC_MAX_GT_LEN) return -EINVAL; @@ -630,12 +655,16 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) if (memcmp(local->remote_gb, llcp_magic, 3)) { pr_err("MAC does not support LLCP\n"); - return -EINVAL; + err = -EINVAL; + goto out; } - return nfc_llcp_parse_gb_tlv(local, + err = nfc_llcp_parse_gb_tlv(local, &local->remote_gb[3], local->remote_gb_len - 3); +out: + nfc_llcp_local_put(local); + return err; } static u8 nfc_llcp_dsap(const struct sk_buff *pdu) @@ -1517,6 +1546,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) __nfc_llcp_recv(local, skb); + nfc_llcp_local_put(local); + return 0; } @@ -1533,6 +1564,8 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) /* Close and purge all existing sockets */ nfc_llcp_socket_release(local, true, 0); + + nfc_llcp_local_put(local); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, @@ -1558,6 +1591,8 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(local->remote_lto)); } + + nfc_llcp_local_put(local); } int nfc_llcp_register_device(struct nfc_dev *ndev) @@ -1608,7 +1643,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) void nfc_llcp_unregister_device(struct nfc_dev *dev) { - struct nfc_llcp_local *local = nfc_llcp_find_local(dev); + struct nfc_llcp_local *local = nfc_llcp_remove_local(dev); if (local == NULL) { pr_debug("No such device\n"); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 77642d18a3b4..645677f84dba 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -99,7 +99,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) } llcp_sock->dev = dev; - llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->local = local; llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; llcp_sock->service_name_len = min_t(unsigned int, llcp_addr.service_name_len, @@ -186,7 +186,7 @@ static int llcp_raw_sock_bind(struct socket *sock, struct 
sockaddr *addr, } llcp_sock->dev = dev; - llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->local = local; llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; nfc_llcp_sock_link(&local->raw_sockets, sk); @@ -696,22 +696,22 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (dev->dep_link_up == false) { ret = -ENOLINK; device_unlock(&dev->dev); - goto put_dev; + goto sock_llcp_put_local; } device_unlock(&dev->dev); if (local->rf_mode == NFC_RF_INITIATOR && addr->target_idx != local->target_idx) { ret = -ENOLINK; - goto put_dev; + goto sock_llcp_put_local; } llcp_sock->dev = dev; - llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->local = local; llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { ret = -ENOMEM; - goto sock_llcp_put_local; + goto sock_llcp_nullify; } llcp_sock->reserved_ssap = llcp_sock->ssap; @@ -757,11 +757,13 @@ sock_unlink: sock_llcp_release: nfc_llcp_put_ssap(local, llcp_sock->ssap); -sock_llcp_put_local: - nfc_llcp_local_put(llcp_sock->local); +sock_llcp_nullify: llcp_sock->local = NULL; llcp_sock->dev = NULL; +sock_llcp_put_local: + nfc_llcp_local_put(local); + put_dev: nfc_put_device(dev); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b9264e730fd9..e9ac6a6f934e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1039,11 +1039,14 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; - goto exit; + goto put_local; } rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); +put_local: + nfc_llcp_local_put(local); + exit: device_unlock(&dev->dev); @@ -1105,7 +1108,7 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { if (dev->dep_link_up) { rc = -EINPROGRESS; - goto exit; + goto put_local; } local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); @@ -1117,6 +1120,9 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) local->miux = cpu_to_be16(miux); +put_local: + nfc_llcp_local_put(local); + exit: device_unlock(&dev->dev); @@ -1172,7 +1178,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) if (rc != 0) { rc = -EINVAL; - goto exit; + goto put_local; } if (!sdp_attrs[NFC_SDP_ATTR_URI]) @@ -1191,7 +1197,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); if (sdreq == NULL) { rc = -ENOMEM; - goto exit; + goto put_local; } tlvs_len += sdreq->tlv_len; @@ -1201,10 +1207,14 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) if (hlist_empty(&sdreq_list)) { rc = -EINVAL; - goto exit; + goto put_local; } rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); + +put_local: + nfc_llcp_local_put(local); + exit: device_unlock(&dev->dev); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index de2ec66d7e83..0b1e6466f4fb 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -52,6 +52,7 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); +int nfc_llcp_local_put(struct nfc_llcp_local *local); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); void 
nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 0f23e5e8e03e..f4a38bd6a7e0 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <net/gso.h> #include <net/nsh.h> #include <net/tun_proto.h> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a8cf9a88758e..cab1e02b63e0 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -17,6 +17,7 @@ #include <linux/if_vlan.h> #include <net/dst.h> +#include <net/gso.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> @@ -1072,8 +1073,16 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, struct ovs_action_hash *hash_act = nla_data(attr); u32 hash = 0; - /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */ - hash = skb_get_hash(skb); + if (hash_act->hash_alg == OVS_HASH_ALG_L4) { + /* OVS_HASH_ALG_L4 hashing type. */ + hash = skb_get_hash(skb); + } else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) { + /* OVS_HASH_ALG_SYM_L4 hashing type. NOTE: this doesn't + * extend past an encapsulated header. + */ + hash = __skb_get_hash_symmetric(skb); + } + hash = jhash_1word(hash, hash_act->hash_basis); if (!hash) hash = 0x1; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 58f530f60172..a6d2a0b1aa21 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -35,6 +35,7 @@ #include <linux/rculist.h> #include <linux/dmi.h> #include <net/genetlink.h> +#include <net/gso.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/pkt_cls.h> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ead5418c126e..41116361433d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -3221,6 +3221,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, switch (act_hash->hash_alg) { case OVS_HASH_ALG_L4: + fallthrough; + case OVS_HASH_ALG_SYM_L4: break; default: return -EINVAL; diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index f2698d2316df..c4ebf810e4b1 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -69,9 +69,7 @@ static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size) { struct dp_meter_instance *ti; - ti = kvzalloc(sizeof(*ti) + - sizeof(struct dp_meter *) * size, - GFP_KERNEL); + ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL); if (!ti) return NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a2dbeb264f26..85ff90a03b0c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4621,7 +4621,6 @@ static const struct proto_ops packet_ops_spkt = { .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const struct proto_ops packet_ops = { @@ -4643,7 +4642,6 @@ static const struct proto_ops packet_ops = { .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, - .sendpage = sock_no_sendpage, }; static const struct net_proto_family packet_family_ops = { diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index ff5f49ab236e..3aa50dc7535b 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -28,24 +28,21 @@ static void pn_sock_close(struct sock *sk, long timeout) sk_common_release(sk); } -static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int pn_ioctl(struct sock *sk, int cmd,
int *karg) { struct sk_buff *skb; - int answ; switch (cmd) { case SIOCINQ: lock_sock(sk); skb = skb_peek(&sk->sk_receive_queue); - answ = skb ? skb->len : 0; + *karg = skb ? skb->len : 0; release_sock(sk); - return put_user(answ, (int __user *)arg); + return 0; case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: { - u32 res; - if (get_user(res, (u32 __user *)arg)) - return -EFAULT; + u32 res = *karg; if (res >= 256) return -EINVAL; if (cmd == SIOCPNADDRESOURCE) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 83ea13a50690..faba31f2eff2 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,10 +917,9 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } -static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int pep_ioctl(struct sock *sk, int cmd, int *karg) { struct pep_sock *pn = pep_sk(sk); - int answ; int ret = -ENOIOCTLCMD; switch (cmd) { @@ -933,13 +932,13 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) lock_sock(sk); if (sock_flag(sk, SOCK_URGINLINE) && !skb_queue_empty(&pn->ctrlreq_queue)) - answ = skb_peek(&pn->ctrlreq_queue)->len; + *karg = skb_peek(&pn->ctrlreq_queue)->len; else if (!skb_queue_empty(&sk->sk_receive_queue)) - answ = skb_peek(&sk->sk_receive_queue)->len; + *karg = skb_peek(&sk->sk_receive_queue)->len; else - answ = 0; + *karg = 0; release_sock(sk); - ret = put_user(answ, (int __user *)arg); + ret = 0; break; case SIOCPNENABLEPIPE: diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 71e2caf6ab85..1018340d89a7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -387,7 +387,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, return put_user(handle, (__u16 __user *)arg); } - return sk->sk_prot->ioctl(sk, cmd, arg); + return sk_ioctl(sk, cmd, (void __user *)arg); } static int pn_socket_listen(struct socket *sock, int backlog) @@ -441,7 +441,6 @@ const struct proto_ops phonet_dgram_ops = { .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; const struct proto_ops phonet_stream_ops = { @@ -462,7 +461,6 @@ const struct proto_ops phonet_stream_ops = { .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; EXPORT_SYMBOL(phonet_stream_ops); diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 76f0434d3d06..78beb74146e7 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -1244,7 +1244,6 @@ static const struct proto_ops qrtr_proto_ops = { .shutdown = sock_no_shutdown, .release = qrtr_release, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto qrtr_proto = { diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 0f25a386138c..0f7a729f1a1f 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -783,7 +783,7 @@ int qrtr_ns_init(void) goto err_sock; } - qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1); + qrtr_ns.workqueue = alloc_ordered_workqueue("qrtr_ns_handler", 0); if (!qrtr_ns.workqueue) { ret = -ENOMEM; goto err_sock; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 3ff6995244e5..01c4cdfef45d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -653,7 +653,6 @@ static const struct proto_ops rds_proto_ops = { .sendmsg = rds_sendmsg, .recvmsg = rds_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static void rds_sock_destruct(struct sock *sk) diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 8c4d1d6e9249..7d284ac7e81a 100644 --- 
a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -72,9 +72,10 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, { struct rds_conn_path *cp = rm->m_inc.i_conn_path; struct rds_tcp_connection *tc = cp->cp_transport_data; + struct msghdr msg = {}; + struct bio_vec bvec; int done = 0; int ret = 0; - int more; if (hdr_off == 0) { /* @@ -111,15 +112,17 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, goto out; } - more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0; while (sg < rm->data.op_nents) { - int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; - - ret = tc->t_sock->ops->sendpage(tc->t_sock, - sg_page(&rm->data.op_sg[sg]), - rm->data.op_sg[sg].offset + off, - rm->data.op_sg[sg].length - off, - flags); + msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | MSG_NOSIGNAL; + if (sg + 1 < rm->data.op_nents) + msg.msg_flags |= MSG_MORE; + + bvec_set_page(&bvec, sg_page(&rm->data.op_sg[sg]), + rm->data.op_sg[sg].length - off, + rm->data.op_sg[sg].offset + off); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, + rm->data.op_sg[sg].length - off); + ret = sock_sendmsg(tc->t_sock, &msg); rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]), rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, ret); @@ -132,8 +135,6 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, off = 0; sg++; } - if (sg == rm->data.op_nents - 1) - more = 0; } out: diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ca2b17f32670..49dafe9ac72f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1496,7 +1496,6 @@ static const struct proto_ops rose_proto_ops = { .sendmsg = rose_sendmsg, .recvmsg = rose_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct notifier_block rose_dev_notifier = { diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index da0b3b5157d5..fa8aec78f63d 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -954,7 +954,6 @@ static const struct proto_ops rxrpc_rpc_ops = { .sendmsg = rxrpc_sendmsg, .recvmsg = rxrpc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto rxrpc_proto = { @@ -989,7 +988,7 @@ static int __init af_rxrpc_init(void) goto error_call_jar; } - rxrpc_workqueue = alloc_workqueue("krxrpcd", WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + rxrpc_workqueue = alloc_ordered_workqueue("krxrpcd", WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!rxrpc_workqueue) { pr_notice("Failed to allocate work queue\n"); goto error_work_queue; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c819b812a899..b562fc2bb5b1 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -245,14 +245,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, nparms->tcfp_flags = parm->flags; nparms->tcfp_nkeys = parm->nkeys; - nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL); + nparms->tcfp_keys = kmemdup(parm->keys, ksize, GFP_KERNEL); if (!nparms->tcfp_keys) { ret = -ENOMEM; goto put_chain; } - memcpy(nparms->tcfp_keys, parm->keys, ksize); - for (i = 0; i < nparms->tcfp_nkeys; ++i) { u32 offmask = nparms->tcfp_keys[i].offmask; u32 cur = nparms->tcfp_keys[i].off; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 2e9dce03d1ec..f3121c5a85e9 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <net/act_api.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/pkt_cls.h> #include 
<net/tc_act/tc_police.h> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 815c3e416bc5..56065cc5a661 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -11,6 +11,7 @@ #include <linux/rhashtable.h> #include <linux/workqueue.h> #include <linux/refcount.h> +#include <linux/bitfield.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -71,6 +72,7 @@ struct fl_flow_key { struct flow_dissector_key_num_of_vlans num_of_vlans; struct flow_dissector_key_pppoe pppoe; struct flow_dissector_key_l2tpv3 l2tpv3; + struct flow_dissector_key_cfm cfm; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -120,6 +122,7 @@ struct cls_fl_filter { u32 handle; u32 flags; u32 in_hw_count; + u8 needs_tc_skb_ext:1; struct rcu_work rwork; struct net_device *hw_dev; /* Flower classifier is unlocked, which means that its reference counter @@ -415,6 +418,8 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) static void __fl_destroy_filter(struct cls_fl_filter *f) { + if (f->needs_tc_skb_ext) + tc_skb_ext_tc_disable(); tcf_exts_destroy(&f->exts); tcf_exts_put_net(&f->exts); kfree(f); @@ -615,7 +620,8 @@ static void *fl_get(struct tcf_proto *tp, u32 handle) } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { - [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC }, + [TCA_FLOWER_UNSPEC] = { .strict_start_type = + TCA_FLOWER_L2_MISS }, [TCA_FLOWER_CLASSID] = { .type = NLA_U32 }, [TCA_FLOWER_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, @@ -720,7 +726,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 }, - + [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), + [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -769,6 +776,12 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 }, }; +static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = { + [TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8, + FLOW_DIS_CFM_MDL_MAX), + [TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 }, +}; + static void fl_set_key_val(struct nlattr **tb, void *val, int val_type, void *mask, int mask_type, int len) @@ -1656,6 +1669,53 @@ static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype, return false; } +static void fl_set_key_cfm_md_level(struct nlattr **tb, + struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + u8 level; + + if (!tb[TCA_FLOWER_KEY_CFM_MD_LEVEL]) + return; + + level = nla_get_u8(tb[TCA_FLOWER_KEY_CFM_MD_LEVEL]); + key->cfm.mdl_ver = FIELD_PREP(FLOW_DIS_CFM_MDL_MASK, level); + mask->cfm.mdl_ver = FLOW_DIS_CFM_MDL_MASK; +} + +static void fl_set_key_cfm_opcode(struct nlattr **tb, + struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + fl_set_key_val(tb, &key->cfm.opcode, TCA_FLOWER_KEY_CFM_OPCODE, + &mask->cfm.opcode, TCA_FLOWER_UNSPEC, + sizeof(key->cfm.opcode)); +} + +static int fl_set_key_cfm(struct nlattr **tb, + struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX]; + int err; + + if (!tb[TCA_FLOWER_KEY_CFM]) + return 0; + + err = nla_parse_nested(nla_cfm_opt, TCA_FLOWER_KEY_CFM_OPT_MAX, + tb[TCA_FLOWER_KEY_CFM], cfm_opt_policy, extack); + if (err < 
0) + return err; + + fl_set_key_cfm_opcode(nla_cfm_opt, key, mask, extack); + fl_set_key_cfm_md_level(nla_cfm_opt, key, mask, extack); + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1671,6 +1731,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb, mask->meta.ingress_ifindex = 0xffffffff; } + fl_set_key_val(tb, &key->meta.l2_miss, TCA_FLOWER_L2_MISS, + &mask->meta.l2_miss, TCA_FLOWER_UNSPEC, + sizeof(key->meta.l2_miss)); + fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, sizeof(key->eth.dst)); @@ -1806,6 +1870,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb, TCA_FLOWER_KEY_L2TPV3_SID, &mask->l2tpv3.session_id, TCA_FLOWER_UNSPEC, sizeof(key->l2tpv3.session_id)); + } else if (key->basic.n_proto == htons(ETH_P_CFM)) { + ret = fl_set_key_cfm(tb, key, mask, extack); + if (ret) + return ret; } if (key->basic.ip_proto == IPPROTO_TCP || @@ -1988,6 +2056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_PPPOE, pppoe); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_CFM, cfm); skb_flow_dissector_init(dissector, keys, cnt); } @@ -2088,6 +2158,11 @@ errout_cleanup: return ret; } +static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask) +{ + return mask->meta.l2_miss; +} + static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, @@ -2124,6 +2199,14 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, return -EINVAL; } + /* Enable tc skb extension if filter matches on data extracted from + * this extension. 
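/* --- Editor's sketch, not part of the patch: how flower packs the CFM
 * maintenance-domain level into the shared mdl_ver octet. Assumes
 * FLOW_DIS_CFM_MDL_MASK covers the top three bits of the first CFM
 * header byte, consistent with the NLA_POLICY_MAX(NLA_U8,
 * FLOW_DIS_CFM_MDL_MAX) bound in cfm_opt_policy; EX_CFM_MDL_MASK below
 * is an illustrative stand-in.
 */
#include <linux/bits.h>
#include <linux/bitfield.h>
#include <linux/types.h>

#define EX_CFM_MDL_MASK GENMASK(7, 5)	/* stand-in for FLOW_DIS_CFM_MDL_MASK */

static u8 ex_cfm_pack_mdl(u8 level)
{
	return FIELD_PREP(EX_CFM_MDL_MASK, level);	/* set path: level 0..7 into bits 7..5 */
}

static u8 ex_cfm_unpack_mdl(u8 mdl_ver)
{
	return FIELD_GET(EX_CFM_MDL_MASK, mdl_ver);	/* dump path: recover the level */
}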
+ */ + if (fl_needs_tc_skb_ext(&mask->key)) { + f->needs_tc_skb_ext = 1; + tc_skb_ext_tc_enable(); + } + return 0; } @@ -3008,6 +3091,43 @@ nla_put_failure: return -EMSGSIZE; } +static int fl_dump_key_cfm(struct sk_buff *skb, + struct flow_dissector_key_cfm *key, + struct flow_dissector_key_cfm *mask) +{ + struct nlattr *opts; + int err; + u8 mdl; + + if (!memchr_inv(mask, 0, sizeof(*mask))) + return 0; + + opts = nla_nest_start(skb, TCA_FLOWER_KEY_CFM); + if (!opts) + return -EMSGSIZE; + + if (FIELD_GET(FLOW_DIS_CFM_MDL_MASK, mask->mdl_ver)) { + mdl = FIELD_GET(FLOW_DIS_CFM_MDL_MASK, key->mdl_ver); + err = nla_put_u8(skb, TCA_FLOWER_KEY_CFM_MD_LEVEL, mdl); + if (err) + goto err_cfm_opts; + } + + if (mask->opcode) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_CFM_OPCODE, key->opcode); + if (err) + goto err_cfm_opts; + } + + nla_nest_end(skb, opts); + + return 0; + +err_cfm_opts: + nla_nest_cancel(skb, opts); + return err; +} + static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3077,6 +3197,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; } + if (fl_dump_key_val(skb, &key->meta.l2_miss, + TCA_FLOWER_L2_MISS, &mask->meta.l2_miss, + TCA_FLOWER_UNSPEC, sizeof(key->meta.l2_miss))) + goto nla_put_failure; + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, sizeof(key->eth.dst)) || @@ -3290,6 +3415,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->hash.hash))) goto nla_put_failure; + if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 891e007d5c0b..9cff99558694 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -65,6 +65,7 @@ #include <linux/reciprocal_div.h> #include <net/netlink.h> #include <linux/if_vlan.h> +#include <net/gso.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tcp.h> diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 8aef7dd9fb88..325c29041c7d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1814,10 +1814,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); goto failure; } - if (hopt->prio) { - NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); - goto failure; - } } /* Keeping backward compatible with rate_table based iproute2 tc */ @@ -1913,6 +1909,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, TC_HTB_CLASSID_ROOT, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -1933,6 +1930,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, TC_H_MIN(parent->common.classid), .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -2018,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, .classid = cl->common.classid, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ef3021e1169..38d9aa0cd30e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ 
-21,6 +21,7 @@ #include <linux/reciprocal_div.h> #include <linux/rbtree.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> @@ -773,12 +774,10 @@ static void dist_free(struct disttable *d) * signed 16 bit values. */ -static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, - const struct nlattr *attr) +static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) { size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); - spinlock_t *root_lock; struct disttable *d; int i; @@ -793,13 +792,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, for (i = 0; i < n; i++) d->table[i] = data[i]; - root_lock = qdisc_root_sleeping_lock(sch); - - spin_lock_bh(root_lock); - swap(*tbl, d); - spin_unlock_bh(root_lock); - - dist_free(d); + *tbl = d; return 0; } @@ -956,6 +949,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, { struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_NETEM_MAX + 1]; + struct disttable *delay_dist = NULL; + struct disttable *slot_dist = NULL; struct tc_netem_qopt *qopt; struct clgstate old_clg; int old_loss_model = CLG_RANDOM; @@ -966,6 +961,19 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (ret < 0) return ret; + if (tb[TCA_NETEM_DELAY_DIST]) { + ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_SLOT_DIST]) { + ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); + if (ret) + goto table_free; + } + + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; old_loss_model = q->loss_model; @@ -974,26 +982,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); if (ret) { q->loss_model = old_loss_model; - return ret; + q->clg = old_clg; + goto unlock; } } else { q->loss_model = CLG_RANDOM; } - if (tb[TCA_NETEM_DELAY_DIST]) { - ret = get_dist_table(sch, &q->delay_dist, - tb[TCA_NETEM_DELAY_DIST]); - if (ret) - goto get_table_failure; - } - - if (tb[TCA_NETEM_SLOT_DIST]) { - ret = get_dist_table(sch, &q->slot_dist, - tb[TCA_NETEM_SLOT_DIST]); - if (ret) - goto get_table_failure; - } - + if (delay_dist) + swap(q->delay_dist, delay_dist); + if (slot_dist) + swap(q->slot_dist, slot_dist); sch->limit = qopt->limit; q->latency = PSCHED_TICKS2NS(qopt->latency); @@ -1041,15 +1040,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - return ret; +unlock: + sch_tree_unlock(sch); -get_table_failure: - /* recover clg and loss_model, in case of - * q->clg and q->loss_model were modified - * in get_loss_clg() - */ - q->clg = old_clg; - q->loss_model = old_loss_model; +table_free: + dist_free(delay_dist); + dist_free(slot_dist); return ret; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index cf0e61ed9225..717ae51d94a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -20,6 +20,7 @@ #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/time.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -27,6 +28,8 @@ #include <net/sock.h> #include <net/tcp.h> +#define TAPRIO_STAT_NOT_SET (~0ULL) + #include "sch_mqprio_lib.h" static LIST_HEAD(taprio_list); @@ -1527,7 +1530,7 @@ static int taprio_enable_offload(struct net_device *dev, "Not enough memory for enabling 
offload mode"); return -ENOMEM; } - offload->enable = 1; + offload->cmd = TAPRIO_CMD_REPLACE; offload->extack = extack; mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt); offload->mqprio.extack = extack; @@ -1575,7 +1578,7 @@ static int taprio_disable_offload(struct net_device *dev, "Not enough memory to disable offload mode"); return -ENOMEM; } - offload->enable = 0; + offload->cmd = TAPRIO_CMD_DESTROY; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { @@ -2292,6 +2295,72 @@ nla_put_failure: return -EMSGSIZE; } +static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype) +{ + if (val == TAPRIO_STAT_NOT_SET) + return 0; + if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD)) + return -EMSGSIZE; + return 0; +} + +static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d, + struct tc_taprio_qopt_offload *offload, + struct tc_taprio_qopt_stats *stats) +{ + struct net_device *dev = qdisc_dev(sch); + const struct net_device_ops *ops; + struct sk_buff *skb = d->skb; + struct nlattr *xstats; + int err; + + ops = qdisc_dev(sch)->netdev_ops; + + /* FIXME I could use qdisc_offload_dump_helper(), but that messes + * with sch->flags depending on whether the device reports taprio + * stats, and I'm not sure whether that's a good idea, considering + * that stats are optional to the offload itself + */ + if (!ops->ndo_setup_tc) + return 0; + + memset(stats, 0xff, sizeof(*stats)); + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + xstats = nla_nest_start(skb, TCA_STATS_APP); + if (!xstats) + goto err; + + if (taprio_put_stat(skb, stats->window_drops, + TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) || + taprio_put_stat(skb, stats->tx_overruns, + TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS)) + goto err_cancel; + + nla_nest_end(skb, xstats); + + return 0; + +err_cancel: + nla_nest_cancel(skb, xstats); +err: + return -EMSGSIZE; +} + +static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct tc_taprio_qopt_offload offload = { + .cmd = TAPRIO_CMD_STATS, + }; + + return taprio_dump_xstats(sch, d, &offload, &offload.stats); +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); @@ -2391,12 +2460,20 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __acquires(d->lock) { struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct tc_taprio_qopt_offload offload = { + .cmd = TAPRIO_CMD_QUEUE_STATS, + .queue_stats = { + .queue = cl - 1, + }, + }; + struct Qdisc *child; - sch = rtnl_dereference(dev_queue->qdisc_sleeping); - if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || - qdisc_qstats_copy(d, sch) < 0) + child = rtnl_dereference(dev_queue->qdisc_sleeping); + if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 || + qdisc_qstats_copy(d, child) < 0) return -1; - return 0; + + return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats); } static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) @@ -2443,6 +2520,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .dequeue = taprio_dequeue, .enqueue = taprio_enqueue, .dump = taprio_dump, + .dump_stats = taprio_dump_stats, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 277ad11f4d61..17d2d00ddb18 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,6 +13,7 @@ #include <linux/string.h> #include <linux/errno.h> #include 
<linux/skbuff.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> diff --git a/net/sctp/offload.c b/net/sctp/offload.c index eb874e3c399a..502095173d88 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -22,6 +22,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/checksum.h> #include <net/protocol.h> +#include <net/gso.h> static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c365df24ad33..274d07bd774f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -500,9 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, continue; fl4->fl4_sport = laddr->a.v4.sin_port; - flowi4_update_output(fl4, - asoc->base.sk->sk_bound_dev_if, - RT_CONN_FLAGS_TOS(asoc->base.sk, tos), + flowi4_update_output(fl4, asoc->base.sk->sk_bound_dev_if, daddr->v4.sin_addr.s_addr, laddr->a.v4.sin_addr.s_addr); @@ -1135,7 +1133,6 @@ static const struct proto_ops inet_seqpacket_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* Registration with AF_INET family. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cda8c2874691..6554a357fe33 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4895,7 +4895,7 @@ out: } /* The SCTP ioctl handler. */ -static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int sctp_ioctl(struct sock *sk, int cmd, int *karg) { int rc = -ENOTCONN; @@ -4911,7 +4911,7 @@ static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCINQ: { struct sk_buff *skb; - unsigned int amount = 0; + *karg = 0; skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { @@ -4919,9 +4919,9 @@ static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) * We will only return the amount of this packet since * that is all that will be read. 
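/* --- Editor's sketch, not part of the patch: sctp_ioctl() now reports
 * SIOCINQ through a kernel int, and the socket core performs the single
 * copy-out. Simplified caller shape under that convention; the helper
 * name is illustrative.
 */
#include <net/sock.h>
#include <linux/uaccess.h>

static int ex_sock_ioctl(struct sock *sk, int cmd, int __user *argp)
{
	int karg = 0;
	int rc;

	rc = sk->sk_prot->ioctl(sk, cmd, &karg);	/* protocol touches no user memory */
	if (rc)
		return rc;
	return put_user(karg, argp);			/* one audited copy-out */
}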
*/ - amount = skb->len; + *karg = skb->len; } - rc = put_user(amount, (int __user *)arg); + rc = 0; break; } default: @@ -8281,6 +8281,22 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, return retval; } +static bool sctp_bpf_bypass_getsockopt(int level, int optname) +{ + if (level == SOL_SCTP) { + switch (optname) { + case SCTP_SOCKOPT_PEELOFF: + case SCTP_SOCKOPT_PEELOFF_FLAGS: + case SCTP_SOCKOPT_CONNECTX3: + return true; + default: + return false; + } + } + + return false; +} + static int sctp_hash(struct sock *sk) { /* STUB */ @@ -9650,6 +9666,7 @@ struct proto sctp_prot = { .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, + .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt, .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, @@ -9705,6 +9722,7 @@ struct proto sctpv6_prot = { .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, + .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt, .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index e843760e9aaa..54afbe4fb087 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -148,18 +148,19 @@ static void sctp_sched_free_sched(struct sctp_stream *stream) int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) { - struct sctp_sched_ops *n = sctp_sched_ops[sched]; struct sctp_sched_ops *old = asoc->outqueue.sched; struct sctp_datamsg *msg = NULL; + struct sctp_sched_ops *n; struct sctp_chunk *ch; int i, ret = 0; - if (old == n) - return ret; - if (sched > SCTP_SS_MAX) return -EINVAL; + n = sctp_sched_ops[sched]; + if (old == n) + return ret; + if (old) sctp_sched_free_sched(&asoc->stream); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 538e9c6ec8c9..a7f887d91d89 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3133,34 +3133,6 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, return put_user(answ, (int __user *)arg); } -static ssize_t smc_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - struct sock *sk = sock->sk; - struct smc_sock *smc; - int rc = -EPIPE; - - smc = smc_sk(sk); - lock_sock(sk); - if (sk->sk_state != SMC_ACTIVE) { - release_sock(sk); - goto out; - } - release_sock(sk); - if (smc->use_fallback) { - rc = kernel_sendpage(smc->clcsock, page, offset, - size, flags); - } else { - lock_sock(sk); - rc = smc_tx_sendpage(smc, page, offset, size, flags); - release_sock(sk); - SMC_STAT_INC(smc, sendpage_cnt); - } - -out: - return rc; -} - /* Map the affected portions of the rmbe into an spd, note the number of bytes * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor * updates till whenever a respective page has been fully processed. 
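/* --- Editor's sketch, not part of the patch: the conversion this series
 * repeats wherever ->sendpage() used to sit. A page region goes out as a
 * bvec-backed sendmsg tagged MSG_SPLICE_PAGES; ex_send_page() is an
 * illustrative helper, not an upstream function.
 */
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/bvec.h>
#include <linux/uio.h>

static int ex_send_page(struct socket *sock, struct page *page,
			unsigned int offset, unsigned int len, int flags)
{
	struct bio_vec bvec;
	struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, };

	bvec_set_page(&bvec, page, len, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
	return sock_sendmsg(sock, &msg);	/* pages are spliced, not copied */
}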
@@ -3232,7 +3204,6 @@ static const struct proto_ops smc_sock_ops = { .sendmsg = smc_sendmsg, .recvmsg = smc_recvmsg, .mmap = sock_no_mmap, - .sendpage = smc_sendpage, .splice_read = smc_splice_read, }; diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c index e80e34f7ac15..ca14c0f3a07d 100644 --- a/net/smc/smc_stats.c +++ b/net/smc/smc_stats.c @@ -227,7 +227,7 @@ static int smc_nl_fill_stats_tech_data(struct sk_buff *skb, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SENDPAGE_CNT, - smc_tech->sendpage_cnt, + 0, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CORK_CNT, diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index 84b7ecd8c05c..b60fe1eb37ab 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -71,7 +71,6 @@ struct smc_stats_tech { u64 clnt_v2_succ_cnt; u64 srv_v1_succ_cnt; u64 srv_v2_succ_cnt; - u64 sendpage_cnt; u64 urg_data_cnt; u64 splice_cnt; u64 cork_cnt; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 45128443f1f1..3b0ff3b589c7 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -168,8 +168,7 @@ static bool smc_tx_should_cork(struct smc_sock *smc, struct msghdr *msg) * should known how/when to uncork it. */ if ((msg->msg_flags & MSG_MORE || - smc_tx_is_corked(smc) || - msg->msg_flags & MSG_SENDPAGE_NOTLAST) && + smc_tx_is_corked(smc)) && atomic_read(&conn->sndbuf_space)) return true; @@ -298,22 +297,6 @@ out_err: return rc; } -int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, - size_t size, int flags) -{ - struct msghdr msg = {.msg_flags = flags}; - char *kaddr = kmap(page); - struct kvec iov; - int rc; - - iov.iov_base = kaddr + offset; - iov.iov_len = size; - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, size); - rc = smc_tx_sendmsg(smc, &msg, size); - kunmap(page); - return rc; -} - /***************************** sndbuf consumer *******************************/ /* sndbuf consumer: actual data transfer of one target chunk with ISM write */ diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 34b578498b1f..a59f370b8b43 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -31,8 +31,6 @@ void smc_tx_pending(struct smc_connection *conn); void smc_tx_work(struct work_struct *work); void smc_tx_init(struct smc_sock *smc); int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); -int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, - size_t size, int flags); int smc_tx_sndbuf_nonempty(struct smc_connection *conn); void smc_tx_sndbuf_nonfull(struct smc_sock *smc); void smc_tx_consumer_update(struct smc_connection *conn, bool force); diff --git a/net/socket.c b/net/socket.c index b7e01d0fe082..2b0e54b2405c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -57,6 +57,7 @@ #include <linux/mm.h> #include <linux/socket.h> #include <linux/file.h> +#include <linux/splice.h> #include <linux/net.h> #include <linux/interrupt.h> #include <linux/thread_info.h> @@ -126,11 +127,10 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +static void sock_splice_eof(struct file *file); #ifdef CONFIG_PROC_FS static void sock_show_fdinfo(struct seq_file *m, struct file *f) @@ -162,9 +162,9 @@ static const 
struct file_operations socket_file_ops = { .mmap = sock_mmap, .release = sock_close, .fasync = sock_fasync, - .sendpage = sock_sendpage, - .splice_write = generic_splice_sendpage, + .splice_write = splice_to_socket, .splice_read = sock_splice_read, + .splice_eof = sock_splice_eof, .show_fdinfo = sock_show_fdinfo, }; @@ -471,6 +471,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) return file; } + file->f_mode |= FMODE_NOWAIT; sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); @@ -1066,26 +1067,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_recvmsg); -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) -{ - struct socket *sock; - int flags; - int ret; - - sock = file->private_data; - - flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ - flags |= more; - - ret = kernel_sendpage(sock, page, offset, size, flags); - - if (trace_sock_send_length_enabled()) - call_trace_sock_send_length(sock->sk, ret, 0); - return ret; -} - static ssize_t sock_splice_read(struct file *file, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) @@ -1093,11 +1074,19 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, struct socket *sock = file->private_data; if (unlikely(!sock->ops->splice_read)) - return generic_file_splice_read(file, ppos, pipe, len, flags); + return copy_splice_read(file, ppos, pipe, len, flags); return sock->ops->splice_read(sock, ppos, pipe, len, flags); } +static void sock_splice_eof(struct file *file) +{ + struct socket *sock = file->private_data; + + if (sock->ops->splice_eof) + sock->ops->splice_eof(sock); +} + static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; @@ -2138,6 +2127,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, msg.msg_name = (struct sockaddr *)&address; msg.msg_namelen = addr_len; } + flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; @@ -2483,6 +2473,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, msg_sys->msg_control = ctl_buf; msg_sys->msg_control_is_user = false; } + flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; msg_sys->msg_flags = flags; if (sock->file->f_flags & O_NONBLOCK) @@ -3563,54 +3554,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr) EXPORT_SYMBOL(kernel_getpeername); /** - * kernel_sendpage - send a &page through a socket (kernel space) - * @sock: socket - * @page: page - * @offset: page offset - * @size: total size in bytes - * @flags: flags (MSG_DONTWAIT, ...) - * - * Returns the total amount sent in bytes or an error. - */ - -int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) -{ - if (sock->ops->sendpage) { - /* Warn in case the improper page to zero-copy send */ - WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send"); - return sock->ops->sendpage(sock, page, offset, size, flags); - } - return sock_no_sendpage(sock, page, offset, size, flags); -} -EXPORT_SYMBOL(kernel_sendpage); - -/** - * kernel_sendpage_locked - send a &page through the locked sock (kernel space) - * @sk: sock - * @page: page - * @offset: page offset - * @size: total size in bytes - * @flags: flags (MSG_DONTWAIT, ...) 
- * - * Returns the total amount sent in bytes or an error. - * Caller must hold @sk. - */ - -int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct socket *sock = sk->sk_socket; - - if (sock->ops->sendpage_locked) - return sock->ops->sendpage_locked(sk, page, offset, size, - flags); - - return sock_no_sendpage_locked(sk, page, offset, size, flags); -} -EXPORT_SYMBOL(kernel_sendpage_locked); - -/** * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space) * @sock: socket * @how: connection part diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 79967b6925bd..587811a002c9 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) switch (*ip) { case SVC_POOL_AUTO: - return strlcpy(buf, "auto\n", 20); + return sysfs_emit(buf, "auto\n"); case SVC_POOL_GLOBAL: - return strlcpy(buf, "global\n", 20); + return sysfs_emit(buf, "global\n"); case SVC_POOL_PERCPU: - return strlcpy(buf, "percpu\n", 20); + return sysfs_emit(buf, "percpu\n"); case SVC_POOL_PERNODE: - return strlcpy(buf, "pernode\n", 20); + return sysfs_emit(buf, "pernode\n"); default: - return sprintf(buf, "%d\n", *ip); + return sysfs_emit(buf, "%d\n", *ip); } } @@ -597,34 +597,25 @@ svc_destroy(struct kref *ref) } EXPORT_SYMBOL_GPL(svc_destroy); -/* - * Allocate an RPC server's buffer space. - * We allocate pages and place them in rq_pages. - */ -static int +static bool svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) { - unsigned int pages, arghi; + unsigned long pages, ret; /* bc_xprt uses fore channel allocated buffers */ if (svc_is_backchannel(rqstp)) - return 1; + return true; pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. 
* We assume one is at most one page */ - arghi = 0; WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); if (pages > RPCSVC_MAXPAGES) pages = RPCSVC_MAXPAGES; - while (pages) { - struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); - if (!p) - break; - rqstp->rq_pages[arghi++] = p; - pages--; - } - return pages == 0; + + ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages, + rqstp->rq_pages); + return ret == pages; } /* @@ -649,7 +640,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return rqstp; - pagevec_init(&rqstp->rq_pvec); + folio_batch_init(&rqstp->rq_fbatch); __set_bit(RQ_BUSY, &rqstp->rq_flags); rqstp->rq_server = serv; @@ -860,9 +851,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) } if (*rqstp->rq_next_page) { - if (!pagevec_space(&rqstp->rq_pvec)) - __pagevec_release(&rqstp->rq_pvec); - pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page); + if (!folio_batch_add(&rqstp->rq_fbatch, + page_folio(*rqstp->rq_next_page))) + __folio_batch_release(&rqstp->rq_fbatch); } get_page(page); @@ -896,7 +887,7 @@ void svc_rqst_release_pages(struct svc_rqst *rqstp) void svc_rqst_free(struct svc_rqst *rqstp) { - pagevec_release(&rqstp->rq_pvec); + folio_batch_release(&rqstp->rq_fbatch); svc_release_buffer(rqstp); if (rqstp->rq_scratch_page) put_page(rqstp->rq_scratch_page); @@ -1173,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi */ static void svc_unregister(const struct svc_serv *serv, struct net *net) { + struct sighand_struct *sighand; struct svc_program *progp; unsigned long flags; unsigned int i; @@ -1189,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) } } - spin_lock_irqsave(¤t->sighand->siglock, flags); + rcu_read_lock(); + sighand = rcu_dereference(current->sighand); + spin_lock_irqsave(&sighand->siglock, flags); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + spin_unlock_irqrestore(&sighand->siglock, flags); + rcu_read_unlock(); } /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 13a14897bc17..62c7919ea610 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -74,13 +74,18 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. */ + +/** + * svc_reg_xprt_class - Register a server-side RPC transport class + * @xcl: New transport class to be registered + * + * Returns zero on success; otherwise a negative errno is returned. 
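/* --- Editor's sketch, not part of the patch: alloc_pages_bulk_array_node()
 * reports how many array slots it filled, so the rewritten
 * svc_init_buffer() collapses to one call plus an exact-count check.
 * Names below are illustrative.
 */
#include <linux/gfp.h>

static bool ex_fill_page_array(int node, unsigned long want,
			       struct page **arr)
{
	unsigned long got;

	got = alloc_pages_bulk_array_node(GFP_KERNEL, node, want, arr);
	return got == want;	/* shortfall is failure; caller frees what landed */
}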
+ */ int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; int res = -EEXIST; - dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name); - INIT_LIST_HEAD(&xcl->xcl_list); spin_lock(&svc_xprt_class_lock); /* Make sure there isn't already a class with the same name */ @@ -96,9 +101,13 @@ out: } EXPORT_SYMBOL_GPL(svc_reg_xprt_class); +/** + * svc_unreg_xprt_class - Unregister a server-side RPC transport class + * @xcl: Transport class to be unregistered + * + */ void svc_unreg_xprt_class(struct svc_xprt_class *xcl) { - dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); spin_lock(&svc_xprt_class_lock); list_del_init(&xcl->xcl_list); spin_unlock(&svc_xprt_class_lock); @@ -685,8 +694,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array(GFP_KERNEL, pages, - rqstp->rq_pages); + ret = alloc_pages_bulk_array_node(GFP_KERNEL, + rqstp->rq_pool->sp_id, + pages, rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; @@ -843,15 +853,11 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ - dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", - rqstp, rqstp->rq_pool->sp_id, xprt, - kref_read(&xprt->xpt_ref)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } else @@ -894,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) err = -EAGAIN; if (len <= 0) goto out_release; + trace_svc_xdr_recvfrom(&rqstp->rq_arg); clear_bit(XPT_OLD, &xprt->xpt_flags); @@ -902,6 +909,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; + rqstp->rq_stime = ktime_get(); return len; out_release: rqstp->rq_res.len = 0; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f77cebe2c071..e43f26382411 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -826,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) trace_sk_data_ready(sk); - if (svsk) { - /* Refer to svc_setup_socket() for details. */ - rmb(); - svsk->sk_odata(sk); - } - /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -840,13 +834,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk) * when one of child sockets become ESTABLISHED. * 2) data_ready method of the child socket may be called * when it receives data before the socket is accepted. - * In case of 2, we should ignore it silently. + * In case of 2, we should ignore it silently and DO NOT + * dereference svsk. */ - if (sk->sk_state == TCP_LISTEN) { - if (svsk) { - set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); - svc_xprt_enqueue(&svsk->sk_xprt); - } + if (sk->sk_state != TCP_LISTEN) + return; + + if (svsk) { + /* Refer to svc_setup_socket() for details. 
*/ + rmb(); + svsk->sk_odata(sk); + set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); + svc_xprt_enqueue(&svsk->sk_xprt); } } @@ -887,13 +886,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); err = kernel_accept(sock, &newsock, O_NONBLOCK); if (err < 0) { - if (err == -ENOMEM) - printk(KERN_WARNING "%s: no more sockets!\n", - serv->sv_name); - else if (err != -EAGAIN) - net_warn_ratelimited("%s: accept failed (err %d)!\n", - serv->sv_name, -err); - trace_svcsock_accept_err(xprt, serv->sv_name, err); + if (err != -EAGAIN) + trace_svcsock_accept_err(xprt, serv->sv_name, err); return NULL; } if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL))) @@ -1203,13 +1197,14 @@ err_noclose: static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, int flags) { - return kernel_sendpage(sock, virt_to_page(vec->iov_base), - offset_in_page(vec->iov_base), - vec->iov_len, flags); + struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, }; + + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len); + return sock_sendmsg(sock, &msg); } /* - * kernel_sendpage() is used exclusively to reduce the number of + * MSG_SPLICE_PAGES is used exclusively to reduce the number of * copy operations in this path. Therefore the caller must ensure * that the pages backing @xdr are unchanging. * @@ -1249,28 +1244,13 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, if (ret != head->iov_len) goto out; - if (xdr->page_len) { - unsigned int offset, len, remaining; - struct bio_vec *bvec; - - bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT); - offset = offset_in_page(xdr->page_base); - remaining = xdr->page_len; - while (remaining > 0) { - len = min(remaining, bvec->bv_len - offset); - ret = kernel_sendpage(sock, bvec->bv_page, - bvec->bv_offset + offset, - len, 0); - if (ret < 0) - return ret; - *sentp += ret; - if (ret != len) - goto out; - remaining -= len; - offset = 0; - bvec++; - } - } + msg.msg_flags = MSG_SPLICE_PAGES; + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec, + xdr_buf_pagecount(xdr), xdr->page_len); + ret = sock_sendmsg(sock, &msg); + if (ret < 0) + return ret; + *sentp += ret; if (tail->iov_len) { ret = svc_tcp_send_kvec(sock, tail, 0); @@ -1464,7 +1444,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, svsk->sk_owspace = inet->sk_write_space; /* * This barrier is necessary in order to prevent race condition - * with svc_data_ready(), svc_listen_data_ready() and others + * with svc_data_ready(), svc_tcp_listen_data_ready(), and others * when calling callbacks above. 
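/* --- Editor's note, not part of the patch: the pairing these barriers
 * preserve, in miniature. svc_setup_socket() saves the original
 * callbacks and issues wmb() before publishing sk_user_data; readers
 * load sk_user_data, then rmb(), then use the saved callbacks. Generic
 * shape with illustrative names:
 */
#include <asm/barrier.h>
#include <linux/compiler.h>

struct ex_publish { int payload; int ready; };

static void ex_pub(struct ex_publish *p, int v)
{
	p->payload = v;
	wmb();				/* order payload before the flag */
	WRITE_ONCE(p->ready, 1);
}

static int ex_sub(const struct ex_publish *p)
{
	if (!READ_ONCE(p->ready))
		return -EAGAIN;
	rmb();				/* pairs with the wmb() above */
	return p->payload;
}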
*/ wmb(); @@ -1476,7 +1456,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - trace_svcsock_new_socket(sock); + trace_svcsock_new(svsk, sock); return svsk; } @@ -1657,6 +1637,8 @@ static void svc_sock_free(struct svc_xprt *xprt) struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct socket *sock = svsk->sk_sock; + trace_svcsock_free(svsk, sock); + tls_handshake_cancel(sock->sk); if (sock->file) sockfd_put(sock); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 36835b2f5446..2a22e78af116 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_reserve_space); - /** * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending * @xdr: pointer to xdr_stream - * @vec: pointer to a kvec array * @nbytes: number of bytes to reserve * - * Reserves enough buffer space to encode 'nbytes' of data and stores the - * pointers in 'vec'. The size argument passed to xdr_reserve_space() is - * determined based on the number of bytes remaining in the current page to - * avoid invalidating iov_base pointers when xdr_commit_encode() is called. + * The size argument passed to xdr_reserve_space() is determined based + * on the number of bytes remaining in the current page to avoid + * invalidating iov_base pointers when xdr_commit_encode() is called. + * + * Return values: + * %0: success + * %-EMSGSIZE: not enough space is available in @xdr */ -int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes) +int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes) { - int thislen; - int v = 0; + size_t thislen; __be32 *p; /* @@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte xdr->end = xdr->p; } + /* XXX: Let's find a way to make this more efficient */ while (nbytes) { thislen = xdr->buf->page_len % PAGE_SIZE; thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen); p = xdr_reserve_space(xdr, thislen); if (!p) - return -EIO; + return -EMSGSIZE; - vec[v].iov_base = p; - vec[v].iov_len = thislen; - v++; nbytes -= thislen; } - return v; + return 0; } EXPORT_SYMBOL_GPL(xdr_reserve_space_vec); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index aa2227a7e552..7420a2c990c7 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, */ get_page(virt_to_page(rqst->rq_buffer)); sctxt->sc_send_wr.opcode = IB_WR_SEND; - ret = svc_rdma_send(rdma, sctxt); - if (ret < 0) - return ret; - - ret = wait_for_completion_killable(&sctxt->sc_done); - svc_rdma_send_ctxt_put(rdma, sctxt); - return ret; + return svc_rdma_send(rdma, sctxt); } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index a22fe7587fa6..85c8bcaebb80 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, static struct svc_rdma_recv_ctxt * svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) { + int node = ibdev_to_node(rdma->sc_cm_id->device); struct svc_rdma_recv_ctxt *ctxt; dma_addr_t addr; void *buffer; - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); + ctxt = 
kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node); if (!ctxt) goto fail0; - buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); + buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; addr = ib_dma_map_single(rdma->sc_pd->device, buffer, @@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->rc_recv_sge.length = rdma->sc_max_req_size; ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; ctxt->rc_recv_buf = buffer; - ctxt->rc_temp = false; return ctxt; fail2: @@ -232,10 +232,7 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, pcl_free(&ctxt->rc_write_pcl); pcl_free(&ctxt->rc_reply_pcl); - if (!ctxt->rc_temp) - llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); - else - svc_rdma_recv_ctxt_destroy(rdma, ctxt); + llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); } /** @@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt) } static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, - unsigned int wanted, bool temp) + unsigned int wanted) { const struct ib_recv_wr *bad_wr = NULL; struct svc_rdma_recv_ctxt *ctxt; @@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, break; trace_svcrdma_post_recv(ctxt); - ctxt->rc_temp = temp; ctxt->rc_recv_wr.next = recv_chain; recv_chain = &ctxt->rc_recv_wr; rdma->sc_pending_recvs++; @@ -309,7 +305,7 @@ err_free: */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests); } /** @@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) * client reconnects. */ if (rdma->sc_pending_recvs < rdma->sc_max_requests) - if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) + if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch)) goto dropped; /* All wc fields are now known to be valid */ @@ -775,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, * * The next ctxt is removed from the "receive" lists. * - * - If the ctxt completes a Read, then finish assembling the Call - * message and return the number of bytes in the message. - * * - If the ctxt completes a Receive, then construct the Call * message from the contents of the Receive buffer. * @@ -786,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, * in the message. * * - If there are Read chunks in this message, post Read WRs to - * pull that payload and return 0. + * pull that payload. When the Read WRs complete, build the + * full message and return the number of bytes in it. */ int svc_rdma_recvfrom(struct svc_rqst *rqstp) { @@ -796,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_recv_ctxt *ctxt; int ret; + /* Prevent svc_xprt_release() from releasing pages in rq_pages + * when returning 0 or an error. + */ + rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_next_page = rqstp->rq_respages; + rqstp->rq_xprt_ctxt = NULL; ctxt = NULL; @@ -819,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) DMA_FROM_DEVICE); svc_rdma_build_arg_xdr(rqstp, ctxt); - /* Prevent svc_xprt_release from releasing pages in rq_pages - * if we return 0 or an error. 
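/* --- Editor's sketch, not part of the patch: the NUMA placement pattern
 * this series applies across svc_rdma — put per-connection objects on
 * the memory node of the underlying IB device. struct ex_obj is an
 * illustrative stand-in.
 */
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct ex_obj { int placeholder; };

static struct ex_obj *ex_alloc_on_device_node(struct ib_device *ibdev)
{
	return kmalloc_node(sizeof(struct ex_obj), GFP_KERNEL,
			    ibdev_to_node(ibdev));
}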
- */ - rqstp->rq_respages = rqstp->rq_pages; - rqstp->rq_next_page = rqstp->rq_respages; - ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); if (ret < 0) goto out_err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 11cf7c646644..e460e25a1d6d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) if (node) { ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node); } else { - ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), - GFP_KERNEL); + ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), + GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device)); if (!ctxt) goto out_noctx; @@ -84,8 +84,7 @@ out_noctx: return NULL; } -static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, - struct svc_rdma_rw_ctxt *ctxt, +static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt, struct llist_head *list) { sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE); @@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, struct svc_rdma_rw_ctxt *ctxt) { - __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts); + __svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts); } /** @@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, struct svc_rdma_rw_ctxt *ctxt; LLIST_HEAD(free); + trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount); + first = last = NULL; while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) { list_del(&ctxt->rw_list); @@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, ctxt->rw_sg_table.sgl, ctxt->rw_nents, dir); - __svc_rdma_put_rw_ctxt(rdma, ctxt, &free); + __svc_rdma_put_rw_ctxt(ctxt, &free); ctxt->rw_node.next = first; first = &ctxt->rw_node; @@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, { struct svc_rdma_write_info *info; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc_node(sizeof(*info), GFP_KERNEL, + ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; @@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma) { struct svc_rdma_read_info *info; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc_node(sizeof(*info), GFP_KERNEL, + ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; @@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) return; } -/* This function sleeps when the transport's Send Queue is congested. - * +/* * Assumptions: * - If ib_post_send() succeeds, only one completion is expected, * even if one or more WRs are flushed. 
This is true when posting @@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) struct ib_cqe *cqe; int ret; + might_sleep(); + if (cc->cc_sqecount > rdma->sc_sq_depth) return -EINVAL; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 22a871e6fe4d..c6644cca52c5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, static struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) { + int node = ibdev_to_node(rdma->sc_cm_id->device); struct svc_rdma_send_ctxt *ctxt; dma_addr_t addr; void *buffer; - size_t size; int i; - size = sizeof(*ctxt); - size += rdma->sc_max_send_sges * sizeof(struct ib_sge); - ctxt = kmalloc(size, GFP_KERNEL); + ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges), + GFP_KERNEL, node); if (!ctxt) goto fail0; - buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); + buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; addr = ib_dma_map_single(rdma->sc_pd->device, buffer, @@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; - init_completion(&ctxt->sc_done); ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, @@ -214,6 +212,7 @@ out: ctxt->sc_send_wr.num_sge = 0; ctxt->sc_cur_sge_no = 0; + ctxt->sc_page_count = 0; return ctxt; out_empty: @@ -228,6 +227,8 @@ out_empty: * svc_rdma_send_ctxt_put - Return send_ctxt to free list * @rdma: controlling svcxprt_rdma * @ctxt: object to return to the free list + * + * Pages left in sc_pages are DMA unmapped and released. */ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; + if (ctxt->sc_page_count) + release_pages(ctxt->sc_pages, ctxt->sc_page_count); + /* The first SGE contains the transport header, which * remains mapped until @ctxt is destroyed. */ @@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); svc_rdma_wake_send_waiters(rdma, 1); - complete(&ctxt->sc_done); if (unlikely(wc->status != IB_WC_SUCCESS)) goto flushed; trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + svc_rdma_send_ctxt_put(rdma, ctxt); return; flushed: @@ -294,6 +298,7 @@ flushed: trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid); else trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid); + svc_rdma_send_ctxt_put(rdma, ctxt); svc_xprt_deferred_close(&rdma->sc_xprt); } @@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; - reinit_completion(&ctxt->sc_done); + might_sleep(); /* Sync the transport header buffer */ ib_dma_sync_single_for_device(rdma->sc_pd->device, @@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, svc_rdma_xb_dma_map, &args); } +/* The svc_rqst and all resources it owns are released as soon as + * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt + * so they are released by the Send completion handler. 
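/* --- Editor's note, not part of the patch: the Send WR posted for a
 * reply references the rq_respages buffers directly, and the sendto path
 * no longer sleeps on wait_for_completion_killable(). The pages must
 * therefore outlive svc_rdma_sendto() itself: ownership moves into the
 * send ctxt here, and svc_rdma_wc_send() releases both the ctxt and the
 * pages once the device has finished with them.
 */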
+ */ +static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, + struct svc_rdma_send_ctxt *ctxt) +{ + int i, pages = rqstp->rq_next_page - rqstp->rq_respages; + + ctxt->sc_page_count += pages; + for (i = 0; i < pages; i++) { + ctxt->sc_pages[i] = rqstp->rq_respages[i]; + rqstp->rq_respages[i] = NULL; + } + + /* Prevent svc_xprt_release from releasing pages in rq_pages */ + rqstp->rq_next_page = rqstp->rq_respages; +} + /* Prepare the portion of the RPC Reply that will be transmitted * via RDMA Send. The RPC-over-RDMA transport header is prepared * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. @@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, if (ret < 0) return ret; + svc_rdma_save_io_pages(rqstp, sctxt); + if (rctxt->rc_inv_rkey) { sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; @@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, sctxt->sc_send_wr.opcode = IB_WR_SEND; } - ret = svc_rdma_send(rdma, sctxt); - if (ret < 0) - return ret; - - ret = wait_for_completion_killable(&sctxt->sc_done); - svc_rdma_send_ctxt_put(rdma, sctxt); - return ret; + return svc_rdma_send(rdma, sctxt); } /** @@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; if (svc_rdma_send(rdma, sctxt)) goto put_ctxt; - - wait_for_completion_killable(&sctxt->sc_done); + return; put_ctxt: svc_rdma_send_ctxt_put(rdma, sctxt); @@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto put_ctxt; - - /* Prevent svc_xprt_release() from releasing the page backing - * rq_res.head[0].iov_base. It's no longer being accessed by - * the I/O device. */ - rqstp->rq_respages++; return 0; reply_chunk: if (ret != -E2BIG && ret != -EINVAL) goto put_ctxt; + /* Send completion releases payload pages that were part + * of previously posted RDMA Writes. 
+ */ + svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ca04f7a6a085..2abd895046ee 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -64,7 +64,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, - struct net *net); + struct net *net, int node); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context) } static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, - struct net *net) + struct net *net, int node) { - struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); + struct svcxprt_rdma *cma_xprt; - if (!cma_xprt) { - dprintk("svcrdma: failed to create new transport\n"); + cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node); + if (!cma_xprt) return NULL; - } + svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); @@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, struct svcxprt_rdma *newxprt; struct sockaddr *sa; - /* Create a new transport */ newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, - listen_xprt->sc_xprt.xpt_net); + listen_xprt->sc_xprt.xpt_net, + ibdev_to_node(new_cma_id->device)); if (!newxprt) return; newxprt->sc_cm_id = new_cma_id; @@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) return ERR_PTR(-EAFNOSUPPORT); - cma_xprt = svc_rdma_create_xprt(serv, net); + cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE); if (!cma_xprt) return ERR_PTR(-ENOMEM); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cdcd2731860b..2cde375477e3 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -176,7 +176,7 @@ static int bearer_name_validate(const char *name, */ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; @@ -211,11 +211,10 @@ int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (b) tipc_disc_add_dest(b->disc); rcu_read_unlock(); @@ -223,11 +222,10 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (b) tipc_disc_remove_dest(b->disc); rcu_read_unlock(); @@ -431,7 +429,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, dev = dev_get_by_name(net, dev_name); if (!dev) return -ENODEV; - if (tipc_mtu_bad(dev, 0)) { + if (tipc_mtu_bad(dev)) { dev_put(dev); return -EINVAL; } @@ -534,7 +532,7 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id) struct 
tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference(tipc_net(net)->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (b) mtu = b->mtu; rcu_read_unlock(); @@ -708,7 +706,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, test_and_set_bit_lock(0, &b->up); break; case NETDEV_CHANGEMTU: - if (tipc_mtu_bad(dev, 0)) { + if (tipc_mtu_bad(dev)) { bearer_disable(net, b); break; } @@ -745,7 +743,7 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; @@ -881,7 +879,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); if (i == MAX_BEARERS) return 0; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index bd0cc5c287ef..1ee60649bd17 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -257,9 +257,9 @@ static inline void tipc_loopback_trace(struct net *net, } /* check if device MTU is too low for tipc headers */ -static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) +static inline bool tipc_mtu_bad(struct net_device *dev) { - if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve) + if (dev->mtu >= TIPC_MIN_BEARER_MTU) return false; netdev_warn(dev, "MTU too low for tipc bearer\n"); return true; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index dd73d71c02a9..ef8e5139a873 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3375,7 +3375,6 @@ static const struct proto_ops msg_ops = { .sendmsg = tipc_sendmsg, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops packet_ops = { @@ -3396,7 +3395,6 @@ static const struct proto_ops packet_ops = { .sendmsg = tipc_send_packet, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops stream_ops = { @@ -3417,7 +3415,6 @@ static const struct proto_ops stream_ops = { .sendmsg = tipc_sendstream, .recvmsg = tipc_recvstream, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct net_proto_family tipc_family_ops = { diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 0a85244fd618..926232557e77 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -739,10 +739,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, udp_conf.use_udp_checksums = false; ub->ifindex = dev->ifindex; b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr); - if (tipc_mtu_bad(dev, b->encap_hlen)) { - err = -EINVAL; - goto err; - } b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { diff --git a/net/tls/tls.h b/net/tls/tls.h index 0672acab2773..86cef1c68e03 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -97,10 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags); -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); +void tls_sw_splice_eof(struct socket *sock); void tls_sw_cancel_work_tx(struct 
tls_context *tls_ctx); void tls_sw_release_resources_tx(struct sock *sk); void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); @@ -115,8 +112,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, size_t len, unsigned int flags); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); +void tls_device_splice_eof(struct socket *sock); int tls_tx_records(struct sock *sk, int flags); void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index bf69c9d6d06c..2021fe557e50 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -268,9 +268,8 @@ static void tls_append_frag(struct tls_record_info *record, skb_frag_size_add(frag, size); } else { ++frag; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + size); ++record->num_frags; get_page(pfrag->page); } @@ -357,9 +356,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, return -ENOMEM; frag = &record->frags[0]; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, prepend_size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + prepend_size); get_page(pfrag->page); pfrag->offset += prepend_size; @@ -424,16 +422,10 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) return 0; } -union tls_iter_offset { - struct iov_iter *msg_iter; - int offset; -}; - static int tls_push_data(struct sock *sk, - union tls_iter_offset iter_offset, + struct iov_iter *iter, size_t size, int flags, - unsigned char record_type, - struct page *zc_page) + unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -449,14 +441,14 @@ static int tls_push_data(struct sock *sk, long timeo; if (flags & - ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST)) + ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SPLICE_PAGES)) return -EOPNOTSUPP; if (unlikely(sk->sk_err)) return -sk->sk_err; flags |= MSG_SENDPAGE_DECRYPTED; - tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + tls_push_record_flags = flags | MSG_MORE; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { @@ -501,21 +493,35 @@ handle_error: record = ctx->open_record; copy = min_t(size_t, size, max_open_record_len - record->len); - if (copy && zc_page) { + if (copy && (flags & MSG_SPLICE_PAGES)) { struct page_frag zc_pfrag; + struct page **pages = &zc_pfrag.page; + size_t off; + + rc = iov_iter_extract_pages(iter, &pages, + copy, 1, 0, &off); + if (rc <= 0) { + if (rc == 0) + rc = -EIO; + goto handle_error; + } + copy = rc; + + if (WARN_ON_ONCE(!sendpage_ok(zc_pfrag.page))) { + iov_iter_revert(iter, copy); + rc = -EIO; + goto handle_error; + } - zc_pfrag.page = zc_page; - zc_pfrag.offset = iter_offset.offset; + zc_pfrag.offset = off; zc_pfrag.size = copy; tls_append_frag(record, &zc_pfrag, copy); - - iter_offset.offset += copy; } else if (copy) { copy = min_t(size_t, copy, pfrag->size - pfrag->offset); rc = tls_device_copy_data(page_address(pfrag->page) + pfrag->offset, copy, - iter_offset.msg_iter); + iter); if (rc) goto handle_error; tls_append_frag(record, pfrag, copy); @@ -525,7 +531,7 @@ handle_error: if (!size) { last_record: 
tls_push_record_flags = flags; - if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) { + if (flags & MSG_MORE) { more = true; break; } @@ -570,9 +576,11 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; struct tls_context *tls_ctx = tls_get_ctx(sk); - union tls_iter_offset iter; int rc; + if (!tls_ctx->zerocopy_sendfile) + msg->msg_flags &= ~MSG_SPLICE_PAGES; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); @@ -582,8 +590,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out; } - iter.msg_iter = &msg->msg_iter; - rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL); + rc = tls_push_data(sk, &msg->msg_iter, size, msg->msg_flags, + record_type); out: release_sock(sk); @@ -591,47 +599,25 @@ out: return rc; } -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +void tls_device_splice_eof(struct socket *sock) { + struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); - union tls_iter_offset iter_offset; - struct iov_iter msg_iter; - char *kaddr; - struct kvec iov; - int rc; + struct iov_iter iter = {}; - if (flags & MSG_SENDPAGE_NOTLAST) - flags |= MSG_MORE; + if (!tls_is_partially_sent_record(tls_ctx)) + return; mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); - if (flags & MSG_OOB) { - rc = -EOPNOTSUPP; - goto out; - } - - if (tls_ctx->zerocopy_sendfile) { - iter_offset.offset = offset; - rc = tls_push_data(sk, iter_offset, size, - flags, TLS_RECORD_TYPE_DATA, page); - goto out; + if (tls_is_partially_sent_record(tls_ctx)) { + iov_iter_bvec(&iter, ITER_SOURCE, NULL, 0, 0); + tls_push_data(sk, &iter, 0, 0, TLS_RECORD_TYPE_DATA); } - kaddr = kmap(page); - iov.iov_base = kaddr + offset; - iov.iov_len = size; - iov_iter_kvec(&msg_iter, ITER_SOURCE, &iov, 1, size); - iter_offset.msg_iter = &msg_iter; - rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA, - NULL); - kunmap(page); - -out: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return rc; } struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, @@ -696,12 +682,10 @@ EXPORT_SYMBOL(tls_get_record); static int tls_device_push_pending_record(struct sock *sk, int flags) { - union tls_iter_offset iter; - struct iov_iter msg_iter; + struct iov_iter iter; - iov_iter_kvec(&msg_iter, ITER_SOURCE, NULL, 0, 0); - iter.msg_iter = &msg_iter; - return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL); + iov_iter_kvec(&iter, ITER_SOURCE, NULL, 0, 0); + return tls_push_data(sk, &iter, 0, flags, TLS_RECORD_TYPE_DATA); } void tls_device_write_space(struct sock *sk, struct tls_context *ctx) @@ -1217,7 +1201,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) tls_device_attach(ctx, sk, netdev); up_read(&device_offload_lock); - /* following this assignment tls_is_sk_tx_device_offloaded + /* following this assignment tls_is_skb_tx_device_offloaded * will return true and the context might be accessed * by the netdev's xmit function. */ @@ -1370,7 +1354,7 @@ static int tls_device_down(struct net_device *netdev) list_for_each_entry_safe(ctx, tmp, &list, list) { /* Stop offloaded TX and switch to the fallback. - * tls_is_sk_tx_device_offloaded will return false. + * tls_is_skb_tx_device_offloaded will return false. 
*/ WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw); diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 7fbb1d0b69b3..b28c5e296dfd 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -271,7 +271,7 @@ static int fill_sg_in(struct scatterlist *sg_in, * There is a corner case where the packet contains * both an acked and a non-acked record. * We currently don't handle that case and rely - * on TCP to retranmit a packet that doesn't contain + * on TCP to retransmit a packet that doesn't contain * already acked payload. */ if (!is_start_marker) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f2e7302a4d96..b6896126bb92 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -125,7 +125,10 @@ int tls_push_sg(struct sock *sk, u16 first_offset, int flags) { - int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST; + struct bio_vec bvec; + struct msghdr msg = { + .msg_flags = MSG_SPLICE_PAGES | flags, + }; int ret = 0; struct page *p; size_t size; @@ -134,16 +137,19 @@ int tls_push_sg(struct sock *sk, size = sg->length - offset; offset += sg->offset; - ctx->in_tcp_sendpages = true; + ctx->splicing_pages = true; while (1) { if (sg_is_last(sg)) - sendpage_flags = flags; + msg.msg_flags = flags; /* is sending application-limited? */ tcp_rate_check_app_limited(sk); p = sg_page(sg); retry: - ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags); + bvec_set_page(&bvec, p, size, offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); + + ret = tcp_sendmsg_locked(sk, &msg, size); if (ret != size) { if (ret > 0) { @@ -155,7 +161,7 @@ retry: offset -= sg->offset; ctx->partially_sent_offset = offset; ctx->partially_sent_record = (void *)sg; - ctx->in_tcp_sendpages = false; + ctx->splicing_pages = false; return ret; } @@ -169,7 +175,7 @@ retry: size = sg->length; } - ctx->in_tcp_sendpages = false; + ctx->splicing_pages = false; return 0; } @@ -247,11 +253,11 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - /* If in_tcp_sendpages call lower protocol write space handler + /* If splicing_pages call lower protocol write space handler * to ensure we wake up any waiting operations there. For example - * if do_tcp_sendpages where to call sk_wait_event. + * if splicing pages were to call sk_wait_event. 
*/ - if (ctx->in_tcp_sendpages) { + if (ctx->splicing_pages) { ctx->sk_write_space(sk); return; } @@ -352,6 +358,39 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_ctx_free(sk, ctx); } +static __poll_t tls_sk_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) +{ + struct tls_sw_context_rx *ctx; + struct tls_context *tls_ctx; + struct sock *sk = sock->sk; + struct sk_psock *psock; + __poll_t mask = 0; + u8 shutdown; + int state; + + mask = tcp_poll(file, sock, wait); + + state = inet_sk_state_load(sk); + shutdown = READ_ONCE(sk->sk_shutdown); + if (unlikely(state != TCP_ESTABLISHED || shutdown & RCV_SHUTDOWN)) + return mask; + + tls_ctx = tls_get_ctx(sk); + ctx = tls_sw_ctx_rx(tls_ctx); + psock = sk_psock_get(sk); + + if (skb_queue_empty_lockless(&ctx->rx_list) && + !tls_strp_msg_ready(ctx) && + sk_psock_queue_empty(psock)) + mask &= ~(EPOLLIN | EPOLLRDNORM); + + if (psock) + sk_psock_put(sk, psock); + + return mask; +} + static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, int __user *optlen, int tx) { @@ -918,27 +957,26 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_BASE] = *base; ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; - ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; + ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; + ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll; ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE]; ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read; + ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll; #ifdef CONFIG_TLS_DEVICE ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; - ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL; ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ]; - ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL; ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ]; ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ]; ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ]; - ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL; #endif #ifdef CONFIG_TLS_TOE ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base; @@ -986,7 +1024,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; - prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; + prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; @@ -1001,11 +1039,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], #ifdef CONFIG_TLS_DEVICE prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; - prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; + prot[TLS_HW][TLS_BASE].splice_eof = tls_device_splice_eof; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; - prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; + prot[TLS_HW][TLS_SW].splice_eof = tls_device_splice_eof; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1a53c8f481e9..53f944e6d8ef 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -931,7 +931,37 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags) &copied, flags); } -int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +static int tls_sw_sendmsg_splice(struct sock 
*sk, struct msghdr *msg, + struct sk_msg *msg_pl, size_t try_to_copy, + ssize_t *copied) +{ + struct page *page = NULL, **pages = &page; + + do { + ssize_t part; + size_t off; + + part = iov_iter_extract_pages(&msg->msg_iter, &pages, + try_to_copy, 1, 0, &off); + if (part <= 0) + return part ?: -EIO; + + if (WARN_ON_ONCE(!sendpage_ok(page))) { + iov_iter_revert(&msg->msg_iter, part); + return -EIO; + } + + sk_msg_page_add(msg_pl, page, part, off); + sk_mem_charge(sk, part); + *copied += part; + try_to_copy -= part; + } while (try_to_copy && !sk_msg_full(msg_pl)); + + return 0; +} + +static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, + size_t size) { long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -954,15 +984,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int ret = 0; int pending; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_CMSG_COMPAT)) - return -EOPNOTSUPP; - - ret = mutex_lock_interruptible(&tls_ctx->tx_lock); - if (ret) - return ret; - lock_sock(sk); - if (unlikely(msg->msg_controllen)) { ret = tls_process_cmsg(sk, msg, &record_type); if (ret) { @@ -1020,6 +1041,17 @@ alloc_encrypted: full_record = true; } + if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) { + ret = tls_sw_sendmsg_splice(sk, msg, msg_pl, + try_to_copy, &copied); + if (ret < 0) + goto send_end; + tls_ctx->pending_open_record_frags = true; + if (full_record || eor || sk_msg_full(msg_pl)) + goto copied; + continue; + } + if (!is_kvec && (full_record || eor) && !async_capable) { u32 first = msg_pl->sg.end; @@ -1084,6 +1116,7 @@ fallback_to_reg_send: */ tls_ctx->pending_open_record_frags = true; copied += try_to_copy; +copied: if (full_record || eor) { ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, @@ -1151,157 +1184,101 @@ trim_sgl: send_end: ret = sk_stream_error(sk, msg->msg_flags, ret); + return copied > 0 ? copied : ret; +} + +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + int ret; + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | + MSG_SENDPAGE_NOPOLICY)) + return -EOPNOTSUPP; + + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; + lock_sock(sk); + ret = tls_sw_sendmsg_locked(sk, msg, size); release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return copied > 0 ? copied : ret; + return ret; } -static int tls_sw_do_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +/* + * Handle unexpected EOF during splice without SPLICE_F_MORE set. + */ +void tls_sw_splice_eof(struct socket *sock) { - long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); - struct tls_prot_info *prot = &tls_ctx->prot_info; - unsigned char record_type = TLS_RECORD_TYPE_DATA; - struct sk_msg *msg_pl; struct tls_rec *rec; - int num_async = 0; + struct sk_msg *msg_pl; ssize_t copied = 0; - bool full_record; - int record_room; + bool retrying = false; int ret = 0; - bool eor; - - eor = !(flags & MSG_SENDPAGE_NOTLAST); - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - /* Call the sk_stream functions to manage the sndbuf mem. 
*/ - while (size > 0) { - size_t copy, required_size; - - if (sk->sk_err) { - ret = -sk->sk_err; - goto sendpage_end; - } - - if (ctx->open_rec) - rec = ctx->open_rec; - else - rec = ctx->open_rec = tls_get_rec(sk); - if (!rec) { - ret = -ENOMEM; - goto sendpage_end; - } - - msg_pl = &rec->msg_plaintext; - - full_record = false; - record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size; - copy = size; - if (copy >= record_room) { - copy = record_room; - full_record = true; - } - - required_size = msg_pl->sg.size + copy + prot->overhead_size; - - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; -alloc_payload: - ret = tls_alloc_encrypted_msg(sk, required_size); - if (ret) { - if (ret != -ENOSPC) - goto wait_for_memory; + int pending; - /* Adjust copy according to the amount that was - * actually allocated. The difference is due - * to max sg elements limit - */ - copy -= required_size - msg_pl->sg.size; - full_record = true; - } + if (!ctx->open_rec) + return; - sk_msg_page_add(msg_pl, page, copy, offset); - sk_mem_charge(sk, copy); + mutex_lock(&tls_ctx->tx_lock); + lock_sock(sk); - offset += copy; - size -= copy; - copied += copy; +retry: + rec = ctx->open_rec; + if (!rec) + goto unlock; - tls_ctx->pending_open_record_frags = true; - if (full_record || eor || sk_msg_full(msg_pl)) { - ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, - record_type, &copied, flags); - if (ret) { - if (ret == -EINPROGRESS) - num_async++; - else if (ret == -ENOMEM) - goto wait_for_memory; - else if (ret != -EAGAIN) { - if (ret == -ENOSPC) - ret = 0; - goto sendpage_end; - } - } - } - continue; -wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: - ret = sk_stream_wait_memory(sk, &timeo); - if (ret) { - if (ctx->open_rec) - tls_trim_both_msgs(sk, msg_pl->sg.size); - goto sendpage_end; - } + msg_pl = &rec->msg_plaintext; - if (ctx->open_rec) - goto alloc_payload; + /* Check the BPF advisor and perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + &copied, 0); + switch (ret) { + case 0: + case -EAGAIN: + if (retrying) + goto unlock; + retrying = true; + goto retry; + case -EINPROGRESS: + break; + default: + goto unlock; } - if (num_async) { - /* Transmit if any encryptions have completed */ - if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { - cancel_delayed_work(&ctx->tx_work.work); - tls_tx_records(sk, flags); - } - } -sendpage_end: - ret = sk_stream_error(sk, flags, ret); - return copied > 0 ? 
copied : ret; -} + /* Wait for pending encryptions to get completed */ + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY | - MSG_NO_SHARED_FRAGS)) - return -EOPNOTSUPP; + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); - return tls_sw_do_sendpage(sk, page, offset, size, flags); -} + /* There can be no concurrent accesses, since we have no pending + * encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - int ret; + if (ctx->async_wait.err) + goto unlock; - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) - return -EOPNOTSUPP; + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, 0); + } - ret = mutex_lock_interruptible(&tls_ctx->tx_lock); - if (ret) - return ret; - lock_sock(sk); - ret = tls_sw_do_sendpage(sk, page, offset, size, flags); +unlock: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return ret; } static int diff --git a/net/unix/Kconfig b/net/unix/Kconfig index b7f811216820..28b232f281ab 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -4,7 +4,7 @@ # config UNIX - tristate "Unix domain sockets" + bool "Unix domain sockets" help If you say Y here, you will include support for Unix domain sockets; sockets are the standard Unix mechanism for establishing and @@ -14,10 +14,6 @@ config UNIX an embedded system or something similar, you therefore definitely want to say Y here. - To compile this driver as a module, choose M here: the module will be - called unix. Note that several important services won't work - correctly if you say M here and then neglect to load the module. - Say Y unless you know what you are doing. 
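The tls_sw_splice_eof() handler added in the tls_sw.c hunk above exists because, with .sendpage gone, zerocopy sends from userspace go through splice(2) or sendfile(2), which the kernel now drives internally as sendmsg with MSG_SPLICE_PAGES; when the splice ends without SPLICE_F_MORE, the socket's ->splice_eof() is what flushes the record still being built. A minimal userspace sketch of that flow, where file_fd, pipe_fds and tls_fd (a connected kTLS socket) are hypothetical descriptors, not anything from this series:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	/* Push one chunk of file_fd through a staging pipe into a kTLS
	 * socket. While "more" is set, SPLICE_F_MORE keeps the current TLS
	 * record open across calls; the final call without it lets the
	 * socket's ->splice_eof() (tls_sw_splice_eof() above) close out
	 * the partially built record.
	 */
	static ssize_t splice_chunk(int file_fd, int pipe_fds[2], int tls_fd,
				    size_t len, int more)
	{
		/* Stage the file pages into the pipe without copying. */
		ssize_t n = splice(file_fd, NULL, pipe_fds[1], NULL, len, 0);

		if (n <= 0)
			return n;

		return splice(pipe_fds[0], NULL, tls_fd, NULL, n,
			      more ? SPLICE_F_MORE : 0);
	}
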
config UNIX_SCM diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e7728b57a8c7..123b35ddfd71 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -758,8 +758,6 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); -static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, - size_t size, int flags); static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, struct pipe_inode_info *, size_t size, unsigned int flags); @@ -852,7 +850,6 @@ static const struct proto_ops unix_stream_ops = { .recvmsg = unix_stream_recvmsg, .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, - .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, .set_peek_off = unix_set_peek_off, .show_fdinfo = unix_show_fdinfo, @@ -878,7 +875,6 @@ static const struct proto_ops unix_dgram_ops = { .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -902,7 +898,6 @@ static const struct proto_ops unix_seqpacket_ops = { .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -921,11 +916,26 @@ static void unix_unhash(struct sock *sk) */ } +static bool unix_bpf_bypass_getsockopt(int level, int optname) +{ + if (level == SOL_SOCKET) { + switch (optname) { + case SO_PEERPIDFD: + return true; + default: + return false; + } + } + + return false; +} + struct proto unix_dgram_proto = { .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, + .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_dgram_bpf_update_proto, #endif @@ -937,6 +947,7 @@ struct proto unix_stream_proto = { .obj_size = sizeof(struct unix_sock), .close = unix_close, .unhash = unix_unhash, + .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_stream_bpf_update_proto, #endif @@ -1361,7 +1372,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - if (test_bit(SOCK_PASSCRED, &sock->flags) && + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { err = unix_autobind(sk); if (err) @@ -1469,7 +1481,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); if (err) goto out; @@ -1670,6 +1683,8 @@ static void unix_sock_inherit_flags(const struct socket *old, { if (test_bit(SOCK_PASSCRED, &old->flags)) set_bit(SOCK_PASSCRED, &new->flags); + if (test_bit(SOCK_PASSPIDFD, &old->flags)) + set_bit(SOCK_PASSPIDFD, &new->flags); if (test_bit(SOCK_PASSSEC, &old->flags)) set_bit(SOCK_PASSSEC, &new->flags); } @@ -1819,8 +1834,10 @@ static bool unix_passcred_enabled(const struct socket *sock, const struct sock *other) { return test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags) || !other->sk_socket || - 
test_bit(SOCK_PASSCRED, &other->sk_socket->flags); + test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || + test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); } /* @@ -1839,24 +1856,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, } } -static int maybe_init_creds(struct scm_cookie *scm, - struct socket *socket, - const struct sock *other) -{ - int err; - struct msghdr msg = { .msg_controllen = 0 }; - - err = scm_send(socket, &msg, scm, false); - if (err) - return err; - - if (unix_passcred_enabled(socket, other)) { - scm->pid = get_pid(task_tgid(current)); - current_uid_gid(&scm->creds.uid, &scm->creds.gid); - } - return err; -} - static bool unix_skb_scm_eq(struct sk_buff *skb, struct scm_cookie *scm) { @@ -1922,7 +1921,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); if (err) goto out; @@ -2200,19 +2200,25 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, while (sent < len) { size = len - sent; - /* Keep two messages in the pipe so it schedules better */ - size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); + if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { + skb = sock_alloc_send_pskb(sk, 0, 0, + msg->msg_flags & MSG_DONTWAIT, + &err, 0); + } else { + /* Keep two messages in the pipe so it schedules better */ + size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); - /* allow fallback to order-0 allocations */ - size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); + /* allow fallback to order-0 allocations */ + size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); - data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); + data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); - data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); + data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); - skb = sock_alloc_send_pskb(sk, size - data_len, data_len, - msg->msg_flags & MSG_DONTWAIT, &err, - get_order(UNIX_SKB_FRAGS_SZ)); + skb = sock_alloc_send_pskb(sk, size - data_len, data_len, + msg->msg_flags & MSG_DONTWAIT, &err, + get_order(UNIX_SKB_FRAGS_SZ)); + } if (!skb) goto out_err; @@ -2224,13 +2230,24 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } fds_sent = true; - skb_put(skb, size - data_len); - skb->data_len = data_len; - skb->len = size; - err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); - if (err) { - kfree_skb(skb); - goto out_err; + if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { + err = skb_splice_from_iter(skb, &msg->msg_iter, size, + sk->sk_allocation); + if (err < 0) { + kfree_skb(skb); + goto out_err; + } + size = err; + refcount_add(size, &sk->sk_wmem_alloc); + } else { + skb_put(skb, size - data_len); + skb->data_len = data_len; + skb->len = size; + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (err) { + kfree_skb(skb); + goto out_err; + } } unix_state_lock(other); @@ -2272,122 +2289,6 @@ out_err: return sent ? 
: err; } -static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, - int offset, size_t size, int flags) -{ - int err; - bool send_sigpipe = false; - bool init_scm = true; - struct scm_cookie scm; - struct sock *other, *sk = socket->sk; - struct sk_buff *skb, *newskb = NULL, *tail = NULL; - - if (flags & MSG_OOB) - return -EOPNOTSUPP; - - other = unix_peer(sk); - if (!other || sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; - - if (false) { -alloc_skb: - unix_state_unlock(other); - mutex_unlock(&unix_sk(other)->iolock); - newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, - &err, 0); - if (!newskb) - goto err; - } - - /* we must acquire iolock as we modify already present - * skbs in the sk_receive_queue and mess with skb->len - */ - err = mutex_lock_interruptible(&unix_sk(other)->iolock); - if (err) { - err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; - goto err; - } - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - err = -EPIPE; - send_sigpipe = true; - goto err_unlock; - } - - unix_state_lock(other); - - if (sock_flag(other, SOCK_DEAD) || - other->sk_shutdown & RCV_SHUTDOWN) { - err = -EPIPE; - send_sigpipe = true; - goto err_state_unlock; - } - - if (init_scm) { - err = maybe_init_creds(&scm, socket, other); - if (err) - goto err_state_unlock; - init_scm = false; - } - - skb = skb_peek_tail(&other->sk_receive_queue); - if (tail && tail == skb) { - skb = newskb; - } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { - if (newskb) { - skb = newskb; - } else { - tail = skb; - goto alloc_skb; - } - } else if (newskb) { - /* this is fast path, we don't necessarily need to - * call to kfree_skb even though with newskb == NULL - * this - does no harm - */ - consume_skb(newskb); - newskb = NULL; - } - - if (skb_append_pagefrags(skb, page, offset, size)) { - tail = skb; - goto alloc_skb; - } - - skb->len += size; - skb->data_len += size; - skb->truesize += size; - refcount_add(size, &sk->sk_wmem_alloc); - - if (newskb) { - err = unix_scm_to_skb(&scm, skb, false); - if (err) - goto err_state_unlock; - spin_lock(&other->sk_receive_queue.lock); - __skb_queue_tail(&other->sk_receive_queue, newskb); - spin_unlock(&other->sk_receive_queue.lock); - } - - unix_state_unlock(other); - mutex_unlock(&unix_sk(other)->iolock); - - other->sk_data_ready(other); - scm_destroy(&scm); - return size; - -err_state_unlock: - unix_state_unlock(other); -err_unlock: - mutex_unlock(&unix_sk(other)->iolock); -err: - kfree_skb(newskb); - if (send_sigpipe && !(flags & MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - if (!init_scm) - scm_destroy(&scm); - return err; -} - static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -2526,7 +2427,7 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, } err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; - scm_recv(sock, msg, &scm, flags); + scm_recv_unix(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); @@ -2821,7 +2722,8 @@ unlock: /* Never glue messages from different writers */ if (!unix_skb_scm_eq(skb, &scm)) break; - } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { + } else if (test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); @@ -2906,7 +2808,7 @@ unlock: mutex_unlock(&u->iolock); if (state->msg) - scm_recv(sock, state->msg, &scm, flags); + scm_recv_unix(sock, state->msg, &scm, flags); else scm_destroy(&scm); out: diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index efb8a0937a13..020cf17ab7e4 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1306,7 +1306,6 @@ static const struct proto_ops vsock_dgram_ops = { .sendmsg = vsock_dgram_sendmsg, .recvmsg = vsock_dgram_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .read_skb = vsock_read_skb, }; @@ -2234,7 +2233,6 @@ static const struct proto_ops vsock_stream_ops = { .sendmsg = vsock_connectible_sendmsg, .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_rcvlowat = vsock_set_rcvlowat, .read_skb = vsock_read_skb, }; @@ -2257,7 +2255,6 @@ static const struct proto_ops vsock_seqpacket_ops = { .sendmsg = vsock_connectible_sendmsg, .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .read_skb = vsock_read_skb, }; diff --git a/net/wireless/core.c b/net/wireless/core.c index b3ec9eaec36b..25bc2e50a061 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -129,6 +129,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, int result; ASSERT_RTNL(); + lockdep_assert_wiphy(&rdev->wiphy); /* Ignore nop renames */ if (strcmp(newname, wiphy_name(&rdev->wiphy)) == 0) @@ -195,6 +196,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); } + + wiphy_lock(&rdev->wiphy); nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); wiphy_net_set(&rdev->wiphy, net); @@ -203,6 +206,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + wiphy_unlock(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; @@ -360,7 +365,8 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) rtnl_unlock(); } -static void cfg80211_sched_scan_stop_wk(struct work_struct *work) +static void cfg80211_sched_scan_stop_wk(struct wiphy *wiphy, + struct wiphy_work *work) { struct cfg80211_registered_device *rdev; struct cfg80211_sched_scan_request *req, *tmp; @@ -368,12 +374,10 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, sched_scan_stop_wk); - wiphy_lock(&rdev->wiphy); list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->nl_owner_dead) cfg80211_stop_sched_scan_req(rdev, req, false); } - wiphy_unlock(&rdev->wiphy); } static void cfg80211_propagate_radar_detect_wk(struct work_struct *work) @@ -408,6 +412,34 @@ static void cfg80211_propagate_cac_done_wk(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_wiphy_work(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + 
struct wiphy_work *wk; + + rdev = container_of(work, struct cfg80211_registered_device, wiphy_work); + + wiphy_lock(&rdev->wiphy); + if (rdev->suspended) + goto out; + + spin_lock_irq(&rdev->wiphy_work_lock); + wk = list_first_entry_or_null(&rdev->wiphy_work_list, + struct wiphy_work, entry); + if (wk) { + list_del_init(&wk->entry); + if (!list_empty(&rdev->wiphy_work_list)) + schedule_work(work); + spin_unlock_irq(&rdev->wiphy_work_lock); + + wk->func(&rdev->wiphy, wk); + } else { + spin_unlock_irq(&rdev->wiphy_work_lock); + } +out: + wiphy_unlock(&rdev->wiphy); +} + /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, @@ -495,7 +527,7 @@ use_default_name: spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); INIT_LIST_HEAD(&rdev->sched_scan_req_list); - INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); + wiphy_work_init(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT @@ -508,7 +540,7 @@ use_default_name: device_enable_async_suspend(&rdev->wiphy.dev); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); - INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); + wiphy_work_init(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); INIT_WORK(&rdev->sched_scan_res_wk, cfg80211_sched_scan_results_wk); INIT_WORK(&rdev->propagate_radar_detect_wk, cfg80211_propagate_radar_detect_wk); @@ -533,6 +565,9 @@ use_default_name: return NULL; } + INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); + INIT_LIST_HEAD(&rdev->wiphy_work_list); + spin_lock_init(&rdev->wiphy_work_lock); INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); @@ -721,22 +756,6 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } - /* - * if a wiphy has unsupported modes for regulatory channel enforcement, - * opt-out of enforcement checking - */ - if (wiphy->interface_modes & ~(BIT(NL80211_IFTYPE_STATION) | - BIT(NL80211_IFTYPE_P2P_CLIENT) | - BIT(NL80211_IFTYPE_AP) | - BIT(NL80211_IFTYPE_MESH_POINT) | - BIT(NL80211_IFTYPE_P2P_GO) | - BIT(NL80211_IFTYPE_ADHOC) | - BIT(NL80211_IFTYPE_P2P_DEVICE) | - BIT(NL80211_IFTYPE_NAN) | - BIT(NL80211_IFTYPE_AP_VLAN) | - BIT(NL80211_IFTYPE_MONITOR))) - wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; - if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) && (wiphy->regulatory_flags & (REGULATORY_CUSTOM_REG | @@ -941,8 +960,10 @@ int wiphy_register(struct wiphy *wiphy) rdev->wiphy.features |= NL80211_FEATURE_SCAN_FLUSH; rtnl_lock(); + wiphy_lock(&rdev->wiphy); res = device_add(&rdev->wiphy.dev); if (res) { + wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return res; } @@ -956,6 +977,7 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + wiphy_unlock(&rdev->wiphy); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -1027,6 +1049,31 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_rfkill_start_polling); +void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev) +{ + unsigned int runaway_limit = 100; + unsigned long flags; + + lockdep_assert_held(&rdev->wiphy.mtx); + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + while (!list_empty(&rdev->wiphy_work_list)) { + struct wiphy_work *wk; + + wk = list_first_entry(&rdev->wiphy_work_list, 
+ struct wiphy_work, entry); + list_del_init(&wk->entry); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + + wk->func(&rdev->wiphy, wk); + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + if (WARN_ON(--runaway_limit == 0)) + INIT_LIST_HEAD(&rdev->wiphy_work_list); + } + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); +} + void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -1065,25 +1112,29 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); +#ifdef CONFIG_PM + if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) + rdev_set_wakeup(rdev, false); +#endif + + /* surely nothing is reachable now, clean up work */ + cfg80211_process_wiphy_works(rdev); wiphy_unlock(&rdev->wiphy); rtnl_unlock(); - flush_work(&rdev->scan_done_wk); + /* this has nothing to do now but make sure it's gone */ + cancel_work_sync(&rdev->wiphy_work); + cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); cancel_delayed_work_sync(&rdev->background_cac_done_wk); flush_work(&rdev->destroy_work); - flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); flush_work(&rdev->mgmt_registrations_update_wk); flush_work(&rdev->background_cac_abort_wk); -#ifdef CONFIG_PM - if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) - rdev_set_wakeup(rdev, false); -#endif cfg80211_rdev_free_wowlan(rdev); cfg80211_rdev_free_coalesce(rdev); } @@ -1145,8 +1196,6 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, ASSERT_RTNL(); lockdep_assert_held(&rdev->wiphy.mtx); - flush_work(&wdev->pmsr_free_wk); - nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); wdev->registered = false; @@ -1178,10 +1227,6 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, kfree_sensitive(wdev->wext.keys); wdev->wext.keys = NULL; #endif - /* only initialized if we have a netdev */ - if (wdev->netdev) - flush_work(&wdev->disconnect_wk); - cfg80211_cqm_config_free(wdev); /* @@ -1455,6 +1500,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_leave(rdev, wdev); cfg80211_remove_links(wdev); wiphy_unlock(&rdev->wiphy); + /* since we just did cfg80211_leave() nothing to do there */ + cancel_work_sync(&wdev->disconnect_wk); + cancel_work_sync(&wdev->pmsr_free_wk); break; case NETDEV_DOWN: wiphy_lock(&rdev->wiphy); @@ -1564,6 +1612,66 @@ static struct pernet_operations cfg80211_pernet_ops = { .exit = cfg80211_pernet_exit, }; +void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + if (list_empty(&work->entry)) + list_add_tail(&work->entry, &rdev->wiphy_work_list); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + + schedule_work(&rdev->wiphy_work); +} +EXPORT_SYMBOL_GPL(wiphy_work_queue); + +void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + unsigned long flags; + + lockdep_assert_held(&wiphy->mtx); + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + if (!list_empty(&work->entry)) + list_del_init(&work->entry); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); +} +EXPORT_SYMBOL_GPL(wiphy_work_cancel); + +void wiphy_delayed_work_timer(struct timer_list *t) +{ + 
struct wiphy_delayed_work *dwork = from_timer(dwork, t, timer); + + wiphy_work_queue(dwork->wiphy, &dwork->work); +} +EXPORT_SYMBOL(wiphy_delayed_work_timer); + +void wiphy_delayed_work_queue(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork, + unsigned long delay) +{ + if (!delay) { + wiphy_work_queue(wiphy, &dwork->work); + return; + } + + dwork->wiphy = wiphy; + mod_timer(&dwork->timer, jiffies + delay); +} +EXPORT_SYMBOL_GPL(wiphy_delayed_work_queue); + +void wiphy_delayed_work_cancel(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork) +{ + lockdep_assert_held(&wiphy->mtx); + + del_timer_sync(&dwork->timer); + wiphy_work_cancel(wiphy, &dwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/core.h b/net/wireless/core.h index 7c61752f6d83..8a807b609ef7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -75,7 +75,7 @@ struct cfg80211_registered_device { struct sk_buff *scan_msg; struct list_head sched_scan_req_list; time64_t suspend_at; - struct work_struct scan_done_wk; + struct wiphy_work scan_done_wk; struct genl_info *cur_cmd_info; @@ -95,7 +95,7 @@ struct cfg80211_registered_device { struct cfg80211_coalesce *coalesce; struct work_struct destroy_work; - struct work_struct sched_scan_stop_wk; + struct wiphy_work sched_scan_stop_wk; struct work_struct sched_scan_res_wk; struct cfg80211_chan_def radar_chandef; @@ -108,6 +108,12 @@ struct cfg80211_registered_device { /* lock for all wdev lists */ spinlock_t mgmt_registrations_lock; + struct work_struct wiphy_work; + struct list_head wiphy_work_list; + /* protects the list above */ + spinlock_t wiphy_work_lock; + bool suspended; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -435,7 +441,7 @@ bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); -void __cfg80211_scan_done(struct work_struct *wk); +void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message); void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, @@ -453,6 +459,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev); void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, @@ -569,5 +576,6 @@ void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id); void cfg80211_remove_links(struct wireless_dev *wdev); int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 087d60c0f6e4..0da2e6a2a7ea 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel 
Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/if.h> @@ -816,6 +816,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 }, [NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG }, [NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_MLO_LINK_DISABLED] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -3081,6 +3082,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) if (state->filter_wiphy != -1 && state->filter_wiphy != rdev->wiphy_idx) continue; + wiphy_lock(&rdev->wiphy); /* attempt to fit multiple wiphy data chunks into the skb */ do { ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, @@ -3107,6 +3109,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; state->split_start = 0; + wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return 1; } @@ -3114,6 +3117,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) break; } } while (state->split_start > 0); + wiphy_unlock(&rdev->wiphy); break; } rtnl_unlock(); @@ -6365,12 +6369,27 @@ bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) return false; switch (info->bw) { + case RATE_INFO_BW_1: + rate_flg = NL80211_RATE_INFO_1_MHZ_WIDTH; + break; + case RATE_INFO_BW_2: + rate_flg = NL80211_RATE_INFO_2_MHZ_WIDTH; + break; + case RATE_INFO_BW_4: + rate_flg = NL80211_RATE_INFO_4_MHZ_WIDTH; + break; case RATE_INFO_BW_5: rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH; break; + case RATE_INFO_BW_8: + rate_flg = NL80211_RATE_INFO_8_MHZ_WIDTH; + break; case RATE_INFO_BW_10: rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH; break; + case RATE_INFO_BW_16: + rate_flg = NL80211_RATE_INFO_16_MHZ_WIDTH; + break; default: WARN_ON(1); fallthrough; @@ -6429,6 +6448,14 @@ bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC, info->he_ru_alloc)) return false; + } else if (info->flags & RATE_INFO_FLAGS_S1G_MCS) { + if (nla_put_u8(msg, NL80211_RATE_INFO_S1G_MCS, info->mcs)) + return false; + if (nla_put_u8(msg, NL80211_RATE_INFO_S1G_NSS, info->nss)) + return false; + if (info->flags & RATE_INFO_FLAGS_SHORT_GI && + nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) + return false; } else if (info->flags & RATE_INFO_FLAGS_EHT_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_MCS, info->mcs)) return false; @@ -11112,6 +11139,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto free; } } + + req.links[link_id].disabled = + nla_get_flag(attrs[NL80211_ATTR_MLO_LINK_DISABLED]); } if (!req.links[req.link_id].bss) { @@ -11126,6 +11156,13 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto free; } + if (req.links[req.link_id].disabled) { + GENL_SET_ERR_MSG(info, + "cannot have assoc link disabled"); + err = -EINVAL; + goto free; + } + kfree(attrs); attrs = NULL; } else { @@ -12225,6 +12262,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) u32 peer_capability = 0; u16 status_code; u8 *peer; + int link_id; bool initiator; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || @@ -12246,8 +12284,9 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]) peer_capability = 
nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]); + link_id = nl80211_link_id_or_invalid(info->attrs); - return rdev_tdls_mgmt(rdev, dev, peer, action_code, + return rdev_tdls_mgmt(rdev, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, nla_data(info->attrs[NL80211_ATTR_IE]), @@ -17114,7 +17153,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_TDLS_OPER, @@ -18248,6 +18288,76 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void cfg80211_links_removed(struct net_device *dev, u16 link_mask) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *links; + void *hdr; + + ASSERT_WDEV_LOCK(wdev); + trace_cfg80211_links_removed(dev, link_mask); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) + return; + + if (WARN_ON(!wdev->valid_links || !link_mask || + (wdev->valid_links & link_mask) != link_mask || + wdev->valid_links == link_mask)) + return; + + cfg80211_wdev_release_link_bsses(wdev, link_mask); + wdev->valid_links &= ~link_mask; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_LINKS_REMOVED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); + if (!links) + goto nla_put_failure; + + while (link_mask) { + struct nlattr *link; + int link_id = __ffs(link_mask); + + link = nla_nest_start(msg, link_id + 1); + if (!link) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) + goto nla_put_failure; + + nla_nest_end(msg, link); + link_mask &= ~(1 << link_id); + } + + nla_nest_end(msg, links); + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_links_removed); + void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp) @@ -19774,7 +19884,8 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list) { if (sched_scan_req->owner_nlportid == notify->portid) { sched_scan_req->nl_owner_dead = true; - schedule_work(&rdev->sched_scan_stop_wk); + wiphy_work_queue(&rdev->wiphy, + &rdev->sched_scan_stop_wk); } } diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 2bc647720cda..77000a264855 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2021, 2023 Intel Corporation */ #include <net/cfg80211.h> #include "core.h" @@ -623,9 +623,11 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) struct wireless_dev *wdev = container_of(work, struct wireless_dev, pmsr_free_wk); + wiphy_lock(wdev->wiphy); wdev_lock(wdev); 
cfg80211_pmsr_process_abort(wdev); wdev_unlock(wdev); + wiphy_unlock(wdev->wiphy); } void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 69b508743e57..90bb7ac4b930 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -407,6 +407,18 @@ static inline int rdev_change_bss(struct cfg80211_registered_device *rdev, return ret; } +static inline void rdev_inform_bss(struct cfg80211_registered_device *rdev, + struct cfg80211_bss *bss, + const struct cfg80211_bss_ies *ies, + void *drv_data) + +{ + trace_rdev_inform_bss(&rdev->wiphy, bss); + if (rdev->ops->inform_bss) + rdev->ops->inform_bss(&rdev->wiphy, bss, ies, drv_data); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_txq_params(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_txq_params *params) @@ -899,17 +911,18 @@ static inline int rdev_set_rekey_data(struct cfg80211_registered_device *rdev, static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *peer, - u8 action_code, u8 dialog_token, - u16 status_code, u32 peer_capability, - bool initiator, const u8 *buf, size_t len) + int link_id, u8 action_code, + u8 dialog_token, u16 status_code, + u32 peer_capability, bool initiator, + const u8 *buf, size_t len) { int ret; - trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code, + trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, buf, len); - ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code, - dialog_token, status_code, peer_capability, - initiator, buf, len); + ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, link_id, + action_code, dialog_token, status_code, + peer_capability, initiator, buf, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 26f11e4746c0..0317cf9da307 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,7 +5,7 @@ * Copyright 2008-2011 Luis R. 
Rodriguez <mcgrof@qca.qualcomm.com> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -1587,6 +1587,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_HE; if (rd_flags & NL80211_RRF_NO_320MHZ) channel_flags |= IEEE80211_CHAN_NO_320MHZ; + if (rd_flags & NL80211_RRF_NO_EHT) + channel_flags |= IEEE80211_CHAN_NO_EHT; return channel_flags; } @@ -2352,7 +2354,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) if (!wdev->valid_links && link > 0) break; - if (!(wdev->valid_links & BIT(link))) + if (wdev->valid_links && !(wdev->valid_links & BIT(link))) continue; switch (iftype) { case NL80211_IFTYPE_AP: @@ -2391,9 +2393,17 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_DEVICE: /* no enforcement required */ break; + case NL80211_IFTYPE_OCB: + if (!wdev->u.ocb.chandef.chan) + continue; + chandef = wdev->u.ocb.chandef; + break; + case NL80211_IFTYPE_NAN: + /* we have no info, but NAN is also pretty universal */ + continue; default: /* others not implemented for now */ - WARN_ON(1); + WARN_ON_ONCE(1); break; } @@ -2452,9 +2462,7 @@ static void reg_check_chans_work(struct work_struct *work) rtnl_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) - if (!(rdev->wiphy.regulatory_flags & - REGULATORY_IGNORE_STALE_KICKOFF)) - reg_leave_invalid_chans(&rdev->wiphy); + reg_leave_invalid_chans(&rdev->wiphy); rtnl_unlock(); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index c501db7bbdb3..8bf00caf5d29 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -96,6 +96,7 @@ MODULE_PARM_DESC(bss_entries_limit, * colocated and can be discovered via legacy bands. * @short_ssid_valid: short_ssid is valid and can be used * @short_ssid: the short SSID for this SSID + * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP */ struct cfg80211_colocated_ap { struct list_head list; @@ -111,6 +112,7 @@ struct cfg80211_colocated_ap { transmitted_bssid:1, colocated_ess:1, short_ssid_valid:1; + s8 psd_20; }; static void bss_free(struct cfg80211_internal_bss *bss) @@ -218,6 +220,10 @@ bool cfg80211_is_element_inherited(const struct element *elem, if (elem->id == WLAN_EID_MULTIPLE_BSSID) return false; + if (elem->id == WLAN_EID_EXTENSION && elem->datalen > 1 && + elem->data[0] == WLAN_EID_EXT_EHT_MULTI_LINK) + return false; + if (!non_inherit_elem || non_inherit_elem->datalen < 2) return true; @@ -259,117 +265,152 @@ bool cfg80211_is_element_inherited(const struct element *elem, } EXPORT_SYMBOL(cfg80211_is_element_inherited); -static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, - const u8 *subelement, size_t subie_len, - u8 *new_ie, gfp_t gfp) +static size_t cfg80211_copy_elem_with_frags(const struct element *elem, + const u8 *ie, size_t ie_len, + u8 **pos, u8 *buf, size_t buf_len) { - u8 *pos, *tmp; - const u8 *tmp_old, *tmp_new; - const struct element *non_inherit_elem; - u8 *sub_copy; + if (WARN_ON((u8 *)elem < ie || elem->data > ie + ie_len || + elem->data + elem->datalen > ie + ie_len)) + return 0; - /* copy subelement as we need to change its content to - * mark an ie after it is processed. 
- */ - sub_copy = kmemdup(subelement, subie_len, gfp); - if (!sub_copy) + if (elem->datalen + 2 > buf + buf_len - *pos) return 0; - pos = &new_ie[0]; + memcpy(*pos, elem, elem->datalen + 2); + *pos += elem->datalen + 2; + + /* Finish if it is not fragmented */ + if (elem->datalen != 255) + return *pos - buf; + + ie_len = ie + ie_len - elem->data - elem->datalen; + ie = (const u8 *)elem->data + elem->datalen; + + for_each_element(elem, ie, ie_len) { + if (elem->id != WLAN_EID_FRAGMENT) + break; + + if (elem->datalen + 2 > buf + buf_len - *pos) + return 0; + + memcpy(*pos, elem, elem->datalen + 2); + *pos += elem->datalen + 2; - /* set new ssid */ - tmp_new = cfg80211_find_ie(WLAN_EID_SSID, sub_copy, subie_len); - if (tmp_new) { - memcpy(pos, tmp_new, tmp_new[1] + 2); - pos += (tmp_new[1] + 2); + if (elem->datalen != 255) + break; } - /* get non inheritance list if exists */ - non_inherit_elem = - cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub_copy, subie_len); + return *pos - buf; +} - /* go through IEs in ie (skip SSID) and subelement, - * merge them into new_ie +static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, + const u8 *subie, size_t subie_len, + u8 *new_ie, size_t new_ie_len) +{ + const struct element *non_inherit_elem, *parent, *sub; + u8 *pos = new_ie; + u8 id, ext_id; + unsigned int match_len; + + non_inherit_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + subie, subie_len); + + /* We copy the elements one by one from the parent to the generated + * elements. + * If they are not inherited (included in subie or in the non + * inheritance element), then we copy all occurrences the first time + * we see this element type. */ - tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen); - tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie; - - while (tmp_old + 2 - ie <= ielen && - tmp_old + tmp_old[1] + 2 - ie <= ielen) { - if (tmp_old[0] == 0) { - tmp_old++; + for_each_element(parent, ie, ielen) { + if (parent->id == WLAN_EID_FRAGMENT) continue; + + if (parent->id == WLAN_EID_EXTENSION) { + if (parent->datalen < 1) + continue; + + id = WLAN_EID_EXTENSION; + ext_id = parent->data[0]; + match_len = 1; + } else { + id = parent->id; + match_len = 0; } - if (tmp_old[0] == WLAN_EID_EXTENSION) - tmp = (u8 *)cfg80211_find_ext_ie(tmp_old[2], sub_copy, - subie_len); - else - tmp = (u8 *)cfg80211_find_ie(tmp_old[0], sub_copy, - subie_len); + /* Find first occurrence in subie */ + sub = cfg80211_find_elem_match(id, subie, subie_len, + &ext_id, match_len, 0); - if (!tmp) { - const struct element *old_elem = (void *)tmp_old; + /* Copy from parent if not in subie and inherited */ + if (!sub && + cfg80211_is_element_inherited(parent, non_inherit_elem)) { + if (!cfg80211_copy_elem_with_frags(parent, + ie, ielen, + &pos, new_ie, + new_ie_len)) + return 0; - /* ie in old ie but not in subelement */ - if (cfg80211_is_element_inherited(old_elem, - non_inherit_elem)) { - memcpy(pos, tmp_old, tmp_old[1] + 2); - pos += tmp_old[1] + 2; - } - } else { - /* ie in transmitting ie also in subelement, - * copy from subelement and flag the ie in subelement - * as copied (by setting eid field to WLAN_EID_SSID, - * which is skipped anyway). - * For vendor ie, compare OUI + type + subType to - * determine if they are the same ie. 
- */ - if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (tmp_old[1] >= 5 && tmp[1] >= 5 && - !memcmp(tmp_old + 2, tmp + 2, 5)) { - /* same vendor ie, copy from - * subelement - */ - memcpy(pos, tmp, tmp[1] + 2); - pos += tmp[1] + 2; - tmp[0] = WLAN_EID_SSID; - } else { - memcpy(pos, tmp_old, tmp_old[1] + 2); - pos += tmp_old[1] + 2; - } - } else { - /* copy ie from subelement into new ie */ - memcpy(pos, tmp, tmp[1] + 2); - pos += tmp[1] + 2; - tmp[0] = WLAN_EID_SSID; - } + continue; } - if (tmp_old + tmp_old[1] + 2 - ie == ielen) - break; + /* Already copied if an earlier element had the same type */ + if (cfg80211_find_elem_match(id, ie, (u8 *)parent - ie, + &ext_id, match_len, 0)) + continue; - tmp_old += tmp_old[1] + 2; + /* Not inheriting, copy all similar elements from subie */ + while (sub) { + if (!cfg80211_copy_elem_with_frags(sub, + subie, subie_len, + &pos, new_ie, + new_ie_len)) + return 0; + + sub = cfg80211_find_elem_match(id, + sub->data + sub->datalen, + subie_len + subie - + (sub->data + + sub->datalen), + &ext_id, match_len, 0); + } } - /* go through subelement again to check if there is any ie not - * copied to new ie, skip ssid, capability, bssid-index ie + /* The above misses elements that are included in subie but not in the + * parent, so do a pass over subie and append those. + * Skip the non-tx BSSID caps and non-inheritance element. */ - tmp_new = sub_copy; - while (tmp_new + 2 - sub_copy <= subie_len && - tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) { - if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP || - tmp_new[0] == WLAN_EID_SSID)) { - memcpy(pos, tmp_new, tmp_new[1] + 2); - pos += tmp_new[1] + 2; + for_each_element(sub, subie, subie_len) { + if (sub->id == WLAN_EID_NON_TX_BSSID_CAP) + continue; + + if (sub->id == WLAN_EID_FRAGMENT) + continue; + + if (sub->id == WLAN_EID_EXTENSION) { + if (sub->datalen < 1) + continue; + + id = WLAN_EID_EXTENSION; + ext_id = sub->data[0]; + match_len = 1; + + if (ext_id == WLAN_EID_EXT_NON_INHERITANCE) + continue; + } else { + id = sub->id; + match_len = 0; } - if (tmp_new + tmp_new[1] + 2 - sub_copy == subie_len) - break; - tmp_new += tmp_new[1] + 2; + + /* Processed if one was included in the parent */ + if (cfg80211_find_elem_match(id, ie, ielen, + &ext_id, match_len, 0)) + continue; + + if (!cfg80211_copy_elem_with_frags(sub, subie, subie_len, + &pos, new_ie, new_ie_len)) + return 0; } - kfree(sub_copy); return pos - new_ie; } @@ -535,39 +576,58 @@ static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list) static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, const u8 *pos, u8 length, const struct element *ssid_elem, - int s_ssid_tmp) + u32 s_ssid_tmp) { - /* skip the TBTT offset */ - pos++; + u8 bss_params; - /* ignore entries with invalid BSSID */ - if (!is_valid_ether_addr(pos)) - return -EINVAL; + entry->psd_20 = IEEE80211_RNR_TBTT_PARAMS_PSD_RESERVED; - memcpy(entry->bssid, pos, ETH_ALEN); - pos += ETH_ALEN; + /* The length is already verified by the caller to contain bss_params */ + if (length > sizeof(struct ieee80211_tbtt_info_7_8_9)) { + struct ieee80211_tbtt_info_ge_11 *tbtt_info = (void *)pos; - if (length >= IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM) { - memcpy(&entry->short_ssid, pos, - sizeof(entry->short_ssid)); + memcpy(entry->bssid, tbtt_info->bssid, ETH_ALEN); + entry->short_ssid = le32_to_cpu(tbtt_info->short_ssid); entry->short_ssid_valid = true; - pos += 4; + + bss_params = tbtt_info->bss_params; + + /* Ignore disabled links */ + if (length >= 
offsetofend(typeof(*tbtt_info), mld_params)) { + if (le16_get_bits(tbtt_info->mld_params.params, + IEEE80211_RNR_MLD_PARAMS_DISABLED_LINK)) + return -EINVAL; + } + + if (length >= offsetofend(struct ieee80211_tbtt_info_ge_11, + psd_20)) + entry->psd_20 = tbtt_info->psd_20; + } else { + struct ieee80211_tbtt_info_7_8_9 *tbtt_info = (void *)pos; + + memcpy(entry->bssid, tbtt_info->bssid, ETH_ALEN); + + bss_params = tbtt_info->bss_params; + + if (length == offsetofend(struct ieee80211_tbtt_info_7_8_9, + psd_20)) + entry->psd_20 = tbtt_info->psd_20; } + /* ignore entries with invalid BSSID */ + if (!is_valid_ether_addr(entry->bssid)) + return -EINVAL; + /* skip non colocated APs */ - if (!cfg80211_parse_bss_param(*pos, entry)) + if (!cfg80211_parse_bss_param(bss_params, entry)) return -EINVAL; - pos++; - if (length == IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM) { - /* - * no information about the short ssid. Consider the entry valid - * for now. It would later be dropped in case there are explicit - * SSIDs that need to be matched - */ - if (!entry->same_ssid) - return 0; - } + /* no information about the short ssid. Consider the entry valid + * for now. It would later be dropped in case there are explicit + * SSIDs that need to be matched + */ + if (!entry->same_ssid && !entry->short_ssid_valid) + return 0; if (entry->same_ssid) { entry->short_ssid = s_ssid_tmp; @@ -578,10 +638,10 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, * cfg80211_parse_colocated_ap(), before calling this * function. */ - memcpy(&entry->ssid, &ssid_elem->data, - ssid_elem->datalen); + memcpy(&entry->ssid, &ssid_elem->data, ssid_elem->datalen); entry->ssid_len = ssid_elem->datalen; } + return 0; } @@ -595,79 +655,89 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, int n_coloc = 0, ret; LIST_HEAD(ap_list); - elem = cfg80211_find_elem(WLAN_EID_REDUCED_NEIGHBOR_REPORT, ies->data, - ies->len); - if (!elem) - return 0; - - pos = elem->data; - end = pos + elem->datalen; - ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp); if (ret) return ret; - /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ - while (pos + sizeof(*ap_info) <= end) { - enum nl80211_band band; - int freq; - u8 length, i, count; + for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT, + ies->data, ies->len) { + pos = elem->data; + end = elem->data + elem->datalen; - ap_info = (void *)pos; - count = u8_get_bits(ap_info->tbtt_info_hdr, - IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1; - length = ap_info->tbtt_info_len; + /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ + while (pos + sizeof(*ap_info) <= end) { + enum nl80211_band band; + int freq; + u8 length, i, count; - pos += sizeof(*ap_info); + ap_info = (void *)pos; + count = u8_get_bits(ap_info->tbtt_info_hdr, + IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1; + length = ap_info->tbtt_info_len; - if (!ieee80211_operating_class_to_band(ap_info->op_class, - &band)) - break; + pos += sizeof(*ap_info); - freq = ieee80211_channel_to_frequency(ap_info->channel, band); + if (!ieee80211_operating_class_to_band(ap_info->op_class, + &band)) + break; - if (end - pos < count * length) - break; + freq = ieee80211_channel_to_frequency(ap_info->channel, + band); - /* - * TBTT info must include bss param + BSSID + - * (short SSID or same_ssid bit to be set). 
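[Note: cfg80211_parse_ap_info() above now selects the TBTT entry layout
purely from the advertised per-entry length: entries no longer than
sizeof(struct ieee80211_tbtt_info_7_8_9) use the short layout, longer ones
struct ieee80211_tbtt_info_ge_11, and each optional trailing field counts
as present only when the length reaches offsetofend() of that field. The
idiom, reduced to its core (the record layout here is illustrative):

	/* as in include/linux/stddef.h */
	#define offsetofend(TYPE, MEMBER) \
		(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))

	struct tbtt_rec {
		u8 bssid[6];
		__le32 short_ssid;
		u8 bss_params;
		s8 psd_20;	/* optional tail */
	} __packed;

	/* psd_20 is present iff the advertised length covers it */
	bool has_psd = length >= offsetofend(struct tbtt_rec, psd_20);
]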
- * ignore other options, and move to the - * next AP info - */ - if (band != NL80211_BAND_6GHZ || - (length != IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM && - length < IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM)) { - pos += count * length; - continue; - } + if (end - pos < count * length) + break; + + if (u8_get_bits(ap_info->tbtt_info_hdr, + IEEE80211_AP_INFO_TBTT_HDR_TYPE) != + IEEE80211_TBTT_INFO_TYPE_TBTT) { + pos += count * length; + continue; + } - for (i = 0; i < count; i++) { - struct cfg80211_colocated_ap *entry; + /* TBTT info must include bss param + BSSID + + * (short SSID or same_ssid bit to be set). + * ignore other options, and move to the + * next AP info + */ + if (band != NL80211_BAND_6GHZ || + !(length == offsetofend(struct ieee80211_tbtt_info_7_8_9, + bss_params) || + length == sizeof(struct ieee80211_tbtt_info_7_8_9) || + length >= offsetofend(struct ieee80211_tbtt_info_ge_11, + bss_params))) { + pos += count * length; + continue; + } - entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, - GFP_ATOMIC); + for (i = 0; i < count; i++) { + struct cfg80211_colocated_ap *entry; - if (!entry) - break; + entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, + GFP_ATOMIC); - entry->center_freq = freq; + if (!entry) + goto error; - if (!cfg80211_parse_ap_info(entry, pos, length, - ssid_elem, s_ssid_tmp)) { - n_coloc++; - list_add_tail(&entry->list, &ap_list); - } else { - kfree(entry); - } + entry->center_freq = freq; - pos += length; + if (!cfg80211_parse_ap_info(entry, pos, length, + ssid_elem, + s_ssid_tmp)) { + n_coloc++; + list_add_tail(&entry->list, &ap_list); + } else { + kfree(entry); + } + + pos += length; + } } - } - if (pos != end) { - cfg80211_free_coloc_ap_list(&ap_list); - return 0; +error: + if (pos != end) { + cfg80211_free_coloc_ap_list(&ap_list); + return 0; + } } list_splice_tail(&ap_list, list); @@ -846,6 +916,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) scan_6ghz_params->short_ssid = ap->short_ssid; scan_6ghz_params->short_ssid_valid = ap->short_ssid_valid; scan_6ghz_params->unsolicited_probe = ap->unsolicited_probe; + scan_6ghz_params->psd_20 = ap->psd_20; /* * If a PSC channel is added to the scan and 'need_scan_psc' is @@ -1004,16 +1075,9 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, nl80211_send_scan_msg(rdev, msg); } -void __cfg80211_scan_done(struct work_struct *wk) +void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk) { - struct cfg80211_registered_device *rdev; - - rdev = container_of(wk, struct cfg80211_registered_device, - scan_done_wk); - - wiphy_lock(&rdev->wiphy); - ___cfg80211_scan_done(rdev, true); - wiphy_unlock(&rdev->wiphy); + ___cfg80211_scan_done(wiphy_to_rdev(wiphy), true); } void cfg80211_scan_done(struct cfg80211_scan_request *request, @@ -1039,7 +1103,8 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, } request->notified = true; - queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk); + wiphy_work_queue(request->wiphy, + &wiphy_to_rdev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -1604,12 +1669,6 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, return true; } -struct cfg80211_non_tx_bss { - struct cfg80211_bss *tx_bss; - u8 max_bssid_indicator; - u8 bssid_index; -}; - static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known, const struct cfg80211_bss_ies *new_ies, const struct cfg80211_bss_ies *old_ies) @@ -1707,10 +1766,10 @@ 
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, } /* Returned bss is reference counted and must be cleaned up appropriately. */ -struct cfg80211_internal_bss * -cfg80211_bss_update(struct cfg80211_registered_device *rdev, - struct cfg80211_internal_bss *tmp, - bool signal_valid, unsigned long ts) +static struct cfg80211_internal_bss * +__cfg80211_bss_update(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *tmp, + bool signal_valid, unsigned long ts) { struct cfg80211_internal_bss *found = NULL; @@ -1719,10 +1778,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, tmp->ts = ts; - spin_lock_bh(&rdev->bss_lock); - if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) { - spin_unlock_bh(&rdev->bss_lock); return NULL; } @@ -1730,7 +1786,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, if (found) { if (!cfg80211_update_known_bss(rdev, found, tmp, signal_valid)) - goto drop; + return NULL; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; @@ -1750,7 +1806,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, ies = (void *)rcu_dereference(tmp->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); - goto drop; + return NULL; } memcpy(new, tmp, sizeof(*new)); new->refcount = 1; @@ -1781,14 +1837,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, */ if (!cfg80211_combine_bsses(rdev, new)) { bss_ref_put(rdev, new); - goto drop; + return NULL; } } if (rdev->bss_entries >= bss_entries_limit && !cfg80211_bss_expire_oldest(rdev)) { bss_ref_put(rdev, new); - goto drop; + return NULL; } /* This must be before the call to bss_ref_get */ @@ -1805,12 +1861,22 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, rdev->bss_generation++; bss_ref_get(rdev, found); - spin_unlock_bh(&rdev->bss_lock); return found; - drop: +} + +struct cfg80211_internal_bss * +cfg80211_bss_update(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *tmp, + bool signal_valid, unsigned long ts) +{ + struct cfg80211_internal_bss *res; + + spin_lock_bh(&rdev->bss_lock); + res = __cfg80211_bss_update(rdev, tmp, signal_valid, ts); spin_unlock_bh(&rdev->bss_lock); - return NULL; + + return res; } int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, @@ -1930,17 +1996,36 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return alt_channel; } +struct cfg80211_inform_single_bss_data { + struct cfg80211_inform_bss *drv_data; + enum cfg80211_bss_frame_type ftype; + struct ieee80211_channel *channel; + u8 bssid[ETH_ALEN]; + u64 tsf; + u16 capability; + u16 beacon_interval; + const u8 *ie; + size_t ielen; + + enum { + BSS_SOURCE_DIRECT = 0, + BSS_SOURCE_MBSSID, + BSS_SOURCE_STA_PROFILE, + } bss_source; + /* Set if reporting bss_source != BSS_SOURCE_DIRECT */ + struct cfg80211_bss *source_bss; + u8 max_bssid_indicator; + u8 bssid_index; +}; + /* Returned bss is reference counted and must be cleaned up appropriately. 
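[Note: the __cfg80211_bss_update() split above is the common "__helper plus
locking wrapper" refactor: the helper runs with bss_lock already held, so
every failure path can simply return NULL instead of jumping to an unlock
label, while callers that must do more work under the same lock (such as
rdev_inform_bss() and the non-transmitting-BSS list update further down)
now take bss_lock themselves around the helper. The shape of the pattern,
sketched generically (all names illustrative):

	static struct entry *__cache_update(struct cache *c, struct entry *e)
	{
		/* c->lock held by caller; plain early returns are safe */
		if (!entry_valid(e))
			return NULL;
		return cache_commit(c, e);
	}

	static struct entry *cache_update(struct cache *c, struct entry *e)
	{
		struct entry *res;

		spin_lock_bh(&c->lock);
		res = __cache_update(c, e);
		spin_unlock_bh(&c->lock);
		return res;
	}
]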
*/ static struct cfg80211_bss * cfg80211_inform_single_bss_data(struct wiphy *wiphy, - struct cfg80211_inform_bss *data, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, u16 capability, - u16 beacon_interval, const u8 *ie, size_t ielen, - struct cfg80211_non_tx_bss *non_tx_data, + struct cfg80211_inform_single_bss_data *data, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_inform_bss *drv_data = data->drv_data; struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; struct cfg80211_internal_bss tmp = {}, *res; @@ -1952,31 +2037,53 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return NULL; if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && - (data->signal < 0 || data->signal > 100))) + (drv_data->signal < 0 || drv_data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, - data->scan_width); + if (WARN_ON(data->bss_source != BSS_SOURCE_DIRECT && !data->source_bss)) + return NULL; + + channel = data->channel; + if (!channel) + channel = cfg80211_get_bss_channel(wiphy, data->ie, data->ielen, + drv_data->chan, + drv_data->scan_width); if (!channel) return NULL; - memcpy(tmp.pub.bssid, bssid, ETH_ALEN); + memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN); tmp.pub.channel = channel; - tmp.pub.scan_width = data->scan_width; - tmp.pub.signal = data->signal; - tmp.pub.beacon_interval = beacon_interval; - tmp.pub.capability = capability; - tmp.ts_boottime = data->boottime_ns; - tmp.parent_tsf = data->parent_tsf; - ether_addr_copy(tmp.parent_bssid, data->parent_bssid); - - if (non_tx_data) { - tmp.pub.transmitted_bss = non_tx_data->tx_bss; - ts = bss_from_pub(non_tx_data->tx_bss)->ts; - tmp.pub.bssid_index = non_tx_data->bssid_index; - tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator; + tmp.pub.scan_width = drv_data->scan_width; + if (data->bss_source != BSS_SOURCE_STA_PROFILE) + tmp.pub.signal = drv_data->signal; + else + tmp.pub.signal = 0; + tmp.pub.beacon_interval = data->beacon_interval; + tmp.pub.capability = data->capability; + tmp.ts_boottime = drv_data->boottime_ns; + tmp.parent_tsf = drv_data->parent_tsf; + ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid); + + if (data->bss_source != BSS_SOURCE_DIRECT) { + tmp.pub.transmitted_bss = data->source_bss; + ts = bss_from_pub(data->source_bss)->ts; + tmp.pub.bssid_index = data->bssid_index; + tmp.pub.max_bssid_indicator = data->max_bssid_indicator; } else { ts = jiffies; + + if (channel->band == NL80211_BAND_60GHZ) { + bss_type = data->capability & + WLAN_CAPABILITY_DMG_TYPE_MASK; + if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || + bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) + regulatory_hint_found_beacon(wiphy, channel, + gfp); + } else { + if (data->capability & WLAN_CAPABILITY_ESS) + regulatory_hint_found_beacon(wiphy, channel, + gfp); + } } /* @@ -1987,15 +2094,15 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, * override the IEs pointer should we have received an earlier * indication of Probe Response data. 
*/ - ies = kzalloc(sizeof(*ies) + ielen, gfp); + ies = kzalloc(sizeof(*ies) + data->ielen, gfp); if (!ies) return NULL; - ies->len = ielen; - ies->tsf = tsf; + ies->len = data->ielen; + ies->tsf = data->tsf; ies->from_beacon = false; - memcpy(ies->data, ie, ielen); + memcpy(ies->data, data->ie, data->ielen); - switch (ftype) { + switch (data->ftype) { case CFG80211_BSS_FTYPE_BEACON: ies->from_beacon = true; fallthrough; @@ -2008,42 +2115,37 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, } rcu_assign_pointer(tmp.pub.ies, ies); - signal_valid = data->chan == channel; - res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, ts); + signal_valid = drv_data->chan == channel; + spin_lock_bh(&rdev->bss_lock); + res = __cfg80211_bss_update(rdev, &tmp, signal_valid, ts); if (!res) - return NULL; + goto drop; - if (channel->band == NL80211_BAND_60GHZ) { - bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; - if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || - bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) - regulatory_hint_found_beacon(wiphy, channel, gfp); - } else { - if (res->pub.capability & WLAN_CAPABILITY_ESS) - regulatory_hint_found_beacon(wiphy, channel, gfp); - } + rdev_inform_bss(rdev, &res->pub, ies, data->drv_data); - if (non_tx_data) { + if (data->bss_source == BSS_SOURCE_MBSSID) { /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ - spin_lock_bh(&rdev->bss_lock); - if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, - &res->pub)) { + if (cfg80211_add_nontrans_list(data->source_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) { rdev->bss_generation++; res = NULL; } } - spin_unlock_bh(&rdev->bss_lock); if (!res) - return NULL; + goto drop; } + spin_unlock_bh(&rdev->bss_lock); trace_cfg80211_return_bss(&res->pub); - /* cfg80211_bss_update gives us a referenced result */ + /* __cfg80211_bss_update gives us a referenced result */ return &res->pub; + +drop: + spin_unlock_bh(&rdev->bss_lock); + return NULL; } static const struct element @@ -2118,43 +2220,48 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, } EXPORT_SYMBOL(cfg80211_merge_profile); -static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, - struct cfg80211_inform_bss *data, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, - u16 beacon_interval, const u8 *ie, - size_t ielen, - struct cfg80211_non_tx_bss *non_tx_data, - gfp_t gfp) -{ +static void +cfg80211_parse_mbssid_data(struct wiphy *wiphy, + struct cfg80211_inform_single_bss_data *tx_data, + struct cfg80211_bss *source_bss, + gfp_t gfp) +{ + struct cfg80211_inform_single_bss_data data = { + .drv_data = tx_data->drv_data, + .ftype = tx_data->ftype, + .tsf = tx_data->tsf, + .beacon_interval = tx_data->beacon_interval, + .source_bss = source_bss, + .bss_source = BSS_SOURCE_MBSSID, + }; const u8 *mbssid_index_ie; const struct element *elem, *sub; - size_t new_ie_len; - u8 new_bssid[ETH_ALEN]; u8 *new_ie, *profile; u64 seen_indices = 0; - u16 capability; struct cfg80211_bss *bss; - if (!non_tx_data) + if (!source_bss) return; - if (!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) + if (!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, + tx_data->ie, tx_data->ielen)) return; if (!wiphy->support_mbssid) return; if (wiphy->support_only_he_mbssid && - !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) + !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, + tx_data->ie, tx_data->ielen)) return; new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp); if (!new_ie) 
return; - profile = kmalloc(ielen, gfp); + profile = kmalloc(tx_data->ielen, gfp); if (!profile) goto out; - for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) { + for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, + tx_data->ie, tx_data->ielen) { if (elem->datalen < 4) continue; if (elem->data[0] < 1 || (int)elem->data[0] > 8) @@ -2176,12 +2283,13 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, continue; } - memset(profile, 0, ielen); - profile_len = cfg80211_merge_profile(ie, ielen, + memset(profile, 0, tx_data->ielen); + profile_len = cfg80211_merge_profile(tx_data->ie, + tx_data->ielen, elem, sub, profile, - ielen); + tx_data->ielen); /* found a Nontransmitted BSSID Profile */ mbssid_index_ie = cfg80211_find_ie @@ -2201,31 +2309,27 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, seen_indices |= BIT_ULL(mbssid_index_ie[2]); - non_tx_data->bssid_index = mbssid_index_ie[2]; - non_tx_data->max_bssid_indicator = elem->data[0]; + data.bssid_index = mbssid_index_ie[2]; + data.max_bssid_indicator = elem->data[0]; + + cfg80211_gen_new_bssid(tx_data->bssid, + data.max_bssid_indicator, + data.bssid_index, + data.bssid); - cfg80211_gen_new_bssid(bssid, - non_tx_data->max_bssid_indicator, - non_tx_data->bssid_index, - new_bssid); memset(new_ie, 0, IEEE80211_MAX_DATA_LEN); - new_ie_len = cfg80211_gen_new_ie(ie, ielen, + data.ie = new_ie; + data.ielen = cfg80211_gen_new_ie(tx_data->ie, + tx_data->ielen, profile, - profile_len, new_ie, - gfp); - if (!new_ie_len) + profile_len, + new_ie, + IEEE80211_MAX_DATA_LEN); + if (!data.ielen) continue; - capability = get_unaligned_le16(profile + 2); - bss = cfg80211_inform_single_bss_data(wiphy, data, - ftype, - new_bssid, tsf, - capability, - beacon_interval, - new_ie, - new_ie_len, - non_tx_data, - gfp); + data.capability = get_unaligned_le16(profile + 2); + bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp); if (!bss) break; cfg80211_put_bss(wiphy, bss); @@ -2237,142 +2341,425 @@ out: kfree(profile); } -struct cfg80211_bss * -cfg80211_inform_bss_data(struct wiphy *wiphy, - struct cfg80211_inform_bss *data, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, u16 capability, - u16 beacon_interval, const u8 *ie, size_t ielen, - gfp_t gfp) +ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, + size_t ieslen, u8 *data, size_t data_len, + u8 frag_id) { - struct cfg80211_bss *res; - struct cfg80211_non_tx_bss non_tx_data; + const struct element *next; + ssize_t copied; + u8 elem_datalen; + + if (!elem) + return -EINVAL; + + /* elem might be invalid after the memmove */ + next = (void *)(elem->data + elem->datalen); + + elem_datalen = elem->datalen; + if (elem->id == WLAN_EID_EXTENSION) { + copied = elem->datalen - 1; + if (copied > data_len) + return -ENOSPC; + + memmove(data, elem->data + 1, copied); + } else { + copied = elem->datalen; + if (copied > data_len) + return -ENOSPC; + + memmove(data, elem->data, copied); + } + + /* Fragmented elements must have 255 bytes */ + if (elem_datalen < 255) + return copied; + + for (elem = next; + elem->data < ies + ieslen && + elem->data + elem->datalen < ies + ieslen; + elem = next) { + /* elem might be invalid after the memmove */ + next = (void *)(elem->data + elem->datalen); + + if (elem->id != frag_id) + break; + + elem_datalen = elem->datalen; + + if (copied + elem_datalen > data_len) + return -ENOSPC; + + memmove(data + copied, elem->data, elem_datalen); + copied += elem_datalen; + + /* Only the last fragment may be short */ + if 
(elem_datalen != 255) + break; + } - res = cfg80211_inform_single_bss_data(wiphy, data, ftype, bssid, tsf, - capability, beacon_interval, ie, - ielen, NULL, gfp); + return copied; +} +EXPORT_SYMBOL(cfg80211_defragment_element); + +struct cfg80211_mle { + struct ieee80211_multi_link_elem *mle; + struct ieee80211_mle_per_sta_profile + *sta_prof[IEEE80211_MLD_MAX_NUM_LINKS]; + ssize_t sta_prof_len[IEEE80211_MLD_MAX_NUM_LINKS]; + + u8 data[]; +}; + +static struct cfg80211_mle * +cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, + gfp_t gfp) +{ + const struct element *elem; + struct cfg80211_mle *res; + size_t buf_len; + ssize_t mle_len; + u8 common_size, idx; + + if (!mle || !ieee80211_mle_size_ok(mle->data + 1, mle->datalen - 1)) + return NULL; + + /* Required length for first defragmentation */ + buf_len = mle->datalen - 1; + for_each_element(elem, mle->data + mle->datalen, + ielen - sizeof(*mle) + mle->datalen) { + if (elem->id != WLAN_EID_FRAGMENT) + break; + + buf_len += elem->datalen; + } + + res = kzalloc(struct_size(res, data, buf_len), gfp); if (!res) return NULL; - non_tx_data.tx_bss = res; - cfg80211_parse_mbssid_data(wiphy, data, ftype, bssid, tsf, - beacon_interval, ie, ielen, &non_tx_data, - gfp); + + mle_len = cfg80211_defragment_element(mle, ie, ielen, + res->data, buf_len, + WLAN_EID_FRAGMENT); + if (mle_len < 0) + goto error; + + res->mle = (void *)res->data; + + /* Find the sub-element area in the buffer */ + common_size = ieee80211_mle_common_size((u8 *)res->mle); + ie = res->data + common_size; + ielen = mle_len - common_size; + + idx = 0; + for_each_element_id(elem, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE, + ie, ielen) { + res->sta_prof[idx] = (void *)elem->data; + res->sta_prof_len[idx] = elem->datalen; + + idx++; + if (idx >= IEEE80211_MLD_MAX_NUM_LINKS) + break; + } + if (!for_each_element_completed(elem, ie, ielen)) + goto error; + + /* Defragment sta_info in-place */ + for (idx = 0; idx < IEEE80211_MLD_MAX_NUM_LINKS && res->sta_prof[idx]; + idx++) { + if (res->sta_prof_len[idx] < 255) + continue; + + elem = (void *)res->sta_prof[idx] - 2; + + if (idx + 1 < ARRAY_SIZE(res->sta_prof) && + res->sta_prof[idx + 1]) + buf_len = (u8 *)res->sta_prof[idx + 1] - + (u8 *)res->sta_prof[idx]; + else + buf_len = ielen + ie - (u8 *)elem; + + res->sta_prof_len[idx] = + cfg80211_defragment_element(elem, + (u8 *)elem, buf_len, + (u8 *)res->sta_prof[idx], + buf_len, + IEEE80211_MLE_SUBELEM_FRAGMENT); + if (res->sta_prof_len[idx] < 0) + goto error; + } + return res; + +error: + kfree(res); + return NULL; } -EXPORT_SYMBOL(cfg80211_inform_bss_data); -static void -cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy, - struct cfg80211_inform_bss *data, - struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_non_tx_bss *non_tx_data, - gfp_t gfp) +static bool +cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, + const struct ieee80211_neighbor_ap_info **ap_info, + const u8 **tbtt_info) { - enum cfg80211_bss_frame_type ftype; - const u8 *ie = mgmt->u.probe_resp.variable; - size_t ielen = len - offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); + const struct ieee80211_neighbor_ap_info *info; + const struct element *rnr; + const u8 *pos, *end; - ftype = ieee80211_is_beacon(mgmt->frame_control) ? 
- CFG80211_BSS_FTYPE_BEACON : CFG80211_BSS_FTYPE_PRESP; + for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT, ie, ielen) { + pos = rnr->data; + end = rnr->data + rnr->datalen; + + /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ + while (sizeof(*info) <= end - pos) { + const struct ieee80211_rnr_mld_params *mld_params; + u16 params; + u8 length, i, count, mld_params_offset; + u8 type, lid; + + info = (void *)pos; + count = u8_get_bits(info->tbtt_info_hdr, + IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1; + length = info->tbtt_info_len; + + pos += sizeof(*info); + + if (count * length > end - pos) + return false; + + type = u8_get_bits(info->tbtt_info_hdr, + IEEE80211_AP_INFO_TBTT_HDR_TYPE); + + /* Only accept full TBTT information. NSTR mobile APs + * use the shortened version, but we ignore them here. + */ + if (type == IEEE80211_TBTT_INFO_TYPE_TBTT && + length >= + offsetofend(struct ieee80211_tbtt_info_ge_11, + mld_params)) { + mld_params_offset = + offsetof(struct ieee80211_tbtt_info_ge_11, mld_params); + } else { + pos += count * length; + continue; + } + + for (i = 0; i < count; i++) { + mld_params = (void *)pos + mld_params_offset; + params = le16_to_cpu(mld_params->params); - cfg80211_parse_mbssid_data(wiphy, data, ftype, mgmt->bssid, - le64_to_cpu(mgmt->u.probe_resp.timestamp), - le16_to_cpu(mgmt->u.probe_resp.beacon_int), - ie, ielen, non_tx_data, gfp); + lid = u16_get_bits(params, + IEEE80211_RNR_MLD_PARAMS_LINK_ID); + + if (mld_id == mld_params->mld_id && + link_id == lid) { + *ap_info = info; + *tbtt_info = pos; + + return true; + } + + pos += length; + } + } + } + + return false; } -static void -cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, - struct cfg80211_bss *nontrans_bss, - struct ieee80211_mgmt *mgmt, size_t len) -{ - u8 *ie, *new_ie, *pos; - const struct element *nontrans_ssid; - const u8 *trans_ssid, *mbssid; - size_t ielen = len - offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); - size_t new_ie_len; - struct cfg80211_bss_ies *new_ies; - const struct cfg80211_bss_ies *old; - size_t cpy_len; - - lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock); - - ie = mgmt->u.probe_resp.variable; - - new_ie_len = ielen; - trans_ssid = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen); - if (!trans_ssid) +static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, + struct cfg80211_inform_single_bss_data *tx_data, + struct cfg80211_bss *source_bss, + gfp_t gfp) +{ + struct cfg80211_inform_single_bss_data data = { + .drv_data = tx_data->drv_data, + .ftype = tx_data->ftype, + .source_bss = source_bss, + .bss_source = BSS_SOURCE_STA_PROFILE, + }; + struct ieee80211_multi_link_elem *ml_elem; + const struct element *elem; + struct cfg80211_mle *mle; + u16 control; + u8 *new_ie; + struct cfg80211_bss *bss; + int mld_id; + u16 seen_links = 0; + const u8 *pos; + u8 i; + + if (!source_bss) return; - new_ie_len -= trans_ssid[1]; - mbssid = cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen); - /* - * It's not valid to have the MBSSID element before SSID - * ignore if that happens - the code below assumes it is - * after (while copying things inbetween). 
- */ - if (!mbssid || mbssid < trans_ssid) + + if (tx_data->ftype != CFG80211_BSS_FTYPE_PRESP) return; - new_ie_len -= mbssid[1]; - nontrans_ssid = ieee80211_bss_get_elem(nontrans_bss, WLAN_EID_SSID); - if (!nontrans_ssid) + elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK, + tx_data->ie, tx_data->ielen); + if (!elem || !ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1)) return; - new_ie_len += nontrans_ssid->datalen; + ml_elem = (void *)elem->data + 1; + control = le16_to_cpu(ml_elem->control); + if (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + return; - /* generate new ie for nontrans BSS - * 1. replace SSID with nontrans BSS' SSID - * 2. skip MBSSID IE - */ - new_ie = kzalloc(new_ie_len, GFP_ATOMIC); - if (!new_ie) + /* Must be present when transmitted by an AP (in a probe response) */ + if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) || + !(control & IEEE80211_MLC_BASIC_PRES_LINK_ID) || + !(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) return; - new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, GFP_ATOMIC); - if (!new_ies) - goto out_free; - - pos = new_ie; - - /* copy the nontransmitted SSID */ - cpy_len = nontrans_ssid->datalen + 2; - memcpy(pos, nontrans_ssid, cpy_len); - pos += cpy_len; - /* copy the IEs between SSID and MBSSID */ - cpy_len = trans_ssid[1] + 2; - memcpy(pos, (trans_ssid + cpy_len), (mbssid - (trans_ssid + cpy_len))); - pos += (mbssid - (trans_ssid + cpy_len)); - /* copy the IEs after MBSSID */ - cpy_len = mbssid[1] + 2; - memcpy(pos, mbssid + cpy_len, ((ie + ielen) - (mbssid + cpy_len))); - - /* update ie */ - new_ies->len = new_ie_len; - new_ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); - new_ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control); - memcpy(new_ies->data, new_ie, new_ie_len); - if (ieee80211_is_probe_resp(mgmt->frame_control)) { - old = rcu_access_pointer(nontrans_bss->proberesp_ies); - rcu_assign_pointer(nontrans_bss->proberesp_ies, new_ies); - rcu_assign_pointer(nontrans_bss->ies, new_ies); - if (old) - kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + /* length + MLD MAC address + link ID info + BSS Params Change Count */ + pos = ml_elem->variable + 1 + 6 + 1 + 1; + + if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)) + pos += 2; + if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_EML_CAPA)) + pos += 2; + + /* MLD capabilities and operations */ + pos += 2; + + /* Not included when the (nontransmitted) AP is responding itself, + * but defined to zero then (Draft P802.11be_D3.0, 9.4.2.170.2) + */ + if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MLD_ID)) { + mld_id = *pos; + pos += 1; } else { - old = rcu_access_pointer(nontrans_bss->beacon_ies); - rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies); - cfg80211_update_hidden_bsses(bss_from_pub(nontrans_bss), - new_ies, old); - rcu_assign_pointer(nontrans_bss->ies, new_ies); - if (old) - kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + mld_id = 0; + } + + /* Extended MLD capabilities and operations */ + pos += 2; + + /* Fully defrag the ML element for sta information/profile iteration */ + mle = cfg80211_defrag_mle(elem, tx_data->ie, tx_data->ielen, gfp); + if (!mle) + return; + + new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp); + if (!new_ie) + goto out; + + for (i = 0; i < ARRAY_SIZE(mle->sta_prof) && mle->sta_prof[i]; i++) { + const struct ieee80211_neighbor_ap_info *ap_info; + enum nl80211_band band; + u32 freq; + const u8 *profile; + const u8 *tbtt_info; + ssize_t profile_len; 
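[Note: each complete per-STA profile handled in the surrounding loop
carries, after its fixed control/sta_info fields (station MAC address,
beacon interval, TSF offset), the inner frame body of the affiliated link:

	capability (2 octets) | that link's own elements ...

which is why the code reads data.capability with get_unaligned_le16() and
advances 'profile' by 2 before handing the remainder to
cfg80211_gen_new_ie() to merge with the reporting link's elements.]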
+ u8 link_id; + + if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i], + mle->sta_prof_len[i])) + continue; + + control = le16_to_cpu(mle->sta_prof[i]->control); + + if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE)) + continue; + + link_id = u16_get_bits(control, + IEEE80211_MLE_STA_CONTROL_LINK_ID); + if (seen_links & BIT(link_id)) + break; + seen_links |= BIT(link_id); + + if (!(control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) || + !(control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) || + !(control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT)) + continue; + + memcpy(data.bssid, mle->sta_prof[i]->variable, ETH_ALEN); + data.beacon_interval = + get_unaligned_le16(mle->sta_prof[i]->variable + 6); + data.tsf = tx_data->tsf + + get_unaligned_le64(mle->sta_prof[i]->variable + 8); + + /* sta_info_len counts itself */ + profile = mle->sta_prof[i]->variable + + mle->sta_prof[i]->sta_info_len - 1; + profile_len = (u8 *)mle->sta_prof[i] + mle->sta_prof_len[i] - + profile; + + if (profile_len < 2) + continue; + + data.capability = get_unaligned_le16(profile); + profile += 2; + profile_len -= 2; + + /* Find in RNR to look up channel information */ + if (!cfg80211_tbtt_info_for_mld_ap(tx_data->ie, tx_data->ielen, + mld_id, link_id, + &ap_info, &tbtt_info)) + continue; + + /* We could sanity check the BSSID is included */ + + if (!ieee80211_operating_class_to_band(ap_info->op_class, + &band)) + continue; + + freq = ieee80211_channel_to_freq_khz(ap_info->channel, band); + data.channel = ieee80211_get_channel_khz(wiphy, freq); + + /* Generate new elements */ + memset(new_ie, 0, IEEE80211_MAX_DATA_LEN); + data.ie = new_ie; + data.ielen = cfg80211_gen_new_ie(tx_data->ie, tx_data->ielen, + profile, profile_len, + new_ie, + IEEE80211_MAX_DATA_LEN); + if (!data.ielen) + continue; + + bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp); + if (!bss) + break; + cfg80211_put_bss(wiphy, bss); } -out_free: +out: kfree(new_ie); + kfree(mle); } +struct cfg80211_bss * +cfg80211_inform_bss_data(struct wiphy *wiphy, + struct cfg80211_inform_bss *data, + enum cfg80211_bss_frame_type ftype, + const u8 *bssid, u64 tsf, u16 capability, + u16 beacon_interval, const u8 *ie, size_t ielen, + gfp_t gfp) +{ + struct cfg80211_inform_single_bss_data inform_data = { + .drv_data = data, + .ftype = ftype, + .tsf = tsf, + .capability = capability, + .beacon_interval = beacon_interval, + .ie = ie, + .ielen = ielen, + }; + struct cfg80211_bss *res; + + memcpy(inform_data.bssid, bssid, ETH_ALEN); + + res = cfg80211_inform_single_bss_data(wiphy, &inform_data, gfp); + if (!res) + return NULL; + + cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp); + + cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp); + + return res; +} +EXPORT_SYMBOL(cfg80211_inform_bss_data); + /* cfg80211_inform_bss_width_frame helper */ static struct cfg80211_bss * cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, @@ -2380,6 +2767,7 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss tmp = {}, *res; struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; @@ -2451,6 +2839,16 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); } + if (channel->band == NL80211_BAND_60GHZ) { + bss_type = capability & WLAN_CAPABILITY_DMG_TYPE_MASK; + if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP 
|| + bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + } else { + if (capability & WLAN_CAPABILITY_ESS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + } + ies = kzalloc(sizeof(*ies) + ielen, gfp); if (!ies) return NULL; @@ -2479,24 +2877,22 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, ether_addr_copy(tmp.parent_bssid, data->parent_bssid); signal_valid = data->chan == channel; - res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, - jiffies); + spin_lock_bh(&rdev->bss_lock); + res = __cfg80211_bss_update(rdev, &tmp, signal_valid, jiffies); if (!res) - return NULL; + goto drop; - if (channel->band == NL80211_BAND_60GHZ) { - bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; - if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || - bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) - regulatory_hint_found_beacon(wiphy, channel, gfp); - } else { - if (res->pub.capability & WLAN_CAPABILITY_ESS) - regulatory_hint_found_beacon(wiphy, channel, gfp); - } + rdev_inform_bss(rdev, &res->pub, ies, data->drv_data); + + spin_unlock_bh(&rdev->bss_lock); trace_cfg80211_return_bss(&res->pub); - /* cfg80211_bss_update gives us a referenced result */ + /* __cfg80211_bss_update gives us a referenced result */ return &res->pub; + +drop: + spin_unlock_bh(&rdev->bss_lock); + return NULL; } struct cfg80211_bss * @@ -2505,51 +2901,34 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { - struct cfg80211_bss *res, *tmp_bss; - const u8 *ie = mgmt->u.probe_resp.variable; - const struct cfg80211_bss_ies *ies1, *ies2; - size_t ielen = len - offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data = {}; + struct cfg80211_inform_single_bss_data inform_data = { + .drv_data = data, + .ie = mgmt->u.probe_resp.variable, + .ielen = len - offsetof(struct ieee80211_mgmt, + u.probe_resp.variable), + }; + struct cfg80211_bss *res; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + if (!res) + return NULL; - /* don't do any further MBSSID handling for S1G */ + /* don't do any further MBSSID/ML handling for S1G */ if (ieee80211_is_s1g_beacon(mgmt->frame_control)) return res; - if (!res || !wiphy->support_mbssid || - !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) - return res; - if (wiphy->support_only_he_mbssid && - !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) - return res; + inform_data.ftype = ieee80211_is_beacon(mgmt->frame_control) ? 
+ CFG80211_BSS_FTYPE_BEACON : CFG80211_BSS_FTYPE_PRESP; + memcpy(inform_data.bssid, mgmt->bssid, ETH_ALEN); + inform_data.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); + inform_data.beacon_interval = + le16_to_cpu(mgmt->u.probe_resp.beacon_int); - non_tx_data.tx_bss = res; /* process each non-transmitting bss */ - cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len, - &non_tx_data, gfp); - - spin_lock_bh(&wiphy_to_rdev(wiphy)->bss_lock); + cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp); - /* check if the res has other nontransmitting bss which is not - * in MBSSID IE - */ - ies1 = rcu_access_pointer(res->ies); - - /* go through nontrans_list, if the timestamp of the BSS is - * earlier than the timestamp of the transmitting BSS then - * update it - */ - list_for_each_entry(tmp_bss, &res->nontrans_list, - nontrans_list) { - ies2 = rcu_access_pointer(tmp_bss->ies); - if (ies2->tsf < ies1->tsf) - cfg80211_update_notlisted_nontrans(wiphy, tmp_bss, - mgmt, len); - } - spin_unlock_bh(&wiphy_to_rdev(wiphy)->bss_lock); + cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp); return res; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 7bdeb8eea92d..9bba233b5a6e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -491,6 +491,21 @@ static void cfg80211_wdev_release_bsses(struct wireless_dev *wdev) } } +void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask) +{ + unsigned int link; + + for_each_valid_link(wdev, link) { + if (!wdev->links[link].client.current_bss || + !(link_mask & BIT(link))) + continue; + cfg80211_unhold_bss(wdev->links[link].client.current_bss); + cfg80211_put_bss(wdev->wiphy, + &wdev->links[link].client.current_bss->pub); + wdev->links[link].client.current_bss = NULL; + } +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) @@ -1569,6 +1584,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) container_of(work, struct wireless_dev, disconnect_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + wiphy_lock(wdev->wiphy); wdev_lock(wdev); if (wdev->conn_owner_nlportid) { @@ -1607,4 +1623,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) } wdev_unlock(wdev); + wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 268f670835e9..c629bac3f298 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -5,7 +5,7 @@ * * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz> * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2021, 2023 Intel Corporation */ #include <linux/device.h> @@ -105,14 +105,18 @@ static int wiphy_suspend(struct device *dev) cfg80211_leave_all(rdev); cfg80211_process_rdev_events(rdev); } + cfg80211_process_wiphy_works(rdev); if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); cfg80211_process_rdev_events(rdev); + cfg80211_process_wiphy_works(rdev); ret = rdev_suspend(rdev, NULL); } + if (ret == 0) + rdev->suspended = true; } wiphy_unlock(&rdev->wiphy); 
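[Note: the wiphy_suspend() hunk above pairs with the new rdev->suspended
flag and the resume hunk just below: pending wiphy work is drained
synchronously via cfg80211_process_wiphy_works() before the driver's
suspend callback, a successful suspend then sets the flag so work queued
while asleep is parked rather than run, and resume clears the flag and
kicks rdev->wiphy_work to process whatever accumulated. Generic shape of
the pattern (names illustrative):

	static int dev_suspend(struct mydev *d)
	{
		drain_pending_work(d);	/* nothing may still be running */
		d->suspended = true;	/* queueing allowed, execution parked */
		return d->ops->suspend(d);
	}

	static int dev_resume(struct mydev *d)
	{
		int ret = d->ops->resume(d);

		d->suspended = false;
		schedule_work(&d->service_work); /* run what queued meanwhile */
		return ret;
	}
]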
rtnl_unlock(); @@ -132,6 +136,8 @@ static int wiphy_resume(struct device *dev) wiphy_lock(&rdev->wiphy); if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); + rdev->suspended = false; + schedule_work(&rdev->wiphy_work); wiphy_unlock(&rdev->wiphy); if (ret) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 716a1fa70069..617c0d0dfa96 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -22,7 +22,7 @@ #define MAXNAME 32 #define WIPHY_ENTRY __array(char, wiphy_name, 32) -#define WIPHY_ASSIGN strlcpy(__entry->wiphy_name, wiphy_name(wiphy), MAXNAME) +#define WIPHY_ASSIGN strscpy(__entry->wiphy_name, wiphy_name(wiphy), MAXNAME) #define WIPHY_PR_FMT "%s" #define WIPHY_PR_ARG __entry->wiphy_name @@ -1159,6 +1159,23 @@ TRACE_EVENT(rdev_change_bss, __entry->ap_isolate, __entry->ht_opmode) ); +TRACE_EVENT(rdev_inform_bss, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_bss *bss), + TP_ARGS(wiphy, bss), + TP_STRUCT__entry( + WIPHY_ENTRY + MAC_ENTRY(bssid) + CHAN_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + MAC_ASSIGN(bssid, bss->bssid); + CHAN_ASSIGN(bss->channel); + ), + TP_printk(WIPHY_PR_FMT ", %pM, " CHAN_PR_FMT, + WIPHY_PR_ARG, __entry->bssid, CHAN_PR_ARG) +); + TRACE_EVENT(rdev_set_txq_params, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct ieee80211_txq_params *params), @@ -1779,15 +1796,16 @@ DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_stop, TRACE_EVENT(rdev_tdls_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - u8 *peer, u8 action_code, u8 dialog_token, + u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *buf, size_t len), - TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code, - peer_capability, initiator, buf, len), + TP_ARGS(wiphy, netdev, peer, link_id, action_code, dialog_token, + status_code, peer_capability, initiator, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) + __field(int, link_id) __field(u8, action_code) __field(u8, dialog_token) __field(u16, status_code) @@ -1799,6 +1817,7 @@ TRACE_EVENT(rdev_tdls_mgmt, WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); + __entry->link_id = link_id; __entry->action_code = action_code; __entry->dialog_token = dialog_token; __entry->status_code = status_code; @@ -1806,11 +1825,12 @@ TRACE_EVENT(rdev_tdls_mgmt, __entry->initiator = initiator; memcpy(__get_dynamic_array(buf), buf, len); ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, action_code: %u, " + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" + ", link_id: %d, action_code: %u " "dialog_token: %u, status_code: %u, peer_capability: %u " "initiator: %s buf: %#.2x ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, - __entry->action_code, __entry->dialog_token, + __entry->link_id, __entry->action_code, __entry->dialog_token, __entry->status_code, __entry->peer_capability, BOOL_TO_STR(__entry->initiator), ((u8 *)__get_dynamic_array(buf))[0]) @@ -3946,6 +3966,21 @@ TRACE_EVENT(rdev_set_hw_timestamp, __entry->enable) ); +TRACE_EVENT(cfg80211_links_removed, + TP_PROTO(struct net_device *netdev, u16 link_mask), + TP_ARGS(netdev, link_mask), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(u16, link_mask) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->link_mask = link_mask; + ), + TP_printk(NETDEV_PR_FMT ", link_mask:%u", NETDEV_PR_ARG, + __entry->link_mask) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c 
index 9755ef281040..89c9ad6c886e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1646,6 +1646,114 @@ static u32 cfg80211_calculate_bitrate_eht(struct rate_info *rate) return result / 10000; } +static u32 cfg80211_calculate_bitrate_s1g(struct rate_info *rate) +{ + /* For 1, 2, 4, 8 and 16 MHz channels */ + static const u32 base[5][11] = { + { 300000, + 600000, + 900000, + 1200000, + 1800000, + 2400000, + 2700000, + 3000000, + 3600000, + 4000000, + /* MCS 10 supported in 1 MHz only */ + 150000, + }, + { 650000, + 1300000, + 1950000, + 2600000, + 3900000, + 5200000, + 5850000, + 6500000, + 7800000, + /* MCS 9 not valid */ + }, + { 1350000, + 2700000, + 4050000, + 5400000, + 8100000, + 10800000, + 12150000, + 13500000, + 16200000, + 18000000, + }, + { 2925000, + 5850000, + 8775000, + 11700000, + 17550000, + 23400000, + 26325000, + 29250000, + 35100000, + 39000000, + }, + { 8580000, + 11700000, + 17550000, + 23400000, + 35100000, + 46800000, + 52650000, + 58500000, + 70200000, + 78000000, + }, + }; + u32 bitrate; + /* default is 1 MHz index */ + int idx = 0; + + if (rate->mcs >= 11) + goto warn; + + switch (rate->bw) { + case RATE_INFO_BW_16: + idx = 4; + break; + case RATE_INFO_BW_8: + idx = 3; + break; + case RATE_INFO_BW_4: + idx = 2; + break; + case RATE_INFO_BW_2: + idx = 1; + break; + case RATE_INFO_BW_1: + idx = 0; + break; + case RATE_INFO_BW_5: + case RATE_INFO_BW_10: + case RATE_INFO_BW_20: + case RATE_INFO_BW_40: + case RATE_INFO_BW_80: + case RATE_INFO_BW_160: + default: + goto warn; + } + + bitrate = base[idx][rate->mcs]; + bitrate *= rate->nss; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +warn: + WARN_ONCE(1, "invalid rate bw=%d, mcs=%d, nss=%d\n", + rate->bw, rate->mcs, rate->nss); + return 0; +} + u32 cfg80211_calculate_bitrate(struct rate_info *rate) { if (rate->flags & RATE_INFO_FLAGS_MCS) @@ -1662,6 +1770,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) return cfg80211_calculate_bitrate_he(rate); if (rate->flags & RATE_INFO_FLAGS_EHT_MCS) return cfg80211_calculate_bitrate_eht(rate); + if (rate->flags & RATE_INFO_FLAGS_S1G_MCS) + return cfg80211_calculate_bitrate_s1g(rate); return rate->legacy; } diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index a125fd1fa134..a161c64d1765 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -815,6 +815,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, } } + /* Sanity-check to ensure we never end up _allocating_ zero + * bytes of data for extra. + */ + if (extra_size <= 0) + return -EFAULT; + /* kzalloc() ensures NULL-termination for essid_compat. */ extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index f231207ca210..f3eaa3388694 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -3,7 +3,7 @@ * cfg80211 wext compat for managed mode. 
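[Note: a worked example for the cfg80211_calculate_bitrate_s1g() hunk
above, using values from its table: 2 MHz (idx 1), MCS 7, NSS 2, short GI:

	base		= 6500000
	x NSS 2		= 13000000
	short GI	= 13000000 / 9 * 10 = 14444440
	result		= (14444440 + 50000) / 100000 = 144

i.e. 14.4 Mbit/s, since the function returns units of 100 kbit/s. The
"+ 50000" rounds to the nearest unit instead of truncating, which is what
the "do NOT round down here" comment is pointing at.]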
* * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2009, 2020-2022 Intel Corporation + * Copyright (C) 2009, 2020-2023 Intel Corporation */ #include <linux/export.h> @@ -338,6 +338,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, if (!ie_len) ie = NULL; + wiphy_lock(wdev->wiphy); wdev_lock(wdev); /* no change */ @@ -370,6 +371,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, err = 0; out: wdev_unlock(wdev); + wiphy_unlock(wdev->wiphy); return err; } diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5c7ad301d742..0fb5143bec7a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1757,7 +1757,6 @@ static const struct proto_ops x25_proto_ops = { .sendmsg = x25_sendmsg, .recvmsg = x25_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct packet_type x25_packet_type __read_mostly = { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 02207e852d79..06cead2b8e34 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -103,7 +103,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) mmap_read_lock(current->mm); npgs = pin_user_pages(address, umem->npgs, - gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL); + gup_flags | FOLL_LONGTERM, &umem->pgs[0]); mmap_read_unlock(current->mm); if (npgs != umem->npgs) { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index cc1e7f15fa73..5a8c0dd250af 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1389,7 +1389,6 @@ static const struct proto_ops xsk_proto_ops = { .sendmsg = xsk_sendmsg, .recvmsg = xsk_recvmsg, .mmap = xsk_mmap, - .sendpage = sock_no_sendpage, }; static void xsk_destruct(struct sock *sk) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index b2df1e0f8153..26f6d304451e 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -350,7 +350,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { struct xsk_dma_map *dma_map; - if (pool->dma_pages_cnt == 0) + if (!pool->dma_pages) return; dma_map = xp_find_dma_map(pool); @@ -364,6 +364,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) __xp_dma_unmap(dma_map, attrs); kvfree(pool->dma_pages); + pool->dma_pages = NULL; pool->dma_pages_cnt = 0; pool->dev = NULL; } @@ -503,7 +504,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) if (pool->unaligned) { xskb = pool->free_heads[--pool->free_heads_cnt]; xp_init_xskb_addr(xskb, pool, addr); - if (pool->dma_pages_cnt) + if (pool->dma_pages) xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); } else { xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; @@ -569,7 +570,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd if (pool->unaligned) { xskb = pool->free_heads[--pool->free_heads_cnt]; xp_init_xskb_addr(xskb, pool, addr); - if (pool->dma_pages_cnt) + if (pool->dma_pages) xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); } else { xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 2c1427074a3b..e1c526f97ce3 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -5,7 +5,6 @@ #include <linux/bpf.h> #include <linux/filter.h> -#include <linux/capability.h> #include <net/xdp_sock.h> #include <linux/slab.h> #include <linux/sched.h> @@ -68,9 +67,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) int numa_node; u64 size; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || 
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 872b80188e83..d3b3f9e720b3 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -205,26 +205,32 @@ static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg, static int espintcp_sendskmsg_locked(struct sock *sk, struct espintcp_msg *emsg, int flags) { + struct msghdr msghdr = { + .msg_flags = flags | MSG_SPLICE_PAGES | MSG_MORE, + }; struct sk_msg *skmsg = &emsg->skmsg; + bool more = flags & MSG_MORE; struct scatterlist *sg; int done = 0; int ret; - flags |= MSG_SENDPAGE_NOTLAST; sg = &skmsg->sg.data[skmsg->sg.start]; do { + struct bio_vec bvec; size_t size = sg->length - emsg->offset; int offset = sg->offset + emsg->offset; struct page *p; emsg->offset = 0; - if (sg_is_last(sg)) - flags &= ~MSG_SENDPAGE_NOTLAST; + if (sg_is_last(sg) && !more) + msghdr.msg_flags &= ~MSG_MORE; p = sg_page(sg); retry: - ret = do_tcp_sendpages(sk, p, offset, size, flags); + bvec_set_page(&bvec, p, size, offset); + iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size); + ret = tcp_sendmsg_locked(sk, &msghdr, size); if (ret < 0) { emsg->offset = offset - sg->offset; skmsg->sg.start += done; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 408f5e55744e..533697e2488f 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/gso.h> #include <net/xfrm.h> #include <linux/notifier.h> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 39fb91ff23d9..815b38080401 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -131,6 +131,7 @@ struct sec_path *secpath_set(struct sk_buff *skb) memset(sp->ovec, 0, sizeof(sp->ovec)); sp->olen = 0; sp->len = 0; + sp->verified_cnt = 0; return sp; } @@ -330,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, { switch (x->props.mode) { case XFRM_MODE_BEET: - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - case IPPROTO_IPV6: + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); } break; diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 1f99dc469027..a3319965470a 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -33,6 +33,7 @@ #include <linux/uaccess.h> #include <linux/atomic.h> +#include <net/gso.h> #include <net/icmp.h> #include <net/ip.h> #include <net/ipv6.h> @@ -310,6 +311,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) skb->mark = 0; } +static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type, unsigned short family) +{ + struct sec_path *sp; + + sp = skb_sec_path(skb); + if (sp && (sp->len || sp->olen) && + !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + goto discard; + + XFRM_SPI_SKB_CB(skb)->family = family; + if (family == AF_INET) { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + } else { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + } + + return xfrm_input(skb, nexthdr, spi, encap_type); +discard: + kfree_skb(skb); + return 0; +} + +static int xfrmi4_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET); +} + +static int 
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..a3319965470a 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -33,6 +33,7 @@
 #include <linux/uaccess.h>
 #include <linux/atomic.h>

+#include <net/gso.h>
 #include <net/icmp.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -310,6 +311,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->mark = 0;
 }

+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+		       int encap_type, unsigned short family)
+{
+	struct sec_path *sp;
+
+	sp = skb_sec_path(skb);
+	if (sp && (sp->len || sp->olen) &&
+	    !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+		goto discard;
+
+	XFRM_SPI_SKB_CB(skb)->family = family;
+	if (family == AF_INET) {
+		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+	} else {
+		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+	}
+
+	return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+	return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+	return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+			   0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+	return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+	return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
 static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
 {
 	const struct xfrm_mode *inner_mode;
@@ -945,8 +992,8 @@ static struct pernet_operations xfrmi_net_ops = {
 };

 static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
-	.handler	= xfrm6_rcv,
-	.input_handler	= xfrm_input,
+	.handler	= xfrmi6_rcv,
+	.input_handler	= xfrmi6_input,
 	.cb_handler	= xfrmi_rcv_cb,
 	.err_handler	= xfrmi6_err,
 	.priority	= 10,
@@ -996,8 +1043,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
 #endif

 static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
-	.handler	= xfrm4_rcv,
-	.input_handler	= xfrm_input,
+	.handler	= xfrmi4_rcv,
+	.input_handler	= xfrmi4_input,
 	.cb_handler	= xfrmi_rcv_cb,
 	.err_handler	= xfrmi4_err,
 	.priority	= 10,
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 80143360bf09..9c0fa0e1786a 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -74,14 +74,11 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 		if (!page)
 			return -ENOMEM;

-		__skb_frag_set_page(frag, page);
-
 		len = PAGE_SIZE;
 		if (dlen < len)
 			len = dlen;

-		skb_frag_off_set(frag, 0);
-		skb_frag_size_set(frag, len);
+		skb_frag_fill_page_desc(frag, page, 0, len);
 		memcpy(skb_frag_address(frag), scratch, len);

 		skb->truesize += len;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 369e5de8558f..662c83beb345 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <net/dst.h>
+#include <net/gso.h>
 #include <net/icmp.h>
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
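The xfrm_ipcomp.c hunk above folds three skb_frag_t setters into a single skb_frag_fill_page_desc() call. Going only by the removed lines, the helper behaves like this open-coded sequence (a sketch for illustration, not the helper's actual body):

	#include <linux/skbuff.h>

	/* What skb_frag_fill_page_desc(frag, page, 0, len) replaces in
	 * ipcomp_decompress(): page, offset and size filled in one shot.
	 */
	static void frag_fill_open_coded(skb_frag_t *frag, struct page *page,
					 unsigned int off, unsigned int size)
	{
		__skb_frag_set_page(frag, page);
		skb_frag_off_set(frag, off);
		skb_frag_size_set(frag, size);
	}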
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6d15788b5123..e7617c9959c3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1831,6 +1831,7 @@ again:

 		__xfrm_policy_unlink(pol, dir);
 		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+		xfrm_dev_policy_delete(pol);
 		cnt++;
 		xfrm_audit_policy_delete(pol, 1, task_valid);
 		xfrm_policy_kill(pol);
@@ -1869,6 +1870,7 @@ again:

 		__xfrm_policy_unlink(pol, dir);
 		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+		xfrm_dev_policy_delete(pol);
 		cnt++;
 		xfrm_audit_policy_delete(pol, 1, task_valid);
 		xfrm_policy_kill(pol);
@@ -3349,6 +3351,13 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
 			return ++idx;
 		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+			if (idx < sp->verified_cnt) {
+				/* Secpath entry previously verified, consider optional and
+				 * continue searching
+				 */
+				continue;
+			}
+
 			if (start == -1)
 				start = -2-idx;
 			break;
@@ -3723,6 +3732,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		 * Order is _important_. Later we will implement
 		 * some barriers, but at the moment barriers
 		 * are implied between each two transformations.
+		 * Upon success, marks secpath entries as having been
+		 * verified to allow them to be skipped in future policy
+		 * checks (e.g. nested tunnels).
 		 */
 		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
 			k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
@@ -3741,6 +3753,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}

 		xfrm_pols_put(pols, npols);
+		sp->verified_cnt = k;
+
 		return 1;
 	}
 	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
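Taken together, the xfrm_policy.c hunks above close a loop: a successful __xfrm_policy_check() records in sp->verified_cnt how many secpath entries its templates consumed, and xfrm_policy_ok() later treats a non-transport entry below that mark as optional rather than a hard mismatch, which is what lets nested-tunnel policy checks pass. A toy userspace model of that skip logic (simplified types and names, illustration only):

	#include <stdbool.h>

	struct secpath_model {
		int len;		/* number of secpath entries */
		int verified_cnt;	/* entries verified by an outer check */
		bool transport[8];	/* true if entry is transport mode */
	};

	/* Return idx + 1 of the entry matching this template, or -1. */
	static int policy_ok(const struct secpath_model *sp, int start,
			     bool (*tmpl_match)(int idx))
	{
		for (int idx = start; idx < sp->len; idx++) {
			if (tmpl_match(idx))
				return idx + 1;
			if (!sp->transport[idx]) {
				/* Previously verified: optional, keep going. */
				if (idx < sp->verified_cnt)
					continue;
				break;	/* unverified tunnel entry: mismatch */
			}
		}
		return -1;
	}

Note that secpath_set() (in the xfrm_input.c hunk further up) zeroes verified_cnt for every new secpath, so a stale count can never leak from one packet into the next packet's check.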