diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 17:27:38 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 17:27:38 +0100 |
commit | 643a7234e0960cf63f1a51a15cfc969fafcbabad (patch) | |
tree | e6522bf2e6a74148952af11a03606b152ee1e251 /drivers/block/nbd.c | |
parent | Merge tag 'for-5.16/block-2021-10-29' of git://git.kernel.dk/linux-block (diff) | |
parent | null_blk: Fix handling of submit_queues and poll_queues attributes (diff) | |
download | linux-643a7234e0960cf63f1a51a15cfc969fafcbabad.tar.xz linux-643a7234e0960cf63f1a51a15cfc969fafcbabad.zip |
Merge tag 'for-5.16/drivers-2021-10-29' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
- paride driver cleanups (Christoph)
- Remove cryptoloop support (Christoph)
- null_blk poll support (me)
- Now that add_disk() supports proper error handling, add it to various
drivers (Luis)
- Make ataflop actually work again (Michael)
- s390 dasd fixes (Stefan, Heiko)
- nbd fixes (Yu, Ye)
- Remove redundant wq flush in mtip32xx (Christophe)
- NVMe updates
- fix a multipath partition scanning deadlock (Hannes Reinecke)
- generate uevent once a multipath namespace is operational again
(Hannes Reinecke)
- support unique discovery controller NQNs (Hannes Reinecke)
- fix use-after-free when a port is removed (Israel Rukshin)
- clear shadow doorbell memory on resets (Keith Busch)
- use struct_size (Len Baker)
- add error handling support for add_disk (Luis Chamberlain)
- limit the maximal queue size for RDMA controllers (Max Gurtovoy)
- use a few more symbolic names (Max Gurtovoy)
- fix error code in nvme_rdma_setup_ctrl (Max Gurtovoy)
- add support for ->map_queues on FC (Saurav Kashyap)
- support the current discovery subsystem entry (Hannes Reinecke)
- use flex_array_size and struct_size (Len Baker)
- bcache fixes (Christoph, Coly, Chao, Lin, Qing)
- MD updates (Christoph, Guoqing, Xiao)
- Misc fixes (Dan, Ding, Jiapeng, Shin'ichiro, Ye)
* tag 'for-5.16/drivers-2021-10-29' of git://git.kernel.dk/linux-block: (117 commits)
null_blk: Fix handling of submit_queues and poll_queues attributes
block: ataflop: Fix warning comparing pointer to 0
bcache: replace snprintf in show functions with sysfs_emit
bcache: move uapi header bcache.h to bcache code directory
nvmet: use flex_array_size and struct_size
nvmet: register discovery subsystem as 'current'
nvmet: switch check for subsystem type
nvme: add new discovery log page entry definitions
block: ataflop: more blk-mq refactoring fixes
block: remove support for cryptoloop and the xor transfer
mtd: add add_disk() error handling
rnbd: add error handling support for add_disk()
um/drivers/ubd_kern: add error handling support for add_disk()
m68k/emu/nfblock: add error handling support for add_disk()
xen-blkfront: add error handling support for add_disk()
bcache: add error handling support for add_disk()
dm: add add_disk() error handling
block: aoe: fixup coccinelle warnings
nvmet: use struct_size over open coded arithmetic
nvme: drop scan_lock and always kick requeue list when removing namespaces
...
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r-- | drivers/block/nbd.c | 161 |
1 file changed, 112 insertions, 49 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 504c20a2f33e..b47b2a87ae8f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -122,15 +122,21 @@ struct nbd_device { struct work_struct remove_work; struct list_head list; - struct task_struct *task_recv; struct task_struct *task_setup; unsigned long flags; + pid_t pid; /* pid of nbd-client, if attached */ char *backend; }; #define NBD_CMD_REQUEUED 1 +/* + * This flag will be set if nbd_queue_rq() succeed, and will be checked and + * cleared in completion. Both setting and clearing of the flag are protected + * by cmd->lock. + */ +#define NBD_CMD_INFLIGHT 2 struct nbd_cmd { struct nbd_device *nbd; @@ -217,7 +223,7 @@ static ssize_t pid_show(struct device *dev, struct gendisk *disk = dev_to_disk(dev); struct nbd_device *nbd = (struct nbd_device *)disk->private_data; - return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); + return sprintf(buf, "%d\n", nbd->pid); } static const struct device_attribute pid_attr = { @@ -322,7 +328,7 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, nbd->config->bytesize = bytesize; nbd->config->blksize_bits = __ffs(blksize); - if (!nbd->task_recv) + if (!nbd->pid) return 0; if (nbd->config->flags & NBD_FLAG_SEND_TRIM) { @@ -398,6 +404,11 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!mutex_trylock(&cmd->lock)) return BLK_EH_RESET_TIMER; + if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + mutex_unlock(&cmd->lock); + return BLK_EH_DONE; + } + if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; mutex_unlock(&cmd->lock); @@ -477,7 +488,8 @@ done: } /* - * Send or receive packet. + * Send or receive packet. Return a positive value on success and + * negtive value on failue, and never return 0. 
*/ static int sock_xmit(struct nbd_device *nbd, int index, int send, struct iov_iter *iter, int msg_flags, int *sent) @@ -603,7 +615,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); trace_nbd_header_sent(req, handle); - if (result <= 0) { + if (result < 0) { if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, * however if we have sent something we need to make @@ -647,7 +659,7 @@ send_pages: skip = 0; } result = sock_xmit(nbd, index, 1, &from, flags, &sent); - if (result <= 0) { + if (result < 0) { if (was_interrupted(result)) { /* We've already sent the header, we * have no choice but to set pending and @@ -681,38 +693,45 @@ out: return 0; } -/* NULL returned = something went wrong, inform userspace */ -static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) +static int nbd_read_reply(struct nbd_device *nbd, int index, + struct nbd_reply *reply) { - struct nbd_config *config = nbd->config; - int result; - struct nbd_reply reply; - struct nbd_cmd *cmd; - struct request *req = NULL; - u64 handle; - u16 hwq; - u32 tag; - struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; + struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)}; struct iov_iter to; - int ret = 0; + int result; - reply.magic = 0; - iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); + reply->magic = 0; + iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); - if (result <= 0) { - if (!nbd_disconnected(config)) + if (result < 0) { + if (!nbd_disconnected(nbd->config)) dev_err(disk_to_dev(nbd->disk), "Receive control failed (result %d)\n", result); - return ERR_PTR(result); + return result; } - if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { + if (ntohl(reply->magic) != NBD_REPLY_MAGIC) { dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", - (unsigned 
long)ntohl(reply.magic)); - return ERR_PTR(-EPROTO); + (unsigned long)ntohl(reply->magic)); + return -EPROTO; } - memcpy(&handle, reply.handle, sizeof(handle)); + return 0; +} + +/* NULL returned = something went wrong, inform userspace */ +static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, + struct nbd_reply *reply) +{ + int result; + struct nbd_cmd *cmd; + struct request *req = NULL; + u64 handle; + u16 hwq; + u32 tag; + int ret = 0; + + memcpy(&handle, reply->handle, sizeof(handle)); tag = nbd_handle_to_tag(handle); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) @@ -727,6 +746,16 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); + if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)", + tag, cmd->status, cmd->flags); + ret = -ENOENT; + goto out; + } + if (cmd->index != index) { + dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)", + tag, index, cmd->index); + } if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); @@ -745,9 +774,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ret = -ENOENT; goto out; } - if (ntohl(reply.error)) { + if (ntohl(reply->error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", - ntohl(reply.error)); + ntohl(reply->error)); cmd->status = BLK_STS_IOERR; goto out; } @@ -756,11 +785,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (rq_data_dir(req) != WRITE) { struct req_iterator iter; struct bio_vec bvec; + struct iov_iter to; rq_for_each_segment(bvec, req, iter) { iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, 
NULL); - if (result <= 0) { + if (result < 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); /* @@ -769,7 +799,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) * and let the timeout stuff handle resubmitting * this request onto another connection. */ - if (nbd_disconnected(config)) { + if (nbd_disconnected(nbd->config)) { cmd->status = BLK_STS_IOERR; goto out; } @@ -793,24 +823,46 @@ static void recv_work(struct work_struct *work) work); struct nbd_device *nbd = args->nbd; struct nbd_config *config = nbd->config; + struct request_queue *q = nbd->disk->queue; + struct nbd_sock *nsock; struct nbd_cmd *cmd; struct request *rq; while (1) { - cmd = nbd_read_stat(nbd, args->index); - if (IS_ERR(cmd)) { - struct nbd_sock *nsock = config->socks[args->index]; + struct nbd_reply reply; - mutex_lock(&nsock->tx_lock); - nbd_mark_nsock_dead(nbd, nsock, 1); - mutex_unlock(&nsock->tx_lock); + if (nbd_read_reply(nbd, args->index, &reply)) + break; + + /* + * Grab .q_usage_counter so request pool won't go away, then no + * request use-after-free is possible during nbd_handle_reply(). + * If queue is frozen, there won't be any inflight requests, we + * needn't to handle the incoming garbage message. 
+ */ + if (!percpu_ref_tryget(&q->q_usage_counter)) { + dev_err(disk_to_dev(nbd->disk), "%s: no io inflight\n", + __func__); + break; + } + + cmd = nbd_handle_reply(nbd, args->index, &reply); + if (IS_ERR(cmd)) { + percpu_ref_put(&q->q_usage_counter); break; } rq = blk_mq_rq_from_pdu(cmd); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); + percpu_ref_put(&q->q_usage_counter); } + + nsock = config->socks[args->index]; + mutex_lock(&nsock->tx_lock); + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); + nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); @@ -826,6 +878,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) return true; mutex_lock(&cmd->lock); + if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + mutex_unlock(&cmd->lock); + return true; + } cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); @@ -907,7 +963,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) if (!refcount_inc_not_zero(&nbd->config_refs)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Socks array is empty\n"); - blk_mq_start_request(req); return -EINVAL; } config = nbd->config; @@ -916,7 +971,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on invalid socket\n"); nbd_config_put(nbd); - blk_mq_start_request(req); return -EINVAL; } cmd->status = BLK_STS_OK; @@ -940,7 +994,6 @@ again: */ sock_shutdown(nbd); nbd_config_put(nbd); - blk_mq_start_request(req); return -EIO; } goto again; @@ -962,7 +1015,13 @@ again: * returns EAGAIN can be retried on a different socket. */ ret = nbd_send_cmd(nbd, cmd, index); - if (ret == -EAGAIN) { + /* + * Access to this flag is protected by cmd->lock, thus it's safe to set + * the flag after nbd_send_cmd() succeed to send request to server. 
+ */ + if (!ret) + __set_bit(NBD_CMD_INFLIGHT, &cmd->flags); + else if (ret == -EAGAIN) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Request send failed, requeueing\n"); nbd_mark_nsock_dead(nbd, nsock, 1); @@ -1199,7 +1258,7 @@ static void send_disconnects(struct nbd_device *nbd) iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); mutex_lock(&nsock->tx_lock); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); - if (ret <= 0) + if (ret < 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); mutex_unlock(&nsock->tx_lock); @@ -1236,7 +1295,7 @@ static void nbd_config_put(struct nbd_device *nbd) if (test_and_clear_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags)) device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - nbd->task_recv = NULL; + nbd->pid = 0; if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags)) { device_remove_file(disk_to_dev(nbd->disk), &backend_attr); @@ -1277,7 +1336,7 @@ static int nbd_start_device(struct nbd_device *nbd) int num_connections = config->num_connections; int error = 0, i; - if (nbd->task_recv) + if (nbd->pid) return -EBUSY; if (!config->socks) return -EINVAL; @@ -1296,7 +1355,7 @@ static int nbd_start_device(struct nbd_device *nbd) } blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); - nbd->task_recv = current; + nbd->pid = task_pid_nr(current); nbd_parse_flags(nbd); @@ -1552,8 +1611,8 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) { struct nbd_device *nbd = s->private; - if (nbd->task_recv) - seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); + if (nbd->pid) + seq_printf(s, "recv: %d\n", nbd->pid); return 0; } @@ -1757,7 +1816,9 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_err_disk; /* * Now publish the device. 
@@ -1766,6 +1827,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) nbd_total_devices++; return nbd; +out_err_disk: + blk_cleanup_disk(disk); out_free_idr: mutex_lock(&nbd_index_mutex); idr_remove(&nbd_index_idr, index); @@ -2130,7 +2193,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) mutex_lock(&nbd->config_lock); config = nbd->config; if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) || - !nbd->task_recv) { + !nbd->pid) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); ret = -EINVAL; |