Diffstat (limited to 'drivers/block')
37 files changed, 1149 insertions, 1309 deletions
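Most of the churn in this diff follows one theme: struct request no longer embeds the SCSI passthrough fields (CDB, sense buffer, residual count). Drivers that still need them select BLK_SCSI_REQUEST, reserve a struct scsi_request in front of each request via the queue's cmd_size, and test blk_rq_is_passthrough() where they used to compare cmd_type against REQ_TYPE_BLOCK_PC. Below is a minimal sketch of that pattern, distilled from the cciss conversion that follows; the mydrv_* names are placeholders, not functions from this diff:

#include <linux/blkdev.h>
#include <scsi/scsi_request.h>

/* Sketch only: stub standing in for wherever a real driver would
 * program its hardware with the CDB. */
static void mydrv_send_cdb(const unsigned char *cdb, unsigned int len)
{
}

/* Queue setup: ask the block layer to allocate a struct scsi_request
 * ahead of each struct request instead of relying on the old
 * cmd/sense fields embedded in the request itself. */
static int mydrv_setup_queue(struct gendisk *disk, spinlock_t *lock,
			     request_fn_proc *fn)
{
	int ret;

	disk->queue = blk_alloc_queue(GFP_KERNEL);
	if (!disk->queue)
		return -ENOMEM;

	disk->queue->cmd_size = sizeof(struct scsi_request);
	disk->queue->request_fn = fn;
	disk->queue->queue_lock = lock;
	ret = blk_init_allocated_queue(disk->queue);
	if (ret)
		blk_cleanup_queue(disk->queue);
	return ret;
}

/* Request handling: blk_rq_is_passthrough() replaces the removed
 * rq->cmd_type == REQ_TYPE_BLOCK_PC test, and the CDB and sense
 * state now live in the scsi_request returned by scsi_req(). */
static void mydrv_start_request(struct request *rq)
{
	if (blk_rq_is_passthrough(rq)) {
		struct scsi_request *sreq = scsi_req(rq);

		mydrv_send_cdb(sreq->cmd, sreq->cmd_len);
		sreq->sense_len = 0;	/* no sense data gathered yet */
	}
}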
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 223ff2fcae7e..f744de7a0f9b 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -69,6 +69,7 @@ config AMIGA_Z2RAM config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST + select BLK_SCSI_REQUEST # only for the generic cdrom code help A standard SEGA Dreamcast comes with a modified CD ROM drive called a "GD-ROM" by SEGA to signify it is capable of reading special disks @@ -114,6 +115,7 @@ config BLK_CPQ_CISS_DA tristate "Compaq Smart Array 5xxx support" depends on PCI select CHECK_SIGNATURE + select BLK_SCSI_REQUEST help This is the driver for Compaq Smart Array 5xxx controllers. Everyone using these boards should say Y here. @@ -386,6 +388,7 @@ config BLK_DEV_RAM_DAX config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media (DEPRECATED)" depends on !UML + select BLK_SCSI_REQUEST help Note: This driver is deprecated and will be removed from the kernel in the near future! @@ -501,6 +504,16 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. +config VIRTIO_BLK_SCSI + bool "SCSI passthrough request for the Virtio block driver" + depends on VIRTIO_BLK + select BLK_SCSI_REQUEST + ---help--- + Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on + virtio-blk devices. This is only supported for the legacy + virtio protocol and not enabled by default by any hypervisor. + You probably want to use virtio-scsi instead. + config BLK_DEV_HD bool "Very old hard disk (MFM/RLL/IDE) driver" depends on HAVE_IDE diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index ec9d8610b25f..027b876370bc 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -396,8 +396,8 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - q->backing_dev_info.name = "aoe"; - q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_SIZE; + q->backing_dev_info->name = "aoe"; + q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e5c5b8eb14a9..8e1a4554951c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -52,6 +52,7 @@ #include <scsi/scsi.h> #include <scsi/sg.h> #include <scsi/scsi_ioctl.h> +#include <scsi/scsi_request.h> #include <linux/cdrom.h> #include <linux/scatterlist.h> #include <linux/kthread.h> @@ -347,7 +348,7 @@ static void cciss_unmap_sg_chain_block(ctlr_info_t *h, CommandList_struct *c) pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE); } -static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, +static int cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, SGDescriptor_struct *chain_block, int len) { SGDescriptor_struct *chain_sg; @@ -358,8 +359,16 @@ static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, chain_sg->Len = len; temp64.val = pci_map_single(h->pdev, chain_block, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + dev_warn(&h->pdev->dev, + "%s: error mapping chain block for DMA\n", + __func__); + return -1; + } chain_sg->Addr.lower = temp64.val32.lower; chain_sg->Addr.upper = temp64.val32.upper; + + return 0; } #include "cciss_scsi.c" /* For SCSI tape support */ @@ -1853,8 +1862,8 @@ static void cciss_softirq_done(struct request *rq) dev_dbg(&h->pdev->dev, "Done with
%p\n", rq); /* set the residual count for pc requests */ - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) - rq->resid_len = c->err_info->ResidualCnt; + if (blk_rq_is_passthrough(rq)) + scsi_req(rq)->resid_len = c->err_info->ResidualCnt; blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); @@ -1941,9 +1950,16 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol, static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, int drv_index) { - disk->queue = blk_init_queue(do_cciss_request, &h->lock); + disk->queue = blk_alloc_queue(GFP_KERNEL); if (!disk->queue) goto init_queue_failure; + + disk->queue->cmd_size = sizeof(struct scsi_request); + disk->queue->request_fn = do_cciss_request; + disk->queue->queue_lock = &h->lock; + if (blk_init_allocated_queue(disk->queue) < 0) + goto cleanup_queue; + sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index); disk->major = h->major; disk->first_minor = drv_index << NWD_SHIFT; @@ -3075,7 +3091,7 @@ static inline int evaluate_target_status(ctlr_info_t *h, driver_byte = DRIVER_OK; msg_byte = cmd->err_info->CommandStatus; /* correct? seems too device specific */ - if (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) + if (blk_rq_is_passthrough(cmd->rq)) host_byte = DID_PASSTHROUGH; else host_byte = DID_OK; @@ -3084,7 +3100,7 @@ static inline int evaluate_target_status(ctlr_info_t *h, host_byte, driver_byte); if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) { - if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) + if (!blk_rq_is_passthrough(cmd->rq)) dev_warn(&h->pdev->dev, "cmd %p " "has SCSI Status 0x%x\n", cmd, cmd->err_info->ScsiStatus); @@ -3095,31 +3111,23 @@ static inline int evaluate_target_status(ctlr_info_t *h, sense_key = 0xf & cmd->err_info->SenseInfo[2]; /* no status or recovered error */ if (((sense_key == 0x0) || (sense_key == 0x1)) && - (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC)) + !blk_rq_is_passthrough(cmd->rq)) error_value = 0; if (check_for_unit_attention(h, cmd)) { - *retry_cmd = !(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC); + *retry_cmd = !blk_rq_is_passthrough(cmd->rq); return 0; } /* Not SG_IO or similar? 
*/ - if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) { + if (!blk_rq_is_passthrough(cmd->rq)) { if (error_value != 0) dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION" " sense key = 0x%x\n", cmd, sense_key); return error_value; } - /* SG_IO or similar, copy sense data back */ - if (cmd->rq->sense) { - if (cmd->rq->sense_len > cmd->err_info->SenseLen) - cmd->rq->sense_len = cmd->err_info->SenseLen; - memcpy(cmd->rq->sense, cmd->err_info->SenseInfo, - cmd->rq->sense_len); - } else - cmd->rq->sense_len = 0; - + scsi_req(cmd->rq)->sense_len = cmd->err_info->SenseLen; return error_value; } @@ -3146,15 +3154,14 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, rq->errors = evaluate_target_status(h, cmd, &retry_cmd); break; case CMD_DATA_UNDERRUN: - if (cmd->rq->cmd_type == REQ_TYPE_FS) { + if (!blk_rq_is_passthrough(cmd->rq)) { dev_warn(&h->pdev->dev, "cmd %p has" " completed with data underrun " "reported\n", cmd); - cmd->rq->resid_len = cmd->err_info->ResidualCnt; } break; case CMD_DATA_OVERRUN: - if (cmd->rq->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(cmd->rq)) dev_warn(&h->pdev->dev, "cciss: cmd %p has" " completed with data overrun " "reported\n", cmd); @@ -3164,7 +3171,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "reported invalid\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_PROTOCOL_ERR: @@ -3172,7 +3179,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "protocol error\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_HARDWARE_ERR: @@ -3180,7 +3187,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, " hardware error\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_CONNECTION_LOST: @@ -3188,7 +3195,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "connection lost\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_ABORTED: @@ -3196,7 +3203,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "aborted\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); break; case CMD_ABORT_FAILED: @@ -3204,7 +3211,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "abort failed\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? 
DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNSOLICITED_ABORT: @@ -3219,21 +3226,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "%p retried too many times\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); break; case CMD_TIMEOUT: dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNABORTABLE: dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; default: @@ -3242,7 +3249,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, cmd->err_info->CommandStatus); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); } @@ -3370,15 +3377,31 @@ static void do_cciss_request(struct request_queue *q) temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]), tmp_sg[i].offset, tmp_sg[i].length, dir); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + dev_warn(&h->pdev->dev, + "%s: error mapping page for DMA\n", __func__); + creq->errors = make_status_bytes(SAM_STAT_GOOD, + 0, DRIVER_OK, + DID_SOFT_ERROR); + cmd_free(h, c); + return; + } curr_sg[sg_index].Addr.lower = temp64.val32.lower; curr_sg[sg_index].Addr.upper = temp64.val32.upper; curr_sg[sg_index].Ext = 0; /* we are not chaining */ ++sg_index; } - if (chained) - cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], + if (chained) { + if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], (seg - (h->max_cmd_sgentries - 1)) * - sizeof(SGDescriptor_struct)); + sizeof(SGDescriptor_struct))) { + creq->errors = make_status_bytes(SAM_STAT_GOOD, + 0, DRIVER_OK, + DID_SOFT_ERROR); + cmd_free(h, c); + return; + } + } /* track how many SG entries we are using */ if (seg > h->maxSG) @@ -3395,7 +3418,9 @@ static void do_cciss_request(struct request_queue *q) c->Header.SGList = h->max_cmd_sgentries; set_performant_mode(h, c); - if (likely(creq->cmd_type == REQ_TYPE_FS)) { + switch (req_op(creq)) { + case REQ_OP_READ: + case REQ_OP_WRITE: if(h->cciss_read == CCISS_READ_10) { c->Request.CDB[1] = 0; c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */ @@ -3425,12 +3450,16 @@ static void do_cciss_request(struct request_queue *q) c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff; c->Request.CDB[14] = c->Request.CDB[15] = 0; } - } else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) { - c->Request.CDBLen = creq->cmd_len; - memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB); - } else { + break; + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + c->Request.CDBLen = scsi_req(creq)->cmd_len; + memcpy(c->Request.CDB, scsi_req(creq)->cmd, BLK_MAX_CDB); + scsi_req(creq)->sense = c->err_info->SenseInfo; + break; + default: dev_warn(&h->pdev->dev, "bad request type %d\n", - creq->cmd_type); + creq->cmd_flags); BUG(); } @@ -4074,41 +4103,27 @@ clean_up: static void cciss_interrupt_mode(ctlr_info_t *h) { -#ifdef CONFIG_PCI_MSI - int err; - struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1}, - 
{0, 2}, {0, 3} - }; + int ret; /* Some boards advertise MSI but don't really support it */ if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) || (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11)) goto default_int_mode; - if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { - err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4); - if (!err) { - h->intr[0] = cciss_msix_entries[0].vector; - h->intr[1] = cciss_msix_entries[1].vector; - h->intr[2] = cciss_msix_entries[2].vector; - h->intr[3] = cciss_msix_entries[3].vector; - h->msix_vector = 1; - return; - } else { - dev_warn(&h->pdev->dev, - "MSI-X init failed %d\n", err); - } - } - if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) { - if (!pci_enable_msi(h->pdev)) - h->msi_vector = 1; - else - dev_warn(&h->pdev->dev, "MSI init failed\n"); + ret = pci_alloc_irq_vectors(h->pdev, 4, 4, PCI_IRQ_MSIX); + if (ret >= 0) { + h->intr[0] = pci_irq_vector(h->pdev, 0); + h->intr[1] = pci_irq_vector(h->pdev, 1); + h->intr[2] = pci_irq_vector(h->pdev, 2); + h->intr[3] = pci_irq_vector(h->pdev, 3); + return; } + + ret = pci_alloc_irq_vectors(h->pdev, 1, 1, PCI_IRQ_MSI); + default_int_mode: -#endif /* CONFIG_PCI_MSI */ /* if we get here we're going to use the default interrupt mode */ - h->intr[h->intr_mode] = h->pdev->irq; + h->intr[h->intr_mode] = pci_irq_vector(h->pdev, 0); return; } @@ -4888,7 +4903,7 @@ static int cciss_request_irq(ctlr_info_t *h, irqreturn_t (*msixhandler)(int, void *), irqreturn_t (*intxhandler)(int, void *)) { - if (h->msix_vector || h->msi_vector) { + if (h->pdev->msi_enabled || h->pdev->msix_enabled) { if (!request_irq(h->intr[h->intr_mode], msixhandler, 0, h->devname, h)) return 0; @@ -4934,12 +4949,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h) int ctlr = h->ctlr; free_irq(h->intr[h->intr_mode], h); -#ifdef CONFIG_PCI_MSI - if (h->msix_vector) - pci_disable_msix(h->pdev); - else if (h->msi_vector) - pci_disable_msi(h->pdev); -#endif /* CONFIG_PCI_MSI */ + pci_free_irq_vectors(h->pdev); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); cciss_free_scatterlists(h); cciss_free_cmd_pool(h); @@ -5295,12 +5305,7 @@ static void cciss_remove_one(struct pci_dev *pdev) cciss_shutdown(pdev); -#ifdef CONFIG_PCI_MSI - if (h->msix_vector) - pci_disable_msix(h->pdev); - else if (h->msi_vector) - pci_disable_msi(h->pdev); -#endif /* CONFIG_PCI_MSI */ + pci_free_irq_vectors(h->pdev); iounmap(h->transtable); iounmap(h->cfgtable); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 7fda30e4a241..24b5fd75501a 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -90,8 +90,6 @@ struct ctlr_info # define SIMPLE_MODE_INT 2 # define MEMQ_MODE_INT 3 unsigned int intr[4]; - unsigned int msix_vector; - unsigned int msi_vector; int intr_mode; int cciss_max_sectors; BYTE cciss_read; @@ -333,7 +331,7 @@ static unsigned long SA5_performant_completed(ctlr_info_t *h) */ register_value = readl(h->vaddr + SA5_OUTDB_STATUS); /* msi auto clears the interrupt pending bit. */ - if (!(h->msi_vector || h->msix_vector)) { + if (!(h->pdev->msi_enabled || h->pdev->msix_enabled)) { writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); /* Do a read in order to flush the write to the controller * (as per spec.) 
@@ -393,7 +391,7 @@ static bool SA5_performant_intr_pending(ctlr_info_t *h) if (!register_value) return false; - if (h->msi_vector || h->msix_vector) + if (h->pdev->msi_enabled || h->pdev->msix_enabled) return true; /* Read outbound doorbell to flush */ @@ -402,27 +400,27 @@ static bool SA5_performant_intr_pending(ctlr_info_t *h) } static struct access_method SA5_access = { - SA5_submit_command, - SA5_intr_mask, - SA5_fifo_full, - SA5_intr_pending, - SA5_completed, + .submit_command = SA5_submit_command, + .set_intr_mask = SA5_intr_mask, + .fifo_full = SA5_fifo_full, + .intr_pending = SA5_intr_pending, + .command_completed = SA5_completed, }; static struct access_method SA5B_access = { - SA5_submit_command, - SA5B_intr_mask, - SA5_fifo_full, - SA5B_intr_pending, - SA5_completed, + .submit_command = SA5_submit_command, + .set_intr_mask = SA5B_intr_mask, + .fifo_full = SA5_fifo_full, + .intr_pending = SA5B_intr_pending, + .command_completed = SA5_completed, }; static struct access_method SA5_performant_access = { - SA5_submit_command, - SA5_performant_intr_mask, - SA5_fifo_full, - SA5_performant_intr_pending, - SA5_performant_completed, + .submit_command = SA5_submit_command, + .set_intr_mask = SA5_performant_intr_mask, + .fifo_full = SA5_fifo_full, + .intr_pending = SA5_performant_intr_pending, + .command_completed = SA5_performant_completed, }; struct board_type { diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index a18de9d727b0..01a1f7e24978 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -17,15 +17,15 @@ * 02111-1307, USA. * * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * + * * Author: Stephen M. Cameron */ #ifdef CONFIG_CISS_SCSI_TAPE -/* Here we have code to present the driver as a scsi driver - as it is simultaneously presented as a block driver. The +/* Here we have code to present the driver as a scsi driver + as it is simultaneously presented as a block driver. The reason for doing this is to allow access to SCSI tape drives - through the array controller. Note in particular, neither + through the array controller. Note in particular, neither physical nor logical disks are presented through the scsi layer. 
*/ #include <linux/timer.h> @@ -37,7 +37,7 @@ #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> -#include <scsi/scsi_host.h> +#include <scsi/scsi_host.h> #include "cciss_scsi.h" @@ -120,7 +120,7 @@ struct cciss_scsi_adapter_data_t { struct cciss_scsi_cmd_stack_t cmd_stack; SGDescriptor_struct **cmd_sg_list; int registered; - spinlock_t lock; // to protect ccissscsi[ctlr]; + spinlock_t lock; // to protect ccissscsi[ctlr]; }; #define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \ @@ -143,36 +143,36 @@ scsi_cmd_alloc(ctlr_info_t *h) u64bit temp64; sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; - if (stk->top < 0) + if (stk->top < 0) return NULL; - c = stk->elem[stk->top]; + c = stk->elem[stk->top]; /* memset(c, 0, sizeof(*c)); */ memset(&c->cmd, 0, sizeof(c->cmd)); memset(&c->Err, 0, sizeof(c->Err)); /* set physical addr of cmd and addr of scsi parameters */ - c->cmd.busaddr = c->busaddr; + c->cmd.busaddr = c->busaddr; c->cmd.cmdindex = c->cmdindex; - /* (__u32) (stk->cmd_pool_handle + + /* (__u32) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */ temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct)); - /* (__u64) (stk->cmd_pool_handle + + /* (__u64) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) + sizeof(CommandList_struct)); */ stk->top--; c->cmd.ErrDesc.Addr.lower = temp64.val32.lower; c->cmd.ErrDesc.Addr.upper = temp64.val32.upper; c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct); - + c->cmd.ctlr = h->ctlr; c->cmd.err_info = &c->Err; return (CommandList_struct *) c; } -static void +static void scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c) { /* assume only one process in here at a time, locking done by caller. */ @@ -183,7 +183,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c) struct cciss_scsi_cmd_stack_t *stk; sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; stk->top++; if (stk->top >= stk->nelems) { dev_err(&h->pdev->dev, @@ -228,7 +228,7 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa) } for (i = 0; i < stk->nelems; i++) { stk->elem[i] = &stk->pool[i]; - stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + + stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i)); stk->elem[i]->cmdindex = i; } @@ -244,7 +244,7 @@ scsi_cmd_stack_free(ctlr_info_t *h) size_t size; sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; if (stk->top != stk->nelems-1) { dev_warn(&h->pdev->dev, "bug: %d scsi commands are still outstanding.\n", @@ -266,7 +266,7 @@ print_cmd(CommandList_struct *cp) printk("queue:%d\n", cp->Header.ReplyQueue); printk("sglist:%d\n", cp->Header.SGList); printk("sgtot:%d\n", cp->Header.SGTotal); - printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper, + printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper, cp->Header.Tag.lower); printk("LUN:0x%8phN\n", cp->Header.LUN.LunAddrBytes); printk("CDBLen:%d\n", cp->Request.CDBLen); @@ -275,8 +275,8 @@ print_cmd(CommandList_struct *cp) printk(" Dir:%d\n",cp->Request.Type.Direction); printk("Timeout:%d\n",cp->Request.Timeout); printk("CDB: %16ph\n", cp->Request.CDB); - printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n", - cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower, + printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n", + cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower, cp->ErrDesc.Len); printk("sgs..........Errorinfo:\n"); printk("scsistatus:%d\n", cp->err_info->ScsiStatus); @@ -289,7 +289,7 @@ print_cmd(CommandList_struct *cp) } 
#endif -static int +static int find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun) { /* finds an unused bus, target, lun for a new device */ @@ -299,24 +299,24 @@ find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun) memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA); - target_taken[SELF_SCSI_ID] = 1; + target_taken[SELF_SCSI_ID] = 1; for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) target_taken[ccissscsi[h->ctlr].dev[i].target] = 1; - + for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) { if (!target_taken[i]) { *bus = 0; *target=i; *lun = 0; found=1; break; } } - return (!found); + return (!found); } struct scsi2map { char scsi3addr[8]; int bus, target, lun; }; -static int +static int cciss_scsi_add_entry(ctlr_info_t *h, int hostno, struct cciss_scsi_dev_t *device, struct scsi2map *added, int *nadded) @@ -381,8 +381,8 @@ cciss_scsi_add_entry(ctlr_info_t *h, int hostno, ccissscsi[h->ctlr].ndevices++; - /* initially, (before registering with scsi layer) we don't - know our hostno and we don't want to print anything first + /* initially, (before registering with scsi layer) we don't + know our hostno and we don't want to print anything first time anyway (the scsi layer's inquiries will show that info) */ if (hostno != -1) dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n", @@ -467,7 +467,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno, /* sd contains scsi3 addresses and devtypes, but bus target and lun are not filled in. This funciton takes what's in sd to be the current and adjusts - ccissscsi[] to be in line with what's in sd. */ + ccissscsi[] to be in line with what's in sd. */ int i,j, found, changes=0; struct cciss_scsi_dev_t *csd; @@ -492,7 +492,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno, if (hostno != -1) /* if it's not the first time... */ sh = h->scsi_ctlr->scsi_host; - /* find any devices in ccissscsi[] that are not in + /* find any devices in ccissscsi[] that are not in sd[] and remove them from ccissscsi[] */ i = 0; @@ -512,7 +512,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno, } } - if (found == 0) { /* device no longer present. */ + if (found == 0) { /* device no longer present. */ changes++; cciss_scsi_remove_entry(h, hostno, i, removed, &nremoved); @@ -641,14 +641,13 @@ lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr) return -1; } -static void +static void cciss_scsi_setup(ctlr_info_t *h) { struct cciss_scsi_adapter_data_t * shba; ccissscsi[h->ctlr].ndevices = 0; - shba = (struct cciss_scsi_adapter_data_t *) - kmalloc(sizeof(*shba), GFP_KERNEL); + shba = kmalloc(sizeof(*shba), GFP_KERNEL); if (shba == NULL) return; shba->scsi_host = NULL; @@ -693,20 +692,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, /* copy the sense data whether we need to or not. */ - memcpy(cmd->sense_buffer, ei->SenseInfo, + memcpy(cmd->sense_buffer, ei->SenseInfo, ei->SenseLen > SCSI_SENSE_BUFFERSIZE ? - SCSI_SENSE_BUFFERSIZE : + SCSI_SENSE_BUFFERSIZE : ei->SenseLen); scsi_set_resid(cmd, ei->ResidualCnt); - if(ei->CommandStatus != 0) - { /* an error has occurred */ - switch(ei->CommandStatus) - { + if (ei->CommandStatus != 0) { /* an error has occurred */ + switch (ei->CommandStatus) { case CMD_TARGET_STATUS: /* Pass it up to the upper layers... 
*/ if (!ei->ScsiStatus) { - + /* Ordinarily, this case should never happen, but there is a bug in some released firmware revisions that allows it to happen if, for example, a 4100 backplane loses power and the tape @@ -731,7 +728,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, print_cmd(c); */ /* We get CMD_INVALID if you address a non-existent tape drive instead - of a selection timeout (no response). You will see this if you yank + of a selection timeout (no response). You will see this if you yank out a tape drive, then try to access it. This is kind of a shame because it means that any other CMD_INVALID (e.g. driver bug) will get interpreted as a missing target. */ @@ -780,7 +777,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, cmd->result = DID_ERROR << 16; dev_warn(&h->pdev->dev, "%p returned unknown status %x\n", c, - ei->CommandStatus); + ei->CommandStatus); } } cmd->scsi_done(cmd); @@ -796,15 +793,15 @@ cciss_scsi_detect(ctlr_info_t *h) sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *)); if (sh == NULL) goto fail; - sh->io_port = 0; // good enough? FIXME, + sh->io_port = 0; // good enough? FIXME, sh->n_io_port = 0; // I don't think we use these two... - sh->this_id = SELF_SCSI_ID; + sh->this_id = SELF_SCSI_ID; sh->can_queue = cciss_tape_cmds; sh->sg_tablesize = h->maxsgentries; sh->max_cmd_len = MAX_COMMAND_SIZE; sh->max_sectors = h->cciss_max_sectors; - ((struct cciss_scsi_adapter_data_t *) + ((struct cciss_scsi_adapter_data_t *) h->scsi_ctlr)->scsi_host = sh; sh->hostdata[0] = (unsigned long) h; sh->irq = h->intr[SIMPLE_MODE_INT]; @@ -856,7 +853,7 @@ cciss_map_one(struct pci_dev *pdev, static int cciss_scsi_do_simple_cmd(ctlr_info_t *h, CommandList_struct *c, - unsigned char *scsi3addr, + unsigned char *scsi3addr, unsigned char *cdb, unsigned char cdblen, unsigned char *buf, int bufsize, @@ -871,7 +868,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h, c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */ // Fill in the request block... 
- /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", + /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3], scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */ @@ -885,7 +882,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h, /* Fill in the SG list and do dma mapping */ cciss_map_one(h->pdev, c, (unsigned char *) buf, - bufsize, DMA_FROM_DEVICE); + bufsize, DMA_FROM_DEVICE); c->waiting = &wait; enqueue_cmd_and_start_io(h, c); @@ -896,14 +893,13 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h, return(0); } -static void +static void cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c) { ErrorInfo_struct *ei; ei = c->err_info; - switch(ei->CommandStatus) - { + switch (ei->CommandStatus) { case CMD_TARGET_STATUS: dev_warn(&h->pdev->dev, "cmd %p has completed with errors\n", c); @@ -1005,7 +1001,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr, if (rc != 0) return rc; /* something went wrong */ - if (ei->CommandStatus != 0 && + if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { cciss_scsi_interpret_error(h, c); rc = -1; @@ -1013,7 +1009,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr, spin_lock_irqsave(&h->lock, flags); scsi_cmd_free(h, c); spin_unlock_irqrestore(&h->lock, flags); - return rc; + return rc; } /* Get the device id from inquiry page 0x83 */ @@ -1042,7 +1038,7 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h, int rc; CommandList_struct *c; unsigned char cdb[12]; - unsigned char scsi3addr[8]; + unsigned char scsi3addr[8]; ErrorInfo_struct *ei; unsigned long flags; @@ -1069,14 +1065,14 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h, cdb[11] = 0; rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr, - cdb, 12, - (unsigned char *) buf, + cdb, 12, + (unsigned char *) buf, bufsize, XFER_READ); if (rc != 0) return rc; /* something went wrong */ ei = c->err_info; - if (ei->CommandStatus != 0 && + if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { cciss_scsi_interpret_error(h, c); rc = -1; @@ -1084,36 +1080,36 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h, spin_lock_irqsave(&h->lock, flags); scsi_cmd_free(h, c); spin_unlock_irqrestore(&h->lock, flags); - return rc; + return rc; } static void cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) { /* the idea here is we could get notified from /proc - that some devices have changed, so we do a report - physical luns cmd, and adjust our list of devices + that some devices have changed, so we do a report + physical luns cmd, and adjust our list of devices accordingly. (We can't rely on the scsi-mid layer just - doing inquiries, because the "busses" that the scsi + doing inquiries, because the "busses" that the scsi mid-layer probes are totally fabricated by this driver, so new devices wouldn't show up. - the scsi3addr's of devices won't change so long as the - adapter is not reset. That means we can rescan and - tell which devices we already know about, vs. new + the scsi3addr's of devices won't change so long as the + adapter is not reset. That means we can rescan and + tell which devices we already know about, vs. new devices, vs. disappearing devices. Also, if you yank out a tape drive, then put in a disk - in it's place, (say, a configured volume from another - array controller for instance) _don't_ poke this driver - (so it thinks it's still a tape, but _do_ poke the scsi - mid layer, so it does an inquiry... 
the scsi mid layer + in it's place, (say, a configured volume from another + array controller for instance) _don't_ poke this driver + (so it thinks it's still a tape, but _do_ poke the scsi + mid layer, so it does an inquiry... the scsi mid layer will see the physical disk. This would be bad. Need to - think about how to prevent that. One idea would be to + think about how to prevent that. One idea would be to snoop all scsi responses and if an inquiry repsonse comes back that reports a disk, chuck it an return selection timeout instead and adjust our table... Not sure i like - that though. + that though. */ #define OBDR_TAPE_INQ_SIZE 49 @@ -1141,9 +1137,9 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) ch = &ld_buff->LUNListLength[0]; num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; if (num_luns > CISS_MAX_PHYS_LUN) { - printk(KERN_WARNING + printk(KERN_WARNING "cciss: Maximum physical LUNs (%d) exceeded. " - "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN, + "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN, num_luns - CISS_MAX_PHYS_LUN); num_luns = CISS_MAX_PHYS_LUN; } @@ -1154,7 +1150,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) } - /* adjust our table of devices */ + /* adjust our table of devices */ for (i = 0; i < num_luns; i++) { /* for each physical lun, do an inquiry */ if (ld_buff->LUN[i][3] & 0xC0) continue; @@ -1182,8 +1178,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) cciss_scsi_get_device_id(h, scsi3addr, this_device->device_id, sizeof(this_device->device_id)); - switch (this_device->devtype) - { + switch (this_device->devtype) { case 0x05: /* CD-ROM */ { /* We don't *really* support actual CD-ROM devices, @@ -1213,7 +1208,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) currentsd[ncurrent] = *this_device; ncurrent++; break; - default: + default: break; } } @@ -1258,8 +1253,8 @@ cciss_scsi_write_info(struct Scsi_Host *sh, return -EINVAL; return cciss_scsi_user_command(h, sh->host_no, - buffer, length); -} + buffer, length); +} static int cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh) @@ -1297,8 +1292,8 @@ cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh) return 0; } -/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci - dma mapping and fills in the scatter gather entries of the +/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci + dma mapping and fills in the scatter gather entries of the cciss command, c. */ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c, @@ -1394,7 +1389,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn // Fill in the command list header - cmd->scsi_done = done; // save this for use by completion code + cmd->scsi_done = done; // save this for use by completion code /* save c in case we have to abort it */ cmd->host_scribble = (unsigned char *) c; @@ -1404,7 +1399,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn c->Header.ReplyQueue = 0; /* unused in simple mode */ memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8); c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */ - + // Fill in the request block... 
c->Request.Timeout = 0; @@ -1414,8 +1409,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len); c->Request.Type.Type = TYPE_CMD; c->Request.Type.Attribute = ATTR_SIMPLE; - switch(cmd->sc_data_direction) - { + switch (cmd->sc_data_direction) { case DMA_TO_DEVICE: c->Request.Type.Direction = XFER_WRITE; break; @@ -1432,15 +1426,15 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn c->Request.Type.Direction = XFER_RSVD; // This is technically wrong, and cciss controllers should - // reject it with CMD_INVALID, which is the most correct - // response, but non-fibre backends appear to let it + // reject it with CMD_INVALID, which is the most correct + // response, but non-fibre backends appear to let it // slide by, and give the same results as if this field // were set correctly. Either way is acceptable for // our purposes here. break; - default: + default: dev_warn(&h->pdev->dev, "unknown data direction: %d\n", cmd->sc_data_direction); BUG(); @@ -1464,9 +1458,9 @@ static void cciss_unregister_scsi(ctlr_info_t *h) spin_lock_irqsave(&h->lock, flags); sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; - /* if we weren't ever actually registered, don't unregister */ + /* if we weren't ever actually registered, don't unregister */ if (sa->registered) { spin_unlock_irqrestore(&h->lock, flags); scsi_remove_host(sa->scsi_host); @@ -1474,7 +1468,7 @@ static void cciss_unregister_scsi(ctlr_info_t *h) spin_lock_irqsave(&h->lock, flags); } - /* set scsi_host to NULL so our detect routine will + /* set scsi_host to NULL so our detect routine will find us on register */ sa->scsi_host = NULL; spin_unlock_irqrestore(&h->lock, flags); @@ -1490,7 +1484,7 @@ static int cciss_engage_scsi(ctlr_info_t *h) spin_lock_irqsave(&h->lock, flags); sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; if (sa->registered) { dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n"); @@ -1586,13 +1580,13 @@ retry_tur: return rc; } -/* Need at least one of these error handlers to keep ../scsi/hosts.c from - * complaining. Doing a host- or bus-reset can't do anything good here. +/* Need at least one of these error handlers to keep ../scsi/hosts.c from + * complaining. Doing a host- or bus-reset can't do anything good here. * Despite what it might say in scsi_error.c, there may well be commands * on the controller, as the cciss driver registers twice, once as a block * device for the logical drives, and once as a scsi device, for any tape * drives. So we know there are no commands out on the tape drives, but we - * don't know there are no commands on the controller, and it is likely + * don't know there are no commands on the controller, and it is likely * that there probably are, as the cciss block device is most commonly used * as a boot device (embedded controller on HP/Compaq systems.) 
*/ diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ab62b81c2ca7..dece26f119d4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1070,7 +1070,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned .done = 0, .flags = flags, .error = 0, - .kref = { ATOMIC_INIT(2) }, + .kref = KREF_INIT(2), }; if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4cb8f21ff4ef..724d1c50fc52 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -30,7 +30,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/list.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/bitops.h> #include <linux/slab.h> #include <linux/ratelimit.h> diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 83482721bc01..92c60cbd04ee 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -52,6 +52,7 @@ #define __KERNEL_SYSCALLS__ #include <linux/unistd.h> #include <linux/vmalloc.h> +#include <linux/sched/signal.h> #include <linux/drbd_limits.h> #include "drbd_int.h" @@ -1846,7 +1847,7 @@ int drbd_send_out_of_sync(struct drbd_peer_device *peer_device, struct drbd_requ int drbd_send(struct drbd_connection *connection, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; struct msghdr msg; int rv, sent = 0; @@ -1855,15 +1856,14 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, /* THINK if (signal_pending) return ... ? */ - iov.iov_base = buf; - iov.iov_len = size; - msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + if (sock == connection->data.socket) { rcu_read_lock(); connection->ko_count = rcu_dereference(connection->net_conf)->ko_count; @@ -1871,7 +1871,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, drbd_update_congested(connection); } do { - rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + rv = sock_sendmsg(sock, &msg); if (rv == -EAGAIN) { if (we_should_drop_the_connection(connection, sock)) break; @@ -1885,8 +1885,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, if (rv < 0) break; sent += rv; - iov.iov_base += rv; - iov.iov_len -= rv; } while (sent < size); if (sock == connection->data.socket) @@ -2462,7 +2460,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) if (get_ldev(device)) { q = bdev_get_queue(device->ldev->backing_bdev); - r = bdi_congested(&q->backing_dev_info, bdi_bits); + r = bdi_congested(q->backing_dev_info, bdi_bits); put_ldev(device); if (r) reason = 'b'; @@ -2834,8 +2832,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* we have no partitions. we contain only ourselves. 
*/ device->this_bdev->bd_contains = device->this_bdev; - q->backing_dev_info.congested_fn = drbd_congested; - q->backing_dev_info.congested_data = device; + q->backing_dev_info->congested_fn = drbd_congested; + q->backing_dev_info->congested_data = device; blk_queue_make_request(q, drbd_make_request); blk_queue_write_cache(q, true, true); @@ -2915,11 +2913,9 @@ out_idr_remove_vol: idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { - peer_device = idr_find(&connection->peer_devices, vnr); - if (peer_device) { - idr_remove(&connection->peer_devices, vnr); + peer_device = idr_remove(&connection->peer_devices, vnr); + if (peer_device) kref_put(&connection->kref, drbd_destroy_connection); - } } for_each_peer_device_safe(peer_device, tmp_peer_device, device) { list_del(&peer_device->peer_devices); @@ -2948,7 +2944,6 @@ void drbd_delete_device(struct drbd_device *device) struct drbd_resource *resource = device->resource; struct drbd_connection *connection; struct drbd_peer_device *peer_device; - int refs = 3; /* move to free_peer_device() */ for_each_peer_device(peer_device, device) @@ -2956,13 +2951,15 @@ void drbd_delete_device(struct drbd_device *device) drbd_debugfs_device_cleanup(device); for_each_connection(connection, resource) { idr_remove(&connection->peer_devices, device->vnr); - refs++; + kref_put(&device->kref, drbd_destroy_device); } idr_remove(&resource->devices, device->vnr); + kref_put(&device->kref, drbd_destroy_device); idr_remove(&drbd_devices, device_to_minor(device)); + kref_put(&device->kref, drbd_destroy_device); del_gendisk(device->vdisk); synchronize_rcu(); - kref_sub(&device->kref, refs, drbd_destroy_device); + kref_put(&device->kref, drbd_destroy_device); } static int __init drbd_init(void) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f35db29cac76..908c704e20aa 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1328,11 +1328,13 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi if (b) { blk_queue_stack_limits(q, b); - if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + if (q->backing_dev_info->ra_pages != + b->backing_dev_info->ra_pages) { drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info.ra_pages, - b->backing_dev_info.ra_pages); - q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + q->backing_dev_info->ra_pages, + b->backing_dev_info->ra_pages); + q->backing_dev_info->ra_pages = + b->backing_dev_info->ra_pages; } } fixup_discard_if_not_supported(q); @@ -3345,7 +3347,7 @@ static void device_to_statistics(struct device_statistics *s, s->dev_disk_flags = md->flags; q = bdev_get_queue(device->ldev->backing_bdev); s->dev_lower_blocked = - bdi_congested(&q->backing_dev_info, + bdi_congested(q->backing_dev_info, (1 << WB_async_congested) | (1 << WB_sync_congested)); put_ldev(device); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index be2b93fd2c11..8378142f7a55 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -288,7 +288,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { /* reset device->congestion_reason */ - bdi_rw_congested(&device->rq_queue->backing_dev_info); + bdi_rw_congested(device->rq_queue->backing_dev_info); nc = rcu_dereference(first_peer_device(device)->connection->net_conf); wp = nc ? 
nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7728dd77230..aa6bf9692eff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -36,6 +36,8 @@ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/slab.h> +#include <uapi/linux/sched/types.h> +#include <linux/sched/signal.h> #include <linux/pkt_sched.h> #define __KERNEL_SYSCALLS__ #include <linux/unistd.h> diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index de279fe4e4fd..652114ae1a8a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -421,7 +421,6 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, struct drbd_peer_device *peer_device = first_peer_device(device); unsigned s = req->rq_state; int c_put = 0; - int k_put = 0; if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP)) set |= RQ_COMPLETION_SUSP; @@ -437,6 +436,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* intent: get references */ + kref_get(&req->kref); + if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING)) atomic_inc(&req->completion_ref); @@ -473,15 +474,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) { D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING); - /* local completion may still come in later, - * we need to keep the req object around. */ - kref_get(&req->kref); ++c_put; } if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) { if (req->rq_state & RQ_LOCAL_ABORTED) - ++k_put; + kref_put(&req->kref, drbd_req_destroy); else ++c_put; list_del_init(&req->req_pending_local); @@ -503,7 +501,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (s & RQ_NET_SENT) atomic_sub(req->i.size >> 9, &device->ap_in_flight); if (s & RQ_EXP_BARR_ACK) - ++k_put; + kref_put(&req->kref, drbd_req_destroy); req->net_done_jif = jiffies; /* in ahead/behind mode, or just in case, @@ -516,25 +514,16 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* potentially complete and destroy */ - if (k_put || c_put) { - /* Completion does it's own kref_put. If we are going to - * kref_sub below, we need req to be still around then. */ - int at_least = k_put + !!c_put; - int refcount = atomic_read(&req->kref.refcount); - if (refcount < at_least) - drbd_err(device, - "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n", - s, req->rq_state, refcount, at_least); - } - /* If we made progress, retry conflicting peer requests, if any. 
*/ if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) - k_put += drbd_req_put_completion_ref(req, m, c_put); - if (k_put) - kref_sub(&req->kref, k_put, drbd_req_destroy); + if (c_put) { + if (drbd_req_put_completion_ref(req, m, c_put)) + kref_put(&req->kref, drbd_req_destroy); + } else { + kref_put(&req->kref, drbd_req_destroy); + } } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) @@ -938,7 +927,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se switch (rbm) { case RB_CONGESTED_REMOTE: - bdi = &device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; return bdi_read_congested(bdi); case RB_LEAST_PENDING: return atomic_read(&device->local_cnt) > diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c6755c9a0aea..3bff33f21435 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -25,7 +25,7 @@ #include <linux/module.h> #include <linux/drbd.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/memcontrol.h> diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a391a3cfb3fe..45b4384f650c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2900,8 +2900,8 @@ static void do_fd_request(struct request_queue *q) return; if (WARN(atomic_read(&usage_count) == 0, - "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n", - current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, + "warning: usage count=0, current_req=%p sect=%ld flags=%llx\n", + current_req, (long)blk_rq_pos(current_req), (unsigned long long) current_req->cmd_flags)) return; @@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param, *rcmd = NULL; loop: - ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER); + ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL); if (!ptr) return -ENOMEM; *rcmd = ptr; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index a9b48ed7a3cd..6043648da1e8 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -626,30 +626,29 @@ repeat: req_data_dir(req) == READ ? 
"read" : "writ", cyl, head, sec, nsect, bio_data(req->bio)); #endif - if (req->cmd_type == REQ_TYPE_FS) { - switch (rq_data_dir(req)) { - case READ: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, - &read_intr); - if (reset) - goto repeat; - break; - case WRITE: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, - &write_intr); - if (reset) - goto repeat; - if (wait_DRQ()) { - bad_rw_intr(); - goto repeat; - } - outsw(HD_DATA, bio_data(req->bio), 256); - break; - default: - printk("unknown hd-command\n"); - hd_end_request_cur(-EIO); - break; + + switch (req_op(req)) { + case REQ_OP_READ: + hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, + &read_intr); + if (reset) + goto repeat; + break; + case REQ_OP_WRITE: + hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, + &write_intr); + if (reset) + goto repeat; + if (wait_DRQ()) { + bad_rw_intr(); + goto repeat; } + outsw(HD_DATA, bio_data(req->bio), 256); + break; + default: + printk("unknown hd-command\n"); + hd_end_request_cur(-EIO); + break; } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f347285c67ec..0ecb6461ed81 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -186,7 +186,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) * * TODO: the above condition may be loosed in the future, and * direct I/O may be switched runtime at that time because most - * of requests in sane appplications should be PAGE_SIZE algined + * of requests in sane applications should be PAGE_SIZE aligned */ if (dio) { if (queue_logical_block_size(lo->lo_queue) >= sb_bsize && @@ -501,9 +501,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = IOCB_DIRECT; if (rw == WRITE) - ret = file->f_op->write_iter(&cmd->iocb, &iter); + ret = call_write_iter(file, &cmd->iocb, &iter); else - ret = file->f_op->read_iter(&cmd->iocb, &iter); + ret = call_read_iter(file, &cmd->iocb, &iter); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); @@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; + /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + err = loop_release_xfer(lo); if (err) - return err; + goto exit; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; @@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = loop_init_xfer(lo, xfer, info); if (err) - return err; + goto exit; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) - return -EFBIG; + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { + err = -EFBIG; + goto exit; + } loop_config_discard(lo); @@ -1137,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - if ((info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; - lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); - } - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1156,7 +1154,17 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, 
lo->use_dio); - return 0; + exit: + blk_mq_unfreeze_queue(lo->lo_queue); + + if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + loop_reread_partitions(lo, lo->lo_device); + } + + return err; } static int @@ -1168,7 +1176,8 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) if (lo->lo_state != Lo_bound) return -ENXIO; - error = vfs_getattr(&file->f_path, &stat); + error = vfs_getattr(&file->f_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; memset(info, 0, sizeof(*info)); diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index e937fcf71769..286f276f586e 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -670,15 +670,17 @@ static void mg_request_poll(struct request_queue *q) break; } - if (unlikely(host->req->cmd_type != REQ_TYPE_FS)) { - mg_end_request_cur(host, -EIO); - continue; - } - - if (rq_data_dir(host->req) == READ) + switch (req_op(host->req)) { + case REQ_OP_READ: mg_read(host->req); - else + break; + case REQ_OP_WRITE: mg_write(host->req); + break; + default: + mg_end_request_cur(host, -EIO); + break; + } } } @@ -687,13 +689,15 @@ static unsigned int mg_issue_req(struct request *req, unsigned int sect_num, unsigned int sect_cnt) { - if (rq_data_dir(req) == READ) { + switch (req_op(host->req)) { + case REQ_OP_READ: if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) != MG_ERR_NONE) { mg_bad_rw_intr(host); return host->error; } - } else { + break; + case REQ_OP_WRITE: /* TODO : handler */ outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) @@ -712,6 +716,10 @@ static unsigned int mg_issue_req(struct request *req, mod_timer(&host->timer, jiffies + 3 * HZ); outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); + break; + default: + mg_end_request_cur(host, -EIO); + break; } return MG_ERR_NONE; } @@ -753,11 +761,6 @@ static void mg_request(struct request_queue *q) continue; } - if (unlikely(req->cmd_type != REQ_TYPE_FS)) { - mg_end_request_cur(host, -EIO); - continue; - } - if (!mg_issue_req(req, host, sect_num, sect_cnt)) return; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9fd06eeb1a17..7e4287bc19e5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -41,6 +41,9 @@ #include <linux/nbd.h> +static DEFINE_IDR(nbd_index_idr); +static DEFINE_MUTEX(nbd_index_mutex); + struct nbd_sock { struct socket *sock; struct mutex tx_lock; @@ -89,8 +92,13 @@ static struct dentry *nbd_dbg_dir; #define NBD_MAGIC 0x68797548 static unsigned int nbds_max = 16; -static struct nbd_device *nbd_dev; static int max_part; +static struct workqueue_struct *recv_workqueue; +static int part_shift; + +static int nbd_dev_dbg_init(struct nbd_device *nbd); +static void nbd_dev_dbg_close(struct nbd_device *nbd); + static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -116,7 +124,7 @@ static const char *nbdcmd_to_ascii(int cmd) static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) { - bdev->bd_inode->i_size = 0; + bd_set_size(bdev, 0); set_capacity(nbd->disk, 0); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -125,29 +133,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) { - if (!nbd_is_connected(nbd)) - return; - - 
bdev->bd_inode->i_size = nbd->bytesize; + blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize); + blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize); + bd_set_size(bdev, nbd->bytesize); set_capacity(nbd->disk, nbd->bytesize >> 9); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } -static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, +static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, loff_t blocksize, loff_t nr_blocks) { - int ret; - - ret = set_blocksize(bdev, blocksize); - if (ret) - return ret; - nbd->blksize = blocksize; nbd->bytesize = blocksize * nr_blocks; - - nbd_size_update(nbd, bdev); - - return 0; + if (nbd_is_connected(nbd)) + nbd_size_update(nbd, bdev); } static void nbd_end_request(struct nbd_cmd *cmd) @@ -193,13 +192,6 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); req->errors++; - /* - * If our disconnect packet times out then we're already holding the - * config_lock and could deadlock here, so just set an error and return, - * we'll handle shutting everything down later. - */ - if (req->cmd_type == REQ_TYPE_DRV_PRIV) - return BLK_EH_HANDLED; mutex_lock(&nbd->config_lock); sock_shutdown(nbd); mutex_unlock(&nbd->config_lock); @@ -209,13 +201,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, /* * Send or receive packet. */ -static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, - int size, int msg_flags) +static int sock_xmit(struct nbd_device *nbd, int index, int send, + struct iov_iter *iter, int msg_flags) { struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; - struct kvec iov; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -225,11 +216,11 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return -EINVAL; } + msg.msg_iter = *iter; + current->flags |= PF_MEMALLOC; do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - iov.iov_base = buf; - iov.iov_len = size; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; @@ -237,58 +228,61 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) - result = kernel_sendmsg(sock, &msg, &iov, 1, size); + result = sock_sendmsg(sock, &msg); else - result = kernel_recvmsg(sock, &msg, &iov, 1, size, - msg.msg_flags); + result = sock_recvmsg(sock, &msg, msg.msg_flags); if (result <= 0) { if (result == 0) result = -EPIPE; /* short read */ break; } - size -= result; - buf += result; - } while (size > 0); + } while (msg_data_left(&msg)); tsk_restore_flags(current, pflags, PF_MEMALLOC); return result; } -static inline int sock_send_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec, int flags) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset, - bvec->bv_len, flags); - kunmap(bvec->bv_page); - return result; -} - /* always call with the tx_lock held */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); int result; - struct nbd_request request; + struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; unsigned long size = blk_rq_bytes(req); struct bio *bio; u32 type; u32 tag = blk_mq_unique_tag(req); - if (req_op(req) == REQ_OP_DISCARD) + 
iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + + switch (req_op(req)) { + case REQ_OP_DISCARD: type = NBD_CMD_TRIM; - else if (req_op(req) == REQ_OP_FLUSH) + break; + case REQ_OP_FLUSH: type = NBD_CMD_FLUSH; - else if (rq_data_dir(req) == WRITE) + break; + case REQ_OP_WRITE: type = NBD_CMD_WRITE; - else + break; + case REQ_OP_READ: type = NBD_CMD_READ; + break; + default: + return -EIO; + } + + if (rq_data_dir(req) == WRITE && + (nbd->flags & NBD_FLAG_READ_ONLY)) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Write on read-only\n"); + return -EIO; + } - memset(&request, 0, sizeof(request)); - request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -299,7 +293,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(nbd, index, 1, &request, sizeof(request), + result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0); if (result <= 0) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -322,7 +316,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); - result = sock_send_bvec(nbd, index, &bvec, flags); + iov_iter_bvec(&from, ITER_BVEC | WRITE, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 1, &from, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", @@ -343,17 +339,6 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) return 0; } -static inline int sock_recv_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset, - bvec->bv_len, MSG_WAITALL); - kunmap(bvec->bv_page); - return result; -} - /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { @@ -363,9 +348,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct request *req = NULL; u16 hwq; u32 tag; + struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; + struct iov_iter to; reply.magic = 0; - result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL); + iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); if (result <= 0) { if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) @@ -405,7 +393,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, index, &bvec); + iov_iter_bvec(&to, ITER_BVEC | READ, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -510,18 +500,6 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) goto error_out; } - if (req->cmd_type != REQ_TYPE_FS && - req->cmd_type != REQ_TYPE_DRV_PRIV) - goto error_out; - - if (req->cmd_type == REQ_TYPE_FS && - rq_data_dir(req) == WRITE && - (nbd->flags & NBD_FLAG_READ_ONLY)) { - 
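The ITER_BVEC conversion above is also what lets the kmap()-based sock_send_bvec()/sock_recv_bvec() helpers go away: an iov_iter can reference bio pages directly. A condensed sketch of the send side, with error handling trimmed to the essentials:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/net.h>
#include <linux/uio.h>

static int demo_send_data(struct socket *sock, struct request *req)
{
	struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
	struct req_iterator iter;
	struct bio_vec bvec;
	int ret;

	rq_for_each_segment(bvec, req, iter) {
		/* One iterator per segment; no kmap()/kunmap() pair. */
		iov_iter_bvec(&msg.msg_iter, ITER_BVEC | WRITE,
			      &bvec, 1, bvec.bv_len);
		ret = sock_sendmsg(sock, &msg);
		if (ret <= 0)
			return ret ? ret : -EPIPE;
	}
	return 0;
}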
dev_err_ratelimited(disk_to_dev(nbd->disk), - "Write on read-only\n"); - goto error_out; - } - req->errors = 0; nsock = nbd->socks[index]; @@ -571,10 +549,17 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; } -static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) +static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, + unsigned long arg) { + struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; + int err; + + sock = sockfd_lookup(arg, &err); + if (!sock) + return err; if (!nbd->task_setup) nbd->task_setup = current; @@ -598,26 +583,20 @@ static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) nsock->sock = sock; socks[nbd->num_connections++] = nsock; + if (max_part) + bdev->bd_invalidated = 1; return 0; } /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { - int i; - - for (i = 0; i < nbd->num_connections; i++) - kfree(nbd->socks[i]); - kfree(nbd->socks); - nbd->socks = NULL; nbd->runtime_flags = 0; nbd->blksize = 1024; nbd->bytesize = 0; set_capacity(nbd->disk, 0); nbd->flags = 0; nbd->tag_set.timeout = 0; - nbd->num_connections = 0; - nbd->task_setup = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } @@ -645,95 +624,162 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) static void send_disconnects(struct nbd_device *nbd) { - struct nbd_request request = {}; + struct nbd_request request = { + .magic = htonl(NBD_REQUEST_MAGIC), + .type = htonl(NBD_CMD_DISC), + }; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; int i, ret; - request.magic = htonl(NBD_REQUEST_MAGIC); - request.type = htonl(NBD_CMD_DISC); - for (i = 0; i < nbd->num_connections; i++) { - ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0); + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + ret = sock_xmit(nbd, i, 1, &from, 0); if (ret <= 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); } } -static int nbd_dev_dbg_init(struct nbd_device *nbd); -static void nbd_dev_dbg_close(struct nbd_device *nbd); +static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev) +{ + dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); + if (!nbd->socks) + return -EINVAL; -/* Must be called with config_lock held */ -static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, - unsigned int cmd, unsigned long arg) + mutex_unlock(&nbd->config_lock); + fsync_bdev(bdev); + mutex_lock(&nbd->config_lock); + + /* Check again after getting mutex back. */ + if (!nbd->socks) + return -EINVAL; + + if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, + &nbd->runtime_flags)) + send_disconnects(nbd); + return 0; +} + +static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) { - switch (cmd) { - case NBD_DISCONNECT: { - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->socks) - return -EINVAL; - - mutex_unlock(&nbd->config_lock); - fsync_bdev(bdev); - mutex_lock(&nbd->config_lock); - - /* Check again after getting mutex back. */ - if (!nbd->socks) - return -EINVAL; - - if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, - &nbd->runtime_flags)) - send_disconnects(nbd); - return 0; - } + sock_shutdown(nbd); + nbd_clear_que(nbd); + kill_bdev(bdev); + nbd_bdev_reset(bdev); + /* + * We want to give the run thread a chance to wait for everybody + * to clean up and then do its own cleanup. 
+ */ + if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) && + nbd->num_connections) { + int i; - case NBD_CLEAR_SOCK: - sock_shutdown(nbd); - nbd_clear_que(nbd); - kill_bdev(bdev); - nbd_bdev_reset(bdev); - /* - * We want to give the run thread a chance to wait for everybody - * to clean up and then do it's own cleanup. - */ - if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) { - int i; - - for (i = 0; i < nbd->num_connections; i++) - kfree(nbd->socks[i]); - kfree(nbd->socks); - nbd->socks = NULL; - nbd->num_connections = 0; - nbd->task_setup = NULL; + for (i = 0; i < nbd->num_connections; i++) { + sockfd_put(nbd->socks[i]->sock); + kfree(nbd->socks[i]); } - return 0; + kfree(nbd->socks); + nbd->socks = NULL; + nbd->num_connections = 0; + } + nbd->task_setup = NULL; - case NBD_SET_SOCK: { - int err; - struct socket *sock = sockfd_lookup(arg, &err); + return 0; +} - if (!sock) - return err; +static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) +{ + struct recv_thread_args *args; + int num_connections = nbd->num_connections; + int error = 0, i; - err = nbd_add_socket(nbd, sock); - if (!err && max_part) - bdev->bd_invalidated = 1; + if (nbd->task_recv) + return -EBUSY; + if (!nbd->socks) + return -EINVAL; + if (num_connections > 1 && + !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); + error = -EINVAL; + goto out_err; + } - return err; + set_bit(NBD_RUNNING, &nbd->runtime_flags); + blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); + args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); + if (!args) { + error = -ENOMEM; + goto out_err; } + nbd->task_recv = current; + mutex_unlock(&nbd->config_lock); - case NBD_SET_BLKSIZE: { - loff_t bsize = div_s64(nbd->bytesize, arg); + nbd_parse_flags(nbd, bdev); - return nbd_size_set(nbd, bdev, arg, bsize); + error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (error) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + goto out_recv; } - case NBD_SET_SIZE: - return nbd_size_set(nbd, bdev, nbd->blksize, - div_s64(arg, nbd->blksize)); + nbd_size_update(nbd, bdev); - case NBD_SET_SIZE_BLOCKS: - return nbd_size_set(nbd, bdev, nbd->blksize, arg); + nbd_dev_dbg_init(nbd); + for (i = 0; i < num_connections; i++) { + sk_set_memalloc(nbd->socks[i]->sock->sk); + atomic_inc(&nbd->recv_threads); + INIT_WORK(&args[i].work, recv_work); + args[i].nbd = nbd; + args[i].index = i; + queue_work(recv_workqueue, &args[i].work); + } + wait_event_interruptible(nbd->recv_wq, + atomic_read(&nbd->recv_threads) == 0); + for (i = 0; i < num_connections; i++) + flush_work(&args[i].work); + nbd_dev_dbg_close(nbd); + nbd_size_clear(nbd, bdev); + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); +out_recv: + mutex_lock(&nbd->config_lock); + nbd->task_recv = NULL; +out_err: + clear_bit(NBD_RUNNING, &nbd->runtime_flags); + nbd_clear_sock(nbd, bdev); + + /* user requested, ignore socket errors */ + if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + error = 0; + if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) + error = -ETIMEDOUT; + + nbd_reset(nbd); + return error; +} +/* Must be called with config_lock held */ +static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case NBD_DISCONNECT: + return nbd_disconnect(nbd, bdev); + case NBD_CLEAR_SOCK: + return nbd_clear_sock(nbd, bdev); + case NBD_SET_SOCK: + return 
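nbd_start_device() above fans one receive worker per connection out to the new knbd-recv workqueue and then sleeps until every worker has exited. A compressed sketch of that fan-out/wait shape; all demo_* names are hypothetical:

#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

struct demo_args {
	struct work_struct work;
	int index;
};

static atomic_t demo_workers;
static DECLARE_WAIT_QUEUE_HEAD(demo_done);

static void demo_recv(struct work_struct *work)
{
	/* ... drain replies for one connection until it drops ... */
	if (atomic_dec_and_test(&demo_workers))
		wake_up(&demo_done);
}

static int demo_start(struct workqueue_struct *wq, int nconn)
{
	struct demo_args *args;
	int i;

	args = kcalloc(nconn, sizeof(*args), GFP_KERNEL);
	if (!args)
		return -ENOMEM;

	for (i = 0; i < nconn; i++) {
		atomic_inc(&demo_workers);
		INIT_WORK(&args[i].work, demo_recv);
		args[i].index = i;
		queue_work(wq, &args[i].work);
	}

	/* Park here for the device's lifetime, as NBD_DO_IT does. */
	wait_event_interruptible(demo_done,
				 atomic_read(&demo_workers) == 0);
	for (i = 0; i < nconn; i++)
		flush_work(&args[i].work);
	kfree(args);
	return 0;
}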
nbd_add_socket(nbd, bdev, arg); + case NBD_SET_BLKSIZE: + nbd_size_set(nbd, bdev, arg, + div_s64(nbd->bytesize, arg)); + return 0; + case NBD_SET_SIZE: + nbd_size_set(nbd, bdev, nbd->blksize, + div_s64(arg, nbd->blksize)); + return 0; + case NBD_SET_SIZE_BLOCKS: + nbd_size_set(nbd, bdev, nbd->blksize, arg); + return 0; case NBD_SET_TIMEOUT: nbd->tag_set.timeout = arg * HZ; return 0; @@ -741,85 +787,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_FLAGS: nbd->flags = arg; return 0; - - case NBD_DO_IT: { - struct recv_thread_args *args; - int num_connections = nbd->num_connections; - int error = 0, i; - - if (nbd->task_recv) - return -EBUSY; - if (!nbd->socks) - return -EINVAL; - if (num_connections > 1 && - !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { - dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); - error = -EINVAL; - goto out_err; - } - - set_bit(NBD_RUNNING, &nbd->runtime_flags); - blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); - args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); - if (!args) { - error = -ENOMEM; - goto out_err; - } - nbd->task_recv = current; - mutex_unlock(&nbd->config_lock); - - nbd_parse_flags(nbd, bdev); - - error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); - if (error) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - goto out_recv; - } - - nbd_size_update(nbd, bdev); - - nbd_dev_dbg_init(nbd); - for (i = 0; i < num_connections; i++) { - sk_set_memalloc(nbd->socks[i]->sock->sk); - atomic_inc(&nbd->recv_threads); - INIT_WORK(&args[i].work, recv_work); - args[i].nbd = nbd; - args[i].index = i; - queue_work(system_long_wq, &args[i].work); - } - wait_event_interruptible(nbd->recv_wq, - atomic_read(&nbd->recv_threads) == 0); - for (i = 0; i < num_connections; i++) - flush_work(&args[i].work); - nbd_dev_dbg_close(nbd); - nbd_size_clear(nbd, bdev); - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); -out_recv: - mutex_lock(&nbd->config_lock); - nbd->task_recv = NULL; -out_err: - sock_shutdown(nbd); - nbd_clear_que(nbd); - kill_bdev(bdev); - nbd_bdev_reset(bdev); - - /* user requested, ignore socket errors */ - if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - error = 0; - if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) - error = -ETIMEDOUT; - - nbd_reset(nbd); - return error; - } - + case NBD_DO_IT: + return nbd_start_device(nbd, bdev); case NBD_CLEAR_QUE: /* * This is for compatibility only. The queue is always cleared * by NBD_DO_IT or NBD_CLEAR_SOCK. 
*/ return 0; - case NBD_PRINT_DEBUG: /* * For compatibility only, we no longer keep a list of @@ -996,6 +971,103 @@ static struct blk_mq_ops nbd_mq_ops = { .timeout = nbd_xmit_timeout, }; +static void nbd_dev_remove(struct nbd_device *nbd) +{ + struct gendisk *disk = nbd->disk; + nbd->magic = 0; + if (disk) { + del_gendisk(disk); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&nbd->tag_set); + put_disk(disk); + } + kfree(nbd); +} + +static int nbd_dev_add(int index) +{ + struct nbd_device *nbd; + struct gendisk *disk; + struct request_queue *q; + int err = -ENOMEM; + + nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); + if (!nbd) + goto out; + + disk = alloc_disk(1 << part_shift); + if (!disk) + goto out_free_nbd; + + if (index >= 0) { + err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); + if (err >= 0) + index = err; + } + if (err < 0) + goto out_free_disk; + + nbd->disk = disk; + nbd->tag_set.ops = &nbd_mq_ops; + nbd->tag_set.nr_hw_queues = 1; + nbd->tag_set.queue_depth = 128; + nbd->tag_set.numa_node = NUMA_NO_NODE; + nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); + nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | + BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; + nbd->tag_set.driver_data = nbd; + + err = blk_mq_alloc_tag_set(&nbd->tag_set); + if (err) + goto out_free_idr; + + q = blk_mq_init_queue(&nbd->tag_set); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto out_free_tags; + } + disk->queue = q; + + /* + * Tell the block layer that we are not a rotational device + */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); + disk->queue->limits.discard_granularity = 512; + blk_queue_max_discard_sectors(disk->queue, UINT_MAX); + disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_hw_sectors(disk->queue, 65536); + disk->queue->limits.max_sectors = 256; + + nbd->magic = NBD_MAGIC; + mutex_init(&nbd->config_lock); + disk->major = NBD_MAJOR; + disk->first_minor = index << part_shift; + disk->fops = &nbd_fops; + disk->private_data = nbd; + sprintf(disk->disk_name, "nbd%d", index); + init_waitqueue_head(&nbd->recv_wq); + nbd_reset(nbd); + add_disk(disk); + return index; + +out_free_tags: + blk_mq_free_tag_set(&nbd->tag_set); +out_free_idr: + idr_remove(&nbd_index_idr, index); +out_free_disk: + put_disk(disk); +out_free_nbd: + kfree(nbd); +out: + return err; +} + /* * And here should be modules and kernel interface * (Just smiley confuses emacs :-) @@ -1003,9 +1075,7 @@ static struct blk_mq_ops nbd_mq_ops = { static int __init nbd_init(void) { - int err = -ENOMEM; int i; - int part_shift; BUILD_BUG_ON(sizeof(struct nbd_request) != 28); @@ -1034,111 +1104,40 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; - - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) + recv_workqueue = alloc_workqueue("knbd-recv", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!recv_workqueue) return -ENOMEM; - for (i = 0; i < nbds_max; i++) { - struct request_queue *q; - struct gendisk *disk = alloc_disk(1 << part_shift); - if (!disk) - goto out; - nbd_dev[i].disk = disk; - - nbd_dev[i].tag_set.ops = &nbd_mq_ops; - nbd_dev[i].tag_set.nr_hw_queues = 1; - nbd_dev[i].tag_set.queue_depth = 128; - nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE; - nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd); - nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - 
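nbd_dev_add() above uses a single idr both for explicit requests ("create nbd3") and for first-free allocation. A sketch of that dual-mode idr_alloc() use with a hypothetical object type:

#include <linux/idr.h>

static DEFINE_IDR(demo_idr);

/* Returns the allocated index, or -EEXIST if a requested slot is taken. */
static int demo_add(void *obj, int index)
{
	int err;

	if (index >= 0) {
		/* Exactly the requested slot: range [index, index + 1). */
		err = idr_alloc(&demo_idr, obj, index, index + 1, GFP_KERNEL);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		/* First free slot; end == 0 means no upper bound. */
		err = idr_alloc(&demo_idr, obj, 0, 0, GFP_KERNEL);
	}
	return err;
}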
BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; - nbd_dev[i].tag_set.driver_data = &nbd_dev[i]; - - err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set); - if (err) { - put_disk(disk); - goto out; - } - - /* - * The new linux 2.5 block layer implementation requires - * every gendisk to have its very own request_queue struct. - * These structs are big so we dynamically allocate them. - */ - q = blk_mq_init_queue(&nbd_dev[i].tag_set); - if (IS_ERR(q)) { - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - put_disk(disk); - goto out; - } - disk->queue = q; - - /* - * Tell the block layer that we are not a rotational device - */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); - disk->queue->limits.discard_granularity = 512; - blk_queue_max_discard_sectors(disk->queue, UINT_MAX); - disk->queue->limits.discard_zeroes_data = 0; - blk_queue_max_hw_sectors(disk->queue, 65536); - disk->queue->limits.max_sectors = 256; - } - if (register_blkdev(NBD_MAJOR, "nbd")) { - err = -EIO; - goto out; + destroy_workqueue(recv_workqueue); + return -EIO; } - printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR); - nbd_dbg_init(); - for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].magic = NBD_MAGIC; - mutex_init(&nbd_dev[i].config_lock); - disk->major = NBD_MAJOR; - disk->first_minor = i << part_shift; - disk->fops = &nbd_fops; - disk->private_data = &nbd_dev[i]; - sprintf(disk->disk_name, "nbd%d", i); - init_waitqueue_head(&nbd_dev[i].recv_wq); - nbd_reset(&nbd_dev[i]); - add_disk(disk); - } + mutex_lock(&nbd_index_mutex); + for (i = 0; i < nbds_max; i++) + nbd_dev_add(i); + mutex_unlock(&nbd_index_mutex); + return 0; +} +static int nbd_exit_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + nbd_dev_remove(nbd); return 0; -out: - while (i--) { - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - blk_cleanup_queue(nbd_dev[i].disk->queue); - put_disk(nbd_dev[i].disk); - } - kfree(nbd_dev); - return err; } static void __exit nbd_cleanup(void) { - int i; - nbd_dbg_close(); - for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].magic = 0; - if (disk) { - del_gendisk(disk); - blk_cleanup_queue(disk->queue); - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - put_disk(disk); - } - } + idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL); + idr_destroy(&nbd_index_idr); + destroy_workqueue(recv_workqueue); unregister_blkdev(NBD_MAJOR, "nbd"); - kfree(nbd_dev); - printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR); } module_init(nbd_init); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c0e14e54909b..6f2e565bccc5 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -420,7 +420,8 @@ static void null_lnvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - nvm_end_io(rqd, error); + rqd->error = error; + nvm_end_io(rqd); blk_put_request(rq); } @@ -431,11 +432,11 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct request *rq; struct bio *bio = rqd->bio; - rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); + rq = blk_mq_alloc_request(q, + op_is_write(bio_op(bio)) ? 
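The matching teardown in nbd_cleanup() above walks the idr instead of a static device array. Continuing the idr sketch, with demo_remove() as a hypothetical per-device destructor:

#include <linux/idr.h>

static void demo_remove(void *dev);	/* hypothetical destructor */

static int demo_exit_cb(int id, void *ptr, void *data)
{
	demo_remove(ptr);
	return 0;
}

static void demo_cleanup(void)
{
	/* Visit every registered device, then drop the table itself. */
	idr_for_each(&demo_idr, demo_exit_cb, NULL);
	idr_destroy(&demo_idr);
}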
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return -ENOMEM; - rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->__sector = bio->bi_iter.bi_sector; rq->ioprio = bio_prio(bio); @@ -460,7 +461,6 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) id->ver_id = 0x1; id->vmnt = 0; - id->cgrps = 1; id->cap = 0x2; id->dom = 0x1; @@ -479,7 +479,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) sector_div(size, bs); /* convert size to pages */ size >>= 8; /* concert size to pgs pr blk */ - grp = &id->groups[0]; + grp = &id->grp; grp->mtype = 0; grp->fmtype = 0; grp->num_ch = 1; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 92900f5f0b47..8127b8201a01 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -308,12 +308,6 @@ static void osdblk_rq_fn(struct request_queue *q) if (!rq) break; - /* filter out block requests we don't understand */ - if (rq->cmd_type != REQ_TYPE_FS) { - blk_end_request_all(rq, 0); - continue; - } - /* deduce our operation (read, write, flush) */ /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] * into a clearly defined set of RPC commands: diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig index efefb5ac3004..3a15247942e4 100644 --- a/drivers/block/paride/Kconfig +++ b/drivers/block/paride/Kconfig @@ -25,6 +25,7 @@ config PARIDE_PD config PARIDE_PCD tristate "Parallel port ATAPI CD-ROMs" depends on PARIDE + select BLK_SCSI_REQUEST # only for the generic cdrom code ---help--- This option enables the high-level driver for ATAPI CD-ROM devices connected through a parallel port. If you chose to build PARIDE diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 5fd2d0e25567..10aed84244f5 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -273,7 +273,7 @@ static const struct block_device_operations pcd_bdops = { .check_events = pcd_block_check_events, }; -static struct cdrom_device_ops pcd_dops = { +static const struct cdrom_device_ops pcd_dops = { .open = pcd_open, .release = pcd_release, .drive_status = pcd_drive_status, diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index c3ed2fc72daa..644ba0888bd4 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -439,18 +439,16 @@ static int pd_retries = 0; /* i/o error retry count */ static int pd_block; /* address of next requested block */ static int pd_count; /* number of blocks still to do */ static int pd_run; /* sectors in current cluster */ -static int pd_cmd; /* current command READ/WRITE */ static char *pd_buf; /* buffer for request in progress */ static enum action do_pd_io_start(void) { - if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) { + switch (req_op(pd_req)) { + case REQ_OP_DRV_IN: phase = pd_special; return pd_special(); - } - - pd_cmd = rq_data_dir(pd_req); - if (pd_cmd == READ || pd_cmd == WRITE) { + case REQ_OP_READ: + case REQ_OP_WRITE: pd_block = blk_rq_pos(pd_req); pd_count = blk_rq_cur_sectors(pd_req); if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) @@ -458,7 +456,7 @@ static enum action do_pd_io_start(void) pd_run = blk_rq_sectors(pd_req); pd_buf = bio_data(pd_req->bio); pd_retries = 0; - if (pd_cmd == READ) + if (req_op(pd_req) == REQ_OP_READ) return do_pd_read_start(); else return do_pd_write_start(); @@ -723,11 +721,10 @@ static int pd_special_command(struct pd_unit *disk, struct request *rq; int err = 0; - rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM); + rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 
__GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = func; err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 1b94c1ca5c5f..66d846ba85a9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,10 +704,10 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? - WRITE : READ, __GFP_RECLAIM); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - blk_rq_set_block_pc(rq); + scsi_req_init(rq); if (cgc->buflen) { ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, @@ -716,8 +716,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * goto out; } - rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]); - memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); + scsi_req(rq)->cmd_len = COMMAND_SIZE(cgc->cmd[0]); + memcpy(scsi_req(rq)->cmd, cgc->cmd, CDROM_PACKET_SIZE); rq->timeout = 60*HZ; if (cgc->quiet) @@ -1243,7 +1243,7 @@ try_next_bio: && pd->bio_queue_size <= pd->write_congestion_off); spin_unlock(&pd->lock); if (wakeup) { - clear_bdi_congested(&pd->disk->queue->backing_dev_info, + clear_bdi_congested(pd->disk->queue->backing_dev_info, BLK_RW_ASYNC); } @@ -2370,7 +2370,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); + set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC); do { spin_unlock(&pd->lock); congestion_wait(BLK_RW_ASYNC, HZ); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 76f33c84ce3d..a809e3e9feb8 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -196,16 +196,19 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); while ((req = blk_fetch_request(q))) { - if (req_op(req) == REQ_OP_FLUSH) { + switch (req_op(req)) { + case REQ_OP_FLUSH: if (ps3disk_submit_flush_request(dev, req)) - break; - } else if (req->cmd_type == REQ_TYPE_FS) { + return; + break; + case REQ_OP_READ: + case REQ_OP_WRITE: if (ps3disk_submit_request_sg(dev, req)) - break; - } else { + return; + break; + default: blk_dump_rq_flags(req, DEVICE_NAME " bad request"); __blk_end_request_all(req, -EIO); - continue; } } } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 36d2b9f4e836..4d6807723798 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -123,9 +123,11 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_FEATURE_LAYERING (1<<0) #define RBD_FEATURE_STRIPINGV2 (1<<1) #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) +#define RBD_FEATURE_DATA_POOL (1<<7) #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ RBD_FEATURE_STRIPINGV2 | \ - RBD_FEATURE_EXCLUSIVE_LOCK) + RBD_FEATURE_EXCLUSIVE_LOCK | \ + RBD_FEATURE_DATA_POOL) /* Features supported by this (client software) implementation. */ @@ -144,10 +146,9 @@ struct rbd_image_header { /* These six fields never change for a given rbd image */ char *object_prefix; __u8 obj_order; - __u8 crypt_type; - __u8 comp_type; u64 stripe_unit; u64 stripe_count; + s64 data_pool_id; u64 features; /* Might be changeable someday? 
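With REQ_TYPE_DRV_PRIV gone, pd_special_command() above encodes "driver-private, no data transfer" directly in the op when allocating the request. A sketch of that allocate/execute sequence; func is a hypothetical driver callback parked in rq->special:

#include <linux/blkdev.h>

static int demo_special(struct request_queue *q, struct gendisk *disk,
			void *func)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->special = func;	/* no separate cmd_type marking needed */
	err = blk_execute_rq(q, disk, rq, 0);
	blk_put_request(rq);
	return err;
}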
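pkt_generic_packet() above is the template for SCSI passthrough under the new BLK_SCSI_REQUEST scheme: allocate a REQ_OP_SCSI_{IN,OUT} request, initialize it with scsi_req_init(), and stash the CDB in scsi_req(rq)->cmd. A condensed sketch with a caller-supplied CDB and buffer (the scsi_req_init() signature shown is the one used in this kernel generation):

#include <linux/blkdev.h>
#include <linux/string.h>
#include <scsi/scsi_request.h>

static int demo_scsi_cmd(struct request_queue *q, struct gendisk *disk,
			 const u8 *cdb, unsigned int cdb_len,
			 void *buf, unsigned int buflen, bool to_dev)
{
	struct request *rq;
	int ret = 0;

	rq = blk_get_request(q, to_dev ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
			     __GFP_RECLAIM);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
	scsi_req_init(rq);

	if (buflen) {
		ret = blk_rq_map_kern(q, rq, buf, buflen, __GFP_RECLAIM);
		if (ret)
			goto out;
	}

	scsi_req(rq)->cmd_len = cdb_len;
	memcpy(scsi_req(rq)->cmd, cdb, cdb_len);
	rq->timeout = 60 * HZ;

	ret = blk_execute_rq(q, disk, rq, 0);
out:
	blk_put_request(rq);
	return ret;
}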
*/ /* The remaining fields need to be updated occasionally */ @@ -230,7 +231,7 @@ enum obj_req_flags { }; struct rbd_obj_request { - const char *object_name; + u64 object_no; u64 offset; /* object start byte */ u64 length; /* bytes from offset */ unsigned long flags; @@ -438,7 +439,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock); static struct kmem_cache *rbd_img_request_cache; static struct kmem_cache *rbd_obj_request_cache; -static struct kmem_cache *rbd_segment_name_cache; static int rbd_major; static DEFINE_IDA(rbd_dev_id_ida); @@ -973,6 +973,30 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) } /* + * returns the size of an object in the image + */ +static u32 rbd_obj_bytes(struct rbd_image_header *header) +{ + return 1U << header->obj_order; +} + +static void rbd_init_layout(struct rbd_device *rbd_dev) +{ + if (rbd_dev->header.stripe_unit == 0 || + rbd_dev->header.stripe_count == 0) { + rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header); + rbd_dev->header.stripe_count = 1; + } + + rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit; + rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count; + rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header); + rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ? + rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id; + RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); +} + +/* * Fill an rbd image header with information from the given format 1 * on-disk header. */ @@ -992,15 +1016,11 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, /* Allocate this now to avoid having to handle failure below */ if (first_time) { - size_t len; - - len = strnlen(ondisk->object_prefix, - sizeof (ondisk->object_prefix)); - object_prefix = kmalloc(len + 1, GFP_KERNEL); + object_prefix = kstrndup(ondisk->object_prefix, + sizeof(ondisk->object_prefix), + GFP_KERNEL); if (!object_prefix) return -ENOMEM; - memcpy(object_prefix, ondisk->object_prefix, len); - object_prefix[len] = '\0'; } /* Allocate the snapshot context and fill it in */ @@ -1051,12 +1071,7 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - header->crypt_type = ondisk->options.crypt_type; - header->comp_type = ondisk->options.comp_type; - /* The rest aren't used for format 1 images */ - header->stripe_unit = 0; - header->stripe_count = 0; - header->features = 0; + rbd_init_layout(rbd_dev); } else { ceph_put_snap_context(header->snapc); kfree(header->snap_names); @@ -1232,42 +1247,9 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) rbd_dev->mapping.features = 0; } -static void rbd_segment_name_free(const char *name) -{ - /* The explicit cast here is needed to drop the const qualifier */ - - kmem_cache_free(rbd_segment_name_cache, (void *)name); -} - -static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) -{ - char *name; - u64 segment; - int ret; - char *name_format; - - name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO); - if (!name) - return NULL; - segment = offset >> rbd_dev->header.obj_order; - name_format = "%s.%012llx"; - if (rbd_dev->image_format == 2) - name_format = "%s.%016llx"; - ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format, - rbd_dev->header.object_prefix, segment); - if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) { - pr_err("error formatting segment name for #%llu (%d)\n", - segment, ret); - rbd_segment_name_free(name); - name = NULL; - 
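With rbd object names now derived from a 64-bit object number, mapping an image offset to an object becomes pure arithmetic on obj_order (the log2 of the object size). A sketch of the two derived quantities the code above works with:

#include <linux/types.h>

static inline u64 demo_object_no(u64 img_offset, u8 obj_order)
{
	return img_offset >> obj_order;		/* which object */
}

static inline u64 demo_object_off(u64 img_offset, u8 obj_order)
{
	/* offset within that object */
	return img_offset & (((u64)1 << obj_order) - 1);
}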
} - - return name; -} - static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) { - u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; + u64 segment_size = rbd_obj_bytes(&rbd_dev->header); return offset & (segment_size - 1); } @@ -1275,7 +1257,7 @@ static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) static u64 rbd_segment_length(struct rbd_device *rbd_dev, u64 offset, u64 length) { - u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; + u64 segment_size = rbd_obj_bytes(&rbd_dev->header); offset &= segment_size - 1; @@ -1287,14 +1269,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, } /* - * returns the size of an object in the image - */ -static u64 rbd_obj_bytes(struct rbd_image_header *header) -{ - return 1 << header->obj_order; -} - -/* * bio helpers */ @@ -1535,7 +1509,7 @@ static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { dout("%s: obj %p (was %d)\n", __func__, obj_request, - atomic_read(&obj_request->kref.refcount)); + kref_read(&obj_request->kref)); kref_get(&obj_request->kref); } @@ -1544,14 +1518,14 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) { rbd_assert(obj_request != NULL); dout("%s: obj %p (was %d)\n", __func__, obj_request, - atomic_read(&obj_request->kref.refcount)); + kref_read(&obj_request->kref)); kref_put(&obj_request->kref, rbd_obj_request_destroy); } static void rbd_img_request_get(struct rbd_img_request *img_request) { dout("%s: img %p (was %d)\n", __func__, img_request, - atomic_read(&img_request->kref.refcount)); + kref_read(&img_request->kref)); kref_get(&img_request->kref); } @@ -1562,7 +1536,7 @@ static void rbd_img_request_put(struct rbd_img_request *img_request) { rbd_assert(img_request != NULL); dout("%s: img %p (was %d)\n", __func__, img_request, - atomic_read(&img_request->kref.refcount)); + kref_read(&img_request->kref)); if (img_request_child_test(img_request)) kref_put(&img_request->kref, rbd_parent_request_destroy); else @@ -1623,7 +1597,9 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; - dout("%s %p osd_req %p\n", __func__, obj_request, osd_req); + dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__, + obj_request, obj_request->object_no, obj_request->offset, + obj_request->length, osd_req); if (obj_request_img_data_test(obj_request)) { WARN_ON(obj_request->callback != rbd_img_obj_callback); rbd_img_request_get(obj_request->img_request); @@ -1631,44 +1607,6 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request) ceph_osdc_start_request(osd_req->r_osdc, osd_req, false); } -static void rbd_obj_request_end(struct rbd_obj_request *obj_request) -{ - dout("%s %p\n", __func__, obj_request); - ceph_osdc_cancel_request(obj_request->osd_req); -} - -/* - * Wait for an object request to complete. If interrupted, cancel the - * underlying osd request. 
- * - * @timeout: in jiffies, 0 means "wait forever" - */ -static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request, - unsigned long timeout) -{ - long ret; - - dout("%s %p\n", __func__, obj_request); - ret = wait_for_completion_interruptible_timeout( - &obj_request->completion, - ceph_timeout_jiffies(timeout)); - if (ret <= 0) { - if (ret == 0) - ret = -ETIMEDOUT; - rbd_obj_request_end(obj_request); - } else { - ret = 0; - } - - dout("%s %p ret %d\n", __func__, obj_request, (int)ret); - return ret; -} - -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) -{ - return __rbd_obj_request_wait(obj_request, 0); -} - static void rbd_img_request_complete(struct rbd_img_request *img_request) { @@ -1955,8 +1893,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req) rbd_osd_call_callback(obj_request); break; default: - rbd_warn(NULL, "%s: unsupported op %hu", - obj_request->object_name, (unsigned short) opcode); + rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d", + obj_request->object_no, opcode); break; } @@ -1980,6 +1918,40 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) osd_req->r_data_offset = obj_request->offset; } +static struct ceph_osd_request * +__rbd_osd_req_create(struct rbd_device *rbd_dev, + struct ceph_snap_context *snapc, + int num_ops, unsigned int flags, + struct rbd_obj_request *obj_request) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_osd_request *req; + const char *name_format = rbd_dev->image_format == 1 ? + RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT; + + req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO); + if (!req) + return NULL; + + req->r_flags = flags; + req->r_callback = rbd_osd_req_callback; + req->r_priv = obj_request; + + req->r_base_oloc.pool = rbd_dev->layout.pool_id; + if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, + rbd_dev->header.object_prefix, obj_request->object_no)) + goto err_req; + + if (ceph_osdc_alloc_messages(req, GFP_NOIO)) + goto err_req; + + return req; + +err_req: + ceph_osdc_put_request(req); + return NULL; +} + /* * Create an osd request. A read request has one osd op (read). * A write request has either one (watch) or two (hint+write) osd ops. 
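__rbd_osd_req_create() above renders the object name on demand with ceph_oid_aprintf() instead of caching a preformatted string per request. A sketch of just that step; the format strings mirror RBD_V1_DATA_FORMAT/RBD_V2_DATA_FORMAT from rbd_types.h:

#include <linux/ceph/osdmap.h>
#include <linux/gfp.h>
#include <linux/types.h>

static int demo_format_oid(struct ceph_object_id *oid, int image_format,
			   const char *prefix, u64 object_no)
{
	const char *fmt = image_format == 1 ? "%s.%012llx" : "%s.%016llx";

	/* May fail under memory pressure; the caller unwinds the request. */
	return ceph_oid_aprintf(oid, GFP_NOIO, fmt, prefix, object_no);
}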
@@ -1993,8 +1965,6 @@ static struct ceph_osd_request *rbd_osd_req_create( struct rbd_obj_request *obj_request) { struct ceph_snap_context *snapc = NULL; - struct ceph_osd_client *osdc; - struct ceph_osd_request *osd_req; if (obj_request_img_data_test(obj_request) && (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) { @@ -2009,35 +1979,9 @@ static struct ceph_osd_request *rbd_osd_req_create( rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2)); - /* Allocate and initialize the request, for the num_ops ops */ - - osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, - GFP_NOIO); - if (!osd_req) - goto fail; - - if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) - osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; - else - osd_req->r_flags = CEPH_OSD_FLAG_READ; - - osd_req->r_callback = rbd_osd_req_callback; - osd_req->r_priv = obj_request; - - osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id; - if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", - obj_request->object_name)) - goto fail; - - if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) - goto fail; - - return osd_req; - -fail: - ceph_osdc_put_request(osd_req); - return NULL; + return __rbd_osd_req_create(rbd_dev, snapc, num_ops, + (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ? + CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request); } /* @@ -2050,10 +1994,6 @@ static struct ceph_osd_request * rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) { struct rbd_img_request *img_request; - struct ceph_snap_context *snapc; - struct rbd_device *rbd_dev; - struct ceph_osd_client *osdc; - struct ceph_osd_request *osd_req; int num_osd_ops = 3; rbd_assert(obj_request_img_data_test(obj_request)); @@ -2065,77 +2005,34 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) if (img_request_discard_test(img_request)) num_osd_ops = 2; - /* Allocate and initialize the request, for all the ops */ - - snapc = img_request->snapc; - rbd_dev = img_request->rbd_dev; - osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops, - false, GFP_NOIO); - if (!osd_req) - goto fail; - - osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; - osd_req->r_callback = rbd_osd_req_callback; - osd_req->r_priv = obj_request; - - osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id; - if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", - obj_request->object_name)) - goto fail; - - if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) - goto fail; - - return osd_req; - -fail: - ceph_osdc_put_request(osd_req); - return NULL; + return __rbd_osd_req_create(img_request->rbd_dev, + img_request->snapc, num_osd_ops, + CEPH_OSD_FLAG_WRITE, obj_request); } - static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) { ceph_osdc_put_request(osd_req); } -/* object_name is assumed to be a non-null pointer and NUL-terminated */ - -static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, - u64 offset, u64 length, - enum obj_request_type type) +static struct rbd_obj_request * +rbd_obj_request_create(enum obj_request_type type) { struct rbd_obj_request *obj_request; - size_t size; - char *name; rbd_assert(obj_request_type_valid(type)); - size = strlen(object_name) + 1; - name = kmalloc(size, GFP_NOIO); - if (!name) - return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); - if (!obj_request) { - kfree(name); + if (!obj_request) return NULL; - } - 
obj_request->object_name = memcpy(name, object_name, size); - obj_request->offset = offset; - obj_request->length = length; - obj_request->flags = 0; obj_request->which = BAD_WHICH; obj_request->type = type; INIT_LIST_HEAD(&obj_request->links); init_completion(&obj_request->completion); kref_init(&obj_request->kref); - dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, - offset, length, (int)type, obj_request); - + dout("%s %p\n", __func__, obj_request); return obj_request; } @@ -2170,8 +2067,6 @@ static void rbd_obj_request_destroy(struct kref *kref) break; } - kfree(obj_request->object_name); - obj_request->object_name = NULL; kmem_cache_free(rbd_obj_request_cache, obj_request); } @@ -2546,22 +2441,18 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, while (resid) { struct ceph_osd_request *osd_req; - const char *object_name; - u64 offset; - u64 length; + u64 object_no = img_offset >> rbd_dev->header.obj_order; + u64 offset = rbd_segment_offset(rbd_dev, img_offset); + u64 length = rbd_segment_length(rbd_dev, img_offset, resid); - object_name = rbd_segment_name(rbd_dev, img_offset); - if (!object_name) - goto out_unwind; - offset = rbd_segment_offset(rbd_dev, img_offset); - length = rbd_segment_length(rbd_dev, img_offset, resid); - obj_request = rbd_obj_request_create(object_name, - offset, length, type); - /* object request has its own copy of the object name */ - rbd_segment_name_free(object_name); + obj_request = rbd_obj_request_create(type); if (!obj_request) goto out_unwind; + obj_request->object_no = object_no; + obj_request->offset = offset; + obj_request->length = length; + /* * set obj_request->img_request before creating the * osd_request so that it gets the right snapc @@ -2771,7 +2662,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) * child image to which the original request was to be sent. */ img_offset = obj_request->img_offset - obj_request->offset; - length = (u64)1 << rbd_dev->header.obj_order; + length = rbd_obj_bytes(&rbd_dev->header); /* * There is no defined parent data beyond the parent @@ -2900,11 +2791,12 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) size_t size; int ret; - stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0, - OBJ_REQUEST_PAGES); + stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES); if (!stat_request) return -ENOMEM; + stat_request->object_no = obj_request->object_no; + stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, stat_request); if (!stat_request->osd_req) { @@ -3983,17 +3875,17 @@ out: * returned in the outbound buffer, or a negative error code. */ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, - const char *object_name, - const char *class_name, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, const char *method_name, const void *outbound, size_t outbound_size, void *inbound, size_t inbound_size) { - struct rbd_obj_request *obj_request; - struct page **pages; - u32 page_count; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct page *req_page = NULL; + struct page *reply_page; int ret; /* @@ -4003,61 +3895,35 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, * method. Currently if this is present it will be a * snapshot id. 
*/ - page_count = (u32)calc_pages_for(0, inbound_size); - pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - ret = -ENOMEM; - obj_request = rbd_obj_request_create(object_name, 0, inbound_size, - OBJ_REQUEST_PAGES); - if (!obj_request) - goto out; - - obj_request->pages = pages; - obj_request->page_count = page_count; + if (outbound) { + if (outbound_size > PAGE_SIZE) + return -E2BIG; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, - obj_request); - if (!obj_request->osd_req) - goto out; - - osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL, - class_name, method_name); - if (outbound_size) { - struct ceph_pagelist *pagelist; - - pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); - if (!pagelist) - goto out; + req_page = alloc_page(GFP_KERNEL); + if (!req_page) + return -ENOMEM; - ceph_pagelist_init(pagelist); - ceph_pagelist_append(pagelist, outbound, outbound_size); - osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0, - pagelist); + memcpy(page_address(req_page), outbound, outbound_size); } - osd_req_op_cls_response_data_pages(obj_request->osd_req, 0, - obj_request->pages, inbound_size, - 0, false, false); - - rbd_obj_request_submit(obj_request); - ret = rbd_obj_request_wait(obj_request); - if (ret) - goto out; - ret = obj_request->result; - if (ret < 0) - goto out; + reply_page = alloc_page(GFP_KERNEL); + if (!reply_page) { + if (req_page) + __free_page(req_page); + return -ENOMEM; + } - rbd_assert(obj_request->xferred < (u64)INT_MAX); - ret = (int)obj_request->xferred; - ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); -out: - if (obj_request) - rbd_obj_request_put(obj_request); - else - ceph_release_page_vector(pages, page_count); + ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name, + CEPH_OSD_FLAG_READ, req_page, outbound_size, + reply_page, &inbound_size); + if (!ret) { + memcpy(inbound, page_address(reply_page), inbound_size); + ret = inbound_size; + } + if (req_page) + __free_page(req_page); + __free_page(reply_page); return ret; } @@ -4099,19 +3965,21 @@ static void rbd_queue_workfn(struct work_struct *work) bool must_be_locked; int result; - if (rq->cmd_type != REQ_TYPE_FS) { - dout("%s: non-fs request type %d\n", __func__, - (int) rq->cmd_type); - result = -EIO; - goto err; - } - - if (req_op(rq) == REQ_OP_DISCARD) + switch (req_op(rq)) { + case REQ_OP_DISCARD: op_type = OBJ_OP_DISCARD; - else if (req_op(rq) == REQ_OP_WRITE) + break; + case REQ_OP_WRITE: op_type = OBJ_OP_WRITE; - else + break; + case REQ_OP_READ: op_type = OBJ_OP_READ; + break; + default: + dout("%s: non-fs request type %d\n", __func__, req_op(rq)); + result = -EIO; + goto err; + } /* Ignore/skip any zero-length requests */ @@ -4254,63 +4122,46 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) } static int rbd_obj_read_sync(struct rbd_device *rbd_dev, - const char *object_name, - u64 offset, u64 length, void *buf) + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + void *buf, int buf_len) { - struct rbd_obj_request *obj_request; - struct page **pages = NULL; - u32 page_count; - size_t size; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_osd_request *req; + struct page **pages; + int num_pages = calc_pages_for(0, buf_len); int ret; - page_count = (u32) calc_pages_for(offset, length); - pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - ret = -ENOMEM; - obj_request = 
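The rbd_obj_method_sync() rewrite above replaces a hand-built OSD request with ceph_osdc_call(), which trades generality for simplicity: request and reply each live in a single page, hence the -E2BIG guard. A condensed sketch of that calling convention ("rbd" is the object class name, as in the driver):

#include <linux/ceph/osd_client.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/string.h>

static int demo_cls_call(struct ceph_osd_client *osdc,
			 struct ceph_object_id *oid,
			 struct ceph_object_locator *oloc,
			 const char *method, const void *out, size_t out_len,
			 void *in, size_t in_len)
{
	struct page *req_page = NULL, *reply_page;
	int ret;

	if (out_len > PAGE_SIZE)
		return -E2BIG;		/* single-page request only */

	if (out) {
		req_page = alloc_page(GFP_KERNEL);
		if (!req_page)
			return -ENOMEM;
		memcpy(page_address(req_page), out, out_len);
	}
	reply_page = alloc_page(GFP_KERNEL);
	if (!reply_page) {
		ret = -ENOMEM;
		goto out_req;
	}

	/* in_len is updated in place to the actual reply size. */
	ret = ceph_osdc_call(osdc, oid, oloc, "rbd", method,
			     CEPH_OSD_FLAG_READ, req_page, out_len,
			     reply_page, &in_len);
	if (!ret) {
		memcpy(in, page_address(reply_page), in_len);
		ret = in_len;
	}
	__free_page(reply_page);
out_req:
	if (req_page)
		__free_page(req_page);
	return ret;
}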
rbd_obj_request_create(object_name, offset, length, - OBJ_REQUEST_PAGES); - if (!obj_request) - goto out; - - obj_request->pages = pages; - obj_request->page_count = page_count; - - obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, - obj_request); - if (!obj_request->osd_req) - goto out; + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); + if (!req) + return -ENOMEM; - osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ, - offset, length, 0, 0); - osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0, - obj_request->pages, - obj_request->length, - obj_request->offset & ~PAGE_MASK, - false, false); + ceph_oid_copy(&req->r_base_oid, oid); + ceph_oloc_copy(&req->r_base_oloc, oloc); + req->r_flags = CEPH_OSD_FLAG_READ; - rbd_obj_request_submit(obj_request); - ret = rbd_obj_request_wait(obj_request); + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); if (ret) - goto out; + goto out_req; - ret = obj_request->result; - if (ret < 0) - goto out; + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto out_req; + } - rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); - size = (size_t) obj_request->xferred; - ceph_copy_from_page_vector(pages, buf, 0, size); - rbd_assert(size <= (size_t)INT_MAX); - ret = (int)size; -out: - if (obj_request) - rbd_obj_request_put(obj_request); - else - ceph_release_page_vector(pages, page_count); + osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0); + osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false, + true); + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + if (ret >= 0) + ceph_copy_from_page_vector(pages, buf, 0, ret); +out_req: + ceph_osdc_put_request(req); return ret; } @@ -4346,8 +4197,8 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) if (!ondisk) return -ENOMEM; - ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_oid.name, - 0, size, ondisk); + ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, ondisk, size); if (ret < 0) goto out; if ((size_t)ret < size) { @@ -4524,7 +4375,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_zeroes_data = 1; if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; disk->queue = q; @@ -4779,7 +4630,7 @@ static const struct attribute_group *rbd_attr_groups[] = { static void rbd_dev_release(struct device *dev); -static struct device_type rbd_device_type = { +static const struct device_type rbd_device_type = { .name = "rbd", .groups = rbd_attr_groups, .release = rbd_dev_release, @@ -4874,8 +4725,9 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); + rbd_dev->header.data_pool_id = CEPH_NOPOOL; ceph_oid_init(&rbd_dev->header_oid); - ceph_oloc_init(&rbd_dev->header_oloc); + rbd_dev->header_oloc.pool = spec->pool_id; mutex_init(&rbd_dev->watch_mutex); rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; @@ -4897,12 +4749,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, rbd_dev->rbd_client = rbdc; rbd_dev->spec = spec; - rbd_dev->layout.stripe_unit = 1 << RBD_MAX_OBJ_ORDER; - rbd_dev->layout.stripe_count = 1; - rbd_dev->layout.object_size = 1 << RBD_MAX_OBJ_ORDER; - rbd_dev->layout.pool_id = spec->pool_id; - RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); - return 
rbd_dev; } @@ -4968,10 +4814,10 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_size", - &snapid, sizeof (snapid), - &size_buf, sizeof (size_buf)); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_size", + &snapid, sizeof(snapid), + &size_buf, sizeof(size_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -5008,9 +4854,9 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_object_prefix", NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_object_prefix", + NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -5043,10 +4889,10 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 unsup; int ret; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_features", - &snapid, sizeof (snapid), - &features_buf, sizeof (features_buf)); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_features", + &snapid, sizeof(snapid), + &features_buf, sizeof(features_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -5105,10 +4951,9 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(rbd_dev->spec->snap_id); - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_parent", - &snapid, sizeof (snapid), - reply_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_parent", + &snapid, sizeof(snapid), reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -5208,9 +5053,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) u64 stripe_count; int ret; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_stripe_unit_count", NULL, 0, - (char *)&striping_info_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_stripe_unit_count", + NULL, 0, &striping_info_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -5224,7 +5069,7 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) * out, and only fail if the image has non-default values. 
*/ ret = -EINVAL; - obj_size = (u64)1 << rbd_dev->header.obj_order; + obj_size = rbd_obj_bytes(&rbd_dev->header); p = &striping_info_buf; stripe_unit = ceph_decode_64(&p); if (stripe_unit != obj_size) { @@ -5245,8 +5090,27 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) return 0; } +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +{ + __le64 data_pool_id; + int ret; + + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_data_pool", + NULL, 0, &data_pool_id, sizeof(data_pool_id)); + if (ret < 0) + return ret; + if (ret < sizeof(data_pool_id)) + return -EBADMSG; + + rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); + WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + return 0; +} + static char *rbd_dev_image_name(struct rbd_device *rbd_dev) { + CEPH_DEFINE_OID_ONSTACK(oid); size_t image_id_size; char *image_id; void *p; @@ -5274,10 +5138,10 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) if (!reply_buf) goto out; - ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, - "rbd", "dir_get_name", - image_id, image_id_size, - reply_buf, size); + ceph_oid_printf(&oid, "%s", RBD_DIRECTORY); + ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, + "dir_get_name", image_id, image_id_size, + reply_buf, size); if (ret < 0) goto out; p = reply_buf; @@ -5456,9 +5320,9 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_snapcontext", NULL, 0, - reply_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_snapcontext", + NULL, 0, reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -5521,10 +5385,9 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, return ERR_PTR(-ENOMEM); snapid = cpu_to_le64(snap_id); - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_snapshot_name", - &snapid, sizeof (snapid), - reply_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_snapshot_name", + &snapid, sizeof(snapid), reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) { snap_name = ERR_PTR(ret); @@ -5831,7 +5694,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) { int ret; size_t size; - char *object_name; + CEPH_DEFINE_OID_ONSTACK(oid); void *response; char *image_id; @@ -5851,12 +5714,12 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. 
*/ - size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); - object_name = kmalloc(size, GFP_NOIO); - if (!object_name) - return -ENOMEM; - sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); - dout("rbd id object name is %s\n", object_name); + ret = ceph_oid_aprintf(&oid, GFP_KERNEL, "%s%s", RBD_ID_PREFIX, + rbd_dev->spec->image_name); + if (ret) + return ret; + + dout("rbd id object name is %s\n", oid.name); /* Response will be an encoded string, which includes a length */ @@ -5869,9 +5732,9 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) /* If it doesn't exist we'll assume it's a format 1 image */ - ret = rbd_obj_method_sync(rbd_dev, object_name, - "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX); + ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, + "get_id", NULL, 0, + response, RBD_IMAGE_ID_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret == -ENOENT) { image_id = kstrdup("", GFP_KERNEL); @@ -5894,8 +5757,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) } out: kfree(response); - kfree(object_name); - + ceph_oid_destroy(&oid); return ret; } @@ -5942,14 +5804,20 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; } - /* No support for crypto and compression type format 2 images */ + if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev); + if (ret) + goto out_err; + } + + rbd_init_layout(rbd_dev); return 0; + out_err: rbd_dev->header.features = 0; kfree(rbd_dev->header.object_prefix); rbd_dev->header.object_prefix = NULL; - return ret; } @@ -6075,8 +5943,6 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) /* Record the header object name for this rbd image. */ rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); - - rbd_dev->header_oloc.pool = rbd_dev->layout.pool_id; if (rbd_dev->image_format == 1) ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", spec->image_name, RBD_SUFFIX); @@ -6469,27 +6335,16 @@ static int rbd_slab_init(void) if (!rbd_obj_request_cache) goto out_err; - rbd_assert(!rbd_segment_name_cache); - rbd_segment_name_cache = kmem_cache_create("rbd_segment_name", - CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL); - if (rbd_segment_name_cache) - return 0; -out_err: - kmem_cache_destroy(rbd_obj_request_cache); - rbd_obj_request_cache = NULL; + return 0; +out_err: kmem_cache_destroy(rbd_img_request_cache); rbd_img_request_cache = NULL; - return -ENOMEM; } static void rbd_slab_exit(void) { - rbd_assert(rbd_segment_name_cache); - kmem_cache_destroy(rbd_segment_name_cache); - rbd_segment_name_cache = NULL; - rbd_assert(rbd_obj_request_cache); kmem_cache_destroy(rbd_obj_request_cache); rbd_obj_request_cache = NULL; diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index 94f367db27b0..62ff50d3e7a6 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -25,8 +25,8 @@ */ #define RBD_HEADER_PREFIX "rbd_header." -#define RBD_DATA_PREFIX "rbd_data." #define RBD_ID_PREFIX "rbd_id." +#define RBD_V2_DATA_FORMAT "%s.%016llx" #define RBD_LOCK_NAME "rbd_lock" #define RBD_LOCK_TAG "internal" @@ -42,13 +42,14 @@ enum rbd_notify_op { /* * For format version 1, rbd image 'foo' consists of objects * foo.rbd - image metadata - * rb.<idhi>.<idlo>.00000000 - * rb.<idhi>.<idlo>.00000001 + * rb.<idhi>.<idlo>.<extra>.000000000000 + * rb.<idhi>.<idlo>.<extra>.000000000001 * ... - data * There is no notion of a persistent image id in rbd format 1. 
*/ #define RBD_SUFFIX ".rbd" +#define RBD_V1_DATA_FORMAT "%s.%012llx" #define RBD_DIRECTORY "rbd_directory" #define RBD_INFO "rbd_info" @@ -57,9 +58,6 @@ enum rbd_notify_op { #define RBD_MIN_OBJ_ORDER 16 #define RBD_MAX_OBJ_ORDER 30 -#define RBD_COMP_NONE 0 -#define RBD_CRYPT_NONE 0 - #define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" #define RBD_HEADER_SIGNATURE "RBD" #define RBD_HEADER_VERSION "001.005" diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index abf805e332e2..27833e4dae2a 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -1204,10 +1204,11 @@ static void skd_complete_special(struct skd_device *skdev, static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode, uint cmd_in, ulong arg) { - int rc = 0; + static const int sg_version_num = 30527; + int rc = 0, timeout; struct gendisk *disk = bdev->bd_disk; struct skd_device *skdev = disk->private_data; - void __user *p = (void *)arg; + int __user *p = (int __user *)arg; pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n", skdev->name, __func__, __LINE__, @@ -1218,12 +1219,18 @@ static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd_in) { case SG_SET_TIMEOUT: + rc = get_user(timeout, p); + if (!rc) + disk->queue->sg_timeout = clock_t_to_jiffies(timeout); + break; case SG_GET_TIMEOUT: + rc = jiffies_to_clock_t(disk->queue->sg_timeout); + break; case SG_GET_VERSION_NUM: - rc = scsi_cmd_ioctl(disk->queue, disk, mode, cmd_in, p); + rc = put_user(sg_version_num, p); break; case SG_IO: - rc = skd_ioctl_sg_io(skdev, mode, p); + rc = skd_ioctl_sg_io(skdev, mode, (void __user *)arg); break; default: diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index cab157331c4e..3f3a3ab3d50a 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -34,6 +34,7 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); #define VDC_TX_RING_SIZE 512 +#define VDC_DEFAULT_BLK_SIZE 512 #define WAITING_FOR_LINK_UP 0x01 #define WAITING_FOR_TX_SPACE 0x02 @@ -73,6 +74,7 @@ struct vdc_port { u32 vdisk_size; u8 vdisk_type; u8 vdisk_mtype; + u32 vdisk_phys_blksz; char disk_name[32]; }; @@ -88,6 +90,7 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) /* Ordered from largest major to lowest */ static struct vio_version vdc_versions[] = { + { .major = 1, .minor = 2 }, { .major = 1, .minor = 1 }, { .major = 1, .minor = 0 }, }; @@ -271,6 +274,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg) if (pkt->max_xfer_size < port->max_xfer_size) port->max_xfer_size = pkt->max_xfer_size; port->vdisk_block_size = pkt->vdisk_block_size; + + port->vdisk_phys_blksz = VDC_DEFAULT_BLK_SIZE; + if (vdc_version_supported(port, 1, 2)) + port->vdisk_phys_blksz = pkt->phys_block_size; + return 0; } else { printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name); @@ -754,6 +762,12 @@ static int probe_disk(struct vdc_port *port) if (err) return err; + /* Using version 1.2 means vdisk_phys_blksz should be set unless the + * disk is reserved by another system. 
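
The two new format strings make the object-name layouts explicit: format 1 appends a 12-digit hex object number to the image's block-name prefix, format 2 a 16-digit one. A hypothetical illustration (rbd_format_object_name() is not in the patch) of how a data object name is produced from a prefix and an object number:

    /* buf receives e.g. "rb.0.1234.abcd.000000000001" (v1)
     * or "rbd_data.102ae8944a.0000000000000001" (v2) */
    static int rbd_format_object_name(char *buf, size_t len, bool v2,
                                      const char *prefix, u64 obj_num)
    {
            return snprintf(buf, len,
                            v2 ? RBD_V2_DATA_FORMAT : RBD_V1_DATA_FORMAT,
                            prefix, obj_num);
    }
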
+ */ + if (vdc_version_supported(port, 1, 2) && !port->vdisk_phys_blksz) + return -ENODEV; + if (vdc_version_supported(port, 1, 1)) { /* vdisk_size should be set during the handshake, if it wasn't * then the underlying disk is reserved by another system @@ -829,6 +843,8 @@ static int probe_disk(struct vdc_port *port) } } + blk_queue_physical_block_size(q, port->vdisk_phys_blksz); + pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n", g->disk_name, port->vdisk_size, (port->vdisk_size >> (20 - 9)), @@ -910,7 +926,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) if (err) goto err_out_free_port; - port->vdisk_block_size = 512; + port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE; port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size); port->ring_cookies = ((port->max_xfer_size * port->vdisk_block_size) / PAGE_SIZE) + 2; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index aabd8e9d3035..61b3ffa4f458 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -20,7 +20,7 @@ #include <linux/stddef.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/timer.h> #include <linux/delay.h> #include <linux/fd.h> diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 0e93ad7b8511..c8e072caf56f 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host) if (!crq) return NULL; - rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); + rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL); if (IS_ERR(rq)) { spin_lock_irqsave(&host->lock, flags); carm_put_request(host, crq); @@ -620,7 +620,6 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) spin_unlock_irq(&host->lock); DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; crq->rq->special = crq; blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); @@ -661,7 +660,6 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) crq->msg_bucket = (u32) rc; DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; crq->rq->special = crq; blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 10332c24f961..1d4c9f8bc1e1 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -5,6 +5,7 @@ #include <linux/hdreg.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/interrupt.h> #include <linux/virtio.h> #include <linux/virtio_blk.h> #include <linux/scatterlist.h> @@ -12,6 +13,7 @@ #include <scsi/scsi_cmnd.h> #include <linux/idr.h> #include <linux/blk-mq.h> +#include <linux/blk-mq-virtio.h> #include <linux/numa.h> #define PART_BITS 4 @@ -52,11 +54,13 @@ struct virtio_blk { }; struct virtblk_req { - struct request *req; - struct virtio_blk_outhdr out_hdr; +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; u8 status; - u8 sense[SCSI_SENSE_BUFFERSIZE]; struct scatterlist sg[]; }; @@ -72,28 +76,88 @@ static inline int virtblk_result(struct virtblk_req *vbr) } } -static int __virtblk_add_req(struct virtqueue *vq, - struct virtblk_req *vbr, - struct scatterlist *data_sg, - bool have_data) +/* + * If this is a packet command we need a couple of additional 
headers. Behind + * the normal outhdr we put a segment with the scsi command block, and before + * the normal inhdr we put the sense data and the inhdr with additional status + * information. + */ +#ifdef CONFIG_VIRTIO_BLK_SCSI +static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr, + struct scatterlist *data_sg, bool have_data) { struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6]; unsigned int num_out = 0, num_in = 0; - __virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT); sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); sgs[num_out++] = &hdr; + sg_init_one(&cmd, vbr->sreq.cmd, vbr->sreq.cmd_len); + sgs[num_out++] = &cmd; + + if (have_data) { + if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) + sgs[num_out++] = data_sg; + else + sgs[num_out + num_in++] = data_sg; + } + + sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); + sgs[num_out + num_in++] = &sense; + sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); + sgs[num_out + num_in++] = &inhdr; + sg_init_one(&status, &vbr->status, sizeof(vbr->status)); + sgs[num_out + num_in++] = &status; + + return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); +} + +static inline void virtblk_scsi_request_done(struct request *req) +{ + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + struct virtio_blk *vblk = req->q->queuedata; + struct scsi_request *sreq = &vbr->sreq; + + sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); + sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); + req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); +} + +static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long data) +{ + struct gendisk *disk = bdev->bd_disk; + struct virtio_blk *vblk = disk->private_data; /* - * If this is a packet command we need a couple of additional headers. - * Behind the normal outhdr we put a segment with the scsi command - * block, and before the normal inhdr we put the sense data and the - * inhdr with additional status information. + * Only allow the generic SCSI ioctls if the host can support it. */
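
With CONFIG_VIRTIO_BLK_SCSI set and the host offering VIRTIO_BLK_F_SCSI, the ioctl above routes the generic SCSI ioctls through scsi_cmd_blk_ioctl(). A userspace sketch of the round trip, issuing a standard INQUIRY through SG_IO (the device path is illustrative):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <scsi/sg.h>

    static int vblk_inquiry(const char *dev)        /* e.g. "/dev/vda" */
    {
            unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };  /* INQUIRY */
            unsigned char buf[96], sense[32];
            struct sg_io_hdr io = { .interface_id = 'S' };
            int fd, ret;

            fd = open(dev, O_RDONLY);
            if (fd < 0)
                    return -1;

            io.cmd_len = sizeof(cdb);
            io.cmdp = cdb;
            io.dxfer_direction = SG_DXFER_FROM_DEV;
            io.dxferp = buf;
            io.dxfer_len = sizeof(buf);
            io.sbp = sense;
            io.mx_sb_len = sizeof(sense);
            io.timeout = 5000;                      /* milliseconds */

            ret = ioctl(fd, SG_IO, &io);    /* ENOTTY if host lacks F_SCSI */
            close(fd);
            return ret;
    }
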
- if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len); - sgs[num_out++] = &cmd; - } + if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) + return -ENOTTY; + + return scsi_cmd_blk_ioctl(bdev, mode, cmd, + (void __user *)data); +} +#else +static inline int virtblk_add_req_scsi(struct virtqueue *vq, + struct virtblk_req *vbr, struct scatterlist *data_sg, + bool have_data) +{ + return -EIO; +} +static inline void virtblk_scsi_request_done(struct request *req) +{ +} +#define virtblk_ioctl NULL +#endif /* CONFIG_VIRTIO_BLK_SCSI */ + +static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, + struct scatterlist *data_sg, bool have_data) +{ + struct scatterlist hdr, status, *sgs[3]; + unsigned int num_out = 0, num_in = 0; + + sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sgs[num_out++] = &hdr; if (have_data) { if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) @@ -102,14 +166,6 @@ static int __virtblk_add_req(struct virtqueue *vq, sgs[num_out + num_in++] = data_sg; } - if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); - sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); - sgs[num_out + num_in++] = &sense; - sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); - sgs[num_out + num_in++] = &inhdr; - } - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); sgs[num_out + num_in++] = &status; @@ -119,15 +175,16 @@ static int __virtblk_add_req(struct virtqueue *vq, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - struct virtio_blk *vblk = req->q->queuedata; int error = virtblk_result(vbr); - if (req->cmd_type == REQ_TYPE_BLOCK_PC) { - req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); - req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); - req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); - } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + switch (req_op(req)) { + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + virtblk_scsi_request_done(req); + break; + case REQ_OP_DRV_IN: req->errors = (error != 0); + break; } blk_mq_end_request(req, error); @@ -146,7 +203,9 @@ static void virtblk_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { - blk_mq_complete_request(vbr->req, vbr->req->errors); + struct request *req = blk_mq_rq_from_pdu(vbr); + + blk_mq_complete_request(req, req->errors); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) @@ -170,49 +229,50 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, int qid = hctx->queue_num; int err; bool notify = false; + u32 type; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); - vbr->req = req; - if (req_op(req) == REQ_OP_FLUSH) { - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH); - vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - } else { - switch (req->cmd_type) { - case REQ_TYPE_FS: - vbr->out_hdr.type = 0; - vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req)); - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - break; - case REQ_TYPE_BLOCK_PC: - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD); - vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - break; - case
REQ_TYPE_DRV_PRIV: - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); - vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - break; - default: - /* We don't put anything else in the queue. */ - BUG(); - } + switch (req_op(req)) { + case REQ_OP_READ: + case REQ_OP_WRITE: + type = 0; + break; + case REQ_OP_FLUSH: + type = VIRTIO_BLK_T_FLUSH; + break; + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + type = VIRTIO_BLK_T_SCSI_CMD; + break; + case REQ_OP_DRV_IN: + type = VIRTIO_BLK_T_GET_ID; + break; + default: + WARN_ON_ONCE(1); + return BLK_MQ_RQ_QUEUE_ERROR; } + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); + vbr->out_hdr.sector = type ? + 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); + vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); + blk_mq_start_request(req); - num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); + num = blk_rq_map_sg(hctx->queue, req, vbr->sg); if (num) { - if (rq_data_dir(vbr->req) == WRITE) + if (rq_data_dir(req) == WRITE) vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); else vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); + if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT) + err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num); + else + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); blk_mq_stop_hw_queue(hctx); @@ -242,10 +302,9 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct request *req; int err; - req = blk_get_request(q, READ, GFP_KERNEL); + req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL); if (IS_ERR(req)) return PTR_ERR(req); - req->cmd_type = REQ_TYPE_DRV_PRIV; err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); if (err) @@ -257,22 +316,6 @@ out: return err; } -static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long data) -{ - struct gendisk *disk = bdev->bd_disk; - struct virtio_blk *vblk = disk->private_data; - - /* - * Only allow the generic SCSI ioctls if the host can support it. - */ - if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) - return -ENOTTY; - - return scsi_cmd_blk_ioctl(bdev, mode, cmd, - (void __user *)data); -} - /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { @@ -385,6 +428,7 @@ static int init_vq(struct virtio_blk *vblk) struct virtqueue **vqs; unsigned short num_vqs; struct virtio_device *vdev = vblk->vdev; + struct irq_affinity desc = { 0, }; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, @@ -411,7 +455,8 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. 
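
The rewritten virtio_queue_rq() collapses the old cmd_type branches into a single req_op() switch. The same mapping, pulled out as a standalone helper for clarity (a hypothetical refactoring, not part of the patch):

    static int virtblk_req_type(struct request *req, u32 *type)
    {
            switch (req_op(req)) {
            case REQ_OP_READ:
            case REQ_OP_WRITE:
                    *type = 0;      /* T_IN/T_OUT OR'ed in from data dir */
                    return 0;
            case REQ_OP_FLUSH:
                    *type = VIRTIO_BLK_T_FLUSH;
                    return 0;
            case REQ_OP_SCSI_IN:
            case REQ_OP_SCSI_OUT:
                    *type = VIRTIO_BLK_T_SCSI_CMD;
                    return 0;
            case REQ_OP_DRV_IN:
                    *type = VIRTIO_BLK_T_GET_ID;    /* serial number query */
                    return 0;
            default:
                    return -EIO;    /* caller maps to BLK_MQ_RQ_QUEUE_ERROR */
            }
    }
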
*/ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + &desc); if (err) goto out; @@ -538,14 +583,25 @@ static int virtblk_init_request(void *data, struct request *rq, struct virtio_blk *vblk = data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); +#ifdef CONFIG_VIRTIO_BLK_SCSI + vbr->sreq.sense = vbr->sense; +#endif sg_init_table(vbr->sg, vblk->sg_elems); return 0; } +static int virtblk_map_queues(struct blk_mq_tag_set *set) +{ + struct virtio_blk *vblk = set->driver_data; + + return blk_mq_virtio_map_queues(set, vblk->vdev, 0); +} + static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, + .map_queues = virtblk_map_queues, }; static unsigned int virtblk_queue_depth; @@ -770,7 +826,7 @@ static void virtblk_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); + refc = kref_read(&disk_to_dev(vblk->disk)->kobj.kref); put_disk(vblk->disk); vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -821,7 +877,10 @@ static const struct virtio_device_id id_table[] = { static unsigned int features_legacy[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, - VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, + VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, +#ifdef CONFIG_VIRTIO_BLK_SCSI + VIRTIO_BLK_F_SCSI, +#endif VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 415e79b69d34..8fe61b5dc5a6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -38,8 +38,8 @@ struct backend_info { static struct kmem_cache *xen_blkif_cachep; static void connect(struct backend_info *); static int connect_ring(struct backend_info *); -static void backend_changed(struct xenbus_watch *, const char **, - unsigned int); +static void backend_changed(struct xenbus_watch *, const char *, + const char *); static void xen_blkif_free(struct xen_blkif *blkif); static void xen_vbd_free(struct xen_vbd *vbd); @@ -661,7 +661,7 @@ fail: * ready, connect. 
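
The xenbus watch infrastructure drops the old (vec, len) token array in favour of passing the fired path and the watch token directly, which is why backend_changed() and every other callback changes signature. A minimal callback under the new signature (my_watch_cb() is a hypothetical example):

    static void my_watch_cb(struct xenbus_watch *watch,
                            const char *path, const char *token)
    {
            /* path is the node that fired, e.g. ".../physical-device";
             * token identifies the watch registration */
            pr_debug("xenbus watch fired: %s (%s)\n", path, token);
    }
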
*/ static void backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { int err; unsigned major; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 265f1a7072e9..5067a0a952cb 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -865,7 +865,7 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo) static inline bool blkif_request_flush_invalid(struct request *req, struct blkfront_info *info) { - return ((req->cmd_type != REQ_TYPE_FS) || + return (blk_rq_is_passthrough(req) || ((req_op(req) == REQ_OP_FLUSH) && !info->feature_flush) || ((req->cmd_flags & REQ_FUA) && diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index c4328d9d9981..757dce2147e0 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -468,7 +468,7 @@ static struct request *ace_get_next_request(struct request_queue *q) struct request *req; while ((req = blk_peek_request(q)) != NULL) { - if (req->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(req)) break; blk_start_request(req); __blk_end_request_all(req, -EIO); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e5ab7d9e8c45..e27d89a36c34 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -45,27 +45,6 @@ static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; -static inline void deprecated_attr_warn(const char *name) -{ - pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n", - task_pid_nr(current), - current->comm, - name, - "See zram documentation."); -} - -#define ZRAM_ATTR_RO(name) \ -static ssize_t name##_show(struct device *d, \ - struct device_attribute *attr, char *b) \ -{ \ - struct zram *zram = dev_to_zram(d); \ - \ - deprecated_attr_warn(__stringify(name)); \ - return scnprintf(b, PAGE_SIZE, "%llu\n", \ - (u64)atomic64_read(&zram->stats.name)); \ -} \ -static DEVICE_ATTR_RO(name); - static inline bool init_done(struct zram *zram) { return zram->disksize; @@ -95,6 +74,17 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index, meta->table[index].value &= ~BIT(flag); } +static inline void zram_set_element(struct zram_meta *meta, u32 index, + unsigned long element) +{ + meta->table[index].element = element; +} + +static inline void zram_clear_element(struct zram_meta *meta, u32 index) +{ + meta->table[index].element = 0; +} + static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) { return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); @@ -117,7 +107,7 @@ static void zram_revalidate_disk(struct zram *zram) { revalidate_disk(zram->disk); /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ - zram->disk->queue->backing_dev_info.capabilities |= + zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; } @@ -167,31 +157,46 @@ static inline void update_used_max(struct zram *zram, } while (old_max != cur_max); } -static bool page_zero_filled(void *ptr) +static inline void zram_fill_page(char *ptr, unsigned long len, + unsigned long value) +{ + int i; + unsigned long *page = (unsigned long *)ptr; + + WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); + + if (likely(value == 0)) { + memset(ptr, 0, len); + } else { + for (i = 0; i < len / sizeof(*page); i++) + page[i] = value; + } +} + +static bool page_same_filled(void *ptr, unsigned long *element) { unsigned int pos; unsigned long *page; page = 
(unsigned long *)ptr; - for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { - if (page[pos]) + for (pos = 0; pos < PAGE_SIZE / sizeof(*page) - 1; pos++) { + if (page[pos] != page[pos + 1]) return false; } + *element = page[pos]; + return true; } -static void handle_zero_page(struct bio_vec *bvec) +static void handle_same_page(struct bio_vec *bvec, unsigned long element) { struct page *page = bvec->bv_page; void *user_mem; user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); - else - clear_page(user_mem); + zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element); kunmap_atomic(user_mem); flush_dcache_page(page); @@ -218,47 +223,6 @@ static ssize_t disksize_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); } -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("orig_data_size"); - return scnprintf(buf, PAGE_SIZE, "%llu\n", - (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("mem_used_total"); - down_read(&zram->init_lock); - if (init_done(zram)) { - struct zram_meta *meta = zram->meta; - val = zs_get_total_pages(meta->mem_pool); - } - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); -} - -static ssize_t mem_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val; - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("mem_limit"); - down_read(&zram->init_lock); - val = zram->limit_pages; - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); -} - static ssize_t mem_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -277,21 +241,6 @@ static ssize_t mem_limit_store(struct device *dev, return len; } -static ssize_t mem_used_max_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - - deprecated_attr_warn("mem_used_max"); - down_read(&zram->init_lock); - if (init_done(zram)) - val = atomic_long_read(&zram->stats.max_used_pages); - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); -} - static ssize_t mem_used_max_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -440,7 +389,7 @@ static ssize_t mm_stat_show(struct device *dev, mem_used << PAGE_SHIFT, zram->limit_pages << PAGE_SHIFT, max_used << PAGE_SHIFT, - (u64)atomic64_read(&zram->stats.zero_pages), + (u64)atomic64_read(&zram->stats.same_pages), pool_stats.pages_compacted); up_read(&zram->init_lock); @@ -467,26 +416,6 @@ static ssize_t debug_stat_show(struct device *dev, static DEVICE_ATTR_RO(io_stat); static DEVICE_ATTR_RO(mm_stat); static DEVICE_ATTR_RO(debug_stat); -ZRAM_ATTR_RO(num_reads); -ZRAM_ATTR_RO(num_writes); -ZRAM_ATTR_RO(failed_reads); -ZRAM_ATTR_RO(failed_writes); -ZRAM_ATTR_RO(invalid_io); -ZRAM_ATTR_RO(notify_free); -ZRAM_ATTR_RO(zero_pages); -ZRAM_ATTR_RO(compr_data_size); - -static inline bool zram_meta_get(struct zram *zram) -{ - if (atomic_inc_not_zero(&zram->refcount)) - return true; - return false; -} - -static inline void zram_meta_put(struct zram *zram) -{ - 
atomic_dec(&zram->refcount); -} static void zram_meta_free(struct zram_meta *meta, u64 disksize) { @@ -496,8 +425,11 @@ static void zram_meta_free(struct zram_meta *meta, u64 disksize) /* Free all pages that are still in this zram device */ for (index = 0; index < num_pages; index++) { unsigned long handle = meta->table[index].handle; - - if (!handle) + /* + * No memory is allocated for same element filled pages. + * Simply clear same page flag. + */ + if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) continue; zs_free(meta->mem_pool, handle); @@ -547,18 +479,20 @@ static void zram_free_page(struct zram *zram, size_t index) struct zram_meta *meta = zram->meta; unsigned long handle = meta->table[index].handle; - if (unlikely(!handle)) { - /* - * No memory is allocated for zero filled pages. - * Simply clear zero page flag. - */ - if (zram_test_flag(meta, index, ZRAM_ZERO)) { - zram_clear_flag(meta, index, ZRAM_ZERO); - atomic64_dec(&zram->stats.zero_pages); - } + /* + * No memory is allocated for same element filled pages. + * Simply clear same page flag. + */ + if (zram_test_flag(meta, index, ZRAM_SAME)) { + zram_clear_flag(meta, index, ZRAM_SAME); + zram_clear_element(meta, index); + atomic64_dec(&zram->stats.same_pages); return; } + if (!handle) + return; + zs_free(meta->mem_pool, handle); atomic64_sub(zram_get_obj_size(meta, index), @@ -581,9 +515,9 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) handle = meta->table[index].handle; size = zram_get_obj_size(meta, index); - if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { + if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) { bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - clear_page(mem); + zram_fill_page(mem, PAGE_SIZE, meta->table[index].element); return 0; } @@ -619,9 +553,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); if (unlikely(!meta->table[index].handle) || - zram_test_flag(meta, index, ZRAM_ZERO)) { + zram_test_flag(meta, index, ZRAM_SAME)) { bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - handle_zero_page(bvec); + handle_same_page(bvec, meta->table[index].element); return 0; } bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); @@ -669,6 +603,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct zram_meta *meta = zram->meta; struct zcomp_strm *zstrm = NULL; unsigned long alloced_pages; + unsigned long element; page = bvec->bv_page; if (is_partial_io(bvec)) { @@ -697,16 +632,17 @@ compress_again: uncmem = user_mem; } - if (page_zero_filled(uncmem)) { + if (page_same_filled(uncmem, &element)) { if (user_mem) kunmap_atomic(user_mem); /* Free memory associated with this sector now. 
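
page_same_filled() above generalises the old zero-page test with a pairwise comparison: if every word equals its right-hand neighbour, the whole page holds a single value, and that value is recorded so the page can be dropped and regenerated on read. The same test in isolation, as plain C (assumes nwords >= 1):

    #include <stdbool.h>
    #include <stddef.h>

    static bool words_same_filled(const unsigned long *buf, size_t nwords,
                                  unsigned long *element)
    {
            size_t pos;

            for (pos = 0; pos + 1 < nwords; pos++)
                    if (buf[pos] != buf[pos + 1])
                            return false;

            *element = buf[0];      /* any word; they are all equal */
            return true;
    }
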
*/ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - zram_set_flag(meta, index, ZRAM_ZERO); + zram_set_flag(meta, index, ZRAM_SAME); + zram_set_element(meta, index, element); bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - atomic64_inc(&zram->stats.zero_pages); + atomic64_inc(&zram->stats.same_pages); ret = 0; goto out; } @@ -944,22 +880,17 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; - if (unlikely(!zram_meta_get(zram))) - goto error; - blk_queue_split(queue, &bio, queue->bio_split); if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { atomic64_inc(&zram->stats.invalid_io); - goto put_zram; + goto error; } __zram_make_request(zram, bio); - zram_meta_put(zram); return BLK_QC_T_NONE; -put_zram: - zram_meta_put(zram); + error: bio_io_error(bio); return BLK_QC_T_NONE; @@ -989,13 +920,11 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, struct bio_vec bv; zram = bdev->bd_disk->private_data; - if (unlikely(!zram_meta_get(zram))) - goto out; if (!valid_io_request(zram, sector, PAGE_SIZE)) { atomic64_inc(&zram->stats.invalid_io); err = -EINVAL; - goto put_zram; + goto out; } index = sector >> SECTORS_PER_PAGE_SHIFT; @@ -1006,8 +935,6 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_offset = 0; err = zram_bvec_rw(zram, &bv, index, offset, is_write); -put_zram: - zram_meta_put(zram); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1040,17 +967,6 @@ static void zram_reset_device(struct zram *zram) meta = zram->meta; comp = zram->comp; disksize = zram->disksize; - /* - * Refcount will go down to 0 eventually and r/w handler - * cannot handle further I/O so it will bail out by - * check zram_meta_get. - */ - zram_meta_put(zram); - /* - * We want to free zram_meta in process context to avoid - * deadlock between reclaim path and any other locks. 
- */ - wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); @@ -1098,8 +1014,6 @@ static ssize_t disksize_store(struct device *dev, goto out_destroy_comp; } - init_waitqueue_head(&zram->io_done); - atomic_set(&zram->refcount, 1); zram->meta = meta; zram->comp = comp; zram->disksize = disksize; @@ -1188,10 +1102,8 @@ static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); static DEVICE_ATTR_WO(reset); -static DEVICE_ATTR_RO(orig_data_size); -static DEVICE_ATTR_RO(mem_used_total); -static DEVICE_ATTR_RW(mem_limit); -static DEVICE_ATTR_RW(mem_used_max); +static DEVICE_ATTR_WO(mem_limit); +static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); @@ -1199,17 +1111,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, &dev_attr_initstate.attr, &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_failed_reads.attr, - &dev_attr_failed_writes.attr, &dev_attr_compact.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, &dev_attr_mem_limit.attr, &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 74fcf10da374..caeff51f1571 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -61,7 +61,7 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* Page consists entirely of zeros */ - ZRAM_ZERO = ZRAM_FLAG_SHIFT, + ZRAM_SAME = ZRAM_FLAG_SHIFT, ZRAM_ACCESS, /* page is now accessed */ __NR_ZRAM_PAGEFLAGS, @@ -71,7 +71,10 @@ enum zram_pageflags { /* Allocated for each disk page */ struct zram_table_entry { - unsigned long handle; + union { + unsigned long handle; + unsigned long element; + }; unsigned long value; }; @@ -83,7 +86,7 @@ struct zram_stats { atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - atomic64_t zero_pages; /* no. of zero filled pages */ + atomic64_t same_pages; /* no. of same element filled pages */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ atomic64_t writestall; /* no. of write slow paths */ @@ -106,9 +109,6 @@ struct zram { unsigned long limit_pages; struct zram_stats stats; - atomic_t refcount; /* refcount for zram_meta */ - /* wait all IO under all of cpu are done */ - wait_queue_head_t io_done; /* * This is the limit on amount of *uncompressed* worth of data * we can store in a disk. |
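
The handle/element union at the end is disambiguated by the ZRAM_SAME bit in table[index].value: a same-filled slot stores the fill word in 'element' and owns no zsmalloc handle, which is why both the free and read paths test the flag before touching 'handle'. Abridged from zram_decompress_page() above:

    if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
            bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
            /* regenerate the page from the stored element;
             * no decompression needed */
            zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
            return 0;
    }
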