diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/DAC960.c | 18 | ||||
-rw-r--r-- | drivers/block/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/brd.c | 20 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 50 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 6 | ||||
-rw-r--r-- | drivers/block/floppy.c | 2 | ||||
-rw-r--r-- | drivers/block/loop.c | 16 | ||||
-rw-r--r-- | drivers/block/nvme.c | 3 | ||||
-rw-r--r-- | drivers/block/pktcdvd.c | 8 | ||||
-rw-r--r-- | drivers/block/rbd.c | 730 | ||||
-rw-r--r-- | drivers/block/rbd_types.h | 4 | ||||
-rw-r--r-- | drivers/block/sx8.c | 2 | ||||
-rw-r--r-- | drivers/block/ub.c | 39 | ||||
-rw-r--r-- | drivers/block/viodasd.c | 809 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 3 |
15 files changed, 518 insertions, 1194 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index e086fbbbe853..8db9089127c5 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1177,7 +1177,8 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T int TimeoutCounter; int i; - + memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); + if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) return DAC960_Failure(Controller, "DMA mask out of range"); Controller->BounceBufferLimit = DMA_BIT_MASK(32); @@ -4627,7 +4628,8 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) DAC960_Controller_T *Controller = Command->Controller; DAC960_CommandType_T CommandType = Command->CommandType; DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_IOCTL_Opcode_T CommandOpcode = CommandMailbox->Common.IOCTL_Opcode; + DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode; + DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode; DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus; if (CommandType == DAC960_ReadCommand || @@ -4699,7 +4701,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) { if (Controller->ShutdownMonitoringTimer) return; - if (CommandOpcode == DAC960_V2_GetControllerInfo) + if (IOCTLOpcode == DAC960_V2_GetControllerInfo) { DAC960_V2_ControllerInfo_T *NewControllerInfo = Controller->V2.NewControllerInformation; @@ -4719,14 +4721,14 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) memcpy(ControllerInfo, NewControllerInfo, sizeof(DAC960_V2_ControllerInfo_T)); } - else if (CommandOpcode == DAC960_V2_GetEvent) + else if (IOCTLOpcode == DAC960_V2_GetEvent) { if (CommandStatus == DAC960_V2_NormalCompletion) { DAC960_V2_ReportEvent(Controller, Controller->V2.Event); } Controller->V2.NextEventSequenceNumber++; } - else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && + else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && CommandStatus == DAC960_V2_NormalCompletion) { DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = @@ -4915,7 +4917,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) NewPhysicalDeviceInfo->LogicalUnit++; Controller->V2.PhysicalDeviceIndex++; } - else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) + else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) { unsigned int DeviceIndex; for (DeviceIndex = Controller->V2.PhysicalDeviceIndex; @@ -4938,7 +4940,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) } Controller->V2.NeedPhysicalDeviceInformation = false; } - else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid && + else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid && CommandStatus == DAC960_V2_NormalCompletion) { DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = @@ -5065,7 +5067,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) [LogicalDeviceNumber] = true; NewLogicalDeviceInfo->LogicalDeviceNumber++; } - else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid) + else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid) { int LogicalDriveNumber; for (LogicalDriveNumber = 0; diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 4e4c8a4a5fd3..a796407123c7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -354,7 +354,7 @@ config BLK_DEV_SX8 Use devices /dev/sx8/$N and /dev/sx8/$Np$M. config BLK_DEV_UB - tristate "Low Performance USB Block driver" + tristate "Low Performance USB Block driver (deprecated)" depends on USB help This driver supports certain USB attached storage devices diff --git a/drivers/block/brd.c b/drivers/block/brd.c index ec246437f5a4..531ceb31d0ff 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -242,9 +242,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src, page = brd_lookup_page(brd, sector); BUG_ON(!page); - dst = kmap_atomic(page, KM_USER1); + dst = kmap_atomic(page); memcpy(dst + offset, src, copy); - kunmap_atomic(dst, KM_USER1); + kunmap_atomic(dst); if (copy < n) { src += copy; @@ -253,9 +253,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src, page = brd_lookup_page(brd, sector); BUG_ON(!page); - dst = kmap_atomic(page, KM_USER1); + dst = kmap_atomic(page); memcpy(dst, src, copy); - kunmap_atomic(dst, KM_USER1); + kunmap_atomic(dst); } } @@ -273,9 +273,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd, copy = min_t(size_t, n, PAGE_SIZE - offset); page = brd_lookup_page(brd, sector); if (page) { - src = kmap_atomic(page, KM_USER1); + src = kmap_atomic(page); memcpy(dst, src + offset, copy); - kunmap_atomic(src, KM_USER1); + kunmap_atomic(src); } else memset(dst, 0, copy); @@ -285,9 +285,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd, copy = n - copy; page = brd_lookup_page(brd, sector); if (page) { - src = kmap_atomic(page, KM_USER1); + src = kmap_atomic(page); memcpy(dst, src, copy); - kunmap_atomic(src, KM_USER1); + kunmap_atomic(src); } else memset(dst, 0, copy); } @@ -309,7 +309,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, goto out; } - mem = kmap_atomic(page, KM_USER0); + mem = kmap_atomic(page); if (rw == READ) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); @@ -317,7 +317,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, flush_dcache_page(page); copy_to_brd(brd, mem + off, sector, len); } - kunmap_atomic(mem, KM_USER0); + kunmap_atomic(mem); out: return err; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 912f585a760f..3030201c69d8 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -289,25 +289,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) return page_nr; } -static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) +static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) { struct page *page = b->bm_pages[idx]; - return (unsigned long *) kmap_atomic(page, km); + return (unsigned long *) kmap_atomic(page); } static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) { - return __bm_map_pidx(b, idx, KM_IRQ1); + return __bm_map_pidx(b, idx); } -static void __bm_unmap(unsigned long *p_addr, const enum km_type km) +static void __bm_unmap(unsigned long *p_addr) { - kunmap_atomic(p_addr, km); + kunmap_atomic(p_addr); }; static void bm_unmap(unsigned long *p_addr) { - return __bm_unmap(p_addr, KM_IRQ1); + return __bm_unmap(p_addr); } /* long word offset of _bitmap_ sector */ @@ -543,15 +543,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) /* all but last page */ for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { - p_addr = __bm_map_pidx(b, idx, KM_USER0); + p_addr = __bm_map_pidx(b, idx); for (i = 0; i < LWPP; i++) bits += hweight_long(p_addr[i]); - __bm_unmap(p_addr, KM_USER0); + __bm_unmap(p_addr); cond_resched(); } /* last (or only) page */ last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; - p_addr = __bm_map_pidx(b, idx, KM_USER0); + p_addr = __bm_map_pidx(b, idx); for (i = 0; i < last_word; i++) bits += hweight_long(p_addr[i]); p_addr[last_word] &= cpu_to_lel(mask); @@ -559,7 +559,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) /* 32bit arch, may have an unused padding long */ if (BITS_PER_LONG == 32 && (last_word & 1) == 0) p_addr[last_word+1] = 0; - __bm_unmap(p_addr, KM_USER0); + __bm_unmap(p_addr); return bits; } @@ -970,11 +970,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must * to use pre-allocated page pool */ void *src, *dest; page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); - dest = kmap_atomic(page, KM_USER0); - src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); + dest = kmap_atomic(page); + src = kmap_atomic(b->bm_pages[page_nr]); memcpy(dest, src, PAGE_SIZE); - kunmap_atomic(src, KM_USER1); - kunmap_atomic(dest, KM_USER0); + kunmap_atomic(src); + kunmap_atomic(dest); bm_store_page_idx(page, page_nr); } else page = b->bm_pages[page_nr]; @@ -1163,7 +1163,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc * this returns a bit number, NOT a sector! */ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, - const int find_zero_bit, const enum km_type km) + const int find_zero_bit) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr; @@ -1178,7 +1178,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, while (bm_fo < b->bm_bits) { /* bit offset of the first bit in the page */ bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; - p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); + p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo)); if (find_zero_bit) i = find_next_zero_bit_le(p_addr, @@ -1187,7 +1187,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, i = find_next_bit_le(p_addr, PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); - __bm_unmap(p_addr, km); + __bm_unmap(p_addr); if (i < PAGE_SIZE*8) { bm_fo = bit_offset + i; if (bm_fo >= b->bm_bits) @@ -1215,7 +1215,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); - i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); + i = __bm_find_next(mdev, bm_fo, find_zero_bit); spin_unlock_irq(&b->bm_lock); return i; @@ -1239,13 +1239,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) { /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ - return __bm_find_next(mdev, bm_fo, 0, KM_USER1); + return __bm_find_next(mdev, bm_fo, 0); } unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) { /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ - return __bm_find_next(mdev, bm_fo, 1, KM_USER1); + return __bm_find_next(mdev, bm_fo, 1); } /* returns number of bits actually changed. @@ -1273,14 +1273,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); if (page_nr != last_page_nr) { if (p_addr) - __bm_unmap(p_addr, KM_IRQ1); + __bm_unmap(p_addr); if (c < 0) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); else if (c > 0) bm_set_page_need_writeout(b->bm_pages[last_page_nr]); changed_total += c; c = 0; - p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1); + p_addr = __bm_map_pidx(b, page_nr); last_page_nr = page_nr; } if (val) @@ -1289,7 +1289,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); } if (p_addr) - __bm_unmap(p_addr, KM_IRQ1); + __bm_unmap(p_addr); if (c < 0) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); else if (c > 0) @@ -1342,13 +1342,13 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, { int i; int bits; - unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); + unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); for (i = first_word; i < last_word; i++) { bits = hweight_long(paddr[i]); paddr[i] = ~0UL; b->bm_set += BITS_PER_LONG - bits; } - kunmap_atomic(paddr, KM_IRQ1); + kunmap_atomic(paddr); } /* Same thing as drbd_bm_set_bits, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index af2a25049bce..abfaacaaf346 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -179,7 +179,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); drbd_bcast_ev_helper(mdev, cmd); - ret = call_usermodehelper(usermode_helper, argv, envp, 1); + ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, @@ -2526,10 +2526,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev, page = e->pages; page_chain_for_each(page) { - void *d = kmap_atomic(page, KM_USER0); + void *d = kmap_atomic(page); unsigned l = min_t(unsigned, len, PAGE_SIZE); memcpy(tl, d, l); - kunmap_atomic(d, KM_USER0); + kunmap_atomic(d); tl = (unsigned short*)((char*)tl + l); len -= l; if (len == 0) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9baf11e86362..744f078f4dd8 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3832,7 +3832,7 @@ static int __floppy_read_block_0(struct block_device *bdev) bio.bi_size = size; bio.bi_bdev = bdev; bio.bi_sector = 0; - bio.bi_flags = BIO_QUIET; + bio.bi_flags = (1 << BIO_QUIET); init_completion(&complete); bio.bi_private = &complete; bio.bi_end_io = floppy_rb0_complete; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cd504353b278..bbca966f8f66 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -93,16 +93,16 @@ static int transfer_none(struct loop_device *lo, int cmd, struct page *loop_page, unsigned loop_off, int size, sector_t real_block) { - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; + char *raw_buf = kmap_atomic(raw_page) + raw_off; + char *loop_buf = kmap_atomic(loop_page) + loop_off; if (cmd == READ) memcpy(loop_buf, raw_buf, size); else memcpy(raw_buf, loop_buf, size); - kunmap_atomic(loop_buf, KM_USER1); - kunmap_atomic(raw_buf, KM_USER0); + kunmap_atomic(loop_buf); + kunmap_atomic(raw_buf); cond_resched(); return 0; } @@ -112,8 +112,8 @@ static int transfer_xor(struct loop_device *lo, int cmd, struct page *loop_page, unsigned loop_off, int size, sector_t real_block) { - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; + char *raw_buf = kmap_atomic(raw_page) + raw_off; + char *loop_buf = kmap_atomic(loop_page) + loop_off; char *in, *out, *key; int i, keysize; @@ -130,8 +130,8 @@ static int transfer_xor(struct loop_device *lo, int cmd, for (i = 0; i < size; i++) *out++ = *in++ ^ key[(i & 511) % keysize]; - kunmap_atomic(loop_buf, KM_USER1); - kunmap_atomic(raw_buf, KM_USER0); + kunmap_atomic(loop_buf); + kunmap_atomic(raw_buf); cond_resched(); return 0; } diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index c1dc4d86c221..38a2d0631882 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -39,7 +39,8 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/types.h> -#include <linux/version.h> + +#include <asm-generic/io-64-nonatomic-lo-hi.h> #define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d59edeabd93f..ba66e4445f41 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -987,14 +987,14 @@ static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct pag while (copy_size > 0) { struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); - void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) + + void *vfrom = kmap_atomic(src_bvl->bv_page) + src_bvl->bv_offset + offs; void *vto = page_address(dst_page) + dst_offs; int len = min_t(int, copy_size, src_bvl->bv_len - offs); BUG_ON(len < 0); memcpy(vto, vfrom, len); - kunmap_atomic(vfrom, KM_USER0); + kunmap_atomic(vfrom); seg++; offs = 0; @@ -1019,10 +1019,10 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) offs = 0; for (f = 0; f < pkt->frames; f++) { if (bvec[f].bv_page != pkt->pages[p]) { - void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset; + void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset; void *vto = page_address(pkt->pages[p]) + offs; memcpy(vto, vfrom, CD_FRAMESIZE); - kunmap_atomic(vfrom, KM_USER0); + kunmap_atomic(vfrom); bvec[f].bv_page = pkt->pages[p]; bvec[f].bv_offset = offs; } else { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a6278e7e61a0..013c7a549fb6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -41,19 +41,35 @@ #include "rbd_types.h" -#define DRV_NAME "rbd" -#define DRV_NAME_LONG "rbd (rados block device)" +/* + * The basic unit of block I/O is a sector. It is interpreted in a + * number of contexts in Linux (blk, bio, genhd), but the default is + * universally 512 bytes. These symbols are just slightly more + * meaningful than the bare numbers they represent. + */ +#define SECTOR_SHIFT 9 +#define SECTOR_SIZE (1ULL << SECTOR_SHIFT) + +#define RBD_DRV_NAME "rbd" +#define RBD_DRV_NAME_LONG "rbd (rados block device)" #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ -#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) +#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) #define RBD_MAX_POOL_NAME_LEN 64 #define RBD_MAX_SNAP_NAME_LEN 32 #define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" +/* + * An RBD device name will be "rbd#", where the "rbd" comes from + * RBD_DRV_NAME above, and # is a unique integer identifier. + * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big + * enough to hold all possible device names. + */ #define DEV_NAME_LEN 32 +#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 @@ -66,7 +82,6 @@ struct rbd_image_header { __u8 obj_order; __u8 crypt_type; __u8 comp_type; - struct rw_semaphore snap_rwsem; struct ceph_snap_context *snapc; size_t snap_names_len; u64 snap_seq; @@ -83,7 +98,7 @@ struct rbd_options { }; /* - * an instance of the client. multiple devices may share a client. + * an instance of the client. multiple devices may share an rbd client. */ struct rbd_client { struct ceph_client *client; @@ -92,20 +107,9 @@ struct rbd_client { struct list_head node; }; -struct rbd_req_coll; - /* - * a single io request + * a request completion status */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; -}; - struct rbd_req_status { int done; int rc; @@ -122,6 +126,18 @@ struct rbd_req_coll { struct rbd_req_status status[0]; }; +/* + * a single io request + */ +struct rbd_request { + struct request *rq; /* blk layer request */ + struct bio *bio; /* cloned bio */ + struct page **pages; /* list of used pages */ + u64 len; + int coll_index; + struct rbd_req_coll *coll; +}; + struct rbd_snap { struct device dev; const char *name; @@ -140,7 +156,6 @@ struct rbd_device { struct gendisk *disk; /* blkdev's gendisk and rq */ struct request_queue *q; - struct ceph_client *client; struct rbd_client *rbd_client; char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ @@ -157,6 +172,8 @@ struct rbd_device { struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; + /* protects updating the header */ + struct rw_semaphore header_rwsem; char snap_name[RBD_MAX_SNAP_NAME_LEN]; u32 cur_snap; /* index+1 of current snapshot within snap context 0 - for the head */ @@ -171,15 +188,13 @@ struct rbd_device { struct device dev; }; -static struct bus_type rbd_bus_type = { - .name = "rbd", -}; - -static spinlock_t node_lock; /* protects client get/put */ - static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ + static LIST_HEAD(rbd_dev_list); /* devices */ -static LIST_HEAD(rbd_client_list); /* clients */ +static DEFINE_SPINLOCK(rbd_dev_list_lock); + +static LIST_HEAD(rbd_client_list); /* clients */ +static DEFINE_SPINLOCK(rbd_client_list_lock); static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); static void rbd_dev_release(struct device *dev); @@ -190,12 +205,32 @@ static ssize_t rbd_snap_add(struct device *dev, static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, struct rbd_snap *snap); +static ssize_t rbd_add(struct bus_type *bus, const char *buf, + size_t count); +static ssize_t rbd_remove(struct bus_type *bus, const char *buf, + size_t count); -static struct rbd_device *dev_to_rbd(struct device *dev) +static struct bus_attribute rbd_bus_attrs[] = { + __ATTR(add, S_IWUSR, NULL, rbd_add), + __ATTR(remove, S_IWUSR, NULL, rbd_remove), + __ATTR_NULL +}; + +static struct bus_type rbd_bus_type = { + .name = "rbd", + .bus_attrs = rbd_bus_attrs, +}; + +static void rbd_root_dev_release(struct device *dev) { - return container_of(dev, struct rbd_device, dev); } +static struct device rbd_root_dev = { + .init_name = "rbd", + .release = rbd_root_dev_release, +}; + + static struct device *rbd_get_dev(struct rbd_device *rbd_dev) { return get_device(&rbd_dev->dev); @@ -210,8 +245,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev); static int rbd_open(struct block_device *bdev, fmode_t mode) { - struct gendisk *disk = bdev->bd_disk; - struct rbd_device *rbd_dev = disk->private_data; + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; rbd_get_dev(rbd_dev); @@ -256,9 +290,11 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, kref_init(&rbdc->kref); INIT_LIST_HEAD(&rbdc->node); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + rbdc->client = ceph_create_client(opt, rbdc, 0, 0); if (IS_ERR(rbdc->client)) - goto out_rbdc; + goto out_mutex; opt = NULL; /* Now rbdc->client is responsible for opt */ ret = ceph_open_session(rbdc->client); @@ -267,16 +303,19 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, rbdc->rbd_opts = rbd_opts; - spin_lock(&node_lock); + spin_lock(&rbd_client_list_lock); list_add_tail(&rbdc->node, &rbd_client_list); - spin_unlock(&node_lock); + spin_unlock(&rbd_client_list_lock); + + mutex_unlock(&ctl_mutex); dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: ceph_destroy_client(rbdc->client); -out_rbdc: +out_mutex: + mutex_unlock(&ctl_mutex); kfree(rbdc); out_opt: if (opt) @@ -324,7 +363,7 @@ static int parse_rbd_opts_token(char *c, void *private) substring_t argstr[MAX_OPT_ARGS]; int token, intval, ret; - token = match_token((char *)c, rbdopt_tokens, argstr); + token = match_token(c, rbdopt_tokens, argstr); if (token < 0) return -EINVAL; @@ -357,58 +396,54 @@ static int parse_rbd_opts_token(char *c, void *private) * Get a ceph client with specific addr and configuration, if one does * not exist create it. */ -static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, - char *options) +static struct rbd_client *rbd_get_client(const char *mon_addr, + size_t mon_addr_len, + char *options) { struct rbd_client *rbdc; struct ceph_options *opt; - int ret; struct rbd_options *rbd_opts; rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); if (!rbd_opts) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; - ret = ceph_parse_options(&opt, options, mon_addr, - mon_addr + strlen(mon_addr), parse_rbd_opts_token, rbd_opts); - if (ret < 0) - goto done_err; + opt = ceph_parse_options(options, mon_addr, + mon_addr + mon_addr_len, + parse_rbd_opts_token, rbd_opts); + if (IS_ERR(opt)) { + kfree(rbd_opts); + return ERR_CAST(opt); + } - spin_lock(&node_lock); + spin_lock(&rbd_client_list_lock); rbdc = __rbd_client_find(opt); if (rbdc) { + /* using an existing client */ + kref_get(&rbdc->kref); + spin_unlock(&rbd_client_list_lock); + ceph_destroy_options(opt); kfree(rbd_opts); - /* using an existing client */ - kref_get(&rbdc->kref); - rbd_dev->rbd_client = rbdc; - rbd_dev->client = rbdc->client; - spin_unlock(&node_lock); - return 0; + return rbdc; } - spin_unlock(&node_lock); + spin_unlock(&rbd_client_list_lock); rbdc = rbd_client_create(opt, rbd_opts); - if (IS_ERR(rbdc)) { - ret = PTR_ERR(rbdc); - goto done_err; - } - rbd_dev->rbd_client = rbdc; - rbd_dev->client = rbdc->client; - return 0; -done_err: - kfree(rbd_opts); - return ret; + if (IS_ERR(rbdc)) + kfree(rbd_opts); + + return rbdc; } /* * Destroy ceph client * - * Caller must hold node_lock. + * Caller must hold rbd_client_list_lock. */ static void rbd_client_release(struct kref *kref) { @@ -428,11 +463,10 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_device *rbd_dev) { - spin_lock(&node_lock); + spin_lock(&rbd_client_list_lock); kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - spin_unlock(&node_lock); + spin_unlock(&rbd_client_list_lock); rbd_dev->rbd_client = NULL; - rbd_dev->client = NULL; } /* @@ -457,21 +491,19 @@ static int rbd_header_from_disk(struct rbd_image_header *header, gfp_t gfp_flags) { int i; - u32 snap_count = le32_to_cpu(ondisk->snap_count); - int ret = -ENOMEM; + u32 snap_count; - if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) { + if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) return -ENXIO; - } - init_rwsem(&header->snap_rwsem); - header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); + snap_count = le32_to_cpu(ondisk->snap_count); header->snapc = kmalloc(sizeof(struct ceph_snap_context) + - snap_count * - sizeof(struct rbd_image_snap_ondisk), + snap_count * sizeof (*ondisk), gfp_flags); if (!header->snapc) return -ENOMEM; + + header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); if (snap_count) { header->snap_names = kmalloc(header->snap_names_len, GFP_KERNEL); @@ -498,8 +530,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header, header->snapc->num_snaps = snap_count; header->total_snaps = snap_count; - if (snap_count && - allocated_snaps == snap_count) { + if (snap_count && allocated_snaps == snap_count) { for (i = 0; i < snap_count; i++) { header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id); @@ -518,7 +549,7 @@ err_names: kfree(header->snap_names); err_snapc: kfree(header->snapc); - return ret; + return -ENOMEM; } static int snap_index(struct rbd_image_header *header, int snap_num) @@ -542,35 +573,34 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name, int i; char *p = header->snap_names; - for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { - if (strcmp(snap_name, p) == 0) - break; - } - if (i == header->total_snaps) - return -ENOENT; - if (seq) - *seq = header->snapc->snaps[i]; + for (i = 0; i < header->total_snaps; i++) { + if (!strcmp(snap_name, p)) { - if (size) - *size = header->snap_sizes[i]; + /* Found it. Pass back its id and/or size */ - return i; + if (seq) + *seq = header->snapc->snaps[i]; + if (size) + *size = header->snap_sizes[i]; + return i; + } + p += strlen(p) + 1; /* Skip ahead to the next name */ + } + return -ENOENT; } -static int rbd_header_set_snap(struct rbd_device *dev, - const char *snap_name, - u64 *size) +static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) { struct rbd_image_header *header = &dev->header; struct ceph_snap_context *snapc = header->snapc; int ret = -ENOENT; - down_write(&header->snap_rwsem); + BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME)); - if (!snap_name || - !*snap_name || - strcmp(snap_name, "-") == 0 || - strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { + down_write(&dev->header_rwsem); + + if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME, + sizeof (RBD_SNAP_HEAD_NAME))) { if (header->total_snaps) snapc->seq = header->snap_seq; else @@ -580,7 +610,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, if (size) *size = header->image_size; } else { - ret = snap_by_name(header, snap_name, &snapc->seq, size); + ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); if (ret < 0) goto done; @@ -590,7 +620,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, ret = 0; done: - up_write(&header->snap_rwsem); + up_write(&dev->header_rwsem); return ret; } @@ -717,7 +747,7 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, /* split the bio. We'll release it either in the next call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / 512ULL); + bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); if (!bp) goto err_out; @@ -857,7 +887,7 @@ static int rbd_do_request(struct request *rq, struct timespec mtime = CURRENT_TIME; struct rbd_request *req_data; struct ceph_osd_request_head *reqhead; - struct rbd_image_header *header = &dev->header; + struct ceph_osd_client *osdc; req_data = kzalloc(sizeof(*req_data), GFP_NOIO); if (!req_data) { @@ -874,15 +904,13 @@ static int rbd_do_request(struct request *rq, dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); - down_read(&header->snap_rwsem); + down_read(&dev->header_rwsem); - req = ceph_osdc_alloc_request(&dev->client->osdc, flags, - snapc, - ops, - false, - GFP_NOIO, pages, bio); + osdc = &dev->rbd_client->client->osdc; + req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, + false, GFP_NOIO, pages, bio); if (!req) { - up_read(&header->snap_rwsem); + up_read(&dev->header_rwsem); ret = -ENOMEM; goto done_pages; } @@ -909,27 +937,27 @@ static int rbd_do_request(struct request *rq, layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_pg_preferred = cpu_to_le32(-1); layout->fl_pg_pool = cpu_to_le32(dev->poolid); - ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, - ofs, &len, &bno, req, ops); + ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, + req, ops); ceph_osdc_build_request(req, ofs, &len, ops, snapc, &mtime, req->r_oid, req->r_oid_len); - up_read(&header->snap_rwsem); + up_read(&dev->header_rwsem); if (linger_req) { - ceph_osdc_set_request_linger(&dev->client->osdc, req); + ceph_osdc_set_request_linger(osdc, req); *linger_req = req; } - ret = ceph_osdc_start_request(&dev->client->osdc, req, false); + ret = ceph_osdc_start_request(osdc, req, false); if (ret < 0) goto done_err; if (!rbd_cb) { - ret = ceph_osdc_wait_request(&dev->client->osdc, req); + ret = ceph_osdc_wait_request(osdc, req); if (ver) *ver = le64_to_cpu(req->r_reassert_version.version); dout("reassert_ver=%lld\n", @@ -1213,8 +1241,8 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) rc = __rbd_update_snaps(dev); mutex_unlock(&ctl_mutex); if (rc) - pr_warning(DRV_NAME "%d got notification but failed to update" - " snaps: %d\n", dev->major, rc); + pr_warning(RBD_DRV_NAME "%d got notification but failed to " + " update snaps: %d\n", dev->major, rc); rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); } @@ -1227,7 +1255,7 @@ static int rbd_req_sync_watch(struct rbd_device *dev, u64 ver) { struct ceph_osd_req_op *ops; - struct ceph_osd_client *osdc = &dev->client->osdc; + struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); if (ret < 0) @@ -1314,7 +1342,7 @@ static int rbd_req_sync_notify(struct rbd_device *dev, const char *obj) { struct ceph_osd_req_op *ops; - struct ceph_osd_client *osdc = &dev->client->osdc; + struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; struct ceph_osd_event *event; struct rbd_notify_info info; int payload_len = sizeof(u32) + sizeof(u32); @@ -1421,9 +1449,7 @@ static void rbd_rq_fn(struct request_queue *q) struct request *rq; struct bio_pair *bp = NULL; - rq = blk_fetch_request(q); - - while (1) { + while ((rq = blk_fetch_request(q))) { struct bio *bio; struct bio *rq_bio, *next_bio = NULL; bool do_write; @@ -1441,32 +1467,32 @@ static void rbd_rq_fn(struct request_queue *q) /* filter out block requests we don't understand */ if ((rq->cmd_type != REQ_TYPE_FS)) { __blk_end_request_all(rq, 0); - goto next; + continue; } /* deduce our operation (read, write) */ do_write = (rq_data_dir(rq) == WRITE); size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * 512ULL; + ofs = blk_rq_pos(rq) * SECTOR_SIZE; rq_bio = rq->bio; if (do_write && rbd_dev->read_only) { __blk_end_request_all(rq, -EROFS); - goto next; + continue; } spin_unlock_irq(q->queue_lock); dout("%s 0x%x bytes at 0x%llx\n", do_write ? "write" : "read", - size, blk_rq_pos(rq) * 512ULL); + size, blk_rq_pos(rq) * SECTOR_SIZE); num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); coll = rbd_alloc_coll(num_segs); if (!coll) { spin_lock_irq(q->queue_lock); __blk_end_request_all(rq, -ENOMEM); - goto next; + continue; } do { @@ -1512,8 +1538,6 @@ next_seg: if (bp) bio_pair_release(bp); spin_lock_irq(q->queue_lock); -next: - rq = blk_fetch_request(q); } } @@ -1526,13 +1550,17 @@ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) { struct rbd_device *rbd_dev = q->queuedata; - unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); - sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); - unsigned int bio_sectors = bmd->bi_size >> 9; + unsigned int chunk_sectors; + sector_t sector; + unsigned int bio_sectors; int max; + chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); + sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); + bio_sectors = bmd->bi_size >> SECTOR_SHIFT; + max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << 9; + + bio_sectors)) << SECTOR_SHIFT; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ if (max <= bvec->bv_len && bio_sectors == 0) @@ -1565,15 +1593,16 @@ static int rbd_read_header(struct rbd_device *rbd_dev, ssize_t rc; struct rbd_image_header_ondisk *dh; int snap_count = 0; - u64 snap_names_len = 0; u64 ver; + size_t len; + /* + * First reads the fixed-size header to determine the number + * of snapshots, then re-reads it, along with all snapshot + * records as well as their stored names. + */ + len = sizeof (*dh); while (1) { - int len = sizeof(*dh) + - snap_count * sizeof(struct rbd_image_snap_ondisk) + - snap_names_len; - - rc = -ENOMEM; dh = kmalloc(len, GFP_KERNEL); if (!dh) return -ENOMEM; @@ -1588,21 +1617,22 @@ static int rbd_read_header(struct rbd_device *rbd_dev, rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); if (rc < 0) { - if (rc == -ENXIO) { + if (rc == -ENXIO) pr_warning("unrecognized header format" " for image %s", rbd_dev->obj); - } goto out_dh; } - if (snap_count != header->total_snaps) { - snap_count = header->total_snaps; - snap_names_len = header->snap_names_len; - rbd_header_free(header); - kfree(dh); - continue; - } - break; + if (snap_count == header->total_snaps) + break; + + snap_count = header->total_snaps; + len = sizeof (*dh) + + snap_count * sizeof(struct rbd_image_snap_ondisk) + + header->snap_names_len; + + rbd_header_free(header); + kfree(dh); } header->obj_version = ver; @@ -1623,13 +1653,14 @@ static int rbd_header_add_snap(struct rbd_device *dev, int ret; void *data, *p, *e; u64 ver; + struct ceph_mon_client *monc; /* we should create a snapshot only if we're pointing at the head */ if (dev->cur_snap) return -EINVAL; - ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, - &new_snapid); + monc = &dev->rbd_client->client->monc; + ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid); dout("created snapid=%lld\n", new_snapid); if (ret < 0) return ret; @@ -1684,9 +1715,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) return ret; /* resized? */ - set_capacity(rbd_dev->disk, h.image_size / 512ULL); + set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE); - down_write(&rbd_dev->header.snap_rwsem); + down_write(&rbd_dev->header_rwsem); snap_seq = rbd_dev->header.snapc->seq; if (rbd_dev->header.total_snaps && @@ -1711,7 +1742,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) ret = __rbd_init_snaps_header(rbd_dev); - up_write(&rbd_dev->header.snap_rwsem); + up_write(&rbd_dev->header_rwsem); return ret; } @@ -1721,6 +1752,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) struct gendisk *disk; struct request_queue *q; int rc; + u64 segment_size; u64 total_size = 0; /* contact OSD, request size info about the object being mapped */ @@ -1733,7 +1765,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (rc) return rc; - rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); + rc = rbd_header_set_snap(rbd_dev, &total_size); if (rc) return rc; @@ -1743,7 +1775,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (!disk) goto out; - snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d", + snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", rbd_dev->id); disk->major = rbd_dev->major; disk->first_minor = 0; @@ -1756,11 +1788,15 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (!q) goto out_disk; + /* We use the default size, but let's be explicit about it. */ + blk_queue_physical_block_size(q, SECTOR_SIZE); + /* set io sizes to object size */ - blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL); - blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header)); - blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header)); - blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header)); + segment_size = rbd_obj_bytes(&rbd_dev->header); + blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_segment_size(q, segment_size); + blk_queue_io_min(q, segment_size); + blk_queue_io_opt(q, segment_size); blk_queue_merge_bvec(q, rbd_merge_bvec); disk->queue = q; @@ -1771,7 +1807,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->q = q; /* finally, announce the disk to the world */ - set_capacity(disk, total_size / 512ULL); + set_capacity(disk, total_size / SECTOR_SIZE); add_disk(disk); pr_info("%s: added with size 0x%llx\n", @@ -1788,10 +1824,15 @@ out: sysfs */ +static struct rbd_device *dev_to_rbd_dev(struct device *dev) +{ + return container_of(dev, struct rbd_device, dev); +} + static ssize_t rbd_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); } @@ -1799,7 +1840,7 @@ static ssize_t rbd_size_show(struct device *dev, static ssize_t rbd_major_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%d\n", rbd_dev->major); } @@ -1807,15 +1848,16 @@ static ssize_t rbd_major_show(struct device *dev, static ssize_t rbd_client_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client)); + return sprintf(buf, "client%lld\n", + ceph_client_id(rbd_dev->rbd_client->client)); } static ssize_t rbd_pool_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%s\n", rbd_dev->pool_name); } @@ -1823,7 +1865,7 @@ static ssize_t rbd_pool_show(struct device *dev, static ssize_t rbd_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%s\n", rbd_dev->obj); } @@ -1832,7 +1874,7 @@ static ssize_t rbd_snap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%s\n", rbd_dev->snap_name); } @@ -1842,7 +1884,7 @@ static ssize_t rbd_image_refresh(struct device *dev, const char *buf, size_t size) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int rc; int ret = size; @@ -1907,7 +1949,7 @@ static ssize_t rbd_snap_size_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%lld\n", (long long)snap->size); + return sprintf(buf, "%zd\n", snap->size); } static ssize_t rbd_snap_id_show(struct device *dev, @@ -1916,7 +1958,7 @@ static ssize_t rbd_snap_id_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%lld\n", (long long)snap->id); + return sprintf(buf, "%llu\n", (unsigned long long) snap->id); } static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); @@ -2088,19 +2130,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) return 0; } - -static void rbd_root_dev_release(struct device *dev) -{ -} - -static struct device rbd_root_dev = { - .init_name = "rbd", - .release = rbd_root_dev_release, -}; - static int rbd_bus_add_dev(struct rbd_device *rbd_dev) { - int ret = -ENOMEM; + int ret; struct device *dev; struct rbd_snap *snap; @@ -2114,7 +2146,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) dev_set_name(dev, "%d", rbd_dev->id); ret = device_register(dev); if (ret < 0) - goto done_free; + goto out; list_for_each_entry(snap, &rbd_dev->snaps, node) { ret = rbd_register_snap_dev(rbd_dev, snap, @@ -2122,10 +2154,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) if (ret < 0) break; } - - mutex_unlock(&ctl_mutex); - return 0; -done_free: +out: mutex_unlock(&ctl_mutex); return ret; } @@ -2154,104 +2183,250 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) return ret; } +static atomic64_t rbd_id_max = ATOMIC64_INIT(0); + +/* + * Get a unique rbd identifier for the given new rbd_dev, and add + * the rbd_dev to the global list. The minimum rbd id is 1. + */ +static void rbd_id_get(struct rbd_device *rbd_dev) +{ + rbd_dev->id = atomic64_inc_return(&rbd_id_max); + + spin_lock(&rbd_dev_list_lock); + list_add_tail(&rbd_dev->node, &rbd_dev_list); + spin_unlock(&rbd_dev_list_lock); +} + +/* + * Remove an rbd_dev from the global list, and record that its + * identifier is no longer in use. + */ +static void rbd_id_put(struct rbd_device *rbd_dev) +{ + struct list_head *tmp; + int rbd_id = rbd_dev->id; + int max_id; + + BUG_ON(rbd_id < 1); + + spin_lock(&rbd_dev_list_lock); + list_del_init(&rbd_dev->node); + + /* + * If the id being "put" is not the current maximum, there + * is nothing special we need to do. + */ + if (rbd_id != atomic64_read(&rbd_id_max)) { + spin_unlock(&rbd_dev_list_lock); + return; + } + + /* + * We need to update the current maximum id. Search the + * list to find out what it is. We're more likely to find + * the maximum at the end, so search the list backward. + */ + max_id = 0; + list_for_each_prev(tmp, &rbd_dev_list) { + struct rbd_device *rbd_dev; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_id > max_id) + max_id = rbd_id; + } + spin_unlock(&rbd_dev_list_lock); + + /* + * The max id could have been updated by rbd_id_get(), in + * which case it now accurately reflects the new maximum. + * Be careful not to overwrite the maximum value in that + * case. + */ + atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); +} + +/* + * Skips over white space at *buf, and updates *buf to point to the + * first found non-space character (if any). Returns the length of + * the token (string of non-white space characters) found. Note + * that *buf must be terminated with '\0'. + */ +static inline size_t next_token(const char **buf) +{ + /* + * These are the characters that produce nonzero for + * isspace() in the "C" and "POSIX" locales. + */ + const char *spaces = " \f\n\r\t\v"; + + *buf += strspn(*buf, spaces); /* Find start of token */ + + return strcspn(*buf, spaces); /* Return token length */ +} + +/* + * Finds the next token in *buf, and if the provided token buffer is + * big enough, copies the found token into it. The result, if + * copied, is guaranteed to be terminated with '\0'. Note that *buf + * must be terminated with '\0' on entry. + * + * Returns the length of the token found (not including the '\0'). + * Return value will be 0 if no token is found, and it will be >= + * token_size if the token would not fit. + * + * The *buf pointer will be updated to point beyond the end of the + * found token. Note that this occurs even if the token buffer is + * too small to hold it. + */ +static inline size_t copy_token(const char **buf, + char *token, + size_t token_size) +{ + size_t len; + + len = next_token(buf); + if (len < token_size) { + memcpy(token, *buf, len); + *(token + len) = '\0'; + } + *buf += len; + + return len; +} + +/* + * This fills in the pool_name, obj, obj_len, snap_name, obj_len, + * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based + * on the list of monitor addresses and other options provided via + * /sys/bus/rbd/add. + */ +static int rbd_add_parse_args(struct rbd_device *rbd_dev, + const char *buf, + const char **mon_addrs, + size_t *mon_addrs_size, + char *options, + size_t options_size) +{ + size_t len; + + /* The first four tokens are required */ + + len = next_token(&buf); + if (!len) + return -EINVAL; + *mon_addrs_size = len + 1; + *mon_addrs = buf; + + buf += len; + + len = copy_token(&buf, options, options_size); + if (!len || len >= options_size) + return -EINVAL; + + len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name)); + if (!len || len >= sizeof (rbd_dev->pool_name)) + return -EINVAL; + + len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj)); + if (!len || len >= sizeof (rbd_dev->obj)) + return -EINVAL; + + /* We have the object length in hand, save it. */ + + rbd_dev->obj_len = len; + + BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN + < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX)); + sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX); + + /* + * The snapshot name is optional, but it's an error if it's + * too long. If no snapshot is supplied, fill in the default. + */ + len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name)); + if (!len) + memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, + sizeof (RBD_SNAP_HEAD_NAME)); + else if (len >= sizeof (rbd_dev->snap_name)) + return -EINVAL; + + return 0; +} + static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count) { - struct ceph_osd_client *osdc; struct rbd_device *rbd_dev; - ssize_t rc = -ENOMEM; - int irc, new_id = 0; - struct list_head *tmp; - char *mon_dev_name; - char *options; + const char *mon_addrs = NULL; + size_t mon_addrs_size = 0; + char *options = NULL; + struct ceph_osd_client *osdc; + int rc = -ENOMEM; if (!try_module_get(THIS_MODULE)) return -ENODEV; - mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); - if (!mon_dev_name) - goto err_out_mod; - - options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); - if (!options) - goto err_mon_dev; - - /* new rbd_device object */ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) - goto err_out_opt; + goto err_nomem; + options = kmalloc(count, GFP_KERNEL); + if (!options) + goto err_nomem; /* static rbd_device initialization */ spin_lock_init(&rbd_dev->lock); INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header_rwsem); - init_rwsem(&rbd_dev->header.snap_rwsem); + init_rwsem(&rbd_dev->header_rwsem); /* generate unique id: find highest unique id, add one */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &rbd_dev_list) { - struct rbd_device *rbd_dev; + rbd_id_get(rbd_dev); - rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->id >= new_id) - new_id = rbd_dev->id + 1; - } - - rbd_dev->id = new_id; - - /* add to global list */ - list_add_tail(&rbd_dev->node, &rbd_dev_list); + /* Fill in the device name, now that we have its id. */ + BUILD_BUG_ON(DEV_NAME_LEN + < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); + sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id); /* parse add command */ - if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " - "%" __stringify(RBD_MAX_OPT_LEN) "s " - "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " - "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" - "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", - mon_dev_name, options, rbd_dev->pool_name, - rbd_dev->obj, rbd_dev->snap_name) < 4) { - rc = -EINVAL; - goto err_out_slot; - } - - if (rbd_dev->snap_name[0] == 0) - rbd_dev->snap_name[0] = '-'; - - rbd_dev->obj_len = strlen(rbd_dev->obj); - snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", - rbd_dev->obj, RBD_SUFFIX); - - /* initialize rest of new object */ - snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); - rc = rbd_get_client(rbd_dev, mon_dev_name, options); - if (rc < 0) - goto err_out_slot; + rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, + options, count); + if (rc) + goto err_put_id; - mutex_unlock(&ctl_mutex); + rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1, + options); + if (IS_ERR(rbd_dev->rbd_client)) { + rc = PTR_ERR(rbd_dev->rbd_client); + goto err_put_id; + } /* pick the pool */ - osdc = &rbd_dev->client->osdc; + osdc = &rbd_dev->rbd_client->client->osdc; rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); if (rc < 0) goto err_out_client; rbd_dev->poolid = rc; /* register our block device */ - irc = register_blkdev(0, rbd_dev->name); - if (irc < 0) { - rc = irc; + rc = register_blkdev(0, rbd_dev->name); + if (rc < 0) goto err_out_client; - } - rbd_dev->major = irc; + rbd_dev->major = rc; rc = rbd_bus_add_dev(rbd_dev); if (rc) goto err_out_blkdev; - /* set up and announce blkdev mapping */ + /* + * At this point cleanup in the event of an error is the job + * of the sysfs code (initiated by rbd_bus_del_dev()). + * + * Set up and announce blkdev mapping. + */ rc = rbd_init_disk(rbd_dev); if (rc) goto err_out_bus; @@ -2263,35 +2438,26 @@ static ssize_t rbd_add(struct bus_type *bus, return count; err_out_bus: - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - list_del_init(&rbd_dev->node); - mutex_unlock(&ctl_mutex); - /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); kfree(options); - kfree(mon_dev_name); return rc; err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_client: rbd_put_client(rbd_dev); - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); -err_out_slot: - list_del_init(&rbd_dev->node); - mutex_unlock(&ctl_mutex); - - kfree(rbd_dev); -err_out_opt: +err_put_id: + rbd_id_put(rbd_dev); +err_nomem: kfree(options); -err_mon_dev: - kfree(mon_dev_name); -err_out_mod: + kfree(rbd_dev); + dout("Error adding device %s\n", buf); module_put(THIS_MODULE); - return rc; + + return (ssize_t) rc; } static struct rbd_device *__rbd_get_dev(unsigned long id) @@ -2299,22 +2465,28 @@ static struct rbd_device *__rbd_get_dev(unsigned long id) struct list_head *tmp; struct rbd_device *rbd_dev; + spin_lock(&rbd_dev_list_lock); list_for_each(tmp, &rbd_dev_list) { rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->id == id) + if (rbd_dev->id == id) { + spin_unlock(&rbd_dev_list_lock); return rbd_dev; + } } + spin_unlock(&rbd_dev_list_lock); return NULL; } static void rbd_dev_release(struct device *dev) { - struct rbd_device *rbd_dev = - container_of(dev, struct rbd_device, dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) - ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc, + if (rbd_dev->watch_request) { + struct ceph_client *client = rbd_dev->rbd_client->client; + + ceph_osdc_unregister_linger_request(&client->osdc, rbd_dev->watch_request); + } if (rbd_dev->watch_event) rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); @@ -2323,6 +2495,9 @@ static void rbd_dev_release(struct device *dev) /* clean up and free blkdev */ rbd_free_disk(rbd_dev); unregister_blkdev(rbd_dev->major, rbd_dev->name); + + /* done with the id, and with the rbd_dev */ + rbd_id_put(rbd_dev); kfree(rbd_dev); /* release module ref */ @@ -2355,8 +2530,6 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - list_del_init(&rbd_dev->node); - __rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -2370,7 +2543,7 @@ static ssize_t rbd_snap_add(struct device *dev, const char *buf, size_t count) { - struct rbd_device *rbd_dev = dev_to_rbd(dev); + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; char *name = kmalloc(count + 1, GFP_KERNEL); if (!name) @@ -2406,12 +2579,6 @@ err_unlock: return ret; } -static struct bus_attribute rbd_bus_attrs[] = { - __ATTR(add, S_IWUSR, NULL, rbd_add), - __ATTR(remove, S_IWUSR, NULL, rbd_remove), - __ATTR_NULL -}; - /* * create control files in sysfs * /sys/bus/rbd/... @@ -2420,21 +2587,21 @@ static int rbd_sysfs_init(void) { int ret; - rbd_bus_type.bus_attrs = rbd_bus_attrs; - - ret = bus_register(&rbd_bus_type); - if (ret < 0) + ret = device_register(&rbd_root_dev); + if (ret < 0) return ret; - ret = device_register(&rbd_root_dev); + ret = bus_register(&rbd_bus_type); + if (ret < 0) + device_unregister(&rbd_root_dev); return ret; } static void rbd_sysfs_cleanup(void) { - device_unregister(&rbd_root_dev); bus_unregister(&rbd_bus_type); + device_unregister(&rbd_root_dev); } int __init rbd_init(void) @@ -2444,8 +2611,7 @@ int __init rbd_init(void) rc = rbd_sysfs_init(); if (rc) return rc; - spin_lock_init(&node_lock); - pr_info("loaded " DRV_NAME_LONG "\n"); + pr_info("loaded " RBD_DRV_NAME_LONG "\n"); return 0; } diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index fc6c678aa2cb..950708688f17 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -41,10 +41,6 @@ #define RBD_HEADER_SIGNATURE "RBD" #define RBD_HEADER_VERSION "001.005" -struct rbd_info { - __le64 max_id; -} __attribute__ ((packed)); - struct rbd_image_snap_ondisk { __le64 id; __le64 image_size; diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index e7472f567c9d..3fb6ab4c8b4e 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1120,7 +1120,7 @@ static inline void carm_handle_resp(struct carm_host *host, break; case MISC_GET_FW_VER: { struct carm_fw_ver *ver = (struct carm_fw_ver *) - mem + sizeof(struct carm_msg_get_fw_ver); + (mem + sizeof(struct carm_msg_get_fw_ver)); if (!error) { host->fw_ver = le32_to_cpu(ver->version); host->flags |= (ver->features & FL_FW_VER_MASK); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 7333b9e44411..fcec0225ac76 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -119,43 +119,6 @@ /* */ - -/* command block wrapper */ -struct bulk_cb_wrap { - __le32 Signature; /* contains 'USBC' */ - u32 Tag; /* unique per command id */ - __le32 DataTransferLength; /* size of data */ - u8 Flags; /* direction in bit 0 */ - u8 Lun; /* LUN */ - u8 Length; /* of of the CDB */ - u8 CDB[UB_MAX_CDB_SIZE]; /* max command */ -}; - -#define US_BULK_CB_WRAP_LEN 31 -#define US_BULK_CB_SIGN 0x43425355 /*spells out USBC */ -#define US_BULK_FLAG_IN 1 -#define US_BULK_FLAG_OUT 0 - -/* command status wrapper */ -struct bulk_cs_wrap { - __le32 Signature; /* should = 'USBS' */ - u32 Tag; /* same as original command */ - __le32 Residue; /* amount not transferred */ - u8 Status; /* see below */ -}; - -#define US_BULK_CS_WRAP_LEN 13 -#define US_BULK_CS_SIGN 0x53425355 /* spells out 'USBS' */ -#define US_BULK_STAT_OK 0 -#define US_BULK_STAT_FAIL 1 -#define US_BULK_STAT_PHASE 2 - -/* bulk-only class specific requests */ -#define US_BULK_RESET_REQUEST 0xff -#define US_BULK_GET_MAX_LUN 0xfe - -/* - */ struct ub_dev; #define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ @@ -2477,6 +2440,8 @@ static int __init ub_init(void) int rc; int i; + pr_info("'Low Performance USB Block' driver is deprecated. " + "Please switch to usb-storage\n"); for (i = 0; i < UB_QLOCK_NUM; i++) spin_lock_init(&ub_qlockv[i]); diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c deleted file mode 100644 index 9a5b2a2d616d..000000000000 --- a/drivers/block/viodasd.c +++ /dev/null @@ -1,809 +0,0 @@ -/* -*- linux-c -*- - * viodasd.c - * Authors: Dave Boutcher <boutcher@us.ibm.com> - * Ryan Arnold <ryanarn@us.ibm.com> - * Colin Devilbiss <devilbis@us.ibm.com> - * Stephen Rothwell - * - * (C) Copyright 2000-2004 IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This routine provides access to disk space (termed "DASD" in historical - * IBM terms) owned and managed by an OS/400 partition running on the - * same box as this Linux partition. - * - * All disk operations are performed by sending messages back and forth to - * the OS/400 partition. - */ - -#define pr_fmt(fmt) "viod: " fmt - -#include <linux/major.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/blkdev.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/mutex.h> -#include <linux/dma-mapping.h> -#include <linux/completion.h> -#include <linux/device.h> -#include <linux/scatterlist.h> - -#include <asm/uaccess.h> -#include <asm/vio.h> -#include <asm/iseries/hv_types.h> -#include <asm/iseries/hv_lp_event.h> -#include <asm/iseries/hv_lp_config.h> -#include <asm/iseries/vio.h> -#include <asm/firmware.h> - -MODULE_DESCRIPTION("iSeries Virtual DASD"); -MODULE_AUTHOR("Dave Boutcher"); -MODULE_LICENSE("GPL"); - -/* - * We only support 7 partitions per physical disk....so with minor - * numbers 0-255 we get a maximum of 32 disks. - */ -#define VIOD_GENHD_NAME "iseries/vd" - -#define VIOD_VERS "1.64" - -enum { - PARTITION_SHIFT = 3, - MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS, - MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name) -}; - -static DEFINE_MUTEX(viodasd_mutex); -static DEFINE_SPINLOCK(viodasd_spinlock); - -#define VIOMAXREQ 16 - -#define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0]) - -struct viodasd_waitevent { - struct completion com; - int rc; - u16 sub_result; - int max_disk; /* open */ -}; - -static const struct vio_error_entry viodasd_err_table[] = { - { 0x0201, EINVAL, "Invalid Range" }, - { 0x0202, EINVAL, "Invalid Token" }, - { 0x0203, EIO, "DMA Error" }, - { 0x0204, EIO, "Use Error" }, - { 0x0205, EIO, "Release Error" }, - { 0x0206, EINVAL, "Invalid Disk" }, - { 0x0207, EBUSY, "Can't Lock" }, - { 0x0208, EIO, "Already Locked" }, - { 0x0209, EIO, "Already Unlocked" }, - { 0x020A, EIO, "Invalid Arg" }, - { 0x020B, EIO, "Bad IFS File" }, - { 0x020C, EROFS, "Read Only Device" }, - { 0x02FF, EIO, "Internal Error" }, - { 0x0000, 0, NULL }, -}; - -/* - * Figure out the biggest I/O request (in sectors) we can accept - */ -#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA) - -/* - * Number of disk I/O requests we've sent to OS/400 - */ -static int num_req_outstanding; - -/* - * This is our internal structure for keeping track of disk devices - */ -struct viodasd_device { - u16 cylinders; - u16 tracks; - u16 sectors; - u16 bytes_per_sector; - u64 size; - int read_only; - spinlock_t q_lock; - struct gendisk *disk; - struct device *dev; -} viodasd_devices[MAX_DISKNO]; - -/* - * External open entry point. - */ -static int viodasd_open(struct block_device *bdev, fmode_t mode) -{ - struct viodasd_device *d = bdev->bd_disk->private_data; - HvLpEvent_Rc hvrc; - struct viodasd_waitevent we; - u16 flags = 0; - - if (d->read_only) { - if (mode & FMODE_WRITE) - return -EROFS; - flags = vioblockflags_ro; - } - - init_completion(&we.com); - - /* Send the open event to OS/400 */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockopen, - HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - (u64)(unsigned long)&we, VIOVERSION << 16, - ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32), - 0, 0, 0); - if (hvrc != 0) { - pr_warning("HV open failed %d\n", (int)hvrc); - return -EIO; - } - - wait_for_completion(&we.com); - - /* Check the return code */ - if (we.rc != 0) { - const struct vio_error_entry *err = - vio_lookup_rc(viodasd_err_table, we.sub_result); - - pr_warning("bad rc opening disk: %d:0x%04x (%s)\n", - (int)we.rc, we.sub_result, err->msg); - return -EIO; - } - - return 0; -} - -static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode) -{ - int ret; - - mutex_lock(&viodasd_mutex); - ret = viodasd_open(bdev, mode); - mutex_unlock(&viodasd_mutex); - - return ret; -} - - -/* - * External release entry point. - */ -static int viodasd_release(struct gendisk *disk, fmode_t mode) -{ - struct viodasd_device *d = disk->private_data; - HvLpEvent_Rc hvrc; - - mutex_lock(&viodasd_mutex); - /* Send the event to OS/400. We DON'T expect a response */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockclose, - HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - 0, VIOVERSION << 16, - ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */, - 0, 0, 0); - if (hvrc != 0) - pr_warning("HV close call failed %d\n", (int)hvrc); - - mutex_unlock(&viodasd_mutex); - - return 0; -} - - -/* External ioctl entry point. - */ -static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct gendisk *disk = bdev->bd_disk; - struct viodasd_device *d = disk->private_data; - - geo->sectors = d->sectors ? d->sectors : 32; - geo->heads = d->tracks ? d->tracks : 64; - geo->cylinders = d->cylinders ? d->cylinders : - get_capacity(disk) / (geo->sectors * geo->heads); - - return 0; -} - -/* - * Our file operations table - */ -static const struct block_device_operations viodasd_fops = { - .owner = THIS_MODULE, - .open = viodasd_unlocked_open, - .release = viodasd_release, - .getgeo = viodasd_getgeo, -}; - -/* - * End a request - */ -static void viodasd_end_request(struct request *req, int error, - int num_sectors) -{ - __blk_end_request(req, error, num_sectors << 9); -} - -/* - * Send an actual I/O request to OS/400 - */ -static int send_request(struct request *req) -{ - u64 start; - int direction; - int nsg; - u16 viocmd; - HvLpEvent_Rc hvrc; - struct vioblocklpevent *bevent; - struct HvLpEvent *hev; - struct scatterlist sg[VIOMAXBLOCKDMA]; - int sgindex; - struct viodasd_device *d; - unsigned long flags; - - start = (u64)blk_rq_pos(req) << 9; - - if (rq_data_dir(req) == READ) { - direction = DMA_FROM_DEVICE; - viocmd = viomajorsubtype_blockio | vioblockread; - } else { - direction = DMA_TO_DEVICE; - viocmd = viomajorsubtype_blockio | vioblockwrite; - } - - d = req->rq_disk->private_data; - - /* Now build the scatter-gather list */ - sg_init_table(sg, VIOMAXBLOCKDMA); - nsg = blk_rq_map_sg(req->q, req, sg); - nsg = dma_map_sg(d->dev, sg, nsg, direction); - - spin_lock_irqsave(&viodasd_spinlock, flags); - num_req_outstanding++; - - /* This optimization handles a single DMA block */ - if (nsg == 1) - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, viocmd, - HvLpEvent_AckInd_DoAck, - HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - (u64)(unsigned long)req, VIOVERSION << 16, - ((u64)DEVICE_NO(d) << 48), start, - ((u64)sg_dma_address(&sg[0])) << 32, - sg_dma_len(&sg[0])); - else { - bevent = (struct vioblocklpevent *) - vio_get_event_buffer(viomajorsubtype_blockio); - if (bevent == NULL) { - pr_warning("error allocating disk event buffer\n"); - goto error_ret; - } - - /* - * Now build up the actual request. Note that we store - * the pointer to the request in the correlation - * token so we can match the response up later - */ - memset(bevent, 0, sizeof(struct vioblocklpevent)); - hev = &bevent->event; - hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK | - HV_LP_EVENT_INT; - hev->xType = HvLpEvent_Type_VirtualIo; - hev->xSubtype = viocmd; - hev->xSourceLp = HvLpConfig_getLpIndex(); - hev->xTargetLp = viopath_hostLp; - hev->xSizeMinus1 = - offsetof(struct vioblocklpevent, u.rw_data.dma_info) + - (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1; - hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp); - hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp); - hev->xCorrelationToken = (u64)req; - bevent->version = VIOVERSION; - bevent->disk = DEVICE_NO(d); - bevent->u.rw_data.offset = start; - - /* - * Copy just the dma information from the sg list - * into the request - */ - for (sgindex = 0; sgindex < nsg; sgindex++) { - bevent->u.rw_data.dma_info[sgindex].token = - sg_dma_address(&sg[sgindex]); - bevent->u.rw_data.dma_info[sgindex].len = - sg_dma_len(&sg[sgindex]); - } - - /* Send the request */ - hvrc = HvCallEvent_signalLpEvent(&bevent->event); - vio_free_event_buffer(viomajorsubtype_blockio, bevent); - } - - if (hvrc != HvLpEvent_Rc_Good) { - pr_warning("error sending disk event to OS/400 (rc %d)\n", - (int)hvrc); - goto error_ret; - } - spin_unlock_irqrestore(&viodasd_spinlock, flags); - return 0; - -error_ret: - num_req_outstanding--; - spin_unlock_irqrestore(&viodasd_spinlock, flags); - dma_unmap_sg(d->dev, sg, nsg, direction); - return -1; -} - -/* - * This is the external request processing routine - */ -static void do_viodasd_request(struct request_queue *q) -{ - struct request *req; - - /* - * If we already have the maximum number of requests - * outstanding to OS/400 just bail out. We'll come - * back later. - */ - while (num_req_outstanding < VIOMAXREQ) { - req = blk_fetch_request(q); - if (req == NULL) - return; - /* check that request contains a valid command */ - if (req->cmd_type != REQ_TYPE_FS) { - viodasd_end_request(req, -EIO, blk_rq_sectors(req)); - continue; - } - /* Try sending the request */ - if (send_request(req) != 0) - viodasd_end_request(req, -EIO, blk_rq_sectors(req)); - } -} - -/* - * Probe a single disk and fill in the viodasd_device structure - * for it. - */ -static int probe_disk(struct viodasd_device *d) -{ - HvLpEvent_Rc hvrc; - struct viodasd_waitevent we; - int dev_no = DEVICE_NO(d); - struct gendisk *g; - struct request_queue *q; - u16 flags = 0; - -retry: - init_completion(&we.com); - - /* Send the open event to OS/400 */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockopen, - HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - (u64)(unsigned long)&we, VIOVERSION << 16, - ((u64)dev_no << 48) | ((u64)flags<< 32), - 0, 0, 0); - if (hvrc != 0) { - pr_warning("bad rc on HV open %d\n", (int)hvrc); - return 0; - } - - wait_for_completion(&we.com); - - if (we.rc != 0) { - if (flags != 0) - return 0; - /* try again with read only flag set */ - flags = vioblockflags_ro; - goto retry; - } - if (we.max_disk > (MAX_DISKNO - 1)) { - printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"), - MAX_DISKNO, we.max_disk + 1); - } - - /* Send the close event to OS/400. We DON'T expect a response */ - hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, - HvLpEvent_Type_VirtualIo, - viomajorsubtype_blockio | vioblockclose, - HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, - viopath_sourceinst(viopath_hostLp), - viopath_targetinst(viopath_hostLp), - 0, VIOVERSION << 16, - ((u64)dev_no << 48) | ((u64)flags << 32), - 0, 0, 0); - if (hvrc != 0) { - pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc); - return 0; - } - - if (d->dev == NULL) { - /* this is when we reprobe for new disks */ - if (vio_create_viodasd(dev_no) == NULL) { - pr_warning("cannot allocate virtual device for disk %d\n", - dev_no); - return 0; - } - /* - * The vio_create_viodasd will have recursed into this - * routine with d->dev set to the new vio device and - * will finish the setup of the disk below. - */ - return 1; - } - - /* create the request queue for the disk */ - spin_lock_init(&d->q_lock); - q = blk_init_queue(do_viodasd_request, &d->q_lock); - if (q == NULL) { - pr_warning("cannot allocate queue for disk %d\n", dev_no); - return 0; - } - g = alloc_disk(1 << PARTITION_SHIFT); - if (g == NULL) { - pr_warning("cannot allocate disk structure for disk %d\n", - dev_no); - blk_cleanup_queue(q); - return 0; - } - - d->disk = g; - blk_queue_max_segments(q, VIOMAXBLOCKDMA); - blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS); - g->major = VIODASD_MAJOR; - g->first_minor = dev_no << PARTITION_SHIFT; - if (dev_no >= 26) - snprintf(g->disk_name, sizeof(g->disk_name), - VIOD_GENHD_NAME "%c%c", - 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26)); - else - snprintf(g->disk_name, sizeof(g->disk_name), - VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26)); - g->fops = &viodasd_fops; - g->queue = q; - g->private_data = d; - g->driverfs_dev = d->dev; - set_capacity(g, d->size >> 9); - - pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n", - dev_no, (unsigned long)(d->size >> 9), - (unsigned long)(d->size >> 20), - (int)d->cylinders, (int)d->tracks, - (int)d->sectors, (int)d->bytes_per_sector, - d->read_only ? " (RO)" : ""); - - /* register us in the global list */ - add_disk(g); - return 1; -} - -/* returns the total number of scatterlist elements converted */ -static int block_event_to_scatterlist(const struct vioblocklpevent *bevent, - struct scatterlist *sg, int *total_len) -{ - int i, numsg; - const struct rw_data *rw_data = &bevent->u.rw_data; - static const int offset = - offsetof(struct vioblocklpevent, u.rw_data.dma_info); - static const int element_size = sizeof(rw_data->dma_info[0]); - - numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size; - if (numsg > VIOMAXBLOCKDMA) - numsg = VIOMAXBLOCKDMA; - - *total_len = 0; - sg_init_table(sg, VIOMAXBLOCKDMA); - for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) { - sg_dma_address(&sg[i]) = rw_data->dma_info[i].token; - sg_dma_len(&sg[i]) = rw_data->dma_info[i].len; - *total_len += rw_data->dma_info[i].len; - } - return i; -} - -/* - * Restart all queues, starting with the one _after_ the disk given, - * thus reducing the chance of starvation of higher numbered disks. - */ -static void viodasd_restart_all_queues_starting_from(int first_index) -{ - int i; - - for (i = first_index + 1; i < MAX_DISKNO; ++i) - if (viodasd_devices[i].disk) - blk_run_queue(viodasd_devices[i].disk->queue); - for (i = 0; i <= first_index; ++i) - if (viodasd_devices[i].disk) - blk_run_queue(viodasd_devices[i].disk->queue); -} - -/* - * For read and write requests, decrement the number of outstanding requests, - * Free the DMA buffers we allocated. - */ -static int viodasd_handle_read_write(struct vioblocklpevent *bevent) -{ - int num_sg, num_sect, pci_direction, total_len; - struct request *req; - struct scatterlist sg[VIOMAXBLOCKDMA]; - struct HvLpEvent *event = &bevent->event; - unsigned long irq_flags; - struct viodasd_device *d; - int error; - spinlock_t *qlock; - - num_sg = block_event_to_scatterlist(bevent, sg, &total_len); - num_sect = total_len >> 9; - if (event->xSubtype == (viomajorsubtype_blockio | vioblockread)) - pci_direction = DMA_FROM_DEVICE; - else - pci_direction = DMA_TO_DEVICE; - req = (struct request *)bevent->event.xCorrelationToken; - d = req->rq_disk->private_data; - - dma_unmap_sg(d->dev, sg, num_sg, pci_direction); - - /* - * Since this is running in interrupt mode, we need to make sure - * we're not stepping on any global I/O operations - */ - spin_lock_irqsave(&viodasd_spinlock, irq_flags); - num_req_outstanding--; - spin_unlock_irqrestore(&viodasd_spinlock, irq_flags); - - error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO; - if (error) { - const struct vio_error_entry *err; - err = vio_lookup_rc(viodasd_err_table, bevent->sub_result); - pr_warning("read/write error %d:0x%04x (%s)\n", - event->xRc, bevent->sub_result, err->msg); - num_sect = blk_rq_sectors(req); - } - qlock = req->q->queue_lock; - spin_lock_irqsave(qlock, irq_flags); - viodasd_end_request(req, error, num_sect); - spin_unlock_irqrestore(qlock, irq_flags); - - /* Finally, try to get more requests off of this device's queue */ - viodasd_restart_all_queues_starting_from(DEVICE_NO(d)); - - return 0; -} - -/* This routine handles incoming block LP events */ -static void handle_block_event(struct HvLpEvent *event) -{ - struct vioblocklpevent *bevent = (struct vioblocklpevent *)event; - struct viodasd_waitevent *pwe; - - if (event == NULL) - /* Notification that a partition went away! */ - return; - /* First, we should NEVER get an int here...only acks */ - if (hvlpevent_is_int(event)) { - pr_warning("Yikes! got an int in viodasd event handler!\n"); - if (hvlpevent_need_ack(event)) { - event->xRc = HvLpEvent_Rc_InvalidSubtype; - HvCallEvent_ackLpEvent(event); - } - } - - switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { - case vioblockopen: - /* - * Handle a response to an open request. We get all the - * disk information in the response, so update it. The - * correlation token contains a pointer to a waitevent - * structure that has a completion in it. update the - * return code in the waitevent structure and post the - * completion to wake up the guy who sent the request - */ - pwe = (struct viodasd_waitevent *)event->xCorrelationToken; - pwe->rc = event->xRc; - pwe->sub_result = bevent->sub_result; - if (event->xRc == HvLpEvent_Rc_Good) { - const struct open_data *data = &bevent->u.open_data; - struct viodasd_device *device = - &viodasd_devices[bevent->disk]; - device->read_only = - bevent->flags & vioblockflags_ro; - device->size = data->disk_size; - device->cylinders = data->cylinders; - device->tracks = data->tracks; - device->sectors = data->sectors; - device->bytes_per_sector = data->bytes_per_sector; - pwe->max_disk = data->max_disk; - } - complete(&pwe->com); - break; - case vioblockclose: - break; - case vioblockread: - case vioblockwrite: - viodasd_handle_read_write(bevent); - break; - - default: - pr_warning("invalid subtype!"); - if (hvlpevent_need_ack(event)) { - event->xRc = HvLpEvent_Rc_InvalidSubtype; - HvCallEvent_ackLpEvent(event); - } - } -} - -/* - * Get the driver to reprobe for more disks. - */ -static ssize_t probe_disks(struct device_driver *drv, const char *buf, - size_t count) -{ - struct viodasd_device *d; - - for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) { - if (d->disk == NULL) - probe_disk(d); - } - return count; -} -static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks); - -static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id) -{ - struct viodasd_device *d = &viodasd_devices[vdev->unit_address]; - - d->dev = &vdev->dev; - if (!probe_disk(d)) - return -ENODEV; - return 0; -} - -static int viodasd_remove(struct vio_dev *vdev) -{ - struct viodasd_device *d; - - d = &viodasd_devices[vdev->unit_address]; - if (d->disk) { - del_gendisk(d->disk); - blk_cleanup_queue(d->disk->queue); - put_disk(d->disk); - d->disk = NULL; - } - d->dev = NULL; - return 0; -} - -/** - * viodasd_device_table: Used by vio.c to match devices that we - * support. - */ -static struct vio_device_id viodasd_device_table[] __devinitdata = { - { "block", "IBM,iSeries-viodasd" }, - { "", "" } -}; -MODULE_DEVICE_TABLE(vio, viodasd_device_table); - -static struct vio_driver viodasd_driver = { - .id_table = viodasd_device_table, - .probe = viodasd_probe, - .remove = viodasd_remove, - .driver = { - .name = "viodasd", - .owner = THIS_MODULE, - } -}; - -static int need_delete_probe; - -/* - * Initialize the whole device driver. Handle module and non-module - * versions - */ -static int __init viodasd_init(void) -{ - int rc; - - if (!firmware_has_feature(FW_FEATURE_ISERIES)) { - rc = -ENODEV; - goto early_fail; - } - - /* Try to open to our host lp */ - if (viopath_hostLp == HvLpIndexInvalid) - vio_set_hostlp(); - - if (viopath_hostLp == HvLpIndexInvalid) { - pr_warning("invalid hosting partition\n"); - rc = -EIO; - goto early_fail; - } - - pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp); - - /* register the block device */ - rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); - if (rc) { - pr_warning("Unable to get major number %d for %s\n", - VIODASD_MAJOR, VIOD_GENHD_NAME); - goto early_fail; - } - /* Actually open the path to the hosting partition */ - rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, - VIOMAXREQ + 2); - if (rc) { - pr_warning("error opening path to host partition %d\n", - viopath_hostLp); - goto unregister_blk; - } - - /* Initialize our request handler */ - vio_setHandler(viomajorsubtype_blockio, handle_block_event); - - rc = vio_register_driver(&viodasd_driver); - if (rc) { - pr_warning("vio_register_driver failed\n"); - goto unset_handler; - } - - /* - * If this call fails, it just means that we cannot dynamically - * add virtual disks, but the driver will still work fine for - * all existing disk, so ignore the failure. - */ - if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe)) - need_delete_probe = 1; - - return 0; - -unset_handler: - vio_clearHandler(viomajorsubtype_blockio); - viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); -unregister_blk: - unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); -early_fail: - return rc; -} -module_init(viodasd_init); - -void __exit viodasd_exit(void) -{ - if (need_delete_probe) - driver_remove_file(&viodasd_driver.driver, &driver_attr_probe); - vio_unregister_driver(&viodasd_driver); - vio_clearHandler(viomajorsubtype_blockio); - viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); - unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); -} -module_exit(viodasd_exit); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2f22874c0a37..d5e1ab956740 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1475,6 +1475,9 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_platform_pci_unplug) + return -ENODEV; + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", XENVBD_MAJOR, DEV_NAME); |