diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/aoe/aoe.h | 11 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 156 | ||||
-rw-r--r-- | drivers/block/aoe/aoedev.c | 3 | ||||
-rw-r--r-- | drivers/block/aoe/aoenet.c | 7 | ||||
-rw-r--r-- | drivers/block/cryptoloop.c | 2 | ||||
-rw-r--r-- | drivers/block/loop.c | 2 | ||||
-rw-r--r-- | drivers/block/loop.h | 85 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 3 | ||||
-rw-r--r-- | drivers/block/nbd.c | 11 | ||||
-rw-r--r-- | drivers/block/rbd.c | 184 | ||||
-rw-r--r-- | drivers/block/swim.c | 2 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 2 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 2 |
13 files changed, 337 insertions, 133 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 175649468c95..025c41d3cb33 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "81" +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ +#define VERSION "83" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -196,9 +196,11 @@ struct ktstate { struct completion rendez; struct task_struct *task; wait_queue_head_t *waitq; - int (*fn) (void); - char *name; + int (*fn) (int); + char name[12]; spinlock_t *lock; + int id; + int active; }; int aoeblk_init(void); @@ -222,6 +224,7 @@ int aoecmd_init(void); struct sk_buff *aoecmd_ata_id(struct aoedev *); void aoe_freetframe(struct frame *); void aoe_flush_iocq(void); +void aoe_flush_iocq_by_index(int); void aoe_end_request(struct aoedev *, struct request *, int); int aoe_ktstart(struct ktstate *k); void aoe_ktstop(struct ktstate *k); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index fc803ecbbce4..99cb944a002d 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoecmd.c * Filesystem request handling methods @@ -35,14 +35,27 @@ module_param(aoe_maxout, int, 0644); MODULE_PARM_DESC(aoe_maxout, "Only aoe_maxout outstanding packets for every MAC on eX.Y."); -static wait_queue_head_t ktiowq; -static struct ktstate kts; +/* The number of online cpus during module initialization gives us a + * convenient heuristic cap on the parallelism used for ktio threads + * doing I/O completion. It is not important that the cap equal the + * actual number of running CPUs at any given time, but because of CPU + * hotplug, we take care to use ncpus instead of using + * num_online_cpus() after module initialization. 
+ */ +static int ncpus; + +/* mutex lock used for synchronization while thread spawning */ +static DEFINE_MUTEX(ktio_spawn_lock); + +static wait_queue_head_t *ktiowq; +static struct ktstate *kts; /* io completion queue */ -static struct { +struct iocq_ktio { struct list_head head; spinlock_t lock; -} iocq; +}; +static struct iocq_ktio *iocq; static struct page *empty_page; @@ -1278,23 +1291,36 @@ out: * Returns true iff responses needing processing remain. */ static int -ktio(void) +ktio(int id) { struct frame *f; struct list_head *pos; int i; + int actual_id; for (i = 0; ; ++i) { if (i == MAXIOC) return 1; - if (list_empty(&iocq.head)) + if (list_empty(&iocq[id].head)) return 0; - pos = iocq.head.next; + pos = iocq[id].head.next; list_del(pos); - spin_unlock_irq(&iocq.lock); f = list_entry(pos, struct frame, head); + spin_unlock_irq(&iocq[id].lock); ktiocomplete(f); - spin_lock_irq(&iocq.lock); + + /* Figure out if extra threads are required. */ + actual_id = f->t->d->aoeminor % ncpus; + + if (!kts[actual_id].active) { + BUG_ON(id != 0); + mutex_lock(&ktio_spawn_lock); + if (!kts[actual_id].active + && aoe_ktstart(&kts[actual_id]) == 0) + kts[actual_id].active = 1; + mutex_unlock(&ktio_spawn_lock); + } + spin_lock_irq(&iocq[id].lock); } } @@ -1311,7 +1337,7 @@ kthread(void *vp) complete(&k->rendez); /* tell spawner we're running */ do { spin_lock_irq(k->lock); - more = k->fn(); + more = k->fn(k->id); if (!more) { add_wait_queue(k->waitq, &wait); __set_current_state(TASK_INTERRUPTIBLE); @@ -1340,7 +1366,7 @@ aoe_ktstart(struct ktstate *k) struct task_struct *task; init_completion(&k->rendez); - task = kthread_run(kthread, k, k->name); + task = kthread_run(kthread, k, "%s", k->name); if (task == NULL || IS_ERR(task)) return -ENOMEM; k->task = task; @@ -1353,13 +1379,24 @@ aoe_ktstart(struct ktstate *k) static void ktcomplete(struct frame *f, struct sk_buff *skb) { + int id; ulong flags; f->r_skb = skb; - spin_lock_irqsave(&iocq.lock, flags); - 
list_add_tail(&f->head, &iocq.head); - spin_unlock_irqrestore(&iocq.lock, flags); - wake_up(&ktiowq); + id = f->t->d->aoeminor % ncpus; + spin_lock_irqsave(&iocq[id].lock, flags); + if (!kts[id].active) { + spin_unlock_irqrestore(&iocq[id].lock, flags); + /* The thread with id has not been spawned yet, + * so delegate the work to the main thread and + * try spawning a new thread. + */ + id = 0; + spin_lock_irqsave(&iocq[id].lock, flags); + } + list_add_tail(&f->head, &iocq[id].head); + spin_unlock_irqrestore(&iocq[id].lock, flags); + wake_up(&ktiowq[id]); } struct sk_buff * @@ -1706,6 +1743,17 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) void aoe_flush_iocq(void) { + int i; + + for (i = 0; i < ncpus; i++) { + if (kts[i].active) + aoe_flush_iocq_by_index(i); + } +} + +void +aoe_flush_iocq_by_index(int id) +{ struct frame *f; struct aoedev *d; LIST_HEAD(flist); @@ -1713,9 +1761,9 @@ aoe_flush_iocq(void) struct sk_buff *skb; ulong flags; - spin_lock_irqsave(&iocq.lock, flags); - list_splice_init(&iocq.head, &flist); - spin_unlock_irqrestore(&iocq.lock, flags); + spin_lock_irqsave(&iocq[id].lock, flags); + list_splice_init(&iocq[id].head, &flist); + spin_unlock_irqrestore(&iocq[id].lock, flags); while (!list_empty(&flist)) { pos = flist.next; list_del(pos); @@ -1738,6 +1786,8 @@ int __init aoecmd_init(void) { void *p; + int i; + int ret; /* get_zeroed_page returns page with ref count 1 */ p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); @@ -1745,22 +1795,72 @@ aoecmd_init(void) return -ENOMEM; empty_page = virt_to_page(p); - INIT_LIST_HEAD(&iocq.head); - spin_lock_init(&iocq.lock); - init_waitqueue_head(&ktiowq); - kts.name = "aoe_ktio"; - kts.fn = ktio; - kts.waitq = &ktiowq; - kts.lock = &iocq.lock; - return aoe_ktstart(&kts); + ncpus = num_online_cpus(); + + iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL); + if (!iocq) + return -ENOMEM; + + kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL); + if (!kts) { + ret = -ENOMEM; + goto 
kts_fail; + } + + ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL); + if (!ktiowq) { + ret = -ENOMEM; + goto ktiowq_fail; + } + + mutex_init(&ktio_spawn_lock); + + for (i = 0; i < ncpus; i++) { + INIT_LIST_HEAD(&iocq[i].head); + spin_lock_init(&iocq[i].lock); + init_waitqueue_head(&ktiowq[i]); + snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i); + kts[i].fn = ktio; + kts[i].waitq = &ktiowq[i]; + kts[i].lock = &iocq[i].lock; + kts[i].id = i; + kts[i].active = 0; + } + kts[0].active = 1; + if (aoe_ktstart(&kts[0])) { + ret = -ENOMEM; + goto ktstart_fail; + } + return 0; + +ktstart_fail: + kfree(ktiowq); +ktiowq_fail: + kfree(kts); +kts_fail: + kfree(iocq); + + return ret; } void aoecmd_exit(void) { - aoe_ktstop(&kts); + int i; + + for (i = 0; i < ncpus; i++) + if (kts[i].active) + aoe_ktstop(&kts[i]); + aoe_flush_iocq(); + /* Free up the iocq and thread specific configuration + * allocated during startup. + */ + kfree(iocq); + kfree(kts); + kfree(ktiowq); + free_page((unsigned long) page_address(empty_page)); empty_page = NULL; } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 98f2965778b9..784c92e038d1 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoedev.c * AoE device utility functions; maintains device list. @@ -518,7 +518,6 @@ void aoedev_exit(void) { flush_scheduled_work(); - aoe_flush_iocq(); flush(NULL, 0, EXITING); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 71d3ea8d3006..63773a90581d 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. 
*/ /* * aoenet.c * Ethernet portion of AoE driver @@ -52,7 +52,7 @@ static struct sk_buff_head skbtxq; /* enters with txlock held */ static int -tx(void) __must_hold(&txlock) +tx(int id) __must_hold(&txlock) { struct sk_buff *skb; struct net_device *ifp; @@ -205,7 +205,8 @@ aoenet_init(void) kts.lock = &txlock; kts.fn = tx; kts.waitq = &txwq; - kts.name = "aoe_tx"; + kts.id = 0; + snprintf(kts.name, sizeof(kts.name), "aoe_tx%d", kts.id); if (aoe_ktstart(&kts)) return -EAGAIN; dev_add_pack(&aoe_pt); diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 8b6bb764b0a3..99e773cb70d0 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -25,9 +25,9 @@ #include <linux/string.h> #include <linux/crypto.h> #include <linux/blkdev.h> -#include <linux/loop.h> #include <linux/scatterlist.h> #include <asm/uaccess.h> +#include "loop.h" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("loop blockdevice transferfunction adaptor / CryptoAPI"); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d92d50fd84b7..40e715531aa6 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -63,7 +63,6 @@ #include <linux/init.h> #include <linux/swap.h> #include <linux/slab.h> -#include <linux/loop.h> #include <linux/compat.h> #include <linux/suspend.h> #include <linux/freezer.h> @@ -76,6 +75,7 @@ #include <linux/sysfs.h> #include <linux/miscdevice.h> #include <linux/falloc.h> +#include "loop.h" #include <asm/uaccess.h> diff --git a/drivers/block/loop.h b/drivers/block/loop.h new file mode 100644 index 000000000000..90df5d6485b6 --- /dev/null +++ b/drivers/block/loop.h @@ -0,0 +1,85 @@ +/* + * loop.h + * + * Written by Theodore Ts'o, 3/29/93. + * + * Copyright 1993 by Theodore Ts'o. Redistribution of this file is + * permitted under the GNU General Public License. 
+ */ +#ifndef _LINUX_LOOP_H +#define _LINUX_LOOP_H + +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <uapi/linux/loop.h> + +/* Possible states of device */ +enum { + Lo_unbound, + Lo_bound, + Lo_rundown, +}; + +struct loop_func_table; + +struct loop_device { + int lo_number; + int lo_refcnt; + loff_t lo_offset; + loff_t lo_sizelimit; + int lo_flags; + int (*transfer)(struct loop_device *, int cmd, + struct page *raw_page, unsigned raw_off, + struct page *loop_page, unsigned loop_off, + int size, sector_t real_block); + char lo_file_name[LO_NAME_SIZE]; + char lo_crypt_name[LO_NAME_SIZE]; + char lo_encrypt_key[LO_KEY_SIZE]; + int lo_encrypt_key_size; + struct loop_func_table *lo_encryption; + __u32 lo_init[2]; + kuid_t lo_key_owner; /* Who set the key */ + int (*ioctl)(struct loop_device *, int cmd, + unsigned long arg); + + struct file * lo_backing_file; + struct block_device *lo_device; + unsigned lo_blocksize; + void *key_data; + + gfp_t old_gfp_mask; + + spinlock_t lo_lock; + struct bio_list lo_bio_list; + unsigned int lo_bio_count; + int lo_state; + struct mutex lo_ctl_mutex; + struct task_struct *lo_thread; + wait_queue_head_t lo_event; + /* wait queue for incoming requests */ + wait_queue_head_t lo_req_wait; + + struct request_queue *lo_queue; + struct gendisk *lo_disk; +}; + +/* Support for loadable transfer modules */ +struct loop_func_table { + int number; /* filter type */ + int (*transfer)(struct loop_device *lo, int cmd, + struct page *raw_page, unsigned raw_off, + struct page *loop_page, unsigned loop_off, + int size, sector_t real_block); + int (*init)(struct loop_device *, const struct loop_info64 *); + /* release is called from loop_unregister_transfer or clr_fd */ + int (*release)(struct loop_device *); + int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); + struct module *owner; +}; + +int loop_register_transfer(struct loop_func_table *funcs); +int 
loop_unregister_transfer(int number); + +#endif diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 20dd52a2f92f..952dbfe22126 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4087,7 +4087,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, - dd, dd->numa_node, thd_name); + dd, dd->numa_node, "%s", + thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 037288e7874d..2dc3b5153f0d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -623,8 +623,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!nbd->sock) return -EINVAL; + nbd->disconnect = 1; + nbd_send_req(nbd, &sreq); - return 0; + return 0; } case NBD_CLEAR_SOCK: { @@ -654,6 +656,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd->sock = SOCKET_I(inode); if (max_part > 0) bdev->bd_invalidated = 1; + nbd->disconnect = 0; /* we're connected now */ return 0; } else { fput(file); @@ -714,7 +717,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, else blk_queue_flush(nbd->disk->queue, 0); - thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); + thread = kthread_create(nbd_thread, nbd, "%s", + nbd->disk->disk_name); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); return PTR_ERR(thread); @@ -742,6 +746,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, set_capacity(nbd->disk, 0); if (max_part > 0) ioctl_by_bdev(bdev, BLKRRPART, 0); + if (nbd->disconnect) /* user requested, ignore socket errors */ + return 0; return nbd->harderror; } @@ -750,7 +756,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, * This is for compatibility only. 
The queue is always cleared * by NBD_DO_IT or NBD_CLEAR_SOCK. */ - BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head)); return 0; case NBD_PRINT_DEBUG: diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 49394e3f31bc..4ad2ad9a5bb0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -372,7 +372,7 @@ enum rbd_dev_flags { RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ }; -static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ +static DEFINE_MUTEX(client_mutex); /* Serialize client creation */ static LIST_HEAD(rbd_dev_list); /* devices */ static DEFINE_SPINLOCK(rbd_dev_list_lock); @@ -489,10 +489,8 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) if (removing) return -ENOENT; - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); - mutex_unlock(&ctl_mutex); return 0; } @@ -507,9 +505,7 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) spin_unlock_irq(&rbd_dev->lock); rbd_assert(open_count_before > 0); - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); put_device(&rbd_dev->dev); - mutex_unlock(&ctl_mutex); } static const struct block_device_operations rbd_bd_ops = { @@ -520,7 +516,7 @@ static const struct block_device_operations rbd_bd_ops = { /* * Initialize an rbd client instance. Success or not, this function - * consumes ceph_opts. + * consumes ceph_opts. Caller holds client_mutex. 
*/ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) { @@ -535,30 +531,25 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) kref_init(&rbdc->kref); INIT_LIST_HEAD(&rbdc->node); - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); if (IS_ERR(rbdc->client)) - goto out_mutex; + goto out_rbdc; ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ ret = ceph_open_session(rbdc->client); if (ret < 0) - goto out_err; + goto out_client; spin_lock(&rbd_client_list_lock); list_add_tail(&rbdc->node, &rbd_client_list); spin_unlock(&rbd_client_list_lock); - mutex_unlock(&ctl_mutex); dout("%s: rbdc %p\n", __func__, rbdc); return rbdc; - -out_err: +out_client: ceph_destroy_client(rbdc->client); -out_mutex: - mutex_unlock(&ctl_mutex); +out_rbdc: kfree(rbdc); out_opt: if (ceph_opts) @@ -682,11 +673,13 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) { struct rbd_client *rbdc; + mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING); rbdc = rbd_client_find(ceph_opts); if (rbdc) /* using an existing client */ ceph_destroy_options(ceph_opts); else rbdc = rbd_client_create(ceph_opts); + mutex_unlock(&client_mutex); return rbdc; } @@ -840,7 +833,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, /* We won't fail any more, fill in the header */ - down_write(&rbd_dev->header_rwsem); if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; @@ -869,8 +861,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (rbd_dev->mapping.size != header->image_size) rbd_dev->mapping.size = header->image_size; - up_write(&rbd_dev->header_rwsem); - return 0; out_2big: ret = -EIO; @@ -1126,6 +1116,7 @@ static void zero_bio_chain(struct bio *chain, int start_ofs) buf = bvec_kmap_irq(bv, &flags); memset(buf + remainder, 0, bv->bv_len - remainder); + flush_dcache_page(bv->bv_page); 
bvec_kunmap_irq(buf, &flags); } pos += bv->bv_len; @@ -1153,11 +1144,12 @@ static void zero_pages(struct page **pages, u64 offset, u64 end) unsigned long flags; void *kaddr; - page_offset = (size_t)(offset & ~PAGE_MASK); - length = min(PAGE_SIZE - page_offset, (size_t)(end - offset)); + page_offset = offset & ~PAGE_MASK; + length = min_t(size_t, PAGE_SIZE - page_offset, end - offset); local_irq_save(flags); kaddr = kmap_atomic(*page); memset(kaddr + page_offset, 0, length); + flush_dcache_page(*page); kunmap_atomic(kaddr); local_irq_restore(flags); @@ -2171,9 +2163,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, struct rbd_obj_request *obj_request = NULL; struct rbd_obj_request *next_obj_request; bool write_request = img_request_write_test(img_request); - struct bio *bio_list; + struct bio *bio_list = 0; unsigned int bio_offset = 0; - struct page **pages; + struct page **pages = 0; u64 img_offset; u64 resid; u16 opcode; @@ -2252,13 +2244,17 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, obj_request->pages, length, offset & ~PAGE_MASK, false, false); + /* + * set obj_request->img_request before formatting + * the osd_request so that it gets the right snapc + */ + rbd_img_obj_request_add(img_request, obj_request); if (write_request) rbd_osd_req_format_write(obj_request); else rbd_osd_req_format_read(obj_request); obj_request->img_offset = img_offset; - rbd_img_obj_request_add(img_request, obj_request); img_offset += length; resid -= length; @@ -2531,6 +2527,7 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) */ orig_request = obj_request->obj_request; obj_request->obj_request = NULL; + rbd_obj_request_put(orig_request); rbd_assert(orig_request); rbd_assert(orig_request->img_request); @@ -2551,7 +2548,6 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) if (!rbd_dev->parent_overlap) { struct ceph_osd_client *osdc; - rbd_obj_request_put(orig_request); osdc = 
&rbd_dev->rbd_client->client->osdc; result = rbd_obj_request_submit(osdc, orig_request); if (!result) @@ -2581,7 +2577,6 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) out: if (orig_request->result) rbd_obj_request_complete(orig_request); - rbd_obj_request_put(orig_request); } static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) @@ -2855,7 +2850,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) (unsigned int)opcode); ret = rbd_dev_refresh(rbd_dev); if (ret) - rbd_warn(rbd_dev, ": header refresh error (%d)\n", ret); + rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); rbd_obj_notify_ack(rbd_dev, notify_id); } @@ -3335,8 +3330,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) int ret; rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + down_write(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); if (rbd_dev->image_format == 1) ret = rbd_dev_v1_header_info(rbd_dev); else @@ -3345,7 +3340,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) /* If it's a mapped snapshot, validate its EXISTS flag */ rbd_exists_validate(rbd_dev); - mutex_unlock(&ctl_mutex); + up_write(&rbd_dev->header_rwsem); + if (mapping_size != rbd_dev->mapping.size) { sector_t size; @@ -3809,6 +3805,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) void *end; u64 pool_id; char *image_id; + u64 snap_id; u64 overlap; int ret; @@ -3868,24 +3865,56 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) (unsigned long long)pool_id, U32_MAX); goto out_err; } - parent_spec->pool_id = pool_id; image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { ret = PTR_ERR(image_id); goto out_err; } - parent_spec->image_id = image_id; - ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); + ceph_decode_64_safe(&p, end, snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, 
out_err); - if (overlap) { - rbd_spec_put(rbd_dev->parent_spec); + /* + * The parent won't change (except when the clone is + * flattened, already handled that). So we only need to + * record the parent spec if we have not already done so. + */ + if (!rbd_dev->parent_spec) { + parent_spec->pool_id = pool_id; + parent_spec->image_id = image_id; + parent_spec->snap_id = snap_id; rbd_dev->parent_spec = parent_spec; parent_spec = NULL; /* rbd_dev now owns this */ - rbd_dev->parent_overlap = overlap; - } else { - rbd_warn(rbd_dev, "ignoring parent of clone with overlap 0\n"); + } + + /* + * We always update the parent overlap. If it's zero we + * treat it specially. + */ + rbd_dev->parent_overlap = overlap; + smp_mb(); + if (!overlap) { + + /* A null parent_spec indicates it's the initial probe */ + + if (parent_spec) { + /* + * The overlap has become zero, so the clone + * must have been resized down to 0 at some + * point. Treat this the same as a flatten. + */ + rbd_dev_parent_put(rbd_dev); + pr_info("%s: clone image now standalone\n", + rbd_dev->disk->disk_name); + } else { + /* + * For the initial probe, if we find the + * overlap is zero we just pretend there was + * no parent image. 
+ */ + rbd_warn(rbd_dev, "ignoring parent of " + "clone with overlap 0\n"); + } } out: ret = 0; @@ -4241,12 +4270,14 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - down_write(&rbd_dev->header_rwsem); + ret = rbd_dev_v2_image_size(rbd_dev); + if (ret) + return ret; if (first_time) { ret = rbd_dev_v2_header_onetime(rbd_dev); if (ret) - goto out; + return ret; } /* @@ -4261,7 +4292,7 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) ret = rbd_dev_v2_parent_info(rbd_dev); if (ret) - goto out; + return ret; /* * Print a warning if this is the initial probe and @@ -4276,18 +4307,12 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) "is EXPERIMENTAL!"); } - ret = rbd_dev_v2_image_size(rbd_dev); - if (ret) - goto out; - if (rbd_dev->spec->snap_id == CEPH_NOSNAP) if (rbd_dev->mapping.size != rbd_dev->header.image_size) rbd_dev->mapping.size = rbd_dev->header.image_size; ret = rbd_dev_v2_snap_context(rbd_dev); dout("rbd_dev_v2_snap_context returned %d\n", ret); -out: - up_write(&rbd_dev->header_rwsem); return ret; } @@ -4297,8 +4322,6 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) struct device *dev; int ret; - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - dev = &rbd_dev->dev; dev->bus = &rbd_bus_type; dev->type = &rbd_device_type; @@ -4307,8 +4330,6 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) dev_set_name(dev, "%d", rbd_dev->dev_id); ret = device_register(dev); - mutex_unlock(&ctl_mutex); - return ret; } @@ -5055,23 +5076,6 @@ err_out_module: return (ssize_t)rc; } -static struct rbd_device *__rbd_get_dev(unsigned long dev_id) -{ - struct list_head *tmp; - struct rbd_device *rbd_dev; - - spin_lock(&rbd_dev_list_lock); - list_for_each(tmp, &rbd_dev_list) { - rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->dev_id == dev_id) { - spin_unlock(&rbd_dev_list_lock); - return rbd_dev; - } - } - 
spin_unlock(&rbd_dev_list_lock); - return NULL; -} - static void rbd_dev_device_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); @@ -5116,8 +5120,10 @@ static ssize_t rbd_remove(struct bus_type *bus, size_t count) { struct rbd_device *rbd_dev = NULL; - int target_id; + struct list_head *tmp; + int dev_id; unsigned long ul; + bool already = false; int ret; ret = strict_strtoul(buf, 10, &ul); @@ -5125,37 +5131,40 @@ static ssize_t rbd_remove(struct bus_type *bus, return ret; /* convert to int; abort if we lost anything in the conversion */ - target_id = (int) ul; - if (target_id != ul) + dev_id = (int)ul; + if (dev_id != ul) return -EINVAL; - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - rbd_dev = __rbd_get_dev(target_id); - if (!rbd_dev) { - ret = -ENOENT; - goto done; + ret = -ENOENT; + spin_lock(&rbd_dev_list_lock); + list_for_each(tmp, &rbd_dev_list) { + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_dev->dev_id == dev_id) { + ret = 0; + break; + } + } + if (!ret) { + spin_lock_irq(&rbd_dev->lock); + if (rbd_dev->open_count) + ret = -EBUSY; + else + already = test_and_set_bit(RBD_DEV_FLAG_REMOVING, + &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); } + spin_unlock(&rbd_dev_list_lock); + if (ret < 0 || already) + return ret; - spin_lock_irq(&rbd_dev->lock); - if (rbd_dev->open_count) - ret = -EBUSY; - else - set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); - spin_unlock_irq(&rbd_dev->lock); - if (ret < 0) - goto done; rbd_bus_del_dev(rbd_dev); ret = rbd_dev_header_watch_sync(rbd_dev, false); if (ret) rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); rbd_dev_image_release(rbd_dev); module_put(THIS_MODULE); - ret = count; -done: - mutex_unlock(&ctl_mutex); - return ret; + return count; } /* @@ -5263,6 +5272,7 @@ static void __exit rbd_exit(void) module_init(rbd_init); module_exit(rbd_exit); +MODULE_AUTHOR("Alex Elder <elder@inktank.com>"); MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); MODULE_DESCRIPTION("rados block device"); diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2f445b7a174e..8ed6ccb748cf 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -893,7 +893,7 @@ static int swim_probe(struct platform_device *dev) swim_base = ioremap(res->start, resource_size(res)); if (!swim_base) { - return -ENOMEM; + ret = -ENOMEM; goto out_release_io; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 64723953e1c9..5cdf88b7ad9e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -20,7 +20,7 @@ module_param(use_bio, bool, S_IRUGO); static int major; static DEFINE_IDA(vd_index_ida); -struct workqueue_struct *virtblk_wq; +static struct workqueue_struct *virtblk_wq; struct virtio_blk { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8bfd1bcf95ec..04608a6502d7 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -93,7 +93,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) } invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); - blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); + blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; |