diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 6 | ||||
-rw-r--r-- | drivers/md/Makefile | 2 | ||||
-rw-r--r-- | drivers/md/dm-bio-list.h | 4 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 33 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 23 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.c | 76 | ||||
-rw-r--r-- | drivers/md/dm-io.c | 5 | ||||
-rw-r--r-- | drivers/md/dm-mpath-rdac.c | 700 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 34 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 75 | ||||
-rw-r--r-- | drivers/md/dm-round-robin.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 116 | ||||
-rw-r--r-- | drivers/md/dm-snap.h | 6 | ||||
-rw-r--r-- | drivers/md/dm.c | 33 | ||||
-rw-r--r-- | drivers/md/dm.h | 40 | ||||
-rw-r--r-- | drivers/md/kcopyd.c | 11 |
16 files changed, 984 insertions, 182 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index bfd9b9c6252c..64bf3a81db93 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -264,6 +264,12 @@ config DM_MULTIPATH_EMC ---help--- Multipath support for EMC CX/AX series hardware. +config DM_MULTIPATH_RDAC + tristate "LSI/Engenio RDAC multipath support (EXPERIMENTAL)" + depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL + ---help--- + Multipath support for LSI/Engenio RDAC. + config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 71eb45f74171..c49366cdc05d 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -7,6 +7,7 @@ dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-mirror-objs := dm-log.o dm-raid1.o +dm-rdac-objs := dm-mpath-rdac.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ raid6int1.o raid6int2.o raid6int4.o \ @@ -34,6 +35,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o +obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o obj-$(CONFIG_DM_ZERO) += dm-zero.o diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h index c6be88826fae..16ee3b018b3a 100644 --- a/drivers/md/dm-bio-list.h +++ b/drivers/md/dm-bio-list.h @@ -8,7 +8,6 @@ #define DM_BIO_LIST_H #include <linux/bio.h> -#include <linux/prefetch.h> struct bio_list { struct bio *head; @@ -31,8 +30,7 @@ static inline void bio_list_init(struct bio_list *bl) } #define bio_list_for_each(bio, bl) \ - for (bio = (bl)->head; bio && ({ prefetch(bio->bi_next); 1; }); \ - bio = bio->bi_next) + for (bio = (bl)->head; bio; bio = bio->bi_next) static inline unsigned bio_list_size(const struct bio_list *bl) { diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 7b0fcfc9eaa5..ba952a032598 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -30,7 +30,7 @@ /* * per bio private data */ -struct crypt_io { +struct dm_crypt_io { struct dm_target *target; struct bio *base_bio; struct work_struct work; @@ -106,7 +106,7 @@ struct crypt_config { static struct kmem_cache *_crypt_io_pool; -static void clone_init(struct crypt_io *, struct bio *); +static void clone_init(struct dm_crypt_io *, struct bio *); /* * Different IV generation algorithms: @@ -382,7 +382,7 @@ static int crypt_convert(struct crypt_config *cc, static void dm_crypt_bio_destructor(struct bio *bio) { - struct crypt_io *io = bio->bi_private; + struct dm_crypt_io *io = bio->bi_private; struct crypt_config *cc = io->target->private; bio_free(bio, cc->bs); @@ -393,7 +393,7 @@ static int crypt_convert(struct crypt_config *cc, * This should never violate the device limitations * May return a smaller bio when running out of pages */ -static struct bio *crypt_alloc_buffer(struct crypt_io *io, unsigned int size) +static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) { struct crypt_config *cc = io->target->private; struct bio *clone; @@ -479,7 +479,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. */ -static void dec_pending(struct crypt_io *io, int error) +static void dec_pending(struct dm_crypt_io *io, int error) { struct crypt_config *cc = (struct crypt_config *) io->target->private; @@ -503,7 +503,7 @@ static void dec_pending(struct crypt_io *io, int error) static struct workqueue_struct *_kcryptd_workqueue; static void kcryptd_do_work(struct work_struct *work); -static void kcryptd_queue_io(struct crypt_io *io) +static void kcryptd_queue_io(struct dm_crypt_io *io) { INIT_WORK(&io->work, kcryptd_do_work); queue_work(_kcryptd_workqueue, &io->work); @@ -511,7 +511,7 @@ static void kcryptd_queue_io(struct crypt_io *io) static int crypt_endio(struct bio *clone, unsigned int done, int error) { - struct crypt_io *io = clone->bi_private; + struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->target->private; unsigned read_io = bio_data_dir(clone) == READ; @@ -545,7 +545,7 @@ out: return error; } -static void clone_init(struct crypt_io *io, struct bio *clone) +static void clone_init(struct dm_crypt_io *io, struct bio *clone) { struct crypt_config *cc = io->target->private; @@ -556,7 +556,7 @@ static void clone_init(struct crypt_io *io, struct bio *clone) clone->bi_destructor = dm_crypt_bio_destructor; } -static void process_read(struct crypt_io *io) +static void process_read(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *base_bio = io->base_bio; @@ -587,7 +587,7 @@ static void process_read(struct crypt_io *io) generic_make_request(clone); } -static void process_write(struct crypt_io *io) +static void process_write(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *base_bio = io->base_bio; @@ -644,7 +644,7 @@ static void process_write(struct crypt_io *io) } } -static void process_read_endio(struct crypt_io *io) +static void process_read_endio(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct convert_context ctx; @@ -657,7 +657,7 @@ static void process_read_endio(struct crypt_io *io) static void kcryptd_do_work(struct work_struct *work) { - struct crypt_io *io = container_of(work, struct crypt_io, work); + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); if (io->post_process) process_read_endio(io); @@ -939,10 +939,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct crypt_config *cc = ti->private; - struct crypt_io *io; - - if (bio_barrier(bio)) - return -EOPNOTSUPP; + struct dm_crypt_io *io; io = mempool_alloc(cc->io_pool, GFP_NOIO); io->target = ti; @@ -1062,9 +1059,7 @@ static int __init dm_crypt_init(void) { int r; - _crypt_io_pool = kmem_cache_create("dm-crypt_io", - sizeof(struct crypt_io), - 0, 0, NULL, NULL); + _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0); if (!_crypt_io_pool) return -ENOMEM; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 52c7cf9e5803..6928c136d3c5 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -20,7 +20,7 @@ struct delay_c { struct timer_list delay_timer; - struct semaphore timer_lock; + struct mutex timer_lock; struct work_struct flush_expired_bios; struct list_head delayed_bios; atomic_t may_delay; @@ -37,7 +37,7 @@ struct delay_c { unsigned writes; }; -struct delay_info { +struct dm_delay_info { struct delay_c *context; struct list_head list; struct bio *bio; @@ -58,12 +58,12 @@ static void handle_delayed_timer(unsigned long data) static void queue_timeout(struct delay_c *dc, unsigned long expires) { - down(&dc->timer_lock); + mutex_lock(&dc->timer_lock); if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) mod_timer(&dc->delay_timer, expires); - up(&dc->timer_lock); + mutex_unlock(&dc->timer_lock); } static void flush_bios(struct bio *bio) @@ -80,7 +80,7 @@ static void flush_bios(struct bio *bio) static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) { - struct delay_info *delayed, *next; + struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; int start_timer = 0; BIO_LIST(flush_bios); @@ -193,13 +193,11 @@ out: goto bad; } - init_timer(&dc->delay_timer); - dc->delay_timer.function = handle_delayed_timer; - dc->delay_timer.data = (unsigned long)dc; + setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc); INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); INIT_LIST_HEAD(&dc->delayed_bios); - init_MUTEX(&dc->timer_lock); + mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); ti->private = dc; @@ -227,7 +225,7 @@ static void delay_dtr(struct dm_target *ti) static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) { - struct delay_info *delayed; + struct dm_delay_info *delayed; unsigned long expires = 0; if (!delay || !atomic_read(&dc->may_delay)) @@ -338,10 +336,7 @@ static int __init dm_delay_init(void) goto bad_queue; } - delayed_cache = kmem_cache_create("dm-delay", - sizeof(struct delay_info), - __alignof__(struct delay_info), - 0, NULL, NULL); + delayed_cache = KMEM_CACHE(dm_delay_info, 0); if (!delayed_cache) { DMERR("Couldn't create delayed bio cache."); goto bad_memcache; diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 07e0a0c84f6e..3d65917a1bbb 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -125,9 +125,11 @@ struct pstore { uint32_t callback_count; struct commit_callback *callbacks; struct dm_io_client *io_client; + + struct workqueue_struct *metadata_wq; }; -static inline unsigned int sectors_to_pages(unsigned int sectors) +static unsigned sectors_to_pages(unsigned sectors) { return sectors / (PAGE_SIZE >> 9); } @@ -156,10 +158,24 @@ static void free_area(struct pstore *ps) ps->area = NULL; } +struct mdata_req { + struct io_region *where; + struct dm_io_request *io_req; + struct work_struct work; + int result; +}; + +static void do_metadata(struct work_struct *work) +{ + struct mdata_req *req = container_of(work, struct mdata_req, work); + + req->result = dm_io(req->io_req, 1, req->where, NULL); +} + /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) +static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) { struct io_region where = { .bdev = ps->snap->cow->bdev, @@ -173,8 +189,23 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) .client = ps->io_client, .notify.fn = NULL, }; + struct mdata_req req; + + if (!metadata) + return dm_io(&io_req, 1, &where, NULL); + + req.where = &where; + req.io_req = &io_req; - return dm_io(&io_req, 1, &where, NULL); + /* + * Issue the synchronous I/O from a different thread + * to avoid generic_make_request recursion. + */ + INIT_WORK(&req.work, do_metadata); + queue_work(ps->metadata_wq, &req.work); + flush_workqueue(ps->metadata_wq); + + return req.result; } /* @@ -189,7 +220,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw) /* convert a metadata area index to a chunk index */ chunk = 1 + ((ps->exceptions_per_area + 1) * area); - r = chunk_io(ps, chunk, rw); + r = chunk_io(ps, chunk, rw, 0); if (r) return r; @@ -230,7 +261,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, 0, READ); + r = chunk_io(ps, 0, READ, 1); if (r) goto bad; @@ -292,7 +323,7 @@ static int write_header(struct pstore *ps) dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); - return chunk_io(ps, 0, WRITE); + return chunk_io(ps, 0, WRITE, 1); } /* @@ -393,7 +424,7 @@ static int read_exceptions(struct pstore *ps) return 0; } -static inline struct pstore *get_info(struct exception_store *store) +static struct pstore *get_info(struct exception_store *store) { return (struct pstore *) store->context; } @@ -409,6 +440,7 @@ static void persistent_destroy(struct exception_store *store) { struct pstore *ps = get_info(store); + destroy_workqueue(ps->metadata_wq); dm_io_client_destroy(ps->io_client); vfree(ps->callbacks); free_area(ps); @@ -457,11 +489,6 @@ static int persistent_read_metadata(struct exception_store *store) /* * Sanity checks. */ - if (!ps->valid) { - DMWARN("snapshot is marked invalid"); - return -EINVAL; - } - if (ps->version != SNAPSHOT_DISK_VERSION) { DMWARN("unable to handle snapshot disk version %d", ps->version); @@ -469,6 +496,12 @@ static int persistent_read_metadata(struct exception_store *store) } /* + * Metadata are valid, but snapshot is invalidated + */ + if (!ps->valid) + return 1; + + /* * Read the metadata. */ r = read_exceptions(ps); @@ -480,7 +513,7 @@ static int persistent_read_metadata(struct exception_store *store) } static int persistent_prepare(struct exception_store *store, - struct exception *e) + struct dm_snap_exception *e) { struct pstore *ps = get_info(store); uint32_t stride; @@ -505,7 +538,7 @@ static int persistent_prepare(struct exception_store *store, } static void persistent_commit(struct exception_store *store, - struct exception *e, + struct dm_snap_exception *e, void (*callback) (void *, int success), void *callback_context) { @@ -588,6 +621,12 @@ int dm_create_persistent(struct exception_store *store) atomic_set(&ps->pending_count, 0); ps->callbacks = NULL; + ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); + if (!ps->metadata_wq) { + DMERR("couldn't start header metadata update thread"); + return -ENOMEM; + } + store->destroy = persistent_destroy; store->read_metadata = persistent_read_metadata; store->prepare_exception = persistent_prepare; @@ -616,7 +655,8 @@ static int transient_read_metadata(struct exception_store *store) return 0; } -static int transient_prepare(struct exception_store *store, struct exception *e) +static int transient_prepare(struct exception_store *store, + struct dm_snap_exception *e) { struct transient_c *tc = (struct transient_c *) store->context; sector_t size = get_dev_size(store->snap->cow->bdev); @@ -631,9 +671,9 @@ static int transient_prepare(struct exception_store *store, struct exception *e) } static void transient_commit(struct exception_store *store, - struct exception *e, - void (*callback) (void *, int success), - void *callback_context) + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context) { /* Just succeed */ callback(callback_context, 1); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 352c6fbeac53..f3a772486437 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -293,7 +293,10 @@ static void do_region(int rw, unsigned int region, struct io_region *where, * bvec for bio_get/set_region() and decrement bi_max_vecs * to hide it from bio_add_page(). */ - num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2; + num_bvecs = dm_sector_div_up(remaining, + (PAGE_SIZE >> SECTOR_SHIFT)); + num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev), + num_bvecs); bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio->bi_sector = where->sector + (where->count - remaining); bio->bi_bdev = where->bdev; diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c new file mode 100644 index 000000000000..8b776b8cb7f7 --- /dev/null +++ b/drivers/md/dm-mpath-rdac.c @@ -0,0 +1,700 @@ +/* + * Engenio/LSI RDAC DM HW handler + * + * Copyright (C) 2005 Mike Christie. All rights reserved. + * Copyright (C) Chandra Seetharaman, IBM Corp. 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_eh.h> + +#define DM_MSG_PREFIX "multipath rdac" + +#include "dm.h" +#include "dm-hw-handler.h" + +#define RDAC_DM_HWH_NAME "rdac" +#define RDAC_DM_HWH_VER "0.4" + +/* + * LSI mode page stuff + * + * These struct definitions and the forming of the + * mode page were taken from the LSI RDAC 2.4 GPL'd + * driver, and then converted to Linux conventions. + */ +#define RDAC_QUIESCENCE_TIME 20; +/* + * Page Codes + */ +#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c + +/* + * Controller modes definitions + */ +#define RDAC_MODE_TRANSFER_ALL_LUNS 0x01 +#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02 + +/* + * RDAC Options field + */ +#define RDAC_FORCED_QUIESENCE 0x02 + +#define RDAC_FAILOVER_TIMEOUT (60 * HZ) + +struct rdac_mode_6_hdr { + u8 data_len; + u8 medium_type; + u8 device_params; + u8 block_desc_len; +}; + +struct rdac_mode_10_hdr { + u16 data_len; + u8 medium_type; + u8 device_params; + u16 reserved; + u16 block_desc_len; +}; + +struct rdac_mode_common { + u8 controller_serial[16]; + u8 alt_controller_serial[16]; + u8 rdac_mode[2]; + u8 alt_rdac_mode[2]; + u8 quiescence_timeout; + u8 rdac_options; +}; + +struct rdac_pg_legacy { + struct rdac_mode_6_hdr hdr; + u8 page_code; + u8 page_len; + struct rdac_mode_common common; +#define MODE6_MAX_LUN 32 + u8 lun_table[MODE6_MAX_LUN]; + u8 reserved2[32]; + u8 reserved3; + u8 reserved4; +}; + +struct rdac_pg_expanded { + struct rdac_mode_10_hdr hdr; + u8 page_code; + u8 subpage_code; + u8 page_len[2]; + struct rdac_mode_common common; + u8 lun_table[256]; + u8 reserved3; + u8 reserved4; +}; + +struct c9_inquiry { + u8 peripheral_info; + u8 page_code; /* 0xC9 */ + u8 reserved1; + u8 page_len; + u8 page_id[4]; /* "vace" */ + u8 avte_cvp; + u8 path_prio; + u8 reserved2[38]; +}; + +#define SUBSYS_ID_LEN 16 +#define SLOT_ID_LEN 2 + +struct c4_inquiry { + u8 peripheral_info; + u8 page_code; /* 0xC4 */ + u8 reserved1; + u8 page_len; + u8 page_id[4]; /* "subs" */ + u8 subsys_id[SUBSYS_ID_LEN]; + u8 revision[4]; + u8 slot_id[SLOT_ID_LEN]; + u8 reserved[2]; +}; + +struct rdac_controller { + u8 subsys_id[SUBSYS_ID_LEN]; + u8 slot_id[SLOT_ID_LEN]; + int use_10_ms; + struct kref kref; + struct list_head node; /* list of all controllers */ + spinlock_t lock; + int submitted; + struct list_head cmd_list; /* list of commands to be submitted */ + union { + struct rdac_pg_legacy legacy; + struct rdac_pg_expanded expanded; + } mode_select; +}; +struct c8_inquiry { + u8 peripheral_info; + u8 page_code; /* 0xC8 */ + u8 reserved1; + u8 page_len; + u8 page_id[4]; /* "edid" */ + u8 reserved2[3]; + u8 vol_uniq_id_len; + u8 vol_uniq_id[16]; + u8 vol_user_label_len; + u8 vol_user_label[60]; + u8 array_uniq_id_len; + u8 array_unique_id[16]; + u8 array_user_label_len; + u8 array_user_label[60]; + u8 lun[8]; +}; + +struct c2_inquiry { + u8 peripheral_info; + u8 page_code; /* 0xC2 */ + u8 reserved1; + u8 page_len; + u8 page_id[4]; /* "swr4" */ + u8 sw_version[3]; + u8 sw_date[3]; + u8 features_enabled; + u8 max_lun_supported; + u8 partitions[239]; /* Total allocation length should be 0xFF */ +}; + +struct rdac_handler { + struct list_head entry; /* list waiting to submit MODE SELECT */ + unsigned timeout; + struct rdac_controller *ctlr; +#define UNINITIALIZED_LUN (1 << 8) + unsigned lun; + unsigned char sense[SCSI_SENSE_BUFFERSIZE]; + struct dm_path *path; + struct work_struct work; +#define SEND_C2_INQUIRY 1 +#define SEND_C4_INQUIRY 2 +#define SEND_C8_INQUIRY 3 +#define SEND_C9_INQUIRY 4 +#define SEND_MODE_SELECT 5 + int cmd_to_send; + union { + struct c2_inquiry c2; + struct c4_inquiry c4; + struct c8_inquiry c8; + struct c9_inquiry c9; + } inq; +}; + +static LIST_HEAD(ctlr_list); +static DEFINE_SPINLOCK(list_lock); +static struct workqueue_struct *rdac_wkqd; + +static inline int had_failures(struct request *req, int error) +{ + return (error || host_byte(req->errors) != DID_OK || + msg_byte(req->errors) != COMMAND_COMPLETE); +} + +static void rdac_resubmit_all(struct rdac_handler *h) +{ + struct rdac_controller *ctlr = h->ctlr; + struct rdac_handler *tmp, *h1; + + spin_lock(&ctlr->lock); + list_for_each_entry_safe(h1, tmp, &ctlr->cmd_list, entry) { + h1->cmd_to_send = SEND_C9_INQUIRY; + queue_work(rdac_wkqd, &h1->work); + list_del(&h1->entry); + } + ctlr->submitted = 0; + spin_unlock(&ctlr->lock); +} + +static void mode_select_endio(struct request *req, int error) +{ + struct rdac_handler *h = req->end_io_data; + struct scsi_sense_hdr sense_hdr; + int sense = 0, fail = 0; + + if (had_failures(req, error)) { + fail = 1; + goto failed; + } + + if (status_byte(req->errors) == CHECK_CONDITION) { + scsi_normalize_sense(req->sense, SCSI_SENSE_BUFFERSIZE, + &sense_hdr); + sense = (sense_hdr.sense_key << 16) | (sense_hdr.asc << 8) | + sense_hdr.ascq; + /* If it is retryable failure, submit the c9 inquiry again */ + if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02 || + sense == 0x62900) { + /* 0x59136 - Command lock contention + * 0x[6b]8b02 - Quiesense in progress or achieved + * 0x62900 - Power On, Reset, or Bus Device Reset + */ + h->cmd_to_send = SEND_C9_INQUIRY; + queue_work(rdac_wkqd, &h->work); + goto done; + } + if (sense) + DMINFO("MODE_SELECT failed on %s with sense 0x%x", + h->path->dev->name, sense); + } +failed: + if (fail || sense) + dm_pg_init_complete(h->path, MP_FAIL_PATH); + else + dm_pg_init_complete(h->path, 0); + +done: + rdac_resubmit_all(h); + __blk_put_request(req->q, req); +} + +static struct request *get_rdac_req(struct rdac_handler *h, + void *buffer, unsigned buflen, int rw) +{ + struct request *rq; + struct request_queue *q = bdev_get_queue(h->path->dev->bdev); + + rq = blk_get_request(q, rw, GFP_KERNEL); + + if (!rq) { + DMINFO("get_rdac_req: blk_get_request failed"); + return NULL; + } + + if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_KERNEL)) { + blk_put_request(rq); + DMINFO("get_rdac_req: blk_rq_map_kern failed"); + return NULL; + } + + memset(&rq->cmd, 0, BLK_MAX_CDB); + rq->sense = h->sense; + memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); + rq->sense_len = 0; + + rq->end_io_data = h; + rq->timeout = h->timeout; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_flags = REQ_FAILFAST | REQ_NOMERGE; + return rq; +} + +static struct request *rdac_failover_get(struct rdac_handler *h) +{ + struct request *rq; + struct rdac_mode_common *common; + unsigned data_size; + + if (h->ctlr->use_10_ms) { + struct rdac_pg_expanded *rdac_pg; + + data_size = sizeof(struct rdac_pg_expanded); + rdac_pg = &h->ctlr->mode_select.expanded; + memset(rdac_pg, 0, data_size); + common = &rdac_pg->common; + rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; + rdac_pg->subpage_code = 0x1; + rdac_pg->page_len[0] = 0x01; + rdac_pg->page_len[1] = 0x28; + rdac_pg->lun_table[h->lun] = 0x81; + } else { + struct rdac_pg_legacy *rdac_pg; + + data_size = sizeof(struct rdac_pg_legacy); + rdac_pg = &h->ctlr->mode_select.legacy; + memset(rdac_pg, 0, data_size); + common = &rdac_pg->common; + rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; + rdac_pg->page_len = 0x68; + rdac_pg->lun_table[h->lun] = 0x81; + } + common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS; + common->quiescence_timeout = RDAC_QUIESCENCE_TIME; + common->rdac_options = RDAC_FORCED_QUIESENCE; + + /* get request for block layer packet command */ + rq = get_rdac_req(h, &h->ctlr->mode_select, data_size, WRITE); + if (!rq) { + DMERR("rdac_failover_get: no rq"); + return NULL; + } + + /* Prepare the command. */ + if (h->ctlr->use_10_ms) { + rq->cmd[0] = MODE_SELECT_10; + rq->cmd[7] = data_size >> 8; + rq->cmd[8] = data_size & 0xff; + } else { + rq->cmd[0] = MODE_SELECT; + rq->cmd[4] = data_size; + } + rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); + + return rq; +} + +/* Acquires h->ctlr->lock */ +static void submit_mode_select(struct rdac_handler *h) +{ + struct request *rq; + struct request_queue *q = bdev_get_queue(h->path->dev->bdev); + + spin_lock(&h->ctlr->lock); + if (h->ctlr->submitted) { + list_add(&h->entry, &h->ctlr->cmd_list); + goto drop_lock; + } + + if (!q) { + DMINFO("submit_mode_select: no queue"); + goto fail_path; + } + + rq = rdac_failover_get(h); + if (!rq) { + DMERR("submit_mode_select: no rq"); + goto fail_path; + } + + DMINFO("queueing MODE_SELECT command on %s", h->path->dev->name); + + blk_execute_rq_nowait(q, NULL, rq, 1, mode_select_endio); + h->ctlr->submitted = 1; + goto drop_lock; +fail_path: + dm_pg_init_complete(h->path, MP_FAIL_PATH); +drop_lock: + spin_unlock(&h->ctlr->lock); +} + +static void release_ctlr(struct kref *kref) +{ + struct rdac_controller *ctlr; + ctlr = container_of(kref, struct rdac_controller, kref); + + spin_lock(&list_lock); + list_del(&ctlr->node); + spin_unlock(&list_lock); + kfree(ctlr); +} + +static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id) +{ + struct rdac_controller *ctlr, *tmp; + + spin_lock(&list_lock); + + list_for_each_entry(tmp, &ctlr_list, node) { + if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) && + (memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) { + kref_get(&tmp->kref); + spin_unlock(&list_lock); + return tmp; + } + } + ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC); + if (!ctlr) + goto done; + + /* initialize fields of controller */ + memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN); + memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN); + kref_init(&ctlr->kref); + spin_lock_init(&ctlr->lock); + ctlr->submitted = 0; + ctlr->use_10_ms = -1; + INIT_LIST_HEAD(&ctlr->cmd_list); + list_add(&ctlr->node, &ctlr_list); +done: + spin_unlock(&list_lock); + return ctlr; +} + +static void c4_endio(struct request *req, int error) +{ + struct rdac_handler *h = req->end_io_data; + struct c4_inquiry *sp; + + if (had_failures(req, error)) { + dm_pg_init_complete(h->path, MP_FAIL_PATH); + goto done; + } + + sp = &h->inq.c4; + + h->ctlr = get_controller(sp->subsys_id, sp->slot_id); + + if (h->ctlr) { + h->cmd_to_send = SEND_C9_INQUIRY; + queue_work(rdac_wkqd, &h->work); + } else + dm_pg_init_complete(h->path, MP_FAIL_PATH); +done: + __blk_put_request(req->q, req); +} + +static void c2_endio(struct request *req, int error) +{ + struct rdac_handler *h = req->end_io_data; + struct c2_inquiry *sp; + + if (had_failures(req, error)) { + dm_pg_init_complete(h->path, MP_FAIL_PATH); + goto done; + } + + sp = &h->inq.c2; + + /* If more than MODE6_MAX_LUN luns are supported, use mode select 10 */ + if (sp->max_lun_supported >= MODE6_MAX_LUN) + h->ctlr->use_10_ms = 1; + else + h->ctlr->use_10_ms = 0; + + h->cmd_to_send = SEND_MODE_SELECT; + queue_work(rdac_wkqd, &h->work); +done: + __blk_put_request(req->q, req); +} + +static void c9_endio(struct request *req, int error) +{ + struct rdac_handler *h = req->end_io_data; + struct c9_inquiry *sp; + + if (had_failures(req, error)) { + dm_pg_init_complete(h->path, MP_FAIL_PATH); + goto done; + } + + /* We need to look at the sense keys here to take clear action. + * For now simple logic: If the host is in AVT mode or if controller + * owns the lun, return dm_pg_init_complete(), otherwise submit + * MODE SELECT. + */ + sp = &h->inq.c9; + + /* If in AVT mode, return success */ + if ((sp->avte_cvp >> 7) == 0x1) { + dm_pg_init_complete(h->path, 0); + goto done; + } + + /* If the controller on this path owns the LUN, return success */ + if (sp->avte_cvp & 0x1) { + dm_pg_init_complete(h->path, 0); + goto done; + } + + if (h->ctlr) { + if (h->ctlr->use_10_ms == -1) + h->cmd_to_send = SEND_C2_INQUIRY; + else + h->cmd_to_send = SEND_MODE_SELECT; + } else + h->cmd_to_send = SEND_C4_INQUIRY; + queue_work(rdac_wkqd, &h->work); +done: + __blk_put_request(req->q, req); +} + +static void c8_endio(struct request *req, int error) +{ + struct rdac_handler *h = req->end_io_data; + struct c8_inquiry *sp; + + if (had_failures(req, error)) { + dm_pg_init_complete(h->path, MP_FAIL_PATH); + goto done; + } + + /* We need to look at the sense keys here to take clear action. + * For now simple logic: Get the lun from the inquiry page. + */ + sp = &h->inq.c8; + h->lun = sp->lun[7]; /* currently it uses only one byte */ + h->cmd_to_send = SEND_C9_INQUIRY; + queue_work(rdac_wkqd, &h->work); +done: + __blk_put_request(req->q, req); +} + +static void submit_inquiry(struct rdac_handler *h, int page_code, + unsigned int len, rq_end_io_fn endio) +{ + struct request *rq; + struct request_queue *q = bdev_get_queue(h->path->dev->bdev); + + if (!q) + goto fail_path; + + rq = get_rdac_req(h, &h->inq, len, READ); + if (!rq) + goto fail_path; + + /* Prepare the command. */ + rq->cmd[0] = INQUIRY; + rq->cmd[1] = 1; + rq->cmd[2] = page_code; + rq->cmd[4] = len; + rq->cmd_len = COMMAND_SIZE(INQUIRY); + blk_execute_rq_nowait(q, NULL, rq, 1, endio); + return; + +fail_path: + dm_pg_init_complete(h->path, MP_FAIL_PATH); +} + +static void service_wkq(struct work_struct *work) +{ + struct rdac_handler *h = container_of(work, struct rdac_handler, work); + + switch (h->cmd_to_send) { + case SEND_C2_INQUIRY: + submit_inquiry(h, 0xC2, sizeof(struct c2_inquiry), c2_endio); + break; + case SEND_C4_INQUIRY: + submit_inquiry(h, 0xC4, sizeof(struct c4_inquiry), c4_endio); + break; + case SEND_C8_INQUIRY: + submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio); + break; + case SEND_C9_INQUIRY: + submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio); + break; + case SEND_MODE_SELECT: + submit_mode_select(h); + break; + default: + BUG(); + } +} +/* + * only support subpage2c until we confirm that this is just a matter of + * of updating firmware or not, and RDAC (basic AVT works already) for now + * but we can add these in in when we get time and testers + */ +static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv) +{ + struct rdac_handler *h; + unsigned timeout; + + if (argc == 0) { + /* No arguments: use defaults */ + timeout = RDAC_FAILOVER_TIMEOUT; + } else if (argc != 1) { + DMWARN("incorrect number of arguments"); + return -EINVAL; + } else { + if (sscanf(argv[1], "%u", &timeout) != 1) { + DMWARN("invalid timeout value"); + return -EINVAL; + } + } + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + hwh->context = h; + h->timeout = timeout; + h->lun = UNINITIALIZED_LUN; + INIT_WORK(&h->work, service_wkq); + DMWARN("using RDAC command with timeout %u", h->timeout); + + return 0; +} + +static void rdac_destroy(struct hw_handler *hwh) +{ + struct rdac_handler *h = hwh->context; + + if (h->ctlr) + kref_put(&h->ctlr->kref, release_ctlr); + kfree(h); + hwh->context = NULL; +} + +static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio) +{ + /* Try default handler */ + return dm_scsi_err_handler(hwh, bio); +} + +static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed, + struct dm_path *path) +{ + struct rdac_handler *h = hwh->context; + + h->path = path; + switch (h->lun) { + case UNINITIALIZED_LUN: + submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio); + break; + default: + submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio); + } +} + +static struct hw_handler_type rdac_handler = { + .name = RDAC_DM_HWH_NAME, + .module = THIS_MODULE, + .create = rdac_create, + .destroy = rdac_destroy, + .pg_init = rdac_pg_init, + .error = rdac_error, +}; + +static int __init rdac_init(void) +{ + int r = dm_register_hw_handler(&rdac_handler); + + if (r < 0) { + DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); + return r; + } + + rdac_wkqd = create_singlethread_workqueue("rdac_wkqd"); + if (!rdac_wkqd) { + DMERR("Failed to create workqueue rdac_wkqd."); + dm_unregister_hw_handler(&rdac_handler); + return -ENOMEM; + } + + DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER); + return 0; +} + +static void __exit rdac_exit(void) +{ + int r = dm_unregister_hw_handler(&rdac_handler); + + destroy_workqueue(rdac_wkqd); + if (r < 0) + DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, r); +} + +module_init(rdac_init); +module_exit(rdac_exit); + +MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support"); +MODULE_AUTHOR("Mike Christie, Chandra Seetharaman"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(RDAC_DM_HWH_VER); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index de54b39e6ffe..d6ca9d0a6fd1 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -83,7 +83,7 @@ struct multipath { struct work_struct trigger_event; /* - * We must use a mempool of mpath_io structs so that we + * We must use a mempool of dm_mpath_io structs so that we * can resubmit bios on error. */ mempool_t *mpio_pool; @@ -92,7 +92,7 @@ struct multipath { /* * Context information attached to each bio we process. */ -struct mpath_io { +struct dm_mpath_io { struct pgpath *pgpath; struct dm_bio_details details; }; @@ -122,7 +122,7 @@ static struct pgpath *alloc_pgpath(void) return pgpath; } -static inline void free_pgpath(struct pgpath *pgpath) +static void free_pgpath(struct pgpath *pgpath) { kfree(pgpath); } @@ -299,8 +299,8 @@ static int __must_push_back(struct multipath *m) dm_noflush_suspending(m->ti)); } -static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio, - unsigned was_queued) +static int map_io(struct multipath *m, struct bio *bio, + struct dm_mpath_io *mpio, unsigned was_queued) { int r = DM_MAPIO_REMAPPED; unsigned long flags; @@ -374,7 +374,7 @@ static void dispatch_queued_ios(struct multipath *m) int r; unsigned long flags; struct bio *bio = NULL, *next; - struct mpath_io *mpio; + struct dm_mpath_io *mpio; union map_info *info; spin_lock_irqsave(&m->lock, flags); @@ -795,12 +795,9 @@ static int multipath_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { int r; - struct mpath_io *mpio; + struct dm_mpath_io *mpio; struct multipath *m = (struct multipath *) ti->private; - if (bio_barrier(bio)) - return -EOPNOTSUPP; - mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); dm_bio_record(&mpio->details, bio); @@ -1014,7 +1011,7 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) * end_io handling */ static int do_end_io(struct multipath *m, struct bio *bio, - int error, struct mpath_io *mpio) + int error, struct dm_mpath_io *mpio) { struct hw_handler *hwh = &m->hw_handler; unsigned err_flags = MP_FAIL_PATH; /* Default behavior */ @@ -1075,8 +1072,8 @@ static int do_end_io(struct multipath *m, struct bio *bio, static int multipath_end_io(struct dm_target *ti, struct bio *bio, int error, union map_info *map_context) { - struct multipath *m = (struct multipath *) ti->private; - struct mpath_io *mpio = (struct mpath_io *) map_context->ptr; + struct multipath *m = ti->private; + struct dm_mpath_io *mpio = map_context->ptr; struct pgpath *pgpath = mpio->pgpath; struct path_selector *ps; int r; @@ -1346,22 +1343,20 @@ static int __init dm_multipath_init(void) int r; /* allocate a slab for the dm_ios */ - _mpio_cache = kmem_cache_create("dm_mpath", sizeof(struct mpath_io), - 0, 0, NULL, NULL); + _mpio_cache = KMEM_CACHE(dm_mpath_io, 0); if (!_mpio_cache) return -ENOMEM; r = dm_register_target(&multipath_target); if (r < 0) { - DMERR("%s: register failed %d", multipath_target.name, r); + DMERR("register failed %d", r); kmem_cache_destroy(_mpio_cache); return -EINVAL; } kmultipathd = create_workqueue("kmpathd"); if (!kmultipathd) { - DMERR("%s: failed to create workqueue kmpathd", - multipath_target.name); + DMERR("failed to create workqueue kmpathd"); dm_unregister_target(&multipath_target); kmem_cache_destroy(_mpio_cache); return -ENOMEM; @@ -1382,8 +1377,7 @@ static void __exit dm_multipath_exit(void) r = dm_unregister_target(&multipath_target); if (r < 0) - DMERR("%s: target unregister failed %d", - multipath_target.name, r); + DMERR("target unregister failed %d", r); kmem_cache_destroy(_mpio_cache); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ef124b71ccc8..1a876f9965e0 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -24,6 +24,7 @@ #define DM_IO_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 +#define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); @@ -85,6 +86,7 @@ struct region_hash { struct list_head clean_regions; struct list_head quiesced_regions; struct list_head recovered_regions; + struct list_head failed_recovered_regions; }; enum { @@ -132,6 +134,7 @@ struct mirror_set { /* recovery */ region_t nr_regions; int in_sync; + int log_failure; struct mirror *default_mirror; /* Default mirror */ @@ -204,6 +207,7 @@ static int rh_init(struct region_hash *rh, struct mirror_set *ms, INIT_LIST_HEAD(&rh->clean_regions); INIT_LIST_HEAD(&rh->quiesced_regions); INIT_LIST_HEAD(&rh->recovered_regions); + INIT_LIST_HEAD(&rh->failed_recovered_regions); rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, sizeof(struct region)); @@ -368,6 +372,7 @@ static void rh_update_states(struct region_hash *rh) LIST_HEAD(clean); LIST_HEAD(recovered); + LIST_HEAD(failed_recovered); /* * Quickly grab the lists. @@ -378,10 +383,8 @@ static void rh_update_states(struct region_hash *rh) list_splice(&rh->clean_regions, &clean); INIT_LIST_HEAD(&rh->clean_regions); - list_for_each_entry (reg, &clean, list) { - rh->log->type->clear_region(rh->log, reg->key); + list_for_each_entry(reg, &clean, list) list_del(®->hash_list); - } } if (!list_empty(&rh->recovered_regions)) { @@ -391,6 +394,15 @@ static void rh_update_states(struct region_hash *rh) list_for_each_entry (reg, &recovered, list) list_del(®->hash_list); } + + if (!list_empty(&rh->failed_recovered_regions)) { + list_splice(&rh->failed_recovered_regions, &failed_recovered); + INIT_LIST_HEAD(&rh->failed_recovered_regions); + + list_for_each_entry(reg, &failed_recovered, list) + list_del(®->hash_list); + } + spin_unlock(&rh->region_lock); write_unlock_irq(&rh->hash_lock); @@ -405,10 +417,17 @@ static void rh_update_states(struct region_hash *rh) mempool_free(reg, rh->region_pool); } - rh->log->type->flush(rh->log); + list_for_each_entry_safe(reg, next, &failed_recovered, list) { + complete_resync_work(reg, errors_handled(rh->ms) ? 0 : 1); + mempool_free(reg, rh->region_pool); + } - list_for_each_entry_safe (reg, next, &clean, list) + list_for_each_entry_safe(reg, next, &clean, list) { + rh->log->type->clear_region(rh->log, reg->key); mempool_free(reg, rh->region_pool); + } + + rh->log->type->flush(rh->log); } static void rh_inc(struct region_hash *rh, region_t region) @@ -555,21 +574,25 @@ static struct region *rh_recovery_start(struct region_hash *rh) return reg; } -/* FIXME: success ignored for now */ static void rh_recovery_end(struct region *reg, int success) { struct region_hash *rh = reg->rh; spin_lock_irq(&rh->region_lock); - list_add(®->list, ®->rh->recovered_regions); + if (success) + list_add(®->list, ®->rh->recovered_regions); + else { + reg->state = RH_NOSYNC; + list_add(®->list, ®->rh->failed_recovered_regions); + } spin_unlock_irq(&rh->region_lock); wake(rh->ms); } -static void rh_flush(struct region_hash *rh) +static int rh_flush(struct region_hash *rh) { - rh->log->type->flush(rh->log); + return rh->log->type->flush(rh->log); } static void rh_delay(struct region_hash *rh, struct bio *bio) @@ -633,7 +656,14 @@ static void recovery_complete(int read_err, unsigned int write_err, { struct region *reg = (struct region *) context; - /* FIXME: better error handling */ + if (read_err) + /* Read error means the failure of default mirror. */ + DMERR_LIMIT("Unable to read primary mirror during recovery"); + + if (write_err) + DMERR_LIMIT("Write error during recovery (error = 0x%x)", + write_err); + rh_recovery_end(reg, !(read_err || write_err)); } @@ -863,12 +893,15 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) */ rh_inc_pending(&ms->rh, &sync); rh_inc_pending(&ms->rh, &nosync); - rh_flush(&ms->rh); + ms->log_failure = rh_flush(&ms->rh) ? 1 : 0; /* * Dispatch io. */ - while ((bio = bio_list_pop(&sync))) + if (unlikely(ms->log_failure)) + while ((bio = bio_list_pop(&sync))) + bio_endio(bio, bio->bi_size, -EIO); + else while ((bio = bio_list_pop(&sync))) do_write(ms, bio); while ((bio = bio_list_pop(&recover))) @@ -1145,6 +1178,15 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv += args_used; argc -= args_used; + /* + * Any read-balancing addition depends on the + * DM_RAID1_HANDLE_ERRORS flag being present. + * This is because the decision to balance depends + * on the sync state of a region. If the above + * flag is not present, we ignore errors; and + * the sync state may be inaccurate. + */ + if (argc) { ti->error = "Too many mirror arguments"; free_context(ms, ti, ms->nr_mirrors); @@ -1288,12 +1330,12 @@ static int mirror_status(struct dm_target *ti, status_type_t type, for (m = 0; m < ms->nr_mirrors; m++) DMEMIT("%s ", ms->mirror[m].dev->name); - DMEMIT("%llu/%llu", + DMEMIT("%llu/%llu 0 ", (unsigned long long)ms->rh.log->type-> get_sync_count(ms->rh.log), (unsigned long long)ms->nr_regions); - sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen); + sz += ms->rh.log->type->status(ms->rh.log, type, result+sz, maxlen-sz); break; @@ -1335,8 +1377,7 @@ static int __init dm_mirror_init(void) r = dm_register_target(&mirror_target); if (r < 0) { - DMERR("%s: Failed to register mirror target", - mirror_target.name); + DMERR("Failed to register mirror target"); dm_dirty_log_exit(); } @@ -1349,7 +1390,7 @@ static void __exit dm_mirror_exit(void) r = dm_unregister_target(&mirror_target); if (r < 0) - DMERR("%s: unregister failed %d", mirror_target.name, r); + DMERR("unregister failed %d", r); dm_dirty_log_exit(); } diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index a348a97b65af..391dfa2ad434 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -205,7 +205,7 @@ static void __exit dm_rr_exit(void) int r = dm_unregister_path_selector(&rr_ps); if (r < 0) - DMERR("round-robin: unregister failed %d", r); + DMERR("unregister failed %d", r); } module_init(dm_rr_init); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0821a2b68a73..83ddbfe6b8a4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -42,8 +42,8 @@ static struct workqueue_struct *ksnapd; static void flush_queued_bios(struct work_struct *work); -struct pending_exception { - struct exception e; +struct dm_snap_pending_exception { + struct dm_snap_exception e; /* * Origin buffers waiting for this to complete are held @@ -63,7 +63,7 @@ struct pending_exception { * group of pending_exceptions. It is always last to get freed. * These fields get set up when writing to the origin. */ - struct pending_exception *primary_pe; + struct dm_snap_pending_exception *primary_pe; /* * Number of pending_exceptions processing this chunk. @@ -137,7 +137,7 @@ static void exit_origin_hash(void) kfree(_origins); } -static inline unsigned int origin_hash(struct block_device *bdev) +static unsigned origin_hash(struct block_device *bdev) { return bdev->bd_dev & ORIGIN_MASK; } @@ -231,7 +231,7 @@ static int init_exception_table(struct exception_table *et, uint32_t size) static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem) { struct list_head *slot; - struct exception *ex, *next; + struct dm_snap_exception *ex, *next; int i, size; size = et->hash_mask + 1; @@ -245,18 +245,19 @@ static void exit_exception_table(struct exception_table *et, struct kmem_cache * vfree(et->table); } -static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) +static uint32_t exception_hash(struct exception_table *et, chunk_t chunk) { return chunk & et->hash_mask; } -static void insert_exception(struct exception_table *eh, struct exception *e) +static void insert_exception(struct exception_table *eh, + struct dm_snap_exception *e) { struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; list_add(&e->hash_list, l); } -static inline void remove_exception(struct exception *e) +static void remove_exception(struct dm_snap_exception *e) { list_del(&e->hash_list); } @@ -265,11 +266,11 @@ static inline void remove_exception(struct exception *e) * Return the exception data for a sector, or NULL if not * remapped. */ -static struct exception *lookup_exception(struct exception_table *et, - chunk_t chunk) +static struct dm_snap_exception *lookup_exception(struct exception_table *et, + chunk_t chunk) { struct list_head *slot; - struct exception *e; + struct dm_snap_exception *e; slot = &et->table[exception_hash(et, chunk)]; list_for_each_entry (e, slot, hash_list) @@ -279,9 +280,9 @@ static struct exception *lookup_exception(struct exception_table *et, return NULL; } -static inline struct exception *alloc_exception(void) +static struct dm_snap_exception *alloc_exception(void) { - struct exception *e; + struct dm_snap_exception *e; e = kmem_cache_alloc(exception_cache, GFP_NOIO); if (!e) @@ -290,24 +291,24 @@ static inline struct exception *alloc_exception(void) return e; } -static inline void free_exception(struct exception *e) +static void free_exception(struct dm_snap_exception *e) { kmem_cache_free(exception_cache, e); } -static inline struct pending_exception *alloc_pending_exception(void) +static struct dm_snap_pending_exception *alloc_pending_exception(void) { return mempool_alloc(pending_pool, GFP_NOIO); } -static inline void free_pending_exception(struct pending_exception *pe) +static void free_pending_exception(struct dm_snap_pending_exception *pe) { mempool_free(pe, pending_pool); } int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) { - struct exception *e; + struct dm_snap_exception *e; e = alloc_exception(); if (!e) @@ -334,7 +335,7 @@ static int calc_max_buckets(void) /* * Rounds a number down to a power of 2. */ -static inline uint32_t round_down(uint32_t n) +static uint32_t round_down(uint32_t n) { while (n & (n - 1)) n &= (n - 1); @@ -384,7 +385,7 @@ static int init_hash_tables(struct dm_snapshot *s) * Round a number up to the nearest 'size' boundary. size must * be a power of 2. */ -static inline ulong round_up(ulong n, ulong size) +static ulong round_up(ulong n, ulong size) { size--; return (n + size) & ~size; @@ -522,9 +523,12 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Metadata must only be loaded into one table at once */ r = s->store.read_metadata(&s->store); - if (r) { + if (r < 0) { ti->error = "Failed to read snapshot metadata"; goto bad6; + } else if (r > 0) { + s->valid = 0; + DMWARN("Snapshot is marked invalid."); } bio_list_init(&s->queued_bios); @@ -577,7 +581,7 @@ static void __free_exceptions(struct dm_snapshot *s) static void snapshot_dtr(struct dm_target *ti) { - struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + struct dm_snapshot *s = ti->private; flush_workqueue(ksnapd); @@ -655,14 +659,14 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) dm_table_event(s->table); } -static void get_pending_exception(struct pending_exception *pe) +static void get_pending_exception(struct dm_snap_pending_exception *pe) { atomic_inc(&pe->ref_count); } -static struct bio *put_pending_exception(struct pending_exception *pe) +static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe) { - struct pending_exception *primary_pe; + struct dm_snap_pending_exception *primary_pe; struct bio *origin_bios = NULL; primary_pe = pe->primary_pe; @@ -692,9 +696,9 @@ static struct bio *put_pending_exception(struct pending_exception *pe) return origin_bios; } -static void pending_complete(struct pending_exception *pe, int success) +static void pending_complete(struct dm_snap_pending_exception *pe, int success) { - struct exception *e; + struct dm_snap_exception *e; struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; struct bio *snapshot_bios = NULL; @@ -748,7 +752,8 @@ static void pending_complete(struct pending_exception *pe, int success) static void commit_callback(void *context, int success) { - struct pending_exception *pe = (struct pending_exception *) context; + struct dm_snap_pending_exception *pe = context; + pending_complete(pe, success); } @@ -758,7 +763,7 @@ static void commit_callback(void *context, int success) */ static void copy_callback(int read_err, unsigned int write_err, void *context) { - struct pending_exception *pe = (struct pending_exception *) context; + struct dm_snap_pending_exception *pe = context; struct dm_snapshot *s = pe->snap; if (read_err || write_err) @@ -773,7 +778,7 @@ static void copy_callback(int read_err, unsigned int write_err, void *context) /* * Dispatches the copy operation to kcopyd. */ -static void start_copy(struct pending_exception *pe) +static void start_copy(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; struct io_region src, dest; @@ -803,11 +808,11 @@ static void start_copy(struct pending_exception *pe) * NOTE: a write lock must be held on snap->lock before calling * this. */ -static struct pending_exception * +static struct dm_snap_pending_exception * __find_pending_exception(struct dm_snapshot *s, struct bio *bio) { - struct exception *e; - struct pending_exception *pe; + struct dm_snap_exception *e; + struct dm_snap_pending_exception *pe; chunk_t chunk = sector_to_chunk(s, bio->bi_sector); /* @@ -816,7 +821,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) e = lookup_exception(&s->pending, chunk); if (e) { /* cast the exception to a pending exception */ - pe = container_of(e, struct pending_exception, e); + pe = container_of(e, struct dm_snap_pending_exception, e); goto out; } @@ -836,7 +841,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) e = lookup_exception(&s->pending, chunk); if (e) { free_pending_exception(pe); - pe = container_of(e, struct pending_exception, e); + pe = container_of(e, struct dm_snap_pending_exception, e); goto out; } @@ -860,8 +865,8 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) return pe; } -static inline void remap_exception(struct dm_snapshot *s, struct exception *e, - struct bio *bio) +static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, + struct bio *bio) { bio->bi_bdev = s->cow->bdev; bio->bi_sector = chunk_to_sector(s, e->new_chunk) + @@ -871,11 +876,11 @@ static inline void remap_exception(struct dm_snapshot *s, struct exception *e, static int snapshot_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct exception *e; - struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + struct dm_snap_exception *e; + struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED; chunk_t chunk; - struct pending_exception *pe = NULL; + struct dm_snap_pending_exception *pe = NULL; chunk = sector_to_chunk(s, bio->bi_sector); @@ -884,9 +889,6 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, if (!s->valid) return -EIO; - if (unlikely(bio_barrier(bio))) - return -EOPNOTSUPP; - /* FIXME: should only take write lock if we need * to copy an exception */ down_write(&s->lock); @@ -945,7 +947,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, static void snapshot_resume(struct dm_target *ti) { - struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + struct dm_snapshot *s = ti->private; down_write(&s->lock); s->active = 1; @@ -955,7 +957,7 @@ static void snapshot_resume(struct dm_target *ti) static int snapshot_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { - struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; + struct dm_snapshot *snap = ti->private; switch (type) { case STATUSTYPE_INFO: @@ -999,8 +1001,8 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) { int r = DM_MAPIO_REMAPPED, first = 0; struct dm_snapshot *snap; - struct exception *e; - struct pending_exception *pe, *next_pe, *primary_pe = NULL; + struct dm_snap_exception *e; + struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL; chunk_t chunk; LIST_HEAD(pe_queue); @@ -1147,19 +1149,16 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) static void origin_dtr(struct dm_target *ti) { - struct dm_dev *dev = (struct dm_dev *) ti->private; + struct dm_dev *dev = ti->private; dm_put_device(ti, dev); } static int origin_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct dm_dev *dev = (struct dm_dev *) ti->private; + struct dm_dev *dev = ti->private; bio->bi_bdev = dev->bdev; - if (unlikely(bio_barrier(bio))) - return -EOPNOTSUPP; - /* Only tell snapshots if this is a write */ return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; } @@ -1172,7 +1171,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio, */ static void origin_resume(struct dm_target *ti) { - struct dm_dev *dev = (struct dm_dev *) ti->private; + struct dm_dev *dev = ti->private; struct dm_snapshot *snap; struct origin *o; chunk_t chunk_size = 0; @@ -1190,7 +1189,7 @@ static void origin_resume(struct dm_target *ti) static int origin_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { - struct dm_dev *dev = (struct dm_dev *) ti->private; + struct dm_dev *dev = ti->private; switch (type) { case STATUSTYPE_INFO: @@ -1249,21 +1248,14 @@ static int __init dm_snapshot_init(void) goto bad2; } - exception_cache = kmem_cache_create("dm-snapshot-ex", - sizeof(struct exception), - __alignof__(struct exception), - 0, NULL, NULL); + exception_cache = KMEM_CACHE(dm_snap_exception, 0); if (!exception_cache) { DMERR("Couldn't create exception cache."); r = -ENOMEM; goto bad3; } - pending_cache = - kmem_cache_create("dm-snapshot-in", - sizeof(struct pending_exception), - __alignof__(struct pending_exception), - 0, NULL, NULL); + pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); if (!pending_cache) { DMERR("Couldn't create pending cache."); r = -ENOMEM; diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 15fa2ae6cdc2..650e0f1f51d8 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -30,7 +30,7 @@ typedef sector_t chunk_t; * An exception is used where an old chunk of data has been * replaced by a new one. */ -struct exception { +struct dm_snap_exception { struct list_head hash_list; chunk_t old_chunk; @@ -58,13 +58,13 @@ struct exception_store { * Find somewhere to store the next exception. */ int (*prepare_exception) (struct exception_store *store, - struct exception *e); + struct dm_snap_exception *e); /* * Update the metadata with this exception. */ void (*commit_exception) (struct exception_store *store, - struct exception *e, + struct dm_snap_exception *e, void (*callback) (void *, int success), void *callback_context); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2717a355dc5b..f4f7d35561ab 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -45,7 +45,7 @@ struct dm_io { * One of these is allocated per target within a bio. Hopefully * this will be simplified out one day. */ -struct target_io { +struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; @@ -54,7 +54,7 @@ struct target_io { union map_info *dm_get_mapinfo(struct bio *bio) { if (bio && bio->bi_private) - return &((struct target_io *)bio->bi_private)->info; + return &((struct dm_target_io *)bio->bi_private)->info; return NULL; } @@ -132,14 +132,12 @@ static int __init local_init(void) int r; /* allocate a slab for the dm_ios */ - _io_cache = kmem_cache_create("dm_io", - sizeof(struct dm_io), 0, 0, NULL, NULL); + _io_cache = KMEM_CACHE(dm_io, 0); if (!_io_cache) return -ENOMEM; /* allocate a slab for the target ios */ - _tio_cache = kmem_cache_create("dm_tio", sizeof(struct target_io), - 0, 0, NULL, NULL); + _tio_cache = KMEM_CACHE(dm_target_io, 0); if (!_tio_cache) { kmem_cache_destroy(_io_cache); return -ENOMEM; @@ -325,22 +323,22 @@ out: return r; } -static inline struct dm_io *alloc_io(struct mapped_device *md) +static struct dm_io *alloc_io(struct mapped_device *md) { return mempool_alloc(md->io_pool, GFP_NOIO); } -static inline void free_io(struct mapped_device *md, struct dm_io *io) +static void free_io(struct mapped_device *md, struct dm_io *io) { mempool_free(io, md->io_pool); } -static inline struct target_io *alloc_tio(struct mapped_device *md) +static struct dm_target_io *alloc_tio(struct mapped_device *md) { return mempool_alloc(md->tio_pool, GFP_NOIO); } -static inline void free_tio(struct mapped_device *md, struct target_io *tio) +static void free_tio(struct mapped_device *md, struct dm_target_io *tio) { mempool_free(tio, md->tio_pool); } @@ -498,7 +496,7 @@ static void dec_pending(struct dm_io *io, int error) static int clone_endio(struct bio *bio, unsigned int done, int error) { int r = 0; - struct target_io *tio = bio->bi_private; + struct dm_target_io *tio = bio->bi_private; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; @@ -558,7 +556,7 @@ static sector_t max_io_len(struct mapped_device *md, } static void __map_bio(struct dm_target *ti, struct bio *clone, - struct target_io *tio) + struct dm_target_io *tio) { int r; sector_t sector; @@ -672,7 +670,7 @@ static void __clone_and_map(struct clone_info *ci) struct bio *clone, *bio = ci->bio; struct dm_target *ti = dm_table_find_target(ci->map, ci->sector); sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti); - struct target_io *tio; + struct dm_target_io *tio; /* * Allocate a target io object. @@ -802,6 +800,15 @@ static int dm_request(request_queue_t *q, struct bio *bio) int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + /* + * There is no use in forwarding any barrier request since we can't + * guarantee it is (or can be) handled by the targets correctly. + */ + if (unlikely(bio_barrier(bio))) { + bio_endio(bio, bio->bi_size, -EOPNOTSUPP); + return 0; + } + down_read(&md->io_lock); disk_stat_inc(dm_disk(md), ios[rw]); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 2f796b1436b2..462ee652a890 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -18,13 +18,45 @@ #define DM_NAME "device-mapper" -#define DMERR(f, arg...) printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMWARN(f, arg...) printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMINFO(f, arg...) printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMERR(f, arg...) \ + printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMERR_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \ + f "\n", ## arg); \ + } while (0) + +#define DMWARN(f, arg...) \ + printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMWARN_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \ + f "\n", ## arg); \ + } while (0) + +#define DMINFO(f, arg...) \ + printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMINFO_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \ + "\n", ## arg); \ + } while (0) + #ifdef CONFIG_DM_DEBUG -# define DMDEBUG(f, arg...) printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) +# define DMDEBUG(f, arg...) \ + printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) +# define DMDEBUG_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \ + "\n", ## arg); \ + } while (0) #else # define DMDEBUG(f, arg...) do {} while (0) +# define DMDEBUG_LIMIT(f, arg...) do {} while (0) #endif #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index dbc234e3c69f..7e052378c47e 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -29,7 +29,7 @@ static struct workqueue_struct *_kcopyd_wq; static struct work_struct _kcopyd_work; -static inline void wake(void) +static void wake(void) { queue_work(_kcopyd_wq, &_kcopyd_work); } @@ -226,10 +226,7 @@ static LIST_HEAD(_pages_jobs); static int jobs_init(void) { - _job_cache = kmem_cache_create("kcopyd-jobs", - sizeof(struct kcopyd_job), - __alignof__(struct kcopyd_job), - 0, NULL, NULL); + _job_cache = KMEM_CACHE(kcopyd_job, 0); if (!_job_cache) return -ENOMEM; @@ -258,7 +255,7 @@ static void jobs_exit(void) * Functions to push and pop a job onto the head of a given job * list. */ -static inline struct kcopyd_job *pop(struct list_head *jobs) +static struct kcopyd_job *pop(struct list_head *jobs) { struct kcopyd_job *job = NULL; unsigned long flags; @@ -274,7 +271,7 @@ static inline struct kcopyd_job *pop(struct list_head *jobs) return job; } -static inline void push(struct list_head *jobs, struct kcopyd_job *job) +static void push(struct list_head *jobs, struct kcopyd_job *job) { unsigned long flags; |