summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig23
-rw-r--r--drivers/md/Makefile7
-rw-r--r--drivers/md/bitmap.c17
-rw-r--r--drivers/md/dm-crypt.c1
-rw-r--r--drivers/md/dm-emc.c345
-rw-r--r--drivers/md/dm-hw-handler.c213
-rw-r--r--drivers/md/dm-hw-handler.h63
-rw-r--r--drivers/md/dm-mpath-hp-sw.c247
-rw-r--r--drivers/md/dm-mpath-rdac.c700
-rw-r--r--drivers/md/dm-mpath.c163
-rw-r--r--drivers/md/dm-mpath.h1
-rw-r--r--drivers/md/linear.c10
-rw-r--r--drivers/md/md.c87
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/raid0.c10
-rw-r--r--drivers/md/raid1.c29
-rw-r--r--drivers/md/raid10.c31
-rw-r--r--drivers/md/raid5.c78
18 files changed, 281 insertions, 1747 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 610af916891e..07d92c11b5d8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -252,27 +252,14 @@ config DM_ZERO
config DM_MULTIPATH
tristate "Multipath target"
depends on BLK_DEV_DM
+ # nasty syntax but means make DM_MULTIPATH independent
+ # of SCSI_DH if the latter isn't defined but if
+ # it is, DM_MULTIPATH must depend on it. We get a build
+ # error if SCSI_DH=m and DM_MULTIPATH=y
+ depends on SCSI_DH || !SCSI_DH
---help---
Allow volume managers to support multipath hardware.
-config DM_MULTIPATH_EMC
- tristate "EMC CX/AX multipath support"
- depends on DM_MULTIPATH && BLK_DEV_DM
- ---help---
- Multipath support for EMC CX/AX series hardware.
-
-config DM_MULTIPATH_RDAC
- tristate "LSI/Engenio RDAC multipath support (EXPERIMENTAL)"
- depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL
- ---help---
- Multipath support for LSI/Engenio RDAC.
-
-config DM_MULTIPATH_HP
- tristate "HP MSA multipath support (EXPERIMENTAL)"
- depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL
- ---help---
- Multipath support for HP MSA (Active/Passive) series hardware.
-
config DM_DELAY
tristate "I/O delaying target (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 7be09eeea293..f1ef33dfd8cf 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -4,11 +4,9 @@
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o
-dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
+dm-multipath-objs := dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o
dm-mirror-objs := dm-raid1.o
-dm-rdac-objs := dm-mpath-rdac.o
-dm-hp-sw-objs := dm-mpath-hp-sw.o
md-mod-objs := md.o bitmap.o
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
@@ -35,9 +33,6 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
obj-$(CONFIG_DM_DELAY) += dm-delay.o
obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
-obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o
-obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o
-obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index c14dacdacfac..b26927ce889c 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -203,17 +203,6 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
* bitmap file handling - read and write the bitmap file and its superblock
*/
-/* copy the pathname of a file to a buffer */
-char *file_path(struct file *file, char *buf, int count)
-{
- if (!buf)
- return NULL;
-
- buf = d_path(&file->f_path, buf, count);
-
- return IS_ERR(buf) ? NULL : buf;
-}
-
/*
* basic page I/O operations
*/
@@ -721,11 +710,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
if (bitmap->file) {
path = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (path)
- ptr = file_path(bitmap->file, path, PAGE_SIZE);
+ ptr = d_path(&bitmap->file->f_path, path,
+ PAGE_SIZE);
+
printk(KERN_ALERT
"%s: kicking failed bitmap file %s from array!\n",
- bmname(bitmap), ptr ? ptr : "");
+ bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
kfree(path);
} else
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 835def11419d..ab6a61db63ce 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -432,6 +432,7 @@ static int crypt_convert(struct crypt_config *cc,
case 0:
atomic_dec(&ctx->pending);
ctx->sector++;
+ cond_resched();
continue;
/* error */
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
deleted file mode 100644
index 3ea5ad4b7805..000000000000
--- a/drivers/md/dm-emc.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright (C) 2004 SUSE LINUX Products GmbH. All rights reserved.
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
- *
- * This file is released under the GPL.
- *
- * Multipath support for EMC CLARiiON AX/CX-series hardware.
- */
-
-#include "dm.h"
-#include "dm-hw-handler.h"
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-
-#define DM_MSG_PREFIX "multipath emc"
-
-struct emc_handler {
- spinlock_t lock;
-
- /* Whether we should send the short trespass command (FC-series)
- * or the long version (default for AX/CX CLARiiON arrays). */
- unsigned short_trespass;
- /* Whether or not to honor SCSI reservations when initiating a
- * switch-over. Default: Don't. */
- unsigned hr;
-
- unsigned char sense[SCSI_SENSE_BUFFERSIZE];
-};
-
-#define TRESPASS_PAGE 0x22
-#define EMC_FAILOVER_TIMEOUT (60 * HZ)
-
-/* Code borrowed from dm-lsi-rdac by Mike Christie */
-
-static inline void free_bio(struct bio *bio)
-{
- __free_page(bio->bi_io_vec[0].bv_page);
- bio_put(bio);
-}
-
-static void emc_endio(struct bio *bio, int error)
-{
- struct dm_path *path = bio->bi_private;
-
- /* We also need to look at the sense keys here whether or not to
- * switch to the next PG etc.
- *
- * For now simple logic: either it works or it doesn't.
- */
- if (error)
- dm_pg_init_complete(path, MP_FAIL_PATH);
- else
- dm_pg_init_complete(path, 0);
-
- /* request is freed in block layer */
- free_bio(bio);
-}
-
-static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size)
-{
- struct bio *bio;
- struct page *page;
-
- bio = bio_alloc(GFP_ATOMIC, 1);
- if (!bio) {
- DMERR("get_failover_bio: bio_alloc() failed.");
- return NULL;
- }
-
- bio->bi_rw |= (1 << BIO_RW);
- bio->bi_bdev = path->dev->bdev;
- bio->bi_sector = 0;
- bio->bi_private = path;
- bio->bi_end_io = emc_endio;
-
- page = alloc_page(GFP_ATOMIC);
- if (!page) {
- DMERR("get_failover_bio: alloc_page() failed.");
- bio_put(bio);
- return NULL;
- }
-
- if (bio_add_page(bio, page, data_size, 0) != data_size) {
- DMERR("get_failover_bio: bio_add_page() failed.");
- __free_page(page);
- bio_put(bio);
- return NULL;
- }
-
- return bio;
-}
-
-static struct request *get_failover_req(struct emc_handler *h,
- struct bio *bio, struct dm_path *path)
-{
- struct request *rq;
- struct block_device *bdev = bio->bi_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
-
- /* FIXME: Figure out why it fails with GFP_ATOMIC. */
- rq = blk_get_request(q, WRITE, __GFP_WAIT);
- if (!rq) {
- DMERR("get_failover_req: blk_get_request failed");
- return NULL;
- }
-
- blk_rq_append_bio(q, rq, bio);
-
- rq->sense = h->sense;
- memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
- rq->sense_len = 0;
-
- rq->timeout = EMC_FAILOVER_TIMEOUT;
- rq->cmd_type = REQ_TYPE_BLOCK_PC;
- rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
-
- return rq;
-}
-
-static struct request *emc_trespass_get(struct emc_handler *h,
- struct dm_path *path)
-{
- struct bio *bio;
- struct request *rq;
- unsigned char *page22;
- unsigned char long_trespass_pg[] = {
- 0, 0, 0, 0,
- TRESPASS_PAGE, /* Page code */
- 0x09, /* Page length - 2 */
- h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */
- 0xff, 0xff, /* Trespass target */
- 0, 0, 0, 0, 0, 0 /* Reserved bytes / unknown */
- };
- unsigned char short_trespass_pg[] = {
- 0, 0, 0, 0,
- TRESPASS_PAGE, /* Page code */
- 0x02, /* Page length - 2 */
- h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */
- 0xff, /* Trespass target */
- };
- unsigned data_size = h->short_trespass ? sizeof(short_trespass_pg) :
- sizeof(long_trespass_pg);
-
- /* get bio backing */
- if (data_size > PAGE_SIZE)
- /* this should never happen */
- return NULL;
-
- bio = get_failover_bio(path, data_size);
- if (!bio) {
- DMERR("emc_trespass_get: no bio");
- return NULL;
- }
-
- page22 = (unsigned char *)bio_data(bio);
- memset(page22, 0, data_size);
-
- memcpy(page22, h->short_trespass ?
- short_trespass_pg : long_trespass_pg, data_size);
-
- /* get request for block layer packet command */
- rq = get_failover_req(h, bio, path);
- if (!rq) {
- DMERR("emc_trespass_get: no rq");
- free_bio(bio);
- return NULL;
- }
-
- /* Prepare the command. */
- rq->cmd[0] = MODE_SELECT;
- rq->cmd[1] = 0x10;
- rq->cmd[4] = data_size;
- rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
-
- return rq;
-}
-
-static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed,
- struct dm_path *path)
-{
- struct request *rq;
- struct request_queue *q = bdev_get_queue(path->dev->bdev);
-
- /*
- * We can either blindly init the pg (then look at the sense),
- * or we can send some commands to get the state here (then
- * possibly send the fo cmnd), or we can also have the
- * initial state passed into us and then get an update here.
- */
- if (!q) {
- DMINFO("emc_pg_init: no queue");
- goto fail_path;
- }
-
- /* FIXME: The request should be pre-allocated. */
- rq = emc_trespass_get(hwh->context, path);
- if (!rq) {
- DMERR("emc_pg_init: no rq");
- goto fail_path;
- }
-
- DMINFO("emc_pg_init: sending switch-over command");
- elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
- return;
-
-fail_path:
- dm_pg_init_complete(path, MP_FAIL_PATH);
-}
-
-static struct emc_handler *alloc_emc_handler(void)
-{
- struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL);
-
- if (h)
- spin_lock_init(&h->lock);
-
- return h;
-}
-
-static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv)
-{
- struct emc_handler *h;
- unsigned hr, short_trespass;
-
- if (argc == 0) {
- /* No arguments: use defaults */
- hr = 0;
- short_trespass = 0;
- } else if (argc != 2) {
- DMWARN("incorrect number of arguments");
- return -EINVAL;
- } else {
- if ((sscanf(argv[0], "%u", &short_trespass) != 1)
- || (short_trespass > 1)) {
- DMWARN("invalid trespass mode selected");
- return -EINVAL;
- }
-
- if ((sscanf(argv[1], "%u", &hr) != 1)
- || (hr > 1)) {
- DMWARN("invalid honor reservation flag selected");
- return -EINVAL;
- }
- }
-
- h = alloc_emc_handler();
- if (!h)
- return -ENOMEM;
-
- hwh->context = h;
-
- if ((h->short_trespass = short_trespass))
- DMWARN("short trespass command will be send");
- else
- DMWARN("long trespass command will be send");
-
- if ((h->hr = hr))
- DMWARN("honor reservation bit will be set");
- else
- DMWARN("honor reservation bit will not be set (default)");
-
- return 0;
-}
-
-static void emc_destroy(struct hw_handler *hwh)
-{
- struct emc_handler *h = (struct emc_handler *) hwh->context;
-
- kfree(h);
- hwh->context = NULL;
-}
-
-static unsigned emc_error(struct hw_handler *hwh, struct bio *bio)
-{
- /* FIXME: Patch from axboe still missing */
-#if 0
- int sense;
-
- if (bio->bi_error & BIO_SENSE) {
- sense = bio->bi_error & 0xffffff; /* sense key / asc / ascq */
-
- if (sense == 0x020403) {
- /* LUN Not Ready - Manual Intervention Required
- * indicates this is a passive path.
- *
- * FIXME: However, if this is seen and EVPD C0
- * indicates that this is due to a NDU in
- * progress, we should set FAIL_PATH too.
- * This indicates we might have to do a SCSI
- * inquiry in the end_io path. Ugh. */
- return MP_BYPASS_PG | MP_RETRY_IO;
- } else if (sense == 0x052501) {
- /* An array based copy is in progress. Do not
- * fail the path, do not bypass to another PG,
- * do not retry. Fail the IO immediately.
- * (Actually this is the same conclusion as in
- * the default handler, but lets make sure.) */
- return 0;
- } else if (sense == 0x062900) {
- /* Unit Attention Code. This is the first IO
- * to the new path, so just retry. */
- return MP_RETRY_IO;
- }
- }
-#endif
-
- /* Try default handler */
- return dm_scsi_err_handler(hwh, bio);
-}
-
-static struct hw_handler_type emc_hwh = {
- .name = "emc",
- .module = THIS_MODULE,
- .create = emc_create,
- .destroy = emc_destroy,
- .pg_init = emc_pg_init,
- .error = emc_error,
-};
-
-static int __init dm_emc_init(void)
-{
- int r = dm_register_hw_handler(&emc_hwh);
-
- if (r < 0)
- DMERR("register failed %d", r);
-
- DMINFO("version 0.0.3 loaded");
-
- return r;
-}
-
-static void __exit dm_emc_exit(void)
-{
- int r = dm_unregister_hw_handler(&emc_hwh);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
-}
-
-module_init(dm_emc_init);
-module_exit(dm_emc_exit);
-
-MODULE_DESCRIPTION(DM_NAME " EMC CX/AX/FC-family multipath");
-MODULE_AUTHOR("Lars Marowsky-Bree <lmb@suse.de>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c
deleted file mode 100644
index 2ee84d8aa0bf..000000000000
--- a/drivers/md/dm-hw-handler.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
- *
- * This file is released under the GPL.
- *
- * Multipath hardware handler registration.
- */
-
-#include "dm.h"
-#include "dm-hw-handler.h"
-
-#include <linux/slab.h>
-
-struct hwh_internal {
- struct hw_handler_type hwht;
-
- struct list_head list;
- long use;
-};
-
-#define hwht_to_hwhi(__hwht) container_of((__hwht), struct hwh_internal, hwht)
-
-static LIST_HEAD(_hw_handlers);
-static DECLARE_RWSEM(_hwh_lock);
-
-static struct hwh_internal *__find_hw_handler_type(const char *name)
-{
- struct hwh_internal *hwhi;
-
- list_for_each_entry(hwhi, &_hw_handlers, list) {
- if (!strcmp(name, hwhi->hwht.name))
- return hwhi;
- }
-
- return NULL;
-}
-
-static struct hwh_internal *get_hw_handler(const char *name)
-{
- struct hwh_internal *hwhi;
-
- down_read(&_hwh_lock);
- hwhi = __find_hw_handler_type(name);
- if (hwhi) {
- if ((hwhi->use == 0) && !try_module_get(hwhi->hwht.module))
- hwhi = NULL;
- else
- hwhi->use++;
- }
- up_read(&_hwh_lock);
-
- return hwhi;
-}
-
-struct hw_handler_type *dm_get_hw_handler(const char *name)
-{
- struct hwh_internal *hwhi;
-
- if (!name)
- return NULL;
-
- hwhi = get_hw_handler(name);
- if (!hwhi) {
- request_module("dm-%s", name);
- hwhi = get_hw_handler(name);
- }
-
- return hwhi ? &hwhi->hwht : NULL;
-}
-
-void dm_put_hw_handler(struct hw_handler_type *hwht)
-{
- struct hwh_internal *hwhi;
-
- if (!hwht)
- return;
-
- down_read(&_hwh_lock);
- hwhi = __find_hw_handler_type(hwht->name);
- if (!hwhi)
- goto out;
-
- if (--hwhi->use == 0)
- module_put(hwhi->hwht.module);
-
- BUG_ON(hwhi->use < 0);
-
- out:
- up_read(&_hwh_lock);
-}
-
-static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
-{
- struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL);
-
- if (hwhi)
- hwhi->hwht = *hwht;
-
- return hwhi;
-}
-
-int dm_register_hw_handler(struct hw_handler_type *hwht)
-{
- int r = 0;
- struct hwh_internal *hwhi = _alloc_hw_handler(hwht);
-
- if (!hwhi)
- return -ENOMEM;
-
- down_write(&_hwh_lock);
-
- if (__find_hw_handler_type(hwht->name)) {
- kfree(hwhi);
- r = -EEXIST;
- } else
- list_add(&hwhi->list, &_hw_handlers);
-
- up_write(&_hwh_lock);
-
- return r;
-}
-
-int dm_unregister_hw_handler(struct hw_handler_type *hwht)
-{
- struct hwh_internal *hwhi;
-
- down_write(&_hwh_lock);
-
- hwhi = __find_hw_handler_type(hwht->name);
- if (!hwhi) {
- up_write(&_hwh_lock);
- return -EINVAL;
- }
-
- if (hwhi->use) {
- up_write(&_hwh_lock);
- return -ETXTBSY;
- }
-
- list_del(&hwhi->list);
-
- up_write(&_hwh_lock);
-
- kfree(hwhi);
-
- return 0;
-}
-
-unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio)
-{
-#if 0
- int sense_key, asc, ascq;
-
- if (bio->bi_error & BIO_SENSE) {
- /* FIXME: This is just an initial guess. */
- /* key / asc / ascq */
- sense_key = (bio->bi_error >> 16) & 0xff;
- asc = (bio->bi_error >> 8) & 0xff;
- ascq = bio->bi_error & 0xff;
-
- switch (sense_key) {
- /* This block as a whole comes from the device.
- * So no point retrying on another path. */
- case 0x03: /* Medium error */
- case 0x05: /* Illegal request */
- case 0x07: /* Data protect */
- case 0x08: /* Blank check */
- case 0x0a: /* copy aborted */
- case 0x0c: /* obsolete - no clue ;-) */
- case 0x0d: /* volume overflow */
- case 0x0e: /* data miscompare */
- case 0x0f: /* reserved - no idea either. */
- return MP_ERROR_IO;
-
- /* For these errors it's unclear whether they
- * come from the device or the controller.
- * So just lets try a different path, and if
- * it eventually succeeds, user-space will clear
- * the paths again... */
- case 0x02: /* Not ready */
- case 0x04: /* Hardware error */
- case 0x09: /* vendor specific */
- case 0x0b: /* Aborted command */
- return MP_FAIL_PATH;
-
- case 0x06: /* Unit attention - might want to decode */
- if (asc == 0x04 && ascq == 0x01)
- /* "Unit in the process of
- * becoming ready" */
- return 0;
- return MP_FAIL_PATH;
-
- /* FIXME: For Unit Not Ready we may want
- * to have a generic pg activation
- * feature (START_UNIT). */
-
- /* Should these two ever end up in the
- * error path? I don't think so. */
- case 0x00: /* No sense */
- case 0x01: /* Recovered error */
- return 0;
- }
- }
-#endif
-
- /* We got no idea how to decode the other kinds of errors ->
- * assume generic error condition. */
- return MP_FAIL_PATH;
-}
-
-EXPORT_SYMBOL_GPL(dm_register_hw_handler);
-EXPORT_SYMBOL_GPL(dm_unregister_hw_handler);
-EXPORT_SYMBOL_GPL(dm_scsi_err_handler);
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
deleted file mode 100644
index 46809dcb121a..000000000000
--- a/drivers/md/dm-hw-handler.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
- *
- * This file is released under the GPL.
- *
- * Multipath hardware handler registration.
- */
-
-#ifndef DM_HW_HANDLER_H
-#define DM_HW_HANDLER_H
-
-#include <linux/device-mapper.h>
-
-#include "dm-mpath.h"
-
-struct hw_handler_type;
-struct hw_handler {
- struct hw_handler_type *type;
- struct mapped_device *md;
- void *context;
-};
-
-/*
- * Constructs a hardware handler object, takes custom arguments
- */
-/* Information about a hardware handler type */
-struct hw_handler_type {
- char *name;
- struct module *module;
-
- int (*create) (struct hw_handler *handler, unsigned int argc,
- char **argv);
- void (*destroy) (struct hw_handler *hwh);
-
- void (*pg_init) (struct hw_handler *hwh, unsigned bypassed,
- struct dm_path *path);
- unsigned (*error) (struct hw_handler *hwh, struct bio *bio);
- int (*status) (struct hw_handler *hwh, status_type_t type,
- char *result, unsigned int maxlen);
-};
-
-/* Register a hardware handler */
-int dm_register_hw_handler(struct hw_handler_type *type);
-
-/* Unregister a hardware handler */
-int dm_unregister_hw_handler(struct hw_handler_type *type);
-
-/* Returns a registered hardware handler type */
-struct hw_handler_type *dm_get_hw_handler(const char *name);
-
-/* Releases a hardware handler */
-void dm_put_hw_handler(struct hw_handler_type *hwht);
-
-/* Default err function */
-unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio);
-
-/* Error flags for err and dm_pg_init_complete */
-#define MP_FAIL_PATH 1
-#define MP_BYPASS_PG 2
-#define MP_ERROR_IO 4 /* Don't retry this I/O */
-#define MP_RETRY 8
-
-#endif
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
deleted file mode 100644
index b63a0ab37c53..000000000000
--- a/drivers/md/dm-mpath-hp-sw.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (C) 2005 Mike Christie, All rights reserved.
- * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
- * Authors: Mike Christie
- * Dave Wysochanski
- *
- * This file is released under the GPL.
- *
- * This module implements the specific path activation code for
- * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive)
- * storage arrays.
- * These storage arrays have controller-based failover, not
- * LUN-based failover. However, LUN-based failover is the design
- * of dm-multipath. Thus, this module is written for LUN-based failover.
- */
-#include <linux/blkdev.h>
-#include <linux/list.h>
-#include <linux/types.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_dbg.h>
-
-#include "dm.h"
-#include "dm-hw-handler.h"
-
-#define DM_MSG_PREFIX "multipath hp-sw"
-#define DM_HP_HWH_NAME "hp-sw"
-#define DM_HP_HWH_VER "1.0.0"
-
-struct hp_sw_context {
- unsigned char sense[SCSI_SENSE_BUFFERSIZE];
-};
-
-/*
- * hp_sw_error_is_retryable - Is an HP-specific check condition retryable?
- * @req: path activation request
- *
- * Examine error codes of request and determine whether the error is retryable.
- * Some error codes are already retried by scsi-ml (see
- * scsi_decide_disposition), but some HP specific codes are not.
- * The intent of this routine is to supply the logic for the HP specific
- * check conditions.
- *
- * Returns:
- * 1 - command completed with retryable error
- * 0 - command completed with non-retryable error
- *
- * Possible optimizations
- * 1. More hardware-specific error codes
- */
-static int hp_sw_error_is_retryable(struct request *req)
-{
- /*
- * NOT_READY is known to be retryable
- * For now we just dump out the sense data and call it retryable
- */
- if (status_byte(req->errors) == CHECK_CONDITION)
- __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len);
-
- /*
- * At this point we don't have complete information about all the error
- * codes from this hardware, so we are just conservative and retry
- * when in doubt.
- */
- return 1;
-}
-
-/*
- * hp_sw_end_io - Completion handler for HP path activation.
- * @req: path activation request
- * @error: scsi-ml error
- *
- * Check sense data, free request structure, and notify dm that
- * pg initialization has completed.
- *
- * Context: scsi-ml softirq
- *
- */
-static void hp_sw_end_io(struct request *req, int error)
-{
- struct dm_path *path = req->end_io_data;
- unsigned err_flags = 0;
-
- if (!error) {
- DMDEBUG("%s path activation command - success",
- path->dev->name);
- goto out;
- }
-
- if (hp_sw_error_is_retryable(req)) {
- DMDEBUG("%s path activation command - retry",
- path->dev->name);
- err_flags = MP_RETRY;
- goto out;
- }
-
- DMWARN("%s path activation fail - error=0x%x",
- path->dev->name, error);
- err_flags = MP_FAIL_PATH;
-
-out:
- req->end_io_data = NULL;
- __blk_put_request(req->q, req);
- dm_pg_init_complete(path, err_flags);
-}
-
-/*
- * hp_sw_get_request - Allocate an HP specific path activation request
- * @path: path on which request will be sent (needed for request queue)
- *
- * The START command is used for path activation request.
- * These arrays are controller-based failover, not LUN based.
- * One START command issued to a single path will fail over all
- * LUNs for the same controller.
- *
- * Possible optimizations
- * 1. Make timeout configurable
- * 2. Preallocate request
- */
-static struct request *hp_sw_get_request(struct dm_path *path)
-{
- struct request *req;
- struct block_device *bdev = path->dev->bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- struct hp_sw_context *h = path->hwhcontext;
-
- req = blk_get_request(q, WRITE, GFP_NOIO);
- if (!req)
- goto out;
-
- req->timeout = 60 * HZ;
-
- req->errors = 0;
- req->cmd_type = REQ_TYPE_BLOCK_PC;
- req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
- req->end_io_data = path;
- req->sense = h->sense;
- memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
-
- req->cmd[0] = START_STOP;
- req->cmd[4] = 1;
- req->cmd_len = COMMAND_SIZE(req->cmd[0]);
-
-out:
- return req;
-}
-
-/*
- * hp_sw_pg_init - HP path activation implementation.
- * @hwh: hardware handler specific data
- * @bypassed: unused; is the path group bypassed? (see dm-mpath.c)
- * @path: path to send initialization command
- *
- * Send an HP-specific path activation command on 'path'.
- * Do not try to optimize in any way, just send the activation command.
- * More than one path activation command may be sent to the same controller.
- * This seems to work fine for basic failover support.
- *
- * Possible optimizations
- * 1. Detect an in-progress activation request and avoid submitting another one
- * 2. Model the controller and only send a single activation request at a time
- * 3. Determine the state of a path before sending an activation request
- *
- * Context: kmpathd (see process_queued_ios() in dm-mpath.c)
- */
-static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
- struct dm_path *path)
-{
- struct request *req;
- struct hp_sw_context *h;
-
- path->hwhcontext = hwh->context;
- h = hwh->context;
-
- req = hp_sw_get_request(path);
- if (!req) {
- DMERR("%s path activation command - allocation fail",
- path->dev->name);
- goto retry;
- }
-
- DMDEBUG("%s path activation command - sent", path->dev->name);
-
- blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
- return;
-
-retry:
- dm_pg_init_complete(path, MP_RETRY);
-}
-
-static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
-{
- struct hp_sw_context *h;
-
- h = kmalloc(sizeof(*h), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- hwh->context = h;
-
- return 0;
-}
-
-static void hp_sw_destroy(struct hw_handler *hwh)
-{
- struct hp_sw_context *h = hwh->context;
-
- kfree(h);
-}
-
-static struct hw_handler_type hp_sw_hwh = {
- .name = DM_HP_HWH_NAME,
- .module = THIS_MODULE,
- .create = hp_sw_create,
- .destroy = hp_sw_destroy,
- .pg_init = hp_sw_pg_init,
-};
-
-static int __init hp_sw_init(void)
-{
- int r;
-
- r = dm_register_hw_handler(&hp_sw_hwh);
- if (r < 0)
- DMERR("register failed %d", r);
- else
- DMINFO("version " DM_HP_HWH_VER " loaded");
-
- return r;
-}
-
-static void __exit hp_sw_exit(void)
-{
- int r;
-
- r = dm_unregister_hw_handler(&hp_sw_hwh);
- if (r < 0)
- DMERR("unregister failed %d", r);
-}
-
-module_init(hp_sw_init);
-module_exit(hp_sw_exit);
-
-MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support");
-MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DM_HP_HWH_VER);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
deleted file mode 100644
index 95e77734880a..000000000000
--- a/drivers/md/dm-mpath-rdac.c
+++ /dev/null
@@ -1,700 +0,0 @@
-/*
- * Engenio/LSI RDAC DM HW handler
- *
- * Copyright (C) 2005 Mike Christie. All rights reserved.
- * Copyright (C) Chandra Seetharaman, IBM Corp. 2007
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- */
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_eh.h>
-
-#define DM_MSG_PREFIX "multipath rdac"
-
-#include "dm.h"
-#include "dm-hw-handler.h"
-
-#define RDAC_DM_HWH_NAME "rdac"
-#define RDAC_DM_HWH_VER "0.4"
-
-/*
- * LSI mode page stuff
- *
- * These struct definitions and the forming of the
- * mode page were taken from the LSI RDAC 2.4 GPL'd
- * driver, and then converted to Linux conventions.
- */
-#define RDAC_QUIESCENCE_TIME 20;
-/*
- * Page Codes
- */
-#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c
-
-/*
- * Controller modes definitions
- */
-#define RDAC_MODE_TRANSFER_ALL_LUNS 0x01
-#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02
-
-/*
- * RDAC Options field
- */
-#define RDAC_FORCED_QUIESENCE 0x02
-
-#define RDAC_FAILOVER_TIMEOUT (60 * HZ)
-
-struct rdac_mode_6_hdr {
- u8 data_len;
- u8 medium_type;
- u8 device_params;
- u8 block_desc_len;
-};
-
-struct rdac_mode_10_hdr {
- u16 data_len;
- u8 medium_type;
- u8 device_params;
- u16 reserved;
- u16 block_desc_len;
-};
-
-struct rdac_mode_common {
- u8 controller_serial[16];
- u8 alt_controller_serial[16];
- u8 rdac_mode[2];
- u8 alt_rdac_mode[2];
- u8 quiescence_timeout;
- u8 rdac_options;
-};
-
-struct rdac_pg_legacy {
- struct rdac_mode_6_hdr hdr;
- u8 page_code;
- u8 page_len;
- struct rdac_mode_common common;
-#define MODE6_MAX_LUN 32
- u8 lun_table[MODE6_MAX_LUN];
- u8 reserved2[32];
- u8 reserved3;
- u8 reserved4;
-};
-
-struct rdac_pg_expanded {
- struct rdac_mode_10_hdr hdr;
- u8 page_code;
- u8 subpage_code;
- u8 page_len[2];
- struct rdac_mode_common common;
- u8 lun_table[256];
- u8 reserved3;
- u8 reserved4;
-};
-
-struct c9_inquiry {
- u8 peripheral_info;
- u8 page_code; /* 0xC9 */
- u8 reserved1;
- u8 page_len;
- u8 page_id[4]; /* "vace" */
- u8 avte_cvp;
- u8 path_prio;
- u8 reserved2[38];
-};
-
-#define SUBSYS_ID_LEN 16
-#define SLOT_ID_LEN 2
-
-struct c4_inquiry {
- u8 peripheral_info;
- u8 page_code; /* 0xC4 */
- u8 reserved1;
- u8 page_len;
- u8 page_id[4]; /* "subs" */
- u8 subsys_id[SUBSYS_ID_LEN];
- u8 revision[4];
- u8 slot_id[SLOT_ID_LEN];
- u8 reserved[2];
-};
-
-struct rdac_controller {
- u8 subsys_id[SUBSYS_ID_LEN];
- u8 slot_id[SLOT_ID_LEN];
- int use_10_ms;
- struct kref kref;
- struct list_head node; /* list of all controllers */
- spinlock_t lock;
- int submitted;
- struct list_head cmd_list; /* list of commands to be submitted */
- union {
- struct rdac_pg_legacy legacy;
- struct rdac_pg_expanded expanded;
- } mode_select;
-};
-struct c8_inquiry {
- u8 peripheral_info;
- u8 page_code; /* 0xC8 */
- u8 reserved1;
- u8 page_len;
- u8 page_id[4]; /* "edid" */
- u8 reserved2[3];
- u8 vol_uniq_id_len;
- u8 vol_uniq_id[16];
- u8 vol_user_label_len;
- u8 vol_user_label[60];
- u8 array_uniq_id_len;
- u8 array_unique_id[16];
- u8 array_user_label_len;
- u8 array_user_label[60];
- u8 lun[8];
-};
-
-struct c2_inquiry {
- u8 peripheral_info;
- u8 page_code; /* 0xC2 */
- u8 reserved1;
- u8 page_len;
- u8 page_id[4]; /* "swr4" */
- u8 sw_version[3];
- u8 sw_date[3];
- u8 features_enabled;
- u8 max_lun_supported;
- u8 partitions[239]; /* Total allocation length should be 0xFF */
-};
-
-struct rdac_handler {
- struct list_head entry; /* list waiting to submit MODE SELECT */
- unsigned timeout;
- struct rdac_controller *ctlr;
-#define UNINITIALIZED_LUN (1 << 8)
- unsigned lun;
- unsigned char sense[SCSI_SENSE_BUFFERSIZE];
- struct dm_path *path;
- struct work_struct work;
-#define SEND_C2_INQUIRY 1
-#define SEND_C4_INQUIRY 2
-#define SEND_C8_INQUIRY 3
-#define SEND_C9_INQUIRY 4
-#define SEND_MODE_SELECT 5
- int cmd_to_send;
- union {
- struct c2_inquiry c2;
- struct c4_inquiry c4;
- struct c8_inquiry c8;
- struct c9_inquiry c9;
- } inq;
-};
-
-static LIST_HEAD(ctlr_list);
-static DEFINE_SPINLOCK(list_lock);
-static struct workqueue_struct *rdac_wkqd;
-
-static inline int had_failures(struct request *req, int error)
-{
- return (error || host_byte(req->errors) != DID_OK ||
- msg_byte(req->errors) != COMMAND_COMPLETE);
-}
-
-static void rdac_resubmit_all(struct rdac_handler *h)
-{
- struct rdac_controller *ctlr = h->ctlr;
- struct rdac_handler *tmp, *h1;
-
- spin_lock(&ctlr->lock);
- list_for_each_entry_safe(h1, tmp, &ctlr->cmd_list, entry) {
- h1->cmd_to_send = SEND_C9_INQUIRY;
- queue_work(rdac_wkqd, &h1->work);
- list_del(&h1->entry);
- }
- ctlr->submitted = 0;
- spin_unlock(&ctlr->lock);
-}
-
-static void mode_select_endio(struct request *req, int error)
-{
- struct rdac_handler *h = req->end_io_data;
- struct scsi_sense_hdr sense_hdr;
- int sense = 0, fail = 0;
-
- if (had_failures(req, error)) {
- fail = 1;
- goto failed;
- }
-
- if (status_byte(req->errors) == CHECK_CONDITION) {
- scsi_normalize_sense(req->sense, SCSI_SENSE_BUFFERSIZE,
- &sense_hdr);
- sense = (sense_hdr.sense_key << 16) | (sense_hdr.asc << 8) |
- sense_hdr.ascq;
- /* If it is retryable failure, submit the c9 inquiry again */
- if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02 ||
- sense == 0x62900) {
- /* 0x59136 - Command lock contention
- * 0x[6b]8b02 - Quiesense in progress or achieved
- * 0x62900 - Power On, Reset, or Bus Device Reset
- */
- h->cmd_to_send = SEND_C9_INQUIRY;
- queue_work(rdac_wkqd, &h->work);
- goto done;
- }
- if (sense)
- DMINFO("MODE_SELECT failed on %s with sense 0x%x",
- h->path->dev->name, sense);
- }
-failed:
- if (fail || sense)
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
- else
- dm_pg_init_complete(h->path, 0);
-
-done:
- rdac_resubmit_all(h);
- __blk_put_request(req->q, req);
-}
-
-static struct request *get_rdac_req(struct rdac_handler *h,
- void *buffer, unsigned buflen, int rw)
-{
- struct request *rq;
- struct request_queue *q = bdev_get_queue(h->path->dev->bdev);
-
- rq = blk_get_request(q, rw, GFP_KERNEL);
-
- if (!rq) {
- DMINFO("get_rdac_req: blk_get_request failed");
- return NULL;
- }
-
- if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_KERNEL)) {
- blk_put_request(rq);
- DMINFO("get_rdac_req: blk_rq_map_kern failed");
- return NULL;
- }
-
- rq->sense = h->sense;
- memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
- rq->sense_len = 0;
-
- rq->end_io_data = h;
- rq->timeout = h->timeout;
- rq->cmd_type = REQ_TYPE_BLOCK_PC;
- rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
- return rq;
-}
-
-static struct request *rdac_failover_get(struct rdac_handler *h)
-{
- struct request *rq;
- struct rdac_mode_common *common;
- unsigned data_size;
-
- if (h->ctlr->use_10_ms) {
- struct rdac_pg_expanded *rdac_pg;
-
- data_size = sizeof(struct rdac_pg_expanded);
- rdac_pg = &h->ctlr->mode_select.expanded;
- memset(rdac_pg, 0, data_size);
- common = &rdac_pg->common;
- rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40;
- rdac_pg->subpage_code = 0x1;
- rdac_pg->page_len[0] = 0x01;
- rdac_pg->page_len[1] = 0x28;
- rdac_pg->lun_table[h->lun] = 0x81;
- } else {
- struct rdac_pg_legacy *rdac_pg;
-
- data_size = sizeof(struct rdac_pg_legacy);
- rdac_pg = &h->ctlr->mode_select.legacy;
- memset(rdac_pg, 0, data_size);
- common = &rdac_pg->common;
- rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER;
- rdac_pg->page_len = 0x68;
- rdac_pg->lun_table[h->lun] = 0x81;
- }
- common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS;
- common->quiescence_timeout = RDAC_QUIESCENCE_TIME;
- common->rdac_options = RDAC_FORCED_QUIESENCE;
-
- /* get request for block layer packet command */
- rq = get_rdac_req(h, &h->ctlr->mode_select, data_size, WRITE);
- if (!rq) {
- DMERR("rdac_failover_get: no rq");
- return NULL;
- }
-
- /* Prepare the command. */
- if (h->ctlr->use_10_ms) {
- rq->cmd[0] = MODE_SELECT_10;
- rq->cmd[7] = data_size >> 8;
- rq->cmd[8] = data_size & 0xff;
- } else {
- rq->cmd[0] = MODE_SELECT;
- rq->cmd[4] = data_size;
- }
- rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
-
- return rq;
-}
-
-/* Acquires h->ctlr->lock */
-static void submit_mode_select(struct rdac_handler *h)
-{
- struct request *rq;
- struct request_queue *q = bdev_get_queue(h->path->dev->bdev);
-
- spin_lock(&h->ctlr->lock);
- if (h->ctlr->submitted) {
- list_add(&h->entry, &h->ctlr->cmd_list);
- goto drop_lock;
- }
-
- if (!q) {
- DMINFO("submit_mode_select: no queue");
- goto fail_path;
- }
-
- rq = rdac_failover_get(h);
- if (!rq) {
- DMERR("submit_mode_select: no rq");
- goto fail_path;
- }
-
- DMINFO("queueing MODE_SELECT command on %s", h->path->dev->name);
-
- blk_execute_rq_nowait(q, NULL, rq, 1, mode_select_endio);
- h->ctlr->submitted = 1;
- goto drop_lock;
-fail_path:
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
-drop_lock:
- spin_unlock(&h->ctlr->lock);
-}
-
-static void release_ctlr(struct kref *kref)
-{
- struct rdac_controller *ctlr;
- ctlr = container_of(kref, struct rdac_controller, kref);
-
- spin_lock(&list_lock);
- list_del(&ctlr->node);
- spin_unlock(&list_lock);
- kfree(ctlr);
-}
-
-static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id)
-{
- struct rdac_controller *ctlr, *tmp;
-
- spin_lock(&list_lock);
-
- list_for_each_entry(tmp, &ctlr_list, node) {
- if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) &&
- (memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) {
- kref_get(&tmp->kref);
- spin_unlock(&list_lock);
- return tmp;
- }
- }
- ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC);
- if (!ctlr)
- goto done;
-
- /* initialize fields of controller */
- memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN);
- memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN);
- kref_init(&ctlr->kref);
- spin_lock_init(&ctlr->lock);
- ctlr->submitted = 0;
- ctlr->use_10_ms = -1;
- INIT_LIST_HEAD(&ctlr->cmd_list);
- list_add(&ctlr->node, &ctlr_list);
-done:
- spin_unlock(&list_lock);
- return ctlr;
-}
-
-static void c4_endio(struct request *req, int error)
-{
- struct rdac_handler *h = req->end_io_data;
- struct c4_inquiry *sp;
-
- if (had_failures(req, error)) {
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
- goto done;
- }
-
- sp = &h->inq.c4;
-
- h->ctlr = get_controller(sp->subsys_id, sp->slot_id);
-
- if (h->ctlr) {
- h->cmd_to_send = SEND_C9_INQUIRY;
- queue_work(rdac_wkqd, &h->work);
- } else
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
-done:
- __blk_put_request(req->q, req);
-}
-
-static void c2_endio(struct request *req, int error)
-{
- struct rdac_handler *h = req->end_io_data;
- struct c2_inquiry *sp;
-
- if (had_failures(req, error)) {
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
- goto done;
- }
-
- sp = &h->inq.c2;
-
- /* If more than MODE6_MAX_LUN luns are supported, use mode select 10 */
- if (sp->max_lun_supported >= MODE6_MAX_LUN)
- h->ctlr->use_10_ms = 1;
- else
- h->ctlr->use_10_ms = 0;
-
- h->cmd_to_send = SEND_MODE_SELECT;
- queue_work(rdac_wkqd, &h->work);
-done:
- __blk_put_request(req->q, req);
-}
-
-static void c9_endio(struct request *req, int error)
-{
- struct rdac_handler *h = req->end_io_data;
- struct c9_inquiry *sp;
-
- if (had_failures(req, error)) {
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
- goto done;
- }
-
- /* We need to look at the sense keys here to take clear action.
- * For now simple logic: If the host is in AVT mode or if controller
- * owns the lun, return dm_pg_init_complete(), otherwise submit
- * MODE SELECT.
- */
- sp = &h->inq.c9;
-
- /* If in AVT mode, return success */
- if ((sp->avte_cvp >> 7) == 0x1) {
- dm_pg_init_complete(h->path, 0);
- goto done;
- }
-
- /* If the controller on this path owns the LUN, return success */
- if (sp->avte_cvp & 0x1) {
- dm_pg_init_complete(h->path, 0);
- goto done;
- }
-
- if (h->ctlr) {
- if (h->ctlr->use_10_ms == -1)
- h->cmd_to_send = SEND_C2_INQUIRY;
- else
- h->cmd_to_send = SEND_MODE_SELECT;
- } else
- h->cmd_to_send = SEND_C4_INQUIRY;
- queue_work(rdac_wkqd, &h->work);
-done:
- __blk_put_request(req->q, req);
-}
-
-static void c8_endio(struct request *req, int error)
-{
- struct rdac_handler *h = req->end_io_data;
- struct c8_inquiry *sp;
-
- if (had_failures(req, error)) {
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
- goto done;
- }
-
- /* We need to look at the sense keys here to take clear action.
- * For now simple logic: Get the lun from the inquiry page.
- */
- sp = &h->inq.c8;
- h->lun = sp->lun[7]; /* currently it uses only one byte */
- h->cmd_to_send = SEND_C9_INQUIRY;
- queue_work(rdac_wkqd, &h->work);
-done:
- __blk_put_request(req->q, req);
-}
-
-static void submit_inquiry(struct rdac_handler *h, int page_code,
- unsigned int len, rq_end_io_fn endio)
-{
- struct request *rq;
- struct request_queue *q = bdev_get_queue(h->path->dev->bdev);
-
- if (!q)
- goto fail_path;
-
- rq = get_rdac_req(h, &h->inq, len, READ);
- if (!rq)
- goto fail_path;
-
- /* Prepare the command. */
- rq->cmd[0] = INQUIRY;
- rq->cmd[1] = 1;
- rq->cmd[2] = page_code;
- rq->cmd[4] = len;
- rq->cmd_len = COMMAND_SIZE(INQUIRY);
- blk_execute_rq_nowait(q, NULL, rq, 1, endio);
- return;
-
-fail_path:
- dm_pg_init_complete(h->path, MP_FAIL_PATH);
-}
-
-static void service_wkq(struct work_struct *work)
-{
- struct rdac_handler *h = container_of(work, struct rdac_handler, work);
-
- switch (h->cmd_to_send) {
- case SEND_C2_INQUIRY:
- submit_inquiry(h, 0xC2, sizeof(struct c2_inquiry), c2_endio);
- break;
- case SEND_C4_INQUIRY:
- submit_inquiry(h, 0xC4, sizeof(struct c4_inquiry), c4_endio);
- break;
- case SEND_C8_INQUIRY:
- submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio);
- break;
- case SEND_C9_INQUIRY:
- submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio);
- break;
- case SEND_MODE_SELECT:
- submit_mode_select(h);
- break;
- default:
- BUG();
- }
-}
-/*
- * only support subpage2c until we confirm that this is just a matter of
- * of updating firmware or not, and RDAC (basic AVT works already) for now
- * but we can add these in in when we get time and testers
- */
-static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv)
-{
- struct rdac_handler *h;
- unsigned timeout;
-
- if (argc == 0) {
- /* No arguments: use defaults */
- timeout = RDAC_FAILOVER_TIMEOUT;
- } else if (argc != 1) {
- DMWARN("incorrect number of arguments");
- return -EINVAL;
- } else {
- if (sscanf(argv[1], "%u", &timeout) != 1) {
- DMWARN("invalid timeout value");
- return -EINVAL;
- }
- }
-
- h = kzalloc(sizeof(*h), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- hwh->context = h;
- h->timeout = timeout;
- h->lun = UNINITIALIZED_LUN;
- INIT_WORK(&h->work, service_wkq);
- DMWARN("using RDAC command with timeout %u", h->timeout);
-
- return 0;
-}
-
-static void rdac_destroy(struct hw_handler *hwh)
-{
- struct rdac_handler *h = hwh->context;
-
- if (h->ctlr)
- kref_put(&h->ctlr->kref, release_ctlr);
- kfree(h);
- hwh->context = NULL;
-}
-
-static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio)
-{
- /* Try default handler */
- return dm_scsi_err_handler(hwh, bio);
-}
-
-static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed,
- struct dm_path *path)
-{
- struct rdac_handler *h = hwh->context;
-
- h->path = path;
- switch (h->lun) {
- case UNINITIALIZED_LUN:
- submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio);
- break;
- default:
- submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio);
- }
-}
-
-static struct hw_handler_type rdac_handler = {
- .name = RDAC_DM_HWH_NAME,
- .module = THIS_MODULE,
- .create = rdac_create,
- .destroy = rdac_destroy,
- .pg_init = rdac_pg_init,
- .error = rdac_error,
-};
-
-static int __init rdac_init(void)
-{
- int r;
-
- rdac_wkqd = create_singlethread_workqueue("rdac_wkqd");
- if (!rdac_wkqd) {
- DMERR("Failed to create workqueue rdac_wkqd.");
- return -ENOMEM;
- }
-
- r = dm_register_hw_handler(&rdac_handler);
- if (r < 0) {
- DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
- destroy_workqueue(rdac_wkqd);
- return r;
- }
-
- DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
- return 0;
-}
-
-static void __exit rdac_exit(void)
-{
- int r = dm_unregister_hw_handler(&rdac_handler);
-
- destroy_workqueue(rdac_wkqd);
- if (r < 0)
- DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, r);
-}
-
-module_init(rdac_init);
-module_exit(rdac_exit);
-
-MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support");
-MODULE_AUTHOR("Mike Christie, Chandra Seetharaman");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(RDAC_DM_HWH_VER);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e7ee59e655d5..9f7302d4878d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,7 +7,6 @@
#include "dm.h"
#include "dm-path-selector.h"
-#include "dm-hw-handler.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
#include "dm-uevent.h"
@@ -20,6 +19,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
+#include <scsi/scsi_dh.h>
#include <asm/atomic.h>
#define DM_MSG_PREFIX "multipath"
@@ -61,7 +61,8 @@ struct multipath {
spinlock_t lock;
- struct hw_handler hw_handler;
+ const char *hw_handler_name;
+ struct work_struct activate_path;
unsigned nr_priority_groups;
struct list_head priority_groups;
unsigned pg_init_required; /* pg_init needs calling? */
@@ -106,9 +107,10 @@ typedef int (*action_fn) (struct pgpath *pgpath);
static struct kmem_cache *_mpio_cache;
-static struct workqueue_struct *kmultipathd;
+static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
+static void activate_path(struct work_struct *work);
/*-----------------------------------------------
@@ -178,6 +180,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
INIT_WORK(&m->trigger_event, trigger_event);
+ INIT_WORK(&m->activate_path, activate_path);
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
@@ -193,18 +196,13 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
static void free_multipath(struct multipath *m)
{
struct priority_group *pg, *tmp;
- struct hw_handler *hwh = &m->hw_handler;
list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
list_del(&pg->list);
free_priority_group(pg, m->ti);
}
- if (hwh->type) {
- hwh->type->destroy(hwh);
- dm_put_hw_handler(hwh->type);
- }
-
+ kfree(m->hw_handler_name);
mempool_destroy(m->mpio_pool);
kfree(m);
}
@@ -216,12 +214,10 @@ static void free_multipath(struct multipath *m)
static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
- struct hw_handler *hwh = &m->hw_handler;
-
m->current_pg = pgpath->pg;
/* Must we initialise the PG first, and queue I/O till it's ready? */
- if (hwh->type && hwh->type->pg_init) {
+ if (m->hw_handler_name) {
m->pg_init_required = 1;
m->queue_io = 1;
} else {
@@ -409,7 +405,6 @@ static void process_queued_ios(struct work_struct *work)
{
struct multipath *m =
container_of(work, struct multipath, process_queued_ios);
- struct hw_handler *hwh = &m->hw_handler;
struct pgpath *pgpath = NULL;
unsigned init_required = 0, must_queue = 1;
unsigned long flags;
@@ -439,7 +434,7 @@ out:
spin_unlock_irqrestore(&m->lock, flags);
if (init_required)
- hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path);
+ queue_work(kmpath_handlerd, &m->activate_path);
if (!must_queue)
dispatch_queued_ios(m);
@@ -652,8 +647,6 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{
- int r;
- struct hw_handler_type *hwht;
unsigned hw_argc;
struct dm_target *ti = m->ti;
@@ -661,30 +654,20 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{0, 1024, "invalid number of hardware handler args"},
};
- r = read_param(_params, shift(as), &hw_argc, &ti->error);
- if (r)
+ if (read_param(_params, shift(as), &hw_argc, &ti->error))
return -EINVAL;
if (!hw_argc)
return 0;
- hwht = dm_get_hw_handler(shift(as));
- if (!hwht) {
+ m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
+ request_module("scsi_dh_%s", m->hw_handler_name);
+ if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
ti->error = "unknown hardware handler type";
+ kfree(m->hw_handler_name);
+ m->hw_handler_name = NULL;
return -EINVAL;
}
-
- m->hw_handler.md = dm_table_get_md(ti->table);
- dm_put(m->hw_handler.md);
-
- r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
- if (r) {
- dm_put_hw_handler(hwht);
- ti->error = "hardware handler constructor failed";
- return r;
- }
-
- m->hw_handler.type = hwht;
consume(as, hw_argc - 1);
return 0;
@@ -808,6 +791,7 @@ static void multipath_dtr(struct dm_target *ti)
{
struct multipath *m = (struct multipath *) ti->private;
+ flush_workqueue(kmpath_handlerd);
flush_workqueue(kmultipathd);
free_multipath(m);
}
@@ -1025,52 +1009,85 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
return limit_reached;
}
-/*
- * pg_init must call this when it has completed its initialisation
- */
-void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
+static void pg_init_done(struct dm_path *path, int errors)
{
struct pgpath *pgpath = path_to_pgpath(path);
struct priority_group *pg = pgpath->pg;
struct multipath *m = pg->m;
unsigned long flags;
- /*
- * If requested, retry pg_init until maximum number of retries exceeded.
- * If retry not requested and PG already bypassed, always fail the path.
- */
- if (err_flags & MP_RETRY) {
- if (pg_init_limit_reached(m, pgpath))
- err_flags |= MP_FAIL_PATH;
- } else if (err_flags && pg->bypassed)
- err_flags |= MP_FAIL_PATH;
-
- if (err_flags & MP_FAIL_PATH)
+ /* device or driver problems */
+ switch (errors) {
+ case SCSI_DH_OK:
+ break;
+ case SCSI_DH_NOSYS:
+ if (!m->hw_handler_name) {
+ errors = 0;
+ break;
+ }
+ DMERR("Cannot failover device because scsi_dh_%s was not "
+ "loaded.", m->hw_handler_name);
+ /*
+ * Fail path for now, so we do not ping pong
+ */
fail_path(pgpath);
-
- if (err_flags & MP_BYPASS_PG)
+ break;
+ case SCSI_DH_DEV_TEMP_BUSY:
+ /*
+ * Probably doing something like FW upgrade on the
+ * controller so try the other pg.
+ */
bypass_pg(m, pg, 1);
+ break;
+ /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
+ case SCSI_DH_RETRY:
+ case SCSI_DH_IMM_RETRY:
+ case SCSI_DH_RES_TEMP_UNAVAIL:
+ if (pg_init_limit_reached(m, pgpath))
+ fail_path(pgpath);
+ errors = 0;
+ break;
+ default:
+ /*
+ * We probably do not want to fail the path for a device
+ * error, but this is what the old dm did. In future
+ * patches we can do more advanced handling.
+ */
+ fail_path(pgpath);
+ }
spin_lock_irqsave(&m->lock, flags);
- if (err_flags & ~MP_RETRY) {
+ if (errors) {
+ DMERR("Could not failover device. Error %d.", errors);
m->current_pgpath = NULL;
m->current_pg = NULL;
- } else if (!m->pg_init_required)
+ } else if (!m->pg_init_required) {
m->queue_io = 0;
+ pg->bypassed = 0;
+ }
m->pg_init_in_progress = 0;
queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
}
+static void activate_path(struct work_struct *work)
+{
+ int ret;
+ struct multipath *m =
+ container_of(work, struct multipath, activate_path);
+ struct dm_path *path = &m->current_pgpath->path;
+
+ ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
+ pg_init_done(path, ret);
+}
+
/*
* end_io handling
*/
static int do_end_io(struct multipath *m, struct bio *bio,
int error, struct dm_mpath_io *mpio)
{
- struct hw_handler *hwh = &m->hw_handler;
- unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
unsigned long flags;
if (!error)
@@ -1097,19 +1114,8 @@ static int do_end_io(struct multipath *m, struct bio *bio,
}
spin_unlock_irqrestore(&m->lock, flags);
- if (hwh->type && hwh->type->error)
- err_flags = hwh->type->error(hwh, bio);
-
- if (mpio->pgpath) {
- if (err_flags & MP_FAIL_PATH)
- fail_path(mpio->pgpath);
-
- if (err_flags & MP_BYPASS_PG)
- bypass_pg(m, mpio->pgpath->pg, 1);
- }
-
- if (err_flags & MP_ERROR_IO)
- return -EIO;
+ if (mpio->pgpath)
+ fail_path(mpio->pgpath);
requeue:
dm_bio_restore(&mpio->details, bio);
@@ -1194,7 +1200,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
int sz = 0;
unsigned long flags;
struct multipath *m = (struct multipath *) ti->private;
- struct hw_handler *hwh = &m->hw_handler;
struct priority_group *pg;
struct pgpath *p;
unsigned pg_num;
@@ -1214,12 +1219,10 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("pg_init_retries %u ", m->pg_init_retries);
}
- if (hwh->type && hwh->type->status)
- sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
- else if (!hwh->type || type == STATUSTYPE_INFO)
+ if (!m->hw_handler_name || type == STATUSTYPE_INFO)
DMEMIT("0 ");
else
- DMEMIT("1 %s ", hwh->type->name);
+ DMEMIT("1 %s ", m->hw_handler_name);
DMEMIT("%u ", m->nr_priority_groups);
@@ -1422,6 +1425,21 @@ static int __init dm_multipath_init(void)
return -ENOMEM;
}
+ /*
+ * A separate workqueue is used to handle the device handlers
+ * to avoid overloading existing workqueue. Overloading the
+ * old workqueue would also create a bottleneck in the
+ * path of the storage hardware device activation.
+ */
+ kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
+ if (!kmpath_handlerd) {
+ DMERR("failed to create workqueue kmpath_handlerd");
+ destroy_workqueue(kmultipathd);
+ dm_unregister_target(&multipath_target);
+ kmem_cache_destroy(_mpio_cache);
+ return -ENOMEM;
+ }
+
DMINFO("version %u.%u.%u loaded",
multipath_target.version[0], multipath_target.version[1],
multipath_target.version[2]);
@@ -1433,6 +1451,7 @@ static void __exit dm_multipath_exit(void)
{
int r;
+ destroy_workqueue(kmpath_handlerd);
destroy_workqueue(kmultipathd);
r = dm_unregister_target(&multipath_target);
@@ -1441,8 +1460,6 @@ static void __exit dm_multipath_exit(void)
kmem_cache_destroy(_mpio_cache);
}
-EXPORT_SYMBOL_GPL(dm_pg_init_complete);
-
module_init(dm_multipath_init);
module_exit(dm_multipath_exit);
diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h
index b9cdcbb3ed59..c198b856a452 100644
--- a/drivers/md/dm-mpath.h
+++ b/drivers/md/dm-mpath.h
@@ -16,7 +16,6 @@ struct dm_path {
unsigned is_active; /* Read-only */
void *pscontext; /* For path-selector use */
- void *hwhcontext; /* For hw-handler use */
};
/* Callback for hwh_pg_init_fn to use when complete */
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 10748240cb2f..6a866d7c8ae5 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -50,17 +50,19 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
/**
* linear_mergeable_bvec -- tell bio layer if two requests can be merged
* @q: request queue
- * @bio: the buffer head that's been built up so far
+ * @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can take at this offset
*/
-static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int linear_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
dev_info_t *dev0;
- unsigned long maxsectors, bio_sectors = bio->bi_size >> 9;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
dev0 = which_dev(mddev, sector);
maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83eb78b00137..2580ac1b9b0f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
static void md_print_devices(void);
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
@@ -274,6 +276,7 @@ static mddev_t * mddev_find(dev_t unit)
atomic_set(&new->active, 1);
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
+ init_waitqueue_head(&new->recovery_wait);
new->reshape_position = MaxSector;
new->resync_max = MaxSector;
new->level = LEVEL_NONE;
@@ -3013,6 +3016,36 @@ degraded_show(mddev_t *mddev, char *page)
static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ long n;
+
+ if (strict_strtol(buf, 10, &n))
+ return -EINVAL;
+
+ if (n != 0 && n != 1)
+ return -EINVAL;
+
+ mddev->parallel_resync = n;
+
+ if (mddev->sync_thread)
+ wake_up(&resync_wait);
+
+ return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+ sync_force_parallel_show, sync_force_parallel_store);
+
+static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
{
unsigned long resync, dt, db;
@@ -3187,6 +3220,7 @@ static struct attribute *md_redundancy_attrs[] = {
&md_sync_min.attr,
&md_sync_max.attr,
&md_sync_speed.attr,
+ &md_sync_force_parallel.attr,
&md_sync_completed.attr,
&md_max_sync.attr,
&md_suspend_lo.attr,
@@ -3691,6 +3725,8 @@ static int do_md_stop(mddev_t * mddev, int mode)
module_put(mddev->pers->owner);
mddev->pers = NULL;
+ /* tell userspace to handle 'inactive' */
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
set_capacity(disk, 0);
mddev->changed = 1;
@@ -3861,8 +3897,10 @@ static void autorun_devices(int part)
md_probe(dev, NULL, NULL);
mddev = mddev_find(dev);
- if (!mddev) {
- printk(KERN_ERR
+ if (!mddev || !mddev->gendisk) {
+ if (mddev)
+ mddev_put(mddev);
+ printk(KERN_ERR
"md: cannot allocate memory for md drive.\n");
break;
}
@@ -3987,8 +4025,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
if (!buf)
goto out;
- ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
- if (!ptr)
+ ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+ if (IS_ERR(ptr))
goto out;
strcpy(file->pathname, ptr);
@@ -5399,7 +5437,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
atomic_sub(blocks, &mddev->recovery_active);
wake_up(&mddev->recovery_wait);
if (!ok) {
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_wakeup_thread(mddev->thread);
// stop recovery, signal do_sync ....
}
@@ -5435,8 +5473,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
md_wakeup_thread(mddev->thread);
}
spin_unlock_irq(&mddev->write_lock);
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
}
- wait_event(mddev->sb_wait, mddev->flags==0);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
void md_write_end(mddev_t *mddev)
@@ -5471,13 +5512,17 @@ void md_allow_write(mddev_t *mddev)
mddev->safemode = 1;
spin_unlock_irq(&mddev->write_lock);
md_update_sb(mddev, 0);
+
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ /* wait for the dirty state to be recorded in the metadata */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
} else
spin_unlock_irq(&mddev->write_lock);
}
EXPORT_SYMBOL_GPL(md_allow_write);
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
void md_do_sync(mddev_t *mddev)
@@ -5541,8 +5586,9 @@ void md_do_sync(mddev_t *mddev)
for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
- if (mddev2->curr_resync &&
- match_mddev_units(mddev,mddev2)) {
+ if (!mddev->parallel_resync
+ && mddev2->curr_resync
+ && match_mddev_units(mddev, mddev2)) {
DEFINE_WAIT(wq);
if (mddev < mddev2 && mddev->curr_resync == 2) {
/* arbitrarily yield */
@@ -5622,7 +5668,6 @@ void md_do_sync(mddev_t *mddev)
window/2,(unsigned long long) max_sectors/2);
atomic_set(&mddev->recovery_active, 0);
- init_waitqueue_head(&mddev->recovery_wait);
last_check = 0;
if (j>2) {
@@ -5647,7 +5692,7 @@ void md_do_sync(mddev_t *mddev)
sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
}
@@ -5670,8 +5715,7 @@ void md_do_sync(mddev_t *mddev)
last_check = io_sectors;
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
- test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;
repeat:
@@ -5725,8 +5769,7 @@ void md_do_sync(mddev_t *mddev)
/* tell personality that we are finished */
mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
- if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
+ if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
mddev->curr_resync > 2) {
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5795,7 +5838,10 @@ static int remove_and_add_spares(mddev_t *mddev)
}
if (mddev->degraded) {
- rdev_for_each(rdev, rtmp, mddev)
+ rdev_for_each(rdev, rtmp, mddev) {
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(In_sync, &rdev->flags))
+ spares++;
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
@@ -5813,6 +5859,7 @@ static int remove_and_add_spares(mddev_t *mddev)
} else
break;
}
+ }
}
return spares;
}
@@ -5826,7 +5873,7 @@ static int remove_and_add_spares(mddev_t *mddev)
* to do that as needed.
* When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
* "->recovery" and create a thread at ->sync_thread.
- * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * When the thread finishes it sets MD_RECOVERY_DONE
* and wakeups up this thread which will reap the thread and finish up.
* This thread also removes any faulty devices (with nr_pending == 0).
*
@@ -5901,8 +5948,7 @@ void md_check_recovery(mddev_t *mddev)
/* resync has finished, collect result */
md_unregister_thread(mddev->sync_thread);
mddev->sync_thread = NULL;
- if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* success...*/
/* activate any spares */
mddev->pers->spare_active(mddev);
@@ -5926,7 +5972,6 @@ void md_check_recovery(mddev_t *mddev)
* might be left set
*/
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 4f4d1f383842..e968116e0de9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
if (rdev) {
if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
- printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number);
+ printk(KERN_ERR "hot-remove-disk, slot %d is identified"
+ " but is still operational!\n", number);
err = -EBUSY;
goto abort;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 914c04ddec7c..bcbb82594a19 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -241,18 +241,20 @@ static int create_strip_zones (mddev_t *mddev)
/**
* raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
* @q: request queue
- * @bio: the buffer head that's been built up so far
+ * @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can accept at this offset
*/
-static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int raid0_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ac409b7d83f5..c610b947218a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
r1bio_t *r1_bio;
struct bio *read_bio;
int i, targets = 0, disks;
- struct bitmap *bitmap = mddev->bitmap;
+ struct bitmap *bitmap;
unsigned long flags;
struct bio_list bl;
struct page **behind_pages = NULL;
@@ -802,6 +802,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
wait_barrier(conf);
+ bitmap = mddev->bitmap;
+
disk_stat_inc(mddev->gendisk, ios[rw]);
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -1025,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
/*
* if recovery is running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
} else
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1146,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices if recovery
+ * is not possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->degraded < conf->raid_disks) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
@@ -1282,6 +1292,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
} else {
/* fixup the bio for reuse */
+ int size;
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
sbio->bi_idx = 0;
@@ -1295,10 +1306,20 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
- for (j = 0; j < vcnt ; j++)
- memcpy(page_address(sbio->bi_io_vec[j].bv_page),
+ size = sbio->bi_size;
+ for (j = 0; j < vcnt ; j++) {
+ struct bio_vec *bi;
+ bi = &sbio->bi_io_vec[j];
+ bi->bv_offset = 0;
+ if (size > PAGE_SIZE)
+ bi->bv_len = PAGE_SIZE;
+ else
+ bi->bv_len = size;
+ size -= PAGE_SIZE;
+ memcpy(page_address(bi->bv_page),
page_address(pbio->bi_io_vec[j].bv_page),
PAGE_SIZE);
+ }
}
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8536ede1e712..22bb2b1b886d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -439,26 +439,27 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
/**
* raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
* @q: request queue
- * @bio: the buffer head that's been built up so far
+ * @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can accept at this offset
* If near_copies == raid_disk, there are no striping issues,
* but in that case, the function isn't called at all.
*/
-static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio,
- struct bio_vec *bio_vec)
+static int raid10_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
- if (max <= bio_vec->bv_len && bio_sectors == 0)
- return bio_vec->bv_len;
+ if (max <= biovec->bv_len && bio_sectors == 0)
+ return biovec->bv_len;
else
return max;
}
@@ -1020,7 +1021,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
/*
* if recovery is running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1171,6 +1172,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices if recovery
+ * is not possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ enough(conf)) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
@@ -1237,6 +1246,7 @@ static void end_sync_write(struct bio *bio, int error)
if (!uptodate)
md_error(mddev, conf->mirrors[d].rdev);
+
update_head_pos(i, r10_bio);
while (atomic_dec_and_test(&r10_bio->remaining)) {
@@ -1844,7 +1854,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (rb2)
atomic_dec(&rb2->remaining);
r10_bio = rb2;
- if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
+ if (!test_and_set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery))
printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
mdname(mddev));
break;
@@ -2127,6 +2138,8 @@ static int run(mddev_t *mddev)
!test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
mddev->degraded++;
+ if (disk->rdev)
+ conf->fullsync = 1;
}
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 93fde48c0f42..9ce7154845c6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@
#define __inline__
#endif
+#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
+
#if !RAID6_USE_EMPTY_ZERO_PAGE
/* In .bss so it's zeroed */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
rdev = conf->disks[i].rdev;
- printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
- mdname(conf->mddev), STRIPE_SECTORS,
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdevname(rdev->bdev, b));
+ printk_rl(KERN_INFO "raid5:%s: read error corrected"
+ " (%lu sectors at %llu on %s)\n",
+ mdname(conf->mddev), STRIPE_SECTORS,
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdevname(rdev->bdev, b));
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error)
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
if (conf->mddev->degraded)
- printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdn);
+ printk_rl(KERN_WARNING
+ "raid5:%s: read error not correctable "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
- printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdn);
+ printk_rl(KERN_WARNING
+ "raid5:%s: read error NOT corrected!! "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
/*
* if recovery was running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
printk (KERN_ALERT
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
* have quiesced.
*/
if ((s->uptodate == disks - 1) &&
+ (s->failed && disk_idx == s->failed_num) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
set_bit(R5_Wantcompute, &dev->flags);
@@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
*/
s->uptodate++;
return 0; /* uptodate + compute == disks */
- } else if ((s->uptodate < disks - 1) &&
- test_bit(R5_Insync, &dev->flags)) {
- /* Note: we hold off compute operations while checks are
- * in flight, but we still prefer 'compute' over 'read'
- * hence we only read if (uptodate < * disks-1)
- */
+ } else if (test_bit(R5_Insync, &dev->flags)) {
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
@@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh,
/* we would like to get this block, possibly
* by computing it, but we might not be able to
*/
- if (s->uptodate == disks-1) {
+ if ((s->uptodate == disks - 1) &&
+ (s->failed && (i == r6s->failed_num[0] ||
+ i == r6s->failed_num[1]))) {
pr_debug("Computing stripe %llu block %d\n",
(unsigned long long)sh->sector, i);
compute_block_1(sh, i, 0);
@@ -2635,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh)
struct r5dev *dev;
unsigned long pending = 0;
mdk_rdev_t *blocked_rdev = NULL;
+ int prexor;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2764,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh)
/* leave prexor set until postxor is done, allows us to distinguish
* a rmw from a rcw during biodrain
*/
+ prexor = 0;
if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+ prexor = 1;
clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2800,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh)
if (!test_and_set_bit(
STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
+ if (prexor)
+ continue;
if (!test_bit(R5_Insync, &dev->flags) ||
(i == sh->pd_idx && s.failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
@@ -2880,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh)
for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+ s.locked++;
if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
}
@@ -2893,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh)
conf->raid_disks);
s.locked += handle_write_operations5(sh, 1, 1);
} else if (s.expanded &&
+ s.locked == 0 &&
!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
@@ -3298,15 +3314,17 @@ static int raid5_congested(void *data, int bits)
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
*/
-static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int raid5_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
- if (bio_data_dir(bio) == WRITE)
+ if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
@@ -4287,7 +4305,9 @@ static int run(mddev_t *mddev)
" disk %d\n", bdevname(rdev->bdev,b),
raid_disk);
working_disks++;
- }
+ } else
+ /* Cannot rely on bitmap to complete recovery */
+ conf->fullsync = 1;
}
/*
@@ -4564,6 +4584,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices if recovery
+ * isn't possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->degraded <= conf->max_degraded) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {