diff options
author | Neil Brown <neilb@suse.de> | 2002-04-04 03:58:32 +0200 |
---|---|---|
committer | Neil Brown <neilb@suse.de> | 2002-04-04 03:58:32 +0200 |
commit | e0d1903663dac9307a37646c26abf7991b0a9593 (patch) | |
tree | 882780adec3f7fa11826a34156426414db22d982 | |
parent | mdadm-0.7.2 (diff) | |
download | mdadm-e0d1903663dac9307a37646c26abf7991b0a9593.tar.xz mdadm-e0d1903663dac9307a37646c26abf7991b0a9593.zip |
mdadm-0.8
-rw-r--r-- | Assemble.c | 9 | ||||
-rw-r--r-- | Build.c | 2 | ||||
-rw-r--r-- | ChangeLog | 22 | ||||
-rw-r--r-- | Create.c | 2 | ||||
-rw-r--r-- | Detail.c | 15 | ||||
-rw-r--r-- | Examine.c | 12 | ||||
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | Manage.c | 3 | ||||
-rw-r--r-- | Monitor.c | 300 | ||||
-rw-r--r-- | Query.c | 149 | ||||
-rw-r--r-- | ReadMe.c | 258 | ||||
-rw-r--r-- | TODO | 15 | ||||
-rw-r--r-- | config.c | 73 | ||||
-rw-r--r-- | dlink.h | 2 | ||||
-rw-r--r-- | md.4 | 130 | ||||
-rw-r--r-- | md.man | 137 | ||||
-rw-r--r-- | mdadm.8 | 360 | ||||
-rw-r--r-- | mdadm.c | 338 | ||||
-rw-r--r-- | mdadm.conf-example | 17 | ||||
-rw-r--r-- | mdadm.conf.5 | 52 | ||||
-rw-r--r-- | mdadm.conf.man | 75 | ||||
-rw-r--r-- | mdadm.h | 73 | ||||
-rw-r--r-- | mdadm.man | 423 | ||||
-rw-r--r-- | mdadm.spec | 7 | ||||
-rw-r--r-- | mdstat.c | 180 | ||||
-rw-r--r-- | util.c | 71 |
26 files changed, 1886 insertions, 843 deletions
@@ -115,7 +115,7 @@ int Assemble(char *mddev, int mdfd, vers = md_get_version(mdfd); if (vers <= 0) { - fprintf(stderr, Name ": %s appears not to be an md device.\n"); + fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev); return 1; } if (vers < 9000) { @@ -405,8 +405,8 @@ This doesnt work yet if (devices[j].oldmajor != super.disks[i].major || devices[j].oldminor != super.disks[i].minor) { change |= 2; - super.disks[i].major = devices[i].oldmajor; - super.disks[i].minor = devices[i].oldminor; + super.disks[i].major = devices[j].oldmajor; + super.disks[i].minor = devices[j].oldminor; } if (devices[j].uptodate && (super.disks[i].state != desired_state)) { @@ -491,7 +491,7 @@ This doesnt work yet if (runstop == 1 || (runstop == 0 && ( first_super.raid_disks == okcnt - || start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt)) + || (start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt))) )) { if (ioctl(mdfd, RUN_ARRAY, NULL)==0) { fprintf(stderr, Name ": %s has been started with %d drive%s", @@ -527,4 +527,5 @@ This doesnt work yet } } + return 0; } @@ -130,7 +130,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, } } else { if (ioctl(mdfd, REGISTER_DEV, &stb.st_rdev)) { - fprintf(stderr, Name ": REGISTER_DEV failed for %s.\n", + fprintf(stderr, Name ": REGISTER_DEV failed for %s: %s.\n", dv->devname, strerror(errno)); goto abort; } @@ -1,4 +1,26 @@ Changes Prior to this release + - Fix another bug in Assemble.c due to confusing 'i' with 'j' + - Minimal, untested, support for multipath + - re-write of argument parsing to have more coherent modes, + - add --query,-Q option + - Update mdadm.8 to reflect arg processing change and --query + - Change "long" to "unsigned long" for device sizes + - Handle "mailaddr" and "program" lines in config file for follow/scan mode. 
+ - --follow --scan will exit if no program or mail found + - Add MAILADDR and PROGRAM to mdadm.conf-example + - Spell check man pages + - consistently use "component devices" instead of "subdevices" + - Make -Wall -Werror really work and fix lots of errors. + - --detail and --stop can have --scan which chooses devices from /proc/mdstat + - --monitor detects 20% changes in resync, failed spares, + disappearing arrays, + - --monitor --scan will automatically add any devices found in /proc/mdstat + - --monitor will move spares between arrays with same spare-group if necessary + - Documentation for Monitor Mode + - --query notes if the array containing the given device is active or not + - Finished md.4 man page. + +Changes Prior to 0.7.2 release - mdadm.spec updates and ifdef BLKGETSIZE64 from Luca Berra -- bluca@comedia.it - more mdadm.spec updates from Gregory Leblanc <gleblanc@linuxweasel.com> - make directory for mdadm.conf configurable in Makefile @@ -194,7 +194,7 @@ int Create(char *mddev, int mdfd, fprintf(stderr, Name ": size set to %dK\n", size); } if (level >= 1 && ((maxsize-size)*100 > maxsize)) { - fprintf(stderr, Name ": largest drive (%s) exceed size (%dK) by more than 1%\n", + fprintf(stderr, Name ": largest drive (%s) exceed size (%dK) by more than 1%%\n", maxdisc, size); warn = 1; } @@ -81,8 +81,8 @@ int Detail(char *dev, int brief) if (brief) printf("ARRAY %s level=%s disks=%d", dev, c?c:"-unknown-",array.raid_disks ); else { - long array_size; - long long larray_size; + unsigned long array_size; + unsigned long long larray_size; #ifdef BLKGETSIZE64 if (ioctl(fd, BLKGETSIZE64, &larray_size)==0) ; @@ -137,15 +137,20 @@ int Detail(char *dev, int brief) printf("\n"); printf(" Number Major Minor RaidDisk State\n"); } - for (d= 0; d<array.raid_disks+array.spare_disks; d++) { + for (d= 0; d<MD_SB_DISKS; d++) { mdu_disk_info_t disk; char *dv; disk.number = d; if (ioctl(fd, GET_DISK_INFO, &disk) < 0) { - fprintf(stderr, Name ": cannot get disk detail for 
disk %d: %s\n", - d, strerror(errno)); + if (d < array.raid_disks) + fprintf(stderr, Name ": cannot get disk detail for disk %d: %s\n", + d, strerror(errno)); continue; } + if (d >= array.raid_disks && + disk.major == 0 && + disk.minor == 0) + continue; if (!brief) { printf(" %5d %5d %5d %5d ", disk.number, disk.major, disk.minor, disk.raid_disk); @@ -35,7 +35,7 @@ #endif #include "md_u.h" #include "md_p.h" -int Examine(mddev_dev_t devlist, int brief, char *conffile) +int Examine(mddev_dev_t devlist, int brief, int scan) { /* Read the raid superblock from a device and @@ -60,7 +60,6 @@ int Examine(mddev_dev_t devlist, int brief, char *conffile) char *c; int rv = 0; int err; - int scan= 0; struct array { mdp_super_t super; @@ -68,15 +67,6 @@ int Examine(mddev_dev_t devlist, int brief, char *conffile) struct array *next; } *arrays = NULL; - if (devlist == NULL) { - devlist = conf_get_devs(conffile); - scan=1; - } - if (devlist == NULL) { - fprintf(stderr, Name ": No devices listed in %s\n", conffile); - return 1; - } - for (; devlist ; devlist=devlist->next) { fd = open(devlist->devname, O_RDONLY); if (fd < 0) { @@ -30,7 +30,7 @@ CC = gcc SYSCONFDIR = /etc CONFFILE = $(SYSCONFDIR)/mdadm.conf -CFLAGS = -Wall,error,strict-prototypes -ggdb -DCONFFILE=\"$(CONFFILE)\" +CFLAGS = -Wall -Werror -Wstrict-prototypes -ggdb -DCONFFILE=\"$(CONFFILE)\" # If you want a static binary, you might uncomment these # LDFLAGS = -static @@ -41,7 +41,7 @@ DESTDIR = /. BINDIR = /sbin MANDIR = /usr/share/man/man8 -OBJS = mdadm.o config.o ReadMe.o util.o Manage.o Assemble.o Build.o Create.o Detail.o Examine.o Monitor.o dlink.o Kill.o +OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o Create.o Detail.o Examine.o Monitor.o dlink.o Kill.o Query.o all : mdadm mdadm.man md.man mdadm.conf.man @@ -77,7 +77,6 @@ int Manage_runstop(char *devname, int fd, int runstop) /* Run or stop the array. 
array must already be configured * required >= 0.90.0 */ - mdu_array_info_t array; mdu_param_t param; /* unused */ if (runstop == -1 && md_get_version(fd) < 9000) { @@ -132,7 +131,7 @@ int Manage_subdevs(char *devname, int fd, struct stat stb; int i,j; int save_errno; - static buf[4096]; + static char buf[4096]; if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": cannot get array info for %s\n", @@ -30,13 +30,22 @@ #include "mdadm.h" #include "md_p.h" #include "md_u.h" +#include <sys/wait.h> #include <sys/signal.h> static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd); +static char *percentalerts[] = { + "RebuildStarted", + "Rebuild20", + "Rebuild40", + "Rebuild60", + "Rebuild80", +}; + int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, - int period, + int period, int scan, char *config) { /* @@ -48,13 +57,27 @@ int Monitor(mddev_dev_t devlist, * Update time * active/working/failed/spare drives * State of each device. + * %rebuilt if rebuilding * * If the update time changes, check out all the data again * It is possible that we cannot get the state of each device * due to bugs in the md kernel module. + * We also read /proc/mdstat to get rebuild percent, + * and to get state on all active devices incase of kernel bug. 
* - * if active_drives decreases, generate a "Fail" event - * if active_drives increases, generate a "SpareActive" event + * Events are: + * Fail + * An active device had Faulty set or Active/Sync removed + * FailSpare + * A spare device had Faulty set + * SpareActive + * An active device had a reverse transition + * RebuildStarted + * percent went from -1 to +ve + * Rebuild20 Rebuild40 Rebuild60 Rebuild80 + * percent went from below to not-below that number + * DeviceDisappeared + * Couldn't access a device which was previously visible * * if we detect an array with active<raid and spare==0 * we look at other arrays that have same spare-group @@ -62,100 +85,178 @@ int Monitor(mddev_dev_t devlist, * and if we can get_disk_info and find a name * Then we hot-remove and hot-add to the other array * + * If devlist is NULL, then we can monitor everything because --scan + * was given. We get an initial list from config file and add anything + * that appears in /proc/mdstat */ struct state { char *devname; + int devnum; /* to sync with mdstat info */ long utime; int err; - int active, working, failed, spare; + char *spare_group; + int active, working, failed, spare, raid; int devstate[MD_SB_DISKS]; + int devid[MD_SB_DISKS]; + int percent; struct state *next; } *statelist = NULL; int finished = 0; - while (! 
finished) { - mddev_ident_t mdlist = NULL; + struct mdstat_ent *mdstat = NULL; + + if (!mailaddr && scan) + mailaddr = conf_get_mailaddr(config); + if (!alert_cmd && scan) + alert_cmd = conf_get_program(config); + if (scan && !mailaddr && !alert_cmd) + return 0; + + if (devlist == NULL) { + mddev_ident_t mdlist = conf_get_ident(config, NULL); + for (; mdlist; mdlist=mdlist->next) { + struct state *st = malloc(sizeof *st); + if (st == NULL) + continue; + st->devname = strdup(mdlist->devname); + st->utime = 0; + st->next = statelist; + st->err = 1; + st->devnum = -1; + st->percent = -2; + if (mdlist->spare_group) + st->spare_group = strdup(mdlist->spare_group); + else + st->spare_group = NULL; + statelist = st; + } + } else { mddev_dev_t dv; - int dnum=0; - if (devlist== NULL) - mdlist = conf_get_ident(config, NULL); - dv = devlist; - while (dv || mdlist) { - mddev_ident_t mdident; - struct state *st; + for (dv=devlist ; dv; dv=dv->next) { + struct state *st = malloc(sizeof *st); + if (st == NULL) + continue; + st->devname = strdup(dv->devname); + st->utime = 0; + st->next = statelist; + st->err = 1; + st->devnum = -1; + st->percent = -2; + st->spare_group = NULL; + statelist = st; + } + } + + + while (! 
finished) { + struct state *st; + + if (mdstat) + free_mdstat(mdstat); + mdstat = mdstat_read(); + + for (st=statelist; st; st=st->next) { mdu_array_info_t array; - char *dev; + struct mdstat_ent *mse; + char *dev = st->devname; int fd; - char *event = NULL; int i; - char *event_disc = NULL; - if (dv) { - dev = dv->devname; - mdident = conf_get_ident(config, dev); - dv = dv->next; - } else { - mdident = mdlist; - dev = mdident->devname; - mdlist = mdlist->next; - } - for (st=statelist; st ; st=st->next) - if (strcmp(st->devname, dev)==0) - break; - if (!st) { - st =malloc(sizeof *st); - if (st == NULL) - continue; - st->devname = strdup(dev); - st->utime = 0; - st->next = statelist; - st->err = 0; - statelist = st; - } + fd = open(dev, O_RDONLY); if (fd < 0) { if (!st->err) - fprintf(stderr, Name ": cannot open %s: %s\n", + alert("DeviceDisappeared", dev, NULL, + mailaddr, alert_cmd); +/* fprintf(stderr, Name ": cannot open %s: %s\n", dev, strerror(errno)); - st->err=1; +*/ st->err=1; continue; } if (ioctl(fd, GET_ARRAY_INFO, &array)<0) { if (!st->err) - fprintf(stderr, Name ": cannot get array info for %s: %s\n", + alert("DeviceDisappeared", dev, NULL, + mailaddr, alert_cmd); +/* fprintf(stderr, Name ": cannot get array info for %s: %s\n", dev, strerror(errno)); - st->err=1; +*/ st->err=1; close(fd); continue; } - st->err = 0; - + if (st->devnum < 0) { + struct stat stb; + if (fstat(fd, &stb) == 0 && + (S_IFMT&stb.st_mode)==S_IFBLK) + st->devnum = MINOR(stb.st_rdev); + } + + for (mse = mdstat ; mse ; mse=mse->next) + if (mse->devnum == st->devnum) { + mse->devnum = -1; /* flag it as "used" */ + break; + } + if (st->utime == array.utime && - st->failed == array.failed_disks) { + st->failed == array.failed_disks && + st->working == array.working_disks && + st->spare == array.spare_disks && + (mse == NULL || ( + mse->percent == st->percent + ))) { close(fd); + st->err = 0; continue; } - event = NULL; - if (st->utime) { - int i; - if (st->active > array.active_disks) 
- event = "Fail"; - else if (st->working > array.working_disks) - event = "FailSpare"; - else if (st->active < array.active_disks) - event = "ActiveSpare"; - } - for (i=0; i<array.raid_disks+array.spare_disks; i++) { + if (mse && + st->percent == -1 && + mse->percent >= 0) + alert("RebuildStarted", dev, NULL, mailaddr, alert_cmd); + if (mse && + st->percent >= 0 && + mse->percent >= 0 && + (mse->percent / 20) > (st->percent / 20)) + alert(percentalerts[mse->percent/20], + dev, NULL, mailaddr, alert_cmd); + + if (mse) + st->percent = mse->percent; + + for (i=0; i<MD_SB_DISKS; i++) { mdu_disk_info_t disc; + int newstate=0; + int change; + char *dv = NULL; disc.number = i; if (ioctl(fd, GET_DISK_INFO, &disc)>= 0) { - if (event && event_disc == NULL && - st->devstate[i] != disc.state) { - char * dv = map_dev(disc.major, disc.minor); - if (dv) - event_disc = strdup(dv); + newstate = disc.state; + dv = map_dev(disc.major, disc.minor); + } else if (mse && i < strlen(mse->pattern)) + switch(mse->pattern[i]) { + case 'U': newstate = 6 /* ACTIVE/SYNC */; break; + case '_': newstate = 0; break; } - st->devstate[i] = disc.state; + change = newstate ^ st->devstate[i]; + if (st->utime && change && !st->err) { + if (i < array.raid_disks && + (((newstate&change)&(1<<MD_DISK_FAULTY)) || + ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) || + ((st->devstate[i]&change)&(1<<MD_DISK_SYNC))) + ) + alert("Fail", dev, dv, mailaddr, alert_cmd); + else if (i>=array.raid_disks && + (disc.major || disc.minor) && + st->devid[i] == MKDEV(disc.major, disc.minor) && + ((newstate&change)&(1<<MD_DISK_FAULTY)) + ) + alert("FailSpare", dev, dv, mailaddr, alert_cmd); + else if (i < array.raid_disks && + (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) || + ((newstate&change)&(1<<MD_DISK_ACTIVE)) || + ((newstate&change)&(1<<MD_DISK_SYNC))) + ) + alert("SpareActive", dev, dv, mailaddr, alert_cmd); } + st->devstate[i] = disc.state; + st->devid[i] = MKDEV(disc.major, disc.minor); } close(fd); st->active = 
array.active_disks; @@ -163,9 +264,78 @@ int Monitor(mddev_dev_t devlist, st->spare = array.spare_disks; st->failed = array.failed_disks; st->utime = array.utime; - if (event) - alert(event, dev, event_disc, mailaddr, alert_cmd); + st->raid = array.raid_disks; + st->err = 0; } + /* now check if there are any new devices found in mdstat */ + if (scan) { + struct mdstat_ent *mse; + for (mse=mdstat; mse; mse=mse->next) + if (mse->devnum > 0) { + struct state *st = malloc(sizeof *st); + if (st == NULL) + continue; + st->devname = strdup(get_md_name(mse->devnum)); + st->utime = 0; + st->next = statelist; + st->err = 1; + st->devnum = mse->devnum; + st->percent = -2; + st->spare_group = NULL; + statelist = st; + alert("NewArray", st->devname, NULL, mailaddr, alert_cmd); + } + } + /* If an array has active < raid && spare == 0 && spare_group != NULL + * Look for another array with spare > 0 and active == raid and same spare_group + * if found, choose a device and hotremove/hotadd + */ + for (st = statelist; st; st=st->next) + if (st->active < st->raid && + st->spare == 0 && + st->spare_group != NULL) { + struct state *st2; + for (st2=statelist ; st2 ; st2=st2->next) + if (st2 != st && + st2->spare > 0 && + st2->active == st2->raid && + st2->spare_group != NULL && + strcmp(st->spare_group, st2->spare_group) == 0) { + /* try to remove and add */ + int fd1 = open(st->devname, O_RDONLY); + int fd2 = open(st2->devname, O_RDONLY); + int dev = -1; + int d; + if (fd1 < 0 || fd2 < 0) { + if (fd1>=0) close(fd1); + if (fd2>=0) close(fd2); + continue; + } + for (d=st2->raid; d<MD_SB_DISKS; d++) { + if (st2->devid[d] > 0 && + st2->devstate[d] == 0) { + dev = st2->devid[d]; + break; + } + } + if (dev > 0) { + if (ioctl(fd2, HOT_REMOVE_DISK, + (unsigned long)dev) == 0) { + if (ioctl(fd1, HOT_ADD_DISK, + (unsigned long)dev) == 0) { + alert("MoveSpare", st->devname, st2->devname, mailaddr, alert_cmd); + close(fd1); + close(fd2); + break; + } + else ioctl(fd2, HOT_ADD_DISK, (unsigned long) 
dev); + } + } + close(fd1); + close(fd2); + } + } + sleep(period); } return 0; @@ -177,7 +347,7 @@ static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd) if (!cmd && !mailaddr) { time_t now = time(0); - printf("%0.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device"); + printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device"); } if (cmd) { int pid = fork(); diff --git a/Query.c b/Query.c new file mode 100644 index 00000000..c65d83a0 --- /dev/null +++ b/Query.c @@ -0,0 +1,149 @@ +/* + * mdadm - manage Linux "md" devices aka RAID arrays. + * + * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: <neilb@cse.unsw.edu.au> + * Paper: Neil Brown + * School of Computer Science and Engineering + * The University of New South Wales + * Sydney, 2052 + * Australia + */ + +#include "mdadm.h" +#include "md_p.h" +#include "md_u.h" + +int Query(char *dev) +{ + /* Give a brief description of the device, + * whether it is an md device and whether it has + * a superblock + */ + int fd = open(dev, O_RDONLY, 0); + int vers; + int ioctlerr; + int superror, superrno; + mdp_super_t super; + mdu_array_info_t array; + unsigned long long larray_size; + unsigned long array_size; + struct stat stb; + char *mddev; + mdu_disk_info_t disc; + char *activity; + + if (fd < 0){ + fprintf(stderr, Name ": cannot open %s: %s\n", + dev, strerror(errno)); + return 1; + } + + vers = md_get_version(fd); + if (ioctl(fd, GET_ARRAY_INFO, &array)<0) + ioctlerr = errno; + else ioctlerr = 0; + superror = load_super(fd, &super); + superrno = errno; + + fstat(fd, &stb); + + if (vers>=9000 && !ioctlerr) { +#ifdef BLKGETSIZE64 + if (ioctl(fd, BLKGETSIZE64, &larray_size)==0) + ; + else +#endif + if (ioctl(fd, BLKGETSIZE, &array_size)==0) + larray_size = array_size<<9; + else larray_size = 0; + } + close(fd); + + if (vers < 0) + printf("%s: is not an md array\n", dev); + else if (vers < 9000) + printf("%s: is an md device, but kernel cannot provide details\n", dev); + else if (ioctlerr == ENODEV) + printf("%s: is an md device which is not active\n", dev); + else if (ioctlerr) + printf("%s: is an md device, but gives \"%s\" when queried\n", + dev, strerror(ioctlerr)); + else { + printf("%s: %s %s %d devices, %d spare%s. 
Use mdadm --detail for more detail.\n", + dev, + human_size_brief(larray_size), + map_num(pers, array.level), + array.raid_disks, + array.spare_disks, array.spare_disks==1?"":"s"); + } + switch(superror) { + case 1: + printf("%s: cannot find device size: %s\n", + dev, strerror(superrno)); + break; + case 2: + printf("%s: is too small to be an md componenet.\n", + dev); + break; + case 3: + printf("%s: Cannot seek to superblock: %s\n", + dev, strerror(superrno)); + break; + case 4: + printf("%s: Cannot read md superblock.\n", + dev); + break; + case 5: + printf("%s: No md super block found, not an md component.\n", + dev); + break; + case 6: + printf("%s: md superblock present with wrong version: %d\n", + dev, super.major_version); + break; + default: + /* array might be active... */ + mddev = get_md_name(super.md_minor); + disc.number = super.this_disk.number; + activity = "inactive"; + if (mddev && (fd = open(mddev, O_RDONLY))>=0) { + if (md_get_version(fd) >= 9000 && + ioctl(fd, GET_ARRAY_INFO, &array)>= 0) { + if (ioctl(fd, GET_DISK_INFO, &disc) >= 0 && + MKDEV(disc.major,disc.minor) == stb.st_rdev) + activity = "active"; + else + activity = "mismatch"; + } + close(fd); + } + printf("%s: device %d in %d device %s %s md%d. 
Use mdadm --examine for more detail.\n", + dev, + super.this_disk.number, super.raid_disks, + activity, + map_num(pers, super.level), + super.md_minor); + break; + } + return 0; +} + + @@ -29,7 +29,7 @@ #include "mdadm.h" -char Version[] = Name " - v0.7.2 - 21 March 2002\n"; +char Version[] = Name " - v0.8 - 4 April 2002\n"; /* * File: ReadMe.c * @@ -58,7 +58,7 @@ char Version[] = Name " - v0.7.2 - 21 March 2002\n"; */ /* - * mdadm has 4 major modes of operation: + * mdadm has 6 major modes of operation: * 1/ Create * This mode is used to create a new array with a superbock * It can progress in several step create-add-add-run @@ -72,15 +72,24 @@ char Version[] = Name " - v0.7.2 - 21 March 2002\n"; * 3/ Build * This is for building legacy arrays without superblocks * 4/ Manage - * This is for odd bits an pieces like hotadd, hotremove, setfaulty, - * stop, readonly,readwrite - * If an array is only partially setup by the Create/Assemble/Build - * command, subsequent Manage commands can finish the job. + * This is for doing something to one or more devices + * in an array, such as add,remove,fail. + * run/stop/readonly/readwrite are also available + * 5/ Misc + * This is for doing things to individual devices. + * They might be parts of an array so + * zero-superblock, examine might be appropriate + * They might be md arrays so + * run,stop,rw,ro,detail might be appropriate + * Also query will treat it as either + * 6/ Monitor + * This mode never exits but just monitors arrays and reports changes. 
*/ -char short_options[]="-ABCDEFhVvbc:l:p:m:n:x:u:c:d:z:sarfRSow"; +char short_options[]="-ABCDEFGQhVvbc:l:p:m:n:x:u:c:d:z:sarfRSow"; struct option long_options[] = { {"manage", 0, 0, '@'}, + {"misc", 0, 0, '#'}, {"assemble", 0, 0, 'A'}, {"build", 0, 0, 'B'}, {"create", 0, 0, 'C'}, @@ -88,7 +97,8 @@ struct option long_options[] = { {"examine", 0, 0, 'E'}, {"follow", 0, 0, 'F'}, {"grow", 0, 0, 'G'}, /* not yet implemented */ - {"zero-superblock", 0, 0, 'H'}, + {"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */ + {"query", 0, 0, 'Q'}, /* synonyms */ {"monitor", 0, 0, 'F'}, @@ -146,31 +156,38 @@ char Help[] = "Usage: mdadm --create device options...\n" " mdadm --assemble device options...\n" " mdadm --build device options...\n" -" mdadm --detail device\n" -" mdadm --examine device\n" -" mdadm --follow options...\n" +" mdadm --manage device options...\n" +" mdadm --misc options... devices\n" +" mdadm --monitor options...\n" " mdadm device options...\n" -" mdadm is used for controlling Linux md devices (aka RAID arrays)\n" -" For detail help on major modes use, e.g.\n" +" mdadm is used for building, manageing, and monitoring\n" +" Linux md devices (aka RAID arrays)\n" +" For detail help on the above major modes use --help after the mode\n" +" e.g.\n" " mdadm --assemble --help\n" "\n" "Any parameter that does not start with '-' is treated as a device name\n" -"The first such name is normally the name of an md device. Subsequent\n" -"names are names of component devices." -"\n" -"Available options are:\n" -" --create -C : Create a new array\n" -" --assemble -A : Assemble an existing array\n" -" --build -B : Build a legacy array without superblock\n" -" --detail -D : Print detail of a given md array\n" -" --examine -E : Print content of md superblock on device\n" -" --follow -F : Follow (monitor) any changes to devices and respond to them\n" -" --monitor : same as --follow\n" +"The first such name is often the name of an md device. 
Subsequent\n" +"names are often names of component devices." "\n" +"Some common options are:\n" " --help -h : This help message or, after above option,\n" " mode specific help message\n" " --version -V : Print version information for mdadm\n" " --verbose -v : Be more verbose about what is happening\n" +" --brief -b : Be less verbose, more brief\n" +" --force -f : Override normal checks and be more forceful\n" +"\n" +" --assemble -A : Assemble an array\n" +" --build -B : Build a legacy array\n" +" --create -C : Create a new array\n" +" --detail -D : Display details of an array\n" +" --examine -E : Examine superblock on an array componenet\n" +" --monitor -F : monitor (follow) some arrays\n" +" --query -Q : Display general information about how a\n" +" device relates to the md driver\n" +; +/* "\n" " For create or build:\n" " --chunk= -c : chunk size of kibibytes\n" @@ -213,32 +230,43 @@ char Help[] = " --readwrite -w : mark array as readwrite\n" " --zero-superblock : erase the MD superblock from a device.\n" ; - +*/ char Help_create[] = "Usage: mdadm --create device -chunk=X --level=Y --raid-disks=Z devices\n" "\n" -" This usage will initialise a new md array and possibly associate some\n" +" This usage will initialise a new md array and associate some\n" " devices with it. If enough devices are given to complete the array,\n" " the array will be activated. Otherwise it will be left inactive\n" " to be completed and activated by subsequent management commands.\n" "\n" -" As devices are added, they are checked to see if they contain\n" -" raid superblocks or filesystems. They are also check to see if\n" +" As devices are added, they are checked to see if they already contain\n" +" raid superblocks or filesystems. 
They are also checked to see if\n" " the variance in device size exceeds 1%.\n" " If any discrepancy is found, the array will not automatically\n" " be run, though the presence of a '--run' can override this\n" " caution.\n" "\n" -" If the --size option is given, it is not necessary to list any subdevices\n" -" in this command. They can be added later, before a --run.\n" +" If the --size option is given then only that many kilobytes of each\n" +" device is used, no matter how big each device is.\n" " If no --size is given, the apparent size of the smallest drive given\n" -" is used.\n" +" is used for raid level 1 and greater, and the full device is used for\n" +" other levels.\n" "\n" -" The General management options that are valid with --create are:\n" -" --run : insist of running the array even if not all devices\n" -" are present or some look odd.\n" -" --readonly: start the array readonly - not supported yet.\n" +" Options that are valid with --create (-C) are:\n" +" --chunk= -c : chunk size of kibibytes\n" +" --rounding= : rounding factor for linear array (==chunck size)\n" +" --level= -l : raid level: 0,1,4,5,linear,multipath and synonyms\n" +" --paritiy= -p : raid5 parity algorith: {left,right}-{,a}symmetric\n" +" --layout= : same as --parity\n" +" --raid-disks= -n : number of active devices in array\n" +" --spare-disks= -x : number of spares (eXtras) devices in initial array\n" +" --size= -z : Size (in K) of each drive in RAID1/4/5 - optional\n" +" --force -f : Honour devices as listed on command line. Don't\n" +" : insert a missing drive for RAID5.\n" +" --run : insist of running the array even if not all\n" +" : devices are present or some look odd.\n" +" --readonly : start the array readonly - not supported yet.\n" "\n" ; @@ -246,13 +274,18 @@ char Help_build[] = "Usage: mdadm --build device -chunk=X --level=Y --raid-disks=Z devices\n" "\n" " This usage is similar to --create. The difference is that it creates\n" -" a legacy array with a superblock. 
With these arrays there is no\n" +" a legacy array without a superblock. With these arrays there is no\n" " different between initially creating the array and subsequently\n" " assembling the array, except that hopefully there is useful data\n" " there in the second case.\n" "\n" -" The level may only be 0 or linear.\n" +" The level may only be 0, raid0, or linear.\n" " All devices must be listed and the array will be started once complete.\n" +" Options that are valid with --build (-B) are:\n" +" --chunk= -c : chunk size of kibibytes\n" +" --rounding= : rounding factor for linear array (==chunck size)\n" +" --level= -l : 0, raid0, or linear\n" +" --raid-disks= -n : number of active devices in array\n" ; char Help_assemble[] = @@ -261,53 +294,140 @@ char Help_assemble[] = "\n" "This usage assembles one or more raid arrays from pre-existing\n" "components.\n" -"For each array, mdadm needs to know the md device, the identify of\n" +"For each array, mdadm needs to know the md device, the identity of\n" "the array, and a number of sub devices. These can be found in a number\n" "of ways.\n" "\n" "The md device is either given on the command line or is found listed\n" "in the config file. 
The array identity is determined either from the\n" -"--uuid or --super-minor commandline arguments, or from the config file,\n" +"--uuid or --super-minor commandline arguments, from the config file,\n" "or from the first component device on the command line.\n" "\n" "The different combinations of these are as follows:\n" " If the --scan option is not given, then only devices and identities\n" " listed on the command line are considered.\n" -" The first device will be the array devices, and the remainder will\n" +" The first device will be the array device, and the remainder will be\n" " examined when looking for components.\n" " If an explicit identity is given with --uuid or --super-minor, then\n" -" Each device with a superblock which matches that identity is considered,\n" +" only devices with a superblock which matches that identity is considered,\n" " otherwise every device listed is considered.\n" "\n" " If the --scan option is given, and no devices are listed, then\n" " every array listed in the config file is considered for assembly.\n" -" The identity can candidate devices are determined from the config file.\n" +" The identity of candidate devices are determined from the config file.\n" "\n" " If the --scan option is given as well as one or more devices, then\n" " Those devices are md devices that are to be assembled. Their identity\n" " and components are determined from the config file.\n" "\n" -"The config file contains, apart from blank lines and comment lines that\n" -"start with a has, two sorts of configuration lines, array lines and\n" -"device lines.\n" -"Each configuration line is constructed of a number of space separated\n" -"words, and can be continued on subsequent physical lines by indenting\n" -"those lines.\n" -"\n" -"A device line starts with the word 'device' and then has a number of words\n" -"which identify devices. These words should be names of devices in the filesystem,\n" -"and can contain wildcards. 
There can be multiple words or each device line,\n" -"and multiple device lines. All devices so listed are checked for relevant\n" -"super blocks when assembling arrays.\n" -"\n" -"An array line start with the word 'array'. This is followed by the name of\n" -"the array device in the filesystem, e.g. '/dev/md2'. Subsequent words\n" -"describe the identity of the array, used to recognise devices to include in the\n" -"array. The identity can be given as a UUID with a word starting 'uuid=', or\n" -"as a minor-number stored in the superblock using 'super-minor=', or as a list\n" -"of devices. This is given as a comma separated list of names, possibly containing\n" -"wildcards, preceeded by 'devices='. If multiple critea are given, than a device\n" -"must match all of them to be considered.\n" +"Options that are valid with --assemble (-A) are:\n" +" --uuid= -u : uuid of array to assemble. Devices which don't\n" +" have this uuid are excluded\n" +" --super-minor= -m : minor number to look for in super-block when\n" +" choosing devices to use.\n" +" --config= -c : config file\n" +" --scan -s : scan config file for missing information\n" +" --run -R : Try to start the array even if not enough devices\n" +" for a full array are present\n" +" --force -f : Assemble the array even if some superblocks appear\n" +" : out-of-date. 
This involves modifying the superblocks.\n" +; + +char Help_manage[] = +"Usage: mdadm arraydevice options component devices...\n" +"\n" +"This usage is for managing the component devices within an array.\n" +"The --manage option is not needed and is assumed if the first argument\n" +"is a device name or a management option.\n" +"The first device listed will be taken to be an md array device, and\n" +"subsequent devices are (potential) components of that array.\n" +"\n" +"Options that are valid with management mode are:\n" +" --add -a : hotadd subsequent devices to the array\n" +" --remove -r : remove subsequent devices, which must not be active\n" +" --fail -f : mark subsequent devices a faulty\n" +" --set-faulty : same as --fail\n" +" --run -R : start a partially built array\n" +" --stop -S : deactive array, releasing all resources\n" +" --readonly -o : mark array as readonly\n" +" --readwrite -w : mark array as readwrite\n" +; + +char Help_misc[] = +"Usage: mdadm misc_option devices...\n" +"\n" +"This usage is for performing some task on one or more devices, which\n" +"may be arrays or components, depending on the task.\n" +"The --misc option is not needed (though it is allowed) and is assumed\n" +"if the first argument in a misc option.\n" +"\n" +"Options that are valid with the miscellaneous mode are:\n" +" --query -Q : Display general information about how a\n" +" device relates to the md driver\n" +" --detail -D : Display details of an array\n" +" --examine -E : Examine superblock on an array componenet\n" +" --zero-superblock : erase the MD superblock from a device.\n" +" --run -R : start a partially built array\n" +" --stop -S : deactive array, releasing all resources\n" +" --readonly -o : mark array as readonly\n" +" --readwrite -w : mark array as readwrite\n" +; + +char Help_monitor[] = +"Usage: mdadm --monitor options devices\n" +"\n" +"This usage causes mdadm to monitor a number of md arrays by periodically\n" +"polling their status and acting on any 
changes.\n" +"If any devices are listed then those devices are monitored, otherwise\n" +"all devices listed in the config file are monitored.\n" +"The address for mailing advisories to, and the program to handle\n" +"each change can be specified in the config file or on the command line.\n" +"If no mail address or program are specified, then mdadm reports all\n" +"state changes to stdout.\n" +"\n" +"Options that are valid with the monitor (--F --follow) mode are:\n" +" --mail= -m : Address to mail alerts of failure to\n" +" --program= -p : Program to run when an event is detected\n" +" --alert= : same as --program\n" +" --delay= -d : seconds of delay between polling state. default=60\n" +" --config= -c : specify a different config file\n" +" --scan -s : find mail-address/program in config file\n" +; + + + + +char Help_config[] = +"The /etc/mdadm.conf config file:\n\n" +" The config file contains, apart from blank lines and comment lines that\n" +" start with a hash(#), four sorts of configuration lines: array lines, \n" +" device lines, mailaddr lines and program lines.\n" +" Each configuration line is constructed of a number of space separated\n" +" words, and can be continued on subsequent physical lines by indenting\n" +" those lines.\n" +"\n" +" A device line starts with the word 'device' and then has a number of words\n" +" which identify devices. These words should be names of devices in the\n" +" filesystem, and can contain wildcards. There can be multiple words or each\n" +" device line, and multiple device lines. All devices so listed are checked\n" +" for relevant super blocks when assembling arrays.\n" +"\n" +" An array line start with the word 'array'. This is followed by the name of\n" +" the array device in the filesystem, e.g. '/dev/md2'. Subsequent words\n" +" describe the identity of the array, used to recognise devices to include in the\n" +" array. 
The identity can be given as a UUID with a word starting 'uuid=', or\n" +" as a minor-number stored in the superblock using 'super-minor=', or as a list\n" +" of devices. This is given as a comma separated list of names, possibly\n" +" containing wildcards, preceeded by 'devices='. If multiple critea are given,\n" +" than a device must match all of them to be considered.\n" +"\n" +" A mailaddr line starts with the word 'mailaddr' and should contain exactly\n" +" one Email address. 'mdadm --monitor --scan' will send alerts of failed drives\n" +" to this Email address." +"\n" +" A program line starts with the word 'program' and should contain exactly\n" +" one program name. 'mdadm --monitor --scan' will run this program when any\n" +" event is detected.\n" "\n" ; @@ -340,5 +460,17 @@ mapping_t pers[] = { { "4", 4}, { "raid5", 5}, { "5", 5}, + { "multipath", -4}, + { "mp", -4}, { NULL, 0} }; + + +mapping_t modes[] = { + { "assemble", ASSEMBLE}, + { "build", BUILD}, + { "create", CREATE}, + { "manage", MANAGE}, + { "misc", MISC}, + { "monitor", MONITOR}, +}; @@ -1,20 +1,27 @@ -?? Allow -S /dev/md? - current complains subsequent not a/d/r +?? Allow -S /dev/md? - current complains subsequent not a/d/r - DONE * new "Query" mode to subsume --detail and --examine. --query or -Q, takes a device and tells if it is an MD device, - and also tells in a raid superblock is found. + and also tells in a raid superblock is found. + DONE -* write proc.c to parse /proc/mdstat file, and maybe /proc/partitions too. +* write mdstat.c to parse /proc/mdstat file Build list of arrays: name, rebuild-percent + DONE -* --detail --scan to read mdadm.conf, and then iterate over these, +* parse /proc/partitions and map major/minor into /dev/* names, + and use that for default DEVICE list ???? + +* --detail --scan to read /proc/mdstat, and then iterate over these, but assume --brief. --verbose can override check each subdevice to see if it is in conf_get_devs. Warn if not. 
+ DONE, but don't warn yet... * Support multipath ... maybe... + maybe DONE * --follow to syslog @@ -70,7 +70,7 @@ #endif char DefaultConfFile[] = CONFFILE; -char *keywords[] = { "device", "array", NULL }; +char *keywords[] = { "device", "array", "mailaddr", "program", NULL }; /* * match_keyword returns an index into the keywords array, or -1 for no match @@ -202,7 +202,7 @@ struct conf_dev { -int devline(char *line) +void devline(char *line) { char *w; struct conf_dev *cd; @@ -236,6 +236,7 @@ void arrayline(char *line) mis.raid_disks = -1; mis.devices = NULL; mis.devname = NULL; + mis.spare_group = NULL; for (w=dl_next(line); w!=line; w=dl_next(w)) { if (w[0] == '/') { @@ -302,7 +303,37 @@ void arrayline(char *line) mddevlp = &mi->next; } } - + +static char *alert_email = NULL; +void mailline(char *line) +{ + char *w; + + for (w=dl_next(line); w != line ; w=dl_next(w)) { + if (alert_email == NULL) + alert_email = strdup(w); + else + fprintf(stderr, Name ": excess address on MAIL line: %s - ignored\n", + w); + } +} + + +static char *alert_program = NULL; +void programline(char *line) +{ + char *w; + + for (w=dl_next(line); w != line ; w=dl_next(w)) { + if (alert_program == NULL) + alert_program = strdup(w); + else + fprintf(stderr, Name ": excess program on PROGRAM line: %s - ignored\n", + w); + } +} + + int loaded = 0; void load_conffile(char *conffile) @@ -324,9 +355,15 @@ void load_conffile(char *conffile) case 0: /* DEVICE */ devline(line); break; - case 1: + case 1: /* ARRAY */ arrayline(line); break; + case 2: /* MAIL */ + mailline(line); + break; + case 3: /* PROGRAM */ + programline(line); + break; default: fprintf(stderr, Name ": Unknown keyword %s\n", line); } @@ -337,6 +374,18 @@ void load_conffile(char *conffile) /* printf("got file\n"); */ } +char *conf_get_mailaddr(char *conffile) +{ + load_conffile(conffile); + return alert_email; +} + +char *conf_get_program(char *conffile) +{ + load_conffile(conffile); + return alert_program; +} + mddev_ident_t 
conf_get_ident(char *conffile, char *dev) { @@ -369,16 +418,16 @@ mddev_dev_t conf_get_devs(char *conffile) glob(cd->name, flags, NULL, &globbuf); flags |= GLOB_APPEND; } - - for (i=0; i<globbuf.gl_pathc; i++) { - mddev_dev_t t = malloc(sizeof(*t)); - t->devname = strdup(globbuf.gl_pathv[i]); - t->next = dlist; - dlist = t; + if (flags & GLOB_APPEND) { + for (i=0; i<globbuf.gl_pathc; i++) { + mddev_dev_t t = malloc(sizeof(*t)); + t->devname = strdup(globbuf.gl_pathv[i]); + t->next = dlist; + dlist = t; /* printf("one dev is %s\n", t->devname);*/ + } + globfree(&globbuf); } - globfree(&globbuf); - return dlist; } @@ -15,7 +15,7 @@ struct __dl_head #define dl_next(p) *((void**)&(((struct __dl_head*)(p))[-1].dh_next)) #define dl_prev(p) *((void**)&(((struct __dl_head*)(p))[-1].dh_prev)) -void *dl_head(); +void *dl_head(void); char *dl_strdup(char *); char *dl_strndup(char *, int); void dl_insert(void*, void*); @@ -9,9 +9,9 @@ md \- Multiple Device driver aka Linux Software Raid The .B md driver provides virtual devices that are created from one or more -independant underlying devices. This array of devices often contains +independent underlying devices. This array of devices often contains redundancy, and hence the acronym RAID which stands for a Redundant -Array of Independant Devices. +Array of Independent Devices. .PP .B md support RAID levels 1 (mirroring) 4 (striped array with parity device) and 5 @@ -20,13 +20,13 @@ device fails while using one of these level, the array will continue to function. .PP .B md -also supports a number of pseudo RAID (non-redundant) configuations +also supports a number of pseudo RAID (non-redundant) configurations including RAID0 (striped array), LINEAR (catenated array) and MULTIPATH (a set of different interfaces to the same device). 
.SS MD SUPER BLOCK With the exception of Legacy Arrays described below, each device that -is incorportated into an MD array has a +is incorporated into an MD array has a .I super block written towards the end of the device. This superblock records information about the structure and state of the array so that the @@ -74,16 +74,134 @@ data that is on the array. However this cannot be done on a live array. - .SS RAID0 A RAID0 array (which has zero redundancy) is also known as a striped array. +A RAID0 array is configured at creation with a +.B "Chunk Size" +which must be a multiple of 4 kibibytes. + +The RAID0 driver places the first chunk of the array to the first +device, the second chunk to the second device, and so on until all +drives have been assigned one chuck. This collection of chunks forms +a +.BR stripe . +Further chunks are gathered into stripes in the same way which are +assigned to the remaining space in the drives. + +If device in the array are not all the same size, then once the +smallest devices has been exhausted, the RAID0 driver starts +collecting chunks into smaller stripes that only span the drives which +still have remaining space. + + .SS RAID1 + +A RAID1 array is also known as a mirrored set (though mirrors tend to +provide reflect images, which RAID1 does not) or a plex. + +Once initialised, each device in a RAID1 array contains exactly the +same data. Changes are written to all devices in parallel. Data is +read from any one device. The driver attempts to distribute read +requests across all devices to maximise performance. + +All devices in a RAID1 array should be the same size. If they are +not, then only the amount of space available on the smallest device is +used. Any extra space on other devices is wasted. + .SS RAID4 + +A RAID4 array is like a RAID0 array with an extra device for storing +parity. 
Unlike RAID0, RAID4 also requires that all stripes span all +drives, so extra space on devices that are larger than the smallest is +wasted. + +When any block in a RAID4 array is modified the parity block for that +stripe (i.e. the block in the parity device at the same device offset +as the stripe) is also modified so that the parity block always +contains the "parity" for the whole stripe. i.e. its contents are +equivalent to the result of performing an exclusive-or operation +between all the data blocks in the stripe. + +This allows the array to continue to function if one device fails. +The data that was on that device can be calculated as needed from the +parity block and the other data blocks. + .SS RAID5 + +RAID5 is very similar to RAID4. The difference is that the parity +blocks for each stripe, instead of being on a single device, are +distributed across all devices. This allows more parallelism when +writing as two different block updates will quite possibly affect +parity blocks on different devices so there is less contention. + +This also allows more parallelism when reading as read requests are +distributed over all the devices in the array instead of all but one. + .SS MUTIPATH -.SS REBUILD/RESYNC + +MULTIPATH is not really a RAID at all as there is only one real device +in a MULTIPATH md array. However there are multiple access points +(paths) to this device, and one of these paths might fail, so there +are some similarities. + +A MULTIPATH array is composed of a number of different devices, often +fibre channel interfaces, that all refer to the same real device. +If one of these interfaces fails (e.g. due to cable problems), the +multipath driver will attempt to redirect requests to another +interface.
+ + +.SS UNCLEAN SHUTDOWN + +When changes are made to a RAID1, RAID4, or RAID5 array there is a +possibility of inconsistency for short periods of time as each update +requires at least two blocks to be written to different devices, and +these writes probably won't happen at exactly the same time. +Thus if a system with one of these arrays is shut down in the middle of +a write operation (e.g. due to power failure), the array may not be +consistent. + +To handle this situation, the md driver marks an array as "dirty" +before writing any data to it, and marks it as "clean" when the array +is being disabled, e.g. at shutdown. +If the md driver finds an array to be dirty at startup, it proceeds to +correct any possible inconsistency. For RAID1, this involves copying +the contents of the first drive onto all other drives. +For RAID4 or RAID5 this involves recalculating the parity for each +stripe and making sure that the parity block has the correct data. + +If a RAID4 or RAID5 array is degraded (missing one drive) when it is +restarted after an unclean shutdown, it cannot recalculate parity, and +so it is possible that data might be undetectably corrupted. +The md driver currently +.B does not +alert the operator to this condition. It should probably fail to +start an array in this condition without manual intervention. + +.SS RECOVERY + +If the md driver detects any error on a device in a RAID1, RAID4, or +RAID5 array, it immediately disables that device (marking it as faulty) +and continues operation on the remaining devices. If there is a spare +drive, the driver will start recreating on one of the spare drives the +data that was on that failed drive, either by copying a working drive +in a RAID1 configuration, or by doing calculations with the parity +block on RAID4 and RAID5.
+ +Why this recovery process is happening, the md driver will monitor +accesses to the array and will slow down the rate of recovery if other +activity is happening, so that normal access to the array will not be +unduly affected. When no other activity is happening, the recovery +process proceeds at full speed. The actual speed targets for the two +different situations can be controlled by the +.B speed_limit_min +and +.B speed_limit_max +control files mentioned below. + + .SH FILES .TP .B /proc/mdstat @@ -11,10 +11,10 @@ SSYYNNOOPPSSIISS DDEESSCCRRIIPPTTIIOONN The mmdd driver provides virtual devices that are created - from one or more independant underlying devices. This + from one or more independent underlying devices. This array of devices often contains redundancy, and hence the acronym RAID which stands for a Redundant Array of Inde- - pendant Devices. + pendent Devices. mmdd support RAID levels 1 (mirroring) 4 (striped array with parity device) and 5 (striped array with distributed par- @@ -23,14 +23,14 @@ DDEESSCCRRIIPPTTIIOONN function. mmdd also supports a number of pseudo RAID (non-redundant) - configuations including RAID0 (striped array), LINEAR + configurations including RAID0 (striped array), LINEAR (catenated array) and MULTIPATH (a set of different inter- faces to the same device). MMDD SSUUPPEERR BBLLOOCCKK With the exception of Legacy Arrays described below, each - device that is incorportated into an MD array has a _s_u_p_e_r + device that is incorporated into an MD array has a _s_u_p_e_r _b_l_o_c_k written towards the end of the device. This superblock records information about the structure and state of the array so that the array can be reliably re- @@ -77,16 +77,139 @@ DDEESSCCRRIIPPTTIIOONN - RRAAIIDD00 A RAID0 array (which has zero redundancy) is also known as - a striped array. + a striped array. A RAID0 array is configured at creation + with a CChhuunnkk SSiizzee which must be a multiple of 4 kibibytes. 
+ + The RAID0 driver places the first chunk of the array to + the first device, the second chunk to the second device, + and so on until all drives have been assigned one chuck. + This collection of chunks forms a ssttrriippee. Further chunks + are gathered into stripes in the same way which are + assigned to the remaining space in the drives. + + If device in the array are not all the same size, then + once the smallest devices has been exhausted, the RAID0 + driver starts collecting chunks into smaller stripes that + only span the drives which still have remaining space. + + RRAAIIDD11 + A RAID1 array is also known as a mirrored set (though mir- + rors tend to provide reflect images, which RAID1 does not) + or a plex. + + Once initialised, each device in a RAID1 array contains + exactly the same data. Changes are written to all devices + in parallel. Data is read from any one device. The + driver attempts to distribute read requests across all + devices to maximise performance. + + All devices in a RAID1 array should be the same size. If + they are not, then only the amount of space available on + the smallest device is used. Any extra space on other + devices is wasted. + + RRAAIIDD44 + A RAID4 array is like a RAID0 array with an extra device + for storing parity. Unlike RAID0, RAID4 also requires + that all stripes span all drives, so extra space on + devices that are larger than the smallest is wasted. + + When any block in a RAID4 array is modified the parity + block for that stripe (i.e. the block in the parity device + at the same device offset as the stripe) is also modified + so that the parity block always contains the "parity" for + the whole stripe. i.e. its contents is equivalent to the + result of performing an exclusive-or operation between all + the data blocks in the stripe. + + This allows the array to continue to function if one + device fails. 
The data that was on that device can be + calculated as needed from the parity block and the other + data blocks. + + RRAAIIDD55 + RAID5 is very similar to RAID4. The difference is that + the parity blocks for each stripe, instead of being on a + single device, are distributed across all devices. This + allows more parallelism when writing as two different + block updates will quite possibly affect parity blocks on + different devices so there is less contention. + + This also allows more parallelism when reading as read + requests are distributed over all the devices in the array + instead of all but one. + + MMUUTTIIPPAATTHH - RREEBBUUIILLDD//RREESSYYNNCC + MULTIPATH is not really a RAID at all as there is only one + real device in a MULTIPATH md array. However there are + multiple access points (paths) to this device, and one of + these paths might fail, so there are some similarities. + + A MULTIPATH array is composed of a number of different + devices, often fibre channel interfaces, that all refer + the the same real device. If one of these interfaces + fails (e.g. due to cable problems), the multipath driver + to attempt to redirect requests to another interface. + + + + UUNNCCLLEEAANN SSHHUUTTDDOOWWNN + When changes are made to an RAID1, RAID4, or RAID5 array + there is a possibility of inconsistency for short periods + of time as each update requires are least two block to be + written to different devices, and these writes probably + wont happen at exactly the same time. This is a system + with one of these arrays is shutdown in the middle of a + write operation (e.g. due to power failure), the array may + not be consistent. + + The handle this situation, the md driver marks an array as + "dirty" before writing any data to it, and marks it as + "clean" when the array is being disabled, e.g. at shut- + down. If the md driver finds an array to be dirty at + startup, it proceeds to correct any possibly inconsis- + tency. 
For RAID1, this involves copying the contents of + the first drive onto all other drives. For RAID4 or RAID5 + this involves recalculating the parity for each stripe and + making sure that the parity block has the correct data. + + If a RAID4 or RAID5 array is degraded (missing one drive) + when it is restarted after an unclean shutdown, it cannot + recalculate parity, and so it is possible that data might + be undetectably corrupted. The md driver currently ddooeess + nnoott alert the operator to this condition. It should prob- + ably fail to start an array in this condition without man- + ual intervention. + + + RREECCOOVVEERRYY + If the md driver detects any error on a device in a RAID1, + RAID4, or RAID5 array, it immediately disables that device + (marking it as faulty) and continues operation on the + remaining devices. If there is a spare drive, the driver + will start recreating on one of the spare drives the data + what was on that failed drive, either by copying a working + drive in a RAID1 configuration, or by doing calculations + with the parity block on RAID4 and RAID5. + + Why this recovery process is happening, the md driver will + monitor accesses to the array and will slow down the rate + of recovery if other activity is happening, so that normal + access to the array will not be unduly affected. When no + other activity is happening, the recovery process proceeds + at full speed. The actual speed targets for the two dif- + ferent situations can be controlled by the ssppeeeedd__lliimmiitt__mmiinn + and ssppeeeedd__lliimmiitt__mmaaxx control files mentioned below. + + + FFIILLEESS //pprroocc//mmddssttaatt Contains information about the status of currently @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.TH mdadm 8 +.TH MDADM 8 .SH NAME mdadm \- manage MD devices .I aka @@ -7,7 +7,7 @@ Linux Software Raid. 
.SH SYNOPSIS -.BI mdadm " [mode] <raiddevice> [options] <subdevices>" +.BI mdadm " [mode] <raiddevice> [options] <component-devices>" .SH DESCRIPTION RAID devices are virtual devices created from two or more @@ -33,7 +33,7 @@ and Recent kernels (2002) also support a mode known as .BR MULTIPATH . .B mdadm -does not support MULTIPATH as yet. +only provides limited support for MULTIPATH as yet. .B mdadm is a program that can be used to create, manage, and monitor @@ -56,18 +56,13 @@ configuration file. Also mdadm helps with management of the configuration file. .IP \(bu 4 .B mdadm -can provide information about your arrays (through Detail and Examine) +can provide information about your arrays (through Query, Detail, and Examine) that .B raidtools cannot. -.IP \(bu 4 -.B raidtools -can manage MULTIPATH devices which -.B mdadm -cannot yet manage. .SH MODES -mdadm has 7 major modes of operation: +mdadm has 6 major modes of operation: .TP .B Assemble Assemble the parts of a previously created @@ -89,32 +84,19 @@ Create a new array with per-device superblocks. '''in several step create-add-add-run or it can all happen with one command. .TP -.B Detail -Display the details of a given md device. Details include the RAID -level, the number of devices, which ones are faulty (if any), and the -array UUID. +.B Manage +This is for doing things to specific components of an array such as +adding new spares and removing faulty devices. .TP -.B Examine -Examine a device to see if it is part of an md array, and print out -the details of that array. -This mode can also be used to examine a large number of devices and to -print out a summary of the arrays found in a format suitable for the -.B mdadm.conf -configuration file. +.B Misc +This mode allows operations on independent devices such as examine MD +superblocks, erasing old superblocks and stopping active arrays. .TP .B "Follow or Monitor" Monitor one or more md devices and act on any state changes. 
-.TP -.B Manage -This is for odd bits an pieces like hotadd, hotremove, setfaulty, stop, -readonly, readwrite. -'''If an array is only partially setup by the -'''Create or Assemble commands, subsequent Manage commands can finish the -'''job. - .SH OPTIONS Available options are: @@ -132,6 +114,13 @@ Build a legacy array without superblocks. Create a new array. .TP +.BR -Q ", " --query +Examine a device to see +(1) if it is an md device and (2) if it is a component of an md +array. +Information about what is discovered is presented. + +.TP .BR -D ", " --detail Print detail of one or more md devices. @@ -164,6 +153,36 @@ Be less verbose. This is used with and .BR --examine . +.TP +.BR -f ", " --force +Be more forceful about certain operations. See the various modes of +the exact meaning of this option in different contexts. + +.TP +.BR -c ", " --config= +Specify the config file. Default is +.BR /etc/mdadm.conf . + +.TP +.BR -s ", " --scan +scan config file or +.B /proc/mdstat +for missing information. +In general, this option gives +.B mdadm +permission to get any missing information, like component devices, +array devices, array identities, and alert destination from the +configuration file: +.BR /etc/mdadm.conf . +One exception is MISC mode when using +.B --detail +or +.B --stop +in which case +.B --scan +says to get a list of array devices from +.BR /proc/mdstat . + .SH For create or build: .TP @@ -223,15 +242,6 @@ don't have this minor number are excluded. If you create an array as the array is later assembled as /dev/md2. .TP -.BR -c ", " --config= -config file. Default is -.BR /etc/mdadm.conf . - -.TP -.BR -s ", " --scan -scan config file for missing information - -.TP .BR -f ", " --force Assemble the array even if some superblocks appear out-of-date @@ -245,7 +255,7 @@ With .B --run an attempt will be made to start it anyway. -.SH General management +.SH For Manage mode: .TP .BR -a ", " --add @@ -265,6 +275,8 @@ mark listed devices as faulty. 
.BR --set-faulty same as --fail. +.SH For Misc mode: + .TP .BR -R ", " --run start a partially built array. @@ -281,8 +293,31 @@ mark array as readonly. .BR -w ", " --readwrite mark array as readwrite. +.TP +.B --zero-superblock +If the device contains a valid md superblock, the block is +over-written with zeros. With +--force +the block where the superblock would be is over-written even if it +doesn't appear to be valid. + +.SH For Monitor mode: +.TP +.BR -m ", " --mail +Give a mail address to send alerts to. -.SH ASSEMBLY MODE +.TP +.BR -p ", " --program ", " --alert +Give a program to be run whenever an event is detected. + +.TP +.BR -d ", " --delay +Give a delay in seconds. +.B mdadm +polls the md arrays and then waits this many seconds before polling +again. The default is 60 seconds. + +.SH ASSEMBLE MODE .HP 12 Usage: @@ -296,7 +331,7 @@ Usage: .PP This usage assembles one or more raid arrays from pre-existing components. For each array, mdadm needs to know the md device, the identity of the -array, and a number of sub devices. These can be found in a number of ways. +array, and a number of component-devices. These can be found in a number of ways. The md device is either given before .B --scan @@ -308,7 +343,7 @@ The identity can be given with the option, with the .B --super-minor option, can be found in in the config file, or will be taken from the -super block on the first subdevice listed on the command line. +super block on the first component-device listed on the command line. Devices can be given on the .B --assemble @@ -387,7 +422,7 @@ can override this caution. '''If the '''.B --size -'''option is given, it is not necessary to list any subdevices in this command. +'''option is given, it is not necessary to list any component-devices in this command. '''They can be added later, before a '''.B --run. '''If no @@ -404,64 +439,223 @@ be in use. .B --readonly start the array readonly - not supported yet. 
-.SH DETAIL MODE +.SH MANAGE MODE .HP 12 Usage: -.B mdadm --detail -.RB [ --brief ] -.I device ... +.B mdadm +.I device +.I options... devices... .PP -This usage sill print out the details of the given array including a -list of component devices. To determine names for the devices, +This usage will allow individual devices in an array to be failed, +removed or added. It is possible to perform multiple operations with +on command. For example: +.br +mdadm /dev/md0 -f /dev/hda1 -r /dev/hda1 /a /dev/hda1 +.br +will firstly mark +.B /dev/hda1 +as faulty in +.B /dev/md0 +and will then remove it from the array and finally add it back +in as a spare. However only one md array can be affect by a single +command. + +.SH MISC MODE +.HP 12 +Usage: .B mdadm -searches -.B /dev -for device files with the right major and minor numbers. +.I options ... +.I devices ... +.PP -With +MISC mode includes a number if distinct operations that +operate on distinct devices. The operations are: +.TP +--query +The device is examined to see if it is +(1) an active md array, or +(2) a component of an md array. +The information discovered is reported. + +.TP +--detail +The device should be an active md device. mdadm will display +a detailed description of the array. .B --brief -.B mdadm -prints a single line that identifies the level, number of disks, and -UUID of the array. This line is suitable for inclusion in +will cause the output to be less detailed and format to be +suitable for inclusion in .BR /etc/mdadm.conf . -.SH EXAMINE MODE +.TP +--examine +The device should be a component of an md array. mdadm will +read the md superblock of the device and display the contents. +If +.B --brief +is given, or +.B --scan +then multiple devices that are components of the one array +are grouped together and reported in a single entry suitable +for inclusion in +.BR /etc/mdadm.conf . + +Have +.B --scan +without listing any devices will cause all devices listed in the +config file to be examined. 
+ +.TP +--stop +The devices should be active md arrays which will be deactivated, if +they are not currently in use. + +.TP +--run +This will fully activate a partially assembled md array. + +.TP +--readonly +This will mark an active array as read-only, providing that it is +not currently being used. + +.TP +--readwrite +This will change a +.B readonly +array back to being read/write. + +.SH MONITOR MODE + .HP 12 Usage: -.B mdadm --examine -.RB [ --scan ] -.RB [ --brief ] -.I device ... +.B mdadm --monitor +.I options... devices... + .PP -This usage will examine some block devices to see if that have a valid -RAID superblock on them. The information in each valid raid -superblock will be printed. +This usage causes +.B mdadm +to periodically poll a number of md arrays and to report on any events +noticed. +.B mdadm +will never exit once it decides that there are arrays to be checked, +so it should normally be run in the background. + +If any devices are listed on the command line, +.B mdadm +will only monitor those devices. Otherwise all arrays listed in the +configuration file will be monitored. Further, if +.B --scan +is given, then any other md devices that appear in +.B /proc/mdstat +will also be monitored. + +The result of monitoring the arrays is the generation of events. +These events are passed to a separate program (if specified) and may +be mailed to a given E-mail address. + If .B --scan -is used, the no devices should be listed, and the complete set of -devices identified in the configuration file are checked. +is given, then a program or an E-mail address must be specified on the +command line or in the config file. If neither are available, then +.B mdadm +will not monitor anything. +Without .B --scan -implies -.B --brief -but this implication can be countered by specifying -.BR --verbose . +.B mdadm +will continue monitoring as long as something was found to monitor. If +no program or email is given, then each event is reported to +.BR stdout .
-With -.B --brief +The different events are: + +.RS 4 +.TP +.B DeviceDisappeared +An md array which previously was configured appears to no longer be +configured. + +.TP +.B RebuildStarted +An md array started reconstruction. + +.TP +.BI Rebuild NN +Where +.I NN +is 20, 40, 60, or 80, this indicates that rebuild has passed that +percentage of the total. + +.TP +.B Fail +An active component device of an array has been marked as faulty. + +.TP +.B FailSpare +A spare component device which was being rebuilt to replace a faulty +device has failed. + +.TP +.B SpareActive +A spare component device which was being rebuilt to replace a faulty +device has been successfully rebuilt and has been made active. + +.TP +.B NewArray +A new md array has been detected in the +.B /proc/mdstat +file. + +.TP +.B MoveSpare +A spare drive has been moved from one array in a +.B spare-group +to another to allow a failed drive to be replaced. + +.RE + +Only +.B Fail +and +.B FailSpare +cause Email to be sent. All events cause the program to be run. +The program is run with two or three arguments, they being the event +name, the array device and possibly a second device. + +Each event has an associated array device (e.g. +.BR /dev/md1 ) +and possibly a second device. For +.BR Fail , +.BR FailSpare , +and +.B SpareActive +the second device is the relevant component device. +For +.B MoveSpare +the second device is the array that the spare was moved from. + +For +.B mdadm +to move spares from one array to another, the different arrays need to +be labelled with the same +.B spare-group +in the configuration file. The +.B spare-group +name can be any string. It is only necessary that different spare +groups use different names. + +When .B mdadm -will output an config file entry of each distinct array that was -found. This entry will list the UUID, the raid level, and a list of -the individual devices on which a superblock for that array was found. 
-This output will by syntactically suitable for inclusion in the -configuration file, but should -.B NOT -be used blindly. Often the array description that you want in the -configuration file is much less specific than that given by -.BR "mdadm -Bs" . -For example, you normally do not want to list the devices, -particularly if they are SCSI devices. +detects that an array which is in a spare group has fewer active +devices than necessary for the complete array, and has no spare +devices, it will look for another array in the same spare group that +has a full complement of working drives and a spare. It will then +attempt to remove the spare from the second array and add it to the +first. +If the removal succeeds but the adding fails, then it is added back to +the original array. + '''.SH BUGS '''no known bugs. @@ -34,7 +34,7 @@ int open_mddev(char *dev) { int mdfd = open(dev, O_RDWR, 0); if (mdfd < 0) - fprintf(stderr,Name ": error opening %s: %s\n", + fprintf(stderr, Name ": error opening %s: %s\n", dev, strerror(errno)); else if (md_get_version(mdfd) <= 0) { fprintf(stderr, Name ": %s does not appear to be an md device\n", @@ -49,12 +49,12 @@ int open_mddev(char *dev) int main(int argc, char *argv[]) { - char mode = '\0'; + int mode = 0; int opt; + int option_index; char *help_text; char *c; int rv; - int i; int chunk = 0; int size = 0; @@ -89,36 +89,22 @@ int main(int argc, char *argv[]) ident.super_minor= -1; ident.devices=0; - while ((opt=getopt_long(argc, argv, + while ((option_index = -1) , + (opt=getopt_long(argc, argv, short_options, long_options, - NULL)) != -1) { - + &option_index)) != -1) { + int newmode = mode; + /* firstly, so mode-independant options */ switch(opt) { - case '@': /* just incase they say --manage */ - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'H': - /* setting mode - only once */ - if (mode) { - fprintf(stderr, Name ": --%s/-%c not allowed, mode already set to %s\n", - long_options[opt-'A'+1].name, - 
long_options[opt-'A'+1].val, - long_options[mode-'A'+1].name); - exit(2); - } - mode = opt; - continue; - case 'h': help_text = Help; switch (mode) { - case 'C': help_text = Help_create; break; - case 'B': help_text = Help_build; break; - case 'A': help_text = Help_assemble; break; + case ASSEMBLE : help_text = Help_assemble; break; + case BUILD : help_text = Help_build; break; + case CREATE : help_text = Help_create; break; + case MANAGE : help_text = Help_manage; break; + case MISC : help_text = Help_misc; break; + case MONITOR : help_text = Help_monitor; break; } fputs(help_text,stderr); exit(0); @@ -133,18 +119,83 @@ int main(int argc, char *argv[]) case 'b': brief = 1; continue; - case 1: /* an undecorated option - must be a device name. - * Depending on mode, it could be that: - * All devices listed are "md" devices : --Detail, -As - * No devices are "md" devices : --Examine - * First device is "md", others are component: -A,-B,-C - * Only accept on device before mode is determined. - * If mode is @, then require devmode for other devices. - */ - if (devs_found > 0 && !mode ) { - fprintf(stderr, Name ": Must give mode flag before second device name at %s\n", optarg); - exit(2); + case ':': + case '?': + fputs(Usage, stderr); + exit(2); + } + /* second, figure out the mode. + * Some options force the mode. Others + * set the mode if it isn't already + */ + + switch(opt) { + case '@': /* just incase they say --manage */ + newmode = MANAGE; break; + case 'a': + case 'r': + case 'f': + case 1 : if (!mode) newmode = MANAGE; break; + + case 'A': newmode = ASSEMBLE; break; + case 'B': newmode = BUILD; break; + case 'C': newmode = CREATE; break; + case 'F': newmode = MONITOR;break; + + case '#': + case 'D': + case 'E': + case 'Q': newmode = MISC; break; + case 'R': + case 'S': + case 'o': + case 'w': + case 'K': if (!mode) newmode = MISC; break; + } + if (mode && newmode == mode) { + /* everybody happy ! */ + } else if (mode && newmode != mode) { + /* not allowed.. 
*/ + fprintf(stderr, Name ": "); + if (option_index >= 0) + fprintf(stderr, "--%s", long_options[option_index].name); + else + fprintf(stderr, "-%c", opt); + fprintf(stderr, " would set mode to %s, but it is already %s.\n", + map_num(modes, newmode), + map_num(modes, mode)); + exit(2); + } else if (!mode && newmode) { + mode = newmode; + } else { + /* special case of -c --help */ + if (opt == 'c' && + ( strncmp(optarg, "--h", 3)==0 || + strncmp(optarg, "-h", 2)==0)) { + fputs(Help_config, stderr); + exit(0); } + if (option_index >= 0) + fprintf(stderr, "--%s", long_options[option_index].name); + else + fprintf(stderr, "-%c", opt); + fprintf(stderr, " does not set the mode, and so cannot be first.\n"); + exit(2); + } + + /* if we just set the mode, then done */ + switch(opt) { + case '@': + case '#': + case 'A': + case 'B': + case 'C': + case 'F': + continue; + } + if (opt == 1) { + /* an undecorated option - must be a device name. + */ if (devs_found > 0 && mode == '@' && !devmode) { fprintf(stderr, Name ": Must give on of -a/-r/-f for subsequent devices at %s\n", optarg); exit(2); @@ -162,22 +213,14 @@ int main(int argc, char *argv[]) devs_found++; continue; - - case ':': - case '?': - fputs(Usage, stderr); - exit(2); - default: - /* force mode setting - @==manage if nothing else */ - if (!mode) mode = '@'; } /* We've got a mode, and opt is now something else which * could depend on the mode */ #define O(a,b) ((a<<8)|b) switch (O(mode,opt)) { - case O('C','c'): - case O('B','c'): /* chunk or rounding */ + case O(CREATE,'c'): + case O(BUILD,'c'): /* chunk or rounding */ if (chunk) { fprintf(stderr, Name ": chunk/rounding may only be specified once. " "Second value is %s.\n", optarg); @@ -191,7 +234,7 @@ int main(int argc, char *argv[]) } continue; - case O('C','z'): /* size */ + case O(CREATE,'z'): /* size */ if (size) { fprintf(stderr, Name ": size may only be specified once. 
" "Second value is %s.\n", optarg); @@ -205,8 +248,8 @@ int main(int argc, char *argv[]) } continue; - case O('C','l'): - case O('B','l'): /* set raid level*/ + case O(CREATE,'l'): + case O(BUILD,'l'): /* set raid level*/ if (level != -10) { fprintf(stderr, Name ": raid level may only be set once. " "Second value is %s.\n", optarg); @@ -218,12 +261,12 @@ int main(int argc, char *argv[]) optarg); exit(2); } - if (level > 0 && mode == 'B') { + if (level != 0 && level != -1 && mode == BUILD) { fprintf(stderr, Name ": Raid level %s not permitted with --build.\n", optarg); exit(2); } - if (sparedisks > 0 && level < 1) { + if (sparedisks > 0 && level < 1 && level >= -1) { fprintf(stderr, Name ": raid level %s is incompatible with spare-disks setting.\n", optarg); exit(2); @@ -231,7 +274,7 @@ int main(int argc, char *argv[]) ident.level = level; continue; - case O('C','p'): /* raid5 layout */ + case O(CREATE,'p'): /* raid5 layout */ if (layout >= 0) { fprintf(stderr,Name ": layout may only be sent once. 
" "Second value was %s\n", optarg); @@ -257,8 +300,8 @@ int main(int argc, char *argv[]) } continue; - case O('C','n'): - case O('B','n'): /* number of raid disks */ + case O(CREATE,'n'): + case O(BUILD,'n'): /* number of raid disks */ if (raiddisks) { fprintf(stderr, Name ": raid-disks set twice: %d and %s\n", raiddisks, optarg); @@ -273,13 +316,13 @@ int main(int argc, char *argv[]) ident.raid_disks = raiddisks; continue; - case O('C','x'): /* number of spare (eXtra) discs */ + case O(CREATE,'x'): /* number of spare (eXtra) discs */ if (sparedisks) { fprintf(stderr,Name ": spare-disks set twice: %d and %s\n", sparedisks, optarg); exit(2); } - if (level > -10 && level < 1) { + if (level > -10 && level <= 0 && level >= -1) { fprintf(stderr, Name ": spare-disks setting is incompatible with raid level %d\n", level); exit(2); @@ -291,14 +334,14 @@ int main(int argc, char *argv[]) exit(2); } continue; - case O('C','f'): /* force honouring of device list */ - case O('A','f'): /* force assembly */ - case O('H','f'): /* force zero */ + case O(CREATE,'f'): /* force honouring of device list */ + case O(ASSEMBLE,'f'): /* force assembly */ + case O(MISC,'f'): /* force zero */ force=1; continue; /* now for the Assemble options */ - case O('A','u'): /* uuid of array */ + case O(ASSEMBLE,'u'): /* uuid of array */ if (ident.uuid_set) { fprintf(stderr, Name ": uuid cannot be set twice. " "Second value %s.\n", optarg); @@ -312,7 +355,7 @@ int main(int argc, char *argv[]) } continue; - case O('A','m'): /* super-minor for array */ + case O(ASSEMBLE,'m'): /* super-minor for array */ if (ident.super_minor >= 0) { fprintf(stderr, Name ": super-minor cannot be set twice. " "Second value: %s.\n", optarg); @@ -325,8 +368,8 @@ int main(int argc, char *argv[]) } continue; - case O('A','c'): /* config file */ - case O('F','c'): + case O(ASSEMBLE,'c'): /* config file */ + case O(MONITOR,'c'): if (configfile) { fprintf(stderr, Name ": configfile cannot be set twice. 
" "Second value is %s.\n", optarg); @@ -335,12 +378,13 @@ int main(int argc, char *argv[]) configfile = optarg; /* FIXME possibly check that config file exists. Even parse it */ continue; - case O('A','s'): /* scan */ - case O('E','s'): + case O(ASSEMBLE,'s'): /* scan */ + case O(MISC,'s'): + case O(MONITOR,'s'): scan = 1; continue; - case O('F','m'): /* mail address */ + case O(MONITOR,'m'): /* mail address */ if (mailaddr) fprintf(stderr, Name ": only specify one mailaddress. %s ignored.\n", optarg); @@ -348,7 +392,7 @@ int main(int argc, char *argv[]) mailaddr = optarg; continue; - case O('F','p'): /* alert program */ + case O(MONITOR,'p'): /* alert program */ if (program) fprintf(stderr, Name ": only specify one alter program. %s ignored.\n", optarg); @@ -356,7 +400,7 @@ int main(int argc, char *argv[]) program = optarg; continue; - case O('F','d'): /* delay in seconds */ + case O(MONITOR,'d'): /* delay in seconds */ if (delay) fprintf(stderr, Name ": only specify delay once. %s ignored.\n", optarg); @@ -374,29 +418,29 @@ int main(int argc, char *argv[]) /* now the general management options. Some are applicable * to other modes. None have arguments. 
*/ - case O('@','a'): - case O('C','a'): - case O('B','a'): - case O('A','a'): /* add a drive */ + case O(MANAGE,'a'): + case O(CREATE,'a'): + case O(BUILD,'a'): + case O(ASSEMBLE,'a'): /* add a drive */ devmode = 'a'; continue; - case O('@','r'): /* remove a drive */ + case O(MANAGE,'r'): /* remove a drive */ devmode = 'r'; continue; - case O('@','f'): /* set faulty */ + case O(MANAGE,'f'): /* set faulty */ devmode = 'f'; continue; - case O('@','R'): - case O('A','R'): - case O('B','R'): - case O('C','R'): /* Run the array */ + case O(MANAGE,'R'): + case O(ASSEMBLE,'R'): + case O(BUILD,'R'): + case O(CREATE,'R'): /* Run the array */ if (runstop < 0) { fprintf(stderr, Name ": Cannot both Stop and Run an array\n"); exit(2); } runstop = 1; continue; - case O('@','S'): + case O(MANAGE,'S'): if (runstop > 0) { fprintf(stderr, Name ": Cannot both Run and Stop an array\n"); exit(2); @@ -404,26 +448,44 @@ int main(int argc, char *argv[]) runstop = -1; continue; - case O('@','o'): + case O(MANAGE,'o'): if (readonly < 0) { fprintf(stderr, Name ": Cannot have both readonly and readwrite\n"); exit(2); } readonly = 1; continue; - case O('@','w'): + case O(MANAGE,'w'): if (readonly > 0) { - fprintf(stderr, "mkdctl: Cannot have both readwrite and readonly.\n"); + fprintf(stderr, Name ": Cannot have both readwrite and readonly.\n"); exit(2); } readonly = -1; continue; + + case O(MISC,'Q'): + case O(MISC,'D'): + case O(MISC,'E'): + case O(MISC,'K'): + case O(MISC,'R'): + case O(MISC,'S'): + case O(MISC,'o'): + case O(MISC,'w'): + if (devmode && devmode != opt && + (devmode == 'E' || (opt == 'E' && devmode != 'Q'))) { + fprintf(stderr, Name ": --examine/-E cannot be given with -%c\n", + devmode =='E'?opt:devmode); + exit(2); + } + devmode = opt; + continue; + } /* We have now processed all the valid options. 
Anything else is * an error */ - fprintf(stderr, Name ": option %c not valid in mode %c\n", - opt, mode); + fprintf(stderr, Name ": option %c not valid in %s mode\n", + opt, map_num(modes, mode)); exit(2); } @@ -436,13 +498,13 @@ int main(int argc, char *argv[]) * hopefully it's mostly right but there might be some stuff * missing * - * That is mosty checked in ther per-mode stuff but... + * That is mosty checked in the per-mode stuff but... * * For @,B,C and A without -s, the first device listed must be an md device * we check that here and open it. */ - if (mode=='@' || mode == 'B' || mode == 'C' || (mode == 'A' && ! scan)) { + if (mode==MANAGE || mode == BUILD || mode == CREATE || (mode == ASSEMBLE && ! scan)) { if (devs_found < 1) { fprintf(stderr, Name ": an md device must be given in this mode\n"); exit(2); @@ -452,10 +514,9 @@ int main(int argc, char *argv[]) exit(1); } - rv = 0; switch(mode) { - case '@':/* Management */ + case MANAGE: /* readonly, add/remove, readwrite, runstop */ if (readonly>0) rv = Manage_ro(devlist->devname, mdfd, readonly); @@ -467,7 +528,7 @@ int main(int argc, char *argv[]) if (!rv && runstop) rv = Manage_runstop(devlist->devname, mdfd, runstop); break; - case 'A': /* Assemble */ + case ASSEMBLE: if (!scan) rv = Assemble(devlist->devname, mdfd, &ident, configfile, devlist->next, @@ -513,32 +574,89 @@ int main(int argc, char *argv[]) } } break; - case 'B': /* Build */ + case BUILD: rv = Build(devlist->devname, mdfd, chunk, level, raiddisks, devlist->next); break; - case 'C': /* Create */ + case CREATE: rv = Create(devlist->devname, mdfd, chunk, level, layout, size, raiddisks, sparedisks, devs_found-1, devlist->next, runstop, verbose, force); break; - case 'D': /* Detail */ - for (dv=devlist ; dv; dv=dv->next) - rv |= Detail(dv->devname, brief); - break; - case 'E': /* Examine */ - if (devlist == NULL && scan==0) { - fprintf(stderr, Name ": No devices to examine\n"); - exit(2); + case MISC: + + if (devmode == 'E') { + if (devlist == 
NULL && !scan) { + fprintf(stderr, Name ": No devices to examine\n"); + exit(2); + } + if (devlist == NULL) + devlist = conf_get_devs(configfile); + if (devlist == NULL) { + fprintf(stderr, Name ": No devices listed in %s\n", configfile); + exit(1); + } + rv = Examine(devlist, devlist?brief:!verbose, scan); + } else { + if (devlist == NULL) { + if ((devmode == 'S' ||devmode=='D') && scan) { + /* apply to all devices in /proc/mdstat */ + struct mdstat_ent *ms = mdstat_read(); + struct mdstat_ent *e; + for (e=ms ; e ; e=e->next) { + char *name = get_md_name(e->devnum); + + if (!name) { + fprintf(stderr, Name ": cannot find device file for %s\n", + e->dev); + continue; + } + if (devmode == 'D') + rv |= Detail(name, !verbose); + else if (devmode=='S') { + mdfd = open_mddev(name); + if (mdfd >= 0) + rv |= Manage_runstop(name, mdfd, -1); + } + put_md_name(name); + } + } else { + fprintf(stderr, Name ": No devices given.\n"); + exit(2); + } + } + for (dv=devlist ; dv; dv=dv->next) { + switch(dv->disposition) { + case 'D': + rv |= Detail(dv->devname, brief); continue; + case 'K': /* Zero superblock */ + rv |= Kill(dv->devname, force); continue; + case 'Q': + rv |= Query(dv->devname); continue; + } + mdfd = open_mddev(dv->devname); + if (mdfd>=0) + switch(dv->disposition) { + case 'R': + rv |= Manage_runstop(dv->devname, mdfd, 1); break; + case 'S': + rv |= Manage_runstop(dv->devname, mdfd, -1); break; + case 'o': + rv |= Manage_ro(dv->devname, mdfd, 1); break; + case 'w': + rv |= Manage_ro(dv->devname, mdfd, -1); break; + } + } } - rv = Examine(devlist, devlist?brief:!verbose, configfile); - break; - case 'F': /* Follow */ - rv= Monitor(devlist, mailaddr, program, - delay?delay:60, configfile); break; - case 'H': /* Zero superblock */ - for (dv=devlist ; dv; dv=dv->next) - rv |= Kill(dv->devname, force); + case MONITOR: +/* + if (!devlist && !scan) { + fprintf(stderr, Name ": Cannot monitor: need --scan or at least one device\n"); + rv = 1; + break; + } +*/ rv= 
Monitor(devlist, mailaddr, program, + delay?delay:60, scan, configfile); break; } exit(rv); diff --git a/mdadm.conf-example b/mdadm.conf-example index f1a5b8fb..65c97b70 100644 --- a/mdadm.conf-example +++ b/mdadm.conf-example @@ -23,6 +23,9 @@ #DEVICE /dev/sd[bcdjkl]1 #DEVICE /dev/hda1 /dev/hdb1 # +# If you mount devfs on /dev, then a suitable way to list all devices is: +#DEVICE /dev/discs/*/* +# # # # ARRAY lines specify an array to assemble and a method of identification. @@ -38,3 +41,17 @@ #ARRAY /dev/md0 UUID=3aaa0122:29827cfa:5331ad66:ca767371 #ARRAY /dev/md1 superminor=1 #ARRAY /dev/md2 devices=/dev/hda1,/dev/hda2 +# +# ARRAY lines can also specify a "spare-group" for each array. mdadm --monitor +# will then move a spare between arrays in a spare-group if one array has a failed +# drive but no spare +#ARRAY /dev/md4 uuid=b23f3c6d:aec43a9f:fd65db85:369432df spare-group=group1 +#ARRAY /dev/md5 uuid=19464854:03f71b1b:e0df2edd:246cc977 spare-group=group1 +# +# When used in --follow (aka --monitor) mode, mdadm needs a +# mail address and/or a program. This can be given with "mailaddr" +# and "program" lines so that monitoring can be started using +# mdadm --follow --scan & echo $! > /var/run/mdadm +# If the lines are not found, mdadm will exit quietly +#MAILADDR root@mydomain.tld +#PROGRAM /usr/sbin/handle-mdadm-events diff --git a/mdadm.conf.5 b/mdadm.conf.5 index f8c0e34a..72717e71 100644 --- a/mdadm.conf.5 +++ b/mdadm.conf.5 @@ -11,7 +11,7 @@ is a tool for creating, managing, and monitoring RAID devices using the driver in Linux. .PP Some common tasks, such as assembling all arrays, can be simplified -by describing the devices and array in this configuation file. +by describing the devices and array in this configuration file. .SS SYNTAX The file should be seen as a collection of words separated by white @@ -24,7 +24,7 @@ though it were a continuation of the previous line. 
Empty lines are ignored, but otherwise each (non continuation) line must start with a keyword as listed below. The key words are case -insensitve and can be abbreviated to 3 characters. +insensitive and can be abbreviated to 3 characters. The keywords are: .TP @@ -59,7 +59,8 @@ The ARRAY lines identify actual arrays. The second word on the line should be the name of the device where the array is normally assembled, such as .BR /dev/md1 . -Subsequent words identify the array. If multiple identities are given, +Subsequent words identify the array, or identify the array as a member +of a group. If multiple identities are given, then the array must match ALL identities to be considered a match. Each identity word has a tag, and equals sign, and some value. The options are: @@ -95,7 +96,52 @@ The value is the number of disks in a complete active array. As with this is mainly for compatibility with the output of .BR "mdadm --examine --scan" . + +.TP +.B spare-group= +The value is a textual name for a group of arrays. All arrays with +the same +.B spare-group +name are considered to be part of the same group. The significance of +a group of arrays is that +.B mdadm +will, when monitoring the arrays, move a spare drive from one array in +a group to another array in that group if the first array had a failed +or missing drive but no spare. .RE + +.TP +.B MAILADDR +The +.B mailaddr +line gives an E-mail address that alerts should be +sent to when +.B mdadm +is running in +.B --monitor +mode (and was given the +.B --scan +option). There should only be one +.B MAILADDR +line and it should have only one address. + + +.TP +.B PROGRAM +The +.B program +line gives the name of a program to be run when +.B "mdadm --monitor" +detects potentially interesting events on any of the arrays that it +is monitoring. This program gets run with two or three arguments, they +being the Event, the md device, and possibly the related component +device. 
+ +There should only be one +.B program +line and it should be given only one program. + + .SH SEE ALSO .BR mdadm (8), .BR md (4). diff --git a/mdadm.conf.man b/mdadm.conf.man index 37d75855..70ac49aa 100644 --- a/mdadm.conf.man +++ b/mdadm.conf.man @@ -15,7 +15,7 @@ DDEESSCCRRIIPPTTIIOONN Some common tasks, such as assembling all arrays, can be simplified by describing the devices and array in this - configuation file. + configuration file. SSYYNNTTAAXX @@ -30,7 +30,7 @@ DDEESSCCRRIIPPTTIIOONN Empty lines are ignored, but otherwise each (non continua- tion) line must start with a keyword as listed below. The - key words are case insensitve and can be abbreviated to 3 + key words are case insensitive and can be abbreviated to 3 characters. The keywords are: @@ -57,46 +57,81 @@ DDEESSCCRRIIPPTTIIOONN AARRRRAAYY The ARRAY lines identify actual arrays. The second word on the line should be the name of the device where the array is normally assembled, such as - //ddeevv//mmdd11. Subsequent words identify the array. If - multiple identities are given, then the array must - match ALL identities to be considered a match. - Each identity word has a tag, and equals sign, and + //ddeevv//mmdd11. Subsequent words identify the array, or + identify the array as a member of a group. If mul- + tiple identities are given, then the array must + match ALL identities to be considered a match. + Each identity word has a tag, and equals sign, and some value. The options are: uuuuiidd== The value should be a 128 bit uuid in hexadeci- - mal, with punctuation interspersed if desired. - This must match the uuid stored in the + mal, with punctuation interspersed if desired. + This must match the uuid stored in the superblock. ssuuppeerr--mmiinnoorr== - The value is an integer which indicates the - minor number that was stored in the superblock - when the array was created. 
When an array is + The value is an integer which indicates the + minor number that was stored in the superblock + when the array was created. When an array is created as /dev/mdX, then the minor number X is stored. ddeevviicceess== - The value is a comma separated list of device - names. Precisely these devices will be used to - assemble the array. Note that the devices - listed there must also be listed on a DEVICE + The value is a comma separated list of device + names. Precisely these devices will be used to + assemble the array. Note that the devices + listed there must also be listed on a DEVICE line. - lleevveell== The value is a raid level. This is not nor- - mally used to identify an array, but is sup- + lleevveell== The value is a raid level. This is not nor- + mally used to identify an array, but is sup- ported so that the output of mmddaaddmm ----eexxaammiinnee ----ssccaann - can be use directly in the configuration file. + can be use directly in the configuration file. - ddiisskkss== The value is the number of disks in a complete - active array. As with lleevveell== this is mainly + ddiisskkss== The value is the number of disks in a complete + active array. As with lleevveell== this is mainly for compatibility with the output of mmddaaddmm ----eexxaammiinnee ----ssccaann. + + ssppaarree--ggrroouupp== + The value is a textual name for a group of + arrays. All arrays with the same ssppaarree--ggrroouupp + name are considered to be part of the same + group. The significance of a group of arrays + is that mmddaaddmm will, when monitoring the arrays, + move a spare drive from one array in a group to + another array in that group if the first array + had a failed or missing drive but no spare. + + + MMAAIILLAADDDDRR + The mmaaiillaaddddrr line gives an E-mail address that + alerts should be sent to when is running in ----mmoonnii-- + ttoorr mode (and was given the ----ssccaann option). 
There + should only be one MMAAIILLAADDDDRR line and it should have + only one address. + + + + PPRROOGGRRAAMM + The pprrooggrraamm line gives the name of a program to be + run when mmddaaddmm ----mmoonniittoorr detects potentially inter- + esting events on any of the arrays that it is moni- + toring. This program gets run with two or three + arguments, they being the Event, the md device, and + possibly the related component device. + + There should only be one pprrooggrraamm line and it should + be give only one program. + + + SSEEEE AALLSSOO mmddaaddmm(8), mmdd(4). @@ -42,21 +42,36 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); #include <string.h> #include <linux/kdev_t.h> -#include <linux/fs.h> +/*#include <linux/fs.h> */ +#include <sys/mount.h> +#include <asm/types.h> #include <sys/ioctl.h> #define MD_MAJOR 9 -/* I seem to need this to make BLKGETSIZE64 to work... */ -#define u64 __u64 +#ifndef BLKGETSIZE64 +#define BLKGETSIZE64 _IOR(0x12,114,sizeof(__u64)) /* return device size in bytes (u64 *arg) */ +#endif #include "md_u.h" +#include "md_p.h" #define Name "mdadm" +enum mode { + ASSEMBLE=1, + BUILD, + CREATE, + MANAGE, + MISC, + MONITOR, +}; + extern char short_options[]; extern struct option long_options[]; -extern char Version[], Usage[], Help[], Help_create[], Help_build[], Help_assemble[]; +extern char Version[], Usage[], Help[], + Help_create[], Help_build[], Help_assemble[], + Help_manage[], Help_misc[], Help_monitor[], Help_config[]; /* structures read from config file */ /* List of mddevice names and identifiers @@ -99,13 +114,27 @@ typedef struct mapping { int num; } mapping_t; + +struct mdstat_ent { + char *dev; + int devnum; + int active; + char *level; + char *pattern; /* U or up, _ for down */ + int percent; /* -1 if no resync */ + struct mdstat_ent *next; +}; + +extern struct mdstat_ent *mdstat_read(void); +extern void free_mdstat(struct mdstat_ent *ms); + #ifndef Sendmail #define Sendmail "/usr/lib/sendmail -t" 
#endif extern char *map_num(mapping_t *map, int num); extern int map_name(mapping_t *map, char *name); -extern mapping_t r5layout[], pers[]; +extern mapping_t r5layout[], pers[], modes[]; extern char *map_dev(int major, int minor); @@ -134,22 +163,42 @@ extern int Create(char *mddev, int mdfd, int runstop, int verbose, int force); extern int Detail(char *dev, int brief); -extern int Examine(mddev_dev_t devlist, int brief, char *conffile); +extern int Query(char *dev); +extern int Examine(mddev_dev_t devlist, int brief, int scan); extern int Monitor(mddev_dev_t devlist, - char *mailaddr, char *alert_cmd, - int period, - char *config); + char *mailaddr, char *alert_cmd, + int period, int scan, + char *config); extern int Kill(char *dev, int force); extern int md_get_version(int fd); -extern int get_linux_version(); +extern int get_linux_version(void); extern int parse_uuid(char *str, int uuid[4]); extern int check_ext2(int fd, char *name); extern int check_reiser(int fd, char *name); extern int check_raid(int fd, char *name); -extern mddev_ident_t conf_get_ident(char *, char*); -extern mddev_dev_t conf_get_devs(char *); +extern mddev_ident_t conf_get_ident(char *conffile, char *dev); +extern mddev_dev_t conf_get_devs(char *conffile); +extern char *conf_get_mailaddr(char *conffile); +extern char *conf_get_program(char *conffile); +extern char *conf_line(FILE *file); +extern void free_line(char *line); +extern int match_oneof(char *devices, char *devname); +extern int load_super(int fd, mdp_super_t *super); +extern void uuid_from_super(int uuid[4], mdp_super_t *super); +extern int same_uuid(int a[4], int b[4]); +extern int compare_super(mdp_super_t *first, mdp_super_t *second); +extern int calc_sb_csum(mdp_super_t *super); +extern int store_super(int fd, mdp_super_t *super); +extern int enough(int level, int raid_disks, int avail_disks); +extern int ask(char *mesg); + extern char *human_size(long long bytes); +char *human_size_brief(long long bytes); + +extern void 
put_md_name(char *name); +extern char *get_md_name(int dev); + diff --git a/mdadm.man b/mdadm.man deleted file mode 100644 index 6c7895d7..00000000 --- a/mdadm.man +++ /dev/null @@ -1,423 +0,0 @@ -mdadm(8) mdadm(8) - - - -NNAAMMEE - mdadm - manage MD devices _a_k_a Linux Software Raid. - - -SSYYNNOOPPSSIISS - mmddaaddmm _[_m_o_d_e_] _<_r_a_i_d_d_e_v_i_c_e_> _[_o_p_t_i_o_n_s_] _<_s_u_b_d_e_v_i_c_e_s_> - - -DDEESSCCRRIIPPTTIIOONN - RAID devices are virtual devices created from two or more - real block devices. This allows multiple devices (typi- - cally disk drives or partitions there-of) to be combined - into a single device to hold (for example) a single - filesystem. Some RAID levels included redundancy and so - can survive some degree of device failure. - - Linux Software RAID devices are implemented through the md - (Multiple Devices) device driver. - - Currently, Linux supports LLIINNEEAARR md devices, RRAAIIDD00 (strip- - ing), RRAAIIDD11 (mirroring), RRAAIIDD44 and RRAAIIDD55.. - - Recent kernels (2002) also support a mode known as MMUULLTTII-- - PPAATTHH. mmddaaddmm does not support MULTIPATH as yet. - - mmddaaddmm is a program that can be used to create, manage, and - monitor MD devices. As such it provides a similar set of - functionality to the rraaiiddttoooollss packages. The key differ- - ences between mmddaaddmm and rraaiiddttoooollss are: - - +o mmddaaddmm is a single program and not a collection of pro- - grams. - - +o mmddaaddmm can perform (almost) all of its functions with- - out having a configuration file. Also mdadm helps - with management of the configuration file. - - +o mmddaaddmm can provide information about your arrays - (through Detail and Examine) that rraaiiddttoooollss cannot. - - +o rraaiiddttoooollss can manage MULTIPATH devices which mmddaaddmm - cannot yet manage. - - -MMOODDEESS - mdadm has 7 major modes of operation: - - AAsssseemmbbllee - Assemble the parts of a previously created array - into an active array. 
Components can be explicitly - given or can be searched for. mmddaaddmm checks that - the components do form a bona fide array, and can, - on request, fiddle superblock information so as to - assemble a faulty array. - - - BBuuiilldd Build a legacy array without per-device - superblocks. - - - CCrreeaattee Create a new array with per-device superblocks. - - - DDeettaaiill Display the details of a given md device. Details - include the RAID level, the number of devices, - which ones are faulty (if any), and the array UUID. - - - EExxaammiinnee - Examine a device to see if it is part of an md - array, and print out the details of that array. - This mode can also be used to examine a large num- - ber of devices and to print out a summary of the - arrays found in a format suitable for the - mmddaaddmm..ccoonnff configuration file. - - - FFoollllooww oorr MMoonniittoorr - Monitor one or more md devices and act on any state - changes. - - - MMaannaaggee This is for odd bits an pieces like hotadd, - hotremove, setfaulty, stop, readonly, readwrite. - - -OOPPTTIIOONNSS - Available options are: - - - --AA, ----aasssseemmbbllee - Assemble an existing array. - - - --BB, ----bbuuiilldd - Build a legacy array without superblocks. - - - --CC, ----ccrreeaattee - Create a new array. - - - --DD, ----ddeettaaiill - Print detail of one or more md devices. - - - --EE, ----eexxaammiinnee - Print content of md superblock on device(s). - - - --FF, ----ffoollllooww, ----mmoonniittoorr - Select MMoonniittoorr mode. - - - --hh, ----hheellpp - Display help message or, after above option, mode - specific help message. - - - --VV, ----vveerrssiioonn - Print version information for mdadm. - - - --vv, ----vveerrbboossee - Be more verbose about what is happening. - - - --bb, ----bbrriieeff - Be less verbose. This is used with ----ddeettaaiill and - ----eexxaammiinnee. - - -FFoorr ccrreeaattee oorr bbuuiilldd:: - --cc, ----cchhuunnkk== - Specify chunk size of kibibytes. The default is - 64. 
- - - ----rroouunnddiinngg== - Specify rounding factor for linear array (==chunk - size) - - - --ll, ----lleevveell== - Set raid level. Options are: linear, raid0, 0, - stripe, raid1, 1, mirror, raid5, 4, raid5, 5. - Obviously some of these are synonymous. Only the - first 4 are valid when Building. - - - --pp, ----ppaarriittyy== - Set raid5 parity algorithm. Options are: - {left,right}-{,a}symmetric, la, ra, ls, rs. The - default is left-symmetric. - - - ----llaayyoouutt== - same as --parity - - - --nn, ----rraaiidd--ddiisskkss== - number of active devices in array. - - - --xx, ----ssppaarree--ddiisskkss== - number of spare (eXtra) disks in initial array. - Spares can be added and removed later. - - - --zz, ----ssiizzee== - Amount (in Kibibytes) of space to use from each - drive in RAID1/4/5. This must be a multiple of the - chunk size, and must leave about 128Kb of space at - the end of the drive for the RAID superblock. If - this is not specified (as it normally is not) the - smallest drive (or partition) sets the size, though - if there is a variance among the drives of greater - than 1%, a warning is issued. - - -FFoorr aasssseemmbbllee:: - --uu, ----uuuuiidd== - uuid of array to assemble. Devices which don't have - this uuid are excluded - - - --mm, ----ssuuppeerr--mmiinnoorr== - Minor number of device that array was created for. - Devices which don't have this minor number are - excluded. If you create an array as /dev/md1, then - all superblock will contain the minor number 1, - even if the array is later assembled as /dev/md2. - - - --cc, ----ccoonnffiigg== - config file. Default is //eettcc//mmddaaddmm..ccoonnff. - - - --ss, ----ssccaann - scan config file for missing information - - - --ff, ----ffoorrccee - Assemble the array even if some superblocks appear - out-of-date - - - --RR, ----rruunn - Attempt to start the array even if fewer drives - were given than are needed for a full array. 
Nor- - mally if not all drives are found and ----ssccaann is not - used, then the array will be assembled but not - started. With ----rruunn an attempt will be made to - start it anyway. - - -GGeenneerraall mmaannaaggeemmeenntt - --aa, ----aadddd - hotadd listed devices. - - - --rr, ----rreemmoovvee - remove listed devices. The must not be active. - i.e. they should be failed or spare devices. - - - --ff, ----ffaaiill - mark listed devices as faulty. - - - ----sseett--ffaauullttyy - same as --fail. - - - --RR, ----rruunn - start a partially built array. - - - --SS, ----ssttoopp - deactivate array, releasing all resources. - - - --oo, ----rreeaaddoonnllyy - mark array as readonly. - - - --ww, ----rreeaaddwwrriittee - mark array as readwrite. - - - -AASSSSEEMMBBLLYY MMOODDEE - Usage: mmddaaddmm ----aasssseemmbbllee _d_e_v_i_c_e _o_p_t_i_o_n_s_._._. - - Usage: mmddaaddmm ----aasssseemmbbllee ----ssccaann _o_p_t_i_o_n_s_._._. - - - This usage assembles one or more raid arrays from pre- - existing components. For each array, mdadm needs to know - the md device, the identity of the array, and a number of - sub devices. These can be found in a number of ways. - - The md device is either given before ----ssccaann or is found - from the config file. In the latter case, multiple md - devices can be started with a single mdadm command. - - The identity can be given with the ----uuuuiidd option, with the - ----ssuuppeerr--mmiinnoorr option, can be found in in the config file, - or will be taken from the super block on the first subde- - vice listed on the command line. - - Devices can be given on the ----aasssseemmbbllee command line or - from the config file. Only devices which have an md - superblock which contains the right identity will be con- - sidered for any device. - - The config file is only used if explicitly named with - ----ccoonnffiigg or requested with ----ssccaann.. In the later case, - //eettcc//mmddaaddmm..ccoonnff is used. 
- - If ----ssccaann is not given, then the config file will only be - used to find the identity of md arrays. - - Normally the array will be started after it is assembled. - However is ----ssccaann is not given and insufficient drives - were lists to start a complete (non-degraded) array, then - the array is not started (to guard against usage errors). - To insist that the array be started in this case (as may - work for RAID1 or RAID5), give the ----rruunn flag. - - - -BBUUIILLDD MMOODDEE - Usage: mmddaaddmm ----bbuuiilldd _d_e_v_i_c_e ----cchhuunnkk==_X ----lleevveell==_Y ----rraaiidd-- - ddiisskkss==_Z _d_e_v_i_c_e_s - - - This usage is similar to ----ccrreeaattee. The difference is that - it creates a legacy array without a superblock. With these - arrays there is no difference between initially creating - the array and subsequently assembling the array, except - that hopefully there is useful data there in the second - case. - - The level may only be 0, raid0, or linear. All devices - must be listed and the array will be started once com- - plete. - - -CCRREEAATTEE MMOODDEE - Usage: mmddaaddmm ----ccrreeaattee _d_e_v_i_c_e ----cchhuunnkk==_X ----lleevveell==_Y - ----rraaiidd--ddiisskkss==_Z _d_e_v_i_c_e_s - - - This usage will initialise a new md array, associate some - devices with it, and activate the array. - - As devices are added, they are checked to see if they con- - tain raid superblocks or filesystems. They are also check - to see if the variance in device size exceeds 1%. - - If any discrepancy is found, the array will not automati- - cally be run, though the presence of a ----rruunn can override - this caution. - - - The General Management options that are valid with --cre- - ate are: - - ----rruunn insist of running the array even if some devices - look like they might be in use. - - - ----rreeaaddoonnllyy - start the array readonly - not supported yet. 
- - -DDEETTAAIILL MMOODDEE - Usage: mmddaaddmm ----ddeettaaiill [----bbrriieeff] _d_e_v_i_c_e _._._. - - - This usage sill print out the details of the given array - including a list of component devices. To determine names - for the devices, mmddaaddmm searches //ddeevv for device files with - the right major and minor numbers. - - With ----bbrriieeff mmddaaddmm prints a single line that identifies - the level, number of disks, and UUID of the array. This - line is suitable for inclusion in //eettcc//mmddaaddmm..ccoonnff. - - -EEXXAAMMIINNEE MMOODDEE - Usage: mmddaaddmm ----eexxaammiinnee [----ssccaann] [----bbrriieeff] _d_e_v_i_c_e _._._. - - This usage will examine some block devices to see if that - have a valid RAID superblock on them. The information in - each valid raid superblock will be printed. - - If ----ssccaann is used, the no devices should be listed, and - the complete set of devices identified in the configura- - tion file are checked. ----ssccaann implies ----bbrriieeff but this - implication can be countered by specifying ----vveerrbboossee. - - With ----bbrriieeff mmddaaddmm will output an config file entry of - each distinct array that was found. This entry will list - the UUID, the raid level, and a list of the individual - devices on which a superblock for that array was found. - This output will by syntactically suitable for inclusion - in the configuration file, but should NNOOTT be used blindly. - Often the array description that you want in the configu- - ration file is much less specific than that given by mmddaaddmm - --BBss. For example, you normally do not want to list the - devices, particularly if they are SCSI devices. - - - -FFIILLEESS - //pprroocc//mmddssttaatt - If you're using the //pprroocc filesystem, //pprroocc//mmddssttaatt gives - you informations about md devices status. This file is - not currently used by mmddaaddmm. 
- - - //eettcc//mmddaaddmm..ccoonnff - The config file lists which devices may be scanned to see - if they contain MD super block, and gives identifying - information (e.g. UUID) about known MD arrays. See - mmddaaddmm..ccoonnff(5) for more details. - - - -TTOODDOO - Finish and document Follow mode. - - -SSEEEE AALLSSOO - For information on the various levels of RAID, check out: - - - http://ostenfeld.dk/~jakob/Software-RAID.HOWTO/ - - for new releases of the RAID driver check out: - - - ftp://ftp.kernel.org/pub/linux/kernel/peo- - ple/mingo/raid-patches - - or - - http://www.cse.unsw.edu.au/~neilb/patches/linux- - stable/ - - mmddaaddmm..ccoonnff(5), mmdd(4). - - _r_a_i_d_t_a_b(5), _r_a_i_d_0_r_u_n(8), _r_a_i_d_s_t_o_p(8), _m_k_r_a_i_d(8) - - - - mdadm(8) @@ -1,13 +1,12 @@ Summary: mdadm is used for controlling Linux md devices (aka RAID arrays) Name: mdadm -Version: 0.7.2 +Version: 0.8 Release: 1 Source: http://www.cse.unsw.edu.au/~neilb/source/mdadm/mdadm-%{version}.tgz URL: http://www.cse.unsw.edu.au/~neilb/source/mdadm/ License: GPL Group: Utilities/System -BuildRoot: ${_tmppath}/%{name}-root -Packager: Danilo Godec <danci@agenda.si> (et.al.) +BuildRoot: %{_tmppath}/%{name}-root Obsoletes: mdctl %description @@ -33,7 +32,7 @@ make CFLAGS="$RPM_OPT_FLAGS" SYSCONFDIR="%{_sysconfdir}" #rm -rf $RPM_BUILD_ROOT mkdir -p $RPM_BUILD_ROOT/%{_sbindir} install -m755 mdadm $RPM_BUILD_ROOT/%{_sbindir} -mkdir -p $RPM_BUILD_ROOT/${_sysconfdir} +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir} install -m644 mdadm.conf-example $RPM_BUILD_ROOT/%{_sysconfdir}/mdadm.conf mkdir -p $RPM_BUILD_ROOT/%{_mandir}/man4 mkdir -p $RPM_BUILD_ROOT/%{_mandir}/man5 diff --git a/mdstat.c b/mdstat.c new file mode 100644 index 00000000..c5b8f1e5 --- /dev/null +++ b/mdstat.c @@ -0,0 +1,180 @@ +/* + * mdstat - parse /proc/mdstat file. Part of: + * mdadm - manage Linux "md" devices aka RAID arrays. 
+ * + * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: <neilb@cse.unsw.edu.au> + * Paper: Neil Brown + * School of Computer Science and Engineering + * The University of New South Wales + * Sydney, 2052 + * Australia + */ + +/* + * The /proc/mdstat file comes in at least 3 flavours: + * In an unpatched 2.2 kernel (md 0.36.6): + * Personalities : [n raidx] ... + * read_ahead {not set|%d sectors} + * md0 : {in}active{ raidX /dev/hda... %d blocks{ maxfault=%d}} + * md1 : ..... + * + * Normally only 4 md lines, but all are listed. + * + * In a patched 2.2 kernel (md 0.90.0) + * Personalities : [raidx] ... + * read_ahead {not set|%d sectors} + * mdN : {in}active {(readonly)} raidX dev[%d]{(F)} ... %d blocks STATUS RESYNC + * ... Only initialised arrays listed + * unused: dev dev dev | <none> + * + * STATUS is personality dependant: + * linear: %dk rounding + * raid0: %dk chunks + * raid1: [%d/%d] [U_U] ( raid/working. operational or not) + * raid5: level 4/5, %dk chunk, algorithm %d [%d/%d] [U_U] + * + * RESYNC is empty or: + * {resync|recovery}=%u%% finish=%u.%umin + * or + * resync=DELAYED + * + * In a 2.4 kernel (md 0.90.0/2.4) + * Personalities : [raidX] ... 
+ * read_ahead {not set|%d sectors} + * mdN : {in}active {(read-only)} raidX dev[%d]{(F)} ... + * %d blocks STATUS + * RESYNC + * unused: dev dev .. | <none> + * + * STATUS matches 0.90.0/2.2 + * RESYNC includes [===>....], + * adds a space after {resync|recovery} and before and after '=' + * adds a decimal to the recovery percent. + * adds (%d/%d) resync amount and max_blocks, before finish. + * adds speed=%dK/sec after finish + * + * + * + * Out of this we want to extract: + * list of devices, active or not + * pattern of failed drives (so need number of drives) + * percent resync complete + * + * As continuation is indicated by leading space, we use + * conf_line from config.c to read logical lines + * + */ + +#include "mdadm.h" +#include "dlink.h" + +void free_mdstat(struct mdstat_ent *ms) +{ + while (ms) { + struct mdstat_ent *t; + if (ms->dev) free(ms->dev); + if (ms->level) free(ms->level); + if (ms->pattern) free(ms->pattern); + t = ms; + ms = ms->next; + free(t); + } +} + +struct mdstat_ent *mdstat_read() +{ + FILE *f; + struct mdstat_ent *all, **end; + char *line; + + f = fopen("/proc/mdstat", "r"); + if (f == NULL) + return NULL; + + all = NULL; + end = &all; + for (; (line = conf_line(f)) ; free_line(line)) { + struct mdstat_ent *ent; + char *w; + + if (strcmp(line, "Personalities")==0) + continue; + if (strcmp(line, "read_ahead")==0) + continue; + if (strcmp(line, "unused")==0) + continue; + /* Better be an md line.. 
*/ + if (strncmp(line, "md", 2)!= 0 + || atoi(line+2)<0) { + fprintf(stderr, Name ": bad /proc/mdstat line starts: %s\n", line); + continue; + } + + ent = malloc(sizeof(*ent)); + if (!ent) { + fprintf(stderr, Name ": malloc failed reading /proc/mdstat.\n"); + free_line(line); + fclose(f); + return all; + } + ent->dev = ent->level = ent->pattern= NULL; + ent->next = NULL; + ent->percent = -1; + ent->active = -1; + + ent->dev = strdup(line); + ent->devnum = atoi(line+2); + + for (w=dl_next(line); w!= line ; w=dl_next(w)) { + int l = strlen(w); + char *eq; + if (strcmp(w, "active")==0) + ent->active = 1; + else if (strcmp(w, "inactive")==0) + ent->active = 0; + else if (ent->active >=0 && + ent->level == NULL && + w[0] != '(' /*readonly*/) + ent->level = strdup(w); + else if (!ent->pattern && + w[0] == '[' && + (w[1] == 'U' || w[1] == '_')) { + ent->pattern = strdup(w+1); + if (ent->pattern[l-2]==']') + ent->pattern[l-2] = '\0'; + } else if (ent->percent == -1 && + strncmp(w, "re", 2)== 0 && + w[l-1] == '%' && + (eq=strchr(w, '=')) != NULL ) { + ent->percent = atoi(eq+1); + } else if (ent->percent == -1 && + w[0] >= '0' && + w[0] <= '9' && + w[l-1] == '%') { + ent->percent = atoi(w); + } + } + *end = ent; + end = &ent->next; + } + fclose(f); + return all; +} @@ -113,6 +113,8 @@ int get_linux_version() int enough(int level, int raid_disks, int avail_disks) { switch (level) { + case -4: + return avail_disks>= 1; case -1: case 0: return avail_disks == raid_disks; @@ -197,8 +199,8 @@ int load_super(int fd, mdp_super_t *super) * 5 - no magic * 6 - wrong major version */ - long size; - long long offset; + unsigned long size; + unsigned long long offset; if (ioctl(fd, BLKGETSIZE, &size)) return 1; @@ -433,14 +435,14 @@ char *human_size(long long bytes) if (bytes < 5000*1024) buf[0]=0; else if (bytes < 2*1024LL*1024LL*1024LL) - sprintf(buf, " (%d.%02d MiB %d.%02d MB)", + sprintf(buf, " (%ld.%02ld MiB %ld.%02ld MB)", (long)(bytes>>20), (long)(bytes&0xfffff)/(0x100000/100), 
(long)(bytes/1000/1000), (long)((bytes%1000000)/10000) ); else - sprintf(buf, " (%d.%02d GiB %d.%02d GB)", + sprintf(buf, " (%ld.%02ld GiB %ld.%02ld GB)", (long)(bytes>>30), (long)((bytes>>10)&0xfffff)/(0x100000/100), (long)(bytes/1000LL/1000LL/1000LL), @@ -448,3 +450,64 @@ char *human_size(long long bytes) ); return buf; } + +char *human_size_brief(long long bytes) +{ + static char buf[30]; + + + if (bytes < 5000*1024) + sprintf(buf, "%ld.%02ldKiB", + (long)(bytes>>10), (long)((bytes&1023)*100/1024) + ); + else if (bytes < 2*1024LL*1024LL*1024LL) + sprintf(buf, "%ld.%02ldMiB", + (long)(bytes>>20), + (long)(bytes&0xfffff)/(0x100000/100) + ); + else + sprintf(buf, "%ld.%02ldGiB", + (long)(bytes>>30), + (long)((bytes>>10)&0xfffff)/(0x100000/100) + ); + return buf; +} + + +#define MD_MAJOR 9 +char *get_md_name(int dev) +{ + /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */ + static char devname[50]; + struct stat stb; + dev_t rdev = MKDEV(MD_MAJOR, dev); + + sprintf(devname, "/dev/md%d", dev); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + + sprintf(devname, "/dev/md/%d", dev); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + + sprintf(devname, "/dev/.tmp.md%d", dev); + if (mknod(devname, S_IFBLK | 0600, rdev) == -1) + return NULL; + + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + unlink(devname); + return NULL; +} + +void put_md_name(char *name) +{ + if (strncmp(name, "/dev/.tmp.md", 12)==0) + unlink(name); +} |