From fc5a40a2301aa241eedb16caf9169ca5763707c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:49:02 +0100 Subject: UAPI: (Scripted) Disintegrate include/linux/raid Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/raid/Kbuild | 2 - include/linux/raid/md_p.h | 301 ----------------------------------------- include/linux/raid/md_u.h | 141 +------------------ include/uapi/linux/raid/Kbuild | 2 + include/uapi/linux/raid/md_p.h | 301 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/raid/md_u.h | 155 +++++++++++++++++++++ 6 files changed, 459 insertions(+), 443 deletions(-) delete mode 100644 include/linux/raid/md_p.h create mode 100644 include/uapi/linux/raid/md_p.h create mode 100644 include/uapi/linux/raid/md_u.h diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild index 2415a64c5e51..e69de29bb2d1 100644 --- a/include/linux/raid/Kbuild +++ b/include/linux/raid/Kbuild @@ -1,2 +0,0 @@ -header-y += md_p.h -header-y += md_u.h diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h deleted file mode 100644 index ee753536ab70..000000000000 --- a/include/linux/raid/md_p.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - md_p.h : physical layout of Linux RAID devices - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_P_H -#define _MD_P_H - -#include - -/* - * RAID superblock. - * - * The RAID superblock maintains some statistics on each RAID configuration. - * Each real device in the RAID set contains it near the end of the device. - * Some of the ideas are copied from the ext2fs implementation. - * - * We currently use 4096 bytes as follows: - * - * word offset function - * - * 0 - 31 Constant generic RAID device information. - * 32 - 63 Generic state information. - * 64 - 127 Personality specific information. - * 128 - 511 12 32-words descriptors of the disks in the raid set. - * 512 - 911 Reserved. - * 912 - 1023 Disk specific descriptor. - */ - -/* - * If x is the real device size in bytes, we return an apparent size of: - * - * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES - * - * and place the 4kB superblock at offset y. - */ -#define MD_RESERVED_BYTES (64 * 1024) -#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) - -#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) - -#define MD_SB_BYTES 4096 -#define MD_SB_WORDS (MD_SB_BYTES / 4) -#define MD_SB_SECTORS (MD_SB_BYTES / 512) - -/* - * The following are counted in 32-bit words - */ -#define MD_SB_GENERIC_OFFSET 0 -#define MD_SB_PERSONALITY_OFFSET 64 -#define MD_SB_DISKS_OFFSET 128 -#define MD_SB_DESCRIPTOR_OFFSET 992 - -#define MD_SB_GENERIC_CONSTANT_WORDS 32 -#define MD_SB_GENERIC_STATE_WORDS 32 -#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) -#define MD_SB_PERSONALITY_WORDS 64 -#define MD_SB_DESCRIPTOR_WORDS 32 -#define MD_SB_DISKS 27 -#define MD_SB_DISKS_WORDS (MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS) -#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) -#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) - -/* - * Device "operational" state bits - */ -#define MD_DISK_FAULTY 0 /* disk is faulty / operational */ -#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ -#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ -#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ - -#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. - * read requests will only be sent here in - * dire need - */ - -typedef struct mdp_device_descriptor_s { - __u32 number; /* 0 Device number in the entire set */ - __u32 major; /* 1 Device major number */ - __u32 minor; /* 2 Device minor number */ - __u32 raid_disk; /* 3 The role of the device in the raid set */ - __u32 state; /* 4 Operational state */ - __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; -} mdp_disk_t; - -#define MD_SB_MAGIC 0xa92b4efc - -/* - * Superblock state bits - */ -#define MD_SB_CLEAN 0 -#define MD_SB_ERRORS 1 - -#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ - -/* - * Notes: - * - if an array is being reshaped (restriped) in order to change the - * the number of active devices in the array, 'raid_disks' will be - * the larger of the old and new numbers. 'delta_disks' will - * be the "new - old". So if +ve, raid_disks is the new value, and - * "raid_disks-delta_disks" is the old. If -ve, raid_disks is the - * old value and "raid_disks+delta_disks" is the new (smaller) value. - */ - - -typedef struct mdp_superblock_s { - /* - * Constant generic information - */ - __u32 md_magic; /* 0 MD identifier */ - __u32 major_version; /* 1 major version to which the set conforms */ - __u32 minor_version; /* 2 minor version ... */ - __u32 patch_version; /* 3 patchlevel version ... */ - __u32 gvalid_words; /* 4 Number of used words in this section */ - __u32 set_uuid0; /* 5 Raid set identifier */ - __u32 ctime; /* 6 Creation time */ - __u32 level; /* 7 Raid personality */ - __u32 size; /* 8 Apparent size of each individual disk */ - __u32 nr_disks; /* 9 total disks in the raid set */ - __u32 raid_disks; /* 10 disks in a fully functional raid set */ - __u32 md_minor; /* 11 preferred MD minor device number */ - __u32 not_persistent; /* 12 does it have a persistent superblock */ - __u32 set_uuid1; /* 13 Raid set identifier #2 */ - __u32 set_uuid2; /* 14 Raid set identifier #3 */ - __u32 set_uuid3; /* 15 Raid set identifier #4 */ - __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16]; - - /* - * Generic state information - */ - __u32 utime; /* 0 Superblock update time */ - __u32 state; /* 1 State bits (clean, ...) */ - __u32 active_disks; /* 2 Number of currently active disks */ - __u32 working_disks; /* 3 Number of working disks */ - __u32 failed_disks; /* 4 Number of failed disks */ - __u32 spare_disks; /* 5 Number of spare disks */ - __u32 sb_csum; /* 6 checksum of the whole superblock */ -#ifdef __BIG_ENDIAN - __u32 events_hi; /* 7 high-order of superblock update count */ - __u32 events_lo; /* 8 low-order of superblock update count */ - __u32 cp_events_hi; /* 9 high-order of checkpoint update count */ - __u32 cp_events_lo; /* 10 low-order of checkpoint update count */ -#else - __u32 events_lo; /* 7 low-order of superblock update count */ - __u32 events_hi; /* 8 high-order of superblock update count */ - __u32 cp_events_lo; /* 9 low-order of checkpoint update count */ - __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ -#endif - __u32 recovery_cp; /* 11 recovery checkpoint sector count */ - /* There are only valid for minor_version > 90 */ - __u64 reshape_position; /* 12,13 next address in array-space for reshape */ - __u32 new_level; /* 14 new level we are reshaping to */ - __u32 delta_disks; /* 15 change in number of raid_disks */ - __u32 new_layout; /* 16 new layout */ - __u32 new_chunk; /* 17 new chunk size (bytes) */ - __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18]; - - /* - * Personality information - */ - __u32 layout; /* 0 the array's physical layout */ - __u32 chunk_size; /* 1 chunk size in bytes */ - __u32 root_pv; /* 2 LV root PV */ - __u32 root_block; /* 3 LV root block */ - __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4]; - - /* - * Disks information - */ - mdp_disk_t disks[MD_SB_DISKS]; - - /* - * Reserved - */ - __u32 reserved[MD_SB_RESERVED_WORDS]; - - /* - * Active descriptor - */ - mdp_disk_t this_disk; - -} mdp_super_t; - -static inline __u64 md_event(mdp_super_t *sb) { - __u64 ev = sb->events_hi; - return (ev<<32)| sb->events_lo; -} - -#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) - -/* - * The version-1 superblock : - * All numeric fields are little-endian. - * - * total size: 256 bytes plus 2 per device. - * 1K allows 384 devices. - */ -struct mdp_superblock_1 { - /* constant array information - 128 bytes */ - __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ - __le32 major_version; /* 1 */ - __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ - __le32 pad0; /* always set to 0 when writing */ - - __u8 set_uuid[16]; /* user-space generated. */ - char set_name[32]; /* set and interpreted by user-space */ - - __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ - __le32 layout; /* only for raid5 and raid10 currently */ - __le64 size; /* used size of component devices, in 512byte sectors */ - - __le32 chunksize; /* in 512byte sectors */ - __le32 raid_disks; - __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts - * NOTE: signed, so bitmap can be before superblock - * only meaningful of feature_map[0] is set. - */ - - /* These are only valid with feature bit '4' */ - __le32 new_level; /* new level we are reshaping to */ - __le64 reshape_position; /* next address in array-space for reshape */ - __le32 delta_disks; /* change in number of raid_disks */ - __le32 new_layout; /* new layout */ - __le32 new_chunk; /* new chunk size (512byte sectors) */ - __le32 new_offset; /* signed number to add to data_offset in new - * layout. 0 == no-change. This can be - * different on each device in the array. - */ - - /* constant this-device information - 64 bytes */ - __le64 data_offset; /* sector start of data, often 0 */ - __le64 data_size; /* sectors in this device that can be used for data */ - __le64 super_offset; /* sector start of this superblock */ - __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ - __le32 dev_number; /* permanent identifier of this device - not role in raid */ - __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ - __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ - __u8 devflags; /* per-device flags. Only one defined...*/ -#define WriteMostly1 1 /* mask for writemostly flag in above */ - /* Bad block log. If there are any bad blocks the feature flag is set. - * If offset and size are non-zero, that space is reserved and available - */ - __u8 bblog_shift; /* shift from sectors to block size */ - __le16 bblog_size; /* number of sectors reserved for list */ - __le32 bblog_offset; /* sector offset from superblock to bblog, - * signed - not unsigned */ - - /* array state information - 64 bytes */ - __le64 utime; /* 40 bits second, 24 bits microseconds */ - __le64 events; /* incremented when superblock updated */ - __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ - __le32 sb_csum; /* checksum up to devs[max_dev] */ - __le32 max_dev; /* size of devs[] array to consider */ - __u8 pad3[64-32]; /* set to 0 when writing */ - - /* device state information. Indexed by dev_number. - * 2 bytes per device - * Note there are no per-device state flags. State information is rolled - * into the 'roles' value. If a device is spare or faulty, then it doesn't - * have a meaningful role. - */ - __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ -}; - -/* feature_map bits */ -#define MD_FEATURE_BITMAP_OFFSET 1 -#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and - * must be honoured - */ -#define MD_FEATURE_RESHAPE_ACTIVE 4 -#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ -#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an - * active device with same 'role'. - * 'recovery_offset' is also set. - */ -#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number - * of devices, but is going - * backwards anyway. - */ -#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ -#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ - |MD_FEATURE_RECOVERY_OFFSET \ - |MD_FEATURE_RESHAPE_ACTIVE \ - |MD_FEATURE_BAD_BLOCKS \ - |MD_FEATURE_REPLACEMENT \ - |MD_FEATURE_RESHAPE_BACKWARDS \ - |MD_FEATURE_NEW_OFFSET \ - ) - -#endif diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index fb1abb3367e9..358c04bfbe2a 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -11,149 +11,10 @@ (for example /usr/src/linux/COPYING); if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - #ifndef _MD_U_H #define _MD_U_H -/* - * Different major versions are not compatible. - * Different minor versions are only downward compatible. - * Different patchlevel versions are downward and upward compatible. - */ -#define MD_MAJOR_VERSION 0 -#define MD_MINOR_VERSION 90 -/* - * MD_PATCHLEVEL_VERSION indicates kernel functionality. - * >=1 means different superblock formats are selectable using SET_ARRAY_INFO - * and major_version/minor_version accordingly - * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT - * in the super status byte - * >=3 means that bitmap superblock version 4 is supported, which uses - * little-ending representation rather than host-endian - */ -#define MD_PATCHLEVEL_VERSION 3 - -/* ioctls */ - -/* status */ -#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) -#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) -#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) -#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) -#define RAID_AUTORUN _IO (MD_MAJOR, 0x14) -#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) - -/* configuration */ -#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) -#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t) -#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22) -#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t) -#define SET_DISK_INFO _IO (MD_MAJOR, 0x24) -#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25) -#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26) -#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) -#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) -#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) -#define HOT_GENERATE_ERROR _IO (MD_MAJOR, 0x2a) -#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int) +#include -/* usage */ -#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) -/* 0x31 was START_ARRAY */ -#define STOP_ARRAY _IO (MD_MAJOR, 0x32) -#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) -#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) - -/* 63 partitions with the alternate major number (mdp) */ -#define MdpMinorShift 6 -#ifdef __KERNEL__ extern int mdp_major; -#endif - -typedef struct mdu_version_s { - int major; - int minor; - int patchlevel; -} mdu_version_t; - -typedef struct mdu_array_info_s { - /* - * Generic constant information - */ - int major_version; - int minor_version; - int patch_version; - int ctime; - int level; - int size; - int nr_disks; - int raid_disks; - int md_minor; - int not_persistent; - - /* - * Generic state information - */ - int utime; /* 0 Superblock update time */ - int state; /* 1 State bits (clean, ...) */ - int active_disks; /* 2 Number of currently active disks */ - int working_disks; /* 3 Number of working disks */ - int failed_disks; /* 4 Number of failed disks */ - int spare_disks; /* 5 Number of spare disks */ - - /* - * Personality information - */ - int layout; /* 0 the array's physical layout */ - int chunk_size; /* 1 chunk size in bytes */ - -} mdu_array_info_t; - -/* non-obvious values for 'level' */ -#define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) -#define LEVEL_FAULTY (-5) - -/* we need a value for 'no level specified' and 0 - * means 'raid0', so we need something else. This is - * for internal use only - */ -#define LEVEL_NONE (-1000000) - -typedef struct mdu_disk_info_s { - /* - * configuration/status of one particular disk - */ - int number; - int major; - int minor; - int raid_disk; - int state; - -} mdu_disk_info_t; - -typedef struct mdu_start_info_s { - /* - * configuration/status of one particular disk - */ - int major; - int minor; - int raid_disk; - int state; - -} mdu_start_info_t; - -typedef struct mdu_bitmap_file_s -{ - char pathname[4096]; -} mdu_bitmap_file_t; - -typedef struct mdu_param_s -{ - int personality; /* 1,2,3,4 */ - int chunk_size; /* in bytes */ - int max_fault; /* unused for now */ -} mdu_param_t; - #endif - diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild index aafaa5aa54d4..e2c3d25405d7 100644 --- a/include/uapi/linux/raid/Kbuild +++ b/include/uapi/linux/raid/Kbuild @@ -1 +1,3 @@ # UAPI Header export list +header-y += md_p.h +header-y += md_u.h diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h new file mode 100644 index 000000000000..ee753536ab70 --- /dev/null +++ b/include/uapi/linux/raid/md_p.h @@ -0,0 +1,301 @@ +/* + md_p.h : physical layout of Linux RAID devices + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + You should have received a copy of the GNU General Public License + (for example /usr/src/linux/COPYING); if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef _MD_P_H +#define _MD_P_H + +#include + +/* + * RAID superblock. + * + * The RAID superblock maintains some statistics on each RAID configuration. + * Each real device in the RAID set contains it near the end of the device. + * Some of the ideas are copied from the ext2fs implementation. + * + * We currently use 4096 bytes as follows: + * + * word offset function + * + * 0 - 31 Constant generic RAID device information. + * 32 - 63 Generic state information. + * 64 - 127 Personality specific information. + * 128 - 511 12 32-words descriptors of the disks in the raid set. + * 512 - 911 Reserved. + * 912 - 1023 Disk specific descriptor. + */ + +/* + * If x is the real device size in bytes, we return an apparent size of: + * + * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES + * + * and place the 4kB superblock at offset y. + */ +#define MD_RESERVED_BYTES (64 * 1024) +#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) + +#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) + +#define MD_SB_BYTES 4096 +#define MD_SB_WORDS (MD_SB_BYTES / 4) +#define MD_SB_SECTORS (MD_SB_BYTES / 512) + +/* + * The following are counted in 32-bit words + */ +#define MD_SB_GENERIC_OFFSET 0 +#define MD_SB_PERSONALITY_OFFSET 64 +#define MD_SB_DISKS_OFFSET 128 +#define MD_SB_DESCRIPTOR_OFFSET 992 + +#define MD_SB_GENERIC_CONSTANT_WORDS 32 +#define MD_SB_GENERIC_STATE_WORDS 32 +#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) +#define MD_SB_PERSONALITY_WORDS 64 +#define MD_SB_DESCRIPTOR_WORDS 32 +#define MD_SB_DISKS 27 +#define MD_SB_DISKS_WORDS (MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS) +#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) +#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) + +/* + * Device "operational" state bits + */ +#define MD_DISK_FAULTY 0 /* disk is faulty / operational */ +#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ +#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ +#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ + +#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. + * read requests will only be sent here in + * dire need + */ + +typedef struct mdp_device_descriptor_s { + __u32 number; /* 0 Device number in the entire set */ + __u32 major; /* 1 Device major number */ + __u32 minor; /* 2 Device minor number */ + __u32 raid_disk; /* 3 The role of the device in the raid set */ + __u32 state; /* 4 Operational state */ + __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; +} mdp_disk_t; + +#define MD_SB_MAGIC 0xa92b4efc + +/* + * Superblock state bits + */ +#define MD_SB_CLEAN 0 +#define MD_SB_ERRORS 1 + +#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ + +/* + * Notes: + * - if an array is being reshaped (restriped) in order to change the + * the number of active devices in the array, 'raid_disks' will be + * the larger of the old and new numbers. 'delta_disks' will + * be the "new - old". So if +ve, raid_disks is the new value, and + * "raid_disks-delta_disks" is the old. If -ve, raid_disks is the + * old value and "raid_disks+delta_disks" is the new (smaller) value. + */ + + +typedef struct mdp_superblock_s { + /* + * Constant generic information + */ + __u32 md_magic; /* 0 MD identifier */ + __u32 major_version; /* 1 major version to which the set conforms */ + __u32 minor_version; /* 2 minor version ... */ + __u32 patch_version; /* 3 patchlevel version ... */ + __u32 gvalid_words; /* 4 Number of used words in this section */ + __u32 set_uuid0; /* 5 Raid set identifier */ + __u32 ctime; /* 6 Creation time */ + __u32 level; /* 7 Raid personality */ + __u32 size; /* 8 Apparent size of each individual disk */ + __u32 nr_disks; /* 9 total disks in the raid set */ + __u32 raid_disks; /* 10 disks in a fully functional raid set */ + __u32 md_minor; /* 11 preferred MD minor device number */ + __u32 not_persistent; /* 12 does it have a persistent superblock */ + __u32 set_uuid1; /* 13 Raid set identifier #2 */ + __u32 set_uuid2; /* 14 Raid set identifier #3 */ + __u32 set_uuid3; /* 15 Raid set identifier #4 */ + __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16]; + + /* + * Generic state information + */ + __u32 utime; /* 0 Superblock update time */ + __u32 state; /* 1 State bits (clean, ...) */ + __u32 active_disks; /* 2 Number of currently active disks */ + __u32 working_disks; /* 3 Number of working disks */ + __u32 failed_disks; /* 4 Number of failed disks */ + __u32 spare_disks; /* 5 Number of spare disks */ + __u32 sb_csum; /* 6 checksum of the whole superblock */ +#ifdef __BIG_ENDIAN + __u32 events_hi; /* 7 high-order of superblock update count */ + __u32 events_lo; /* 8 low-order of superblock update count */ + __u32 cp_events_hi; /* 9 high-order of checkpoint update count */ + __u32 cp_events_lo; /* 10 low-order of checkpoint update count */ +#else + __u32 events_lo; /* 7 low-order of superblock update count */ + __u32 events_hi; /* 8 high-order of superblock update count */ + __u32 cp_events_lo; /* 9 low-order of checkpoint update count */ + __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ +#endif + __u32 recovery_cp; /* 11 recovery checkpoint sector count */ + /* There are only valid for minor_version > 90 */ + __u64 reshape_position; /* 12,13 next address in array-space for reshape */ + __u32 new_level; /* 14 new level we are reshaping to */ + __u32 delta_disks; /* 15 change in number of raid_disks */ + __u32 new_layout; /* 16 new layout */ + __u32 new_chunk; /* 17 new chunk size (bytes) */ + __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18]; + + /* + * Personality information + */ + __u32 layout; /* 0 the array's physical layout */ + __u32 chunk_size; /* 1 chunk size in bytes */ + __u32 root_pv; /* 2 LV root PV */ + __u32 root_block; /* 3 LV root block */ + __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4]; + + /* + * Disks information + */ + mdp_disk_t disks[MD_SB_DISKS]; + + /* + * Reserved + */ + __u32 reserved[MD_SB_RESERVED_WORDS]; + + /* + * Active descriptor + */ + mdp_disk_t this_disk; + +} mdp_super_t; + +static inline __u64 md_event(mdp_super_t *sb) { + __u64 ev = sb->events_hi; + return (ev<<32)| sb->events_lo; +} + +#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) + +/* + * The version-1 superblock : + * All numeric fields are little-endian. + * + * total size: 256 bytes plus 2 per device. + * 1K allows 384 devices. + */ +struct mdp_superblock_1 { + /* constant array information - 128 bytes */ + __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ + __le32 major_version; /* 1 */ + __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ + __le32 pad0; /* always set to 0 when writing */ + + __u8 set_uuid[16]; /* user-space generated. */ + char set_name[32]; /* set and interpreted by user-space */ + + __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ + __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 layout; /* only for raid5 and raid10 currently */ + __le64 size; /* used size of component devices, in 512byte sectors */ + + __le32 chunksize; /* in 512byte sectors */ + __le32 raid_disks; + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. + */ + + /* These are only valid with feature bit '4' */ + __le32 new_level; /* new level we are reshaping to */ + __le64 reshape_position; /* next address in array-space for reshape */ + __le32 delta_disks; /* change in number of raid_disks */ + __le32 new_layout; /* new layout */ + __le32 new_chunk; /* new chunk size (512byte sectors) */ + __le32 new_offset; /* signed number to add to data_offset in new + * layout. 0 == no-change. This can be + * different on each device in the array. + */ + + /* constant this-device information - 64 bytes */ + __le64 data_offset; /* sector start of data, often 0 */ + __le64 data_size; /* sectors in this device that can be used for data */ + __le64 super_offset; /* sector start of this superblock */ + __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __le32 dev_number; /* permanent identifier of this device - not role in raid */ + __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ + __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ + __u8 devflags; /* per-device flags. Only one defined...*/ +#define WriteMostly1 1 /* mask for writemostly flag in above */ + /* Bad block log. If there are any bad blocks the feature flag is set. + * If offset and size are non-zero, that space is reserved and available + */ + __u8 bblog_shift; /* shift from sectors to block size */ + __le16 bblog_size; /* number of sectors reserved for list */ + __le32 bblog_offset; /* sector offset from superblock to bblog, + * signed - not unsigned */ + + /* array state information - 64 bytes */ + __le64 utime; /* 40 bits second, 24 bits microseconds */ + __le64 events; /* incremented when superblock updated */ + __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ + __le32 sb_csum; /* checksum up to devs[max_dev] */ + __le32 max_dev; /* size of devs[] array to consider */ + __u8 pad3[64-32]; /* set to 0 when writing */ + + /* device state information. Indexed by dev_number. + * 2 bytes per device + * Note there are no per-device state flags. State information is rolled + * into the 'roles' value. If a device is spare or faulty, then it doesn't + * have a meaningful role. + */ + __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ +}; + +/* feature_map bits */ +#define MD_FEATURE_BITMAP_OFFSET 1 +#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and + * must be honoured + */ +#define MD_FEATURE_RESHAPE_ACTIVE 4 +#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ +#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an + * active device with same 'role'. + * 'recovery_offset' is also set. + */ +#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number + * of devices, but is going + * backwards anyway. + */ +#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ +#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ + |MD_FEATURE_RECOVERY_OFFSET \ + |MD_FEATURE_RESHAPE_ACTIVE \ + |MD_FEATURE_BAD_BLOCKS \ + |MD_FEATURE_REPLACEMENT \ + |MD_FEATURE_RESHAPE_BACKWARDS \ + |MD_FEATURE_NEW_OFFSET \ + ) + +#endif diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h new file mode 100644 index 000000000000..4133e744e4e6 --- /dev/null +++ b/include/uapi/linux/raid/md_u.h @@ -0,0 +1,155 @@ +/* + md_u.h : user <=> kernel API between Linux raidtools and RAID drivers + Copyright (C) 1998 Ingo Molnar + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + You should have received a copy of the GNU General Public License + (for example /usr/src/linux/COPYING); if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef _UAPI_MD_U_H +#define _UAPI_MD_U_H + +/* + * Different major versions are not compatible. + * Different minor versions are only downward compatible. + * Different patchlevel versions are downward and upward compatible. + */ +#define MD_MAJOR_VERSION 0 +#define MD_MINOR_VERSION 90 +/* + * MD_PATCHLEVEL_VERSION indicates kernel functionality. + * >=1 means different superblock formats are selectable using SET_ARRAY_INFO + * and major_version/minor_version accordingly + * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT + * in the super status byte + * >=3 means that bitmap superblock version 4 is supported, which uses + * little-ending representation rather than host-endian + */ +#define MD_PATCHLEVEL_VERSION 3 + +/* ioctls */ + +/* status */ +#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) +#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) +#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) +#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) +#define RAID_AUTORUN _IO (MD_MAJOR, 0x14) +#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) + +/* configuration */ +#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) +#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t) +#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22) +#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t) +#define SET_DISK_INFO _IO (MD_MAJOR, 0x24) +#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25) +#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26) +#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) +#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) +#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) +#define HOT_GENERATE_ERROR _IO (MD_MAJOR, 0x2a) +#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int) + +/* usage */ +#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) +/* 0x31 was START_ARRAY */ +#define STOP_ARRAY _IO (MD_MAJOR, 0x32) +#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) +#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) + +/* 63 partitions with the alternate major number (mdp) */ +#define MdpMinorShift 6 + +typedef struct mdu_version_s { + int major; + int minor; + int patchlevel; +} mdu_version_t; + +typedef struct mdu_array_info_s { + /* + * Generic constant information + */ + int major_version; + int minor_version; + int patch_version; + int ctime; + int level; + int size; + int nr_disks; + int raid_disks; + int md_minor; + int not_persistent; + + /* + * Generic state information + */ + int utime; /* 0 Superblock update time */ + int state; /* 1 State bits (clean, ...) */ + int active_disks; /* 2 Number of currently active disks */ + int working_disks; /* 3 Number of working disks */ + int failed_disks; /* 4 Number of failed disks */ + int spare_disks; /* 5 Number of spare disks */ + + /* + * Personality information + */ + int layout; /* 0 the array's physical layout */ + int chunk_size; /* 1 chunk size in bytes */ + +} mdu_array_info_t; + +/* non-obvious values for 'level' */ +#define LEVEL_MULTIPATH (-4) +#define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) + +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + +typedef struct mdu_disk_info_s { + /* + * configuration/status of one particular disk + */ + int number; + int major; + int minor; + int raid_disk; + int state; + +} mdu_disk_info_t; + +typedef struct mdu_start_info_s { + /* + * configuration/status of one particular disk + */ + int major; + int minor; + int raid_disk; + int state; + +} mdu_start_info_t; + +typedef struct mdu_bitmap_file_s +{ + char pathname[4096]; +} mdu_bitmap_file_t; + +typedef struct mdu_param_s +{ + int personality; /* 1,2,3,4 */ + int chunk_size; /* in bytes */ + int max_fault; /* unused for now */ +} mdu_param_t; + +#endif /* _UAPI_MD_U_H */ -- cgit v1.2.3 From 0be1fecd7ee61b5a6d2b2e94b052b8a070b946ef Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 22 Oct 2012 10:44:55 +1100 Subject: md faulty: use disk_stack_limits() in: fe86cdce block: do not artificially constrain max_sectors for stacking drivers max_sectors defaults to UINT_MAX. md faulty wasn't using disk_stack_limits(), so inherited this large value as well. This triggered a bug in XFS when stressed over md_faulty, when a very large bio_alloc() failed. That was on an older kernel, and I can't reproduce exactly the same thing upstream, but I think the fix is appropriate in any case. Thanks to Mike Snitzer for pointing out the problem. Signed-off-by: Eric Sandeen Signed-off-by: NeilBrown --- drivers/md/faulty.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 45135f69509c..5e7dc772f5de 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -315,8 +315,11 @@ static int run(struct mddev *mddev) } conf->nfaults = 0; - rdev_for_each(rdev, mddev) + rdev_for_each(rdev, mddev) { conf->rdev = rdev; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + } md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); mddev->private = conf; -- cgit v1.2.3 From 02b898f2f04e418094f0093a3ad0b415bcdbe8eb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 31 Oct 2012 11:42:03 +1100 Subject: md/raid1: Fix assembling of arrays containing Replacements. setup_conf in raid1.c uses conf->raid_disks before assigning a value. It is used when including 'Replacement' devices. The consequence is that assembling an array which contains a replacement will misbehave and either not include the replacement, or not include the device being replaced. Though this doesn't lead directly to data corruption, it could lead to reduced data safety. So use mddev->raid_disks, which is initialised, instead. Bug was introduced by commit c19d57980b38a5bb613a898937a1cf85f422fb9b md/raid1: recognise replacements when assembling arrays. in 3.3, so fix is suitable for 3.3.y thru 3.6.y. Cc: stable@vger.kernel.org Signed-off-by: NeilBrown --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8034fbd6190c..636bae0405e8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2710,7 +2710,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) || disk_idx < 0) continue; if (test_bit(Replacement, &rdev->flags)) - disk = conf->mirrors + conf->raid_disks + disk_idx; + disk = conf->mirrors + mddev->raid_disks + disk_idx; else disk = conf->mirrors + disk_idx; -- cgit v1.2.3 From ed30be077e705e0dff53bfc51d23feb8aeeab78f Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Wed, 31 Oct 2012 11:42:30 +1100 Subject: MD RAID10: Fix oops when creating RAID10 arrays via dm-raid.c Commit 2863b9eb didn't take into account the changes to add TRIM support to RAID10 (commit 532a2a3fb). That is, when using dm-raid.c to create the RAID10 arrays, there is no mddev->gendisk or mddev->queue. The code added to support TRIM simply assumes that mddev->queue is available without checking. The result is an oops any time dm-raid.c attempts to create a RAID10 device. Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/raid10.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 906ccbd0f7dc..d1295aff4173 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1783,7 +1783,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) clear_bit(Unmerged, &rdev->flags); } md_integrity_add_rdev(rdev, mddev); - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); @@ -3613,11 +3613,14 @@ static int run(struct mddev *mddev) discard_supported = true; } - if (discard_supported) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - + if (mddev->queue) { + if (discard_supported) + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + mddev->queue); + else + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + mddev->queue); + } /* need to check that every block has at least one working mirror */ if (!enough(conf, -1)) { printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", -- cgit v1.2.3