diff options
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r-- | fs/btrfs/super.c | 348 |
1 files changed, 187 insertions, 161 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3a4dce153645..6e71a2a78363 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,12 +61,21 @@ #include "tests/btrfs-tests.h" #include "qgroup.h" -#include "backref.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> static const struct super_operations btrfs_super_ops; + +/* + * Types for mounting the default subvolume and a subvolume explicitly + * requested by subvol=/path. That way the callchain is straightforward and we + * don't have to play tricks with the mount options and recursive calls to + * btrfs_mount. + * + * The new btrfs_root_fs_type also servers as a tag for the bdev_holder. + */ static struct file_system_type btrfs_fs_type; +static struct file_system_type btrfs_root_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); @@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno) return errstr; } -/* btrfs handle error by forcing the filesystem readonly */ -static void btrfs_handle_error(struct btrfs_fs_info *fs_info) -{ - struct super_block *sb = fs_info->sb; - - if (sb_rdonly(sb)) - return; - - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { - sb->s_flags |= SB_RDONLY; - btrfs_info(fs_info, "forced readonly"); - /* - * Note that a running device replace operation is not - * canceled here although there is no way to update - * the progress. It would add the risk of a deadlock, - * therefore the canceling is omitted. The only penalty - * is that some I/O remains active until the procedure - * completes. The next time when the filesystem is - * mounted writeable again, the device replace - * operation continues. - */ - } -} - /* * __btrfs_handle_fs_error decodes expected errors from the caller and * invokes the approciate error response. @@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); /* Don't go through full error handling during mount */ - if (sb->s_flags & SB_BORN) - btrfs_handle_error(fs_info); + if (!(sb->s_flags & SB_BORN)) + return; + + if (sb_rdonly(sb)) + return; + + /* btrfs handle error by forcing the filesystem readonly */ + sb->s_flags |= SB_RDONLY; + btrfs_info(fs_info, "forced readonly"); + /* + * Note that a running device replace operation is not canceled here + * although there is no way to update the progress. It would add the + * risk of a deadlock, therefore the canceling is omitted. The only + * penalty is that some I/O remains active until the procedure + * completes. The next time when the filesystem is mounted writeable + * again, the device replace operation continues. + */ } #ifdef CONFIG_PRINTK @@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, unsigned long new_flags) { substring_t args[MAX_OPT_ARGS]; - char *p, *num, *orig = NULL; + char *p, *num; u64 cache_gen; int intarg; int ret = 0; @@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, if (!options) goto check; - /* - * strsep changes the string, duplicate it because parse_options - * gets called twice - */ - options = kstrdup(options, GFP_KERNEL); - if (!options) - return -ENOMEM; - - orig = options; - while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) @@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_subvolrootid: case Opt_device: /* - * These are parsed by btrfs_parse_early_options + * These are parsed by btrfs_parse_subvol_options + * and btrfs_parse_early_options * and can be happily ignored here. */ break; @@ -877,7 +868,6 @@ out: btrfs_info(info, "disk space caching is enabled"); if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE)) btrfs_info(info, "using free space tree"); - kfree(orig); return ret; } @@ -888,11 +878,60 @@ out: * only when we need to allocate a new super block. */ static int btrfs_parse_early_options(const char *options, fmode_t flags, - void *holder, char **subvol_name, u64 *subvol_objectid, - struct btrfs_fs_devices **fs_devices) + void *holder, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + int error = 0; + + if (!options) + return 0; + + /* + * strsep changes the string, duplicate it because btrfs_parse_options + * gets called later + */ + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + orig = opts; + + while ((p = strsep(&opts, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + if (token == Opt_device) { + device_name = match_strdup(&args[0]); + if (!device_name) { + error = -ENOMEM; + goto out; + } + error = btrfs_scan_one_device(device_name, + flags, holder, fs_devices); + kfree(device_name); + if (error) + goto out; + } + } + +out: + kfree(orig); + return error; +} + +/* + * Parse mount options that are related to subvolume id + * + * The value is later passed to mount_subvol() + */ +static int btrfs_parse_subvol_options(const char *options, fmode_t flags, + char **subvol_name, u64 *subvol_objectid) +{ + substring_t args[MAX_OPT_ARGS]; + char *opts, *orig, *p; char *num = NULL; int error = 0; @@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, return 0; /* - * strsep changes the string, duplicate it because parse_options - * gets called twice + * strsep changes the string, duplicate it because + * btrfs_parse_early_options gets called later */ opts = kstrdup(options, GFP_KERNEL); if (!opts) @@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvolrootid: pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n"); break; - case Opt_device: - device_name = match_strdup(&args[0]); - if (!device_name) { - error = -ENOMEM; - goto out; - } - error = btrfs_scan_one_device(device_name, - flags, holder, fs_devices); - kfree(device_name); - if (error) - goto out; - break; default: break; } @@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); - char *compress_type; + const char *compress_type; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); @@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); if (btrfs_test_opt(info, COMPRESS)) { - if (info->compress_type == BTRFS_COMPRESS_ZLIB) - compress_type = "zlib"; - else if (info->compress_type == BTRFS_COMPRESS_LZO) - compress_type = "lzo"; - else - compress_type = "zstd"; + compress_type = btrfs_compress_type2str(info->compress_type); if (btrfs_test_opt(info, FORCE_COMPRESS)) seq_printf(seq, ",compress-force=%s", compress_type); else @@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode) return 0; } -/* - * This will add subvolid=0 to the argument string while removing any subvol= - * and subvolid= arguments to make sure we get the top-level root for path - * walking to the subvol we want. - */ -static char *setup_root_args(char *args) -{ - char *buf, *dst, *sep; - - if (!args) - return kstrdup("subvolid=0", GFP_KERNEL); - - /* The worst case is that we add ",subvolid=0" to the end. */ - buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, - GFP_KERNEL); - if (!buf) - return NULL; - - while (1) { - sep = strchrnul(args, ','); - if (!strstarts(args, "subvol=") && - !strstarts(args, "subvolid=")) { - memcpy(dst, args, sep - args); - dst += sep - args; - *dst++ = ','; - } - if (*sep) - args = sep + 1; - else - break; - } - strcpy(dst, "subvolid=0"); - - return buf; -} - static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, - int flags, const char *device_name, - char *data) + const char *device_name, struct vfsmount *mnt) { struct dentry *root; - struct vfsmount *mnt = NULL; - char *newargs; int ret; - newargs = setup_root_args(data); - if (!newargs) { - root = ERR_PTR(-ENOMEM); - goto out; - } - - mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); - if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { - if (flags & SB_RDONLY) { - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY, - device_name, newargs); - } else { - mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY, - device_name, newargs); - if (IS_ERR(mnt)) { - root = ERR_CAST(mnt); - mnt = NULL; - goto out; - } - - down_write(&mnt->mnt_sb->s_umount); - ret = btrfs_remount(mnt->mnt_sb, &flags, NULL); - up_write(&mnt->mnt_sb->s_umount); - if (ret < 0) { - root = ERR_PTR(ret); - goto out; - } - } - } - if (IS_ERR(mnt)) { - root = ERR_CAST(mnt); - mnt = NULL; - goto out; - } - if (!subvol_name) { if (!subvol_objectid) { ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), @@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, out: mntput(mnt); - kfree(newargs); kfree(subvol_name); return root; } @@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, /* * Find a superblock for the given device / mount point. * - * Note: This is based on get_sb_bdev from fs/super.c with a few additions - * for multiple device setup. Make sure to keep it in sync. + * Note: This is based on mount_bdev from fs/super.c with a few additions + * for multiple device setup. Make sure to keep it in sync. */ -static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, - const char *device_name, void *data) +static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, + int flags, const char *device_name, void *data) { struct block_device *bdev = NULL; struct super_block *s; @@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, struct btrfs_fs_info *fs_info = NULL; struct security_mnt_opts new_sec_opts; fmode_t mode = FMODE_READ; - char *subvol_name = NULL; - u64 subvol_objectid = 0; int error = 0; if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; error = btrfs_parse_early_options(data, mode, fs_type, - &subvol_name, &subvol_objectid, &fs_devices); if (error) { - kfree(subvol_name); return ERR_PTR(error); } - if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { - /* mount_subvol() will free subvol_name. */ - return mount_subvol(subvol_name, subvol_objectid, flags, - device_name, data); - } - security_init_mnt_opts(&new_sec_opts); if (data) { error = parse_security_options(data, &new_sec_opts); @@ -1674,6 +1611,84 @@ error_sec_opts: return ERR_PTR(error); } +/* + * Mount function which is called by VFS layer. + * + * In order to allow mounting a subvolume directly, btrfs uses mount_subtree() + * which needs vfsmount* of device's root (/). This means device's root has to + * be mounted internally in any case. + * + * Operation flow: + * 1. Parse subvol id related options for later use in mount_subvol(). + * + * 2. Mount device's root (/) by calling vfs_kern_mount(). + * + * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the + * first place. In order to avoid calling btrfs_mount() again, we use + * different file_system_type which is not registered to VFS by + * register_filesystem() (btrfs_root_fs_type). As a result, + * btrfs_mount_root() is called. The return value will be used by + * mount_subtree() in mount_subvol(). + * + * 3. Call mount_subvol() to get the dentry of subvolume. Since there is + * "btrfs subvolume set-default", mount_subvol() is called always. + */ +static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, + const char *device_name, void *data) +{ + struct vfsmount *mnt_root; + struct dentry *root; + fmode_t mode = FMODE_READ; + char *subvol_name = NULL; + u64 subvol_objectid = 0; + int error = 0; + + if (!(flags & SB_RDONLY)) + mode |= FMODE_WRITE; + + error = btrfs_parse_subvol_options(data, mode, + &subvol_name, &subvol_objectid); + if (error) { + kfree(subvol_name); + return ERR_PTR(error); + } + + /* mount device's root (/) */ + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); + if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { + if (flags & SB_RDONLY) { + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, + flags & ~SB_RDONLY, device_name, data); + } else { + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, + flags | SB_RDONLY, device_name, data); + if (IS_ERR(mnt_root)) { + root = ERR_CAST(mnt_root); + goto out; + } + + down_write(&mnt_root->mnt_sb->s_umount); + error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL); + up_write(&mnt_root->mnt_sb->s_umount); + if (error < 0) { + root = ERR_PTR(error); + mntput(mnt_root); + goto out; + } + } + } + if (IS_ERR(mnt_root)) { + root = ERR_CAST(mnt_root); + goto out; + } + + /* mount_subvol() will free subvol_name and mnt_root */ + root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root); + +out: + return root; +} + static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, int new_pool_size, int old_pool_size) { @@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; } - if (!btrfs_check_rw_degradable(fs_info)) { + if (!btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "too many missing devices, writeable remount is not allowed"); ret = -EACCES; @@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { - if (!device->in_fs_metadata || !device->bdev || - device->is_tgtdev_for_dev_replace) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &device->dev_state) || + !device->bdev || + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) continue; if (i >= nr_devices) @@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; + +static struct file_system_type btrfs_root_fs_type = { + .owner = THIS_MODULE, + .name = "btrfs", + .mount = btrfs_mount_root, + .kill_sb = btrfs_kill_super, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, +}; + MODULE_ALIAS_FS("btrfs"); static int btrfs_control_open(struct inode *inode, struct file *file) @@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case BTRFS_IOC_SCAN_DEV: ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_fs_type, &fs_devices); + &btrfs_root_fs_type, &fs_devices); break; case BTRFS_IOC_DEVICES_READY: ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_fs_type, &fs_devices); + &btrfs_root_fs_type, &fs_devices); if (ret) break; ret = !(fs_devices->num_devices == fs_devices->total_devices); @@ -2269,7 +2295,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) while (cur_devices) { head = &cur_devices->devices; list_for_each_entry(dev, head, dev_list) { - if (dev->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->name) continue; @@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = { MODULE_ALIAS_MISCDEV(BTRFS_MINOR); MODULE_ALIAS("devname:btrfs-control"); -static int btrfs_interface_init(void) +static int __init btrfs_interface_init(void) { return misc_register(&btrfs_misc); } @@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void) misc_deregister(&btrfs_misc); } -static void btrfs_print_mod_info(void) +static void __init btrfs_print_mod_info(void) { pr_info("Btrfs loaded, crc32c=%s" #ifdef CONFIG_BTRFS_DEBUG |