summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c374
1 files changed, 349 insertions, 25 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1411b99555a4..8a3d2594b807 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
+#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <asm/div64.h>
#include "compat.h"
@@ -34,11 +35,14 @@
#include "volumes.h"
#include "async-thread.h"
#include "check-integrity.h"
+#include "rcu-string.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
+static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
+static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@@ -61,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
- kfree(device->name);
+ rcu_string_free(device->name);
kfree(device);
}
kfree(fs_devices);
@@ -331,8 +335,8 @@ static noinline int device_list_add(const char *path,
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices;
+ struct rcu_string *name;
u64 found_transid = btrfs_super_generation(disk_super);
- char *name;
fs_devices = find_fsid(disk_super->fsid);
if (!fs_devices) {
@@ -361,15 +365,18 @@ static noinline int device_list_add(const char *path,
return -ENOMEM;
}
device->devid = devid;
+ device->dev_stats_valid = 0;
device->work.func = pending_bios_fn;
memcpy(device->uuid, disk_super->dev_item.uuid,
BTRFS_UUID_SIZE);
spin_lock_init(&device->io_lock);
- device->name = kstrdup(path, GFP_NOFS);
- if (!device->name) {
+
+ name = rcu_string_strdup(path, GFP_NOFS);
+ if (!name) {
kfree(device);
return -ENOMEM;
}
+ rcu_assign_pointer(device->name, name);
INIT_LIST_HEAD(&device->dev_alloc_list);
/* init readahead state */
@@ -386,12 +393,12 @@ static noinline int device_list_add(const char *path,
device->fs_devices = fs_devices;
fs_devices->num_devices++;
- } else if (!device->name || strcmp(device->name, path)) {
- name = kstrdup(path, GFP_NOFS);
+ } else if (!device->name || strcmp(device->name->str, path)) {
+ name = rcu_string_strdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
- kfree(device->name);
- device->name = name;
+ rcu_string_free(device->name);
+ rcu_assign_pointer(device->name, name);
if (device->missing) {
fs_devices->missing_devices--;
device->missing = 0;
@@ -426,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
/* We have held the volume lock, it is safe to get the devices. */
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
+ struct rcu_string *name;
+
device = kzalloc(sizeof(*device), GFP_NOFS);
if (!device)
goto error;
- device->name = kstrdup(orig_dev->name, GFP_NOFS);
- if (!device->name) {
+ /*
+ * This is ok to do without rcu read locked because we hold the
+ * uuid mutex so nothing we touch in here is going to disappear.
+ */
+ name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+ if (!name) {
kfree(device);
goto error;
}
+ rcu_assign_pointer(device->name, name);
device->devid = orig_dev->devid;
device->work.func = pending_bios_fn;
@@ -487,7 +501,7 @@ again:
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
- kfree(device->name);
+ rcu_string_free(device->name);
kfree(device);
}
@@ -512,7 +526,7 @@ static void __free_device(struct work_struct *work)
if (device->bdev)
blkdev_put(device->bdev, device->mode);
- kfree(device->name);
+ rcu_string_free(device->name);
kfree(device);
}
@@ -536,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
struct btrfs_device *new_device;
+ struct rcu_string *name;
if (device->bdev)
fs_devices->open_devices--;
@@ -551,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
BUG_ON(!new_device); /* -ENOMEM */
memcpy(new_device, device, sizeof(*new_device));
- new_device->name = kstrdup(device->name, GFP_NOFS);
- BUG_ON(device->name && !new_device->name); /* -ENOMEM */
+
+ /* Safe because we are under uuid_mutex */
+ name = rcu_string_strdup(device->name->str, GFP_NOFS);
+ BUG_ON(device->name && !name); /* -ENOMEM */
+ rcu_assign_pointer(new_device->name, name);
new_device->bdev = NULL;
new_device->writeable = 0;
new_device->in_fs_metadata = 0;
@@ -617,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
if (!device->name)
continue;
- bdev = blkdev_get_by_path(device->name, flags, holder);
+ bdev = blkdev_get_by_path(device->name->str, flags, holder);
if (IS_ERR(bdev)) {
- printk(KERN_INFO "open %s failed\n", device->name);
+ printk(KERN_INFO "open %s failed\n", device->name->str);
goto error;
}
filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1628,12 +1646,13 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
struct block_device *bdev;
struct list_head *devices;
struct super_block *sb = root->fs_info->sb;
+ struct rcu_string *name;
u64 total_bytes;
int seeding_dev = 0;
int ret = 0;
if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
- return -EINVAL;
+ return -EROFS;
bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
root->fs_info->bdev_holder);
@@ -1667,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error;
}
- device->name = kstrdup(device_path, GFP_NOFS);
- if (!device->name) {
+ name = rcu_string_strdup(device_path, GFP_NOFS);
+ if (!name) {
kfree(device);
ret = -ENOMEM;
goto error;
}
+ rcu_assign_pointer(device->name, name);
ret = find_next_devid(root, &device->devid);
if (ret) {
- kfree(device->name);
+ rcu_string_free(device->name);
kfree(device);
goto error;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
- kfree(device->name);
+ rcu_string_free(device->name);
kfree(device);
ret = PTR_ERR(trans);
goto error;
@@ -1792,7 +1812,7 @@ error_trans:
unlock_chunks(root);
btrfs_abort_transaction(trans, root, ret);
btrfs_end_transaction(trans, root);
- kfree(device->name);
+ rcu_string_free(device->name);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
@@ -4001,13 +4021,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
+static void *merge_stripe_index_into_bio_private(void *bi_private,
+ unsigned int stripe_index)
+{
+ /*
+ * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
+ * at most 1.
+ * The alternative solution (instead of stealing bits from the
+ * pointer) would be to allocate an intermediate structure
+ * that contains the old private pointer plus the stripe_index.
+ */
+ BUG_ON((((uintptr_t)bi_private) & 3) != 0);
+ BUG_ON(stripe_index > 3);
+ return (void *)(((uintptr_t)bi_private) | stripe_index);
+}
+
+static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
+{
+ return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
+}
+
+static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
+{
+ return (unsigned int)((uintptr_t)bi_private) & 3;
+}
+
static void btrfs_end_bio(struct bio *bio, int err)
{
- struct btrfs_bio *bbio = bio->bi_private;
+ struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
int is_orig_bio = 0;
- if (err)
+ if (err) {
atomic_inc(&bbio->error);
+ if (err == -EIO || err == -EREMOTEIO) {
+ unsigned int stripe_index =
+ extract_stripe_index_from_bio_private(
+ bio->bi_private);
+ struct btrfs_device *dev;
+
+ BUG_ON(stripe_index >= bbio->num_stripes);
+ dev = bbio->stripes[stripe_index].dev;
+ if (bio->bi_rw & WRITE)
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
+ else
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_READ_ERRS);
+ if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
+ btrfs_dev_stat_print_on_error(dev);
+ }
+ }
if (bio == bbio->orig_bio)
is_orig_bio = 1;
@@ -4149,14 +4214,23 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bio = first_bio;
}
bio->bi_private = bbio;
+ bio->bi_private = merge_stripe_index_into_bio_private(
+ bio->bi_private, (unsigned int)dev_nr);
bio->bi_end_io = btrfs_end_bio;
bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
dev = bbio->stripes[dev_nr].dev;
if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
+#ifdef DEBUG
+ struct rcu_string *name;
+
+ rcu_read_lock();
+ name = rcu_dereference(dev->name);
pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
"(%s id %llu), size=%u\n", rw,
(u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
- dev->name, dev->devid, bio->bi_size);
+ name->str, dev->devid, bio->bi_size);
+ rcu_read_unlock();
+#endif
bio->bi_bdev = dev->bdev;
if (async_submit)
schedule_bio(root, dev, rw, bio);
@@ -4509,6 +4583,28 @@ int btrfs_read_sys_array(struct btrfs_root *root)
return ret;
}
+struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
+ u64 logical, int mirror_num)
+{
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ int ret;
+ u64 map_length = 0;
+ struct btrfs_bio *bbio = NULL;
+ struct btrfs_device *device;
+
+ BUG_ON(mirror_num == 0);
+ ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
+ mirror_num);
+ if (ret) {
+ BUG_ON(bbio != NULL);
+ return NULL;
+ }
+ BUG_ON(mirror_num != bbio->mirror_num);
+ device = bbio->stripes[mirror_num - 1].dev;
+ kfree(bbio);
+ return device;
+}
+
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -4583,3 +4679,231 @@ error:
btrfs_free_path(path);
return ret;
}
+
+static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
+ btrfs_dev_stat_reset(dev, i);
+}
+
+int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_root *dev_root = fs_info->dev_root;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct extent_buffer *eb;
+ int slot;
+ int ret = 0;
+ struct btrfs_device *device;
+ struct btrfs_path *path = NULL;
+ int i;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ int item_size;
+ struct btrfs_dev_stats_item *ptr;
+
+ key.objectid = 0;
+ key.type = BTRFS_DEV_STATS_KEY;
+ key.offset = device->devid;
+ ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
+ if (ret) {
+ printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
+ rcu_str_deref(device->name),
+ (unsigned long long)device->devid);
+ __btrfs_reset_dev_stats(device);
+ device->dev_stats_valid = 1;
+ btrfs_release_path(path);
+ continue;
+ }
+ slot = path->slots[0];
+ eb = path->nodes[0];
+ btrfs_item_key_to_cpu(eb, &found_key, slot);
+ item_size = btrfs_item_size_nr(eb, slot);
+
+ ptr = btrfs_item_ptr(eb, slot,
+ struct btrfs_dev_stats_item);
+
+ for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
+ if (item_size >= (1 + i) * sizeof(__le64))
+ btrfs_dev_stat_set(device, i,
+ btrfs_dev_stats_value(eb, ptr, i));
+ else
+ btrfs_dev_stat_reset(device, i);
+ }
+
+ device->dev_stats_valid = 1;
+ btrfs_dev_stat_print_on_load(device);
+ btrfs_release_path(path);
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+out:
+ btrfs_free_path(path);
+ return ret < 0 ? ret : 0;
+}
+
+static int update_dev_stat_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *dev_root,
+ struct btrfs_device *device)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ struct btrfs_dev_stats_item *ptr;
+ int ret;
+ int i;
+
+ key.objectid = 0;
+ key.type = BTRFS_DEV_STATS_KEY;
+ key.offset = device->devid;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
+ if (ret < 0) {
+ printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
+ ret, rcu_str_deref(device->name));
+ goto out;
+ }
+
+ if (ret == 0 &&
+ btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
+ /* need to delete old one and insert a new one */
+ ret = btrfs_del_item(trans, dev_root, path);
+ if (ret != 0) {
+ printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
+ rcu_str_deref(device->name), ret);
+ goto out;
+ }
+ ret = 1;
+ }
+
+ if (ret == 1) {
+ /* need to insert a new item */
+ btrfs_release_path(path);
+ ret = btrfs_insert_empty_item(trans, dev_root, path,
+ &key, sizeof(*ptr));
+ if (ret < 0) {
+ printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
+ rcu_str_deref(device->name), ret);
+ goto out;
+ }
+ }
+
+ eb = path->nodes[0];
+ ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
+ for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
+ btrfs_set_dev_stats_value(eb, ptr, i,
+ btrfs_dev_stat_read(device, i));
+ btrfs_mark_buffer_dirty(eb);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * called from commit_transaction. Writes all changed device stats to disk.
+ */
+int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *dev_root = fs_info->dev_root;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_device *device;
+ int ret = 0;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (!device->dev_stats_valid || !device->dev_stats_dirty)
+ continue;
+
+ ret = update_dev_stat_item(trans, dev_root, device);
+ if (!ret)
+ device->dev_stats_dirty = 0;
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ return ret;
+}
+
+void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
+{
+ btrfs_dev_stat_inc(dev, index);
+ btrfs_dev_stat_print_on_error(dev);
+}
+
+void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
+{
+ if (!dev->dev_stats_valid)
+ return;
+ printk_ratelimited_in_rcu(KERN_ERR
+ "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+ rcu_str_deref(dev->name),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
+ btrfs_dev_stat_read(dev,
+ BTRFS_DEV_STAT_CORRUPTION_ERRS),
+ btrfs_dev_stat_read(dev,
+ BTRFS_DEV_STAT_GENERATION_ERRS));
+}
+
+static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
+{
+ printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+ rcu_str_deref(dev->name),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
+ btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
+}
+
+int btrfs_get_dev_stats(struct btrfs_root *root,
+ struct btrfs_ioctl_get_dev_stats *stats,
+ int reset_after_read)
+{
+ struct btrfs_device *dev;
+ struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ int i;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ dev = btrfs_find_device(root, stats->devid, NULL, NULL);
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ if (!dev) {
+ printk(KERN_WARNING
+ "btrfs: get dev_stats failed, device not found\n");
+ return -ENODEV;
+ } else if (!dev->dev_stats_valid) {
+ printk(KERN_WARNING
+ "btrfs: get dev_stats failed, not yet valid\n");
+ return -ENODEV;
+ } else if (reset_after_read) {
+ for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
+ if (stats->nr_items > i)
+ stats->values[i] =
+ btrfs_dev_stat_read_and_reset(dev, i);
+ else
+ btrfs_dev_stat_reset(dev, i);
+ }
+ } else {
+ for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
+ if (stats->nr_items > i)
+ stats->values[i] = btrfs_dev_stat_read(dev, i);
+ }
+ if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
+ stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
+ return 0;
+}