summaryrefslogtreecommitdiffstats
path: root/fs/block_dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--fs/block_dev.c226
1 files changed, 145 insertions, 81 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8c3c6899ccf3..73d6a735b8f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -25,6 +25,7 @@
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
+#include <linux/kmemleak.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -76,7 +77,7 @@ int set_blocksize(struct block_device *bdev, int size)
return -EINVAL;
/* Size cannot be smaller than the size supported by the device */
- if (size < bdev_hardsect_size(bdev))
+ if (size < bdev_logical_block_size(bdev))
return -EINVAL;
/* Don't change the size if it is same as current */
@@ -106,7 +107,7 @@ EXPORT_SYMBOL(sb_set_blocksize);
int sb_min_blocksize(struct super_block *sb, int size)
{
- int minsize = bdev_hardsect_size(sb->s_bdev);
+ int minsize = bdev_logical_block_size(sb->s_bdev);
if (size < minsize)
size = minsize;
return sb_set_blocksize(sb, size);
@@ -175,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
iov, offset, nr_segs, blkdev_get_blocks, NULL);
}
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+ if (!bdev)
+ return 0;
+ if (!wait)
+ return filemap_flush(bdev->bd_inode->i_mapping);
+ return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
/*
* Write out and wait upon all the dirty data associated with a block
* device via its mapping. Does not take the superblock lock.
*/
int sync_blockdev(struct block_device *bdev)
{
- int ret = 0;
-
- if (bdev)
- ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
- return ret;
+ return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);
@@ -198,19 +204,18 @@ int fsync_bdev(struct block_device *bdev)
{
struct super_block *sb = get_super(bdev);
if (sb) {
- int res = fsync_super(sb);
+ int res = sync_filesystem(sb);
drop_super(sb);
return res;
}
return sync_blockdev(bdev);
}
+EXPORT_SYMBOL(fsync_bdev);
/**
* freeze_bdev -- lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
*
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
* The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -225,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- bdev->bd_fsfreeze_count++;
+ if (++bdev->bd_fsfreeze_count > 1) {
+ /*
+ * We don't even need to grab a reference - the first call
+ * to freeze_bdev grab an active reference and only the last
+ * thaw_bdev drops it.
+ */
sb = get_super(bdev);
+ drop_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
- bdev->bd_fsfreeze_count++;
-
- down(&bdev->bd_mount_sem);
- sb = get_super(bdev);
- if (sb && !(sb->s_flags & MS_RDONLY)) {
- sb->s_frozen = SB_FREEZE_WRITE;
- smp_wmb();
-
- __fsync_super(sb);
-
- sb->s_frozen = SB_FREEZE_TRANS;
- smp_wmb();
-
- sync_blockdev(sb->s_bdev);
-
- if (sb->s_op->freeze_fs) {
- error = sb->s_op->freeze_fs(sb);
- if (error) {
- printk(KERN_ERR
- "VFS:Filesystem freeze failed\n");
- sb->s_frozen = SB_UNFROZEN;
- drop_super(sb);
- up(&bdev->bd_mount_sem);
- bdev->bd_fsfreeze_count--;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return ERR_PTR(error);
- }
+
+ sb = get_active_super(bdev);
+ if (!sb)
+ goto out;
+ if (sb->s_flags & MS_RDONLY) {
+ deactivate_locked_super(sb);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return sb;
+ }
+
+ sb->s_frozen = SB_FREEZE_WRITE;
+ smp_wmb();
+
+ sync_filesystem(sb);
+
+ sb->s_frozen = SB_FREEZE_TRANS;
+ smp_wmb();
+
+ sync_blockdev(sb->s_bdev);
+
+ if (sb->s_op->freeze_fs) {
+ error = sb->s_op->freeze_fs(sb);
+ if (error) {
+ printk(KERN_ERR
+ "VFS:Filesystem freeze failed\n");
+ sb->s_frozen = SB_UNFROZEN;
+ deactivate_locked_super(sb);
+ bdev->bd_fsfreeze_count--;
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return ERR_PTR(error);
}
}
+ up_write(&sb->s_umount);
+ out:
sync_blockdev(bdev);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
- return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
+ return sb; /* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);
@@ -277,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
*/
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
- int error = 0;
+ int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (!bdev->bd_fsfreeze_count) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return -EINVAL;
- }
-
- bdev->bd_fsfreeze_count--;
- if (bdev->bd_fsfreeze_count > 0) {
- if (sb)
- drop_super(sb);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return 0;
- }
-
- if (sb) {
- BUG_ON(sb->s_bdev != bdev);
- if (!(sb->s_flags & MS_RDONLY)) {
- if (sb->s_op->unfreeze_fs) {
- error = sb->s_op->unfreeze_fs(sb);
- if (error) {
- printk(KERN_ERR
- "VFS:Filesystem thaw failed\n");
- sb->s_frozen = SB_FREEZE_TRANS;
- bdev->bd_fsfreeze_count++;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
- }
- }
- sb->s_frozen = SB_UNFROZEN;
- smp_wmb();
- wake_up(&sb->s_wait_unfrozen);
+ if (!bdev->bd_fsfreeze_count)
+ goto out_unlock;
+
+ error = 0;
+ if (--bdev->bd_fsfreeze_count > 0)
+ goto out_unlock;
+
+ if (!sb)
+ goto out_unlock;
+
+ BUG_ON(sb->s_bdev != bdev);
+ down_write(&sb->s_umount);
+ if (sb->s_flags & MS_RDONLY)
+ goto out_deactivate;
+
+ if (sb->s_op->unfreeze_fs) {
+ error = sb->s_op->unfreeze_fs(sb);
+ if (error) {
+ printk(KERN_ERR
+ "VFS:Filesystem thaw failed\n");
+ sb->s_frozen = SB_FREEZE_TRANS;
+ bdev->bd_fsfreeze_count++;
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return error;
}
- drop_super(sb);
}
- up(&bdev->bd_mount_sem);
+ sb->s_frozen = SB_UNFROZEN;
+ smp_wmb();
+ wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+ if (sb)
+ deactivate_locked_super(sb);
+out_unlock:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return 0;
}
@@ -391,7 +405,17 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
- return sync_blockdev(I_BDEV(filp->f_mapping->host));
+ struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+ int error;
+
+ error = sync_blockdev(bdev);
+ if (error)
+ return error;
+
+ error = blkdev_issue_flush(bdev, NULL);
+ if (error == -EOPNOTSUPP)
+ error = 0;
+ return error;
}
/*
@@ -413,7 +437,6 @@ static void bdev_destroy_inode(struct inode *inode)
{
struct bdev_inode *bdi = BDEV_I(inode);
- bdi->bdev.bd_inode_backing_dev_info = NULL;
kmem_cache_free(bdev_cachep, bdi);
}
@@ -424,7 +447,6 @@ static void init_once(void *foo)
memset(bdev, 0, sizeof(*bdev));
mutex_init(&bdev->bd_mutex);
- sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
@@ -491,6 +513,11 @@ void __init bdev_cache_init(void)
bd_mnt = kern_mount(&bd_type);
if (IS_ERR(bd_mnt))
panic("Cannot create bdev pseudo-fs");
+ /*
+ * This vfsmount structure is only used to obtain the
+ * blockdev_superblock, so tell kmemleak not to report it.
+ */
+ kmemleak_not_leak(bd_mnt);
blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}
@@ -552,6 +579,16 @@ struct block_device *bdget(dev_t dev)
EXPORT_SYMBOL(bdget);
+/**
+ * bdgrab -- Grab a reference to an already referenced block device
+ * @bdev: Block device to grab a reference to.
+ */
+struct block_device *bdgrab(struct block_device *bdev)
+{
+ atomic_inc(&bdev->bd_inode->i_count);
+ return bdev;
+}
+
long nr_blockdev_pages(void)
{
struct block_device *bdev;
@@ -1093,7 +1130,7 @@ EXPORT_SYMBOL(revalidate_disk);
int check_disk_change(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
- struct block_device_operations * bdops = disk->fops;
+ const struct block_device_operations *bdops = disk->fops;
if (!bdops->media_changed)
return 0;
@@ -1110,7 +1147,7 @@ EXPORT_SYMBOL(check_disk_change);
void bd_set_size(struct block_device *bdev, loff_t size)
{
- unsigned bsize = bdev_hardsect_size(bdev);
+ unsigned bsize = bdev_logical_block_size(bdev);
bdev->bd_inode->i_size = size;
while (bsize < PAGE_CACHE_SIZE) {
@@ -1221,8 +1258,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
}
} else {
- put_disk(disk);
module_put(disk->fops->owner);
+ put_disk(disk);
disk = NULL;
if (bdev->bd_contains == bdev) {
if (bdev->bd_disk->fops->open) {
@@ -1383,6 +1420,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
}
/*
+ * Write data to the block device. Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ */
+ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t ret;
+
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+ if (ret > 0 || ret == -EIOCBQUEUED) {
+ ssize_t err;
+
+ err = generic_write_sync(file, pos, ret);
+ if (err < 0 && ret > 0)
+ ret = err;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_aio_write);
+
+/*
* Try to release a page associated with block device when the system
* is under memory pressure.
*/
@@ -1414,7 +1478,7 @@ const struct file_operations def_blk_fops = {
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write_nolock,
+ .aio_write = blkdev_aio_write,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,