summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/block_dev.c18
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/coda/file.c23
-rw-r--r--fs/crypto/crypto.c11
-rw-r--r--fs/crypto/fname.c85
-rw-r--r--fs/crypto/keyinfo.c71
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/ext4/dir.c8
-rw-r--r--fs/ext4/ext4.h35
-rw-r--r--fs/ext4/extents.c27
-rw-r--r--fs/ext4/file.c10
-rw-r--r--fs/ext4/fsync.c9
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c71
-rw-r--r--fs/ext4/ioctl.c11
-rw-r--r--fs/ext4/move_extent.c7
-rw-r--r--fs/ext4/namei.c22
-rw-r--r--fs/ext4/page-io.c4
-rw-r--r--fs/ext4/super.c126
-rw-r--r--fs/ext4/symlink.c10
-rw-r--r--fs/ext4/xattr.c340
-rw-r--r--fs/f2fs/dir.c6
-rw-r--r--fs/f2fs/namei.c6
-rw-r--r--fs/fuse/dev.c63
-rw-r--r--fs/gfs2/file.c28
-rw-r--r--fs/jbd2/journal.c131
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/mbcache.c6
-rw-r--r--fs/nfs/file.c25
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/ocfs2/file.c34
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/pipe.c13
-rw-r--r--fs/splice.c683
-rw-r--r--fs/xfs/xfs_file.c41
-rw-r--r--fs/xfs/xfs_trace.h1
38 files changed, 767 insertions, 1182 deletions
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7da05b159ade..bfe9f9994935 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -789,7 +789,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
* Will be set to real fs blocksize later.
*
* Linux 2.4.10 and later refuse to read blocks smaller than
- * the hardsect size for the device. But we also need to read at
+ * the logical block size for the device. But we also need to read at
* least 1k to get the second 512 bytes of the volume.
* -WD 10-26-01
*/
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 08ae99343d92..376e4e426324 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -180,9 +180,6 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct inode *inode = bdev_file_inode(file);
- if (IS_DAX(inode))
- return dax_do_io(iocb, inode, iter, blkdev_get_block,
- NULL, DIO_SKIP_DIO_COUNT);
return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter,
blkdev_get_block, NULL, NULL,
DIO_SKIP_DIO_COUNT);
@@ -302,14 +299,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
error = sb->s_op->thaw_super(sb);
else
error = thaw_super(sb);
- if (error) {
+ if (error)
bdev->bd_fsfreeze_count++;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
- }
out:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return 0;
+ return error;
}
EXPORT_SYMBOL(thaw_bdev);
@@ -1275,7 +1269,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
- bdev->bd_inode->i_flags = 0;
if (!partno) {
ret = -ENXIO;
@@ -1303,11 +1296,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret) {
+ if (!ret)
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
- if (!bdev_dax_capable(bdev))
- bdev->bd_inode->i_flags &= ~S_DAX;
- }
/*
* If the device is invalidated, rescan partition
@@ -1342,8 +1332,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
- if (!bdev_dax_capable(bdev))
- bdev->bd_inode->i_flags &= ~S_DAX;
}
} else {
if (bdev->bd_contains == bdev) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e6811c42e41e..ca01106795ea 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8412,7 +8412,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
if (!bio)
return -ENOMEM;
- bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
+ bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio));
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
@@ -8450,7 +8450,8 @@ next_block:
start_sector, GFP_NOFS);
if (!bio)
goto out_err;
- bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
+ bio_set_op_attrs(bio, bio_op(orig_bio),
+ bio_flags(orig_bio));
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index f47c7483863b..8415d4f8d1a1 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
static ssize_t
-coda_file_splice_read(struct file *coda_file, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- ssize_t (*splice_read)(struct file *, loff_t *,
- struct pipe_inode_info *, size_t, unsigned int);
- struct coda_file_info *cfi;
- struct file *host_file;
-
- cfi = CODA_FTOC(coda_file);
- BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- host_file = cfi->cfi_container;
-
- splice_read = host_file->f_op->splice_read;
- if (!splice_read)
- splice_read = default_file_splice_read;
-
- return splice_read(host_file, ppos, pipe, count, flags);
-}
-
-static ssize_t
coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *coda_file = iocb->ki_filp;
@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
- .splice_read = coda_file_splice_read,
+ .splice_read = generic_file_splice_read,
};
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index c502c116924c..61057b7dbddb 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -28,7 +28,6 @@
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/fscrypto.h>
-#include <linux/ecryptfs.h>
static unsigned int num_prealloc_crypto_pages = 32;
static unsigned int num_prealloc_crypto_ctxs = 128;
@@ -128,11 +127,11 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
EXPORT_SYMBOL(fscrypt_get_ctx);
/**
- * fscrypt_complete() - The completion callback for page encryption
- * @req: The asynchronous encryption request context
- * @res: The result of the encryption operation
+ * page_crypt_complete() - completion callback for page crypto
+ * @req: The asynchronous cipher request context
+ * @res: The result of the cipher operation
*/
-static void fscrypt_complete(struct crypto_async_request *req, int res)
+static void page_crypt_complete(struct crypto_async_request *req, int res)
{
struct fscrypt_completion_result *ecr = req->data;
@@ -170,7 +169,7 @@ static int do_page_crypto(struct inode *inode,
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- fscrypt_complete, &ecr);
+ page_crypt_complete, &ecr);
BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
memcpy(xts_tweak, &index, sizeof(index));
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 5d6d49113efa..9a28133ac3b8 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -10,21 +10,16 @@
* This has not yet undergone a rigorous security audit.
*/
-#include <keys/encrypted-type.h>
-#include <keys/user-type.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/fscrypto.h>
-static u32 size_round_up(size_t size, size_t blksize)
-{
- return ((size + blksize - 1) / blksize) * blksize;
-}
-
/**
- * dir_crypt_complete() -
+ * fname_crypt_complete() - completion callback for filename crypto
+ * @req: The asynchronous cipher request context
+ * @res: The result of the cipher operation
*/
-static void dir_crypt_complete(struct crypto_async_request *req, int res)
+static void fname_crypt_complete(struct crypto_async_request *req, int res)
{
struct fscrypt_completion_result *ecr = req->data;
@@ -35,11 +30,11 @@ static void dir_crypt_complete(struct crypto_async_request *req, int res)
}
/**
- * fname_encrypt() -
+ * fname_encrypt() - encrypt a filename
*
- * This function encrypts the input filename, and returns the length of the
- * ciphertext. Errors are returned as negative numbers. We trust the caller to
- * allocate sufficient memory to oname string.
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
static int fname_encrypt(struct inode *inode,
const struct qstr *iname, struct fscrypt_str *oname)
@@ -60,10 +55,9 @@ static int fname_encrypt(struct inode *inode,
if (iname->len <= 0 || iname->len > lim)
return -EIO;
- ciphertext_len = (iname->len < FS_CRYPTO_BLOCK_SIZE) ?
- FS_CRYPTO_BLOCK_SIZE : iname->len;
- ciphertext_len = size_round_up(ciphertext_len, padding);
- ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len;
+ ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE);
+ ciphertext_len = round_up(ciphertext_len, padding);
+ ciphertext_len = min(ciphertext_len, lim);
if (ciphertext_len <= sizeof(buf)) {
workbuf = buf;
@@ -84,7 +78,7 @@ static int fname_encrypt(struct inode *inode,
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- dir_crypt_complete, &ecr);
+ fname_crypt_complete, &ecr);
/* Copy the input */
memcpy(workbuf, iname->name, iname->len);
@@ -105,20 +99,22 @@ static int fname_encrypt(struct inode *inode,
}
kfree(alloc_buf);
skcipher_request_free(req);
- if (res < 0)
+ if (res < 0) {
printk_ratelimited(KERN_ERR
"%s: Error (error code %d)\n", __func__, res);
+ return res;
+ }
oname->len = ciphertext_len;
- return res;
+ return 0;
}
-/*
- * fname_decrypt()
- * This function decrypts the input filename, and returns
- * the length of the plaintext.
- * Errors are returned as negative numbers.
- * We trust the caller to allocate sufficient memory to oname string.
+/**
+ * fname_decrypt() - decrypt a filename
+ *
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
static int fname_decrypt(struct inode *inode,
const struct fscrypt_str *iname,
@@ -146,7 +142,7 @@ static int fname_decrypt(struct inode *inode,
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- dir_crypt_complete, &ecr);
+ fname_crypt_complete, &ecr);
/* Initialize IV */
memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
@@ -168,7 +164,7 @@ static int fname_decrypt(struct inode *inode,
}
oname->len = strnlen(oname->name, iname->len);
- return oname->len;
+ return 0;
}
static const char *lookup_table =
@@ -231,9 +227,8 @@ u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
if (ci)
padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
- if (ilen < FS_CRYPTO_BLOCK_SIZE)
- ilen = FS_CRYPTO_BLOCK_SIZE;
- return size_round_up(ilen, padding);
+ ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE);
+ return round_up(ilen, padding);
}
EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
@@ -279,6 +274,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer);
/**
* fscrypt_fname_disk_to_usr() - converts a filename from disk space to user
* space
+ *
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
int fscrypt_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
@@ -287,13 +286,12 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
{
const struct qstr qname = FSTR_TO_QSTR(iname);
char buf[24];
- int ret;
if (fscrypt_is_dot_dotdot(&qname)) {
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
- return oname->len;
+ return 0;
}
if (iname->len < FS_CRYPTO_BLOCK_SIZE)
@@ -303,9 +301,9 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
return fname_decrypt(inode, iname, oname);
if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) {
- ret = digest_encode(iname->name, iname->len, oname->name);
- oname->len = ret;
- return ret;
+ oname->len = digest_encode(iname->name, iname->len,
+ oname->name);
+ return 0;
}
if (hash) {
memcpy(buf, &hash, 4);
@@ -315,15 +313,18 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
}
memcpy(buf + 8, iname->name + iname->len - 16, 16);
oname->name[0] = '_';
- ret = digest_encode(buf, 24, oname->name + 1);
- oname->len = ret + 1;
- return ret + 1;
+ oname->len = 1 + digest_encode(buf, 24, oname->name + 1);
+ return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
/**
* fscrypt_fname_usr_to_disk() - converts a filename from user space to disk
* space
+ *
+ * The caller must have allocated sufficient memory for the @oname string.
+ *
+ * Return: 0 on success, -errno on failure
*/
int fscrypt_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
@@ -333,7 +334,7 @@ int fscrypt_fname_usr_to_disk(struct inode *inode,
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
- return oname->len;
+ return 0;
}
if (inode->i_crypt_info)
return fname_encrypt(inode, iname, oname);
@@ -367,10 +368,10 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
if (dir->i_crypt_info) {
ret = fscrypt_fname_alloc_buffer(dir, iname->len,
&fname->crypto_buf);
- if (ret < 0)
+ if (ret)
return ret;
ret = fname_encrypt(dir, iname, &fname->crypto_buf);
- if (ret < 0)
+ if (ret)
goto errout;
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 1ac263eddc4e..82f0285f5d08 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -8,11 +8,8 @@
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
-#include <keys/encrypted-type.h>
#include <keys/user-type.h>
-#include <linux/random.h>
#include <linux/scatterlist.h>
-#include <uapi/linux/keyctl.h>
#include <linux/fscrypto.h>
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
@@ -139,6 +136,38 @@ out:
return res;
}
+static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
+ const char **cipher_str_ret, int *keysize_ret)
+{
+ if (S_ISREG(inode->i_mode)) {
+ if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) {
+ *cipher_str_ret = "xts(aes)";
+ *keysize_ret = FS_AES_256_XTS_KEY_SIZE;
+ return 0;
+ }
+ pr_warn_once("fscrypto: unsupported contents encryption mode "
+ "%d for inode %lu\n",
+ ci->ci_data_mode, inode->i_ino);
+ return -ENOKEY;
+ }
+
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+ if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) {
+ *cipher_str_ret = "cts(cbc(aes))";
+ *keysize_ret = FS_AES_256_CTS_KEY_SIZE;
+ return 0;
+ }
+ pr_warn_once("fscrypto: unsupported filenames encryption mode "
+ "%d for inode %lu\n",
+ ci->ci_filename_mode, inode->i_ino);
+ return -ENOKEY;
+ }
+
+ pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n",
+ (inode->i_mode & S_IFMT), inode->i_ino);
+ return -ENOKEY;
+}
+
static void put_crypt_info(struct fscrypt_info *ci)
{
if (!ci)
@@ -155,8 +184,8 @@ int get_crypt_info(struct inode *inode)
struct fscrypt_context ctx;
struct crypto_skcipher *ctfm;
const char *cipher_str;
+ int keysize;
u8 raw_key[FS_MAX_KEY_SIZE];
- u8 mode;
int res;
res = fscrypt_initialize();
@@ -179,13 +208,19 @@ retry:
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode))
return res;
+ ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
} else if (res != sizeof(ctx)) {
return -EINVAL;
}
- res = 0;
+
+ if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
+ return -EINVAL;
+
+ if (ctx.flags & ~FS_POLICY_FLAGS_VALID)
+ return -EINVAL;
crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
@@ -198,27 +233,11 @@ retry:
crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
- if (S_ISREG(inode->i_mode))
- mode = crypt_info->ci_data_mode;
- else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- mode = crypt_info->ci_filename_mode;
- else
- BUG();
-
- switch (mode) {
- case FS_ENCRYPTION_MODE_AES_256_XTS:
- cipher_str = "xts(aes)";
- break;
- case FS_ENCRYPTION_MODE_AES_256_CTS:
- cipher_str = "cts(cbc(aes))";
- break;
- default:
- printk_once(KERN_WARNING
- "%s: unsupported key mode %d (ino %u)\n",
- __func__, mode, (unsigned) inode->i_ino);
- res = -ENOKEY;
+
+ res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize);
+ if (res)
goto out;
- }
+
if (fscrypt_dummy_context_enabled(inode)) {
memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
goto got_key;
@@ -253,7 +272,7 @@ got_key:
crypt_info->ci_ctfm = ctfm;
crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_skcipher_setkey(ctfm, raw_key, fscrypt_key_size(mode));
+ res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
if (res)
goto out;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7c3ce73cb617..fb9aa16a7727 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if ((dio->op == REQ_OP_READ) &&
((offset + transferred) > dio->i_size))
transferred = dio->i_size - offset;
+ /* ignore EFAULT if some IO has been done */
+ if (unlikely(ret == -EFAULT) && transferred)
+ ret = 0;
}
if (ret == 0)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 67415e0e6af0..e8b365000d73 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -260,11 +260,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Directory is encrypted */
err = fscrypt_fname_disk_to_usr(inode,
0, 0, &de_name, &fstr);
+ de_name = fstr;
fstr.len = save_len;
- if (err < 0)
+ if (err)
goto errout;
if (!dir_emit(ctx,
- fstr.name, err,
+ de_name.name, de_name.len,
le32_to_cpu(de->inode),
get_dtype(sb, de->file_type)))
goto done;
@@ -627,7 +628,7 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
int buf_size)
{
struct ext4_dir_entry_2 *de;
- int nlen, rlen;
+ int rlen;
unsigned int offset = 0;
char *top;
@@ -637,7 +638,6 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
if (ext4_check_dir_entry(dir, NULL, de, bh,
buf, buf_size, offset))
return -EFSCORRUPTED;
- nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ea31931386ec..282a51b07c57 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -262,6 +262,9 @@ struct ext4_io_submit {
(s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
+#define EXT4_MAX_BLOCKS(size, offset, blkbits) \
+ ((EXT4_BLOCK_ALIGN(size + offset, blkbits) >> blkbits) - (offset >> \
+ blkbits))
/* Translate a block number to a cluster number */
#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
@@ -1117,9 +1120,15 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
-#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
-#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */
+#define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota,
+ * enable enforcement for hidden
+ * quota files */
+#define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable
+ * enforcement for hidden quota
+ * files */
+#define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
+ * enforcement */
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
@@ -1636,26 +1645,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
* Feature set definitions
*/
-/* Use the ext4_{has,set,clear}_feature_* helpers; these will be removed */
-#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
-#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
-#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
-#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
-#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
-#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
-#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
-#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
-#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
-
#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d7ccb7f51dfc..c930a0110fb4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4679,6 +4679,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
unsigned int credits;
loff_t epos;
+ BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
map.m_lblk = offset;
map.m_len = len;
/*
@@ -4693,13 +4694,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, len);
- /*
- * We can only call ext_depth() on extent based inodes
- */
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- depth = ext_depth(inode);
- else
- depth = -1;
+ depth = ext_depth(inode);
retry:
while (ret >= 0 && len) {
@@ -4966,13 +4961,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
trace_ext4_fallocate_enter(inode, offset, len, mode);
lblk = offset >> blkbits;
- /*
- * We can't just convert len to max_blocks because
- * If blocksize = 4096 offset = 3072 and len = 2048
- */
- max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- - lblk;
+ max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
@@ -5035,12 +5025,8 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
unsigned int credits, blkbits = inode->i_blkbits;
map.m_lblk = offset >> blkbits;
- /*
- * We can't just convert len to max_blocks because
- * If blocksize = 4096 offset = 3072 and len = 2048
- */
- max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
- map.m_lblk);
+ max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
+
/*
* This is somewhat ugly but the idea is clear: When transaction is
* reserved, everything goes into it. Otherwise we rather start several
@@ -5734,6 +5720,9 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
+ } else {
+ ext4_ext_drop_refs(path);
+ kfree(path);
}
ret = ext4_es_remove_extent(inode, offset_lblk,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 28f542bb0bda..36d49cfbf2dc 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -91,9 +91,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(iocb->ki_filp);
- struct blk_plug plug;
int o_direct = iocb->ki_flags & IOCB_DIRECT;
int unaligned_aio = 0;
int overwrite = 0;
@@ -134,18 +132,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (o_direct) {
size_t length = iov_iter_count(from);
loff_t pos = iocb->ki_pos;
- blk_start_plug(&plug);
/* check whether we do a DIO overwrite or not */
if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
+ pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, len;
map.m_lblk = pos >> blkbits;
- map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
- - map.m_lblk;
+ map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits);
len = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0);
@@ -171,8 +167,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret > 0)
ret = generic_write_sync(iocb, ret);
- if (o_direct)
- blk_finish_plug(&plug);
return ret;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5c4372512ef7..88effb1053c7 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -61,6 +61,13 @@ static int ext4_sync_parent(struct inode *inode)
break;
iput(inode);
inode = next;
+ /*
+ * The directory inode may have gone through rmdir by now. But
+ * the inode itself and its blocks are still allocated (we hold
+ * a reference to the inode so it didn't go through
+ * ext4_evict_inode()) and so we are safe to flush metadata
+ * blocks and the inode.
+ */
ret = sync_mapping_buffers(inode->i_mapping);
if (ret)
break;
@@ -107,7 +114,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (!journal) {
ret = __generic_file_fsync(file, start, end, datasync);
- if (!ret && !hlist_empty(&inode->i_dentry))
+ if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
goto issue_flush;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9e66cd1d7b78..170421edfdfe 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -802,7 +802,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
} else
inode_init_owner(inode, dir, mode);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ if (ext4_has_feature_project(sb) &&
ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
ei->i_projid = EXT4_I(dir)->i_projid;
else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c6ea25a190f8..cd918823b352 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -647,11 +647,19 @@ found:
/*
* We have to zeroout blocks before inserting them into extent
* status tree. Otherwise someone could look them up there and
- * use them before they are really zeroed.
+ * use them before they are really zeroed. We also have to
+ * unmap metadata before zeroing as otherwise writeback can
+ * overwrite zeros with stale data from block device.
*/
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
+ ext4_lblk_t i;
+
+ for (i = 0; i < map->m_len; i++) {
+ unmap_underlying_metadata(inode->i_sb->s_bdev,
+ map->m_pblk + i);
+ }
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
@@ -1649,6 +1657,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (invalidate) {
+ if (page_mapped(page))
+ clear_page_dirty_for_io(page);
block_invalidatepage(page, 0, PAGE_SIZE);
ClearPageUptodate(page);
}
@@ -3526,35 +3536,31 @@ out:
static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
- int unlocked = 0;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
ssize_t ret;
- if (ext4_should_dioread_nolock(inode)) {
- /*
- * Nolock dioread optimization may be dynamically disabled
- * via ext4_inode_block_unlocked_dio(). Check inode's state
- * while holding extra i_dio_count ref.
- */
- inode_dio_begin(inode);
- smp_mb();
- if (unlikely(ext4_test_inode_state(inode,
- EXT4_STATE_DIOREAD_LOCK)))
- inode_dio_end(inode);
- else
- unlocked = 1;
- }
+ /*
+ * Shared inode_lock is enough for us - it protects against concurrent
+ * writes & truncates and since we take care of writing back page cache,
+ * we are protected against page writeback as well.
+ */
+ inode_lock_shared(inode);
if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
- NULL, unlocked ? 0 : DIO_LOCKING);
+ ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
} else {
+ size_t count = iov_iter_count(iter);
+
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+ iocb->ki_pos + count);
+ if (ret)
+ goto out_unlock;
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
iter, ext4_dio_get_block,
- NULL, NULL,
- unlocked ? 0 : DIO_LOCKING);
+ NULL, NULL, 0);
}
- if (unlocked)
- inode_dio_end(inode);
+out_unlock:
+ inode_unlock_shared(inode);
return ret;
}
@@ -3890,7 +3896,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
}
/*
- * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * ext4_punch_hole: punches a hole in a file by releasing the blocks
* associated with the given offset and length
*
* @inode: File inode
@@ -3919,7 +3925,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
ret = filemap_write_and_wait_range(mapping, offset,
offset + length - 1);
if (ret)
@@ -4414,7 +4420,7 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
int ext4_get_projid(struct inode *inode, kprojid_t *projid)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+ if (!ext4_has_feature_project(inode->i_sb))
return -EOPNOTSUPP;
*projid = EXT4_I(inode)->i_projid;
return 0;
@@ -4481,7 +4487,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ if (ext4_has_feature_project(sb) &&
EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
@@ -4814,14 +4820,14 @@ static int ext4_do_update_inode(handle_t *handle,
* Fix up interoperability with old kernels. Otherwise, old inodes get
* re-used with the upper 16 bits of the uid/gid intact
*/
- if (!ei->i_dtime) {
+ if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+ raw_inode->i_uid_high = 0;
+ raw_inode->i_gid_high = 0;
+ } else {
raw_inode->i_uid_high =
cpu_to_le16(high_16_bits(i_uid));
raw_inode->i_gid_high =
cpu_to_le16(high_16_bits(i_gid));
- } else {
- raw_inode->i_uid_high = 0;
- raw_inode->i_gid_high = 0;
}
} else {
raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
@@ -4885,8 +4891,7 @@ static int ext4_do_update_inode(handle_t *handle,
}
}
- BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ BUG_ON(!ext4_has_feature_project(inode->i_sb) &&
i_projid != EXT4_DEF_PROJID);
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 1bb7df5e4536..bf5ae8ebbc97 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -19,8 +19,6 @@
#include "ext4_jbd2.h"
#include "ext4.h"
-#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
-
/**
* Swap memory between @a and @b for @len bytes.
*
@@ -310,8 +308,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
struct ext4_inode *raw_inode;
struct dquot *transfer_to[MAXQUOTAS] = { };
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ if (!ext4_has_feature_project(sb)) {
if (projid != EXT4_DEF_PROJID)
return -EOPNOTSUPP;
else
@@ -772,6 +769,9 @@ resizefs_out:
#ifdef CONFIG_EXT4_FS_ENCRYPTION
struct fscrypt_policy policy;
+ if (!ext4_has_feature_encrypt(sb))
+ return -EOPNOTSUPP;
+
if (copy_from_user(&policy,
(struct fscrypt_policy __user *)arg,
sizeof(policy)))
@@ -842,8 +842,7 @@ resizefs_out:
ext4_get_inode_flags(ei);
fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ if (ext4_has_feature_project(inode->i_sb)) {
fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
EXT4_I(inode)->i_projid);
}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index a920c5d29fac..6fc14def0c70 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -598,6 +598,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
return -EOPNOTSUPP;
}
+ if (ext4_encrypted_inode(orig_inode) ||
+ ext4_encrypted_inode(donor_inode)) {
+ ext4_msg(orig_inode->i_sb, KERN_ERR,
+ "Online defrag not supported for encrypted files");
+ return -EOPNOTSUPP;
+ }
+
/* Protect orig and donor inodes against a truncate */
lock_two_nondirectories(orig_inode, donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 34c0142caf6a..c344b819cffa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -639,7 +639,7 @@ static struct stats dx_show_leaf(struct inode *dir,
res = fscrypt_fname_alloc_buffer(
dir, len,
&fname_crypto_str);
- if (res < 0)
+ if (res)
printk(KERN_WARNING "Error "
"allocating crypto "
"buffer--skipping "
@@ -647,7 +647,7 @@ static struct stats dx_show_leaf(struct inode *dir,
res = fscrypt_fname_disk_to_usr(dir,
0, 0, &de_name,
&fname_crypto_str);
- if (res < 0) {
+ if (res) {
printk(KERN_WARNING "Error "
"converting filename "
"from disk to usr"
@@ -1011,7 +1011,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
hinfo->minor_hash, &de_name,
&fname_crypto_str);
- if (err < 0) {
+ if (err) {
count = err;
goto errout;
}
@@ -2044,33 +2044,31 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
frame->entries = entries;
frame->at = entries;
frame->bh = bh;
- bh = bh2;
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
goto out_frames;
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
if (retval)
goto out_frames;
- de = do_split(handle,dir, &bh, frame, &fname->hinfo);
+ de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
if (IS_ERR(de)) {
retval = PTR_ERR(de);
goto out_frames;
}
- dx_release(frames);
- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
- brelse(bh);
- return retval;
+ retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
out_frames:
/*
* Even if the block split failed, we have to properly write
* out all the changes we did so far. Otherwise we can end up
* with corrupted filesystem.
*/
- ext4_mark_inode_dirty(handle, dir);
+ if (retval)
+ ext4_mark_inode_dirty(handle, dir);
dx_release(frames);
+ brelse(bh2);
return retval;
}
@@ -3144,7 +3142,7 @@ static int ext4_symlink(struct inode *dir,
istr.name = (const unsigned char *) symname;
istr.len = len;
err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
- if (err < 0)
+ if (err)
goto err_drop_inode;
sd->len = cpu_to_le16(ostr.len);
disk_link.name = (char *) sd;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a6132a730967..b4cbee936cf8 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -405,14 +405,12 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
{
struct page *data_page = NULL;
struct inode *inode = page->mapping->host;
- unsigned block_start, blocksize;
+ unsigned block_start;
struct buffer_head *bh, *head;
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
- blocksize = 1 << inode->i_blkbits;
-
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3ec8708989ca..6db81fbcbaa6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -78,6 +78,8 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
+static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ unsigned int journal_inum);
/*
* Lock ordering
@@ -1267,7 +1269,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
@@ -1327,6 +1329,7 @@ static const match_table_t tokens = {
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+ {Opt_prjquota, "prjquota"},
{Opt_barrier, "barrier=%u"},
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
@@ -1546,8 +1549,11 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
MOPT_SET | MOPT_Q},
+ {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
+ MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
+ MOPT_CLEAR | MOPT_Q},
{Opt_usrjquota, 0, MOPT_Q},
{Opt_grpjquota, 0, MOPT_Q},
{Opt_offusrjquota, 0, MOPT_Q},
@@ -1836,13 +1842,17 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
#ifdef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) &&
- (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
- "mount options ignored.");
- clear_opt(sb, USRQUOTA);
- clear_opt(sb, GRPQUOTA);
- } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
+ ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
+ "Cannot enable project quota enforcement.");
+ return 0;
+ }
+ if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@@ -2741,7 +2751,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
- sb_start_write(sb);
for (group = elr->lr_next_group; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
@@ -2768,8 +2777,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
- sb_end_write(sb);
-
return ret;
}
@@ -2834,19 +2841,43 @@ cont_thread:
mutex_unlock(&eli->li_list_mtx);
goto exit_thread;
}
-
list_for_each_safe(pos, n, &eli->li_request_list) {
+ int err = 0;
+ int progress = 0;
elr = list_entry(pos, struct ext4_li_request,
lr_request);
- if (time_after_eq(jiffies, elr->lr_next_sched)) {
- if (ext4_run_li_request(elr) != 0) {
- /* error, remove the lazy_init job */
- ext4_remove_li_request(elr);
- continue;
+ if (time_before(jiffies, elr->lr_next_sched)) {
+ if (time_before(elr->lr_next_sched, next_wakeup))
+ next_wakeup = elr->lr_next_sched;
+ continue;
+ }
+ if (down_read_trylock(&elr->lr_super->s_umount)) {
+ if (sb_start_write_trylock(elr->lr_super)) {
+ progress = 1;
+ /*
+ * We hold sb->s_umount, sb can not
+ * be removed from the list, it is
+ * now safe to drop li_list_mtx
+ */
+ mutex_unlock(&eli->li_list_mtx);
+ err = ext4_run_li_request(elr);
+ sb_end_write(elr->lr_super);
+ mutex_lock(&eli->li_list_mtx);
+ n = pos->next;
}
+ up_read((&elr->lr_super->s_umount));
+ }
+ /* error, remove the lazy_init job */
+ if (err) {
+ ext4_remove_li_request(elr);
+ continue;
+ }
+ if (!progress) {
+ elr->lr_next_sched = jiffies +
+ (prandom_u32()
+ % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
}
-
if (time_before(elr->lr_next_sched, next_wakeup))
next_wakeup = elr->lr_next_sched;
}
@@ -3179,6 +3210,8 @@ int ext4_calculate_overhead(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ struct inode *j_inode;
+ unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0;
char *buf = (char *) get_zeroed_page(GFP_NOFS);
@@ -3209,10 +3242,23 @@ int ext4_calculate_overhead(struct super_block *sb)
memset(buf, 0, PAGE_SIZE);
cond_resched();
}
- /* Add the internal journal blocks as well */
+
+ /*
+ * Add the internal journal blocks whether the journal has been
+ * loaded or not
+ */
if (sbi->s_journal && !sbi->journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
-
+ else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
+ j_inode = ext4_get_journal_inode(sb, j_inum);
+ if (j_inode) {
+ j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
+ overhead += EXT4_NUM_B2C(sbi, j_blocks);
+ iput(j_inode);
+ } else {
+ ext4_msg(sb, KERN_ERR, "can't get journal size");
+ }
+ }
sbi->s_overhead = overhead;
smp_wmb();
free_page((unsigned long) buf);
@@ -4208,18 +4254,16 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
write_unlock(&journal->j_state_lock);
}
-static journal_t *ext4_get_journal(struct super_block *sb,
- unsigned int journal_inum)
+static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ unsigned int journal_inum)
{
struct inode *journal_inode;
- journal_t *journal;
-
- BUG_ON(!ext4_has_feature_journal(sb));
-
- /* First, test for the existence of a valid inode on disk. Bad
- * things happen if we iget() an unused inode, as the subsequent
- * iput() will try to delete it. */
+ /*
+ * Test for the existence of a valid inode on disk. Bad things
+ * happen if we iget() an unused inode, as the subsequent iput()
+ * will try to delete it.
+ */
journal_inode = ext4_iget(sb, journal_inum);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
@@ -4239,6 +4283,20 @@ static journal_t *ext4_get_journal(struct super_block *sb,
iput(journal_inode);
return NULL;
}
+ return journal_inode;
+}
+
+static journal_t *ext4_get_journal(struct super_block *sb,
+ unsigned int journal_inum)
+{
+ struct inode *journal_inode;
+ journal_t *journal;
+
+ BUG_ON(!ext4_has_feature_journal(sb));
+
+ journal_inode = ext4_get_journal_inode(sb, journal_inum);
+ if (!journal_inode)
+ return NULL;
journal = jbd2_journal_init_inode(journal_inode);
if (!journal) {
@@ -5250,12 +5308,18 @@ static int ext4_enable_quotas(struct super_block *sb)
le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
+ bool quota_mopt[EXT4_MAXQUOTAS] = {
+ test_opt(sb, USRQUOTA),
+ test_opt(sb, GRPQUOTA),
+ test_opt(sb, PRJQUOTA),
+ };
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
- DQUOT_USAGE_ENABLED);
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 4d83d9e05f2e..fdf1c6154745 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -30,7 +30,6 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
char *caddr, *paddr = NULL;
struct fscrypt_str cstr, pstr;
struct fscrypt_symlink_data *sd;
- loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 max_size = inode->i_sb->s_blocksize;
@@ -49,7 +48,6 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
if (IS_ERR(cpage))
return ERR_CAST(cpage);
caddr = page_address(cpage);
- caddr[size] = 0;
}
/* Symlink is encrypted */
@@ -65,16 +63,14 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
if (res)
goto errout;
+ paddr = pstr.name;
res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
- if (res < 0)
+ if (res)
goto errout;
- paddr = pstr.name;
-
/* Null-terminate the name */
- if (res <= pstr.len)
- paddr[res] = '\0';
+ paddr[pstr.len] = '\0';
if (cpage)
put_page(cpage);
set_delayed_call(done, kfree_link, paddr);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2eb935ca5d9e..c15d63389957 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -199,6 +199,8 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
}
while (!IS_LAST_ENTRY(entry)) {
+ if (entry->e_value_block != 0)
+ return -EFSCORRUPTED;
if (entry->e_value_size != 0 &&
(value_start + le16_to_cpu(entry->e_value_offs) <
(void *)e + sizeof(__u32) ||
@@ -641,7 +643,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
@@ -661,7 +663,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
/* Compute min_offs and last. */
last = s->first;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
@@ -669,7 +671,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
}
free = min_offs - ((void *)last - s->base) - sizeof(__u32);
if (!s->not_found) {
- if (!s->here->e_value_block && s->here->e_value_size) {
+ if (s->here->e_value_size) {
size_t size = le32_to_cpu(s->here->e_value_size);
free += EXT4_XATTR_SIZE(size);
}
@@ -691,7 +693,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
s->here->e_name_len = name_len;
memcpy(s->here->e_name, i->name, name_len);
} else {
- if (!s->here->e_value_block && s->here->e_value_size) {
+ if (s->here->e_value_size) {
void *first_val = s->base + min_offs;
size_t offs = le16_to_cpu(s->here->e_value_offs);
void *val = s->base + offs;
@@ -725,8 +727,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
last = s->first;
while (!IS_LAST_ENTRY(last)) {
size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block &&
- last->e_value_size && o < offs)
+ if (last->e_value_size && o < offs)
last->e_value_offs =
cpu_to_le16(o + size);
last = EXT4_XATTR_NEXT(last);
@@ -1318,18 +1319,19 @@ retry:
*/
static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
int value_offs_shift, void *to,
- void *from, size_t n, int blocksize)
+ void *from, size_t n)
{
struct ext4_xattr_entry *last = entry;
int new_offs;
+ /* We always shift xattr headers further thus offsets get lower */
+ BUG_ON(value_offs_shift > 0);
+
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
- BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
- > blocksize);
last->e_value_offs = cpu_to_le16(new_offs);
}
}
@@ -1338,6 +1340,141 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
}
/*
+ * Move xattr pointed to by 'entry' from inode into external xattr block
+ */
+static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
+ struct ext4_inode *raw_inode,
+ struct ext4_xattr_entry *entry)
+{
+ struct ext4_xattr_ibody_find *is = NULL;
+ struct ext4_xattr_block_find *bs = NULL;
+ char *buffer = NULL, *b_entry_name = NULL;
+ size_t value_offs, value_size;
+ struct ext4_xattr_info i = {
+ .value = NULL,
+ .value_len = 0,
+ .name_index = entry->e_name_index,
+ };
+ struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int error;
+
+ value_offs = le16_to_cpu(entry->e_value_offs);
+ value_size = le32_to_cpu(entry->e_value_size);
+
+ is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
+ bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
+ buffer = kmalloc(value_size, GFP_NOFS);
+ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
+ if (!is || !bs || !buffer || !b_entry_name) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ is->s.not_found = -ENODATA;
+ bs->s.not_found = -ENODATA;
+ is->iloc.bh = NULL;
+ bs->bh = NULL;
+
+ /* Save the entry name and the entry value */
+ memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ memcpy(b_entry_name, entry->e_name, entry->e_name_len);
+ b_entry_name[entry->e_name_len] = '\0';
+ i.name = b_entry_name;
+
+ error = ext4_get_inode_loc(inode, &is->iloc);
+ if (error)
+ goto out;
+
+ error = ext4_xattr_ibody_find(inode, &i, is);
+ if (error)
+ goto out;
+
+ /* Remove the chosen entry from the inode */
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
+ if (error)
+ goto out;
+
+ i.name = b_entry_name;
+ i.value = buffer;
+ i.value_len = value_size;
+ error = ext4_xattr_block_find(inode, &i, bs);
+ if (error)
+ goto out;
+
+ /* Add entry which was removed from the inode into the block */
+ error = ext4_xattr_block_set(handle, inode, &i, bs);
+ if (error)
+ goto out;
+ error = 0;
+out:
+ kfree(b_entry_name);
+ kfree(buffer);
+ if (is)
+ brelse(is->iloc.bh);
+ kfree(is);
+ kfree(bs);
+
+ return error;
+}
+
+static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
+ struct ext4_inode *raw_inode,
+ int isize_diff, size_t ifree,
+ size_t bfree, int *total_ino)
+{
+ struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ struct ext4_xattr_entry *small_entry;
+ struct ext4_xattr_entry *entry;
+ struct ext4_xattr_entry *last;
+ unsigned int entry_size; /* EA entry size */
+ unsigned int total_size; /* EA entry size + value size */
+ unsigned int min_total_size;
+ int error;
+
+ while (isize_diff > ifree) {
+ entry = NULL;
+ small_entry = NULL;
+ min_total_size = ~0U;
+ last = IFIRST(header);
+ /* Find the entry best suited to be pushed into EA block */
+ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+ total_size =
+ EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
+ EXT4_XATTR_LEN(last->e_name_len);
+ if (total_size <= bfree &&
+ total_size < min_total_size) {
+ if (total_size + ifree < isize_diff) {
+ small_entry = last;
+ } else {
+ entry = last;
+ min_total_size = total_size;
+ }
+ }
+ }
+
+ if (entry == NULL) {
+ if (small_entry == NULL)
+ return -ENOSPC;
+ entry = small_entry;
+ }
+
+ entry_size = EXT4_XATTR_LEN(entry->e_name_len);
+ total_size = entry_size +
+ EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
+ error = ext4_xattr_move_to_block(handle, inode, raw_inode,
+ entry);
+ if (error)
+ return error;
+
+ *total_ino -= entry_size;
+ ifree += total_size;
+ bfree -= total_size;
+ }
+
+ return 0;
+}
+
+/*
* Expand an inode by new_extra_isize bytes when EAs are present.
* Returns 0 on success or negative error number on failure.
*/
@@ -1345,14 +1482,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_entry *entry, *last, *first;
struct buffer_head *bh = NULL;
- struct ext4_xattr_ibody_find *is = NULL;
- struct ext4_xattr_block_find *bs = NULL;
- char *buffer = NULL, *b_entry_name = NULL;
- size_t min_offs, free;
+ size_t min_offs;
+ size_t ifree, bfree;
int total_ino;
- void *base, *start, *end;
+ void *base, *end;
int error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
int isize_diff; /* How much do we need to grow i_extra_isize */
@@ -1368,34 +1502,24 @@ retry:
goto out;
header = IHDR(inode, raw_inode);
- entry = IFIRST(header);
/*
* Check if enough free space is available in the inode to shift the
* entries ahead by new_extra_isize.
*/
- base = start = entry;
+ base = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
min_offs = end - base;
- last = entry;
total_ino = sizeof(struct ext4_xattr_ibody_header);
error = xattr_check_inode(inode, header, end);
if (error)
goto cleanup;
- free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
- if (free >= isize_diff) {
- entry = IFIRST(header);
- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- - new_extra_isize, (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
- (void *)header, total_ino,
- inode->i_sb->s_blocksize);
- EXT4_I(inode)->i_extra_isize = new_extra_isize;
- goto out;
- }
+ ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
+ if (ifree >= isize_diff)
+ goto shift;
/*
* Enough free space isn't available in the inode, check if
@@ -1413,146 +1537,44 @@ retry:
goto cleanup;
}
base = BHDR(bh);
- first = BFIRST(bh);
end = bh->b_data + bh->b_size;
min_offs = end - base;
- free = ext4_xattr_free_space(first, &min_offs, base, NULL);
- if (free < isize_diff) {
+ bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
+ NULL);
+ if (bfree + ifree < isize_diff) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
brelse(bh);
goto retry;
}
- error = -1;
+ error = -ENOSPC;
goto cleanup;
}
} else {
- free = inode->i_sb->s_blocksize;
+ bfree = inode->i_sb->s_blocksize;
}
- while (isize_diff > 0) {
- size_t offs, size, entry_size;
- struct ext4_xattr_entry *small_entry = NULL;
- struct ext4_xattr_info i = {
- .value = NULL,
- .value_len = 0,
- };
- unsigned int total_size; /* EA entry size + value size */
- unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
- unsigned int min_total_size = ~0U;
-
- is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
- bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
- if (!is || !bs) {
- error = -ENOMEM;
- goto cleanup;
- }
-
- is->s.not_found = -ENODATA;
- bs->s.not_found = -ENODATA;
- is->iloc.bh = NULL;
- bs->bh = NULL;
-
- last = IFIRST(header);
- /* Find the entry best suited to be pushed into EA block */
- entry = NULL;
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- total_size =
- EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
- EXT4_XATTR_LEN(last->e_name_len);
- if (total_size <= free && total_size < min_total_size) {
- if (total_size < isize_diff) {
- small_entry = last;
- } else {
- entry = last;
- min_total_size = total_size;
- }
- }
- }
-
- if (entry == NULL) {
- if (small_entry) {
- entry = small_entry;
- } else {
- if (!tried_min_extra_isize &&
- s_min_extra_isize) {
- tried_min_extra_isize++;
- new_extra_isize = s_min_extra_isize;
- kfree(is); is = NULL;
- kfree(bs); bs = NULL;
- brelse(bh);
- goto retry;
- }
- error = -1;
- goto cleanup;
- }
- }
- offs = le16_to_cpu(entry->e_value_offs);
- size = le32_to_cpu(entry->e_value_size);
- entry_size = EXT4_XATTR_LEN(entry->e_name_len);
- i.name_index = entry->e_name_index,
- buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
- b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
- if (!buffer || !b_entry_name) {
- error = -ENOMEM;
- goto cleanup;
+ error = ext4_xattr_make_inode_space(handle, inode, raw_inode,
+ isize_diff, ifree, bfree,
+ &total_ino);
+ if (error) {
+ if (error == -ENOSPC && !tried_min_extra_isize &&
+ s_min_extra_isize) {
+ tried_min_extra_isize++;
+ new_extra_isize = s_min_extra_isize;
+ brelse(bh);
+ goto retry;
}
- /* Save the entry name and the entry value */
- memcpy(buffer, (void *)IFIRST(header) + offs,
- EXT4_XATTR_SIZE(size));
- memcpy(b_entry_name, entry->e_name, entry->e_name_len);
- b_entry_name[entry->e_name_len] = '\0';
- i.name = b_entry_name;
-
- error = ext4_get_inode_loc(inode, &is->iloc);
- if (error)
- goto cleanup;
-
- error = ext4_xattr_ibody_find(inode, &i, is);
- if (error)
- goto cleanup;
-
- /* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
- if (error)
- goto cleanup;
- total_ino -= entry_size;
-
- entry = IFIRST(header);
- if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
- shift_bytes = isize_diff;
- else
- shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
- /* Adjust the offsets and shift the remaining entries ahead */
- ext4_xattr_shift_entries(entry, -shift_bytes,
- (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
- EXT4_I(inode)->i_extra_isize + shift_bytes,
- (void *)header, total_ino, inode->i_sb->s_blocksize);
-
- isize_diff -= shift_bytes;
- EXT4_I(inode)->i_extra_isize += shift_bytes;
- header = IHDR(inode, raw_inode);
-
- i.name = b_entry_name;
- i.value = buffer;
- i.value_len = size;
- error = ext4_xattr_block_find(inode, &i, bs);
- if (error)
- goto cleanup;
-
- /* Add entry which was removed from the inode into the block */
- error = ext4_xattr_block_set(handle, inode, &i, bs);
- if (error)
- goto cleanup;
- kfree(b_entry_name);
- kfree(buffer);
- b_entry_name = NULL;
- buffer = NULL;
- brelse(is->iloc.bh);
- kfree(is);
- kfree(bs);
+ goto cleanup;
}
+shift:
+ /* Adjust the offsets and shift the remaining entries ahead */
+ ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize
+ - new_extra_isize, (void *)raw_inode +
+ EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
+ (void *)header, total_ino);
+ EXT4_I(inode)->i_extra_isize = new_extra_isize;
brelse(bh);
out:
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
@@ -1560,12 +1582,6 @@ out:
return 0;
cleanup:
- kfree(b_entry_name);
- kfree(buffer);
- if (is)
- brelse(is->iloc.bh);
- kfree(is);
- kfree(bs);
brelse(bh);
/*
* We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
@@ -1734,7 +1750,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
*name++;
}
- if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+ if (entry->e_value_size != 0) {
__le32 *value = (__le32 *)((char *)header +
le16_to_cpu(entry->e_value_offs));
for (n = (le32_to_cpu(entry->e_value_size) +
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index cbf85f65ba63..12b5836a1033 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -813,12 +813,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
- int ret;
+ int err;
- ret = fscrypt_fname_disk_to_usr(d->inode,
+ err = fscrypt_fname_disk_to_usr(d->inode,
(u32)de->hash_code, 0,
&de_name, fstr);
- if (ret < 0)
+ if (err)
return true;
de_name = *fstr;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 300aef8a2d5f..5625b879c98a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -454,7 +454,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
ostr.name = sd->encrypted_path;
ostr.len = disk_link.len;
err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
- if (err < 0)
+ if (err)
goto err_out;
sd->len = cpu_to_le16(ostr.len);
@@ -1051,7 +1051,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
goto errout;
res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
- if (res < 0)
+ if (res)
goto errout;
/* this is broken symlink case */
@@ -1063,7 +1063,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
paddr = pstr.name;
/* Null-terminate the name */
- paddr[res] = '\0';
+ paddr[pstr.len] = '\0';
put_page(cpage);
set_delayed_call(done, kfree_link, paddr);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c41bde26c338..70ea57c7b6bb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
struct pipe_buffer *buf = cs->pipebufs;
if (!cs->write) {
- err = buf->ops->confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
fuse_copy_finish(cs);
- err = buf->ops->confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (cs->len != PAGE_SIZE)
goto out_fallback;
- if (buf->ops->steal(cs->pipe, buf) != 0)
+ if (pipe_buf_steal(cs->pipe, buf) != 0)
goto out_fallback;
newpage = buf->page;
@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
- int ret;
+ int total, ret;
int page_nr = 0;
- int do_wakeup = 0;
struct pipe_buffer *bufs;
struct fuse_copy_state cs;
struct fuse_dev *fud = fuse_get_dev(in);
@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;
- ret = 0;
- pipe_lock(pipe);
-
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
- goto out_unlock;
- }
-
if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
ret = -EIO;
- goto out_unlock;
+ goto out;
}
- while (page_nr < cs.nr_segs) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
-
- buf->page = bufs[page_nr].page;
- buf->offset = bufs[page_nr].offset;
- buf->len = bufs[page_nr].len;
+ for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
/*
* Need to be careful about this. Having buf->ops in module
* code can Oops if the buffer persists after module unload.
*/
- buf->ops = &nosteal_pipe_buf_ops;
-
- pipe->nrbufs++;
- page_nr++;
- ret += buf->len;
-
- if (pipe->files)
- do_wakeup = 1;
- }
-
-out_unlock:
- pipe_unlock(pipe);
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ bufs[page_nr].ops = &nosteal_pipe_buf_ops;
+ ret = add_to_pipe(pipe, &bufs[page_nr++]);
+ if (unlikely(ret < 0))
+ break;
}
-
+ if (total)
+ ret = total;
out:
for (; page_nr < cs.nr_segs; page_nr++)
put_page(bufs[page_nr].page);
@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- ibuf->ops->get(pipe, ibuf);
+ pipe_buf_get(pipe, ibuf);
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
- for (idx = 0; idx < nbuf; idx++) {
- struct pipe_buffer *buf = &bufs[idx];
- buf->ops->release(pipe, buf);
- }
+ for (idx = 0; idx < nbuf; idx++)
+ pipe_buf_release(pipe, &bufs[idx]);
+
out:
kfree(bufs);
return ret;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 360188f162bd..e23ff70b3435 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -954,30 +954,6 @@ out_uninit:
return ret;
}
-static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct inode *inode = in->f_mapping->host;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- inode_lock(inode);
-
- ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- if (ret) {
- inode_unlock(inode);
- return ret;
- }
-
- gfs2_glock_dq_uninit(&gh);
- inode_unlock(inode);
-
- return generic_file_splice_read(in, ppos, pipe, len, flags);
-}
-
-
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
- .splice_read = gfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
- .splice_read = gfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 46261a6f902d..927da4956a89 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1090,11 +1090,15 @@ static void jbd2_stats_proc_exit(journal_t *journal)
* very few fields yet: that has to wait until we have created the
* journal structures from from scratch, or loaded them from disk. */
-static journal_t * journal_init_common (void)
+static journal_t *journal_init_common(struct block_device *bdev,
+ struct block_device *fs_dev,
+ unsigned long long start, int len, int blocksize)
{
static struct lock_class_key jbd2_trans_commit_key;
journal_t *journal;
int err;
+ struct buffer_head *bh;
+ int n;
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
@@ -1131,6 +1135,32 @@ static journal_t * journal_init_common (void)
lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
&jbd2_trans_commit_key, 0);
+ /* journal descriptor can store up to n blocks -bzzz */
+ journal->j_blocksize = blocksize;
+ journal->j_dev = bdev;
+ journal->j_fs_dev = fs_dev;
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
+ n = journal->j_blocksize / sizeof(journal_block_tag_t);
+ journal->j_wbufsize = n;
+ journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
+ GFP_KERNEL);
+ if (!journal->j_wbuf) {
+ kfree(journal);
+ return NULL;
+ }
+
+ bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
+ if (!bh) {
+ pr_err("%s: Cannot get buffer for journal superblock\n",
+ __func__);
+ kfree(journal->j_wbuf);
+ kfree(journal);
+ return NULL;
+ }
+ journal->j_sb_buffer = bh;
+ journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
return journal;
}
@@ -1157,51 +1187,21 @@ static journal_t * journal_init_common (void)
* range of blocks on an arbitrary block device.
*
*/
-journal_t * jbd2_journal_init_dev(struct block_device *bdev,
+journal_t *jbd2_journal_init_dev(struct block_device *bdev,
struct block_device *fs_dev,
unsigned long long start, int len, int blocksize)
{
- journal_t *journal = journal_init_common();
- struct buffer_head *bh;
- int n;
+ journal_t *journal;
+ journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
if (!journal)
return NULL;
- /* journal descriptor can store up to n blocks -bzzz */
- journal->j_blocksize = blocksize;
- journal->j_dev = bdev;
- journal->j_fs_dev = fs_dev;
- journal->j_blk_offset = start;
- journal->j_maxlen = len;
bdevname(journal->j_dev, journal->j_devname);
strreplace(journal->j_devname, '/', '!');
jbd2_stats_proc_init(journal);
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
- journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
- if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
- __func__);
- goto out_err;
- }
-
- bh = __getblk(journal->j_dev, start, journal->j_blocksize);
- if (!bh) {
- printk(KERN_ERR
- "%s: Cannot get buffer for journal superblock\n",
- __func__);
- goto out_err;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
return journal;
-out_err:
- kfree(journal->j_wbuf);
- jbd2_stats_proc_exit(journal);
- kfree(journal);
- return NULL;
}
/**
@@ -1212,67 +1212,36 @@ out_err:
* the journal. The inode must exist already, must support bmap() and
* must have all data blocks preallocated.
*/
-journal_t * jbd2_journal_init_inode (struct inode *inode)
+journal_t *jbd2_journal_init_inode(struct inode *inode)
{
- struct buffer_head *bh;
- journal_t *journal = journal_init_common();
+ journal_t *journal;
char *p;
- int err;
- int n;
unsigned long long blocknr;
+ blocknr = bmap(inode, 0);
+ if (!blocknr) {
+ pr_err("%s: Cannot locate journal superblock\n",
+ __func__);
+ return NULL;
+ }
+
+ jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
+ inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
+ inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
+
+ journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
+ blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
+ inode->i_sb->s_blocksize);
if (!journal)
return NULL;
- journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
journal->j_inode = inode;
bdevname(journal->j_dev, journal->j_devname);
p = strreplace(journal->j_devname, '/', '!');
sprintf(p, "-%lu", journal->j_inode->i_ino);
- jbd_debug(1,
- "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
- journal, inode->i_sb->s_id, inode->i_ino,
- (long long) inode->i_size,
- inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
-
- journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
- journal->j_blocksize = inode->i_sb->s_blocksize;
jbd2_stats_proc_init(journal);
- /* journal descriptor can store up to n blocks -bzzz */
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
- journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
- if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
- __func__);
- goto out_err;
- }
-
- err = jbd2_journal_bmap(journal, 0, &blocknr);
- /* If that failed, give up */
- if (err) {
- printk(KERN_ERR "%s: Cannot locate journal superblock\n",
- __func__);
- goto out_err;
- }
-
- bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
- if (!bh) {
- printk(KERN_ERR
- "%s: Cannot get buffer for journal superblock\n",
- __func__);
- goto out_err;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
return journal;
-out_err:
- kfree(journal->j_wbuf);
- jbd2_stats_proc_exit(journal);
- kfree(journal);
- return NULL;
}
/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b5bc3e249163..3d8246a9faa4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -159,6 +159,7 @@ static void wait_transaction_locked(journal_t *journal)
read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
+ jbd2_might_wait_for_commit(journal);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
@@ -182,8 +183,6 @@ static int add_transaction_credits(journal_t *journal, int blocks,
int needed;
int total = blocks + rsv_blocks;
- jbd2_might_wait_for_commit(journal);
-
/*
* If the current transaction is locked down for commit, wait
* for the lock to be released.
@@ -214,6 +213,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
if (atomic_read(&journal->j_reserved_credits) + total >
journal->j_max_transaction_buffers) {
read_unlock(&journal->j_state_lock);
+ jbd2_might_wait_for_commit(journal);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + total <=
journal->j_max_transaction_buffers);
@@ -238,6 +238,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock);
+ jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock);
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
__jbd2_log_wait_for_space(journal);
@@ -255,6 +256,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
sub_reserved_credits(journal, rsv_blocks);
atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock);
+ jbd2_might_wait_for_commit(journal);
wait_event(journal->j_wait_reserved,
atomic_read(&journal->j_reserved_credits) + rsv_blocks
<= journal->j_max_transaction_buffers / 2);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index eccda3a02de6..c5bd19ffa326 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -366,7 +366,11 @@ struct mb_cache *mb_cache_create(int bucket_bits)
cache->c_shrink.count_objects = mb_cache_count;
cache->c_shrink.scan_objects = mb_cache_scan;
cache->c_shrink.seeks = DEFAULT_SEEKS;
- register_shrinker(&cache->c_shrink);
+ if (register_shrinker(&cache->c_shrink)) {
+ kfree(cache->c_hash);
+ kfree(cache);
+ goto err_out;
+ }
INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ca699ddc11c1..2efbdde36c3e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);
-ssize_t
-nfs_file_splice_read(struct file *filp, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- struct inode *inode = file_inode(filp);
- ssize_t res;
-
- dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
- filp, (unsigned long) count, (unsigned long long) *ppos);
-
- nfs_start_io_read(inode);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
- if (!res) {
- res = generic_file_splice_read(filp, ppos, pipe, count, flags);
- if (res > 0)
- nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
- }
- nfs_end_io_read(inode);
- return res;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_read);
-
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index da9e5584bfdc..4b308a1487a5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
-ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
- size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index d085ad794884..89a77950e0b0 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0b055bfb8e86..8f91639f8364 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2321,36 +2321,6 @@ out_mutex:
return ret;
}
-static ssize_t ocfs2_file_splice_read(struct file *in,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len,
- unsigned int flags)
-{
- int ret = 0, lock_level = 0;
- struct inode *inode = file_inode(in);
-
- trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- in->f_path.dentry->d_name.len,
- in->f_path.dentry->d_name.name, len);
-
- /*
- * See the comment in ocfs2_file_read_iter()
- */
- ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
- if (ret < 0) {
- mlog_errno(ret);
- goto bail;
- }
- ocfs2_inode_unlock(inode, lock_level);
-
- ret = generic_file_splice_read(in, ppos, pipe, len, flags);
-
-bail:
- return ret;
-}
-
static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
struct iov_iter *to)
{
@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
- .splice_read = ocfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
- .splice_read = ocfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index f8f5fc5e6c05..0b58abcf1c6d 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
-
DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4ebe6b2e5217..4fc422f0dea8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (bufs) {
int curbuf = pipe->curbuf;
struct pipe_buffer *buf = pipe->bufs + curbuf;
- const struct pipe_buf_operations *ops = buf->ops;
size_t chars = buf->len;
size_t written;
int error;
@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (chars > total_len)
chars = total_len;
- error = ops->confirm(pipe, buf);
+ error = pipe_buf_confirm(pipe, buf);
if (error) {
if (!ret)
ret = error;
@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
}
if (!buf->len) {
- buf->ops = NULL;
- ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
curbuf = (curbuf + 1) & (pipe->buffers - 1);
pipe->curbuf = curbuf;
pipe->nrbufs = --bufs;
@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
(pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + lastbuf;
- const struct pipe_buf_operations *ops = buf->ops;
int offset = buf->offset + buf->len;
- if (ops->can_merge && offset + chars <= PAGE_SIZE) {
- ret = ops->confirm(pipe, buf);
+ if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
+ ret = pipe_buf_confirm(pipe, buf);
if (ret)
goto out;
@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
- buf->ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
}
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
diff --git a/fs/splice.c b/fs/splice.c
index dd9bf7e410d2..aa38901a4f10 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
{
unsigned int spd_pages = spd->nr_pages;
- int ret, do_wakeup, page_nr;
+ int ret = 0, page_nr = 0;
if (!spd_pages)
return 0;
- ret = 0;
- do_wakeup = 0;
- page_nr = 0;
-
- pipe_lock(pipe);
-
- for (;;) {
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
- break;
- }
-
- if (pipe->nrbufs < pipe->buffers) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
-
- buf->page = spd->pages[page_nr];
- buf->offset = spd->partial[page_nr].offset;
- buf->len = spd->partial[page_nr].len;
- buf->private = spd->partial[page_nr].private;
- buf->ops = spd->ops;
- if (spd->flags & SPLICE_F_GIFT)
- buf->flags |= PIPE_BUF_FLAG_GIFT;
-
- pipe->nrbufs++;
- page_nr++;
- ret += buf->len;
-
- if (pipe->files)
- do_wakeup = 1;
+ if (unlikely(!pipe->readers)) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ goto out;
+ }
- if (!--spd->nr_pages)
- break;
- if (pipe->nrbufs < pipe->buffers)
- continue;
+ while (pipe->nrbufs < pipe->buffers) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
- break;
- }
+ buf->page = spd->pages[page_nr];
+ buf->offset = spd->partial[page_nr].offset;
+ buf->len = spd->partial[page_nr].len;
+ buf->private = spd->partial[page_nr].private;
+ buf->ops = spd->ops;
- if (spd->flags & SPLICE_F_NONBLOCK) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
+ pipe->nrbufs++;
+ page_nr++;
+ ret += buf->len;
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
+ if (!--spd->nr_pages)
break;
- }
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- do_wakeup = 0;
- }
-
- pipe->waiting_writers++;
- pipe_wait(pipe);
- pipe->waiting_writers--;
}
- pipe_unlock(pipe);
-
- if (do_wakeup)
- wakeup_pipe_readers(pipe);
+ if (!ret)
+ ret = -EAGAIN;
+out:
while (page_nr < spd_pages)
spd->spd_release(spd, page_nr++);
@@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}
EXPORT_SYMBOL_GPL(splice_to_pipe);
+ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+{
+ int ret;
+
+ if (unlikely(!pipe->readers)) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ } else if (pipe->nrbufs == pipe->buffers) {
+ ret = -EAGAIN;
+ } else {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ pipe->bufs[newbuf] = *buf;
+ pipe->nrbufs++;
+ return buf->len;
+ }
+ pipe_buf_release(pipe, buf);
+ return ret;
+}
+EXPORT_SYMBOL(add_to_pipe);
+
void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
put_page(spd->pages[i]);
@@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
kfree(spd->partial);
}
-static int
-__generic_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct address_space *mapping = in->f_mapping;
- unsigned int loff, nr_pages, req_pages;
- struct page *pages[PIPE_DEF_BUFFERS];
- struct partial_page partial[PIPE_DEF_BUFFERS];
- struct page *page;
- pgoff_t index, end_index;
- loff_t isize;
- int error, page_nr;
- struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
- .nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
- .ops = &page_cache_pipe_buf_ops,
- .spd_release = spd_release_page,
- };
-
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
- index = *ppos >> PAGE_SHIFT;
- loff = *ppos & ~PAGE_MASK;
- req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
- nr_pages = min(req_pages, spd.nr_pages_max);
-
- /*
- * Lookup the (hopefully) full range of pages we need.
- */
- spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
- index += spd.nr_pages;
-
- /*
- * If find_get_pages_contig() returned fewer pages than we needed,
- * readahead/allocate the rest and fill in the holes.
- */
- if (spd.nr_pages < nr_pages)
- page_cache_sync_readahead(mapping, &in->f_ra, in,
- index, req_pages - spd.nr_pages);
-
- error = 0;
- while (spd.nr_pages < nr_pages) {
- /*
- * Page could be there, find_get_pages_contig() breaks on
- * the first hole.
- */
- page = find_get_page(mapping, index);
- if (!page) {
- /*
- * page didn't exist, allocate one.
- */
- page = page_cache_alloc_cold(mapping);
- if (!page)
- break;
-
- error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_constraint(mapping, GFP_KERNEL));
- if (unlikely(error)) {
- put_page(page);
- if (error == -EEXIST)
- continue;
- break;
- }
- /*
- * add_to_page_cache() locks the page, unlock it
- * to avoid convoluting the logic below even more.
- */
- unlock_page(page);
- }
-
- spd.pages[spd.nr_pages++] = page;
- index++;
- }
-
- /*
- * Now loop over the map and see if we need to start IO on any
- * pages, fill in the partial map, etc.
- */
- index = *ppos >> PAGE_SHIFT;
- nr_pages = spd.nr_pages;
- spd.nr_pages = 0;
- for (page_nr = 0; page_nr < nr_pages; page_nr++) {
- unsigned int this_len;
-
- if (!len)
- break;
-
- /*
- * this_len is the max we'll use from this page
- */
- this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
- page = spd.pages[page_nr];
-
- if (PageReadahead(page))
- page_cache_async_readahead(mapping, &in->f_ra, in,
- page, index, req_pages - page_nr);
-
- /*
- * If the page isn't uptodate, we may need to start io on it
- */
- if (!PageUptodate(page)) {
- lock_page(page);
-
- /*
- * Page was truncated, or invalidated by the
- * filesystem. Redo the find/create, but this time the
- * page is kept locked, so there's no chance of another
- * race with truncate/invalidate.
- */
- if (!page->mapping) {
- unlock_page(page);
-retry_lookup:
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
-
- if (!page) {
- error = -ENOMEM;
- break;
- }
- put_page(spd.pages[page_nr]);
- spd.pages[page_nr] = page;
- }
- /*
- * page was already under io and is now done, great
- */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto fill_it;
- }
-
- /*
- * need to read in the page
- */
- error = mapping->a_ops->readpage(in, page);
- if (unlikely(error)) {
- /*
- * Re-lookup the page
- */
- if (error == AOP_TRUNCATED_PAGE)
- goto retry_lookup;
-
- break;
- }
- }
-fill_it:
- /*
- * i_size must be checked after PageUptodate.
- */
- isize = i_size_read(mapping->host);
- end_index = (isize - 1) >> PAGE_SHIFT;
- if (unlikely(!isize || index > end_index))
- break;
-
- /*
- * if this is the last page, see if we need to shrink
- * the length and stop
- */
- if (end_index == index) {
- unsigned int plen;
-
- /*
- * max good bytes in this page
- */
- plen = ((isize - 1) & ~PAGE_MASK) + 1;
- if (plen <= loff)
- break;
-
- /*
- * force quit after adding this page
- */
- this_len = min(this_len, plen - loff);
- len = this_len;
- }
-
- spd.partial[page_nr].offset = loff;
- spd.partial[page_nr].len = this_len;
- len -= this_len;
- loff = 0;
- spd.nr_pages++;
- index++;
- }
-
- /*
- * Release any pages at the end, if we quit early. 'page_nr' is how far
- * we got, 'nr_pages' is how many pages are in the map.
- */
- while (page_nr < nr_pages)
- put_page(spd.pages[page_nr++]);
- in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
-
- if (spd.nr_pages)
- error = splice_to_pipe(pipe, &spd);
-
- splice_shrink_spd(&spd);
- return error;
-}
-
/**
* generic_file_splice_read - splice data from file to a pipe
* @in: file to splice from
@@ -514,39 +290,53 @@ fill_it:
*
* Description:
* Will read pages from given file and fill them into a pipe. Can be
- * used as long as the address_space operations for the source implements
- * a readpage() hook.
+ * used as long as it has more or less sane ->read_iter().
*
*/
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- loff_t isize, left;
- int ret;
-
- if (IS_DAX(in->f_mapping->host))
- return default_file_splice_read(in, ppos, pipe, len, flags);
+ struct iov_iter to;
+ struct kiocb kiocb;
+ loff_t isize;
+ int idx, ret;
isize = i_size_read(in->f_mapping->host);
if (unlikely(*ppos >= isize))
return 0;
- left = isize - *ppos;
- if (unlikely(left < len))
- len = left;
-
- ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
+ iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+ idx = to.idx;
+ init_sync_kiocb(&kiocb, in);
+ kiocb.ki_pos = *ppos;
+ ret = in->f_op->read_iter(&kiocb, &to);
if (ret > 0) {
- *ppos += ret;
+ *ppos = kiocb.ki_pos;
file_accessed(in);
+ } else if (ret < 0) {
+ if (WARN_ON(to.idx != idx || to.iov_offset)) {
+ /*
+ * a bogus ->read_iter() has copied something and still
+ * returned an error instead of a short read.
+ */
+ to.idx = idx;
+ to.iov_offset = 0;
+ iov_iter_advance(&to, 0); /* to free what was emitted */
+ }
+ /*
+ * callers of ->splice_read() expect -EAGAIN on
+ * "can't put anything in there", rather than -EFAULT.
+ */
+ if (ret == -EFAULT)
+ ret = -EAGAIN;
}
return ret;
}
EXPORT_SYMBOL(generic_file_splice_read);
-static const struct pipe_buf_operations default_pipe_buf_ops = {
+const struct pipe_buf_operations default_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release,
@@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);
-static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
unsigned long vlen, loff_t offset)
{
mm_segment_t old_fs;
@@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,
}
EXPORT_SYMBOL(kernel_write);
-ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
+ struct iov_iter to;
+ struct page **pages;
unsigned int nr_pages;
- unsigned int nr_freed;
- size_t offset;
- struct page *pages[PIPE_DEF_BUFFERS];
- struct partial_page partial[PIPE_DEF_BUFFERS];
- struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
+ size_t offset, dummy, copied = 0;
ssize_t res;
- size_t this_len;
- int error;
int i;
- struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
- .nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
- .ops = &default_pipe_buf_ops,
- .spd_release = spd_release_page,
- };
- if (splice_grow_spd(pipe, &spd))
+ if (pipe->nrbufs == pipe->buffers)
+ return -EAGAIN;
+
+ /*
+ * Try to keep page boundaries matching to source pagecache ones -
+ * it probably won't be much help, but...
+ */
+ offset = *ppos & ~PAGE_MASK;
+
+ iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+
+ res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+ if (res <= 0)
return -ENOMEM;
- res = -ENOMEM;
+ nr_pages = res / PAGE_SIZE;
+
vec = __vec;
- if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
- vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
- if (!vec)
- goto shrink_ret;
+ if (nr_pages > PIPE_DEF_BUFFERS) {
+ vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL);
+ if (unlikely(!vec)) {
+ res = -ENOMEM;
+ goto out;
+ }
}
- offset = *ppos & ~PAGE_MASK;
- nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
- for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
- struct page *page;
+ pipe->bufs[to.idx].offset = offset;
+ pipe->bufs[to.idx].len -= offset;
- page = alloc_page(GFP_USER);
- error = -ENOMEM;
- if (!page)
- goto err;
-
- this_len = min_t(size_t, len, PAGE_SIZE - offset);
- vec[i].iov_base = (void __user *) page_address(page);
+ for (i = 0; i < nr_pages; i++) {
+ size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
+ vec[i].iov_base = page_address(pages[i]) + offset;
vec[i].iov_len = this_len;
- spd.pages[i] = page;
- spd.nr_pages++;
len -= this_len;
offset = 0;
}
- res = kernel_readv(in, vec, spd.nr_pages, *ppos);
- if (res < 0) {
- error = res;
- goto err;
- }
-
- error = 0;
- if (!res)
- goto err;
-
- nr_freed = 0;
- for (i = 0; i < spd.nr_pages; i++) {
- this_len = min_t(size_t, vec[i].iov_len, res);
- spd.partial[i].offset = 0;
- spd.partial[i].len = this_len;
- if (!this_len) {
- __free_page(spd.pages[i]);
- spd.pages[i] = NULL;
- nr_freed++;
- }
- res -= this_len;
- }
- spd.nr_pages -= nr_freed;
-
- res = splice_to_pipe(pipe, &spd);
- if (res > 0)
+ res = kernel_readv(in, vec, nr_pages, *ppos);
+ if (res > 0) {
+ copied = res;
*ppos += res;
+ }
-shrink_ret:
if (vec != __vec)
kfree(vec);
- splice_shrink_spd(&spd);
+out:
+ for (i = 0; i < nr_pages; i++)
+ put_page(pages[i]);
+ kvfree(pages);
+ iov_iter_advance(&to, copied); /* truncates and discards */
return res;
-
-err:
- for (i = 0; i < spd.nr_pages; i++)
- __free_page(spd.pages[i]);
-
- res = error;
- goto shrink_ret;
}
-EXPORT_SYMBOL(default_file_splice_read);
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
@@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
while (pipe->nrbufs) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
- const struct pipe_buf_operations *ops = buf->ops;
sd->len = buf->len;
if (sd->len > sd->total_len)
sd->len = sd->total_len;
- ret = buf->ops->confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf);
if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
@@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
sd->total_len -= ret;
if (!buf->len) {
- buf->ops = NULL;
- ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
if (pipe->files)
@@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
if (idx == pipe->buffers - 1)
idx = -1;
- ret = buf->ops->confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf);
if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
@@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
while (ret) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
if (ret >= buf->len) {
- const struct pipe_buf_operations *ops = buf->ops;
ret -= buf->len;
buf->len = 0;
- buf->ops = NULL;
- ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
if (pipe->files)
@@ -1273,10 +1027,8 @@ out_release:
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
- if (buf->ops) {
- buf->ops->release(pipe, buf);
- buf->ops = NULL;
- }
+ if (buf->ops)
+ pipe_buf_release(pipe, buf);
}
if (!bytes)
@@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
}
EXPORT_SYMBOL(do_splice_direct);
+static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+{
+ while (pipe->nrbufs == pipe->buffers) {
+ if (flags & SPLICE_F_NONBLOCK)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+ pipe->waiting_writers++;
+ pipe_wait(pipe);
+ pipe->waiting_writers--;
+ }
+ return 0;
+}
+
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
@@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
offset = in->f_pos;
}
- ret = do_splice_to(in, &offset, opipe, len, flags);
-
+ pipe_lock(opipe);
+ ret = wait_for_space(opipe, flags);
+ if (!ret)
+ ret = do_splice_to(in, &offset, opipe, len, flags);
+ pipe_unlock(opipe);
+ if (ret > 0)
+ wakeup_pipe_readers(opipe);
if (!off_in)
in->f_pos = offset;
else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,
return -EINVAL;
}
-/*
- * Map an iov into an array of pages and offset/length tupples. With the
- * partial_page structure, we can map several non-contiguous ranges into
- * our ones pages[] map instead of splitting that operation into pieces.
- * Could easily be exported as a generic helper for other users, in which
- * case one would probably want to add a 'max_nr_pages' parameter as well.
- */
-static int get_iovec_page_array(const struct iovec __user *iov,
- unsigned int nr_vecs, struct page **pages,
- struct partial_page *partial, bool aligned,
- unsigned int pipe_buffers)
+static int iter_to_pipe(struct iov_iter *from,
+ struct pipe_inode_info *pipe,
+ unsigned flags)
{
- int buffers = 0, error = 0;
-
- while (nr_vecs) {
- unsigned long off, npages;
- struct iovec entry;
- void __user *base;
- size_t len;
- int i;
-
- error = -EFAULT;
- if (copy_from_user(&entry, iov, sizeof(entry)))
- break;
-
- base = entry.iov_base;
- len = entry.iov_len;
-
- /*
- * Sanity check this iovec. 0 read succeeds.
- */
- error = 0;
- if (unlikely(!len))
- break;
- error = -EFAULT;
- if (!access_ok(VERIFY_READ, base, len))
- break;
-
- /*
- * Get this base offset and number of pages, then map
- * in the user pages.
- */
- off = (unsigned long) base & ~PAGE_MASK;
-
- /*
- * If asked for alignment, the offset must be zero and the
- * length a multiple of the PAGE_SIZE.
- */
- error = -EINVAL;
- if (aligned && (off || len & ~PAGE_MASK))
- break;
-
- npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (npages > pipe_buffers - buffers)
- npages = pipe_buffers - buffers;
-
- error = get_user_pages_fast((unsigned long)base, npages,
- 0, &pages[buffers]);
-
- if (unlikely(error <= 0))
+ struct pipe_buffer buf = {
+ .ops = &user_page_pipe_buf_ops,
+ .flags = flags
+ };
+ size_t total = 0;
+ int ret = 0;
+ bool failed = false;
+
+ while (iov_iter_count(from) && !failed) {
+ struct page *pages[16];
+ ssize_t copied;
+ size_t start;
+ int n;
+
+ copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);
+ if (copied <= 0) {
+ ret = copied;
break;
-
- /*
- * Fill this contiguous range into the partial page map.
- */
- for (i = 0; i < error; i++) {
- const int plen = min_t(size_t, len, PAGE_SIZE - off);
-
- partial[buffers].offset = off;
- partial[buffers].len = plen;
-
- off = 0;
- len -= plen;
- buffers++;
}
- /*
- * We didn't complete this iov, stop here since it probably
- * means we have to move some of this into a pipe to
- * be able to continue.
- */
- if (len)
- break;
-
- /*
- * Don't continue if we mapped fewer pages than we asked for,
- * or if we mapped the max number of pages that we have
- * room for.
- */
- if (error < npages || buffers == pipe_buffers)
- break;
-
- nr_vecs--;
- iov++;
+ for (n = 0; copied; n++, start = 0) {
+ int size = min_t(int, copied, PAGE_SIZE - start);
+ if (!failed) {
+ buf.page = pages[n];
+ buf.offset = start;
+ buf.len = size;
+ ret = add_to_pipe(pipe, &buf);
+ if (unlikely(ret < 0)) {
+ failed = true;
+ } else {
+ iov_iter_advance(from, ret);
+ total += ret;
+ }
+ } else {
+ put_page(pages[n]);
+ }
+ copied -= size;
+ }
}
-
- if (buffers)
- return buffers;
-
- return error;
+ return total ? total : ret;
}
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
unsigned long nr_segs, unsigned int flags)
{
struct pipe_inode_info *pipe;
- struct page *pages[PIPE_DEF_BUFFERS];
- struct partial_page partial[PIPE_DEF_BUFFERS];
- struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
- .nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
- .ops = &user_page_pipe_buf_ops,
- .spd_release = spd_release_page,
- };
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter from;
long ret;
+ unsigned buf_flag = 0;
+
+ if (flags & SPLICE_F_GIFT)
+ buf_flag = PIPE_BUF_FLAG_GIFT;
pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
- spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
- spd.partial, false,
- spd.nr_pages_max);
- if (spd.nr_pages <= 0)
- ret = spd.nr_pages;
- else
- ret = splice_to_pipe(pipe, &spd);
+ ret = import_iovec(WRITE, uiov, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &from);
+ if (ret < 0)
+ return ret;
- splice_shrink_spd(&spd);
+ pipe_lock(pipe);
+ ret = wait_for_space(pipe, flags);
+ if (!ret)
+ ret = iter_to_pipe(&from, pipe, buf_flag);
+ pipe_unlock(pipe);
+ if (ret > 0)
+ wakeup_pipe_readers(pipe);
+ kfree(iov);
return ret;
}
@@ -1876,7 +1589,7 @@ retry:
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- ibuf->ops->get(ipipe, ibuf);
+ pipe_buf_get(ipipe, ibuf);
*obuf = *ibuf;
/*
@@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- ibuf->ops->get(ipipe, ibuf);
+ pipe_buf_get(ipipe, ibuf);
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bac55c687085..26acfbb331ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -393,45 +393,6 @@ xfs_file_read_iter(
return ret;
}
-STATIC ssize_t
-xfs_file_splice_read(
- struct file *infilp,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t count,
- unsigned int flags)
-{
- struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
- ssize_t ret;
-
- XFS_STATS_INC(ip->i_mount, xs_read_calls);
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- trace_xfs_file_splice_read(ip, count, *ppos);
-
- /*
- * DAX inodes cannot ues the page cache for splice, so we have to push
- * them through the VFS IO path. This means it goes through
- * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
- * cannot lock the splice operation at this level for DAX inodes.
- */
- if (IS_DAX(VFS_I(ip))) {
- ret = default_file_splice_read(infilp, ppos, pipe, count,
- flags);
- goto out;
- }
-
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-out:
- if (ret > 0)
- XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
- return ret;
-}
-
/*
* Zero any on disk space between the current EOF and the new, larger EOF.
*
@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
- .splice_read = xfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c6b2b1dcde75..16093c7dacde 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
DECLARE_EVENT_CLASS(xfs_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,