diff options
Diffstat (limited to 'fs')
38 files changed, 767 insertions, 1182 deletions
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 7da05b159ade..bfe9f9994935 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -789,7 +789,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) * Will be set to real fs blocksize later. * * Linux 2.4.10 and later refuse to read blocks smaller than - * the hardsect size for the device. But we also need to read at + * the logical block size for the device. But we also need to read at * least 1k to get the second 512 bytes of the volume. * -WD 10-26-01 */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 08ae99343d92..376e4e426324 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -180,9 +180,6 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); - if (IS_DAX(inode)) - return dax_do_io(iocb, inode, iter, blkdev_get_block, - NULL, DIO_SKIP_DIO_COUNT); return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, blkdev_get_block, NULL, NULL, DIO_SKIP_DIO_COUNT); @@ -302,14 +299,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) error = sb->s_op->thaw_super(sb); else error = thaw_super(sb); - if (error) { + if (error) bdev->bd_fsfreeze_count++; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return error; - } out: mutex_unlock(&bdev->bd_fsfreeze_mutex); - return 0; + return error; } EXPORT_SYMBOL(thaw_bdev); @@ -1275,7 +1269,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; - bdev->bd_inode->i_flags = 0; if (!partno) { ret = -ENXIO; @@ -1303,11 +1296,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret) { + if (!ret) bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); - if (!bdev_dax_capable(bdev)) - bdev->bd_inode->i_flags &= ~S_DAX; - } /* * If the device is invalidated, rescan partition @@ -1342,8 +1332,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); - if (!bdev_dax_capable(bdev)) - bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e6811c42e41e..ca01106795ea 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8412,7 +8412,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, if (!bio) return -ENOMEM; - bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf); + bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio)); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; @@ -8450,7 +8450,8 @@ next_block: start_sector, GFP_NOFS); if (!bio) goto out_err; - bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf); + bio_set_op_attrs(bio, bio_op(orig_bio), + bio_flags(orig_bio)); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; diff --git a/fs/coda/file.c b/fs/coda/file.c index f47c7483863b..8415d4f8d1a1 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } static ssize_t -coda_file_splice_read(struct file *coda_file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t count, - unsigned int flags) -{ - ssize_t (*splice_read)(struct file *, loff_t *, - struct pipe_inode_info *, size_t, unsigned int); - struct coda_file_info *cfi; - struct file *host_file; - - cfi = CODA_FTOC(coda_file); - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); - host_file = cfi->cfi_container; - - splice_read = host_file->f_op->splice_read; - if (!splice_read) - splice_read = default_file_splice_read; - - return splice_read(host_file, ppos, pipe, count, flags); -} - -static ssize_t coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *coda_file = iocb->ki_filp; @@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = { .open = coda_open, .release = coda_release, .fsync = coda_fsync, - .splice_read = coda_file_splice_read, + .splice_read = generic_file_splice_read, }; diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index c502c116924c..61057b7dbddb 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -28,7 +28,6 @@ #include <linux/dcache.h> #include <linux/namei.h> #include <linux/fscrypto.h> -#include <linux/ecryptfs.h> static unsigned int num_prealloc_crypto_pages = 32; static unsigned int num_prealloc_crypto_ctxs = 128; @@ -128,11 +127,11 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags) EXPORT_SYMBOL(fscrypt_get_ctx); /** - * fscrypt_complete() - The completion callback for page encryption - * @req: The asynchronous encryption request context - * @res: The result of the encryption operation + * page_crypt_complete() - completion callback for page crypto + * @req: The asynchronous cipher request context + * @res: The result of the cipher operation */ -static void fscrypt_complete(struct crypto_async_request *req, int res) +static void page_crypt_complete(struct crypto_async_request *req, int res) { struct fscrypt_completion_result *ecr = req->data; @@ -170,7 +169,7 @@ static int do_page_crypto(struct inode *inode, skcipher_request_set_callback( req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fscrypt_complete, &ecr); + page_crypt_complete, &ecr); BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index)); memcpy(xts_tweak, &index, sizeof(index)); diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 5d6d49113efa..9a28133ac3b8 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -10,21 +10,16 @@ * This has not yet undergone a rigorous security audit. */ -#include <keys/encrypted-type.h> -#include <keys/user-type.h> #include <linux/scatterlist.h> #include <linux/ratelimit.h> #include <linux/fscrypto.h> -static u32 size_round_up(size_t size, size_t blksize) -{ - return ((size + blksize - 1) / blksize) * blksize; -} - /** - * dir_crypt_complete() - + * fname_crypt_complete() - completion callback for filename crypto + * @req: The asynchronous cipher request context + * @res: The result of the cipher operation */ -static void dir_crypt_complete(struct crypto_async_request *req, int res) +static void fname_crypt_complete(struct crypto_async_request *req, int res) { struct fscrypt_completion_result *ecr = req->data; @@ -35,11 +30,11 @@ static void dir_crypt_complete(struct crypto_async_request *req, int res) } /** - * fname_encrypt() - + * fname_encrypt() - encrypt a filename * - * This function encrypts the input filename, and returns the length of the - * ciphertext. Errors are returned as negative numbers. We trust the caller to - * allocate sufficient memory to oname string. + * The caller must have allocated sufficient memory for the @oname string. + * + * Return: 0 on success, -errno on failure */ static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) @@ -60,10 +55,9 @@ static int fname_encrypt(struct inode *inode, if (iname->len <= 0 || iname->len > lim) return -EIO; - ciphertext_len = (iname->len < FS_CRYPTO_BLOCK_SIZE) ? - FS_CRYPTO_BLOCK_SIZE : iname->len; - ciphertext_len = size_round_up(ciphertext_len, padding); - ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len; + ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE); + ciphertext_len = round_up(ciphertext_len, padding); + ciphertext_len = min(ciphertext_len, lim); if (ciphertext_len <= sizeof(buf)) { workbuf = buf; @@ -84,7 +78,7 @@ static int fname_encrypt(struct inode *inode, } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - dir_crypt_complete, &ecr); + fname_crypt_complete, &ecr); /* Copy the input */ memcpy(workbuf, iname->name, iname->len); @@ -105,20 +99,22 @@ static int fname_encrypt(struct inode *inode, } kfree(alloc_buf); skcipher_request_free(req); - if (res < 0) + if (res < 0) { printk_ratelimited(KERN_ERR "%s: Error (error code %d)\n", __func__, res); + return res; + } oname->len = ciphertext_len; - return res; + return 0; } -/* - * fname_decrypt() - * This function decrypts the input filename, and returns - * the length of the plaintext. - * Errors are returned as negative numbers. - * We trust the caller to allocate sufficient memory to oname string. +/** + * fname_decrypt() - decrypt a filename + * + * The caller must have allocated sufficient memory for the @oname string. + * + * Return: 0 on success, -errno on failure */ static int fname_decrypt(struct inode *inode, const struct fscrypt_str *iname, @@ -146,7 +142,7 @@ static int fname_decrypt(struct inode *inode, } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - dir_crypt_complete, &ecr); + fname_crypt_complete, &ecr); /* Initialize IV */ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); @@ -168,7 +164,7 @@ static int fname_decrypt(struct inode *inode, } oname->len = strnlen(oname->name, iname->len); - return oname->len; + return 0; } static const char *lookup_table = @@ -231,9 +227,8 @@ u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen) if (ci) padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - if (ilen < FS_CRYPTO_BLOCK_SIZE) - ilen = FS_CRYPTO_BLOCK_SIZE; - return size_round_up(ilen, padding); + ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE); + return round_up(ilen, padding); } EXPORT_SYMBOL(fscrypt_fname_encrypted_size); @@ -279,6 +274,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); /** * fscrypt_fname_disk_to_usr() - converts a filename from disk space to user * space + * + * The caller must have allocated sufficient memory for the @oname string. + * + * Return: 0 on success, -errno on failure */ int fscrypt_fname_disk_to_usr(struct inode *inode, u32 hash, u32 minor_hash, @@ -287,13 +286,12 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, { const struct qstr qname = FSTR_TO_QSTR(iname); char buf[24]; - int ret; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; oname->name[iname->len - 1] = '.'; oname->len = iname->len; - return oname->len; + return 0; } if (iname->len < FS_CRYPTO_BLOCK_SIZE) @@ -303,9 +301,9 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, return fname_decrypt(inode, iname, oname); if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) { - ret = digest_encode(iname->name, iname->len, oname->name); - oname->len = ret; - return ret; + oname->len = digest_encode(iname->name, iname->len, + oname->name); + return 0; } if (hash) { memcpy(buf, &hash, 4); @@ -315,15 +313,18 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, } memcpy(buf + 8, iname->name + iname->len - 16, 16); oname->name[0] = '_'; - ret = digest_encode(buf, 24, oname->name + 1); - oname->len = ret + 1; - return ret + 1; + oname->len = 1 + digest_encode(buf, 24, oname->name + 1); + return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); /** * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk * space + * + * The caller must have allocated sufficient memory for the @oname string. + * + * Return: 0 on success, -errno on failure */ int fscrypt_fname_usr_to_disk(struct inode *inode, const struct qstr *iname, @@ -333,7 +334,7 @@ int fscrypt_fname_usr_to_disk(struct inode *inode, oname->name[0] = '.'; oname->name[iname->len - 1] = '.'; oname->len = iname->len; - return oname->len; + return 0; } if (inode->i_crypt_info) return fname_encrypt(inode, iname, oname); @@ -367,10 +368,10 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (dir->i_crypt_info) { ret = fscrypt_fname_alloc_buffer(dir, iname->len, &fname->crypto_buf); - if (ret < 0) + if (ret) return ret; ret = fname_encrypt(dir, iname, &fname->crypto_buf); - if (ret < 0) + if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 1ac263eddc4e..82f0285f5d08 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -8,11 +8,8 @@ * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. */ -#include <keys/encrypted-type.h> #include <keys/user-type.h> -#include <linux/random.h> #include <linux/scatterlist.h> -#include <uapi/linux/keyctl.h> #include <linux/fscrypto.h> static void derive_crypt_complete(struct crypto_async_request *req, int rc) @@ -139,6 +136,38 @@ out: return res; } +static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode, + const char **cipher_str_ret, int *keysize_ret) +{ + if (S_ISREG(inode->i_mode)) { + if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) { + *cipher_str_ret = "xts(aes)"; + *keysize_ret = FS_AES_256_XTS_KEY_SIZE; + return 0; + } + pr_warn_once("fscrypto: unsupported contents encryption mode " + "%d for inode %lu\n", + ci->ci_data_mode, inode->i_ino); + return -ENOKEY; + } + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { + if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) { + *cipher_str_ret = "cts(cbc(aes))"; + *keysize_ret = FS_AES_256_CTS_KEY_SIZE; + return 0; + } + pr_warn_once("fscrypto: unsupported filenames encryption mode " + "%d for inode %lu\n", + ci->ci_filename_mode, inode->i_ino); + return -ENOKEY; + } + + pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n", + (inode->i_mode & S_IFMT), inode->i_ino); + return -ENOKEY; +} + static void put_crypt_info(struct fscrypt_info *ci) { if (!ci) @@ -155,8 +184,8 @@ int get_crypt_info(struct inode *inode) struct fscrypt_context ctx; struct crypto_skcipher *ctfm; const char *cipher_str; + int keysize; u8 raw_key[FS_MAX_KEY_SIZE]; - u8 mode; int res; res = fscrypt_initialize(); @@ -179,13 +208,19 @@ retry: if (res < 0) { if (!fscrypt_dummy_context_enabled(inode)) return res; + ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS; ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS; ctx.flags = 0; } else if (res != sizeof(ctx)) { return -EINVAL; } - res = 0; + + if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1) + return -EINVAL; + + if (ctx.flags & ~FS_POLICY_FLAGS_VALID) + return -EINVAL; crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS); if (!crypt_info) @@ -198,27 +233,11 @@ retry: crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); - if (S_ISREG(inode->i_mode)) - mode = crypt_info->ci_data_mode; - else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - mode = crypt_info->ci_filename_mode; - else - BUG(); - - switch (mode) { - case FS_ENCRYPTION_MODE_AES_256_XTS: - cipher_str = "xts(aes)"; - break; - case FS_ENCRYPTION_MODE_AES_256_CTS: - cipher_str = "cts(cbc(aes))"; - break; - default: - printk_once(KERN_WARNING - "%s: unsupported key mode %d (ino %u)\n", - __func__, mode, (unsigned) inode->i_ino); - res = -ENOKEY; + + res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize); + if (res) goto out; - } + if (fscrypt_dummy_context_enabled(inode)) { memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; @@ -253,7 +272,7 @@ got_key: crypt_info->ci_ctfm = ctfm; crypto_skcipher_clear_flags(ctfm, ~0); crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_skcipher_setkey(ctfm, raw_key, fscrypt_key_size(mode)); + res = crypto_skcipher_setkey(ctfm, raw_key, keysize); if (res) goto out; diff --git a/fs/direct-io.c b/fs/direct-io.c index 7c3ce73cb617..fb9aa16a7727 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if ((dio->op == REQ_OP_READ) && ((offset + transferred) > dio->i_size)) transferred = dio->i_size - offset; + /* ignore EFAULT if some IO has been done */ + if (unlikely(ret == -EFAULT) && transferred) + ret = 0; } if (ret == 0) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 67415e0e6af0..e8b365000d73 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -260,11 +260,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) /* Directory is encrypted */ err = fscrypt_fname_disk_to_usr(inode, 0, 0, &de_name, &fstr); + de_name = fstr; fstr.len = save_len; - if (err < 0) + if (err) goto errout; if (!dir_emit(ctx, - fstr.name, err, + de_name.name, de_name.len, le32_to_cpu(de->inode), get_dtype(sb, de->file_type))) goto done; @@ -627,7 +628,7 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, int buf_size) { struct ext4_dir_entry_2 *de; - int nlen, rlen; + int rlen; unsigned int offset = 0; char *top; @@ -637,7 +638,6 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) return -EFSCORRUPTED; - nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ea31931386ec..282a51b07c57 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -262,6 +262,9 @@ struct ext4_io_submit { (s)->s_first_ino) #endif #define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) +#define EXT4_MAX_BLOCKS(size, offset, blkbits) \ + ((EXT4_BLOCK_ALIGN(size + offset, blkbits) >> blkbits) - (offset >> \ + blkbits)) /* Translate a block number to a cluster number */ #define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits) @@ -1117,9 +1120,15 @@ struct ext4_inode_info { #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ -#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ -#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ -#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */ +#define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota, + * enable enforcement for hidden + * quota files */ +#define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable + * enforcement for hidden quota + * files */ +#define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota + * enforcement */ #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ @@ -1636,26 +1645,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) * Feature set definitions */ -/* Use the ext4_{has,set,clear}_feature_* helpers; these will be removed */ -#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ - ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0) -#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ - ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0) -#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ - ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0) -#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) -#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) -#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) -#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) -#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) -#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) - #define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 #define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 #define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d7ccb7f51dfc..c930a0110fb4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4679,6 +4679,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, unsigned int credits; loff_t epos; + BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)); map.m_lblk = offset; map.m_len = len; /* @@ -4693,13 +4694,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, * credits to insert 1 extent into extent tree */ credits = ext4_chunk_trans_blocks(inode, len); - /* - * We can only call ext_depth() on extent based inodes - */ - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - depth = ext_depth(inode); - else - depth = -1; + depth = ext_depth(inode); retry: while (ret >= 0 && len) { @@ -4966,13 +4961,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) trace_ext4_fallocate_enter(inode, offset, len, mode); lblk = offset >> blkbits; - /* - * We can't just convert len to max_blocks because - * If blocksize = 4096 offset = 3072 and len = 2048 - */ - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - lblk; + max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -5035,12 +5025,8 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, unsigned int credits, blkbits = inode->i_blkbits; map.m_lblk = offset >> blkbits; - /* - * We can't just convert len to max_blocks because - * If blocksize = 4096 offset = 3072 and len = 2048 - */ - max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - map.m_lblk); + max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); + /* * This is somewhat ugly but the idea is clear: When transaction is * reserved, everything goes into it. Otherwise we rather start several @@ -5734,6 +5720,9 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } + } else { + ext4_ext_drop_refs(path); + kfree(path); } ret = ext4_es_remove_extent(inode, offset_lblk, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 28f542bb0bda..36d49cfbf2dc 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -91,9 +91,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) static ssize_t ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct file *file = iocb->ki_filp; struct inode *inode = file_inode(iocb->ki_filp); - struct blk_plug plug; int o_direct = iocb->ki_flags & IOCB_DIRECT; int unaligned_aio = 0; int overwrite = 0; @@ -134,18 +132,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct) { size_t length = iov_iter_count(from); loff_t pos = iocb->ki_pos; - blk_start_plug(&plug); /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !unaligned_aio && - !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { + pos + length <= i_size_read(inode)) { struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; int err, len; map.m_lblk = pos >> blkbits; - map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) - - map.m_lblk; + map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits); len = map.m_len; err = ext4_map_blocks(NULL, inode, &map, 0); @@ -171,8 +167,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret > 0) ret = generic_write_sync(iocb, ret); - if (o_direct) - blk_finish_plug(&plug); return ret; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 5c4372512ef7..88effb1053c7 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -61,6 +61,13 @@ static int ext4_sync_parent(struct inode *inode) break; iput(inode); inode = next; + /* + * The directory inode may have gone through rmdir by now. But + * the inode itself and its blocks are still allocated (we hold + * a reference to the inode so it didn't go through + * ext4_evict_inode()) and so we are safe to flush metadata + * blocks and the inode. + */ ret = sync_mapping_buffers(inode->i_mapping); if (ret) break; @@ -107,7 +114,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!journal) { ret = __generic_file_fsync(file, start, end, datasync); - if (!ret && !hlist_empty(&inode->i_dentry)) + if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) goto issue_flush; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9e66cd1d7b78..170421edfdfe 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -802,7 +802,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, } else inode_init_owner(inode, dir, mode); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) && + if (ext4_has_feature_project(sb) && ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) ei->i_projid = EXT4_I(dir)->i_projid; else diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c6ea25a190f8..cd918823b352 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -647,11 +647,19 @@ found: /* * We have to zeroout blocks before inserting them into extent * status tree. Otherwise someone could look them up there and - * use them before they are really zeroed. + * use them before they are really zeroed. We also have to + * unmap metadata before zeroing as otherwise writeback can + * overwrite zeros with stale data from block device. */ if (flags & EXT4_GET_BLOCKS_ZERO && map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) { + ext4_lblk_t i; + + for (i = 0; i < map->m_len; i++) { + unmap_underlying_metadata(inode->i_sb->s_bdev, + map->m_pblk + i); + } ret = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk, map->m_len); if (ret) { @@ -1649,6 +1657,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); if (invalidate) { + if (page_mapped(page)) + clear_page_dirty_for_io(page); block_invalidatepage(page, 0, PAGE_SIZE); ClearPageUptodate(page); } @@ -3526,35 +3536,31 @@ out: static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) { - int unlocked = 0; - struct inode *inode = iocb->ki_filp->f_mapping->host; + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; ssize_t ret; - if (ext4_should_dioread_nolock(inode)) { - /* - * Nolock dioread optimization may be dynamically disabled - * via ext4_inode_block_unlocked_dio(). Check inode's state - * while holding extra i_dio_count ref. - */ - inode_dio_begin(inode); - smp_mb(); - if (unlikely(ext4_test_inode_state(inode, - EXT4_STATE_DIOREAD_LOCK))) - inode_dio_end(inode); - else - unlocked = 1; - } + /* + * Shared inode_lock is enough for us - it protects against concurrent + * writes & truncates and since we take care of writing back page cache, + * we are protected against page writeback as well. + */ + inode_lock_shared(inode); if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, - NULL, unlocked ? 0 : DIO_LOCKING); + ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0); } else { + size_t count = iov_iter_count(iter); + + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, + iocb->ki_pos + count); + if (ret) + goto out_unlock; ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, ext4_dio_get_block, - NULL, NULL, - unlocked ? 0 : DIO_LOCKING); + NULL, NULL, 0); } - if (unlocked) - inode_dio_end(inode); +out_unlock: + inode_unlock_shared(inode); return ret; } @@ -3890,7 +3896,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, } /* - * ext4_punch_hole: punches a hole in a file by releaseing the blocks + * ext4_punch_hole: punches a hole in a file by releasing the blocks * associated with the given offset and length * * @inode: File inode @@ -3919,7 +3925,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) * Write out all dirty pages to avoid race conditions * Then release them. */ - if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { ret = filemap_write_and_wait_range(mapping, offset, offset + length - 1); if (ret) @@ -4414,7 +4420,7 @@ static inline void ext4_iget_extra_inode(struct inode *inode, int ext4_get_projid(struct inode *inode, kprojid_t *projid) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT)) + if (!ext4_has_feature_project(inode->i_sb)) return -EOPNOTSUPP; *projid = EXT4_I(inode)->i_projid; return 0; @@ -4481,7 +4487,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) && + if (ext4_has_feature_project(sb) && EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE && EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid); @@ -4814,14 +4820,14 @@ static int ext4_do_update_inode(handle_t *handle, * Fix up interoperability with old kernels. Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ - if (!ei->i_dtime) { + if (ei->i_dtime && list_empty(&ei->i_orphan)) { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } else { raw_inode->i_uid_high = cpu_to_le16(high_16_bits(i_uid)); raw_inode->i_gid_high = cpu_to_le16(high_16_bits(i_gid)); - } else { - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); @@ -4885,8 +4891,7 @@ static int ext4_do_update_inode(handle_t *handle, } } - BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_PROJECT) && + BUG_ON(!ext4_has_feature_project(inode->i_sb) && i_projid != EXT4_DEF_PROJID); if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1bb7df5e4536..bf5ae8ebbc97 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -19,8 +19,6 @@ #include "ext4_jbd2.h" #include "ext4.h" -#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) - /** * Swap memory between @a and @b for @len bytes. * @@ -310,8 +308,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) struct ext4_inode *raw_inode; struct dquot *transfer_to[MAXQUOTAS] = { }; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_PROJECT)) { + if (!ext4_has_feature_project(sb)) { if (projid != EXT4_DEF_PROJID) return -EOPNOTSUPP; else @@ -772,6 +769,9 @@ resizefs_out: #ifdef CONFIG_EXT4_FS_ENCRYPTION struct fscrypt_policy policy; + if (!ext4_has_feature_encrypt(sb)) + return -EOPNOTSUPP; + if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, sizeof(policy))) @@ -842,8 +842,7 @@ resizefs_out: ext4_get_inode_flags(ei); fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_PROJECT)) { + if (ext4_has_feature_project(inode->i_sb)) { fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, EXT4_I(inode)->i_projid); } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index a920c5d29fac..6fc14def0c70 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -598,6 +598,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, return -EOPNOTSUPP; } + if (ext4_encrypted_inode(orig_inode) || + ext4_encrypted_inode(donor_inode)) { + ext4_msg(orig_inode->i_sb, KERN_ERR, + "Online defrag not supported for encrypted files"); + return -EOPNOTSUPP; + } + /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 34c0142caf6a..c344b819cffa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -639,7 +639,7 @@ static struct stats dx_show_leaf(struct inode *dir, res = fscrypt_fname_alloc_buffer( dir, len, &fname_crypto_str); - if (res < 0) + if (res) printk(KERN_WARNING "Error " "allocating crypto " "buffer--skipping " @@ -647,7 +647,7 @@ static struct stats dx_show_leaf(struct inode *dir, res = fscrypt_fname_disk_to_usr(dir, 0, 0, &de_name, &fname_crypto_str); - if (res < 0) { + if (res) { printk(KERN_WARNING "Error " "converting filename " "from disk to usr" @@ -1011,7 +1011,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, err = fscrypt_fname_disk_to_usr(dir, hinfo->hash, hinfo->minor_hash, &de_name, &fname_crypto_str); - if (err < 0) { + if (err) { count = err; goto errout; } @@ -2044,33 +2044,31 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, frame->entries = entries; frame->at = entries; frame->bh = bh; - bh = bh2; retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (retval) goto out_frames; - retval = ext4_handle_dirty_dirent_node(handle, dir, bh); + retval = ext4_handle_dirty_dirent_node(handle, dir, bh2); if (retval) goto out_frames; - de = do_split(handle,dir, &bh, frame, &fname->hinfo); + de = do_split(handle,dir, &bh2, frame, &fname->hinfo); if (IS_ERR(de)) { retval = PTR_ERR(de); goto out_frames; } - dx_release(frames); - retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh); - brelse(bh); - return retval; + retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2); out_frames: /* * Even if the block split failed, we have to properly write * out all the changes we did so far. Otherwise we can end up * with corrupted filesystem. */ - ext4_mark_inode_dirty(handle, dir); + if (retval) + ext4_mark_inode_dirty(handle, dir); dx_release(frames); + brelse(bh2); return retval; } @@ -3144,7 +3142,7 @@ static int ext4_symlink(struct inode *dir, istr.name = (const unsigned char *) symname; istr.len = len; err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err < 0) + if (err) goto err_drop_inode; sd->len = cpu_to_le16(ostr.len); disk_link.name = (char *) sd; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index a6132a730967..b4cbee936cf8 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -405,14 +405,12 @@ int ext4_bio_write_page(struct ext4_io_submit *io, { struct page *data_page = NULL; struct inode *inode = page->mapping->host; - unsigned block_start, blocksize; + unsigned block_start; struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; int nr_to_submit = 0; - blocksize = 1 << inode->i_blkbits; - BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3ec8708989ca..6db81fbcbaa6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -78,6 +78,8 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); +static struct inode *ext4_get_journal_inode(struct super_block *sb, + unsigned int journal_inum); /* * Lock ordering @@ -1267,7 +1269,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, + Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, Opt_lazytime, Opt_nolazytime, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, @@ -1327,6 +1329,7 @@ static const match_table_t tokens = { {Opt_noquota, "noquota"}, {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, + {Opt_prjquota, "prjquota"}, {Opt_barrier, "barrier=%u"}, {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, @@ -1546,8 +1549,11 @@ static const struct mount_opts { MOPT_SET | MOPT_Q}, {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, MOPT_SET | MOPT_Q}, + {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA, + MOPT_SET | MOPT_Q}, {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | - EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, + EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA), + MOPT_CLEAR | MOPT_Q}, {Opt_usrjquota, 0, MOPT_Q}, {Opt_grpjquota, 0, MOPT_Q}, {Opt_offusrjquota, 0, MOPT_Q}, @@ -1836,13 +1842,17 @@ static int parse_options(char *options, struct super_block *sb, return 0; } #ifdef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && - (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { - ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota " - "mount options ignored."); - clear_opt(sb, USRQUOTA); - clear_opt(sb, GRPQUOTA); - } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) { + ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. " + "Cannot enable project quota enforcement."); + return 0; + } + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sb, USRQUOTA); @@ -2741,7 +2751,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr) sb = elr->lr_super; ngroups = EXT4_SB(sb)->s_groups_count; - sb_start_write(sb); for (group = elr->lr_next_group; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) { @@ -2768,8 +2777,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr) elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; } - sb_end_write(sb); - return ret; } @@ -2834,19 +2841,43 @@ cont_thread: mutex_unlock(&eli->li_list_mtx); goto exit_thread; } - list_for_each_safe(pos, n, &eli->li_request_list) { + int err = 0; + int progress = 0; elr = list_entry(pos, struct ext4_li_request, lr_request); - if (time_after_eq(jiffies, elr->lr_next_sched)) { - if (ext4_run_li_request(elr) != 0) { - /* error, remove the lazy_init job */ - ext4_remove_li_request(elr); - continue; + if (time_before(jiffies, elr->lr_next_sched)) { + if (time_before(elr->lr_next_sched, next_wakeup)) + next_wakeup = elr->lr_next_sched; + continue; + } + if (down_read_trylock(&elr->lr_super->s_umount)) { + if (sb_start_write_trylock(elr->lr_super)) { + progress = 1; + /* + * We hold sb->s_umount, sb can not + * be removed from the list, it is + * now safe to drop li_list_mtx + */ + mutex_unlock(&eli->li_list_mtx); + err = ext4_run_li_request(elr); + sb_end_write(elr->lr_super); + mutex_lock(&eli->li_list_mtx); + n = pos->next; } + up_read((&elr->lr_super->s_umount)); + } + /* error, remove the lazy_init job */ + if (err) { + ext4_remove_li_request(elr); + continue; + } + if (!progress) { + elr->lr_next_sched = jiffies + + (prandom_u32() + % (EXT4_DEF_LI_MAX_START_DELAY * HZ)); } - if (time_before(elr->lr_next_sched, next_wakeup)) next_wakeup = elr->lr_next_sched; } @@ -3179,6 +3210,8 @@ int ext4_calculate_overhead(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct inode *j_inode; + unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum); ext4_group_t i, ngroups = ext4_get_groups_count(sb); ext4_fsblk_t overhead = 0; char *buf = (char *) get_zeroed_page(GFP_NOFS); @@ -3209,10 +3242,23 @@ int ext4_calculate_overhead(struct super_block *sb) memset(buf, 0, PAGE_SIZE); cond_resched(); } - /* Add the internal journal blocks as well */ + + /* + * Add the internal journal blocks whether the journal has been + * loaded or not + */ if (sbi->s_journal && !sbi->journal_bdev) overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); - + else if (ext4_has_feature_journal(sb) && !sbi->s_journal) { + j_inode = ext4_get_journal_inode(sb, j_inum); + if (j_inode) { + j_blocks = j_inode->i_size >> sb->s_blocksize_bits; + overhead += EXT4_NUM_B2C(sbi, j_blocks); + iput(j_inode); + } else { + ext4_msg(sb, KERN_ERR, "can't get journal size"); + } + } sbi->s_overhead = overhead; smp_wmb(); free_page((unsigned long) buf); @@ -4208,18 +4254,16 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) write_unlock(&journal->j_state_lock); } -static journal_t *ext4_get_journal(struct super_block *sb, - unsigned int journal_inum) +static struct inode *ext4_get_journal_inode(struct super_block *sb, + unsigned int journal_inum) { struct inode *journal_inode; - journal_t *journal; - - BUG_ON(!ext4_has_feature_journal(sb)); - - /* First, test for the existence of a valid inode on disk. Bad - * things happen if we iget() an unused inode, as the subsequent - * iput() will try to delete it. */ + /* + * Test for the existence of a valid inode on disk. Bad things + * happen if we iget() an unused inode, as the subsequent iput() + * will try to delete it. + */ journal_inode = ext4_iget(sb, journal_inum); if (IS_ERR(journal_inode)) { ext4_msg(sb, KERN_ERR, "no journal found"); @@ -4239,6 +4283,20 @@ static journal_t *ext4_get_journal(struct super_block *sb, iput(journal_inode); return NULL; } + return journal_inode; +} + +static journal_t *ext4_get_journal(struct super_block *sb, + unsigned int journal_inum) +{ + struct inode *journal_inode; + journal_t *journal; + + BUG_ON(!ext4_has_feature_journal(sb)); + + journal_inode = ext4_get_journal_inode(sb, journal_inum); + if (!journal_inode) + return NULL; journal = jbd2_journal_init_inode(journal_inode); if (!journal) { @@ -5250,12 +5308,18 @@ static int ext4_enable_quotas(struct super_block *sb) le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum) }; + bool quota_mopt[EXT4_MAXQUOTAS] = { + test_opt(sb, USRQUOTA), + test_opt(sb, GRPQUOTA), + test_opt(sb, PRJQUOTA), + }; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; for (type = 0; type < EXT4_MAXQUOTAS; type++) { if (qf_inums[type]) { err = ext4_quota_enable(sb, type, QFMT_VFS_V1, - DQUOT_USAGE_ENABLED); + DQUOT_USAGE_ENABLED | + (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { ext4_warning(sb, "Failed to enable quota tracking " diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 4d83d9e05f2e..fdf1c6154745 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -30,7 +30,6 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, char *caddr, *paddr = NULL; struct fscrypt_str cstr, pstr; struct fscrypt_symlink_data *sd; - loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); int res; u32 max_size = inode->i_sb->s_blocksize; @@ -49,7 +48,6 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, if (IS_ERR(cpage)) return ERR_CAST(cpage); caddr = page_address(cpage); - caddr[size] = 0; } /* Symlink is encrypted */ @@ -65,16 +63,14 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); if (res) goto errout; + paddr = pstr.name; res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res < 0) + if (res) goto errout; - paddr = pstr.name; - /* Null-terminate the name */ - if (res <= pstr.len) - paddr[res] = '\0'; + paddr[pstr.len] = '\0'; if (cpage) put_page(cpage); set_delayed_call(done, kfree_link, paddr); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2eb935ca5d9e..c15d63389957 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -199,6 +199,8 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, } while (!IS_LAST_ENTRY(entry)) { + if (entry->e_value_block != 0) + return -EFSCORRUPTED; if (entry->e_value_size != 0 && (value_start + le16_to_cpu(entry->e_value_offs) < (void *)e + sizeof(__u32) || @@ -641,7 +643,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, size_t *min_offs, void *base, int *total) { for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < *min_offs) *min_offs = offs; @@ -661,7 +663,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) /* Compute min_offs and last. */ last = s->first; for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; @@ -669,7 +671,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) } free = min_offs - ((void *)last - s->base) - sizeof(__u32); if (!s->not_found) { - if (!s->here->e_value_block && s->here->e_value_size) { + if (s->here->e_value_size) { size_t size = le32_to_cpu(s->here->e_value_size); free += EXT4_XATTR_SIZE(size); } @@ -691,7 +693,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) s->here->e_name_len = name_len; memcpy(s->here->e_name, i->name, name_len); } else { - if (!s->here->e_value_block && s->here->e_value_size) { + if (s->here->e_value_size) { void *first_val = s->base + min_offs; size_t offs = le16_to_cpu(s->here->e_value_offs); void *val = s->base + offs; @@ -725,8 +727,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) last = s->first; while (!IS_LAST_ENTRY(last)) { size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && - last->e_value_size && o < offs) + if (last->e_value_size && o < offs) last->e_value_offs = cpu_to_le16(o + size); last = EXT4_XATTR_NEXT(last); @@ -1318,18 +1319,19 @@ retry: */ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, int value_offs_shift, void *to, - void *from, size_t n, int blocksize) + void *from, size_t n) { struct ext4_xattr_entry *last = entry; int new_offs; + /* We always shift xattr headers further thus offsets get lower */ + BUG_ON(value_offs_shift > 0); + /* Adjust the value offsets of the entries */ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { new_offs = le16_to_cpu(last->e_value_offs) + value_offs_shift; - BUG_ON(new_offs + le32_to_cpu(last->e_value_size) - > blocksize); last->e_value_offs = cpu_to_le16(new_offs); } } @@ -1338,6 +1340,141 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, } /* + * Move xattr pointed to by 'entry' from inode into external xattr block + */ +static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, + struct ext4_inode *raw_inode, + struct ext4_xattr_entry *entry) +{ + struct ext4_xattr_ibody_find *is = NULL; + struct ext4_xattr_block_find *bs = NULL; + char *buffer = NULL, *b_entry_name = NULL; + size_t value_offs, value_size; + struct ext4_xattr_info i = { + .value = NULL, + .value_len = 0, + .name_index = entry->e_name_index, + }; + struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); + int error; + + value_offs = le16_to_cpu(entry->e_value_offs); + value_size = le32_to_cpu(entry->e_value_size); + + is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); + bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); + buffer = kmalloc(value_size, GFP_NOFS); + b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); + if (!is || !bs || !buffer || !b_entry_name) { + error = -ENOMEM; + goto out; + } + + is->s.not_found = -ENODATA; + bs->s.not_found = -ENODATA; + is->iloc.bh = NULL; + bs->bh = NULL; + + /* Save the entry name and the entry value */ + memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); + memcpy(b_entry_name, entry->e_name, entry->e_name_len); + b_entry_name[entry->e_name_len] = '\0'; + i.name = b_entry_name; + + error = ext4_get_inode_loc(inode, &is->iloc); + if (error) + goto out; + + error = ext4_xattr_ibody_find(inode, &i, is); + if (error) + goto out; + + /* Remove the chosen entry from the inode */ + error = ext4_xattr_ibody_set(handle, inode, &i, is); + if (error) + goto out; + + i.name = b_entry_name; + i.value = buffer; + i.value_len = value_size; + error = ext4_xattr_block_find(inode, &i, bs); + if (error) + goto out; + + /* Add entry which was removed from the inode into the block */ + error = ext4_xattr_block_set(handle, inode, &i, bs); + if (error) + goto out; + error = 0; +out: + kfree(b_entry_name); + kfree(buffer); + if (is) + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + + return error; +} + +static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode, + struct ext4_inode *raw_inode, + int isize_diff, size_t ifree, + size_t bfree, int *total_ino) +{ + struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); + struct ext4_xattr_entry *small_entry; + struct ext4_xattr_entry *entry; + struct ext4_xattr_entry *last; + unsigned int entry_size; /* EA entry size */ + unsigned int total_size; /* EA entry size + value size */ + unsigned int min_total_size; + int error; + + while (isize_diff > ifree) { + entry = NULL; + small_entry = NULL; + min_total_size = ~0U; + last = IFIRST(header); + /* Find the entry best suited to be pushed into EA block */ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + total_size = + EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + + EXT4_XATTR_LEN(last->e_name_len); + if (total_size <= bfree && + total_size < min_total_size) { + if (total_size + ifree < isize_diff) { + small_entry = last; + } else { + entry = last; + min_total_size = total_size; + } + } + } + + if (entry == NULL) { + if (small_entry == NULL) + return -ENOSPC; + entry = small_entry; + } + + entry_size = EXT4_XATTR_LEN(entry->e_name_len); + total_size = entry_size + + EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); + error = ext4_xattr_move_to_block(handle, inode, raw_inode, + entry); + if (error) + return error; + + *total_ino -= entry_size; + ifree += total_size; + bfree -= total_size; + } + + return 0; +} + +/* * Expand an inode by new_extra_isize bytes when EAs are present. * Returns 0 on success or negative error number on failure. */ @@ -1345,14 +1482,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct ext4_xattr_entry *entry, *last, *first; struct buffer_head *bh = NULL; - struct ext4_xattr_ibody_find *is = NULL; - struct ext4_xattr_block_find *bs = NULL; - char *buffer = NULL, *b_entry_name = NULL; - size_t min_offs, free; + size_t min_offs; + size_t ifree, bfree; int total_ino; - void *base, *start, *end; + void *base, *end; int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ @@ -1368,34 +1502,24 @@ retry: goto out; header = IHDR(inode, raw_inode); - entry = IFIRST(header); /* * Check if enough free space is available in the inode to shift the * entries ahead by new_extra_isize. */ - base = start = entry; + base = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; min_offs = end - base; - last = entry; total_ino = sizeof(struct ext4_xattr_ibody_header); error = xattr_check_inode(inode, header, end); if (error) goto cleanup; - free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); - if (free >= isize_diff) { - entry = IFIRST(header); - ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - - new_extra_isize, (void *)raw_inode + - EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, - (void *)header, total_ino, - inode->i_sb->s_blocksize); - EXT4_I(inode)->i_extra_isize = new_extra_isize; - goto out; - } + ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino); + if (ifree >= isize_diff) + goto shift; /* * Enough free space isn't available in the inode, check if @@ -1413,146 +1537,44 @@ retry: goto cleanup; } base = BHDR(bh); - first = BFIRST(bh); end = bh->b_data + bh->b_size; min_offs = end - base; - free = ext4_xattr_free_space(first, &min_offs, base, NULL); - if (free < isize_diff) { + bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base, + NULL); + if (bfree + ifree < isize_diff) { if (!tried_min_extra_isize && s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; brelse(bh); goto retry; } - error = -1; + error = -ENOSPC; goto cleanup; } } else { - free = inode->i_sb->s_blocksize; + bfree = inode->i_sb->s_blocksize; } - while (isize_diff > 0) { - size_t offs, size, entry_size; - struct ext4_xattr_entry *small_entry = NULL; - struct ext4_xattr_info i = { - .value = NULL, - .value_len = 0, - }; - unsigned int total_size; /* EA entry size + value size */ - unsigned int shift_bytes; /* No. of bytes to shift EAs by? */ - unsigned int min_total_size = ~0U; - - is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); - bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); - if (!is || !bs) { - error = -ENOMEM; - goto cleanup; - } - - is->s.not_found = -ENODATA; - bs->s.not_found = -ENODATA; - is->iloc.bh = NULL; - bs->bh = NULL; - - last = IFIRST(header); - /* Find the entry best suited to be pushed into EA block */ - entry = NULL; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { - total_size = - EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + - EXT4_XATTR_LEN(last->e_name_len); - if (total_size <= free && total_size < min_total_size) { - if (total_size < isize_diff) { - small_entry = last; - } else { - entry = last; - min_total_size = total_size; - } - } - } - - if (entry == NULL) { - if (small_entry) { - entry = small_entry; - } else { - if (!tried_min_extra_isize && - s_min_extra_isize) { - tried_min_extra_isize++; - new_extra_isize = s_min_extra_isize; - kfree(is); is = NULL; - kfree(bs); bs = NULL; - brelse(bh); - goto retry; - } - error = -1; - goto cleanup; - } - } - offs = le16_to_cpu(entry->e_value_offs); - size = le32_to_cpu(entry->e_value_size); - entry_size = EXT4_XATTR_LEN(entry->e_name_len); - i.name_index = entry->e_name_index, - buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS); - b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); - if (!buffer || !b_entry_name) { - error = -ENOMEM; - goto cleanup; + error = ext4_xattr_make_inode_space(handle, inode, raw_inode, + isize_diff, ifree, bfree, + &total_ino); + if (error) { + if (error == -ENOSPC && !tried_min_extra_isize && + s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + brelse(bh); + goto retry; } - /* Save the entry name and the entry value */ - memcpy(buffer, (void *)IFIRST(header) + offs, - EXT4_XATTR_SIZE(size)); - memcpy(b_entry_name, entry->e_name, entry->e_name_len); - b_entry_name[entry->e_name_len] = '\0'; - i.name = b_entry_name; - - error = ext4_get_inode_loc(inode, &is->iloc); - if (error) - goto cleanup; - - error = ext4_xattr_ibody_find(inode, &i, is); - if (error) - goto cleanup; - - /* Remove the chosen entry from the inode */ - error = ext4_xattr_ibody_set(handle, inode, &i, is); - if (error) - goto cleanup; - total_ino -= entry_size; - - entry = IFIRST(header); - if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) - shift_bytes = isize_diff; - else - shift_bytes = entry_size + EXT4_XATTR_SIZE(size); - /* Adjust the offsets and shift the remaining entries ahead */ - ext4_xattr_shift_entries(entry, -shift_bytes, - (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + - EXT4_I(inode)->i_extra_isize + shift_bytes, - (void *)header, total_ino, inode->i_sb->s_blocksize); - - isize_diff -= shift_bytes; - EXT4_I(inode)->i_extra_isize += shift_bytes; - header = IHDR(inode, raw_inode); - - i.name = b_entry_name; - i.value = buffer; - i.value_len = size; - error = ext4_xattr_block_find(inode, &i, bs); - if (error) - goto cleanup; - - /* Add entry which was removed from the inode into the block */ - error = ext4_xattr_block_set(handle, inode, &i, bs); - if (error) - goto cleanup; - kfree(b_entry_name); - kfree(buffer); - b_entry_name = NULL; - buffer = NULL; - brelse(is->iloc.bh); - kfree(is); - kfree(bs); + goto cleanup; } +shift: + /* Adjust the offsets and shift the remaining entries ahead */ + ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize + - new_extra_isize, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, + (void *)header, total_ino); + EXT4_I(inode)->i_extra_isize = new_extra_isize; brelse(bh); out: ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); @@ -1560,12 +1582,6 @@ out: return 0; cleanup: - kfree(b_entry_name); - kfree(buffer); - if (is) - brelse(is->iloc.bh); - kfree(is); - kfree(bs); brelse(bh); /* * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode @@ -1734,7 +1750,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, *name++; } - if (entry->e_value_block == 0 && entry->e_value_size != 0) { + if (entry->e_value_size != 0) { __le32 *value = (__le32 *)((char *)header + le16_to_cpu(entry->e_value_offs)); for (n = (le32_to_cpu(entry->e_value_size) + diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cbf85f65ba63..12b5836a1033 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -813,12 +813,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; - int ret; + int err; - ret = fscrypt_fname_disk_to_usr(d->inode, + err = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); - if (ret < 0) + if (err) return true; de_name = *fstr; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 300aef8a2d5f..5625b879c98a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -454,7 +454,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, ostr.name = sd->encrypted_path; ostr.len = disk_link.len; err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err < 0) + if (err) goto err_out; sd->len = cpu_to_le16(ostr.len); @@ -1051,7 +1051,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, goto errout; res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res < 0) + if (res) goto errout; /* this is broken symlink case */ @@ -1063,7 +1063,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, paddr = pstr.name; /* Null-terminate the name */ - paddr[res] = '\0'; + paddr[pstr.len] = '\0'; put_page(cpage); set_delayed_call(done, kfree_link, paddr); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c41bde26c338..70ea57c7b6bb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) struct pipe_buffer *buf = cs->pipebufs; if (!cs->write) { - err = buf->ops->confirm(cs->pipe, buf); + err = pipe_buf_confirm(cs->pipe, buf); if (err) return err; @@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) fuse_copy_finish(cs); - err = buf->ops->confirm(cs->pipe, buf); + err = pipe_buf_confirm(cs->pipe, buf); if (err) return err; @@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (cs->len != PAGE_SIZE) goto out_fallback; - if (buf->ops->steal(cs->pipe, buf) != 0) + if (pipe_buf_steal(cs->pipe, buf) != 0) goto out_fallback; newpage = buf->page; @@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - int ret; + int total, ret; int page_nr = 0; - int do_wakeup = 0; struct pipe_buffer *bufs; struct fuse_copy_state cs; struct fuse_dev *fud = fuse_get_dev(in); @@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (ret < 0) goto out; - ret = 0; - pipe_lock(pipe); - - if (!pipe->readers) { - send_sig(SIGPIPE, current, 0); - if (!ret) - ret = -EPIPE; - goto out_unlock; - } - if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { ret = -EIO; - goto out_unlock; + goto out; } - while (page_nr < cs.nr_segs) { - int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - struct pipe_buffer *buf = pipe->bufs + newbuf; - - buf->page = bufs[page_nr].page; - buf->offset = bufs[page_nr].offset; - buf->len = bufs[page_nr].len; + for (ret = total = 0; page_nr < cs.nr_segs; total += ret) { /* * Need to be careful about this. Having buf->ops in module * code can Oops if the buffer persists after module unload. */ - buf->ops = &nosteal_pipe_buf_ops; - - pipe->nrbufs++; - page_nr++; - ret += buf->len; - - if (pipe->files) - do_wakeup = 1; - } - -out_unlock: - pipe_unlock(pipe); - - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + bufs[page_nr].ops = &nosteal_pipe_buf_ops; + ret = add_to_pipe(pipe, &bufs[page_nr++]); + if (unlikely(ret < 0)) + break; } - + if (total) + ret = total; out: for (; page_nr < cs.nr_segs; page_nr++) put_page(bufs[page_nr].page); @@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; } else { - ibuf->ops->get(pipe, ibuf); + pipe_buf_get(pipe, ibuf); *obuf = *ibuf; obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->len = rem; @@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); - for (idx = 0; idx < nbuf; idx++) { - struct pipe_buffer *buf = &bufs[idx]; - buf->ops->release(pipe, buf); - } + for (idx = 0; idx < nbuf; idx++) + pipe_buf_release(pipe, &bufs[idx]); + out: kfree(bufs); return ret; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 360188f162bd..e23ff70b3435 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -954,30 +954,6 @@ out_uninit: return ret; } -static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - struct inode *inode = in->f_mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int ret; - - inode_lock(inode); - - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - if (ret) { - inode_unlock(inode); - return ret; - } - - gfs2_glock_dq_uninit(&gh); - inode_unlock(inode); - - return generic_file_splice_read(in, ppos, pipe, len, flags); -} - - static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) @@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = { .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, - .splice_read = gfs2_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, @@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = { .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, - .splice_read = gfs2_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 46261a6f902d..927da4956a89 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1090,11 +1090,15 @@ static void jbd2_stats_proc_exit(journal_t *journal) * very few fields yet: that has to wait until we have created the * journal structures from from scratch, or loaded them from disk. */ -static journal_t * journal_init_common (void) +static journal_t *journal_init_common(struct block_device *bdev, + struct block_device *fs_dev, + unsigned long long start, int len, int blocksize) { static struct lock_class_key jbd2_trans_commit_key; journal_t *journal; int err; + struct buffer_head *bh; + int n; journal = kzalloc(sizeof(*journal), GFP_KERNEL); if (!journal) @@ -1131,6 +1135,32 @@ static journal_t * journal_init_common (void) lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", &jbd2_trans_commit_key, 0); + /* journal descriptor can store up to n blocks -bzzz */ + journal->j_blocksize = blocksize; + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; + journal->j_blk_offset = start; + journal->j_maxlen = len; + n = journal->j_blocksize / sizeof(journal_block_tag_t); + journal->j_wbufsize = n; + journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), + GFP_KERNEL); + if (!journal->j_wbuf) { + kfree(journal); + return NULL; + } + + bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); + if (!bh) { + pr_err("%s: Cannot get buffer for journal superblock\n", + __func__); + kfree(journal->j_wbuf); + kfree(journal); + return NULL; + } + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; + return journal; } @@ -1157,51 +1187,21 @@ static journal_t * journal_init_common (void) * range of blocks on an arbitrary block device. * */ -journal_t * jbd2_journal_init_dev(struct block_device *bdev, +journal_t *jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, unsigned long long start, int len, int blocksize) { - journal_t *journal = journal_init_common(); - struct buffer_head *bh; - int n; + journal_t *journal; + journal = journal_init_common(bdev, fs_dev, start, len, blocksize); if (!journal) return NULL; - /* journal descriptor can store up to n blocks -bzzz */ - journal->j_blocksize = blocksize; - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_maxlen = len; bdevname(journal->j_dev, journal->j_devname); strreplace(journal->j_devname, '/', '!'); jbd2_stats_proc_init(journal); - n = journal->j_blocksize / sizeof(journal_block_tag_t); - journal->j_wbufsize = n; - journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); - if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", - __func__); - goto out_err; - } - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - if (!bh) { - printk(KERN_ERR - "%s: Cannot get buffer for journal superblock\n", - __func__); - goto out_err; - } - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; -out_err: - kfree(journal->j_wbuf); - jbd2_stats_proc_exit(journal); - kfree(journal); - return NULL; } /** @@ -1212,67 +1212,36 @@ out_err: * the journal. The inode must exist already, must support bmap() and * must have all data blocks preallocated. */ -journal_t * jbd2_journal_init_inode (struct inode *inode) +journal_t *jbd2_journal_init_inode(struct inode *inode) { - struct buffer_head *bh; - journal_t *journal = journal_init_common(); + journal_t *journal; char *p; - int err; - int n; unsigned long long blocknr; + blocknr = bmap(inode, 0); + if (!blocknr) { + pr_err("%s: Cannot locate journal superblock\n", + __func__); + return NULL; + } + + jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n", + inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size, + inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); + + journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev, + blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits, + inode->i_sb->s_blocksize); if (!journal) return NULL; - journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; journal->j_inode = inode; bdevname(journal->j_dev, journal->j_devname); p = strreplace(journal->j_devname, '/', '!'); sprintf(p, "-%lu", journal->j_inode->i_ino); - jbd_debug(1, - "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", - journal, inode->i_sb->s_id, inode->i_ino, - (long long) inode->i_size, - inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); - - journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; - journal->j_blocksize = inode->i_sb->s_blocksize; jbd2_stats_proc_init(journal); - /* journal descriptor can store up to n blocks -bzzz */ - n = journal->j_blocksize / sizeof(journal_block_tag_t); - journal->j_wbufsize = n; - journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); - if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", - __func__); - goto out_err; - } - - err = jbd2_journal_bmap(journal, 0, &blocknr); - /* If that failed, give up */ - if (err) { - printk(KERN_ERR "%s: Cannot locate journal superblock\n", - __func__); - goto out_err; - } - - bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) { - printk(KERN_ERR - "%s: Cannot get buffer for journal superblock\n", - __func__); - goto out_err; - } - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - return journal; -out_err: - kfree(journal->j_wbuf); - jbd2_stats_proc_exit(journal); - kfree(journal); - return NULL; } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b5bc3e249163..3d8246a9faa4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -159,6 +159,7 @@ static void wait_transaction_locked(journal_t *journal) read_unlock(&journal->j_state_lock); if (need_to_start) jbd2_log_start_commit(journal, tid); + jbd2_might_wait_for_commit(journal); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); } @@ -182,8 +183,6 @@ static int add_transaction_credits(journal_t *journal, int blocks, int needed; int total = blocks + rsv_blocks; - jbd2_might_wait_for_commit(journal); - /* * If the current transaction is locked down for commit, wait * for the lock to be released. @@ -214,6 +213,7 @@ static int add_transaction_credits(journal_t *journal, int blocks, if (atomic_read(&journal->j_reserved_credits) + total > journal->j_max_transaction_buffers) { read_unlock(&journal->j_state_lock); + jbd2_might_wait_for_commit(journal); wait_event(journal->j_wait_reserved, atomic_read(&journal->j_reserved_credits) + total <= journal->j_max_transaction_buffers); @@ -238,6 +238,7 @@ static int add_transaction_credits(journal_t *journal, int blocks, if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { atomic_sub(total, &t->t_outstanding_credits); read_unlock(&journal->j_state_lock); + jbd2_might_wait_for_commit(journal); write_lock(&journal->j_state_lock); if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) __jbd2_log_wait_for_space(journal); @@ -255,6 +256,7 @@ static int add_transaction_credits(journal_t *journal, int blocks, sub_reserved_credits(journal, rsv_blocks); atomic_sub(total, &t->t_outstanding_credits); read_unlock(&journal->j_state_lock); + jbd2_might_wait_for_commit(journal); wait_event(journal->j_wait_reserved, atomic_read(&journal->j_reserved_credits) + rsv_blocks <= journal->j_max_transaction_buffers / 2); diff --git a/fs/mbcache.c b/fs/mbcache.c index eccda3a02de6..c5bd19ffa326 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -366,7 +366,11 @@ struct mb_cache *mb_cache_create(int bucket_bits) cache->c_shrink.count_objects = mb_cache_count; cache->c_shrink.scan_objects = mb_cache_scan; cache->c_shrink.seeks = DEFAULT_SEEKS; - register_shrinker(&cache->c_shrink); + if (register_shrinker(&cache->c_shrink)) { + kfree(cache->c_hash); + kfree(cache); + goto err_out; + } INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ca699ddc11c1..2efbdde36c3e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) } EXPORT_SYMBOL_GPL(nfs_file_read); -ssize_t -nfs_file_splice_read(struct file *filp, loff_t *ppos, - struct pipe_inode_info *pipe, size_t count, - unsigned int flags) -{ - struct inode *inode = file_inode(filp); - ssize_t res; - - dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", - filp, (unsigned long) count, (unsigned long long) *ppos); - - nfs_start_io_read(inode); - res = nfs_revalidate_mapping(inode, filp->f_mapping); - if (!res) { - res = generic_file_splice_read(filp, ppos, pipe, count, flags); - if (res > 0) - nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); - } - nfs_end_io_read(inode); - return res; -} -EXPORT_SYMBOL_GPL(nfs_file_splice_read); - int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { @@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = { .fsync = nfs_file_fsync, .lock = nfs_lock, .flock = nfs_flock, - .splice_read = nfs_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = simple_nosetlease, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index da9e5584bfdc..4b308a1487a5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *) int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); loff_t nfs_file_llseek(struct file *, loff_t, int); ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); -ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, - size_t, unsigned int); int nfs_file_mmap(struct file *, struct vm_area_struct *); ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); int nfs_file_release(struct inode *, struct file *); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d085ad794884..89a77950e0b0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = { .fsync = nfs_file_fsync, .lock = nfs_lock, .flock = nfs_flock, - .splice_read = nfs_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = simple_nosetlease, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0b055bfb8e86..8f91639f8364 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2321,36 +2321,6 @@ out_mutex: return ret; } -static ssize_t ocfs2_file_splice_read(struct file *in, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags) -{ - int ret = 0, lock_level = 0; - struct inode *inode = file_inode(in); - - trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, - (unsigned long long)OCFS2_I(inode)->ip_blkno, - in->f_path.dentry->d_name.len, - in->f_path.dentry->d_name.name, len); - - /* - * See the comment in ocfs2_file_read_iter() - */ - ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); - if (ret < 0) { - mlog_errno(ret); - goto bail; - } - ocfs2_inode_unlock(inode, lock_level); - - ret = generic_file_splice_read(in, ppos, pipe, len, flags); - -bail: - return ret; -} - static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { @@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = { #endif .lock = ocfs2_lock, .flock = ocfs2_flock, - .splice_read = ocfs2_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, }; @@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = { .compat_ioctl = ocfs2_compat_ioctl, #endif .flock = ocfs2_flock, - .splice_read = ocfs2_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, }; diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index f8f5fc5e6c05..0b58abcf1c6d 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write); DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); -DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read); - DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); diff --git a/fs/pipe.c b/fs/pipe.c index 4ebe6b2e5217..4fc422f0dea8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (bufs) { int curbuf = pipe->curbuf; struct pipe_buffer *buf = pipe->bufs + curbuf; - const struct pipe_buf_operations *ops = buf->ops; size_t chars = buf->len; size_t written; int error; @@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (chars > total_len) chars = total_len; - error = ops->confirm(pipe, buf); + error = pipe_buf_confirm(pipe, buf); if (error) { if (!ret) ret = error; @@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) } if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); + pipe_buf_release(pipe, buf); curbuf = (curbuf + 1) & (pipe->buffers - 1); pipe->curbuf = curbuf; pipe->nrbufs = --bufs; @@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & (pipe->buffers - 1); struct pipe_buffer *buf = pipe->bufs + lastbuf; - const struct pipe_buf_operations *ops = buf->ops; int offset = buf->offset + buf->len; - if (ops->can_merge && offset + chars <= PAGE_SIZE) { - ret = ops->confirm(pipe, buf); + if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) { + ret = pipe_buf_confirm(pipe, buf); if (ret) goto out; @@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe) for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) - buf->ops->release(pipe, buf); + pipe_buf_release(pipe, buf); } if (pipe->tmp_page) __free_page(pipe->tmp_page); diff --git a/fs/splice.c b/fs/splice.c index dd9bf7e410d2..aa38901a4f10 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { unsigned int spd_pages = spd->nr_pages; - int ret, do_wakeup, page_nr; + int ret = 0, page_nr = 0; if (!spd_pages) return 0; - ret = 0; - do_wakeup = 0; - page_nr = 0; - - pipe_lock(pipe); - - for (;;) { - if (!pipe->readers) { - send_sig(SIGPIPE, current, 0); - if (!ret) - ret = -EPIPE; - break; - } - - if (pipe->nrbufs < pipe->buffers) { - int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - struct pipe_buffer *buf = pipe->bufs + newbuf; - - buf->page = spd->pages[page_nr]; - buf->offset = spd->partial[page_nr].offset; - buf->len = spd->partial[page_nr].len; - buf->private = spd->partial[page_nr].private; - buf->ops = spd->ops; - if (spd->flags & SPLICE_F_GIFT) - buf->flags |= PIPE_BUF_FLAG_GIFT; - - pipe->nrbufs++; - page_nr++; - ret += buf->len; - - if (pipe->files) - do_wakeup = 1; + if (unlikely(!pipe->readers)) { + send_sig(SIGPIPE, current, 0); + ret = -EPIPE; + goto out; + } - if (!--spd->nr_pages) - break; - if (pipe->nrbufs < pipe->buffers) - continue; + while (pipe->nrbufs < pipe->buffers) { + int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); + struct pipe_buffer *buf = pipe->bufs + newbuf; - break; - } + buf->page = spd->pages[page_nr]; + buf->offset = spd->partial[page_nr].offset; + buf->len = spd->partial[page_nr].len; + buf->private = spd->partial[page_nr].private; + buf->ops = spd->ops; - if (spd->flags & SPLICE_F_NONBLOCK) { - if (!ret) - ret = -EAGAIN; - break; - } + pipe->nrbufs++; + page_nr++; + ret += buf->len; - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; + if (!--spd->nr_pages) break; - } - - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - do_wakeup = 0; - } - - pipe->waiting_writers++; - pipe_wait(pipe); - pipe->waiting_writers--; } - pipe_unlock(pipe); - - if (do_wakeup) - wakeup_pipe_readers(pipe); + if (!ret) + ret = -EAGAIN; +out: while (page_nr < spd_pages) spd->spd_release(spd, page_nr++); @@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, } EXPORT_SYMBOL_GPL(splice_to_pipe); +ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) +{ + int ret; + + if (unlikely(!pipe->readers)) { + send_sig(SIGPIPE, current, 0); + ret = -EPIPE; + } else if (pipe->nrbufs == pipe->buffers) { + ret = -EAGAIN; + } else { + int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); + pipe->bufs[newbuf] = *buf; + pipe->nrbufs++; + return buf->len; + } + pipe_buf_release(pipe, buf); + return ret; +} +EXPORT_SYMBOL(add_to_pipe); + void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { put_page(spd->pages[i]); @@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd) kfree(spd->partial); } -static int -__generic_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - struct address_space *mapping = in->f_mapping; - unsigned int loff, nr_pages, req_pages; - struct page *pages[PIPE_DEF_BUFFERS]; - struct partial_page partial[PIPE_DEF_BUFFERS]; - struct page *page; - pgoff_t index, end_index; - loff_t isize; - int error, page_nr; - struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, - .nr_pages_max = PIPE_DEF_BUFFERS, - .flags = flags, - .ops = &page_cache_pipe_buf_ops, - .spd_release = spd_release_page, - }; - - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - - index = *ppos >> PAGE_SHIFT; - loff = *ppos & ~PAGE_MASK; - req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; - nr_pages = min(req_pages, spd.nr_pages_max); - - /* - * Lookup the (hopefully) full range of pages we need. - */ - spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); - index += spd.nr_pages; - - /* - * If find_get_pages_contig() returned fewer pages than we needed, - * readahead/allocate the rest and fill in the holes. - */ - if (spd.nr_pages < nr_pages) - page_cache_sync_readahead(mapping, &in->f_ra, in, - index, req_pages - spd.nr_pages); - - error = 0; - while (spd.nr_pages < nr_pages) { - /* - * Page could be there, find_get_pages_contig() breaks on - * the first hole. - */ - page = find_get_page(mapping, index); - if (!page) { - /* - * page didn't exist, allocate one. - */ - page = page_cache_alloc_cold(mapping); - if (!page) - break; - - error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_constraint(mapping, GFP_KERNEL)); - if (unlikely(error)) { - put_page(page); - if (error == -EEXIST) - continue; - break; - } - /* - * add_to_page_cache() locks the page, unlock it - * to avoid convoluting the logic below even more. - */ - unlock_page(page); - } - - spd.pages[spd.nr_pages++] = page; - index++; - } - - /* - * Now loop over the map and see if we need to start IO on any - * pages, fill in the partial map, etc. - */ - index = *ppos >> PAGE_SHIFT; - nr_pages = spd.nr_pages; - spd.nr_pages = 0; - for (page_nr = 0; page_nr < nr_pages; page_nr++) { - unsigned int this_len; - - if (!len) - break; - - /* - * this_len is the max we'll use from this page - */ - this_len = min_t(unsigned long, len, PAGE_SIZE - loff); - page = spd.pages[page_nr]; - - if (PageReadahead(page)) - page_cache_async_readahead(mapping, &in->f_ra, in, - page, index, req_pages - page_nr); - - /* - * If the page isn't uptodate, we may need to start io on it - */ - if (!PageUptodate(page)) { - lock_page(page); - - /* - * Page was truncated, or invalidated by the - * filesystem. Redo the find/create, but this time the - * page is kept locked, so there's no chance of another - * race with truncate/invalidate. - */ - if (!page->mapping) { - unlock_page(page); -retry_lookup: - page = find_or_create_page(mapping, index, - mapping_gfp_mask(mapping)); - - if (!page) { - error = -ENOMEM; - break; - } - put_page(spd.pages[page_nr]); - spd.pages[page_nr] = page; - } - /* - * page was already under io and is now done, great - */ - if (PageUptodate(page)) { - unlock_page(page); - goto fill_it; - } - - /* - * need to read in the page - */ - error = mapping->a_ops->readpage(in, page); - if (unlikely(error)) { - /* - * Re-lookup the page - */ - if (error == AOP_TRUNCATED_PAGE) - goto retry_lookup; - - break; - } - } -fill_it: - /* - * i_size must be checked after PageUptodate. - */ - isize = i_size_read(mapping->host); - end_index = (isize - 1) >> PAGE_SHIFT; - if (unlikely(!isize || index > end_index)) - break; - - /* - * if this is the last page, see if we need to shrink - * the length and stop - */ - if (end_index == index) { - unsigned int plen; - - /* - * max good bytes in this page - */ - plen = ((isize - 1) & ~PAGE_MASK) + 1; - if (plen <= loff) - break; - - /* - * force quit after adding this page - */ - this_len = min(this_len, plen - loff); - len = this_len; - } - - spd.partial[page_nr].offset = loff; - spd.partial[page_nr].len = this_len; - len -= this_len; - loff = 0; - spd.nr_pages++; - index++; - } - - /* - * Release any pages at the end, if we quit early. 'page_nr' is how far - * we got, 'nr_pages' is how many pages are in the map. - */ - while (page_nr < nr_pages) - put_page(spd.pages[page_nr++]); - in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; - - if (spd.nr_pages) - error = splice_to_pipe(pipe, &spd); - - splice_shrink_spd(&spd); - return error; -} - /** * generic_file_splice_read - splice data from file to a pipe * @in: file to splice from @@ -514,39 +290,53 @@ fill_it: * * Description: * Will read pages from given file and fill them into a pipe. Can be - * used as long as the address_space operations for the source implements - * a readpage() hook. + * used as long as it has more or less sane ->read_iter(). * */ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - loff_t isize, left; - int ret; - - if (IS_DAX(in->f_mapping->host)) - return default_file_splice_read(in, ppos, pipe, len, flags); + struct iov_iter to; + struct kiocb kiocb; + loff_t isize; + int idx, ret; isize = i_size_read(in->f_mapping->host); if (unlikely(*ppos >= isize)) return 0; - left = isize - *ppos; - if (unlikely(left < len)) - len = left; - - ret = __generic_file_splice_read(in, ppos, pipe, len, flags); + iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); + idx = to.idx; + init_sync_kiocb(&kiocb, in); + kiocb.ki_pos = *ppos; + ret = in->f_op->read_iter(&kiocb, &to); if (ret > 0) { - *ppos += ret; + *ppos = kiocb.ki_pos; file_accessed(in); + } else if (ret < 0) { + if (WARN_ON(to.idx != idx || to.iov_offset)) { + /* + * a bogus ->read_iter() has copied something and still + * returned an error instead of a short read. + */ + to.idx = idx; + to.iov_offset = 0; + iov_iter_advance(&to, 0); /* to free what was emitted */ + } + /* + * callers of ->splice_read() expect -EAGAIN on + * "can't put anything in there", rather than -EFAULT. + */ + if (ret == -EFAULT) + ret = -EAGAIN; } return ret; } EXPORT_SYMBOL(generic_file_splice_read); -static const struct pipe_buf_operations default_pipe_buf_ops = { +const struct pipe_buf_operations default_pipe_buf_ops = { .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, @@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); -static ssize_t kernel_readv(struct file *file, const struct iovec *vec, +static ssize_t kernel_readv(struct file *file, const struct kvec *vec, unsigned long vlen, loff_t offset) { mm_segment_t old_fs; @@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count, } EXPORT_SYMBOL(kernel_write); -ssize_t default_file_splice_read(struct file *in, loff_t *ppos, +static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { + struct kvec *vec, __vec[PIPE_DEF_BUFFERS]; + struct iov_iter to; + struct page **pages; unsigned int nr_pages; - unsigned int nr_freed; - size_t offset; - struct page *pages[PIPE_DEF_BUFFERS]; - struct partial_page partial[PIPE_DEF_BUFFERS]; - struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; + size_t offset, dummy, copied = 0; ssize_t res; - size_t this_len; - int error; int i; - struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, - .nr_pages_max = PIPE_DEF_BUFFERS, - .flags = flags, - .ops = &default_pipe_buf_ops, - .spd_release = spd_release_page, - }; - if (splice_grow_spd(pipe, &spd)) + if (pipe->nrbufs == pipe->buffers) + return -EAGAIN; + + /* + * Try to keep page boundaries matching to source pagecache ones - + * it probably won't be much help, but... + */ + offset = *ppos & ~PAGE_MASK; + + iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); + + res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy); + if (res <= 0) return -ENOMEM; - res = -ENOMEM; + nr_pages = res / PAGE_SIZE; + vec = __vec; - if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { - vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); - if (!vec) - goto shrink_ret; + if (nr_pages > PIPE_DEF_BUFFERS) { + vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL); + if (unlikely(!vec)) { + res = -ENOMEM; + goto out; + } } - offset = *ppos & ~PAGE_MASK; - nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - - for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { - struct page *page; + pipe->bufs[to.idx].offset = offset; + pipe->bufs[to.idx].len -= offset; - page = alloc_page(GFP_USER); - error = -ENOMEM; - if (!page) - goto err; - - this_len = min_t(size_t, len, PAGE_SIZE - offset); - vec[i].iov_base = (void __user *) page_address(page); + for (i = 0; i < nr_pages; i++) { + size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); + vec[i].iov_base = page_address(pages[i]) + offset; vec[i].iov_len = this_len; - spd.pages[i] = page; - spd.nr_pages++; len -= this_len; offset = 0; } - res = kernel_readv(in, vec, spd.nr_pages, *ppos); - if (res < 0) { - error = res; - goto err; - } - - error = 0; - if (!res) - goto err; - - nr_freed = 0; - for (i = 0; i < spd.nr_pages; i++) { - this_len = min_t(size_t, vec[i].iov_len, res); - spd.partial[i].offset = 0; - spd.partial[i].len = this_len; - if (!this_len) { - __free_page(spd.pages[i]); - spd.pages[i] = NULL; - nr_freed++; - } - res -= this_len; - } - spd.nr_pages -= nr_freed; - - res = splice_to_pipe(pipe, &spd); - if (res > 0) + res = kernel_readv(in, vec, nr_pages, *ppos); + if (res > 0) { + copied = res; *ppos += res; + } -shrink_ret: if (vec != __vec) kfree(vec); - splice_shrink_spd(&spd); +out: + for (i = 0; i < nr_pages; i++) + put_page(pages[i]); + kvfree(pages); + iov_iter_advance(&to, copied); /* truncates and discards */ return res; - -err: - for (i = 0; i < spd.nr_pages; i++) - __free_page(spd.pages[i]); - - res = error; - goto shrink_ret; } -EXPORT_SYMBOL(default_file_splice_read); /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' @@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des while (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - const struct pipe_buf_operations *ops = buf->ops; sd->len = buf->len; if (sd->len > sd->total_len) sd->len = sd->total_len; - ret = buf->ops->confirm(pipe, buf); + ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; @@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des sd->total_len -= ret; if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); + pipe_buf_release(pipe, buf); pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; if (pipe->files) @@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, if (idx == pipe->buffers - 1) idx = -1; - ret = buf->ops->confirm(pipe, buf); + ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; @@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, while (ret) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; if (ret >= buf->len) { - const struct pipe_buf_operations *ops = buf->ops; ret -= buf->len; buf->len = 0; - buf->ops = NULL; - ops->release(pipe, buf); + pipe_buf_release(pipe, buf); pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; if (pipe->files) @@ -1273,10 +1027,8 @@ out_release: for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; - if (buf->ops) { - buf->ops->release(pipe, buf); - buf->ops = NULL; - } + if (buf->ops) + pipe_buf_release(pipe, buf); } if (!bytes) @@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, } EXPORT_SYMBOL(do_splice_direct); +static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) +{ + while (pipe->nrbufs == pipe->buffers) { + if (flags & SPLICE_F_NONBLOCK) + return -EAGAIN; + if (signal_pending(current)) + return -ERESTARTSYS; + pipe->waiting_writers++; + pipe_wait(pipe); + pipe->waiting_writers--; + } + return 0; +} + static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); @@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in, offset = in->f_pos; } - ret = do_splice_to(in, &offset, opipe, len, flags); - + pipe_lock(opipe); + ret = wait_for_space(opipe, flags); + if (!ret) + ret = do_splice_to(in, &offset, opipe, len, flags); + pipe_unlock(opipe); + if (ret > 0) + wakeup_pipe_readers(opipe); if (!off_in) in->f_pos = offset; else if (copy_to_user(off_in, &offset, sizeof(loff_t))) @@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in, return -EINVAL; } -/* - * Map an iov into an array of pages and offset/length tupples. With the - * partial_page structure, we can map several non-contiguous ranges into - * our ones pages[] map instead of splitting that operation into pieces. - * Could easily be exported as a generic helper for other users, in which - * case one would probably want to add a 'max_nr_pages' parameter as well. - */ -static int get_iovec_page_array(const struct iovec __user *iov, - unsigned int nr_vecs, struct page **pages, - struct partial_page *partial, bool aligned, - unsigned int pipe_buffers) +static int iter_to_pipe(struct iov_iter *from, + struct pipe_inode_info *pipe, + unsigned flags) { - int buffers = 0, error = 0; - - while (nr_vecs) { - unsigned long off, npages; - struct iovec entry; - void __user *base; - size_t len; - int i; - - error = -EFAULT; - if (copy_from_user(&entry, iov, sizeof(entry))) - break; - - base = entry.iov_base; - len = entry.iov_len; - - /* - * Sanity check this iovec. 0 read succeeds. - */ - error = 0; - if (unlikely(!len)) - break; - error = -EFAULT; - if (!access_ok(VERIFY_READ, base, len)) - break; - - /* - * Get this base offset and number of pages, then map - * in the user pages. - */ - off = (unsigned long) base & ~PAGE_MASK; - - /* - * If asked for alignment, the offset must be zero and the - * length a multiple of the PAGE_SIZE. - */ - error = -EINVAL; - if (aligned && (off || len & ~PAGE_MASK)) - break; - - npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (npages > pipe_buffers - buffers) - npages = pipe_buffers - buffers; - - error = get_user_pages_fast((unsigned long)base, npages, - 0, &pages[buffers]); - - if (unlikely(error <= 0)) + struct pipe_buffer buf = { + .ops = &user_page_pipe_buf_ops, + .flags = flags + }; + size_t total = 0; + int ret = 0; + bool failed = false; + + while (iov_iter_count(from) && !failed) { + struct page *pages[16]; + ssize_t copied; + size_t start; + int n; + + copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); + if (copied <= 0) { + ret = copied; break; - - /* - * Fill this contiguous range into the partial page map. - */ - for (i = 0; i < error; i++) { - const int plen = min_t(size_t, len, PAGE_SIZE - off); - - partial[buffers].offset = off; - partial[buffers].len = plen; - - off = 0; - len -= plen; - buffers++; } - /* - * We didn't complete this iov, stop here since it probably - * means we have to move some of this into a pipe to - * be able to continue. - */ - if (len) - break; - - /* - * Don't continue if we mapped fewer pages than we asked for, - * or if we mapped the max number of pages that we have - * room for. - */ - if (error < npages || buffers == pipe_buffers) - break; - - nr_vecs--; - iov++; + for (n = 0; copied; n++, start = 0) { + int size = min_t(int, copied, PAGE_SIZE - start); + if (!failed) { + buf.page = pages[n]; + buf.offset = start; + buf.len = size; + ret = add_to_pipe(pipe, &buf); + if (unlikely(ret < 0)) { + failed = true; + } else { + iov_iter_advance(from, ret); + total += ret; + } + } else { + put_page(pages[n]); + } + copied -= size; + } } - - if (buffers) - return buffers; - - return error; + return total ? total : ret; } static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, @@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ -static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, +static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, unsigned long nr_segs, unsigned int flags) { struct pipe_inode_info *pipe; - struct page *pages[PIPE_DEF_BUFFERS]; - struct partial_page partial[PIPE_DEF_BUFFERS]; - struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, - .nr_pages_max = PIPE_DEF_BUFFERS, - .flags = flags, - .ops = &user_page_pipe_buf_ops, - .spd_release = spd_release_page, - }; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter from; long ret; + unsigned buf_flag = 0; + + if (flags & SPLICE_F_GIFT) + buf_flag = PIPE_BUF_FLAG_GIFT; pipe = get_pipe_info(file); if (!pipe) return -EBADF; - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - - spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, - spd.partial, false, - spd.nr_pages_max); - if (spd.nr_pages <= 0) - ret = spd.nr_pages; - else - ret = splice_to_pipe(pipe, &spd); + ret = import_iovec(WRITE, uiov, nr_segs, + ARRAY_SIZE(iovstack), &iov, &from); + if (ret < 0) + return ret; - splice_shrink_spd(&spd); + pipe_lock(pipe); + ret = wait_for_space(pipe, flags); + if (!ret) + ret = iter_to_pipe(&from, pipe, buf_flag); + pipe_unlock(pipe); + if (ret > 0) + wakeup_pipe_readers(pipe); + kfree(iov); return ret; } @@ -1876,7 +1589,7 @@ retry: * Get a reference to this pipe buffer, * so we can copy the contents over. */ - ibuf->ops->get(ipipe, ibuf); + pipe_buf_get(ipipe, ibuf); *obuf = *ibuf; /* @@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * Get a reference to this pipe buffer, * so we can copy the contents over. */ - ibuf->ops->get(ipipe, ibuf); + pipe_buf_get(ipipe, ibuf); obuf = opipe->bufs + nbuf; *obuf = *ibuf; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index bac55c687085..26acfbb331ae 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -393,45 +393,6 @@ xfs_file_read_iter( return ret; } -STATIC ssize_t -xfs_file_splice_read( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t count, - unsigned int flags) -{ - struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); - ssize_t ret; - - XFS_STATS_INC(ip->i_mount, xs_read_calls); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - trace_xfs_file_splice_read(ip, count, *ppos); - - /* - * DAX inodes cannot ues the page cache for splice, so we have to push - * them through the VFS IO path. This means it goes through - * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we - * cannot lock the splice operation at this level for DAX inodes. - */ - if (IS_DAX(VFS_I(ip))) { - ret = default_file_splice_read(infilp, ppos, pipe, count, - flags); - goto out; - } - - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); -out: - if (ret > 0) - XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); - return ret; -} - /* * Zero any on disk space between the current EOF and the new, larger EOF. * @@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read_iter = xfs_file_read_iter, .write_iter = xfs_file_write_iter, - .splice_read = xfs_file_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c6b2b1dcde75..16093c7dacde 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_dax_write); -DEFINE_RW_EVENT(xfs_file_splice_read); DECLARE_EVENT_CLASS(xfs_page_class, TP_PROTO(struct inode *inode, struct page *page, unsigned long off, |