diff options
Diffstat (limited to 'fs/read_write.c')
-rw-r--r-- | fs/read_write.c | 302 |
1 files changed, 299 insertions, 3 deletions
diff --git a/fs/read_write.c b/fs/read_write.c index 819ef3faf1bb..2116e74a83d3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -16,6 +16,7 @@ #include <linux/pagemap.h> #include <linux/splice.h> #include <linux/compat.h> +#include <linux/mount.h> #include "internal.h" #include <asm/uaccess.h> @@ -395,9 +396,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t } if (unlikely(inode->i_flctx && mandatory_lock(inode))) { - retval = locks_mandatory_area( - read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, - inode, file, pos, count); + retval = locks_mandatory_area(inode, file, pos, pos + count - 1, + read_write == READ ? F_RDLCK : F_WRLCK); if (retval < 0) return retval; } @@ -1327,3 +1327,299 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, return do_sendfile(out_fd, in_fd, NULL, count, 0); } #endif + +/* + * copy_file_range() differs from regular file read and write in that it + * specifically allows return partial success. When it does so is up to + * the copy_file_range method. + */ +ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + ssize_t ret; + + if (flags != 0) + return -EINVAL; + + /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */ + ret = rw_verify_area(READ, file_in, &pos_in, len); + if (ret >= 0) + ret = rw_verify_area(WRITE, file_out, &pos_out, len); + if (ret < 0) + return ret; + + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND)) + return -EBADF; + + /* this could be relaxed once a method supports cross-fs copies */ + if (inode_in->i_sb != inode_out->i_sb) + return -EXDEV; + + if (len == 0) + return 0; + + ret = mnt_want_write_file(file_out); + if (ret) + return ret; + + ret = -EOPNOTSUPP; + if (file_out->f_op->copy_file_range) + ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, + pos_out, len, flags); + if (ret == -EOPNOTSUPP) + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); + + if (ret > 0) { + fsnotify_access(file_in); + add_rchar(current, ret); + fsnotify_modify(file_out); + add_wchar(current, ret); + } + inc_syscr(current); + inc_syscw(current); + + mnt_drop_write_file(file_out); + + return ret; +} +EXPORT_SYMBOL(vfs_copy_file_range); + +SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, + int, fd_out, loff_t __user *, off_out, + size_t, len, unsigned int, flags) +{ + loff_t pos_in; + loff_t pos_out; + struct fd f_in; + struct fd f_out; + ssize_t ret = -EBADF; + + f_in = fdget(fd_in); + if (!f_in.file) + goto out2; + + f_out = fdget(fd_out); + if (!f_out.file) + goto out1; + + ret = -EFAULT; + if (off_in) { + if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) + goto out; + } else { + pos_in = f_in.file->f_pos; + } + + if (off_out) { + if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) + goto out; + } else { + pos_out = f_out.file->f_pos; + } + + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, + flags); + if (ret > 0) { + pos_in += ret; + pos_out += ret; + + if (off_in) { + if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) + ret = -EFAULT; + } else { + f_in.file->f_pos = pos_in; + } + + if (off_out) { + if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) + ret = -EFAULT; + } else { + f_out.file->f_pos = pos_out; + } + } + +out: + fdput(f_out); +out1: + fdput(f_in); +out2: + return ret; +} + +static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) +{ + struct inode *inode = file_inode(file); + + if (unlikely(pos < 0)) + return -EINVAL; + + if (unlikely((loff_t) (pos + len) < 0)) + return -EINVAL; + + if (unlikely(inode->i_flctx && mandatory_lock(inode))) { + loff_t end = len ? pos + len - 1 : OFFSET_MAX; + int retval; + + retval = locks_mandatory_area(inode, file, pos, end, + write ? F_WRLCK : F_RDLCK); + if (retval < 0) + return retval; + } + + return security_file_permission(file, write ? MAY_WRITE : MAY_READ); +} + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + int ret; + + if (inode_in->i_sb != inode_out->i_sb || + file_in->f_path.mnt != file_out->f_path.mnt) + return -EXDEV; + + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND) || + !file_in->f_op->clone_file_range) + return -EBADF; + + ret = clone_verify_area(file_in, pos_in, len, false); + if (ret) + return ret; + + ret = clone_verify_area(file_out, pos_out, len, true); + if (ret) + return ret; + + if (pos_in + len > i_size_read(inode_in)) + return -EINVAL; + + ret = mnt_want_write_file(file_out); + if (ret) + return ret; + + ret = file_in->f_op->clone_file_range(file_in, pos_in, + file_out, pos_out, len); + if (!ret) { + fsnotify_access(file_in); + fsnotify_modify(file_out); + } + + mnt_drop_write_file(file_out); + return ret; +} +EXPORT_SYMBOL(vfs_clone_file_range); + +int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) +{ + struct file_dedupe_range_info *info; + struct inode *src = file_inode(file); + u64 off; + u64 len; + int i; + int ret; + bool is_admin = capable(CAP_SYS_ADMIN); + u16 count = same->dest_count; + struct file *dst_file; + loff_t dst_off; + ssize_t deduped; + + if (!(file->f_mode & FMODE_READ)) + return -EINVAL; + + if (same->reserved1 || same->reserved2) + return -EINVAL; + + off = same->src_offset; + len = same->src_length; + + ret = -EISDIR; + if (S_ISDIR(src->i_mode)) + goto out; + + ret = -EINVAL; + if (!S_ISREG(src->i_mode)) + goto out; + + ret = clone_verify_area(file, off, len, false); + if (ret < 0) + goto out; + ret = 0; + + /* pre-format output fields to sane values */ + for (i = 0; i < count; i++) { + same->info[i].bytes_deduped = 0ULL; + same->info[i].status = FILE_DEDUPE_RANGE_SAME; + } + + for (i = 0, info = same->info; i < count; i++, info++) { + struct inode *dst; + struct fd dst_fd = fdget(info->dest_fd); + + dst_file = dst_fd.file; + if (!dst_file) { + info->status = -EBADF; + goto next_loop; + } + dst = file_inode(dst_file); + + ret = mnt_want_write_file(dst_file); + if (ret) { + info->status = ret; + goto next_loop; + } + + dst_off = info->dest_offset; + ret = clone_verify_area(dst_file, dst_off, len, true); + if (ret < 0) { + info->status = ret; + goto next_file; + } + ret = 0; + + if (info->reserved) { + info->status = -EINVAL; + } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + info->status = -EINVAL; + } else if (file->f_path.mnt != dst_file->f_path.mnt) { + info->status = -EXDEV; + } else if (S_ISDIR(dst->i_mode)) { + info->status = -EISDIR; + } else if (dst_file->f_op->dedupe_file_range == NULL) { + info->status = -EINVAL; + } else { + deduped = dst_file->f_op->dedupe_file_range(file, off, + len, dst_file, + info->dest_offset); + if (deduped == -EBADE) + info->status = FILE_DEDUPE_RANGE_DIFFERS; + else if (deduped < 0) + info->status = deduped; + else + info->bytes_deduped += deduped; + } + +next_file: + mnt_drop_write_file(dst_file); +next_loop: + fdput(dst_fd); + } + +out: + return ret; +} +EXPORT_SYMBOL(vfs_dedupe_file_range); |