diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/io_uring.c | 19 | ||||
-rw-r--r-- | fs/ksmbd/smb2pdu.c | 48 | ||||
-rw-r--r-- | fs/ksmbd/transport_rdma.c | 10 | ||||
-rw-r--r-- | fs/ksmbd/transport_tcp.c | 2 | ||||
-rw-r--r-- | fs/ksmbd/vfs.c | 12 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 8 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 34 | ||||
-rw-r--r-- | fs/read_write.c | 77 |
8 files changed, 123 insertions, 87 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 5ff2cdb425bc..0d491ad15b66 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1183,6 +1183,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .needs_async_setup = 1, + .ioprio = 1, .async_size = sizeof(struct io_async_msghdr), }, [IORING_OP_RECVMSG] = { @@ -1191,6 +1192,7 @@ static const struct io_op_def io_op_defs[] = { .pollin = 1, .buffer_select = 1, .needs_async_setup = 1, + .ioprio = 1, .async_size = sizeof(struct io_async_msghdr), }, [IORING_OP_TIMEOUT] = { @@ -1266,6 +1268,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .audit_skip = 1, + .ioprio = 1, }, [IORING_OP_RECV] = { .needs_file = 1, @@ -1273,6 +1276,7 @@ static const struct io_op_def io_op_defs[] = { .pollin = 1, .buffer_select = 1, .audit_skip = 1, + .ioprio = 1, }, [IORING_OP_OPENAT2] = { }, @@ -4314,18 +4318,19 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret < 0)) return ret; } else { + rw = req->async_data; + s = &rw->s; + /* * Safe and required to re-import if we're using provided * buffers, as we dropped the selected one before retry. */ - if (req->flags & REQ_F_BUFFER_SELECT) { + if (io_do_buffer_select(req)) { ret = io_import_iovec(READ, req, &iovec, s, issue_flags); if (unlikely(ret < 0)) return ret; } - rw = req->async_data; - s = &rw->s; /* * We come here from an earlier attempt, restore our state to * match in case it doesn't. It's cheap enough that we don't @@ -6075,12 +6080,12 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = &req->sr_msg; - if (unlikely(sqe->file_index)) + if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); - sr->flags = READ_ONCE(sqe->addr2); + sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; @@ -6311,12 +6316,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = &req->sr_msg; - if (unlikely(sqe->file_index)) + if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); - sr->flags = READ_ONCE(sqe->addr2); + sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index e6f4ccc12f49..353f047e783c 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6490,6 +6490,7 @@ int smb2_write(struct ksmbd_work *work) goto out; } + ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) writethrough = true; @@ -6505,10 +6506,6 @@ int smb2_write(struct ksmbd_work *work) data_buf = (char *)(((char *)&req->hdr.ProtocolId) + le16_to_cpu(req->DataOffset)); - ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); - if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) - writethrough = true; - ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n", fp->filp->f_path.dentry, offset, length); err = ksmbd_vfs_write(work, fp, data_buf, length, &offset, @@ -7703,7 +7700,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct file_zero_data_information *zero_data; struct ksmbd_file *fp; - loff_t off, len; + loff_t off, len, bfz; if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { ksmbd_debug(SMB, @@ -7720,19 +7717,26 @@ int smb2_ioctl(struct ksmbd_work *work) zero_data = (struct file_zero_data_information *)&req->Buffer[0]; - fp = ksmbd_lookup_fd_fast(work, id); - if (!fp) { - ret = -ENOENT; + off = le64_to_cpu(zero_data->FileOffset); + bfz = le64_to_cpu(zero_data->BeyondFinalZero); + if (off > bfz) { + ret = -EINVAL; goto out; } - off = le64_to_cpu(zero_data->FileOffset); - len = le64_to_cpu(zero_data->BeyondFinalZero) - off; + len = bfz - off; + if (len) { + fp = ksmbd_lookup_fd_fast(work, id); + if (!fp) { + ret = -ENOENT; + goto out; + } - ret = ksmbd_vfs_zero_data(work, fp, off, len); - ksmbd_fd_put(work, fp); - if (ret < 0) - goto out; + ret = ksmbd_vfs_zero_data(work, fp, off, len); + ksmbd_fd_put(work, fp); + if (ret < 0) + goto out; + } break; } case FSCTL_QUERY_ALLOCATED_RANGES: @@ -7806,14 +7810,24 @@ int smb2_ioctl(struct ksmbd_work *work) src_off = le64_to_cpu(dup_ext->SourceFileOffset); dst_off = le64_to_cpu(dup_ext->TargetFileOffset); length = le64_to_cpu(dup_ext->ByteCount); - cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, - dst_off, length, 0); + /* + * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE + * should fall back to vfs_copy_file_range(). This could be + * beneficial when re-exporting nfs/smb mount, but note that + * this can result in partial copy that returns an error status. + * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, + * fall back to vfs_copy_file_range(), should be avoided when + * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. + */ + cloned = vfs_clone_file_range(fp_in->filp, src_off, + fp_out->filp, dst_off, length, 0); if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { ret = -EOPNOTSUPP; goto dup_ext_out; } else if (cloned != length) { cloned = vfs_copy_file_range(fp_in->filp, src_off, - fp_out->filp, dst_off, length, 0); + fp_out->filp, dst_off, + length, 0); if (cloned != length) { if (cloned < 0) ret = cloned; diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index d035e060c2f0..35b55ee94fe5 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -5,16 +5,6 @@ * * Author(s): Long Li <longli@microsoft.com>, * Hyunchul Lee <hyc.lee@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #define SUBMOD_NAME "smb_direct" diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 8fef9de787d3..143bba4e4db8 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index dcdd07c6efff..05efcdf7a4a7 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1015,7 +1015,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); - return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); + return vfs_fallocate(fp->filp, + FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, + off, len); } int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, @@ -1046,7 +1048,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, *out_count = 0; end = start + length; while (start < end && *out_count < in_count) { - extent_start = f->f_op->llseek(f, start, SEEK_DATA); + extent_start = vfs_llseek(f, start, SEEK_DATA); if (extent_start < 0) { if (extent_start != -ENXIO) ret = (int)extent_start; @@ -1056,7 +1058,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, if (extent_start >= end) break; - extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); + extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); if (extent_end < 0) { if (extent_end != -ENXIO) ret = (int)extent_end; @@ -1777,6 +1779,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, + len, 0); if (ret < 0) return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 840e3af63a6f..b764213bcc55 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -577,6 +577,7 @@ out_err: ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -587,7 +588,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, * limit like this and pipeline multiple COPY requests. */ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src, src_pos, dst, dst_pos, + count, 0); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c2255b440df9..b08ce0d821a7 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1513,8 +1513,15 @@ static int fanotify_test_fid(struct dentry *dentry) return 0; } -static int fanotify_events_supported(struct path *path, __u64 mask) +static int fanotify_events_supported(struct fsnotify_group *group, + struct path *path, __u64 mask, + unsigned int flags) { + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ + bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || + (mask & FAN_RENAME); + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of @@ -1526,6 +1533,16 @@ static int fanotify_events_supported(struct path *path, __u64 mask) if (mask & FANOTIFY_PERM_EVENTS && path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) return -EINVAL; + + /* + * We shouldn't have allowed setting dirent events and the directory + * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, + * but because we always allowed it, error only when using new APIs. + */ + if (strict_dir_events && mark_type == FAN_MARK_INODE && + !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + return -ENOTDIR; + return 0; } @@ -1672,7 +1689,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; if (flags & FAN_MARK_ADD) { - ret = fanotify_events_supported(&path, mask); + ret = fanotify_events_supported(group, &path, mask, flags); if (ret) goto path_put_and_out; } @@ -1695,19 +1712,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; - /* - * FAN_RENAME is not allowed on non-dir (for now). - * We shouldn't have allowed setting any dirent events in mask of - * non-dir, but because we always allowed it, error only if group - * was initialized with the new flag FAN_REPORT_TARGET_FID. - */ - ret = -ENOTDIR; - if (inode && !S_ISDIR(inode->i_mode) && - ((mask & FAN_RENAME) || - ((mask & FANOTIFY_DIRENT_EVENTS) && - FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID)))) - goto path_put_and_out; - /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; diff --git a/fs/read_write.c b/fs/read_write.c index b1b1cdfee9d3..e0777eefd846 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1397,28 +1397,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); -static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) -{ - /* - * Although we now allow filesystems to handle cross sb copy, passing - * a file of the wrong filesystem type to filesystem driver can result - * in an attempt to dereference the wrong type of ->private_data, so - * avoid doing that until we really have a good reason. NFS defines - * several different file_system_type structures, but they all end up - * using the same ->copy_file_range() function pointer. - */ - if (file_out->f_op->copy_file_range && - file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) - return file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); - - return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); -} - /* * Performs necessary checks before doing a file copy * @@ -1440,6 +1418,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, if (ret) return ret; + /* + * We allow some filesystems to handle cross sb copy, but passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. + * + * nfs and cifs define several different file_system_type structures + * and several different sets of file_operations, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range) { + if (file_in->f_op->copy_file_range != + file_out->f_op->copy_file_range) + return -EXDEV; + } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { + return -EXDEV; + } + /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; @@ -1505,26 +1501,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, file_start_write(file_out); /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). + * Cloning is supported by more file systems, so we implement copy on + * same sb using clone, but for filesystems where both clone and copy + * are supported (e.g. nfs,cifs), we only call the copy method. */ + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + goto done; + } + if (file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { - loff_t cloned; - - cloned = file_in->f_op->remap_file_range(file_in, pos_in, + ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), REMAP_FILE_CAN_SHORTEN); - if (cloned > 0) { - ret = cloned; + if (ret > 0) goto done; - } } - ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); - WARN_ON_ONCE(ret == -EOPNOTSUPP); + /* + * We can get here for same sb copy of filesystems that do not implement + * ->copy_file_range() in case filesystem does not support clone or in + * case filesystem supports clone but rejected the clone request (e.g. + * because it was not block aligned). + * + * In both cases, fall back to kernel copy so we are able to maintain a + * consistent story about which filesystems support copy_file_range() + * and which filesystems do not, that will allow userspace tools to + * make consistent desicions w.r.t using copy_file_range(). + */ + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + done: if (ret > 0) { fsnotify_access(file_in); |