diff options
Diffstat (limited to 'fs')
63 files changed, 890 insertions, 443 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f6019ce20035..3e21211e99c3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1258,7 +1258,7 @@ struct btrfs_root { atomic_t will_be_snapshoted; /* For qgroup metadata space reserve */ - atomic_t qgroup_meta_rsv; + atomic64_t qgroup_meta_rsv; }; static inline u32 btrfs_inode_sectorsize(const struct inode *inode) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a7d8c342f604..061c1d1f774f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1342,7 +1342,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); atomic_set(&root->will_be_snapshoted, 0); - atomic_set(&root->qgroup_meta_rsv, 0); + atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8df797432740..27fdb250b446 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2584,26 +2584,36 @@ static void end_bio_extent_readpage(struct bio *bio) if (tree->ops) { ret = tree->ops->readpage_io_failed_hook(page, mirror); - if (!ret && !bio->bi_error) - uptodate = 1; - } else { + if (ret == -EAGAIN) { + /* + * Data inode's readpage_io_failed_hook() always + * returns -EAGAIN. + * + * The generic bio_readpage_error handles errors + * the following way: If possible, new read + * requests are created and submitted and will + * end up in end_bio_extent_readpage as well (if + * we're lucky, not in the !uptodate case). In + * that case it returns 0 and we just go on with + * the next page in our bio. If it can't handle + * the error it will return -EIO and we remain + * responsible for that page. + */ + ret = bio_readpage_error(bio, offset, page, + start, end, mirror); + if (ret == 0) { + uptodate = !bio->bi_error; + offset += len; + continue; + } + } + /* - * The generic bio_readpage_error handles errors the - * following way: If possible, new read requests are - * created and submitted and will end up in - * end_bio_extent_readpage as well (if we're lucky, not - * in the !uptodate case). In that case it returns 0 and - * we just go on with the next page in our bio. If it - * can't handle the error it will return -EIO and we - * remain responsible for that page. + * metadata's readpage_io_failed_hook() always returns + * -EIO and fixes nothing. -EIO is also returned if + * data inode error could not be fixed. */ - ret = bio_readpage_error(bio, offset, page, start, end, - mirror); - if (ret == 0) { - uptodate = !bio->bi_error; - offset += len; - continue; - } + ASSERT(ret == -EIO); } readpage_ok: if (likely(uptodate)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 231503935652..5e71f1ea3391 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7910,7 +7910,6 @@ struct btrfs_retry_complete { static void btrfs_retry_endio_nocsum(struct bio *bio) { struct btrfs_retry_complete *done = bio->bi_private; - struct inode *inode; struct bio_vec *bvec; int i; @@ -7918,12 +7917,12 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) goto end; ASSERT(bio->bi_vcnt == 1); - inode = bio->bi_io_vec->bv_page->mapping->host; - ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode)); + ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); done->uptodate = 1; bio_for_each_segment_all(bvec, bio, i) - clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, 0); + clean_io_failure(BTRFS_I(done->inode), done->start, + bvec->bv_page, 0); end: complete(&done->done); bio_put(bio); @@ -7973,8 +7972,10 @@ next_block_or_try_again: start += sectorsize; - if (nr_sectors--) { + nr_sectors--; + if (nr_sectors) { pgoff += sectorsize; + ASSERT(pgoff < PAGE_SIZE); goto next_block_or_try_again; } } @@ -7986,9 +7987,7 @@ static void btrfs_retry_endio(struct bio *bio) { struct btrfs_retry_complete *done = bio->bi_private; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - struct inode *inode; struct bio_vec *bvec; - u64 start; int uptodate; int ret; int i; @@ -7998,11 +7997,8 @@ static void btrfs_retry_endio(struct bio *bio) uptodate = 1; - start = done->start; - ASSERT(bio->bi_vcnt == 1); - inode = bio->bi_io_vec->bv_page->mapping->host; - ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode)); + ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); bio_for_each_segment_all(bvec, bio, i) { ret = __readpage_endio_check(done->inode, io_bio, i, @@ -8080,8 +8076,10 @@ next: ASSERT(nr_sectors); - if (--nr_sectors) { + nr_sectors--; + if (nr_sectors) { pgoff += sectorsize; + ASSERT(pgoff < PAGE_SIZE); goto next_block; } } @@ -10523,9 +10521,9 @@ out_inode: } __attribute__((const)) -static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror) +static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) { - return 0; + return -EAGAIN; } static const struct inode_operations btrfs_dir_inode_operations = { @@ -10570,7 +10568,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { .submit_bio_hook = btrfs_submit_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .merge_bio_hook = btrfs_merge_bio_hook, - .readpage_io_failed_hook = dummy_readpage_io_failed_hook, + .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, /* optional callbacks */ .fill_delalloc = run_delalloc_range, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5da750c1087..afbea61d957e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1042,9 +1042,12 @@ static void report_reserved_underflow(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, u64 num_bytes) { - btrfs_warn(fs_info, +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(qgroup->reserved < num_bytes); + btrfs_debug(fs_info, "qgroup %llu reserved space underflow, have: %llu, to free: %llu", qgroup->qgroupid, qgroup->reserved, num_bytes); +#endif qgroup->reserved = 0; } /* @@ -1075,7 +1078,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, qgroup->excl += sign * num_bytes; qgroup->excl_cmpr += sign * num_bytes; if (sign > 0) { - if (WARN_ON(qgroup->reserved < num_bytes)) + if (qgroup->reserved < num_bytes) report_reserved_underflow(fs_info, qgroup, num_bytes); else qgroup->reserved -= num_bytes; @@ -1100,7 +1103,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, WARN_ON(sign < 0 && qgroup->excl < num_bytes); qgroup->excl += sign * num_bytes; if (sign > 0) { - if (WARN_ON(qgroup->reserved < num_bytes)) + if (qgroup->reserved < num_bytes) report_reserved_underflow(fs_info, qgroup, num_bytes); else @@ -2469,7 +2472,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, qg = unode_aux_to_qgroup(unode); - if (WARN_ON(qg->reserved < num_bytes)) + if (qg->reserved < num_bytes) report_reserved_underflow(fs_info, qg, num_bytes); else qg->reserved -= num_bytes; @@ -2948,20 +2951,20 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ret = qgroup_reserve(root, num_bytes, enforce); if (ret < 0) return ret; - atomic_add(num_bytes, &root->qgroup_meta_rsv); + atomic64_add(num_bytes, &root->qgroup_meta_rsv); return ret; } void btrfs_qgroup_free_meta_all(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; - int reserved; + u64 reserved; if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || !is_fstree(root->objectid)) return; - reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); + reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); if (reserved == 0) return; btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); @@ -2976,8 +2979,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) return; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); - atomic_sub(num_bytes, &root->qgroup_meta_rsv); + WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); + atomic64_sub(num_bytes, &root->qgroup_meta_rsv); btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 456c8901489b..a60d5bfb8a49 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6305,8 +6305,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + /* + * Check that we don't overflow at later allocations, we request + * clone_sources_count + 1 items, and compare to unsigned long inside + * access_ok. + */ if (arg->clone_sources_count > - ULLONG_MAX / sizeof(*arg->clone_sources)) { + ULONG_MAX / sizeof(struct clone_root) - 1) { ret = -EINVAL; goto out; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e0a7503ab31e..72a053c9a7f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -549,16 +549,19 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_ssd: btrfs_set_and_info(info, SSD, "use ssd allocation scheme"); + btrfs_clear_opt(info->mount_opt, NOSSD); break; case Opt_ssd_spread: btrfs_set_and_info(info, SSD_SPREAD, "use spread ssd allocation scheme"); btrfs_set_opt(info->mount_opt, SSD); + btrfs_clear_opt(info->mount_opt, NOSSD); break; case Opt_nossd: btrfs_set_and_info(info, NOSSD, "not using ssd allocation scheme"); btrfs_clear_opt(info->mount_opt, SSD); + btrfs_clear_opt(info->mount_opt, SSD_SPREAD); break; case Opt_barrier: btrfs_clear_and_info(info, NOBARRIER, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73d56eef5e60..ab8a66d852f9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6213,7 +6213,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { dev = bbio->stripes[dev_nr].dev; if (!dev || !dev->bdev || - (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) { + (bio_op(first_bio) == REQ_OP_WRITE && !dev->writeable)) { bbio_error(bbio, first_bio, logical); continue; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d449e1c03cbd..d3119fe3ab45 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2071,11 +2071,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (inode_dirty_flags) __mark_inode_dirty(inode, inode_dirty_flags); - if (ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, attr->ia_mode); - if (err) - goto out_put; - } if (mask) { req->r_inode = inode; @@ -2089,13 +2084,11 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); - if (mask & CEPH_SETATTR_SIZE) - __ceph_do_pending_vmtruncate(inode); - ceph_free_cap_flush(prealloc_cf); - return err; -out_put: - ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); + + if (err >= 0 && (mask & CEPH_SETATTR_SIZE)) + __ceph_do_pending_vmtruncate(inode); + return err; } @@ -2114,7 +2107,12 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; - return __ceph_setattr(inode, attr); + err = __ceph_setattr(inode, attr); + + if (err >= 0 && (attr->ia_valid & ATTR_MODE)) + err = posix_acl_chmod(inode, attr->ia_mode); + + return err; } /* diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 502eab6bdbc4..34fee9fb7e4f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -977,6 +977,86 @@ out: return rc; } +ssize_t cifs_file_copychunk_range(unsigned int xid, + struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags) +{ + struct inode *src_inode = file_inode(src_file); + struct inode *target_inode = file_inode(dst_file); + struct cifsFileInfo *smb_file_src; + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *src_tcon; + struct cifs_tcon *target_tcon; + ssize_t rc; + + cifs_dbg(FYI, "copychunk range\n"); + + if (src_inode == target_inode) { + rc = -EINVAL; + goto out; + } + + if (!src_file->private_data || !dst_file->private_data) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out; + } + + rc = -EXDEV; + smb_file_target = dst_file->private_data; + smb_file_src = src_file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); + target_tcon = tlink_tcon(smb_file_target->tlink); + + if (src_tcon->ses != target_tcon->ses) { + cifs_dbg(VFS, "source and target of copy not on same server\n"); + goto out; + } + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + lock_two_nondirectories(target_inode, src_inode); + + cifs_dbg(FYI, "about to flush pages\n"); + /* should we flush first and last page first */ + truncate_inode_pages(&target_inode->i_data, 0); + + if (target_tcon->ses->server->ops->copychunk_range) + rc = target_tcon->ses->server->ops->copychunk_range(xid, + smb_file_src, smb_file_target, off, len, destoff); + else + rc = -EOPNOTSUPP; + + /* force revalidate of size and timestamps of target file now + * that target is updated on the server + */ + CIFS_I(target_inode)->time = 0; + /* although unlocking in the reverse order from locking is not + * strictly necessary here it is a little cleaner to be consistent + */ + unlock_two_nondirectories(src_inode, target_inode); + +out: + return rc; +} + +static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags) +{ + unsigned int xid = get_xid(); + ssize_t rc; + + rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, + len, flags); + free_xid(xid); + return rc; +} + const struct file_operations cifs_file_ops = { .read_iter = cifs_loose_read_iter, .write_iter = cifs_file_write_iter, @@ -989,6 +1069,7 @@ const struct file_operations cifs_file_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1006,6 +1087,7 @@ const struct file_operations cifs_file_strict_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1023,6 +1105,7 @@ const struct file_operations cifs_file_direct_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, @@ -1040,6 +1123,7 @@ const struct file_operations cifs_file_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1056,6 +1140,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1072,6 +1157,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, @@ -1083,6 +1169,7 @@ const struct file_operations cifs_dir_ops = { .release = cifs_closedir, .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index da717fee3026..30bf89b1fd9a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -139,6 +139,11 @@ extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); # define cifs_listxattr NULL #endif +extern ssize_t cifs_file_copychunk_range(unsigned int xid, + struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags); + extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); #ifdef CONFIG_CIFS_NFSD_EXPORT extern const struct export_operations cifs_export_ops; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d42dd3288647..37f5a41cc50c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -243,6 +243,7 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); + int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *, struct cifsInodeInfo *, bool); /* process transaction2 response */ @@ -407,9 +408,10 @@ struct smb_version_operations { char * (*create_lease_buf)(u8 *, u8); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *, unsigned int *); - int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, - struct cifsFileInfo *target_file, u64 src_off, u64 len, - u64 dest_off); + ssize_t (*copychunk_range)(const unsigned int, + struct cifsFileInfo *src_file, + struct cifsFileInfo *target_file, + u64 src_off, u64 len, u64 dest_off); int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); @@ -946,7 +948,6 @@ struct cifs_tcon { bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ - bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; @@ -1343,6 +1344,7 @@ struct mid_q_entry { void *callback_data; /* general purpose pointer for callback */ void *resp_buf; /* pointer to received SMB header */ int mid_state; /* wish this were enum but can not pass to wait_event */ + unsigned int mid_flags; __le16 command; /* smb command code */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ @@ -1350,6 +1352,12 @@ struct mid_q_entry { bool decrypted:1; /* decrypted entry */ }; +struct close_cancelled_open { + struct cifs_fid fid; + struct cifs_tcon *tcon; + struct work_struct work; +}; + /* Make code in transport.c a little cleaner by moving update of optional stats into function below */ #ifdef CONFIG_CIFS_STATS2 @@ -1481,6 +1489,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 +/* Flags */ +#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ + /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 066950671929..5d21f00ae341 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1428,6 +1428,8 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) length = cifs_discard_remaining_data(server); dequeue_mid(mid, rdata->result); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; return length; } @@ -1541,6 +1543,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return cifs_readv_discard(server, mid); dequeue_mid(mid, false); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; return length; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7f50c8949401..b3c9d8c310f2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -904,10 +904,19 @@ cifs_demultiplex_thread(void *p) server->lstrp = jiffies; if (mid_entry != NULL) { + if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) && + mid_entry->mid_state == MID_RESPONSE_RECEIVED && + server->ops->handle_cancelled_mid) + server->ops->handle_cancelled_mid( + mid_entry->resp_buf, + server); + if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (!server->ops->is_oplock_break || - !server->ops->is_oplock_break(buf, server)) { + } else if (server->ops->is_oplock_break && + server->ops->is_oplock_break(buf, server)) { + cifs_dbg(FYI, "Received oplock break\n"); + } else { cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, @@ -3739,6 +3748,9 @@ try_mount_again: if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); tcon = NULL; + if (rc == -EACCES) + goto mount_fail_check; + goto remote_path_check; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index aa3debbba826..21d404535739 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2597,7 +2597,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, wdata->credits = credits; if (!wdata->cfile->invalidHandle || - !cifs_reopen_file(wdata->cfile, false)) + !(rc = cifs_reopen_file(wdata->cfile, false))) rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); if (rc) { @@ -3022,7 +3022,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, rdata->credits = credits; if (!rdata->cfile->invalidHandle || - !cifs_reopen_file(rdata->cfile, true)) + !(rc = cifs_reopen_file(rdata->cfile, true))) rc = server->ops->async_readv(rdata); error: if (rc) { @@ -3617,7 +3617,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, } if (!rdata->cfile->invalidHandle || - !cifs_reopen_file(rdata->cfile, true)) + !(rc = cifs_reopen_file(rdata->cfile, true))) rc = server->ops->async_readv(rdata); if (rc) { add_credits_and_wake_if(server, rdata->credits, 0); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 001528781b6b..265c45fe4ea5 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -34,71 +34,14 @@ #include "cifs_ioctl.h" #include <linux/btrfs.h> -static int cifs_file_clone_range(unsigned int xid, struct file *src_file, - struct file *dst_file) -{ - struct inode *src_inode = file_inode(src_file); - struct inode *target_inode = file_inode(dst_file); - struct cifsFileInfo *smb_file_src; - struct cifsFileInfo *smb_file_target; - struct cifs_tcon *src_tcon; - struct cifs_tcon *target_tcon; - int rc; - - cifs_dbg(FYI, "ioctl clone range\n"); - - if (!src_file->private_data || !dst_file->private_data) { - rc = -EBADF; - cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); - goto out; - } - - rc = -EXDEV; - smb_file_target = dst_file->private_data; - smb_file_src = src_file->private_data; - src_tcon = tlink_tcon(smb_file_src->tlink); - target_tcon = tlink_tcon(smb_file_target->tlink); - - if (src_tcon->ses != target_tcon->ses) { - cifs_dbg(VFS, "source and target of copy not on same server\n"); - goto out; - } - - /* - * Note: cifs case is easier than btrfs since server responsible for - * checks for proper open modes and file type and if it wants - * server could even support copy of range where source = target - */ - lock_two_nondirectories(target_inode, src_inode); - - cifs_dbg(FYI, "about to flush pages\n"); - /* should we flush first and last page first */ - truncate_inode_pages(&target_inode->i_data, 0); - - if (target_tcon->ses->server->ops->clone_range) - rc = target_tcon->ses->server->ops->clone_range(xid, - smb_file_src, smb_file_target, 0, src_inode->i_size, 0); - else - rc = -EOPNOTSUPP; - - /* force revalidate of size and timestamps of target file now - that target is updated on the server */ - CIFS_I(target_inode)->time = 0; - /* although unlocking in the reverse order from locking is not - strictly necessary here it is a little cleaner to be consistent */ - unlock_two_nondirectories(src_inode, target_inode); -out: - return rc; -} - -static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, +static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, unsigned long srcfd) { int rc; struct fd src_file; struct inode *src_inode; - cifs_dbg(FYI, "ioctl clone range\n"); + cifs_dbg(FYI, "ioctl copychunk range\n"); /* the destination must be opened for writing */ if (!(dst_file->f_mode & FMODE_WRITE)) { cifs_dbg(FYI, "file target not open for write\n"); @@ -129,7 +72,8 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, if (S_ISDIR(src_inode->i_mode)) goto out_fput; - rc = cifs_file_clone_range(xid, src_file.file, dst_file); + rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0, + src_inode->i_size, 0); out_fput: fdput(src_file); @@ -251,7 +195,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) } break; case CIFS_IOC_COPYCHUNK_FILE: - rc = cifs_ioctl_clone(xid, filep, arg); + rc = cifs_ioctl_copychunk(xid, filep, arg); break; case CIFS_IOC_SET_INTEGRITY: if (pSMBFile == NULL) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index cc93ba4da9b5..27bc360c7ffd 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1015,6 +1015,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile) return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; } +static bool +cifs_can_echo(struct TCP_Server_Info *server) +{ + if (server->tcpStatus == CifsGood) + return true; + + return false; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1049,6 +1058,7 @@ struct smb_version_operations smb1_operations = { .get_dfs_refer = CIFSGetDFSRefer, .qfs_tcon = cifs_qfs_tcon, .is_path_accessible = cifs_is_path_accessible, + .can_echo = cifs_can_echo, .query_path_info = cifs_query_path_info, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index fd516ea8b8f8..1a04b3a5beb1 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -659,3 +659,49 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); return false; } + +void +smb2_cancelled_close_fid(struct work_struct *work) +{ + struct close_cancelled_open *cancelled = container_of(work, + struct close_cancelled_open, work); + + cifs_dbg(VFS, "Close unmatched open\n"); + + SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid, + cancelled->fid.volatile_fid); + cifs_put_tcon(cancelled->tcon); + kfree(cancelled); +} + +int +smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer); + struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer; + struct cifs_tcon *tcon; + struct close_cancelled_open *cancelled; + + if (sync_hdr->Command != SMB2_CREATE || + sync_hdr->Status != STATUS_SUCCESS) + return 0; + + cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); + if (!cancelled) + return -ENOMEM; + + tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId, + sync_hdr->TreeId); + if (!tcon) { + kfree(cancelled); + return -ENOENT; + } + + cancelled->fid.persistent_fid = rsp->PersistentFileId; + cancelled->fid.volatile_fid = rsp->VolatileFileId; + cancelled->tcon = tcon; + INIT_WORK(&cancelled->work, smb2_cancelled_close_fid); + queue_work(cifsiod_wq, &cancelled->work); + + return 0; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0231108d9387..152e37f2ad92 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -21,6 +21,7 @@ #include <linux/vfs.h> #include <linux/falloc.h> #include <linux/scatterlist.h> +#include <linux/uuid.h> #include <crypto/aead.h> #include "cifsglob.h" #include "smb2pdu.h" @@ -592,8 +593,8 @@ req_res_key_exit: return rc; } -static int -smb2_clone_range(const unsigned int xid, +static ssize_t +smb2_copychunk_range(const unsigned int xid, struct cifsFileInfo *srcfile, struct cifsFileInfo *trgtfile, u64 src_off, u64 len, u64 dest_off) @@ -605,13 +606,14 @@ smb2_clone_range(const unsigned int xid, struct cifs_tcon *tcon; int chunks_copied = 0; bool chunk_sizes_updated = false; + ssize_t bytes_written, total_bytes_written = 0; pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); if (pcchunk == NULL) return -ENOMEM; - cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n"); + cifs_dbg(FYI, "in smb2_copychunk_range - about to call request res key\n"); /* Request a key from the server to identify the source of the copy */ rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), srcfile->fid.persistent_fid, @@ -669,14 +671,16 @@ smb2_clone_range(const unsigned int xid, } chunks_copied++; - src_off += le32_to_cpu(retbuf->TotalBytesWritten); - dest_off += le32_to_cpu(retbuf->TotalBytesWritten); - len -= le32_to_cpu(retbuf->TotalBytesWritten); + bytes_written = le32_to_cpu(retbuf->TotalBytesWritten); + src_off += bytes_written; + dest_off += bytes_written; + len -= bytes_written; + total_bytes_written += bytes_written; - cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n", + cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %zu\n", le32_to_cpu(retbuf->ChunksWritten), le32_to_cpu(retbuf->ChunkBytesWritten), - le32_to_cpu(retbuf->TotalBytesWritten)); + bytes_written); } else if (rc == -EINVAL) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) goto cchunk_out; @@ -713,7 +717,10 @@ smb2_clone_range(const unsigned int xid, cchunk_out: kfree(pcchunk); kfree(retbuf); - return rc; + if (rc) + return rc; + else + return total_bytes_written; } static int @@ -2322,6 +2329,7 @@ struct smb_version_operations smb20_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2377,7 +2385,7 @@ struct smb_version_operations smb20_operations = { .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .get_dfs_refer = smb2_get_dfs_refer, @@ -2404,6 +2412,7 @@ struct smb_version_operations smb21_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2459,7 +2468,7 @@ struct smb_version_operations smb21_operations = { .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .enum_snapshots = smb3_enum_snapshots, @@ -2488,6 +2497,7 @@ struct smb_version_operations smb30_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2545,7 +2555,7 @@ struct smb_version_operations smb30_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .duplicate_extents = smb2_duplicate_extents, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, @@ -2582,6 +2592,7 @@ struct smb_version_operations smb311_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2639,7 +2650,7 @@ struct smb_version_operations smb311_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .duplicate_extents = smb2_duplicate_extents, /* .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */ .wp_retry_size = smb2_wp_retry_size, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7446496850a3..02da648041fc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -562,8 +562,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) * but for time being this is our only auth choice so doesn't matter. * We just found a server which sets blob length to zero expecting raw. */ - if (blob_length == 0) + if (blob_length == 0) { cifs_dbg(FYI, "missing security blob on negprot\n"); + server->sec_ntlmssp = true; + } rc = cifs_enable_signing(server, ses->sign); if (rc) @@ -1171,9 +1173,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, else return -EIO; - if (tcon && tcon->bad_network_name) - return -ENOENT; - unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL); if (unc_path == NULL) return -ENOMEM; @@ -1185,6 +1184,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, return -EINVAL; } + /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */ + if (tcon) + tcon->tid = 0; + rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); if (rc) { kfree(unc_path); @@ -1273,8 +1276,6 @@ tcon_exit: tcon_error_exit: if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - if (tcon) - tcon->bad_network_name = true; } goto tcon_exit; } @@ -2177,6 +2178,9 @@ void smb2_reconnect_server(struct work_struct *work) struct cifs_tcon *tcon, *tcon2; struct list_head tmp_list; int tcon_exist = false; + int rc; + int resched = false; + /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ mutex_lock(&server->reconnect_mutex); @@ -2204,13 +2208,18 @@ void smb2_reconnect_server(struct work_struct *work) spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { - if (!smb2_reconnect(SMB2_INTERNAL_CMD, tcon)) + rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon); + if (!rc) cifs_reopen_persistent_handles(tcon); + else + resched = true; list_del_init(&tcon->rlist); cifs_put_tcon(tcon); } cifs_dbg(FYI, "Reconnecting tcons finished\n"); + if (resched) + queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); mutex_unlock(&server->reconnect_mutex); /* now we can safely release srv struct */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 69e35873b1de..6853454fc871 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -48,6 +48,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst); extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); +extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, + __u64 ses_id); +extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, + __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server); extern int smb3_calc_signature(struct smb_rqst *rqst, @@ -164,6 +168,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, const u64 persistent_fid, const u64 volatile_fid, const __u8 oplock_level); +extern int smb2_handle_cancelled_mid(char *buffer, + struct TCP_Server_Info *server); +void smb2_cancelled_close_fid(struct work_struct *work); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct kstatfs *FSData); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 7c3bb1bd7eed..506b67fc93d9 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -115,23 +115,70 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server) return 0; } -struct cifs_ses * -smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +static struct cifs_ses * +smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) { struct cifs_ses *ses; - spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { if (ses->Suid != ses_id) continue; - spin_unlock(&cifs_tcp_ses_lock); return ses; } + + return NULL; +} + +struct cifs_ses * +smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +{ + struct cifs_ses *ses; + + spin_lock(&cifs_tcp_ses_lock); + ses = smb2_find_smb_ses_unlocked(server, ses_id); spin_unlock(&cifs_tcp_ses_lock); + return ses; +} + +static struct cifs_tcon * +smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) +{ + struct cifs_tcon *tcon; + + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid != tid) + continue; + ++tcon->tc_count; + return tcon; + } + return NULL; } +/* + * Obtain tcon corresponding to the tid in the given + * cifs_ses + */ + +struct cifs_tcon * +smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) +{ + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + spin_lock(&cifs_tcp_ses_lock); + ses = smb2_find_smb_ses_unlocked(server, ses_id); + if (!ses) { + spin_unlock(&cifs_tcp_ses_lock); + return NULL; + } + tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid); + spin_unlock(&cifs_tcp_ses_lock); + + return tcon; +} + int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 526f0533cb4e..f6e13a977fc8 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -752,9 +752,11 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(ses->server, midQ); if (rc != 0) { + cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid); send_cancel(ses->server, rqst, midQ); spin_lock(&GlobalMid_Lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED) { + midQ->mid_flags |= MID_WAIT_CANCELLED; midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); add_credits(ses->server, 1, optype); @@ -373,6 +373,22 @@ restart: } spin_lock_irq(&mapping->tree_lock); + if (!entry) { + /* + * We needed to drop the page_tree lock while calling + * radix_tree_preload() and we didn't have an entry to + * lock. See if another thread inserted an entry at + * our index during this time. + */ + entry = __radix_tree_lookup(&mapping->page_tree, index, + NULL, &slot); + if (entry) { + radix_tree_preload_end(); + spin_unlock_irq(&mapping->tree_lock); + goto restart; + } + } + if (pmd_downgrade) { radix_tree_delete(&mapping->page_tree, index); mapping->nrexceptional--; @@ -388,19 +404,12 @@ restart: if (err) { spin_unlock_irq(&mapping->tree_lock); /* - * Someone already created the entry? This is a - * normal failure when inserting PMDs in a range - * that already contains PTEs. In that case we want - * to return -EEXIST immediately. - */ - if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD)) - goto restart; - /* - * Our insertion of a DAX PMD entry failed, most - * likely because it collided with a PTE sized entry - * at a different index in the PMD range. We haven't - * inserted anything into the radix tree and have no - * waiters to wake. + * Our insertion of a DAX entry failed, most likely + * because we were inserting a PMD entry and it + * collided with a PTE sized entry at a different + * index in the PMD range. We haven't inserted + * anything into the radix tree and have no waiters to + * wake. */ return ERR_PTR(err); } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f493af666591..fb69ee2388db 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2466,6 +2466,7 @@ extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); +extern int ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8210c1f43556..cefa9835f275 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -744,7 +744,7 @@ const struct file_operations ext4_file_operations = { const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, - .getattr = ext4_getattr, + .getattr = ext4_file_getattr, .listxattr = ext4_listxattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4247d8d25687..b9ffa9f4191f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5390,11 +5390,46 @@ err_out: int ext4_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct inode *inode; - unsigned long long delalloc_blocks; + struct inode *inode = d_inode(path->dentry); + struct ext4_inode *raw_inode; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int flags; + + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ei->i_crtime.tv_sec; + stat->btime.tv_nsec = ei->i_crtime.tv_nsec; + } + + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + if (flags & EXT4_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & EXT4_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (flags & EXT4_ENCRYPT_FL) + stat->attributes |= STATX_ATTR_ENCRYPTED; + if (flags & EXT4_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & EXT4_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); - inode = d_inode(path->dentry); generic_fillattr(inode, stat); + return 0; +} + +int ext4_file_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + u64 delalloc_blocks; + + ext4_getattr(path, stat, request_mask, query_flags); /* * If there is inline data in the inode, the inode will normally not diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6ad612c576fc..07e5e1405771 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3912,6 +3912,7 @@ const struct inode_operations ext4_dir_inode_operations = { .tmpfile = ext4_tmpfile, .rename = ext4_rename2, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, @@ -3920,6 +3921,7 @@ const struct inode_operations ext4_dir_inode_operations = { const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 73b184d161fc..5c8fc53cb0e5 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -85,17 +85,20 @@ errout: const struct inode_operations ext4_encrypted_symlink_inode_operations = { .get_link = ext4_encrypted_get_link, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, }; const struct inode_operations ext4_symlink_inode_operations = { .get_link = page_get_link, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, }; const struct inode_operations ext4_fast_symlink_inode_operations = { .get_link = simple_get_link, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8f96461236f6..dde861387a40 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; + /* + * Offset passed to mmap (before page shift) could have been + * negative when represented as a (l)off_t. + */ + if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0) + return -EINVAL; + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); + len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + /* check for overflow */ + if (len < vma_len) + return -EINVAL; inode_lock(inode); file_accessed(file); ret = -ENOMEM; - len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, @@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; if (vma->vm_flags & VM_WRITE && inode->i_size < len) - inode->i_size = len; + i_size_write(inode, len); out: inode_unlock(inode); @@ -695,14 +704,11 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode->i_mode = S_IFDIR | config->mode; inode->i_uid = config->uid; inode->i_gid = config->gid; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); - info = HUGETLBFS_I(inode); - mpol_shared_policy_init(&info->policy, NULL); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ @@ -733,7 +739,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, @@ -741,15 +746,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->private_data = resv_map; - info = HUGETLBFS_I(inode); - /* - * The policy is initialized here even if we are creating a - * private inode because initialization simply creates an - * an empty rb tree and calls rwlock_init(), later when we - * call mpol_free_shared_policy() it will just return because - * the rb tree will still be empty. - */ - mpol_shared_policy_init(&info->policy, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -937,6 +933,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } + + /* + * Any time after allocation, hugetlbfs_destroy_inode can be called + * for the inode. mpol_free_shared_policy is unconditionally called + * as part of hugetlbfs_destroy_inode. So, initialize policy here + * in case of a quick call to destroy. + * + * Note that the policy is initialized even if we are creating a + * private inode. This simplifies hugetlbfs_destroy_inode. + */ + mpol_shared_policy_init(&p->policy, NULL); + return &p->vfs_inode; } diff --git a/fs/namei.c b/fs/namei.c index d41fab78798b..19dcf62133cc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2145,6 +2145,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags) int retval = 0; const char *s = nd->name->name; + if (!*s) + flags &= ~LOOKUP_RCU; + nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; nd->depth = 0; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fb499a3f21b5..f92ba8d6c556 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2055,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL, *rehash = NULL; + struct dentry *dentry = NULL; struct rpc_task *task; int error = -EBUSY; @@ -2078,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the * rename, we unhash the dentry in advance. */ - if (!d_unhashed(new_dentry)) { + if (!d_unhashed(new_dentry)) d_drop(new_dentry); - rehash = new_dentry; - } if (d_count(new_dentry) > 2) { int err; @@ -2098,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_dentry = dentry; - rehash = NULL; new_inode = NULL; } } @@ -2119,8 +2116,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = task->tk_status; rpc_put_task(task); out: - if (rehash) - d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); /* new dentry created? */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aab32fc3d6a8..c1b5fed7c863 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -537,7 +537,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, if (put_dreq(dreq)) nfs_direct_complete(dreq); - return 0; + return requested_bytes; } /** @@ -566,7 +566,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - ssize_t result = -EINVAL; + ssize_t result = -EINVAL, requested; size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); @@ -600,14 +600,19 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; - result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); + requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); nfs_end_io_direct(inode); - if (!result) { + if (requested > 0) { result = nfs_direct_wait(dreq); - if (result > 0) + if (result > 0) { + requested -= result; iocb->ki_pos += result; + } + iov_iter_revert(iter, requested); + } else { + result = requested; } out_release: @@ -954,7 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, if (put_dreq(dreq)) nfs_direct_write_complete(dreq); - return 0; + return requested_bytes; } /** @@ -979,7 +984,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, */ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { - ssize_t result = -EINVAL; + ssize_t result = -EINVAL, requested; size_t count; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -1022,7 +1027,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); - result = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, @@ -1031,13 +1036,17 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_end_io_direct(inode); - if (!result) { + if (requested > 0) { result = nfs_direct_wait(dreq); if (result > 0) { + requested -= result; iocb->ki_pos = pos + result; /* XXX: should check the generic_write_sync retval */ generic_write_sync(iocb, result); } + iov_iter_revert(iter, requested); + } else { + result = requested; } out_release: nfs_direct_req_release(dreq); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 44347f4bdc15..acd30baca461 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task, task->tk_status); nfs4_mark_deviceid_unavailable(devid); pnfs_error_mark_layout_for_return(inode, lseg); - pnfs_set_lo_fail(lseg); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: + pnfs_set_lo_fail(lseg); reset: dprintk("%s Retry through MDS. Error %d\n", __func__, task->tk_status); @@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) return PNFS_ATTEMPTED; } +static int +filelayout_check_deviceid(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + + /* find and reference the deviceid */ + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, + lo->plh_lc_cred, gfp_flags); + if (d == NULL) + goto out; + + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is unavailable */ + if (filelayout_test_devid_unavailable(&dsaddr->id_node)) + goto out_put; + + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %u\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + status = 0; +out: + return status; +out_put: + nfs4_fl_put_deviceid(dsaddr); + goto out; +} + /* * filelayout_check_layout() * @@ -572,11 +616,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { - struct nfs4_deviceid_node *d; - struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; dprintk("--> %s\n", __func__); @@ -601,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, - lo->plh_lc_cred, gfp_flags); - if (d == NULL) - goto out; - - dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is unavailable */ - if (filelayout_test_devid_unavailable(&dsaddr->id_node)) - goto out_put; - - fl->dsaddr = dsaddr; - - if (fl->first_stripe_index >= dsaddr->stripe_count) { - dprintk("%s Bad first_stripe_index %u\n", - __func__, fl->first_stripe_index); - goto out_put; - } - - if ((fl->stripe_type == STRIPE_SPARSE && - fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || - (fl->stripe_type == STRIPE_DENSE && - fl->num_fh != dsaddr->stripe_count)) { - dprintk("%s num_fh %u not valid for given packing\n", - __func__, fl->num_fh); - goto out_put; - } - status = 0; out: dprintk("--> %s returns %d\n", __func__, status); return status; -out_put: - nfs4_fl_put_deviceid(dsaddr); - goto out; } static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) @@ -657,7 +667,6 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { struct xdr_stream stream; @@ -682,9 +691,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; - memcpy(id, p, sizeof(*id)); + memcpy(&fl->deviceid, p, sizeof(fl->deviceid)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - nfs4_print_deviceid(id); + nfs4_print_deviceid(&fl->deviceid); nfl_util = be32_to_cpup(p++); if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) @@ -831,15 +840,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, { struct nfs4_filelayout_segment *fl; int rc; - struct nfs4_deviceid id; dprintk("--> %s\n", __func__); fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -888,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return min(stripe_unit - (unsigned int)stripe_offset, size); } +static struct pnfs_layout_segment * +fl_pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + bool strict_iomode, + gfp_t gfp_flags) +{ + struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_hdr *lo; + struct nfs4_filelayout_segment *fl; + int status; + + lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, + gfp_flags); + if (!lseg) + lseg = ERR_PTR(-ENOMEM); + if (IS_ERR(lseg)) + goto out; + + lo = NFS_I(ino)->layout; + fl = FILELAYOUT_LSEG(lseg); + + status = filelayout_check_deviceid(lo, fl, gfp_flags); + if (status) + lseg = ERR_PTR(status); +out: + if (IS_ERR(lseg)) + pnfs_put_lseg(lseg); + return lseg; +} + static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_READ, - false, - GFP_KERNEL); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + false, + GFP_KERNEL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -919,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, int status; if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_RW, - false, - GFP_NOFS); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + false, + GFP_NOFS); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 2896cb833a11..79323b5dab0c 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -55,15 +55,16 @@ struct nfs4_file_layout_dsaddr { }; struct nfs4_filelayout_segment { - struct pnfs_layout_segment generic_hdr; - u32 stripe_type; - u32 commit_through_mds; - u32 stripe_unit; - u32 first_stripe_index; - u64 pattern_offset; - struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ - unsigned int num_fh; - struct nfs_fh **fh_array; + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_deviceid deviceid; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int num_fh; + struct nfs_fh **fh_array; }; struct nfs4_filelayout { diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 85fde93dff77..457cfeb1d5c1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -208,6 +208,10 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, } else goto outerr; } + + if (IS_ERR(mirror->mirror_ds)) + goto outerr; + if (mirror->mirror_ds->ds == NULL) { struct nfs4_deviceid_node *devid; devid = &mirror->mirror_ds->id_node; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c780d98035cc..201ca3f2c4ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2442,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) } nfs4_stateid_copy(&stateid, &delegation->stateid); - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || + !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags)) { rcu_read_unlock(); nfs_finish_clear_delegation_stateid(state, &stateid); return; } - if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { - rcu_read_unlock(); - return; - } - cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index dba2ff8eaa68..452334694a5d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -358,6 +358,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); + struct kvec *head = rqstp->rq_arg.head; + struct kvec *tail = rqstp->rq_arg.tail; p = decode_fh(p, &args->fh); if (!p) @@ -367,6 +369,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->count = ntohl(*p++); args->stable = ntohl(*p++); len = args->len = ntohl(*p++); + if ((void *)p > head->iov_base + head->iov_len) + return 0; /* * The count must equal the amount of data passed. */ @@ -377,9 +381,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - + rqstp->rq_arg.tail[0].iov_len - hdr; + hdr = (void*)p - head->iov_base; + dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr; /* * Round the length of the data which was specified up to * the next multiple of XDR units and then compare that @@ -396,7 +399,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, len = args->len = max_blocksize; } rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -471,6 +474,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, /* first copy and check from the first page */ old = (char*)p; vec = &rqstp->rq_arg.head[0]; + if ((void *)old > vec->iov_base + vec->iov_len) + return 0; avail = vec->iov_len - (old - (char*)vec->iov_base); while (len && avail && *old) { *new++ = *old++; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index cbeeda1e94a2..d86031b6ad79 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2489,7 +2489,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) { - if (op->opnum == OP_ILLEGAL) + if (op->opnum == OP_ILLEGAL || op->status == nfserr_notsupp) return op_encode_hdr_size * sizeof(__be32); BUG_ON(OPDESC(op)->op_rsize_bop == NULL); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 73e75ac90525..8bf8f667a8cf 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -538,13 +538,21 @@ out_free: static ssize_t nfsd_print_version_support(char *buf, int remaining, const char *sep, - unsigned vers, unsigned minor) + unsigned vers, int minor) { - const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u"; + const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; bool supported = !!nfsd_vers(vers, NFSD_TEST); - if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST)) + if (vers == 4 && minor >= 0 && + !nfsd_minorversion(minor, NFSD_TEST)) supported = false; + if (minor == 0 && supported) + /* + * special case for backward compatability. + * +4.0 is never reported, it is implied by + * +4, unless -4.0 is present. + */ + return 0; return snprintf(buf, remaining, format, sep, supported ? '+' : '-', vers, minor); } @@ -554,7 +562,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) char *mesg = buf; char *vers, *minorp, sign; int len, num, remaining; - unsigned minor; ssize_t tlen = 0; char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); @@ -575,6 +582,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (len <= 0) return -EINVAL; do { enum vers_op cmd; + unsigned minor; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); @@ -585,8 +593,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) return -EINVAL; if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; - } else - minor = 0; + } + cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { case 2: @@ -594,8 +602,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) nfsd_vers(num, cmd); break; case 4: - if (nfsd_minorversion(minor, cmd) >= 0) - break; + if (*minorp == '.') { + if (nfsd_minorversion(minor, cmd) < 0) + return -EINVAL; + } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) { + /* + * Either we have +4 and no minors are enabled, + * or we have -4 and at least one minor is enabled. + * In either case, propagate 'cmd' to all minors. + */ + minor = 0; + while (nfsd_minorversion(minor, cmd) >= 0) + minor++; + } + break; default: return -EINVAL; } @@ -612,9 +632,11 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { + int minor; if (!nfsd_vers(num, NFSD_AVAIL)) continue; - minor = 0; + + minor = -1; do { len = nfsd_print_version_support(buf, remaining, sep, num, minor); @@ -624,7 +646,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) buf += len; tlen += len; minor++; - sep = " "; + if (len) + sep = " "; } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); } out: diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fa82b7707e85..03a7e9da4da0 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -786,6 +786,7 @@ nfserrno (int errno) { nfserr_serverfault, -ESERVERFAULT }, { nfserr_serverfault, -ENFILE }, { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, }; int i; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 786a4a2cb2d7..59979f0bbd4b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -167,7 +167,8 @@ nfsd_adjust_nfsd_versions4(void) int nfsd_minorversion(u32 minorversion, enum vers_op change) { - if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && + change != NFSD_AVAIL) return -1; switch(change) { case NFSD_SET: @@ -415,23 +416,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) void nfsd_reset_versions(void) { - int found_one = 0; int i; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { - if (nfsd_program.pg_vers[i]) - found_one = 1; - } + for (i = 0; i < NFSD_NRVERS; i++) + if (nfsd_vers(i, NFSD_TEST)) + return; - if (!found_one) { - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) - nfsd_program.pg_vers[i] = nfsd_version[i]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) - nfsd_acl_program.pg_vers[i] = - nfsd_acl_version[i]; -#endif - } + for (i = 0; i < NFSD_NRVERS; i++) + if (i != 4) + nfsd_vers(i, NFSD_SET); + else { + int minor = 0; + while (nfsd_minorversion(minor, NFSD_SET) >= 0) + minor++; + } } /* @@ -749,6 +747,37 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr) return nfserr; } +/* + * A write procedure can have a large argument, and a read procedure can + * have a large reply, but no NFSv2 or NFSv3 procedure has argument and + * reply that can both be larger than a page. The xdr code has taken + * advantage of this assumption to be a sloppy about bounds checking in + * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that + * problem, we enforce these assumptions here: + */ +static bool nfs_request_too_big(struct svc_rqst *rqstp, + struct svc_procedure *proc) +{ + /* + * The ACL code has more careful bounds-checking and is not + * susceptible to this problem: + */ + if (rqstp->rq_prog != NFS_PROGRAM) + return false; + /* + * Ditto NFSv4 (which can in theory have argument and reply both + * more than a page): + */ + if (rqstp->rq_vers >= 4) + return false; + /* The reply will be small, we're OK: */ + if (proc->pc_xdrressize > 0 && + proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE)) + return false; + + return rqstp->rq_arg.len > PAGE_SIZE; +} + int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { @@ -761,6 +790,11 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_vers, rqstp->rq_proc); proc = rqstp->rq_procinfo; + if (nfs_request_too_big(rqstp, proc)) { + dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers); + *statp = rpc_garbage_args; + return 1; + } /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 41b468a6a90f..de07ff625777 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { unsigned int len, hdr, dlen; + struct kvec *head = rqstp->rq_arg.head; int v; p = decode_fh(p, &args->fh); @@ -300,9 +301,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - - hdr; + hdr = (void*)p - head->iov_base; + if (hdr > head->iov_len) + return 0; + dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; /* * Round the length of the data which was specified up to @@ -316,7 +318,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, return 0; rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; diff --git a/fs/nsfs.c b/fs/nsfs.c index 1656843e87d2..323f492e0822 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -91,6 +91,7 @@ slow: return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index c4ab6fdf17a0..e1534c9bab16 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -208,14 +208,19 @@ restart: continue; /* * Skip ops whose filesystem we don't know about unless - * it is being mounted. + * it is being mounted or unmounted. It is possible for + * a filesystem we don't know about to be unmounted if + * it fails to mount in the kernel after userspace has + * been sent the mount request. */ /* XXX: is there a better way to detect this? */ } else if (ret == -1 && !(op->upcall.type == ORANGEFS_VFS_OP_FS_MOUNT || op->upcall.type == - ORANGEFS_VFS_OP_GETATTR)) { + ORANGEFS_VFS_OP_GETATTR || + op->upcall.type == + ORANGEFS_VFS_OP_FS_UMOUNT)) { gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: skipping op tag %llu %s\n", llu(op->tag), get_opname_string(op)); diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 6333cbbdfef7..83b506020718 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -521,13 +521,11 @@ int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter, size_t n = size; if (n > PAGE_SIZE) n = PAGE_SIZE; - n = copy_page_from_iter(page, 0, n, iter); - if (!n) + if (copy_page_from_iter(page, 0, n, iter) != n) return -EFAULT; size -= n; } return 0; - } /* diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 5e48a0be9761..8afac46fcc87 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -249,6 +249,7 @@ struct orangefs_sb_info_s { char devname[ORANGEFS_MAX_SERVER_ADDR_LEN]; struct super_block *sb; int mount_pending; + int no_list; struct list_head list; }; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 67c24351a67f..629d8c917fa6 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -263,8 +263,13 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb) if (!new_op) return -ENOMEM; new_op->upcall.req.features.features = 0; - ret = service_operation(new_op, "orangefs_features", 0); - orangefs_features = new_op->downcall.resp.features.features; + ret = service_operation(new_op, "orangefs_features", + ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX); + if (!ret) + orangefs_features = + new_op->downcall.resp.features.features; + else + orangefs_features = 0; op_release(new_op); } else { orangefs_features = 0; @@ -488,7 +493,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, if (ret) { d = ERR_PTR(ret); - goto free_op; + goto free_sb_and_op; } /* @@ -514,6 +519,9 @@ struct dentry *orangefs_mount(struct file_system_type *fst, spin_unlock(&orangefs_superblocks_lock); op_release(new_op); + /* Must be removed from the list now. */ + ORANGEFS_SB(sb)->no_list = 0; + if (orangefs_userspace_version >= 20906) { new_op = op_alloc(ORANGEFS_VFS_OP_FEATURES); if (!new_op) @@ -528,6 +536,10 @@ struct dentry *orangefs_mount(struct file_system_type *fst, return dget(sb->s_root); +free_sb_and_op: + /* Will call orangefs_kill_sb with sb not in list. */ + ORANGEFS_SB(sb)->no_list = 1; + deactivate_locked_super(sb); free_op: gossip_err("orangefs_mount: mount request failed with %d\n", ret); if (ret == -EINVAL) { @@ -553,12 +565,14 @@ void orangefs_kill_sb(struct super_block *sb) */ orangefs_unmount_sb(sb); - /* remove the sb from our list of orangefs specific sb's */ - - spin_lock(&orangefs_superblocks_lock); - __list_del_entry(&ORANGEFS_SB(sb)->list); /* not list_del_init */ - ORANGEFS_SB(sb)->list.prev = NULL; - spin_unlock(&orangefs_superblocks_lock); + if (!ORANGEFS_SB(sb)->no_list) { + /* remove the sb from our list of orangefs specific sb's */ + spin_lock(&orangefs_superblocks_lock); + /* not list_del_init */ + __list_del_entry(&ORANGEFS_SB(sb)->list); + ORANGEFS_SB(sb)->list.prev = NULL; + spin_unlock(&orangefs_superblocks_lock); + } /* * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8f91ec66baa3..d04ea4349909 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1074,6 +1074,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) if ((table->proc_handler == proc_dostring) || (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_douintvec) || (table->proc_handler == proc_dointvec_minmax) || (table->proc_handler == proc_dointvec_jiffies) || (table->proc_handler == proc_dointvec_userhz_jiffies) || diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f08bd31c1081..312578089544 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); + pmd_t pmd = *pmdp; + + /* See comment in change_huge_pmd() */ + pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(*pmdp)) + pmd = pmd_mkdirty(pmd); + if (pmd_young(*pmdp)) + pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); diff --git a/fs/stat.c b/fs/stat.c index fa0be59340cc..a257b872a53d 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -130,9 +130,13 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_statx_fd(unsigned int fd, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct fd f = fdget_raw(fd); + struct fd f; int error = -EBADF; + if (query_flags & ~KSTAT_QUERY_FLAGS) + return -EINVAL; + + f = fdget_raw(fd); if (f.file) { error = vfs_getattr(&f.file->f_path, stat, request_mask, query_flags); @@ -155,9 +159,6 @@ EXPORT_SYMBOL(vfs_statx_fd); * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink * at the given name from being referenced. * - * The caller must have preset stat->request_mask as for vfs_getattr(). The - * flags are also used to load up stat->query_flags. - * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_statx(int dfd, const char __user *filename, int flags, @@ -509,58 +510,50 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ -static inline int __put_timestamp(struct timespec *kts, - struct statx_timestamp __user *uts) -{ - return (__put_user(kts->tv_sec, &uts->tv_sec ) || - __put_user(kts->tv_nsec, &uts->tv_nsec ) || - __put_user(0, &uts->__reserved )); -} - -/* - * Set the statx results. - */ -static long statx_set_result(struct kstat *stat, struct statx __user *buffer) +static noinline_for_stack int +cp_statx(const struct kstat *stat, struct statx __user *buffer) { - uid_t uid = from_kuid_munged(current_user_ns(), stat->uid); - gid_t gid = from_kgid_munged(current_user_ns(), stat->gid); - - if (__put_user(stat->result_mask, &buffer->stx_mask ) || - __put_user(stat->mode, &buffer->stx_mode ) || - __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) || - __put_user(stat->nlink, &buffer->stx_nlink ) || - __put_user(uid, &buffer->stx_uid ) || - __put_user(gid, &buffer->stx_gid ) || - __put_user(stat->attributes, &buffer->stx_attributes ) || - __put_user(stat->blksize, &buffer->stx_blksize ) || - __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) || - __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) || - __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) || - __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) || - __put_timestamp(&stat->atime, &buffer->stx_atime ) || - __put_timestamp(&stat->btime, &buffer->stx_btime ) || - __put_timestamp(&stat->ctime, &buffer->stx_ctime ) || - __put_timestamp(&stat->mtime, &buffer->stx_mtime ) || - __put_user(stat->ino, &buffer->stx_ino ) || - __put_user(stat->size, &buffer->stx_size ) || - __put_user(stat->blocks, &buffer->stx_blocks ) || - __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) || - __clear_user(&buffer->__spare2, sizeof(buffer->__spare2))) - return -EFAULT; - - return 0; + struct statx tmp; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.stx_mask = stat->result_mask; + tmp.stx_blksize = stat->blksize; + tmp.stx_attributes = stat->attributes; + tmp.stx_nlink = stat->nlink; + tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); + tmp.stx_mode = stat->mode; + tmp.stx_ino = stat->ino; + tmp.stx_size = stat->size; + tmp.stx_blocks = stat->blocks; + tmp.stx_attributes_mask = stat->attributes_mask; + tmp.stx_atime.tv_sec = stat->atime.tv_sec; + tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; + tmp.stx_btime.tv_sec = stat->btime.tv_sec; + tmp.stx_btime.tv_nsec = stat->btime.tv_nsec; + tmp.stx_ctime.tv_sec = stat->ctime.tv_sec; + tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec; + tmp.stx_mtime.tv_sec = stat->mtime.tv_sec; + tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec; + tmp.stx_rdev_major = MAJOR(stat->rdev); + tmp.stx_rdev_minor = MINOR(stat->rdev); + tmp.stx_dev_major = MAJOR(stat->dev); + tmp.stx_dev_minor = MINOR(stat->dev); + + return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. - * @filename: File to stat *or* NULL. + * @filename: File to stat or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * - * Note that if filename is NULL, then it does the equivalent of fstat() using - * dfd to indicate the file of interest. + * Note that fstat() can be emulated by setting dfd to the fd of interest, + * supplying "" as the filename and setting AT_EMPTY_PATH in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, @@ -570,18 +563,16 @@ SYSCALL_DEFINE5(statx, struct kstat stat; int error; + if (mask & STATX__RESERVED) + return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; - if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer))) - return -EFAULT; - if (filename) - error = vfs_statx(dfd, filename, flags, &stat, mask); - else - error = vfs_statx_fd(dfd, &stat, mask, flags); + error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; - return statx_set_result(&stat, buffer); + + return cp_statx(&stat, buffer); } /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b803213d1307..39c75a86c67f 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = of->kn->parent->priv; - size_t len; + ssize_t len; /* * If buf != of->prealloc_buf, we don't know how @@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); + if (len < 0) + return len; if (pos) { if (len <= pos) return 0; len -= pos; memmove(buf, buf + pos, len); } - return min(count, len); + return min_t(ssize_t, count, len); } /* kernfs write callback for regular sysfs files */ diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 1e712a364680..718b749fa11a 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -32,6 +32,7 @@ #include <linux/math64.h> #include <linux/uaccess.h> #include <linux/random.h> +#include <linux/ctype.h> #include "ubifs.h" static DEFINE_SPINLOCK(dbg_lock); @@ -286,8 +287,10 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) break; } - pr_err("\t%d: %s (%s)\n", - count++, dent->name, get_dent_type(dent->type)); + pr_err("\t%d: inode %llu, type %s, len %d\n", + count++, (unsigned long long) le64_to_cpu(dent->inum), + get_dent_type(dent->type), + le16_to_cpu(dent->nlen)); fname_name(&nm) = dent->name; fname_len(&nm) = le16_to_cpu(dent->nlen); @@ -464,7 +467,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - pr_cont("%c", dent->name[i]); + pr_cont("%c", isprint(dent->name[i]) ? + dent->name[i] : '?'); } pr_cont("\n"); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 30825d882aa9..b777bddaa1dd 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -606,8 +606,8 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } while (1) { - dbg_gen("feed '%s', ino %llu, new f_pos %#x", - dent->name, (unsigned long long)le64_to_cpu(dent->inum), + dbg_gen("ino %llu, new f_pos %#x", + (unsigned long long)le64_to_cpu(dent->inum), key_hash_flash(c, &dent->key)); ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); @@ -748,6 +748,11 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, goto out_fname; lock_2_inodes(dir, inode); + + /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ + if (inode->i_nlink == 0) + ubifs_delete_orphan(c, inode->i_ino); + inc_nlink(inode); ihold(inode); inode->i_ctime = ubifs_current_time(inode); @@ -768,6 +773,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); + if (inode->i_nlink == 0) + ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); @@ -1068,8 +1075,10 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, } err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); - if (err) + if (err) { + kfree(dev); goto out_budg; + } sz_change = CALC_DENT_SIZE(fname_len(&nm)); @@ -1316,9 +1325,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned int uninitialized_var(saved_nlink); struct fscrypt_name old_nm, new_nm; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1d227b0fcf49..f7555fc25877 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) * protocols: aa:... bb:... */ seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n", - pending, total, UFFD_API, UFFD_API_FEATURES, + pending, total, UFFD_API, ctx->features, UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS); } #endif diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index eb00bc133bca..39f8604f764e 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,8 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, - int size); +extern int xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 96b45cd6c63f..e84af093b2ab 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -632,36 +632,49 @@ xfs_dir2_sf_check( /* Verify the consistency of an inline directory. */ int xfs_dir2_sf_verify( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *sfp, - int size) + struct xfs_inode *ip) { + struct xfs_mount *mp = ip->i_mount; + struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; const struct xfs_dir_ops *dops; + struct xfs_ifork *ifp; xfs_ino_t ino; int i; int i8count; int offset; + int size; + int error; __uint8_t filetype; + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + /* + * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops, + * so we can only trust the mountpoint to have the right pointer. + */ dops = xfs_dir_get_ops(mp, NULL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; + size = ifp->if_bytes; + /* * Give up if the directory is way too short. */ - XFS_WANT_CORRUPTED_RETURN(mp, size > - offsetof(struct xfs_dir2_sf_hdr, parent)); - XFS_WANT_CORRUPTED_RETURN(mp, size >= - xfs_dir2_sf_hdr_size(sfp->i8count)); + if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || + size < xfs_dir2_sf_hdr_size(sfp->i8count)) + return -EFSCORRUPTED; endp = (char *)sfp + size; /* Check .. entry */ ino = dops->sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; - XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; offset = dops->data_first_offset; /* Check all reported entries */ @@ -672,12 +685,12 @@ xfs_dir2_sf_verify( * Check the fixed-offset parts of the structure are * within the data buffer. */ - XFS_WANT_CORRUPTED_RETURN(mp, - ((char *)sfep + sizeof(*sfep)) < endp); + if (((char *)sfep + sizeof(*sfep)) >= endp) + return -EFSCORRUPTED; /* Don't allow names with known bad length. */ - XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); - XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + if (sfep->namelen == 0) + return -EFSCORRUPTED; /* * Check that the variable-length part of the structure is @@ -685,33 +698,39 @@ xfs_dir2_sf_verify( * name component, so nextentry is an acceptable test. */ next_sfep = dops->sf_nextentry(sfp, sfep); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + if (endp < (char *)next_sfep) + return -EFSCORRUPTED; /* Check that the offsets always increase. */ - XFS_WANT_CORRUPTED_RETURN(mp, - xfs_dir2_sf_get_offset(sfep) >= offset); + if (xfs_dir2_sf_get_offset(sfep) < offset) + return -EFSCORRUPTED; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; - XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); - XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + if (filetype >= XFS_DIR3_FT_MAX) + return -EFSCORRUPTED; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); sfep = next_sfep; } - XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); - XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + if (i8count != sfp->i8count) + return -EFSCORRUPTED; + if ((void *)sfep != (void *)endp) + return -EFSCORRUPTED; /* Make sure this whole thing ought to be in local format. */ - XFS_WANT_CORRUPTED_RETURN(mp, offset + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) + return -EFSCORRUPTED; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 9653e964eda4..8a37efe04de3 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -212,6 +212,16 @@ xfs_iformat_fork( if (error) return error; + /* Check inline dir contents. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + dip->di_format == XFS_DINODE_FMT_LOCAL) { + error = xfs_dir2_sf_verify(ip); + if (error) { + xfs_idestroy_fork(ip, XFS_DATA_FORK); + return error; + } + } + if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -322,8 +332,6 @@ xfs_iformat_local( int whichfork, int size) { - int error; - /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in @@ -339,14 +347,6 @@ xfs_iformat_local( return -EFSCORRUPTED; } - if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { - error = xfs_dir2_sf_verify(ip->i_mount, - (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), - size); - if (error) - return error; - } - xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -867,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ -int +void xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -877,7 +877,6 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; - int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -886,7 +885,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return 0; + return; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -894,19 +893,12 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return 0; + return; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: - if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { - error = xfs_dir2_sf_verify(mp, - (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, - ifp->if_bytes); - if (error) - return error; - } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -959,7 +951,6 @@ xfs_iflush_fork( ASSERT(0); break; } - return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 132dc59fdde6..7fb8365326d1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 142bbbe06114..8795e9cd867c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1311,8 +1311,16 @@ xfs_free_file_space( /* * Now that we've unmap all full blocks we'll have to zero out any * partial block at the beginning and/or end. xfs_zero_range is - * smart enough to skip any holes, including those we just created. + * smart enough to skip any holes, including those we just created, + * but we must take care not to zero beyond EOF and enlarge i_size. */ + + if (offset >= XFS_ISIZE(ip)) + return 0; + + if (offset + len > XFS_ISIZE(ip)) + len = XFS_ISIZE(ip) - offset; + return xfs_zero_range(ip, offset, len, NULL); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c7fe2c2123ab..7605d8396596 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -50,6 +50,7 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_inode_zone; @@ -3475,7 +3476,6 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; - int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3547,6 +3547,12 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; + /* Check the inline directory data. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && + xfs_dir2_sf_verify(ip)) + goto corrupt_out; + /* * Copy the dirty parts of the inode into the on-disk inode. We always * copy out the core of the inode, because if the inode is dirty at all @@ -3558,14 +3564,9 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (error) - return error; - if (XFS_IFORK_Q(ip)) { - error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); - if (error) - return error; - } + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (XFS_IFORK_Q(ip)) + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); xfs_inobp_check(mp, bp); /* diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 229cc6a6d8ef..ebfc13350f9a 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -516,6 +516,20 @@ xfs_vn_getattr( stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + if (ip->i_d.di_version == 3) { + if (request_mask & STATX_BTIME) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ip->i_d.di_crtime.t_sec; + stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec; + } + } + + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; switch (inode->i_mode & S_IFMT) { case S_IFBLK: diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2a6d9b1558e0..26d67ce3c18d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -583,7 +583,7 @@ xfs_inumbers( return error; bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); - buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); + buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); do { struct xfs_inobt_rec_incore r; int stat; |