diff options
Diffstat (limited to 'fs')
87 files changed, 925 insertions, 546 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 879ff9c7ffd0..6466153f2bf0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -664,8 +664,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned long random_variable = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { + if (current->flags & PF_RANDOMIZE) { random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 69ec23daa25e..a1e6860b6f46 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -574,7 +574,7 @@ static int load_flat_file(struct linux_binprm *bprm, MAX_SHARED_LIBS * sizeof(unsigned long), FLAT_DATA_ALIGN); - pr_debug("Allocated data+bss+stack (%ld bytes): %lx\n", + pr_debug("Allocated data+bss+stack (%u bytes): %lx\n", data_len + bss_len + stack_len, datapos); fpos = ntohl(hdr->data_start); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 080e2ebb8aa0..f45b61fe9a9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3516,7 +3516,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) struct bio *bio = device->flush_bio; if (!device->flush_bio_sent) - return 0; + return BLK_STS_OK; device->flush_bio_sent = 0; wait_for_completion_io(&device->flush_wait); @@ -3563,7 +3563,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; write_dev_flush(dev); - dev->last_flush_error = 0; + dev->last_flush_error = BLK_STS_OK; } /* wait for all the barriers */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 375f8c728d91..e3b0b4196d3d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4825,10 +4825,6 @@ skip_async: else flush = BTRFS_RESERVE_NO_FLUSH; spin_lock(&space_info->lock); - if (can_overcommit(fs_info, space_info, orig, flush, false)) { - spin_unlock(&space_info->lock); - break; - } if (list_empty(&space_info->tickets) && list_empty(&space_info->priority_tickets)) { spin_unlock(&space_info->lock); @@ -7589,6 +7585,10 @@ search: u64 offset; int cached; + /* If the block group is read-only, we can skip it entirely. */ + if (unlikely(block_group->ro)) + continue; + btrfs_grab_block_group(block_group, delalloc); search_start = block_group->key.objectid; @@ -7624,8 +7624,6 @@ have_block_group: if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) goto loop; - if (unlikely(block_group->ro)) - goto loop; /* * Ok we want to try and use the cluster allocator, so @@ -7839,6 +7837,7 @@ loop: failed_alloc = false; BUG_ON(index != get_block_group_index(block_group)); btrfs_release_block_group(block_group, delalloc); + cond_resched(); } up_read(&space_info->groups_sem); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 95c212037095..24bcd5cd9cf2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7924,11 +7924,12 @@ err: return ret; } -static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, - int mirror_num) +static inline blk_status_t submit_dio_repair_bio(struct inode *inode, + struct bio *bio, + int mirror_num) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int ret; + blk_status_t ret; BUG_ON(bio_op(bio) == REQ_OP_WRITE); @@ -7980,10 +7981,10 @@ static int btrfs_check_dio_repairable(struct inode *inode, return 1; } -static int dio_read_error(struct inode *inode, struct bio *failed_bio, - struct page *page, unsigned int pgoff, - u64 start, u64 end, int failed_mirror, - bio_end_io_t *repair_endio, void *repair_arg) +static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, + struct page *page, unsigned int pgoff, + u64 start, u64 end, int failed_mirror, + bio_end_io_t *repair_endio, void *repair_arg) { struct io_failure_record *failrec; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -7993,18 +7994,19 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, int read_mode = 0; int segs; int ret; + blk_status_t status; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) - return ret; + return errno_to_blk_status(ret); ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { free_io_failure(failure_tree, io_tree, failrec); - return -EIO; + return BLK_STS_IOERR; } segs = bio_segments(failed_bio); @@ -8022,13 +8024,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", read_mode, failrec->this_mirror, failrec->in_validation); - ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); - if (ret) { + status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); + if (status) { free_io_failure(failure_tree, io_tree, failrec); bio_put(bio); } - return ret; + return status; } struct btrfs_retry_complete { @@ -8065,8 +8067,8 @@ end: bio_put(bio); } -static int __btrfs_correct_data_nocsum(struct inode *inode, - struct btrfs_io_bio *io_bio) +static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, + struct btrfs_io_bio *io_bio) { struct btrfs_fs_info *fs_info; struct bio_vec bvec; @@ -8076,8 +8078,8 @@ static int __btrfs_correct_data_nocsum(struct inode *inode, unsigned int pgoff; u32 sectorsize; int nr_sectors; - int ret; - int err = 0; + blk_status_t ret; + blk_status_t err = BLK_STS_OK; fs_info = BTRFS_I(inode)->root->fs_info; sectorsize = fs_info->sectorsize; @@ -8183,11 +8185,12 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode, int csum_pos; bool uptodate = (err == 0); int ret; + blk_status_t status; fs_info = BTRFS_I(inode)->root->fs_info; sectorsize = fs_info->sectorsize; - err = 0; + err = BLK_STS_OK; start = io_bio->logical; done.inode = inode; io_bio->bio.bi_iter = io_bio->iter; @@ -8209,12 +8212,12 @@ try_again: done.start = start; init_completion(&done.done); - ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, - pgoff, start, start + sectorsize - 1, - io_bio->mirror_num, - btrfs_retry_endio, &done); - if (ret) { - err = errno_to_blk_status(ret); + status = dio_read_error(inode, &io_bio->bio, bvec.bv_page, + pgoff, start, start + sectorsize - 1, + io_bio->mirror_num, btrfs_retry_endio, + &done); + if (status) { + err = status; goto next; } @@ -8250,7 +8253,7 @@ static blk_status_t btrfs_subio_endio_read(struct inode *inode, if (unlikely(err)) return __btrfs_correct_data_nocsum(inode, io_bio); else - return 0; + return BLK_STS_OK; } else { return __btrfs_subio_endio_read(inode, io_bio, err); } @@ -8423,9 +8426,9 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, return 0; } -static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, - u64 file_offset, int skip_sum, - int async_submit) +static inline blk_status_t +__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, + int skip_sum, int async_submit) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_dio_private *dip = bio->bi_private; @@ -8488,6 +8491,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, int clone_offset = 0; int clone_len; int ret; + blk_status_t status; map_length = orig_bio->bi_iter.bi_size; submit_len = map_length; @@ -8537,9 +8541,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, */ atomic_inc(&dip->pending_bios); - ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, - async_submit); - if (ret) { + status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, + async_submit); + if (status) { bio_put(bio); atomic_dec(&dip->pending_bios); goto out_err; @@ -8557,9 +8561,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, } while (submit_len > 0); submit: - ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, - async_submit); - if (!ret) + status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, + async_submit); + if (!status) return 0; bio_put(bio); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 208638384cd2..2cf6ba40f7c4 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -905,7 +905,7 @@ static void raid_write_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - err = 0; + err = BLK_STS_OK; /* OK, we have read all the stripes we need to. */ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? @@ -1324,7 +1324,7 @@ write_data: return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -1475,7 +1475,7 @@ static void raid_rmw_end_io(struct bio *bio) cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } static void async_rmw_stripe(struct btrfs_raid_bio *rbio) @@ -1579,7 +1579,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) return 0; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return -EIO; finish: @@ -1795,12 +1795,12 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) void **pointers; int faila = -1, failb = -1; struct page *page; - int err; + blk_status_t err; int i; pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers) { - err = -ENOMEM; + err = BLK_STS_RESOURCE; goto cleanup_io; } @@ -1856,7 +1856,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) * a bad data or Q stripe. * TODO, we should redo the xor here. */ - err = -EIO; + err = BLK_STS_IOERR; goto cleanup; } /* @@ -1882,7 +1882,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { if (rbio->bbio->raid_map[faila] == RAID5_P_STRIPE) { - err = -EIO; + err = BLK_STS_IOERR; goto cleanup; } /* @@ -1954,13 +1954,13 @@ pstripe: } } - err = 0; + err = BLK_STS_OK; cleanup: kfree(pointers); cleanup_io: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == 0) + if (err == BLK_STS_OK) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -1968,7 +1968,7 @@ cleanup_io: rbio_orig_end_io(rbio, err); } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { rbio_orig_end_io(rbio, err); - } else if (err == 0) { + } else if (err == BLK_STS_OK) { rbio->faila = -1; rbio->failb = -1; @@ -2005,7 +2005,7 @@ static void raid_recover_end_io(struct bio *bio) return; if (atomic_read(&rbio->error) > rbio->bbio->max_errors) - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); else __raid_recover_end_io(rbio); } @@ -2104,7 +2104,7 @@ out: cleanup: if (rbio->operation == BTRFS_RBIO_READ_REBUILD || rbio->operation == BTRFS_RBIO_REBUILD_MISSING) - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return -EIO; } @@ -2431,7 +2431,7 @@ submit_write: nr_data = bio_list_size(&bio_list); if (!nr_data) { /* Every parity is right */ - rbio_orig_end_io(rbio, 0); + rbio_orig_end_io(rbio, BLK_STS_OK); return; } @@ -2451,7 +2451,7 @@ submit_write: return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) @@ -2519,7 +2519,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -2633,7 +2633,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return; finish: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f20ef211a73d..3a11ae63676e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2153,8 +2153,7 @@ process_leaf: u32 this_len = sizeof(*di) + name_len + data_len; char *name; - ret = verify_dir_item(fs_info, path->nodes[0], - path->slots[0], di); + ret = verify_dir_item(fs_info, path->nodes[0], i, di); if (ret) { ret = -EIO; goto out; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5eb7217738ed..bd679bc7a1a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2702,7 +2702,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->chunk_mutex); old_total = btrfs_super_total_bytes(super_copy); - diff = new_size - device->total_bytes; + diff = round_down(new_size - device->total_bytes, fs_info->sectorsize); if (new_size <= device->total_bytes || device->is_tgtdev_for_dev_replace) { @@ -4406,7 +4406,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 diff; new_size = round_down(new_size, fs_info->sectorsize); - diff = old_size - new_size; + diff = round_down(old_size - new_size, fs_info->sectorsize); if (device->is_tgtdev_for_dev_replace) return -EINVAL; @@ -6212,8 +6212,8 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) } } -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit) +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit) { struct btrfs_device *dev; struct bio *first_bio = bio; @@ -6233,7 +6233,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } total_devs = bbio->num_stripes; @@ -6256,7 +6256,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } if (map_length < length) { @@ -6283,7 +6283,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, dev_nr, async_submit); } btrfs_bio_counter_dec(fs_info); - return 0; + return BLK_STS_OK; } struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6f45fd60d15a..93277fc60930 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -74,7 +74,7 @@ struct btrfs_device { int missing; int can_discard; int is_tgtdev_for_dev_replace; - int last_flush_error; + blk_status_t last_flush_error; int flush_bio_sent; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED @@ -416,8 +416,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit); +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 50836280a6f8..1bc709fe330a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -189,7 +189,7 @@ static int ceph_releasepage(struct page *page, gfp_t g) /* * read a single page, without unlocking it. */ -static int readpage_nounlock(struct file *filp, struct page *page) +static int ceph_do_readpage(struct file *filp, struct page *page) { struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); @@ -219,7 +219,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) err = ceph_readpage_from_fscache(inode, page); if (err == 0) - goto out; + return -EINPROGRESS; dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); @@ -249,8 +249,11 @@ out: static int ceph_readpage(struct file *filp, struct page *page) { - int r = readpage_nounlock(filp, page); - unlock_page(page); + int r = ceph_do_readpage(filp, page); + if (r != -EINPROGRESS) + unlock_page(page); + else + r = 0; return r; } @@ -1237,7 +1240,7 @@ retry_locked: goto retry_locked; r = writepage_nounlock(page, NULL); if (r < 0) - goto fail_nosnap; + goto fail_unlock; goto retry_locked; } @@ -1265,11 +1268,14 @@ retry_locked: } /* we need to read it. */ - r = readpage_nounlock(file, page); - if (r < 0) - goto fail_nosnap; + r = ceph_do_readpage(file, page); + if (r < 0) { + if (r == -EINPROGRESS) + return -EAGAIN; + goto fail_unlock; + } goto retry_locked; -fail_nosnap: +fail_unlock: unlock_page(page); return r; } diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index fd1172823f86..337f88673ed9 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -297,13 +297,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) } } -static void ceph_vfs_readpage_complete(struct page *page, void *data, int error) -{ - if (!error) - SetPageUptodate(page); -} - -static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error) +static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error) { if (!error) SetPageUptodate(page); @@ -331,7 +325,7 @@ int ceph_readpage_from_fscache(struct inode *inode, struct page *page) return -ENOBUFS; ret = fscache_read_or_alloc_page(ci->fscache, page, - ceph_vfs_readpage_complete, NULL, + ceph_readpage_from_fscache_complete, NULL, GFP_KERNEL); switch (ret) { @@ -360,7 +354,7 @@ int ceph_readpages_from_fscache(struct inode *inode, return -ENOBUFS; ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages, - ceph_vfs_readpage_complete_unlock, + ceph_readpage_from_fscache_complete, NULL, mapping_gfp_mask(mapping)); switch (ret) { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e071d23f6148..ef7240ace576 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -271,6 +271,11 @@ out: if (ret < 0) err = ret; dput(last); + /* last_name no longer match cache index */ + if (fi->readdir_cache_idx >= 0) { + fi->readdir_cache_idx = -1; + fi->dir_release_count = 0; + } } return err; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 59647eb72c5f..83a8f52cd879 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1223,6 +1223,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, char *tmp_end, *value; char delim; bool got_ip = false; + bool got_version = false; unsigned short port = 0; struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr; @@ -1874,24 +1875,35 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, pr_warn("CIFS: server netbiosname longer than 15 truncated.\n"); break; case Opt_ver: + /* version of mount userspace tools, not dialect */ string = match_strdup(args); if (string == NULL) goto out_nomem; + /* If interface changes in mount.cifs bump to new ver */ if (strncasecmp(string, "1", 1) == 0) { + if (strlen(string) > 1) { + pr_warn("Bad mount helper ver=%s. Did " + "you want SMB1 (CIFS) dialect " + "and mean to type vers=1.0 " + "instead?\n", string); + goto cifs_parse_mount_err; + } /* This is the default */ break; } /* For all other value, error */ - pr_warn("CIFS: Invalid version specified\n"); + pr_warn("CIFS: Invalid mount helper version specified\n"); goto cifs_parse_mount_err; case Opt_vers: + /* protocol version (dialect) */ string = match_strdup(args); if (string == NULL) goto out_nomem; if (cifs_parse_smb_version(string, vol) != 0) goto cifs_parse_mount_err; + got_version = true; break; case Opt_sec: string = match_strdup(args); @@ -1973,6 +1985,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, else if (override_gid == 1) pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n"); + if (got_version == false) + pr_warn("No dialect specified on mount. Default has changed to " + "a more secure dialect, SMB3 (vers=3.0), from CIFS " + "(SMB1). To use the less secure SMB1 dialect to access " + "old servers which do not support SMB3 specify vers=1.0" + " on mount. For somewhat newer servers such as Windows " + "7 try vers=2.1.\n"); + kfree(mountdata_copy); return 0; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 56366e984076..e702d48bd023 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -194,15 +194,20 @@ cifs_bp_rename_retry: } /* + * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. */ static int -check_name(struct dentry *direntry) +check_name(struct dentry *direntry, struct cifs_tcon *tcon) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); int i; + if (unlikely(direntry->d_name.len > + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength))) + return -ENAMETOOLONG; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { @@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, return finish_no_open(file, res); } - rc = check_name(direntry); - if (rc) - return rc; - xid = get_xid(); cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", @@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } tcon = tlink_tcon(tlink); + + rc = check_name(direntry, tcon); + if (rc) + goto out_free_xid; + server = tcon->ses->server; if (server->ops->new_lease_key) @@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - rc = check_name(direntry); + rc = check_name(direntry, pTcon); if (rc) goto lookup_out; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5fb2fc2d0080..7aa67206f6da 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -514,7 +514,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) * No tcon so can't do * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); */ - if (rc != 0) + if (rc == -EOPNOTSUPP) { + cifs_dbg(VFS, "Dialect not supported by server. Consider " + "specifying vers=1.0 or vers=2.1 on mount for accessing" + " older servers\n"); + goto neg_exit; + } else if (rc != 0) goto neg_exit; cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); @@ -3219,8 +3224,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); - kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); - kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + kst->f_bfree = kst->f_bavail = + le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); return; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 18700fd25a0b..2826882c81d1 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* BB FIXME - analyze following length BB */ -#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */ +/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ +#define MAX_SMB2_HDR_SIZE 0x00b0 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) @@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pte_t pte, *ptep = NULL; pmd_t *pmdp = NULL; spinlock_t *ptl; - bool changed; i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) { - unsigned long address; + unsigned long address, start, end; cond_resched(); @@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, continue; address = pgoff_address(index, vma); - changed = false; - if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl)) + + /* + * Note because we provide start/end to follow_pte_pmd it will + * call mmu_notifier_invalidate_range_start() on our behalf + * before taking any lock. + */ + if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) continue; if (pmdp) { @@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); set_pmd_at(vma->vm_mm, address, pmdp, pmd); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pmd: spin_unlock(ptl); #endif @@ -691,13 +695,12 @@ unlock_pmd: pte = pte_wrprotect(pte); pte = pte_mkclean(pte); set_pte_at(vma->vm_mm, address, ptep, pte); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pte: pte_unmap_unlock(ptep, ptl); } - if (changed) - mmu_notifier_invalidate_page(vma->vm_mm, address); + mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); } i_mmap_unlock_read(mapping); } @@ -1383,6 +1386,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the radix tree. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + goto fallback; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..7eae33ffa3fc 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,6 +133,50 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } +static int devpts_ptmx_path(struct path *path) +{ + struct super_block *sb; + int err; + + /* Has the devpts filesystem already been found? */ + if (path->mnt->mnt_sb->s_magic == DEVPTS_SUPER_MAGIC) + return 0; + + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(path); + if (err) + return err; + + /* Is the path the root of a devpts filesystem? */ + sb = path->mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path->mnt->mnt_root != sb->s_root)) + return -ENODEV; + + return 0; +} + +struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) +{ + struct path path; + int err; + + path = filp->f_path; + path_get(&path); + + err = devpts_ptmx_path(&path); + dput(path.dentry); + if (err) { + mntput(path.mnt); + path.mnt = ERR_PTR(err); + } + if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { + mntput(path.mnt); + path.mnt = ERR_PTR(-ENODEV); + } + return path.mnt; +} + struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; @@ -143,27 +187,16 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); - /* Has the devpts filesystem already been found? */ - sb = path.mnt->mnt_sb; - if (sb->s_magic != DEVPTS_SUPER_MAGIC) { - /* Is a devpts filesystem at "pts" in the same directory? */ - err = path_pts(&path); - if (err) { - result = ERR_PTR(err); - goto out; - } - - /* Is the path the root of a devpts filesystem? */ - result = ERR_PTR(-ENODEV); - sb = path.mnt->mnt_sb; - if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || - (path.mnt->mnt_root != sb->s_root)) - goto out; + err = devpts_ptmx_path(&path); + if (err) { + result = ERR_PTR(err); + goto out; } /* * pty code needs to hold extra references in case of last /dev/tty close */ + sb = path.mnt->mnt_sb; atomic_inc(&sb->s_active); result = DEVPTS_SB(sb); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e767e4389cb1..adbe328b957c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -600,8 +600,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq) wait_queue_head_t *whead; rcu_read_lock(); - /* If it is cleared by POLLFREE, it should be rcu-safe */ - whead = rcu_dereference(pwq->whead); + /* + * If it is cleared by POLLFREE, it should be rcu-safe. + * If we read NULL we need a barrier paired with + * smp_store_release() in ep_poll_callback(), otherwise + * we rely on whead->lock. + */ + whead = smp_load_acquire(&pwq->whead); if (whead) remove_wait_queue(whead, &pwq->wait); rcu_read_unlock(); @@ -1134,17 +1139,6 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v struct eventpoll *ep = epi->ep; int ewake = 0; - if ((unsigned long)key & POLLFREE) { - ep_pwq_from_wait(wait)->whead = NULL; - /* - * whead = NULL above can race with ep_remove_wait_queue() - * which can do another remove_wait_queue() after us, so we - * can't use __remove_wait_queue(). whead->lock is held by - * the caller. - */ - list_del_init(&wait->entry); - } - spin_lock_irqsave(&ep->lock, flags); ep_set_busy_poll_napi_id(epi); @@ -1228,10 +1222,26 @@ out_unlock: if (pwake) ep_poll_safewake(&ep->poll_wait); - if (epi->event.events & EPOLLEXCLUSIVE) - return ewake; + if (!(epi->event.events & EPOLLEXCLUSIVE)) + ewake = 1; + + if ((unsigned long)key & POLLFREE) { + /* + * If we race with ep_remove_wait_queue() it can miss + * ->whead = NULL and do another remove_wait_queue() after + * us, so we can't use __remove_wait_queue(). + */ + list_del_init(&wait->entry); + /* + * ->whead != NULL protects us from the race with ep_free() + * or ep_remove(), ep_remove_wait_queue() takes whead->lock + * held by the caller. Once we nullify it, nothing protects + * ep/epi or even wait. + */ + smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); + } - return 1; + return ewake; } /* diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 79dafa71effd..51f0aea70cb4 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -175,11 +175,8 @@ ext2_get_acl(struct inode *inode, int type) return acl; } -/* - * inode->i_mutex: down - */ -int -ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int +__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int name_index; void *value = NULL; @@ -189,13 +186,6 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: @@ -222,6 +212,31 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) } /* + * inode->i_mutex: down + */ +int +ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int error; + int update_mode = 0; + umode_t mode = inode->i_mode; + + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + return error; + update_mode = 1; + } + error = __ext2_set_acl(inode, acl, type); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } + return error; +} + +/* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * * dir->i_mutex: down @@ -238,12 +253,12 @@ ext2_init_acl(struct inode *inode, struct inode *dir) return error; if (default_acl) { - error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } return error; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 09441ae07a5b..46ff2229ff5e 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -193,13 +193,6 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); - } break; case ACL_TYPE_DEFAULT: @@ -221,8 +214,9 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, value, size, xattr_flags); kfree(value); - if (!error) + if (!error) { set_cached_acl(inode, type, acl); + } return error; } @@ -233,6 +227,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle_t *handle; int error, credits, retries = 0; size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0; + umode_t mode = inode->i_mode; + int update_mode = 0; error = dquot_initialize(inode); if (error) @@ -247,7 +243,20 @@ retry: if (IS_ERR(handle)) return PTR_ERR(handle); + if ((type == ACL_TYPE_ACCESS) && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto out_stop; + update_mode = 1; + } + error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + ext4_mark_inode_dirty(handle, inode); + } +out_stop: ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ebde0cd632e..a2bb7d2870e4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -961,7 +961,7 @@ struct ext4_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ @@ -1049,10 +1049,8 @@ struct ext4_inode_info { ext4_group_t i_last_alloc_group; /* allocation reservation info for delalloc */ - /* In case of bigalloc, these refer to clusters rather than blocks */ + /* In case of bigalloc, this refer to clusters rather than blocks */ unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; ext4_lblk_t i_da_metadata_calc_last_lblock; int i_da_metadata_calc_len; @@ -2022,7 +2020,8 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define is_dx(dir) (ext4_has_feature_dir_index((dir)->i_sb) && \ ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) -#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_MAX(dir) unlikely((dir)->i_nlink >= EXT4_LINK_MAX && \ + !(ext4_has_feature_dir_nlink((dir)->i_sb) && is_dx(dir))) #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) /* Legal values for the dx_root hash_version field: */ @@ -2462,6 +2461,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); int ext4_inode_is_fast_symlink(struct inode *inode); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, + bool wait, struct buffer_head **bhs); int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, @@ -3074,7 +3075,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); #define S_SHIFT 12 -static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { +static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dabad1bc8617..48143e32411c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -227,6 +227,9 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); +int ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc); /* * Wrapper functions with which ext4 calls into JBD. */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e0a8425ff74d..97f0fd06728d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4652,7 +4652,7 @@ retry: static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ext4_lblk_t len, loff_t new_size, - int flags, int mode) + int flags) { struct inode *inode = file_inode(file); handle_t *handle; @@ -4815,7 +4815,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits) >> blkbits, (round_up((offset + len), 1 << blkbits) - round_down(offset, 1 << blkbits)) >> blkbits, - new_size, flags, mode); + new_size, flags); if (ret) goto out_dio; @@ -4841,7 +4841,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, inode->i_mtime = inode->i_ctime = current_time(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + flags); up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) goto out_dio; @@ -4976,8 +4976,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; @@ -5837,7 +5836,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, if (e1_blk > lblk1) next1 = e1_blk; if (e2_blk > lblk2) - next2 = e1_blk; + next2 = e2_blk; /* Do we have something to swap */ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) goto finish; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 58294c9a7e1d..0d7cf0cc9b87 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -537,6 +537,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = page_offset(page); bh = head = page_buffers(page); do { + if (lastoff + bh->b_size <= startoff) + goto next; if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) @@ -551,6 +553,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); goto out; } +next: lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3c600f02673f..c774bdc22759 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -892,7 +892,7 @@ static int ext4_dio_get_block_unwritten_async(struct inode *inode, /* * Get block function for non-AIO DIO writes when we create unwritten extent if * blocks are not allocated yet. The extent will be converted to written - * after IO is complete from ext4_ext_direct_IO() function. + * after IO is complete by ext4_direct_IO_write(). */ static int ext4_dio_get_block_unwritten_sync(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -907,7 +907,7 @@ static int ext4_dio_get_block_unwritten_sync(struct inode *inode, /* * Mark inode as having pending DIO writes to unwritten extents. - * ext4_ext_direct_IO() checks this flag and converts extents to + * ext4_direct_IO_write() checks this flag and converts extents to * written. */ if (!ret && buffer_unwritten(bh_result)) @@ -1015,6 +1015,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return ERR_PTR(-EIO); } +/* Read a contiguous batch of blocks. */ +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, + bool wait, struct buffer_head **bhs) +{ + int i, err; + + for (i = 0; i < bh_count; i++) { + bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */); + if (IS_ERR(bhs[i])) { + err = PTR_ERR(bhs[i]); + bh_count = i; + goto out_brelse; + } + } + + for (i = 0; i < bh_count; i++) + /* Note that NULL bhs[i] is valid because of holes. */ + if (bhs[i] && !buffer_uptodate(bhs[i])) + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, + &bhs[i]); + + if (!wait) + return 0; + + for (i = 0; i < bh_count; i++) + if (bhs[i]) + wait_on_buffer(bhs[i]); + + for (i = 0; i < bh_count; i++) { + if (bhs[i] && !buffer_uptodate(bhs[i])) { + err = -EIO; + goto out_brelse; + } + } + return 0; + +out_brelse: + for (i = 0; i < bh_count; i++) { + brelse(bhs[i]); + bhs[i] = NULL; + } + return err; +} + int ext4_walk_page_buffers(handle_t *handle, struct buffer_head *head, unsigned from, @@ -5658,22 +5702,16 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, return err; } -/* - * Expand an inode by new_extra_isize bytes. - * Returns 0 on success or negative error number on failure. - */ -static int ext4_expand_extra_isize(struct inode *inode, - unsigned int new_extra_isize, - struct ext4_iloc iloc, - handle_t *handle) +static int __ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc, + handle_t *handle, int *no_expand) { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + int error; - if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - return 0; - - raw_inode = ext4_raw_inode(&iloc); + raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); @@ -5688,8 +5726,98 @@ static int ext4_expand_extra_isize(struct inode *inode, } /* try to expand with EAs present */ - return ext4_expand_extra_isize_ea(inode, new_extra_isize, - raw_inode, handle); + error = ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); + if (error) { + /* + * Inode size expansion failed; don't try again + */ + *no_expand = 1; + } + + return error; +} + +/* + * Expand an inode by new_extra_isize bytes. + * Returns 0 on success or negative error number on failure. + */ +static int ext4_try_to_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc iloc, + handle_t *handle) +{ + int no_expand; + int error; + + if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) + return -EOVERFLOW; + + /* + * In nojournal mode, we can immediately attempt to expand + * the inode. When journaled, we first need to obtain extra + * buffer credits since we may write into the EA block + * with this same handle. If journal_extend fails, then it will + * only result in a minor loss of functionality for that inode. + * If this is felt to be critical, then e2fsck should be run to + * force a large enough s_min_extra_isize. + */ + if (ext4_handle_valid(handle) && + jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0) + return -ENOSPC; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return -EBUSY; + + error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc, + handle, &no_expand); + ext4_write_unlock_xattr(inode, &no_expand); + + return error; +} + +int ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc) +{ + handle_t *handle; + int no_expand; + int error, rc; + + if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { + brelse(iloc->bh); + return -EOVERFLOW; + } + + handle = ext4_journal_start(inode, EXT4_HT_INODE, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + brelse(iloc->bh); + return error; + } + + ext4_write_lock_xattr(inode, &no_expand); + + BUFFER_TRACE(iloc.bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, iloc->bh); + if (error) { + brelse(iloc->bh); + goto out_stop; + } + + error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc, + handle, &no_expand); + + rc = ext4_mark_iloc_dirty(handle, inode, iloc); + if (!error) + error = rc; + + ext4_write_unlock_xattr(inode, &no_expand); +out_stop: + ext4_journal_stop(handle); + return error; } /* @@ -5709,44 +5837,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { struct ext4_iloc iloc; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - static unsigned int mnt_count; - int err, ret; + int err; might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) return err; - if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && - !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { - /* - * In nojournal mode, we can immediately attempt to expand - * the inode. When journaled, we first need to obtain extra - * buffer credits since we may write into the EA block - * with this same handle. If journal_extend fails, then it will - * only result in a minor loss of functionality for that inode. - * If this is felt to be critical, then e2fsck should be run to - * force a large enough s_min_extra_isize. - */ - if (!ext4_handle_valid(handle) || - jbd2_journal_extend(handle, - EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) { - ret = ext4_expand_extra_isize(inode, - sbi->s_want_extra_isize, - iloc, handle); - if (ret) { - if (mnt_count != - le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, - "Unable to expand inode %lu. Delete" - " some EAs or run e2fsck.", - inode->i_ino); - mnt_count = - le16_to_cpu(sbi->s_es->s_mnt_count); - } - } - } - } + + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize) + ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize, + iloc, handle); + return ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 42b3a73143cf..afb66d4ab5cf 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -64,18 +64,16 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); - memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags)); - memswap(&inode1->i_version, &inode2->i_version, - sizeof(inode1->i_version)); - memswap(&inode1->i_blocks, &inode2->i_blocks, - sizeof(inode1->i_blocks)); - memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes)); - memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime)); - memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime)); + swap(inode1->i_flags, inode2->i_flags); + swap(inode1->i_version, inode2->i_version); + swap(inode1->i_blocks, inode2->i_blocks); + swap(inode1->i_bytes, inode2->i_bytes); + swap(inode1->i_atime, inode2->i_atime); + swap(inode1->i_mtime, inode2->i_mtime); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); - memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); - memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); + swap(ei1->i_flags, ei2->i_flags); + swap(ei1->i_disksize, ei2->i_disksize); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); @@ -351,11 +349,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) raw_inode = ext4_raw_inode(&iloc); if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { - err = -EOVERFLOW; + err = ext4_expand_extra_isize(inode, + EXT4_SB(sb)->s_want_extra_isize, + &iloc); + if (err) + goto out_unlock; + } else { brelse(iloc.bh); - goto out_unlock; } - brelse(iloc.bh); dquot_initialize(inode); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 581e357e8406..701085620cd8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2295,6 +2295,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) int err, buddy_loaded = 0; struct ext4_buddy e4b; struct ext4_group_info *grinfo; + unsigned char blocksize_bits = min_t(unsigned char, + sb->s_blocksize_bits, + EXT4_MAX_BLOCK_LOG_SIZE); struct sg { struct ext4_group_info info; ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; @@ -2306,8 +2309,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); - i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + + i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + sizeof(struct ext4_group_info); + grinfo = ext4_get_group_info(sb, group); /* Load the group info in memory only if not already loaded. */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { @@ -2327,7 +2331,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, sg.info.bb_fragments, sg.info.bb_first_free); for (i = 0; i <= 13; i++) - seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? + seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); seq_printf(seq, " ]\n"); @@ -2892,8 +2896,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) break; } - if (discard_bio) + if (discard_bio) { submit_bio_wait(discard_bio); + bio_put(discard_bio); + } } list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 13f0cadb1238..c1cf020d1889 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1342,13 +1342,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; - ext4_lblk_t start, block, b; + ext4_lblk_t start, block; const u8 *name = d_name->name; - int ra_max = 0; /* Number of bh's in the readahead + size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ - int ra_ptr = 0; /* Current index into readahead + size_t ra_ptr = 0; /* Current index into readahead buffer */ - int num = 0; ext4_lblk_t nblocks; int i, namelen, retval; struct ext4_filename fname; @@ -1411,31 +1410,17 @@ restart: if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; - b = block; - for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { - /* - * Terminate if we reach the end of the - * directory and must wrap, or if our - * search has finished at this block. - */ - if (b >= nblocks || (num && block == start)) { - bh_use[ra_max] = NULL; - break; - } - num++; - bh = ext4_getblk(NULL, dir, b++, 0); - if (IS_ERR(bh)) { - if (ra_max == 0) { - ret = bh; - goto cleanup_and_exit; - } - break; - } - bh_use[ra_max] = bh; - if (bh) - ll_rw_block(REQ_OP_READ, - REQ_META | REQ_PRIO, - 1, &bh); + if (block < start) + ra_max = start - block; + else + ra_max = nblocks - block; + ra_max = min(ra_max, ARRAY_SIZE(bh_use)); + retval = ext4_bread_batch(dir, block, ra_max, + false /* wait */, bh_use); + if (retval) { + ret = ERR_PTR(retval); + ra_max = 0; + goto cleanup_and_exit; } } if ((bh = bh_use[ra_ptr++]) == NULL) @@ -2395,19 +2380,22 @@ out: } /* - * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, - * since this indicates that nlinks count was previously 1. + * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2 + * since this indicates that nlinks count was previously 1 to avoid overflowing + * the 16-bit i_links_count field on disk. Directories with i_nlink == 1 mean + * that subdirectory link counts are not being maintained accurately. + * + * The caller has already checked for i_nlink overflow in case the DIR_LINK + * feature is not enabled and returned -EMLINK. The is_dx() check is a proxy + * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set + * on regular files) and to avoid creating huge/slow non-HTREE directories. */ static void ext4_inc_count(handle_t *handle, struct inode *inode) { inc_nlink(inode); - if (is_dx(inode) && inode->i_nlink > 1) { - /* limit is 16-bit i_links_count */ - if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { - set_nlink(inode, 1); - ext4_set_feature_dir_nlink(inode->i_sb); - } - } + if (is_dx(inode) && + (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2)) + set_nlink(inode, 1); } /* diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c3ed9021b781..035cd3f4785e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1927,7 +1927,8 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb); n_group--; /* set to last group number */ } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0886fe82e9c4..d61a70e2193a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_es_shk_nr = 0; ei->i_es_shrink_lblk = 0; ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index cff4f41ced61..3dd970168448 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) */ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) { - unsigned long block = 0; - struct buffer_head *bh; - int blocksize = ea_inode->i_sb->s_blocksize; - size_t csize, copied = 0; - void *copy_pos = buf; - - while (copied < size) { - csize = (size - copied) > blocksize ? blocksize : size - copied; - bh = ext4_bread(NULL, ea_inode, block, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - if (!bh) - return -EFSCORRUPTED; + int blocksize = 1 << ea_inode->i_blkbits; + int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits; + int tail_size = (size % blocksize) ?: blocksize; + struct buffer_head *bhs_inline[8]; + struct buffer_head **bhs = bhs_inline; + int i, ret; + + if (bh_count > ARRAY_SIZE(bhs_inline)) { + bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS); + if (!bhs) + return -ENOMEM; + } - memcpy(copy_pos, bh->b_data, csize); - brelse(bh); + ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, + true /* wait */, bhs); + if (ret) + goto free_bhs; - copy_pos += csize; - block += 1; - copied += csize; + for (i = 0; i < bh_count; i++) { + /* There shouldn't be any holes in ea_inode. */ + if (!bhs[i]) { + ret = -EFSCORRUPTED; + goto put_bhs; + } + memcpy((char *)buf + blocksize * i, bhs[i]->b_data, + i < bh_count - 1 ? blocksize : tail_size); } - return 0; + ret = 0; +put_bhs: + for (i = 0; i < bh_count; i++) + brelse(bhs[i]); +free_bhs: + if (bhs != bhs_inline) + kfree(bhs); + return ret; } static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, @@ -451,6 +464,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, } /* Do not add ea_inode to the cache. */ ea_inode_cache = NULL; + err = 0; } else if (err) goto out; @@ -1529,7 +1543,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Clear padding bytes. */ memset(val + i->value_len, 0, new_size - i->value_len); } - return 0; + goto update_hash; } /* Compute min_offs and last. */ @@ -1693,6 +1707,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_value_size = cpu_to_le32(i->value_len); } +update_hash: if (i->value) { __le32 hash = 0; @@ -1711,7 +1726,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_name_len, &crc32c_hash, 1); } else if (is_block) { - __le32 *value = s->base + min_offs - new_size; + __le32 *value = s->base + le16_to_cpu( + here->e_value_offs); hash = ext4_xattr_hash_entry(here->e_name, here->e_name_len, value, @@ -1815,9 +1831,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); - if (!error) - ext4_xattr_block_cache_insert(ea_block_cache, - bs->bh); ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) @@ -1973,6 +1986,7 @@ inserted: } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_block_cache_insert(ea_block_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { @@ -2625,23 +2639,21 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh = NULL; + struct buffer_head *bh; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; size_t min_offs; size_t ifree, bfree; int total_ino; void *base, *end; int error = 0, tried_min_extra_isize = 0; - int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); + int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ - int no_expand; - - if (ext4_write_trylock_xattr(inode, &no_expand) == 0) - return 0; retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - goto out; + return 0; header = IHDR(inode, raw_inode); @@ -2676,6 +2688,7 @@ retry: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EFSCORRUPTED; + brelse(bh); goto cleanup; } base = BHDR(bh); @@ -2683,11 +2696,11 @@ retry: min_offs = end - base; bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base, NULL); + brelse(bh); if (bfree + ifree < isize_diff) { if (!tried_min_extra_isize && s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } error = -ENOSPC; @@ -2705,7 +2718,6 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } goto cleanup; @@ -2717,18 +2729,13 @@ shift: EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; - brelse(bh); -out: - ext4_write_unlock_xattr(inode, &no_expand); - return 0; cleanup: - brelse(bh); - /* - * Inode size expansion failed; don't try again - */ - no_expand = 1; - ext4_write_unlock_xattr(inode, &no_expand); + if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { + ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", + inode->i_ino); + mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count); + } return error; } diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index a140c5e3dc54..b4b8438c42ef 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -211,7 +211,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { + if (acl && !ipage) { error = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (error) return error; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 56bbf592e487..5b876f6d3f6b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -879,6 +879,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) struct inode *inode; struct f2fs_inode_info *fi; bool is_dir = (type == DIR_INODE); + unsigned long ino = 0; trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, get_pages(sbi, is_dir ? @@ -901,8 +902,17 @@ retry: inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[type]); if (inode) { + unsigned long cur_ino = inode->i_ino; + filemap_fdatawrite(inode->i_mapping); iput(inode); + /* We need to give cpu to another writers. */ + if (ino == cur_ino) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + } else { + ino = cur_ino; + } } else { /* * We should submit bio, since it exists several diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0e6d2c65a9e..2706130c261b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1538,7 +1538,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) { - inode_unlock(inode); ret = -EPERM; goto unlock_out; } @@ -1549,9 +1548,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); ret = -EPERM; - goto out; + goto unlock_out; } } @@ -1564,7 +1562,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) f2fs_mark_inode_dirty_sync(inode, false); unlock_out: inode_unlock(inode); -out: mnt_drop_write_file(filp); return ret; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 9adc202fcd6f..71191d89917d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -11,6 +11,7 @@ */ #include <linux/proc_fs.h> #include <linux/f2fs_fs.h> +#include <linux/seq_file.h> #include "f2fs.h" #include "segment.h" diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3ee4fdc3da9e..ab60051be6e5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; - ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); + ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); if (unlikely(!ff)) return NULL; @@ -609,7 +609,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_io_priv *io = req->io; ssize_t pos = -1; - fuse_release_user_pages(req, !io->write); + fuse_release_user_pages(req, io->should_dirty); if (io->write) { if (req->misc.write.in.size != req->misc.write.out.size) @@ -1316,7 +1316,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, loff_t *ppos, int flags) { int write = flags & FUSE_DIO_WRITE; - bool should_dirty = !write && iter_is_iovec(iter); int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->file; struct inode *inode = file->f_mapping->host; @@ -1346,6 +1345,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, inode_unlock(inode); } + io->should_dirty = !write && iter_is_iovec(iter); while (count) { size_t nres; fl_owner_t owner = current->files; @@ -1360,7 +1360,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, nres = fuse_send_read(req, io, pos, nbytes, owner); if (!io->async) - fuse_release_user_pages(req, should_dirty); + fuse_release_user_pages(req, io->should_dirty); if (req->out.h.error) { err = req->out.h.error; break; @@ -1669,6 +1669,7 @@ err_nofile: err_free: fuse_request_free(req); err: + mapping_set_error(page->mapping, error); end_page_writeback(page); return error; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1bd7ffdad593..bd4d2a3e1ec1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -249,6 +249,7 @@ struct fuse_io_priv { size_t size; __u64 offset; bool write; + bool should_dirty; int err; struct kiocb *iocb; struct file *file; diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c index 9b92058a1240..6bb5d7c42888 100644 --- a/fs/hfsplus/posix_acl.c +++ b/fs/hfsplus/posix_acl.c @@ -51,8 +51,8 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) return acl; } -int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, - int type) +static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, + int type) { int err; char *xattr_name; @@ -64,12 +64,6 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, switch (type) { case ACL_TYPE_ACCESS: xattr_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - err = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (err) - return err; - } - err = 0; break; case ACL_TYPE_DEFAULT: @@ -105,6 +99,18 @@ end_set_acl: return err; } +int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int err; + + if (type == ACL_TYPE_ACCESS && acl) { + err = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (err) + return err; + } + return __hfsplus_set_posix_acl(inode, acl, type); +} + int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) { int err = 0; @@ -122,15 +128,15 @@ int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) return err; if (default_acl) { - err = hfsplus_set_posix_acl(inode, default_acl, - ACL_TYPE_DEFAULT); + err = __hfsplus_set_posix_acl(inode, default_acl, + ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!err) - err = hfsplus_set_posix_acl(inode, acl, - ACL_TYPE_ACCESS); + err = __hfsplus_set_posix_acl(inode, acl, + ACL_TYPE_ACCESS); posix_acl_release(acl); } return err; diff --git a/fs/iomap.c b/fs/iomap.c index 039266128b7f..59cc98ad7577 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -278,7 +278,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes; /* Bytes to write to page */ offset = (pos & (PAGE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_SIZE - offset, length); + bytes = min_t(loff_t, PAGE_SIZE - offset, length); rpage = __iomap_read_page(inode, pos); if (IS_ERR(rpage)) @@ -373,7 +373,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, unsigned offset, bytes; offset = pos & (PAGE_SIZE - 1); /* Within page */ - bytes = min_t(unsigned, PAGE_SIZE - offset, count); + bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) status = iomap_dax_zero(pos, offset, bytes, iomap); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 8cf898a59730..217a5e7815da 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -410,7 +410,11 @@ static int parse_options(char *options, struct iso9660_options *popt) if (match_int(&args[0], &option)) return 0; n = option; - if (n > 99) + /* + * Track numbers are supposed to be in range 1-99, the + * mount option starts indexing at 0. + */ + if (n >= 99) return 0; popt->session = n + 1; break; @@ -543,7 +547,7 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) vol_desc_start=0; ms_info.addr_format=CDROM_LBA; - if(session >= 0 && session <= 99) { + if (session > 0) { struct cdrom_tocentry Te; Te.cdte_track=session; Te.cdte_format=CDROM_LBA; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 7bc186f4ed4d..2e71b6e7e646 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -77,13 +77,6 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (rc) - return rc; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: ea_name = XATTR_NAME_POSIX_ACL_DEFAULT; @@ -115,12 +108,27 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int rc; tid_t tid; + int update_mode = 0; + umode_t mode = inode->i_mode; tid = txBegin(inode->i_sb, 0); mutex_lock(&JFS_IP(inode)->commit_mutex); + if (type == ACL_TYPE_ACCESS && acl) { + rc = posix_acl_update_mode(inode, &mode, &acl); + if (rc) + goto end_tx; + update_mode = 1; + } rc = __jfs_set_acl(tid, inode, type, acl); - if (!rc) + if (!rc) { + if (update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } rc = txCommit(tid, 1, &inode, 0); + } +end_tx: txEnd(tid); mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index bd9b641ada2c..7ddcb445a3d9 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -98,7 +98,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) goto out; } - VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; + VolumeSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; if (VolumeSize) { if (newLVSize > VolumeSize) { @@ -211,7 +211,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) txQuiesce(sb); /* Reset size of direct inode */ - sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; + sbi->direct_inode->i_size = i_size_read(sb->s_bdev->bd_inode); if (sbi->mntflag & JFS_INLINELOG) { /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index e8aad7d87b8c..60726ae7cf26 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -313,7 +313,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, } case Opt_resize_nosize: { - *newLVSize = sb->s_bdev->bd_inode->i_size >> + *newLVSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; if (*newLVSize == 0) pr_err("JFS: Cannot determine volume size\n"); @@ -579,7 +579,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_unload; } inode->i_ino = 0; - inode->i_size = sb->s_bdev->bd_inode->i_size; + inode->i_size = i_size_read(sb->s_bdev->bd_inode); inode->i_mapping->a_ops = &jfs_metapage_aops; hlist_add_fake(&inode->i_hash); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); @@ -619,16 +619,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto out_no_root; - /* logical blocks are represented by 40 bits in pxd_t, etc. */ - sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; -#if BITS_PER_LONG == 32 - /* - * Page cache is indexed by long. - * I would use MAX_LFS_FILESIZE, but it's only half as big + /* logical blocks are represented by 40 bits in pxd_t, etc. + * and page cache is indexed by long */ - sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1, - (u64)sb->s_maxbytes); -#endif + sb->s_maxbytes = min(((loff_t)sb->s_blocksize) << 40, MAX_LFS_FILESIZE); sb->s_time_gran = 1; return 0; diff --git a/fs/mount.h b/fs/mount.h index de45d9e76748..6790767d1883 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -16,7 +16,7 @@ struct mnt_namespace { u64 event; unsigned int mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; -}; +} __randomize_layout; struct mnt_pcp { int mnt_count; @@ -69,7 +69,7 @@ struct mount { struct hlist_head mnt_pins; struct fs_pin mnt_umount; struct dentry *mnt_ex_mountpoint; -}; +} __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ diff --git a/fs/namei.c b/fs/namei.c index 88fd38d1e3e7..ddb6a7c2b3d4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -524,7 +524,7 @@ struct nameidata { struct inode *link_inode; unsigned root_seq; int dfd; -}; +} __randomize_layout; static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) { diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 69d02cf8cf37..5f93cfacb3d1 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM + depends on 64BIT || LBDAF default NFS_V4 config PNFS_FLEXFILE_LAYOUT diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ee5ddbd36088..efebe6cf4378 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -820,6 +820,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; + target->port = source->port; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ac484fe0dee..3522b1249019 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2372,16 +2372,40 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); +#define NFS_MAY_READ (NFS4_ACCESS_READ) +#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND | \ + NFS4_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND) +#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE +#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +static int +nfs_access_calc_mask(u32 access_result, umode_t umode) +{ + int mask = 0; + + if (access_result & NFS_MAY_READ) + mask |= MAY_READ; + if (S_ISDIR(umode)) { + if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP) + mask |= MAY_EXEC; + } else if (S_ISREG(umode)) { + if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE) + mask |= MAY_EXEC; + } else if (access_result & NFS_MAY_WRITE) + mask |= MAY_WRITE; + return mask; +} + void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { - entry->mask = 0; - if (access_result & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (access_result & - (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + entry->mask = access_result; } EXPORT_SYMBOL_GPL(nfs_access_set_mask); @@ -2389,6 +2413,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; + int cache_mask; int status; trace_nfs_access_enter(inode); @@ -2404,7 +2429,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) goto out; /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE + | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); @@ -2418,7 +2444,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) } nfs_access_add_cache(inode, &cache); out_cached: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode); + if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: trace_nfs_access_exit(inode, status); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5713eb32a45e..af330c31f627 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -617,6 +617,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) goto out; } + if (iocb->ki_pos > i_size_read(inode)) + nfs_revalidate_mapping(inode, file->f_mapping); nfs_start_io_write(inode); result = generic_write_checks(iocb, from); @@ -750,7 +752,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - nfs_zap_mapping(inode, filp->f_mapping); + nfs_zap_caches(inode); out: return status; } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 080fc6b278bd..44c638b7876c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -542,6 +542,10 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; + /* Is the deviceid already set? If so, we're good. */ + if (fl->dsaddr != NULL) + return 0; + /* find and reference the deviceid */ d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, lo->plh_lc_cred, gfp_flags); @@ -553,8 +557,6 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, if (filelayout_test_devid_unavailable(&dsaddr->id_node)) goto out_put; - fl->dsaddr = dsaddr; - if (fl->first_stripe_index >= dsaddr->stripe_count) { dprintk("%s Bad first_stripe_index %u\n", __func__, fl->first_stripe_index); @@ -570,6 +572,13 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, goto out_put; } status = 0; + + /* + * Atomic compare and xchange to ensure we don't scribble + * over a non-NULL pointer. + */ + if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL) + goto out_put; out: return status; out_put: diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1f2ac3dd0fe5..b0fa83a60754 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1842,6 +1842,10 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) int vers, ret; struct nfs_fh *fh; + if (!lseg || !(pnfs_is_valid_lseg(lseg) || + test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))) + goto out_err; + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); if (!ds) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 6df7a0cf5660..f32c58bbe556 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -32,6 +32,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { nfs4_print_deviceid(&mirror_ds->id_node.deviceid); nfs4_pnfs_ds_put(mirror_ds->ds); + kfree(mirror_ds->ds_versions); kfree_rcu(mirror_ds, id_node.rcu); } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 3efe946672be..60bad882c123 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -512,7 +512,7 @@ static const struct rpc_version mnt_version1 = { .counts = mnt_counts, }; -static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)]; +static unsigned int mnt3_counts[ARRAY_SIZE(mnt3_procedures)]; static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index df4a7d3ab915..d1e87ec0df84 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -220,15 +220,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, res.fattr); - if (status == 0) { - entry->mask = 0; - if (res.access & NFS3_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; - } + if (status == 0) + nfs_access_set_mask(entry, res.access); nfs_free_fattr(res.fattr); out: dprintk("NFS reply access: %d\n", status); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 50566acb5469..e9bea90dc017 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -660,9 +660,6 @@ int nfs4_detect_session_trunking(struct nfs_client *clp, if (!nfs4_check_server_scope(clp->cl_serverscope, res->server_scope)) goto out_err; - /* Session trunking passed, add the xprt */ - rpc_clnt_xprt_switch_add_xprt(clp->cl_rpcclient, xprt); - pr_info("NFS: %s: Session trunking succeeded for %s\n", clp->cl_hostname, xprt->address_strings[RPC_DISPLAY_ADDR]); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0b4e1091340..d90132642340 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2236,7 +2236,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred, int openflags) { struct nfs_access_entry cache; - u32 mask; + u32 mask, flags; /* access call failed or for some reason the server doesn't * support any access modes -- defer access call until later */ @@ -2250,16 +2250,20 @@ static int nfs4_opendata_access(struct rpc_cred *cred, */ if (openflags & __FMODE_EXEC) { /* ONLY check for exec rights */ - mask = MAY_EXEC; + if (S_ISDIR(state->inode->i_mode)) + mask = NFS4_ACCESS_LOOKUP; + else + mask = NFS4_ACCESS_EXECUTE; } else if ((fmode & FMODE_READ) && !opendata->file_created) - mask = MAY_READ; + mask = NFS4_ACCESS_READ; cache.cred = cred; cache.jiffies = jiffies; nfs_access_set_mask(&cache, opendata->o_res.access_result); nfs_access_add_cache(state->inode, &cache); - if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) + flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP; + if ((mask & ~cache.mask & flags) == 0) return 0; return -EACCES; @@ -2549,9 +2553,8 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) clear_bit(NFS_O_RDWR_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags); stateid->type = NFS4_INVALID_STATEID_TYPE; - } - if (status != NFS_OK) return status; + } if (nfs_open_stateid_recover_openmode(state)) return -NFS4ERR_OPENMODE; return NFS_OK; @@ -6492,7 +6495,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&q->lock, flags); - freezable_schedule_timeout_interruptible(NFS4_LOCK_MAXTIMEOUT); + freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT); } finish_wait(q, &wait); @@ -7457,7 +7460,7 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) cdata->res.server_scope = NULL; } /* Save the EXCHANGE_ID verifier session trunk tests */ - memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + memcpy(clp->cl_confirm.data, cdata->args.verifier.data, sizeof(clp->cl_confirm.data)); } out: @@ -7470,10 +7473,6 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - if (cdata->xprt) { - xprt_put(cdata->xprt); - rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); - } nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); @@ -7494,7 +7493,6 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { - nfs4_verifier verifier; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], .rpc_cred = cred, @@ -7503,7 +7501,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, .rpc_client = clp->cl_rpcclient, .callback_ops = &nfs4_exchange_id_call_ops, .rpc_message = &msg, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT, }; struct nfs41_exchange_id_data *calldata; struct rpc_task *task; @@ -7518,8 +7516,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; } - if (!xprt) - nfs4_init_boot_verifier(clp, &verifier); + nfs4_init_boot_verifier(clp, &calldata->args.verifier); status = nfs4_init_uniform_client_string(clp); if (status) @@ -7558,11 +7555,9 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (xprt) { calldata->xprt = xprt; task_setup_data.rpc_xprt = xprt; - task_setup_data.flags = - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; - calldata->args.verifier = &clp->cl_confirm; - } else { - calldata->args.verifier = &verifier; + task_setup_data.flags |= RPC_TASK_SOFTCONN; + memcpy(calldata->args.verifier.data, clp->cl_confirm.data, + sizeof(calldata->args.verifier.data)); } calldata->args.client = clp; #ifdef CONFIG_NFS_V4_1_MIGRATION @@ -7581,12 +7576,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (IS_ERR(task)) return PTR_ERR(task); - if (!xprt) { - status = rpc_wait_for_completion_task(task); - if (!status) - status = calldata->rpc_status; - } else /* session trunking test */ - status = calldata->rpc_status; + status = calldata->rpc_status; rpc_put_task(task); out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fa3eb361d4f8..37c8af003275 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1785,7 +1785,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); - encode_nfs4_verifier(xdr, args->verifier); + encode_nfs4_verifier(xdr, &args->verifier); encode_string(xdr, strlen(args->client->cl_owner_id), args->client->cl_owner_id); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index d40755a0984b..25f28fa64c57 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -159,13 +159,18 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, { struct pnfs_commit_bucket *b; struct pnfs_layout_segment *freeme; + int nwritten; int i; lockdep_assert_held(&cinfo->inode->i_lock); restart: for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - if (pnfs_generic_transfer_commit_list(&b->written, dst, - cinfo, 0)) { + nwritten = pnfs_generic_transfer_commit_list(&b->written, + dst, cinfo, 0); + if (!nwritten) + continue; + cinfo->ds->nwritten -= nwritten; + if (list_empty(&b->written)) { freeme = b->wlseg; b->wlseg = NULL; spin_unlock(&cinfo->inode->i_lock); @@ -174,7 +179,6 @@ restart: goto restart; } } - cinfo->ds->nwritten = 0; } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); @@ -183,6 +187,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; struct pnfs_commit_bucket *bucket; struct pnfs_layout_segment *freeme; + struct list_head *pos; LIST_HEAD(pages); int i; @@ -193,6 +198,8 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) continue; freeme = bucket->clseg; bucket->clseg = NULL; + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, &pages); spin_unlock(&cinfo->inode->i_lock); nfs_retry_commit(&pages, freeme, cinfo, i); @@ -217,13 +224,6 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) continue; - /* - * If the layout segment is invalid, then let - * pnfs_generic_retry_commit() clean up the bucket. - */ - if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) && - !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags)) - break; data = nfs_commitdata_alloc(false); if (!data) break; @@ -243,9 +243,12 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, struct nfs_commit_info *cinfo) { struct pnfs_commit_bucket *bucket; + struct list_head *pos; bucket = &cinfo->ds->buckets[data->ds_commit_index]; spin_lock(&cinfo->inode->i_lock); + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, pages); data->lseg = bucket->clseg; bucket->clseg = NULL; @@ -330,7 +333,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } } out: - cinfo->ds->ncommitting = 0; return PNFS_ATTEMPTED; } EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index b45083c0f9ae..49b0a9e7ff18 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -720,8 +720,8 @@ static const struct rpc_version nfs_cb_version4 = { .counts = nfs4_cb_counts, }; -static const struct rpc_version *nfs_cb_version[] = { - &nfs_cb_version4, +static const struct rpc_version *nfs_cb_version[2] = { + [1] = &nfs_cb_version4, }; static const struct rpc_program cb_program; @@ -795,7 +795,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .saddress = (struct sockaddr *) &conn->cb_saddr, .timeout = &timeparms, .program = &cb_program, - .version = 0, + .version = 1, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20fbcab97753..5f940d2a136b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); argp->pagelen = 0; } else { argp->end = argp->p + (PAGE_SIZE>>2); @@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= pages * PAGE_SIZE; len -= pages * PAGE_SIZE; - argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; - argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); + next_decode_page(argp); } argp->p += XDR_QUADLEN(len); diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index dc22ba8c710f..e50a387959bf 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -240,18 +240,6 @@ int ocfs2_set_acl(handle_t *handle, switch (type) { case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - umode_t mode; - - ret = posix_acl_update_mode(inode, &mode, &acl); - if (ret) - return ret; - - ret = ocfs2_acl_set_mode(inode, di_bh, - handle, mode); - if (ret) - return ret; - } break; case ACL_TYPE_DEFAULT: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; @@ -289,7 +277,19 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type) had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); if (had_lock < 0) return had_lock; + if (type == ACL_TYPE_ACCESS && acl) { + umode_t mode; + + status = posix_acl_update_mode(inode, &mode, &acl); + if (status) + goto unlock; + + status = ocfs2_acl_set_mode(inode, bh, NULL, mode); + if (status) + goto unlock; + } status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL); +unlock: ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 641d9ee97f91..48b70e6490f3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -481,17 +481,30 @@ out_cleanup: } static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct cattr *attr, struct dentry *hardlink) + struct cattr *attr, struct dentry *hardlink, + bool origin) { int err; const struct cred *old_cred; struct cred *override_cred; + struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(dentry->d_parent); + err = ovl_copy_up(parent); if (err) return err; old_cred = ovl_override_creds(dentry->d_sb); + + /* + * When linking a file with copy up origin into a new parent, mark the + * new parent dir "impure". + */ + if (origin) { + err = ovl_set_impure(parent, ovl_dentry_upper(parent)); + if (err) + goto out_revert_creds; + } + err = -ENOMEM; override_cred = prepare_creds(); if (override_cred) { @@ -550,7 +563,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, inode_init_owner(inode, dentry->d_parent->d_inode, mode); attr.mode = inode->i_mode; - err = ovl_create_or_link(dentry, inode, &attr, NULL); + err = ovl_create_or_link(dentry, inode, &attr, NULL, false); if (err) iput(inode); @@ -609,7 +622,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, inode = d_inode(old); ihold(inode); - err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old)); + err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old), + ovl_type_origin(old)); if (err) iput(inode); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 69f4fc26ee39..5bc71642b226 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -202,37 +202,38 @@ bool ovl_is_private_xattr(const char *name) sizeof(OVL_XATTR_PREFIX) - 1) == 0; } -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) { int err; - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); const struct cred *old_cred; err = ovl_want_write(dentry); if (err) goto out; - if (!value && !OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (!value && !upperdentry) { + err = vfs_getxattr(realdentry, name, NULL, 0); if (err < 0) goto out_drop_write; } - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; - if (!OVL_TYPE_UPPER(type)) - ovl_path_upper(dentry, &realpath); + realdentry = ovl_dentry_upper(dentry); + } old_cred = ovl_override_creds(dentry->d_sb); if (value) - err = vfs_setxattr(realpath.dentry, name, value, size, flags); + err = vfs_setxattr(realdentry, name, value, size, flags); else { WARN_ON(flags != XATTR_REPLACE); - err = vfs_removexattr(realpath.dentry, name); + err = vfs_removexattr(realdentry, name); } revert_creds(old_cred); @@ -242,12 +243,13 @@ out: return err; } -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; const struct cred *old_cred; + struct dentry *realdentry = + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(realdentry, name, value, size); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 9bc0e580a5b3..8aef2b304b2d 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -397,8 +397,19 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, if (!d_inode(index)) return 0; - err = -EISDIR; - if (d_is_dir(index)) + /* + * Directory index entries are going to be used for looking up + * redirected upper dirs by lower dir fh when decoding an overlay + * file handle of a merge dir. Whiteout index entries are going to be + * used as an indication that an exported overlay file handle should + * be treated as stale (i.e. after unlink of the overlay inode). + * We don't know the verification rules for directory and whiteout + * index entries, because they have not been implemented yet, so return + * EROFS if those entries are found to avoid corrupting an index that + * was created by a newer kernel. + */ + err = -EROFS; + if (d_is_dir(index) || ovl_is_whiteout(index)) goto fail; err = -EINVAL; @@ -436,8 +447,8 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n", - index, err); + pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + index, d_inode(index)->i_mode & S_IFMT, err); goto out; } @@ -502,6 +513,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, goto out; } + inode = d_inode(index); if (d_is_negative(index)) { if (upper && d_inode(origin)->i_nlink > 1) { pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", @@ -511,11 +523,22 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, dput(index); index = NULL; - } else if (upper && d_inode(index) != d_inode(upper)) { - inode = d_inode(index); - pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n", - d_inode(index)->i_ino, - d_inode(upper)->i_ino); + } else if (upper && d_inode(upper) != inode) { + pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", + index, inode->i_ino, d_inode(upper)->i_ino); + goto fail; + } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || + ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + /* + * Index should always be of the same file type as origin + * except for the case of a whiteout index. A whiteout + * index should only exist if all lower aliases have been + * unlinked, which means that finding a lower origin on lookup + * whose index is a whiteout should be treated as an error. + */ + pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); goto fail; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 60d26605e039..e927a62c97ae 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -47,7 +47,8 @@ enum ovl_flag { /* Is the real inode encoded in fid an upper inode? */ #define OVL_FH_FLAG_PATH_UPPER (1 << 2) -#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN) +#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \ + OVL_FH_FLAG_PATH_UPPER) #if defined(__LITTLE_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN 0 @@ -199,6 +200,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); +struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); @@ -270,9 +272,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags); +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0298463cf9c3..3d424a51cabb 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -703,7 +703,10 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, err = PTR_ERR(index); break; } - if (ovl_verify_index(index, lowerstack, numlower)) { + err = ovl_verify_index(index, lowerstack, numlower); + if (err) { + if (err == -EROFS) + break; err = ovl_cleanup(dir, index); if (err) break; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 44dc2d6ffe0f..d86e89f97201 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -692,7 +692,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, handler->name, buffer, size); + return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } static int __maybe_unused @@ -742,7 +742,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, return err; } - err = ovl_xattr_set(dentry, handler->name, value, size, flags); + err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); if (!err) ovl_copyattr(ovl_inode_real(inode), inode); @@ -772,7 +772,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, name, buffer, size); + return ovl_xattr_get(dentry, inode, name, buffer, size); } static int ovl_other_xattr_set(const struct xattr_handler *handler, @@ -780,7 +780,7 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - return ovl_xattr_set(dentry, name, value, size, flags); + return ovl_xattr_set(dentry, inode, name, value, size, flags); } static const struct xattr_handler __maybe_unused @@ -1058,10 +1058,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, OVL_INDEXDIR_NAME, true); - err = PTR_ERR(ufs->indexdir); - if (IS_ERR(ufs->indexdir)) - goto out_put_lower_mnt; - if (ufs->indexdir) { /* Verify upper root is index dir origin */ err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, @@ -1090,6 +1086,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ufs->creator_cred = cred = prepare_creds(); if (!cred) goto out_put_indexdir; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index c492ba75c659..f46ad75dc96a 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -157,9 +157,14 @@ struct dentry *ovl_dentry_real(struct dentry *dentry) return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); } +struct dentry *ovl_i_dentry_upper(struct inode *inode) +{ + return ovl_upperdentry_dereference(OVL_I(inode)); +} + struct inode *ovl_inode_upper(struct inode *inode) { - struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode)); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); return upperdentry ? d_inode(upperdentry) : NULL; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 18694598bebf..aa2b89071630 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -51,7 +51,7 @@ struct proc_dir_entry { spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ u8 namelen; char name[]; -}; +} __randomize_layout; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); @@ -70,7 +70,7 @@ struct proc_inode { struct hlist_node sysctl_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; -}; +} __randomize_layout; /* * General functions @@ -279,7 +279,7 @@ struct proc_maps_private { #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif -}; +} __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 8a428498d6b2..509a61668d90 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -106,13 +106,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_FILE_MAPPED)); show_val_kb(m, "Shmem: ", i.sharedram); show_val_kb(m, "Slab: ", - global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)); + global_node_page_state(NR_SLAB_RECLAIMABLE) + + global_node_page_state(NR_SLAB_UNRECLAIMABLE)); show_val_kb(m, "SReclaimable: ", - global_page_state(NR_SLAB_RECLAIMABLE)); + global_node_page_state(NR_SLAB_RECLAIMABLE)); show_val_kb(m, "SUnreclaim: ", - global_page_state(NR_SLAB_UNRECLAIMABLE)); + global_node_page_state(NR_SLAB_UNRECLAIMABLE)); seq_printf(m, "KernelStack: %8lu kB\n", global_page_state(NR_KERNEL_STACK_KB)); show_val_kb(m, "PageTables: ", diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b836fd61ed87..fe8f3265e877 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -16,9 +16,10 @@ #include <linux/mmu_notifier.h> #include <linux/page_idle.h> #include <linux/shmem_fs.h> +#include <linux/uaccess.h> #include <asm/elf.h> -#include <linux/uaccess.h> +#include <asm/tlb.h> #include <asm/tlbflush.h> #include "internal.h" @@ -1008,6 +1009,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; + struct mmu_gather tlb; int itype; int rv; @@ -1054,6 +1056,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } down_read(&mm->mmap_sem); + tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) @@ -1075,7 +1078,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(mm, 0, -1); - flush_tlb_mm(mm); + tlb_finish_mmu(&tlb, 0, -1); up_read(&mm->mmap_sem); out_mm: mmput(mm); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 53a17496c5c5..566e6ef99f07 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1124,6 +1124,10 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) + dquot->dq_dqb.dqb_btime = (time64_t) 0; + clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) @@ -1145,7 +1149,8 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; - if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } @@ -1381,14 +1386,18 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes) static int info_bdq_free(struct dquot *dquot, qsize_t space) { + qsize_t tspace; + + tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || - dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; - if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit) + if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; - if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit && - dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit) + if (tspace >= dquot->dq_dqb.dqb_bhardlimit && + tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } @@ -2681,7 +2690,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) if (check_blim) { if (!dm->dqb_bsoftlimit || - dm->dqb_curspace < dm->dqb_bsoftlimit) { + dm->dqb_curspace + dm->dqb_rsvspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 3d2256a425ee..54415f0e3d18 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -23,7 +23,8 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) struct reiserfs_transaction_handle th; size_t jcreate_blocks; int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; - + int update_mode = 0; + umode_t mode = inode->i_mode; /* * Pessimism: We can't assume that anything from the xattr root up @@ -37,7 +38,16 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = journal_begin(&th, inode->i_sb, jcreate_blocks); reiserfs_write_unlock(inode->i_sb); if (error == 0) { + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto unlock; + update_mode = 1; + } error = __reiserfs_set_acl(&th, inode, type, acl); + if (!error && update_mode) + inode->i_mode = mode; +unlock: reiserfs_write_lock(inode->i_sb); error2 = journal_end(&th); reiserfs_write_unlock(inode->i_sb); @@ -241,11 +251,6 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; diff --git a/fs/select.c b/fs/select.c index 9d5f15ed87fe..c6362e38ae92 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1164,11 +1164,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, if (ufdset) { return compat_get_bitmap(fdset, ufdset, nr); } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, ALIGN makes sure that - * actually happens. - */ - memset(fdset, 0, ALIGN(nr, BITS_PER_LONG)); + zero_fd_set(nr, fdset); return 0; } } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index cadcd12a3d35..b0d5897bc4e6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -854,6 +854,9 @@ wakeup: __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range); spin_unlock(&ctx->fault_pending_wqh.lock); + /* Flush pending events that may still wait on event_wqh */ + wake_up_all(&ctx->event_wqh); + wake_up_poll(&ctx->fd_wqh, POLLHUP); userfaultfd_ctx_put(ctx); return 0; @@ -1597,7 +1600,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, uffdio_copy.len); mmput(ctx->mm); } else { - return -ENOSPC; + return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; @@ -1643,6 +1646,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); mmput(ctx->mm); + } else { + return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0a9880777c9c..c09c16b1ad3b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5435,6 +5435,7 @@ __xfs_bunmapi( xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t max_len; + xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5534,6 +5535,17 @@ __xfs_bunmapi( */ del = got; wasdel = isnullstartblock(del.br_startblock); + + /* + * Make sure we don't touch multiple AGF headers out of order + * in a single transaction, as that could cause AB-BA deadlocks. + */ + if (!wasdel) { + agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); + if (prev_agno != NULLAGNUMBER && prev_agno > agno) + break; + prev_agno = agno; + } if (got.br_startoff < start) { del.br_startoff = start; del.br_blockcount -= start - got.br_startoff; @@ -6499,6 +6511,15 @@ xfs_bmap_finish_one( xfs_fsblock_t firstfsb; int error = 0; + /* + * firstfsb is tied to the transaction lifetime and is used to + * ensure correct AG locking order and schedule work item + * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us + * to only making one bmap call per transaction, so it should + * be safe to have it as a local variable here. + */ + firstfsb = NULLFSBLOCK; + trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4da85fff69ad..e0bcc4a59efd 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -728,7 +728,8 @@ xfs_btree_firstrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ @@ -757,7 +758,8 @@ xfs_btree_lastrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index d478065b9544..8727a43115ef 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -136,6 +136,8 @@ __xfs_dir3_data_check( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= + p + be16_to_cpu(dup->length)); XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)hdr); @@ -164,6 +166,8 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= + p + ops->data_entsize(dep->namelen)); XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*ops->data_entry_tag_p(dep)) == (char *)dep - (char *)hdr); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index ffd5a15d1bb6..abf5beaae907 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1246,13 +1246,13 @@ xfs_dialloc_ag_inobt( /* free inodes to the left? */ if (useleft && trec.ir_freecount) { - rec = trec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); cur = tcur; pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; + rec = trec; goto alloc_inode; } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 900ea231f9a3..45b1c3b4e047 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1638,6 +1638,10 @@ xfs_refcount_recover_cow_leftovers( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) goto out_trans; + if (!agbp) { + error = -ENOMEM; + goto out_trans; + } cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ceef77c0416a..ff48f0096810 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -874,7 +874,6 @@ xfs_ialloc( case S_IFREG: case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - uint64_t di_flags2 = 0; uint di_flags = 0; if (S_ISDIR(mode)) { @@ -911,20 +910,23 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NODEFRAG; if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; - if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) - di_flags2 |= XFS_DIFLAG2_DAX; ip->i_d.di_flags |= di_flags; - ip->i_d.di_flags2 |= di_flags2; } if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) && pip->i_d.di_version == 3 && ip->i_d.di_version == 3) { + uint64_t di_flags2 = 0; + if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { - ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; } + if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) + di_flags2 |= XFS_DIFLAG2_DAX; + + ip->i_d.di_flags2 |= di_flags2; } /* FALLTHROUGH */ case S_IFLNK: diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0053bcf2b10a..4ebd0bafc914 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -749,9 +749,20 @@ xfs_log_mount_finish( return 0; } + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. Turn it off immediately after recovery finishes + * so that we don't leak the quota inodes if subsequent mount + * activities fail. + */ + mp->m_super->s_flags |= MS_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); + mp->m_super->s_flags &= ~MS_ACTIVE; return error; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index fbe72b134bef..43aa42a3a5d3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -539,6 +539,7 @@ xlog_discard_endio( INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work); queue_work(xfs_discard_wq, &ctx->discard_endio_work); + bio_put(bio); } static void diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40d4e8b4e193..ea7d4b4e50d0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -945,15 +945,6 @@ xfs_mountfs( } /* - * During the second phase of log recovery, we need iget and - * iput to behave like they do for an active filesystem. - * xfs_fs_drop_inode needs to be able to prevent the deletion - * of inodes before we're done replaying log items on those - * inodes. - */ - mp->m_super->s_flags |= MS_ACTIVE; - - /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently * read in. @@ -1028,12 +1019,13 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: - mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); + /* Clean out dquots that might be in memory after quotacheck. */ + xfs_qm_unmount(mp); out_log_dealloc: mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6ce948c436d5..15751dc2a27d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -111,6 +111,9 @@ restart: skipped = 0; break; } + /* we're done if id overflows back to zero */ + if (!next_index) + break; } if (skipped) { diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ab2270a87196..f45fbf0db9bb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -170,6 +170,8 @@ xfs_reflink_find_shared( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; + if (!agbp) + return -ENOMEM; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); @@ -329,7 +331,7 @@ xfs_reflink_convert_cow_extent( xfs_filblks_t count_fsb, struct xfs_defer_ops *dfops) { - xfs_fsblock_t first_block; + xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1; if (imap->br_state == XFS_EXT_NORM) |